diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,350041 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.010031634190783, + "eval_steps": 500, + "global_step": 500000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 2.0200632683815657e-05, + "grad_norm": 316.38043212890625, + "learning_rate": 2e-09, + "loss": 26.2272, + "step": 10 + }, + { + "epoch": 4.040126536763131e-05, + "grad_norm": 576.4369506835938, + "learning_rate": 4e-09, + "loss": 32.9983, + "step": 20 + }, + { + "epoch": 6.060189805144697e-05, + "grad_norm": 472.15478515625, + "learning_rate": 6e-09, + "loss": 23.2158, + "step": 30 + }, + { + "epoch": 8.080253073526263e-05, + "grad_norm": 20.11992835998535, + "learning_rate": 8e-09, + "loss": 19.9305, + "step": 40 + }, + { + "epoch": 0.00010100316341907829, + "grad_norm": 766.17529296875, + "learning_rate": 1e-08, + "loss": 35.6484, + "step": 50 + }, + { + "epoch": 0.00012120379610289395, + "grad_norm": 649.330810546875, + "learning_rate": 1.2e-08, + "loss": 23.5938, + "step": 60 + }, + { + "epoch": 0.0001414044287867096, + "grad_norm": 638.9619750976562, + "learning_rate": 1.4000000000000001e-08, + "loss": 20.7426, + "step": 70 + }, + { + "epoch": 0.00016160506147052525, + "grad_norm": 703.6967163085938, + "learning_rate": 1.6e-08, + "loss": 33.9524, + "step": 80 + }, + { + "epoch": 0.0001818056941543409, + "grad_norm": 785.22998046875, + "learning_rate": 1.8000000000000002e-08, + "loss": 29.1029, + "step": 90 + }, + { + "epoch": 0.00020200632683815657, + "grad_norm": 676.3937377929688, + "learning_rate": 2e-08, + "loss": 26.1458, + "step": 100 + }, + { + "epoch": 0.00022220695952197223, + "grad_norm": 246.762451171875, + "learning_rate": 2.2000000000000002e-08, + "loss": 25.4453, + "step": 110 + }, + { + "epoch": 0.0002424075922057879, + "grad_norm": 1033.1109619140625, + "learning_rate": 2.4e-08, + "loss": 38.0849, + "step": 120 + }, + { + "epoch": 0.0002626082248896035, + "grad_norm": 724.1307983398438, + "learning_rate": 2.6e-08, + "loss": 42.023, + "step": 130 + }, + { + "epoch": 0.0002828088575734192, + "grad_norm": 493.44427490234375, + "learning_rate": 2.8000000000000003e-08, + "loss": 27.2123, + "step": 140 + }, + { + "epoch": 0.00030300949025723485, + "grad_norm": 665.4328002929688, + "learning_rate": 3.0000000000000004e-08, + "loss": 34.0098, + "step": 150 + }, + { + "epoch": 0.0003232101229410505, + "grad_norm": 844.9791259765625, + "learning_rate": 3.2e-08, + "loss": 34.3665, + "step": 160 + }, + { + "epoch": 0.00034341075562486617, + "grad_norm": 32.71566390991211, + "learning_rate": 3.4e-08, + "loss": 33.4008, + "step": 170 + }, + { + "epoch": 0.0003636113883086818, + "grad_norm": 1014.241455078125, + "learning_rate": 3.6000000000000005e-08, + "loss": 37.7545, + "step": 180 + }, + { + "epoch": 0.0003838120209924975, + "grad_norm": 883.66259765625, + "learning_rate": 3.8e-08, + "loss": 27.2009, + "step": 190 + }, + { + "epoch": 0.00040401265367631315, + "grad_norm": 984.9612426757812, + "learning_rate": 4e-08, + "loss": 16.6661, + "step": 200 + }, + { + "epoch": 0.0004242132863601288, + "grad_norm": 553.194091796875, + "learning_rate": 4.2e-08, + "loss": 36.3195, + "step": 210 + }, + { + "epoch": 0.00044441391904394446, + "grad_norm": 313.7496032714844, + "learning_rate": 4.4000000000000004e-08, + "loss": 16.8871, + "step": 220 + }, + { + "epoch": 0.0004646145517277601, + 
"grad_norm": 525.6804809570312, + "learning_rate": 4.6e-08, + "loss": 17.3601, + "step": 230 + }, + { + "epoch": 0.0004848151844115758, + "grad_norm": 206.4374237060547, + "learning_rate": 4.8e-08, + "loss": 34.4888, + "step": 240 + }, + { + "epoch": 0.0005050158170953914, + "grad_norm": 665.89453125, + "learning_rate": 5.0000000000000004e-08, + "loss": 40.5891, + "step": 250 + }, + { + "epoch": 0.000525216449779207, + "grad_norm": 1490.542724609375, + "learning_rate": 5.2e-08, + "loss": 26.4835, + "step": 260 + }, + { + "epoch": 0.0005454170824630227, + "grad_norm": 157.42401123046875, + "learning_rate": 5.400000000000001e-08, + "loss": 10.1466, + "step": 270 + }, + { + "epoch": 0.0005656177151468384, + "grad_norm": 896.1552124023438, + "learning_rate": 5.6000000000000005e-08, + "loss": 46.9781, + "step": 280 + }, + { + "epoch": 0.000585818347830654, + "grad_norm": 1364.9993896484375, + "learning_rate": 5.8e-08, + "loss": 40.9419, + "step": 290 + }, + { + "epoch": 0.0006060189805144697, + "grad_norm": 1216.1474609375, + "learning_rate": 6.000000000000001e-08, + "loss": 30.2287, + "step": 300 + }, + { + "epoch": 0.0006262196131982854, + "grad_norm": 328.39678955078125, + "learning_rate": 6.2e-08, + "loss": 38.7195, + "step": 310 + }, + { + "epoch": 0.000646420245882101, + "grad_norm": 433.4118957519531, + "learning_rate": 6.4e-08, + "loss": 22.1306, + "step": 320 + }, + { + "epoch": 0.0006666208785659167, + "grad_norm": 420.328857421875, + "learning_rate": 6.600000000000001e-08, + "loss": 27.7195, + "step": 330 + }, + { + "epoch": 0.0006868215112497323, + "grad_norm": 185.5102081298828, + "learning_rate": 6.8e-08, + "loss": 37.9056, + "step": 340 + }, + { + "epoch": 0.000707022143933548, + "grad_norm": 337.02044677734375, + "learning_rate": 7e-08, + "loss": 22.7152, + "step": 350 + }, + { + "epoch": 0.0007272227766173637, + "grad_norm": 670.1768188476562, + "learning_rate": 7.200000000000001e-08, + "loss": 28.3785, + "step": 360 + }, + { + "epoch": 0.0007474234093011793, + "grad_norm": 475.4079895019531, + "learning_rate": 7.400000000000001e-08, + "loss": 22.1121, + "step": 370 + }, + { + "epoch": 0.000767624041984995, + "grad_norm": 607.6908569335938, + "learning_rate": 7.6e-08, + "loss": 31.8589, + "step": 380 + }, + { + "epoch": 0.0007878246746688106, + "grad_norm": 800.4915161132812, + "learning_rate": 7.8e-08, + "loss": 21.862, + "step": 390 + }, + { + "epoch": 0.0008080253073526263, + "grad_norm": 1185.45849609375, + "learning_rate": 8e-08, + "loss": 35.8806, + "step": 400 + }, + { + "epoch": 0.000828225940036442, + "grad_norm": 452.5521240234375, + "learning_rate": 8.200000000000002e-08, + "loss": 17.0217, + "step": 410 + }, + { + "epoch": 0.0008484265727202576, + "grad_norm": 125.98954010009766, + "learning_rate": 8.4e-08, + "loss": 29.4705, + "step": 420 + }, + { + "epoch": 0.0008686272054040733, + "grad_norm": 301.3433532714844, + "learning_rate": 8.6e-08, + "loss": 22.8971, + "step": 430 + }, + { + "epoch": 0.0008888278380878889, + "grad_norm": 578.7197875976562, + "learning_rate": 8.800000000000001e-08, + "loss": 20.5219, + "step": 440 + }, + { + "epoch": 0.0009090284707717046, + "grad_norm": 1337.2423095703125, + "learning_rate": 9e-08, + "loss": 38.1059, + "step": 450 + }, + { + "epoch": 0.0009292291034555202, + "grad_norm": 971.8609619140625, + "learning_rate": 9.2e-08, + "loss": 28.7212, + "step": 460 + }, + { + "epoch": 0.0009494297361393359, + "grad_norm": 635.0576171875, + "learning_rate": 9.400000000000001e-08, + "loss": 28.1718, + "step": 470 + }, + { + "epoch": 
0.0009696303688231516, + "grad_norm": 347.902587890625, + "learning_rate": 9.6e-08, + "loss": 35.6753, + "step": 480 + }, + { + "epoch": 0.0009898310015069671, + "grad_norm": 804.568115234375, + "learning_rate": 9.8e-08, + "loss": 52.5154, + "step": 490 + }, + { + "epoch": 0.0010100316341907828, + "grad_norm": 357.3641662597656, + "learning_rate": 1.0000000000000001e-07, + "loss": 23.8221, + "step": 500 + }, + { + "epoch": 0.0010302322668745984, + "grad_norm": 695.1265869140625, + "learning_rate": 1.0200000000000001e-07, + "loss": 29.7152, + "step": 510 + }, + { + "epoch": 0.001050432899558414, + "grad_norm": 466.5853271484375, + "learning_rate": 1.04e-07, + "loss": 23.3024, + "step": 520 + }, + { + "epoch": 0.0010706335322422298, + "grad_norm": 725.106201171875, + "learning_rate": 1.0600000000000001e-07, + "loss": 20.0807, + "step": 530 + }, + { + "epoch": 0.0010908341649260454, + "grad_norm": 500.51708984375, + "learning_rate": 1.0800000000000001e-07, + "loss": 16.9731, + "step": 540 + }, + { + "epoch": 0.001111034797609861, + "grad_norm": 643.19580078125, + "learning_rate": 1.1e-07, + "loss": 35.556, + "step": 550 + }, + { + "epoch": 0.0011312354302936767, + "grad_norm": 606.3302001953125, + "learning_rate": 1.1200000000000001e-07, + "loss": 27.9096, + "step": 560 + }, + { + "epoch": 0.0011514360629774924, + "grad_norm": 455.76116943359375, + "learning_rate": 1.1400000000000001e-07, + "loss": 27.992, + "step": 570 + }, + { + "epoch": 0.001171636695661308, + "grad_norm": 890.6718139648438, + "learning_rate": 1.16e-07, + "loss": 20.3737, + "step": 580 + }, + { + "epoch": 0.0011918373283451237, + "grad_norm": 521.8313598632812, + "learning_rate": 1.1800000000000001e-07, + "loss": 21.4314, + "step": 590 + }, + { + "epoch": 0.0012120379610289394, + "grad_norm": 526.7328491210938, + "learning_rate": 1.2000000000000002e-07, + "loss": 35.9307, + "step": 600 + }, + { + "epoch": 0.001232238593712755, + "grad_norm": 491.2201843261719, + "learning_rate": 1.22e-07, + "loss": 15.4924, + "step": 610 + }, + { + "epoch": 0.0012524392263965707, + "grad_norm": 1669.0023193359375, + "learning_rate": 1.24e-07, + "loss": 31.0749, + "step": 620 + }, + { + "epoch": 0.0012726398590803864, + "grad_norm": 747.734619140625, + "learning_rate": 1.2600000000000002e-07, + "loss": 27.6358, + "step": 630 + }, + { + "epoch": 0.001292840491764202, + "grad_norm": 415.5186767578125, + "learning_rate": 1.28e-07, + "loss": 34.9266, + "step": 640 + }, + { + "epoch": 0.0013130411244480177, + "grad_norm": 812.7051391601562, + "learning_rate": 1.3e-07, + "loss": 28.9554, + "step": 650 + }, + { + "epoch": 0.0013332417571318333, + "grad_norm": 391.8579406738281, + "learning_rate": 1.3200000000000002e-07, + "loss": 16.3211, + "step": 660 + }, + { + "epoch": 0.001353442389815649, + "grad_norm": 569.1922607421875, + "learning_rate": 1.34e-07, + "loss": 40.6097, + "step": 670 + }, + { + "epoch": 0.0013736430224994647, + "grad_norm": 784.5740356445312, + "learning_rate": 1.36e-07, + "loss": 38.8948, + "step": 680 + }, + { + "epoch": 0.0013938436551832803, + "grad_norm": 284.9936218261719, + "learning_rate": 1.3800000000000002e-07, + "loss": 28.3477, + "step": 690 + }, + { + "epoch": 0.001414044287867096, + "grad_norm": 518.3870239257812, + "learning_rate": 1.4e-07, + "loss": 25.8091, + "step": 700 + }, + { + "epoch": 0.0014342449205509116, + "grad_norm": 448.25848388671875, + "learning_rate": 1.4200000000000003e-07, + "loss": 21.6013, + "step": 710 + }, + { + "epoch": 0.0014544455532347273, + "grad_norm": 175.99990844726562, + 
"learning_rate": 1.4400000000000002e-07, + "loss": 30.1821, + "step": 720 + }, + { + "epoch": 0.001474646185918543, + "grad_norm": 863.377685546875, + "learning_rate": 1.46e-07, + "loss": 51.8515, + "step": 730 + }, + { + "epoch": 0.0014948468186023586, + "grad_norm": 175.85592651367188, + "learning_rate": 1.4800000000000003e-07, + "loss": 25.4575, + "step": 740 + }, + { + "epoch": 0.0015150474512861743, + "grad_norm": 640.6859741210938, + "learning_rate": 1.5000000000000002e-07, + "loss": 26.2666, + "step": 750 + }, + { + "epoch": 0.00153524808396999, + "grad_norm": 811.0674438476562, + "learning_rate": 1.52e-07, + "loss": 24.3395, + "step": 760 + }, + { + "epoch": 0.0015554487166538056, + "grad_norm": 628.3569946289062, + "learning_rate": 1.5400000000000003e-07, + "loss": 25.5079, + "step": 770 + }, + { + "epoch": 0.0015756493493376213, + "grad_norm": 796.8837890625, + "learning_rate": 1.56e-07, + "loss": 22.1897, + "step": 780 + }, + { + "epoch": 0.001595849982021437, + "grad_norm": 634.3350830078125, + "learning_rate": 1.5800000000000004e-07, + "loss": 13.3538, + "step": 790 + }, + { + "epoch": 0.0016160506147052526, + "grad_norm": 0.0, + "learning_rate": 1.6e-07, + "loss": 40.9726, + "step": 800 + }, + { + "epoch": 0.0016362512473890682, + "grad_norm": 891.0134887695312, + "learning_rate": 1.62e-07, + "loss": 33.7623, + "step": 810 + }, + { + "epoch": 0.001656451880072884, + "grad_norm": 124.5013198852539, + "learning_rate": 1.6400000000000004e-07, + "loss": 32.6563, + "step": 820 + }, + { + "epoch": 0.0016766525127566996, + "grad_norm": 557.0490112304688, + "learning_rate": 1.66e-07, + "loss": 11.069, + "step": 830 + }, + { + "epoch": 0.0016968531454405152, + "grad_norm": 370.4107971191406, + "learning_rate": 1.68e-07, + "loss": 18.7746, + "step": 840 + }, + { + "epoch": 0.0017170537781243309, + "grad_norm": 470.0067443847656, + "learning_rate": 1.7000000000000001e-07, + "loss": 53.0731, + "step": 850 + }, + { + "epoch": 0.0017372544108081465, + "grad_norm": 250.24252319335938, + "learning_rate": 1.72e-07, + "loss": 19.3808, + "step": 860 + }, + { + "epoch": 0.0017574550434919622, + "grad_norm": 1099.9942626953125, + "learning_rate": 1.74e-07, + "loss": 50.3866, + "step": 870 + }, + { + "epoch": 0.0017776556761757779, + "grad_norm": 599.7510375976562, + "learning_rate": 1.7600000000000001e-07, + "loss": 23.1743, + "step": 880 + }, + { + "epoch": 0.0017978563088595935, + "grad_norm": 401.0577087402344, + "learning_rate": 1.78e-07, + "loss": 18.497, + "step": 890 + }, + { + "epoch": 0.0018180569415434092, + "grad_norm": 1774.6864013671875, + "learning_rate": 1.8e-07, + "loss": 39.7275, + "step": 900 + }, + { + "epoch": 0.0018382575742272248, + "grad_norm": 950.1702880859375, + "learning_rate": 1.8200000000000002e-07, + "loss": 34.1336, + "step": 910 + }, + { + "epoch": 0.0018584582069110405, + "grad_norm": 681.1715087890625, + "learning_rate": 1.84e-07, + "loss": 22.6729, + "step": 920 + }, + { + "epoch": 0.0018786588395948562, + "grad_norm": 548.5640869140625, + "learning_rate": 1.86e-07, + "loss": 17.3724, + "step": 930 + }, + { + "epoch": 0.0018988594722786718, + "grad_norm": 570.0945434570312, + "learning_rate": 1.8800000000000002e-07, + "loss": 40.4859, + "step": 940 + }, + { + "epoch": 0.0019190601049624875, + "grad_norm": 411.9521789550781, + "learning_rate": 1.9e-07, + "loss": 19.4746, + "step": 950 + }, + { + "epoch": 0.0019392607376463031, + "grad_norm": 781.8963623046875, + "learning_rate": 1.92e-07, + "loss": 54.7843, + "step": 960 + }, + { + "epoch": 0.0019594613703301186, 
+ "grad_norm": 296.8689270019531, + "learning_rate": 1.9400000000000002e-07, + "loss": 29.0034, + "step": 970 + }, + { + "epoch": 0.0019796620030139342, + "grad_norm": 842.5889282226562, + "learning_rate": 1.96e-07, + "loss": 37.4423, + "step": 980 + }, + { + "epoch": 0.00199986263569775, + "grad_norm": 948.009033203125, + "learning_rate": 1.9800000000000003e-07, + "loss": 30.0773, + "step": 990 + }, + { + "epoch": 0.0020200632683815656, + "grad_norm": 215.3368377685547, + "learning_rate": 2.0000000000000002e-07, + "loss": 26.6014, + "step": 1000 + }, + { + "epoch": 0.0020402639010653812, + "grad_norm": 494.67578125, + "learning_rate": 2.02e-07, + "loss": 21.6492, + "step": 1010 + }, + { + "epoch": 0.002060464533749197, + "grad_norm": 234.77099609375, + "learning_rate": 2.0400000000000003e-07, + "loss": 24.3645, + "step": 1020 + }, + { + "epoch": 0.0020806651664330125, + "grad_norm": 327.6997985839844, + "learning_rate": 2.0600000000000002e-07, + "loss": 24.5405, + "step": 1030 + }, + { + "epoch": 0.002100865799116828, + "grad_norm": 611.9917602539062, + "learning_rate": 2.08e-07, + "loss": 24.577, + "step": 1040 + }, + { + "epoch": 0.002121066431800644, + "grad_norm": 133.8150177001953, + "learning_rate": 2.1000000000000003e-07, + "loss": 33.8902, + "step": 1050 + }, + { + "epoch": 0.0021412670644844595, + "grad_norm": 163.73504638671875, + "learning_rate": 2.1200000000000002e-07, + "loss": 17.783, + "step": 1060 + }, + { + "epoch": 0.002161467697168275, + "grad_norm": 193.5026397705078, + "learning_rate": 2.14e-07, + "loss": 20.8054, + "step": 1070 + }, + { + "epoch": 0.002181668329852091, + "grad_norm": 624.8909301757812, + "learning_rate": 2.1600000000000003e-07, + "loss": 10.6967, + "step": 1080 + }, + { + "epoch": 0.0022018689625359065, + "grad_norm": 430.49560546875, + "learning_rate": 2.1800000000000002e-07, + "loss": 45.0397, + "step": 1090 + }, + { + "epoch": 0.002222069595219722, + "grad_norm": 1073.7850341796875, + "learning_rate": 2.2e-07, + "loss": 42.1204, + "step": 1100 + }, + { + "epoch": 0.002242270227903538, + "grad_norm": 486.751708984375, + "learning_rate": 2.2200000000000003e-07, + "loss": 23.1064, + "step": 1110 + }, + { + "epoch": 0.0022624708605873535, + "grad_norm": 1003.8533935546875, + "learning_rate": 2.2400000000000002e-07, + "loss": 43.3564, + "step": 1120 + }, + { + "epoch": 0.002282671493271169, + "grad_norm": 572.0302734375, + "learning_rate": 2.26e-07, + "loss": 23.8053, + "step": 1130 + }, + { + "epoch": 0.002302872125954985, + "grad_norm": 447.3976745605469, + "learning_rate": 2.2800000000000003e-07, + "loss": 18.1645, + "step": 1140 + }, + { + "epoch": 0.0023230727586388005, + "grad_norm": 931.8524780273438, + "learning_rate": 2.3000000000000002e-07, + "loss": 30.1086, + "step": 1150 + }, + { + "epoch": 0.002343273391322616, + "grad_norm": 875.0267333984375, + "learning_rate": 2.32e-07, + "loss": 44.5649, + "step": 1160 + }, + { + "epoch": 0.0023634740240064318, + "grad_norm": 683.9933471679688, + "learning_rate": 2.3400000000000003e-07, + "loss": 28.6991, + "step": 1170 + }, + { + "epoch": 0.0023836746566902474, + "grad_norm": 1272.7320556640625, + "learning_rate": 2.3600000000000002e-07, + "loss": 53.9565, + "step": 1180 + }, + { + "epoch": 0.002403875289374063, + "grad_norm": 540.0521240234375, + "learning_rate": 2.3800000000000004e-07, + "loss": 23.6139, + "step": 1190 + }, + { + "epoch": 0.0024240759220578788, + "grad_norm": 578.9132690429688, + "learning_rate": 2.4000000000000003e-07, + "loss": 22.7089, + "step": 1200 + }, + { + "epoch": 
0.0024442765547416944, + "grad_norm": 382.25244140625, + "learning_rate": 2.42e-07, + "loss": 33.4145, + "step": 1210 + }, + { + "epoch": 0.00246447718742551, + "grad_norm": 1396.89697265625, + "learning_rate": 2.44e-07, + "loss": 35.8553, + "step": 1220 + }, + { + "epoch": 0.0024846778201093257, + "grad_norm": 507.25848388671875, + "learning_rate": 2.46e-07, + "loss": 39.8554, + "step": 1230 + }, + { + "epoch": 0.0025048784527931414, + "grad_norm": 236.29849243164062, + "learning_rate": 2.48e-07, + "loss": 20.3849, + "step": 1240 + }, + { + "epoch": 0.002525079085476957, + "grad_norm": 766.2069702148438, + "learning_rate": 2.5000000000000004e-07, + "loss": 35.435, + "step": 1250 + }, + { + "epoch": 0.0025452797181607727, + "grad_norm": 92.45375061035156, + "learning_rate": 2.5200000000000003e-07, + "loss": 33.7958, + "step": 1260 + }, + { + "epoch": 0.0025654803508445884, + "grad_norm": 206.84521484375, + "learning_rate": 2.54e-07, + "loss": 19.2633, + "step": 1270 + }, + { + "epoch": 0.002585680983528404, + "grad_norm": 1618.84521484375, + "learning_rate": 2.56e-07, + "loss": 36.3744, + "step": 1280 + }, + { + "epoch": 0.0026058816162122197, + "grad_norm": 322.9664001464844, + "learning_rate": 2.58e-07, + "loss": 19.4322, + "step": 1290 + }, + { + "epoch": 0.0026260822488960354, + "grad_norm": 479.2985534667969, + "learning_rate": 2.6e-07, + "loss": 44.4554, + "step": 1300 + }, + { + "epoch": 0.002646282881579851, + "grad_norm": 839.0703125, + "learning_rate": 2.6200000000000004e-07, + "loss": 25.7475, + "step": 1310 + }, + { + "epoch": 0.0026664835142636667, + "grad_norm": 496.9524841308594, + "learning_rate": 2.6400000000000003e-07, + "loss": 14.5702, + "step": 1320 + }, + { + "epoch": 0.0026866841469474823, + "grad_norm": 206.1656951904297, + "learning_rate": 2.66e-07, + "loss": 30.1201, + "step": 1330 + }, + { + "epoch": 0.002706884779631298, + "grad_norm": 688.9887084960938, + "learning_rate": 2.68e-07, + "loss": 20.7835, + "step": 1340 + }, + { + "epoch": 0.0027270854123151137, + "grad_norm": 173.92941284179688, + "learning_rate": 2.7e-07, + "loss": 38.9963, + "step": 1350 + }, + { + "epoch": 0.0027472860449989293, + "grad_norm": 416.7894287109375, + "learning_rate": 2.72e-07, + "loss": 40.8383, + "step": 1360 + }, + { + "epoch": 0.002767486677682745, + "grad_norm": 602.2549438476562, + "learning_rate": 2.7400000000000004e-07, + "loss": 22.1494, + "step": 1370 + }, + { + "epoch": 0.0027876873103665606, + "grad_norm": 1489.0169677734375, + "learning_rate": 2.7600000000000004e-07, + "loss": 30.0528, + "step": 1380 + }, + { + "epoch": 0.0028078879430503763, + "grad_norm": 410.69769287109375, + "learning_rate": 2.7800000000000003e-07, + "loss": 36.4594, + "step": 1390 + }, + { + "epoch": 0.002828088575734192, + "grad_norm": 99.08312225341797, + "learning_rate": 2.8e-07, + "loss": 19.1275, + "step": 1400 + }, + { + "epoch": 0.0028482892084180076, + "grad_norm": 579.4361572265625, + "learning_rate": 2.82e-07, + "loss": 23.6028, + "step": 1410 + }, + { + "epoch": 0.0028684898411018233, + "grad_norm": 932.9219970703125, + "learning_rate": 2.8400000000000005e-07, + "loss": 33.0755, + "step": 1420 + }, + { + "epoch": 0.002888690473785639, + "grad_norm": 736.8807373046875, + "learning_rate": 2.8600000000000005e-07, + "loss": 33.5045, + "step": 1430 + }, + { + "epoch": 0.0029088911064694546, + "grad_norm": 1126.778076171875, + "learning_rate": 2.8800000000000004e-07, + "loss": 33.3801, + "step": 1440 + }, + { + "epoch": 0.0029290917391532703, + "grad_norm": 1085.3497314453125, + 
"learning_rate": 2.9000000000000003e-07, + "loss": 28.1494, + "step": 1450 + }, + { + "epoch": 0.002949292371837086, + "grad_norm": 374.22149658203125, + "learning_rate": 2.92e-07, + "loss": 32.5694, + "step": 1460 + }, + { + "epoch": 0.0029694930045209016, + "grad_norm": 238.65066528320312, + "learning_rate": 2.94e-07, + "loss": 18.5137, + "step": 1470 + }, + { + "epoch": 0.0029896936372047172, + "grad_norm": 2642.603759765625, + "learning_rate": 2.9600000000000006e-07, + "loss": 33.0043, + "step": 1480 + }, + { + "epoch": 0.003009894269888533, + "grad_norm": 371.8793640136719, + "learning_rate": 2.9800000000000005e-07, + "loss": 22.4324, + "step": 1490 + }, + { + "epoch": 0.0030300949025723486, + "grad_norm": 463.6688232421875, + "learning_rate": 3.0000000000000004e-07, + "loss": 19.1007, + "step": 1500 + }, + { + "epoch": 0.0030502955352561642, + "grad_norm": 710.45068359375, + "learning_rate": 3.0200000000000003e-07, + "loss": 17.671, + "step": 1510 + }, + { + "epoch": 0.00307049616793998, + "grad_norm": 231.896240234375, + "learning_rate": 3.04e-07, + "loss": 25.546, + "step": 1520 + }, + { + "epoch": 0.0030906968006237955, + "grad_norm": 263.27044677734375, + "learning_rate": 3.06e-07, + "loss": 27.5296, + "step": 1530 + }, + { + "epoch": 0.003110897433307611, + "grad_norm": 447.924072265625, + "learning_rate": 3.0800000000000006e-07, + "loss": 65.7333, + "step": 1540 + }, + { + "epoch": 0.003131098065991427, + "grad_norm": 415.6585998535156, + "learning_rate": 3.1000000000000005e-07, + "loss": 17.1337, + "step": 1550 + }, + { + "epoch": 0.0031512986986752425, + "grad_norm": 743.8123779296875, + "learning_rate": 3.12e-07, + "loss": 31.4509, + "step": 1560 + }, + { + "epoch": 0.003171499331359058, + "grad_norm": 379.4447326660156, + "learning_rate": 3.14e-07, + "loss": 33.4806, + "step": 1570 + }, + { + "epoch": 0.003191699964042874, + "grad_norm": 1378.80615234375, + "learning_rate": 3.160000000000001e-07, + "loss": 41.5661, + "step": 1580 + }, + { + "epoch": 0.0032119005967266895, + "grad_norm": 721.19384765625, + "learning_rate": 3.1800000000000007e-07, + "loss": 24.3997, + "step": 1590 + }, + { + "epoch": 0.003232101229410505, + "grad_norm": 447.39154052734375, + "learning_rate": 3.2e-07, + "loss": 20.2326, + "step": 1600 + }, + { + "epoch": 0.003252301862094321, + "grad_norm": 616.3694458007812, + "learning_rate": 3.22e-07, + "loss": 28.001, + "step": 1610 + }, + { + "epoch": 0.0032725024947781365, + "grad_norm": 942.26904296875, + "learning_rate": 3.24e-07, + "loss": 34.0719, + "step": 1620 + }, + { + "epoch": 0.003292703127461952, + "grad_norm": 340.3528137207031, + "learning_rate": 3.26e-07, + "loss": 23.7475, + "step": 1630 + }, + { + "epoch": 0.003312903760145768, + "grad_norm": 616.3563842773438, + "learning_rate": 3.280000000000001e-07, + "loss": 25.5421, + "step": 1640 + }, + { + "epoch": 0.0033331043928295835, + "grad_norm": 287.14404296875, + "learning_rate": 3.3e-07, + "loss": 21.6099, + "step": 1650 + }, + { + "epoch": 0.003353305025513399, + "grad_norm": 325.98907470703125, + "learning_rate": 3.32e-07, + "loss": 19.8994, + "step": 1660 + }, + { + "epoch": 0.0033735056581972148, + "grad_norm": 455.73468017578125, + "learning_rate": 3.34e-07, + "loss": 24.4489, + "step": 1670 + }, + { + "epoch": 0.0033937062908810304, + "grad_norm": 970.495361328125, + "learning_rate": 3.36e-07, + "loss": 25.1155, + "step": 1680 + }, + { + "epoch": 0.003413906923564846, + "grad_norm": 820.18359375, + "learning_rate": 3.38e-07, + "loss": 35.97, + "step": 1690 + }, + { + "epoch": 
0.0034341075562486618, + "grad_norm": 135.357177734375, + "learning_rate": 3.4000000000000003e-07, + "loss": 22.5324, + "step": 1700 + }, + { + "epoch": 0.0034543081889324774, + "grad_norm": 280.0291748046875, + "learning_rate": 3.42e-07, + "loss": 27.6606, + "step": 1710 + }, + { + "epoch": 0.003474508821616293, + "grad_norm": 345.8559265136719, + "learning_rate": 3.44e-07, + "loss": 39.866, + "step": 1720 + }, + { + "epoch": 0.0034947094543001087, + "grad_norm": 705.6763305664062, + "learning_rate": 3.46e-07, + "loss": 18.8156, + "step": 1730 + }, + { + "epoch": 0.0035149100869839244, + "grad_norm": 416.8535461425781, + "learning_rate": 3.48e-07, + "loss": 33.5592, + "step": 1740 + }, + { + "epoch": 0.00353511071966774, + "grad_norm": 552.7217407226562, + "learning_rate": 3.5000000000000004e-07, + "loss": 12.8888, + "step": 1750 + }, + { + "epoch": 0.0035553113523515557, + "grad_norm": 208.27780151367188, + "learning_rate": 3.5200000000000003e-07, + "loss": 27.6918, + "step": 1760 + }, + { + "epoch": 0.0035755119850353714, + "grad_norm": 537.1244506835938, + "learning_rate": 3.54e-07, + "loss": 44.3155, + "step": 1770 + }, + { + "epoch": 0.003595712617719187, + "grad_norm": 284.45733642578125, + "learning_rate": 3.56e-07, + "loss": 28.734, + "step": 1780 + }, + { + "epoch": 0.0036159132504030027, + "grad_norm": 387.14532470703125, + "learning_rate": 3.58e-07, + "loss": 18.956, + "step": 1790 + }, + { + "epoch": 0.0036361138830868184, + "grad_norm": 370.3746337890625, + "learning_rate": 3.6e-07, + "loss": 27.5186, + "step": 1800 + }, + { + "epoch": 0.003656314515770634, + "grad_norm": 302.4902038574219, + "learning_rate": 3.6200000000000004e-07, + "loss": 30.0567, + "step": 1810 + }, + { + "epoch": 0.0036765151484544497, + "grad_norm": 381.73919677734375, + "learning_rate": 3.6400000000000003e-07, + "loss": 29.6831, + "step": 1820 + }, + { + "epoch": 0.0036967157811382653, + "grad_norm": 463.9573974609375, + "learning_rate": 3.66e-07, + "loss": 51.3698, + "step": 1830 + }, + { + "epoch": 0.003716916413822081, + "grad_norm": 156.73703002929688, + "learning_rate": 3.68e-07, + "loss": 27.487, + "step": 1840 + }, + { + "epoch": 0.0037371170465058967, + "grad_norm": 124.4668197631836, + "learning_rate": 3.7e-07, + "loss": 14.7367, + "step": 1850 + }, + { + "epoch": 0.0037573176791897123, + "grad_norm": 733.3515625, + "learning_rate": 3.72e-07, + "loss": 28.2449, + "step": 1860 + }, + { + "epoch": 0.003777518311873528, + "grad_norm": 337.3455810546875, + "learning_rate": 3.7400000000000004e-07, + "loss": 38.7027, + "step": 1870 + }, + { + "epoch": 0.0037977189445573436, + "grad_norm": 987.9752197265625, + "learning_rate": 3.7600000000000003e-07, + "loss": 46.9669, + "step": 1880 + }, + { + "epoch": 0.0038179195772411593, + "grad_norm": 186.5377655029297, + "learning_rate": 3.78e-07, + "loss": 38.3815, + "step": 1890 + }, + { + "epoch": 0.003838120209924975, + "grad_norm": 49.196285247802734, + "learning_rate": 3.8e-07, + "loss": 18.3016, + "step": 1900 + }, + { + "epoch": 0.0038583208426087906, + "grad_norm": 309.545166015625, + "learning_rate": 3.82e-07, + "loss": 17.9834, + "step": 1910 + }, + { + "epoch": 0.0038785214752926063, + "grad_norm": 540.118408203125, + "learning_rate": 3.84e-07, + "loss": 29.434, + "step": 1920 + }, + { + "epoch": 0.003898722107976422, + "grad_norm": 20.59901237487793, + "learning_rate": 3.8600000000000004e-07, + "loss": 34.1916, + "step": 1930 + }, + { + "epoch": 0.003918922740660237, + "grad_norm": 742.4661254882812, + "learning_rate": 3.8800000000000003e-07, + 
"loss": 26.6047, + "step": 1940 + }, + { + "epoch": 0.003939123373344053, + "grad_norm": 275.4309387207031, + "learning_rate": 3.9e-07, + "loss": 15.0218, + "step": 1950 + }, + { + "epoch": 0.0039593240060278685, + "grad_norm": 547.0128173828125, + "learning_rate": 3.92e-07, + "loss": 30.1546, + "step": 1960 + }, + { + "epoch": 0.003979524638711684, + "grad_norm": 109.45767211914062, + "learning_rate": 3.94e-07, + "loss": 22.6386, + "step": 1970 + }, + { + "epoch": 0.0039997252713955, + "grad_norm": 498.2757263183594, + "learning_rate": 3.9600000000000005e-07, + "loss": 28.6018, + "step": 1980 + }, + { + "epoch": 0.0040199259040793155, + "grad_norm": 2670.8740234375, + "learning_rate": 3.9800000000000004e-07, + "loss": 45.6287, + "step": 1990 + }, + { + "epoch": 0.004040126536763131, + "grad_norm": 733.6322021484375, + "learning_rate": 4.0000000000000003e-07, + "loss": 22.9162, + "step": 2000 + }, + { + "epoch": 0.004060327169446947, + "grad_norm": 774.7802734375, + "learning_rate": 4.02e-07, + "loss": 19.3823, + "step": 2010 + }, + { + "epoch": 0.0040805278021307624, + "grad_norm": 323.9580078125, + "learning_rate": 4.04e-07, + "loss": 23.0773, + "step": 2020 + }, + { + "epoch": 0.004100728434814578, + "grad_norm": 1654.9927978515625, + "learning_rate": 4.06e-07, + "loss": 37.3542, + "step": 2030 + }, + { + "epoch": 0.004120929067498394, + "grad_norm": 831.7189331054688, + "learning_rate": 4.0800000000000005e-07, + "loss": 19.7393, + "step": 2040 + }, + { + "epoch": 0.004141129700182209, + "grad_norm": 921.7901611328125, + "learning_rate": 4.1000000000000004e-07, + "loss": 44.1891, + "step": 2050 + }, + { + "epoch": 0.004161330332866025, + "grad_norm": 432.77813720703125, + "learning_rate": 4.1200000000000004e-07, + "loss": 17.2742, + "step": 2060 + }, + { + "epoch": 0.004181530965549841, + "grad_norm": 1103.80029296875, + "learning_rate": 4.1400000000000003e-07, + "loss": 34.2473, + "step": 2070 + }, + { + "epoch": 0.004201731598233656, + "grad_norm": 534.106689453125, + "learning_rate": 4.16e-07, + "loss": 45.6846, + "step": 2080 + }, + { + "epoch": 0.004221932230917472, + "grad_norm": 867.72412109375, + "learning_rate": 4.18e-07, + "loss": 47.1584, + "step": 2090 + }, + { + "epoch": 0.004242132863601288, + "grad_norm": 230.99696350097656, + "learning_rate": 4.2000000000000006e-07, + "loss": 18.7068, + "step": 2100 + }, + { + "epoch": 0.004262333496285103, + "grad_norm": 531.1705322265625, + "learning_rate": 4.2200000000000005e-07, + "loss": 33.7492, + "step": 2110 + }, + { + "epoch": 0.004282534128968919, + "grad_norm": 897.9804077148438, + "learning_rate": 4.2400000000000004e-07, + "loss": 29.6693, + "step": 2120 + }, + { + "epoch": 0.004302734761652735, + "grad_norm": 1721.717529296875, + "learning_rate": 4.2600000000000003e-07, + "loss": 39.9116, + "step": 2130 + }, + { + "epoch": 0.00432293539433655, + "grad_norm": 799.24658203125, + "learning_rate": 4.28e-07, + "loss": 36.7716, + "step": 2140 + }, + { + "epoch": 0.004343136027020366, + "grad_norm": 243.7528076171875, + "learning_rate": 4.3e-07, + "loss": 17.6017, + "step": 2150 + }, + { + "epoch": 0.004363336659704182, + "grad_norm": 611.0807495117188, + "learning_rate": 4.3200000000000006e-07, + "loss": 28.1989, + "step": 2160 + }, + { + "epoch": 0.004383537292387997, + "grad_norm": 146.26116943359375, + "learning_rate": 4.3400000000000005e-07, + "loss": 35.3578, + "step": 2170 + }, + { + "epoch": 0.004403737925071813, + "grad_norm": 1156.3270263671875, + "learning_rate": 4.3600000000000004e-07, + "loss": 41.4281, + "step": 2180 
+ }, + { + "epoch": 0.004423938557755629, + "grad_norm": 309.5587463378906, + "learning_rate": 4.3800000000000003e-07, + "loss": 18.0717, + "step": 2190 + }, + { + "epoch": 0.004444139190439444, + "grad_norm": 573.9265747070312, + "learning_rate": 4.4e-07, + "loss": 21.9999, + "step": 2200 + }, + { + "epoch": 0.00446433982312326, + "grad_norm": 86.95663452148438, + "learning_rate": 4.4200000000000007e-07, + "loss": 33.4672, + "step": 2210 + }, + { + "epoch": 0.004484540455807076, + "grad_norm": 818.9146118164062, + "learning_rate": 4.4400000000000006e-07, + "loss": 30.7386, + "step": 2220 + }, + { + "epoch": 0.004504741088490891, + "grad_norm": 523.1422729492188, + "learning_rate": 4.4600000000000005e-07, + "loss": 29.29, + "step": 2230 + }, + { + "epoch": 0.004524941721174707, + "grad_norm": 1081.639892578125, + "learning_rate": 4.4800000000000004e-07, + "loss": 24.7567, + "step": 2240 + }, + { + "epoch": 0.004545142353858523, + "grad_norm": 438.20458984375, + "learning_rate": 4.5000000000000003e-07, + "loss": 24.615, + "step": 2250 + }, + { + "epoch": 0.004565342986542338, + "grad_norm": 248.22909545898438, + "learning_rate": 4.52e-07, + "loss": 25.8396, + "step": 2260 + }, + { + "epoch": 0.004585543619226154, + "grad_norm": 861.9166870117188, + "learning_rate": 4.5400000000000007e-07, + "loss": 29.9438, + "step": 2270 + }, + { + "epoch": 0.00460574425190997, + "grad_norm": 823.51318359375, + "learning_rate": 4.5600000000000006e-07, + "loss": 30.2976, + "step": 2280 + }, + { + "epoch": 0.004625944884593785, + "grad_norm": 1101.48779296875, + "learning_rate": 4.5800000000000005e-07, + "loss": 32.0947, + "step": 2290 + }, + { + "epoch": 0.004646145517277601, + "grad_norm": 512.0722045898438, + "learning_rate": 4.6000000000000004e-07, + "loss": 37.1648, + "step": 2300 + }, + { + "epoch": 0.004666346149961417, + "grad_norm": 1179.549560546875, + "learning_rate": 4.6200000000000003e-07, + "loss": 41.9576, + "step": 2310 + }, + { + "epoch": 0.004686546782645232, + "grad_norm": 472.5940246582031, + "learning_rate": 4.64e-07, + "loss": 22.1231, + "step": 2320 + }, + { + "epoch": 0.004706747415329048, + "grad_norm": 0.0, + "learning_rate": 4.6600000000000007e-07, + "loss": 22.0882, + "step": 2330 + }, + { + "epoch": 0.0047269480480128636, + "grad_norm": 783.9161376953125, + "learning_rate": 4.6800000000000006e-07, + "loss": 29.3172, + "step": 2340 + }, + { + "epoch": 0.004747148680696679, + "grad_norm": 1057.4423828125, + "learning_rate": 4.7000000000000005e-07, + "loss": 28.8642, + "step": 2350 + }, + { + "epoch": 0.004767349313380495, + "grad_norm": 563.2919921875, + "learning_rate": 4.7200000000000004e-07, + "loss": 21.4855, + "step": 2360 + }, + { + "epoch": 0.0047875499460643105, + "grad_norm": 594.2389526367188, + "learning_rate": 4.7400000000000004e-07, + "loss": 38.7323, + "step": 2370 + }, + { + "epoch": 0.004807750578748126, + "grad_norm": 207.3581085205078, + "learning_rate": 4.760000000000001e-07, + "loss": 28.6296, + "step": 2380 + }, + { + "epoch": 0.004827951211431942, + "grad_norm": 874.9584350585938, + "learning_rate": 4.78e-07, + "loss": 22.6688, + "step": 2390 + }, + { + "epoch": 0.0048481518441157575, + "grad_norm": 76.04383087158203, + "learning_rate": 4.800000000000001e-07, + "loss": 21.4943, + "step": 2400 + }, + { + "epoch": 0.004868352476799573, + "grad_norm": 961.9263305664062, + "learning_rate": 4.82e-07, + "loss": 17.3612, + "step": 2410 + }, + { + "epoch": 0.004888553109483389, + "grad_norm": 789.2753295898438, + "learning_rate": 4.84e-07, + "loss": 17.5381, + "step": 
2420 + }, + { + "epoch": 0.0049087537421672045, + "grad_norm": 526.267578125, + "learning_rate": 4.86e-07, + "loss": 15.962, + "step": 2430 + }, + { + "epoch": 0.00492895437485102, + "grad_norm": 909.1647338867188, + "learning_rate": 4.88e-07, + "loss": 31.8372, + "step": 2440 + }, + { + "epoch": 0.004949155007534836, + "grad_norm": 392.2228698730469, + "learning_rate": 4.900000000000001e-07, + "loss": 28.5269, + "step": 2450 + }, + { + "epoch": 0.0049693556402186515, + "grad_norm": 572.967529296875, + "learning_rate": 4.92e-07, + "loss": 21.5063, + "step": 2460 + }, + { + "epoch": 0.004989556272902467, + "grad_norm": 1103.427001953125, + "learning_rate": 4.940000000000001e-07, + "loss": 32.4583, + "step": 2470 + }, + { + "epoch": 0.005009756905586283, + "grad_norm": 816.0195922851562, + "learning_rate": 4.96e-07, + "loss": 27.6086, + "step": 2480 + }, + { + "epoch": 0.0050299575382700985, + "grad_norm": 440.2334289550781, + "learning_rate": 4.98e-07, + "loss": 29.0266, + "step": 2490 + }, + { + "epoch": 0.005050158170953914, + "grad_norm": 528.32421875, + "learning_rate": 5.000000000000001e-07, + "loss": 47.6612, + "step": 2500 + }, + { + "epoch": 0.00507035880363773, + "grad_norm": 596.6814575195312, + "learning_rate": 5.02e-07, + "loss": 26.5699, + "step": 2510 + }, + { + "epoch": 0.0050905594363215454, + "grad_norm": 503.26715087890625, + "learning_rate": 5.040000000000001e-07, + "loss": 35.7597, + "step": 2520 + }, + { + "epoch": 0.005110760069005361, + "grad_norm": 402.7926330566406, + "learning_rate": 5.06e-07, + "loss": 49.0305, + "step": 2530 + }, + { + "epoch": 0.005130960701689177, + "grad_norm": 734.1798706054688, + "learning_rate": 5.08e-07, + "loss": 33.3107, + "step": 2540 + }, + { + "epoch": 0.005151161334372992, + "grad_norm": 430.47332763671875, + "learning_rate": 5.1e-07, + "loss": 17.1561, + "step": 2550 + }, + { + "epoch": 0.005171361967056808, + "grad_norm": 171.5887451171875, + "learning_rate": 5.12e-07, + "loss": 24.0431, + "step": 2560 + }, + { + "epoch": 0.005191562599740624, + "grad_norm": 303.52740478515625, + "learning_rate": 5.140000000000001e-07, + "loss": 25.7299, + "step": 2570 + }, + { + "epoch": 0.005211763232424439, + "grad_norm": 103.0549087524414, + "learning_rate": 5.16e-07, + "loss": 50.7616, + "step": 2580 + }, + { + "epoch": 0.005231963865108255, + "grad_norm": 713.0372314453125, + "learning_rate": 5.180000000000001e-07, + "loss": 30.2423, + "step": 2590 + }, + { + "epoch": 0.005252164497792071, + "grad_norm": 860.4334716796875, + "learning_rate": 5.2e-07, + "loss": 22.214, + "step": 2600 + }, + { + "epoch": 0.005272365130475886, + "grad_norm": 2196.531494140625, + "learning_rate": 5.22e-07, + "loss": 48.7065, + "step": 2610 + }, + { + "epoch": 0.005292565763159702, + "grad_norm": 193.64439392089844, + "learning_rate": 5.240000000000001e-07, + "loss": 20.2737, + "step": 2620 + }, + { + "epoch": 0.005312766395843518, + "grad_norm": 609.69140625, + "learning_rate": 5.26e-07, + "loss": 47.1675, + "step": 2630 + }, + { + "epoch": 0.005332967028527333, + "grad_norm": 1125.103759765625, + "learning_rate": 5.280000000000001e-07, + "loss": 44.6597, + "step": 2640 + }, + { + "epoch": 0.005353167661211149, + "grad_norm": 624.8217163085938, + "learning_rate": 5.3e-07, + "loss": 29.0659, + "step": 2650 + }, + { + "epoch": 0.005373368293894965, + "grad_norm": 366.8795471191406, + "learning_rate": 5.32e-07, + "loss": 28.1106, + "step": 2660 + }, + { + "epoch": 0.00539356892657878, + "grad_norm": 585.3496704101562, + "learning_rate": 5.340000000000001e-07, + 
"loss": 22.2636, + "step": 2670 + }, + { + "epoch": 0.005413769559262596, + "grad_norm": 785.0697631835938, + "learning_rate": 5.36e-07, + "loss": 54.8097, + "step": 2680 + }, + { + "epoch": 0.005433970191946412, + "grad_norm": 433.1402893066406, + "learning_rate": 5.380000000000001e-07, + "loss": 14.2988, + "step": 2690 + }, + { + "epoch": 0.005454170824630227, + "grad_norm": 614.6635131835938, + "learning_rate": 5.4e-07, + "loss": 36.9219, + "step": 2700 + }, + { + "epoch": 0.005474371457314043, + "grad_norm": 1097.97314453125, + "learning_rate": 5.420000000000001e-07, + "loss": 32.1943, + "step": 2710 + }, + { + "epoch": 0.005494572089997859, + "grad_norm": 221.27752685546875, + "learning_rate": 5.44e-07, + "loss": 17.7383, + "step": 2720 + }, + { + "epoch": 0.005514772722681674, + "grad_norm": 846.3823852539062, + "learning_rate": 5.46e-07, + "loss": 32.384, + "step": 2730 + }, + { + "epoch": 0.00553497335536549, + "grad_norm": 1222.7154541015625, + "learning_rate": 5.480000000000001e-07, + "loss": 40.9341, + "step": 2740 + }, + { + "epoch": 0.005555173988049306, + "grad_norm": 1121.3111572265625, + "learning_rate": 5.5e-07, + "loss": 23.7586, + "step": 2750 + }, + { + "epoch": 0.005575374620733121, + "grad_norm": 1418.0941162109375, + "learning_rate": 5.520000000000001e-07, + "loss": 44.7959, + "step": 2760 + }, + { + "epoch": 0.005595575253416937, + "grad_norm": 352.4377746582031, + "learning_rate": 5.54e-07, + "loss": 18.5532, + "step": 2770 + }, + { + "epoch": 0.005615775886100753, + "grad_norm": 682.9746704101562, + "learning_rate": 5.560000000000001e-07, + "loss": 62.9547, + "step": 2780 + }, + { + "epoch": 0.005635976518784568, + "grad_norm": 69.55480194091797, + "learning_rate": 5.580000000000001e-07, + "loss": 44.0819, + "step": 2790 + }, + { + "epoch": 0.005656177151468384, + "grad_norm": 178.6747589111328, + "learning_rate": 5.6e-07, + "loss": 28.2389, + "step": 2800 + }, + { + "epoch": 0.0056763777841522, + "grad_norm": 662.1491088867188, + "learning_rate": 5.620000000000001e-07, + "loss": 19.7388, + "step": 2810 + }, + { + "epoch": 0.005696578416836015, + "grad_norm": 310.38323974609375, + "learning_rate": 5.64e-07, + "loss": 39.9212, + "step": 2820 + }, + { + "epoch": 0.005716779049519831, + "grad_norm": 256.0929870605469, + "learning_rate": 5.660000000000001e-07, + "loss": 22.1921, + "step": 2830 + }, + { + "epoch": 0.0057369796822036466, + "grad_norm": 934.5228881835938, + "learning_rate": 5.680000000000001e-07, + "loss": 32.7585, + "step": 2840 + }, + { + "epoch": 0.005757180314887462, + "grad_norm": 703.0299072265625, + "learning_rate": 5.7e-07, + "loss": 37.5443, + "step": 2850 + }, + { + "epoch": 0.005777380947571278, + "grad_norm": 687.6595458984375, + "learning_rate": 5.720000000000001e-07, + "loss": 24.1511, + "step": 2860 + }, + { + "epoch": 0.0057975815802550935, + "grad_norm": 400.9228820800781, + "learning_rate": 5.74e-07, + "loss": 23.191, + "step": 2870 + }, + { + "epoch": 0.005817782212938909, + "grad_norm": 810.8984375, + "learning_rate": 5.760000000000001e-07, + "loss": 28.8146, + "step": 2880 + }, + { + "epoch": 0.005837982845622725, + "grad_norm": 528.00634765625, + "learning_rate": 5.78e-07, + "loss": 27.1717, + "step": 2890 + }, + { + "epoch": 0.0058581834783065405, + "grad_norm": 282.5981140136719, + "learning_rate": 5.800000000000001e-07, + "loss": 19.146, + "step": 2900 + }, + { + "epoch": 0.005878384110990356, + "grad_norm": 767.5343627929688, + "learning_rate": 5.820000000000001e-07, + "loss": 29.7168, + "step": 2910 + }, + { + "epoch": 
0.005898584743674172, + "grad_norm": 1321.3330078125, + "learning_rate": 5.84e-07, + "loss": 51.2614, + "step": 2920 + }, + { + "epoch": 0.0059187853763579875, + "grad_norm": 422.82183837890625, + "learning_rate": 5.860000000000001e-07, + "loss": 25.8771, + "step": 2930 + }, + { + "epoch": 0.005938986009041803, + "grad_norm": 170.95977783203125, + "learning_rate": 5.88e-07, + "loss": 16.7884, + "step": 2940 + }, + { + "epoch": 0.005959186641725619, + "grad_norm": 752.912841796875, + "learning_rate": 5.900000000000001e-07, + "loss": 24.5547, + "step": 2950 + }, + { + "epoch": 0.0059793872744094345, + "grad_norm": 47.68494415283203, + "learning_rate": 5.920000000000001e-07, + "loss": 22.6978, + "step": 2960 + }, + { + "epoch": 0.00599958790709325, + "grad_norm": 537.8870849609375, + "learning_rate": 5.94e-07, + "loss": 49.1119, + "step": 2970 + }, + { + "epoch": 0.006019788539777066, + "grad_norm": 487.58251953125, + "learning_rate": 5.960000000000001e-07, + "loss": 33.8852, + "step": 2980 + }, + { + "epoch": 0.0060399891724608815, + "grad_norm": 578.7280883789062, + "learning_rate": 5.98e-07, + "loss": 19.5557, + "step": 2990 + }, + { + "epoch": 0.006060189805144697, + "grad_norm": 357.2806701660156, + "learning_rate": 6.000000000000001e-07, + "loss": 13.2956, + "step": 3000 + }, + { + "epoch": 0.006080390437828513, + "grad_norm": 49.976646423339844, + "learning_rate": 6.02e-07, + "loss": 23.7729, + "step": 3010 + }, + { + "epoch": 0.0061005910705123284, + "grad_norm": 1392.4190673828125, + "learning_rate": 6.040000000000001e-07, + "loss": 27.7141, + "step": 3020 + }, + { + "epoch": 0.006120791703196144, + "grad_norm": 547.042236328125, + "learning_rate": 6.060000000000001e-07, + "loss": 22.2183, + "step": 3030 + }, + { + "epoch": 0.00614099233587996, + "grad_norm": 363.829833984375, + "learning_rate": 6.08e-07, + "loss": 25.6521, + "step": 3040 + }, + { + "epoch": 0.006161192968563775, + "grad_norm": 1138.9224853515625, + "learning_rate": 6.100000000000001e-07, + "loss": 28.1272, + "step": 3050 + }, + { + "epoch": 0.006181393601247591, + "grad_norm": 388.612548828125, + "learning_rate": 6.12e-07, + "loss": 25.8512, + "step": 3060 + }, + { + "epoch": 0.006201594233931407, + "grad_norm": 633.0219116210938, + "learning_rate": 6.140000000000001e-07, + "loss": 19.1016, + "step": 3070 + }, + { + "epoch": 0.006221794866615222, + "grad_norm": 0.0, + "learning_rate": 6.160000000000001e-07, + "loss": 38.8512, + "step": 3080 + }, + { + "epoch": 0.006241995499299038, + "grad_norm": 579.1844482421875, + "learning_rate": 6.180000000000001e-07, + "loss": 23.9865, + "step": 3090 + }, + { + "epoch": 0.006262196131982854, + "grad_norm": 860.4633178710938, + "learning_rate": 6.200000000000001e-07, + "loss": 20.4271, + "step": 3100 + }, + { + "epoch": 0.006282396764666669, + "grad_norm": 406.67791748046875, + "learning_rate": 6.22e-07, + "loss": 32.6324, + "step": 3110 + }, + { + "epoch": 0.006302597397350485, + "grad_norm": 110.9633560180664, + "learning_rate": 6.24e-07, + "loss": 14.6998, + "step": 3120 + }, + { + "epoch": 0.006322798030034301, + "grad_norm": 445.50244140625, + "learning_rate": 6.260000000000001e-07, + "loss": 16.6768, + "step": 3130 + }, + { + "epoch": 0.006342998662718116, + "grad_norm": 1032.125244140625, + "learning_rate": 6.28e-07, + "loss": 37.2087, + "step": 3140 + }, + { + "epoch": 0.006363199295401932, + "grad_norm": 284.1348876953125, + "learning_rate": 6.3e-07, + "loss": 25.7215, + "step": 3150 + }, + { + "epoch": 0.006383399928085748, + "grad_norm": 1127.0638427734375, + 
"learning_rate": 6.320000000000002e-07, + "loss": 43.177, + "step": 3160 + }, + { + "epoch": 0.006403600560769563, + "grad_norm": 858.960205078125, + "learning_rate": 6.34e-07, + "loss": 29.6178, + "step": 3170 + }, + { + "epoch": 0.006423801193453379, + "grad_norm": 632.6044921875, + "learning_rate": 6.360000000000001e-07, + "loss": 24.2948, + "step": 3180 + }, + { + "epoch": 0.006444001826137195, + "grad_norm": 385.60772705078125, + "learning_rate": 6.38e-07, + "loss": 29.6091, + "step": 3190 + }, + { + "epoch": 0.00646420245882101, + "grad_norm": 735.461181640625, + "learning_rate": 6.4e-07, + "loss": 36.3189, + "step": 3200 + }, + { + "epoch": 0.006484403091504826, + "grad_norm": 806.14208984375, + "learning_rate": 6.42e-07, + "loss": 40.9133, + "step": 3210 + }, + { + "epoch": 0.006504603724188642, + "grad_norm": 197.8910675048828, + "learning_rate": 6.44e-07, + "loss": 22.794, + "step": 3220 + }, + { + "epoch": 0.006524804356872457, + "grad_norm": 464.4063720703125, + "learning_rate": 6.460000000000001e-07, + "loss": 25.8888, + "step": 3230 + }, + { + "epoch": 0.006545004989556273, + "grad_norm": 897.4555053710938, + "learning_rate": 6.48e-07, + "loss": 45.3982, + "step": 3240 + }, + { + "epoch": 0.006565205622240089, + "grad_norm": 1184.5333251953125, + "learning_rate": 6.5e-07, + "loss": 45.8197, + "step": 3250 + }, + { + "epoch": 0.006585406254923904, + "grad_norm": 245.31578063964844, + "learning_rate": 6.52e-07, + "loss": 22.9282, + "step": 3260 + }, + { + "epoch": 0.00660560688760772, + "grad_norm": 396.6833190917969, + "learning_rate": 6.54e-07, + "loss": 36.6589, + "step": 3270 + }, + { + "epoch": 0.006625807520291536, + "grad_norm": 827.7760620117188, + "learning_rate": 6.560000000000002e-07, + "loss": 27.5275, + "step": 3280 + }, + { + "epoch": 0.006646008152975351, + "grad_norm": 698.2625122070312, + "learning_rate": 6.58e-07, + "loss": 43.7663, + "step": 3290 + }, + { + "epoch": 0.006666208785659167, + "grad_norm": 804.8634033203125, + "learning_rate": 6.6e-07, + "loss": 42.3979, + "step": 3300 + }, + { + "epoch": 0.006686409418342983, + "grad_norm": 381.66253662109375, + "learning_rate": 6.62e-07, + "loss": 28.6512, + "step": 3310 + }, + { + "epoch": 0.006706610051026798, + "grad_norm": 274.6836242675781, + "learning_rate": 6.64e-07, + "loss": 13.3558, + "step": 3320 + }, + { + "epoch": 0.006726810683710614, + "grad_norm": 640.2205200195312, + "learning_rate": 6.660000000000002e-07, + "loss": 31.3027, + "step": 3330 + }, + { + "epoch": 0.0067470113163944296, + "grad_norm": 545.2610473632812, + "learning_rate": 6.68e-07, + "loss": 19.2501, + "step": 3340 + }, + { + "epoch": 0.006767211949078245, + "grad_norm": 652.1705322265625, + "learning_rate": 6.7e-07, + "loss": 37.1378, + "step": 3350 + }, + { + "epoch": 0.006787412581762061, + "grad_norm": 1061.9658203125, + "learning_rate": 6.72e-07, + "loss": 21.7822, + "step": 3360 + }, + { + "epoch": 0.0068076132144458765, + "grad_norm": 556.8729248046875, + "learning_rate": 6.74e-07, + "loss": 24.4347, + "step": 3370 + }, + { + "epoch": 0.006827813847129692, + "grad_norm": 522.956787109375, + "learning_rate": 6.76e-07, + "loss": 27.4032, + "step": 3380 + }, + { + "epoch": 0.006848014479813508, + "grad_norm": 531.7928466796875, + "learning_rate": 6.78e-07, + "loss": 19.0332, + "step": 3390 + }, + { + "epoch": 0.0068682151124973235, + "grad_norm": 804.1568603515625, + "learning_rate": 6.800000000000001e-07, + "loss": 34.7435, + "step": 3400 + }, + { + "epoch": 0.006888415745181139, + "grad_norm": 1052.230712890625, + 
"learning_rate": 6.82e-07, + "loss": 25.6694, + "step": 3410 + }, + { + "epoch": 0.006908616377864955, + "grad_norm": 526.1654663085938, + "learning_rate": 6.84e-07, + "loss": 22.8071, + "step": 3420 + }, + { + "epoch": 0.0069288170105487705, + "grad_norm": 1492.260986328125, + "learning_rate": 6.86e-07, + "loss": 20.8931, + "step": 3430 + }, + { + "epoch": 0.006949017643232586, + "grad_norm": 2416.557373046875, + "learning_rate": 6.88e-07, + "loss": 45.9088, + "step": 3440 + }, + { + "epoch": 0.006969218275916402, + "grad_norm": 138.57374572753906, + "learning_rate": 6.900000000000001e-07, + "loss": 20.0898, + "step": 3450 + }, + { + "epoch": 0.0069894189086002175, + "grad_norm": 517.4945678710938, + "learning_rate": 6.92e-07, + "loss": 26.9102, + "step": 3460 + }, + { + "epoch": 0.007009619541284033, + "grad_norm": 426.5979919433594, + "learning_rate": 6.94e-07, + "loss": 39.5767, + "step": 3470 + }, + { + "epoch": 0.007029820173967849, + "grad_norm": 208.76681518554688, + "learning_rate": 6.96e-07, + "loss": 42.304, + "step": 3480 + }, + { + "epoch": 0.0070500208066516645, + "grad_norm": 478.20050048828125, + "learning_rate": 6.98e-07, + "loss": 23.9574, + "step": 3490 + }, + { + "epoch": 0.00707022143933548, + "grad_norm": 1073.7861328125, + "learning_rate": 7.000000000000001e-07, + "loss": 40.8396, + "step": 3500 + }, + { + "epoch": 0.007090422072019296, + "grad_norm": 164.99581909179688, + "learning_rate": 7.02e-07, + "loss": 54.3387, + "step": 3510 + }, + { + "epoch": 0.0071106227047031114, + "grad_norm": 559.994873046875, + "learning_rate": 7.040000000000001e-07, + "loss": 23.7497, + "step": 3520 + }, + { + "epoch": 0.007130823337386927, + "grad_norm": 248.2763671875, + "learning_rate": 7.06e-07, + "loss": 14.4129, + "step": 3530 + }, + { + "epoch": 0.007151023970070743, + "grad_norm": 115.21528625488281, + "learning_rate": 7.08e-07, + "loss": 20.9172, + "step": 3540 + }, + { + "epoch": 0.007171224602754558, + "grad_norm": 529.1926879882812, + "learning_rate": 7.1e-07, + "loss": 29.7155, + "step": 3550 + }, + { + "epoch": 0.007191425235438374, + "grad_norm": 931.08056640625, + "learning_rate": 7.12e-07, + "loss": 48.6689, + "step": 3560 + }, + { + "epoch": 0.00721162586812219, + "grad_norm": 325.41485595703125, + "learning_rate": 7.140000000000001e-07, + "loss": 19.5409, + "step": 3570 + }, + { + "epoch": 0.007231826500806005, + "grad_norm": 255.26974487304688, + "learning_rate": 7.16e-07, + "loss": 13.3323, + "step": 3580 + }, + { + "epoch": 0.007252027133489821, + "grad_norm": 174.78330993652344, + "learning_rate": 7.18e-07, + "loss": 27.6813, + "step": 3590 + }, + { + "epoch": 0.007272227766173637, + "grad_norm": 259.9703369140625, + "learning_rate": 7.2e-07, + "loss": 23.6667, + "step": 3600 + }, + { + "epoch": 0.007292428398857452, + "grad_norm": 510.34515380859375, + "learning_rate": 7.22e-07, + "loss": 43.1148, + "step": 3610 + }, + { + "epoch": 0.007312629031541268, + "grad_norm": 369.45806884765625, + "learning_rate": 7.240000000000001e-07, + "loss": 18.297, + "step": 3620 + }, + { + "epoch": 0.007332829664225084, + "grad_norm": 492.9736022949219, + "learning_rate": 7.26e-07, + "loss": 34.2994, + "step": 3630 + }, + { + "epoch": 0.007353030296908899, + "grad_norm": 611.5191650390625, + "learning_rate": 7.280000000000001e-07, + "loss": 40.5437, + "step": 3640 + }, + { + "epoch": 0.007373230929592715, + "grad_norm": 478.23126220703125, + "learning_rate": 7.3e-07, + "loss": 42.8498, + "step": 3650 + }, + { + "epoch": 0.007393431562276531, + "grad_norm": 822.2637329101562, + 
"learning_rate": 7.32e-07, + "loss": 42.8805, + "step": 3660 + }, + { + "epoch": 0.007413632194960346, + "grad_norm": 572.3778686523438, + "learning_rate": 7.340000000000001e-07, + "loss": 21.5523, + "step": 3670 + }, + { + "epoch": 0.007433832827644162, + "grad_norm": 610.9832153320312, + "learning_rate": 7.36e-07, + "loss": 24.501, + "step": 3680 + }, + { + "epoch": 0.007454033460327978, + "grad_norm": 444.3849792480469, + "learning_rate": 7.380000000000001e-07, + "loss": 29.0815, + "step": 3690 + }, + { + "epoch": 0.007474234093011793, + "grad_norm": 334.1302490234375, + "learning_rate": 7.4e-07, + "loss": 29.4803, + "step": 3700 + }, + { + "epoch": 0.007494434725695609, + "grad_norm": 484.7538757324219, + "learning_rate": 7.420000000000001e-07, + "loss": 34.7338, + "step": 3710 + }, + { + "epoch": 0.007514635358379425, + "grad_norm": 557.7028198242188, + "learning_rate": 7.44e-07, + "loss": 45.3084, + "step": 3720 + }, + { + "epoch": 0.00753483599106324, + "grad_norm": 419.6079406738281, + "learning_rate": 7.46e-07, + "loss": 38.1011, + "step": 3730 + }, + { + "epoch": 0.007555036623747056, + "grad_norm": 0.0, + "learning_rate": 7.480000000000001e-07, + "loss": 34.1749, + "step": 3740 + }, + { + "epoch": 0.007575237256430872, + "grad_norm": 608.0559692382812, + "learning_rate": 7.5e-07, + "loss": 12.6068, + "step": 3750 + }, + { + "epoch": 0.007595437889114687, + "grad_norm": 1825.6767578125, + "learning_rate": 7.520000000000001e-07, + "loss": 36.8878, + "step": 3760 + }, + { + "epoch": 0.007615638521798503, + "grad_norm": 347.3094787597656, + "learning_rate": 7.54e-07, + "loss": 26.2681, + "step": 3770 + }, + { + "epoch": 0.007635839154482319, + "grad_norm": 1999.683837890625, + "learning_rate": 7.56e-07, + "loss": 40.3283, + "step": 3780 + }, + { + "epoch": 0.007656039787166134, + "grad_norm": 520.2977905273438, + "learning_rate": 7.580000000000001e-07, + "loss": 34.4978, + "step": 3790 + }, + { + "epoch": 0.00767624041984995, + "grad_norm": 457.4718322753906, + "learning_rate": 7.6e-07, + "loss": 37.1305, + "step": 3800 + }, + { + "epoch": 0.007696441052533766, + "grad_norm": 241.23533630371094, + "learning_rate": 7.620000000000001e-07, + "loss": 34.1395, + "step": 3810 + }, + { + "epoch": 0.007716641685217581, + "grad_norm": 1090.16943359375, + "learning_rate": 7.64e-07, + "loss": 32.3401, + "step": 3820 + }, + { + "epoch": 0.007736842317901397, + "grad_norm": 799.810546875, + "learning_rate": 7.660000000000001e-07, + "loss": 18.844, + "step": 3830 + }, + { + "epoch": 0.0077570429505852126, + "grad_norm": 513.3098754882812, + "learning_rate": 7.68e-07, + "loss": 56.5756, + "step": 3840 + }, + { + "epoch": 0.007777243583269028, + "grad_norm": 412.8589172363281, + "learning_rate": 7.7e-07, + "loss": 28.4811, + "step": 3850 + }, + { + "epoch": 0.007797444215952844, + "grad_norm": 818.9476318359375, + "learning_rate": 7.720000000000001e-07, + "loss": 23.9097, + "step": 3860 + }, + { + "epoch": 0.00781764484863666, + "grad_norm": 660.37158203125, + "learning_rate": 7.74e-07, + "loss": 33.1159, + "step": 3870 + }, + { + "epoch": 0.007837845481320474, + "grad_norm": 554.0394287109375, + "learning_rate": 7.760000000000001e-07, + "loss": 41.6907, + "step": 3880 + }, + { + "epoch": 0.00785804611400429, + "grad_norm": 609.9306640625, + "learning_rate": 7.78e-07, + "loss": 23.2773, + "step": 3890 + }, + { + "epoch": 0.007878246746688106, + "grad_norm": 509.7363586425781, + "learning_rate": 7.8e-07, + "loss": 27.8946, + "step": 3900 + }, + { + "epoch": 0.007898447379371922, + "grad_norm": 
617.0547485351562, + "learning_rate": 7.820000000000001e-07, + "loss": 40.5662, + "step": 3910 + }, + { + "epoch": 0.007918648012055737, + "grad_norm": 45.31270217895508, + "learning_rate": 7.84e-07, + "loss": 19.5, + "step": 3920 + }, + { + "epoch": 0.007938848644739553, + "grad_norm": 936.0264282226562, + "learning_rate": 7.860000000000001e-07, + "loss": 31.398, + "step": 3930 + }, + { + "epoch": 0.007959049277423368, + "grad_norm": 4046.48974609375, + "learning_rate": 7.88e-07, + "loss": 61.3853, + "step": 3940 + }, + { + "epoch": 0.007979249910107185, + "grad_norm": 678.829345703125, + "learning_rate": 7.900000000000001e-07, + "loss": 52.0202, + "step": 3950 + }, + { + "epoch": 0.007999450542791, + "grad_norm": 174.16665649414062, + "learning_rate": 7.920000000000001e-07, + "loss": 18.2646, + "step": 3960 + }, + { + "epoch": 0.008019651175474816, + "grad_norm": 631.7503051757812, + "learning_rate": 7.94e-07, + "loss": 22.1172, + "step": 3970 + }, + { + "epoch": 0.008039851808158631, + "grad_norm": 863.083984375, + "learning_rate": 7.960000000000001e-07, + "loss": 31.1579, + "step": 3980 + }, + { + "epoch": 0.008060052440842447, + "grad_norm": 395.8502197265625, + "learning_rate": 7.98e-07, + "loss": 36.5626, + "step": 3990 + }, + { + "epoch": 0.008080253073526262, + "grad_norm": 551.8558349609375, + "learning_rate": 8.000000000000001e-07, + "loss": 27.3743, + "step": 4000 + }, + { + "epoch": 0.008100453706210079, + "grad_norm": 350.1429138183594, + "learning_rate": 8.02e-07, + "loss": 27.0405, + "step": 4010 + }, + { + "epoch": 0.008120654338893894, + "grad_norm": 1742.940185546875, + "learning_rate": 8.04e-07, + "loss": 44.1298, + "step": 4020 + }, + { + "epoch": 0.00814085497157771, + "grad_norm": 773.5975341796875, + "learning_rate": 8.060000000000001e-07, + "loss": 22.7084, + "step": 4030 + }, + { + "epoch": 0.008161055604261525, + "grad_norm": 996.5304565429688, + "learning_rate": 8.08e-07, + "loss": 40.6753, + "step": 4040 + }, + { + "epoch": 0.008181256236945341, + "grad_norm": 671.2755126953125, + "learning_rate": 8.100000000000001e-07, + "loss": 39.0983, + "step": 4050 + }, + { + "epoch": 0.008201456869629156, + "grad_norm": 350.6563415527344, + "learning_rate": 8.12e-07, + "loss": 30.7569, + "step": 4060 + }, + { + "epoch": 0.008221657502312973, + "grad_norm": 1149.395263671875, + "learning_rate": 8.140000000000001e-07, + "loss": 28.9356, + "step": 4070 + }, + { + "epoch": 0.008241858134996788, + "grad_norm": 990.6906127929688, + "learning_rate": 8.160000000000001e-07, + "loss": 39.5627, + "step": 4080 + }, + { + "epoch": 0.008262058767680604, + "grad_norm": 364.4676208496094, + "learning_rate": 8.18e-07, + "loss": 14.7625, + "step": 4090 + }, + { + "epoch": 0.008282259400364419, + "grad_norm": 811.5744018554688, + "learning_rate": 8.200000000000001e-07, + "loss": 22.5022, + "step": 4100 + }, + { + "epoch": 0.008302460033048235, + "grad_norm": 582.2328491210938, + "learning_rate": 8.22e-07, + "loss": 39.7949, + "step": 4110 + }, + { + "epoch": 0.00832266066573205, + "grad_norm": 1127.8995361328125, + "learning_rate": 8.240000000000001e-07, + "loss": 37.47, + "step": 4120 + }, + { + "epoch": 0.008342861298415867, + "grad_norm": 502.9598693847656, + "learning_rate": 8.260000000000001e-07, + "loss": 50.2378, + "step": 4130 + }, + { + "epoch": 0.008363061931099681, + "grad_norm": 630.9242553710938, + "learning_rate": 8.280000000000001e-07, + "loss": 25.1314, + "step": 4140 + }, + { + "epoch": 0.008383262563783498, + "grad_norm": 632.32958984375, + "learning_rate": 
8.300000000000001e-07, + "loss": 42.2978, + "step": 4150 + }, + { + "epoch": 0.008403463196467313, + "grad_norm": 513.2865600585938, + "learning_rate": 8.32e-07, + "loss": 22.9661, + "step": 4160 + }, + { + "epoch": 0.00842366382915113, + "grad_norm": 437.45941162109375, + "learning_rate": 8.340000000000001e-07, + "loss": 24.9955, + "step": 4170 + }, + { + "epoch": 0.008443864461834944, + "grad_norm": 732.6044311523438, + "learning_rate": 8.36e-07, + "loss": 37.1115, + "step": 4180 + }, + { + "epoch": 0.00846406509451876, + "grad_norm": 270.413330078125, + "learning_rate": 8.380000000000001e-07, + "loss": 20.6703, + "step": 4190 + }, + { + "epoch": 0.008484265727202575, + "grad_norm": 8.429944038391113, + "learning_rate": 8.400000000000001e-07, + "loss": 25.673, + "step": 4200 + }, + { + "epoch": 0.008504466359886392, + "grad_norm": 555.270751953125, + "learning_rate": 8.42e-07, + "loss": 22.8865, + "step": 4210 + }, + { + "epoch": 0.008524666992570207, + "grad_norm": 638.0531005859375, + "learning_rate": 8.440000000000001e-07, + "loss": 21.5478, + "step": 4220 + }, + { + "epoch": 0.008544867625254023, + "grad_norm": 1256.038818359375, + "learning_rate": 8.46e-07, + "loss": 41.5287, + "step": 4230 + }, + { + "epoch": 0.008565068257937838, + "grad_norm": 414.123046875, + "learning_rate": 8.480000000000001e-07, + "loss": 19.9983, + "step": 4240 + }, + { + "epoch": 0.008585268890621655, + "grad_norm": 594.0344848632812, + "learning_rate": 8.500000000000001e-07, + "loss": 33.3682, + "step": 4250 + }, + { + "epoch": 0.00860546952330547, + "grad_norm": 580.2907104492188, + "learning_rate": 8.520000000000001e-07, + "loss": 26.1064, + "step": 4260 + }, + { + "epoch": 0.008625670155989286, + "grad_norm": 910.2868041992188, + "learning_rate": 8.540000000000001e-07, + "loss": 25.9703, + "step": 4270 + }, + { + "epoch": 0.0086458707886731, + "grad_norm": 401.0901184082031, + "learning_rate": 8.56e-07, + "loss": 31.2655, + "step": 4280 + }, + { + "epoch": 0.008666071421356917, + "grad_norm": 586.83447265625, + "learning_rate": 8.580000000000001e-07, + "loss": 22.6068, + "step": 4290 + }, + { + "epoch": 0.008686272054040732, + "grad_norm": 384.7112731933594, + "learning_rate": 8.6e-07, + "loss": 20.9198, + "step": 4300 + }, + { + "epoch": 0.008706472686724549, + "grad_norm": 861.9733276367188, + "learning_rate": 8.620000000000001e-07, + "loss": 30.633, + "step": 4310 + }, + { + "epoch": 0.008726673319408363, + "grad_norm": 508.044189453125, + "learning_rate": 8.640000000000001e-07, + "loss": 37.4199, + "step": 4320 + }, + { + "epoch": 0.00874687395209218, + "grad_norm": 757.0943603515625, + "learning_rate": 8.66e-07, + "loss": 31.3223, + "step": 4330 + }, + { + "epoch": 0.008767074584775995, + "grad_norm": 481.5451965332031, + "learning_rate": 8.680000000000001e-07, + "loss": 30.192, + "step": 4340 + }, + { + "epoch": 0.008787275217459811, + "grad_norm": 1465.626220703125, + "learning_rate": 8.7e-07, + "loss": 36.7876, + "step": 4350 + }, + { + "epoch": 0.008807475850143626, + "grad_norm": 537.8186645507812, + "learning_rate": 8.720000000000001e-07, + "loss": 18.696, + "step": 4360 + }, + { + "epoch": 0.008827676482827443, + "grad_norm": 421.6712951660156, + "learning_rate": 8.740000000000001e-07, + "loss": 16.2779, + "step": 4370 + }, + { + "epoch": 0.008847877115511257, + "grad_norm": 625.7432861328125, + "learning_rate": 8.760000000000001e-07, + "loss": 23.2634, + "step": 4380 + }, + { + "epoch": 0.008868077748195074, + "grad_norm": 630.1290893554688, + "learning_rate": 8.780000000000001e-07, + 
"loss": 22.0466, + "step": 4390 + }, + { + "epoch": 0.008888278380878889, + "grad_norm": 19.57767105102539, + "learning_rate": 8.8e-07, + "loss": 32.943, + "step": 4400 + }, + { + "epoch": 0.008908479013562705, + "grad_norm": 563.815185546875, + "learning_rate": 8.820000000000001e-07, + "loss": 20.2435, + "step": 4410 + }, + { + "epoch": 0.00892867964624652, + "grad_norm": 565.490234375, + "learning_rate": 8.840000000000001e-07, + "loss": 30.1036, + "step": 4420 + }, + { + "epoch": 0.008948880278930336, + "grad_norm": 553.708984375, + "learning_rate": 8.860000000000001e-07, + "loss": 24.4606, + "step": 4430 + }, + { + "epoch": 0.008969080911614151, + "grad_norm": 433.3461608886719, + "learning_rate": 8.880000000000001e-07, + "loss": 24.9225, + "step": 4440 + }, + { + "epoch": 0.008989281544297968, + "grad_norm": 939.7255859375, + "learning_rate": 8.900000000000001e-07, + "loss": 32.4033, + "step": 4450 + }, + { + "epoch": 0.009009482176981783, + "grad_norm": 207.4413604736328, + "learning_rate": 8.920000000000001e-07, + "loss": 38.8472, + "step": 4460 + }, + { + "epoch": 0.0090296828096656, + "grad_norm": 1215.0419921875, + "learning_rate": 8.94e-07, + "loss": 41.1395, + "step": 4470 + }, + { + "epoch": 0.009049883442349414, + "grad_norm": 761.4100341796875, + "learning_rate": 8.960000000000001e-07, + "loss": 31.6368, + "step": 4480 + }, + { + "epoch": 0.00907008407503323, + "grad_norm": 815.1241455078125, + "learning_rate": 8.980000000000001e-07, + "loss": 25.794, + "step": 4490 + }, + { + "epoch": 0.009090284707717045, + "grad_norm": 664.5227661132812, + "learning_rate": 9.000000000000001e-07, + "loss": 26.1395, + "step": 4500 + }, + { + "epoch": 0.009110485340400862, + "grad_norm": 621.558349609375, + "learning_rate": 9.020000000000001e-07, + "loss": 20.145, + "step": 4510 + }, + { + "epoch": 0.009130685973084677, + "grad_norm": 1977.96240234375, + "learning_rate": 9.04e-07, + "loss": 31.9802, + "step": 4520 + }, + { + "epoch": 0.009150886605768493, + "grad_norm": 638.37109375, + "learning_rate": 9.060000000000001e-07, + "loss": 20.8006, + "step": 4530 + }, + { + "epoch": 0.009171087238452308, + "grad_norm": 1067.8172607421875, + "learning_rate": 9.080000000000001e-07, + "loss": 55.018, + "step": 4540 + }, + { + "epoch": 0.009191287871136124, + "grad_norm": 1054.7120361328125, + "learning_rate": 9.100000000000001e-07, + "loss": 28.5558, + "step": 4550 + }, + { + "epoch": 0.00921148850381994, + "grad_norm": 297.5315856933594, + "learning_rate": 9.120000000000001e-07, + "loss": 29.3597, + "step": 4560 + }, + { + "epoch": 0.009231689136503756, + "grad_norm": 683.9236450195312, + "learning_rate": 9.140000000000001e-07, + "loss": 20.7864, + "step": 4570 + }, + { + "epoch": 0.00925188976918757, + "grad_norm": 822.9036865234375, + "learning_rate": 9.160000000000001e-07, + "loss": 26.9667, + "step": 4580 + }, + { + "epoch": 0.009272090401871387, + "grad_norm": 630.2400512695312, + "learning_rate": 9.180000000000001e-07, + "loss": 12.3359, + "step": 4590 + }, + { + "epoch": 0.009292291034555202, + "grad_norm": 608.8886108398438, + "learning_rate": 9.200000000000001e-07, + "loss": 23.5769, + "step": 4600 + }, + { + "epoch": 0.009312491667239018, + "grad_norm": 827.4570922851562, + "learning_rate": 9.220000000000001e-07, + "loss": 18.0923, + "step": 4610 + }, + { + "epoch": 0.009332692299922833, + "grad_norm": 412.2288818359375, + "learning_rate": 9.240000000000001e-07, + "loss": 38.5852, + "step": 4620 + }, + { + "epoch": 0.00935289293260665, + "grad_norm": 950.310302734375, + "learning_rate": 
9.260000000000001e-07, + "loss": 28.0527, + "step": 4630 + }, + { + "epoch": 0.009373093565290464, + "grad_norm": 442.42523193359375, + "learning_rate": 9.28e-07, + "loss": 20.4062, + "step": 4640 + }, + { + "epoch": 0.009393294197974281, + "grad_norm": 524.7567749023438, + "learning_rate": 9.300000000000001e-07, + "loss": 27.0559, + "step": 4650 + }, + { + "epoch": 0.009413494830658096, + "grad_norm": 602.6281127929688, + "learning_rate": 9.320000000000001e-07, + "loss": 26.432, + "step": 4660 + }, + { + "epoch": 0.009433695463341912, + "grad_norm": 1005.23974609375, + "learning_rate": 9.340000000000001e-07, + "loss": 30.3101, + "step": 4670 + }, + { + "epoch": 0.009453896096025727, + "grad_norm": 511.9738464355469, + "learning_rate": 9.360000000000001e-07, + "loss": 20.6791, + "step": 4680 + }, + { + "epoch": 0.009474096728709544, + "grad_norm": 503.60693359375, + "learning_rate": 9.380000000000001e-07, + "loss": 26.5743, + "step": 4690 + }, + { + "epoch": 0.009494297361393358, + "grad_norm": 674.8211059570312, + "learning_rate": 9.400000000000001e-07, + "loss": 31.4228, + "step": 4700 + }, + { + "epoch": 0.009514497994077175, + "grad_norm": 473.37396240234375, + "learning_rate": 9.420000000000002e-07, + "loss": 44.0672, + "step": 4710 + }, + { + "epoch": 0.00953469862676099, + "grad_norm": 831.3383178710938, + "learning_rate": 9.440000000000001e-07, + "loss": 47.9142, + "step": 4720 + }, + { + "epoch": 0.009554899259444806, + "grad_norm": 2031.18798828125, + "learning_rate": 9.460000000000001e-07, + "loss": 43.8193, + "step": 4730 + }, + { + "epoch": 0.009575099892128621, + "grad_norm": 1036.27978515625, + "learning_rate": 9.480000000000001e-07, + "loss": 19.8682, + "step": 4740 + }, + { + "epoch": 0.009595300524812438, + "grad_norm": 367.123046875, + "learning_rate": 9.500000000000001e-07, + "loss": 19.759, + "step": 4750 + }, + { + "epoch": 0.009615501157496252, + "grad_norm": 1182.7408447265625, + "learning_rate": 9.520000000000002e-07, + "loss": 29.7404, + "step": 4760 + }, + { + "epoch": 0.009635701790180069, + "grad_norm": 326.5006408691406, + "learning_rate": 9.54e-07, + "loss": 62.1775, + "step": 4770 + }, + { + "epoch": 0.009655902422863884, + "grad_norm": 206.4904022216797, + "learning_rate": 9.56e-07, + "loss": 20.3732, + "step": 4780 + }, + { + "epoch": 0.0096761030555477, + "grad_norm": 148.8917999267578, + "learning_rate": 9.58e-07, + "loss": 32.3255, + "step": 4790 + }, + { + "epoch": 0.009696303688231515, + "grad_norm": 392.4678649902344, + "learning_rate": 9.600000000000001e-07, + "loss": 36.1733, + "step": 4800 + }, + { + "epoch": 0.009716504320915332, + "grad_norm": 1750.965087890625, + "learning_rate": 9.62e-07, + "loss": 28.3648, + "step": 4810 + }, + { + "epoch": 0.009736704953599146, + "grad_norm": 149.6742706298828, + "learning_rate": 9.64e-07, + "loss": 16.249, + "step": 4820 + }, + { + "epoch": 0.009756905586282963, + "grad_norm": 268.1315002441406, + "learning_rate": 9.660000000000002e-07, + "loss": 24.6534, + "step": 4830 + }, + { + "epoch": 0.009777106218966778, + "grad_norm": 312.3633728027344, + "learning_rate": 9.68e-07, + "loss": 19.7529, + "step": 4840 + }, + { + "epoch": 0.009797306851650594, + "grad_norm": 335.4103698730469, + "learning_rate": 9.7e-07, + "loss": 25.3802, + "step": 4850 + }, + { + "epoch": 0.009817507484334409, + "grad_norm": 588.1651000976562, + "learning_rate": 9.72e-07, + "loss": 37.7548, + "step": 4860 + }, + { + "epoch": 0.009837708117018226, + "grad_norm": 1084.7984619140625, + "learning_rate": 9.740000000000001e-07, + "loss": 
28.1253, + "step": 4870 + }, + { + "epoch": 0.00985790874970204, + "grad_norm": 841.3825073242188, + "learning_rate": 9.76e-07, + "loss": 17.4928, + "step": 4880 + }, + { + "epoch": 0.009878109382385857, + "grad_norm": 258.460205078125, + "learning_rate": 9.78e-07, + "loss": 26.3879, + "step": 4890 + }, + { + "epoch": 0.009898310015069672, + "grad_norm": 299.3013916015625, + "learning_rate": 9.800000000000001e-07, + "loss": 13.678, + "step": 4900 + }, + { + "epoch": 0.009918510647753488, + "grad_norm": 191.6232452392578, + "learning_rate": 9.82e-07, + "loss": 21.8842, + "step": 4910 + }, + { + "epoch": 0.009938711280437303, + "grad_norm": 565.6702270507812, + "learning_rate": 9.84e-07, + "loss": 11.0409, + "step": 4920 + }, + { + "epoch": 0.00995891191312112, + "grad_norm": 291.95526123046875, + "learning_rate": 9.86e-07, + "loss": 36.9195, + "step": 4930 + }, + { + "epoch": 0.009979112545804934, + "grad_norm": 428.879150390625, + "learning_rate": 9.880000000000001e-07, + "loss": 26.7375, + "step": 4940 + }, + { + "epoch": 0.00999931317848875, + "grad_norm": 177.6942138671875, + "learning_rate": 9.9e-07, + "loss": 26.29, + "step": 4950 + }, + { + "epoch": 0.010019513811172566, + "grad_norm": 662.8630981445312, + "learning_rate": 9.92e-07, + "loss": 25.1648, + "step": 4960 + }, + { + "epoch": 0.010039714443856382, + "grad_norm": 489.55889892578125, + "learning_rate": 9.940000000000001e-07, + "loss": 41.3737, + "step": 4970 + }, + { + "epoch": 0.010059915076540197, + "grad_norm": 350.4941101074219, + "learning_rate": 9.96e-07, + "loss": 42.1294, + "step": 4980 + }, + { + "epoch": 0.010080115709224013, + "grad_norm": 320.653076171875, + "learning_rate": 9.98e-07, + "loss": 26.9559, + "step": 4990 + }, + { + "epoch": 0.010100316341907828, + "grad_norm": 231.248046875, + "learning_rate": 1.0000000000000002e-06, + "loss": 6.5142, + "step": 5000 + }, + { + "epoch": 0.010120516974591645, + "grad_norm": 521.7716674804688, + "learning_rate": 1.002e-06, + "loss": 31.8698, + "step": 5010 + }, + { + "epoch": 0.01014071760727546, + "grad_norm": 800.10888671875, + "learning_rate": 1.004e-06, + "loss": 28.1427, + "step": 5020 + }, + { + "epoch": 0.010160918239959276, + "grad_norm": 420.4501037597656, + "learning_rate": 1.006e-06, + "loss": 28.2438, + "step": 5030 + }, + { + "epoch": 0.010181118872643091, + "grad_norm": 480.6717224121094, + "learning_rate": 1.0080000000000001e-06, + "loss": 20.2761, + "step": 5040 + }, + { + "epoch": 0.010201319505326907, + "grad_norm": 1004.8648681640625, + "learning_rate": 1.01e-06, + "loss": 22.479, + "step": 5050 + }, + { + "epoch": 0.010221520138010722, + "grad_norm": 591.0374755859375, + "learning_rate": 1.012e-06, + "loss": 12.5654, + "step": 5060 + }, + { + "epoch": 0.010241720770694539, + "grad_norm": 921.0972290039062, + "learning_rate": 1.0140000000000002e-06, + "loss": 28.9, + "step": 5070 + }, + { + "epoch": 0.010261921403378354, + "grad_norm": 537.0875244140625, + "learning_rate": 1.016e-06, + "loss": 14.163, + "step": 5080 + }, + { + "epoch": 0.01028212203606217, + "grad_norm": 672.0023193359375, + "learning_rate": 1.018e-06, + "loss": 40.8729, + "step": 5090 + }, + { + "epoch": 0.010302322668745985, + "grad_norm": 761.9652709960938, + "learning_rate": 1.02e-06, + "loss": 29.6747, + "step": 5100 + }, + { + "epoch": 0.010322523301429801, + "grad_norm": 934.7273559570312, + "learning_rate": 1.0220000000000001e-06, + "loss": 23.6142, + "step": 5110 + }, + { + "epoch": 0.010342723934113616, + "grad_norm": 364.7134704589844, + "learning_rate": 1.024e-06, + 
"loss": 29.4059, + "step": 5120 + }, + { + "epoch": 0.010362924566797433, + "grad_norm": 662.9189453125, + "learning_rate": 1.026e-06, + "loss": 25.676, + "step": 5130 + }, + { + "epoch": 0.010383125199481247, + "grad_norm": 1470.47802734375, + "learning_rate": 1.0280000000000002e-06, + "loss": 58.6466, + "step": 5140 + }, + { + "epoch": 0.010403325832165064, + "grad_norm": 1297.0006103515625, + "learning_rate": 1.03e-06, + "loss": 34.2537, + "step": 5150 + }, + { + "epoch": 0.010423526464848879, + "grad_norm": 546.3383178710938, + "learning_rate": 1.032e-06, + "loss": 18.1663, + "step": 5160 + }, + { + "epoch": 0.010443727097532695, + "grad_norm": 1221.919189453125, + "learning_rate": 1.0340000000000002e-06, + "loss": 27.4059, + "step": 5170 + }, + { + "epoch": 0.01046392773021651, + "grad_norm": 460.9737243652344, + "learning_rate": 1.0360000000000001e-06, + "loss": 16.2116, + "step": 5180 + }, + { + "epoch": 0.010484128362900327, + "grad_norm": 413.9505920410156, + "learning_rate": 1.038e-06, + "loss": 21.2082, + "step": 5190 + }, + { + "epoch": 0.010504328995584141, + "grad_norm": 227.77557373046875, + "learning_rate": 1.04e-06, + "loss": 27.9558, + "step": 5200 + }, + { + "epoch": 0.010524529628267958, + "grad_norm": 606.6741943359375, + "learning_rate": 1.0420000000000001e-06, + "loss": 20.9653, + "step": 5210 + }, + { + "epoch": 0.010544730260951773, + "grad_norm": 541.234619140625, + "learning_rate": 1.044e-06, + "loss": 36.7112, + "step": 5220 + }, + { + "epoch": 0.01056493089363559, + "grad_norm": 145.1033477783203, + "learning_rate": 1.046e-06, + "loss": 30.0167, + "step": 5230 + }, + { + "epoch": 0.010585131526319404, + "grad_norm": 373.5177917480469, + "learning_rate": 1.0480000000000002e-06, + "loss": 31.7274, + "step": 5240 + }, + { + "epoch": 0.01060533215900322, + "grad_norm": 563.7691650390625, + "learning_rate": 1.0500000000000001e-06, + "loss": 30.9538, + "step": 5250 + }, + { + "epoch": 0.010625532791687035, + "grad_norm": 170.43959045410156, + "learning_rate": 1.052e-06, + "loss": 44.6468, + "step": 5260 + }, + { + "epoch": 0.010645733424370852, + "grad_norm": 695.9778442382812, + "learning_rate": 1.054e-06, + "loss": 42.9755, + "step": 5270 + }, + { + "epoch": 0.010665934057054667, + "grad_norm": 390.760009765625, + "learning_rate": 1.0560000000000001e-06, + "loss": 28.6079, + "step": 5280 + }, + { + "epoch": 0.010686134689738483, + "grad_norm": 483.7234191894531, + "learning_rate": 1.058e-06, + "loss": 22.693, + "step": 5290 + }, + { + "epoch": 0.010706335322422298, + "grad_norm": 694.895263671875, + "learning_rate": 1.06e-06, + "loss": 31.7093, + "step": 5300 + }, + { + "epoch": 0.010726535955106115, + "grad_norm": 0.03282935544848442, + "learning_rate": 1.0620000000000002e-06, + "loss": 29.5697, + "step": 5310 + }, + { + "epoch": 0.01074673658778993, + "grad_norm": 197.685546875, + "learning_rate": 1.064e-06, + "loss": 25.7651, + "step": 5320 + }, + { + "epoch": 0.010766937220473746, + "grad_norm": 692.8652954101562, + "learning_rate": 1.066e-06, + "loss": 28.2902, + "step": 5330 + }, + { + "epoch": 0.01078713785315756, + "grad_norm": 295.2193603515625, + "learning_rate": 1.0680000000000002e-06, + "loss": 35.8585, + "step": 5340 + }, + { + "epoch": 0.010807338485841377, + "grad_norm": 179.0888214111328, + "learning_rate": 1.0700000000000001e-06, + "loss": 13.3468, + "step": 5350 + }, + { + "epoch": 0.010827539118525192, + "grad_norm": 369.4256896972656, + "learning_rate": 1.072e-06, + "loss": 42.9108, + "step": 5360 + }, + { + "epoch": 0.010847739751209009, + 
"grad_norm": 380.3387756347656, + "learning_rate": 1.074e-06, + "loss": 35.2701, + "step": 5370 + }, + { + "epoch": 0.010867940383892823, + "grad_norm": 735.1011962890625, + "learning_rate": 1.0760000000000002e-06, + "loss": 37.2414, + "step": 5380 + }, + { + "epoch": 0.01088814101657664, + "grad_norm": 459.38720703125, + "learning_rate": 1.078e-06, + "loss": 36.9988, + "step": 5390 + }, + { + "epoch": 0.010908341649260455, + "grad_norm": 1027.1058349609375, + "learning_rate": 1.08e-06, + "loss": 22.0428, + "step": 5400 + }, + { + "epoch": 0.010928542281944271, + "grad_norm": 2424.2099609375, + "learning_rate": 1.0820000000000002e-06, + "loss": 31.5498, + "step": 5410 + }, + { + "epoch": 0.010948742914628086, + "grad_norm": 194.22132873535156, + "learning_rate": 1.0840000000000001e-06, + "loss": 73.3618, + "step": 5420 + }, + { + "epoch": 0.010968943547311902, + "grad_norm": 756.6248168945312, + "learning_rate": 1.086e-06, + "loss": 74.6274, + "step": 5430 + }, + { + "epoch": 0.010989144179995717, + "grad_norm": 472.0841369628906, + "learning_rate": 1.088e-06, + "loss": 22.9419, + "step": 5440 + }, + { + "epoch": 0.011009344812679534, + "grad_norm": 600.0186157226562, + "learning_rate": 1.0900000000000002e-06, + "loss": 25.0131, + "step": 5450 + }, + { + "epoch": 0.011029545445363349, + "grad_norm": 305.5976257324219, + "learning_rate": 1.092e-06, + "loss": 31.3711, + "step": 5460 + }, + { + "epoch": 0.011049746078047165, + "grad_norm": 108.77481842041016, + "learning_rate": 1.094e-06, + "loss": 17.3308, + "step": 5470 + }, + { + "epoch": 0.01106994671073098, + "grad_norm": 1116.296142578125, + "learning_rate": 1.0960000000000002e-06, + "loss": 37.9978, + "step": 5480 + }, + { + "epoch": 0.011090147343414796, + "grad_norm": 390.4914245605469, + "learning_rate": 1.0980000000000001e-06, + "loss": 14.3431, + "step": 5490 + }, + { + "epoch": 0.011110347976098611, + "grad_norm": 598.1354370117188, + "learning_rate": 1.1e-06, + "loss": 30.984, + "step": 5500 + }, + { + "epoch": 0.011130548608782428, + "grad_norm": 711.5731201171875, + "learning_rate": 1.1020000000000002e-06, + "loss": 30.1973, + "step": 5510 + }, + { + "epoch": 0.011150749241466243, + "grad_norm": 371.409423828125, + "learning_rate": 1.1040000000000001e-06, + "loss": 24.4026, + "step": 5520 + }, + { + "epoch": 0.011170949874150059, + "grad_norm": 665.7667846679688, + "learning_rate": 1.106e-06, + "loss": 19.5368, + "step": 5530 + }, + { + "epoch": 0.011191150506833874, + "grad_norm": 502.86566162109375, + "learning_rate": 1.108e-06, + "loss": 18.791, + "step": 5540 + }, + { + "epoch": 0.01121135113951769, + "grad_norm": 404.4401550292969, + "learning_rate": 1.1100000000000002e-06, + "loss": 20.3444, + "step": 5550 + }, + { + "epoch": 0.011231551772201505, + "grad_norm": 323.6523132324219, + "learning_rate": 1.1120000000000001e-06, + "loss": 36.3596, + "step": 5560 + }, + { + "epoch": 0.011251752404885322, + "grad_norm": 743.5354614257812, + "learning_rate": 1.114e-06, + "loss": 29.4033, + "step": 5570 + }, + { + "epoch": 0.011271953037569137, + "grad_norm": 281.8879699707031, + "learning_rate": 1.1160000000000002e-06, + "loss": 23.6649, + "step": 5580 + }, + { + "epoch": 0.011292153670252953, + "grad_norm": 812.1799926757812, + "learning_rate": 1.1180000000000001e-06, + "loss": 34.1818, + "step": 5590 + }, + { + "epoch": 0.011312354302936768, + "grad_norm": 148.88833618164062, + "learning_rate": 1.12e-06, + "loss": 28.1046, + "step": 5600 + }, + { + "epoch": 0.011332554935620584, + "grad_norm": 471.3416748046875, + 
"learning_rate": 1.122e-06, + "loss": 24.278, + "step": 5610 + }, + { + "epoch": 0.0113527555683044, + "grad_norm": 353.2952575683594, + "learning_rate": 1.1240000000000002e-06, + "loss": 30.7999, + "step": 5620 + }, + { + "epoch": 0.011372956200988216, + "grad_norm": 585.4158325195312, + "learning_rate": 1.126e-06, + "loss": 16.3788, + "step": 5630 + }, + { + "epoch": 0.01139315683367203, + "grad_norm": 629.95703125, + "learning_rate": 1.128e-06, + "loss": 22.8701, + "step": 5640 + }, + { + "epoch": 0.011413357466355847, + "grad_norm": 1085.2030029296875, + "learning_rate": 1.1300000000000002e-06, + "loss": 31.6779, + "step": 5650 + }, + { + "epoch": 0.011433558099039662, + "grad_norm": 1200.4349365234375, + "learning_rate": 1.1320000000000001e-06, + "loss": 26.5144, + "step": 5660 + }, + { + "epoch": 0.011453758731723478, + "grad_norm": 869.242919921875, + "learning_rate": 1.134e-06, + "loss": 26.5842, + "step": 5670 + }, + { + "epoch": 0.011473959364407293, + "grad_norm": 107.99213409423828, + "learning_rate": 1.1360000000000002e-06, + "loss": 27.156, + "step": 5680 + }, + { + "epoch": 0.01149415999709111, + "grad_norm": 908.802734375, + "learning_rate": 1.1380000000000002e-06, + "loss": 34.6566, + "step": 5690 + }, + { + "epoch": 0.011514360629774924, + "grad_norm": 497.6568298339844, + "learning_rate": 1.14e-06, + "loss": 21.8019, + "step": 5700 + }, + { + "epoch": 0.011534561262458741, + "grad_norm": 293.9489440917969, + "learning_rate": 1.142e-06, + "loss": 22.4049, + "step": 5710 + }, + { + "epoch": 0.011554761895142556, + "grad_norm": 295.17230224609375, + "learning_rate": 1.1440000000000002e-06, + "loss": 37.8035, + "step": 5720 + }, + { + "epoch": 0.011574962527826372, + "grad_norm": 475.5271301269531, + "learning_rate": 1.1460000000000001e-06, + "loss": 24.764, + "step": 5730 + }, + { + "epoch": 0.011595163160510187, + "grad_norm": 1417.2218017578125, + "learning_rate": 1.148e-06, + "loss": 31.2254, + "step": 5740 + }, + { + "epoch": 0.011615363793194004, + "grad_norm": 271.8209533691406, + "learning_rate": 1.1500000000000002e-06, + "loss": 14.2182, + "step": 5750 + }, + { + "epoch": 0.011635564425877818, + "grad_norm": 612.4166259765625, + "learning_rate": 1.1520000000000002e-06, + "loss": 18.7975, + "step": 5760 + }, + { + "epoch": 0.011655765058561635, + "grad_norm": 314.46990966796875, + "learning_rate": 1.154e-06, + "loss": 19.6362, + "step": 5770 + }, + { + "epoch": 0.01167596569124545, + "grad_norm": 627.936767578125, + "learning_rate": 1.156e-06, + "loss": 38.5957, + "step": 5780 + }, + { + "epoch": 0.011696166323929266, + "grad_norm": 1000.9813842773438, + "learning_rate": 1.1580000000000002e-06, + "loss": 34.4684, + "step": 5790 + }, + { + "epoch": 0.011716366956613081, + "grad_norm": 637.470703125, + "learning_rate": 1.1600000000000001e-06, + "loss": 34.59, + "step": 5800 + }, + { + "epoch": 0.011736567589296898, + "grad_norm": 750.3966674804688, + "learning_rate": 1.162e-06, + "loss": 44.1128, + "step": 5810 + }, + { + "epoch": 0.011756768221980712, + "grad_norm": 1139.5133056640625, + "learning_rate": 1.1640000000000002e-06, + "loss": 23.7449, + "step": 5820 + }, + { + "epoch": 0.011776968854664529, + "grad_norm": 370.25567626953125, + "learning_rate": 1.1660000000000001e-06, + "loss": 17.9951, + "step": 5830 + }, + { + "epoch": 0.011797169487348344, + "grad_norm": 194.2151336669922, + "learning_rate": 1.168e-06, + "loss": 23.3352, + "step": 5840 + }, + { + "epoch": 0.01181737012003216, + "grad_norm": 812.2650756835938, + "learning_rate": 1.1700000000000002e-06, + 
"loss": 21.2374, + "step": 5850 + }, + { + "epoch": 0.011837570752715975, + "grad_norm": 422.30401611328125, + "learning_rate": 1.1720000000000002e-06, + "loss": 19.3065, + "step": 5860 + }, + { + "epoch": 0.011857771385399792, + "grad_norm": 1042.8720703125, + "learning_rate": 1.1740000000000001e-06, + "loss": 38.1709, + "step": 5870 + }, + { + "epoch": 0.011877972018083606, + "grad_norm": 463.48712158203125, + "learning_rate": 1.176e-06, + "loss": 20.1112, + "step": 5880 + }, + { + "epoch": 0.011898172650767423, + "grad_norm": 251.95362854003906, + "learning_rate": 1.1780000000000002e-06, + "loss": 19.2058, + "step": 5890 + }, + { + "epoch": 0.011918373283451238, + "grad_norm": 644.2528076171875, + "learning_rate": 1.1800000000000001e-06, + "loss": 55.8893, + "step": 5900 + }, + { + "epoch": 0.011938573916135054, + "grad_norm": 546.8013305664062, + "learning_rate": 1.182e-06, + "loss": 65.9524, + "step": 5910 + }, + { + "epoch": 0.011958774548818869, + "grad_norm": 649.1343994140625, + "learning_rate": 1.1840000000000002e-06, + "loss": 27.965, + "step": 5920 + }, + { + "epoch": 0.011978975181502685, + "grad_norm": 526.4266357421875, + "learning_rate": 1.1860000000000002e-06, + "loss": 34.2897, + "step": 5930 + }, + { + "epoch": 0.0119991758141865, + "grad_norm": 1568.1280517578125, + "learning_rate": 1.188e-06, + "loss": 35.1816, + "step": 5940 + }, + { + "epoch": 0.012019376446870317, + "grad_norm": 318.4168395996094, + "learning_rate": 1.19e-06, + "loss": 22.1523, + "step": 5950 + }, + { + "epoch": 0.012039577079554132, + "grad_norm": 402.7890930175781, + "learning_rate": 1.1920000000000002e-06, + "loss": 33.6881, + "step": 5960 + }, + { + "epoch": 0.012059777712237948, + "grad_norm": 497.604736328125, + "learning_rate": 1.1940000000000001e-06, + "loss": 32.794, + "step": 5970 + }, + { + "epoch": 0.012079978344921763, + "grad_norm": 444.6767883300781, + "learning_rate": 1.196e-06, + "loss": 37.6437, + "step": 5980 + }, + { + "epoch": 0.01210017897760558, + "grad_norm": 863.6239624023438, + "learning_rate": 1.1980000000000002e-06, + "loss": 26.2152, + "step": 5990 + }, + { + "epoch": 0.012120379610289394, + "grad_norm": 592.9383544921875, + "learning_rate": 1.2000000000000002e-06, + "loss": 14.3755, + "step": 6000 + }, + { + "epoch": 0.01214058024297321, + "grad_norm": 1376.8350830078125, + "learning_rate": 1.202e-06, + "loss": 36.9957, + "step": 6010 + }, + { + "epoch": 0.012160780875657026, + "grad_norm": 894.4027709960938, + "learning_rate": 1.204e-06, + "loss": 25.3654, + "step": 6020 + }, + { + "epoch": 0.012180981508340842, + "grad_norm": 587.0653686523438, + "learning_rate": 1.2060000000000002e-06, + "loss": 29.8984, + "step": 6030 + }, + { + "epoch": 0.012201182141024657, + "grad_norm": 121.9262466430664, + "learning_rate": 1.2080000000000001e-06, + "loss": 20.7306, + "step": 6040 + }, + { + "epoch": 0.012221382773708473, + "grad_norm": 488.919677734375, + "learning_rate": 1.21e-06, + "loss": 27.6776, + "step": 6050 + }, + { + "epoch": 0.012241583406392288, + "grad_norm": 461.9215393066406, + "learning_rate": 1.2120000000000002e-06, + "loss": 29.0396, + "step": 6060 + }, + { + "epoch": 0.012261784039076105, + "grad_norm": 399.4093322753906, + "learning_rate": 1.214e-06, + "loss": 60.5062, + "step": 6070 + }, + { + "epoch": 0.01228198467175992, + "grad_norm": 167.42552185058594, + "learning_rate": 1.216e-06, + "loss": 29.8078, + "step": 6080 + }, + { + "epoch": 0.012302185304443734, + "grad_norm": 412.9476623535156, + "learning_rate": 1.2180000000000002e-06, + "loss": 25.1903, + 
"step": 6090 + }, + { + "epoch": 0.01232238593712755, + "grad_norm": 402.8238220214844, + "learning_rate": 1.2200000000000002e-06, + "loss": 39.7322, + "step": 6100 + }, + { + "epoch": 0.012342586569811366, + "grad_norm": 503.3050537109375, + "learning_rate": 1.2220000000000001e-06, + "loss": 19.9504, + "step": 6110 + }, + { + "epoch": 0.012362787202495182, + "grad_norm": 251.07025146484375, + "learning_rate": 1.224e-06, + "loss": 29.8188, + "step": 6120 + }, + { + "epoch": 0.012382987835178997, + "grad_norm": 1289.874755859375, + "learning_rate": 1.2260000000000002e-06, + "loss": 26.9208, + "step": 6130 + }, + { + "epoch": 0.012403188467862813, + "grad_norm": 839.939453125, + "learning_rate": 1.2280000000000001e-06, + "loss": 29.809, + "step": 6140 + }, + { + "epoch": 0.012423389100546628, + "grad_norm": 0.0, + "learning_rate": 1.23e-06, + "loss": 10.9996, + "step": 6150 + }, + { + "epoch": 0.012443589733230445, + "grad_norm": 687.5811157226562, + "learning_rate": 1.2320000000000002e-06, + "loss": 30.5085, + "step": 6160 + }, + { + "epoch": 0.01246379036591426, + "grad_norm": 127.76237487792969, + "learning_rate": 1.234e-06, + "loss": 29.9668, + "step": 6170 + }, + { + "epoch": 0.012483990998598076, + "grad_norm": 1359.7757568359375, + "learning_rate": 1.2360000000000001e-06, + "loss": 30.7715, + "step": 6180 + }, + { + "epoch": 0.012504191631281891, + "grad_norm": 436.6647644042969, + "learning_rate": 1.238e-06, + "loss": 33.9171, + "step": 6190 + }, + { + "epoch": 0.012524392263965707, + "grad_norm": 281.2616882324219, + "learning_rate": 1.2400000000000002e-06, + "loss": 41.9189, + "step": 6200 + }, + { + "epoch": 0.012544592896649522, + "grad_norm": 758.2962036132812, + "learning_rate": 1.2420000000000001e-06, + "loss": 33.9747, + "step": 6210 + }, + { + "epoch": 0.012564793529333339, + "grad_norm": 684.6299438476562, + "learning_rate": 1.244e-06, + "loss": 35.7783, + "step": 6220 + }, + { + "epoch": 0.012584994162017154, + "grad_norm": 1113.127197265625, + "learning_rate": 1.2460000000000002e-06, + "loss": 48.0597, + "step": 6230 + }, + { + "epoch": 0.01260519479470097, + "grad_norm": 840.9987182617188, + "learning_rate": 1.248e-06, + "loss": 22.5444, + "step": 6240 + }, + { + "epoch": 0.012625395427384785, + "grad_norm": 520.2071533203125, + "learning_rate": 1.25e-06, + "loss": 15.9313, + "step": 6250 + }, + { + "epoch": 0.012645596060068601, + "grad_norm": 556.5119018554688, + "learning_rate": 1.2520000000000003e-06, + "loss": 32.9128, + "step": 6260 + }, + { + "epoch": 0.012665796692752416, + "grad_norm": 228.95762634277344, + "learning_rate": 1.2540000000000002e-06, + "loss": 16.7764, + "step": 6270 + }, + { + "epoch": 0.012685997325436233, + "grad_norm": 316.0266418457031, + "learning_rate": 1.256e-06, + "loss": 25.5342, + "step": 6280 + }, + { + "epoch": 0.012706197958120048, + "grad_norm": 18.748266220092773, + "learning_rate": 1.258e-06, + "loss": 42.3436, + "step": 6290 + }, + { + "epoch": 0.012726398590803864, + "grad_norm": 1323.001953125, + "learning_rate": 1.26e-06, + "loss": 32.5139, + "step": 6300 + }, + { + "epoch": 0.012746599223487679, + "grad_norm": 942.7252197265625, + "learning_rate": 1.2620000000000002e-06, + "loss": 29.162, + "step": 6310 + }, + { + "epoch": 0.012766799856171495, + "grad_norm": 1211.36962890625, + "learning_rate": 1.2640000000000003e-06, + "loss": 29.0307, + "step": 6320 + }, + { + "epoch": 0.01278700048885531, + "grad_norm": 802.3575439453125, + "learning_rate": 1.266e-06, + "loss": 21.0279, + "step": 6330 + }, + { + "epoch": 
0.012807201121539127, + "grad_norm": 740.271728515625, + "learning_rate": 1.268e-06, + "loss": 20.676, + "step": 6340 + }, + { + "epoch": 0.012827401754222941, + "grad_norm": 419.27020263671875, + "learning_rate": 1.2700000000000001e-06, + "loss": 51.1324, + "step": 6350 + }, + { + "epoch": 0.012847602386906758, + "grad_norm": 692.2057495117188, + "learning_rate": 1.2720000000000003e-06, + "loss": 36.6126, + "step": 6360 + }, + { + "epoch": 0.012867803019590573, + "grad_norm": 132.77894592285156, + "learning_rate": 1.2740000000000002e-06, + "loss": 21.4999, + "step": 6370 + }, + { + "epoch": 0.01288800365227439, + "grad_norm": 595.9766845703125, + "learning_rate": 1.276e-06, + "loss": 28.293, + "step": 6380 + }, + { + "epoch": 0.012908204284958204, + "grad_norm": 1459.1014404296875, + "learning_rate": 1.278e-06, + "loss": 21.5185, + "step": 6390 + }, + { + "epoch": 0.01292840491764202, + "grad_norm": 1731.2054443359375, + "learning_rate": 1.28e-06, + "loss": 47.9539, + "step": 6400 + }, + { + "epoch": 0.012948605550325835, + "grad_norm": 750.8800048828125, + "learning_rate": 1.2820000000000002e-06, + "loss": 42.8562, + "step": 6410 + }, + { + "epoch": 0.012968806183009652, + "grad_norm": 646.1731567382812, + "learning_rate": 1.284e-06, + "loss": 32.3806, + "step": 6420 + }, + { + "epoch": 0.012989006815693467, + "grad_norm": 306.4696044921875, + "learning_rate": 1.286e-06, + "loss": 24.9085, + "step": 6430 + }, + { + "epoch": 0.013009207448377283, + "grad_norm": 169.34942626953125, + "learning_rate": 1.288e-06, + "loss": 32.6633, + "step": 6440 + }, + { + "epoch": 0.013029408081061098, + "grad_norm": 469.367919921875, + "learning_rate": 1.2900000000000001e-06, + "loss": 27.3972, + "step": 6450 + }, + { + "epoch": 0.013049608713744915, + "grad_norm": 553.5028686523438, + "learning_rate": 1.2920000000000003e-06, + "loss": 35.9573, + "step": 6460 + }, + { + "epoch": 0.01306980934642873, + "grad_norm": 262.7359924316406, + "learning_rate": 1.294e-06, + "loss": 30.1157, + "step": 6470 + }, + { + "epoch": 0.013090009979112546, + "grad_norm": 839.3622436523438, + "learning_rate": 1.296e-06, + "loss": 38.7697, + "step": 6480 + }, + { + "epoch": 0.01311021061179636, + "grad_norm": 469.48895263671875, + "learning_rate": 1.2980000000000001e-06, + "loss": 33.9329, + "step": 6490 + }, + { + "epoch": 0.013130411244480177, + "grad_norm": 435.9288635253906, + "learning_rate": 1.3e-06, + "loss": 30.6598, + "step": 6500 + }, + { + "epoch": 0.013150611877163992, + "grad_norm": 1100.7294921875, + "learning_rate": 1.3020000000000002e-06, + "loss": 38.6149, + "step": 6510 + }, + { + "epoch": 0.013170812509847809, + "grad_norm": 978.08349609375, + "learning_rate": 1.304e-06, + "loss": 29.158, + "step": 6520 + }, + { + "epoch": 0.013191013142531623, + "grad_norm": 440.118408203125, + "learning_rate": 1.306e-06, + "loss": 51.669, + "step": 6530 + }, + { + "epoch": 0.01321121377521544, + "grad_norm": 217.9273223876953, + "learning_rate": 1.308e-06, + "loss": 24.4326, + "step": 6540 + }, + { + "epoch": 0.013231414407899255, + "grad_norm": 356.3488464355469, + "learning_rate": 1.3100000000000002e-06, + "loss": 26.9776, + "step": 6550 + }, + { + "epoch": 0.013251615040583071, + "grad_norm": 340.18060302734375, + "learning_rate": 1.3120000000000003e-06, + "loss": 39.9926, + "step": 6560 + }, + { + "epoch": 0.013271815673266886, + "grad_norm": 984.8867797851562, + "learning_rate": 1.314e-06, + "loss": 54.8693, + "step": 6570 + }, + { + "epoch": 0.013292016305950703, + "grad_norm": 1010.78466796875, + "learning_rate": 
1.316e-06, + "loss": 23.7788, + "step": 6580 + }, + { + "epoch": 0.013312216938634517, + "grad_norm": 733.6953125, + "learning_rate": 1.3180000000000001e-06, + "loss": 34.9994, + "step": 6590 + }, + { + "epoch": 0.013332417571318334, + "grad_norm": 373.2271728515625, + "learning_rate": 1.32e-06, + "loss": 37.3207, + "step": 6600 + }, + { + "epoch": 0.013352618204002149, + "grad_norm": 171.13934326171875, + "learning_rate": 1.3220000000000002e-06, + "loss": 12.7663, + "step": 6610 + }, + { + "epoch": 0.013372818836685965, + "grad_norm": 357.4339599609375, + "learning_rate": 1.324e-06, + "loss": 35.8358, + "step": 6620 + }, + { + "epoch": 0.01339301946936978, + "grad_norm": 220.25674438476562, + "learning_rate": 1.326e-06, + "loss": 15.3749, + "step": 6630 + }, + { + "epoch": 0.013413220102053596, + "grad_norm": 462.05841064453125, + "learning_rate": 1.328e-06, + "loss": 38.8295, + "step": 6640 + }, + { + "epoch": 0.013433420734737411, + "grad_norm": 469.8040466308594, + "learning_rate": 1.3300000000000002e-06, + "loss": 22.9475, + "step": 6650 + }, + { + "epoch": 0.013453621367421228, + "grad_norm": 971.557373046875, + "learning_rate": 1.3320000000000003e-06, + "loss": 44.1314, + "step": 6660 + }, + { + "epoch": 0.013473822000105043, + "grad_norm": 298.933837890625, + "learning_rate": 1.334e-06, + "loss": 28.8329, + "step": 6670 + }, + { + "epoch": 0.013494022632788859, + "grad_norm": 623.2842407226562, + "learning_rate": 1.336e-06, + "loss": 35.835, + "step": 6680 + }, + { + "epoch": 0.013514223265472674, + "grad_norm": 441.5866394042969, + "learning_rate": 1.3380000000000001e-06, + "loss": 19.0923, + "step": 6690 + }, + { + "epoch": 0.01353442389815649, + "grad_norm": 1859.7860107421875, + "learning_rate": 1.34e-06, + "loss": 35.8646, + "step": 6700 + }, + { + "epoch": 0.013554624530840305, + "grad_norm": 64.71363067626953, + "learning_rate": 1.3420000000000002e-06, + "loss": 15.234, + "step": 6710 + }, + { + "epoch": 0.013574825163524122, + "grad_norm": 333.45794677734375, + "learning_rate": 1.344e-06, + "loss": 22.3284, + "step": 6720 + }, + { + "epoch": 0.013595025796207937, + "grad_norm": 393.3450622558594, + "learning_rate": 1.3460000000000001e-06, + "loss": 54.5108, + "step": 6730 + }, + { + "epoch": 0.013615226428891753, + "grad_norm": 587.0012817382812, + "learning_rate": 1.348e-06, + "loss": 31.177, + "step": 6740 + }, + { + "epoch": 0.013635427061575568, + "grad_norm": 309.5393371582031, + "learning_rate": 1.3500000000000002e-06, + "loss": 13.8727, + "step": 6750 + }, + { + "epoch": 0.013655627694259384, + "grad_norm": 468.2892150878906, + "learning_rate": 1.352e-06, + "loss": 16.0866, + "step": 6760 + }, + { + "epoch": 0.0136758283269432, + "grad_norm": 878.9414672851562, + "learning_rate": 1.354e-06, + "loss": 31.6945, + "step": 6770 + }, + { + "epoch": 0.013696028959627016, + "grad_norm": 963.8592529296875, + "learning_rate": 1.356e-06, + "loss": 26.1409, + "step": 6780 + }, + { + "epoch": 0.01371622959231083, + "grad_norm": 742.5006713867188, + "learning_rate": 1.3580000000000002e-06, + "loss": 20.0253, + "step": 6790 + }, + { + "epoch": 0.013736430224994647, + "grad_norm": 302.4109802246094, + "learning_rate": 1.3600000000000001e-06, + "loss": 30.4902, + "step": 6800 + }, + { + "epoch": 0.013756630857678462, + "grad_norm": 703.2691650390625, + "learning_rate": 1.362e-06, + "loss": 101.9395, + "step": 6810 + }, + { + "epoch": 0.013776831490362278, + "grad_norm": 1015.4454956054688, + "learning_rate": 1.364e-06, + "loss": 22.2598, + "step": 6820 + }, + { + "epoch": 
0.013797032123046093, + "grad_norm": 394.2761535644531, + "learning_rate": 1.3660000000000001e-06, + "loss": 36.6149, + "step": 6830 + }, + { + "epoch": 0.01381723275572991, + "grad_norm": 867.6441650390625, + "learning_rate": 1.368e-06, + "loss": 32.4753, + "step": 6840 + }, + { + "epoch": 0.013837433388413724, + "grad_norm": 386.0970458984375, + "learning_rate": 1.3700000000000002e-06, + "loss": 31.9341, + "step": 6850 + }, + { + "epoch": 0.013857634021097541, + "grad_norm": 650.8311767578125, + "learning_rate": 1.372e-06, + "loss": 25.4292, + "step": 6860 + }, + { + "epoch": 0.013877834653781356, + "grad_norm": 281.85302734375, + "learning_rate": 1.374e-06, + "loss": 15.7737, + "step": 6870 + }, + { + "epoch": 0.013898035286465172, + "grad_norm": 494.3153991699219, + "learning_rate": 1.376e-06, + "loss": 12.811, + "step": 6880 + }, + { + "epoch": 0.013918235919148987, + "grad_norm": 103.57233428955078, + "learning_rate": 1.3780000000000002e-06, + "loss": 24.2835, + "step": 6890 + }, + { + "epoch": 0.013938436551832804, + "grad_norm": 313.1036071777344, + "learning_rate": 1.3800000000000001e-06, + "loss": 26.341, + "step": 6900 + }, + { + "epoch": 0.013958637184516618, + "grad_norm": 364.7337951660156, + "learning_rate": 1.382e-06, + "loss": 19.7548, + "step": 6910 + }, + { + "epoch": 0.013978837817200435, + "grad_norm": 708.6334838867188, + "learning_rate": 1.384e-06, + "loss": 24.823, + "step": 6920 + }, + { + "epoch": 0.01399903844988425, + "grad_norm": 370.78387451171875, + "learning_rate": 1.3860000000000002e-06, + "loss": 22.2033, + "step": 6930 + }, + { + "epoch": 0.014019239082568066, + "grad_norm": 584.7822875976562, + "learning_rate": 1.388e-06, + "loss": 21.2259, + "step": 6940 + }, + { + "epoch": 0.014039439715251881, + "grad_norm": 434.93896484375, + "learning_rate": 1.3900000000000002e-06, + "loss": 17.3879, + "step": 6950 + }, + { + "epoch": 0.014059640347935698, + "grad_norm": 1412.916015625, + "learning_rate": 1.392e-06, + "loss": 43.3578, + "step": 6960 + }, + { + "epoch": 0.014079840980619512, + "grad_norm": 690.4659423828125, + "learning_rate": 1.3940000000000001e-06, + "loss": 29.9748, + "step": 6970 + }, + { + "epoch": 0.014100041613303329, + "grad_norm": 631.6376953125, + "learning_rate": 1.396e-06, + "loss": 27.4844, + "step": 6980 + }, + { + "epoch": 0.014120242245987144, + "grad_norm": 136.48374938964844, + "learning_rate": 1.3980000000000002e-06, + "loss": 14.787, + "step": 6990 + }, + { + "epoch": 0.01414044287867096, + "grad_norm": 419.7654724121094, + "learning_rate": 1.4000000000000001e-06, + "loss": 29.0004, + "step": 7000 + }, + { + "epoch": 0.014160643511354775, + "grad_norm": 468.74468994140625, + "learning_rate": 1.402e-06, + "loss": 19.4345, + "step": 7010 + }, + { + "epoch": 0.014180844144038592, + "grad_norm": 468.9650573730469, + "learning_rate": 1.404e-06, + "loss": 26.9539, + "step": 7020 + }, + { + "epoch": 0.014201044776722406, + "grad_norm": 256.4075622558594, + "learning_rate": 1.4060000000000002e-06, + "loss": 21.5345, + "step": 7030 + }, + { + "epoch": 0.014221245409406223, + "grad_norm": 363.6224060058594, + "learning_rate": 1.4080000000000001e-06, + "loss": 28.2582, + "step": 7040 + }, + { + "epoch": 0.014241446042090038, + "grad_norm": 2213.649169921875, + "learning_rate": 1.41e-06, + "loss": 29.7299, + "step": 7050 + }, + { + "epoch": 0.014261646674773854, + "grad_norm": 525.2031860351562, + "learning_rate": 1.412e-06, + "loss": 15.0173, + "step": 7060 + }, + { + "epoch": 0.014281847307457669, + "grad_norm": 571.482666015625, + 
"learning_rate": 1.4140000000000001e-06, + "loss": 22.69, + "step": 7070 + }, + { + "epoch": 0.014302047940141486, + "grad_norm": 432.14385986328125, + "learning_rate": 1.416e-06, + "loss": 21.501, + "step": 7080 + }, + { + "epoch": 0.0143222485728253, + "grad_norm": 230.9308319091797, + "learning_rate": 1.4180000000000002e-06, + "loss": 23.7384, + "step": 7090 + }, + { + "epoch": 0.014342449205509117, + "grad_norm": 689.4318237304688, + "learning_rate": 1.42e-06, + "loss": 16.4564, + "step": 7100 + }, + { + "epoch": 0.014362649838192932, + "grad_norm": 833.6482543945312, + "learning_rate": 1.4220000000000001e-06, + "loss": 42.0199, + "step": 7110 + }, + { + "epoch": 0.014382850470876748, + "grad_norm": 263.74810791015625, + "learning_rate": 1.424e-06, + "loss": 16.752, + "step": 7120 + }, + { + "epoch": 0.014403051103560563, + "grad_norm": 69.91686248779297, + "learning_rate": 1.4260000000000002e-06, + "loss": 11.7691, + "step": 7130 + }, + { + "epoch": 0.01442325173624438, + "grad_norm": 802.9617309570312, + "learning_rate": 1.4280000000000001e-06, + "loss": 29.6307, + "step": 7140 + }, + { + "epoch": 0.014443452368928194, + "grad_norm": 1388.6160888671875, + "learning_rate": 1.43e-06, + "loss": 17.46, + "step": 7150 + }, + { + "epoch": 0.01446365300161201, + "grad_norm": 202.5712127685547, + "learning_rate": 1.432e-06, + "loss": 14.3374, + "step": 7160 + }, + { + "epoch": 0.014483853634295826, + "grad_norm": 547.8307495117188, + "learning_rate": 1.4340000000000002e-06, + "loss": 30.6452, + "step": 7170 + }, + { + "epoch": 0.014504054266979642, + "grad_norm": 211.10414123535156, + "learning_rate": 1.436e-06, + "loss": 13.0712, + "step": 7180 + }, + { + "epoch": 0.014524254899663457, + "grad_norm": 756.9300537109375, + "learning_rate": 1.4380000000000003e-06, + "loss": 23.8358, + "step": 7190 + }, + { + "epoch": 0.014544455532347273, + "grad_norm": 378.86663818359375, + "learning_rate": 1.44e-06, + "loss": 26.3164, + "step": 7200 + }, + { + "epoch": 0.014564656165031088, + "grad_norm": 337.78131103515625, + "learning_rate": 1.4420000000000001e-06, + "loss": 30.7286, + "step": 7210 + }, + { + "epoch": 0.014584856797714905, + "grad_norm": 1024.835205078125, + "learning_rate": 1.444e-06, + "loss": 29.8971, + "step": 7220 + }, + { + "epoch": 0.01460505743039872, + "grad_norm": 938.3273315429688, + "learning_rate": 1.4460000000000002e-06, + "loss": 27.8823, + "step": 7230 + }, + { + "epoch": 0.014625258063082536, + "grad_norm": 552.2940063476562, + "learning_rate": 1.4480000000000002e-06, + "loss": 25.7623, + "step": 7240 + }, + { + "epoch": 0.014645458695766351, + "grad_norm": 352.24383544921875, + "learning_rate": 1.45e-06, + "loss": 12.5614, + "step": 7250 + }, + { + "epoch": 0.014665659328450167, + "grad_norm": 380.1072082519531, + "learning_rate": 1.452e-06, + "loss": 21.0116, + "step": 7260 + }, + { + "epoch": 0.014685859961133982, + "grad_norm": 89.25300598144531, + "learning_rate": 1.4540000000000002e-06, + "loss": 37.3816, + "step": 7270 + }, + { + "epoch": 0.014706060593817799, + "grad_norm": 742.039306640625, + "learning_rate": 1.4560000000000001e-06, + "loss": 20.3916, + "step": 7280 + }, + { + "epoch": 0.014726261226501614, + "grad_norm": 408.6346435546875, + "learning_rate": 1.4580000000000003e-06, + "loss": 22.624, + "step": 7290 + }, + { + "epoch": 0.01474646185918543, + "grad_norm": 489.7460632324219, + "learning_rate": 1.46e-06, + "loss": 51.9042, + "step": 7300 + }, + { + "epoch": 0.014766662491869245, + "grad_norm": 378.1903991699219, + "learning_rate": 
1.4620000000000001e-06, + "loss": 22.1225, + "step": 7310 + }, + { + "epoch": 0.014786863124553061, + "grad_norm": 118.77698516845703, + "learning_rate": 1.464e-06, + "loss": 27.0962, + "step": 7320 + }, + { + "epoch": 0.014807063757236876, + "grad_norm": 497.64141845703125, + "learning_rate": 1.4660000000000002e-06, + "loss": 23.5584, + "step": 7330 + }, + { + "epoch": 0.014827264389920693, + "grad_norm": 26.968568801879883, + "learning_rate": 1.4680000000000002e-06, + "loss": 46.2282, + "step": 7340 + }, + { + "epoch": 0.014847465022604507, + "grad_norm": 621.72314453125, + "learning_rate": 1.4700000000000001e-06, + "loss": 33.6739, + "step": 7350 + }, + { + "epoch": 0.014867665655288324, + "grad_norm": 1012.1883544921875, + "learning_rate": 1.472e-06, + "loss": 30.1342, + "step": 7360 + }, + { + "epoch": 0.014887866287972139, + "grad_norm": 164.61184692382812, + "learning_rate": 1.4740000000000002e-06, + "loss": 22.5415, + "step": 7370 + }, + { + "epoch": 0.014908066920655955, + "grad_norm": 382.0716552734375, + "learning_rate": 1.4760000000000001e-06, + "loss": 63.1735, + "step": 7380 + }, + { + "epoch": 0.01492826755333977, + "grad_norm": 807.2562866210938, + "learning_rate": 1.478e-06, + "loss": 51.1082, + "step": 7390 + }, + { + "epoch": 0.014948468186023587, + "grad_norm": 600.788330078125, + "learning_rate": 1.48e-06, + "loss": 41.2442, + "step": 7400 + }, + { + "epoch": 0.014968668818707401, + "grad_norm": 547.8676147460938, + "learning_rate": 1.4820000000000002e-06, + "loss": 33.4473, + "step": 7410 + }, + { + "epoch": 0.014988869451391218, + "grad_norm": 785.7941284179688, + "learning_rate": 1.4840000000000001e-06, + "loss": 21.1182, + "step": 7420 + }, + { + "epoch": 0.015009070084075033, + "grad_norm": 373.2097473144531, + "learning_rate": 1.4860000000000003e-06, + "loss": 19.4519, + "step": 7430 + }, + { + "epoch": 0.01502927071675885, + "grad_norm": 283.32928466796875, + "learning_rate": 1.488e-06, + "loss": 33.4474, + "step": 7440 + }, + { + "epoch": 0.015049471349442664, + "grad_norm": 601.2964477539062, + "learning_rate": 1.4900000000000001e-06, + "loss": 37.3402, + "step": 7450 + }, + { + "epoch": 0.01506967198212648, + "grad_norm": 461.42108154296875, + "learning_rate": 1.492e-06, + "loss": 33.2678, + "step": 7460 + }, + { + "epoch": 0.015089872614810295, + "grad_norm": 301.6255187988281, + "learning_rate": 1.4940000000000002e-06, + "loss": 21.1947, + "step": 7470 + }, + { + "epoch": 0.015110073247494112, + "grad_norm": 528.6672973632812, + "learning_rate": 1.4960000000000002e-06, + "loss": 30.9436, + "step": 7480 + }, + { + "epoch": 0.015130273880177927, + "grad_norm": 429.56304931640625, + "learning_rate": 1.498e-06, + "loss": 43.7286, + "step": 7490 + }, + { + "epoch": 0.015150474512861743, + "grad_norm": 477.3526306152344, + "learning_rate": 1.5e-06, + "loss": 44.6997, + "step": 7500 + }, + { + "epoch": 0.015170675145545558, + "grad_norm": 694.835205078125, + "learning_rate": 1.5020000000000002e-06, + "loss": 33.5782, + "step": 7510 + }, + { + "epoch": 0.015190875778229375, + "grad_norm": 437.0287780761719, + "learning_rate": 1.5040000000000001e-06, + "loss": 14.402, + "step": 7520 + }, + { + "epoch": 0.01521107641091319, + "grad_norm": 432.5226135253906, + "learning_rate": 1.5060000000000003e-06, + "loss": 54.8615, + "step": 7530 + }, + { + "epoch": 0.015231277043597006, + "grad_norm": 739.1048583984375, + "learning_rate": 1.508e-06, + "loss": 28.0186, + "step": 7540 + }, + { + "epoch": 0.01525147767628082, + "grad_norm": 751.8126220703125, + "learning_rate": 
1.5100000000000002e-06, + "loss": 14.3841, + "step": 7550 + }, + { + "epoch": 0.015271678308964637, + "grad_norm": 699.0371704101562, + "learning_rate": 1.512e-06, + "loss": 26.7135, + "step": 7560 + }, + { + "epoch": 0.015291878941648452, + "grad_norm": 580.2321166992188, + "learning_rate": 1.5140000000000002e-06, + "loss": 43.0058, + "step": 7570 + }, + { + "epoch": 0.015312079574332269, + "grad_norm": 953.4240112304688, + "learning_rate": 1.5160000000000002e-06, + "loss": 30.3746, + "step": 7580 + }, + { + "epoch": 0.015332280207016083, + "grad_norm": 498.05792236328125, + "learning_rate": 1.5180000000000001e-06, + "loss": 19.2205, + "step": 7590 + }, + { + "epoch": 0.0153524808396999, + "grad_norm": 510.3348083496094, + "learning_rate": 1.52e-06, + "loss": 15.5117, + "step": 7600 + }, + { + "epoch": 0.015372681472383715, + "grad_norm": 1451.5672607421875, + "learning_rate": 1.5220000000000002e-06, + "loss": 29.8484, + "step": 7610 + }, + { + "epoch": 0.015392882105067531, + "grad_norm": 402.60919189453125, + "learning_rate": 1.5240000000000001e-06, + "loss": 28.366, + "step": 7620 + }, + { + "epoch": 0.015413082737751346, + "grad_norm": 191.9103240966797, + "learning_rate": 1.5260000000000003e-06, + "loss": 35.3297, + "step": 7630 + }, + { + "epoch": 0.015433283370435162, + "grad_norm": 371.203125, + "learning_rate": 1.528e-06, + "loss": 10.1235, + "step": 7640 + }, + { + "epoch": 0.015453484003118977, + "grad_norm": 465.6565856933594, + "learning_rate": 1.5300000000000002e-06, + "loss": 21.8699, + "step": 7650 + }, + { + "epoch": 0.015473684635802794, + "grad_norm": 703.3648681640625, + "learning_rate": 1.5320000000000001e-06, + "loss": 32.0473, + "step": 7660 + }, + { + "epoch": 0.015493885268486609, + "grad_norm": 468.8616638183594, + "learning_rate": 1.5340000000000003e-06, + "loss": 21.0127, + "step": 7670 + }, + { + "epoch": 0.015514085901170425, + "grad_norm": 1880.29248046875, + "learning_rate": 1.536e-06, + "loss": 26.7551, + "step": 7680 + }, + { + "epoch": 0.01553428653385424, + "grad_norm": 274.2958068847656, + "learning_rate": 1.5380000000000001e-06, + "loss": 19.2041, + "step": 7690 + }, + { + "epoch": 0.015554487166538056, + "grad_norm": 539.3201904296875, + "learning_rate": 1.54e-06, + "loss": 32.8616, + "step": 7700 + }, + { + "epoch": 0.015574687799221871, + "grad_norm": 87.47315216064453, + "learning_rate": 1.5420000000000002e-06, + "loss": 22.3236, + "step": 7710 + }, + { + "epoch": 0.015594888431905688, + "grad_norm": 319.38226318359375, + "learning_rate": 1.5440000000000002e-06, + "loss": 30.9488, + "step": 7720 + }, + { + "epoch": 0.015615089064589503, + "grad_norm": 1636.626220703125, + "learning_rate": 1.546e-06, + "loss": 30.0025, + "step": 7730 + }, + { + "epoch": 0.01563528969727332, + "grad_norm": 128.85711669921875, + "learning_rate": 1.548e-06, + "loss": 15.6842, + "step": 7740 + }, + { + "epoch": 0.015655490329957136, + "grad_norm": 682.8628540039062, + "learning_rate": 1.5500000000000002e-06, + "loss": 14.3482, + "step": 7750 + }, + { + "epoch": 0.01567569096264095, + "grad_norm": 274.7837829589844, + "learning_rate": 1.5520000000000001e-06, + "loss": 26.8116, + "step": 7760 + }, + { + "epoch": 0.015695891595324765, + "grad_norm": 698.118408203125, + "learning_rate": 1.5540000000000003e-06, + "loss": 40.2702, + "step": 7770 + }, + { + "epoch": 0.01571609222800858, + "grad_norm": 505.4995422363281, + "learning_rate": 1.556e-06, + "loss": 19.0552, + "step": 7780 + }, + { + "epoch": 0.015736292860692398, + "grad_norm": 604.3401489257812, + "learning_rate": 
1.5580000000000002e-06, + "loss": 25.9554, + "step": 7790 + }, + { + "epoch": 0.01575649349337621, + "grad_norm": 551.531494140625, + "learning_rate": 1.56e-06, + "loss": 31.4826, + "step": 7800 + }, + { + "epoch": 0.015776694126060028, + "grad_norm": 597.856689453125, + "learning_rate": 1.5620000000000002e-06, + "loss": 33.1883, + "step": 7810 + }, + { + "epoch": 0.015796894758743844, + "grad_norm": 255.5839385986328, + "learning_rate": 1.5640000000000002e-06, + "loss": 16.2388, + "step": 7820 + }, + { + "epoch": 0.01581709539142766, + "grad_norm": 638.1707763671875, + "learning_rate": 1.566e-06, + "loss": 12.3245, + "step": 7830 + }, + { + "epoch": 0.015837296024111474, + "grad_norm": 561.0966796875, + "learning_rate": 1.568e-06, + "loss": 34.3022, + "step": 7840 + }, + { + "epoch": 0.01585749665679529, + "grad_norm": 181.12254333496094, + "learning_rate": 1.5700000000000002e-06, + "loss": 14.8665, + "step": 7850 + }, + { + "epoch": 0.015877697289479107, + "grad_norm": 320.9978332519531, + "learning_rate": 1.5720000000000002e-06, + "loss": 40.0919, + "step": 7860 + }, + { + "epoch": 0.015897897922162924, + "grad_norm": 488.3760681152344, + "learning_rate": 1.5740000000000003e-06, + "loss": 23.0373, + "step": 7870 + }, + { + "epoch": 0.015918098554846737, + "grad_norm": 725.4952392578125, + "learning_rate": 1.576e-06, + "loss": 17.3022, + "step": 7880 + }, + { + "epoch": 0.015938299187530553, + "grad_norm": 807.7362060546875, + "learning_rate": 1.5780000000000002e-06, + "loss": 29.6214, + "step": 7890 + }, + { + "epoch": 0.01595849982021437, + "grad_norm": 402.11663818359375, + "learning_rate": 1.5800000000000001e-06, + "loss": 37.3078, + "step": 7900 + }, + { + "epoch": 0.015978700452898186, + "grad_norm": 416.4609680175781, + "learning_rate": 1.5820000000000003e-06, + "loss": 24.7349, + "step": 7910 + }, + { + "epoch": 0.015998901085582, + "grad_norm": 794.6650390625, + "learning_rate": 1.5840000000000002e-06, + "loss": 38.0387, + "step": 7920 + }, + { + "epoch": 0.016019101718265816, + "grad_norm": 593.4451293945312, + "learning_rate": 1.586e-06, + "loss": 23.7971, + "step": 7930 + }, + { + "epoch": 0.016039302350949632, + "grad_norm": 258.35321044921875, + "learning_rate": 1.588e-06, + "loss": 27.5747, + "step": 7940 + }, + { + "epoch": 0.01605950298363345, + "grad_norm": 773.8060302734375, + "learning_rate": 1.5900000000000002e-06, + "loss": 38.5026, + "step": 7950 + }, + { + "epoch": 0.016079703616317262, + "grad_norm": 408.9706115722656, + "learning_rate": 1.5920000000000002e-06, + "loss": 16.2559, + "step": 7960 + }, + { + "epoch": 0.01609990424900108, + "grad_norm": 763.1421508789062, + "learning_rate": 1.594e-06, + "loss": 29.3452, + "step": 7970 + }, + { + "epoch": 0.016120104881684895, + "grad_norm": 139.5187530517578, + "learning_rate": 1.596e-06, + "loss": 21.015, + "step": 7980 + }, + { + "epoch": 0.01614030551436871, + "grad_norm": 920.0736083984375, + "learning_rate": 1.5980000000000002e-06, + "loss": 27.6155, + "step": 7990 + }, + { + "epoch": 0.016160506147052525, + "grad_norm": 302.7886657714844, + "learning_rate": 1.6000000000000001e-06, + "loss": 38.6215, + "step": 8000 + }, + { + "epoch": 0.01618070677973634, + "grad_norm": 741.8783569335938, + "learning_rate": 1.6020000000000003e-06, + "loss": 37.2063, + "step": 8010 + }, + { + "epoch": 0.016200907412420158, + "grad_norm": 433.6229248046875, + "learning_rate": 1.604e-06, + "loss": 22.6892, + "step": 8020 + }, + { + "epoch": 0.016221108045103974, + "grad_norm": 453.72900390625, + "learning_rate": 1.606e-06, + 
"loss": 22.5018, + "step": 8030 + }, + { + "epoch": 0.016241308677787787, + "grad_norm": 1120.4896240234375, + "learning_rate": 1.608e-06, + "loss": 38.778, + "step": 8040 + }, + { + "epoch": 0.016261509310471604, + "grad_norm": 410.22259521484375, + "learning_rate": 1.6100000000000003e-06, + "loss": 43.0265, + "step": 8050 + }, + { + "epoch": 0.01628170994315542, + "grad_norm": 603.0123901367188, + "learning_rate": 1.6120000000000002e-06, + "loss": 26.0176, + "step": 8060 + }, + { + "epoch": 0.016301910575839237, + "grad_norm": 504.2850341796875, + "learning_rate": 1.614e-06, + "loss": 53.1275, + "step": 8070 + }, + { + "epoch": 0.01632211120852305, + "grad_norm": 294.2729797363281, + "learning_rate": 1.616e-06, + "loss": 33.1675, + "step": 8080 + }, + { + "epoch": 0.016342311841206866, + "grad_norm": 470.6175537109375, + "learning_rate": 1.6180000000000002e-06, + "loss": 14.7254, + "step": 8090 + }, + { + "epoch": 0.016362512473890683, + "grad_norm": 949.7745361328125, + "learning_rate": 1.6200000000000002e-06, + "loss": 35.2812, + "step": 8100 + }, + { + "epoch": 0.0163827131065745, + "grad_norm": 1021.1493530273438, + "learning_rate": 1.6220000000000003e-06, + "loss": 60.0575, + "step": 8110 + }, + { + "epoch": 0.016402913739258312, + "grad_norm": 210.9373779296875, + "learning_rate": 1.624e-06, + "loss": 29.2961, + "step": 8120 + }, + { + "epoch": 0.01642311437194213, + "grad_norm": 972.39404296875, + "learning_rate": 1.626e-06, + "loss": 31.1454, + "step": 8130 + }, + { + "epoch": 0.016443315004625945, + "grad_norm": 878.7973022460938, + "learning_rate": 1.6280000000000001e-06, + "loss": 26.4723, + "step": 8140 + }, + { + "epoch": 0.016463515637309762, + "grad_norm": 205.9188232421875, + "learning_rate": 1.6300000000000003e-06, + "loss": 27.026, + "step": 8150 + }, + { + "epoch": 0.016483716269993575, + "grad_norm": 527.09326171875, + "learning_rate": 1.6320000000000002e-06, + "loss": 18.5285, + "step": 8160 + }, + { + "epoch": 0.01650391690267739, + "grad_norm": 667.0870361328125, + "learning_rate": 1.634e-06, + "loss": 45.9654, + "step": 8170 + }, + { + "epoch": 0.016524117535361208, + "grad_norm": 418.41314697265625, + "learning_rate": 1.636e-06, + "loss": 36.5291, + "step": 8180 + }, + { + "epoch": 0.016544318168045025, + "grad_norm": 132.48226928710938, + "learning_rate": 1.6380000000000002e-06, + "loss": 18.3192, + "step": 8190 + }, + { + "epoch": 0.016564518800728838, + "grad_norm": 327.0763244628906, + "learning_rate": 1.6400000000000002e-06, + "loss": 30.456, + "step": 8200 + }, + { + "epoch": 0.016584719433412654, + "grad_norm": 439.4847106933594, + "learning_rate": 1.6420000000000003e-06, + "loss": 23.4486, + "step": 8210 + }, + { + "epoch": 0.01660492006609647, + "grad_norm": 508.11163330078125, + "learning_rate": 1.644e-06, + "loss": 23.7181, + "step": 8220 + }, + { + "epoch": 0.016625120698780287, + "grad_norm": 689.62646484375, + "learning_rate": 1.646e-06, + "loss": 39.5628, + "step": 8230 + }, + { + "epoch": 0.0166453213314641, + "grad_norm": 126.10978698730469, + "learning_rate": 1.6480000000000001e-06, + "loss": 32.6063, + "step": 8240 + }, + { + "epoch": 0.016665521964147917, + "grad_norm": 348.2756042480469, + "learning_rate": 1.6500000000000003e-06, + "loss": 19.646, + "step": 8250 + }, + { + "epoch": 0.016685722596831733, + "grad_norm": 159.87612915039062, + "learning_rate": 1.6520000000000002e-06, + "loss": 29.4104, + "step": 8260 + }, + { + "epoch": 0.01670592322951555, + "grad_norm": 137.435546875, + "learning_rate": 1.654e-06, + "loss": 22.5388, + "step": 
8270 + }, + { + "epoch": 0.016726123862199363, + "grad_norm": 630.7896118164062, + "learning_rate": 1.6560000000000001e-06, + "loss": 22.1155, + "step": 8280 + }, + { + "epoch": 0.01674632449488318, + "grad_norm": 581.4238891601562, + "learning_rate": 1.6580000000000003e-06, + "loss": 46.8483, + "step": 8290 + }, + { + "epoch": 0.016766525127566996, + "grad_norm": 383.9845275878906, + "learning_rate": 1.6600000000000002e-06, + "loss": 36.6675, + "step": 8300 + }, + { + "epoch": 0.016786725760250813, + "grad_norm": 303.4525146484375, + "learning_rate": 1.662e-06, + "loss": 35.5097, + "step": 8310 + }, + { + "epoch": 0.016806926392934626, + "grad_norm": 214.48594665527344, + "learning_rate": 1.664e-06, + "loss": 25.3765, + "step": 8320 + }, + { + "epoch": 0.016827127025618442, + "grad_norm": 675.5895385742188, + "learning_rate": 1.666e-06, + "loss": 43.5899, + "step": 8330 + }, + { + "epoch": 0.01684732765830226, + "grad_norm": 477.797119140625, + "learning_rate": 1.6680000000000002e-06, + "loss": 33.3804, + "step": 8340 + }, + { + "epoch": 0.016867528290986075, + "grad_norm": 279.4515075683594, + "learning_rate": 1.6700000000000003e-06, + "loss": 32.5172, + "step": 8350 + }, + { + "epoch": 0.016887728923669888, + "grad_norm": 924.8307495117188, + "learning_rate": 1.672e-06, + "loss": 31.6212, + "step": 8360 + }, + { + "epoch": 0.016907929556353705, + "grad_norm": 195.5023956298828, + "learning_rate": 1.674e-06, + "loss": 24.5261, + "step": 8370 + }, + { + "epoch": 0.01692813018903752, + "grad_norm": 300.647705078125, + "learning_rate": 1.6760000000000001e-06, + "loss": 18.4933, + "step": 8380 + }, + { + "epoch": 0.016948330821721338, + "grad_norm": 199.65211486816406, + "learning_rate": 1.6780000000000003e-06, + "loss": 19.2608, + "step": 8390 + }, + { + "epoch": 0.01696853145440515, + "grad_norm": 353.1869812011719, + "learning_rate": 1.6800000000000002e-06, + "loss": 19.9096, + "step": 8400 + }, + { + "epoch": 0.016988732087088967, + "grad_norm": 342.279296875, + "learning_rate": 1.682e-06, + "loss": 22.6966, + "step": 8410 + }, + { + "epoch": 0.017008932719772784, + "grad_norm": 485.9510803222656, + "learning_rate": 1.684e-06, + "loss": 29.0974, + "step": 8420 + }, + { + "epoch": 0.0170291333524566, + "grad_norm": 492.27593994140625, + "learning_rate": 1.686e-06, + "loss": 12.7701, + "step": 8430 + }, + { + "epoch": 0.017049333985140414, + "grad_norm": 228.1291961669922, + "learning_rate": 1.6880000000000002e-06, + "loss": 25.0399, + "step": 8440 + }, + { + "epoch": 0.01706953461782423, + "grad_norm": 641.7828369140625, + "learning_rate": 1.6900000000000003e-06, + "loss": 41.747, + "step": 8450 + }, + { + "epoch": 0.017089735250508047, + "grad_norm": 469.2547302246094, + "learning_rate": 1.692e-06, + "loss": 28.9467, + "step": 8460 + }, + { + "epoch": 0.017109935883191863, + "grad_norm": 474.65185546875, + "learning_rate": 1.694e-06, + "loss": 43.8951, + "step": 8470 + }, + { + "epoch": 0.017130136515875676, + "grad_norm": 92.68016815185547, + "learning_rate": 1.6960000000000002e-06, + "loss": 21.1349, + "step": 8480 + }, + { + "epoch": 0.017150337148559493, + "grad_norm": 437.2278747558594, + "learning_rate": 1.6980000000000003e-06, + "loss": 45.1486, + "step": 8490 + }, + { + "epoch": 0.01717053778124331, + "grad_norm": 1121.7486572265625, + "learning_rate": 1.7000000000000002e-06, + "loss": 20.8305, + "step": 8500 + }, + { + "epoch": 0.017190738413927126, + "grad_norm": 991.1923217773438, + "learning_rate": 1.702e-06, + "loss": 32.8894, + "step": 8510 + }, + { + "epoch": 
0.01721093904661094, + "grad_norm": 477.5975341796875, + "learning_rate": 1.7040000000000001e-06, + "loss": 24.5803, + "step": 8520 + }, + { + "epoch": 0.017231139679294755, + "grad_norm": 63.715213775634766, + "learning_rate": 1.706e-06, + "loss": 11.6225, + "step": 8530 + }, + { + "epoch": 0.017251340311978572, + "grad_norm": 919.2877197265625, + "learning_rate": 1.7080000000000002e-06, + "loss": 42.0231, + "step": 8540 + }, + { + "epoch": 0.01727154094466239, + "grad_norm": 2045.6112060546875, + "learning_rate": 1.7100000000000004e-06, + "loss": 51.4979, + "step": 8550 + }, + { + "epoch": 0.0172917415773462, + "grad_norm": 728.2069091796875, + "learning_rate": 1.712e-06, + "loss": 31.7765, + "step": 8560 + }, + { + "epoch": 0.017311942210030018, + "grad_norm": 1136.805419921875, + "learning_rate": 1.714e-06, + "loss": 15.2412, + "step": 8570 + }, + { + "epoch": 0.017332142842713835, + "grad_norm": 305.28338623046875, + "learning_rate": 1.7160000000000002e-06, + "loss": 11.1767, + "step": 8580 + }, + { + "epoch": 0.01735234347539765, + "grad_norm": 480.2168884277344, + "learning_rate": 1.7180000000000003e-06, + "loss": 17.7779, + "step": 8590 + }, + { + "epoch": 0.017372544108081464, + "grad_norm": 594.7160034179688, + "learning_rate": 1.72e-06, + "loss": 39.2654, + "step": 8600 + }, + { + "epoch": 0.01739274474076528, + "grad_norm": 713.5752563476562, + "learning_rate": 1.722e-06, + "loss": 42.5888, + "step": 8610 + }, + { + "epoch": 0.017412945373449097, + "grad_norm": 665.0403442382812, + "learning_rate": 1.7240000000000001e-06, + "loss": 31.9991, + "step": 8620 + }, + { + "epoch": 0.017433146006132914, + "grad_norm": 502.7206115722656, + "learning_rate": 1.726e-06, + "loss": 20.1092, + "step": 8630 + }, + { + "epoch": 0.017453346638816727, + "grad_norm": 610.836181640625, + "learning_rate": 1.7280000000000002e-06, + "loss": 29.8465, + "step": 8640 + }, + { + "epoch": 0.017473547271500543, + "grad_norm": 435.49249267578125, + "learning_rate": 1.73e-06, + "loss": 18.4336, + "step": 8650 + }, + { + "epoch": 0.01749374790418436, + "grad_norm": 337.9139709472656, + "learning_rate": 1.732e-06, + "loss": 27.6431, + "step": 8660 + }, + { + "epoch": 0.017513948536868176, + "grad_norm": 623.5963745117188, + "learning_rate": 1.734e-06, + "loss": 23.6477, + "step": 8670 + }, + { + "epoch": 0.01753414916955199, + "grad_norm": 474.98681640625, + "learning_rate": 1.7360000000000002e-06, + "loss": 39.8223, + "step": 8680 + }, + { + "epoch": 0.017554349802235806, + "grad_norm": 210.0931854248047, + "learning_rate": 1.7380000000000003e-06, + "loss": 26.6607, + "step": 8690 + }, + { + "epoch": 0.017574550434919622, + "grad_norm": 346.7015380859375, + "learning_rate": 1.74e-06, + "loss": 25.1463, + "step": 8700 + }, + { + "epoch": 0.01759475106760344, + "grad_norm": 705.1433715820312, + "learning_rate": 1.742e-06, + "loss": 35.5323, + "step": 8710 + }, + { + "epoch": 0.017614951700287252, + "grad_norm": 579.8189697265625, + "learning_rate": 1.7440000000000002e-06, + "loss": 11.3822, + "step": 8720 + }, + { + "epoch": 0.01763515233297107, + "grad_norm": 245.68829345703125, + "learning_rate": 1.746e-06, + "loss": 22.4856, + "step": 8730 + }, + { + "epoch": 0.017655352965654885, + "grad_norm": 226.94000244140625, + "learning_rate": 1.7480000000000002e-06, + "loss": 33.9477, + "step": 8740 + }, + { + "epoch": 0.0176755535983387, + "grad_norm": 240.374267578125, + "learning_rate": 1.75e-06, + "loss": 41.3515, + "step": 8750 + }, + { + "epoch": 0.017695754231022515, + "grad_norm": 460.3675842285156, + 
"learning_rate": 1.7520000000000001e-06, + "loss": 30.2187, + "step": 8760 + }, + { + "epoch": 0.01771595486370633, + "grad_norm": 442.6200866699219, + "learning_rate": 1.754e-06, + "loss": 20.0955, + "step": 8770 + }, + { + "epoch": 0.017736155496390148, + "grad_norm": 534.3021850585938, + "learning_rate": 1.7560000000000002e-06, + "loss": 15.8904, + "step": 8780 + }, + { + "epoch": 0.017756356129073964, + "grad_norm": 429.71075439453125, + "learning_rate": 1.7580000000000004e-06, + "loss": 28.6299, + "step": 8790 + }, + { + "epoch": 0.017776556761757777, + "grad_norm": 428.8110656738281, + "learning_rate": 1.76e-06, + "loss": 29.9164, + "step": 8800 + }, + { + "epoch": 0.017796757394441594, + "grad_norm": 297.4836730957031, + "learning_rate": 1.762e-06, + "loss": 29.891, + "step": 8810 + }, + { + "epoch": 0.01781695802712541, + "grad_norm": 514.8496704101562, + "learning_rate": 1.7640000000000002e-06, + "loss": 30.6904, + "step": 8820 + }, + { + "epoch": 0.017837158659809227, + "grad_norm": 668.380126953125, + "learning_rate": 1.7660000000000001e-06, + "loss": 18.5691, + "step": 8830 + }, + { + "epoch": 0.01785735929249304, + "grad_norm": 667.9898681640625, + "learning_rate": 1.7680000000000003e-06, + "loss": 21.9439, + "step": 8840 + }, + { + "epoch": 0.017877559925176856, + "grad_norm": 515.5223999023438, + "learning_rate": 1.77e-06, + "loss": 35.0883, + "step": 8850 + }, + { + "epoch": 0.017897760557860673, + "grad_norm": 456.4408874511719, + "learning_rate": 1.7720000000000001e-06, + "loss": 21.8389, + "step": 8860 + }, + { + "epoch": 0.01791796119054449, + "grad_norm": 457.3421936035156, + "learning_rate": 1.774e-06, + "loss": 30.5864, + "step": 8870 + }, + { + "epoch": 0.017938161823228303, + "grad_norm": 911.0169677734375, + "learning_rate": 1.7760000000000002e-06, + "loss": 21.5449, + "step": 8880 + }, + { + "epoch": 0.01795836245591212, + "grad_norm": 1107.2392578125, + "learning_rate": 1.7780000000000004e-06, + "loss": 31.0002, + "step": 8890 + }, + { + "epoch": 0.017978563088595936, + "grad_norm": 309.9970703125, + "learning_rate": 1.7800000000000001e-06, + "loss": 20.7838, + "step": 8900 + }, + { + "epoch": 0.017998763721279752, + "grad_norm": 525.5556030273438, + "learning_rate": 1.782e-06, + "loss": 21.8871, + "step": 8910 + }, + { + "epoch": 0.018018964353963565, + "grad_norm": 330.1131286621094, + "learning_rate": 1.7840000000000002e-06, + "loss": 33.9941, + "step": 8920 + }, + { + "epoch": 0.018039164986647382, + "grad_norm": 1103.0606689453125, + "learning_rate": 1.7860000000000001e-06, + "loss": 36.5735, + "step": 8930 + }, + { + "epoch": 0.0180593656193312, + "grad_norm": 505.9988708496094, + "learning_rate": 1.788e-06, + "loss": 17.7174, + "step": 8940 + }, + { + "epoch": 0.018079566252015015, + "grad_norm": 143.58360290527344, + "learning_rate": 1.79e-06, + "loss": 26.4376, + "step": 8950 + }, + { + "epoch": 0.018099766884698828, + "grad_norm": 1293.5728759765625, + "learning_rate": 1.7920000000000002e-06, + "loss": 38.0046, + "step": 8960 + }, + { + "epoch": 0.018119967517382644, + "grad_norm": 1032.229248046875, + "learning_rate": 1.794e-06, + "loss": 26.7437, + "step": 8970 + }, + { + "epoch": 0.01814016815006646, + "grad_norm": 393.4200744628906, + "learning_rate": 1.7960000000000003e-06, + "loss": 28.0399, + "step": 8980 + }, + { + "epoch": 0.018160368782750277, + "grad_norm": 1051.5867919921875, + "learning_rate": 1.798e-06, + "loss": 28.9257, + "step": 8990 + }, + { + "epoch": 0.01818056941543409, + "grad_norm": 266.2267150878906, + "learning_rate": 
1.8000000000000001e-06, + "loss": 26.0235, + "step": 9000 + }, + { + "epoch": 0.018200770048117907, + "grad_norm": 613.0821533203125, + "learning_rate": 1.802e-06, + "loss": 31.5658, + "step": 9010 + }, + { + "epoch": 0.018220970680801724, + "grad_norm": 498.7320251464844, + "learning_rate": 1.8040000000000002e-06, + "loss": 23.0911, + "step": 9020 + }, + { + "epoch": 0.01824117131348554, + "grad_norm": 68.88175964355469, + "learning_rate": 1.8060000000000002e-06, + "loss": 20.3582, + "step": 9030 + }, + { + "epoch": 0.018261371946169353, + "grad_norm": 316.1387634277344, + "learning_rate": 1.808e-06, + "loss": 22.0128, + "step": 9040 + }, + { + "epoch": 0.01828157257885317, + "grad_norm": 616.5128784179688, + "learning_rate": 1.81e-06, + "loss": 17.5261, + "step": 9050 + }, + { + "epoch": 0.018301773211536986, + "grad_norm": 600.02197265625, + "learning_rate": 1.8120000000000002e-06, + "loss": 28.4692, + "step": 9060 + }, + { + "epoch": 0.018321973844220803, + "grad_norm": 619.6358642578125, + "learning_rate": 1.8140000000000001e-06, + "loss": 37.1422, + "step": 9070 + }, + { + "epoch": 0.018342174476904616, + "grad_norm": 122.20323944091797, + "learning_rate": 1.8160000000000003e-06, + "loss": 12.6013, + "step": 9080 + }, + { + "epoch": 0.018362375109588432, + "grad_norm": 474.7579650878906, + "learning_rate": 1.818e-06, + "loss": 38.2704, + "step": 9090 + }, + { + "epoch": 0.01838257574227225, + "grad_norm": 338.4971618652344, + "learning_rate": 1.8200000000000002e-06, + "loss": 49.1372, + "step": 9100 + }, + { + "epoch": 0.018402776374956065, + "grad_norm": 358.2091064453125, + "learning_rate": 1.822e-06, + "loss": 20.0141, + "step": 9110 + }, + { + "epoch": 0.01842297700763988, + "grad_norm": 779.68798828125, + "learning_rate": 1.8240000000000002e-06, + "loss": 45.5779, + "step": 9120 + }, + { + "epoch": 0.018443177640323695, + "grad_norm": 0.0, + "learning_rate": 1.8260000000000002e-06, + "loss": 20.6921, + "step": 9130 + }, + { + "epoch": 0.01846337827300751, + "grad_norm": 762.357177734375, + "learning_rate": 1.8280000000000001e-06, + "loss": 26.0861, + "step": 9140 + }, + { + "epoch": 0.018483578905691325, + "grad_norm": 776.2432250976562, + "learning_rate": 1.83e-06, + "loss": 44.6114, + "step": 9150 + }, + { + "epoch": 0.01850377953837514, + "grad_norm": 442.65899658203125, + "learning_rate": 1.8320000000000002e-06, + "loss": 19.8247, + "step": 9160 + }, + { + "epoch": 0.018523980171058958, + "grad_norm": 314.3350830078125, + "learning_rate": 1.8340000000000001e-06, + "loss": 23.1571, + "step": 9170 + }, + { + "epoch": 0.018544180803742774, + "grad_norm": 207.27491760253906, + "learning_rate": 1.8360000000000003e-06, + "loss": 26.3099, + "step": 9180 + }, + { + "epoch": 0.018564381436426587, + "grad_norm": 669.66845703125, + "learning_rate": 1.838e-06, + "loss": 28.7703, + "step": 9190 + }, + { + "epoch": 0.018584582069110404, + "grad_norm": 817.0938720703125, + "learning_rate": 1.8400000000000002e-06, + "loss": 40.8719, + "step": 9200 + }, + { + "epoch": 0.01860478270179422, + "grad_norm": 783.8439331054688, + "learning_rate": 1.8420000000000001e-06, + "loss": 30.5884, + "step": 9210 + }, + { + "epoch": 0.018624983334478037, + "grad_norm": 205.8710174560547, + "learning_rate": 1.8440000000000003e-06, + "loss": 17.3456, + "step": 9220 + }, + { + "epoch": 0.01864518396716185, + "grad_norm": 422.68719482421875, + "learning_rate": 1.846e-06, + "loss": 20.6496, + "step": 9230 + }, + { + "epoch": 0.018665384599845666, + "grad_norm": 21.8781795501709, + "learning_rate": 
1.8480000000000001e-06, + "loss": 14.5447, + "step": 9240 + }, + { + "epoch": 0.018685585232529483, + "grad_norm": 801.7711791992188, + "learning_rate": 1.85e-06, + "loss": 23.7558, + "step": 9250 + }, + { + "epoch": 0.0187057858652133, + "grad_norm": 279.67120361328125, + "learning_rate": 1.8520000000000002e-06, + "loss": 15.6061, + "step": 9260 + }, + { + "epoch": 0.018725986497897112, + "grad_norm": 771.0630493164062, + "learning_rate": 1.8540000000000002e-06, + "loss": 17.1959, + "step": 9270 + }, + { + "epoch": 0.01874618713058093, + "grad_norm": 674.9140014648438, + "learning_rate": 1.856e-06, + "loss": 28.1603, + "step": 9280 + }, + { + "epoch": 0.018766387763264746, + "grad_norm": 703.7227172851562, + "learning_rate": 1.858e-06, + "loss": 31.4647, + "step": 9290 + }, + { + "epoch": 0.018786588395948562, + "grad_norm": 923.1442260742188, + "learning_rate": 1.8600000000000002e-06, + "loss": 27.7094, + "step": 9300 + }, + { + "epoch": 0.018806789028632375, + "grad_norm": 874.7780151367188, + "learning_rate": 1.8620000000000001e-06, + "loss": 40.6183, + "step": 9310 + }, + { + "epoch": 0.01882698966131619, + "grad_norm": 526.9776611328125, + "learning_rate": 1.8640000000000003e-06, + "loss": 31.6663, + "step": 9320 + }, + { + "epoch": 0.018847190294000008, + "grad_norm": 696.9180908203125, + "learning_rate": 1.866e-06, + "loss": 33.0455, + "step": 9330 + }, + { + "epoch": 0.018867390926683825, + "grad_norm": 287.4581298828125, + "learning_rate": 1.8680000000000002e-06, + "loss": 28.8934, + "step": 9340 + }, + { + "epoch": 0.018887591559367638, + "grad_norm": 158.7447967529297, + "learning_rate": 1.87e-06, + "loss": 11.3888, + "step": 9350 + }, + { + "epoch": 0.018907792192051454, + "grad_norm": 465.53643798828125, + "learning_rate": 1.8720000000000002e-06, + "loss": 20.2694, + "step": 9360 + }, + { + "epoch": 0.01892799282473527, + "grad_norm": 504.10064697265625, + "learning_rate": 1.8740000000000002e-06, + "loss": 36.4953, + "step": 9370 + }, + { + "epoch": 0.018948193457419087, + "grad_norm": 873.6696166992188, + "learning_rate": 1.8760000000000001e-06, + "loss": 16.1505, + "step": 9380 + }, + { + "epoch": 0.0189683940901029, + "grad_norm": 356.315673828125, + "learning_rate": 1.878e-06, + "loss": 15.7461, + "step": 9390 + }, + { + "epoch": 0.018988594722786717, + "grad_norm": 898.4686279296875, + "learning_rate": 1.8800000000000002e-06, + "loss": 46.1245, + "step": 9400 + }, + { + "epoch": 0.019008795355470533, + "grad_norm": 704.2838745117188, + "learning_rate": 1.8820000000000001e-06, + "loss": 39.3683, + "step": 9410 + }, + { + "epoch": 0.01902899598815435, + "grad_norm": 163.83099365234375, + "learning_rate": 1.8840000000000003e-06, + "loss": 20.4353, + "step": 9420 + }, + { + "epoch": 0.019049196620838163, + "grad_norm": 624.2174072265625, + "learning_rate": 1.886e-06, + "loss": 25.4072, + "step": 9430 + }, + { + "epoch": 0.01906939725352198, + "grad_norm": 2548.589599609375, + "learning_rate": 1.8880000000000002e-06, + "loss": 27.8204, + "step": 9440 + }, + { + "epoch": 0.019089597886205796, + "grad_norm": 302.43408203125, + "learning_rate": 1.8900000000000001e-06, + "loss": 17.2062, + "step": 9450 + }, + { + "epoch": 0.019109798518889613, + "grad_norm": 615.1204833984375, + "learning_rate": 1.8920000000000003e-06, + "loss": 28.6013, + "step": 9460 + }, + { + "epoch": 0.019129999151573426, + "grad_norm": 710.912109375, + "learning_rate": 1.8940000000000002e-06, + "loss": 29.2889, + "step": 9470 + }, + { + "epoch": 0.019150199784257242, + "grad_norm": 620.056884765625, + 
"learning_rate": 1.8960000000000001e-06, + "loss": 30.7066, + "step": 9480 + }, + { + "epoch": 0.01917040041694106, + "grad_norm": 858.3339233398438, + "learning_rate": 1.898e-06, + "loss": 34.2609, + "step": 9490 + }, + { + "epoch": 0.019190601049624875, + "grad_norm": 787.121826171875, + "learning_rate": 1.9000000000000002e-06, + "loss": 26.5648, + "step": 9500 + }, + { + "epoch": 0.01921080168230869, + "grad_norm": 248.5078582763672, + "learning_rate": 1.9020000000000002e-06, + "loss": 11.6471, + "step": 9510 + }, + { + "epoch": 0.019231002314992505, + "grad_norm": 395.26434326171875, + "learning_rate": 1.9040000000000003e-06, + "loss": 37.8632, + "step": 9520 + }, + { + "epoch": 0.01925120294767632, + "grad_norm": 353.8317565917969, + "learning_rate": 1.906e-06, + "loss": 36.6945, + "step": 9530 + }, + { + "epoch": 0.019271403580360138, + "grad_norm": 422.2906188964844, + "learning_rate": 1.908e-06, + "loss": 26.6615, + "step": 9540 + }, + { + "epoch": 0.01929160421304395, + "grad_norm": 740.6773681640625, + "learning_rate": 1.9100000000000003e-06, + "loss": 23.6173, + "step": 9550 + }, + { + "epoch": 0.019311804845727767, + "grad_norm": 691.2985229492188, + "learning_rate": 1.912e-06, + "loss": 26.8522, + "step": 9560 + }, + { + "epoch": 0.019332005478411584, + "grad_norm": 248.71804809570312, + "learning_rate": 1.9140000000000002e-06, + "loss": 16.9979, + "step": 9570 + }, + { + "epoch": 0.0193522061110954, + "grad_norm": 195.49794006347656, + "learning_rate": 1.916e-06, + "loss": 19.1804, + "step": 9580 + }, + { + "epoch": 0.019372406743779214, + "grad_norm": 0.0, + "learning_rate": 1.918e-06, + "loss": 26.4794, + "step": 9590 + }, + { + "epoch": 0.01939260737646303, + "grad_norm": 1275.6842041015625, + "learning_rate": 1.9200000000000003e-06, + "loss": 16.8437, + "step": 9600 + }, + { + "epoch": 0.019412808009146847, + "grad_norm": 1192.946533203125, + "learning_rate": 1.9220000000000004e-06, + "loss": 36.0156, + "step": 9610 + }, + { + "epoch": 0.019433008641830663, + "grad_norm": 599.123291015625, + "learning_rate": 1.924e-06, + "loss": 15.0497, + "step": 9620 + }, + { + "epoch": 0.019453209274514476, + "grad_norm": 386.57440185546875, + "learning_rate": 1.9260000000000003e-06, + "loss": 45.0766, + "step": 9630 + }, + { + "epoch": 0.019473409907198293, + "grad_norm": 687.6180419921875, + "learning_rate": 1.928e-06, + "loss": 17.8253, + "step": 9640 + }, + { + "epoch": 0.01949361053988211, + "grad_norm": 94.41118621826172, + "learning_rate": 1.93e-06, + "loss": 31.9916, + "step": 9650 + }, + { + "epoch": 0.019513811172565926, + "grad_norm": 939.4404296875, + "learning_rate": 1.9320000000000003e-06, + "loss": 28.7754, + "step": 9660 + }, + { + "epoch": 0.01953401180524974, + "grad_norm": 535.195556640625, + "learning_rate": 1.934e-06, + "loss": 18.8377, + "step": 9670 + }, + { + "epoch": 0.019554212437933555, + "grad_norm": 1218.1092529296875, + "learning_rate": 1.936e-06, + "loss": 19.8742, + "step": 9680 + }, + { + "epoch": 0.019574413070617372, + "grad_norm": 822.753173828125, + "learning_rate": 1.9380000000000003e-06, + "loss": 34.7596, + "step": 9690 + }, + { + "epoch": 0.01959461370330119, + "grad_norm": 553.1561279296875, + "learning_rate": 1.94e-06, + "loss": 39.8842, + "step": 9700 + }, + { + "epoch": 0.019614814335985, + "grad_norm": 689.9955444335938, + "learning_rate": 1.942e-06, + "loss": 16.5021, + "step": 9710 + }, + { + "epoch": 0.019635014968668818, + "grad_norm": 5.810490608215332, + "learning_rate": 1.944e-06, + "loss": 11.6419, + "step": 9720 + }, + { + "epoch": 
0.019655215601352635, + "grad_norm": 772.7295532226562, + "learning_rate": 1.946e-06, + "loss": 28.1842, + "step": 9730 + }, + { + "epoch": 0.01967541623403645, + "grad_norm": 1233.431884765625, + "learning_rate": 1.9480000000000002e-06, + "loss": 39.9902, + "step": 9740 + }, + { + "epoch": 0.019695616866720264, + "grad_norm": 492.22119140625, + "learning_rate": 1.9500000000000004e-06, + "loss": 17.8986, + "step": 9750 + }, + { + "epoch": 0.01971581749940408, + "grad_norm": 244.38047790527344, + "learning_rate": 1.952e-06, + "loss": 25.3728, + "step": 9760 + }, + { + "epoch": 0.019736018132087897, + "grad_norm": 304.52117919921875, + "learning_rate": 1.9540000000000003e-06, + "loss": 28.4243, + "step": 9770 + }, + { + "epoch": 0.019756218764771714, + "grad_norm": 309.2403869628906, + "learning_rate": 1.956e-06, + "loss": 14.3713, + "step": 9780 + }, + { + "epoch": 0.019776419397455527, + "grad_norm": 609.8701171875, + "learning_rate": 1.958e-06, + "loss": 43.4776, + "step": 9790 + }, + { + "epoch": 0.019796620030139343, + "grad_norm": 296.7516174316406, + "learning_rate": 1.9600000000000003e-06, + "loss": 35.1687, + "step": 9800 + }, + { + "epoch": 0.01981682066282316, + "grad_norm": 873.6741943359375, + "learning_rate": 1.9620000000000004e-06, + "loss": 30.533, + "step": 9810 + }, + { + "epoch": 0.019837021295506976, + "grad_norm": 610.7356567382812, + "learning_rate": 1.964e-06, + "loss": 23.3563, + "step": 9820 + }, + { + "epoch": 0.01985722192819079, + "grad_norm": 465.5621643066406, + "learning_rate": 1.9660000000000003e-06, + "loss": 11.8833, + "step": 9830 + }, + { + "epoch": 0.019877422560874606, + "grad_norm": 169.4474334716797, + "learning_rate": 1.968e-06, + "loss": 19.3861, + "step": 9840 + }, + { + "epoch": 0.019897623193558422, + "grad_norm": 507.5028381347656, + "learning_rate": 1.97e-06, + "loss": 22.765, + "step": 9850 + }, + { + "epoch": 0.01991782382624224, + "grad_norm": 836.77587890625, + "learning_rate": 1.972e-06, + "loss": 40.2573, + "step": 9860 + }, + { + "epoch": 0.019938024458926052, + "grad_norm": 826.0131225585938, + "learning_rate": 1.974e-06, + "loss": 36.6529, + "step": 9870 + }, + { + "epoch": 0.01995822509160987, + "grad_norm": 577.0784912109375, + "learning_rate": 1.9760000000000002e-06, + "loss": 35.453, + "step": 9880 + }, + { + "epoch": 0.019978425724293685, + "grad_norm": 592.3372802734375, + "learning_rate": 1.9780000000000004e-06, + "loss": 26.5375, + "step": 9890 + }, + { + "epoch": 0.0199986263569775, + "grad_norm": 272.7134704589844, + "learning_rate": 1.98e-06, + "loss": 19.0487, + "step": 9900 + }, + { + "epoch": 0.020018826989661315, + "grad_norm": 732.1380004882812, + "learning_rate": 1.982e-06, + "loss": 13.1576, + "step": 9910 + }, + { + "epoch": 0.02003902762234513, + "grad_norm": 972.0679931640625, + "learning_rate": 1.984e-06, + "loss": 21.3812, + "step": 9920 + }, + { + "epoch": 0.020059228255028948, + "grad_norm": 374.1379089355469, + "learning_rate": 1.986e-06, + "loss": 27.0574, + "step": 9930 + }, + { + "epoch": 0.020079428887712764, + "grad_norm": 441.6177062988281, + "learning_rate": 1.9880000000000003e-06, + "loss": 43.1995, + "step": 9940 + }, + { + "epoch": 0.020099629520396577, + "grad_norm": 377.7627868652344, + "learning_rate": 1.9900000000000004e-06, + "loss": 27.2779, + "step": 9950 + }, + { + "epoch": 0.020119830153080394, + "grad_norm": 265.7279357910156, + "learning_rate": 1.992e-06, + "loss": 17.3237, + "step": 9960 + }, + { + "epoch": 0.02014003078576421, + "grad_norm": 997.96923828125, + "learning_rate": 
1.9940000000000003e-06, + "loss": 34.881, + "step": 9970 + }, + { + "epoch": 0.020160231418448027, + "grad_norm": 457.5382080078125, + "learning_rate": 1.996e-06, + "loss": 28.9901, + "step": 9980 + }, + { + "epoch": 0.02018043205113184, + "grad_norm": 1992.1358642578125, + "learning_rate": 1.998e-06, + "loss": 48.9051, + "step": 9990 + }, + { + "epoch": 0.020200632683815656, + "grad_norm": 551.3404541015625, + "learning_rate": 2.0000000000000003e-06, + "loss": 20.4688, + "step": 10000 + }, + { + "epoch": 0.020220833316499473, + "grad_norm": 725.7215576171875, + "learning_rate": 2.002e-06, + "loss": 20.0534, + "step": 10010 + }, + { + "epoch": 0.02024103394918329, + "grad_norm": 341.03387451171875, + "learning_rate": 2.004e-06, + "loss": 40.7382, + "step": 10020 + }, + { + "epoch": 0.020261234581867103, + "grad_norm": 1009.0243530273438, + "learning_rate": 2.0060000000000004e-06, + "loss": 18.6586, + "step": 10030 + }, + { + "epoch": 0.02028143521455092, + "grad_norm": 393.10284423828125, + "learning_rate": 2.008e-06, + "loss": 17.7199, + "step": 10040 + }, + { + "epoch": 0.020301635847234736, + "grad_norm": 488.662353515625, + "learning_rate": 2.0100000000000002e-06, + "loss": 28.6032, + "step": 10050 + }, + { + "epoch": 0.020321836479918552, + "grad_norm": 770.6610717773438, + "learning_rate": 2.012e-06, + "loss": 35.9281, + "step": 10060 + }, + { + "epoch": 0.020342037112602365, + "grad_norm": 432.2557067871094, + "learning_rate": 2.014e-06, + "loss": 33.4646, + "step": 10070 + }, + { + "epoch": 0.020362237745286182, + "grad_norm": 854.4219360351562, + "learning_rate": 2.0160000000000003e-06, + "loss": 31.1678, + "step": 10080 + }, + { + "epoch": 0.02038243837797, + "grad_norm": 269.6304626464844, + "learning_rate": 2.0180000000000004e-06, + "loss": 13.6562, + "step": 10090 + }, + { + "epoch": 0.020402639010653815, + "grad_norm": 388.16375732421875, + "learning_rate": 2.02e-06, + "loss": 33.808, + "step": 10100 + }, + { + "epoch": 0.020422839643337628, + "grad_norm": 1145.10546875, + "learning_rate": 2.022e-06, + "loss": 39.483, + "step": 10110 + }, + { + "epoch": 0.020443040276021444, + "grad_norm": 0.0, + "learning_rate": 2.024e-06, + "loss": 29.5741, + "step": 10120 + }, + { + "epoch": 0.02046324090870526, + "grad_norm": 239.4766082763672, + "learning_rate": 2.026e-06, + "loss": 23.1077, + "step": 10130 + }, + { + "epoch": 0.020483441541389077, + "grad_norm": 693.35888671875, + "learning_rate": 2.0280000000000003e-06, + "loss": 19.3912, + "step": 10140 + }, + { + "epoch": 0.02050364217407289, + "grad_norm": 448.5995788574219, + "learning_rate": 2.0300000000000005e-06, + "loss": 37.3191, + "step": 10150 + }, + { + "epoch": 0.020523842806756707, + "grad_norm": 717.5645141601562, + "learning_rate": 2.032e-06, + "loss": 37.5115, + "step": 10160 + }, + { + "epoch": 0.020544043439440524, + "grad_norm": 232.80697631835938, + "learning_rate": 2.0340000000000003e-06, + "loss": 33.9487, + "step": 10170 + }, + { + "epoch": 0.02056424407212434, + "grad_norm": 231.29507446289062, + "learning_rate": 2.036e-06, + "loss": 31.3771, + "step": 10180 + }, + { + "epoch": 0.020584444704808153, + "grad_norm": 637.5938720703125, + "learning_rate": 2.0380000000000002e-06, + "loss": 30.6479, + "step": 10190 + }, + { + "epoch": 0.02060464533749197, + "grad_norm": 358.1338195800781, + "learning_rate": 2.04e-06, + "loss": 15.479, + "step": 10200 + }, + { + "epoch": 0.020624845970175786, + "grad_norm": 267.0400085449219, + "learning_rate": 2.042e-06, + "loss": 15.4971, + "step": 10210 + }, + { + "epoch": 
0.020645046602859603, + "grad_norm": 526.010498046875, + "learning_rate": 2.0440000000000003e-06, + "loss": 45.7432, + "step": 10220 + }, + { + "epoch": 0.020665247235543416, + "grad_norm": 507.1458435058594, + "learning_rate": 2.0460000000000004e-06, + "loss": 12.3678, + "step": 10230 + }, + { + "epoch": 0.020685447868227232, + "grad_norm": 257.6839904785156, + "learning_rate": 2.048e-06, + "loss": 32.5355, + "step": 10240 + }, + { + "epoch": 0.02070564850091105, + "grad_norm": 573.19482421875, + "learning_rate": 2.05e-06, + "loss": 19.0711, + "step": 10250 + }, + { + "epoch": 0.020725849133594865, + "grad_norm": 686.1099243164062, + "learning_rate": 2.052e-06, + "loss": 20.8575, + "step": 10260 + }, + { + "epoch": 0.02074604976627868, + "grad_norm": 415.1260070800781, + "learning_rate": 2.054e-06, + "loss": 22.5357, + "step": 10270 + }, + { + "epoch": 0.020766250398962495, + "grad_norm": 484.9674377441406, + "learning_rate": 2.0560000000000003e-06, + "loss": 19.3595, + "step": 10280 + }, + { + "epoch": 0.02078645103164631, + "grad_norm": 957.904052734375, + "learning_rate": 2.0580000000000005e-06, + "loss": 19.152, + "step": 10290 + }, + { + "epoch": 0.020806651664330128, + "grad_norm": 580.3505859375, + "learning_rate": 2.06e-06, + "loss": 24.9238, + "step": 10300 + }, + { + "epoch": 0.02082685229701394, + "grad_norm": 253.50173950195312, + "learning_rate": 2.062e-06, + "loss": 19.7234, + "step": 10310 + }, + { + "epoch": 0.020847052929697758, + "grad_norm": 451.6479797363281, + "learning_rate": 2.064e-06, + "loss": 22.097, + "step": 10320 + }, + { + "epoch": 0.020867253562381574, + "grad_norm": 219.24774169921875, + "learning_rate": 2.066e-06, + "loss": 21.591, + "step": 10330 + }, + { + "epoch": 0.02088745419506539, + "grad_norm": 557.32958984375, + "learning_rate": 2.0680000000000004e-06, + "loss": 27.9227, + "step": 10340 + }, + { + "epoch": 0.020907654827749204, + "grad_norm": 778.280517578125, + "learning_rate": 2.07e-06, + "loss": 32.9145, + "step": 10350 + }, + { + "epoch": 0.02092785546043302, + "grad_norm": 685.9678344726562, + "learning_rate": 2.0720000000000002e-06, + "loss": 25.987, + "step": 10360 + }, + { + "epoch": 0.020948056093116837, + "grad_norm": 627.4102783203125, + "learning_rate": 2.0740000000000004e-06, + "loss": 37.0447, + "step": 10370 + }, + { + "epoch": 0.020968256725800653, + "grad_norm": 560.6248779296875, + "learning_rate": 2.076e-06, + "loss": 23.6747, + "step": 10380 + }, + { + "epoch": 0.020988457358484466, + "grad_norm": 563.7539672851562, + "learning_rate": 2.0780000000000003e-06, + "loss": 33.1649, + "step": 10390 + }, + { + "epoch": 0.021008657991168283, + "grad_norm": 498.6136474609375, + "learning_rate": 2.08e-06, + "loss": 26.6104, + "step": 10400 + }, + { + "epoch": 0.0210288586238521, + "grad_norm": 1427.0985107421875, + "learning_rate": 2.082e-06, + "loss": 21.6541, + "step": 10410 + }, + { + "epoch": 0.021049059256535916, + "grad_norm": 595.067138671875, + "learning_rate": 2.0840000000000003e-06, + "loss": 25.8228, + "step": 10420 + }, + { + "epoch": 0.02106925988921973, + "grad_norm": 0.0, + "learning_rate": 2.0860000000000004e-06, + "loss": 7.918, + "step": 10430 + }, + { + "epoch": 0.021089460521903546, + "grad_norm": 484.4460754394531, + "learning_rate": 2.088e-06, + "loss": 31.523, + "step": 10440 + }, + { + "epoch": 0.021109661154587362, + "grad_norm": 427.2276611328125, + "learning_rate": 2.09e-06, + "loss": 17.9014, + "step": 10450 + }, + { + "epoch": 0.02112986178727118, + "grad_norm": 231.73727416992188, + "learning_rate": 
2.092e-06, + "loss": 37.3303, + "step": 10460 + }, + { + "epoch": 0.02115006241995499, + "grad_norm": 1066.0413818359375, + "learning_rate": 2.094e-06, + "loss": 39.7092, + "step": 10470 + }, + { + "epoch": 0.021170263052638808, + "grad_norm": 564.856689453125, + "learning_rate": 2.0960000000000003e-06, + "loss": 28.4659, + "step": 10480 + }, + { + "epoch": 0.021190463685322625, + "grad_norm": 888.2863159179688, + "learning_rate": 2.098e-06, + "loss": 21.1032, + "step": 10490 + }, + { + "epoch": 0.02121066431800644, + "grad_norm": 589.4425048828125, + "learning_rate": 2.1000000000000002e-06, + "loss": 46.9519, + "step": 10500 + }, + { + "epoch": 0.021230864950690254, + "grad_norm": 630.2435913085938, + "learning_rate": 2.102e-06, + "loss": 47.407, + "step": 10510 + }, + { + "epoch": 0.02125106558337407, + "grad_norm": 972.0075073242188, + "learning_rate": 2.104e-06, + "loss": 29.9414, + "step": 10520 + }, + { + "epoch": 0.021271266216057887, + "grad_norm": 317.7508850097656, + "learning_rate": 2.1060000000000002e-06, + "loss": 13.2156, + "step": 10530 + }, + { + "epoch": 0.021291466848741704, + "grad_norm": 616.036376953125, + "learning_rate": 2.108e-06, + "loss": 44.4217, + "step": 10540 + }, + { + "epoch": 0.021311667481425517, + "grad_norm": 833.261474609375, + "learning_rate": 2.11e-06, + "loss": 28.1556, + "step": 10550 + }, + { + "epoch": 0.021331868114109333, + "grad_norm": 382.53973388671875, + "learning_rate": 2.1120000000000003e-06, + "loss": 33.0207, + "step": 10560 + }, + { + "epoch": 0.02135206874679315, + "grad_norm": 557.8203125, + "learning_rate": 2.1140000000000004e-06, + "loss": 22.0744, + "step": 10570 + }, + { + "epoch": 0.021372269379476967, + "grad_norm": 1072.9012451171875, + "learning_rate": 2.116e-06, + "loss": 33.3419, + "step": 10580 + }, + { + "epoch": 0.02139247001216078, + "grad_norm": 883.43603515625, + "learning_rate": 2.118e-06, + "loss": 21.8559, + "step": 10590 + }, + { + "epoch": 0.021412670644844596, + "grad_norm": 466.87884521484375, + "learning_rate": 2.12e-06, + "loss": 32.9837, + "step": 10600 + }, + { + "epoch": 0.021432871277528413, + "grad_norm": 445.830078125, + "learning_rate": 2.122e-06, + "loss": 34.1535, + "step": 10610 + }, + { + "epoch": 0.02145307191021223, + "grad_norm": 915.6752319335938, + "learning_rate": 2.1240000000000003e-06, + "loss": 25.3785, + "step": 10620 + }, + { + "epoch": 0.021473272542896042, + "grad_norm": 135.380859375, + "learning_rate": 2.1260000000000005e-06, + "loss": 20.2444, + "step": 10630 + }, + { + "epoch": 0.02149347317557986, + "grad_norm": 963.3563842773438, + "learning_rate": 2.128e-06, + "loss": 24.7575, + "step": 10640 + }, + { + "epoch": 0.021513673808263675, + "grad_norm": 416.7027893066406, + "learning_rate": 2.13e-06, + "loss": 25.5095, + "step": 10650 + }, + { + "epoch": 0.021533874440947492, + "grad_norm": 443.62103271484375, + "learning_rate": 2.132e-06, + "loss": 36.6364, + "step": 10660 + }, + { + "epoch": 0.021554075073631305, + "grad_norm": 429.9660339355469, + "learning_rate": 2.1340000000000002e-06, + "loss": 24.7778, + "step": 10670 + }, + { + "epoch": 0.02157427570631512, + "grad_norm": 238.11019897460938, + "learning_rate": 2.1360000000000004e-06, + "loss": 23.2085, + "step": 10680 + }, + { + "epoch": 0.021594476338998938, + "grad_norm": 742.6220703125, + "learning_rate": 2.138e-06, + "loss": 29.7714, + "step": 10690 + }, + { + "epoch": 0.021614676971682754, + "grad_norm": 488.2852478027344, + "learning_rate": 2.1400000000000003e-06, + "loss": 22.6061, + "step": 10700 + }, + { + "epoch": 
0.021634877604366567, + "grad_norm": 997.8129272460938, + "learning_rate": 2.142e-06, + "loss": 40.28, + "step": 10710 + }, + { + "epoch": 0.021655078237050384, + "grad_norm": 178.46737670898438, + "learning_rate": 2.144e-06, + "loss": 36.7309, + "step": 10720 + }, + { + "epoch": 0.0216752788697342, + "grad_norm": 1194.1746826171875, + "learning_rate": 2.1460000000000003e-06, + "loss": 27.8014, + "step": 10730 + }, + { + "epoch": 0.021695479502418017, + "grad_norm": 482.2351379394531, + "learning_rate": 2.148e-06, + "loss": 34.1063, + "step": 10740 + }, + { + "epoch": 0.02171568013510183, + "grad_norm": 520.8737182617188, + "learning_rate": 2.15e-06, + "loss": 30.059, + "step": 10750 + }, + { + "epoch": 0.021735880767785647, + "grad_norm": 334.5901794433594, + "learning_rate": 2.1520000000000003e-06, + "loss": 31.9883, + "step": 10760 + }, + { + "epoch": 0.021756081400469463, + "grad_norm": 340.0887451171875, + "learning_rate": 2.1540000000000005e-06, + "loss": 24.3153, + "step": 10770 + }, + { + "epoch": 0.02177628203315328, + "grad_norm": 569.4065551757812, + "learning_rate": 2.156e-06, + "loss": 34.2532, + "step": 10780 + }, + { + "epoch": 0.021796482665837093, + "grad_norm": 480.6165771484375, + "learning_rate": 2.158e-06, + "loss": 38.8058, + "step": 10790 + }, + { + "epoch": 0.02181668329852091, + "grad_norm": 992.14013671875, + "learning_rate": 2.16e-06, + "loss": 32.2644, + "step": 10800 + }, + { + "epoch": 0.021836883931204726, + "grad_norm": 267.1018981933594, + "learning_rate": 2.1620000000000002e-06, + "loss": 21.6186, + "step": 10810 + }, + { + "epoch": 0.021857084563888542, + "grad_norm": 423.60675048828125, + "learning_rate": 2.1640000000000004e-06, + "loss": 43.1586, + "step": 10820 + }, + { + "epoch": 0.021877285196572355, + "grad_norm": 331.4594421386719, + "learning_rate": 2.166e-06, + "loss": 21.7183, + "step": 10830 + }, + { + "epoch": 0.021897485829256172, + "grad_norm": 931.9136962890625, + "learning_rate": 2.1680000000000002e-06, + "loss": 40.9695, + "step": 10840 + }, + { + "epoch": 0.02191768646193999, + "grad_norm": 667.7154541015625, + "learning_rate": 2.17e-06, + "loss": 18.6281, + "step": 10850 + }, + { + "epoch": 0.021937887094623805, + "grad_norm": 1308.722900390625, + "learning_rate": 2.172e-06, + "loss": 36.8308, + "step": 10860 + }, + { + "epoch": 0.021958087727307618, + "grad_norm": 1057.197509765625, + "learning_rate": 2.1740000000000003e-06, + "loss": 27.0626, + "step": 10870 + }, + { + "epoch": 0.021978288359991435, + "grad_norm": 206.7808074951172, + "learning_rate": 2.176e-06, + "loss": 35.9422, + "step": 10880 + }, + { + "epoch": 0.02199848899267525, + "grad_norm": 162.13706970214844, + "learning_rate": 2.178e-06, + "loss": 40.699, + "step": 10890 + }, + { + "epoch": 0.022018689625359068, + "grad_norm": 902.00634765625, + "learning_rate": 2.1800000000000003e-06, + "loss": 57.6747, + "step": 10900 + }, + { + "epoch": 0.02203889025804288, + "grad_norm": 483.8782653808594, + "learning_rate": 2.182e-06, + "loss": 38.8847, + "step": 10910 + }, + { + "epoch": 0.022059090890726697, + "grad_norm": 536.849609375, + "learning_rate": 2.184e-06, + "loss": 32.1737, + "step": 10920 + }, + { + "epoch": 0.022079291523410514, + "grad_norm": 821.1912231445312, + "learning_rate": 2.186e-06, + "loss": 29.3968, + "step": 10930 + }, + { + "epoch": 0.02209949215609433, + "grad_norm": 963.5684204101562, + "learning_rate": 2.188e-06, + "loss": 17.3173, + "step": 10940 + }, + { + "epoch": 0.022119692788778143, + "grad_norm": 978.8640747070312, + "learning_rate": 2.19e-06, + 
"loss": 37.9862, + "step": 10950 + }, + { + "epoch": 0.02213989342146196, + "grad_norm": 797.7247924804688, + "learning_rate": 2.1920000000000004e-06, + "loss": 23.9345, + "step": 10960 + }, + { + "epoch": 0.022160094054145776, + "grad_norm": 209.15562438964844, + "learning_rate": 2.194e-06, + "loss": 31.8476, + "step": 10970 + }, + { + "epoch": 0.022180294686829593, + "grad_norm": 993.0297241210938, + "learning_rate": 2.1960000000000002e-06, + "loss": 27.8631, + "step": 10980 + }, + { + "epoch": 0.022200495319513406, + "grad_norm": 1262.285400390625, + "learning_rate": 2.198e-06, + "loss": 39.0367, + "step": 10990 + }, + { + "epoch": 0.022220695952197222, + "grad_norm": 957.9039916992188, + "learning_rate": 2.2e-06, + "loss": 28.4492, + "step": 11000 + }, + { + "epoch": 0.02224089658488104, + "grad_norm": 154.41236877441406, + "learning_rate": 2.2020000000000003e-06, + "loss": 15.0966, + "step": 11010 + }, + { + "epoch": 0.022261097217564856, + "grad_norm": 237.81320190429688, + "learning_rate": 2.2040000000000004e-06, + "loss": 28.2412, + "step": 11020 + }, + { + "epoch": 0.02228129785024867, + "grad_norm": 614.3018798828125, + "learning_rate": 2.206e-06, + "loss": 30.5096, + "step": 11030 + }, + { + "epoch": 0.022301498482932485, + "grad_norm": 548.9668579101562, + "learning_rate": 2.2080000000000003e-06, + "loss": 24.7881, + "step": 11040 + }, + { + "epoch": 0.0223216991156163, + "grad_norm": 600.4301147460938, + "learning_rate": 2.21e-06, + "loss": 19.2475, + "step": 11050 + }, + { + "epoch": 0.022341899748300118, + "grad_norm": 439.44921875, + "learning_rate": 2.212e-06, + "loss": 25.5505, + "step": 11060 + }, + { + "epoch": 0.02236210038098393, + "grad_norm": 764.2405395507812, + "learning_rate": 2.2140000000000003e-06, + "loss": 30.3772, + "step": 11070 + }, + { + "epoch": 0.022382301013667748, + "grad_norm": 1142.8809814453125, + "learning_rate": 2.216e-06, + "loss": 32.3536, + "step": 11080 + }, + { + "epoch": 0.022402501646351564, + "grad_norm": 686.0266723632812, + "learning_rate": 2.218e-06, + "loss": 43.1244, + "step": 11090 + }, + { + "epoch": 0.02242270227903538, + "grad_norm": 450.30145263671875, + "learning_rate": 2.2200000000000003e-06, + "loss": 25.1819, + "step": 11100 + }, + { + "epoch": 0.022442902911719194, + "grad_norm": 689.900390625, + "learning_rate": 2.222e-06, + "loss": 20.4868, + "step": 11110 + }, + { + "epoch": 0.02246310354440301, + "grad_norm": 380.3027648925781, + "learning_rate": 2.2240000000000002e-06, + "loss": 35.7335, + "step": 11120 + }, + { + "epoch": 0.022483304177086827, + "grad_norm": 398.4967956542969, + "learning_rate": 2.226e-06, + "loss": 14.332, + "step": 11130 + }, + { + "epoch": 0.022503504809770643, + "grad_norm": 377.4896240234375, + "learning_rate": 2.228e-06, + "loss": 9.5262, + "step": 11140 + }, + { + "epoch": 0.022523705442454457, + "grad_norm": 149.10630798339844, + "learning_rate": 2.2300000000000002e-06, + "loss": 19.6389, + "step": 11150 + }, + { + "epoch": 0.022543906075138273, + "grad_norm": 207.62815856933594, + "learning_rate": 2.2320000000000004e-06, + "loss": 38.4066, + "step": 11160 + }, + { + "epoch": 0.02256410670782209, + "grad_norm": 401.92633056640625, + "learning_rate": 2.234e-06, + "loss": 33.9303, + "step": 11170 + }, + { + "epoch": 0.022584307340505906, + "grad_norm": 344.4341125488281, + "learning_rate": 2.2360000000000003e-06, + "loss": 26.8016, + "step": 11180 + }, + { + "epoch": 0.02260450797318972, + "grad_norm": 724.0303955078125, + "learning_rate": 2.238e-06, + "loss": 23.6284, + "step": 11190 + }, + { + 
"epoch": 0.022624708605873536, + "grad_norm": 679.6116333007812, + "learning_rate": 2.24e-06, + "loss": 25.3203, + "step": 11200 + }, + { + "epoch": 0.022644909238557352, + "grad_norm": 656.9351806640625, + "learning_rate": 2.2420000000000003e-06, + "loss": 16.0256, + "step": 11210 + }, + { + "epoch": 0.02266510987124117, + "grad_norm": 1038.72314453125, + "learning_rate": 2.244e-06, + "loss": 33.1557, + "step": 11220 + }, + { + "epoch": 0.022685310503924982, + "grad_norm": 396.61016845703125, + "learning_rate": 2.246e-06, + "loss": 30.6392, + "step": 11230 + }, + { + "epoch": 0.0227055111366088, + "grad_norm": 1008.8660888671875, + "learning_rate": 2.2480000000000003e-06, + "loss": 32.2001, + "step": 11240 + }, + { + "epoch": 0.022725711769292615, + "grad_norm": 527.4194946289062, + "learning_rate": 2.25e-06, + "loss": 51.1306, + "step": 11250 + }, + { + "epoch": 0.02274591240197643, + "grad_norm": 780.5740356445312, + "learning_rate": 2.252e-06, + "loss": 15.5578, + "step": 11260 + }, + { + "epoch": 0.022766113034660244, + "grad_norm": 178.80038452148438, + "learning_rate": 2.254e-06, + "loss": 20.6808, + "step": 11270 + }, + { + "epoch": 0.02278631366734406, + "grad_norm": 151.0431365966797, + "learning_rate": 2.256e-06, + "loss": 28.8247, + "step": 11280 + }, + { + "epoch": 0.022806514300027877, + "grad_norm": 640.3284301757812, + "learning_rate": 2.2580000000000002e-06, + "loss": 30.0148, + "step": 11290 + }, + { + "epoch": 0.022826714932711694, + "grad_norm": 1466.9593505859375, + "learning_rate": 2.2600000000000004e-06, + "loss": 32.5548, + "step": 11300 + }, + { + "epoch": 0.022846915565395507, + "grad_norm": 141.0211639404297, + "learning_rate": 2.262e-06, + "loss": 24.7166, + "step": 11310 + }, + { + "epoch": 0.022867116198079324, + "grad_norm": 375.2640075683594, + "learning_rate": 2.2640000000000003e-06, + "loss": 33.2573, + "step": 11320 + }, + { + "epoch": 0.02288731683076314, + "grad_norm": 296.55914306640625, + "learning_rate": 2.266e-06, + "loss": 13.459, + "step": 11330 + }, + { + "epoch": 0.022907517463446957, + "grad_norm": 572.7025756835938, + "learning_rate": 2.268e-06, + "loss": 28.0534, + "step": 11340 + }, + { + "epoch": 0.02292771809613077, + "grad_norm": 154.33242797851562, + "learning_rate": 2.2700000000000003e-06, + "loss": 22.2405, + "step": 11350 + }, + { + "epoch": 0.022947918728814586, + "grad_norm": 109.69579315185547, + "learning_rate": 2.2720000000000004e-06, + "loss": 17.0903, + "step": 11360 + }, + { + "epoch": 0.022968119361498403, + "grad_norm": 554.3320922851562, + "learning_rate": 2.274e-06, + "loss": 31.1534, + "step": 11370 + }, + { + "epoch": 0.02298831999418222, + "grad_norm": 1792.9173583984375, + "learning_rate": 2.2760000000000003e-06, + "loss": 40.4314, + "step": 11380 + }, + { + "epoch": 0.023008520626866032, + "grad_norm": 451.7829895019531, + "learning_rate": 2.278e-06, + "loss": 32.3919, + "step": 11390 + }, + { + "epoch": 0.02302872125954985, + "grad_norm": 329.4390563964844, + "learning_rate": 2.28e-06, + "loss": 16.4796, + "step": 11400 + }, + { + "epoch": 0.023048921892233665, + "grad_norm": 1457.64208984375, + "learning_rate": 2.282e-06, + "loss": 33.6929, + "step": 11410 + }, + { + "epoch": 0.023069122524917482, + "grad_norm": 706.9736938476562, + "learning_rate": 2.284e-06, + "loss": 19.0316, + "step": 11420 + }, + { + "epoch": 0.023089323157601295, + "grad_norm": 41.44367218017578, + "learning_rate": 2.2860000000000002e-06, + "loss": 26.5766, + "step": 11430 + }, + { + "epoch": 0.02310952379028511, + "grad_norm": 
243.57997131347656, + "learning_rate": 2.2880000000000004e-06, + "loss": 19.6041, + "step": 11440 + }, + { + "epoch": 0.023129724422968928, + "grad_norm": 826.9224853515625, + "learning_rate": 2.29e-06, + "loss": 36.845, + "step": 11450 + }, + { + "epoch": 0.023149925055652745, + "grad_norm": 426.7275695800781, + "learning_rate": 2.2920000000000002e-06, + "loss": 12.6838, + "step": 11460 + }, + { + "epoch": 0.023170125688336558, + "grad_norm": 825.5134887695312, + "learning_rate": 2.294e-06, + "loss": 31.3542, + "step": 11470 + }, + { + "epoch": 0.023190326321020374, + "grad_norm": 189.77012634277344, + "learning_rate": 2.296e-06, + "loss": 22.3825, + "step": 11480 + }, + { + "epoch": 0.02321052695370419, + "grad_norm": 769.2461547851562, + "learning_rate": 2.2980000000000003e-06, + "loss": 29.7142, + "step": 11490 + }, + { + "epoch": 0.023230727586388007, + "grad_norm": 485.4276428222656, + "learning_rate": 2.3000000000000004e-06, + "loss": 30.7081, + "step": 11500 + }, + { + "epoch": 0.02325092821907182, + "grad_norm": 922.8281860351562, + "learning_rate": 2.302e-06, + "loss": 41.9903, + "step": 11510 + }, + { + "epoch": 0.023271128851755637, + "grad_norm": 1233.290283203125, + "learning_rate": 2.3040000000000003e-06, + "loss": 47.3183, + "step": 11520 + }, + { + "epoch": 0.023291329484439453, + "grad_norm": 375.4042053222656, + "learning_rate": 2.306e-06, + "loss": 16.7128, + "step": 11530 + }, + { + "epoch": 0.02331153011712327, + "grad_norm": 698.8474731445312, + "learning_rate": 2.308e-06, + "loss": 30.0822, + "step": 11540 + }, + { + "epoch": 0.023331730749807083, + "grad_norm": 491.28009033203125, + "learning_rate": 2.3100000000000003e-06, + "loss": 20.6356, + "step": 11550 + }, + { + "epoch": 0.0233519313824909, + "grad_norm": 284.2083740234375, + "learning_rate": 2.312e-06, + "loss": 29.2223, + "step": 11560 + }, + { + "epoch": 0.023372132015174716, + "grad_norm": 423.8869323730469, + "learning_rate": 2.314e-06, + "loss": 26.4561, + "step": 11570 + }, + { + "epoch": 0.023392332647858533, + "grad_norm": 66.48220825195312, + "learning_rate": 2.3160000000000004e-06, + "loss": 40.7723, + "step": 11580 + }, + { + "epoch": 0.023412533280542346, + "grad_norm": 377.4560852050781, + "learning_rate": 2.318e-06, + "loss": 22.6986, + "step": 11590 + }, + { + "epoch": 0.023432733913226162, + "grad_norm": 300.7611999511719, + "learning_rate": 2.3200000000000002e-06, + "loss": 34.0606, + "step": 11600 + }, + { + "epoch": 0.02345293454590998, + "grad_norm": 524.640625, + "learning_rate": 2.322e-06, + "loss": 28.0742, + "step": 11610 + }, + { + "epoch": 0.023473135178593795, + "grad_norm": 772.0435180664062, + "learning_rate": 2.324e-06, + "loss": 19.5605, + "step": 11620 + }, + { + "epoch": 0.023493335811277608, + "grad_norm": 766.981201171875, + "learning_rate": 2.3260000000000003e-06, + "loss": 23.2606, + "step": 11630 + }, + { + "epoch": 0.023513536443961425, + "grad_norm": 902.1786499023438, + "learning_rate": 2.3280000000000004e-06, + "loss": 36.3294, + "step": 11640 + }, + { + "epoch": 0.02353373707664524, + "grad_norm": 569.1446533203125, + "learning_rate": 2.33e-06, + "loss": 23.0629, + "step": 11650 + }, + { + "epoch": 0.023553937709329058, + "grad_norm": 199.93630981445312, + "learning_rate": 2.3320000000000003e-06, + "loss": 16.678, + "step": 11660 + }, + { + "epoch": 0.02357413834201287, + "grad_norm": 188.4761505126953, + "learning_rate": 2.334e-06, + "loss": 37.7719, + "step": 11670 + }, + { + "epoch": 0.023594338974696687, + "grad_norm": 416.677001953125, + "learning_rate": 
2.336e-06, + "loss": 29.8293, + "step": 11680 + }, + { + "epoch": 0.023614539607380504, + "grad_norm": 316.926513671875, + "learning_rate": 2.3380000000000003e-06, + "loss": 32.9385, + "step": 11690 + }, + { + "epoch": 0.02363474024006432, + "grad_norm": 644.300048828125, + "learning_rate": 2.3400000000000005e-06, + "loss": 57.1091, + "step": 11700 + }, + { + "epoch": 0.023654940872748133, + "grad_norm": 855.0596923828125, + "learning_rate": 2.342e-06, + "loss": 36.5547, + "step": 11710 + }, + { + "epoch": 0.02367514150543195, + "grad_norm": 955.8478393554688, + "learning_rate": 2.3440000000000003e-06, + "loss": 32.1896, + "step": 11720 + }, + { + "epoch": 0.023695342138115767, + "grad_norm": 567.6542358398438, + "learning_rate": 2.346e-06, + "loss": 21.2947, + "step": 11730 + }, + { + "epoch": 0.023715542770799583, + "grad_norm": 826.7879638671875, + "learning_rate": 2.3480000000000002e-06, + "loss": 24.1162, + "step": 11740 + }, + { + "epoch": 0.023735743403483396, + "grad_norm": 471.20343017578125, + "learning_rate": 2.35e-06, + "loss": 28.2968, + "step": 11750 + }, + { + "epoch": 0.023755944036167213, + "grad_norm": 60.918678283691406, + "learning_rate": 2.352e-06, + "loss": 22.3088, + "step": 11760 + }, + { + "epoch": 0.02377614466885103, + "grad_norm": 232.92462158203125, + "learning_rate": 2.3540000000000002e-06, + "loss": 24.5559, + "step": 11770 + }, + { + "epoch": 0.023796345301534846, + "grad_norm": 991.3208618164062, + "learning_rate": 2.3560000000000004e-06, + "loss": 36.239, + "step": 11780 + }, + { + "epoch": 0.02381654593421866, + "grad_norm": 2609.4541015625, + "learning_rate": 2.358e-06, + "loss": 54.3551, + "step": 11790 + }, + { + "epoch": 0.023836746566902475, + "grad_norm": 1228.8785400390625, + "learning_rate": 2.3600000000000003e-06, + "loss": 37.026, + "step": 11800 + }, + { + "epoch": 0.023856947199586292, + "grad_norm": 1025.142333984375, + "learning_rate": 2.362e-06, + "loss": 34.8462, + "step": 11810 + }, + { + "epoch": 0.02387714783227011, + "grad_norm": 550.499755859375, + "learning_rate": 2.364e-06, + "loss": 28.3715, + "step": 11820 + }, + { + "epoch": 0.02389734846495392, + "grad_norm": 510.32012939453125, + "learning_rate": 2.3660000000000003e-06, + "loss": 14.5547, + "step": 11830 + }, + { + "epoch": 0.023917549097637738, + "grad_norm": 844.2149658203125, + "learning_rate": 2.3680000000000005e-06, + "loss": 13.7765, + "step": 11840 + }, + { + "epoch": 0.023937749730321554, + "grad_norm": 225.61790466308594, + "learning_rate": 2.37e-06, + "loss": 20.3906, + "step": 11850 + }, + { + "epoch": 0.02395795036300537, + "grad_norm": 483.1874694824219, + "learning_rate": 2.3720000000000003e-06, + "loss": 22.1851, + "step": 11860 + }, + { + "epoch": 0.023978150995689184, + "grad_norm": 462.12750244140625, + "learning_rate": 2.374e-06, + "loss": 29.5714, + "step": 11870 + }, + { + "epoch": 0.023998351628373, + "grad_norm": 297.9912109375, + "learning_rate": 2.376e-06, + "loss": 16.9498, + "step": 11880 + }, + { + "epoch": 0.024018552261056817, + "grad_norm": 686.6878662109375, + "learning_rate": 2.3780000000000004e-06, + "loss": 35.8019, + "step": 11890 + }, + { + "epoch": 0.024038752893740634, + "grad_norm": 796.0972290039062, + "learning_rate": 2.38e-06, + "loss": 22.4394, + "step": 11900 + }, + { + "epoch": 0.024058953526424447, + "grad_norm": 1740.537841796875, + "learning_rate": 2.3820000000000002e-06, + "loss": 27.8693, + "step": 11910 + }, + { + "epoch": 0.024079154159108263, + "grad_norm": 504.74810791015625, + "learning_rate": 2.3840000000000004e-06, + 
"loss": 25.0214, + "step": 11920 + }, + { + "epoch": 0.02409935479179208, + "grad_norm": 456.0522766113281, + "learning_rate": 2.386e-06, + "loss": 40.904, + "step": 11930 + }, + { + "epoch": 0.024119555424475896, + "grad_norm": 195.16200256347656, + "learning_rate": 2.3880000000000003e-06, + "loss": 16.5891, + "step": 11940 + }, + { + "epoch": 0.02413975605715971, + "grad_norm": 634.192626953125, + "learning_rate": 2.39e-06, + "loss": 35.2282, + "step": 11950 + }, + { + "epoch": 0.024159956689843526, + "grad_norm": 404.48834228515625, + "learning_rate": 2.392e-06, + "loss": 20.9582, + "step": 11960 + }, + { + "epoch": 0.024180157322527342, + "grad_norm": 280.0358581542969, + "learning_rate": 2.3940000000000003e-06, + "loss": 12.3714, + "step": 11970 + }, + { + "epoch": 0.02420035795521116, + "grad_norm": 537.2835083007812, + "learning_rate": 2.3960000000000004e-06, + "loss": 13.5036, + "step": 11980 + }, + { + "epoch": 0.024220558587894972, + "grad_norm": 683.5922241210938, + "learning_rate": 2.398e-06, + "loss": 26.1619, + "step": 11990 + }, + { + "epoch": 0.02424075922057879, + "grad_norm": 380.0113220214844, + "learning_rate": 2.4000000000000003e-06, + "loss": 23.8011, + "step": 12000 + }, + { + "epoch": 0.024260959853262605, + "grad_norm": 320.9659118652344, + "learning_rate": 2.402e-06, + "loss": 26.1275, + "step": 12010 + }, + { + "epoch": 0.02428116048594642, + "grad_norm": 239.40464782714844, + "learning_rate": 2.404e-06, + "loss": 22.1058, + "step": 12020 + }, + { + "epoch": 0.024301361118630235, + "grad_norm": 488.48895263671875, + "learning_rate": 2.4060000000000003e-06, + "loss": 27.6829, + "step": 12030 + }, + { + "epoch": 0.02432156175131405, + "grad_norm": 677.6295776367188, + "learning_rate": 2.408e-06, + "loss": 26.9782, + "step": 12040 + }, + { + "epoch": 0.024341762383997868, + "grad_norm": 706.8380126953125, + "learning_rate": 2.4100000000000002e-06, + "loss": 36.5065, + "step": 12050 + }, + { + "epoch": 0.024361963016681684, + "grad_norm": 887.48486328125, + "learning_rate": 2.4120000000000004e-06, + "loss": 25.6709, + "step": 12060 + }, + { + "epoch": 0.024382163649365497, + "grad_norm": 584.556396484375, + "learning_rate": 2.414e-06, + "loss": 16.778, + "step": 12070 + }, + { + "epoch": 0.024402364282049314, + "grad_norm": 341.5478210449219, + "learning_rate": 2.4160000000000002e-06, + "loss": 20.7892, + "step": 12080 + }, + { + "epoch": 0.02442256491473313, + "grad_norm": 372.71002197265625, + "learning_rate": 2.418e-06, + "loss": 30.1106, + "step": 12090 + }, + { + "epoch": 0.024442765547416947, + "grad_norm": 443.1629333496094, + "learning_rate": 2.42e-06, + "loss": 18.8971, + "step": 12100 + }, + { + "epoch": 0.02446296618010076, + "grad_norm": 303.4381103515625, + "learning_rate": 2.4220000000000003e-06, + "loss": 28.22, + "step": 12110 + }, + { + "epoch": 0.024483166812784576, + "grad_norm": 246.98573303222656, + "learning_rate": 2.4240000000000004e-06, + "loss": 16.5033, + "step": 12120 + }, + { + "epoch": 0.024503367445468393, + "grad_norm": 21.39580535888672, + "learning_rate": 2.426e-06, + "loss": 35.4571, + "step": 12130 + }, + { + "epoch": 0.02452356807815221, + "grad_norm": 454.01171875, + "learning_rate": 2.428e-06, + "loss": 32.0765, + "step": 12140 + }, + { + "epoch": 0.024543768710836023, + "grad_norm": 529.3364868164062, + "learning_rate": 2.43e-06, + "loss": 23.7121, + "step": 12150 + }, + { + "epoch": 0.02456396934351984, + "grad_norm": 1050.9063720703125, + "learning_rate": 2.432e-06, + "loss": 19.5663, + "step": 12160 + }, + { + "epoch": 
0.024584169976203656, + "grad_norm": 118.3066635131836, + "learning_rate": 2.4340000000000003e-06, + "loss": 34.4346, + "step": 12170 + }, + { + "epoch": 0.02460437060888747, + "grad_norm": 412.7197570800781, + "learning_rate": 2.4360000000000005e-06, + "loss": 20.1538, + "step": 12180 + }, + { + "epoch": 0.024624571241571285, + "grad_norm": 853.94384765625, + "learning_rate": 2.438e-06, + "loss": 36.14, + "step": 12190 + }, + { + "epoch": 0.0246447718742551, + "grad_norm": 529.248291015625, + "learning_rate": 2.4400000000000004e-06, + "loss": 37.7406, + "step": 12200 + }, + { + "epoch": 0.024664972506938918, + "grad_norm": 352.5622253417969, + "learning_rate": 2.442e-06, + "loss": 39.3457, + "step": 12210 + }, + { + "epoch": 0.02468517313962273, + "grad_norm": 284.78564453125, + "learning_rate": 2.4440000000000002e-06, + "loss": 22.3211, + "step": 12220 + }, + { + "epoch": 0.024705373772306548, + "grad_norm": 436.9090576171875, + "learning_rate": 2.4460000000000004e-06, + "loss": 71.9069, + "step": 12230 + }, + { + "epoch": 0.024725574404990364, + "grad_norm": 496.68157958984375, + "learning_rate": 2.448e-06, + "loss": 23.0423, + "step": 12240 + }, + { + "epoch": 0.02474577503767418, + "grad_norm": 768.3191528320312, + "learning_rate": 2.4500000000000003e-06, + "loss": 33.8757, + "step": 12250 + }, + { + "epoch": 0.024765975670357994, + "grad_norm": 665.5794067382812, + "learning_rate": 2.4520000000000004e-06, + "loss": 24.1933, + "step": 12260 + }, + { + "epoch": 0.02478617630304181, + "grad_norm": 678.7279663085938, + "learning_rate": 2.454e-06, + "loss": 23.873, + "step": 12270 + }, + { + "epoch": 0.024806376935725627, + "grad_norm": 600.2070922851562, + "learning_rate": 2.4560000000000003e-06, + "loss": 39.3005, + "step": 12280 + }, + { + "epoch": 0.024826577568409443, + "grad_norm": 462.02838134765625, + "learning_rate": 2.458e-06, + "loss": 40.3633, + "step": 12290 + }, + { + "epoch": 0.024846778201093257, + "grad_norm": 276.6780700683594, + "learning_rate": 2.46e-06, + "loss": 20.4624, + "step": 12300 + }, + { + "epoch": 0.024866978833777073, + "grad_norm": 259.2904357910156, + "learning_rate": 2.4620000000000003e-06, + "loss": 25.5969, + "step": 12310 + }, + { + "epoch": 0.02488717946646089, + "grad_norm": 259.7370910644531, + "learning_rate": 2.4640000000000005e-06, + "loss": 36.7475, + "step": 12320 + }, + { + "epoch": 0.024907380099144706, + "grad_norm": 374.19952392578125, + "learning_rate": 2.466e-06, + "loss": 35.6893, + "step": 12330 + }, + { + "epoch": 0.02492758073182852, + "grad_norm": 353.00543212890625, + "learning_rate": 2.468e-06, + "loss": 23.8052, + "step": 12340 + }, + { + "epoch": 0.024947781364512336, + "grad_norm": 1175.15771484375, + "learning_rate": 2.47e-06, + "loss": 28.6438, + "step": 12350 + }, + { + "epoch": 0.024967981997196152, + "grad_norm": 258.8357849121094, + "learning_rate": 2.4720000000000002e-06, + "loss": 23.1932, + "step": 12360 + }, + { + "epoch": 0.02498818262987997, + "grad_norm": 804.6754150390625, + "learning_rate": 2.4740000000000004e-06, + "loss": 34.3967, + "step": 12370 + }, + { + "epoch": 0.025008383262563782, + "grad_norm": 720.6771850585938, + "learning_rate": 2.476e-06, + "loss": 20.0733, + "step": 12380 + }, + { + "epoch": 0.0250285838952476, + "grad_norm": 1006.309326171875, + "learning_rate": 2.4780000000000002e-06, + "loss": 25.1847, + "step": 12390 + }, + { + "epoch": 0.025048784527931415, + "grad_norm": 500.6513366699219, + "learning_rate": 2.4800000000000004e-06, + "loss": 42.6183, + "step": 12400 + }, + { + "epoch": 
0.02506898516061523, + "grad_norm": 500.9101867675781, + "learning_rate": 2.482e-06, + "loss": 19.1311, + "step": 12410 + }, + { + "epoch": 0.025089185793299044, + "grad_norm": 4534.8544921875, + "learning_rate": 2.4840000000000003e-06, + "loss": 39.3053, + "step": 12420 + }, + { + "epoch": 0.02510938642598286, + "grad_norm": 861.6122436523438, + "learning_rate": 2.486e-06, + "loss": 37.6291, + "step": 12430 + }, + { + "epoch": 0.025129587058666678, + "grad_norm": 218.59344482421875, + "learning_rate": 2.488e-06, + "loss": 23.8732, + "step": 12440 + }, + { + "epoch": 0.025149787691350494, + "grad_norm": 515.483642578125, + "learning_rate": 2.4900000000000003e-06, + "loss": 25.6926, + "step": 12450 + }, + { + "epoch": 0.025169988324034307, + "grad_norm": 160.76766967773438, + "learning_rate": 2.4920000000000005e-06, + "loss": 26.8572, + "step": 12460 + }, + { + "epoch": 0.025190188956718124, + "grad_norm": 256.6075744628906, + "learning_rate": 2.494e-06, + "loss": 24.3597, + "step": 12470 + }, + { + "epoch": 0.02521038958940194, + "grad_norm": 380.9504699707031, + "learning_rate": 2.496e-06, + "loss": 22.8597, + "step": 12480 + }, + { + "epoch": 0.025230590222085757, + "grad_norm": 1375.69189453125, + "learning_rate": 2.498e-06, + "loss": 41.9291, + "step": 12490 + }, + { + "epoch": 0.02525079085476957, + "grad_norm": 86.29317474365234, + "learning_rate": 2.5e-06, + "loss": 38.4579, + "step": 12500 + }, + { + "epoch": 0.025270991487453386, + "grad_norm": 166.09983825683594, + "learning_rate": 2.502e-06, + "loss": 18.8884, + "step": 12510 + }, + { + "epoch": 0.025291192120137203, + "grad_norm": 528.3165283203125, + "learning_rate": 2.5040000000000005e-06, + "loss": 37.1205, + "step": 12520 + }, + { + "epoch": 0.02531139275282102, + "grad_norm": 620.2875366210938, + "learning_rate": 2.5060000000000002e-06, + "loss": 43.7914, + "step": 12530 + }, + { + "epoch": 0.025331593385504832, + "grad_norm": 1003.9613037109375, + "learning_rate": 2.5080000000000004e-06, + "loss": 39.7105, + "step": 12540 + }, + { + "epoch": 0.02535179401818865, + "grad_norm": 1062.6812744140625, + "learning_rate": 2.51e-06, + "loss": 30.7475, + "step": 12550 + }, + { + "epoch": 0.025371994650872465, + "grad_norm": 968.7026977539062, + "learning_rate": 2.512e-06, + "loss": 29.899, + "step": 12560 + }, + { + "epoch": 0.025392195283556282, + "grad_norm": 599.2593383789062, + "learning_rate": 2.5140000000000004e-06, + "loss": 15.0186, + "step": 12570 + }, + { + "epoch": 0.025412395916240095, + "grad_norm": 342.73529052734375, + "learning_rate": 2.516e-06, + "loss": 16.4279, + "step": 12580 + }, + { + "epoch": 0.02543259654892391, + "grad_norm": 2192.99462890625, + "learning_rate": 2.5180000000000003e-06, + "loss": 47.0873, + "step": 12590 + }, + { + "epoch": 0.025452797181607728, + "grad_norm": 663.18896484375, + "learning_rate": 2.52e-06, + "loss": 30.0429, + "step": 12600 + }, + { + "epoch": 0.025472997814291545, + "grad_norm": 222.49244689941406, + "learning_rate": 2.522e-06, + "loss": 22.5173, + "step": 12610 + }, + { + "epoch": 0.025493198446975358, + "grad_norm": 169.51467895507812, + "learning_rate": 2.5240000000000003e-06, + "loss": 23.6477, + "step": 12620 + }, + { + "epoch": 0.025513399079659174, + "grad_norm": 774.110595703125, + "learning_rate": 2.526e-06, + "loss": 21.803, + "step": 12630 + }, + { + "epoch": 0.02553359971234299, + "grad_norm": 936.8486328125, + "learning_rate": 2.5280000000000006e-06, + "loss": 40.9788, + "step": 12640 + }, + { + "epoch": 0.025553800345026807, + "grad_norm": 490.3465270996094, + 
"learning_rate": 2.5300000000000003e-06, + "loss": 38.5848, + "step": 12650 + }, + { + "epoch": 0.02557400097771062, + "grad_norm": 962.7054443359375, + "learning_rate": 2.532e-06, + "loss": 29.9581, + "step": 12660 + }, + { + "epoch": 0.025594201610394437, + "grad_norm": 298.3330383300781, + "learning_rate": 2.5340000000000002e-06, + "loss": 21.7127, + "step": 12670 + }, + { + "epoch": 0.025614402243078253, + "grad_norm": 382.0990905761719, + "learning_rate": 2.536e-06, + "loss": 36.149, + "step": 12680 + }, + { + "epoch": 0.02563460287576207, + "grad_norm": 657.50390625, + "learning_rate": 2.5380000000000005e-06, + "loss": 32.5378, + "step": 12690 + }, + { + "epoch": 0.025654803508445883, + "grad_norm": 448.74298095703125, + "learning_rate": 2.5400000000000002e-06, + "loss": 30.5162, + "step": 12700 + }, + { + "epoch": 0.0256750041411297, + "grad_norm": 600.510498046875, + "learning_rate": 2.542e-06, + "loss": 25.7998, + "step": 12710 + }, + { + "epoch": 0.025695204773813516, + "grad_norm": 712.5263061523438, + "learning_rate": 2.5440000000000005e-06, + "loss": 31.6482, + "step": 12720 + }, + { + "epoch": 0.025715405406497333, + "grad_norm": 610.2012939453125, + "learning_rate": 2.5460000000000003e-06, + "loss": 22.5416, + "step": 12730 + }, + { + "epoch": 0.025735606039181146, + "grad_norm": 494.62847900390625, + "learning_rate": 2.5480000000000004e-06, + "loss": 26.9063, + "step": 12740 + }, + { + "epoch": 0.025755806671864962, + "grad_norm": 426.4195556640625, + "learning_rate": 2.55e-06, + "loss": 36.5014, + "step": 12750 + }, + { + "epoch": 0.02577600730454878, + "grad_norm": 796.3436889648438, + "learning_rate": 2.552e-06, + "loss": 29.9939, + "step": 12760 + }, + { + "epoch": 0.025796207937232595, + "grad_norm": 763.7683715820312, + "learning_rate": 2.5540000000000004e-06, + "loss": 46.7902, + "step": 12770 + }, + { + "epoch": 0.025816408569916408, + "grad_norm": 1770.569580078125, + "learning_rate": 2.556e-06, + "loss": 42.3702, + "step": 12780 + }, + { + "epoch": 0.025836609202600225, + "grad_norm": 337.1780090332031, + "learning_rate": 2.5580000000000003e-06, + "loss": 27.2789, + "step": 12790 + }, + { + "epoch": 0.02585680983528404, + "grad_norm": 154.74667358398438, + "learning_rate": 2.56e-06, + "loss": 13.85, + "step": 12800 + }, + { + "epoch": 0.025877010467967858, + "grad_norm": 567.01416015625, + "learning_rate": 2.562e-06, + "loss": 30.453, + "step": 12810 + }, + { + "epoch": 0.02589721110065167, + "grad_norm": 228.02456665039062, + "learning_rate": 2.5640000000000004e-06, + "loss": 17.5226, + "step": 12820 + }, + { + "epoch": 0.025917411733335487, + "grad_norm": 331.27764892578125, + "learning_rate": 2.566e-06, + "loss": 20.7665, + "step": 12830 + }, + { + "epoch": 0.025937612366019304, + "grad_norm": 730.0822143554688, + "learning_rate": 2.568e-06, + "loss": 22.1979, + "step": 12840 + }, + { + "epoch": 0.02595781299870312, + "grad_norm": 770.920654296875, + "learning_rate": 2.5700000000000004e-06, + "loss": 25.3876, + "step": 12850 + }, + { + "epoch": 0.025978013631386934, + "grad_norm": 241.62933349609375, + "learning_rate": 2.572e-06, + "loss": 31.7164, + "step": 12860 + }, + { + "epoch": 0.02599821426407075, + "grad_norm": 306.0582580566406, + "learning_rate": 2.5740000000000003e-06, + "loss": 34.9088, + "step": 12870 + }, + { + "epoch": 0.026018414896754567, + "grad_norm": 659.4343872070312, + "learning_rate": 2.576e-06, + "loss": 37.9334, + "step": 12880 + }, + { + "epoch": 0.026038615529438383, + "grad_norm": 1271.260986328125, + "learning_rate": 2.578e-06, + 
"loss": 43.3547, + "step": 12890 + }, + { + "epoch": 0.026058816162122196, + "grad_norm": 942.1395263671875, + "learning_rate": 2.5800000000000003e-06, + "loss": 26.849, + "step": 12900 + }, + { + "epoch": 0.026079016794806013, + "grad_norm": 775.384521484375, + "learning_rate": 2.582e-06, + "loss": 18.4199, + "step": 12910 + }, + { + "epoch": 0.02609921742748983, + "grad_norm": 668.6947631835938, + "learning_rate": 2.5840000000000006e-06, + "loss": 25.0121, + "step": 12920 + }, + { + "epoch": 0.026119418060173646, + "grad_norm": 288.4560546875, + "learning_rate": 2.5860000000000003e-06, + "loss": 28.1159, + "step": 12930 + }, + { + "epoch": 0.02613961869285746, + "grad_norm": 372.0718688964844, + "learning_rate": 2.588e-06, + "loss": 17.4488, + "step": 12940 + }, + { + "epoch": 0.026159819325541275, + "grad_norm": 392.7027893066406, + "learning_rate": 2.59e-06, + "loss": 35.8984, + "step": 12950 + }, + { + "epoch": 0.026180019958225092, + "grad_norm": 491.2992248535156, + "learning_rate": 2.592e-06, + "loss": 29.4578, + "step": 12960 + }, + { + "epoch": 0.02620022059090891, + "grad_norm": 556.0179443359375, + "learning_rate": 2.5940000000000005e-06, + "loss": 27.1833, + "step": 12970 + }, + { + "epoch": 0.02622042122359272, + "grad_norm": 505.3508605957031, + "learning_rate": 2.5960000000000002e-06, + "loss": 21.2242, + "step": 12980 + }, + { + "epoch": 0.026240621856276538, + "grad_norm": 477.8688049316406, + "learning_rate": 2.598e-06, + "loss": 32.3539, + "step": 12990 + }, + { + "epoch": 0.026260822488960354, + "grad_norm": 437.2344970703125, + "learning_rate": 2.6e-06, + "loss": 31.2965, + "step": 13000 + }, + { + "epoch": 0.02628102312164417, + "grad_norm": 441.76470947265625, + "learning_rate": 2.6020000000000002e-06, + "loss": 31.3963, + "step": 13010 + }, + { + "epoch": 0.026301223754327984, + "grad_norm": 348.7921142578125, + "learning_rate": 2.6040000000000004e-06, + "loss": 26.3613, + "step": 13020 + }, + { + "epoch": 0.0263214243870118, + "grad_norm": 863.64111328125, + "learning_rate": 2.606e-06, + "loss": 35.6339, + "step": 13030 + }, + { + "epoch": 0.026341625019695617, + "grad_norm": 506.5108947753906, + "learning_rate": 2.608e-06, + "loss": 23.0733, + "step": 13040 + }, + { + "epoch": 0.026361825652379434, + "grad_norm": 802.7584228515625, + "learning_rate": 2.6100000000000004e-06, + "loss": 41.8787, + "step": 13050 + }, + { + "epoch": 0.026382026285063247, + "grad_norm": 1173.11279296875, + "learning_rate": 2.612e-06, + "loss": 32.2501, + "step": 13060 + }, + { + "epoch": 0.026402226917747063, + "grad_norm": 412.14056396484375, + "learning_rate": 2.6140000000000003e-06, + "loss": 23.2096, + "step": 13070 + }, + { + "epoch": 0.02642242755043088, + "grad_norm": 631.819091796875, + "learning_rate": 2.616e-06, + "loss": 36.8238, + "step": 13080 + }, + { + "epoch": 0.026442628183114696, + "grad_norm": 1280.29345703125, + "learning_rate": 2.618e-06, + "loss": 19.2417, + "step": 13090 + }, + { + "epoch": 0.02646282881579851, + "grad_norm": 919.0343017578125, + "learning_rate": 2.6200000000000003e-06, + "loss": 28.6367, + "step": 13100 + }, + { + "epoch": 0.026483029448482326, + "grad_norm": 461.51678466796875, + "learning_rate": 2.622e-06, + "loss": 27.8108, + "step": 13110 + }, + { + "epoch": 0.026503230081166142, + "grad_norm": 352.7019348144531, + "learning_rate": 2.6240000000000006e-06, + "loss": 24.0391, + "step": 13120 + }, + { + "epoch": 0.02652343071384996, + "grad_norm": 114.39356994628906, + "learning_rate": 2.6260000000000004e-06, + "loss": 17.8535, + "step": 13130 
+ }, + { + "epoch": 0.026543631346533772, + "grad_norm": 487.0043029785156, + "learning_rate": 2.628e-06, + "loss": 29.1434, + "step": 13140 + }, + { + "epoch": 0.02656383197921759, + "grad_norm": 837.979736328125, + "learning_rate": 2.6300000000000002e-06, + "loss": 36.1627, + "step": 13150 + }, + { + "epoch": 0.026584032611901405, + "grad_norm": 575.2749633789062, + "learning_rate": 2.632e-06, + "loss": 22.1858, + "step": 13160 + }, + { + "epoch": 0.02660423324458522, + "grad_norm": 407.6467590332031, + "learning_rate": 2.6340000000000005e-06, + "loss": 22.8118, + "step": 13170 + }, + { + "epoch": 0.026624433877269035, + "grad_norm": 386.979736328125, + "learning_rate": 2.6360000000000003e-06, + "loss": 17.3379, + "step": 13180 + }, + { + "epoch": 0.02664463450995285, + "grad_norm": 484.42340087890625, + "learning_rate": 2.638e-06, + "loss": 26.2426, + "step": 13190 + }, + { + "epoch": 0.026664835142636668, + "grad_norm": 538.97998046875, + "learning_rate": 2.64e-06, + "loss": 33.0226, + "step": 13200 + }, + { + "epoch": 0.026685035775320484, + "grad_norm": 1009.0739135742188, + "learning_rate": 2.6420000000000003e-06, + "loss": 29.435, + "step": 13210 + }, + { + "epoch": 0.026705236408004297, + "grad_norm": 333.44110107421875, + "learning_rate": 2.6440000000000004e-06, + "loss": 32.6955, + "step": 13220 + }, + { + "epoch": 0.026725437040688114, + "grad_norm": 1260.854248046875, + "learning_rate": 2.646e-06, + "loss": 15.8343, + "step": 13230 + }, + { + "epoch": 0.02674563767337193, + "grad_norm": 466.25958251953125, + "learning_rate": 2.648e-06, + "loss": 30.9633, + "step": 13240 + }, + { + "epoch": 0.026765838306055747, + "grad_norm": 1081.712646484375, + "learning_rate": 2.6500000000000005e-06, + "loss": 46.0157, + "step": 13250 + }, + { + "epoch": 0.02678603893873956, + "grad_norm": 134.25177001953125, + "learning_rate": 2.652e-06, + "loss": 33.3005, + "step": 13260 + }, + { + "epoch": 0.026806239571423376, + "grad_norm": 711.9703979492188, + "learning_rate": 2.6540000000000003e-06, + "loss": 24.0814, + "step": 13270 + }, + { + "epoch": 0.026826440204107193, + "grad_norm": 453.7552490234375, + "learning_rate": 2.656e-06, + "loss": 20.5775, + "step": 13280 + }, + { + "epoch": 0.02684664083679101, + "grad_norm": 1155.71435546875, + "learning_rate": 2.6580000000000002e-06, + "loss": 24.0799, + "step": 13290 + }, + { + "epoch": 0.026866841469474823, + "grad_norm": 453.0130615234375, + "learning_rate": 2.6600000000000004e-06, + "loss": 17.9642, + "step": 13300 + }, + { + "epoch": 0.02688704210215864, + "grad_norm": 157.45489501953125, + "learning_rate": 2.662e-06, + "loss": 15.6941, + "step": 13310 + }, + { + "epoch": 0.026907242734842456, + "grad_norm": 676.5046997070312, + "learning_rate": 2.6640000000000007e-06, + "loss": 35.0601, + "step": 13320 + }, + { + "epoch": 0.026927443367526272, + "grad_norm": 5.334859848022461, + "learning_rate": 2.6660000000000004e-06, + "loss": 24.3542, + "step": 13330 + }, + { + "epoch": 0.026947644000210085, + "grad_norm": 446.80706787109375, + "learning_rate": 2.668e-06, + "loss": 32.5696, + "step": 13340 + }, + { + "epoch": 0.0269678446328939, + "grad_norm": 753.0740356445312, + "learning_rate": 2.6700000000000003e-06, + "loss": 19.3994, + "step": 13350 + }, + { + "epoch": 0.026988045265577718, + "grad_norm": 378.42303466796875, + "learning_rate": 2.672e-06, + "loss": 25.0429, + "step": 13360 + }, + { + "epoch": 0.027008245898261535, + "grad_norm": 59.517295837402344, + "learning_rate": 2.6740000000000006e-06, + "loss": 28.762, + "step": 13370 + }, + { 
+ "epoch": 0.027028446530945348, + "grad_norm": 177.29518127441406, + "learning_rate": 2.6760000000000003e-06, + "loss": 20.5681, + "step": 13380 + }, + { + "epoch": 0.027048647163629164, + "grad_norm": 259.7153015136719, + "learning_rate": 2.678e-06, + "loss": 37.4309, + "step": 13390 + }, + { + "epoch": 0.02706884779631298, + "grad_norm": 1043.8868408203125, + "learning_rate": 2.68e-06, + "loss": 38.3263, + "step": 13400 + }, + { + "epoch": 0.027089048428996797, + "grad_norm": 252.46734619140625, + "learning_rate": 2.6820000000000003e-06, + "loss": 29.4893, + "step": 13410 + }, + { + "epoch": 0.02710924906168061, + "grad_norm": 494.4878234863281, + "learning_rate": 2.6840000000000005e-06, + "loss": 32.0994, + "step": 13420 + }, + { + "epoch": 0.027129449694364427, + "grad_norm": 590.9215087890625, + "learning_rate": 2.686e-06, + "loss": 25.1901, + "step": 13430 + }, + { + "epoch": 0.027149650327048244, + "grad_norm": 551.9838256835938, + "learning_rate": 2.688e-06, + "loss": 32.5244, + "step": 13440 + }, + { + "epoch": 0.02716985095973206, + "grad_norm": 517.9137573242188, + "learning_rate": 2.6900000000000005e-06, + "loss": 30.3936, + "step": 13450 + }, + { + "epoch": 0.027190051592415873, + "grad_norm": 737.2365112304688, + "learning_rate": 2.6920000000000002e-06, + "loss": 47.1574, + "step": 13460 + }, + { + "epoch": 0.02721025222509969, + "grad_norm": 670.0670776367188, + "learning_rate": 2.694e-06, + "loss": 59.7301, + "step": 13470 + }, + { + "epoch": 0.027230452857783506, + "grad_norm": 657.5059204101562, + "learning_rate": 2.696e-06, + "loss": 12.9818, + "step": 13480 + }, + { + "epoch": 0.027250653490467323, + "grad_norm": 977.0647583007812, + "learning_rate": 2.6980000000000003e-06, + "loss": 40.2026, + "step": 13490 + }, + { + "epoch": 0.027270854123151136, + "grad_norm": 1071.2728271484375, + "learning_rate": 2.7000000000000004e-06, + "loss": 46.9622, + "step": 13500 + }, + { + "epoch": 0.027291054755834952, + "grad_norm": 769.448974609375, + "learning_rate": 2.702e-06, + "loss": 25.3011, + "step": 13510 + }, + { + "epoch": 0.02731125538851877, + "grad_norm": 624.0108032226562, + "learning_rate": 2.704e-06, + "loss": 33.9974, + "step": 13520 + }, + { + "epoch": 0.027331456021202585, + "grad_norm": 886.1692504882812, + "learning_rate": 2.7060000000000004e-06, + "loss": 20.8574, + "step": 13530 + }, + { + "epoch": 0.0273516566538864, + "grad_norm": 345.9872131347656, + "learning_rate": 2.708e-06, + "loss": 27.7531, + "step": 13540 + }, + { + "epoch": 0.027371857286570215, + "grad_norm": 443.1385192871094, + "learning_rate": 2.7100000000000003e-06, + "loss": 34.9584, + "step": 13550 + }, + { + "epoch": 0.02739205791925403, + "grad_norm": 355.1740417480469, + "learning_rate": 2.712e-06, + "loss": 27.923, + "step": 13560 + }, + { + "epoch": 0.027412258551937848, + "grad_norm": 915.899169921875, + "learning_rate": 2.7139999999999998e-06, + "loss": 36.5688, + "step": 13570 + }, + { + "epoch": 0.02743245918462166, + "grad_norm": 1138.944091796875, + "learning_rate": 2.7160000000000003e-06, + "loss": 21.1642, + "step": 13580 + }, + { + "epoch": 0.027452659817305478, + "grad_norm": 641.07080078125, + "learning_rate": 2.718e-06, + "loss": 46.8754, + "step": 13590 + }, + { + "epoch": 0.027472860449989294, + "grad_norm": 1295.2271728515625, + "learning_rate": 2.7200000000000002e-06, + "loss": 32.1504, + "step": 13600 + }, + { + "epoch": 0.02749306108267311, + "grad_norm": 575.6077270507812, + "learning_rate": 2.7220000000000004e-06, + "loss": 31.1421, + "step": 13610 + }, + { + "epoch": 
0.027513261715356924, + "grad_norm": 651.1995239257812, + "learning_rate": 2.724e-06, + "loss": 21.735, + "step": 13620 + }, + { + "epoch": 0.02753346234804074, + "grad_norm": 384.02783203125, + "learning_rate": 2.7260000000000002e-06, + "loss": 30.4357, + "step": 13630 + }, + { + "epoch": 0.027553662980724557, + "grad_norm": 283.46966552734375, + "learning_rate": 2.728e-06, + "loss": 25.7516, + "step": 13640 + }, + { + "epoch": 0.027573863613408373, + "grad_norm": 254.54684448242188, + "learning_rate": 2.7300000000000005e-06, + "loss": 19.1426, + "step": 13650 + }, + { + "epoch": 0.027594064246092186, + "grad_norm": 926.5336303710938, + "learning_rate": 2.7320000000000003e-06, + "loss": 33.8098, + "step": 13660 + }, + { + "epoch": 0.027614264878776003, + "grad_norm": 769.6925048828125, + "learning_rate": 2.734e-06, + "loss": 19.3071, + "step": 13670 + }, + { + "epoch": 0.02763446551145982, + "grad_norm": 402.8377380371094, + "learning_rate": 2.736e-06, + "loss": 21.2851, + "step": 13680 + }, + { + "epoch": 0.027654666144143636, + "grad_norm": 491.4398193359375, + "learning_rate": 2.7380000000000003e-06, + "loss": 34.1278, + "step": 13690 + }, + { + "epoch": 0.02767486677682745, + "grad_norm": 312.103759765625, + "learning_rate": 2.7400000000000004e-06, + "loss": 32.734, + "step": 13700 + }, + { + "epoch": 0.027695067409511265, + "grad_norm": 529.0984497070312, + "learning_rate": 2.742e-06, + "loss": 15.2163, + "step": 13710 + }, + { + "epoch": 0.027715268042195082, + "grad_norm": 734.2744750976562, + "learning_rate": 2.744e-06, + "loss": 32.0383, + "step": 13720 + }, + { + "epoch": 0.0277354686748789, + "grad_norm": 937.9981079101562, + "learning_rate": 2.7460000000000005e-06, + "loss": 32.932, + "step": 13730 + }, + { + "epoch": 0.02775566930756271, + "grad_norm": 1043.4871826171875, + "learning_rate": 2.748e-06, + "loss": 24.3559, + "step": 13740 + }, + { + "epoch": 0.027775869940246528, + "grad_norm": 875.7940063476562, + "learning_rate": 2.7500000000000004e-06, + "loss": 31.5296, + "step": 13750 + }, + { + "epoch": 0.027796070572930345, + "grad_norm": 649.774169921875, + "learning_rate": 2.752e-06, + "loss": 40.3713, + "step": 13760 + }, + { + "epoch": 0.02781627120561416, + "grad_norm": 550.9602661132812, + "learning_rate": 2.754e-06, + "loss": 51.3406, + "step": 13770 + }, + { + "epoch": 0.027836471838297974, + "grad_norm": 777.0423583984375, + "learning_rate": 2.7560000000000004e-06, + "loss": 31.8327, + "step": 13780 + }, + { + "epoch": 0.02785667247098179, + "grad_norm": 448.1114807128906, + "learning_rate": 2.758e-06, + "loss": 17.6592, + "step": 13790 + }, + { + "epoch": 0.027876873103665607, + "grad_norm": 725.5538330078125, + "learning_rate": 2.7600000000000003e-06, + "loss": 26.8034, + "step": 13800 + }, + { + "epoch": 0.027897073736349424, + "grad_norm": 722.1695556640625, + "learning_rate": 2.7620000000000004e-06, + "loss": 34.467, + "step": 13810 + }, + { + "epoch": 0.027917274369033237, + "grad_norm": 517.4332885742188, + "learning_rate": 2.764e-06, + "loss": 15.6391, + "step": 13820 + }, + { + "epoch": 0.027937475001717053, + "grad_norm": 458.1663818359375, + "learning_rate": 2.7660000000000003e-06, + "loss": 36.6212, + "step": 13830 + }, + { + "epoch": 0.02795767563440087, + "grad_norm": 411.2251892089844, + "learning_rate": 2.768e-06, + "loss": 21.7761, + "step": 13840 + }, + { + "epoch": 0.027977876267084686, + "grad_norm": 364.0091552734375, + "learning_rate": 2.7700000000000006e-06, + "loss": 22.7303, + "step": 13850 + }, + { + "epoch": 0.0279980768997685, + 
"grad_norm": 614.5543212890625, + "learning_rate": 2.7720000000000003e-06, + "loss": 21.6346, + "step": 13860 + }, + { + "epoch": 0.028018277532452316, + "grad_norm": 713.9307861328125, + "learning_rate": 2.774e-06, + "loss": 23.9566, + "step": 13870 + }, + { + "epoch": 0.028038478165136133, + "grad_norm": 884.5711669921875, + "learning_rate": 2.776e-06, + "loss": 26.8261, + "step": 13880 + }, + { + "epoch": 0.02805867879781995, + "grad_norm": 616.1358642578125, + "learning_rate": 2.7780000000000003e-06, + "loss": 27.573, + "step": 13890 + }, + { + "epoch": 0.028078879430503762, + "grad_norm": 491.07427978515625, + "learning_rate": 2.7800000000000005e-06, + "loss": 24.4312, + "step": 13900 + }, + { + "epoch": 0.02809908006318758, + "grad_norm": 291.7569580078125, + "learning_rate": 2.7820000000000002e-06, + "loss": 16.2176, + "step": 13910 + }, + { + "epoch": 0.028119280695871395, + "grad_norm": 719.0975341796875, + "learning_rate": 2.784e-06, + "loss": 24.8783, + "step": 13920 + }, + { + "epoch": 0.02813948132855521, + "grad_norm": 247.43075561523438, + "learning_rate": 2.7860000000000005e-06, + "loss": 12.6267, + "step": 13930 + }, + { + "epoch": 0.028159681961239025, + "grad_norm": 0.0, + "learning_rate": 2.7880000000000002e-06, + "loss": 21.0015, + "step": 13940 + }, + { + "epoch": 0.02817988259392284, + "grad_norm": 683.0343627929688, + "learning_rate": 2.7900000000000004e-06, + "loss": 36.7794, + "step": 13950 + }, + { + "epoch": 0.028200083226606658, + "grad_norm": 2201.24267578125, + "learning_rate": 2.792e-06, + "loss": 26.4449, + "step": 13960 + }, + { + "epoch": 0.028220283859290474, + "grad_norm": 907.6283569335938, + "learning_rate": 2.794e-06, + "loss": 24.2639, + "step": 13970 + }, + { + "epoch": 0.028240484491974287, + "grad_norm": 647.8742065429688, + "learning_rate": 2.7960000000000004e-06, + "loss": 26.6286, + "step": 13980 + }, + { + "epoch": 0.028260685124658104, + "grad_norm": 434.18438720703125, + "learning_rate": 2.798e-06, + "loss": 20.2479, + "step": 13990 + }, + { + "epoch": 0.02828088575734192, + "grad_norm": 572.0900268554688, + "learning_rate": 2.8000000000000003e-06, + "loss": 29.5283, + "step": 14000 + }, + { + "epoch": 0.028301086390025737, + "grad_norm": 222.71372985839844, + "learning_rate": 2.8020000000000004e-06, + "loss": 31.4689, + "step": 14010 + }, + { + "epoch": 0.02832128702270955, + "grad_norm": 832.9528198242188, + "learning_rate": 2.804e-06, + "loss": 27.296, + "step": 14020 + }, + { + "epoch": 0.028341487655393367, + "grad_norm": 322.5573425292969, + "learning_rate": 2.8060000000000003e-06, + "loss": 41.7819, + "step": 14030 + }, + { + "epoch": 0.028361688288077183, + "grad_norm": 436.2952575683594, + "learning_rate": 2.808e-06, + "loss": 31.179, + "step": 14040 + }, + { + "epoch": 0.028381888920761, + "grad_norm": 245.27647399902344, + "learning_rate": 2.8100000000000006e-06, + "loss": 27.0722, + "step": 14050 + }, + { + "epoch": 0.028402089553444813, + "grad_norm": 266.8973693847656, + "learning_rate": 2.8120000000000004e-06, + "loss": 18.3411, + "step": 14060 + }, + { + "epoch": 0.02842229018612863, + "grad_norm": 362.5631103515625, + "learning_rate": 2.814e-06, + "loss": 30.4403, + "step": 14070 + }, + { + "epoch": 0.028442490818812446, + "grad_norm": 571.2347412109375, + "learning_rate": 2.8160000000000002e-06, + "loss": 21.4641, + "step": 14080 + }, + { + "epoch": 0.028462691451496262, + "grad_norm": 935.2764282226562, + "learning_rate": 2.8180000000000004e-06, + "loss": 29.7431, + "step": 14090 + }, + { + "epoch": 0.028482892084180075, 
+ "grad_norm": 447.9610290527344, + "learning_rate": 2.82e-06, + "loss": 36.1941, + "step": 14100 + }, + { + "epoch": 0.028503092716863892, + "grad_norm": 363.7928771972656, + "learning_rate": 2.8220000000000003e-06, + "loss": 26.6371, + "step": 14110 + }, + { + "epoch": 0.02852329334954771, + "grad_norm": 419.3346252441406, + "learning_rate": 2.824e-06, + "loss": 34.1951, + "step": 14120 + }, + { + "epoch": 0.028543493982231525, + "grad_norm": 234.77806091308594, + "learning_rate": 2.8260000000000006e-06, + "loss": 9.7577, + "step": 14130 + }, + { + "epoch": 0.028563694614915338, + "grad_norm": 385.75970458984375, + "learning_rate": 2.8280000000000003e-06, + "loss": 31.3418, + "step": 14140 + }, + { + "epoch": 0.028583895247599155, + "grad_norm": 192.51187133789062, + "learning_rate": 2.83e-06, + "loss": 37.0611, + "step": 14150 + }, + { + "epoch": 0.02860409588028297, + "grad_norm": 845.19921875, + "learning_rate": 2.832e-06, + "loss": 15.7974, + "step": 14160 + }, + { + "epoch": 0.028624296512966788, + "grad_norm": 13.857260704040527, + "learning_rate": 2.834e-06, + "loss": 25.4985, + "step": 14170 + }, + { + "epoch": 0.0286444971456506, + "grad_norm": 798.99560546875, + "learning_rate": 2.8360000000000005e-06, + "loss": 27.4345, + "step": 14180 + }, + { + "epoch": 0.028664697778334417, + "grad_norm": 1094.974853515625, + "learning_rate": 2.838e-06, + "loss": 32.5372, + "step": 14190 + }, + { + "epoch": 0.028684898411018234, + "grad_norm": 494.9541015625, + "learning_rate": 2.84e-06, + "loss": 39.5633, + "step": 14200 + }, + { + "epoch": 0.02870509904370205, + "grad_norm": 525.530029296875, + "learning_rate": 2.8420000000000005e-06, + "loss": 26.2091, + "step": 14210 + }, + { + "epoch": 0.028725299676385863, + "grad_norm": 350.001708984375, + "learning_rate": 2.8440000000000002e-06, + "loss": 23.9767, + "step": 14220 + }, + { + "epoch": 0.02874550030906968, + "grad_norm": 285.36102294921875, + "learning_rate": 2.8460000000000004e-06, + "loss": 38.4315, + "step": 14230 + }, + { + "epoch": 0.028765700941753496, + "grad_norm": 234.65621948242188, + "learning_rate": 2.848e-06, + "loss": 27.0473, + "step": 14240 + }, + { + "epoch": 0.028785901574437313, + "grad_norm": 1140.964599609375, + "learning_rate": 2.85e-06, + "loss": 32.4249, + "step": 14250 + }, + { + "epoch": 0.028806102207121126, + "grad_norm": 476.35528564453125, + "learning_rate": 2.8520000000000004e-06, + "loss": 11.1549, + "step": 14260 + }, + { + "epoch": 0.028826302839804942, + "grad_norm": 312.71795654296875, + "learning_rate": 2.854e-06, + "loss": 23.1731, + "step": 14270 + }, + { + "epoch": 0.02884650347248876, + "grad_norm": 307.3836364746094, + "learning_rate": 2.8560000000000003e-06, + "loss": 47.9001, + "step": 14280 + }, + { + "epoch": 0.028866704105172575, + "grad_norm": 635.443603515625, + "learning_rate": 2.8580000000000004e-06, + "loss": 45.4959, + "step": 14290 + }, + { + "epoch": 0.02888690473785639, + "grad_norm": 976.892333984375, + "learning_rate": 2.86e-06, + "loss": 43.5277, + "step": 14300 + }, + { + "epoch": 0.028907105370540205, + "grad_norm": 691.7721557617188, + "learning_rate": 2.8620000000000003e-06, + "loss": 35.6253, + "step": 14310 + }, + { + "epoch": 0.02892730600322402, + "grad_norm": 777.8397827148438, + "learning_rate": 2.864e-06, + "loss": 40.4026, + "step": 14320 + }, + { + "epoch": 0.028947506635907838, + "grad_norm": 521.339111328125, + "learning_rate": 2.8660000000000006e-06, + "loss": 15.8446, + "step": 14330 + }, + { + "epoch": 0.02896770726859165, + "grad_norm": 925.6652221679688, + 
"learning_rate": 2.8680000000000003e-06, + "loss": 30.4705, + "step": 14340 + }, + { + "epoch": 0.028987907901275468, + "grad_norm": 591.0034790039062, + "learning_rate": 2.87e-06, + "loss": 30.5676, + "step": 14350 + }, + { + "epoch": 0.029008108533959284, + "grad_norm": 3101.965087890625, + "learning_rate": 2.872e-06, + "loss": 49.2806, + "step": 14360 + }, + { + "epoch": 0.0290283091666431, + "grad_norm": 438.4816589355469, + "learning_rate": 2.874e-06, + "loss": 30.102, + "step": 14370 + }, + { + "epoch": 0.029048509799326914, + "grad_norm": 684.9572143554688, + "learning_rate": 2.8760000000000005e-06, + "loss": 32.759, + "step": 14380 + }, + { + "epoch": 0.02906871043201073, + "grad_norm": 810.0599365234375, + "learning_rate": 2.8780000000000002e-06, + "loss": 17.7539, + "step": 14390 + }, + { + "epoch": 0.029088911064694547, + "grad_norm": 1185.997802734375, + "learning_rate": 2.88e-06, + "loss": 33.1232, + "step": 14400 + }, + { + "epoch": 0.029109111697378363, + "grad_norm": 452.52032470703125, + "learning_rate": 2.8820000000000005e-06, + "loss": 17.2496, + "step": 14410 + }, + { + "epoch": 0.029129312330062176, + "grad_norm": 801.4060668945312, + "learning_rate": 2.8840000000000003e-06, + "loss": 24.6073, + "step": 14420 + }, + { + "epoch": 0.029149512962745993, + "grad_norm": 243.73663330078125, + "learning_rate": 2.8860000000000004e-06, + "loss": 19.0073, + "step": 14430 + }, + { + "epoch": 0.02916971359542981, + "grad_norm": 244.5675506591797, + "learning_rate": 2.888e-06, + "loss": 32.802, + "step": 14440 + }, + { + "epoch": 0.029189914228113626, + "grad_norm": 700.1507568359375, + "learning_rate": 2.89e-06, + "loss": 29.7115, + "step": 14450 + }, + { + "epoch": 0.02921011486079744, + "grad_norm": 291.5823059082031, + "learning_rate": 2.8920000000000004e-06, + "loss": 26.6881, + "step": 14460 + }, + { + "epoch": 0.029230315493481256, + "grad_norm": 175.30331420898438, + "learning_rate": 2.894e-06, + "loss": 17.5004, + "step": 14470 + }, + { + "epoch": 0.029250516126165072, + "grad_norm": 677.0678100585938, + "learning_rate": 2.8960000000000003e-06, + "loss": 28.2859, + "step": 14480 + }, + { + "epoch": 0.02927071675884889, + "grad_norm": 1238.733154296875, + "learning_rate": 2.8980000000000005e-06, + "loss": 36.7792, + "step": 14490 + }, + { + "epoch": 0.029290917391532702, + "grad_norm": 314.10546875, + "learning_rate": 2.9e-06, + "loss": 29.563, + "step": 14500 + }, + { + "epoch": 0.02931111802421652, + "grad_norm": 455.9222717285156, + "learning_rate": 2.9020000000000003e-06, + "loss": 21.9228, + "step": 14510 + }, + { + "epoch": 0.029331318656900335, + "grad_norm": 793.5556640625, + "learning_rate": 2.904e-06, + "loss": 17.6044, + "step": 14520 + }, + { + "epoch": 0.02935151928958415, + "grad_norm": 481.8849182128906, + "learning_rate": 2.9060000000000006e-06, + "loss": 19.1541, + "step": 14530 + }, + { + "epoch": 0.029371719922267964, + "grad_norm": 0.0, + "learning_rate": 2.9080000000000004e-06, + "loss": 20.9598, + "step": 14540 + }, + { + "epoch": 0.02939192055495178, + "grad_norm": 744.2693481445312, + "learning_rate": 2.91e-06, + "loss": 31.1545, + "step": 14550 + }, + { + "epoch": 0.029412121187635597, + "grad_norm": 337.4864196777344, + "learning_rate": 2.9120000000000002e-06, + "loss": 27.8448, + "step": 14560 + }, + { + "epoch": 0.029432321820319414, + "grad_norm": 710.1856689453125, + "learning_rate": 2.914e-06, + "loss": 21.5089, + "step": 14570 + }, + { + "epoch": 0.029452522453003227, + "grad_norm": 391.34686279296875, + "learning_rate": 
2.9160000000000005e-06, + "loss": 45.8425, + "step": 14580 + }, + { + "epoch": 0.029472723085687044, + "grad_norm": 736.681396484375, + "learning_rate": 2.9180000000000003e-06, + "loss": 20.4341, + "step": 14590 + }, + { + "epoch": 0.02949292371837086, + "grad_norm": 806.5769653320312, + "learning_rate": 2.92e-06, + "loss": 22.0079, + "step": 14600 + }, + { + "epoch": 0.029513124351054677, + "grad_norm": 203.32749938964844, + "learning_rate": 2.9220000000000006e-06, + "loss": 28.6116, + "step": 14610 + }, + { + "epoch": 0.02953332498373849, + "grad_norm": 837.3760375976562, + "learning_rate": 2.9240000000000003e-06, + "loss": 29.4047, + "step": 14620 + }, + { + "epoch": 0.029553525616422306, + "grad_norm": 671.6912841796875, + "learning_rate": 2.9260000000000004e-06, + "loss": 44.9515, + "step": 14630 + }, + { + "epoch": 0.029573726249106123, + "grad_norm": 722.2235107421875, + "learning_rate": 2.928e-06, + "loss": 17.3497, + "step": 14640 + }, + { + "epoch": 0.02959392688178994, + "grad_norm": 1134.066162109375, + "learning_rate": 2.93e-06, + "loss": 25.729, + "step": 14650 + }, + { + "epoch": 0.029614127514473752, + "grad_norm": 6.874101638793945, + "learning_rate": 2.9320000000000005e-06, + "loss": 31.8318, + "step": 14660 + }, + { + "epoch": 0.02963432814715757, + "grad_norm": 810.16845703125, + "learning_rate": 2.934e-06, + "loss": 26.2838, + "step": 14670 + }, + { + "epoch": 0.029654528779841385, + "grad_norm": 908.3150024414062, + "learning_rate": 2.9360000000000003e-06, + "loss": 37.4466, + "step": 14680 + }, + { + "epoch": 0.029674729412525202, + "grad_norm": 996.0183715820312, + "learning_rate": 2.9380000000000005e-06, + "loss": 27.5585, + "step": 14690 + }, + { + "epoch": 0.029694930045209015, + "grad_norm": 613.7411499023438, + "learning_rate": 2.9400000000000002e-06, + "loss": 15.2366, + "step": 14700 + }, + { + "epoch": 0.02971513067789283, + "grad_norm": 0.0, + "learning_rate": 2.9420000000000004e-06, + "loss": 10.9313, + "step": 14710 + }, + { + "epoch": 0.029735331310576648, + "grad_norm": 396.75531005859375, + "learning_rate": 2.944e-06, + "loss": 32.1213, + "step": 14720 + }, + { + "epoch": 0.029755531943260465, + "grad_norm": 1955.3218994140625, + "learning_rate": 2.946e-06, + "loss": 19.4884, + "step": 14730 + }, + { + "epoch": 0.029775732575944278, + "grad_norm": 392.6688232421875, + "learning_rate": 2.9480000000000004e-06, + "loss": 21.4439, + "step": 14740 + }, + { + "epoch": 0.029795933208628094, + "grad_norm": 1024.240234375, + "learning_rate": 2.95e-06, + "loss": 31.6278, + "step": 14750 + }, + { + "epoch": 0.02981613384131191, + "grad_norm": 371.4592590332031, + "learning_rate": 2.9520000000000003e-06, + "loss": 18.927, + "step": 14760 + }, + { + "epoch": 0.029836334473995727, + "grad_norm": 719.5804443359375, + "learning_rate": 2.954e-06, + "loss": 32.5111, + "step": 14770 + }, + { + "epoch": 0.02985653510667954, + "grad_norm": 783.6981811523438, + "learning_rate": 2.956e-06, + "loss": 28.8026, + "step": 14780 + }, + { + "epoch": 0.029876735739363357, + "grad_norm": 463.20074462890625, + "learning_rate": 2.9580000000000003e-06, + "loss": 38.6224, + "step": 14790 + }, + { + "epoch": 0.029896936372047173, + "grad_norm": 757.3056640625, + "learning_rate": 2.96e-06, + "loss": 47.2772, + "step": 14800 + }, + { + "epoch": 0.02991713700473099, + "grad_norm": 981.7617797851562, + "learning_rate": 2.9620000000000006e-06, + "loss": 23.6404, + "step": 14810 + }, + { + "epoch": 0.029937337637414803, + "grad_norm": 492.99432373046875, + "learning_rate": 
2.9640000000000003e-06, + "loss": 27.6951, + "step": 14820 + }, + { + "epoch": 0.02995753827009862, + "grad_norm": 1653.3358154296875, + "learning_rate": 2.966e-06, + "loss": 42.0704, + "step": 14830 + }, + { + "epoch": 0.029977738902782436, + "grad_norm": 619.4223022460938, + "learning_rate": 2.9680000000000002e-06, + "loss": 31.683, + "step": 14840 + }, + { + "epoch": 0.029997939535466252, + "grad_norm": 496.3249206542969, + "learning_rate": 2.97e-06, + "loss": 16.9797, + "step": 14850 + }, + { + "epoch": 0.030018140168150065, + "grad_norm": 862.78271484375, + "learning_rate": 2.9720000000000005e-06, + "loss": 25.0164, + "step": 14860 + }, + { + "epoch": 0.030038340800833882, + "grad_norm": 523.188720703125, + "learning_rate": 2.9740000000000002e-06, + "loss": 19.1842, + "step": 14870 + }, + { + "epoch": 0.0300585414335177, + "grad_norm": 859.2904052734375, + "learning_rate": 2.976e-06, + "loss": 29.1439, + "step": 14880 + }, + { + "epoch": 0.030078742066201515, + "grad_norm": 510.52191162109375, + "learning_rate": 2.9780000000000005e-06, + "loss": 44.8978, + "step": 14890 + }, + { + "epoch": 0.030098942698885328, + "grad_norm": 478.2204284667969, + "learning_rate": 2.9800000000000003e-06, + "loss": 30.1172, + "step": 14900 + }, + { + "epoch": 0.030119143331569145, + "grad_norm": 514.6478271484375, + "learning_rate": 2.9820000000000004e-06, + "loss": 21.5455, + "step": 14910 + }, + { + "epoch": 0.03013934396425296, + "grad_norm": 939.9052734375, + "learning_rate": 2.984e-06, + "loss": 34.8996, + "step": 14920 + }, + { + "epoch": 0.030159544596936778, + "grad_norm": 715.1705322265625, + "learning_rate": 2.986e-06, + "loss": 20.6811, + "step": 14930 + }, + { + "epoch": 0.03017974522962059, + "grad_norm": 682.1227416992188, + "learning_rate": 2.9880000000000004e-06, + "loss": 33.3376, + "step": 14940 + }, + { + "epoch": 0.030199945862304407, + "grad_norm": 198.76992797851562, + "learning_rate": 2.99e-06, + "loss": 35.4152, + "step": 14950 + }, + { + "epoch": 0.030220146494988224, + "grad_norm": 260.5537109375, + "learning_rate": 2.9920000000000003e-06, + "loss": 34.4215, + "step": 14960 + }, + { + "epoch": 0.03024034712767204, + "grad_norm": 765.5437622070312, + "learning_rate": 2.994e-06, + "loss": 33.299, + "step": 14970 + }, + { + "epoch": 0.030260547760355853, + "grad_norm": 333.50396728515625, + "learning_rate": 2.996e-06, + "loss": 40.3484, + "step": 14980 + }, + { + "epoch": 0.03028074839303967, + "grad_norm": 68.164794921875, + "learning_rate": 2.9980000000000003e-06, + "loss": 31.7485, + "step": 14990 + }, + { + "epoch": 0.030300949025723486, + "grad_norm": 627.5897827148438, + "learning_rate": 3e-06, + "loss": 33.2965, + "step": 15000 + }, + { + "epoch": 0.030321149658407303, + "grad_norm": 333.0770568847656, + "learning_rate": 3.0020000000000006e-06, + "loss": 27.5349, + "step": 15010 + }, + { + "epoch": 0.030341350291091116, + "grad_norm": 201.7154998779297, + "learning_rate": 3.0040000000000004e-06, + "loss": 40.4812, + "step": 15020 + }, + { + "epoch": 0.030361550923774933, + "grad_norm": 638.8067016601562, + "learning_rate": 3.006e-06, + "loss": 30.0714, + "step": 15030 + }, + { + "epoch": 0.03038175155645875, + "grad_norm": 721.8273315429688, + "learning_rate": 3.0080000000000003e-06, + "loss": 29.557, + "step": 15040 + }, + { + "epoch": 0.030401952189142566, + "grad_norm": 940.406005859375, + "learning_rate": 3.01e-06, + "loss": 30.875, + "step": 15050 + }, + { + "epoch": 0.03042215282182638, + "grad_norm": 537.4491577148438, + "learning_rate": 3.0120000000000006e-06, + 
"loss": 27.1281, + "step": 15060 + }, + { + "epoch": 0.030442353454510195, + "grad_norm": 349.7826843261719, + "learning_rate": 3.0140000000000003e-06, + "loss": 33.0032, + "step": 15070 + }, + { + "epoch": 0.030462554087194012, + "grad_norm": 615.8168334960938, + "learning_rate": 3.016e-06, + "loss": 29.8807, + "step": 15080 + }, + { + "epoch": 0.03048275471987783, + "grad_norm": 543.2507934570312, + "learning_rate": 3.0180000000000006e-06, + "loss": 24.4638, + "step": 15090 + }, + { + "epoch": 0.03050295535256164, + "grad_norm": 1828.9852294921875, + "learning_rate": 3.0200000000000003e-06, + "loss": 22.2808, + "step": 15100 + }, + { + "epoch": 0.030523155985245458, + "grad_norm": 460.4822692871094, + "learning_rate": 3.0220000000000005e-06, + "loss": 18.9824, + "step": 15110 + }, + { + "epoch": 0.030543356617929274, + "grad_norm": 363.14569091796875, + "learning_rate": 3.024e-06, + "loss": 17.4815, + "step": 15120 + }, + { + "epoch": 0.03056355725061309, + "grad_norm": 574.8248901367188, + "learning_rate": 3.026e-06, + "loss": 16.694, + "step": 15130 + }, + { + "epoch": 0.030583757883296904, + "grad_norm": 619.8360595703125, + "learning_rate": 3.0280000000000005e-06, + "loss": 18.2632, + "step": 15140 + }, + { + "epoch": 0.03060395851598072, + "grad_norm": 720.516357421875, + "learning_rate": 3.0300000000000002e-06, + "loss": 44.8752, + "step": 15150 + }, + { + "epoch": 0.030624159148664537, + "grad_norm": 769.7462768554688, + "learning_rate": 3.0320000000000004e-06, + "loss": 29.0794, + "step": 15160 + }, + { + "epoch": 0.030644359781348354, + "grad_norm": 860.6575317382812, + "learning_rate": 3.034e-06, + "loss": 42.3765, + "step": 15170 + }, + { + "epoch": 0.030664560414032167, + "grad_norm": 443.2711486816406, + "learning_rate": 3.0360000000000002e-06, + "loss": 33.6403, + "step": 15180 + }, + { + "epoch": 0.030684761046715983, + "grad_norm": 733.3158569335938, + "learning_rate": 3.0380000000000004e-06, + "loss": 27.6277, + "step": 15190 + }, + { + "epoch": 0.0307049616793998, + "grad_norm": 787.5064697265625, + "learning_rate": 3.04e-06, + "loss": 37.7274, + "step": 15200 + }, + { + "epoch": 0.030725162312083613, + "grad_norm": 2191.33349609375, + "learning_rate": 3.0420000000000007e-06, + "loss": 53.4131, + "step": 15210 + }, + { + "epoch": 0.03074536294476743, + "grad_norm": 680.395751953125, + "learning_rate": 3.0440000000000004e-06, + "loss": 19.9371, + "step": 15220 + }, + { + "epoch": 0.030765563577451246, + "grad_norm": 281.8812561035156, + "learning_rate": 3.046e-06, + "loss": 26.9937, + "step": 15230 + }, + { + "epoch": 0.030785764210135062, + "grad_norm": 522.1564331054688, + "learning_rate": 3.0480000000000003e-06, + "loss": 24.629, + "step": 15240 + }, + { + "epoch": 0.030805964842818875, + "grad_norm": 254.390869140625, + "learning_rate": 3.05e-06, + "loss": 37.8011, + "step": 15250 + }, + { + "epoch": 0.030826165475502692, + "grad_norm": 156.7596893310547, + "learning_rate": 3.0520000000000006e-06, + "loss": 20.2772, + "step": 15260 + }, + { + "epoch": 0.03084636610818651, + "grad_norm": 224.06796264648438, + "learning_rate": 3.0540000000000003e-06, + "loss": 24.4388, + "step": 15270 + }, + { + "epoch": 0.030866566740870325, + "grad_norm": 270.6825866699219, + "learning_rate": 3.056e-06, + "loss": 21.0277, + "step": 15280 + }, + { + "epoch": 0.030886767373554138, + "grad_norm": 1013.138916015625, + "learning_rate": 3.0580000000000006e-06, + "loss": 34.1167, + "step": 15290 + }, + { + "epoch": 0.030906968006237955, + "grad_norm": 577.4921264648438, + "learning_rate": 
3.0600000000000003e-06, + "loss": 27.4165, + "step": 15300 + }, + { + "epoch": 0.03092716863892177, + "grad_norm": 218.55142211914062, + "learning_rate": 3.0620000000000005e-06, + "loss": 29.5243, + "step": 15310 + }, + { + "epoch": 0.030947369271605588, + "grad_norm": 683.9786987304688, + "learning_rate": 3.0640000000000002e-06, + "loss": 19.9124, + "step": 15320 + }, + { + "epoch": 0.0309675699042894, + "grad_norm": 456.1734619140625, + "learning_rate": 3.066e-06, + "loss": 32.225, + "step": 15330 + }, + { + "epoch": 0.030987770536973217, + "grad_norm": 379.7656555175781, + "learning_rate": 3.0680000000000005e-06, + "loss": 25.2169, + "step": 15340 + }, + { + "epoch": 0.031007971169657034, + "grad_norm": 566.6279296875, + "learning_rate": 3.0700000000000003e-06, + "loss": 51.9398, + "step": 15350 + }, + { + "epoch": 0.03102817180234085, + "grad_norm": 866.3634643554688, + "learning_rate": 3.072e-06, + "loss": 25.7627, + "step": 15360 + }, + { + "epoch": 0.031048372435024663, + "grad_norm": 508.4942932128906, + "learning_rate": 3.074e-06, + "loss": 41.3464, + "step": 15370 + }, + { + "epoch": 0.03106857306770848, + "grad_norm": 591.5121459960938, + "learning_rate": 3.0760000000000003e-06, + "loss": 23.6245, + "step": 15380 + }, + { + "epoch": 0.031088773700392296, + "grad_norm": 600.5582275390625, + "learning_rate": 3.0780000000000004e-06, + "loss": 41.7437, + "step": 15390 + }, + { + "epoch": 0.031108974333076113, + "grad_norm": 516.2938842773438, + "learning_rate": 3.08e-06, + "loss": 17.6149, + "step": 15400 + }, + { + "epoch": 0.031129174965759926, + "grad_norm": 245.33395385742188, + "learning_rate": 3.082e-06, + "loss": 23.1306, + "step": 15410 + }, + { + "epoch": 0.031149375598443742, + "grad_norm": 754.3231201171875, + "learning_rate": 3.0840000000000005e-06, + "loss": 42.3905, + "step": 15420 + }, + { + "epoch": 0.03116957623112756, + "grad_norm": 183.8521728515625, + "learning_rate": 3.086e-06, + "loss": 26.3259, + "step": 15430 + }, + { + "epoch": 0.031189776863811376, + "grad_norm": 543.6480712890625, + "learning_rate": 3.0880000000000003e-06, + "loss": 26.1311, + "step": 15440 + }, + { + "epoch": 0.03120997749649519, + "grad_norm": 549.1738891601562, + "learning_rate": 3.09e-06, + "loss": 31.1268, + "step": 15450 + }, + { + "epoch": 0.031230178129179005, + "grad_norm": 409.1370544433594, + "learning_rate": 3.092e-06, + "loss": 25.4154, + "step": 15460 + }, + { + "epoch": 0.03125037876186282, + "grad_norm": 639.0555419921875, + "learning_rate": 3.0940000000000004e-06, + "loss": 20.0562, + "step": 15470 + }, + { + "epoch": 0.03127057939454664, + "grad_norm": 562.7499389648438, + "learning_rate": 3.096e-06, + "loss": 19.9571, + "step": 15480 + }, + { + "epoch": 0.03129078002723045, + "grad_norm": 588.2476196289062, + "learning_rate": 3.0980000000000007e-06, + "loss": 29.1326, + "step": 15490 + }, + { + "epoch": 0.03131098065991427, + "grad_norm": 402.01629638671875, + "learning_rate": 3.1000000000000004e-06, + "loss": 42.9993, + "step": 15500 + }, + { + "epoch": 0.031331181292598084, + "grad_norm": 464.4310607910156, + "learning_rate": 3.102e-06, + "loss": 28.3306, + "step": 15510 + }, + { + "epoch": 0.0313513819252819, + "grad_norm": 1001.6248168945312, + "learning_rate": 3.1040000000000003e-06, + "loss": 27.8757, + "step": 15520 + }, + { + "epoch": 0.03137158255796572, + "grad_norm": 291.364501953125, + "learning_rate": 3.106e-06, + "loss": 26.5913, + "step": 15530 + }, + { + "epoch": 0.03139178319064953, + "grad_norm": 512.9525756835938, + "learning_rate": 
3.1080000000000006e-06, + "loss": 22.5451, + "step": 15540 + }, + { + "epoch": 0.03141198382333334, + "grad_norm": 583.1221313476562, + "learning_rate": 3.1100000000000003e-06, + "loss": 22.0143, + "step": 15550 + }, + { + "epoch": 0.03143218445601716, + "grad_norm": 184.39239501953125, + "learning_rate": 3.112e-06, + "loss": 21.4702, + "step": 15560 + }, + { + "epoch": 0.031452385088700976, + "grad_norm": 146.10752868652344, + "learning_rate": 3.114e-06, + "loss": 12.1912, + "step": 15570 + }, + { + "epoch": 0.031472585721384796, + "grad_norm": 392.8891906738281, + "learning_rate": 3.1160000000000003e-06, + "loss": 44.2547, + "step": 15580 + }, + { + "epoch": 0.03149278635406861, + "grad_norm": 429.8520812988281, + "learning_rate": 3.1180000000000005e-06, + "loss": 25.8197, + "step": 15590 + }, + { + "epoch": 0.03151298698675242, + "grad_norm": 991.6886596679688, + "learning_rate": 3.12e-06, + "loss": 24.9521, + "step": 15600 + }, + { + "epoch": 0.03153318761943624, + "grad_norm": 614.9202270507812, + "learning_rate": 3.122e-06, + "loss": 24.8537, + "step": 15610 + }, + { + "epoch": 0.031553388252120056, + "grad_norm": 645.7086181640625, + "learning_rate": 3.1240000000000005e-06, + "loss": 15.0873, + "step": 15620 + }, + { + "epoch": 0.03157358888480387, + "grad_norm": 1097.1376953125, + "learning_rate": 3.1260000000000002e-06, + "loss": 26.1902, + "step": 15630 + }, + { + "epoch": 0.03159378951748769, + "grad_norm": 916.2590942382812, + "learning_rate": 3.1280000000000004e-06, + "loss": 33.1488, + "step": 15640 + }, + { + "epoch": 0.0316139901501715, + "grad_norm": 716.00341796875, + "learning_rate": 3.13e-06, + "loss": 23.5244, + "step": 15650 + }, + { + "epoch": 0.03163419078285532, + "grad_norm": 329.9820861816406, + "learning_rate": 3.132e-06, + "loss": 24.3839, + "step": 15660 + }, + { + "epoch": 0.031654391415539135, + "grad_norm": 434.7264099121094, + "learning_rate": 3.1340000000000004e-06, + "loss": 20.0705, + "step": 15670 + }, + { + "epoch": 0.03167459204822295, + "grad_norm": 1124.01708984375, + "learning_rate": 3.136e-06, + "loss": 31.049, + "step": 15680 + }, + { + "epoch": 0.03169479268090677, + "grad_norm": 966.0602416992188, + "learning_rate": 3.1380000000000003e-06, + "loss": 40.8942, + "step": 15690 + }, + { + "epoch": 0.03171499331359058, + "grad_norm": 1354.5238037109375, + "learning_rate": 3.1400000000000004e-06, + "loss": 25.6929, + "step": 15700 + }, + { + "epoch": 0.031735193946274394, + "grad_norm": 986.6156005859375, + "learning_rate": 3.142e-06, + "loss": 15.9512, + "step": 15710 + }, + { + "epoch": 0.031755394578958214, + "grad_norm": 782.9063110351562, + "learning_rate": 3.1440000000000003e-06, + "loss": 43.8836, + "step": 15720 + }, + { + "epoch": 0.03177559521164203, + "grad_norm": 897.754150390625, + "learning_rate": 3.146e-06, + "loss": 23.8829, + "step": 15730 + }, + { + "epoch": 0.03179579584432585, + "grad_norm": 811.4805908203125, + "learning_rate": 3.1480000000000006e-06, + "loss": 27.5326, + "step": 15740 + }, + { + "epoch": 0.03181599647700966, + "grad_norm": 261.16192626953125, + "learning_rate": 3.1500000000000003e-06, + "loss": 38.2281, + "step": 15750 + }, + { + "epoch": 0.03183619710969347, + "grad_norm": 1244.820068359375, + "learning_rate": 3.152e-06, + "loss": 51.8557, + "step": 15760 + }, + { + "epoch": 0.03185639774237729, + "grad_norm": 416.2852478027344, + "learning_rate": 3.154e-06, + "loss": 14.7294, + "step": 15770 + }, + { + "epoch": 0.031876598375061106, + "grad_norm": 627.7940063476562, + "learning_rate": 3.1560000000000004e-06, 
+ "loss": 28.2093, + "step": 15780 + }, + { + "epoch": 0.03189679900774492, + "grad_norm": 628.6441040039062, + "learning_rate": 3.1580000000000005e-06, + "loss": 22.0732, + "step": 15790 + }, + { + "epoch": 0.03191699964042874, + "grad_norm": 381.6886291503906, + "learning_rate": 3.1600000000000002e-06, + "loss": 14.0432, + "step": 15800 + }, + { + "epoch": 0.03193720027311255, + "grad_norm": 430.179931640625, + "learning_rate": 3.162e-06, + "loss": 33.138, + "step": 15810 + }, + { + "epoch": 0.03195740090579637, + "grad_norm": 562.0535888671875, + "learning_rate": 3.1640000000000005e-06, + "loss": 34.5442, + "step": 15820 + }, + { + "epoch": 0.031977601538480185, + "grad_norm": 603.3442993164062, + "learning_rate": 3.1660000000000003e-06, + "loss": 21.5006, + "step": 15830 + }, + { + "epoch": 0.031997802171164, + "grad_norm": 367.27880859375, + "learning_rate": 3.1680000000000004e-06, + "loss": 35.3885, + "step": 15840 + }, + { + "epoch": 0.03201800280384782, + "grad_norm": 612.0071411132812, + "learning_rate": 3.17e-06, + "loss": 33.6516, + "step": 15850 + }, + { + "epoch": 0.03203820343653163, + "grad_norm": 335.4685974121094, + "learning_rate": 3.172e-06, + "loss": 31.1106, + "step": 15860 + }, + { + "epoch": 0.032058404069215445, + "grad_norm": 356.3526306152344, + "learning_rate": 3.1740000000000004e-06, + "loss": 28.422, + "step": 15870 + }, + { + "epoch": 0.032078604701899265, + "grad_norm": 0.0, + "learning_rate": 3.176e-06, + "loss": 21.6766, + "step": 15880 + }, + { + "epoch": 0.03209880533458308, + "grad_norm": 1218.23583984375, + "learning_rate": 3.1780000000000003e-06, + "loss": 41.5568, + "step": 15890 + }, + { + "epoch": 0.0321190059672669, + "grad_norm": 251.69009399414062, + "learning_rate": 3.1800000000000005e-06, + "loss": 38.6637, + "step": 15900 + }, + { + "epoch": 0.03213920659995071, + "grad_norm": 393.4754333496094, + "learning_rate": 3.182e-06, + "loss": 57.3885, + "step": 15910 + }, + { + "epoch": 0.032159407232634524, + "grad_norm": 527.3384399414062, + "learning_rate": 3.1840000000000003e-06, + "loss": 43.3291, + "step": 15920 + }, + { + "epoch": 0.032179607865318344, + "grad_norm": 568.7590942382812, + "learning_rate": 3.186e-06, + "loss": 33.8822, + "step": 15930 + }, + { + "epoch": 0.03219980849800216, + "grad_norm": 729.0355224609375, + "learning_rate": 3.188e-06, + "loss": 38.2755, + "step": 15940 + }, + { + "epoch": 0.03222000913068597, + "grad_norm": 706.9700927734375, + "learning_rate": 3.1900000000000004e-06, + "loss": 27.8433, + "step": 15950 + }, + { + "epoch": 0.03224020976336979, + "grad_norm": 757.6204833984375, + "learning_rate": 3.192e-06, + "loss": 42.868, + "step": 15960 + }, + { + "epoch": 0.0322604103960536, + "grad_norm": 395.45770263671875, + "learning_rate": 3.1940000000000003e-06, + "loss": 22.6544, + "step": 15970 + }, + { + "epoch": 0.03228061102873742, + "grad_norm": 357.60565185546875, + "learning_rate": 3.1960000000000004e-06, + "loss": 30.446, + "step": 15980 + }, + { + "epoch": 0.032300811661421236, + "grad_norm": 440.5813293457031, + "learning_rate": 3.198e-06, + "loss": 38.8448, + "step": 15990 + }, + { + "epoch": 0.03232101229410505, + "grad_norm": 292.3539733886719, + "learning_rate": 3.2000000000000003e-06, + "loss": 27.1719, + "step": 16000 + }, + { + "epoch": 0.03234121292678887, + "grad_norm": 301.79400634765625, + "learning_rate": 3.202e-06, + "loss": 27.114, + "step": 16010 + }, + { + "epoch": 0.03236141355947268, + "grad_norm": 477.06793212890625, + "learning_rate": 3.2040000000000006e-06, + "loss": 38.1142, + "step": 
16020 + }, + { + "epoch": 0.032381614192156495, + "grad_norm": 0.0, + "learning_rate": 3.2060000000000003e-06, + "loss": 27.6631, + "step": 16030 + }, + { + "epoch": 0.032401814824840315, + "grad_norm": 891.1085815429688, + "learning_rate": 3.208e-06, + "loss": 44.7654, + "step": 16040 + }, + { + "epoch": 0.03242201545752413, + "grad_norm": 272.23663330078125, + "learning_rate": 3.21e-06, + "loss": 44.5703, + "step": 16050 + }, + { + "epoch": 0.03244221609020795, + "grad_norm": 58.88239288330078, + "learning_rate": 3.212e-06, + "loss": 32.7265, + "step": 16060 + }, + { + "epoch": 0.03246241672289176, + "grad_norm": 791.5468139648438, + "learning_rate": 3.2140000000000005e-06, + "loss": 27.4122, + "step": 16070 + }, + { + "epoch": 0.032482617355575574, + "grad_norm": 42.52566146850586, + "learning_rate": 3.216e-06, + "loss": 19.6953, + "step": 16080 + }, + { + "epoch": 0.032502817988259394, + "grad_norm": 573.6635131835938, + "learning_rate": 3.218e-06, + "loss": 27.0275, + "step": 16090 + }, + { + "epoch": 0.03252301862094321, + "grad_norm": 370.3996276855469, + "learning_rate": 3.2200000000000005e-06, + "loss": 17.8417, + "step": 16100 + }, + { + "epoch": 0.03254321925362702, + "grad_norm": 424.79132080078125, + "learning_rate": 3.2220000000000002e-06, + "loss": 32.9271, + "step": 16110 + }, + { + "epoch": 0.03256341988631084, + "grad_norm": 591.5966796875, + "learning_rate": 3.2240000000000004e-06, + "loss": 52.1652, + "step": 16120 + }, + { + "epoch": 0.03258362051899465, + "grad_norm": 472.8325500488281, + "learning_rate": 3.226e-06, + "loss": 21.4173, + "step": 16130 + }, + { + "epoch": 0.03260382115167847, + "grad_norm": 1556.498046875, + "learning_rate": 3.228e-06, + "loss": 25.8709, + "step": 16140 + }, + { + "epoch": 0.032624021784362287, + "grad_norm": 441.6474914550781, + "learning_rate": 3.2300000000000004e-06, + "loss": 34.7153, + "step": 16150 + }, + { + "epoch": 0.0326442224170461, + "grad_norm": 221.67434692382812, + "learning_rate": 3.232e-06, + "loss": 23.3257, + "step": 16160 + }, + { + "epoch": 0.03266442304972992, + "grad_norm": 0.0, + "learning_rate": 3.2340000000000003e-06, + "loss": 22.1946, + "step": 16170 + }, + { + "epoch": 0.03268462368241373, + "grad_norm": 361.2557373046875, + "learning_rate": 3.2360000000000004e-06, + "loss": 22.125, + "step": 16180 + }, + { + "epoch": 0.032704824315097546, + "grad_norm": 480.7691650390625, + "learning_rate": 3.238e-06, + "loss": 14.4723, + "step": 16190 + }, + { + "epoch": 0.032725024947781366, + "grad_norm": 1278.60009765625, + "learning_rate": 3.2400000000000003e-06, + "loss": 30.1542, + "step": 16200 + }, + { + "epoch": 0.03274522558046518, + "grad_norm": 427.308837890625, + "learning_rate": 3.242e-06, + "loss": 28.6297, + "step": 16210 + }, + { + "epoch": 0.032765426213149, + "grad_norm": 690.6139526367188, + "learning_rate": 3.2440000000000006e-06, + "loss": 23.8278, + "step": 16220 + }, + { + "epoch": 0.03278562684583281, + "grad_norm": 759.2803955078125, + "learning_rate": 3.2460000000000003e-06, + "loss": 37.0073, + "step": 16230 + }, + { + "epoch": 0.032805827478516625, + "grad_norm": 487.7970275878906, + "learning_rate": 3.248e-06, + "loss": 21.3471, + "step": 16240 + }, + { + "epoch": 0.032826028111200445, + "grad_norm": 638.1491088867188, + "learning_rate": 3.2500000000000002e-06, + "loss": 22.9298, + "step": 16250 + }, + { + "epoch": 0.03284622874388426, + "grad_norm": 230.1177215576172, + "learning_rate": 3.252e-06, + "loss": 30.0071, + "step": 16260 + }, + { + "epoch": 0.03286642937656807, + "grad_norm": 
596.5241088867188, + "learning_rate": 3.2540000000000005e-06, + "loss": 17.0724, + "step": 16270 + }, + { + "epoch": 0.03288663000925189, + "grad_norm": 181.33509826660156, + "learning_rate": 3.2560000000000003e-06, + "loss": 23.5193, + "step": 16280 + }, + { + "epoch": 0.032906830641935704, + "grad_norm": 404.99420166015625, + "learning_rate": 3.258e-06, + "loss": 22.4026, + "step": 16290 + }, + { + "epoch": 0.032927031274619524, + "grad_norm": 352.00689697265625, + "learning_rate": 3.2600000000000006e-06, + "loss": 18.9548, + "step": 16300 + }, + { + "epoch": 0.03294723190730334, + "grad_norm": 602.6947021484375, + "learning_rate": 3.2620000000000003e-06, + "loss": 26.5133, + "step": 16310 + }, + { + "epoch": 0.03296743253998715, + "grad_norm": 1009.1510009765625, + "learning_rate": 3.2640000000000004e-06, + "loss": 23.3099, + "step": 16320 + }, + { + "epoch": 0.03298763317267097, + "grad_norm": 650.4998168945312, + "learning_rate": 3.266e-06, + "loss": 23.7321, + "step": 16330 + }, + { + "epoch": 0.03300783380535478, + "grad_norm": 463.7991943359375, + "learning_rate": 3.268e-06, + "loss": 19.2079, + "step": 16340 + }, + { + "epoch": 0.033028034438038596, + "grad_norm": 732.532958984375, + "learning_rate": 3.2700000000000005e-06, + "loss": 36.9818, + "step": 16350 + }, + { + "epoch": 0.033048235070722416, + "grad_norm": 540.9727783203125, + "learning_rate": 3.272e-06, + "loss": 19.345, + "step": 16360 + }, + { + "epoch": 0.03306843570340623, + "grad_norm": 566.6883544921875, + "learning_rate": 3.2740000000000003e-06, + "loss": 26.3954, + "step": 16370 + }, + { + "epoch": 0.03308863633609005, + "grad_norm": 441.031982421875, + "learning_rate": 3.2760000000000005e-06, + "loss": 25.699, + "step": 16380 + }, + { + "epoch": 0.03310883696877386, + "grad_norm": 437.2505187988281, + "learning_rate": 3.278e-06, + "loss": 22.3838, + "step": 16390 + }, + { + "epoch": 0.033129037601457675, + "grad_norm": 839.2191162109375, + "learning_rate": 3.2800000000000004e-06, + "loss": 52.5256, + "step": 16400 + }, + { + "epoch": 0.033149238234141495, + "grad_norm": 385.2565612792969, + "learning_rate": 3.282e-06, + "loss": 32.6833, + "step": 16410 + }, + { + "epoch": 0.03316943886682531, + "grad_norm": 336.7561340332031, + "learning_rate": 3.2840000000000007e-06, + "loss": 26.1039, + "step": 16420 + }, + { + "epoch": 0.03318963949950912, + "grad_norm": 154.6703338623047, + "learning_rate": 3.2860000000000004e-06, + "loss": 19.3196, + "step": 16430 + }, + { + "epoch": 0.03320984013219294, + "grad_norm": 307.9013671875, + "learning_rate": 3.288e-06, + "loss": 21.57, + "step": 16440 + }, + { + "epoch": 0.033230040764876755, + "grad_norm": 304.7125549316406, + "learning_rate": 3.2900000000000003e-06, + "loss": 26.7927, + "step": 16450 + }, + { + "epoch": 0.033250241397560575, + "grad_norm": 709.2648315429688, + "learning_rate": 3.292e-06, + "loss": 23.7141, + "step": 16460 + }, + { + "epoch": 0.03327044203024439, + "grad_norm": 161.2997589111328, + "learning_rate": 3.2940000000000006e-06, + "loss": 17.6274, + "step": 16470 + }, + { + "epoch": 0.0332906426629282, + "grad_norm": 234.00225830078125, + "learning_rate": 3.2960000000000003e-06, + "loss": 24.9305, + "step": 16480 + }, + { + "epoch": 0.03331084329561202, + "grad_norm": 266.7884826660156, + "learning_rate": 3.298e-06, + "loss": 17.2077, + "step": 16490 + }, + { + "epoch": 0.033331043928295834, + "grad_norm": 459.37664794921875, + "learning_rate": 3.3000000000000006e-06, + "loss": 31.7385, + "step": 16500 + }, + { + "epoch": 0.03335124456097965, + 
"grad_norm": 394.70391845703125, + "learning_rate": 3.3020000000000003e-06, + "loss": 36.4949, + "step": 16510 + }, + { + "epoch": 0.03337144519366347, + "grad_norm": 261.3290100097656, + "learning_rate": 3.3040000000000005e-06, + "loss": 16.4725, + "step": 16520 + }, + { + "epoch": 0.03339164582634728, + "grad_norm": 590.04931640625, + "learning_rate": 3.306e-06, + "loss": 25.0828, + "step": 16530 + }, + { + "epoch": 0.0334118464590311, + "grad_norm": 1402.6011962890625, + "learning_rate": 3.308e-06, + "loss": 54.7112, + "step": 16540 + }, + { + "epoch": 0.03343204709171491, + "grad_norm": 662.274658203125, + "learning_rate": 3.3100000000000005e-06, + "loss": 36.125, + "step": 16550 + }, + { + "epoch": 0.033452247724398726, + "grad_norm": 477.1455383300781, + "learning_rate": 3.3120000000000002e-06, + "loss": 39.5076, + "step": 16560 + }, + { + "epoch": 0.033472448357082546, + "grad_norm": 240.7284698486328, + "learning_rate": 3.314e-06, + "loss": 35.2164, + "step": 16570 + }, + { + "epoch": 0.03349264898976636, + "grad_norm": 531.02685546875, + "learning_rate": 3.3160000000000005e-06, + "loss": 29.3164, + "step": 16580 + }, + { + "epoch": 0.03351284962245017, + "grad_norm": 568.7428588867188, + "learning_rate": 3.3180000000000003e-06, + "loss": 24.5444, + "step": 16590 + }, + { + "epoch": 0.03353305025513399, + "grad_norm": 236.66079711914062, + "learning_rate": 3.3200000000000004e-06, + "loss": 18.6516, + "step": 16600 + }, + { + "epoch": 0.033553250887817805, + "grad_norm": 331.0774230957031, + "learning_rate": 3.322e-06, + "loss": 16.5033, + "step": 16610 + }, + { + "epoch": 0.033573451520501625, + "grad_norm": 357.0018310546875, + "learning_rate": 3.324e-06, + "loss": 26.9735, + "step": 16620 + }, + { + "epoch": 0.03359365215318544, + "grad_norm": 430.9911193847656, + "learning_rate": 3.3260000000000004e-06, + "loss": 28.2231, + "step": 16630 + }, + { + "epoch": 0.03361385278586925, + "grad_norm": 956.1023559570312, + "learning_rate": 3.328e-06, + "loss": 45.2886, + "step": 16640 + }, + { + "epoch": 0.03363405341855307, + "grad_norm": 546.6738891601562, + "learning_rate": 3.3300000000000003e-06, + "loss": 30.9994, + "step": 16650 + }, + { + "epoch": 0.033654254051236884, + "grad_norm": 175.43746948242188, + "learning_rate": 3.332e-06, + "loss": 20.7395, + "step": 16660 + }, + { + "epoch": 0.0336744546839207, + "grad_norm": 673.0543823242188, + "learning_rate": 3.334e-06, + "loss": 29.1951, + "step": 16670 + }, + { + "epoch": 0.03369465531660452, + "grad_norm": 93.5210189819336, + "learning_rate": 3.3360000000000003e-06, + "loss": 19.8843, + "step": 16680 + }, + { + "epoch": 0.03371485594928833, + "grad_norm": 507.43133544921875, + "learning_rate": 3.338e-06, + "loss": 26.0732, + "step": 16690 + }, + { + "epoch": 0.03373505658197215, + "grad_norm": 732.0919189453125, + "learning_rate": 3.3400000000000006e-06, + "loss": 39.2512, + "step": 16700 + }, + { + "epoch": 0.03375525721465596, + "grad_norm": 207.01541137695312, + "learning_rate": 3.3420000000000004e-06, + "loss": 25.9437, + "step": 16710 + }, + { + "epoch": 0.033775457847339777, + "grad_norm": 430.7888488769531, + "learning_rate": 3.344e-06, + "loss": 26.1944, + "step": 16720 + }, + { + "epoch": 0.033795658480023597, + "grad_norm": 216.97750854492188, + "learning_rate": 3.3460000000000002e-06, + "loss": 17.1715, + "step": 16730 + }, + { + "epoch": 0.03381585911270741, + "grad_norm": 896.1383056640625, + "learning_rate": 3.348e-06, + "loss": 27.9904, + "step": 16740 + }, + { + "epoch": 0.03383605974539122, + "grad_norm": 
424.6932678222656, + "learning_rate": 3.3500000000000005e-06, + "loss": 19.8765, + "step": 16750 + }, + { + "epoch": 0.03385626037807504, + "grad_norm": 296.5496520996094, + "learning_rate": 3.3520000000000003e-06, + "loss": 14.7035, + "step": 16760 + }, + { + "epoch": 0.033876461010758856, + "grad_norm": 694.663330078125, + "learning_rate": 3.354e-06, + "loss": 39.6883, + "step": 16770 + }, + { + "epoch": 0.033896661643442676, + "grad_norm": 1065.7681884765625, + "learning_rate": 3.3560000000000006e-06, + "loss": 32.8346, + "step": 16780 + }, + { + "epoch": 0.03391686227612649, + "grad_norm": 620.96826171875, + "learning_rate": 3.3580000000000003e-06, + "loss": 24.6682, + "step": 16790 + }, + { + "epoch": 0.0339370629088103, + "grad_norm": 1143.7791748046875, + "learning_rate": 3.3600000000000004e-06, + "loss": 12.908, + "step": 16800 + }, + { + "epoch": 0.03395726354149412, + "grad_norm": 414.8392333984375, + "learning_rate": 3.362e-06, + "loss": 36.5095, + "step": 16810 + }, + { + "epoch": 0.033977464174177935, + "grad_norm": 129.9327850341797, + "learning_rate": 3.364e-06, + "loss": 25.4761, + "step": 16820 + }, + { + "epoch": 0.03399766480686175, + "grad_norm": 928.8633422851562, + "learning_rate": 3.3660000000000005e-06, + "loss": 29.5998, + "step": 16830 + }, + { + "epoch": 0.03401786543954557, + "grad_norm": 347.5965576171875, + "learning_rate": 3.368e-06, + "loss": 16.697, + "step": 16840 + }, + { + "epoch": 0.03403806607222938, + "grad_norm": 429.3285827636719, + "learning_rate": 3.3700000000000003e-06, + "loss": 28.8614, + "step": 16850 + }, + { + "epoch": 0.0340582667049132, + "grad_norm": 388.8118591308594, + "learning_rate": 3.372e-06, + "loss": 20.4107, + "step": 16860 + }, + { + "epoch": 0.034078467337597014, + "grad_norm": 855.3677368164062, + "learning_rate": 3.3740000000000002e-06, + "loss": 32.0758, + "step": 16870 + }, + { + "epoch": 0.03409866797028083, + "grad_norm": 424.2457580566406, + "learning_rate": 3.3760000000000004e-06, + "loss": 24.5486, + "step": 16880 + }, + { + "epoch": 0.03411886860296465, + "grad_norm": 243.54652404785156, + "learning_rate": 3.378e-06, + "loss": 11.4537, + "step": 16890 + }, + { + "epoch": 0.03413906923564846, + "grad_norm": 582.7598266601562, + "learning_rate": 3.3800000000000007e-06, + "loss": 23.3055, + "step": 16900 + }, + { + "epoch": 0.03415926986833227, + "grad_norm": 443.8633117675781, + "learning_rate": 3.3820000000000004e-06, + "loss": 36.2278, + "step": 16910 + }, + { + "epoch": 0.03417947050101609, + "grad_norm": 539.4478759765625, + "learning_rate": 3.384e-06, + "loss": 26.0477, + "step": 16920 + }, + { + "epoch": 0.034199671133699906, + "grad_norm": 674.6696166992188, + "learning_rate": 3.3860000000000003e-06, + "loss": 26.3475, + "step": 16930 + }, + { + "epoch": 0.034219871766383726, + "grad_norm": 360.42352294921875, + "learning_rate": 3.388e-06, + "loss": 22.5731, + "step": 16940 + }, + { + "epoch": 0.03424007239906754, + "grad_norm": 218.48251342773438, + "learning_rate": 3.3900000000000006e-06, + "loss": 22.8484, + "step": 16950 + }, + { + "epoch": 0.03426027303175135, + "grad_norm": 756.140380859375, + "learning_rate": 3.3920000000000003e-06, + "loss": 38.419, + "step": 16960 + }, + { + "epoch": 0.03428047366443517, + "grad_norm": 429.0950012207031, + "learning_rate": 3.394e-06, + "loss": 55.9932, + "step": 16970 + }, + { + "epoch": 0.034300674297118985, + "grad_norm": 429.5851135253906, + "learning_rate": 3.3960000000000006e-06, + "loss": 10.647, + "step": 16980 + }, + { + "epoch": 0.0343208749298028, + 
"grad_norm": 314.9561462402344, + "learning_rate": 3.3980000000000003e-06, + "loss": 25.6604, + "step": 16990 + }, + { + "epoch": 0.03434107556248662, + "grad_norm": 92.45370483398438, + "learning_rate": 3.4000000000000005e-06, + "loss": 13.6651, + "step": 17000 + }, + { + "epoch": 0.03436127619517043, + "grad_norm": 1021.7738647460938, + "learning_rate": 3.402e-06, + "loss": 53.2146, + "step": 17010 + }, + { + "epoch": 0.03438147682785425, + "grad_norm": 304.0646057128906, + "learning_rate": 3.404e-06, + "loss": 27.0317, + "step": 17020 + }, + { + "epoch": 0.034401677460538065, + "grad_norm": 615.5056762695312, + "learning_rate": 3.4060000000000005e-06, + "loss": 21.2823, + "step": 17030 + }, + { + "epoch": 0.03442187809322188, + "grad_norm": 178.4603729248047, + "learning_rate": 3.4080000000000002e-06, + "loss": 38.0386, + "step": 17040 + }, + { + "epoch": 0.0344420787259057, + "grad_norm": 868.1829223632812, + "learning_rate": 3.4100000000000004e-06, + "loss": 31.8253, + "step": 17050 + }, + { + "epoch": 0.03446227935858951, + "grad_norm": 365.075927734375, + "learning_rate": 3.412e-06, + "loss": 26.7497, + "step": 17060 + }, + { + "epoch": 0.034482479991273324, + "grad_norm": 397.6850891113281, + "learning_rate": 3.4140000000000003e-06, + "loss": 20.5952, + "step": 17070 + }, + { + "epoch": 0.034502680623957144, + "grad_norm": 1088.1448974609375, + "learning_rate": 3.4160000000000004e-06, + "loss": 29.8785, + "step": 17080 + }, + { + "epoch": 0.03452288125664096, + "grad_norm": 1024.5538330078125, + "learning_rate": 3.418e-06, + "loss": 42.1487, + "step": 17090 + }, + { + "epoch": 0.03454308188932478, + "grad_norm": 811.8573608398438, + "learning_rate": 3.4200000000000007e-06, + "loss": 35.5794, + "step": 17100 + }, + { + "epoch": 0.03456328252200859, + "grad_norm": 488.4293212890625, + "learning_rate": 3.4220000000000004e-06, + "loss": 39.9756, + "step": 17110 + }, + { + "epoch": 0.0345834831546924, + "grad_norm": 706.856201171875, + "learning_rate": 3.424e-06, + "loss": 16.4257, + "step": 17120 + }, + { + "epoch": 0.03460368378737622, + "grad_norm": 500.7957458496094, + "learning_rate": 3.4260000000000003e-06, + "loss": 21.9832, + "step": 17130 + }, + { + "epoch": 0.034623884420060036, + "grad_norm": 313.5267028808594, + "learning_rate": 3.428e-06, + "loss": 39.0117, + "step": 17140 + }, + { + "epoch": 0.03464408505274385, + "grad_norm": 459.3201599121094, + "learning_rate": 3.4300000000000006e-06, + "loss": 26.1945, + "step": 17150 + }, + { + "epoch": 0.03466428568542767, + "grad_norm": 694.1936645507812, + "learning_rate": 3.4320000000000003e-06, + "loss": 24.7299, + "step": 17160 + }, + { + "epoch": 0.03468448631811148, + "grad_norm": 0.0, + "learning_rate": 3.434e-06, + "loss": 21.6834, + "step": 17170 + }, + { + "epoch": 0.0347046869507953, + "grad_norm": 404.22015380859375, + "learning_rate": 3.4360000000000006e-06, + "loss": 22.3646, + "step": 17180 + }, + { + "epoch": 0.034724887583479115, + "grad_norm": 912.0065307617188, + "learning_rate": 3.4380000000000004e-06, + "loss": 53.3527, + "step": 17190 + }, + { + "epoch": 0.03474508821616293, + "grad_norm": 502.2011413574219, + "learning_rate": 3.44e-06, + "loss": 53.6234, + "step": 17200 + }, + { + "epoch": 0.03476528884884675, + "grad_norm": 2277.644287109375, + "learning_rate": 3.4420000000000002e-06, + "loss": 46.4125, + "step": 17210 + }, + { + "epoch": 0.03478548948153056, + "grad_norm": 428.8004455566406, + "learning_rate": 3.444e-06, + "loss": 22.3706, + "step": 17220 + }, + { + "epoch": 0.034805690114214374, + 
"grad_norm": 797.9168090820312, + "learning_rate": 3.4460000000000005e-06, + "loss": 21.4223, + "step": 17230 + }, + { + "epoch": 0.034825890746898194, + "grad_norm": 166.49786376953125, + "learning_rate": 3.4480000000000003e-06, + "loss": 19.3625, + "step": 17240 + }, + { + "epoch": 0.03484609137958201, + "grad_norm": 639.303466796875, + "learning_rate": 3.45e-06, + "loss": 30.4237, + "step": 17250 + }, + { + "epoch": 0.03486629201226583, + "grad_norm": 1690.0789794921875, + "learning_rate": 3.452e-06, + "loss": 31.5046, + "step": 17260 + }, + { + "epoch": 0.03488649264494964, + "grad_norm": 234.71603393554688, + "learning_rate": 3.4540000000000003e-06, + "loss": 32.2156, + "step": 17270 + }, + { + "epoch": 0.034906693277633453, + "grad_norm": 600.4459838867188, + "learning_rate": 3.4560000000000005e-06, + "loss": 34.8896, + "step": 17280 + }, + { + "epoch": 0.034926893910317273, + "grad_norm": 702.6697998046875, + "learning_rate": 3.458e-06, + "loss": 23.0082, + "step": 17290 + }, + { + "epoch": 0.03494709454300109, + "grad_norm": 317.1498107910156, + "learning_rate": 3.46e-06, + "loss": 19.3729, + "step": 17300 + }, + { + "epoch": 0.0349672951756849, + "grad_norm": 749.3865966796875, + "learning_rate": 3.4620000000000005e-06, + "loss": 29.1623, + "step": 17310 + }, + { + "epoch": 0.03498749580836872, + "grad_norm": 401.767822265625, + "learning_rate": 3.464e-06, + "loss": 20.5925, + "step": 17320 + }, + { + "epoch": 0.03500769644105253, + "grad_norm": 470.0053405761719, + "learning_rate": 3.4660000000000004e-06, + "loss": 32.5431, + "step": 17330 + }, + { + "epoch": 0.03502789707373635, + "grad_norm": 497.3592224121094, + "learning_rate": 3.468e-06, + "loss": 16.0818, + "step": 17340 + }, + { + "epoch": 0.035048097706420166, + "grad_norm": 660.4264526367188, + "learning_rate": 3.4700000000000002e-06, + "loss": 18.0981, + "step": 17350 + }, + { + "epoch": 0.03506829833910398, + "grad_norm": 625.1888427734375, + "learning_rate": 3.4720000000000004e-06, + "loss": 30.1002, + "step": 17360 + }, + { + "epoch": 0.0350884989717878, + "grad_norm": 1246.976318359375, + "learning_rate": 3.474e-06, + "loss": 32.2393, + "step": 17370 + }, + { + "epoch": 0.03510869960447161, + "grad_norm": 108.3302993774414, + "learning_rate": 3.4760000000000007e-06, + "loss": 8.8377, + "step": 17380 + }, + { + "epoch": 0.035128900237155425, + "grad_norm": 718.4808959960938, + "learning_rate": 3.4780000000000004e-06, + "loss": 36.9861, + "step": 17390 + }, + { + "epoch": 0.035149100869839245, + "grad_norm": 285.5974426269531, + "learning_rate": 3.48e-06, + "loss": 34.1444, + "step": 17400 + }, + { + "epoch": 0.03516930150252306, + "grad_norm": 683.3985595703125, + "learning_rate": 3.4820000000000003e-06, + "loss": 17.0873, + "step": 17410 + }, + { + "epoch": 0.03518950213520688, + "grad_norm": 893.44677734375, + "learning_rate": 3.484e-06, + "loss": 39.6395, + "step": 17420 + }, + { + "epoch": 0.03520970276789069, + "grad_norm": 682.7764892578125, + "learning_rate": 3.4860000000000006e-06, + "loss": 75.2991, + "step": 17430 + }, + { + "epoch": 0.035229903400574504, + "grad_norm": 647.171142578125, + "learning_rate": 3.4880000000000003e-06, + "loss": 15.9127, + "step": 17440 + }, + { + "epoch": 0.035250104033258324, + "grad_norm": 618.6836547851562, + "learning_rate": 3.49e-06, + "loss": 26.5643, + "step": 17450 + }, + { + "epoch": 0.03527030466594214, + "grad_norm": 348.82342529296875, + "learning_rate": 3.492e-06, + "loss": 32.2348, + "step": 17460 + }, + { + "epoch": 0.03529050529862595, + "grad_norm": 
591.755615234375, + "learning_rate": 3.4940000000000003e-06, + "loss": 38.7832, + "step": 17470 + }, + { + "epoch": 0.03531070593130977, + "grad_norm": 855.585693359375, + "learning_rate": 3.4960000000000005e-06, + "loss": 37.9762, + "step": 17480 + }, + { + "epoch": 0.03533090656399358, + "grad_norm": 319.1050109863281, + "learning_rate": 3.4980000000000002e-06, + "loss": 43.616, + "step": 17490 + }, + { + "epoch": 0.0353511071966774, + "grad_norm": 951.2423706054688, + "learning_rate": 3.5e-06, + "loss": 19.7842, + "step": 17500 + }, + { + "epoch": 0.035371307829361216, + "grad_norm": 605.2108154296875, + "learning_rate": 3.5020000000000005e-06, + "loss": 24.4234, + "step": 17510 + }, + { + "epoch": 0.03539150846204503, + "grad_norm": 81.48918151855469, + "learning_rate": 3.5040000000000002e-06, + "loss": 23.4033, + "step": 17520 + }, + { + "epoch": 0.03541170909472885, + "grad_norm": 352.0289001464844, + "learning_rate": 3.5060000000000004e-06, + "loss": 17.1986, + "step": 17530 + }, + { + "epoch": 0.03543190972741266, + "grad_norm": 220.1028289794922, + "learning_rate": 3.508e-06, + "loss": 28.5471, + "step": 17540 + }, + { + "epoch": 0.035452110360096475, + "grad_norm": 170.3765106201172, + "learning_rate": 3.5100000000000003e-06, + "loss": 29.259, + "step": 17550 + }, + { + "epoch": 0.035472310992780295, + "grad_norm": 262.6505126953125, + "learning_rate": 3.5120000000000004e-06, + "loss": 18.0606, + "step": 17560 + }, + { + "epoch": 0.03549251162546411, + "grad_norm": 424.7445068359375, + "learning_rate": 3.514e-06, + "loss": 26.9656, + "step": 17570 + }, + { + "epoch": 0.03551271225814793, + "grad_norm": 1122.718994140625, + "learning_rate": 3.5160000000000007e-06, + "loss": 29.4671, + "step": 17580 + }, + { + "epoch": 0.03553291289083174, + "grad_norm": 848.5406494140625, + "learning_rate": 3.5180000000000005e-06, + "loss": 28.6708, + "step": 17590 + }, + { + "epoch": 0.035553113523515555, + "grad_norm": 661.636474609375, + "learning_rate": 3.52e-06, + "loss": 22.97, + "step": 17600 + }, + { + "epoch": 0.035573314156199375, + "grad_norm": 2700.53515625, + "learning_rate": 3.5220000000000003e-06, + "loss": 62.9361, + "step": 17610 + }, + { + "epoch": 0.03559351478888319, + "grad_norm": 1014.1585693359375, + "learning_rate": 3.524e-06, + "loss": 32.995, + "step": 17620 + }, + { + "epoch": 0.035613715421567, + "grad_norm": 382.7838134765625, + "learning_rate": 3.5260000000000006e-06, + "loss": 20.5589, + "step": 17630 + }, + { + "epoch": 0.03563391605425082, + "grad_norm": 455.59600830078125, + "learning_rate": 3.5280000000000004e-06, + "loss": 21.9954, + "step": 17640 + }, + { + "epoch": 0.035654116686934634, + "grad_norm": 383.1478576660156, + "learning_rate": 3.53e-06, + "loss": 51.5302, + "step": 17650 + }, + { + "epoch": 0.035674317319618454, + "grad_norm": 348.96173095703125, + "learning_rate": 3.5320000000000002e-06, + "loss": 9.3253, + "step": 17660 + }, + { + "epoch": 0.03569451795230227, + "grad_norm": 456.492431640625, + "learning_rate": 3.5340000000000004e-06, + "loss": 17.8969, + "step": 17670 + }, + { + "epoch": 0.03571471858498608, + "grad_norm": 391.4908447265625, + "learning_rate": 3.5360000000000005e-06, + "loss": 30.9601, + "step": 17680 + }, + { + "epoch": 0.0357349192176699, + "grad_norm": 89.62699127197266, + "learning_rate": 3.5380000000000003e-06, + "loss": 23.1197, + "step": 17690 + }, + { + "epoch": 0.03575511985035371, + "grad_norm": 8.72734260559082, + "learning_rate": 3.54e-06, + "loss": 27.353, + "step": 17700 + }, + { + "epoch": 0.035775320483037526, + 
"grad_norm": 440.166259765625, + "learning_rate": 3.5420000000000006e-06, + "loss": 28.6035, + "step": 17710 + }, + { + "epoch": 0.035795521115721346, + "grad_norm": 141.1392059326172, + "learning_rate": 3.5440000000000003e-06, + "loss": 23.6285, + "step": 17720 + }, + { + "epoch": 0.03581572174840516, + "grad_norm": 1183.832275390625, + "learning_rate": 3.5460000000000004e-06, + "loss": 36.4731, + "step": 17730 + }, + { + "epoch": 0.03583592238108898, + "grad_norm": 308.72686767578125, + "learning_rate": 3.548e-06, + "loss": 13.224, + "step": 17740 + }, + { + "epoch": 0.03585612301377279, + "grad_norm": 625.1483154296875, + "learning_rate": 3.5500000000000003e-06, + "loss": 28.4961, + "step": 17750 + }, + { + "epoch": 0.035876323646456605, + "grad_norm": 377.944091796875, + "learning_rate": 3.5520000000000005e-06, + "loss": 33.3541, + "step": 17760 + }, + { + "epoch": 0.035896524279140425, + "grad_norm": 453.13470458984375, + "learning_rate": 3.554e-06, + "loss": 42.5362, + "step": 17770 + }, + { + "epoch": 0.03591672491182424, + "grad_norm": 377.8801574707031, + "learning_rate": 3.5560000000000008e-06, + "loss": 23.4628, + "step": 17780 + }, + { + "epoch": 0.03593692554450805, + "grad_norm": 64.29519653320312, + "learning_rate": 3.5580000000000005e-06, + "loss": 13.6451, + "step": 17790 + }, + { + "epoch": 0.03595712617719187, + "grad_norm": 345.0412902832031, + "learning_rate": 3.5600000000000002e-06, + "loss": 44.6675, + "step": 17800 + }, + { + "epoch": 0.035977326809875684, + "grad_norm": 381.2247619628906, + "learning_rate": 3.5620000000000004e-06, + "loss": 25.701, + "step": 17810 + }, + { + "epoch": 0.035997527442559504, + "grad_norm": 1403.4259033203125, + "learning_rate": 3.564e-06, + "loss": 35.4093, + "step": 17820 + }, + { + "epoch": 0.03601772807524332, + "grad_norm": 516.9909057617188, + "learning_rate": 3.566e-06, + "loss": 22.2979, + "step": 17830 + }, + { + "epoch": 0.03603792870792713, + "grad_norm": 453.0965270996094, + "learning_rate": 3.5680000000000004e-06, + "loss": 23.0691, + "step": 17840 + }, + { + "epoch": 0.03605812934061095, + "grad_norm": 1489.0709228515625, + "learning_rate": 3.57e-06, + "loss": 49.8878, + "step": 17850 + }, + { + "epoch": 0.036078329973294763, + "grad_norm": 391.0494384765625, + "learning_rate": 3.5720000000000003e-06, + "loss": 20.696, + "step": 17860 + }, + { + "epoch": 0.03609853060597858, + "grad_norm": 669.98779296875, + "learning_rate": 3.5740000000000004e-06, + "loss": 28.2965, + "step": 17870 + }, + { + "epoch": 0.0361187312386624, + "grad_norm": 420.1402587890625, + "learning_rate": 3.576e-06, + "loss": 23.3762, + "step": 17880 + }, + { + "epoch": 0.03613893187134621, + "grad_norm": 620.1629638671875, + "learning_rate": 3.5780000000000003e-06, + "loss": 18.8507, + "step": 17890 + }, + { + "epoch": 0.03615913250403003, + "grad_norm": 995.0001831054688, + "learning_rate": 3.58e-06, + "loss": 48.6008, + "step": 17900 + }, + { + "epoch": 0.03617933313671384, + "grad_norm": 569.3245239257812, + "learning_rate": 3.5820000000000006e-06, + "loss": 22.7512, + "step": 17910 + }, + { + "epoch": 0.036199533769397656, + "grad_norm": 572.5311279296875, + "learning_rate": 3.5840000000000003e-06, + "loss": 42.9053, + "step": 17920 + }, + { + "epoch": 0.036219734402081476, + "grad_norm": 473.06280517578125, + "learning_rate": 3.586e-06, + "loss": 23.6802, + "step": 17930 + }, + { + "epoch": 0.03623993503476529, + "grad_norm": 416.5789794921875, + "learning_rate": 3.588e-06, + "loss": 37.7727, + "step": 17940 + }, + { + "epoch": 0.0362601356674491, 
+ "grad_norm": 627.5274658203125, + "learning_rate": 3.5900000000000004e-06, + "loss": 17.963, + "step": 17950 + }, + { + "epoch": 0.03628033630013292, + "grad_norm": 332.6874694824219, + "learning_rate": 3.5920000000000005e-06, + "loss": 19.0672, + "step": 17960 + }, + { + "epoch": 0.036300536932816735, + "grad_norm": 3338.54443359375, + "learning_rate": 3.5940000000000002e-06, + "loss": 51.021, + "step": 17970 + }, + { + "epoch": 0.036320737565500555, + "grad_norm": 842.3485107421875, + "learning_rate": 3.596e-06, + "loss": 34.2707, + "step": 17980 + }, + { + "epoch": 0.03634093819818437, + "grad_norm": 299.7825927734375, + "learning_rate": 3.5980000000000005e-06, + "loss": 32.0701, + "step": 17990 + }, + { + "epoch": 0.03636113883086818, + "grad_norm": 951.4468994140625, + "learning_rate": 3.6000000000000003e-06, + "loss": 25.0228, + "step": 18000 + }, + { + "epoch": 0.036381339463552, + "grad_norm": 422.8774108886719, + "learning_rate": 3.6020000000000004e-06, + "loss": 30.886, + "step": 18010 + }, + { + "epoch": 0.036401540096235814, + "grad_norm": 254.622314453125, + "learning_rate": 3.604e-06, + "loss": 25.8877, + "step": 18020 + }, + { + "epoch": 0.03642174072891963, + "grad_norm": 163.25782775878906, + "learning_rate": 3.606e-06, + "loss": 30.17, + "step": 18030 + }, + { + "epoch": 0.03644194136160345, + "grad_norm": 355.1414489746094, + "learning_rate": 3.6080000000000004e-06, + "loss": 19.5321, + "step": 18040 + }, + { + "epoch": 0.03646214199428726, + "grad_norm": 604.4697265625, + "learning_rate": 3.61e-06, + "loss": 13.9627, + "step": 18050 + }, + { + "epoch": 0.03648234262697108, + "grad_norm": 414.0849914550781, + "learning_rate": 3.6120000000000003e-06, + "loss": 9.15, + "step": 18060 + }, + { + "epoch": 0.03650254325965489, + "grad_norm": 669.28369140625, + "learning_rate": 3.6140000000000005e-06, + "loss": 24.235, + "step": 18070 + }, + { + "epoch": 0.036522743892338706, + "grad_norm": 644.69873046875, + "learning_rate": 3.616e-06, + "loss": 29.0484, + "step": 18080 + }, + { + "epoch": 0.036542944525022526, + "grad_norm": 537.6671142578125, + "learning_rate": 3.6180000000000003e-06, + "loss": 21.0134, + "step": 18090 + }, + { + "epoch": 0.03656314515770634, + "grad_norm": 423.848876953125, + "learning_rate": 3.62e-06, + "loss": 20.3401, + "step": 18100 + }, + { + "epoch": 0.03658334579039015, + "grad_norm": 1117.3096923828125, + "learning_rate": 3.6220000000000006e-06, + "loss": 50.8774, + "step": 18110 + }, + { + "epoch": 0.03660354642307397, + "grad_norm": 1382.8843994140625, + "learning_rate": 3.6240000000000004e-06, + "loss": 37.2023, + "step": 18120 + }, + { + "epoch": 0.036623747055757785, + "grad_norm": 284.3473205566406, + "learning_rate": 3.626e-06, + "loss": 32.0322, + "step": 18130 + }, + { + "epoch": 0.036643947688441605, + "grad_norm": 1064.8023681640625, + "learning_rate": 3.6280000000000002e-06, + "loss": 38.1642, + "step": 18140 + }, + { + "epoch": 0.03666414832112542, + "grad_norm": 359.7071533203125, + "learning_rate": 3.6300000000000004e-06, + "loss": 25.7966, + "step": 18150 + }, + { + "epoch": 0.03668434895380923, + "grad_norm": 543.3189086914062, + "learning_rate": 3.6320000000000005e-06, + "loss": 21.5021, + "step": 18160 + }, + { + "epoch": 0.03670454958649305, + "grad_norm": 690.4579467773438, + "learning_rate": 3.6340000000000003e-06, + "loss": 22.0808, + "step": 18170 + }, + { + "epoch": 0.036724750219176865, + "grad_norm": 29.11857032775879, + "learning_rate": 3.636e-06, + "loss": 21.0527, + "step": 18180 + }, + { + "epoch": 
0.03674495085186068, + "grad_norm": 218.865234375, + "learning_rate": 3.6380000000000006e-06, + "loss": 44.4114, + "step": 18190 + }, + { + "epoch": 0.0367651514845445, + "grad_norm": 823.153076171875, + "learning_rate": 3.6400000000000003e-06, + "loss": 40.9089, + "step": 18200 + }, + { + "epoch": 0.03678535211722831, + "grad_norm": 815.5130004882812, + "learning_rate": 3.6420000000000005e-06, + "loss": 27.5567, + "step": 18210 + }, + { + "epoch": 0.03680555274991213, + "grad_norm": 357.49871826171875, + "learning_rate": 3.644e-06, + "loss": 12.2424, + "step": 18220 + }, + { + "epoch": 0.036825753382595944, + "grad_norm": 865.4989624023438, + "learning_rate": 3.646e-06, + "loss": 22.4987, + "step": 18230 + }, + { + "epoch": 0.03684595401527976, + "grad_norm": 759.4170532226562, + "learning_rate": 3.6480000000000005e-06, + "loss": 24.3934, + "step": 18240 + }, + { + "epoch": 0.03686615464796358, + "grad_norm": 542.9179077148438, + "learning_rate": 3.65e-06, + "loss": 20.8989, + "step": 18250 + }, + { + "epoch": 0.03688635528064739, + "grad_norm": 210.0708465576172, + "learning_rate": 3.6520000000000004e-06, + "loss": 15.2844, + "step": 18260 + }, + { + "epoch": 0.0369065559133312, + "grad_norm": 147.86729431152344, + "learning_rate": 3.6540000000000005e-06, + "loss": 24.368, + "step": 18270 + }, + { + "epoch": 0.03692675654601502, + "grad_norm": 848.0860595703125, + "learning_rate": 3.6560000000000002e-06, + "loss": 15.2613, + "step": 18280 + }, + { + "epoch": 0.036946957178698836, + "grad_norm": 471.3977355957031, + "learning_rate": 3.6580000000000004e-06, + "loss": 29.735, + "step": 18290 + }, + { + "epoch": 0.03696715781138265, + "grad_norm": 221.9785614013672, + "learning_rate": 3.66e-06, + "loss": 19.94, + "step": 18300 + }, + { + "epoch": 0.03698735844406647, + "grad_norm": 1696.908935546875, + "learning_rate": 3.6620000000000007e-06, + "loss": 60.9916, + "step": 18310 + }, + { + "epoch": 0.03700755907675028, + "grad_norm": 803.4354248046875, + "learning_rate": 3.6640000000000004e-06, + "loss": 36.4354, + "step": 18320 + }, + { + "epoch": 0.0370277597094341, + "grad_norm": 613.2241821289062, + "learning_rate": 3.666e-06, + "loss": 28.5093, + "step": 18330 + }, + { + "epoch": 0.037047960342117915, + "grad_norm": 892.1907958984375, + "learning_rate": 3.6680000000000003e-06, + "loss": 34.7192, + "step": 18340 + }, + { + "epoch": 0.03706816097480173, + "grad_norm": 288.34228515625, + "learning_rate": 3.6700000000000004e-06, + "loss": 17.6466, + "step": 18350 + }, + { + "epoch": 0.03708836160748555, + "grad_norm": 946.3200073242188, + "learning_rate": 3.6720000000000006e-06, + "loss": 31.1397, + "step": 18360 + }, + { + "epoch": 0.03710856224016936, + "grad_norm": 577.26416015625, + "learning_rate": 3.6740000000000003e-06, + "loss": 24.5398, + "step": 18370 + }, + { + "epoch": 0.037128762872853174, + "grad_norm": 775.6914672851562, + "learning_rate": 3.676e-06, + "loss": 35.2331, + "step": 18380 + }, + { + "epoch": 0.037148963505536994, + "grad_norm": 404.9434814453125, + "learning_rate": 3.6780000000000006e-06, + "loss": 32.2677, + "step": 18390 + }, + { + "epoch": 0.03716916413822081, + "grad_norm": 995.0198364257812, + "learning_rate": 3.6800000000000003e-06, + "loss": 17.7147, + "step": 18400 + }, + { + "epoch": 0.03718936477090463, + "grad_norm": 753.1340942382812, + "learning_rate": 3.6820000000000005e-06, + "loss": 17.9428, + "step": 18410 + }, + { + "epoch": 0.03720956540358844, + "grad_norm": 2061.60888671875, + "learning_rate": 3.6840000000000002e-06, + "loss": 48.1814, + 
"step": 18420 + }, + { + "epoch": 0.037229766036272254, + "grad_norm": 260.50567626953125, + "learning_rate": 3.686e-06, + "loss": 30.1308, + "step": 18430 + }, + { + "epoch": 0.037249966668956074, + "grad_norm": 373.1856384277344, + "learning_rate": 3.6880000000000005e-06, + "loss": 19.3992, + "step": 18440 + }, + { + "epoch": 0.03727016730163989, + "grad_norm": 483.49462890625, + "learning_rate": 3.6900000000000002e-06, + "loss": 18.4943, + "step": 18450 + }, + { + "epoch": 0.0372903679343237, + "grad_norm": 266.2514953613281, + "learning_rate": 3.692e-06, + "loss": 24.6773, + "step": 18460 + }, + { + "epoch": 0.03731056856700752, + "grad_norm": 536.02099609375, + "learning_rate": 3.6940000000000005e-06, + "loss": 21.7613, + "step": 18470 + }, + { + "epoch": 0.03733076919969133, + "grad_norm": 332.672119140625, + "learning_rate": 3.6960000000000003e-06, + "loss": 32.3539, + "step": 18480 + }, + { + "epoch": 0.03735096983237515, + "grad_norm": 308.9702453613281, + "learning_rate": 3.6980000000000004e-06, + "loss": 26.1693, + "step": 18490 + }, + { + "epoch": 0.037371170465058966, + "grad_norm": 611.457763671875, + "learning_rate": 3.7e-06, + "loss": 27.8067, + "step": 18500 + }, + { + "epoch": 0.03739137109774278, + "grad_norm": 357.3343505859375, + "learning_rate": 3.702e-06, + "loss": 16.9885, + "step": 18510 + }, + { + "epoch": 0.0374115717304266, + "grad_norm": 1875.06591796875, + "learning_rate": 3.7040000000000005e-06, + "loss": 27.6352, + "step": 18520 + }, + { + "epoch": 0.03743177236311041, + "grad_norm": 583.4190673828125, + "learning_rate": 3.706e-06, + "loss": 38.8018, + "step": 18530 + }, + { + "epoch": 0.037451972995794225, + "grad_norm": 63.66743469238281, + "learning_rate": 3.7080000000000003e-06, + "loss": 23.2965, + "step": 18540 + }, + { + "epoch": 0.037472173628478045, + "grad_norm": 179.1241912841797, + "learning_rate": 3.7100000000000005e-06, + "loss": 26.9773, + "step": 18550 + }, + { + "epoch": 0.03749237426116186, + "grad_norm": 284.5545654296875, + "learning_rate": 3.712e-06, + "loss": 25.3305, + "step": 18560 + }, + { + "epoch": 0.03751257489384568, + "grad_norm": 436.8062744140625, + "learning_rate": 3.7140000000000004e-06, + "loss": 54.7485, + "step": 18570 + }, + { + "epoch": 0.03753277552652949, + "grad_norm": 298.08514404296875, + "learning_rate": 3.716e-06, + "loss": 26.955, + "step": 18580 + }, + { + "epoch": 0.037552976159213304, + "grad_norm": 176.87818908691406, + "learning_rate": 3.7180000000000007e-06, + "loss": 24.4092, + "step": 18590 + }, + { + "epoch": 0.037573176791897124, + "grad_norm": 442.5617980957031, + "learning_rate": 3.7200000000000004e-06, + "loss": 37.3868, + "step": 18600 + }, + { + "epoch": 0.03759337742458094, + "grad_norm": 348.6301574707031, + "learning_rate": 3.722e-06, + "loss": 17.5085, + "step": 18610 + }, + { + "epoch": 0.03761357805726475, + "grad_norm": 178.97604370117188, + "learning_rate": 3.7240000000000003e-06, + "loss": 27.4701, + "step": 18620 + }, + { + "epoch": 0.03763377868994857, + "grad_norm": 574.3960571289062, + "learning_rate": 3.726e-06, + "loss": 26.0964, + "step": 18630 + }, + { + "epoch": 0.03765397932263238, + "grad_norm": 278.8275146484375, + "learning_rate": 3.7280000000000006e-06, + "loss": 48.7265, + "step": 18640 + }, + { + "epoch": 0.0376741799553162, + "grad_norm": 816.7874755859375, + "learning_rate": 3.7300000000000003e-06, + "loss": 43.5803, + "step": 18650 + }, + { + "epoch": 0.037694380588000016, + "grad_norm": 453.5743713378906, + "learning_rate": 3.732e-06, + "loss": 33.0941, + "step": 18660 
+ }, + { + "epoch": 0.03771458122068383, + "grad_norm": 672.2986450195312, + "learning_rate": 3.7340000000000006e-06, + "loss": 34.2774, + "step": 18670 + }, + { + "epoch": 0.03773478185336765, + "grad_norm": 443.0143127441406, + "learning_rate": 3.7360000000000003e-06, + "loss": 20.1083, + "step": 18680 + }, + { + "epoch": 0.03775498248605146, + "grad_norm": 176.78070068359375, + "learning_rate": 3.7380000000000005e-06, + "loss": 14.7314, + "step": 18690 + }, + { + "epoch": 0.037775183118735275, + "grad_norm": 384.6247253417969, + "learning_rate": 3.74e-06, + "loss": 10.5879, + "step": 18700 + }, + { + "epoch": 0.037795383751419095, + "grad_norm": 291.50128173828125, + "learning_rate": 3.742e-06, + "loss": 39.9164, + "step": 18710 + }, + { + "epoch": 0.03781558438410291, + "grad_norm": 596.7171630859375, + "learning_rate": 3.7440000000000005e-06, + "loss": 38.8251, + "step": 18720 + }, + { + "epoch": 0.03783578501678673, + "grad_norm": 798.280029296875, + "learning_rate": 3.7460000000000002e-06, + "loss": 18.4048, + "step": 18730 + }, + { + "epoch": 0.03785598564947054, + "grad_norm": 673.656494140625, + "learning_rate": 3.7480000000000004e-06, + "loss": 25.538, + "step": 18740 + }, + { + "epoch": 0.037876186282154355, + "grad_norm": 384.20733642578125, + "learning_rate": 3.7500000000000005e-06, + "loss": 30.774, + "step": 18750 + }, + { + "epoch": 0.037896386914838175, + "grad_norm": 381.9043273925781, + "learning_rate": 3.7520000000000002e-06, + "loss": 34.7811, + "step": 18760 + }, + { + "epoch": 0.03791658754752199, + "grad_norm": 290.19525146484375, + "learning_rate": 3.7540000000000004e-06, + "loss": 17.331, + "step": 18770 + }, + { + "epoch": 0.0379367881802058, + "grad_norm": 447.8214416503906, + "learning_rate": 3.756e-06, + "loss": 22.043, + "step": 18780 + }, + { + "epoch": 0.03795698881288962, + "grad_norm": 715.4262084960938, + "learning_rate": 3.7580000000000007e-06, + "loss": 19.8474, + "step": 18790 + }, + { + "epoch": 0.037977189445573434, + "grad_norm": 296.80535888671875, + "learning_rate": 3.7600000000000004e-06, + "loss": 26.4522, + "step": 18800 + }, + { + "epoch": 0.037997390078257254, + "grad_norm": 809.5372314453125, + "learning_rate": 3.762e-06, + "loss": 27.2477, + "step": 18810 + }, + { + "epoch": 0.03801759071094107, + "grad_norm": 95.88043975830078, + "learning_rate": 3.7640000000000003e-06, + "loss": 28.4378, + "step": 18820 + }, + { + "epoch": 0.03803779134362488, + "grad_norm": 326.20733642578125, + "learning_rate": 3.766e-06, + "loss": 19.4922, + "step": 18830 + }, + { + "epoch": 0.0380579919763087, + "grad_norm": 882.5034790039062, + "learning_rate": 3.7680000000000006e-06, + "loss": 27.9748, + "step": 18840 + }, + { + "epoch": 0.03807819260899251, + "grad_norm": 727.4618530273438, + "learning_rate": 3.7700000000000003e-06, + "loss": 25.738, + "step": 18850 + }, + { + "epoch": 0.038098393241676326, + "grad_norm": 28.941675186157227, + "learning_rate": 3.772e-06, + "loss": 19.6749, + "step": 18860 + }, + { + "epoch": 0.038118593874360146, + "grad_norm": 410.4114990234375, + "learning_rate": 3.7740000000000006e-06, + "loss": 24.7041, + "step": 18870 + }, + { + "epoch": 0.03813879450704396, + "grad_norm": 427.81427001953125, + "learning_rate": 3.7760000000000004e-06, + "loss": 16.8343, + "step": 18880 + }, + { + "epoch": 0.03815899513972778, + "grad_norm": 459.8018798828125, + "learning_rate": 3.7780000000000005e-06, + "loss": 30.7509, + "step": 18890 + }, + { + "epoch": 0.03817919577241159, + "grad_norm": 1102.4735107421875, + "learning_rate": 
3.7800000000000002e-06, + "loss": 25.0651, + "step": 18900 + }, + { + "epoch": 0.038199396405095405, + "grad_norm": 731.9132690429688, + "learning_rate": 3.782e-06, + "loss": 31.4007, + "step": 18910 + }, + { + "epoch": 0.038219597037779225, + "grad_norm": 449.3602294921875, + "learning_rate": 3.7840000000000005e-06, + "loss": 26.8892, + "step": 18920 + }, + { + "epoch": 0.03823979767046304, + "grad_norm": 1201.197509765625, + "learning_rate": 3.7860000000000003e-06, + "loss": 30.9846, + "step": 18930 + }, + { + "epoch": 0.03825999830314685, + "grad_norm": 785.8412475585938, + "learning_rate": 3.7880000000000004e-06, + "loss": 18.5037, + "step": 18940 + }, + { + "epoch": 0.03828019893583067, + "grad_norm": 443.92962646484375, + "learning_rate": 3.79e-06, + "loss": 37.715, + "step": 18950 + }, + { + "epoch": 0.038300399568514484, + "grad_norm": 1796.4639892578125, + "learning_rate": 3.7920000000000003e-06, + "loss": 32.3944, + "step": 18960 + }, + { + "epoch": 0.038320600201198304, + "grad_norm": 455.0303955078125, + "learning_rate": 3.7940000000000004e-06, + "loss": 25.4756, + "step": 18970 + }, + { + "epoch": 0.03834080083388212, + "grad_norm": 473.8932800292969, + "learning_rate": 3.796e-06, + "loss": 18.867, + "step": 18980 + }, + { + "epoch": 0.03836100146656593, + "grad_norm": 446.3844299316406, + "learning_rate": 3.7980000000000007e-06, + "loss": 23.4951, + "step": 18990 + }, + { + "epoch": 0.03838120209924975, + "grad_norm": 151.72618103027344, + "learning_rate": 3.8000000000000005e-06, + "loss": 26.9601, + "step": 19000 + }, + { + "epoch": 0.038401402731933564, + "grad_norm": 473.8542785644531, + "learning_rate": 3.802e-06, + "loss": 8.5569, + "step": 19010 + }, + { + "epoch": 0.03842160336461738, + "grad_norm": 406.4317626953125, + "learning_rate": 3.8040000000000003e-06, + "loss": 33.9643, + "step": 19020 + }, + { + "epoch": 0.0384418039973012, + "grad_norm": 921.1906127929688, + "learning_rate": 3.806e-06, + "loss": 41.9266, + "step": 19030 + }, + { + "epoch": 0.03846200462998501, + "grad_norm": 478.7823486328125, + "learning_rate": 3.8080000000000006e-06, + "loss": 27.2125, + "step": 19040 + }, + { + "epoch": 0.03848220526266883, + "grad_norm": 218.63922119140625, + "learning_rate": 3.8100000000000004e-06, + "loss": 24.6184, + "step": 19050 + }, + { + "epoch": 0.03850240589535264, + "grad_norm": 1037.8466796875, + "learning_rate": 3.812e-06, + "loss": 28.6672, + "step": 19060 + }, + { + "epoch": 0.038522606528036456, + "grad_norm": 1018.7424926757812, + "learning_rate": 3.8140000000000007e-06, + "loss": 20.6852, + "step": 19070 + }, + { + "epoch": 0.038542807160720276, + "grad_norm": 256.0588073730469, + "learning_rate": 3.816e-06, + "loss": 13.2979, + "step": 19080 + }, + { + "epoch": 0.03856300779340409, + "grad_norm": 489.6903381347656, + "learning_rate": 3.818e-06, + "loss": 29.6457, + "step": 19090 + }, + { + "epoch": 0.0385832084260879, + "grad_norm": 459.4139709472656, + "learning_rate": 3.820000000000001e-06, + "loss": 29.233, + "step": 19100 + }, + { + "epoch": 0.03860340905877172, + "grad_norm": 996.184326171875, + "learning_rate": 3.822e-06, + "loss": 33.8466, + "step": 19110 + }, + { + "epoch": 0.038623609691455535, + "grad_norm": 690.0992431640625, + "learning_rate": 3.824e-06, + "loss": 19.5614, + "step": 19120 + }, + { + "epoch": 0.038643810324139355, + "grad_norm": 806.515869140625, + "learning_rate": 3.826e-06, + "loss": 19.1219, + "step": 19130 + }, + { + "epoch": 0.03866401095682317, + "grad_norm": 422.1471862792969, + "learning_rate": 3.8280000000000004e-06, 
+ "loss": 63.3176, + "step": 19140 + }, + { + "epoch": 0.03868421158950698, + "grad_norm": 559.8536376953125, + "learning_rate": 3.830000000000001e-06, + "loss": 24.9242, + "step": 19150 + }, + { + "epoch": 0.0387044122221908, + "grad_norm": 107.43516540527344, + "learning_rate": 3.832e-06, + "loss": 21.9817, + "step": 19160 + }, + { + "epoch": 0.038724612854874614, + "grad_norm": 413.239013671875, + "learning_rate": 3.834000000000001e-06, + "loss": 12.4822, + "step": 19170 + }, + { + "epoch": 0.03874481348755843, + "grad_norm": 283.6053161621094, + "learning_rate": 3.836e-06, + "loss": 35.2526, + "step": 19180 + }, + { + "epoch": 0.03876501412024225, + "grad_norm": 451.2077941894531, + "learning_rate": 3.838e-06, + "loss": 14.6541, + "step": 19190 + }, + { + "epoch": 0.03878521475292606, + "grad_norm": 488.7024230957031, + "learning_rate": 3.8400000000000005e-06, + "loss": 32.0479, + "step": 19200 + }, + { + "epoch": 0.03880541538560988, + "grad_norm": 913.99755859375, + "learning_rate": 3.842e-06, + "loss": 24.4827, + "step": 19210 + }, + { + "epoch": 0.03882561601829369, + "grad_norm": 368.6611022949219, + "learning_rate": 3.844000000000001e-06, + "loss": 22.7566, + "step": 19220 + }, + { + "epoch": 0.038845816650977506, + "grad_norm": 582.5567016601562, + "learning_rate": 3.846e-06, + "loss": 18.064, + "step": 19230 + }, + { + "epoch": 0.038866017283661326, + "grad_norm": 546.2643432617188, + "learning_rate": 3.848e-06, + "loss": 30.0649, + "step": 19240 + }, + { + "epoch": 0.03888621791634514, + "grad_norm": 701.2210693359375, + "learning_rate": 3.85e-06, + "loss": 40.8547, + "step": 19250 + }, + { + "epoch": 0.03890641854902895, + "grad_norm": 1697.3839111328125, + "learning_rate": 3.8520000000000006e-06, + "loss": 28.2806, + "step": 19260 + }, + { + "epoch": 0.03892661918171277, + "grad_norm": 800.674560546875, + "learning_rate": 3.854000000000001e-06, + "loss": 25.5319, + "step": 19270 + }, + { + "epoch": 0.038946819814396585, + "grad_norm": 330.03033447265625, + "learning_rate": 3.856e-06, + "loss": 25.4214, + "step": 19280 + }, + { + "epoch": 0.038967020447080405, + "grad_norm": 159.86805725097656, + "learning_rate": 3.858e-06, + "loss": 15.4458, + "step": 19290 + }, + { + "epoch": 0.03898722107976422, + "grad_norm": 540.61474609375, + "learning_rate": 3.86e-06, + "loss": 37.4303, + "step": 19300 + }, + { + "epoch": 0.03900742171244803, + "grad_norm": 577.3270263671875, + "learning_rate": 3.8620000000000005e-06, + "loss": 28.7745, + "step": 19310 + }, + { + "epoch": 0.03902762234513185, + "grad_norm": 161.7196502685547, + "learning_rate": 3.864000000000001e-06, + "loss": 23.6797, + "step": 19320 + }, + { + "epoch": 0.039047822977815665, + "grad_norm": 377.9838562011719, + "learning_rate": 3.866e-06, + "loss": 24.1954, + "step": 19330 + }, + { + "epoch": 0.03906802361049948, + "grad_norm": 531.781005859375, + "learning_rate": 3.868e-06, + "loss": 25.0392, + "step": 19340 + }, + { + "epoch": 0.0390882242431833, + "grad_norm": 175.0643310546875, + "learning_rate": 3.87e-06, + "loss": 26.4424, + "step": 19350 + }, + { + "epoch": 0.03910842487586711, + "grad_norm": 490.6230163574219, + "learning_rate": 3.872e-06, + "loss": 30.3526, + "step": 19360 + }, + { + "epoch": 0.03912862550855093, + "grad_norm": 653.54736328125, + "learning_rate": 3.8740000000000005e-06, + "loss": 36.0134, + "step": 19370 + }, + { + "epoch": 0.039148826141234744, + "grad_norm": 785.630615234375, + "learning_rate": 3.876000000000001e-06, + "loss": 29.7962, + "step": 19380 + }, + { + "epoch": 0.03916902677391856, 
+ "grad_norm": 442.81591796875, + "learning_rate": 3.878e-06, + "loss": 29.3342, + "step": 19390 + }, + { + "epoch": 0.03918922740660238, + "grad_norm": 284.63079833984375, + "learning_rate": 3.88e-06, + "loss": 33.5628, + "step": 19400 + }, + { + "epoch": 0.03920942803928619, + "grad_norm": 1588.289306640625, + "learning_rate": 3.882e-06, + "loss": 41.3988, + "step": 19410 + }, + { + "epoch": 0.03922962867197, + "grad_norm": 248.40194702148438, + "learning_rate": 3.884e-06, + "loss": 31.8697, + "step": 19420 + }, + { + "epoch": 0.03924982930465382, + "grad_norm": 332.9631042480469, + "learning_rate": 3.8860000000000006e-06, + "loss": 21.1231, + "step": 19430 + }, + { + "epoch": 0.039270029937337636, + "grad_norm": 978.757080078125, + "learning_rate": 3.888e-06, + "loss": 39.1596, + "step": 19440 + }, + { + "epoch": 0.039290230570021456, + "grad_norm": 407.6144104003906, + "learning_rate": 3.89e-06, + "loss": 12.5934, + "step": 19450 + }, + { + "epoch": 0.03931043120270527, + "grad_norm": 265.3919372558594, + "learning_rate": 3.892e-06, + "loss": 21.8525, + "step": 19460 + }, + { + "epoch": 0.03933063183538908, + "grad_norm": 401.0389709472656, + "learning_rate": 3.894e-06, + "loss": 28.4071, + "step": 19470 + }, + { + "epoch": 0.0393508324680729, + "grad_norm": 441.74005126953125, + "learning_rate": 3.8960000000000005e-06, + "loss": 18.6537, + "step": 19480 + }, + { + "epoch": 0.039371033100756715, + "grad_norm": 791.5307006835938, + "learning_rate": 3.898e-06, + "loss": 21.6818, + "step": 19490 + }, + { + "epoch": 0.03939123373344053, + "grad_norm": 941.1528930664062, + "learning_rate": 3.900000000000001e-06, + "loss": 36.7498, + "step": 19500 + }, + { + "epoch": 0.03941143436612435, + "grad_norm": 551.3410034179688, + "learning_rate": 3.902e-06, + "loss": 28.612, + "step": 19510 + }, + { + "epoch": 0.03943163499880816, + "grad_norm": 1068.077392578125, + "learning_rate": 3.904e-06, + "loss": 30.1779, + "step": 19520 + }, + { + "epoch": 0.03945183563149198, + "grad_norm": 702.95263671875, + "learning_rate": 3.906e-06, + "loss": 12.9496, + "step": 19530 + }, + { + "epoch": 0.039472036264175794, + "grad_norm": 757.014404296875, + "learning_rate": 3.9080000000000005e-06, + "loss": 39.9891, + "step": 19540 + }, + { + "epoch": 0.03949223689685961, + "grad_norm": 745.016845703125, + "learning_rate": 3.910000000000001e-06, + "loss": 25.0764, + "step": 19550 + }, + { + "epoch": 0.03951243752954343, + "grad_norm": 836.84912109375, + "learning_rate": 3.912e-06, + "loss": 11.6613, + "step": 19560 + }, + { + "epoch": 0.03953263816222724, + "grad_norm": 751.01220703125, + "learning_rate": 3.914000000000001e-06, + "loss": 28.168, + "step": 19570 + }, + { + "epoch": 0.039552838794911054, + "grad_norm": 424.0301208496094, + "learning_rate": 3.916e-06, + "loss": 37.3475, + "step": 19580 + }, + { + "epoch": 0.039573039427594874, + "grad_norm": 500.0715026855469, + "learning_rate": 3.9180000000000004e-06, + "loss": 17.2219, + "step": 19590 + }, + { + "epoch": 0.03959324006027869, + "grad_norm": 280.797607421875, + "learning_rate": 3.920000000000001e-06, + "loss": 24.3042, + "step": 19600 + }, + { + "epoch": 0.03961344069296251, + "grad_norm": 421.28741455078125, + "learning_rate": 3.922e-06, + "loss": 51.7029, + "step": 19610 + }, + { + "epoch": 0.03963364132564632, + "grad_norm": 663.9564208984375, + "learning_rate": 3.924000000000001e-06, + "loss": 25.3921, + "step": 19620 + }, + { + "epoch": 0.03965384195833013, + "grad_norm": 618.3223876953125, + "learning_rate": 3.926e-06, + "loss": 22.4242, + "step": 
19630 + }, + { + "epoch": 0.03967404259101395, + "grad_norm": 374.6733093261719, + "learning_rate": 3.928e-06, + "loss": 27.1011, + "step": 19640 + }, + { + "epoch": 0.039694243223697766, + "grad_norm": 702.5897827148438, + "learning_rate": 3.9300000000000005e-06, + "loss": 34.1476, + "step": 19650 + }, + { + "epoch": 0.03971444385638158, + "grad_norm": 373.4047546386719, + "learning_rate": 3.932000000000001e-06, + "loss": 18.5248, + "step": 19660 + }, + { + "epoch": 0.0397346444890654, + "grad_norm": 535.1874389648438, + "learning_rate": 3.934000000000001e-06, + "loss": 38.1432, + "step": 19670 + }, + { + "epoch": 0.03975484512174921, + "grad_norm": 1303.091796875, + "learning_rate": 3.936e-06, + "loss": 26.7679, + "step": 19680 + }, + { + "epoch": 0.03977504575443303, + "grad_norm": 1000.7589721679688, + "learning_rate": 3.938e-06, + "loss": 21.4519, + "step": 19690 + }, + { + "epoch": 0.039795246387116845, + "grad_norm": 316.948486328125, + "learning_rate": 3.94e-06, + "loss": 21.0921, + "step": 19700 + }, + { + "epoch": 0.03981544701980066, + "grad_norm": 554.349609375, + "learning_rate": 3.9420000000000005e-06, + "loss": 32.6151, + "step": 19710 + }, + { + "epoch": 0.03983564765248448, + "grad_norm": 569.5929565429688, + "learning_rate": 3.944e-06, + "loss": 34.9574, + "step": 19720 + }, + { + "epoch": 0.03985584828516829, + "grad_norm": 199.5447540283203, + "learning_rate": 3.946e-06, + "loss": 27.4674, + "step": 19730 + }, + { + "epoch": 0.039876048917852104, + "grad_norm": 492.1817626953125, + "learning_rate": 3.948e-06, + "loss": 31.3917, + "step": 19740 + }, + { + "epoch": 0.039896249550535924, + "grad_norm": 1029.6348876953125, + "learning_rate": 3.95e-06, + "loss": 24.2852, + "step": 19750 + }, + { + "epoch": 0.03991645018321974, + "grad_norm": 380.540283203125, + "learning_rate": 3.9520000000000004e-06, + "loss": 16.6415, + "step": 19760 + }, + { + "epoch": 0.03993665081590356, + "grad_norm": 436.20355224609375, + "learning_rate": 3.954e-06, + "loss": 15.9499, + "step": 19770 + }, + { + "epoch": 0.03995685144858737, + "grad_norm": 396.8489990234375, + "learning_rate": 3.956000000000001e-06, + "loss": 19.8399, + "step": 19780 + }, + { + "epoch": 0.03997705208127118, + "grad_norm": 664.4743041992188, + "learning_rate": 3.958e-06, + "loss": 21.8307, + "step": 19790 + }, + { + "epoch": 0.039997252713955, + "grad_norm": 334.7715759277344, + "learning_rate": 3.96e-06, + "loss": 27.2744, + "step": 19800 + }, + { + "epoch": 0.040017453346638816, + "grad_norm": 503.5437927246094, + "learning_rate": 3.962e-06, + "loss": 33.6565, + "step": 19810 + }, + { + "epoch": 0.04003765397932263, + "grad_norm": 355.5002746582031, + "learning_rate": 3.964e-06, + "loss": 43.4189, + "step": 19820 + }, + { + "epoch": 0.04005785461200645, + "grad_norm": 397.135009765625, + "learning_rate": 3.966000000000001e-06, + "loss": 32.1225, + "step": 19830 + }, + { + "epoch": 0.04007805524469026, + "grad_norm": 1114.3900146484375, + "learning_rate": 3.968e-06, + "loss": 27.9584, + "step": 19840 + }, + { + "epoch": 0.04009825587737408, + "grad_norm": 305.44964599609375, + "learning_rate": 3.97e-06, + "loss": 29.9003, + "step": 19850 + }, + { + "epoch": 0.040118456510057895, + "grad_norm": 609.80615234375, + "learning_rate": 3.972e-06, + "loss": 36.946, + "step": 19860 + }, + { + "epoch": 0.04013865714274171, + "grad_norm": 6.036598205566406, + "learning_rate": 3.974e-06, + "loss": 9.1207, + "step": 19870 + }, + { + "epoch": 0.04015885777542553, + "grad_norm": 352.4873962402344, + "learning_rate": 
3.9760000000000006e-06, + "loss": 15.7391, + "step": 19880 + }, + { + "epoch": 0.04017905840810934, + "grad_norm": 734.5411987304688, + "learning_rate": 3.978e-06, + "loss": 17.07, + "step": 19890 + }, + { + "epoch": 0.040199259040793155, + "grad_norm": 532.0252685546875, + "learning_rate": 3.980000000000001e-06, + "loss": 31.1751, + "step": 19900 + }, + { + "epoch": 0.040219459673476975, + "grad_norm": 639.2634887695312, + "learning_rate": 3.982e-06, + "loss": 41.4132, + "step": 19910 + }, + { + "epoch": 0.04023966030616079, + "grad_norm": 328.55352783203125, + "learning_rate": 3.984e-06, + "loss": 16.7614, + "step": 19920 + }, + { + "epoch": 0.04025986093884461, + "grad_norm": 298.96435546875, + "learning_rate": 3.9860000000000005e-06, + "loss": 44.6898, + "step": 19930 + }, + { + "epoch": 0.04028006157152842, + "grad_norm": 578.205322265625, + "learning_rate": 3.988000000000001e-06, + "loss": 20.3982, + "step": 19940 + }, + { + "epoch": 0.040300262204212234, + "grad_norm": 367.17327880859375, + "learning_rate": 3.990000000000001e-06, + "loss": 30.3627, + "step": 19950 + }, + { + "epoch": 0.040320462836896054, + "grad_norm": 356.40936279296875, + "learning_rate": 3.992e-06, + "loss": 24.3473, + "step": 19960 + }, + { + "epoch": 0.04034066346957987, + "grad_norm": 2358.637939453125, + "learning_rate": 3.994e-06, + "loss": 26.5293, + "step": 19970 + }, + { + "epoch": 0.04036086410226368, + "grad_norm": 329.34173583984375, + "learning_rate": 3.996e-06, + "loss": 31.2316, + "step": 19980 + }, + { + "epoch": 0.0403810647349475, + "grad_norm": 141.7036590576172, + "learning_rate": 3.9980000000000005e-06, + "loss": 18.3415, + "step": 19990 + }, + { + "epoch": 0.04040126536763131, + "grad_norm": 246.96900939941406, + "learning_rate": 4.000000000000001e-06, + "loss": 21.3769, + "step": 20000 + }, + { + "epoch": 0.04042146600031513, + "grad_norm": 492.83416748046875, + "learning_rate": 4.002e-06, + "loss": 22.297, + "step": 20010 + }, + { + "epoch": 0.040441666632998946, + "grad_norm": 244.3651885986328, + "learning_rate": 4.004e-06, + "loss": 14.4005, + "step": 20020 + }, + { + "epoch": 0.04046186726568276, + "grad_norm": 555.0216674804688, + "learning_rate": 4.006e-06, + "loss": 23.6152, + "step": 20030 + }, + { + "epoch": 0.04048206789836658, + "grad_norm": 803.3456420898438, + "learning_rate": 4.008e-06, + "loss": 30.0964, + "step": 20040 + }, + { + "epoch": 0.04050226853105039, + "grad_norm": 895.5619506835938, + "learning_rate": 4.0100000000000006e-06, + "loss": 28.1923, + "step": 20050 + }, + { + "epoch": 0.040522469163734205, + "grad_norm": 160.98851013183594, + "learning_rate": 4.012000000000001e-06, + "loss": 41.5665, + "step": 20060 + }, + { + "epoch": 0.040542669796418025, + "grad_norm": 487.1036682128906, + "learning_rate": 4.014e-06, + "loss": 29.195, + "step": 20070 + }, + { + "epoch": 0.04056287042910184, + "grad_norm": 658.0955200195312, + "learning_rate": 4.016e-06, + "loss": 22.3123, + "step": 20080 + }, + { + "epoch": 0.04058307106178566, + "grad_norm": 512.8436279296875, + "learning_rate": 4.018e-06, + "loss": 30.6884, + "step": 20090 + }, + { + "epoch": 0.04060327169446947, + "grad_norm": 20.911073684692383, + "learning_rate": 4.0200000000000005e-06, + "loss": 14.7529, + "step": 20100 + }, + { + "epoch": 0.040623472327153284, + "grad_norm": 220.57025146484375, + "learning_rate": 4.022000000000001e-06, + "loss": 21.3989, + "step": 20110 + }, + { + "epoch": 0.040643672959837104, + "grad_norm": 1230.2901611328125, + "learning_rate": 4.024e-06, + "loss": 22.7145, + "step": 20120 
+ }, + { + "epoch": 0.04066387359252092, + "grad_norm": 375.266357421875, + "learning_rate": 4.026e-06, + "loss": 26.7046, + "step": 20130 + }, + { + "epoch": 0.04068407422520473, + "grad_norm": 627.1782836914062, + "learning_rate": 4.028e-06, + "loss": 30.1456, + "step": 20140 + }, + { + "epoch": 0.04070427485788855, + "grad_norm": 146.41339111328125, + "learning_rate": 4.03e-06, + "loss": 22.8548, + "step": 20150 + }, + { + "epoch": 0.040724475490572364, + "grad_norm": 1083.543701171875, + "learning_rate": 4.0320000000000005e-06, + "loss": 37.469, + "step": 20160 + }, + { + "epoch": 0.040744676123256184, + "grad_norm": 253.15589904785156, + "learning_rate": 4.034e-06, + "loss": 30.0814, + "step": 20170 + }, + { + "epoch": 0.04076487675594, + "grad_norm": 460.3359069824219, + "learning_rate": 4.036000000000001e-06, + "loss": 38.5531, + "step": 20180 + }, + { + "epoch": 0.04078507738862381, + "grad_norm": 923.38525390625, + "learning_rate": 4.038e-06, + "loss": 30.4211, + "step": 20190 + }, + { + "epoch": 0.04080527802130763, + "grad_norm": 426.0430908203125, + "learning_rate": 4.04e-06, + "loss": 24.8516, + "step": 20200 + }, + { + "epoch": 0.04082547865399144, + "grad_norm": 599.9994506835938, + "learning_rate": 4.0420000000000004e-06, + "loss": 24.7395, + "step": 20210 + }, + { + "epoch": 0.040845679286675256, + "grad_norm": 1300.9736328125, + "learning_rate": 4.044e-06, + "loss": 30.9326, + "step": 20220 + }, + { + "epoch": 0.040865879919359076, + "grad_norm": 230.04566955566406, + "learning_rate": 4.046000000000001e-06, + "loss": 13.643, + "step": 20230 + }, + { + "epoch": 0.04088608055204289, + "grad_norm": 585.1691284179688, + "learning_rate": 4.048e-06, + "loss": 29.9727, + "step": 20240 + }, + { + "epoch": 0.04090628118472671, + "grad_norm": 885.5676879882812, + "learning_rate": 4.05e-06, + "loss": 29.2169, + "step": 20250 + }, + { + "epoch": 0.04092648181741052, + "grad_norm": 555.5040893554688, + "learning_rate": 4.052e-06, + "loss": 44.2696, + "step": 20260 + }, + { + "epoch": 0.040946682450094335, + "grad_norm": 752.9579467773438, + "learning_rate": 4.0540000000000005e-06, + "loss": 21.3835, + "step": 20270 + }, + { + "epoch": 0.040966883082778155, + "grad_norm": 697.6809692382812, + "learning_rate": 4.056000000000001e-06, + "loss": 30.0799, + "step": 20280 + }, + { + "epoch": 0.04098708371546197, + "grad_norm": 551.8582763671875, + "learning_rate": 4.058e-06, + "loss": 27.5682, + "step": 20290 + }, + { + "epoch": 0.04100728434814578, + "grad_norm": 641.0758666992188, + "learning_rate": 4.060000000000001e-06, + "loss": 23.9342, + "step": 20300 + }, + { + "epoch": 0.0410274849808296, + "grad_norm": 747.5087280273438, + "learning_rate": 4.062e-06, + "loss": 30.3778, + "step": 20310 + }, + { + "epoch": 0.041047685613513414, + "grad_norm": 1012.8115844726562, + "learning_rate": 4.064e-06, + "loss": 27.7297, + "step": 20320 + }, + { + "epoch": 0.041067886246197234, + "grad_norm": 462.903076171875, + "learning_rate": 4.0660000000000005e-06, + "loss": 26.1605, + "step": 20330 + }, + { + "epoch": 0.04108808687888105, + "grad_norm": 692.3569946289062, + "learning_rate": 4.068000000000001e-06, + "loss": 33.3362, + "step": 20340 + }, + { + "epoch": 0.04110828751156486, + "grad_norm": 91.17338562011719, + "learning_rate": 4.07e-06, + "loss": 32.5818, + "step": 20350 + }, + { + "epoch": 0.04112848814424868, + "grad_norm": 314.2921142578125, + "learning_rate": 4.072e-06, + "loss": 22.8175, + "step": 20360 + }, + { + "epoch": 0.04114868877693249, + "grad_norm": 198.06179809570312, + 
"learning_rate": 4.074e-06, + "loss": 40.0786, + "step": 20370 + }, + { + "epoch": 0.041168889409616306, + "grad_norm": 1603.201171875, + "learning_rate": 4.0760000000000004e-06, + "loss": 38.0158, + "step": 20380 + }, + { + "epoch": 0.041189090042300126, + "grad_norm": 375.8652038574219, + "learning_rate": 4.078000000000001e-06, + "loss": 19.2669, + "step": 20390 + }, + { + "epoch": 0.04120929067498394, + "grad_norm": 589.5870361328125, + "learning_rate": 4.08e-06, + "loss": 21.8247, + "step": 20400 + }, + { + "epoch": 0.04122949130766776, + "grad_norm": 775.0773315429688, + "learning_rate": 4.082e-06, + "loss": 27.2401, + "step": 20410 + }, + { + "epoch": 0.04124969194035157, + "grad_norm": 568.6592407226562, + "learning_rate": 4.084e-06, + "loss": 21.7321, + "step": 20420 + }, + { + "epoch": 0.041269892573035385, + "grad_norm": 523.823486328125, + "learning_rate": 4.086e-06, + "loss": 34.5144, + "step": 20430 + }, + { + "epoch": 0.041290093205719205, + "grad_norm": 831.2176513671875, + "learning_rate": 4.0880000000000005e-06, + "loss": 32.7675, + "step": 20440 + }, + { + "epoch": 0.04131029383840302, + "grad_norm": 158.96939086914062, + "learning_rate": 4.09e-06, + "loss": 19.2685, + "step": 20450 + }, + { + "epoch": 0.04133049447108683, + "grad_norm": 521.3656005859375, + "learning_rate": 4.092000000000001e-06, + "loss": 17.2066, + "step": 20460 + }, + { + "epoch": 0.04135069510377065, + "grad_norm": 206.03237915039062, + "learning_rate": 4.094e-06, + "loss": 23.385, + "step": 20470 + }, + { + "epoch": 0.041370895736454465, + "grad_norm": 878.8639526367188, + "learning_rate": 4.096e-06, + "loss": 30.1263, + "step": 20480 + }, + { + "epoch": 0.041391096369138285, + "grad_norm": 310.53326416015625, + "learning_rate": 4.098e-06, + "loss": 36.0104, + "step": 20490 + }, + { + "epoch": 0.0414112970018221, + "grad_norm": 447.5044250488281, + "learning_rate": 4.1e-06, + "loss": 46.8563, + "step": 20500 + }, + { + "epoch": 0.04143149763450591, + "grad_norm": 219.1395721435547, + "learning_rate": 4.102000000000001e-06, + "loss": 16.7209, + "step": 20510 + }, + { + "epoch": 0.04145169826718973, + "grad_norm": 788.0994262695312, + "learning_rate": 4.104e-06, + "loss": 25.6534, + "step": 20520 + }, + { + "epoch": 0.041471898899873544, + "grad_norm": 446.28533935546875, + "learning_rate": 4.106e-06, + "loss": 26.2097, + "step": 20530 + }, + { + "epoch": 0.04149209953255736, + "grad_norm": 286.25164794921875, + "learning_rate": 4.108e-06, + "loss": 23.721, + "step": 20540 + }, + { + "epoch": 0.04151230016524118, + "grad_norm": 1015.564208984375, + "learning_rate": 4.1100000000000005e-06, + "loss": 26.9486, + "step": 20550 + }, + { + "epoch": 0.04153250079792499, + "grad_norm": 386.4916076660156, + "learning_rate": 4.112000000000001e-06, + "loss": 37.2864, + "step": 20560 + }, + { + "epoch": 0.04155270143060881, + "grad_norm": 321.2320861816406, + "learning_rate": 4.114e-06, + "loss": 18.8058, + "step": 20570 + }, + { + "epoch": 0.04157290206329262, + "grad_norm": 380.9525451660156, + "learning_rate": 4.116000000000001e-06, + "loss": 17.8627, + "step": 20580 + }, + { + "epoch": 0.041593102695976436, + "grad_norm": 767.234619140625, + "learning_rate": 4.118e-06, + "loss": 31.6556, + "step": 20590 + }, + { + "epoch": 0.041613303328660256, + "grad_norm": 434.169189453125, + "learning_rate": 4.12e-06, + "loss": 25.3457, + "step": 20600 + }, + { + "epoch": 0.04163350396134407, + "grad_norm": 490.3370361328125, + "learning_rate": 4.1220000000000005e-06, + "loss": 17.2889, + "step": 20610 + }, + { + "epoch": 
0.04165370459402788, + "grad_norm": 480.8441467285156, + "learning_rate": 4.124e-06, + "loss": 26.1735, + "step": 20620 + }, + { + "epoch": 0.0416739052267117, + "grad_norm": 275.5316162109375, + "learning_rate": 4.126000000000001e-06, + "loss": 27.8963, + "step": 20630 + }, + { + "epoch": 0.041694105859395515, + "grad_norm": 215.60633850097656, + "learning_rate": 4.128e-06, + "loss": 36.1411, + "step": 20640 + }, + { + "epoch": 0.041714306492079335, + "grad_norm": 645.2214965820312, + "learning_rate": 4.13e-06, + "loss": 22.818, + "step": 20650 + }, + { + "epoch": 0.04173450712476315, + "grad_norm": 339.7776184082031, + "learning_rate": 4.132e-06, + "loss": 12.7336, + "step": 20660 + }, + { + "epoch": 0.04175470775744696, + "grad_norm": 540.5206298828125, + "learning_rate": 4.1340000000000006e-06, + "loss": 16.8377, + "step": 20670 + }, + { + "epoch": 0.04177490839013078, + "grad_norm": 492.8855285644531, + "learning_rate": 4.136000000000001e-06, + "loss": 19.1989, + "step": 20680 + }, + { + "epoch": 0.041795109022814594, + "grad_norm": 551.1146850585938, + "learning_rate": 4.138e-06, + "loss": 23.7704, + "step": 20690 + }, + { + "epoch": 0.04181530965549841, + "grad_norm": 675.3240966796875, + "learning_rate": 4.14e-06, + "loss": 26.2867, + "step": 20700 + }, + { + "epoch": 0.04183551028818223, + "grad_norm": 690.68505859375, + "learning_rate": 4.142e-06, + "loss": 18.1141, + "step": 20710 + }, + { + "epoch": 0.04185571092086604, + "grad_norm": 486.8795166015625, + "learning_rate": 4.1440000000000005e-06, + "loss": 20.9204, + "step": 20720 + }, + { + "epoch": 0.04187591155354986, + "grad_norm": 317.1226501464844, + "learning_rate": 4.146000000000001e-06, + "loss": 22.8506, + "step": 20730 + }, + { + "epoch": 0.041896112186233674, + "grad_norm": 1490.643798828125, + "learning_rate": 4.148000000000001e-06, + "loss": 48.6961, + "step": 20740 + }, + { + "epoch": 0.04191631281891749, + "grad_norm": 782.0410766601562, + "learning_rate": 4.15e-06, + "loss": 37.6037, + "step": 20750 + }, + { + "epoch": 0.04193651345160131, + "grad_norm": 582.6185913085938, + "learning_rate": 4.152e-06, + "loss": 41.3482, + "step": 20760 + }, + { + "epoch": 0.04195671408428512, + "grad_norm": 272.0113830566406, + "learning_rate": 4.154e-06, + "loss": 24.9711, + "step": 20770 + }, + { + "epoch": 0.04197691471696893, + "grad_norm": 79.8502197265625, + "learning_rate": 4.1560000000000005e-06, + "loss": 24.9636, + "step": 20780 + }, + { + "epoch": 0.04199711534965275, + "grad_norm": 540.5105590820312, + "learning_rate": 4.158000000000001e-06, + "loss": 32.4017, + "step": 20790 + }, + { + "epoch": 0.042017315982336566, + "grad_norm": 407.08599853515625, + "learning_rate": 4.16e-06, + "loss": 25.5935, + "step": 20800 + }, + { + "epoch": 0.042037516615020386, + "grad_norm": 374.0444641113281, + "learning_rate": 4.162e-06, + "loss": 31.7913, + "step": 20810 + }, + { + "epoch": 0.0420577172477042, + "grad_norm": 336.9476318359375, + "learning_rate": 4.164e-06, + "loss": 17.5922, + "step": 20820 + }, + { + "epoch": 0.04207791788038801, + "grad_norm": 281.6684875488281, + "learning_rate": 4.1660000000000004e-06, + "loss": 36.504, + "step": 20830 + }, + { + "epoch": 0.04209811851307183, + "grad_norm": 1016.922119140625, + "learning_rate": 4.168000000000001e-06, + "loss": 16.7514, + "step": 20840 + }, + { + "epoch": 0.042118319145755645, + "grad_norm": 651.076416015625, + "learning_rate": 4.17e-06, + "loss": 26.3376, + "step": 20850 + }, + { + "epoch": 0.04213851977843946, + "grad_norm": 348.99554443359375, + "learning_rate": 
4.172000000000001e-06, + "loss": 23.9043, + "step": 20860 + }, + { + "epoch": 0.04215872041112328, + "grad_norm": 204.27989196777344, + "learning_rate": 4.174e-06, + "loss": 19.5862, + "step": 20870 + }, + { + "epoch": 0.04217892104380709, + "grad_norm": 568.8946533203125, + "learning_rate": 4.176e-06, + "loss": 41.6269, + "step": 20880 + }, + { + "epoch": 0.04219912167649091, + "grad_norm": 515.9381713867188, + "learning_rate": 4.1780000000000005e-06, + "loss": 33.0815, + "step": 20890 + }, + { + "epoch": 0.042219322309174724, + "grad_norm": 437.7154846191406, + "learning_rate": 4.18e-06, + "loss": 23.53, + "step": 20900 + }, + { + "epoch": 0.04223952294185854, + "grad_norm": 603.0464477539062, + "learning_rate": 4.182000000000001e-06, + "loss": 24.8234, + "step": 20910 + }, + { + "epoch": 0.04225972357454236, + "grad_norm": 336.46868896484375, + "learning_rate": 4.184e-06, + "loss": 22.0417, + "step": 20920 + }, + { + "epoch": 0.04227992420722617, + "grad_norm": 297.6182556152344, + "learning_rate": 4.186e-06, + "loss": 27.2698, + "step": 20930 + }, + { + "epoch": 0.04230012483990998, + "grad_norm": 258.5321044921875, + "learning_rate": 4.188e-06, + "loss": 12.9438, + "step": 20940 + }, + { + "epoch": 0.0423203254725938, + "grad_norm": 378.1133117675781, + "learning_rate": 4.1900000000000005e-06, + "loss": 13.9638, + "step": 20950 + }, + { + "epoch": 0.042340526105277616, + "grad_norm": 486.2174072265625, + "learning_rate": 4.192000000000001e-06, + "loss": 25.7613, + "step": 20960 + }, + { + "epoch": 0.042360726737961436, + "grad_norm": 1096.19482421875, + "learning_rate": 4.194e-06, + "loss": 17.6337, + "step": 20970 + }, + { + "epoch": 0.04238092737064525, + "grad_norm": 336.497314453125, + "learning_rate": 4.196e-06, + "loss": 40.8516, + "step": 20980 + }, + { + "epoch": 0.04240112800332906, + "grad_norm": 255.76902770996094, + "learning_rate": 4.198e-06, + "loss": 23.0373, + "step": 20990 + }, + { + "epoch": 0.04242132863601288, + "grad_norm": 757.0595092773438, + "learning_rate": 4.2000000000000004e-06, + "loss": 27.2249, + "step": 21000 + }, + { + "epoch": 0.042441529268696696, + "grad_norm": 809.0731811523438, + "learning_rate": 4.202000000000001e-06, + "loss": 23.6627, + "step": 21010 + }, + { + "epoch": 0.04246172990138051, + "grad_norm": 107.8954086303711, + "learning_rate": 4.204e-06, + "loss": 32.8102, + "step": 21020 + }, + { + "epoch": 0.04248193053406433, + "grad_norm": 815.7806396484375, + "learning_rate": 4.206e-06, + "loss": 27.9359, + "step": 21030 + }, + { + "epoch": 0.04250213116674814, + "grad_norm": 799.5923461914062, + "learning_rate": 4.208e-06, + "loss": 27.1343, + "step": 21040 + }, + { + "epoch": 0.04252233179943196, + "grad_norm": 453.0689697265625, + "learning_rate": 4.21e-06, + "loss": 26.1435, + "step": 21050 + }, + { + "epoch": 0.042542532432115775, + "grad_norm": 172.1396026611328, + "learning_rate": 4.2120000000000005e-06, + "loss": 20.6205, + "step": 21060 + }, + { + "epoch": 0.04256273306479959, + "grad_norm": 497.14508056640625, + "learning_rate": 4.214000000000001e-06, + "loss": 23.1049, + "step": 21070 + }, + { + "epoch": 0.04258293369748341, + "grad_norm": 343.8851318359375, + "learning_rate": 4.216e-06, + "loss": 26.9667, + "step": 21080 + }, + { + "epoch": 0.04260313433016722, + "grad_norm": 127.44200897216797, + "learning_rate": 4.218e-06, + "loss": 21.4246, + "step": 21090 + }, + { + "epoch": 0.042623334962851034, + "grad_norm": 117.90656280517578, + "learning_rate": 4.22e-06, + "loss": 14.2156, + "step": 21100 + }, + { + "epoch": 
0.042643535595534854, + "grad_norm": 997.7525024414062, + "learning_rate": 4.222e-06, + "loss": 20.8012, + "step": 21110 + }, + { + "epoch": 0.04266373622821867, + "grad_norm": 814.050537109375, + "learning_rate": 4.2240000000000006e-06, + "loss": 22.8416, + "step": 21120 + }, + { + "epoch": 0.04268393686090249, + "grad_norm": 757.04052734375, + "learning_rate": 4.226e-06, + "loss": 15.7824, + "step": 21130 + }, + { + "epoch": 0.0427041374935863, + "grad_norm": 518.5238647460938, + "learning_rate": 4.228000000000001e-06, + "loss": 22.685, + "step": 21140 + }, + { + "epoch": 0.04272433812627011, + "grad_norm": 719.8343505859375, + "learning_rate": 4.23e-06, + "loss": 25.4759, + "step": 21150 + }, + { + "epoch": 0.04274453875895393, + "grad_norm": 431.3028869628906, + "learning_rate": 4.232e-06, + "loss": 22.5114, + "step": 21160 + }, + { + "epoch": 0.042764739391637746, + "grad_norm": 621.0961303710938, + "learning_rate": 4.2340000000000005e-06, + "loss": 24.1686, + "step": 21170 + }, + { + "epoch": 0.04278494002432156, + "grad_norm": 486.9888000488281, + "learning_rate": 4.236e-06, + "loss": 27.8239, + "step": 21180 + }, + { + "epoch": 0.04280514065700538, + "grad_norm": 252.33909606933594, + "learning_rate": 4.238000000000001e-06, + "loss": 16.7194, + "step": 21190 + }, + { + "epoch": 0.04282534128968919, + "grad_norm": 347.6598205566406, + "learning_rate": 4.24e-06, + "loss": 29.6472, + "step": 21200 + }, + { + "epoch": 0.04284554192237301, + "grad_norm": 56.823944091796875, + "learning_rate": 4.242e-06, + "loss": 24.4186, + "step": 21210 + }, + { + "epoch": 0.042865742555056825, + "grad_norm": 464.31768798828125, + "learning_rate": 4.244e-06, + "loss": 46.5157, + "step": 21220 + }, + { + "epoch": 0.04288594318774064, + "grad_norm": 642.3323364257812, + "learning_rate": 4.2460000000000005e-06, + "loss": 37.0643, + "step": 21230 + }, + { + "epoch": 0.04290614382042446, + "grad_norm": 822.0488891601562, + "learning_rate": 4.248000000000001e-06, + "loss": 30.2411, + "step": 21240 + }, + { + "epoch": 0.04292634445310827, + "grad_norm": 329.6660461425781, + "learning_rate": 4.25e-06, + "loss": 20.6211, + "step": 21250 + }, + { + "epoch": 0.042946545085792084, + "grad_norm": 846.5374145507812, + "learning_rate": 4.252000000000001e-06, + "loss": 22.8153, + "step": 21260 + }, + { + "epoch": 0.042966745718475904, + "grad_norm": 179.9952850341797, + "learning_rate": 4.254e-06, + "loss": 25.1175, + "step": 21270 + }, + { + "epoch": 0.04298694635115972, + "grad_norm": 972.83837890625, + "learning_rate": 4.256e-06, + "loss": 30.9154, + "step": 21280 + }, + { + "epoch": 0.04300714698384353, + "grad_norm": 552.5194702148438, + "learning_rate": 4.2580000000000006e-06, + "loss": 24.2954, + "step": 21290 + }, + { + "epoch": 0.04302734761652735, + "grad_norm": 539.9232177734375, + "learning_rate": 4.26e-06, + "loss": 24.2965, + "step": 21300 + }, + { + "epoch": 0.043047548249211164, + "grad_norm": 679.17431640625, + "learning_rate": 4.262000000000001e-06, + "loss": 23.8783, + "step": 21310 + }, + { + "epoch": 0.043067748881894984, + "grad_norm": 237.664306640625, + "learning_rate": 4.264e-06, + "loss": 28.1514, + "step": 21320 + }, + { + "epoch": 0.0430879495145788, + "grad_norm": 342.8701171875, + "learning_rate": 4.266e-06, + "loss": 19.4848, + "step": 21330 + }, + { + "epoch": 0.04310815014726261, + "grad_norm": 330.9326171875, + "learning_rate": 4.2680000000000005e-06, + "loss": 31.6348, + "step": 21340 + }, + { + "epoch": 0.04312835077994643, + "grad_norm": 249.25387573242188, + "learning_rate": 
4.270000000000001e-06, + "loss": 17.7517, + "step": 21350 + }, + { + "epoch": 0.04314855141263024, + "grad_norm": 455.8252868652344, + "learning_rate": 4.272000000000001e-06, + "loss": 24.1304, + "step": 21360 + }, + { + "epoch": 0.043168752045314056, + "grad_norm": 480.63372802734375, + "learning_rate": 4.274e-06, + "loss": 25.5798, + "step": 21370 + }, + { + "epoch": 0.043188952677997876, + "grad_norm": 395.6570739746094, + "learning_rate": 4.276e-06, + "loss": 25.7934, + "step": 21380 + }, + { + "epoch": 0.04320915331068169, + "grad_norm": 392.75115966796875, + "learning_rate": 4.278e-06, + "loss": 34.5832, + "step": 21390 + }, + { + "epoch": 0.04322935394336551, + "grad_norm": 539.7963256835938, + "learning_rate": 4.2800000000000005e-06, + "loss": 25.2617, + "step": 21400 + }, + { + "epoch": 0.04324955457604932, + "grad_norm": 310.9663391113281, + "learning_rate": 4.282000000000001e-06, + "loss": 28.5294, + "step": 21410 + }, + { + "epoch": 0.043269755208733135, + "grad_norm": 250.9676971435547, + "learning_rate": 4.284e-06, + "loss": 32.0291, + "step": 21420 + }, + { + "epoch": 0.043289955841416955, + "grad_norm": 267.46063232421875, + "learning_rate": 4.286e-06, + "loss": 35.4653, + "step": 21430 + }, + { + "epoch": 0.04331015647410077, + "grad_norm": 443.68096923828125, + "learning_rate": 4.288e-06, + "loss": 22.1667, + "step": 21440 + }, + { + "epoch": 0.04333035710678458, + "grad_norm": 655.0978393554688, + "learning_rate": 4.2900000000000004e-06, + "loss": 20.0005, + "step": 21450 + }, + { + "epoch": 0.0433505577394684, + "grad_norm": 766.2430419921875, + "learning_rate": 4.292000000000001e-06, + "loss": 26.8155, + "step": 21460 + }, + { + "epoch": 0.043370758372152214, + "grad_norm": 676.1296997070312, + "learning_rate": 4.294000000000001e-06, + "loss": 21.5991, + "step": 21470 + }, + { + "epoch": 0.043390959004836034, + "grad_norm": 208.3021240234375, + "learning_rate": 4.296e-06, + "loss": 25.6596, + "step": 21480 + }, + { + "epoch": 0.04341115963751985, + "grad_norm": 328.14739990234375, + "learning_rate": 4.298e-06, + "loss": 39.369, + "step": 21490 + }, + { + "epoch": 0.04343136027020366, + "grad_norm": 18.049570083618164, + "learning_rate": 4.3e-06, + "loss": 32.681, + "step": 21500 + }, + { + "epoch": 0.04345156090288748, + "grad_norm": 787.612548828125, + "learning_rate": 4.3020000000000005e-06, + "loss": 20.5986, + "step": 21510 + }, + { + "epoch": 0.04347176153557129, + "grad_norm": 1166.1243896484375, + "learning_rate": 4.304000000000001e-06, + "loss": 59.2746, + "step": 21520 + }, + { + "epoch": 0.043491962168255106, + "grad_norm": 529.6400756835938, + "learning_rate": 4.306e-06, + "loss": 23.4076, + "step": 21530 + }, + { + "epoch": 0.043512162800938926, + "grad_norm": 445.4692077636719, + "learning_rate": 4.308000000000001e-06, + "loss": 31.6689, + "step": 21540 + }, + { + "epoch": 0.04353236343362274, + "grad_norm": 295.2471618652344, + "learning_rate": 4.31e-06, + "loss": 22.4568, + "step": 21550 + }, + { + "epoch": 0.04355256406630656, + "grad_norm": 414.5696716308594, + "learning_rate": 4.312e-06, + "loss": 13.6062, + "step": 21560 + }, + { + "epoch": 0.04357276469899037, + "grad_norm": 62.93955612182617, + "learning_rate": 4.3140000000000005e-06, + "loss": 25.5571, + "step": 21570 + }, + { + "epoch": 0.043592965331674186, + "grad_norm": 712.908447265625, + "learning_rate": 4.316e-06, + "loss": 38.6598, + "step": 21580 + }, + { + "epoch": 0.043613165964358006, + "grad_norm": 766.0816650390625, + "learning_rate": 4.318000000000001e-06, + "loss": 27.1536, + 
"step": 21590 + }, + { + "epoch": 0.04363336659704182, + "grad_norm": 401.24298095703125, + "learning_rate": 4.32e-06, + "loss": 26.8595, + "step": 21600 + }, + { + "epoch": 0.04365356722972563, + "grad_norm": 594.0333862304688, + "learning_rate": 4.322e-06, + "loss": 28.8221, + "step": 21610 + }, + { + "epoch": 0.04367376786240945, + "grad_norm": 216.5138397216797, + "learning_rate": 4.3240000000000004e-06, + "loss": 39.8338, + "step": 21620 + }, + { + "epoch": 0.043693968495093265, + "grad_norm": 384.75299072265625, + "learning_rate": 4.326000000000001e-06, + "loss": 32.6422, + "step": 21630 + }, + { + "epoch": 0.043714169127777085, + "grad_norm": 245.19192504882812, + "learning_rate": 4.328000000000001e-06, + "loss": 23.9303, + "step": 21640 + }, + { + "epoch": 0.0437343697604609, + "grad_norm": 220.33888244628906, + "learning_rate": 4.33e-06, + "loss": 29.9623, + "step": 21650 + }, + { + "epoch": 0.04375457039314471, + "grad_norm": 299.3458557128906, + "learning_rate": 4.332e-06, + "loss": 29.4365, + "step": 21660 + }, + { + "epoch": 0.04377477102582853, + "grad_norm": 0.0, + "learning_rate": 4.334e-06, + "loss": 16.2625, + "step": 21670 + }, + { + "epoch": 0.043794971658512344, + "grad_norm": 1265.8236083984375, + "learning_rate": 4.3360000000000005e-06, + "loss": 36.5488, + "step": 21680 + }, + { + "epoch": 0.04381517229119616, + "grad_norm": 524.7084350585938, + "learning_rate": 4.338000000000001e-06, + "loss": 30.5848, + "step": 21690 + }, + { + "epoch": 0.04383537292387998, + "grad_norm": 640.1143798828125, + "learning_rate": 4.34e-06, + "loss": 38.6122, + "step": 21700 + }, + { + "epoch": 0.04385557355656379, + "grad_norm": 603.3218383789062, + "learning_rate": 4.342e-06, + "loss": 27.8987, + "step": 21710 + }, + { + "epoch": 0.04387577418924761, + "grad_norm": 407.31451416015625, + "learning_rate": 4.344e-06, + "loss": 19.4499, + "step": 21720 + }, + { + "epoch": 0.04389597482193142, + "grad_norm": 1363.579833984375, + "learning_rate": 4.346e-06, + "loss": 39.3889, + "step": 21730 + }, + { + "epoch": 0.043916175454615236, + "grad_norm": 367.12200927734375, + "learning_rate": 4.3480000000000006e-06, + "loss": 32.9657, + "step": 21740 + }, + { + "epoch": 0.043936376087299056, + "grad_norm": 1004.193603515625, + "learning_rate": 4.350000000000001e-06, + "loss": 36.1381, + "step": 21750 + }, + { + "epoch": 0.04395657671998287, + "grad_norm": 673.9029541015625, + "learning_rate": 4.352e-06, + "loss": 27.1183, + "step": 21760 + }, + { + "epoch": 0.04397677735266668, + "grad_norm": 451.542724609375, + "learning_rate": 4.354e-06, + "loss": 25.8514, + "step": 21770 + }, + { + "epoch": 0.0439969779853505, + "grad_norm": 283.10491943359375, + "learning_rate": 4.356e-06, + "loss": 28.4057, + "step": 21780 + }, + { + "epoch": 0.044017178618034315, + "grad_norm": 492.3885803222656, + "learning_rate": 4.3580000000000005e-06, + "loss": 23.065, + "step": 21790 + }, + { + "epoch": 0.044037379250718135, + "grad_norm": 499.5768737792969, + "learning_rate": 4.360000000000001e-06, + "loss": 29.2149, + "step": 21800 + }, + { + "epoch": 0.04405757988340195, + "grad_norm": 185.2124481201172, + "learning_rate": 4.362e-06, + "loss": 19.6476, + "step": 21810 + }, + { + "epoch": 0.04407778051608576, + "grad_norm": 826.69873046875, + "learning_rate": 4.364e-06, + "loss": 23.436, + "step": 21820 + }, + { + "epoch": 0.04409798114876958, + "grad_norm": 1106.534912109375, + "learning_rate": 4.366e-06, + "loss": 29.2976, + "step": 21830 + }, + { + "epoch": 0.044118181781453394, + "grad_norm": 445.1199035644531, + 
"learning_rate": 4.368e-06, + "loss": 18.8045, + "step": 21840 + }, + { + "epoch": 0.04413838241413721, + "grad_norm": 593.8103637695312, + "learning_rate": 4.3700000000000005e-06, + "loss": 27.7079, + "step": 21850 + }, + { + "epoch": 0.04415858304682103, + "grad_norm": 526.8739624023438, + "learning_rate": 4.372e-06, + "loss": 20.9647, + "step": 21860 + }, + { + "epoch": 0.04417878367950484, + "grad_norm": 300.53143310546875, + "learning_rate": 4.374000000000001e-06, + "loss": 18.3609, + "step": 21870 + }, + { + "epoch": 0.04419898431218866, + "grad_norm": 473.26971435546875, + "learning_rate": 4.376e-06, + "loss": 33.3246, + "step": 21880 + }, + { + "epoch": 0.044219184944872474, + "grad_norm": 600.6439819335938, + "learning_rate": 4.378e-06, + "loss": 15.1279, + "step": 21890 + }, + { + "epoch": 0.04423938557755629, + "grad_norm": 832.4254150390625, + "learning_rate": 4.38e-06, + "loss": 20.1099, + "step": 21900 + }, + { + "epoch": 0.04425958621024011, + "grad_norm": 382.2060852050781, + "learning_rate": 4.382e-06, + "loss": 16.0935, + "step": 21910 + }, + { + "epoch": 0.04427978684292392, + "grad_norm": 397.8320007324219, + "learning_rate": 4.384000000000001e-06, + "loss": 21.0683, + "step": 21920 + }, + { + "epoch": 0.04429998747560773, + "grad_norm": 252.54330444335938, + "learning_rate": 4.386e-06, + "loss": 24.7409, + "step": 21930 + }, + { + "epoch": 0.04432018810829155, + "grad_norm": 62.560665130615234, + "learning_rate": 4.388e-06, + "loss": 17.3035, + "step": 21940 + }, + { + "epoch": 0.044340388740975366, + "grad_norm": 2055.7509765625, + "learning_rate": 4.39e-06, + "loss": 40.6105, + "step": 21950 + }, + { + "epoch": 0.044360589373659186, + "grad_norm": 162.68014526367188, + "learning_rate": 4.3920000000000005e-06, + "loss": 18.1075, + "step": 21960 + }, + { + "epoch": 0.044380790006343, + "grad_norm": 806.7841796875, + "learning_rate": 4.394000000000001e-06, + "loss": 47.3545, + "step": 21970 + }, + { + "epoch": 0.04440099063902681, + "grad_norm": 114.22157287597656, + "learning_rate": 4.396e-06, + "loss": 40.4132, + "step": 21980 + }, + { + "epoch": 0.04442119127171063, + "grad_norm": 971.5330200195312, + "learning_rate": 4.398000000000001e-06, + "loss": 33.0444, + "step": 21990 + }, + { + "epoch": 0.044441391904394445, + "grad_norm": 77.30171203613281, + "learning_rate": 4.4e-06, + "loss": 15.9642, + "step": 22000 + }, + { + "epoch": 0.04446159253707826, + "grad_norm": 634.365234375, + "learning_rate": 4.402e-06, + "loss": 30.1873, + "step": 22010 + }, + { + "epoch": 0.04448179316976208, + "grad_norm": 859.5525512695312, + "learning_rate": 4.4040000000000005e-06, + "loss": 29.8951, + "step": 22020 + }, + { + "epoch": 0.04450199380244589, + "grad_norm": 607.345458984375, + "learning_rate": 4.406000000000001e-06, + "loss": 33.4096, + "step": 22030 + }, + { + "epoch": 0.04452219443512971, + "grad_norm": 912.7593383789062, + "learning_rate": 4.408000000000001e-06, + "loss": 22.3692, + "step": 22040 + }, + { + "epoch": 0.044542395067813524, + "grad_norm": 617.1201171875, + "learning_rate": 4.41e-06, + "loss": 29.6909, + "step": 22050 + }, + { + "epoch": 0.04456259570049734, + "grad_norm": 623.3720092773438, + "learning_rate": 4.412e-06, + "loss": 20.6357, + "step": 22060 + }, + { + "epoch": 0.04458279633318116, + "grad_norm": 799.55078125, + "learning_rate": 4.4140000000000004e-06, + "loss": 34.0577, + "step": 22070 + }, + { + "epoch": 0.04460299696586497, + "grad_norm": 437.06719970703125, + "learning_rate": 4.416000000000001e-06, + "loss": 28.1422, + "step": 22080 + }, + { 
+ "epoch": 0.04462319759854878, + "grad_norm": 342.1743469238281, + "learning_rate": 4.418000000000001e-06, + "loss": 25.9955, + "step": 22090 + }, + { + "epoch": 0.0446433982312326, + "grad_norm": 270.8495178222656, + "learning_rate": 4.42e-06, + "loss": 18.9199, + "step": 22100 + }, + { + "epoch": 0.044663598863916416, + "grad_norm": 357.5106506347656, + "learning_rate": 4.422e-06, + "loss": 20.252, + "step": 22110 + }, + { + "epoch": 0.044683799496600236, + "grad_norm": 453.18798828125, + "learning_rate": 4.424e-06, + "loss": 15.3347, + "step": 22120 + }, + { + "epoch": 0.04470400012928405, + "grad_norm": 189.09625244140625, + "learning_rate": 4.4260000000000005e-06, + "loss": 15.3332, + "step": 22130 + }, + { + "epoch": 0.04472420076196786, + "grad_norm": 91.37895202636719, + "learning_rate": 4.428000000000001e-06, + "loss": 27.1369, + "step": 22140 + }, + { + "epoch": 0.04474440139465168, + "grad_norm": 858.945068359375, + "learning_rate": 4.430000000000001e-06, + "loss": 31.7565, + "step": 22150 + }, + { + "epoch": 0.044764602027335496, + "grad_norm": 432.4228515625, + "learning_rate": 4.432e-06, + "loss": 27.2611, + "step": 22160 + }, + { + "epoch": 0.04478480266001931, + "grad_norm": 629.383056640625, + "learning_rate": 4.434e-06, + "loss": 23.4669, + "step": 22170 + }, + { + "epoch": 0.04480500329270313, + "grad_norm": 695.8665771484375, + "learning_rate": 4.436e-06, + "loss": 19.5067, + "step": 22180 + }, + { + "epoch": 0.04482520392538694, + "grad_norm": 321.7768859863281, + "learning_rate": 4.438e-06, + "loss": 17.0404, + "step": 22190 + }, + { + "epoch": 0.04484540455807076, + "grad_norm": 662.2603759765625, + "learning_rate": 4.440000000000001e-06, + "loss": 53.422, + "step": 22200 + }, + { + "epoch": 0.044865605190754575, + "grad_norm": 1252.63623046875, + "learning_rate": 4.442e-06, + "loss": 36.9159, + "step": 22210 + }, + { + "epoch": 0.04488580582343839, + "grad_norm": 585.7415161132812, + "learning_rate": 4.444e-06, + "loss": 18.0069, + "step": 22220 + }, + { + "epoch": 0.04490600645612221, + "grad_norm": 947.59375, + "learning_rate": 4.446e-06, + "loss": 37.0844, + "step": 22230 + }, + { + "epoch": 0.04492620708880602, + "grad_norm": 551.6622314453125, + "learning_rate": 4.4480000000000004e-06, + "loss": 19.1681, + "step": 22240 + }, + { + "epoch": 0.044946407721489834, + "grad_norm": 297.9669494628906, + "learning_rate": 4.450000000000001e-06, + "loss": 10.2288, + "step": 22250 + }, + { + "epoch": 0.044966608354173654, + "grad_norm": 514.8941040039062, + "learning_rate": 4.452e-06, + "loss": 12.0541, + "step": 22260 + }, + { + "epoch": 0.04498680898685747, + "grad_norm": 902.2230224609375, + "learning_rate": 4.454000000000001e-06, + "loss": 28.4228, + "step": 22270 + }, + { + "epoch": 0.04500700961954129, + "grad_norm": 650.3019409179688, + "learning_rate": 4.456e-06, + "loss": 42.667, + "step": 22280 + }, + { + "epoch": 0.0450272102522251, + "grad_norm": 913.4136962890625, + "learning_rate": 4.458e-06, + "loss": 42.5742, + "step": 22290 + }, + { + "epoch": 0.04504741088490891, + "grad_norm": 507.68182373046875, + "learning_rate": 4.4600000000000005e-06, + "loss": 22.1308, + "step": 22300 + }, + { + "epoch": 0.04506761151759273, + "grad_norm": 257.96783447265625, + "learning_rate": 4.462e-06, + "loss": 21.2215, + "step": 22310 + }, + { + "epoch": 0.045087812150276546, + "grad_norm": 486.583740234375, + "learning_rate": 4.464000000000001e-06, + "loss": 34.8724, + "step": 22320 + }, + { + "epoch": 0.04510801278296036, + "grad_norm": 419.0981750488281, + "learning_rate": 
4.466e-06, + "loss": 26.7813, + "step": 22330 + }, + { + "epoch": 0.04512821341564418, + "grad_norm": 341.603515625, + "learning_rate": 4.468e-06, + "loss": 18.4198, + "step": 22340 + }, + { + "epoch": 0.04514841404832799, + "grad_norm": 207.7368621826172, + "learning_rate": 4.47e-06, + "loss": 30.2265, + "step": 22350 + }, + { + "epoch": 0.04516861468101181, + "grad_norm": 744.3402709960938, + "learning_rate": 4.4720000000000006e-06, + "loss": 18.9786, + "step": 22360 + }, + { + "epoch": 0.045188815313695625, + "grad_norm": 314.3277282714844, + "learning_rate": 4.474000000000001e-06, + "loss": 20.393, + "step": 22370 + }, + { + "epoch": 0.04520901594637944, + "grad_norm": 600.7283935546875, + "learning_rate": 4.476e-06, + "loss": 23.1794, + "step": 22380 + }, + { + "epoch": 0.04522921657906326, + "grad_norm": 220.0872039794922, + "learning_rate": 4.478e-06, + "loss": 36.213, + "step": 22390 + }, + { + "epoch": 0.04524941721174707, + "grad_norm": 363.21209716796875, + "learning_rate": 4.48e-06, + "loss": 40.3618, + "step": 22400 + }, + { + "epoch": 0.045269617844430884, + "grad_norm": 1180.115234375, + "learning_rate": 4.4820000000000005e-06, + "loss": 27.6139, + "step": 22410 + }, + { + "epoch": 0.045289818477114704, + "grad_norm": 824.1642456054688, + "learning_rate": 4.484000000000001e-06, + "loss": 25.0623, + "step": 22420 + }, + { + "epoch": 0.04531001910979852, + "grad_norm": 424.63665771484375, + "learning_rate": 4.486000000000001e-06, + "loss": 38.7007, + "step": 22430 + }, + { + "epoch": 0.04533021974248234, + "grad_norm": 540.9491577148438, + "learning_rate": 4.488e-06, + "loss": 36.2141, + "step": 22440 + }, + { + "epoch": 0.04535042037516615, + "grad_norm": 432.8280944824219, + "learning_rate": 4.49e-06, + "loss": 23.0217, + "step": 22450 + }, + { + "epoch": 0.045370621007849964, + "grad_norm": 618.2141723632812, + "learning_rate": 4.492e-06, + "loss": 19.9363, + "step": 22460 + }, + { + "epoch": 0.045390821640533784, + "grad_norm": 909.5447998046875, + "learning_rate": 4.4940000000000005e-06, + "loss": 32.4973, + "step": 22470 + }, + { + "epoch": 0.0454110222732176, + "grad_norm": 932.4380493164062, + "learning_rate": 4.496000000000001e-06, + "loss": 36.2815, + "step": 22480 + }, + { + "epoch": 0.04543122290590141, + "grad_norm": 619.4166870117188, + "learning_rate": 4.498e-06, + "loss": 26.1476, + "step": 22490 + }, + { + "epoch": 0.04545142353858523, + "grad_norm": 292.8052978515625, + "learning_rate": 4.5e-06, + "loss": 23.0709, + "step": 22500 + }, + { + "epoch": 0.04547162417126904, + "grad_norm": 911.8253784179688, + "learning_rate": 4.502e-06, + "loss": 27.5998, + "step": 22510 + }, + { + "epoch": 0.04549182480395286, + "grad_norm": 658.0123291015625, + "learning_rate": 4.504e-06, + "loss": 30.1506, + "step": 22520 + }, + { + "epoch": 0.045512025436636676, + "grad_norm": 512.3121948242188, + "learning_rate": 4.5060000000000006e-06, + "loss": 28.9311, + "step": 22530 + }, + { + "epoch": 0.04553222606932049, + "grad_norm": 882.61328125, + "learning_rate": 4.508e-06, + "loss": 34.5642, + "step": 22540 + }, + { + "epoch": 0.04555242670200431, + "grad_norm": 323.4769592285156, + "learning_rate": 4.510000000000001e-06, + "loss": 19.5451, + "step": 22550 + }, + { + "epoch": 0.04557262733468812, + "grad_norm": 549.1060180664062, + "learning_rate": 4.512e-06, + "loss": 32.6389, + "step": 22560 + }, + { + "epoch": 0.045592827967371935, + "grad_norm": 265.7471008300781, + "learning_rate": 4.514e-06, + "loss": 47.5208, + "step": 22570 + }, + { + "epoch": 0.045613028600055755, + 
"grad_norm": 2399.81103515625, + "learning_rate": 4.5160000000000005e-06, + "loss": 34.385, + "step": 22580 + }, + { + "epoch": 0.04563322923273957, + "grad_norm": 521.2456665039062, + "learning_rate": 4.518e-06, + "loss": 23.1079, + "step": 22590 + }, + { + "epoch": 0.04565342986542339, + "grad_norm": 392.2706604003906, + "learning_rate": 4.520000000000001e-06, + "loss": 14.7095, + "step": 22600 + }, + { + "epoch": 0.0456736304981072, + "grad_norm": 368.1333312988281, + "learning_rate": 4.522e-06, + "loss": 19.6538, + "step": 22610 + }, + { + "epoch": 0.045693831130791014, + "grad_norm": 251.01956176757812, + "learning_rate": 4.524e-06, + "loss": 32.6746, + "step": 22620 + }, + { + "epoch": 0.045714031763474834, + "grad_norm": 686.638671875, + "learning_rate": 4.526e-06, + "loss": 29.6795, + "step": 22630 + }, + { + "epoch": 0.04573423239615865, + "grad_norm": 422.67034912109375, + "learning_rate": 4.5280000000000005e-06, + "loss": 24.4751, + "step": 22640 + }, + { + "epoch": 0.04575443302884246, + "grad_norm": 505.2594909667969, + "learning_rate": 4.530000000000001e-06, + "loss": 42.698, + "step": 22650 + }, + { + "epoch": 0.04577463366152628, + "grad_norm": 897.6318359375, + "learning_rate": 4.532e-06, + "loss": 15.6817, + "step": 22660 + }, + { + "epoch": 0.04579483429421009, + "grad_norm": 484.6026611328125, + "learning_rate": 4.534000000000001e-06, + "loss": 21.0916, + "step": 22670 + }, + { + "epoch": 0.04581503492689391, + "grad_norm": 864.950927734375, + "learning_rate": 4.536e-06, + "loss": 30.6341, + "step": 22680 + }, + { + "epoch": 0.045835235559577726, + "grad_norm": 221.57028198242188, + "learning_rate": 4.5380000000000004e-06, + "loss": 34.7302, + "step": 22690 + }, + { + "epoch": 0.04585543619226154, + "grad_norm": 19.70734977722168, + "learning_rate": 4.540000000000001e-06, + "loss": 32.5144, + "step": 22700 + }, + { + "epoch": 0.04587563682494536, + "grad_norm": 577.8696899414062, + "learning_rate": 4.542e-06, + "loss": 36.3954, + "step": 22710 + }, + { + "epoch": 0.04589583745762917, + "grad_norm": 774.3411865234375, + "learning_rate": 4.544000000000001e-06, + "loss": 37.3928, + "step": 22720 + }, + { + "epoch": 0.045916038090312986, + "grad_norm": 290.1728515625, + "learning_rate": 4.546e-06, + "loss": 27.9667, + "step": 22730 + }, + { + "epoch": 0.045936238722996806, + "grad_norm": 558.4825439453125, + "learning_rate": 4.548e-06, + "loss": 12.0116, + "step": 22740 + }, + { + "epoch": 0.04595643935568062, + "grad_norm": 616.5971069335938, + "learning_rate": 4.5500000000000005e-06, + "loss": 23.0712, + "step": 22750 + }, + { + "epoch": 0.04597663998836444, + "grad_norm": 227.1874542236328, + "learning_rate": 4.552000000000001e-06, + "loss": 24.5525, + "step": 22760 + }, + { + "epoch": 0.04599684062104825, + "grad_norm": 110.66157531738281, + "learning_rate": 4.554000000000001e-06, + "loss": 34.7334, + "step": 22770 + }, + { + "epoch": 0.046017041253732065, + "grad_norm": 309.23272705078125, + "learning_rate": 4.556e-06, + "loss": 25.0262, + "step": 22780 + }, + { + "epoch": 0.046037241886415885, + "grad_norm": 504.0193786621094, + "learning_rate": 4.558e-06, + "loss": 19.2853, + "step": 22790 + }, + { + "epoch": 0.0460574425190997, + "grad_norm": 295.9443054199219, + "learning_rate": 4.56e-06, + "loss": 30.692, + "step": 22800 + }, + { + "epoch": 0.04607764315178351, + "grad_norm": 507.11151123046875, + "learning_rate": 4.5620000000000005e-06, + "loss": 25.5672, + "step": 22810 + }, + { + "epoch": 0.04609784378446733, + "grad_norm": 558.9776611328125, + "learning_rate": 
4.564e-06, + "loss": 23.321, + "step": 22820 + }, + { + "epoch": 0.046118044417151144, + "grad_norm": 350.1549987792969, + "learning_rate": 4.566000000000001e-06, + "loss": 34.0735, + "step": 22830 + }, + { + "epoch": 0.046138245049834964, + "grad_norm": 663.969970703125, + "learning_rate": 4.568e-06, + "loss": 29.8717, + "step": 22840 + }, + { + "epoch": 0.04615844568251878, + "grad_norm": 124.18944549560547, + "learning_rate": 4.57e-06, + "loss": 24.4411, + "step": 22850 + }, + { + "epoch": 0.04617864631520259, + "grad_norm": 773.56591796875, + "learning_rate": 4.5720000000000004e-06, + "loss": 31.0459, + "step": 22860 + }, + { + "epoch": 0.04619884694788641, + "grad_norm": 1089.1876220703125, + "learning_rate": 4.574e-06, + "loss": 20.154, + "step": 22870 + }, + { + "epoch": 0.04621904758057022, + "grad_norm": 657.5020751953125, + "learning_rate": 4.576000000000001e-06, + "loss": 30.6423, + "step": 22880 + }, + { + "epoch": 0.046239248213254036, + "grad_norm": 285.689697265625, + "learning_rate": 4.578e-06, + "loss": 23.1769, + "step": 22890 + }, + { + "epoch": 0.046259448845937856, + "grad_norm": 456.78521728515625, + "learning_rate": 4.58e-06, + "loss": 21.8306, + "step": 22900 + }, + { + "epoch": 0.04627964947862167, + "grad_norm": 617.4722900390625, + "learning_rate": 4.582e-06, + "loss": 29.9266, + "step": 22910 + }, + { + "epoch": 0.04629985011130549, + "grad_norm": 425.8707275390625, + "learning_rate": 4.5840000000000005e-06, + "loss": 26.158, + "step": 22920 + }, + { + "epoch": 0.0463200507439893, + "grad_norm": 545.0387573242188, + "learning_rate": 4.586000000000001e-06, + "loss": 31.1487, + "step": 22930 + }, + { + "epoch": 0.046340251376673115, + "grad_norm": 751.0917358398438, + "learning_rate": 4.588e-06, + "loss": 17.7194, + "step": 22940 + }, + { + "epoch": 0.046360452009356935, + "grad_norm": 545.7406616210938, + "learning_rate": 4.590000000000001e-06, + "loss": 21.742, + "step": 22950 + }, + { + "epoch": 0.04638065264204075, + "grad_norm": 634.4990844726562, + "learning_rate": 4.592e-06, + "loss": 40.033, + "step": 22960 + }, + { + "epoch": 0.04640085327472456, + "grad_norm": 299.349365234375, + "learning_rate": 4.594e-06, + "loss": 23.369, + "step": 22970 + }, + { + "epoch": 0.04642105390740838, + "grad_norm": 407.2045593261719, + "learning_rate": 4.5960000000000006e-06, + "loss": 22.4216, + "step": 22980 + }, + { + "epoch": 0.046441254540092194, + "grad_norm": 361.1532897949219, + "learning_rate": 4.598e-06, + "loss": 20.676, + "step": 22990 + }, + { + "epoch": 0.046461455172776014, + "grad_norm": 962.6221923828125, + "learning_rate": 4.600000000000001e-06, + "loss": 29.557, + "step": 23000 + }, + { + "epoch": 0.04648165580545983, + "grad_norm": 514.6326293945312, + "learning_rate": 4.602e-06, + "loss": 29.2714, + "step": 23010 + }, + { + "epoch": 0.04650185643814364, + "grad_norm": 532.843505859375, + "learning_rate": 4.604e-06, + "loss": 20.6181, + "step": 23020 + }, + { + "epoch": 0.04652205707082746, + "grad_norm": 177.03302001953125, + "learning_rate": 4.6060000000000005e-06, + "loss": 28.7298, + "step": 23030 + }, + { + "epoch": 0.046542257703511274, + "grad_norm": 214.79881286621094, + "learning_rate": 4.608000000000001e-06, + "loss": 18.3866, + "step": 23040 + }, + { + "epoch": 0.04656245833619509, + "grad_norm": 517.9189453125, + "learning_rate": 4.610000000000001e-06, + "loss": 25.5109, + "step": 23050 + }, + { + "epoch": 0.04658265896887891, + "grad_norm": 599.70068359375, + "learning_rate": 4.612e-06, + "loss": 22.2792, + "step": 23060 + }, + { + "epoch": 
0.04660285960156272, + "grad_norm": 98.75985717773438, + "learning_rate": 4.614e-06, + "loss": 34.458, + "step": 23070 + }, + { + "epoch": 0.04662306023424654, + "grad_norm": 420.987548828125, + "learning_rate": 4.616e-06, + "loss": 20.0997, + "step": 23080 + }, + { + "epoch": 0.04664326086693035, + "grad_norm": 691.1615600585938, + "learning_rate": 4.6180000000000005e-06, + "loss": 20.6392, + "step": 23090 + }, + { + "epoch": 0.046663461499614166, + "grad_norm": 637.1849365234375, + "learning_rate": 4.620000000000001e-06, + "loss": 31.1125, + "step": 23100 + }, + { + "epoch": 0.046683662132297986, + "grad_norm": 816.533203125, + "learning_rate": 4.622e-06, + "loss": 46.0801, + "step": 23110 + }, + { + "epoch": 0.0467038627649818, + "grad_norm": 549.866455078125, + "learning_rate": 4.624e-06, + "loss": 24.4189, + "step": 23120 + }, + { + "epoch": 0.04672406339766561, + "grad_norm": 685.1592407226562, + "learning_rate": 4.626e-06, + "loss": 32.2336, + "step": 23130 + }, + { + "epoch": 0.04674426403034943, + "grad_norm": 319.0460510253906, + "learning_rate": 4.628e-06, + "loss": 23.6327, + "step": 23140 + }, + { + "epoch": 0.046764464663033245, + "grad_norm": 575.0809326171875, + "learning_rate": 4.6300000000000006e-06, + "loss": 19.5776, + "step": 23150 + }, + { + "epoch": 0.046784665295717065, + "grad_norm": 328.0433349609375, + "learning_rate": 4.632000000000001e-06, + "loss": 11.1229, + "step": 23160 + }, + { + "epoch": 0.04680486592840088, + "grad_norm": 133.4390411376953, + "learning_rate": 4.634e-06, + "loss": 22.5893, + "step": 23170 + }, + { + "epoch": 0.04682506656108469, + "grad_norm": 399.1858825683594, + "learning_rate": 4.636e-06, + "loss": 33.8585, + "step": 23180 + }, + { + "epoch": 0.04684526719376851, + "grad_norm": 256.7257080078125, + "learning_rate": 4.638e-06, + "loss": 22.3619, + "step": 23190 + }, + { + "epoch": 0.046865467826452324, + "grad_norm": 544.70361328125, + "learning_rate": 4.6400000000000005e-06, + "loss": 19.1737, + "step": 23200 + }, + { + "epoch": 0.04688566845913614, + "grad_norm": 787.859619140625, + "learning_rate": 4.642000000000001e-06, + "loss": 18.535, + "step": 23210 + }, + { + "epoch": 0.04690586909181996, + "grad_norm": 411.52203369140625, + "learning_rate": 4.644e-06, + "loss": 10.0353, + "step": 23220 + }, + { + "epoch": 0.04692606972450377, + "grad_norm": 543.5707397460938, + "learning_rate": 4.646000000000001e-06, + "loss": 22.038, + "step": 23230 + }, + { + "epoch": 0.04694627035718759, + "grad_norm": 508.89617919921875, + "learning_rate": 4.648e-06, + "loss": 21.708, + "step": 23240 + }, + { + "epoch": 0.0469664709898714, + "grad_norm": 1186.1513671875, + "learning_rate": 4.65e-06, + "loss": 35.502, + "step": 23250 + }, + { + "epoch": 0.046986671622555216, + "grad_norm": 785.4786987304688, + "learning_rate": 4.6520000000000005e-06, + "loss": 23.0378, + "step": 23260 + }, + { + "epoch": 0.047006872255239036, + "grad_norm": 331.0532531738281, + "learning_rate": 4.654e-06, + "loss": 37.4566, + "step": 23270 + }, + { + "epoch": 0.04702707288792285, + "grad_norm": 323.4093017578125, + "learning_rate": 4.656000000000001e-06, + "loss": 16.0199, + "step": 23280 + }, + { + "epoch": 0.04704727352060666, + "grad_norm": 464.79132080078125, + "learning_rate": 4.658e-06, + "loss": 29.0295, + "step": 23290 + }, + { + "epoch": 0.04706747415329048, + "grad_norm": 634.6414794921875, + "learning_rate": 4.66e-06, + "loss": 23.0239, + "step": 23300 + }, + { + "epoch": 0.047087674785974296, + "grad_norm": 317.646240234375, + "learning_rate": 
4.6620000000000004e-06, + "loss": 13.0783, + "step": 23310 + }, + { + "epoch": 0.047107875418658116, + "grad_norm": 131.8998260498047, + "learning_rate": 4.664000000000001e-06, + "loss": 22.0581, + "step": 23320 + }, + { + "epoch": 0.04712807605134193, + "grad_norm": 505.8997497558594, + "learning_rate": 4.666000000000001e-06, + "loss": 15.9947, + "step": 23330 + }, + { + "epoch": 0.04714827668402574, + "grad_norm": 349.62799072265625, + "learning_rate": 4.668e-06, + "loss": 23.1838, + "step": 23340 + }, + { + "epoch": 0.04716847731670956, + "grad_norm": 415.26513671875, + "learning_rate": 4.670000000000001e-06, + "loss": 44.291, + "step": 23350 + }, + { + "epoch": 0.047188677949393375, + "grad_norm": 591.033203125, + "learning_rate": 4.672e-06, + "loss": 62.548, + "step": 23360 + }, + { + "epoch": 0.04720887858207719, + "grad_norm": 685.7374877929688, + "learning_rate": 4.6740000000000005e-06, + "loss": 32.069, + "step": 23370 + }, + { + "epoch": 0.04722907921476101, + "grad_norm": 864.9512329101562, + "learning_rate": 4.676000000000001e-06, + "loss": 62.1026, + "step": 23380 + }, + { + "epoch": 0.04724927984744482, + "grad_norm": 923.1744995117188, + "learning_rate": 4.678e-06, + "loss": 27.0128, + "step": 23390 + }, + { + "epoch": 0.04726948048012864, + "grad_norm": 353.6065673828125, + "learning_rate": 4.680000000000001e-06, + "loss": 19.9927, + "step": 23400 + }, + { + "epoch": 0.047289681112812454, + "grad_norm": 1081.422119140625, + "learning_rate": 4.682e-06, + "loss": 22.436, + "step": 23410 + }, + { + "epoch": 0.04730988174549627, + "grad_norm": 374.8186340332031, + "learning_rate": 4.684e-06, + "loss": 18.5459, + "step": 23420 + }, + { + "epoch": 0.04733008237818009, + "grad_norm": 409.6661376953125, + "learning_rate": 4.6860000000000005e-06, + "loss": 27.5921, + "step": 23430 + }, + { + "epoch": 0.0473502830108639, + "grad_norm": 544.1531372070312, + "learning_rate": 4.688000000000001e-06, + "loss": 20.9284, + "step": 23440 + }, + { + "epoch": 0.04737048364354771, + "grad_norm": 724.7265014648438, + "learning_rate": 4.69e-06, + "loss": 35.8183, + "step": 23450 + }, + { + "epoch": 0.04739068427623153, + "grad_norm": 342.1605224609375, + "learning_rate": 4.692e-06, + "loss": 19.5327, + "step": 23460 + }, + { + "epoch": 0.047410884908915346, + "grad_norm": 850.796630859375, + "learning_rate": 4.694e-06, + "loss": 22.3838, + "step": 23470 + }, + { + "epoch": 0.047431085541599166, + "grad_norm": 171.35581970214844, + "learning_rate": 4.6960000000000004e-06, + "loss": 21.14, + "step": 23480 + }, + { + "epoch": 0.04745128617428298, + "grad_norm": 156.30577087402344, + "learning_rate": 4.698000000000001e-06, + "loss": 23.0832, + "step": 23490 + }, + { + "epoch": 0.04747148680696679, + "grad_norm": 580.611083984375, + "learning_rate": 4.7e-06, + "loss": 29.439, + "step": 23500 + }, + { + "epoch": 0.04749168743965061, + "grad_norm": 407.2830505371094, + "learning_rate": 4.702e-06, + "loss": 16.5055, + "step": 23510 + }, + { + "epoch": 0.047511888072334425, + "grad_norm": 446.4722900390625, + "learning_rate": 4.704e-06, + "loss": 17.8526, + "step": 23520 + }, + { + "epoch": 0.04753208870501824, + "grad_norm": 509.90594482421875, + "learning_rate": 4.706e-06, + "loss": 25.0278, + "step": 23530 + }, + { + "epoch": 0.04755228933770206, + "grad_norm": 1361.8035888671875, + "learning_rate": 4.7080000000000005e-06, + "loss": 37.3306, + "step": 23540 + }, + { + "epoch": 0.04757248997038587, + "grad_norm": 82.47450256347656, + "learning_rate": 4.71e-06, + "loss": 9.7433, + "step": 23550 + }, + { 
+ "epoch": 0.04759269060306969, + "grad_norm": 530.8025512695312, + "learning_rate": 4.712000000000001e-06, + "loss": 34.0402, + "step": 23560 + }, + { + "epoch": 0.047612891235753504, + "grad_norm": 420.6856689453125, + "learning_rate": 4.714e-06, + "loss": 25.9908, + "step": 23570 + }, + { + "epoch": 0.04763309186843732, + "grad_norm": 506.2064514160156, + "learning_rate": 4.716e-06, + "loss": 26.0172, + "step": 23580 + }, + { + "epoch": 0.04765329250112114, + "grad_norm": 558.8671264648438, + "learning_rate": 4.718e-06, + "loss": 26.991, + "step": 23590 + }, + { + "epoch": 0.04767349313380495, + "grad_norm": 188.64163208007812, + "learning_rate": 4.7200000000000005e-06, + "loss": 26.4591, + "step": 23600 + }, + { + "epoch": 0.047693693766488764, + "grad_norm": 831.897216796875, + "learning_rate": 4.722000000000001e-06, + "loss": 27.7616, + "step": 23610 + }, + { + "epoch": 0.047713894399172584, + "grad_norm": 247.0804901123047, + "learning_rate": 4.724e-06, + "loss": 32.929, + "step": 23620 + }, + { + "epoch": 0.0477340950318564, + "grad_norm": 682.9000244140625, + "learning_rate": 4.726000000000001e-06, + "loss": 29.1023, + "step": 23630 + }, + { + "epoch": 0.04775429566454022, + "grad_norm": 476.44012451171875, + "learning_rate": 4.728e-06, + "loss": 23.3636, + "step": 23640 + }, + { + "epoch": 0.04777449629722403, + "grad_norm": 297.98577880859375, + "learning_rate": 4.7300000000000005e-06, + "loss": 20.2331, + "step": 23650 + }, + { + "epoch": 0.04779469692990784, + "grad_norm": 553.1089477539062, + "learning_rate": 4.732000000000001e-06, + "loss": 40.8277, + "step": 23660 + }, + { + "epoch": 0.04781489756259166, + "grad_norm": 514.8232421875, + "learning_rate": 4.734e-06, + "loss": 36.9933, + "step": 23670 + }, + { + "epoch": 0.047835098195275476, + "grad_norm": 709.031494140625, + "learning_rate": 4.736000000000001e-06, + "loss": 22.8072, + "step": 23680 + }, + { + "epoch": 0.04785529882795929, + "grad_norm": 531.7922973632812, + "learning_rate": 4.738e-06, + "loss": 17.322, + "step": 23690 + }, + { + "epoch": 0.04787549946064311, + "grad_norm": 525.7501831054688, + "learning_rate": 4.74e-06, + "loss": 26.0231, + "step": 23700 + }, + { + "epoch": 0.04789570009332692, + "grad_norm": 724.34619140625, + "learning_rate": 4.7420000000000005e-06, + "loss": 21.1959, + "step": 23710 + }, + { + "epoch": 0.04791590072601074, + "grad_norm": 863.712646484375, + "learning_rate": 4.744000000000001e-06, + "loss": 26.1296, + "step": 23720 + }, + { + "epoch": 0.047936101358694555, + "grad_norm": 200.90933227539062, + "learning_rate": 4.746000000000001e-06, + "loss": 29.482, + "step": 23730 + }, + { + "epoch": 0.04795630199137837, + "grad_norm": 465.8497009277344, + "learning_rate": 4.748e-06, + "loss": 34.7471, + "step": 23740 + }, + { + "epoch": 0.04797650262406219, + "grad_norm": 517.1136474609375, + "learning_rate": 4.75e-06, + "loss": 29.8758, + "step": 23750 + }, + { + "epoch": 0.047996703256746, + "grad_norm": 613.1840209960938, + "learning_rate": 4.752e-06, + "loss": 43.5371, + "step": 23760 + }, + { + "epoch": 0.048016903889429814, + "grad_norm": 957.7688598632812, + "learning_rate": 4.7540000000000006e-06, + "loss": 22.8681, + "step": 23770 + }, + { + "epoch": 0.048037104522113634, + "grad_norm": 539.5322265625, + "learning_rate": 4.756000000000001e-06, + "loss": 41.239, + "step": 23780 + }, + { + "epoch": 0.04805730515479745, + "grad_norm": 495.3214416503906, + "learning_rate": 4.758e-06, + "loss": 24.3765, + "step": 23790 + }, + { + "epoch": 0.04807750578748127, + "grad_norm": 
4683.61572265625, + "learning_rate": 4.76e-06, + "loss": 50.0535, + "step": 23800 + }, + { + "epoch": 0.04809770642016508, + "grad_norm": 551.005615234375, + "learning_rate": 4.762e-06, + "loss": 26.6315, + "step": 23810 + }, + { + "epoch": 0.04811790705284889, + "grad_norm": 546.5647583007812, + "learning_rate": 4.7640000000000005e-06, + "loss": 21.7942, + "step": 23820 + }, + { + "epoch": 0.04813810768553271, + "grad_norm": 450.4869689941406, + "learning_rate": 4.766000000000001e-06, + "loss": 14.371, + "step": 23830 + }, + { + "epoch": 0.048158308318216526, + "grad_norm": 794.6976318359375, + "learning_rate": 4.768000000000001e-06, + "loss": 26.081, + "step": 23840 + }, + { + "epoch": 0.04817850895090034, + "grad_norm": 412.3992919921875, + "learning_rate": 4.77e-06, + "loss": 25.6854, + "step": 23850 + }, + { + "epoch": 0.04819870958358416, + "grad_norm": 484.59619140625, + "learning_rate": 4.772e-06, + "loss": 24.9677, + "step": 23860 + }, + { + "epoch": 0.04821891021626797, + "grad_norm": 381.2744445800781, + "learning_rate": 4.774e-06, + "loss": 20.9764, + "step": 23870 + }, + { + "epoch": 0.04823911084895179, + "grad_norm": 279.7530212402344, + "learning_rate": 4.7760000000000005e-06, + "loss": 34.305, + "step": 23880 + }, + { + "epoch": 0.048259311481635606, + "grad_norm": 921.8336181640625, + "learning_rate": 4.778000000000001e-06, + "loss": 45.3881, + "step": 23890 + }, + { + "epoch": 0.04827951211431942, + "grad_norm": 784.5956420898438, + "learning_rate": 4.78e-06, + "loss": 22.424, + "step": 23900 + }, + { + "epoch": 0.04829971274700324, + "grad_norm": 574.390380859375, + "learning_rate": 4.782e-06, + "loss": 23.9142, + "step": 23910 + }, + { + "epoch": 0.04831991337968705, + "grad_norm": 546.51416015625, + "learning_rate": 4.784e-06, + "loss": 22.9982, + "step": 23920 + }, + { + "epoch": 0.048340114012370865, + "grad_norm": 187.9654083251953, + "learning_rate": 4.7860000000000004e-06, + "loss": 13.9286, + "step": 23930 + }, + { + "epoch": 0.048360314645054685, + "grad_norm": 343.1470947265625, + "learning_rate": 4.7880000000000006e-06, + "loss": 21.124, + "step": 23940 + }, + { + "epoch": 0.0483805152777385, + "grad_norm": 603.2246704101562, + "learning_rate": 4.79e-06, + "loss": 27.6511, + "step": 23950 + }, + { + "epoch": 0.04840071591042232, + "grad_norm": 338.4572448730469, + "learning_rate": 4.792000000000001e-06, + "loss": 28.9174, + "step": 23960 + }, + { + "epoch": 0.04842091654310613, + "grad_norm": 942.7728271484375, + "learning_rate": 4.794e-06, + "loss": 23.8593, + "step": 23970 + }, + { + "epoch": 0.048441117175789944, + "grad_norm": 911.1484375, + "learning_rate": 4.796e-06, + "loss": 53.2721, + "step": 23980 + }, + { + "epoch": 0.048461317808473764, + "grad_norm": 342.5573425292969, + "learning_rate": 4.7980000000000005e-06, + "loss": 22.4286, + "step": 23990 + }, + { + "epoch": 0.04848151844115758, + "grad_norm": 232.32217407226562, + "learning_rate": 4.800000000000001e-06, + "loss": 31.6875, + "step": 24000 + }, + { + "epoch": 0.04850171907384139, + "grad_norm": 1032.619873046875, + "learning_rate": 4.802000000000001e-06, + "loss": 29.0297, + "step": 24010 + }, + { + "epoch": 0.04852191970652521, + "grad_norm": 470.9522705078125, + "learning_rate": 4.804e-06, + "loss": 25.9012, + "step": 24020 + }, + { + "epoch": 0.04854212033920902, + "grad_norm": 280.8020324707031, + "learning_rate": 4.806000000000001e-06, + "loss": 18.4737, + "step": 24030 + }, + { + "epoch": 0.04856232097189284, + "grad_norm": 607.37158203125, + "learning_rate": 4.808e-06, + "loss": 
15.5083, + "step": 24040 + }, + { + "epoch": 0.048582521604576656, + "grad_norm": 206.01007080078125, + "learning_rate": 4.8100000000000005e-06, + "loss": 27.6266, + "step": 24050 + }, + { + "epoch": 0.04860272223726047, + "grad_norm": 217.52645874023438, + "learning_rate": 4.812000000000001e-06, + "loss": 51.8305, + "step": 24060 + }, + { + "epoch": 0.04862292286994429, + "grad_norm": 450.17938232421875, + "learning_rate": 4.814e-06, + "loss": 26.5461, + "step": 24070 + }, + { + "epoch": 0.0486431235026281, + "grad_norm": 2200.3466796875, + "learning_rate": 4.816e-06, + "loss": 43.0113, + "step": 24080 + }, + { + "epoch": 0.048663324135311915, + "grad_norm": 487.3904724121094, + "learning_rate": 4.818e-06, + "loss": 37.733, + "step": 24090 + }, + { + "epoch": 0.048683524767995735, + "grad_norm": 150.5276336669922, + "learning_rate": 4.8200000000000004e-06, + "loss": 19.6755, + "step": 24100 + }, + { + "epoch": 0.04870372540067955, + "grad_norm": 505.09893798828125, + "learning_rate": 4.822000000000001e-06, + "loss": 36.4785, + "step": 24110 + }, + { + "epoch": 0.04872392603336337, + "grad_norm": 1320.265625, + "learning_rate": 4.824000000000001e-06, + "loss": 22.6899, + "step": 24120 + }, + { + "epoch": 0.04874412666604718, + "grad_norm": 425.0314025878906, + "learning_rate": 4.826e-06, + "loss": 16.7171, + "step": 24130 + }, + { + "epoch": 0.048764327298730994, + "grad_norm": 572.2984619140625, + "learning_rate": 4.828e-06, + "loss": 30.4576, + "step": 24140 + }, + { + "epoch": 0.048784527931414814, + "grad_norm": 663.1596069335938, + "learning_rate": 4.83e-06, + "loss": 24.7246, + "step": 24150 + }, + { + "epoch": 0.04880472856409863, + "grad_norm": 497.4083251953125, + "learning_rate": 4.8320000000000005e-06, + "loss": 23.1729, + "step": 24160 + }, + { + "epoch": 0.04882492919678244, + "grad_norm": 407.599609375, + "learning_rate": 4.834000000000001e-06, + "loss": 33.097, + "step": 24170 + }, + { + "epoch": 0.04884512982946626, + "grad_norm": 864.8488159179688, + "learning_rate": 4.836e-06, + "loss": 50.6225, + "step": 24180 + }, + { + "epoch": 0.048865330462150074, + "grad_norm": 681.3571166992188, + "learning_rate": 4.838e-06, + "loss": 32.4285, + "step": 24190 + }, + { + "epoch": 0.048885531094833894, + "grad_norm": 503.53436279296875, + "learning_rate": 4.84e-06, + "loss": 33.0225, + "step": 24200 + }, + { + "epoch": 0.04890573172751771, + "grad_norm": 1192.0885009765625, + "learning_rate": 4.842e-06, + "loss": 30.067, + "step": 24210 + }, + { + "epoch": 0.04892593236020152, + "grad_norm": 1693.944091796875, + "learning_rate": 4.8440000000000005e-06, + "loss": 30.9601, + "step": 24220 + }, + { + "epoch": 0.04894613299288534, + "grad_norm": 1430.281005859375, + "learning_rate": 4.846e-06, + "loss": 31.5682, + "step": 24230 + }, + { + "epoch": 0.04896633362556915, + "grad_norm": 862.6809692382812, + "learning_rate": 4.848000000000001e-06, + "loss": 27.8008, + "step": 24240 + }, + { + "epoch": 0.048986534258252966, + "grad_norm": 631.509033203125, + "learning_rate": 4.85e-06, + "loss": 23.8648, + "step": 24250 + }, + { + "epoch": 0.049006734890936786, + "grad_norm": 606.4420776367188, + "learning_rate": 4.852e-06, + "loss": 28.055, + "step": 24260 + }, + { + "epoch": 0.0490269355236206, + "grad_norm": 348.8546142578125, + "learning_rate": 4.8540000000000005e-06, + "loss": 30.1481, + "step": 24270 + }, + { + "epoch": 0.04904713615630442, + "grad_norm": 219.95846557617188, + "learning_rate": 4.856e-06, + "loss": 27.218, + "step": 24280 + }, + { + "epoch": 0.04906733678898823, + 
"grad_norm": 160.05877685546875, + "learning_rate": 4.858000000000001e-06, + "loss": 31.1108, + "step": 24290 + }, + { + "epoch": 0.049087537421672045, + "grad_norm": 630.1192626953125, + "learning_rate": 4.86e-06, + "loss": 33.9822, + "step": 24300 + }, + { + "epoch": 0.049107738054355865, + "grad_norm": 575.1064453125, + "learning_rate": 4.862e-06, + "loss": 29.9274, + "step": 24310 + }, + { + "epoch": 0.04912793868703968, + "grad_norm": 3472.619140625, + "learning_rate": 4.864e-06, + "loss": 46.5845, + "step": 24320 + }, + { + "epoch": 0.04914813931972349, + "grad_norm": 1044.9791259765625, + "learning_rate": 4.8660000000000005e-06, + "loss": 38.5026, + "step": 24330 + }, + { + "epoch": 0.04916833995240731, + "grad_norm": 49.58512496948242, + "learning_rate": 4.868000000000001e-06, + "loss": 24.5532, + "step": 24340 + }, + { + "epoch": 0.049188540585091124, + "grad_norm": 404.6474304199219, + "learning_rate": 4.87e-06, + "loss": 35.5195, + "step": 24350 + }, + { + "epoch": 0.04920874121777494, + "grad_norm": 975.424560546875, + "learning_rate": 4.872000000000001e-06, + "loss": 25.8168, + "step": 24360 + }, + { + "epoch": 0.04922894185045876, + "grad_norm": 822.1325073242188, + "learning_rate": 4.874e-06, + "loss": 46.778, + "step": 24370 + }, + { + "epoch": 0.04924914248314257, + "grad_norm": 615.5921630859375, + "learning_rate": 4.876e-06, + "loss": 20.9036, + "step": 24380 + }, + { + "epoch": 0.04926934311582639, + "grad_norm": 437.8776550292969, + "learning_rate": 4.8780000000000006e-06, + "loss": 30.9095, + "step": 24390 + }, + { + "epoch": 0.0492895437485102, + "grad_norm": 321.6142883300781, + "learning_rate": 4.880000000000001e-06, + "loss": 19.3862, + "step": 24400 + }, + { + "epoch": 0.049309744381194016, + "grad_norm": 385.64056396484375, + "learning_rate": 4.882000000000001e-06, + "loss": 24.997, + "step": 24410 + }, + { + "epoch": 0.049329945013877836, + "grad_norm": 735.8389892578125, + "learning_rate": 4.884e-06, + "loss": 29.0518, + "step": 24420 + }, + { + "epoch": 0.04935014564656165, + "grad_norm": 293.65618896484375, + "learning_rate": 4.886e-06, + "loss": 23.6545, + "step": 24430 + }, + { + "epoch": 0.04937034627924546, + "grad_norm": 531.4093017578125, + "learning_rate": 4.8880000000000005e-06, + "loss": 19.9625, + "step": 24440 + }, + { + "epoch": 0.04939054691192928, + "grad_norm": 592.0188598632812, + "learning_rate": 4.890000000000001e-06, + "loss": 19.9698, + "step": 24450 + }, + { + "epoch": 0.049410747544613096, + "grad_norm": 1100.807861328125, + "learning_rate": 4.892000000000001e-06, + "loss": 42.1264, + "step": 24460 + }, + { + "epoch": 0.049430948177296916, + "grad_norm": 1083.6982421875, + "learning_rate": 4.894e-06, + "loss": 40.0795, + "step": 24470 + }, + { + "epoch": 0.04945114880998073, + "grad_norm": 481.7140808105469, + "learning_rate": 4.896e-06, + "loss": 25.7213, + "step": 24480 + }, + { + "epoch": 0.04947134944266454, + "grad_norm": 319.0828552246094, + "learning_rate": 4.898e-06, + "loss": 48.0054, + "step": 24490 + }, + { + "epoch": 0.04949155007534836, + "grad_norm": 708.9186401367188, + "learning_rate": 4.9000000000000005e-06, + "loss": 26.4516, + "step": 24500 + }, + { + "epoch": 0.049511750708032175, + "grad_norm": 923.2367553710938, + "learning_rate": 4.902000000000001e-06, + "loss": 33.8592, + "step": 24510 + }, + { + "epoch": 0.04953195134071599, + "grad_norm": 22.35181999206543, + "learning_rate": 4.904000000000001e-06, + "loss": 21.0809, + "step": 24520 + }, + { + "epoch": 0.04955215197339981, + "grad_norm": 1950.263671875, + 
"learning_rate": 4.906e-06, + "loss": 23.3087, + "step": 24530 + }, + { + "epoch": 0.04957235260608362, + "grad_norm": 792.556884765625, + "learning_rate": 4.908e-06, + "loss": 21.0367, + "step": 24540 + }, + { + "epoch": 0.04959255323876744, + "grad_norm": 700.4295654296875, + "learning_rate": 4.9100000000000004e-06, + "loss": 15.7874, + "step": 24550 + }, + { + "epoch": 0.049612753871451254, + "grad_norm": 370.0625, + "learning_rate": 4.9120000000000006e-06, + "loss": 31.146, + "step": 24560 + }, + { + "epoch": 0.04963295450413507, + "grad_norm": 179.5491180419922, + "learning_rate": 4.914000000000001e-06, + "loss": 18.8988, + "step": 24570 + }, + { + "epoch": 0.04965315513681889, + "grad_norm": 284.713623046875, + "learning_rate": 4.916e-06, + "loss": 15.084, + "step": 24580 + }, + { + "epoch": 0.0496733557695027, + "grad_norm": 527.2337646484375, + "learning_rate": 4.918e-06, + "loss": 36.8712, + "step": 24590 + }, + { + "epoch": 0.04969355640218651, + "grad_norm": 243.3169708251953, + "learning_rate": 4.92e-06, + "loss": 21.4062, + "step": 24600 + }, + { + "epoch": 0.04971375703487033, + "grad_norm": 437.5592041015625, + "learning_rate": 4.9220000000000005e-06, + "loss": 18.7186, + "step": 24610 + }, + { + "epoch": 0.049733957667554146, + "grad_norm": 833.3284301757812, + "learning_rate": 4.924000000000001e-06, + "loss": 23.0917, + "step": 24620 + }, + { + "epoch": 0.049754158300237966, + "grad_norm": 586.6253662109375, + "learning_rate": 4.926e-06, + "loss": 13.9485, + "step": 24630 + }, + { + "epoch": 0.04977435893292178, + "grad_norm": 841.759521484375, + "learning_rate": 4.928000000000001e-06, + "loss": 35.0147, + "step": 24640 + }, + { + "epoch": 0.04979455956560559, + "grad_norm": 549.2539672851562, + "learning_rate": 4.93e-06, + "loss": 34.2477, + "step": 24650 + }, + { + "epoch": 0.04981476019828941, + "grad_norm": 929.2364501953125, + "learning_rate": 4.932e-06, + "loss": 23.9616, + "step": 24660 + }, + { + "epoch": 0.049834960830973225, + "grad_norm": 924.4048461914062, + "learning_rate": 4.9340000000000005e-06, + "loss": 50.0695, + "step": 24670 + }, + { + "epoch": 0.04985516146365704, + "grad_norm": 229.69725036621094, + "learning_rate": 4.936e-06, + "loss": 28.8764, + "step": 24680 + }, + { + "epoch": 0.04987536209634086, + "grad_norm": 772.1044921875, + "learning_rate": 4.938000000000001e-06, + "loss": 22.3637, + "step": 24690 + }, + { + "epoch": 0.04989556272902467, + "grad_norm": 261.3887939453125, + "learning_rate": 4.94e-06, + "loss": 34.9425, + "step": 24700 + }, + { + "epoch": 0.04991576336170849, + "grad_norm": 172.681396484375, + "learning_rate": 4.942e-06, + "loss": 17.9873, + "step": 24710 + }, + { + "epoch": 0.049935963994392304, + "grad_norm": 762.8919677734375, + "learning_rate": 4.9440000000000004e-06, + "loss": 37.2343, + "step": 24720 + }, + { + "epoch": 0.04995616462707612, + "grad_norm": 111.18013763427734, + "learning_rate": 4.946000000000001e-06, + "loss": 30.2754, + "step": 24730 + }, + { + "epoch": 0.04997636525975994, + "grad_norm": 475.6956481933594, + "learning_rate": 4.948000000000001e-06, + "loss": 24.3305, + "step": 24740 + }, + { + "epoch": 0.04999656589244375, + "grad_norm": 369.7061767578125, + "learning_rate": 4.95e-06, + "loss": 25.6071, + "step": 24750 + }, + { + "epoch": 0.050016766525127564, + "grad_norm": 313.71014404296875, + "learning_rate": 4.952e-06, + "loss": 38.6992, + "step": 24760 + }, + { + "epoch": 0.050036967157811384, + "grad_norm": 1185.03857421875, + "learning_rate": 4.954e-06, + "loss": 42.283, + "step": 24770 + }, + { 
+ "epoch": 0.0500571677904952, + "grad_norm": 1039.0086669921875, + "learning_rate": 4.9560000000000005e-06, + "loss": 32.5469, + "step": 24780 + }, + { + "epoch": 0.05007736842317902, + "grad_norm": 208.5982666015625, + "learning_rate": 4.958000000000001e-06, + "loss": 37.9008, + "step": 24790 + }, + { + "epoch": 0.05009756905586283, + "grad_norm": 206.29354858398438, + "learning_rate": 4.960000000000001e-06, + "loss": 14.8712, + "step": 24800 + }, + { + "epoch": 0.05011776968854664, + "grad_norm": 524.4188232421875, + "learning_rate": 4.962e-06, + "loss": 32.2982, + "step": 24810 + }, + { + "epoch": 0.05013797032123046, + "grad_norm": 579.9808959960938, + "learning_rate": 4.964e-06, + "loss": 44.4891, + "step": 24820 + }, + { + "epoch": 0.050158170953914276, + "grad_norm": 1486.5931396484375, + "learning_rate": 4.966e-06, + "loss": 29.96, + "step": 24830 + }, + { + "epoch": 0.05017837158659809, + "grad_norm": 689.2452392578125, + "learning_rate": 4.9680000000000005e-06, + "loss": 30.3831, + "step": 24840 + }, + { + "epoch": 0.05019857221928191, + "grad_norm": 681.37353515625, + "learning_rate": 4.970000000000001e-06, + "loss": 37.4051, + "step": 24850 + }, + { + "epoch": 0.05021877285196572, + "grad_norm": 763.9005126953125, + "learning_rate": 4.972e-06, + "loss": 31.2682, + "step": 24860 + }, + { + "epoch": 0.05023897348464954, + "grad_norm": 898.73828125, + "learning_rate": 4.974e-06, + "loss": 33.807, + "step": 24870 + }, + { + "epoch": 0.050259174117333355, + "grad_norm": 997.1743774414062, + "learning_rate": 4.976e-06, + "loss": 32.9283, + "step": 24880 + }, + { + "epoch": 0.05027937475001717, + "grad_norm": 238.991943359375, + "learning_rate": 4.9780000000000005e-06, + "loss": 28.0845, + "step": 24890 + }, + { + "epoch": 0.05029957538270099, + "grad_norm": 625.0321655273438, + "learning_rate": 4.980000000000001e-06, + "loss": 19.4249, + "step": 24900 + }, + { + "epoch": 0.0503197760153848, + "grad_norm": 310.7481994628906, + "learning_rate": 4.982e-06, + "loss": 19.4228, + "step": 24910 + }, + { + "epoch": 0.050339976648068614, + "grad_norm": 370.5620422363281, + "learning_rate": 4.984000000000001e-06, + "loss": 30.3915, + "step": 24920 + }, + { + "epoch": 0.050360177280752434, + "grad_norm": 211.54971313476562, + "learning_rate": 4.986e-06, + "loss": 10.3256, + "step": 24930 + }, + { + "epoch": 0.05038037791343625, + "grad_norm": 2.3289971351623535, + "learning_rate": 4.988e-06, + "loss": 35.4139, + "step": 24940 + }, + { + "epoch": 0.05040057854612007, + "grad_norm": 155.1433868408203, + "learning_rate": 4.9900000000000005e-06, + "loss": 26.2599, + "step": 24950 + }, + { + "epoch": 0.05042077917880388, + "grad_norm": 20.515810012817383, + "learning_rate": 4.992e-06, + "loss": 30.6685, + "step": 24960 + }, + { + "epoch": 0.05044097981148769, + "grad_norm": 538.8347778320312, + "learning_rate": 4.994000000000001e-06, + "loss": 34.4724, + "step": 24970 + }, + { + "epoch": 0.05046118044417151, + "grad_norm": 293.76153564453125, + "learning_rate": 4.996e-06, + "loss": 29.6654, + "step": 24980 + }, + { + "epoch": 0.050481381076855326, + "grad_norm": 297.9040222167969, + "learning_rate": 4.998e-06, + "loss": 33.8897, + "step": 24990 + }, + { + "epoch": 0.05050158170953914, + "grad_norm": 362.0632629394531, + "learning_rate": 5e-06, + "loss": 12.0616, + "step": 25000 + }, + { + "epoch": 0.05052178234222296, + "grad_norm": 240.0549774169922, + "learning_rate": 5.0020000000000006e-06, + "loss": 17.6489, + "step": 25010 + }, + { + "epoch": 0.05054198297490677, + "grad_norm": 
312.6167297363281, + "learning_rate": 5.004e-06, + "loss": 29.2342, + "step": 25020 + }, + { + "epoch": 0.05056218360759059, + "grad_norm": 13.600961685180664, + "learning_rate": 5.006000000000001e-06, + "loss": 20.8579, + "step": 25030 + }, + { + "epoch": 0.050582384240274406, + "grad_norm": 524.6953125, + "learning_rate": 5.008000000000001e-06, + "loss": 36.6206, + "step": 25040 + }, + { + "epoch": 0.05060258487295822, + "grad_norm": 282.6379089355469, + "learning_rate": 5.01e-06, + "loss": 16.8534, + "step": 25050 + }, + { + "epoch": 0.05062278550564204, + "grad_norm": 256.9676208496094, + "learning_rate": 5.0120000000000005e-06, + "loss": 22.1196, + "step": 25060 + }, + { + "epoch": 0.05064298613832585, + "grad_norm": 330.0611572265625, + "learning_rate": 5.014e-06, + "loss": 18.613, + "step": 25070 + }, + { + "epoch": 0.050663186771009665, + "grad_norm": 2463.804931640625, + "learning_rate": 5.016000000000001e-06, + "loss": 59.9745, + "step": 25080 + }, + { + "epoch": 0.050683387403693485, + "grad_norm": 1185.2828369140625, + "learning_rate": 5.018000000000001e-06, + "loss": 39.1468, + "step": 25090 + }, + { + "epoch": 0.0507035880363773, + "grad_norm": 601.2512817382812, + "learning_rate": 5.02e-06, + "loss": 13.1898, + "step": 25100 + }, + { + "epoch": 0.05072378866906112, + "grad_norm": 360.78369140625, + "learning_rate": 5.022e-06, + "loss": 21.2782, + "step": 25110 + }, + { + "epoch": 0.05074398930174493, + "grad_norm": 2.3611438274383545, + "learning_rate": 5.024e-06, + "loss": 30.023, + "step": 25120 + }, + { + "epoch": 0.050764189934428744, + "grad_norm": 518.9421997070312, + "learning_rate": 5.026000000000001e-06, + "loss": 53.1602, + "step": 25130 + }, + { + "epoch": 0.050784390567112564, + "grad_norm": 327.6961975097656, + "learning_rate": 5.028000000000001e-06, + "loss": 24.3413, + "step": 25140 + }, + { + "epoch": 0.05080459119979638, + "grad_norm": 2383.177001953125, + "learning_rate": 5.03e-06, + "loss": 54.4062, + "step": 25150 + }, + { + "epoch": 0.05082479183248019, + "grad_norm": 389.3732604980469, + "learning_rate": 5.032e-06, + "loss": 22.387, + "step": 25160 + }, + { + "epoch": 0.05084499246516401, + "grad_norm": 52.716522216796875, + "learning_rate": 5.0339999999999996e-06, + "loss": 64.8972, + "step": 25170 + }, + { + "epoch": 0.05086519309784782, + "grad_norm": 324.30316162109375, + "learning_rate": 5.0360000000000006e-06, + "loss": 25.4915, + "step": 25180 + }, + { + "epoch": 0.05088539373053164, + "grad_norm": 478.4629211425781, + "learning_rate": 5.038000000000001e-06, + "loss": 16.3394, + "step": 25190 + }, + { + "epoch": 0.050905594363215456, + "grad_norm": 552.5181274414062, + "learning_rate": 5.04e-06, + "loss": 20.6003, + "step": 25200 + }, + { + "epoch": 0.05092579499589927, + "grad_norm": 479.90289306640625, + "learning_rate": 5.042e-06, + "loss": 18.2474, + "step": 25210 + }, + { + "epoch": 0.05094599562858309, + "grad_norm": 612.0270385742188, + "learning_rate": 5.044e-06, + "loss": 27.702, + "step": 25220 + }, + { + "epoch": 0.0509661962612669, + "grad_norm": 1635.6959228515625, + "learning_rate": 5.0460000000000005e-06, + "loss": 41.5422, + "step": 25230 + }, + { + "epoch": 0.050986396893950715, + "grad_norm": 454.4499816894531, + "learning_rate": 5.048000000000001e-06, + "loss": 31.0758, + "step": 25240 + }, + { + "epoch": 0.051006597526634535, + "grad_norm": 480.1141357421875, + "learning_rate": 5.050000000000001e-06, + "loss": 30.5071, + "step": 25250 + }, + { + "epoch": 0.05102679815931835, + "grad_norm": 433.2947082519531, + "learning_rate": 
5.052e-06, + "loss": 27.527, + "step": 25260 + }, + { + "epoch": 0.05104699879200217, + "grad_norm": 594.5740356445312, + "learning_rate": 5.054e-06, + "loss": 32.5962, + "step": 25270 + }, + { + "epoch": 0.05106719942468598, + "grad_norm": 227.5206756591797, + "learning_rate": 5.056000000000001e-06, + "loss": 24.3647, + "step": 25280 + }, + { + "epoch": 0.051087400057369795, + "grad_norm": 50.3040657043457, + "learning_rate": 5.0580000000000005e-06, + "loss": 31.5149, + "step": 25290 + }, + { + "epoch": 0.051107600690053615, + "grad_norm": 1063.2158203125, + "learning_rate": 5.060000000000001e-06, + "loss": 40.9574, + "step": 25300 + }, + { + "epoch": 0.05112780132273743, + "grad_norm": 0.0, + "learning_rate": 5.062e-06, + "loss": 14.2233, + "step": 25310 + }, + { + "epoch": 0.05114800195542124, + "grad_norm": 363.94091796875, + "learning_rate": 5.064e-06, + "loss": 25.1291, + "step": 25320 + }, + { + "epoch": 0.05116820258810506, + "grad_norm": 220.59275817871094, + "learning_rate": 5.066000000000001e-06, + "loss": 26.7395, + "step": 25330 + }, + { + "epoch": 0.051188403220788874, + "grad_norm": 968.5359497070312, + "learning_rate": 5.0680000000000004e-06, + "loss": 24.0608, + "step": 25340 + }, + { + "epoch": 0.051208603853472694, + "grad_norm": 459.5111999511719, + "learning_rate": 5.070000000000001e-06, + "loss": 27.4477, + "step": 25350 + }, + { + "epoch": 0.05122880448615651, + "grad_norm": 610.3914794921875, + "learning_rate": 5.072e-06, + "loss": 22.6033, + "step": 25360 + }, + { + "epoch": 0.05124900511884032, + "grad_norm": 192.24658203125, + "learning_rate": 5.074e-06, + "loss": 18.4317, + "step": 25370 + }, + { + "epoch": 0.05126920575152414, + "grad_norm": 610.5033569335938, + "learning_rate": 5.076000000000001e-06, + "loss": 35.5431, + "step": 25380 + }, + { + "epoch": 0.05128940638420795, + "grad_norm": 458.8445739746094, + "learning_rate": 5.078e-06, + "loss": 23.0244, + "step": 25390 + }, + { + "epoch": 0.051309607016891766, + "grad_norm": 78.25531005859375, + "learning_rate": 5.0800000000000005e-06, + "loss": 31.0752, + "step": 25400 + }, + { + "epoch": 0.051329807649575586, + "grad_norm": 114.08269500732422, + "learning_rate": 5.082000000000001e-06, + "loss": 17.9002, + "step": 25410 + }, + { + "epoch": 0.0513500082822594, + "grad_norm": 569.843994140625, + "learning_rate": 5.084e-06, + "loss": 30.2337, + "step": 25420 + }, + { + "epoch": 0.05137020891494322, + "grad_norm": 561.10107421875, + "learning_rate": 5.086000000000001e-06, + "loss": 14.2098, + "step": 25430 + }, + { + "epoch": 0.05139040954762703, + "grad_norm": 566.8888549804688, + "learning_rate": 5.088000000000001e-06, + "loss": 17.1384, + "step": 25440 + }, + { + "epoch": 0.051410610180310845, + "grad_norm": 1191.8035888671875, + "learning_rate": 5.09e-06, + "loss": 26.895, + "step": 25450 + }, + { + "epoch": 0.051430810812994665, + "grad_norm": 789.3928833007812, + "learning_rate": 5.0920000000000005e-06, + "loss": 30.4841, + "step": 25460 + }, + { + "epoch": 0.05145101144567848, + "grad_norm": 739.6887817382812, + "learning_rate": 5.094e-06, + "loss": 22.1056, + "step": 25470 + }, + { + "epoch": 0.05147121207836229, + "grad_norm": 736.8461303710938, + "learning_rate": 5.096000000000001e-06, + "loss": 24.0518, + "step": 25480 + }, + { + "epoch": 0.05149141271104611, + "grad_norm": 150.58872985839844, + "learning_rate": 5.098000000000001e-06, + "loss": 31.6776, + "step": 25490 + }, + { + "epoch": 0.051511613343729924, + "grad_norm": 275.2126159667969, + "learning_rate": 5.1e-06, + "loss": 14.6784, + "step": 
25500 + }, + { + "epoch": 0.051531813976413744, + "grad_norm": 519.9595336914062, + "learning_rate": 5.1020000000000004e-06, + "loss": 36.6511, + "step": 25510 + }, + { + "epoch": 0.05155201460909756, + "grad_norm": 542.5691528320312, + "learning_rate": 5.104e-06, + "loss": 23.9241, + "step": 25520 + }, + { + "epoch": 0.05157221524178137, + "grad_norm": 875.2158813476562, + "learning_rate": 5.106000000000001e-06, + "loss": 43.4997, + "step": 25530 + }, + { + "epoch": 0.05159241587446519, + "grad_norm": 1521.4261474609375, + "learning_rate": 5.108000000000001e-06, + "loss": 45.2179, + "step": 25540 + }, + { + "epoch": 0.051612616507149, + "grad_norm": 265.73333740234375, + "learning_rate": 5.11e-06, + "loss": 32.8597, + "step": 25550 + }, + { + "epoch": 0.051632817139832816, + "grad_norm": 205.95445251464844, + "learning_rate": 5.112e-06, + "loss": 31.428, + "step": 25560 + }, + { + "epoch": 0.051653017772516636, + "grad_norm": 840.8372192382812, + "learning_rate": 5.114e-06, + "loss": 42.3025, + "step": 25570 + }, + { + "epoch": 0.05167321840520045, + "grad_norm": 357.9036560058594, + "learning_rate": 5.116000000000001e-06, + "loss": 34.0856, + "step": 25580 + }, + { + "epoch": 0.05169341903788427, + "grad_norm": 545.6062622070312, + "learning_rate": 5.118000000000001e-06, + "loss": 60.5683, + "step": 25590 + }, + { + "epoch": 0.05171361967056808, + "grad_norm": 466.7431335449219, + "learning_rate": 5.12e-06, + "loss": 19.1757, + "step": 25600 + }, + { + "epoch": 0.051733820303251896, + "grad_norm": 368.5271301269531, + "learning_rate": 5.122e-06, + "loss": 40.7344, + "step": 25610 + }, + { + "epoch": 0.051754020935935716, + "grad_norm": 164.5128936767578, + "learning_rate": 5.124e-06, + "loss": 46.2509, + "step": 25620 + }, + { + "epoch": 0.05177422156861953, + "grad_norm": 1387.013427734375, + "learning_rate": 5.126e-06, + "loss": 30.2785, + "step": 25630 + }, + { + "epoch": 0.05179442220130334, + "grad_norm": 488.8163757324219, + "learning_rate": 5.128000000000001e-06, + "loss": 23.5856, + "step": 25640 + }, + { + "epoch": 0.05181462283398716, + "grad_norm": 358.9884033203125, + "learning_rate": 5.130000000000001e-06, + "loss": 44.6823, + "step": 25650 + }, + { + "epoch": 0.051834823466670975, + "grad_norm": 455.64697265625, + "learning_rate": 5.132e-06, + "loss": 35.8214, + "step": 25660 + }, + { + "epoch": 0.051855024099354795, + "grad_norm": 241.6067657470703, + "learning_rate": 5.134e-06, + "loss": 22.7348, + "step": 25670 + }, + { + "epoch": 0.05187522473203861, + "grad_norm": 388.8199157714844, + "learning_rate": 5.136e-06, + "loss": 32.5241, + "step": 25680 + }, + { + "epoch": 0.05189542536472242, + "grad_norm": 361.8164367675781, + "learning_rate": 5.138000000000001e-06, + "loss": 18.9053, + "step": 25690 + }, + { + "epoch": 0.05191562599740624, + "grad_norm": 522.1819458007812, + "learning_rate": 5.140000000000001e-06, + "loss": 29.0187, + "step": 25700 + }, + { + "epoch": 0.051935826630090054, + "grad_norm": 1628.5198974609375, + "learning_rate": 5.142e-06, + "loss": 25.0132, + "step": 25710 + }, + { + "epoch": 0.05195602726277387, + "grad_norm": 340.24664306640625, + "learning_rate": 5.144e-06, + "loss": 17.7403, + "step": 25720 + }, + { + "epoch": 0.05197622789545769, + "grad_norm": 355.582275390625, + "learning_rate": 5.1459999999999995e-06, + "loss": 27.1877, + "step": 25730 + }, + { + "epoch": 0.0519964285281415, + "grad_norm": 778.4319458007812, + "learning_rate": 5.1480000000000005e-06, + "loss": 28.3525, + "step": 25740 + }, + { + "epoch": 0.05201662916082532, + 
"grad_norm": 404.39862060546875, + "learning_rate": 5.150000000000001e-06, + "loss": 29.3748, + "step": 25750 + }, + { + "epoch": 0.05203682979350913, + "grad_norm": 9.620052337646484, + "learning_rate": 5.152e-06, + "loss": 29.4001, + "step": 25760 + }, + { + "epoch": 0.052057030426192946, + "grad_norm": 383.12115478515625, + "learning_rate": 5.154e-06, + "loss": 25.0565, + "step": 25770 + }, + { + "epoch": 0.052077231058876766, + "grad_norm": 1093.5860595703125, + "learning_rate": 5.156e-06, + "loss": 24.3118, + "step": 25780 + }, + { + "epoch": 0.05209743169156058, + "grad_norm": 761.9879150390625, + "learning_rate": 5.158e-06, + "loss": 43.068, + "step": 25790 + }, + { + "epoch": 0.05211763232424439, + "grad_norm": 204.88401794433594, + "learning_rate": 5.1600000000000006e-06, + "loss": 22.7605, + "step": 25800 + }, + { + "epoch": 0.05213783295692821, + "grad_norm": 465.07574462890625, + "learning_rate": 5.162000000000001e-06, + "loss": 32.0228, + "step": 25810 + }, + { + "epoch": 0.052158033589612025, + "grad_norm": 30.820846557617188, + "learning_rate": 5.164e-06, + "loss": 28.8089, + "step": 25820 + }, + { + "epoch": 0.052178234222295845, + "grad_norm": 315.1690979003906, + "learning_rate": 5.166e-06, + "loss": 31.5778, + "step": 25830 + }, + { + "epoch": 0.05219843485497966, + "grad_norm": 662.5512084960938, + "learning_rate": 5.168000000000001e-06, + "loss": 32.5336, + "step": 25840 + }, + { + "epoch": 0.05221863548766347, + "grad_norm": 359.843017578125, + "learning_rate": 5.1700000000000005e-06, + "loss": 31.0579, + "step": 25850 + }, + { + "epoch": 0.05223883612034729, + "grad_norm": 391.7662658691406, + "learning_rate": 5.172000000000001e-06, + "loss": 29.8749, + "step": 25860 + }, + { + "epoch": 0.052259036753031105, + "grad_norm": 750.6495361328125, + "learning_rate": 5.174e-06, + "loss": 26.085, + "step": 25870 + }, + { + "epoch": 0.05227923738571492, + "grad_norm": 456.728515625, + "learning_rate": 5.176e-06, + "loss": 27.9467, + "step": 25880 + }, + { + "epoch": 0.05229943801839874, + "grad_norm": 314.82476806640625, + "learning_rate": 5.178000000000001e-06, + "loss": 25.5998, + "step": 25890 + }, + { + "epoch": 0.05231963865108255, + "grad_norm": 866.3818969726562, + "learning_rate": 5.18e-06, + "loss": 39.6875, + "step": 25900 + }, + { + "epoch": 0.05233983928376637, + "grad_norm": 504.4011535644531, + "learning_rate": 5.1820000000000005e-06, + "loss": 40.7531, + "step": 25910 + }, + { + "epoch": 0.052360039916450184, + "grad_norm": 598.9783325195312, + "learning_rate": 5.184e-06, + "loss": 29.8502, + "step": 25920 + }, + { + "epoch": 0.052380240549134, + "grad_norm": 439.1234130859375, + "learning_rate": 5.186e-06, + "loss": 26.7706, + "step": 25930 + }, + { + "epoch": 0.05240044118181782, + "grad_norm": 307.6906433105469, + "learning_rate": 5.188000000000001e-06, + "loss": 21.9047, + "step": 25940 + }, + { + "epoch": 0.05242064181450163, + "grad_norm": 665.8407592773438, + "learning_rate": 5.19e-06, + "loss": 13.5805, + "step": 25950 + }, + { + "epoch": 0.05244084244718544, + "grad_norm": 433.3497009277344, + "learning_rate": 5.1920000000000004e-06, + "loss": 25.7976, + "step": 25960 + }, + { + "epoch": 0.05246104307986926, + "grad_norm": 1.4457974433898926, + "learning_rate": 5.194e-06, + "loss": 12.8288, + "step": 25970 + }, + { + "epoch": 0.052481243712553076, + "grad_norm": 427.34283447265625, + "learning_rate": 5.196e-06, + "loss": 23.3719, + "step": 25980 + }, + { + "epoch": 0.052501444345236896, + "grad_norm": 48.45802688598633, + "learning_rate": 
5.198000000000001e-06, + "loss": 33.9045, + "step": 25990 + }, + { + "epoch": 0.05252164497792071, + "grad_norm": 507.5506896972656, + "learning_rate": 5.2e-06, + "loss": 19.1086, + "step": 26000 + }, + { + "epoch": 0.05254184561060452, + "grad_norm": 601.8560791015625, + "learning_rate": 5.202e-06, + "loss": 22.5702, + "step": 26010 + }, + { + "epoch": 0.05256204624328834, + "grad_norm": 856.8712768554688, + "learning_rate": 5.2040000000000005e-06, + "loss": 31.4809, + "step": 26020 + }, + { + "epoch": 0.052582246875972155, + "grad_norm": 367.30010986328125, + "learning_rate": 5.206e-06, + "loss": 20.9368, + "step": 26030 + }, + { + "epoch": 0.05260244750865597, + "grad_norm": 485.85498046875, + "learning_rate": 5.208000000000001e-06, + "loss": 14.6849, + "step": 26040 + }, + { + "epoch": 0.05262264814133979, + "grad_norm": 573.361328125, + "learning_rate": 5.210000000000001e-06, + "loss": 15.6729, + "step": 26050 + }, + { + "epoch": 0.0526428487740236, + "grad_norm": 273.30010986328125, + "learning_rate": 5.212e-06, + "loss": 33.1244, + "step": 26060 + }, + { + "epoch": 0.05266304940670742, + "grad_norm": 379.7786560058594, + "learning_rate": 5.214e-06, + "loss": 22.4082, + "step": 26070 + }, + { + "epoch": 0.052683250039391234, + "grad_norm": 784.4563598632812, + "learning_rate": 5.216e-06, + "loss": 25.6582, + "step": 26080 + }, + { + "epoch": 0.05270345067207505, + "grad_norm": 449.5173645019531, + "learning_rate": 5.218000000000001e-06, + "loss": 25.8007, + "step": 26090 + }, + { + "epoch": 0.05272365130475887, + "grad_norm": 201.3383026123047, + "learning_rate": 5.220000000000001e-06, + "loss": 10.4071, + "step": 26100 + }, + { + "epoch": 0.05274385193744268, + "grad_norm": 658.666259765625, + "learning_rate": 5.222e-06, + "loss": 16.5268, + "step": 26110 + }, + { + "epoch": 0.05276405257012649, + "grad_norm": 557.2667236328125, + "learning_rate": 5.224e-06, + "loss": 26.9591, + "step": 26120 + }, + { + "epoch": 0.05278425320281031, + "grad_norm": 5351.18505859375, + "learning_rate": 5.226e-06, + "loss": 48.7041, + "step": 26130 + }, + { + "epoch": 0.052804453835494126, + "grad_norm": 1389.563720703125, + "learning_rate": 5.228000000000001e-06, + "loss": 47.4646, + "step": 26140 + }, + { + "epoch": 0.052824654468177946, + "grad_norm": 272.3363952636719, + "learning_rate": 5.230000000000001e-06, + "loss": 23.3891, + "step": 26150 + }, + { + "epoch": 0.05284485510086176, + "grad_norm": 575.9677124023438, + "learning_rate": 5.232e-06, + "loss": 25.75, + "step": 26160 + }, + { + "epoch": 0.05286505573354557, + "grad_norm": 689.4549560546875, + "learning_rate": 5.234e-06, + "loss": 31.6495, + "step": 26170 + }, + { + "epoch": 0.05288525636622939, + "grad_norm": 23.626998901367188, + "learning_rate": 5.236e-06, + "loss": 10.3843, + "step": 26180 + }, + { + "epoch": 0.052905456998913206, + "grad_norm": 392.9609069824219, + "learning_rate": 5.2380000000000005e-06, + "loss": 21.8561, + "step": 26190 + }, + { + "epoch": 0.05292565763159702, + "grad_norm": 348.66900634765625, + "learning_rate": 5.240000000000001e-06, + "loss": 20.0394, + "step": 26200 + }, + { + "epoch": 0.05294585826428084, + "grad_norm": 677.1149291992188, + "learning_rate": 5.242000000000001e-06, + "loss": 21.0038, + "step": 26210 + }, + { + "epoch": 0.05296605889696465, + "grad_norm": 564.3142700195312, + "learning_rate": 5.244e-06, + "loss": 20.4818, + "step": 26220 + }, + { + "epoch": 0.05298625952964847, + "grad_norm": 331.88494873046875, + "learning_rate": 5.246e-06, + "loss": 34.9781, + "step": 26230 + }, + { + 
"epoch": 0.053006460162332285, + "grad_norm": 787.8898315429688, + "learning_rate": 5.248000000000001e-06, + "loss": 18.9843, + "step": 26240 + }, + { + "epoch": 0.0530266607950161, + "grad_norm": 123.94518280029297, + "learning_rate": 5.2500000000000006e-06, + "loss": 43.6385, + "step": 26250 + }, + { + "epoch": 0.05304686142769992, + "grad_norm": 96.2759017944336, + "learning_rate": 5.252000000000001e-06, + "loss": 30.9721, + "step": 26260 + }, + { + "epoch": 0.05306706206038373, + "grad_norm": 746.7877197265625, + "learning_rate": 5.254e-06, + "loss": 35.5861, + "step": 26270 + }, + { + "epoch": 0.053087262693067544, + "grad_norm": 134.33531188964844, + "learning_rate": 5.256e-06, + "loss": 16.4126, + "step": 26280 + }, + { + "epoch": 0.053107463325751364, + "grad_norm": 256.2925109863281, + "learning_rate": 5.258000000000001e-06, + "loss": 23.2211, + "step": 26290 + }, + { + "epoch": 0.05312766395843518, + "grad_norm": 440.4696960449219, + "learning_rate": 5.2600000000000005e-06, + "loss": 32.416, + "step": 26300 + }, + { + "epoch": 0.053147864591119, + "grad_norm": 204.34536743164062, + "learning_rate": 5.262000000000001e-06, + "loss": 32.0368, + "step": 26310 + }, + { + "epoch": 0.05316806522380281, + "grad_norm": 923.6111450195312, + "learning_rate": 5.264e-06, + "loss": 38.4226, + "step": 26320 + }, + { + "epoch": 0.05318826585648662, + "grad_norm": 641.5208740234375, + "learning_rate": 5.266e-06, + "loss": 26.3782, + "step": 26330 + }, + { + "epoch": 0.05320846648917044, + "grad_norm": 368.361328125, + "learning_rate": 5.268000000000001e-06, + "loss": 24.4569, + "step": 26340 + }, + { + "epoch": 0.053228667121854256, + "grad_norm": 1073.1708984375, + "learning_rate": 5.27e-06, + "loss": 42.5734, + "step": 26350 + }, + { + "epoch": 0.05324886775453807, + "grad_norm": 1146.4925537109375, + "learning_rate": 5.2720000000000005e-06, + "loss": 43.8536, + "step": 26360 + }, + { + "epoch": 0.05326906838722189, + "grad_norm": 216.10980224609375, + "learning_rate": 5.274e-06, + "loss": 22.712, + "step": 26370 + }, + { + "epoch": 0.0532892690199057, + "grad_norm": 3.1001787185668945, + "learning_rate": 5.276e-06, + "loss": 13.0433, + "step": 26380 + }, + { + "epoch": 0.05330946965258952, + "grad_norm": 931.7252197265625, + "learning_rate": 5.278000000000001e-06, + "loss": 17.813, + "step": 26390 + }, + { + "epoch": 0.053329670285273335, + "grad_norm": 404.67572021484375, + "learning_rate": 5.28e-06, + "loss": 18.4228, + "step": 26400 + }, + { + "epoch": 0.05334987091795715, + "grad_norm": 165.9590606689453, + "learning_rate": 5.282e-06, + "loss": 23.0529, + "step": 26410 + }, + { + "epoch": 0.05337007155064097, + "grad_norm": 537.7362060546875, + "learning_rate": 5.2840000000000006e-06, + "loss": 43.9211, + "step": 26420 + }, + { + "epoch": 0.05339027218332478, + "grad_norm": 621.5162963867188, + "learning_rate": 5.286e-06, + "loss": 18.1904, + "step": 26430 + }, + { + "epoch": 0.053410472816008595, + "grad_norm": 333.20269775390625, + "learning_rate": 5.288000000000001e-06, + "loss": 19.8481, + "step": 26440 + }, + { + "epoch": 0.053430673448692415, + "grad_norm": 3.1346991062164307, + "learning_rate": 5.290000000000001e-06, + "loss": 25.3854, + "step": 26450 + }, + { + "epoch": 0.05345087408137623, + "grad_norm": 352.5599365234375, + "learning_rate": 5.292e-06, + "loss": 39.946, + "step": 26460 + }, + { + "epoch": 0.05347107471406005, + "grad_norm": 556.1923828125, + "learning_rate": 5.2940000000000005e-06, + "loss": 40.7556, + "step": 26470 + }, + { + "epoch": 0.05349127534674386, + 
"grad_norm": 610.7059936523438, + "learning_rate": 5.296e-06, + "loss": 14.8259, + "step": 26480 + }, + { + "epoch": 0.053511475979427674, + "grad_norm": 366.200439453125, + "learning_rate": 5.298000000000001e-06, + "loss": 30.2629, + "step": 26490 + }, + { + "epoch": 0.053531676612111494, + "grad_norm": 455.81463623046875, + "learning_rate": 5.300000000000001e-06, + "loss": 62.1175, + "step": 26500 + }, + { + "epoch": 0.05355187724479531, + "grad_norm": 386.99066162109375, + "learning_rate": 5.302e-06, + "loss": 20.9097, + "step": 26510 + }, + { + "epoch": 0.05357207787747912, + "grad_norm": 311.41229248046875, + "learning_rate": 5.304e-06, + "loss": 25.0126, + "step": 26520 + }, + { + "epoch": 0.05359227851016294, + "grad_norm": 589.1319580078125, + "learning_rate": 5.306e-06, + "loss": 42.5743, + "step": 26530 + }, + { + "epoch": 0.05361247914284675, + "grad_norm": 496.6630859375, + "learning_rate": 5.308000000000001e-06, + "loss": 9.1405, + "step": 26540 + }, + { + "epoch": 0.05363267977553057, + "grad_norm": 729.2218017578125, + "learning_rate": 5.310000000000001e-06, + "loss": 31.1716, + "step": 26550 + }, + { + "epoch": 0.053652880408214386, + "grad_norm": 49.607547760009766, + "learning_rate": 5.312e-06, + "loss": 28.6454, + "step": 26560 + }, + { + "epoch": 0.0536730810408982, + "grad_norm": 479.96075439453125, + "learning_rate": 5.314e-06, + "loss": 20.7474, + "step": 26570 + }, + { + "epoch": 0.05369328167358202, + "grad_norm": 851.2595825195312, + "learning_rate": 5.3160000000000004e-06, + "loss": 25.3047, + "step": 26580 + }, + { + "epoch": 0.05371348230626583, + "grad_norm": 662.3512573242188, + "learning_rate": 5.318000000000001e-06, + "loss": 26.4715, + "step": 26590 + }, + { + "epoch": 0.053733682938949645, + "grad_norm": 940.9957275390625, + "learning_rate": 5.320000000000001e-06, + "loss": 42.6483, + "step": 26600 + }, + { + "epoch": 0.053753883571633465, + "grad_norm": 612.0733642578125, + "learning_rate": 5.322000000000001e-06, + "loss": 22.5218, + "step": 26610 + }, + { + "epoch": 0.05377408420431728, + "grad_norm": 237.8662872314453, + "learning_rate": 5.324e-06, + "loss": 14.5526, + "step": 26620 + }, + { + "epoch": 0.0537942848370011, + "grad_norm": 835.09423828125, + "learning_rate": 5.326e-06, + "loss": 26.9957, + "step": 26630 + }, + { + "epoch": 0.05381448546968491, + "grad_norm": 222.43446350097656, + "learning_rate": 5.328000000000001e-06, + "loss": 15.0452, + "step": 26640 + }, + { + "epoch": 0.053834686102368724, + "grad_norm": 700.3217163085938, + "learning_rate": 5.330000000000001e-06, + "loss": 39.1123, + "step": 26650 + }, + { + "epoch": 0.053854886735052544, + "grad_norm": 337.627197265625, + "learning_rate": 5.332000000000001e-06, + "loss": 46.5012, + "step": 26660 + }, + { + "epoch": 0.05387508736773636, + "grad_norm": 424.27667236328125, + "learning_rate": 5.334e-06, + "loss": 27.7794, + "step": 26670 + }, + { + "epoch": 0.05389528800042017, + "grad_norm": 866.9254760742188, + "learning_rate": 5.336e-06, + "loss": 27.4633, + "step": 26680 + }, + { + "epoch": 0.05391548863310399, + "grad_norm": 563.7420043945312, + "learning_rate": 5.338000000000001e-06, + "loss": 32.9408, + "step": 26690 + }, + { + "epoch": 0.0539356892657878, + "grad_norm": 257.2831726074219, + "learning_rate": 5.3400000000000005e-06, + "loss": 16.7897, + "step": 26700 + }, + { + "epoch": 0.05395588989847162, + "grad_norm": 591.7484130859375, + "learning_rate": 5.342000000000001e-06, + "loss": 39.6581, + "step": 26710 + }, + { + "epoch": 0.053976090531155436, + "grad_norm": 
792.9822387695312, + "learning_rate": 5.344e-06, + "loss": 24.9949, + "step": 26720 + }, + { + "epoch": 0.05399629116383925, + "grad_norm": 868.6183471679688, + "learning_rate": 5.346e-06, + "loss": 22.9792, + "step": 26730 + }, + { + "epoch": 0.05401649179652307, + "grad_norm": 615.1279296875, + "learning_rate": 5.348000000000001e-06, + "loss": 33.8277, + "step": 26740 + }, + { + "epoch": 0.05403669242920688, + "grad_norm": 352.4540710449219, + "learning_rate": 5.3500000000000004e-06, + "loss": 15.8923, + "step": 26750 + }, + { + "epoch": 0.054056893061890696, + "grad_norm": 951.6677856445312, + "learning_rate": 5.352000000000001e-06, + "loss": 30.66, + "step": 26760 + }, + { + "epoch": 0.054077093694574516, + "grad_norm": 1502.649169921875, + "learning_rate": 5.354e-06, + "loss": 25.9936, + "step": 26770 + }, + { + "epoch": 0.05409729432725833, + "grad_norm": 392.32305908203125, + "learning_rate": 5.356e-06, + "loss": 22.2297, + "step": 26780 + }, + { + "epoch": 0.05411749495994215, + "grad_norm": 237.27383422851562, + "learning_rate": 5.358000000000001e-06, + "loss": 17.8372, + "step": 26790 + }, + { + "epoch": 0.05413769559262596, + "grad_norm": 539.2459106445312, + "learning_rate": 5.36e-06, + "loss": 25.7624, + "step": 26800 + }, + { + "epoch": 0.054157896225309775, + "grad_norm": 1028.1107177734375, + "learning_rate": 5.3620000000000005e-06, + "loss": 33.4106, + "step": 26810 + }, + { + "epoch": 0.054178096857993595, + "grad_norm": 974.437255859375, + "learning_rate": 5.364000000000001e-06, + "loss": 20.8408, + "step": 26820 + }, + { + "epoch": 0.05419829749067741, + "grad_norm": 327.8843078613281, + "learning_rate": 5.366e-06, + "loss": 24.7608, + "step": 26830 + }, + { + "epoch": 0.05421849812336122, + "grad_norm": 437.9151306152344, + "learning_rate": 5.368000000000001e-06, + "loss": 39.364, + "step": 26840 + }, + { + "epoch": 0.05423869875604504, + "grad_norm": 712.4976806640625, + "learning_rate": 5.370000000000001e-06, + "loss": 18.5657, + "step": 26850 + }, + { + "epoch": 0.054258899388728854, + "grad_norm": 497.657470703125, + "learning_rate": 5.372e-06, + "loss": 15.1377, + "step": 26860 + }, + { + "epoch": 0.054279100021412674, + "grad_norm": 400.9642333984375, + "learning_rate": 5.3740000000000006e-06, + "loss": 27.0087, + "step": 26870 + }, + { + "epoch": 0.05429930065409649, + "grad_norm": 445.33245849609375, + "learning_rate": 5.376e-06, + "loss": 26.6219, + "step": 26880 + }, + { + "epoch": 0.0543195012867803, + "grad_norm": 434.8671875, + "learning_rate": 5.378e-06, + "loss": 39.3349, + "step": 26890 + }, + { + "epoch": 0.05433970191946412, + "grad_norm": 150.58702087402344, + "learning_rate": 5.380000000000001e-06, + "loss": 27.9425, + "step": 26900 + }, + { + "epoch": 0.05435990255214793, + "grad_norm": 894.13671875, + "learning_rate": 5.382e-06, + "loss": 22.3421, + "step": 26910 + }, + { + "epoch": 0.054380103184831746, + "grad_norm": 230.39698791503906, + "learning_rate": 5.3840000000000005e-06, + "loss": 21.8025, + "step": 26920 + }, + { + "epoch": 0.054400303817515566, + "grad_norm": 366.86859130859375, + "learning_rate": 5.386e-06, + "loss": 26.456, + "step": 26930 + }, + { + "epoch": 0.05442050445019938, + "grad_norm": 938.0694580078125, + "learning_rate": 5.388e-06, + "loss": 30.2199, + "step": 26940 + }, + { + "epoch": 0.0544407050828832, + "grad_norm": 464.6997985839844, + "learning_rate": 5.390000000000001e-06, + "loss": 22.3986, + "step": 26950 + }, + { + "epoch": 0.05446090571556701, + "grad_norm": 477.46185302734375, + "learning_rate": 5.392e-06, + 
"loss": 32.1648, + "step": 26960 + }, + { + "epoch": 0.054481106348250825, + "grad_norm": 951.6552124023438, + "learning_rate": 5.394e-06, + "loss": 38.6276, + "step": 26970 + }, + { + "epoch": 0.054501306980934645, + "grad_norm": 219.8541717529297, + "learning_rate": 5.3960000000000005e-06, + "loss": 16.5651, + "step": 26980 + }, + { + "epoch": 0.05452150761361846, + "grad_norm": 503.9372863769531, + "learning_rate": 5.398e-06, + "loss": 41.0035, + "step": 26990 + }, + { + "epoch": 0.05454170824630227, + "grad_norm": 800.7157592773438, + "learning_rate": 5.400000000000001e-06, + "loss": 16.4245, + "step": 27000 + }, + { + "epoch": 0.05456190887898609, + "grad_norm": 646.6304931640625, + "learning_rate": 5.402000000000001e-06, + "loss": 20.2078, + "step": 27010 + }, + { + "epoch": 0.054582109511669905, + "grad_norm": 525.7156372070312, + "learning_rate": 5.404e-06, + "loss": 34.3341, + "step": 27020 + }, + { + "epoch": 0.054602310144353725, + "grad_norm": 778.0181884765625, + "learning_rate": 5.406e-06, + "loss": 26.0942, + "step": 27030 + }, + { + "epoch": 0.05462251077703754, + "grad_norm": 305.4831848144531, + "learning_rate": 5.408e-06, + "loss": 14.2851, + "step": 27040 + }, + { + "epoch": 0.05464271140972135, + "grad_norm": 554.4490966796875, + "learning_rate": 5.410000000000001e-06, + "loss": 18.3887, + "step": 27050 + }, + { + "epoch": 0.05466291204240517, + "grad_norm": 271.5789794921875, + "learning_rate": 5.412000000000001e-06, + "loss": 20.0249, + "step": 27060 + }, + { + "epoch": 0.054683112675088984, + "grad_norm": 697.5492553710938, + "learning_rate": 5.414e-06, + "loss": 40.7504, + "step": 27070 + }, + { + "epoch": 0.0547033133077728, + "grad_norm": 187.14932250976562, + "learning_rate": 5.416e-06, + "loss": 35.2618, + "step": 27080 + }, + { + "epoch": 0.05472351394045662, + "grad_norm": 360.2887878417969, + "learning_rate": 5.418e-06, + "loss": 35.6201, + "step": 27090 + }, + { + "epoch": 0.05474371457314043, + "grad_norm": 118.17030334472656, + "learning_rate": 5.420000000000001e-06, + "loss": 34.1158, + "step": 27100 + }, + { + "epoch": 0.05476391520582425, + "grad_norm": 835.5042114257812, + "learning_rate": 5.422000000000001e-06, + "loss": 23.7981, + "step": 27110 + }, + { + "epoch": 0.05478411583850806, + "grad_norm": 293.77093505859375, + "learning_rate": 5.424e-06, + "loss": 32.4919, + "step": 27120 + }, + { + "epoch": 0.054804316471191876, + "grad_norm": 352.4538879394531, + "learning_rate": 5.426e-06, + "loss": 26.8516, + "step": 27130 + }, + { + "epoch": 0.054824517103875696, + "grad_norm": 221.325439453125, + "learning_rate": 5.4279999999999995e-06, + "loss": 34.1622, + "step": 27140 + }, + { + "epoch": 0.05484471773655951, + "grad_norm": 590.2091064453125, + "learning_rate": 5.4300000000000005e-06, + "loss": 14.4413, + "step": 27150 + }, + { + "epoch": 0.05486491836924332, + "grad_norm": 734.5629272460938, + "learning_rate": 5.432000000000001e-06, + "loss": 21.4118, + "step": 27160 + }, + { + "epoch": 0.05488511900192714, + "grad_norm": 799.1691284179688, + "learning_rate": 5.434e-06, + "loss": 41.7156, + "step": 27170 + }, + { + "epoch": 0.054905319634610955, + "grad_norm": 371.6385803222656, + "learning_rate": 5.436e-06, + "loss": 21.9291, + "step": 27180 + }, + { + "epoch": 0.054925520267294775, + "grad_norm": 286.462646484375, + "learning_rate": 5.438e-06, + "loss": 14.176, + "step": 27190 + }, + { + "epoch": 0.05494572089997859, + "grad_norm": 326.9037780761719, + "learning_rate": 5.4400000000000004e-06, + "loss": 28.3926, + "step": 27200 + }, + { + 
"epoch": 0.0549659215326624, + "grad_norm": 1330.7659912109375, + "learning_rate": 5.442000000000001e-06, + "loss": 53.405, + "step": 27210 + }, + { + "epoch": 0.05498612216534622, + "grad_norm": 321.39202880859375, + "learning_rate": 5.444000000000001e-06, + "loss": 26.6436, + "step": 27220 + }, + { + "epoch": 0.055006322798030034, + "grad_norm": 347.4004211425781, + "learning_rate": 5.446e-06, + "loss": 26.5977, + "step": 27230 + }, + { + "epoch": 0.05502652343071385, + "grad_norm": 205.0798797607422, + "learning_rate": 5.448e-06, + "loss": 13.3988, + "step": 27240 + }, + { + "epoch": 0.05504672406339767, + "grad_norm": 429.0446472167969, + "learning_rate": 5.450000000000001e-06, + "loss": 37.5181, + "step": 27250 + }, + { + "epoch": 0.05506692469608148, + "grad_norm": 1414.781494140625, + "learning_rate": 5.4520000000000005e-06, + "loss": 25.0189, + "step": 27260 + }, + { + "epoch": 0.0550871253287653, + "grad_norm": 271.6662292480469, + "learning_rate": 5.454000000000001e-06, + "loss": 14.168, + "step": 27270 + }, + { + "epoch": 0.05510732596144911, + "grad_norm": 670.027099609375, + "learning_rate": 5.456e-06, + "loss": 36.156, + "step": 27280 + }, + { + "epoch": 0.055127526594132926, + "grad_norm": 621.5324096679688, + "learning_rate": 5.458e-06, + "loss": 31.7824, + "step": 27290 + }, + { + "epoch": 0.055147727226816746, + "grad_norm": 553.9097290039062, + "learning_rate": 5.460000000000001e-06, + "loss": 36.1782, + "step": 27300 + }, + { + "epoch": 0.05516792785950056, + "grad_norm": 89.8622817993164, + "learning_rate": 5.462e-06, + "loss": 13.8733, + "step": 27310 + }, + { + "epoch": 0.05518812849218437, + "grad_norm": 298.27581787109375, + "learning_rate": 5.4640000000000005e-06, + "loss": 53.1346, + "step": 27320 + }, + { + "epoch": 0.05520832912486819, + "grad_norm": 692.3800048828125, + "learning_rate": 5.466e-06, + "loss": 27.956, + "step": 27330 + }, + { + "epoch": 0.055228529757552006, + "grad_norm": 474.23175048828125, + "learning_rate": 5.468e-06, + "loss": 19.6887, + "step": 27340 + }, + { + "epoch": 0.05524873039023582, + "grad_norm": 925.652587890625, + "learning_rate": 5.470000000000001e-06, + "loss": 17.0159, + "step": 27350 + }, + { + "epoch": 0.05526893102291964, + "grad_norm": 488.05560302734375, + "learning_rate": 5.472e-06, + "loss": 15.8511, + "step": 27360 + }, + { + "epoch": 0.05528913165560345, + "grad_norm": 387.1439514160156, + "learning_rate": 5.4740000000000004e-06, + "loss": 33.8116, + "step": 27370 + }, + { + "epoch": 0.05530933228828727, + "grad_norm": 455.251220703125, + "learning_rate": 5.476000000000001e-06, + "loss": 21.4182, + "step": 27380 + }, + { + "epoch": 0.055329532920971085, + "grad_norm": 940.2066650390625, + "learning_rate": 5.478e-06, + "loss": 34.1586, + "step": 27390 + }, + { + "epoch": 0.0553497335536549, + "grad_norm": 457.9208984375, + "learning_rate": 5.480000000000001e-06, + "loss": 26.7798, + "step": 27400 + }, + { + "epoch": 0.05536993418633872, + "grad_norm": 839.2479248046875, + "learning_rate": 5.482000000000001e-06, + "loss": 20.7492, + "step": 27410 + }, + { + "epoch": 0.05539013481902253, + "grad_norm": 477.7880554199219, + "learning_rate": 5.484e-06, + "loss": 40.5622, + "step": 27420 + }, + { + "epoch": 0.055410335451706344, + "grad_norm": 490.5758056640625, + "learning_rate": 5.4860000000000005e-06, + "loss": 27.7741, + "step": 27430 + }, + { + "epoch": 0.055430536084390164, + "grad_norm": 643.51806640625, + "learning_rate": 5.488e-06, + "loss": 23.1479, + "step": 27440 + }, + { + "epoch": 0.05545073671707398, + 
"grad_norm": 431.0497131347656, + "learning_rate": 5.490000000000001e-06, + "loss": 15.9826, + "step": 27450 + }, + { + "epoch": 0.0554709373497578, + "grad_norm": 527.7308349609375, + "learning_rate": 5.492000000000001e-06, + "loss": 39.9755, + "step": 27460 + }, + { + "epoch": 0.05549113798244161, + "grad_norm": 1432.783203125, + "learning_rate": 5.494e-06, + "loss": 30.6298, + "step": 27470 + }, + { + "epoch": 0.05551133861512542, + "grad_norm": 804.2708129882812, + "learning_rate": 5.496e-06, + "loss": 28.3745, + "step": 27480 + }, + { + "epoch": 0.05553153924780924, + "grad_norm": 501.12506103515625, + "learning_rate": 5.498e-06, + "loss": 11.7346, + "step": 27490 + }, + { + "epoch": 0.055551739880493056, + "grad_norm": 718.4681396484375, + "learning_rate": 5.500000000000001e-06, + "loss": 34.8546, + "step": 27500 + }, + { + "epoch": 0.05557194051317687, + "grad_norm": 392.0679626464844, + "learning_rate": 5.502000000000001e-06, + "loss": 22.2465, + "step": 27510 + }, + { + "epoch": 0.05559214114586069, + "grad_norm": 1332.5615234375, + "learning_rate": 5.504e-06, + "loss": 29.1452, + "step": 27520 + }, + { + "epoch": 0.0556123417785445, + "grad_norm": 188.90589904785156, + "learning_rate": 5.506e-06, + "loss": 13.0475, + "step": 27530 + }, + { + "epoch": 0.05563254241122832, + "grad_norm": 598.53857421875, + "learning_rate": 5.508e-06, + "loss": 50.0424, + "step": 27540 + }, + { + "epoch": 0.055652743043912135, + "grad_norm": 673.3407592773438, + "learning_rate": 5.510000000000001e-06, + "loss": 23.7318, + "step": 27550 + }, + { + "epoch": 0.05567294367659595, + "grad_norm": 392.29791259765625, + "learning_rate": 5.512000000000001e-06, + "loss": 18.1459, + "step": 27560 + }, + { + "epoch": 0.05569314430927977, + "grad_norm": 40.12874221801758, + "learning_rate": 5.514e-06, + "loss": 24.9282, + "step": 27570 + }, + { + "epoch": 0.05571334494196358, + "grad_norm": 98.27652740478516, + "learning_rate": 5.516e-06, + "loss": 24.3814, + "step": 27580 + }, + { + "epoch": 0.055733545574647395, + "grad_norm": 220.50205993652344, + "learning_rate": 5.518e-06, + "loss": 34.8111, + "step": 27590 + }, + { + "epoch": 0.055753746207331215, + "grad_norm": 415.7088317871094, + "learning_rate": 5.5200000000000005e-06, + "loss": 31.3995, + "step": 27600 + }, + { + "epoch": 0.05577394684001503, + "grad_norm": 536.6727905273438, + "learning_rate": 5.522000000000001e-06, + "loss": 18.9631, + "step": 27610 + }, + { + "epoch": 0.05579414747269885, + "grad_norm": 312.55487060546875, + "learning_rate": 5.524000000000001e-06, + "loss": 16.6055, + "step": 27620 + }, + { + "epoch": 0.05581434810538266, + "grad_norm": 1098.286376953125, + "learning_rate": 5.526e-06, + "loss": 32.6939, + "step": 27630 + }, + { + "epoch": 0.055834548738066474, + "grad_norm": 935.418212890625, + "learning_rate": 5.528e-06, + "loss": 23.3494, + "step": 27640 + }, + { + "epoch": 0.055854749370750294, + "grad_norm": 559.8922119140625, + "learning_rate": 5.530000000000001e-06, + "loss": 37.6899, + "step": 27650 + }, + { + "epoch": 0.05587495000343411, + "grad_norm": 128.9083251953125, + "learning_rate": 5.5320000000000006e-06, + "loss": 20.8371, + "step": 27660 + }, + { + "epoch": 0.05589515063611792, + "grad_norm": 861.5052490234375, + "learning_rate": 5.534000000000001e-06, + "loss": 18.674, + "step": 27670 + }, + { + "epoch": 0.05591535126880174, + "grad_norm": 359.53204345703125, + "learning_rate": 5.536e-06, + "loss": 27.9867, + "step": 27680 + }, + { + "epoch": 0.05593555190148555, + "grad_norm": 724.4784545898438, + 
"learning_rate": 5.538e-06, + "loss": 22.9095, + "step": 27690 + }, + { + "epoch": 0.05595575253416937, + "grad_norm": 403.0739440917969, + "learning_rate": 5.540000000000001e-06, + "loss": 28.841, + "step": 27700 + }, + { + "epoch": 0.055975953166853186, + "grad_norm": 130.4065704345703, + "learning_rate": 5.5420000000000005e-06, + "loss": 22.1536, + "step": 27710 + }, + { + "epoch": 0.055996153799537, + "grad_norm": 407.27777099609375, + "learning_rate": 5.544000000000001e-06, + "loss": 27.9353, + "step": 27720 + }, + { + "epoch": 0.05601635443222082, + "grad_norm": 445.86962890625, + "learning_rate": 5.546e-06, + "loss": 32.5215, + "step": 27730 + }, + { + "epoch": 0.05603655506490463, + "grad_norm": 201.13758850097656, + "learning_rate": 5.548e-06, + "loss": 33.0553, + "step": 27740 + }, + { + "epoch": 0.056056755697588445, + "grad_norm": 782.3064575195312, + "learning_rate": 5.550000000000001e-06, + "loss": 26.2349, + "step": 27750 + }, + { + "epoch": 0.056076956330272265, + "grad_norm": 640.4067993164062, + "learning_rate": 5.552e-06, + "loss": 30.5439, + "step": 27760 + }, + { + "epoch": 0.05609715696295608, + "grad_norm": 15.612726211547852, + "learning_rate": 5.5540000000000005e-06, + "loss": 10.874, + "step": 27770 + }, + { + "epoch": 0.0561173575956399, + "grad_norm": 514.4762573242188, + "learning_rate": 5.556000000000001e-06, + "loss": 21.3809, + "step": 27780 + }, + { + "epoch": 0.05613755822832371, + "grad_norm": 238.87423706054688, + "learning_rate": 5.558e-06, + "loss": 14.5158, + "step": 27790 + }, + { + "epoch": 0.056157758861007524, + "grad_norm": 221.79910278320312, + "learning_rate": 5.560000000000001e-06, + "loss": 30.4484, + "step": 27800 + }, + { + "epoch": 0.056177959493691344, + "grad_norm": 451.6899108886719, + "learning_rate": 5.562000000000001e-06, + "loss": 18.7587, + "step": 27810 + }, + { + "epoch": 0.05619816012637516, + "grad_norm": 189.8982391357422, + "learning_rate": 5.5640000000000004e-06, + "loss": 30.954, + "step": 27820 + }, + { + "epoch": 0.05621836075905897, + "grad_norm": 1845.3804931640625, + "learning_rate": 5.566000000000001e-06, + "loss": 35.3793, + "step": 27830 + }, + { + "epoch": 0.05623856139174279, + "grad_norm": 196.89622497558594, + "learning_rate": 5.568e-06, + "loss": 38.2027, + "step": 27840 + }, + { + "epoch": 0.0562587620244266, + "grad_norm": 14.610710144042969, + "learning_rate": 5.570000000000001e-06, + "loss": 21.6815, + "step": 27850 + }, + { + "epoch": 0.05627896265711042, + "grad_norm": 822.1419677734375, + "learning_rate": 5.572000000000001e-06, + "loss": 26.8906, + "step": 27860 + }, + { + "epoch": 0.056299163289794237, + "grad_norm": 721.5017700195312, + "learning_rate": 5.574e-06, + "loss": 26.2475, + "step": 27870 + }, + { + "epoch": 0.05631936392247805, + "grad_norm": 285.6672058105469, + "learning_rate": 5.5760000000000005e-06, + "loss": 17.6087, + "step": 27880 + }, + { + "epoch": 0.05633956455516187, + "grad_norm": 549.2312622070312, + "learning_rate": 5.578e-06, + "loss": 25.9142, + "step": 27890 + }, + { + "epoch": 0.05635976518784568, + "grad_norm": 359.40325927734375, + "learning_rate": 5.580000000000001e-06, + "loss": 23.0134, + "step": 27900 + }, + { + "epoch": 0.056379965820529496, + "grad_norm": 310.5208435058594, + "learning_rate": 5.582000000000001e-06, + "loss": 25.5474, + "step": 27910 + }, + { + "epoch": 0.056400166453213316, + "grad_norm": 298.7782287597656, + "learning_rate": 5.584e-06, + "loss": 21.7849, + "step": 27920 + }, + { + "epoch": 0.05642036708589713, + "grad_norm": 388.15966796875, + 
"learning_rate": 5.586e-06, + "loss": 22.7005, + "step": 27930 + }, + { + "epoch": 0.05644056771858095, + "grad_norm": 476.7641296386719, + "learning_rate": 5.588e-06, + "loss": 28.5089, + "step": 27940 + }, + { + "epoch": 0.05646076835126476, + "grad_norm": 199.51100158691406, + "learning_rate": 5.590000000000001e-06, + "loss": 35.6762, + "step": 27950 + }, + { + "epoch": 0.056480968983948575, + "grad_norm": 1052.156005859375, + "learning_rate": 5.592000000000001e-06, + "loss": 16.5051, + "step": 27960 + }, + { + "epoch": 0.056501169616632395, + "grad_norm": 970.8603515625, + "learning_rate": 5.594e-06, + "loss": 42.4477, + "step": 27970 + }, + { + "epoch": 0.05652137024931621, + "grad_norm": 60.73828887939453, + "learning_rate": 5.596e-06, + "loss": 28.0446, + "step": 27980 + }, + { + "epoch": 0.05654157088200002, + "grad_norm": 277.9892272949219, + "learning_rate": 5.5980000000000004e-06, + "loss": 25.1444, + "step": 27990 + }, + { + "epoch": 0.05656177151468384, + "grad_norm": 363.2925109863281, + "learning_rate": 5.600000000000001e-06, + "loss": 18.1469, + "step": 28000 + }, + { + "epoch": 0.056581972147367654, + "grad_norm": 1076.0853271484375, + "learning_rate": 5.602000000000001e-06, + "loss": 52.504, + "step": 28010 + }, + { + "epoch": 0.056602172780051474, + "grad_norm": 755.5100708007812, + "learning_rate": 5.604000000000001e-06, + "loss": 38.6469, + "step": 28020 + }, + { + "epoch": 0.05662237341273529, + "grad_norm": 540.84033203125, + "learning_rate": 5.606e-06, + "loss": 24.8698, + "step": 28030 + }, + { + "epoch": 0.0566425740454191, + "grad_norm": 660.2512817382812, + "learning_rate": 5.608e-06, + "loss": 30.3958, + "step": 28040 + }, + { + "epoch": 0.05666277467810292, + "grad_norm": 417.4425048828125, + "learning_rate": 5.610000000000001e-06, + "loss": 26.1476, + "step": 28050 + }, + { + "epoch": 0.05668297531078673, + "grad_norm": 597.3268432617188, + "learning_rate": 5.612000000000001e-06, + "loss": 20.3178, + "step": 28060 + }, + { + "epoch": 0.056703175943470546, + "grad_norm": 592.311279296875, + "learning_rate": 5.614000000000001e-06, + "loss": 35.6682, + "step": 28070 + }, + { + "epoch": 0.056723376576154366, + "grad_norm": 420.3757629394531, + "learning_rate": 5.616e-06, + "loss": 20.0005, + "step": 28080 + }, + { + "epoch": 0.05674357720883818, + "grad_norm": 8.282299995422363, + "learning_rate": 5.618e-06, + "loss": 27.9005, + "step": 28090 + }, + { + "epoch": 0.056763777841522, + "grad_norm": 512.5679321289062, + "learning_rate": 5.620000000000001e-06, + "loss": 28.1501, + "step": 28100 + }, + { + "epoch": 0.05678397847420581, + "grad_norm": 176.4183807373047, + "learning_rate": 5.6220000000000006e-06, + "loss": 30.822, + "step": 28110 + }, + { + "epoch": 0.056804179106889625, + "grad_norm": 391.804931640625, + "learning_rate": 5.624000000000001e-06, + "loss": 28.2099, + "step": 28120 + }, + { + "epoch": 0.056824379739573445, + "grad_norm": 374.36712646484375, + "learning_rate": 5.626e-06, + "loss": 32.8329, + "step": 28130 + }, + { + "epoch": 0.05684458037225726, + "grad_norm": 729.12158203125, + "learning_rate": 5.628e-06, + "loss": 26.7154, + "step": 28140 + }, + { + "epoch": 0.05686478100494107, + "grad_norm": 1403.9803466796875, + "learning_rate": 5.63e-06, + "loss": 22.4779, + "step": 28150 + }, + { + "epoch": 0.05688498163762489, + "grad_norm": 307.5541076660156, + "learning_rate": 5.6320000000000005e-06, + "loss": 15.9695, + "step": 28160 + }, + { + "epoch": 0.056905182270308705, + "grad_norm": 62.129268646240234, + "learning_rate": 
5.634000000000001e-06, + "loss": 26.3056, + "step": 28170 + }, + { + "epoch": 0.056925382902992525, + "grad_norm": 1090.5374755859375, + "learning_rate": 5.636000000000001e-06, + "loss": 14.7648, + "step": 28180 + }, + { + "epoch": 0.05694558353567634, + "grad_norm": 1130.8397216796875, + "learning_rate": 5.638e-06, + "loss": 29.261, + "step": 28190 + }, + { + "epoch": 0.05696578416836015, + "grad_norm": 694.0996704101562, + "learning_rate": 5.64e-06, + "loss": 46.7732, + "step": 28200 + }, + { + "epoch": 0.05698598480104397, + "grad_norm": 255.87850952148438, + "learning_rate": 5.642000000000001e-06, + "loss": 23.1361, + "step": 28210 + }, + { + "epoch": 0.057006185433727784, + "grad_norm": 1019.9904174804688, + "learning_rate": 5.6440000000000005e-06, + "loss": 23.3889, + "step": 28220 + }, + { + "epoch": 0.0570263860664116, + "grad_norm": 362.83929443359375, + "learning_rate": 5.646000000000001e-06, + "loss": 27.8546, + "step": 28230 + }, + { + "epoch": 0.05704658669909542, + "grad_norm": 252.81332397460938, + "learning_rate": 5.648e-06, + "loss": 20.3522, + "step": 28240 + }, + { + "epoch": 0.05706678733177923, + "grad_norm": 412.9476623535156, + "learning_rate": 5.65e-06, + "loss": 28.927, + "step": 28250 + }, + { + "epoch": 0.05708698796446305, + "grad_norm": 492.25262451171875, + "learning_rate": 5.652000000000001e-06, + "loss": 24.6241, + "step": 28260 + }, + { + "epoch": 0.05710718859714686, + "grad_norm": 1780.2408447265625, + "learning_rate": 5.654e-06, + "loss": 53.1644, + "step": 28270 + }, + { + "epoch": 0.057127389229830676, + "grad_norm": 1033.7132568359375, + "learning_rate": 5.6560000000000006e-06, + "loss": 26.9228, + "step": 28280 + }, + { + "epoch": 0.057147589862514496, + "grad_norm": 213.87942504882812, + "learning_rate": 5.658e-06, + "loss": 23.8045, + "step": 28290 + }, + { + "epoch": 0.05716779049519831, + "grad_norm": 137.75071716308594, + "learning_rate": 5.66e-06, + "loss": 22.3189, + "step": 28300 + }, + { + "epoch": 0.05718799112788212, + "grad_norm": 113.6178207397461, + "learning_rate": 5.662000000000001e-06, + "loss": 30.8098, + "step": 28310 + }, + { + "epoch": 0.05720819176056594, + "grad_norm": 164.42572021484375, + "learning_rate": 5.664e-06, + "loss": 7.4192, + "step": 28320 + }, + { + "epoch": 0.057228392393249755, + "grad_norm": 1245.4927978515625, + "learning_rate": 5.6660000000000005e-06, + "loss": 25.0924, + "step": 28330 + }, + { + "epoch": 0.057248593025933575, + "grad_norm": 1074.7960205078125, + "learning_rate": 5.668e-06, + "loss": 31.0009, + "step": 28340 + }, + { + "epoch": 0.05726879365861739, + "grad_norm": 379.931396484375, + "learning_rate": 5.67e-06, + "loss": 20.8451, + "step": 28350 + }, + { + "epoch": 0.0572889942913012, + "grad_norm": 500.2812194824219, + "learning_rate": 5.672000000000001e-06, + "loss": 24.8504, + "step": 28360 + }, + { + "epoch": 0.05730919492398502, + "grad_norm": 1326.780029296875, + "learning_rate": 5.674e-06, + "loss": 33.9426, + "step": 28370 + }, + { + "epoch": 0.057329395556668834, + "grad_norm": 334.5964660644531, + "learning_rate": 5.676e-06, + "loss": 19.2081, + "step": 28380 + }, + { + "epoch": 0.05734959618935265, + "grad_norm": 432.8052673339844, + "learning_rate": 5.6780000000000005e-06, + "loss": 14.1455, + "step": 28390 + }, + { + "epoch": 0.05736979682203647, + "grad_norm": 398.933837890625, + "learning_rate": 5.68e-06, + "loss": 25.0682, + "step": 28400 + }, + { + "epoch": 0.05738999745472028, + "grad_norm": 225.60511779785156, + "learning_rate": 5.682000000000001e-06, + "loss": 10.3286, + 
"step": 28410 + }, + { + "epoch": 0.0574101980874041, + "grad_norm": 473.37274169921875, + "learning_rate": 5.684000000000001e-06, + "loss": 46.4121, + "step": 28420 + }, + { + "epoch": 0.05743039872008791, + "grad_norm": 379.1407775878906, + "learning_rate": 5.686e-06, + "loss": 28.0916, + "step": 28430 + }, + { + "epoch": 0.057450599352771727, + "grad_norm": 263.548828125, + "learning_rate": 5.6880000000000004e-06, + "loss": 15.1848, + "step": 28440 + }, + { + "epoch": 0.057470799985455547, + "grad_norm": 196.68814086914062, + "learning_rate": 5.69e-06, + "loss": 18.4118, + "step": 28450 + }, + { + "epoch": 0.05749100061813936, + "grad_norm": 345.37841796875, + "learning_rate": 5.692000000000001e-06, + "loss": 27.8511, + "step": 28460 + }, + { + "epoch": 0.05751120125082317, + "grad_norm": 506.7391052246094, + "learning_rate": 5.694000000000001e-06, + "loss": 39.8623, + "step": 28470 + }, + { + "epoch": 0.05753140188350699, + "grad_norm": 420.26214599609375, + "learning_rate": 5.696e-06, + "loss": 24.7865, + "step": 28480 + }, + { + "epoch": 0.057551602516190806, + "grad_norm": 0.0, + "learning_rate": 5.698e-06, + "loss": 24.2103, + "step": 28490 + }, + { + "epoch": 0.057571803148874626, + "grad_norm": 754.0606689453125, + "learning_rate": 5.7e-06, + "loss": 28.7258, + "step": 28500 + }, + { + "epoch": 0.05759200378155844, + "grad_norm": 399.36328125, + "learning_rate": 5.702000000000001e-06, + "loss": 31.9229, + "step": 28510 + }, + { + "epoch": 0.05761220441424225, + "grad_norm": 570.1122436523438, + "learning_rate": 5.704000000000001e-06, + "loss": 30.0433, + "step": 28520 + }, + { + "epoch": 0.05763240504692607, + "grad_norm": 348.0009765625, + "learning_rate": 5.706e-06, + "loss": 22.6739, + "step": 28530 + }, + { + "epoch": 0.057652605679609885, + "grad_norm": 48.64787292480469, + "learning_rate": 5.708e-06, + "loss": 27.7151, + "step": 28540 + }, + { + "epoch": 0.0576728063122937, + "grad_norm": 294.9898986816406, + "learning_rate": 5.71e-06, + "loss": 31.1165, + "step": 28550 + }, + { + "epoch": 0.05769300694497752, + "grad_norm": 37.3386116027832, + "learning_rate": 5.7120000000000005e-06, + "loss": 31.529, + "step": 28560 + }, + { + "epoch": 0.05771320757766133, + "grad_norm": 1066.211669921875, + "learning_rate": 5.714000000000001e-06, + "loss": 27.5422, + "step": 28570 + }, + { + "epoch": 0.05773340821034515, + "grad_norm": 183.66064453125, + "learning_rate": 5.716000000000001e-06, + "loss": 44.7641, + "step": 28580 + }, + { + "epoch": 0.057753608843028964, + "grad_norm": 352.4803771972656, + "learning_rate": 5.718e-06, + "loss": 28.767, + "step": 28590 + }, + { + "epoch": 0.05777380947571278, + "grad_norm": 391.5818176269531, + "learning_rate": 5.72e-06, + "loss": 29.0731, + "step": 28600 + }, + { + "epoch": 0.0577940101083966, + "grad_norm": 1137.9501953125, + "learning_rate": 5.722000000000001e-06, + "loss": 22.2734, + "step": 28610 + }, + { + "epoch": 0.05781421074108041, + "grad_norm": 1351.6412353515625, + "learning_rate": 5.724000000000001e-06, + "loss": 29.5629, + "step": 28620 + }, + { + "epoch": 0.05783441137376422, + "grad_norm": 258.6549072265625, + "learning_rate": 5.726000000000001e-06, + "loss": 22.9389, + "step": 28630 + }, + { + "epoch": 0.05785461200644804, + "grad_norm": 661.7940063476562, + "learning_rate": 5.728e-06, + "loss": 20.8142, + "step": 28640 + }, + { + "epoch": 0.057874812639131856, + "grad_norm": 267.9925231933594, + "learning_rate": 5.73e-06, + "loss": 17.6126, + "step": 28650 + }, + { + "epoch": 0.057895013271815676, + "grad_norm": 
1019.2171630859375, + "learning_rate": 5.732000000000001e-06, + "loss": 17.9472, + "step": 28660 + }, + { + "epoch": 0.05791521390449949, + "grad_norm": 1014.6627197265625, + "learning_rate": 5.7340000000000005e-06, + "loss": 36.2276, + "step": 28670 + }, + { + "epoch": 0.0579354145371833, + "grad_norm": 274.5294494628906, + "learning_rate": 5.736000000000001e-06, + "loss": 14.5732, + "step": 28680 + }, + { + "epoch": 0.05795561516986712, + "grad_norm": 872.638916015625, + "learning_rate": 5.738e-06, + "loss": 26.672, + "step": 28690 + }, + { + "epoch": 0.057975815802550935, + "grad_norm": 299.70025634765625, + "learning_rate": 5.74e-06, + "loss": 47.1673, + "step": 28700 + }, + { + "epoch": 0.05799601643523475, + "grad_norm": 427.842041015625, + "learning_rate": 5.742000000000001e-06, + "loss": 33.3254, + "step": 28710 + }, + { + "epoch": 0.05801621706791857, + "grad_norm": 760.5520629882812, + "learning_rate": 5.744e-06, + "loss": 31.6733, + "step": 28720 + }, + { + "epoch": 0.05803641770060238, + "grad_norm": 713.591552734375, + "learning_rate": 5.7460000000000006e-06, + "loss": 22.4928, + "step": 28730 + }, + { + "epoch": 0.0580566183332862, + "grad_norm": 800.7971801757812, + "learning_rate": 5.748e-06, + "loss": 25.0551, + "step": 28740 + }, + { + "epoch": 0.058076818965970015, + "grad_norm": 345.371826171875, + "learning_rate": 5.75e-06, + "loss": 27.9757, + "step": 28750 + }, + { + "epoch": 0.05809701959865383, + "grad_norm": 607.2361450195312, + "learning_rate": 5.752000000000001e-06, + "loss": 35.9077, + "step": 28760 + }, + { + "epoch": 0.05811722023133765, + "grad_norm": 549.5829467773438, + "learning_rate": 5.754e-06, + "loss": 30.5123, + "step": 28770 + }, + { + "epoch": 0.05813742086402146, + "grad_norm": 1182.8603515625, + "learning_rate": 5.7560000000000005e-06, + "loss": 29.0875, + "step": 28780 + }, + { + "epoch": 0.058157621496705274, + "grad_norm": 617.0576171875, + "learning_rate": 5.758000000000001e-06, + "loss": 24.269, + "step": 28790 + }, + { + "epoch": 0.058177822129389094, + "grad_norm": 317.1587219238281, + "learning_rate": 5.76e-06, + "loss": 22.0337, + "step": 28800 + }, + { + "epoch": 0.05819802276207291, + "grad_norm": 493.50311279296875, + "learning_rate": 5.762000000000001e-06, + "loss": 36.1685, + "step": 28810 + }, + { + "epoch": 0.05821822339475673, + "grad_norm": 894.058837890625, + "learning_rate": 5.764000000000001e-06, + "loss": 33.8523, + "step": 28820 + }, + { + "epoch": 0.05823842402744054, + "grad_norm": 114.72391510009766, + "learning_rate": 5.766e-06, + "loss": 27.9367, + "step": 28830 + }, + { + "epoch": 0.05825862466012435, + "grad_norm": 566.5399169921875, + "learning_rate": 5.7680000000000005e-06, + "loss": 24.7258, + "step": 28840 + }, + { + "epoch": 0.05827882529280817, + "grad_norm": 473.056396484375, + "learning_rate": 5.77e-06, + "loss": 20.1041, + "step": 28850 + }, + { + "epoch": 0.058299025925491986, + "grad_norm": 318.22601318359375, + "learning_rate": 5.772000000000001e-06, + "loss": 15.9743, + "step": 28860 + }, + { + "epoch": 0.0583192265581758, + "grad_norm": 528.20361328125, + "learning_rate": 5.774000000000001e-06, + "loss": 26.7739, + "step": 28870 + }, + { + "epoch": 0.05833942719085962, + "grad_norm": 289.8639221191406, + "learning_rate": 5.776e-06, + "loss": 17.5716, + "step": 28880 + }, + { + "epoch": 0.05835962782354343, + "grad_norm": 610.1603393554688, + "learning_rate": 5.778e-06, + "loss": 19.3612, + "step": 28890 + }, + { + "epoch": 0.05837982845622725, + "grad_norm": 513.0499267578125, + "learning_rate": 
5.78e-06, + "loss": 22.6479, + "step": 28900 + }, + { + "epoch": 0.058400029088911065, + "grad_norm": 651.9042358398438, + "learning_rate": 5.782000000000001e-06, + "loss": 36.6599, + "step": 28910 + }, + { + "epoch": 0.05842022972159488, + "grad_norm": 806.208740234375, + "learning_rate": 5.784000000000001e-06, + "loss": 21.1247, + "step": 28920 + }, + { + "epoch": 0.0584404303542787, + "grad_norm": 488.8306884765625, + "learning_rate": 5.786e-06, + "loss": 20.5835, + "step": 28930 + }, + { + "epoch": 0.05846063098696251, + "grad_norm": 274.180419921875, + "learning_rate": 5.788e-06, + "loss": 19.2631, + "step": 28940 + }, + { + "epoch": 0.058480831619646324, + "grad_norm": 688.8336181640625, + "learning_rate": 5.7900000000000005e-06, + "loss": 40.8068, + "step": 28950 + }, + { + "epoch": 0.058501032252330144, + "grad_norm": 472.7912902832031, + "learning_rate": 5.792000000000001e-06, + "loss": 60.7401, + "step": 28960 + }, + { + "epoch": 0.05852123288501396, + "grad_norm": 250.30914306640625, + "learning_rate": 5.794000000000001e-06, + "loss": 21.1607, + "step": 28970 + }, + { + "epoch": 0.05854143351769778, + "grad_norm": 152.17755126953125, + "learning_rate": 5.796000000000001e-06, + "loss": 37.4124, + "step": 28980 + }, + { + "epoch": 0.05856163415038159, + "grad_norm": 401.095458984375, + "learning_rate": 5.798e-06, + "loss": 29.4962, + "step": 28990 + }, + { + "epoch": 0.058581834783065403, + "grad_norm": 544.0389404296875, + "learning_rate": 5.8e-06, + "loss": 14.7385, + "step": 29000 + }, + { + "epoch": 0.058602035415749223, + "grad_norm": 470.6712341308594, + "learning_rate": 5.802000000000001e-06, + "loss": 17.5305, + "step": 29010 + }, + { + "epoch": 0.05862223604843304, + "grad_norm": 594.8572998046875, + "learning_rate": 5.804000000000001e-06, + "loss": 18.4892, + "step": 29020 + }, + { + "epoch": 0.05864243668111685, + "grad_norm": 411.4410400390625, + "learning_rate": 5.806000000000001e-06, + "loss": 32.5558, + "step": 29030 + }, + { + "epoch": 0.05866263731380067, + "grad_norm": 666.5599365234375, + "learning_rate": 5.808e-06, + "loss": 29.7149, + "step": 29040 + }, + { + "epoch": 0.05868283794648448, + "grad_norm": 1009.18603515625, + "learning_rate": 5.81e-06, + "loss": 29.5219, + "step": 29050 + }, + { + "epoch": 0.0587030385791683, + "grad_norm": 312.2843017578125, + "learning_rate": 5.812000000000001e-06, + "loss": 24.1587, + "step": 29060 + }, + { + "epoch": 0.058723239211852116, + "grad_norm": 183.3750457763672, + "learning_rate": 5.814000000000001e-06, + "loss": 25.3271, + "step": 29070 + }, + { + "epoch": 0.05874343984453593, + "grad_norm": 323.680419921875, + "learning_rate": 5.816000000000001e-06, + "loss": 22.3888, + "step": 29080 + }, + { + "epoch": 0.05876364047721975, + "grad_norm": 306.0316162109375, + "learning_rate": 5.818e-06, + "loss": 23.2418, + "step": 29090 + }, + { + "epoch": 0.05878384110990356, + "grad_norm": 492.77154541015625, + "learning_rate": 5.82e-06, + "loss": 27.0235, + "step": 29100 + }, + { + "epoch": 0.058804041742587375, + "grad_norm": 844.0591430664062, + "learning_rate": 5.822000000000001e-06, + "loss": 23.0761, + "step": 29110 + }, + { + "epoch": 0.058824242375271195, + "grad_norm": 551.2830810546875, + "learning_rate": 5.8240000000000005e-06, + "loss": 30.2424, + "step": 29120 + }, + { + "epoch": 0.05884444300795501, + "grad_norm": 521.5521850585938, + "learning_rate": 5.826000000000001e-06, + "loss": 24.9471, + "step": 29130 + }, + { + "epoch": 0.05886464364063883, + "grad_norm": 288.21112060546875, + "learning_rate": 5.828e-06, + 
"loss": 15.9028, + "step": 29140 + }, + { + "epoch": 0.05888484427332264, + "grad_norm": 757.6616821289062, + "learning_rate": 5.83e-06, + "loss": 23.7516, + "step": 29150 + }, + { + "epoch": 0.058905044906006454, + "grad_norm": 628.8690795898438, + "learning_rate": 5.832000000000001e-06, + "loss": 15.357, + "step": 29160 + }, + { + "epoch": 0.058925245538690274, + "grad_norm": 128.40367126464844, + "learning_rate": 5.834e-06, + "loss": 19.509, + "step": 29170 + }, + { + "epoch": 0.05894544617137409, + "grad_norm": 635.416259765625, + "learning_rate": 5.8360000000000005e-06, + "loss": 22.5968, + "step": 29180 + }, + { + "epoch": 0.0589656468040579, + "grad_norm": 470.54034423828125, + "learning_rate": 5.838000000000001e-06, + "loss": 32.891, + "step": 29190 + }, + { + "epoch": 0.05898584743674172, + "grad_norm": 869.0327758789062, + "learning_rate": 5.84e-06, + "loss": 33.6656, + "step": 29200 + }, + { + "epoch": 0.05900604806942553, + "grad_norm": 693.0562133789062, + "learning_rate": 5.842000000000001e-06, + "loss": 27.4441, + "step": 29210 + }, + { + "epoch": 0.05902624870210935, + "grad_norm": 577.6492919921875, + "learning_rate": 5.844000000000001e-06, + "loss": 17.9136, + "step": 29220 + }, + { + "epoch": 0.059046449334793166, + "grad_norm": 654.5208129882812, + "learning_rate": 5.8460000000000004e-06, + "loss": 30.9071, + "step": 29230 + }, + { + "epoch": 0.05906664996747698, + "grad_norm": 454.5464172363281, + "learning_rate": 5.848000000000001e-06, + "loss": 16.8235, + "step": 29240 + }, + { + "epoch": 0.0590868506001608, + "grad_norm": 664.4609375, + "learning_rate": 5.85e-06, + "loss": 14.7665, + "step": 29250 + }, + { + "epoch": 0.05910705123284461, + "grad_norm": 202.01416015625, + "learning_rate": 5.852000000000001e-06, + "loss": 15.4509, + "step": 29260 + }, + { + "epoch": 0.059127251865528425, + "grad_norm": 292.506103515625, + "learning_rate": 5.854000000000001e-06, + "loss": 31.813, + "step": 29270 + }, + { + "epoch": 0.059147452498212245, + "grad_norm": 258.66070556640625, + "learning_rate": 5.856e-06, + "loss": 20.3693, + "step": 29280 + }, + { + "epoch": 0.05916765313089606, + "grad_norm": 467.66607666015625, + "learning_rate": 5.8580000000000005e-06, + "loss": 24.9156, + "step": 29290 + }, + { + "epoch": 0.05918785376357988, + "grad_norm": 478.3450012207031, + "learning_rate": 5.86e-06, + "loss": 16.7802, + "step": 29300 + }, + { + "epoch": 0.05920805439626369, + "grad_norm": 183.59373474121094, + "learning_rate": 5.862000000000001e-06, + "loss": 24.7533, + "step": 29310 + }, + { + "epoch": 0.059228255028947505, + "grad_norm": 174.06256103515625, + "learning_rate": 5.864000000000001e-06, + "loss": 18.6498, + "step": 29320 + }, + { + "epoch": 0.059248455661631325, + "grad_norm": 600.2014770507812, + "learning_rate": 5.866e-06, + "loss": 22.7986, + "step": 29330 + }, + { + "epoch": 0.05926865629431514, + "grad_norm": 542.9607543945312, + "learning_rate": 5.868e-06, + "loss": 31.9283, + "step": 29340 + }, + { + "epoch": 0.05928885692699895, + "grad_norm": 691.7803955078125, + "learning_rate": 5.8700000000000005e-06, + "loss": 26.8886, + "step": 29350 + }, + { + "epoch": 0.05930905755968277, + "grad_norm": 85.30669403076172, + "learning_rate": 5.872000000000001e-06, + "loss": 34.907, + "step": 29360 + }, + { + "epoch": 0.059329258192366584, + "grad_norm": 566.6461791992188, + "learning_rate": 5.874000000000001e-06, + "loss": 19.5393, + "step": 29370 + }, + { + "epoch": 0.059349458825050404, + "grad_norm": 593.9346313476562, + "learning_rate": 5.876000000000001e-06, + 
"loss": 30.4646, + "step": 29380 + }, + { + "epoch": 0.05936965945773422, + "grad_norm": 287.89715576171875, + "learning_rate": 5.878e-06, + "loss": 32.8259, + "step": 29390 + }, + { + "epoch": 0.05938986009041803, + "grad_norm": 294.3763122558594, + "learning_rate": 5.8800000000000005e-06, + "loss": 38.1019, + "step": 29400 + }, + { + "epoch": 0.05941006072310185, + "grad_norm": 315.1238098144531, + "learning_rate": 5.882e-06, + "loss": 27.3966, + "step": 29410 + }, + { + "epoch": 0.05943026135578566, + "grad_norm": 270.62091064453125, + "learning_rate": 5.884000000000001e-06, + "loss": 25.031, + "step": 29420 + }, + { + "epoch": 0.059450461988469476, + "grad_norm": 895.1936645507812, + "learning_rate": 5.886000000000001e-06, + "loss": 35.2896, + "step": 29430 + }, + { + "epoch": 0.059470662621153296, + "grad_norm": 1002.9981079101562, + "learning_rate": 5.888e-06, + "loss": 23.6158, + "step": 29440 + }, + { + "epoch": 0.05949086325383711, + "grad_norm": 1407.23876953125, + "learning_rate": 5.89e-06, + "loss": 29.6958, + "step": 29450 + }, + { + "epoch": 0.05951106388652093, + "grad_norm": 439.24884033203125, + "learning_rate": 5.892e-06, + "loss": 28.9774, + "step": 29460 + }, + { + "epoch": 0.05953126451920474, + "grad_norm": 423.88079833984375, + "learning_rate": 5.894000000000001e-06, + "loss": 21.4935, + "step": 29470 + }, + { + "epoch": 0.059551465151888555, + "grad_norm": 906.2281494140625, + "learning_rate": 5.896000000000001e-06, + "loss": 26.5107, + "step": 29480 + }, + { + "epoch": 0.059571665784572375, + "grad_norm": 526.7550048828125, + "learning_rate": 5.898e-06, + "loss": 14.423, + "step": 29490 + }, + { + "epoch": 0.05959186641725619, + "grad_norm": 251.88986206054688, + "learning_rate": 5.9e-06, + "loss": 28.0242, + "step": 29500 + }, + { + "epoch": 0.05961206704994, + "grad_norm": 336.952880859375, + "learning_rate": 5.9019999999999996e-06, + "loss": 28.3279, + "step": 29510 + }, + { + "epoch": 0.05963226768262382, + "grad_norm": 618.0857543945312, + "learning_rate": 5.9040000000000006e-06, + "loss": 32.9698, + "step": 29520 + }, + { + "epoch": 0.059652468315307634, + "grad_norm": 900.3553466796875, + "learning_rate": 5.906000000000001e-06, + "loss": 32.9725, + "step": 29530 + }, + { + "epoch": 0.059672668947991454, + "grad_norm": 574.4266967773438, + "learning_rate": 5.908e-06, + "loss": 24.3543, + "step": 29540 + }, + { + "epoch": 0.05969286958067527, + "grad_norm": 407.38482666015625, + "learning_rate": 5.91e-06, + "loss": 22.2853, + "step": 29550 + }, + { + "epoch": 0.05971307021335908, + "grad_norm": 427.1022033691406, + "learning_rate": 5.912e-06, + "loss": 16.2504, + "step": 29560 + }, + { + "epoch": 0.0597332708460429, + "grad_norm": 117.8338394165039, + "learning_rate": 5.9140000000000005e-06, + "loss": 19.4939, + "step": 29570 + }, + { + "epoch": 0.059753471478726713, + "grad_norm": 780.7388916015625, + "learning_rate": 5.916000000000001e-06, + "loss": 36.2819, + "step": 29580 + }, + { + "epoch": 0.05977367211141053, + "grad_norm": 1370.9173583984375, + "learning_rate": 5.918000000000001e-06, + "loss": 23.8863, + "step": 29590 + }, + { + "epoch": 0.05979387274409435, + "grad_norm": 487.5325927734375, + "learning_rate": 5.92e-06, + "loss": 36.2767, + "step": 29600 + }, + { + "epoch": 0.05981407337677816, + "grad_norm": 534.7539672851562, + "learning_rate": 5.922e-06, + "loss": 21.9024, + "step": 29610 + }, + { + "epoch": 0.05983427400946198, + "grad_norm": 304.4093017578125, + "learning_rate": 5.924000000000001e-06, + "loss": 35.4952, + "step": 29620 + }, + { + 
"epoch": 0.05985447464214579, + "grad_norm": 727.6774291992188, + "learning_rate": 5.9260000000000005e-06, + "loss": 45.8392, + "step": 29630 + }, + { + "epoch": 0.059874675274829606, + "grad_norm": 239.7957763671875, + "learning_rate": 5.928000000000001e-06, + "loss": 16.7934, + "step": 29640 + }, + { + "epoch": 0.059894875907513426, + "grad_norm": 315.3820495605469, + "learning_rate": 5.93e-06, + "loss": 23.4135, + "step": 29650 + }, + { + "epoch": 0.05991507654019724, + "grad_norm": 520.0550537109375, + "learning_rate": 5.932e-06, + "loss": 20.385, + "step": 29660 + }, + { + "epoch": 0.05993527717288105, + "grad_norm": 549.9379272460938, + "learning_rate": 5.934000000000001e-06, + "loss": 28.5402, + "step": 29670 + }, + { + "epoch": 0.05995547780556487, + "grad_norm": 255.78863525390625, + "learning_rate": 5.9360000000000004e-06, + "loss": 22.6324, + "step": 29680 + }, + { + "epoch": 0.059975678438248685, + "grad_norm": 328.67315673828125, + "learning_rate": 5.9380000000000006e-06, + "loss": 38.5843, + "step": 29690 + }, + { + "epoch": 0.059995879070932505, + "grad_norm": 311.1226501464844, + "learning_rate": 5.94e-06, + "loss": 24.8409, + "step": 29700 + }, + { + "epoch": 0.06001607970361632, + "grad_norm": 496.9958190917969, + "learning_rate": 5.942e-06, + "loss": 19.7917, + "step": 29710 + }, + { + "epoch": 0.06003628033630013, + "grad_norm": 999.963623046875, + "learning_rate": 5.944000000000001e-06, + "loss": 24.5253, + "step": 29720 + }, + { + "epoch": 0.06005648096898395, + "grad_norm": 1216.32470703125, + "learning_rate": 5.946e-06, + "loss": 29.5817, + "step": 29730 + }, + { + "epoch": 0.060076681601667764, + "grad_norm": 298.50994873046875, + "learning_rate": 5.9480000000000005e-06, + "loss": 31.0966, + "step": 29740 + }, + { + "epoch": 0.06009688223435158, + "grad_norm": 653.6453247070312, + "learning_rate": 5.950000000000001e-06, + "loss": 25.8964, + "step": 29750 + }, + { + "epoch": 0.0601170828670354, + "grad_norm": 553.80078125, + "learning_rate": 5.952e-06, + "loss": 18.5255, + "step": 29760 + }, + { + "epoch": 0.06013728349971921, + "grad_norm": 333.39764404296875, + "learning_rate": 5.954000000000001e-06, + "loss": 23.6987, + "step": 29770 + }, + { + "epoch": 0.06015748413240303, + "grad_norm": 514.716064453125, + "learning_rate": 5.956000000000001e-06, + "loss": 28.1732, + "step": 29780 + }, + { + "epoch": 0.06017768476508684, + "grad_norm": 538.5047607421875, + "learning_rate": 5.958e-06, + "loss": 29.9077, + "step": 29790 + }, + { + "epoch": 0.060197885397770656, + "grad_norm": 434.8916931152344, + "learning_rate": 5.9600000000000005e-06, + "loss": 26.0934, + "step": 29800 + }, + { + "epoch": 0.060218086030454476, + "grad_norm": 4665.8720703125, + "learning_rate": 5.962e-06, + "loss": 33.1499, + "step": 29810 + }, + { + "epoch": 0.06023828666313829, + "grad_norm": 1068.4248046875, + "learning_rate": 5.964000000000001e-06, + "loss": 28.2813, + "step": 29820 + }, + { + "epoch": 0.0602584872958221, + "grad_norm": 1261.244384765625, + "learning_rate": 5.966000000000001e-06, + "loss": 31.9004, + "step": 29830 + }, + { + "epoch": 0.06027868792850592, + "grad_norm": 410.3675231933594, + "learning_rate": 5.968e-06, + "loss": 24.5451, + "step": 29840 + }, + { + "epoch": 0.060298888561189735, + "grad_norm": 367.4793701171875, + "learning_rate": 5.9700000000000004e-06, + "loss": 20.9293, + "step": 29850 + }, + { + "epoch": 0.060319089193873555, + "grad_norm": 807.4345092773438, + "learning_rate": 5.972e-06, + "loss": 36.9779, + "step": 29860 + }, + { + "epoch": 
0.06033928982655737, + "grad_norm": 1063.368896484375, + "learning_rate": 5.974000000000001e-06, + "loss": 31.3741, + "step": 29870 + }, + { + "epoch": 0.06035949045924118, + "grad_norm": 320.45513916015625, + "learning_rate": 5.976000000000001e-06, + "loss": 14.5253, + "step": 29880 + }, + { + "epoch": 0.060379691091925, + "grad_norm": 337.5420227050781, + "learning_rate": 5.978e-06, + "loss": 22.4189, + "step": 29890 + }, + { + "epoch": 0.060399891724608815, + "grad_norm": 515.797607421875, + "learning_rate": 5.98e-06, + "loss": 28.1367, + "step": 29900 + }, + { + "epoch": 0.06042009235729263, + "grad_norm": 351.6451721191406, + "learning_rate": 5.982e-06, + "loss": 16.7312, + "step": 29910 + }, + { + "epoch": 0.06044029298997645, + "grad_norm": 226.80123901367188, + "learning_rate": 5.984000000000001e-06, + "loss": 19.0455, + "step": 29920 + }, + { + "epoch": 0.06046049362266026, + "grad_norm": 372.4876403808594, + "learning_rate": 5.986000000000001e-06, + "loss": 45.9898, + "step": 29930 + }, + { + "epoch": 0.06048069425534408, + "grad_norm": 748.814208984375, + "learning_rate": 5.988e-06, + "loss": 21.8749, + "step": 29940 + }, + { + "epoch": 0.060500894888027894, + "grad_norm": 396.53118896484375, + "learning_rate": 5.99e-06, + "loss": 32.856, + "step": 29950 + }, + { + "epoch": 0.06052109552071171, + "grad_norm": 220.9365692138672, + "learning_rate": 5.992e-06, + "loss": 24.7886, + "step": 29960 + }, + { + "epoch": 0.06054129615339553, + "grad_norm": 139.06324768066406, + "learning_rate": 5.9940000000000005e-06, + "loss": 21.2531, + "step": 29970 + }, + { + "epoch": 0.06056149678607934, + "grad_norm": 371.89813232421875, + "learning_rate": 5.996000000000001e-06, + "loss": 19.0003, + "step": 29980 + }, + { + "epoch": 0.06058169741876315, + "grad_norm": 906.2913818359375, + "learning_rate": 5.998000000000001e-06, + "loss": 25.2816, + "step": 29990 + }, + { + "epoch": 0.06060189805144697, + "grad_norm": 551.5726318359375, + "learning_rate": 6e-06, + "loss": 28.9025, + "step": 30000 + }, + { + "epoch": 0.060622098684130786, + "grad_norm": 230.41876220703125, + "learning_rate": 6.002e-06, + "loss": 28.3692, + "step": 30010 + }, + { + "epoch": 0.060642299316814606, + "grad_norm": 437.0330810546875, + "learning_rate": 6.004000000000001e-06, + "loss": 30.0274, + "step": 30020 + }, + { + "epoch": 0.06066249994949842, + "grad_norm": 119.54045104980469, + "learning_rate": 6.006000000000001e-06, + "loss": 25.2697, + "step": 30030 + }, + { + "epoch": 0.06068270058218223, + "grad_norm": 893.7681274414062, + "learning_rate": 6.008000000000001e-06, + "loss": 23.0002, + "step": 30040 + }, + { + "epoch": 0.06070290121486605, + "grad_norm": 666.832275390625, + "learning_rate": 6.01e-06, + "loss": 21.5462, + "step": 30050 + }, + { + "epoch": 0.060723101847549865, + "grad_norm": 364.53448486328125, + "learning_rate": 6.012e-06, + "loss": 31.678, + "step": 30060 + }, + { + "epoch": 0.06074330248023368, + "grad_norm": 388.3205871582031, + "learning_rate": 6.014000000000001e-06, + "loss": 21.7389, + "step": 30070 + }, + { + "epoch": 0.0607635031129175, + "grad_norm": 307.82366943359375, + "learning_rate": 6.0160000000000005e-06, + "loss": 23.023, + "step": 30080 + }, + { + "epoch": 0.06078370374560131, + "grad_norm": 654.1393432617188, + "learning_rate": 6.018000000000001e-06, + "loss": 26.87, + "step": 30090 + }, + { + "epoch": 0.06080390437828513, + "grad_norm": 377.3679504394531, + "learning_rate": 6.02e-06, + "loss": 26.1135, + "step": 30100 + }, + { + "epoch": 0.060824105010968944, + "grad_norm": 
1051.38720703125, + "learning_rate": 6.022e-06, + "loss": 31.1944, + "step": 30110 + }, + { + "epoch": 0.06084430564365276, + "grad_norm": 176.04995727539062, + "learning_rate": 6.024000000000001e-06, + "loss": 17.6782, + "step": 30120 + }, + { + "epoch": 0.06086450627633658, + "grad_norm": 398.30059814453125, + "learning_rate": 6.026e-06, + "loss": 36.5331, + "step": 30130 + }, + { + "epoch": 0.06088470690902039, + "grad_norm": 442.10675048828125, + "learning_rate": 6.0280000000000006e-06, + "loss": 23.4857, + "step": 30140 + }, + { + "epoch": 0.060904907541704204, + "grad_norm": 497.2967529296875, + "learning_rate": 6.030000000000001e-06, + "loss": 29.0141, + "step": 30150 + }, + { + "epoch": 0.060925108174388024, + "grad_norm": 419.6001892089844, + "learning_rate": 6.032e-06, + "loss": 30.8048, + "step": 30160 + }, + { + "epoch": 0.06094530880707184, + "grad_norm": 311.8349609375, + "learning_rate": 6.034000000000001e-06, + "loss": 21.1442, + "step": 30170 + }, + { + "epoch": 0.06096550943975566, + "grad_norm": 1235.242431640625, + "learning_rate": 6.036000000000001e-06, + "loss": 35.1707, + "step": 30180 + }, + { + "epoch": 0.06098571007243947, + "grad_norm": 920.2532348632812, + "learning_rate": 6.0380000000000005e-06, + "loss": 23.4196, + "step": 30190 + }, + { + "epoch": 0.06100591070512328, + "grad_norm": 1023.5107421875, + "learning_rate": 6.040000000000001e-06, + "loss": 35.9319, + "step": 30200 + }, + { + "epoch": 0.0610261113378071, + "grad_norm": 3.849163770675659, + "learning_rate": 6.042e-06, + "loss": 21.4135, + "step": 30210 + }, + { + "epoch": 0.061046311970490916, + "grad_norm": 175.37745666503906, + "learning_rate": 6.044000000000001e-06, + "loss": 20.4032, + "step": 30220 + }, + { + "epoch": 0.06106651260317473, + "grad_norm": 527.3060302734375, + "learning_rate": 6.046000000000001e-06, + "loss": 30.6245, + "step": 30230 + }, + { + "epoch": 0.06108671323585855, + "grad_norm": 461.25970458984375, + "learning_rate": 6.048e-06, + "loss": 38.1651, + "step": 30240 + }, + { + "epoch": 0.06110691386854236, + "grad_norm": 544.8977661132812, + "learning_rate": 6.0500000000000005e-06, + "loss": 13.7668, + "step": 30250 + }, + { + "epoch": 0.06112711450122618, + "grad_norm": 530.8619384765625, + "learning_rate": 6.052e-06, + "loss": 19.7257, + "step": 30260 + }, + { + "epoch": 0.061147315133909995, + "grad_norm": 527.7964477539062, + "learning_rate": 6.054000000000001e-06, + "loss": 32.3616, + "step": 30270 + }, + { + "epoch": 0.06116751576659381, + "grad_norm": 1873.7520751953125, + "learning_rate": 6.056000000000001e-06, + "loss": 24.6919, + "step": 30280 + }, + { + "epoch": 0.06118771639927763, + "grad_norm": 659.0830078125, + "learning_rate": 6.058e-06, + "loss": 23.7662, + "step": 30290 + }, + { + "epoch": 0.06120791703196144, + "grad_norm": 380.70068359375, + "learning_rate": 6.0600000000000004e-06, + "loss": 35.3404, + "step": 30300 + }, + { + "epoch": 0.061228117664645254, + "grad_norm": 907.1119384765625, + "learning_rate": 6.062e-06, + "loss": 47.9935, + "step": 30310 + }, + { + "epoch": 0.061248318297329074, + "grad_norm": 571.4940795898438, + "learning_rate": 6.064000000000001e-06, + "loss": 26.3058, + "step": 30320 + }, + { + "epoch": 0.06126851893001289, + "grad_norm": 566.0093383789062, + "learning_rate": 6.066000000000001e-06, + "loss": 14.3268, + "step": 30330 + }, + { + "epoch": 0.06128871956269671, + "grad_norm": 629.2047119140625, + "learning_rate": 6.068e-06, + "loss": 34.8342, + "step": 30340 + }, + { + "epoch": 0.06130892019538052, + "grad_norm": 
177.77943420410156, + "learning_rate": 6.07e-06, + "loss": 38.0346, + "step": 30350 + }, + { + "epoch": 0.06132912082806433, + "grad_norm": 706.6749877929688, + "learning_rate": 6.0720000000000005e-06, + "loss": 33.0479, + "step": 30360 + }, + { + "epoch": 0.06134932146074815, + "grad_norm": 424.2728576660156, + "learning_rate": 6.074000000000001e-06, + "loss": 30.1139, + "step": 30370 + }, + { + "epoch": 0.061369522093431966, + "grad_norm": 693.5032958984375, + "learning_rate": 6.076000000000001e-06, + "loss": 20.3498, + "step": 30380 + }, + { + "epoch": 0.06138972272611578, + "grad_norm": 748.1320190429688, + "learning_rate": 6.078000000000001e-06, + "loss": 28.7987, + "step": 30390 + }, + { + "epoch": 0.0614099233587996, + "grad_norm": 350.1024475097656, + "learning_rate": 6.08e-06, + "loss": 34.3633, + "step": 30400 + }, + { + "epoch": 0.06143012399148341, + "grad_norm": 470.6011657714844, + "learning_rate": 6.082e-06, + "loss": 26.6126, + "step": 30410 + }, + { + "epoch": 0.061450324624167225, + "grad_norm": 171.24346923828125, + "learning_rate": 6.084000000000001e-06, + "loss": 29.7189, + "step": 30420 + }, + { + "epoch": 0.061470525256851045, + "grad_norm": 281.1046447753906, + "learning_rate": 6.086000000000001e-06, + "loss": 28.274, + "step": 30430 + }, + { + "epoch": 0.06149072588953486, + "grad_norm": 729.0284423828125, + "learning_rate": 6.088000000000001e-06, + "loss": 37.4033, + "step": 30440 + }, + { + "epoch": 0.06151092652221868, + "grad_norm": 1197.5462646484375, + "learning_rate": 6.09e-06, + "loss": 21.4996, + "step": 30450 + }, + { + "epoch": 0.06153112715490249, + "grad_norm": 180.23403930664062, + "learning_rate": 6.092e-06, + "loss": 11.861, + "step": 30460 + }, + { + "epoch": 0.061551327787586305, + "grad_norm": 551.1353149414062, + "learning_rate": 6.094000000000001e-06, + "loss": 22.2927, + "step": 30470 + }, + { + "epoch": 0.061571528420270125, + "grad_norm": 967.9293823242188, + "learning_rate": 6.096000000000001e-06, + "loss": 21.8577, + "step": 30480 + }, + { + "epoch": 0.06159172905295394, + "grad_norm": 401.4379577636719, + "learning_rate": 6.098000000000001e-06, + "loss": 38.6864, + "step": 30490 + }, + { + "epoch": 0.06161192968563775, + "grad_norm": 584.6387939453125, + "learning_rate": 6.1e-06, + "loss": 32.8549, + "step": 30500 + }, + { + "epoch": 0.06163213031832157, + "grad_norm": 384.2839660644531, + "learning_rate": 6.102e-06, + "loss": 15.1533, + "step": 30510 + }, + { + "epoch": 0.061652330951005384, + "grad_norm": 302.5103454589844, + "learning_rate": 6.104000000000001e-06, + "loss": 17.0309, + "step": 30520 + }, + { + "epoch": 0.061672531583689204, + "grad_norm": 477.59912109375, + "learning_rate": 6.1060000000000005e-06, + "loss": 51.6508, + "step": 30530 + }, + { + "epoch": 0.06169273221637302, + "grad_norm": 848.4017333984375, + "learning_rate": 6.108000000000001e-06, + "loss": 43.8784, + "step": 30540 + }, + { + "epoch": 0.06171293284905683, + "grad_norm": 580.9694213867188, + "learning_rate": 6.110000000000001e-06, + "loss": 22.3975, + "step": 30550 + }, + { + "epoch": 0.06173313348174065, + "grad_norm": 70.8717041015625, + "learning_rate": 6.112e-06, + "loss": 20.8196, + "step": 30560 + }, + { + "epoch": 0.06175333411442446, + "grad_norm": 200.7247772216797, + "learning_rate": 6.114000000000001e-06, + "loss": 14.8975, + "step": 30570 + }, + { + "epoch": 0.061773534747108276, + "grad_norm": 68.50727844238281, + "learning_rate": 6.116000000000001e-06, + "loss": 18.8313, + "step": 30580 + }, + { + "epoch": 0.061793735379792096, + 
"grad_norm": 365.78631591796875, + "learning_rate": 6.1180000000000005e-06, + "loss": 31.1022, + "step": 30590 + }, + { + "epoch": 0.06181393601247591, + "grad_norm": 924.5538330078125, + "learning_rate": 6.120000000000001e-06, + "loss": 18.5757, + "step": 30600 + }, + { + "epoch": 0.06183413664515973, + "grad_norm": 309.0931091308594, + "learning_rate": 6.122e-06, + "loss": 26.9238, + "step": 30610 + }, + { + "epoch": 0.06185433727784354, + "grad_norm": 982.596923828125, + "learning_rate": 6.124000000000001e-06, + "loss": 32.0771, + "step": 30620 + }, + { + "epoch": 0.061874537910527355, + "grad_norm": 107.05025482177734, + "learning_rate": 6.126000000000001e-06, + "loss": 22.885, + "step": 30630 + }, + { + "epoch": 0.061894738543211175, + "grad_norm": 861.4071044921875, + "learning_rate": 6.1280000000000005e-06, + "loss": 24.9972, + "step": 30640 + }, + { + "epoch": 0.06191493917589499, + "grad_norm": 552.7598876953125, + "learning_rate": 6.130000000000001e-06, + "loss": 14.113, + "step": 30650 + }, + { + "epoch": 0.0619351398085788, + "grad_norm": 980.1431884765625, + "learning_rate": 6.132e-06, + "loss": 16.5971, + "step": 30660 + }, + { + "epoch": 0.06195534044126262, + "grad_norm": 354.41943359375, + "learning_rate": 6.134e-06, + "loss": 23.506, + "step": 30670 + }, + { + "epoch": 0.061975541073946434, + "grad_norm": 525.1093139648438, + "learning_rate": 6.136000000000001e-06, + "loss": 45.2582, + "step": 30680 + }, + { + "epoch": 0.061995741706630254, + "grad_norm": 1747.66748046875, + "learning_rate": 6.138e-06, + "loss": 28.076, + "step": 30690 + }, + { + "epoch": 0.06201594233931407, + "grad_norm": 133.58242797851562, + "learning_rate": 6.1400000000000005e-06, + "loss": 29.0578, + "step": 30700 + }, + { + "epoch": 0.06203614297199788, + "grad_norm": 971.960205078125, + "learning_rate": 6.142e-06, + "loss": 23.5747, + "step": 30710 + }, + { + "epoch": 0.0620563436046817, + "grad_norm": 390.8254699707031, + "learning_rate": 6.144e-06, + "loss": 14.7553, + "step": 30720 + }, + { + "epoch": 0.062076544237365514, + "grad_norm": 931.520751953125, + "learning_rate": 6.146000000000001e-06, + "loss": 15.7332, + "step": 30730 + }, + { + "epoch": 0.06209674487004933, + "grad_norm": 725.7781372070312, + "learning_rate": 6.148e-06, + "loss": 27.3765, + "step": 30740 + }, + { + "epoch": 0.06211694550273315, + "grad_norm": 184.05690002441406, + "learning_rate": 6.15e-06, + "loss": 27.3351, + "step": 30750 + }, + { + "epoch": 0.06213714613541696, + "grad_norm": 147.2608184814453, + "learning_rate": 6.1520000000000006e-06, + "loss": 14.9159, + "step": 30760 + }, + { + "epoch": 0.06215734676810078, + "grad_norm": 605.9739379882812, + "learning_rate": 6.154e-06, + "loss": 27.2257, + "step": 30770 + }, + { + "epoch": 0.06217754740078459, + "grad_norm": 607.0244750976562, + "learning_rate": 6.156000000000001e-06, + "loss": 21.6824, + "step": 30780 + }, + { + "epoch": 0.062197748033468406, + "grad_norm": 247.36895751953125, + "learning_rate": 6.158000000000001e-06, + "loss": 33.5357, + "step": 30790 + }, + { + "epoch": 0.062217948666152226, + "grad_norm": 854.2710571289062, + "learning_rate": 6.16e-06, + "loss": 26.6299, + "step": 30800 + }, + { + "epoch": 0.06223814929883604, + "grad_norm": 443.6426696777344, + "learning_rate": 6.1620000000000005e-06, + "loss": 26.0815, + "step": 30810 + }, + { + "epoch": 0.06225834993151985, + "grad_norm": 189.7944793701172, + "learning_rate": 6.164e-06, + "loss": 34.9507, + "step": 30820 + }, + { + "epoch": 0.06227855056420367, + "grad_norm": 1154.957275390625, + 
"learning_rate": 6.166000000000001e-06, + "loss": 30.0567, + "step": 30830 + }, + { + "epoch": 0.062298751196887485, + "grad_norm": 879.346435546875, + "learning_rate": 6.168000000000001e-06, + "loss": 19.3368, + "step": 30840 + }, + { + "epoch": 0.062318951829571305, + "grad_norm": 118.4106674194336, + "learning_rate": 6.17e-06, + "loss": 20.9169, + "step": 30850 + }, + { + "epoch": 0.06233915246225512, + "grad_norm": 302.1161193847656, + "learning_rate": 6.172e-06, + "loss": 27.4503, + "step": 30860 + }, + { + "epoch": 0.06235935309493893, + "grad_norm": 300.6957702636719, + "learning_rate": 6.174e-06, + "loss": 20.6476, + "step": 30870 + }, + { + "epoch": 0.06237955372762275, + "grad_norm": 860.6785888671875, + "learning_rate": 6.176000000000001e-06, + "loss": 50.3861, + "step": 30880 + }, + { + "epoch": 0.062399754360306564, + "grad_norm": 398.2225036621094, + "learning_rate": 6.178000000000001e-06, + "loss": 15.2574, + "step": 30890 + }, + { + "epoch": 0.06241995499299038, + "grad_norm": 388.7832336425781, + "learning_rate": 6.18e-06, + "loss": 33.1759, + "step": 30900 + }, + { + "epoch": 0.0624401556256742, + "grad_norm": 1116.7479248046875, + "learning_rate": 6.182e-06, + "loss": 22.5325, + "step": 30910 + }, + { + "epoch": 0.06246035625835801, + "grad_norm": 470.1541442871094, + "learning_rate": 6.184e-06, + "loss": 12.1148, + "step": 30920 + }, + { + "epoch": 0.06248055689104183, + "grad_norm": 761.6276245117188, + "learning_rate": 6.1860000000000006e-06, + "loss": 28.2619, + "step": 30930 + }, + { + "epoch": 0.06250075752372564, + "grad_norm": 312.6024475097656, + "learning_rate": 6.188000000000001e-06, + "loss": 25.9364, + "step": 30940 + }, + { + "epoch": 0.06252095815640946, + "grad_norm": 79.20448303222656, + "learning_rate": 6.190000000000001e-06, + "loss": 30.2768, + "step": 30950 + }, + { + "epoch": 0.06254115878909328, + "grad_norm": 838.1323852539062, + "learning_rate": 6.192e-06, + "loss": 19.8499, + "step": 30960 + }, + { + "epoch": 0.0625613594217771, + "grad_norm": 205.89064025878906, + "learning_rate": 6.194e-06, + "loss": 31.2351, + "step": 30970 + }, + { + "epoch": 0.0625815600544609, + "grad_norm": 652.1013793945312, + "learning_rate": 6.196000000000001e-06, + "loss": 29.7136, + "step": 30980 + }, + { + "epoch": 0.06260176068714472, + "grad_norm": 473.4364318847656, + "learning_rate": 6.198000000000001e-06, + "loss": 15.5697, + "step": 30990 + }, + { + "epoch": 0.06262196131982854, + "grad_norm": 803.982177734375, + "learning_rate": 6.200000000000001e-06, + "loss": 42.4467, + "step": 31000 + }, + { + "epoch": 0.06264216195251235, + "grad_norm": 490.3743896484375, + "learning_rate": 6.202e-06, + "loss": 19.5958, + "step": 31010 + }, + { + "epoch": 0.06266236258519617, + "grad_norm": 1799.8692626953125, + "learning_rate": 6.204e-06, + "loss": 29.0938, + "step": 31020 + }, + { + "epoch": 0.06268256321787999, + "grad_norm": 395.7022705078125, + "learning_rate": 6.206000000000001e-06, + "loss": 14.4859, + "step": 31030 + }, + { + "epoch": 0.0627027638505638, + "grad_norm": 780.4808349609375, + "learning_rate": 6.2080000000000005e-06, + "loss": 28.1231, + "step": 31040 + }, + { + "epoch": 0.06272296448324761, + "grad_norm": 642.3253173828125, + "learning_rate": 6.210000000000001e-06, + "loss": 23.1921, + "step": 31050 + }, + { + "epoch": 0.06274316511593143, + "grad_norm": 771.5405883789062, + "learning_rate": 6.212e-06, + "loss": 30.7311, + "step": 31060 + }, + { + "epoch": 0.06276336574861524, + "grad_norm": 260.2312927246094, + "learning_rate": 6.214e-06, + "loss": 
25.5861, + "step": 31070 + }, + { + "epoch": 0.06278356638129906, + "grad_norm": 457.0404052734375, + "learning_rate": 6.216000000000001e-06, + "loss": 16.7189, + "step": 31080 + }, + { + "epoch": 0.06280376701398288, + "grad_norm": 109.95401000976562, + "learning_rate": 6.2180000000000004e-06, + "loss": 19.15, + "step": 31090 + }, + { + "epoch": 0.06282396764666669, + "grad_norm": 976.7431030273438, + "learning_rate": 6.220000000000001e-06, + "loss": 27.3597, + "step": 31100 + }, + { + "epoch": 0.0628441682793505, + "grad_norm": 385.8603515625, + "learning_rate": 6.222e-06, + "loss": 27.3513, + "step": 31110 + }, + { + "epoch": 0.06286436891203433, + "grad_norm": 527.4700927734375, + "learning_rate": 6.224e-06, + "loss": 29.6751, + "step": 31120 + }, + { + "epoch": 0.06288456954471815, + "grad_norm": 1336.8475341796875, + "learning_rate": 6.226000000000001e-06, + "loss": 41.498, + "step": 31130 + }, + { + "epoch": 0.06290477017740195, + "grad_norm": 305.7466735839844, + "learning_rate": 6.228e-06, + "loss": 25.7337, + "step": 31140 + }, + { + "epoch": 0.06292497081008577, + "grad_norm": 138.3699951171875, + "learning_rate": 6.2300000000000005e-06, + "loss": 34.1218, + "step": 31150 + }, + { + "epoch": 0.06294517144276959, + "grad_norm": 585.0834350585938, + "learning_rate": 6.232000000000001e-06, + "loss": 19.9407, + "step": 31160 + }, + { + "epoch": 0.0629653720754534, + "grad_norm": 746.5474243164062, + "learning_rate": 6.234e-06, + "loss": 51.4981, + "step": 31170 + }, + { + "epoch": 0.06298557270813722, + "grad_norm": 270.2710266113281, + "learning_rate": 6.236000000000001e-06, + "loss": 36.1254, + "step": 31180 + }, + { + "epoch": 0.06300577334082104, + "grad_norm": 1371.7850341796875, + "learning_rate": 6.238000000000001e-06, + "loss": 31.2997, + "step": 31190 + }, + { + "epoch": 0.06302597397350485, + "grad_norm": 673.2770385742188, + "learning_rate": 6.24e-06, + "loss": 36.9119, + "step": 31200 + }, + { + "epoch": 0.06304617460618867, + "grad_norm": 301.5875549316406, + "learning_rate": 6.2420000000000005e-06, + "loss": 14.8065, + "step": 31210 + }, + { + "epoch": 0.06306637523887249, + "grad_norm": 581.1113891601562, + "learning_rate": 6.244e-06, + "loss": 32.8933, + "step": 31220 + }, + { + "epoch": 0.06308657587155629, + "grad_norm": 321.9029846191406, + "learning_rate": 6.246000000000001e-06, + "loss": 25.5634, + "step": 31230 + }, + { + "epoch": 0.06310677650424011, + "grad_norm": 990.6182250976562, + "learning_rate": 6.248000000000001e-06, + "loss": 26.876, + "step": 31240 + }, + { + "epoch": 0.06312697713692393, + "grad_norm": 425.1790466308594, + "learning_rate": 6.25e-06, + "loss": 25.7884, + "step": 31250 + }, + { + "epoch": 0.06314717776960774, + "grad_norm": 337.3599853515625, + "learning_rate": 6.2520000000000004e-06, + "loss": 30.3455, + "step": 31260 + }, + { + "epoch": 0.06316737840229156, + "grad_norm": 2128.223876953125, + "learning_rate": 6.254e-06, + "loss": 52.7797, + "step": 31270 + }, + { + "epoch": 0.06318757903497538, + "grad_norm": 400.2874450683594, + "learning_rate": 6.256000000000001e-06, + "loss": 17.9244, + "step": 31280 + }, + { + "epoch": 0.0632077796676592, + "grad_norm": 628.6130981445312, + "learning_rate": 6.258000000000001e-06, + "loss": 29.7181, + "step": 31290 + }, + { + "epoch": 0.063227980300343, + "grad_norm": 393.5443115234375, + "learning_rate": 6.26e-06, + "loss": 17.6657, + "step": 31300 + }, + { + "epoch": 0.06324818093302682, + "grad_norm": 5.618537425994873, + "learning_rate": 6.262e-06, + "loss": 31.0318, + "step": 31310 + }, + { 
+ "epoch": 0.06326838156571064, + "grad_norm": 641.4444580078125, + "learning_rate": 6.264e-06, + "loss": 38.701, + "step": 31320 + }, + { + "epoch": 0.06328858219839445, + "grad_norm": 293.72271728515625, + "learning_rate": 6.266000000000001e-06, + "loss": 27.233, + "step": 31330 + }, + { + "epoch": 0.06330878283107827, + "grad_norm": 391.3696594238281, + "learning_rate": 6.268000000000001e-06, + "loss": 19.8116, + "step": 31340 + }, + { + "epoch": 0.06332898346376209, + "grad_norm": 362.88311767578125, + "learning_rate": 6.27e-06, + "loss": 37.2613, + "step": 31350 + }, + { + "epoch": 0.0633491840964459, + "grad_norm": 417.72015380859375, + "learning_rate": 6.272e-06, + "loss": 23.4655, + "step": 31360 + }, + { + "epoch": 0.06336938472912972, + "grad_norm": 864.805419921875, + "learning_rate": 6.274e-06, + "loss": 38.7416, + "step": 31370 + }, + { + "epoch": 0.06338958536181354, + "grad_norm": 366.3962707519531, + "learning_rate": 6.2760000000000006e-06, + "loss": 28.7714, + "step": 31380 + }, + { + "epoch": 0.06340978599449734, + "grad_norm": 299.3522033691406, + "learning_rate": 6.278000000000001e-06, + "loss": 26.9056, + "step": 31390 + }, + { + "epoch": 0.06342998662718116, + "grad_norm": 617.9619750976562, + "learning_rate": 6.280000000000001e-06, + "loss": 18.5786, + "step": 31400 + }, + { + "epoch": 0.06345018725986498, + "grad_norm": 506.8660888671875, + "learning_rate": 6.282e-06, + "loss": 24.2889, + "step": 31410 + }, + { + "epoch": 0.06347038789254879, + "grad_norm": 244.0608367919922, + "learning_rate": 6.284e-06, + "loss": 21.0116, + "step": 31420 + }, + { + "epoch": 0.06349058852523261, + "grad_norm": 389.8390808105469, + "learning_rate": 6.286000000000001e-06, + "loss": 27.5439, + "step": 31430 + }, + { + "epoch": 0.06351078915791643, + "grad_norm": 474.32275390625, + "learning_rate": 6.288000000000001e-06, + "loss": 37.2891, + "step": 31440 + }, + { + "epoch": 0.06353098979060025, + "grad_norm": 518.6574096679688, + "learning_rate": 6.290000000000001e-06, + "loss": 23.395, + "step": 31450 + }, + { + "epoch": 0.06355119042328405, + "grad_norm": 961.990234375, + "learning_rate": 6.292e-06, + "loss": 26.7638, + "step": 31460 + }, + { + "epoch": 0.06357139105596787, + "grad_norm": 654.8812255859375, + "learning_rate": 6.294e-06, + "loss": 33.6812, + "step": 31470 + }, + { + "epoch": 0.0635915916886517, + "grad_norm": 669.5548706054688, + "learning_rate": 6.296000000000001e-06, + "loss": 29.9715, + "step": 31480 + }, + { + "epoch": 0.0636117923213355, + "grad_norm": 642.5733032226562, + "learning_rate": 6.2980000000000005e-06, + "loss": 30.399, + "step": 31490 + }, + { + "epoch": 0.06363199295401932, + "grad_norm": 1096.3792724609375, + "learning_rate": 6.300000000000001e-06, + "loss": 27.0487, + "step": 31500 + }, + { + "epoch": 0.06365219358670314, + "grad_norm": 182.9304962158203, + "learning_rate": 6.302e-06, + "loss": 28.001, + "step": 31510 + }, + { + "epoch": 0.06367239421938695, + "grad_norm": 481.4287414550781, + "learning_rate": 6.304e-06, + "loss": 14.7878, + "step": 31520 + }, + { + "epoch": 0.06369259485207077, + "grad_norm": 332.9795227050781, + "learning_rate": 6.306000000000001e-06, + "loss": 33.3002, + "step": 31530 + }, + { + "epoch": 0.06371279548475459, + "grad_norm": 222.80126953125, + "learning_rate": 6.308e-06, + "loss": 15.4948, + "step": 31540 + }, + { + "epoch": 0.06373299611743839, + "grad_norm": 375.0700378417969, + "learning_rate": 6.3100000000000006e-06, + "loss": 22.6265, + "step": 31550 + }, + { + "epoch": 0.06375319675012221, + "grad_norm": 
736.5299682617188, + "learning_rate": 6.312000000000001e-06, + "loss": 30.0435, + "step": 31560 + }, + { + "epoch": 0.06377339738280603, + "grad_norm": 557.098876953125, + "learning_rate": 6.314e-06, + "loss": 17.332, + "step": 31570 + }, + { + "epoch": 0.06379359801548984, + "grad_norm": 1109.0279541015625, + "learning_rate": 6.316000000000001e-06, + "loss": 29.9884, + "step": 31580 + }, + { + "epoch": 0.06381379864817366, + "grad_norm": 246.1799774169922, + "learning_rate": 6.318000000000001e-06, + "loss": 20.189, + "step": 31590 + }, + { + "epoch": 0.06383399928085748, + "grad_norm": 131.45822143554688, + "learning_rate": 6.3200000000000005e-06, + "loss": 41.6542, + "step": 31600 + }, + { + "epoch": 0.0638541999135413, + "grad_norm": 508.4758605957031, + "learning_rate": 6.322000000000001e-06, + "loss": 27.1823, + "step": 31610 + }, + { + "epoch": 0.0638744005462251, + "grad_norm": 616.266357421875, + "learning_rate": 6.324e-06, + "loss": 15.2132, + "step": 31620 + }, + { + "epoch": 0.06389460117890892, + "grad_norm": 694.1072998046875, + "learning_rate": 6.326000000000001e-06, + "loss": 20.8863, + "step": 31630 + }, + { + "epoch": 0.06391480181159274, + "grad_norm": 476.1229553222656, + "learning_rate": 6.328000000000001e-06, + "loss": 21.2325, + "step": 31640 + }, + { + "epoch": 0.06393500244427655, + "grad_norm": 584.6588134765625, + "learning_rate": 6.33e-06, + "loss": 24.3116, + "step": 31650 + }, + { + "epoch": 0.06395520307696037, + "grad_norm": 637.4476318359375, + "learning_rate": 6.3320000000000005e-06, + "loss": 33.3525, + "step": 31660 + }, + { + "epoch": 0.06397540370964419, + "grad_norm": 428.1219482421875, + "learning_rate": 6.334e-06, + "loss": 4.7495, + "step": 31670 + }, + { + "epoch": 0.063995604342328, + "grad_norm": 2742.7109375, + "learning_rate": 6.336000000000001e-06, + "loss": 35.9556, + "step": 31680 + }, + { + "epoch": 0.06401580497501182, + "grad_norm": 357.019287109375, + "learning_rate": 6.338000000000001e-06, + "loss": 24.6162, + "step": 31690 + }, + { + "epoch": 0.06403600560769564, + "grad_norm": 378.7497863769531, + "learning_rate": 6.34e-06, + "loss": 28.2976, + "step": 31700 + }, + { + "epoch": 0.06405620624037944, + "grad_norm": 616.9625854492188, + "learning_rate": 6.3420000000000004e-06, + "loss": 44.8492, + "step": 31710 + }, + { + "epoch": 0.06407640687306326, + "grad_norm": 279.8904724121094, + "learning_rate": 6.344e-06, + "loss": 23.5007, + "step": 31720 + }, + { + "epoch": 0.06409660750574708, + "grad_norm": 15.231189727783203, + "learning_rate": 6.346000000000001e-06, + "loss": 24.3203, + "step": 31730 + }, + { + "epoch": 0.06411680813843089, + "grad_norm": 624.3145751953125, + "learning_rate": 6.348000000000001e-06, + "loss": 30.2905, + "step": 31740 + }, + { + "epoch": 0.06413700877111471, + "grad_norm": 362.0500183105469, + "learning_rate": 6.35e-06, + "loss": 23.1855, + "step": 31750 + }, + { + "epoch": 0.06415720940379853, + "grad_norm": 706.1433715820312, + "learning_rate": 6.352e-06, + "loss": 18.3678, + "step": 31760 + }, + { + "epoch": 0.06417741003648235, + "grad_norm": 518.0941162109375, + "learning_rate": 6.3540000000000005e-06, + "loss": 43.9327, + "step": 31770 + }, + { + "epoch": 0.06419761066916616, + "grad_norm": 395.8467102050781, + "learning_rate": 6.356000000000001e-06, + "loss": 42.6036, + "step": 31780 + }, + { + "epoch": 0.06421781130184998, + "grad_norm": 283.4975891113281, + "learning_rate": 6.358000000000001e-06, + "loss": 19.784, + "step": 31790 + }, + { + "epoch": 0.0642380119345338, + "grad_norm": 
301.6070556640625, + "learning_rate": 6.360000000000001e-06, + "loss": 31.1704, + "step": 31800 + }, + { + "epoch": 0.0642582125672176, + "grad_norm": 72.76226043701172, + "learning_rate": 6.362e-06, + "loss": 22.481, + "step": 31810 + }, + { + "epoch": 0.06427841319990142, + "grad_norm": 1006.912353515625, + "learning_rate": 6.364e-06, + "loss": 21.1927, + "step": 31820 + }, + { + "epoch": 0.06429861383258524, + "grad_norm": 550.6497802734375, + "learning_rate": 6.366000000000001e-06, + "loss": 46.4444, + "step": 31830 + }, + { + "epoch": 0.06431881446526905, + "grad_norm": 496.5099792480469, + "learning_rate": 6.368000000000001e-06, + "loss": 8.5152, + "step": 31840 + }, + { + "epoch": 0.06433901509795287, + "grad_norm": 731.7362670898438, + "learning_rate": 6.370000000000001e-06, + "loss": 31.8156, + "step": 31850 + }, + { + "epoch": 0.06435921573063669, + "grad_norm": 705.6895751953125, + "learning_rate": 6.372e-06, + "loss": 31.1569, + "step": 31860 + }, + { + "epoch": 0.0643794163633205, + "grad_norm": 208.77008056640625, + "learning_rate": 6.374e-06, + "loss": 31.0229, + "step": 31870 + }, + { + "epoch": 0.06439961699600431, + "grad_norm": 233.83226013183594, + "learning_rate": 6.376e-06, + "loss": 17.1957, + "step": 31880 + }, + { + "epoch": 0.06441981762868813, + "grad_norm": 683.5333251953125, + "learning_rate": 6.378000000000001e-06, + "loss": 17.1572, + "step": 31890 + }, + { + "epoch": 0.06444001826137194, + "grad_norm": 219.8205108642578, + "learning_rate": 6.380000000000001e-06, + "loss": 23.2732, + "step": 31900 + }, + { + "epoch": 0.06446021889405576, + "grad_norm": 104.92378997802734, + "learning_rate": 6.382e-06, + "loss": 22.6116, + "step": 31910 + }, + { + "epoch": 0.06448041952673958, + "grad_norm": 391.84906005859375, + "learning_rate": 6.384e-06, + "loss": 26.0803, + "step": 31920 + }, + { + "epoch": 0.0645006201594234, + "grad_norm": 468.4878845214844, + "learning_rate": 6.386e-06, + "loss": 26.0893, + "step": 31930 + }, + { + "epoch": 0.0645208207921072, + "grad_norm": 485.8976745605469, + "learning_rate": 6.3880000000000005e-06, + "loss": 33.3459, + "step": 31940 + }, + { + "epoch": 0.06454102142479103, + "grad_norm": 492.9491271972656, + "learning_rate": 6.390000000000001e-06, + "loss": 20.5961, + "step": 31950 + }, + { + "epoch": 0.06456122205747485, + "grad_norm": 346.7865905761719, + "learning_rate": 6.392000000000001e-06, + "loss": 28.4751, + "step": 31960 + }, + { + "epoch": 0.06458142269015865, + "grad_norm": 1298.2874755859375, + "learning_rate": 6.394e-06, + "loss": 31.6345, + "step": 31970 + }, + { + "epoch": 0.06460162332284247, + "grad_norm": 636.1900024414062, + "learning_rate": 6.396e-06, + "loss": 32.0049, + "step": 31980 + }, + { + "epoch": 0.06462182395552629, + "grad_norm": 366.033203125, + "learning_rate": 6.398000000000001e-06, + "loss": 45.5496, + "step": 31990 + }, + { + "epoch": 0.0646420245882101, + "grad_norm": 1303.1556396484375, + "learning_rate": 6.4000000000000006e-06, + "loss": 27.9838, + "step": 32000 + }, + { + "epoch": 0.06466222522089392, + "grad_norm": 295.89105224609375, + "learning_rate": 6.402000000000001e-06, + "loss": 23.4524, + "step": 32010 + }, + { + "epoch": 0.06468242585357774, + "grad_norm": 1682.515625, + "learning_rate": 6.404e-06, + "loss": 42.7231, + "step": 32020 + }, + { + "epoch": 0.06470262648626154, + "grad_norm": 2316.4873046875, + "learning_rate": 6.406e-06, + "loss": 34.1805, + "step": 32030 + }, + { + "epoch": 0.06472282711894536, + "grad_norm": 405.20916748046875, + "learning_rate": 
6.408000000000001e-06, + "loss": 26.5095, + "step": 32040 + }, + { + "epoch": 0.06474302775162918, + "grad_norm": 343.07684326171875, + "learning_rate": 6.4100000000000005e-06, + "loss": 28.8234, + "step": 32050 + }, + { + "epoch": 0.06476322838431299, + "grad_norm": 927.8602294921875, + "learning_rate": 6.412000000000001e-06, + "loss": 32.3761, + "step": 32060 + }, + { + "epoch": 0.06478342901699681, + "grad_norm": 832.3965454101562, + "learning_rate": 6.414e-06, + "loss": 35.6177, + "step": 32070 + }, + { + "epoch": 0.06480362964968063, + "grad_norm": 385.98297119140625, + "learning_rate": 6.416e-06, + "loss": 40.5609, + "step": 32080 + }, + { + "epoch": 0.06482383028236445, + "grad_norm": 680.3007202148438, + "learning_rate": 6.418000000000001e-06, + "loss": 18.3833, + "step": 32090 + }, + { + "epoch": 0.06484403091504826, + "grad_norm": 706.6058349609375, + "learning_rate": 6.42e-06, + "loss": 32.0983, + "step": 32100 + }, + { + "epoch": 0.06486423154773208, + "grad_norm": 458.92181396484375, + "learning_rate": 6.4220000000000005e-06, + "loss": 25.9199, + "step": 32110 + }, + { + "epoch": 0.0648844321804159, + "grad_norm": 312.37945556640625, + "learning_rate": 6.424e-06, + "loss": 17.622, + "step": 32120 + }, + { + "epoch": 0.0649046328130997, + "grad_norm": 419.53485107421875, + "learning_rate": 6.426e-06, + "loss": 38.1022, + "step": 32130 + }, + { + "epoch": 0.06492483344578352, + "grad_norm": 449.0639343261719, + "learning_rate": 6.428000000000001e-06, + "loss": 22.688, + "step": 32140 + }, + { + "epoch": 0.06494503407846734, + "grad_norm": 484.6932678222656, + "learning_rate": 6.43e-06, + "loss": 17.4194, + "step": 32150 + }, + { + "epoch": 0.06496523471115115, + "grad_norm": 705.7068481445312, + "learning_rate": 6.432e-06, + "loss": 28.7297, + "step": 32160 + }, + { + "epoch": 0.06498543534383497, + "grad_norm": 824.8680419921875, + "learning_rate": 6.4340000000000006e-06, + "loss": 32.2345, + "step": 32170 + }, + { + "epoch": 0.06500563597651879, + "grad_norm": 481.2254333496094, + "learning_rate": 6.436e-06, + "loss": 20.6795, + "step": 32180 + }, + { + "epoch": 0.0650258366092026, + "grad_norm": 523.8160400390625, + "learning_rate": 6.438000000000001e-06, + "loss": 21.3793, + "step": 32190 + }, + { + "epoch": 0.06504603724188641, + "grad_norm": 1548.1605224609375, + "learning_rate": 6.440000000000001e-06, + "loss": 26.5394, + "step": 32200 + }, + { + "epoch": 0.06506623787457023, + "grad_norm": 825.455078125, + "learning_rate": 6.442e-06, + "loss": 23.6385, + "step": 32210 + }, + { + "epoch": 0.06508643850725404, + "grad_norm": 539.9469604492188, + "learning_rate": 6.4440000000000005e-06, + "loss": 26.1361, + "step": 32220 + }, + { + "epoch": 0.06510663913993786, + "grad_norm": 320.2283020019531, + "learning_rate": 6.446e-06, + "loss": 26.6205, + "step": 32230 + }, + { + "epoch": 0.06512683977262168, + "grad_norm": 509.67828369140625, + "learning_rate": 6.448000000000001e-06, + "loss": 20.657, + "step": 32240 + }, + { + "epoch": 0.0651470404053055, + "grad_norm": 1097.630615234375, + "learning_rate": 6.450000000000001e-06, + "loss": 33.5726, + "step": 32250 + }, + { + "epoch": 0.0651672410379893, + "grad_norm": 1908.3387451171875, + "learning_rate": 6.452e-06, + "loss": 33.8112, + "step": 32260 + }, + { + "epoch": 0.06518744167067313, + "grad_norm": 439.6868591308594, + "learning_rate": 6.454e-06, + "loss": 30.5285, + "step": 32270 + }, + { + "epoch": 0.06520764230335695, + "grad_norm": 438.2578430175781, + "learning_rate": 6.456e-06, + "loss": 37.5442, + "step": 32280 + }, + 
{ + "epoch": 0.06522784293604075, + "grad_norm": 481.5923156738281, + "learning_rate": 6.458000000000001e-06, + "loss": 28.0664, + "step": 32290 + }, + { + "epoch": 0.06524804356872457, + "grad_norm": 774.6879272460938, + "learning_rate": 6.460000000000001e-06, + "loss": 24.0665, + "step": 32300 + }, + { + "epoch": 0.06526824420140839, + "grad_norm": 291.2333679199219, + "learning_rate": 6.462e-06, + "loss": 18.9364, + "step": 32310 + }, + { + "epoch": 0.0652884448340922, + "grad_norm": 1116.8297119140625, + "learning_rate": 6.464e-06, + "loss": 22.6748, + "step": 32320 + }, + { + "epoch": 0.06530864546677602, + "grad_norm": 247.5875244140625, + "learning_rate": 6.4660000000000004e-06, + "loss": 29.2354, + "step": 32330 + }, + { + "epoch": 0.06532884609945984, + "grad_norm": 364.7977600097656, + "learning_rate": 6.468000000000001e-06, + "loss": 29.4752, + "step": 32340 + }, + { + "epoch": 0.06534904673214365, + "grad_norm": 442.0473327636719, + "learning_rate": 6.470000000000001e-06, + "loss": 22.6673, + "step": 32350 + }, + { + "epoch": 0.06536924736482747, + "grad_norm": 288.97265625, + "learning_rate": 6.472000000000001e-06, + "loss": 20.4483, + "step": 32360 + }, + { + "epoch": 0.06538944799751129, + "grad_norm": 332.4429626464844, + "learning_rate": 6.474e-06, + "loss": 45.8978, + "step": 32370 + }, + { + "epoch": 0.06540964863019509, + "grad_norm": 511.8432922363281, + "learning_rate": 6.476e-06, + "loss": 29.034, + "step": 32380 + }, + { + "epoch": 0.06542984926287891, + "grad_norm": 433.4559631347656, + "learning_rate": 6.478000000000001e-06, + "loss": 31.5313, + "step": 32390 + }, + { + "epoch": 0.06545004989556273, + "grad_norm": 283.2999572753906, + "learning_rate": 6.480000000000001e-06, + "loss": 14.6274, + "step": 32400 + }, + { + "epoch": 0.06547025052824655, + "grad_norm": 1162.132080078125, + "learning_rate": 6.482000000000001e-06, + "loss": 40.5958, + "step": 32410 + }, + { + "epoch": 0.06549045116093036, + "grad_norm": 344.6202392578125, + "learning_rate": 6.484e-06, + "loss": 28.4084, + "step": 32420 + }, + { + "epoch": 0.06551065179361418, + "grad_norm": 27.689971923828125, + "learning_rate": 6.486e-06, + "loss": 31.6779, + "step": 32430 + }, + { + "epoch": 0.065530852426298, + "grad_norm": 364.23773193359375, + "learning_rate": 6.488000000000001e-06, + "loss": 21.4971, + "step": 32440 + }, + { + "epoch": 0.0655510530589818, + "grad_norm": 1166.5234375, + "learning_rate": 6.4900000000000005e-06, + "loss": 37.5931, + "step": 32450 + }, + { + "epoch": 0.06557125369166562, + "grad_norm": 428.5572204589844, + "learning_rate": 6.492000000000001e-06, + "loss": 27.2606, + "step": 32460 + }, + { + "epoch": 0.06559145432434944, + "grad_norm": 1008.4483032226562, + "learning_rate": 6.494e-06, + "loss": 18.103, + "step": 32470 + }, + { + "epoch": 0.06561165495703325, + "grad_norm": 712.0404052734375, + "learning_rate": 6.496e-06, + "loss": 36.1967, + "step": 32480 + }, + { + "epoch": 0.06563185558971707, + "grad_norm": 345.80810546875, + "learning_rate": 6.498000000000001e-06, + "loss": 28.1766, + "step": 32490 + }, + { + "epoch": 0.06565205622240089, + "grad_norm": 260.66192626953125, + "learning_rate": 6.5000000000000004e-06, + "loss": 9.4512, + "step": 32500 + }, + { + "epoch": 0.0656722568550847, + "grad_norm": 346.3865051269531, + "learning_rate": 6.502000000000001e-06, + "loss": 43.6116, + "step": 32510 + }, + { + "epoch": 0.06569245748776852, + "grad_norm": 260.44671630859375, + "learning_rate": 6.504e-06, + "loss": 19.4687, + "step": 32520 + }, + { + "epoch": 
0.06571265812045234, + "grad_norm": 568.5048828125, + "learning_rate": 6.506e-06, + "loss": 28.7252, + "step": 32530 + }, + { + "epoch": 0.06573285875313614, + "grad_norm": 391.73309326171875, + "learning_rate": 6.508000000000001e-06, + "loss": 38.2764, + "step": 32540 + }, + { + "epoch": 0.06575305938581996, + "grad_norm": 367.7117004394531, + "learning_rate": 6.51e-06, + "loss": 48.0191, + "step": 32550 + }, + { + "epoch": 0.06577326001850378, + "grad_norm": 991.9547119140625, + "learning_rate": 6.5120000000000005e-06, + "loss": 50.86, + "step": 32560 + }, + { + "epoch": 0.0657934606511876, + "grad_norm": 620.3666381835938, + "learning_rate": 6.514000000000001e-06, + "loss": 24.1156, + "step": 32570 + }, + { + "epoch": 0.06581366128387141, + "grad_norm": 265.3491516113281, + "learning_rate": 6.516e-06, + "loss": 31.6695, + "step": 32580 + }, + { + "epoch": 0.06583386191655523, + "grad_norm": 793.039306640625, + "learning_rate": 6.518000000000001e-06, + "loss": 42.4639, + "step": 32590 + }, + { + "epoch": 0.06585406254923905, + "grad_norm": 682.8428955078125, + "learning_rate": 6.520000000000001e-06, + "loss": 34.3652, + "step": 32600 + }, + { + "epoch": 0.06587426318192285, + "grad_norm": 316.3118896484375, + "learning_rate": 6.522e-06, + "loss": 30.0757, + "step": 32610 + }, + { + "epoch": 0.06589446381460667, + "grad_norm": 754.0253295898438, + "learning_rate": 6.5240000000000006e-06, + "loss": 21.8935, + "step": 32620 + }, + { + "epoch": 0.0659146644472905, + "grad_norm": 1146.3603515625, + "learning_rate": 6.526e-06, + "loss": 31.3927, + "step": 32630 + }, + { + "epoch": 0.0659348650799743, + "grad_norm": 555.3991088867188, + "learning_rate": 6.528000000000001e-06, + "loss": 21.5758, + "step": 32640 + }, + { + "epoch": 0.06595506571265812, + "grad_norm": 314.7017517089844, + "learning_rate": 6.530000000000001e-06, + "loss": 38.0184, + "step": 32650 + }, + { + "epoch": 0.06597526634534194, + "grad_norm": 259.7373962402344, + "learning_rate": 6.532e-06, + "loss": 29.7005, + "step": 32660 + }, + { + "epoch": 0.06599546697802575, + "grad_norm": 671.1743774414062, + "learning_rate": 6.5340000000000005e-06, + "loss": 23.3408, + "step": 32670 + }, + { + "epoch": 0.06601566761070957, + "grad_norm": 899.598876953125, + "learning_rate": 6.536e-06, + "loss": 32.6499, + "step": 32680 + }, + { + "epoch": 0.06603586824339339, + "grad_norm": 207.99526977539062, + "learning_rate": 6.538000000000001e-06, + "loss": 40.8453, + "step": 32690 + }, + { + "epoch": 0.06605606887607719, + "grad_norm": 759.3076782226562, + "learning_rate": 6.540000000000001e-06, + "loss": 32.5255, + "step": 32700 + }, + { + "epoch": 0.06607626950876101, + "grad_norm": 131.973388671875, + "learning_rate": 6.542e-06, + "loss": 29.4312, + "step": 32710 + }, + { + "epoch": 0.06609647014144483, + "grad_norm": 171.9645233154297, + "learning_rate": 6.544e-06, + "loss": 19.8939, + "step": 32720 + }, + { + "epoch": 0.06611667077412865, + "grad_norm": 578.665283203125, + "learning_rate": 6.5460000000000005e-06, + "loss": 29.4161, + "step": 32730 + }, + { + "epoch": 0.06613687140681246, + "grad_norm": 300.7922668457031, + "learning_rate": 6.548000000000001e-06, + "loss": 18.9477, + "step": 32740 + }, + { + "epoch": 0.06615707203949628, + "grad_norm": 429.305419921875, + "learning_rate": 6.550000000000001e-06, + "loss": 27.5192, + "step": 32750 + }, + { + "epoch": 0.0661772726721801, + "grad_norm": 475.9083251953125, + "learning_rate": 6.552000000000001e-06, + "loss": 29.6226, + "step": 32760 + }, + { + "epoch": 0.0661974733048639, + 
"grad_norm": 243.16636657714844, + "learning_rate": 6.554e-06, + "loss": 29.2542, + "step": 32770 + }, + { + "epoch": 0.06621767393754772, + "grad_norm": 330.93707275390625, + "learning_rate": 6.556e-06, + "loss": 25.9672, + "step": 32780 + }, + { + "epoch": 0.06623787457023154, + "grad_norm": 633.3106079101562, + "learning_rate": 6.558000000000001e-06, + "loss": 16.0552, + "step": 32790 + }, + { + "epoch": 0.06625807520291535, + "grad_norm": 676.6880493164062, + "learning_rate": 6.560000000000001e-06, + "loss": 25.8537, + "step": 32800 + }, + { + "epoch": 0.06627827583559917, + "grad_norm": 398.6380310058594, + "learning_rate": 6.562000000000001e-06, + "loss": 23.9169, + "step": 32810 + }, + { + "epoch": 0.06629847646828299, + "grad_norm": 1161.01171875, + "learning_rate": 6.564e-06, + "loss": 26.0018, + "step": 32820 + }, + { + "epoch": 0.0663186771009668, + "grad_norm": 528.7507934570312, + "learning_rate": 6.566e-06, + "loss": 33.0478, + "step": 32830 + }, + { + "epoch": 0.06633887773365062, + "grad_norm": 517.101806640625, + "learning_rate": 6.568000000000001e-06, + "loss": 16.322, + "step": 32840 + }, + { + "epoch": 0.06635907836633444, + "grad_norm": 289.5611572265625, + "learning_rate": 6.570000000000001e-06, + "loss": 16.0017, + "step": 32850 + }, + { + "epoch": 0.06637927899901824, + "grad_norm": 819.7901611328125, + "learning_rate": 6.572000000000001e-06, + "loss": 18.3419, + "step": 32860 + }, + { + "epoch": 0.06639947963170206, + "grad_norm": 297.51336669921875, + "learning_rate": 6.574e-06, + "loss": 30.9874, + "step": 32870 + }, + { + "epoch": 0.06641968026438588, + "grad_norm": 402.6764221191406, + "learning_rate": 6.576e-06, + "loss": 19.8634, + "step": 32880 + }, + { + "epoch": 0.0664398808970697, + "grad_norm": 232.97459411621094, + "learning_rate": 6.578000000000001e-06, + "loss": 22.6124, + "step": 32890 + }, + { + "epoch": 0.06646008152975351, + "grad_norm": 69.75105285644531, + "learning_rate": 6.5800000000000005e-06, + "loss": 17.1709, + "step": 32900 + }, + { + "epoch": 0.06648028216243733, + "grad_norm": 387.6180725097656, + "learning_rate": 6.582000000000001e-06, + "loss": 19.689, + "step": 32910 + }, + { + "epoch": 0.06650048279512115, + "grad_norm": 84.04486083984375, + "learning_rate": 6.584e-06, + "loss": 20.6682, + "step": 32920 + }, + { + "epoch": 0.06652068342780496, + "grad_norm": 775.0733642578125, + "learning_rate": 6.586e-06, + "loss": 23.5076, + "step": 32930 + }, + { + "epoch": 0.06654088406048878, + "grad_norm": 890.520751953125, + "learning_rate": 6.588000000000001e-06, + "loss": 29.0297, + "step": 32940 + }, + { + "epoch": 0.0665610846931726, + "grad_norm": 527.3336791992188, + "learning_rate": 6.5900000000000004e-06, + "loss": 14.0379, + "step": 32950 + }, + { + "epoch": 0.0665812853258564, + "grad_norm": 458.6322326660156, + "learning_rate": 6.592000000000001e-06, + "loss": 19.9573, + "step": 32960 + }, + { + "epoch": 0.06660148595854022, + "grad_norm": 520.3087158203125, + "learning_rate": 6.594000000000001e-06, + "loss": 25.0068, + "step": 32970 + }, + { + "epoch": 0.06662168659122404, + "grad_norm": 451.0412902832031, + "learning_rate": 6.596e-06, + "loss": 19.663, + "step": 32980 + }, + { + "epoch": 0.06664188722390785, + "grad_norm": 375.05084228515625, + "learning_rate": 6.598000000000001e-06, + "loss": 19.3326, + "step": 32990 + }, + { + "epoch": 0.06666208785659167, + "grad_norm": 502.0182189941406, + "learning_rate": 6.600000000000001e-06, + "loss": 33.7013, + "step": 33000 + }, + { + "epoch": 0.06668228848927549, + "grad_norm": 
208.86871337890625, + "learning_rate": 6.6020000000000005e-06, + "loss": 22.5661, + "step": 33010 + }, + { + "epoch": 0.0667024891219593, + "grad_norm": 312.8105163574219, + "learning_rate": 6.604000000000001e-06, + "loss": 18.6504, + "step": 33020 + }, + { + "epoch": 0.06672268975464311, + "grad_norm": 385.7571716308594, + "learning_rate": 6.606e-06, + "loss": 18.8472, + "step": 33030 + }, + { + "epoch": 0.06674289038732693, + "grad_norm": 104.6175765991211, + "learning_rate": 6.608000000000001e-06, + "loss": 42.2416, + "step": 33040 + }, + { + "epoch": 0.06676309102001075, + "grad_norm": 884.9609375, + "learning_rate": 6.610000000000001e-06, + "loss": 31.9267, + "step": 33050 + }, + { + "epoch": 0.06678329165269456, + "grad_norm": 300.7400817871094, + "learning_rate": 6.612e-06, + "loss": 14.5247, + "step": 33060 + }, + { + "epoch": 0.06680349228537838, + "grad_norm": 644.3165283203125, + "learning_rate": 6.6140000000000005e-06, + "loss": 17.6598, + "step": 33070 + }, + { + "epoch": 0.0668236929180622, + "grad_norm": 379.55096435546875, + "learning_rate": 6.616e-06, + "loss": 22.8861, + "step": 33080 + }, + { + "epoch": 0.066843893550746, + "grad_norm": 1268.85888671875, + "learning_rate": 6.618000000000001e-06, + "loss": 33.9961, + "step": 33090 + }, + { + "epoch": 0.06686409418342983, + "grad_norm": 794.061767578125, + "learning_rate": 6.620000000000001e-06, + "loss": 17.8204, + "step": 33100 + }, + { + "epoch": 0.06688429481611365, + "grad_norm": 170.29322814941406, + "learning_rate": 6.622e-06, + "loss": 33.9078, + "step": 33110 + }, + { + "epoch": 0.06690449544879745, + "grad_norm": 742.6322021484375, + "learning_rate": 6.6240000000000004e-06, + "loss": 17.3571, + "step": 33120 + }, + { + "epoch": 0.06692469608148127, + "grad_norm": 141.3765869140625, + "learning_rate": 6.626000000000001e-06, + "loss": 28.4878, + "step": 33130 + }, + { + "epoch": 0.06694489671416509, + "grad_norm": 505.18768310546875, + "learning_rate": 6.628e-06, + "loss": 23.0358, + "step": 33140 + }, + { + "epoch": 0.0669650973468489, + "grad_norm": 922.85009765625, + "learning_rate": 6.630000000000001e-06, + "loss": 37.6132, + "step": 33150 + }, + { + "epoch": 0.06698529797953272, + "grad_norm": 694.0914916992188, + "learning_rate": 6.632000000000001e-06, + "loss": 21.8531, + "step": 33160 + }, + { + "epoch": 0.06700549861221654, + "grad_norm": 326.4519958496094, + "learning_rate": 6.634e-06, + "loss": 26.4938, + "step": 33170 + }, + { + "epoch": 0.06702569924490034, + "grad_norm": 842.4217529296875, + "learning_rate": 6.6360000000000005e-06, + "loss": 38.9571, + "step": 33180 + }, + { + "epoch": 0.06704589987758416, + "grad_norm": 392.56304931640625, + "learning_rate": 6.638e-06, + "loss": 47.6006, + "step": 33190 + }, + { + "epoch": 0.06706610051026798, + "grad_norm": 550.9373779296875, + "learning_rate": 6.640000000000001e-06, + "loss": 23.8491, + "step": 33200 + }, + { + "epoch": 0.0670863011429518, + "grad_norm": 589.4515991210938, + "learning_rate": 6.642000000000001e-06, + "loss": 32.0475, + "step": 33210 + }, + { + "epoch": 0.06710650177563561, + "grad_norm": 214.31448364257812, + "learning_rate": 6.644e-06, + "loss": 24.7882, + "step": 33220 + }, + { + "epoch": 0.06712670240831943, + "grad_norm": 458.9374694824219, + "learning_rate": 6.646e-06, + "loss": 29.4549, + "step": 33230 + }, + { + "epoch": 0.06714690304100325, + "grad_norm": 716.244140625, + "learning_rate": 6.648e-06, + "loss": 29.5145, + "step": 33240 + }, + { + "epoch": 0.06716710367368706, + "grad_norm": 407.79412841796875, + 
"learning_rate": 6.650000000000001e-06, + "loss": 31.9646, + "step": 33250 + }, + { + "epoch": 0.06718730430637088, + "grad_norm": 821.392333984375, + "learning_rate": 6.652000000000001e-06, + "loss": 27.7069, + "step": 33260 + }, + { + "epoch": 0.0672075049390547, + "grad_norm": 598.4210205078125, + "learning_rate": 6.654e-06, + "loss": 28.4975, + "step": 33270 + }, + { + "epoch": 0.0672277055717385, + "grad_norm": 488.873046875, + "learning_rate": 6.656e-06, + "loss": 45.5605, + "step": 33280 + }, + { + "epoch": 0.06724790620442232, + "grad_norm": 1378.087646484375, + "learning_rate": 6.658e-06, + "loss": 27.3599, + "step": 33290 + }, + { + "epoch": 0.06726810683710614, + "grad_norm": 251.3562774658203, + "learning_rate": 6.660000000000001e-06, + "loss": 17.7338, + "step": 33300 + }, + { + "epoch": 0.06728830746978995, + "grad_norm": 486.3319091796875, + "learning_rate": 6.662000000000001e-06, + "loss": 15.1403, + "step": 33310 + }, + { + "epoch": 0.06730850810247377, + "grad_norm": 283.7904052734375, + "learning_rate": 6.664e-06, + "loss": 26.435, + "step": 33320 + }, + { + "epoch": 0.06732870873515759, + "grad_norm": 448.2616882324219, + "learning_rate": 6.666e-06, + "loss": 26.329, + "step": 33330 + }, + { + "epoch": 0.0673489093678414, + "grad_norm": 294.36798095703125, + "learning_rate": 6.668e-06, + "loss": 27.094, + "step": 33340 + }, + { + "epoch": 0.06736911000052521, + "grad_norm": 496.7976989746094, + "learning_rate": 6.6700000000000005e-06, + "loss": 18.7006, + "step": 33350 + }, + { + "epoch": 0.06738931063320903, + "grad_norm": 505.17828369140625, + "learning_rate": 6.672000000000001e-06, + "loss": 22.3869, + "step": 33360 + }, + { + "epoch": 0.06740951126589285, + "grad_norm": 1472.1234130859375, + "learning_rate": 6.674000000000001e-06, + "loss": 20.5633, + "step": 33370 + }, + { + "epoch": 0.06742971189857666, + "grad_norm": 226.0415802001953, + "learning_rate": 6.676e-06, + "loss": 33.7317, + "step": 33380 + }, + { + "epoch": 0.06744991253126048, + "grad_norm": 208.1175079345703, + "learning_rate": 6.678e-06, + "loss": 26.1456, + "step": 33390 + }, + { + "epoch": 0.0674701131639443, + "grad_norm": 288.8869323730469, + "learning_rate": 6.680000000000001e-06, + "loss": 23.2932, + "step": 33400 + }, + { + "epoch": 0.0674903137966281, + "grad_norm": 67.0960922241211, + "learning_rate": 6.6820000000000006e-06, + "loss": 26.3463, + "step": 33410 + }, + { + "epoch": 0.06751051442931193, + "grad_norm": 340.869140625, + "learning_rate": 6.684000000000001e-06, + "loss": 21.618, + "step": 33420 + }, + { + "epoch": 0.06753071506199575, + "grad_norm": 330.3170471191406, + "learning_rate": 6.686e-06, + "loss": 28.7773, + "step": 33430 + }, + { + "epoch": 0.06755091569467955, + "grad_norm": 529.8251953125, + "learning_rate": 6.688e-06, + "loss": 21.6966, + "step": 33440 + }, + { + "epoch": 0.06757111632736337, + "grad_norm": 46.65123748779297, + "learning_rate": 6.690000000000001e-06, + "loss": 31.1109, + "step": 33450 + }, + { + "epoch": 0.06759131696004719, + "grad_norm": 809.0994873046875, + "learning_rate": 6.6920000000000005e-06, + "loss": 24.3731, + "step": 33460 + }, + { + "epoch": 0.067611517592731, + "grad_norm": 439.84765625, + "learning_rate": 6.694000000000001e-06, + "loss": 23.559, + "step": 33470 + }, + { + "epoch": 0.06763171822541482, + "grad_norm": 334.27618408203125, + "learning_rate": 6.696e-06, + "loss": 23.3025, + "step": 33480 + }, + { + "epoch": 0.06765191885809864, + "grad_norm": 465.69964599609375, + "learning_rate": 6.698e-06, + "loss": 25.1898, + "step": 
33490 + }, + { + "epoch": 0.06767211949078245, + "grad_norm": 409.4376525878906, + "learning_rate": 6.700000000000001e-06, + "loss": 18.741, + "step": 33500 + }, + { + "epoch": 0.06769232012346627, + "grad_norm": 1168.0740966796875, + "learning_rate": 6.702e-06, + "loss": 29.5806, + "step": 33510 + }, + { + "epoch": 0.06771252075615009, + "grad_norm": 647.8543701171875, + "learning_rate": 6.7040000000000005e-06, + "loss": 17.401, + "step": 33520 + }, + { + "epoch": 0.06773272138883389, + "grad_norm": 760.6053466796875, + "learning_rate": 6.706000000000001e-06, + "loss": 23.0411, + "step": 33530 + }, + { + "epoch": 0.06775292202151771, + "grad_norm": 522.3080444335938, + "learning_rate": 6.708e-06, + "loss": 21.4989, + "step": 33540 + }, + { + "epoch": 0.06777312265420153, + "grad_norm": 568.3932495117188, + "learning_rate": 6.710000000000001e-06, + "loss": 28.6894, + "step": 33550 + }, + { + "epoch": 0.06779332328688535, + "grad_norm": 320.2074279785156, + "learning_rate": 6.712000000000001e-06, + "loss": 32.6937, + "step": 33560 + }, + { + "epoch": 0.06781352391956916, + "grad_norm": 511.3542785644531, + "learning_rate": 6.7140000000000004e-06, + "loss": 22.9541, + "step": 33570 + }, + { + "epoch": 0.06783372455225298, + "grad_norm": 508.94610595703125, + "learning_rate": 6.716000000000001e-06, + "loss": 32.7813, + "step": 33580 + }, + { + "epoch": 0.0678539251849368, + "grad_norm": 158.2616729736328, + "learning_rate": 6.718e-06, + "loss": 29.6369, + "step": 33590 + }, + { + "epoch": 0.0678741258176206, + "grad_norm": 801.9617309570312, + "learning_rate": 6.720000000000001e-06, + "loss": 26.601, + "step": 33600 + }, + { + "epoch": 0.06789432645030442, + "grad_norm": 248.82423400878906, + "learning_rate": 6.722000000000001e-06, + "loss": 58.0029, + "step": 33610 + }, + { + "epoch": 0.06791452708298824, + "grad_norm": 1176.00341796875, + "learning_rate": 6.724e-06, + "loss": 37.8949, + "step": 33620 + }, + { + "epoch": 0.06793472771567205, + "grad_norm": 1805.271728515625, + "learning_rate": 6.7260000000000005e-06, + "loss": 34.2037, + "step": 33630 + }, + { + "epoch": 0.06795492834835587, + "grad_norm": 403.9906311035156, + "learning_rate": 6.728e-06, + "loss": 22.5022, + "step": 33640 + }, + { + "epoch": 0.06797512898103969, + "grad_norm": 635.4937133789062, + "learning_rate": 6.730000000000001e-06, + "loss": 23.3953, + "step": 33650 + }, + { + "epoch": 0.0679953296137235, + "grad_norm": 622.2322998046875, + "learning_rate": 6.732000000000001e-06, + "loss": 23.7058, + "step": 33660 + }, + { + "epoch": 0.06801553024640732, + "grad_norm": 362.6100158691406, + "learning_rate": 6.734e-06, + "loss": 17.6944, + "step": 33670 + }, + { + "epoch": 0.06803573087909114, + "grad_norm": 689.5000610351562, + "learning_rate": 6.736e-06, + "loss": 43.9024, + "step": 33680 + }, + { + "epoch": 0.06805593151177494, + "grad_norm": 245.59693908691406, + "learning_rate": 6.738e-06, + "loss": 24.8223, + "step": 33690 + }, + { + "epoch": 0.06807613214445876, + "grad_norm": 1100.160400390625, + "learning_rate": 6.740000000000001e-06, + "loss": 30.857, + "step": 33700 + }, + { + "epoch": 0.06809633277714258, + "grad_norm": 413.7121276855469, + "learning_rate": 6.742000000000001e-06, + "loss": 20.6621, + "step": 33710 + }, + { + "epoch": 0.0681165334098264, + "grad_norm": 1544.26904296875, + "learning_rate": 6.744e-06, + "loss": 41.2719, + "step": 33720 + }, + { + "epoch": 0.06813673404251021, + "grad_norm": 639.9199829101562, + "learning_rate": 6.746e-06, + "loss": 43.955, + "step": 33730 + }, + { + "epoch": 
0.06815693467519403, + "grad_norm": 280.11669921875, + "learning_rate": 6.7480000000000004e-06, + "loss": 27.9973, + "step": 33740 + }, + { + "epoch": 0.06817713530787785, + "grad_norm": 436.47222900390625, + "learning_rate": 6.750000000000001e-06, + "loss": 19.7929, + "step": 33750 + }, + { + "epoch": 0.06819733594056165, + "grad_norm": 411.2103576660156, + "learning_rate": 6.752000000000001e-06, + "loss": 31.768, + "step": 33760 + }, + { + "epoch": 0.06821753657324547, + "grad_norm": 815.0191650390625, + "learning_rate": 6.754000000000001e-06, + "loss": 22.2022, + "step": 33770 + }, + { + "epoch": 0.0682377372059293, + "grad_norm": 494.87969970703125, + "learning_rate": 6.756e-06, + "loss": 22.3034, + "step": 33780 + }, + { + "epoch": 0.0682579378386131, + "grad_norm": 394.14605712890625, + "learning_rate": 6.758e-06, + "loss": 32.416, + "step": 33790 + }, + { + "epoch": 0.06827813847129692, + "grad_norm": 46.48622512817383, + "learning_rate": 6.760000000000001e-06, + "loss": 31.4422, + "step": 33800 + }, + { + "epoch": 0.06829833910398074, + "grad_norm": 711.196533203125, + "learning_rate": 6.762000000000001e-06, + "loss": 27.2625, + "step": 33810 + }, + { + "epoch": 0.06831853973666455, + "grad_norm": 304.4548034667969, + "learning_rate": 6.764000000000001e-06, + "loss": 27.4918, + "step": 33820 + }, + { + "epoch": 0.06833874036934837, + "grad_norm": 586.8805541992188, + "learning_rate": 6.766e-06, + "loss": 25.6049, + "step": 33830 + }, + { + "epoch": 0.06835894100203219, + "grad_norm": 508.9487609863281, + "learning_rate": 6.768e-06, + "loss": 31.5067, + "step": 33840 + }, + { + "epoch": 0.06837914163471599, + "grad_norm": 1170.845703125, + "learning_rate": 6.770000000000001e-06, + "loss": 39.1824, + "step": 33850 + }, + { + "epoch": 0.06839934226739981, + "grad_norm": 595.26904296875, + "learning_rate": 6.7720000000000006e-06, + "loss": 17.4453, + "step": 33860 + }, + { + "epoch": 0.06841954290008363, + "grad_norm": 496.1965026855469, + "learning_rate": 6.774000000000001e-06, + "loss": 33.8451, + "step": 33870 + }, + { + "epoch": 0.06843974353276745, + "grad_norm": 356.40869140625, + "learning_rate": 6.776e-06, + "loss": 31.5227, + "step": 33880 + }, + { + "epoch": 0.06845994416545126, + "grad_norm": 473.4547119140625, + "learning_rate": 6.778e-06, + "loss": 19.9006, + "step": 33890 + }, + { + "epoch": 0.06848014479813508, + "grad_norm": 346.06463623046875, + "learning_rate": 6.780000000000001e-06, + "loss": 19.9775, + "step": 33900 + }, + { + "epoch": 0.0685003454308189, + "grad_norm": 676.5089111328125, + "learning_rate": 6.7820000000000005e-06, + "loss": 43.2861, + "step": 33910 + }, + { + "epoch": 0.0685205460635027, + "grad_norm": 493.92626953125, + "learning_rate": 6.784000000000001e-06, + "loss": 24.8858, + "step": 33920 + }, + { + "epoch": 0.06854074669618652, + "grad_norm": 289.1258850097656, + "learning_rate": 6.786000000000001e-06, + "loss": 34.1276, + "step": 33930 + }, + { + "epoch": 0.06856094732887034, + "grad_norm": 1374.667236328125, + "learning_rate": 6.788e-06, + "loss": 36.3009, + "step": 33940 + }, + { + "epoch": 0.06858114796155415, + "grad_norm": 400.775146484375, + "learning_rate": 6.790000000000001e-06, + "loss": 21.4908, + "step": 33950 + }, + { + "epoch": 0.06860134859423797, + "grad_norm": 117.53766632080078, + "learning_rate": 6.792000000000001e-06, + "loss": 29.6389, + "step": 33960 + }, + { + "epoch": 0.06862154922692179, + "grad_norm": 472.3504638671875, + "learning_rate": 6.7940000000000005e-06, + "loss": 31.6954, + "step": 33970 + }, + { + "epoch": 
0.0686417498596056, + "grad_norm": 478.7676086425781, + "learning_rate": 6.796000000000001e-06, + "loss": 36.4441, + "step": 33980 + }, + { + "epoch": 0.06866195049228942, + "grad_norm": 0.0, + "learning_rate": 6.798e-06, + "loss": 76.5987, + "step": 33990 + }, + { + "epoch": 0.06868215112497324, + "grad_norm": 302.47833251953125, + "learning_rate": 6.800000000000001e-06, + "loss": 22.5997, + "step": 34000 + }, + { + "epoch": 0.06870235175765704, + "grad_norm": 469.24676513671875, + "learning_rate": 6.802000000000001e-06, + "loss": 26.1139, + "step": 34010 + }, + { + "epoch": 0.06872255239034086, + "grad_norm": 500.22796630859375, + "learning_rate": 6.804e-06, + "loss": 26.9558, + "step": 34020 + }, + { + "epoch": 0.06874275302302468, + "grad_norm": 539.3820190429688, + "learning_rate": 6.8060000000000006e-06, + "loss": 28.284, + "step": 34030 + }, + { + "epoch": 0.0687629536557085, + "grad_norm": 205.44790649414062, + "learning_rate": 6.808e-06, + "loss": 24.4893, + "step": 34040 + }, + { + "epoch": 0.06878315428839231, + "grad_norm": 1270.7779541015625, + "learning_rate": 6.810000000000001e-06, + "loss": 28.4215, + "step": 34050 + }, + { + "epoch": 0.06880335492107613, + "grad_norm": 177.77996826171875, + "learning_rate": 6.812000000000001e-06, + "loss": 28.7874, + "step": 34060 + }, + { + "epoch": 0.06882355555375995, + "grad_norm": 363.94549560546875, + "learning_rate": 6.814e-06, + "loss": 27.8043, + "step": 34070 + }, + { + "epoch": 0.06884375618644376, + "grad_norm": 990.454833984375, + "learning_rate": 6.8160000000000005e-06, + "loss": 31.9178, + "step": 34080 + }, + { + "epoch": 0.06886395681912758, + "grad_norm": 265.3861999511719, + "learning_rate": 6.818e-06, + "loss": 21.8929, + "step": 34090 + }, + { + "epoch": 0.0688841574518114, + "grad_norm": 8.25837516784668, + "learning_rate": 6.820000000000001e-06, + "loss": 39.5831, + "step": 34100 + }, + { + "epoch": 0.0689043580844952, + "grad_norm": 376.444580078125, + "learning_rate": 6.822000000000001e-06, + "loss": 24.0087, + "step": 34110 + }, + { + "epoch": 0.06892455871717902, + "grad_norm": 737.147216796875, + "learning_rate": 6.824e-06, + "loss": 25.5048, + "step": 34120 + }, + { + "epoch": 0.06894475934986284, + "grad_norm": 423.600341796875, + "learning_rate": 6.826e-06, + "loss": 13.3663, + "step": 34130 + }, + { + "epoch": 0.06896495998254665, + "grad_norm": 554.6629028320312, + "learning_rate": 6.8280000000000005e-06, + "loss": 32.9468, + "step": 34140 + }, + { + "epoch": 0.06898516061523047, + "grad_norm": 894.8736572265625, + "learning_rate": 6.830000000000001e-06, + "loss": 16.0512, + "step": 34150 + }, + { + "epoch": 0.06900536124791429, + "grad_norm": 596.2340087890625, + "learning_rate": 6.832000000000001e-06, + "loss": 20.5294, + "step": 34160 + }, + { + "epoch": 0.0690255618805981, + "grad_norm": 782.9882202148438, + "learning_rate": 6.834000000000001e-06, + "loss": 21.4276, + "step": 34170 + }, + { + "epoch": 0.06904576251328191, + "grad_norm": 761.8718872070312, + "learning_rate": 6.836e-06, + "loss": 22.4803, + "step": 34180 + }, + { + "epoch": 0.06906596314596573, + "grad_norm": 400.63726806640625, + "learning_rate": 6.8380000000000004e-06, + "loss": 26.6845, + "step": 34190 + }, + { + "epoch": 0.06908616377864955, + "grad_norm": 430.4891357421875, + "learning_rate": 6.8400000000000014e-06, + "loss": 32.4808, + "step": 34200 + }, + { + "epoch": 0.06910636441133336, + "grad_norm": 244.76751708984375, + "learning_rate": 6.842000000000001e-06, + "loss": 32.8935, + "step": 34210 + }, + { + "epoch": 
0.06912656504401718, + "grad_norm": 432.182861328125, + "learning_rate": 6.844000000000001e-06, + "loss": 32.0066, + "step": 34220 + }, + { + "epoch": 0.069146765676701, + "grad_norm": 583.138671875, + "learning_rate": 6.846e-06, + "loss": 18.7571, + "step": 34230 + }, + { + "epoch": 0.0691669663093848, + "grad_norm": 120.03465270996094, + "learning_rate": 6.848e-06, + "loss": 18.3551, + "step": 34240 + }, + { + "epoch": 0.06918716694206863, + "grad_norm": 786.241943359375, + "learning_rate": 6.850000000000001e-06, + "loss": 28.8558, + "step": 34250 + }, + { + "epoch": 0.06920736757475245, + "grad_norm": 479.44879150390625, + "learning_rate": 6.852000000000001e-06, + "loss": 28.4533, + "step": 34260 + }, + { + "epoch": 0.06922756820743625, + "grad_norm": 770.8941040039062, + "learning_rate": 6.854000000000001e-06, + "loss": 28.8545, + "step": 34270 + }, + { + "epoch": 0.06924776884012007, + "grad_norm": 744.919921875, + "learning_rate": 6.856e-06, + "loss": 29.4833, + "step": 34280 + }, + { + "epoch": 0.06926796947280389, + "grad_norm": 441.5009460449219, + "learning_rate": 6.858e-06, + "loss": 32.4952, + "step": 34290 + }, + { + "epoch": 0.0692881701054877, + "grad_norm": 2179.65625, + "learning_rate": 6.860000000000001e-06, + "loss": 24.934, + "step": 34300 + }, + { + "epoch": 0.06930837073817152, + "grad_norm": 671.8490600585938, + "learning_rate": 6.8620000000000005e-06, + "loss": 25.6753, + "step": 34310 + }, + { + "epoch": 0.06932857137085534, + "grad_norm": 768.4308471679688, + "learning_rate": 6.864000000000001e-06, + "loss": 24.2447, + "step": 34320 + }, + { + "epoch": 0.06934877200353914, + "grad_norm": 901.4434814453125, + "learning_rate": 6.866000000000001e-06, + "loss": 44.3051, + "step": 34330 + }, + { + "epoch": 0.06936897263622296, + "grad_norm": 268.15606689453125, + "learning_rate": 6.868e-06, + "loss": 16.9973, + "step": 34340 + }, + { + "epoch": 0.06938917326890678, + "grad_norm": 617.8140869140625, + "learning_rate": 6.870000000000001e-06, + "loss": 21.9639, + "step": 34350 + }, + { + "epoch": 0.0694093739015906, + "grad_norm": 1412.798095703125, + "learning_rate": 6.872000000000001e-06, + "loss": 40.11, + "step": 34360 + }, + { + "epoch": 0.06942957453427441, + "grad_norm": 1573.531494140625, + "learning_rate": 6.874000000000001e-06, + "loss": 35.3232, + "step": 34370 + }, + { + "epoch": 0.06944977516695823, + "grad_norm": 717.170654296875, + "learning_rate": 6.876000000000001e-06, + "loss": 32.4138, + "step": 34380 + }, + { + "epoch": 0.06946997579964205, + "grad_norm": 529.8059692382812, + "learning_rate": 6.878e-06, + "loss": 30.7321, + "step": 34390 + }, + { + "epoch": 0.06949017643232586, + "grad_norm": 435.5838623046875, + "learning_rate": 6.88e-06, + "loss": 17.2373, + "step": 34400 + }, + { + "epoch": 0.06951037706500968, + "grad_norm": 550.3357543945312, + "learning_rate": 6.882000000000001e-06, + "loss": 28.1552, + "step": 34410 + }, + { + "epoch": 0.0695305776976935, + "grad_norm": 620.171875, + "learning_rate": 6.8840000000000005e-06, + "loss": 17.8972, + "step": 34420 + }, + { + "epoch": 0.0695507783303773, + "grad_norm": 333.9394836425781, + "learning_rate": 6.886000000000001e-06, + "loss": 25.1844, + "step": 34430 + }, + { + "epoch": 0.06957097896306112, + "grad_norm": 400.9455261230469, + "learning_rate": 6.888e-06, + "loss": 35.5136, + "step": 34440 + }, + { + "epoch": 0.06959117959574494, + "grad_norm": 664.9127807617188, + "learning_rate": 6.89e-06, + "loss": 35.7593, + "step": 34450 + }, + { + "epoch": 0.06961138022842875, + "grad_norm": 
761.8721923828125, + "learning_rate": 6.892000000000001e-06, + "loss": 26.314, + "step": 34460 + }, + { + "epoch": 0.06963158086111257, + "grad_norm": 1453.0860595703125, + "learning_rate": 6.894e-06, + "loss": 26.4827, + "step": 34470 + }, + { + "epoch": 0.06965178149379639, + "grad_norm": 151.14361572265625, + "learning_rate": 6.8960000000000006e-06, + "loss": 15.2546, + "step": 34480 + }, + { + "epoch": 0.0696719821264802, + "grad_norm": 337.78021240234375, + "learning_rate": 6.898e-06, + "loss": 35.3916, + "step": 34490 + }, + { + "epoch": 0.06969218275916401, + "grad_norm": 181.8362274169922, + "learning_rate": 6.9e-06, + "loss": 19.3162, + "step": 34500 + }, + { + "epoch": 0.06971238339184783, + "grad_norm": 643.0160522460938, + "learning_rate": 6.902000000000001e-06, + "loss": 15.7593, + "step": 34510 + }, + { + "epoch": 0.06973258402453165, + "grad_norm": 287.9972229003906, + "learning_rate": 6.904e-06, + "loss": 13.9202, + "step": 34520 + }, + { + "epoch": 0.06975278465721546, + "grad_norm": 454.93878173828125, + "learning_rate": 6.9060000000000005e-06, + "loss": 20.3274, + "step": 34530 + }, + { + "epoch": 0.06977298528989928, + "grad_norm": 279.34271240234375, + "learning_rate": 6.908000000000001e-06, + "loss": 25.0116, + "step": 34540 + }, + { + "epoch": 0.0697931859225831, + "grad_norm": 1011.8807373046875, + "learning_rate": 6.91e-06, + "loss": 27.7776, + "step": 34550 + }, + { + "epoch": 0.06981338655526691, + "grad_norm": 447.10052490234375, + "learning_rate": 6.912000000000001e-06, + "loss": 40.0177, + "step": 34560 + }, + { + "epoch": 0.06983358718795073, + "grad_norm": 399.48968505859375, + "learning_rate": 6.914000000000001e-06, + "loss": 41.0893, + "step": 34570 + }, + { + "epoch": 0.06985378782063455, + "grad_norm": 556.7623291015625, + "learning_rate": 6.916e-06, + "loss": 23.7829, + "step": 34580 + }, + { + "epoch": 0.06987398845331835, + "grad_norm": 629.1818237304688, + "learning_rate": 6.9180000000000005e-06, + "loss": 25.9176, + "step": 34590 + }, + { + "epoch": 0.06989418908600217, + "grad_norm": 825.7970581054688, + "learning_rate": 6.92e-06, + "loss": 33.1165, + "step": 34600 + }, + { + "epoch": 0.06991438971868599, + "grad_norm": 309.82586669921875, + "learning_rate": 6.922000000000001e-06, + "loss": 25.6488, + "step": 34610 + }, + { + "epoch": 0.0699345903513698, + "grad_norm": 297.77362060546875, + "learning_rate": 6.924000000000001e-06, + "loss": 30.7852, + "step": 34620 + }, + { + "epoch": 0.06995479098405362, + "grad_norm": 648.6033325195312, + "learning_rate": 6.926e-06, + "loss": 31.115, + "step": 34630 + }, + { + "epoch": 0.06997499161673744, + "grad_norm": 41.54879379272461, + "learning_rate": 6.928e-06, + "loss": 25.9116, + "step": 34640 + }, + { + "epoch": 0.06999519224942125, + "grad_norm": 231.39988708496094, + "learning_rate": 6.93e-06, + "loss": 26.1055, + "step": 34650 + }, + { + "epoch": 0.07001539288210507, + "grad_norm": 141.19129943847656, + "learning_rate": 6.932000000000001e-06, + "loss": 14.3568, + "step": 34660 + }, + { + "epoch": 0.07003559351478889, + "grad_norm": 491.8395080566406, + "learning_rate": 6.934000000000001e-06, + "loss": 30.6901, + "step": 34670 + }, + { + "epoch": 0.0700557941474727, + "grad_norm": 413.2252502441406, + "learning_rate": 6.936e-06, + "loss": 28.1796, + "step": 34680 + }, + { + "epoch": 0.07007599478015651, + "grad_norm": 494.89678955078125, + "learning_rate": 6.938e-06, + "loss": 30.1756, + "step": 34690 + }, + { + "epoch": 0.07009619541284033, + "grad_norm": 344.2539367675781, + "learning_rate": 
6.9400000000000005e-06, + "loss": 25.5514, + "step": 34700 + }, + { + "epoch": 0.07011639604552415, + "grad_norm": 1064.0335693359375, + "learning_rate": 6.942000000000001e-06, + "loss": 42.9528, + "step": 34710 + }, + { + "epoch": 0.07013659667820796, + "grad_norm": 535.0779418945312, + "learning_rate": 6.944000000000001e-06, + "loss": 16.0364, + "step": 34720 + }, + { + "epoch": 0.07015679731089178, + "grad_norm": 490.56683349609375, + "learning_rate": 6.946000000000001e-06, + "loss": 37.9951, + "step": 34730 + }, + { + "epoch": 0.0701769979435756, + "grad_norm": 1186.7283935546875, + "learning_rate": 6.948e-06, + "loss": 44.0284, + "step": 34740 + }, + { + "epoch": 0.0701971985762594, + "grad_norm": 765.0109252929688, + "learning_rate": 6.95e-06, + "loss": 37.0233, + "step": 34750 + }, + { + "epoch": 0.07021739920894322, + "grad_norm": 861.0193481445312, + "learning_rate": 6.952000000000001e-06, + "loss": 34.5704, + "step": 34760 + }, + { + "epoch": 0.07023759984162704, + "grad_norm": 221.90745544433594, + "learning_rate": 6.954000000000001e-06, + "loss": 20.0659, + "step": 34770 + }, + { + "epoch": 0.07025780047431085, + "grad_norm": 145.42893981933594, + "learning_rate": 6.956000000000001e-06, + "loss": 24.7001, + "step": 34780 + }, + { + "epoch": 0.07027800110699467, + "grad_norm": 274.7716979980469, + "learning_rate": 6.958e-06, + "loss": 17.1338, + "step": 34790 + }, + { + "epoch": 0.07029820173967849, + "grad_norm": 202.82875061035156, + "learning_rate": 6.96e-06, + "loss": 13.7413, + "step": 34800 + }, + { + "epoch": 0.0703184023723623, + "grad_norm": 153.72035217285156, + "learning_rate": 6.962000000000001e-06, + "loss": 41.7966, + "step": 34810 + }, + { + "epoch": 0.07033860300504612, + "grad_norm": 124.36219787597656, + "learning_rate": 6.964000000000001e-06, + "loss": 31.4384, + "step": 34820 + }, + { + "epoch": 0.07035880363772994, + "grad_norm": 485.8753356933594, + "learning_rate": 6.966000000000001e-06, + "loss": 34.8664, + "step": 34830 + }, + { + "epoch": 0.07037900427041376, + "grad_norm": 839.91552734375, + "learning_rate": 6.968e-06, + "loss": 38.5283, + "step": 34840 + }, + { + "epoch": 0.07039920490309756, + "grad_norm": 885.7486572265625, + "learning_rate": 6.97e-06, + "loss": 30.9444, + "step": 34850 + }, + { + "epoch": 0.07041940553578138, + "grad_norm": 479.7201232910156, + "learning_rate": 6.972000000000001e-06, + "loss": 22.7172, + "step": 34860 + }, + { + "epoch": 0.0704396061684652, + "grad_norm": 513.4949951171875, + "learning_rate": 6.9740000000000005e-06, + "loss": 17.2636, + "step": 34870 + }, + { + "epoch": 0.07045980680114901, + "grad_norm": 555.849609375, + "learning_rate": 6.976000000000001e-06, + "loss": 24.6814, + "step": 34880 + }, + { + "epoch": 0.07048000743383283, + "grad_norm": 395.6819152832031, + "learning_rate": 6.978e-06, + "loss": 14.5689, + "step": 34890 + }, + { + "epoch": 0.07050020806651665, + "grad_norm": 256.9186706542969, + "learning_rate": 6.98e-06, + "loss": 21.4932, + "step": 34900 + }, + { + "epoch": 0.07052040869920045, + "grad_norm": 403.16693115234375, + "learning_rate": 6.982000000000001e-06, + "loss": 33.3816, + "step": 34910 + }, + { + "epoch": 0.07054060933188427, + "grad_norm": 724.1705322265625, + "learning_rate": 6.984e-06, + "loss": 16.0022, + "step": 34920 + }, + { + "epoch": 0.0705608099645681, + "grad_norm": 353.1549987792969, + "learning_rate": 6.9860000000000005e-06, + "loss": 14.607, + "step": 34930 + }, + { + "epoch": 0.0705810105972519, + "grad_norm": 2520.662109375, + "learning_rate": 6.988000000000001e-06, 
+ "loss": 54.9936, + "step": 34940 + }, + { + "epoch": 0.07060121122993572, + "grad_norm": 330.6929016113281, + "learning_rate": 6.99e-06, + "loss": 14.1727, + "step": 34950 + }, + { + "epoch": 0.07062141186261954, + "grad_norm": 504.8486022949219, + "learning_rate": 6.992000000000001e-06, + "loss": 38.3436, + "step": 34960 + }, + { + "epoch": 0.07064161249530335, + "grad_norm": 133.2992401123047, + "learning_rate": 6.994000000000001e-06, + "loss": 27.9458, + "step": 34970 + }, + { + "epoch": 0.07066181312798717, + "grad_norm": 201.37075805664062, + "learning_rate": 6.9960000000000004e-06, + "loss": 17.002, + "step": 34980 + }, + { + "epoch": 0.07068201376067099, + "grad_norm": 170.57139587402344, + "learning_rate": 6.998000000000001e-06, + "loss": 27.4595, + "step": 34990 + }, + { + "epoch": 0.0707022143933548, + "grad_norm": 151.04176330566406, + "learning_rate": 7e-06, + "loss": 24.4173, + "step": 35000 + }, + { + "epoch": 0.07072241502603861, + "grad_norm": 687.1515502929688, + "learning_rate": 7.002000000000001e-06, + "loss": 32.1208, + "step": 35010 + }, + { + "epoch": 0.07074261565872243, + "grad_norm": 1763.760986328125, + "learning_rate": 7.004000000000001e-06, + "loss": 38.3857, + "step": 35020 + }, + { + "epoch": 0.07076281629140625, + "grad_norm": 922.94287109375, + "learning_rate": 7.006e-06, + "loss": 24.6238, + "step": 35030 + }, + { + "epoch": 0.07078301692409006, + "grad_norm": 73.7891616821289, + "learning_rate": 7.0080000000000005e-06, + "loss": 15.4841, + "step": 35040 + }, + { + "epoch": 0.07080321755677388, + "grad_norm": 844.647216796875, + "learning_rate": 7.01e-06, + "loss": 31.8864, + "step": 35050 + }, + { + "epoch": 0.0708234181894577, + "grad_norm": 832.1209106445312, + "learning_rate": 7.012000000000001e-06, + "loss": 20.9632, + "step": 35060 + }, + { + "epoch": 0.0708436188221415, + "grad_norm": 629.44775390625, + "learning_rate": 7.014000000000001e-06, + "loss": 24.6583, + "step": 35070 + }, + { + "epoch": 0.07086381945482532, + "grad_norm": 291.5755310058594, + "learning_rate": 7.016e-06, + "loss": 23.2967, + "step": 35080 + }, + { + "epoch": 0.07088402008750914, + "grad_norm": 599.63916015625, + "learning_rate": 7.018e-06, + "loss": 34.7414, + "step": 35090 + }, + { + "epoch": 0.07090422072019295, + "grad_norm": 700.6422729492188, + "learning_rate": 7.0200000000000006e-06, + "loss": 53.2773, + "step": 35100 + }, + { + "epoch": 0.07092442135287677, + "grad_norm": 354.1465148925781, + "learning_rate": 7.022000000000001e-06, + "loss": 39.586, + "step": 35110 + }, + { + "epoch": 0.07094462198556059, + "grad_norm": 316.0738525390625, + "learning_rate": 7.024000000000001e-06, + "loss": 33.9372, + "step": 35120 + }, + { + "epoch": 0.0709648226182444, + "grad_norm": 215.59765625, + "learning_rate": 7.026000000000001e-06, + "loss": 18.8616, + "step": 35130 + }, + { + "epoch": 0.07098502325092822, + "grad_norm": 479.970947265625, + "learning_rate": 7.028e-06, + "loss": 21.9399, + "step": 35140 + }, + { + "epoch": 0.07100522388361204, + "grad_norm": 552.5803833007812, + "learning_rate": 7.0300000000000005e-06, + "loss": 26.6272, + "step": 35150 + }, + { + "epoch": 0.07102542451629586, + "grad_norm": 471.1194763183594, + "learning_rate": 7.0320000000000015e-06, + "loss": 30.7594, + "step": 35160 + }, + { + "epoch": 0.07104562514897966, + "grad_norm": 560.67431640625, + "learning_rate": 7.034000000000001e-06, + "loss": 16.5104, + "step": 35170 + }, + { + "epoch": 0.07106582578166348, + "grad_norm": 297.8233947753906, + "learning_rate": 7.036000000000001e-06, + "loss": 
36.3182, + "step": 35180 + }, + { + "epoch": 0.0710860264143473, + "grad_norm": 770.4078979492188, + "learning_rate": 7.038e-06, + "loss": 33.7109, + "step": 35190 + }, + { + "epoch": 0.07110622704703111, + "grad_norm": 431.954345703125, + "learning_rate": 7.04e-06, + "loss": 36.4231, + "step": 35200 + }, + { + "epoch": 0.07112642767971493, + "grad_norm": 527.4671020507812, + "learning_rate": 7.042000000000001e-06, + "loss": 28.5493, + "step": 35210 + }, + { + "epoch": 0.07114662831239875, + "grad_norm": 252.7281951904297, + "learning_rate": 7.044000000000001e-06, + "loss": 22.4609, + "step": 35220 + }, + { + "epoch": 0.07116682894508256, + "grad_norm": 1668.7276611328125, + "learning_rate": 7.046000000000001e-06, + "loss": 33.1983, + "step": 35230 + }, + { + "epoch": 0.07118702957776638, + "grad_norm": 0.0, + "learning_rate": 7.048e-06, + "loss": 33.8626, + "step": 35240 + }, + { + "epoch": 0.0712072302104502, + "grad_norm": 98.54589080810547, + "learning_rate": 7.05e-06, + "loss": 25.6418, + "step": 35250 + }, + { + "epoch": 0.071227430843134, + "grad_norm": 377.0586242675781, + "learning_rate": 7.052000000000001e-06, + "loss": 20.7303, + "step": 35260 + }, + { + "epoch": 0.07124763147581782, + "grad_norm": 396.9246520996094, + "learning_rate": 7.0540000000000006e-06, + "loss": 24.3956, + "step": 35270 + }, + { + "epoch": 0.07126783210850164, + "grad_norm": 313.577880859375, + "learning_rate": 7.056000000000001e-06, + "loss": 18.0199, + "step": 35280 + }, + { + "epoch": 0.07128803274118545, + "grad_norm": 1487.6373291015625, + "learning_rate": 7.058e-06, + "loss": 27.6879, + "step": 35290 + }, + { + "epoch": 0.07130823337386927, + "grad_norm": 543.5441284179688, + "learning_rate": 7.06e-06, + "loss": 36.6739, + "step": 35300 + }, + { + "epoch": 0.07132843400655309, + "grad_norm": 402.0924072265625, + "learning_rate": 7.062000000000001e-06, + "loss": 17.9225, + "step": 35310 + }, + { + "epoch": 0.07134863463923691, + "grad_norm": 373.1673889160156, + "learning_rate": 7.0640000000000005e-06, + "loss": 30.948, + "step": 35320 + }, + { + "epoch": 0.07136883527192071, + "grad_norm": 1144.22900390625, + "learning_rate": 7.066000000000001e-06, + "loss": 28.6735, + "step": 35330 + }, + { + "epoch": 0.07138903590460453, + "grad_norm": 1234.5067138671875, + "learning_rate": 7.068000000000001e-06, + "loss": 33.1719, + "step": 35340 + }, + { + "epoch": 0.07140923653728835, + "grad_norm": 249.57534790039062, + "learning_rate": 7.07e-06, + "loss": 64.2075, + "step": 35350 + }, + { + "epoch": 0.07142943716997216, + "grad_norm": 735.7301025390625, + "learning_rate": 7.072000000000001e-06, + "loss": 20.6333, + "step": 35360 + }, + { + "epoch": 0.07144963780265598, + "grad_norm": 528.9682006835938, + "learning_rate": 7.074000000000001e-06, + "loss": 27.5707, + "step": 35370 + }, + { + "epoch": 0.0714698384353398, + "grad_norm": 175.97882080078125, + "learning_rate": 7.0760000000000005e-06, + "loss": 32.1693, + "step": 35380 + }, + { + "epoch": 0.0714900390680236, + "grad_norm": 267.4575500488281, + "learning_rate": 7.078000000000001e-06, + "loss": 26.42, + "step": 35390 + }, + { + "epoch": 0.07151023970070743, + "grad_norm": 662.626708984375, + "learning_rate": 7.08e-06, + "loss": 22.6109, + "step": 35400 + }, + { + "epoch": 0.07153044033339125, + "grad_norm": 953.2594604492188, + "learning_rate": 7.082000000000001e-06, + "loss": 30.7239, + "step": 35410 + }, + { + "epoch": 0.07155064096607505, + "grad_norm": 174.0382080078125, + "learning_rate": 7.084000000000001e-06, + "loss": 18.2796, + "step": 35420 + 
}, + { + "epoch": 0.07157084159875887, + "grad_norm": 881.6152954101562, + "learning_rate": 7.0860000000000004e-06, + "loss": 24.4007, + "step": 35430 + }, + { + "epoch": 0.07159104223144269, + "grad_norm": 658.0614624023438, + "learning_rate": 7.088000000000001e-06, + "loss": 20.0999, + "step": 35440 + }, + { + "epoch": 0.0716112428641265, + "grad_norm": 332.91650390625, + "learning_rate": 7.09e-06, + "loss": 24.0056, + "step": 35450 + }, + { + "epoch": 0.07163144349681032, + "grad_norm": 573.6510009765625, + "learning_rate": 7.092000000000001e-06, + "loss": 38.1989, + "step": 35460 + }, + { + "epoch": 0.07165164412949414, + "grad_norm": 526.1616821289062, + "learning_rate": 7.094000000000001e-06, + "loss": 24.0661, + "step": 35470 + }, + { + "epoch": 0.07167184476217796, + "grad_norm": 206.29298400878906, + "learning_rate": 7.096e-06, + "loss": 15.9931, + "step": 35480 + }, + { + "epoch": 0.07169204539486176, + "grad_norm": 919.5665893554688, + "learning_rate": 7.0980000000000005e-06, + "loss": 24.3283, + "step": 35490 + }, + { + "epoch": 0.07171224602754558, + "grad_norm": 810.0609741210938, + "learning_rate": 7.100000000000001e-06, + "loss": 19.033, + "step": 35500 + }, + { + "epoch": 0.0717324466602294, + "grad_norm": 258.5408935546875, + "learning_rate": 7.102000000000001e-06, + "loss": 13.9003, + "step": 35510 + }, + { + "epoch": 0.07175264729291321, + "grad_norm": 451.9220886230469, + "learning_rate": 7.104000000000001e-06, + "loss": 23.7401, + "step": 35520 + }, + { + "epoch": 0.07177284792559703, + "grad_norm": 697.3738403320312, + "learning_rate": 7.106000000000001e-06, + "loss": 39.4503, + "step": 35530 + }, + { + "epoch": 0.07179304855828085, + "grad_norm": 576.223388671875, + "learning_rate": 7.108e-06, + "loss": 40.5201, + "step": 35540 + }, + { + "epoch": 0.07181324919096466, + "grad_norm": 150.14456176757812, + "learning_rate": 7.1100000000000005e-06, + "loss": 37.7105, + "step": 35550 + }, + { + "epoch": 0.07183344982364848, + "grad_norm": 319.6799011230469, + "learning_rate": 7.1120000000000015e-06, + "loss": 23.044, + "step": 35560 + }, + { + "epoch": 0.0718536504563323, + "grad_norm": 231.80503845214844, + "learning_rate": 7.114000000000001e-06, + "loss": 12.1974, + "step": 35570 + }, + { + "epoch": 0.0718738510890161, + "grad_norm": 284.95037841796875, + "learning_rate": 7.116000000000001e-06, + "loss": 23.1263, + "step": 35580 + }, + { + "epoch": 0.07189405172169992, + "grad_norm": 820.784423828125, + "learning_rate": 7.118e-06, + "loss": 30.4677, + "step": 35590 + }, + { + "epoch": 0.07191425235438374, + "grad_norm": 461.59442138671875, + "learning_rate": 7.1200000000000004e-06, + "loss": 11.0868, + "step": 35600 + }, + { + "epoch": 0.07193445298706755, + "grad_norm": 278.0670471191406, + "learning_rate": 7.1220000000000014e-06, + "loss": 21.7604, + "step": 35610 + }, + { + "epoch": 0.07195465361975137, + "grad_norm": 696.705078125, + "learning_rate": 7.124000000000001e-06, + "loss": 16.4526, + "step": 35620 + }, + { + "epoch": 0.07197485425243519, + "grad_norm": 852.6187744140625, + "learning_rate": 7.126000000000001e-06, + "loss": 28.0098, + "step": 35630 + }, + { + "epoch": 0.07199505488511901, + "grad_norm": 47.386940002441406, + "learning_rate": 7.128e-06, + "loss": 23.8823, + "step": 35640 + }, + { + "epoch": 0.07201525551780281, + "grad_norm": 398.6817321777344, + "learning_rate": 7.13e-06, + "loss": 22.399, + "step": 35650 + }, + { + "epoch": 0.07203545615048663, + "grad_norm": 349.03173828125, + "learning_rate": 7.132e-06, + "loss": 21.1377, + "step": 35660 
+ }, + { + "epoch": 0.07205565678317045, + "grad_norm": 340.30853271484375, + "learning_rate": 7.134000000000001e-06, + "loss": 19.7428, + "step": 35670 + }, + { + "epoch": 0.07207585741585426, + "grad_norm": 534.1447143554688, + "learning_rate": 7.136000000000001e-06, + "loss": 36.1049, + "step": 35680 + }, + { + "epoch": 0.07209605804853808, + "grad_norm": 240.12698364257812, + "learning_rate": 7.138e-06, + "loss": 25.354, + "step": 35690 + }, + { + "epoch": 0.0721162586812219, + "grad_norm": 668.1033935546875, + "learning_rate": 7.14e-06, + "loss": 33.0883, + "step": 35700 + }, + { + "epoch": 0.07213645931390571, + "grad_norm": 303.3044128417969, + "learning_rate": 7.142e-06, + "loss": 24.0498, + "step": 35710 + }, + { + "epoch": 0.07215665994658953, + "grad_norm": 401.3393249511719, + "learning_rate": 7.1440000000000005e-06, + "loss": 12.1863, + "step": 35720 + }, + { + "epoch": 0.07217686057927335, + "grad_norm": 262.6009826660156, + "learning_rate": 7.146000000000001e-06, + "loss": 20.9297, + "step": 35730 + }, + { + "epoch": 0.07219706121195715, + "grad_norm": 295.8729553222656, + "learning_rate": 7.148000000000001e-06, + "loss": 24.8015, + "step": 35740 + }, + { + "epoch": 0.07221726184464097, + "grad_norm": 249.96107482910156, + "learning_rate": 7.15e-06, + "loss": 39.1403, + "step": 35750 + }, + { + "epoch": 0.0722374624773248, + "grad_norm": 666.444091796875, + "learning_rate": 7.152e-06, + "loss": 25.2217, + "step": 35760 + }, + { + "epoch": 0.0722576631100086, + "grad_norm": 302.3631896972656, + "learning_rate": 7.154000000000001e-06, + "loss": 28.7105, + "step": 35770 + }, + { + "epoch": 0.07227786374269242, + "grad_norm": 661.980224609375, + "learning_rate": 7.156000000000001e-06, + "loss": 31.7649, + "step": 35780 + }, + { + "epoch": 0.07229806437537624, + "grad_norm": 483.1505126953125, + "learning_rate": 7.158000000000001e-06, + "loss": 20.0861, + "step": 35790 + }, + { + "epoch": 0.07231826500806006, + "grad_norm": 726.7387084960938, + "learning_rate": 7.16e-06, + "loss": 35.2117, + "step": 35800 + }, + { + "epoch": 0.07233846564074387, + "grad_norm": 285.33734130859375, + "learning_rate": 7.162e-06, + "loss": 14.649, + "step": 35810 + }, + { + "epoch": 0.07235866627342769, + "grad_norm": 552.3336791992188, + "learning_rate": 7.164000000000001e-06, + "loss": 33.3316, + "step": 35820 + }, + { + "epoch": 0.0723788669061115, + "grad_norm": 341.7799072265625, + "learning_rate": 7.1660000000000005e-06, + "loss": 17.5601, + "step": 35830 + }, + { + "epoch": 0.07239906753879531, + "grad_norm": 663.0716552734375, + "learning_rate": 7.168000000000001e-06, + "loss": 15.9708, + "step": 35840 + }, + { + "epoch": 0.07241926817147913, + "grad_norm": 444.8297119140625, + "learning_rate": 7.17e-06, + "loss": 30.985, + "step": 35850 + }, + { + "epoch": 0.07243946880416295, + "grad_norm": 179.97125244140625, + "learning_rate": 7.172e-06, + "loss": 18.7185, + "step": 35860 + }, + { + "epoch": 0.07245966943684676, + "grad_norm": 562.4083251953125, + "learning_rate": 7.174000000000001e-06, + "loss": 50.2738, + "step": 35870 + }, + { + "epoch": 0.07247987006953058, + "grad_norm": 1533.6148681640625, + "learning_rate": 7.176e-06, + "loss": 34.0883, + "step": 35880 + }, + { + "epoch": 0.0725000707022144, + "grad_norm": 263.4617004394531, + "learning_rate": 7.1780000000000006e-06, + "loss": 13.4116, + "step": 35890 + }, + { + "epoch": 0.0725202713348982, + "grad_norm": 1126.4736328125, + "learning_rate": 7.180000000000001e-06, + "loss": 40.3042, + "step": 35900 + }, + { + "epoch": 
0.07254047196758202, + "grad_norm": 796.5320434570312, + "learning_rate": 7.182e-06, + "loss": 33.9886, + "step": 35910 + }, + { + "epoch": 0.07256067260026584, + "grad_norm": 769.1730346679688, + "learning_rate": 7.184000000000001e-06, + "loss": 33.7612, + "step": 35920 + }, + { + "epoch": 0.07258087323294965, + "grad_norm": 232.10711669921875, + "learning_rate": 7.186000000000001e-06, + "loss": 29.5518, + "step": 35930 + }, + { + "epoch": 0.07260107386563347, + "grad_norm": 311.0737609863281, + "learning_rate": 7.1880000000000005e-06, + "loss": 21.697, + "step": 35940 + }, + { + "epoch": 0.07262127449831729, + "grad_norm": 411.9050598144531, + "learning_rate": 7.190000000000001e-06, + "loss": 14.3407, + "step": 35950 + }, + { + "epoch": 0.07264147513100111, + "grad_norm": 597.1206665039062, + "learning_rate": 7.192e-06, + "loss": 23.7892, + "step": 35960 + }, + { + "epoch": 0.07266167576368492, + "grad_norm": 156.48336791992188, + "learning_rate": 7.194000000000001e-06, + "loss": 16.2914, + "step": 35970 + }, + { + "epoch": 0.07268187639636874, + "grad_norm": 631.177001953125, + "learning_rate": 7.196000000000001e-06, + "loss": 30.2218, + "step": 35980 + }, + { + "epoch": 0.07270207702905256, + "grad_norm": 348.7903747558594, + "learning_rate": 7.198e-06, + "loss": 14.2282, + "step": 35990 + }, + { + "epoch": 0.07272227766173636, + "grad_norm": 1277.896484375, + "learning_rate": 7.2000000000000005e-06, + "loss": 35.9558, + "step": 36000 + }, + { + "epoch": 0.07274247829442018, + "grad_norm": 936.1773071289062, + "learning_rate": 7.202e-06, + "loss": 36.1961, + "step": 36010 + }, + { + "epoch": 0.072762678927104, + "grad_norm": 792.3339233398438, + "learning_rate": 7.204000000000001e-06, + "loss": 22.9023, + "step": 36020 + }, + { + "epoch": 0.07278287955978781, + "grad_norm": 298.2560119628906, + "learning_rate": 7.206000000000001e-06, + "loss": 24.574, + "step": 36030 + }, + { + "epoch": 0.07280308019247163, + "grad_norm": 499.6198425292969, + "learning_rate": 7.208e-06, + "loss": 17.3295, + "step": 36040 + }, + { + "epoch": 0.07282328082515545, + "grad_norm": 446.3153381347656, + "learning_rate": 7.2100000000000004e-06, + "loss": 18.4231, + "step": 36050 + }, + { + "epoch": 0.07284348145783925, + "grad_norm": 449.7459411621094, + "learning_rate": 7.212e-06, + "loss": 21.9795, + "step": 36060 + }, + { + "epoch": 0.07286368209052307, + "grad_norm": 578.6666259765625, + "learning_rate": 7.214000000000001e-06, + "loss": 19.4359, + "step": 36070 + }, + { + "epoch": 0.0728838827232069, + "grad_norm": 533.8427734375, + "learning_rate": 7.216000000000001e-06, + "loss": 15.0341, + "step": 36080 + }, + { + "epoch": 0.0729040833558907, + "grad_norm": 628.3088989257812, + "learning_rate": 7.218e-06, + "loss": 24.2054, + "step": 36090 + }, + { + "epoch": 0.07292428398857452, + "grad_norm": 297.69281005859375, + "learning_rate": 7.22e-06, + "loss": 33.3355, + "step": 36100 + }, + { + "epoch": 0.07294448462125834, + "grad_norm": 297.65924072265625, + "learning_rate": 7.2220000000000005e-06, + "loss": 18.034, + "step": 36110 + }, + { + "epoch": 0.07296468525394216, + "grad_norm": 1627.181396484375, + "learning_rate": 7.224000000000001e-06, + "loss": 37.0366, + "step": 36120 + }, + { + "epoch": 0.07298488588662597, + "grad_norm": 278.7560119628906, + "learning_rate": 7.226000000000001e-06, + "loss": 20.4195, + "step": 36130 + }, + { + "epoch": 0.07300508651930979, + "grad_norm": 490.5456848144531, + "learning_rate": 7.228000000000001e-06, + "loss": 22.7641, + "step": 36140 + }, + { + "epoch": 
0.0730252871519936, + "grad_norm": 352.07110595703125, + "learning_rate": 7.23e-06, + "loss": 23.3919, + "step": 36150 + }, + { + "epoch": 0.07304548778467741, + "grad_norm": 635.0588989257812, + "learning_rate": 7.232e-06, + "loss": 14.4249, + "step": 36160 + }, + { + "epoch": 0.07306568841736123, + "grad_norm": 655.6961669921875, + "learning_rate": 7.234000000000001e-06, + "loss": 21.8436, + "step": 36170 + }, + { + "epoch": 0.07308588905004505, + "grad_norm": 426.8301086425781, + "learning_rate": 7.236000000000001e-06, + "loss": 31.068, + "step": 36180 + }, + { + "epoch": 0.07310608968272886, + "grad_norm": 150.89291381835938, + "learning_rate": 7.238000000000001e-06, + "loss": 19.2602, + "step": 36190 + }, + { + "epoch": 0.07312629031541268, + "grad_norm": 315.3755798339844, + "learning_rate": 7.24e-06, + "loss": 11.7244, + "step": 36200 + }, + { + "epoch": 0.0731464909480965, + "grad_norm": 285.57879638671875, + "learning_rate": 7.242e-06, + "loss": 26.9657, + "step": 36210 + }, + { + "epoch": 0.0731666915807803, + "grad_norm": 414.1665344238281, + "learning_rate": 7.244000000000001e-06, + "loss": 23.4058, + "step": 36220 + }, + { + "epoch": 0.07318689221346412, + "grad_norm": 256.0746154785156, + "learning_rate": 7.246000000000001e-06, + "loss": 28.3898, + "step": 36230 + }, + { + "epoch": 0.07320709284614794, + "grad_norm": 397.8353271484375, + "learning_rate": 7.248000000000001e-06, + "loss": 22.2447, + "step": 36240 + }, + { + "epoch": 0.07322729347883175, + "grad_norm": 1492.809814453125, + "learning_rate": 7.25e-06, + "loss": 20.0398, + "step": 36250 + }, + { + "epoch": 0.07324749411151557, + "grad_norm": 957.6799926757812, + "learning_rate": 7.252e-06, + "loss": 39.4348, + "step": 36260 + }, + { + "epoch": 0.07326769474419939, + "grad_norm": 912.636474609375, + "learning_rate": 7.254000000000001e-06, + "loss": 23.8131, + "step": 36270 + }, + { + "epoch": 0.07328789537688321, + "grad_norm": 400.9259033203125, + "learning_rate": 7.2560000000000005e-06, + "loss": 26.8982, + "step": 36280 + }, + { + "epoch": 0.07330809600956702, + "grad_norm": 912.023193359375, + "learning_rate": 7.258000000000001e-06, + "loss": 23.0974, + "step": 36290 + }, + { + "epoch": 0.07332829664225084, + "grad_norm": 584.3211059570312, + "learning_rate": 7.260000000000001e-06, + "loss": 32.7051, + "step": 36300 + }, + { + "epoch": 0.07334849727493466, + "grad_norm": 632.460693359375, + "learning_rate": 7.262e-06, + "loss": 36.9517, + "step": 36310 + }, + { + "epoch": 0.07336869790761846, + "grad_norm": 358.9119873046875, + "learning_rate": 7.264000000000001e-06, + "loss": 21.1253, + "step": 36320 + }, + { + "epoch": 0.07338889854030228, + "grad_norm": 192.64894104003906, + "learning_rate": 7.266000000000001e-06, + "loss": 35.9661, + "step": 36330 + }, + { + "epoch": 0.0734090991729861, + "grad_norm": 820.314697265625, + "learning_rate": 7.2680000000000005e-06, + "loss": 28.4836, + "step": 36340 + }, + { + "epoch": 0.07342929980566991, + "grad_norm": 559.251953125, + "learning_rate": 7.270000000000001e-06, + "loss": 20.2625, + "step": 36350 + }, + { + "epoch": 0.07344950043835373, + "grad_norm": 406.0582580566406, + "learning_rate": 7.272e-06, + "loss": 22.5509, + "step": 36360 + }, + { + "epoch": 0.07346970107103755, + "grad_norm": 671.7197875976562, + "learning_rate": 7.274000000000001e-06, + "loss": 30.905, + "step": 36370 + }, + { + "epoch": 0.07348990170372136, + "grad_norm": 291.09039306640625, + "learning_rate": 7.276000000000001e-06, + "loss": 25.4618, + "step": 36380 + }, + { + "epoch": 
0.07351010233640518, + "grad_norm": 462.43133544921875, + "learning_rate": 7.2780000000000005e-06, + "loss": 30.4532, + "step": 36390 + }, + { + "epoch": 0.073530302969089, + "grad_norm": 318.0280456542969, + "learning_rate": 7.280000000000001e-06, + "loss": 16.4905, + "step": 36400 + }, + { + "epoch": 0.0735505036017728, + "grad_norm": 732.2247924804688, + "learning_rate": 7.282e-06, + "loss": 35.1043, + "step": 36410 + }, + { + "epoch": 0.07357070423445662, + "grad_norm": 281.3781433105469, + "learning_rate": 7.284000000000001e-06, + "loss": 26.6184, + "step": 36420 + }, + { + "epoch": 0.07359090486714044, + "grad_norm": 899.387939453125, + "learning_rate": 7.286000000000001e-06, + "loss": 30.4034, + "step": 36430 + }, + { + "epoch": 0.07361110549982426, + "grad_norm": 637.453857421875, + "learning_rate": 7.288e-06, + "loss": 22.0351, + "step": 36440 + }, + { + "epoch": 0.07363130613250807, + "grad_norm": 352.7566833496094, + "learning_rate": 7.2900000000000005e-06, + "loss": 13.5987, + "step": 36450 + }, + { + "epoch": 0.07365150676519189, + "grad_norm": 1261.2196044921875, + "learning_rate": 7.292e-06, + "loss": 44.5814, + "step": 36460 + }, + { + "epoch": 0.07367170739787571, + "grad_norm": 465.0794982910156, + "learning_rate": 7.294000000000001e-06, + "loss": 31.6626, + "step": 36470 + }, + { + "epoch": 0.07369190803055951, + "grad_norm": 848.2520141601562, + "learning_rate": 7.296000000000001e-06, + "loss": 20.8244, + "step": 36480 + }, + { + "epoch": 0.07371210866324333, + "grad_norm": 310.5050964355469, + "learning_rate": 7.298e-06, + "loss": 29.6421, + "step": 36490 + }, + { + "epoch": 0.07373230929592715, + "grad_norm": 772.1388549804688, + "learning_rate": 7.3e-06, + "loss": 23.1746, + "step": 36500 + }, + { + "epoch": 0.07375250992861096, + "grad_norm": 317.6028747558594, + "learning_rate": 7.3020000000000006e-06, + "loss": 20.4439, + "step": 36510 + }, + { + "epoch": 0.07377271056129478, + "grad_norm": 456.0571594238281, + "learning_rate": 7.304000000000001e-06, + "loss": 22.3645, + "step": 36520 + }, + { + "epoch": 0.0737929111939786, + "grad_norm": 543.1962280273438, + "learning_rate": 7.306000000000001e-06, + "loss": 25.9287, + "step": 36530 + }, + { + "epoch": 0.0738131118266624, + "grad_norm": 459.4672546386719, + "learning_rate": 7.308000000000001e-06, + "loss": 46.0588, + "step": 36540 + }, + { + "epoch": 0.07383331245934623, + "grad_norm": 68.5689926147461, + "learning_rate": 7.31e-06, + "loss": 31.2692, + "step": 36550 + }, + { + "epoch": 0.07385351309203005, + "grad_norm": 289.3775634765625, + "learning_rate": 7.3120000000000005e-06, + "loss": 27.7922, + "step": 36560 + }, + { + "epoch": 0.07387371372471385, + "grad_norm": 480.0758972167969, + "learning_rate": 7.3140000000000015e-06, + "loss": 48.8788, + "step": 36570 + }, + { + "epoch": 0.07389391435739767, + "grad_norm": 232.5970001220703, + "learning_rate": 7.316000000000001e-06, + "loss": 23.6512, + "step": 36580 + }, + { + "epoch": 0.07391411499008149, + "grad_norm": 491.4234924316406, + "learning_rate": 7.318000000000001e-06, + "loss": 27.9012, + "step": 36590 + }, + { + "epoch": 0.0739343156227653, + "grad_norm": 1804.0177001953125, + "learning_rate": 7.32e-06, + "loss": 57.0006, + "step": 36600 + }, + { + "epoch": 0.07395451625544912, + "grad_norm": 464.07696533203125, + "learning_rate": 7.322e-06, + "loss": 15.404, + "step": 36610 + }, + { + "epoch": 0.07397471688813294, + "grad_norm": 552.944580078125, + "learning_rate": 7.324000000000001e-06, + "loss": 35.9958, + "step": 36620 + }, + { + "epoch": 
0.07399491752081676, + "grad_norm": 357.04803466796875, + "learning_rate": 7.326000000000001e-06, + "loss": 34.9606, + "step": 36630 + }, + { + "epoch": 0.07401511815350056, + "grad_norm": 120.2250747680664, + "learning_rate": 7.328000000000001e-06, + "loss": 22.1038, + "step": 36640 + }, + { + "epoch": 0.07403531878618438, + "grad_norm": 1244.16455078125, + "learning_rate": 7.33e-06, + "loss": 32.6981, + "step": 36650 + }, + { + "epoch": 0.0740555194188682, + "grad_norm": 892.9081420898438, + "learning_rate": 7.332e-06, + "loss": 48.3434, + "step": 36660 + }, + { + "epoch": 0.07407572005155201, + "grad_norm": 213.5165252685547, + "learning_rate": 7.334000000000001e-06, + "loss": 19.2829, + "step": 36670 + }, + { + "epoch": 0.07409592068423583, + "grad_norm": 789.1298828125, + "learning_rate": 7.3360000000000006e-06, + "loss": 35.462, + "step": 36680 + }, + { + "epoch": 0.07411612131691965, + "grad_norm": 417.73455810546875, + "learning_rate": 7.338000000000001e-06, + "loss": 26.294, + "step": 36690 + }, + { + "epoch": 0.07413632194960346, + "grad_norm": 606.4683837890625, + "learning_rate": 7.340000000000001e-06, + "loss": 15.4818, + "step": 36700 + }, + { + "epoch": 0.07415652258228728, + "grad_norm": 234.27586364746094, + "learning_rate": 7.342e-06, + "loss": 16.8294, + "step": 36710 + }, + { + "epoch": 0.0741767232149711, + "grad_norm": 132.23135375976562, + "learning_rate": 7.344000000000001e-06, + "loss": 26.813, + "step": 36720 + }, + { + "epoch": 0.0741969238476549, + "grad_norm": 132.92832946777344, + "learning_rate": 7.346000000000001e-06, + "loss": 35.4096, + "step": 36730 + }, + { + "epoch": 0.07421712448033872, + "grad_norm": 393.9335021972656, + "learning_rate": 7.348000000000001e-06, + "loss": 47.1393, + "step": 36740 + }, + { + "epoch": 0.07423732511302254, + "grad_norm": 359.0769958496094, + "learning_rate": 7.350000000000001e-06, + "loss": 24.7488, + "step": 36750 + }, + { + "epoch": 0.07425752574570635, + "grad_norm": 415.5875244140625, + "learning_rate": 7.352e-06, + "loss": 25.986, + "step": 36760 + }, + { + "epoch": 0.07427772637839017, + "grad_norm": 689.1558837890625, + "learning_rate": 7.354000000000001e-06, + "loss": 42.3704, + "step": 36770 + }, + { + "epoch": 0.07429792701107399, + "grad_norm": 563.744140625, + "learning_rate": 7.356000000000001e-06, + "loss": 28.5835, + "step": 36780 + }, + { + "epoch": 0.07431812764375781, + "grad_norm": 282.9549255371094, + "learning_rate": 7.3580000000000005e-06, + "loss": 34.0733, + "step": 36790 + }, + { + "epoch": 0.07433832827644161, + "grad_norm": 133.72024536132812, + "learning_rate": 7.360000000000001e-06, + "loss": 15.5981, + "step": 36800 + }, + { + "epoch": 0.07435852890912543, + "grad_norm": 118.42461395263672, + "learning_rate": 7.362e-06, + "loss": 43.5447, + "step": 36810 + }, + { + "epoch": 0.07437872954180925, + "grad_norm": 428.64483642578125, + "learning_rate": 7.364000000000001e-06, + "loss": 45.605, + "step": 36820 + }, + { + "epoch": 0.07439893017449306, + "grad_norm": 167.21322631835938, + "learning_rate": 7.366000000000001e-06, + "loss": 21.6644, + "step": 36830 + }, + { + "epoch": 0.07441913080717688, + "grad_norm": 282.8342590332031, + "learning_rate": 7.3680000000000004e-06, + "loss": 15.7841, + "step": 36840 + }, + { + "epoch": 0.0744393314398607, + "grad_norm": 996.6889038085938, + "learning_rate": 7.370000000000001e-06, + "loss": 25.6472, + "step": 36850 + }, + { + "epoch": 0.07445953207254451, + "grad_norm": 406.68341064453125, + "learning_rate": 7.372e-06, + "loss": 28.4243, + "step": 36860 + 
}, + { + "epoch": 0.07447973270522833, + "grad_norm": 260.7794189453125, + "learning_rate": 7.374000000000001e-06, + "loss": 7.6112, + "step": 36870 + }, + { + "epoch": 0.07449993333791215, + "grad_norm": 580.6053466796875, + "learning_rate": 7.376000000000001e-06, + "loss": 20.1667, + "step": 36880 + }, + { + "epoch": 0.07452013397059595, + "grad_norm": 344.7051086425781, + "learning_rate": 7.378e-06, + "loss": 19.748, + "step": 36890 + }, + { + "epoch": 0.07454033460327977, + "grad_norm": 460.07928466796875, + "learning_rate": 7.3800000000000005e-06, + "loss": 25.7779, + "step": 36900 + }, + { + "epoch": 0.0745605352359636, + "grad_norm": 302.0501708984375, + "learning_rate": 7.382000000000001e-06, + "loss": 25.0444, + "step": 36910 + }, + { + "epoch": 0.0745807358686474, + "grad_norm": 419.9612731933594, + "learning_rate": 7.384e-06, + "loss": 33.317, + "step": 36920 + }, + { + "epoch": 0.07460093650133122, + "grad_norm": 366.3216552734375, + "learning_rate": 7.386000000000001e-06, + "loss": 28.3039, + "step": 36930 + }, + { + "epoch": 0.07462113713401504, + "grad_norm": 539.5303344726562, + "learning_rate": 7.388000000000001e-06, + "loss": 25.8313, + "step": 36940 + }, + { + "epoch": 0.07464133776669886, + "grad_norm": 284.8318176269531, + "learning_rate": 7.39e-06, + "loss": 28.2287, + "step": 36950 + }, + { + "epoch": 0.07466153839938267, + "grad_norm": 441.8493347167969, + "learning_rate": 7.3920000000000005e-06, + "loss": 15.1471, + "step": 36960 + }, + { + "epoch": 0.07468173903206649, + "grad_norm": 221.10379028320312, + "learning_rate": 7.394e-06, + "loss": 21.8838, + "step": 36970 + }, + { + "epoch": 0.0747019396647503, + "grad_norm": 668.7821655273438, + "learning_rate": 7.396000000000001e-06, + "loss": 29.002, + "step": 36980 + }, + { + "epoch": 0.07472214029743411, + "grad_norm": 157.21408081054688, + "learning_rate": 7.398000000000001e-06, + "loss": 18.1363, + "step": 36990 + }, + { + "epoch": 0.07474234093011793, + "grad_norm": 352.7605285644531, + "learning_rate": 7.4e-06, + "loss": 23.2426, + "step": 37000 + }, + { + "epoch": 0.07476254156280175, + "grad_norm": 495.28564453125, + "learning_rate": 7.4020000000000005e-06, + "loss": 27.4683, + "step": 37010 + }, + { + "epoch": 0.07478274219548556, + "grad_norm": 372.5489501953125, + "learning_rate": 7.404e-06, + "loss": 19.4716, + "step": 37020 + }, + { + "epoch": 0.07480294282816938, + "grad_norm": 595.1874389648438, + "learning_rate": 7.406000000000001e-06, + "loss": 17.6525, + "step": 37030 + }, + { + "epoch": 0.0748231434608532, + "grad_norm": 270.90087890625, + "learning_rate": 7.408000000000001e-06, + "loss": 20.5332, + "step": 37040 + }, + { + "epoch": 0.074843344093537, + "grad_norm": 218.74630737304688, + "learning_rate": 7.41e-06, + "loss": 31.4177, + "step": 37050 + }, + { + "epoch": 0.07486354472622082, + "grad_norm": 651.9527587890625, + "learning_rate": 7.412e-06, + "loss": 13.3607, + "step": 37060 + }, + { + "epoch": 0.07488374535890464, + "grad_norm": 434.46826171875, + "learning_rate": 7.4140000000000005e-06, + "loss": 28.5692, + "step": 37070 + }, + { + "epoch": 0.07490394599158845, + "grad_norm": 726.3607177734375, + "learning_rate": 7.416000000000001e-06, + "loss": 39.4051, + "step": 37080 + }, + { + "epoch": 0.07492414662427227, + "grad_norm": 281.404541015625, + "learning_rate": 7.418000000000001e-06, + "loss": 29.5586, + "step": 37090 + }, + { + "epoch": 0.07494434725695609, + "grad_norm": 226.3792266845703, + "learning_rate": 7.420000000000001e-06, + "loss": 35.242, + "step": 37100 + }, + { + 
"epoch": 0.07496454788963991, + "grad_norm": 130.22166442871094, + "learning_rate": 7.422e-06, + "loss": 19.7581, + "step": 37110 + }, + { + "epoch": 0.07498474852232372, + "grad_norm": 604.023681640625, + "learning_rate": 7.424e-06, + "loss": 13.2463, + "step": 37120 + }, + { + "epoch": 0.07500494915500754, + "grad_norm": 749.285400390625, + "learning_rate": 7.426000000000001e-06, + "loss": 25.0143, + "step": 37130 + }, + { + "epoch": 0.07502514978769136, + "grad_norm": 455.9515380859375, + "learning_rate": 7.428000000000001e-06, + "loss": 22.274, + "step": 37140 + }, + { + "epoch": 0.07504535042037516, + "grad_norm": 388.0310974121094, + "learning_rate": 7.430000000000001e-06, + "loss": 42.7054, + "step": 37150 + }, + { + "epoch": 0.07506555105305898, + "grad_norm": 435.63006591796875, + "learning_rate": 7.432e-06, + "loss": 18.8218, + "step": 37160 + }, + { + "epoch": 0.0750857516857428, + "grad_norm": 390.9641418457031, + "learning_rate": 7.434e-06, + "loss": 23.881, + "step": 37170 + }, + { + "epoch": 0.07510595231842661, + "grad_norm": 200.30760192871094, + "learning_rate": 7.436000000000001e-06, + "loss": 9.9231, + "step": 37180 + }, + { + "epoch": 0.07512615295111043, + "grad_norm": 1305.658935546875, + "learning_rate": 7.438000000000001e-06, + "loss": 49.221, + "step": 37190 + }, + { + "epoch": 0.07514635358379425, + "grad_norm": 560.2576904296875, + "learning_rate": 7.440000000000001e-06, + "loss": 37.7084, + "step": 37200 + }, + { + "epoch": 0.07516655421647805, + "grad_norm": 796.267333984375, + "learning_rate": 7.442e-06, + "loss": 37.0953, + "step": 37210 + }, + { + "epoch": 0.07518675484916187, + "grad_norm": 212.97508239746094, + "learning_rate": 7.444e-06, + "loss": 25.6259, + "step": 37220 + }, + { + "epoch": 0.0752069554818457, + "grad_norm": 151.27481079101562, + "learning_rate": 7.446000000000001e-06, + "loss": 21.353, + "step": 37230 + }, + { + "epoch": 0.0752271561145295, + "grad_norm": 427.7612609863281, + "learning_rate": 7.4480000000000005e-06, + "loss": 28.3006, + "step": 37240 + }, + { + "epoch": 0.07524735674721332, + "grad_norm": 374.7218017578125, + "learning_rate": 7.450000000000001e-06, + "loss": 21.3628, + "step": 37250 + }, + { + "epoch": 0.07526755737989714, + "grad_norm": 285.76763916015625, + "learning_rate": 7.452e-06, + "loss": 26.1006, + "step": 37260 + }, + { + "epoch": 0.07528775801258096, + "grad_norm": 399.39837646484375, + "learning_rate": 7.454e-06, + "loss": 25.5958, + "step": 37270 + }, + { + "epoch": 0.07530795864526477, + "grad_norm": 261.0355224609375, + "learning_rate": 7.456000000000001e-06, + "loss": 32.5744, + "step": 37280 + }, + { + "epoch": 0.07532815927794859, + "grad_norm": 261.322998046875, + "learning_rate": 7.458e-06, + "loss": 15.9234, + "step": 37290 + }, + { + "epoch": 0.0753483599106324, + "grad_norm": 595.6270141601562, + "learning_rate": 7.4600000000000006e-06, + "loss": 20.6847, + "step": 37300 + }, + { + "epoch": 0.07536856054331621, + "grad_norm": 687.22900390625, + "learning_rate": 7.462000000000001e-06, + "loss": 23.3383, + "step": 37310 + }, + { + "epoch": 0.07538876117600003, + "grad_norm": 506.7188415527344, + "learning_rate": 7.464e-06, + "loss": 36.2405, + "step": 37320 + }, + { + "epoch": 0.07540896180868385, + "grad_norm": 164.11167907714844, + "learning_rate": 7.466000000000001e-06, + "loss": 27.747, + "step": 37330 + }, + { + "epoch": 0.07542916244136766, + "grad_norm": 796.3577880859375, + "learning_rate": 7.468000000000001e-06, + "loss": 35.0851, + "step": 37340 + }, + { + "epoch": 0.07544936307405148, + 
"grad_norm": 323.20159912109375, + "learning_rate": 7.4700000000000005e-06, + "loss": 24.5756, + "step": 37350 + }, + { + "epoch": 0.0754695637067353, + "grad_norm": 509.2854309082031, + "learning_rate": 7.472000000000001e-06, + "loss": 18.1768, + "step": 37360 + }, + { + "epoch": 0.0754897643394191, + "grad_norm": 312.6730651855469, + "learning_rate": 7.474e-06, + "loss": 17.1089, + "step": 37370 + }, + { + "epoch": 0.07550996497210292, + "grad_norm": 584.5643920898438, + "learning_rate": 7.476000000000001e-06, + "loss": 17.1565, + "step": 37380 + }, + { + "epoch": 0.07553016560478674, + "grad_norm": 795.0660400390625, + "learning_rate": 7.478000000000001e-06, + "loss": 29.7825, + "step": 37390 + }, + { + "epoch": 0.07555036623747055, + "grad_norm": 894.1143798828125, + "learning_rate": 7.48e-06, + "loss": 21.5171, + "step": 37400 + }, + { + "epoch": 0.07557056687015437, + "grad_norm": 1436.5306396484375, + "learning_rate": 7.4820000000000005e-06, + "loss": 31.2726, + "step": 37410 + }, + { + "epoch": 0.07559076750283819, + "grad_norm": 465.63702392578125, + "learning_rate": 7.484e-06, + "loss": 59.6799, + "step": 37420 + }, + { + "epoch": 0.07561096813552201, + "grad_norm": 656.0516357421875, + "learning_rate": 7.486000000000001e-06, + "loss": 30.0095, + "step": 37430 + }, + { + "epoch": 0.07563116876820582, + "grad_norm": 269.5618896484375, + "learning_rate": 7.488000000000001e-06, + "loss": 33.9991, + "step": 37440 + }, + { + "epoch": 0.07565136940088964, + "grad_norm": 410.20654296875, + "learning_rate": 7.49e-06, + "loss": 38.3308, + "step": 37450 + }, + { + "epoch": 0.07567157003357346, + "grad_norm": 316.13446044921875, + "learning_rate": 7.4920000000000004e-06, + "loss": 23.907, + "step": 37460 + }, + { + "epoch": 0.07569177066625726, + "grad_norm": 325.43585205078125, + "learning_rate": 7.494000000000001e-06, + "loss": 25.014, + "step": 37470 + }, + { + "epoch": 0.07571197129894108, + "grad_norm": 317.10723876953125, + "learning_rate": 7.496000000000001e-06, + "loss": 25.0031, + "step": 37480 + }, + { + "epoch": 0.0757321719316249, + "grad_norm": 154.67532348632812, + "learning_rate": 7.498000000000001e-06, + "loss": 14.9261, + "step": 37490 + }, + { + "epoch": 0.07575237256430871, + "grad_norm": 840.64013671875, + "learning_rate": 7.500000000000001e-06, + "loss": 28.7005, + "step": 37500 + }, + { + "epoch": 0.07577257319699253, + "grad_norm": 220.19874572753906, + "learning_rate": 7.502e-06, + "loss": 18.3881, + "step": 37510 + }, + { + "epoch": 0.07579277382967635, + "grad_norm": 475.55853271484375, + "learning_rate": 7.5040000000000005e-06, + "loss": 23.9347, + "step": 37520 + }, + { + "epoch": 0.07581297446236016, + "grad_norm": 700.2653198242188, + "learning_rate": 7.506000000000001e-06, + "loss": 25.2646, + "step": 37530 + }, + { + "epoch": 0.07583317509504398, + "grad_norm": 204.63330078125, + "learning_rate": 7.508000000000001e-06, + "loss": 20.2139, + "step": 37540 + }, + { + "epoch": 0.0758533757277278, + "grad_norm": 661.1112060546875, + "learning_rate": 7.510000000000001e-06, + "loss": 14.1901, + "step": 37550 + }, + { + "epoch": 0.0758735763604116, + "grad_norm": 962.9714965820312, + "learning_rate": 7.512e-06, + "loss": 27.8115, + "step": 37560 + }, + { + "epoch": 0.07589377699309542, + "grad_norm": 300.95953369140625, + "learning_rate": 7.514e-06, + "loss": 17.9731, + "step": 37570 + }, + { + "epoch": 0.07591397762577924, + "grad_norm": 0.0, + "learning_rate": 7.516000000000001e-06, + "loss": 16.0024, + "step": 37580 + }, + { + "epoch": 0.07593417825846306, + 
"grad_norm": 1225.2772216796875, + "learning_rate": 7.518000000000001e-06, + "loss": 25.6451, + "step": 37590 + }, + { + "epoch": 0.07595437889114687, + "grad_norm": 384.1285095214844, + "learning_rate": 7.520000000000001e-06, + "loss": 25.0455, + "step": 37600 + }, + { + "epoch": 0.07597457952383069, + "grad_norm": 333.9071960449219, + "learning_rate": 7.522e-06, + "loss": 52.661, + "step": 37610 + }, + { + "epoch": 0.07599478015651451, + "grad_norm": 674.0836791992188, + "learning_rate": 7.524e-06, + "loss": 31.7111, + "step": 37620 + }, + { + "epoch": 0.07601498078919831, + "grad_norm": 640.943603515625, + "learning_rate": 7.526000000000001e-06, + "loss": 20.8616, + "step": 37630 + }, + { + "epoch": 0.07603518142188213, + "grad_norm": 501.4905090332031, + "learning_rate": 7.528000000000001e-06, + "loss": 30.3123, + "step": 37640 + }, + { + "epoch": 0.07605538205456595, + "grad_norm": 301.7007751464844, + "learning_rate": 7.530000000000001e-06, + "loss": 28.5057, + "step": 37650 + }, + { + "epoch": 0.07607558268724976, + "grad_norm": 521.26953125, + "learning_rate": 7.532e-06, + "loss": 31.6623, + "step": 37660 + }, + { + "epoch": 0.07609578331993358, + "grad_norm": 436.21356201171875, + "learning_rate": 7.534e-06, + "loss": 29.2193, + "step": 37670 + }, + { + "epoch": 0.0761159839526174, + "grad_norm": 395.0469055175781, + "learning_rate": 7.536000000000001e-06, + "loss": 10.7322, + "step": 37680 + }, + { + "epoch": 0.0761361845853012, + "grad_norm": 680.1278076171875, + "learning_rate": 7.5380000000000005e-06, + "loss": 26.2934, + "step": 37690 + }, + { + "epoch": 0.07615638521798503, + "grad_norm": 171.3746337890625, + "learning_rate": 7.540000000000001e-06, + "loss": 28.6653, + "step": 37700 + }, + { + "epoch": 0.07617658585066885, + "grad_norm": 638.10009765625, + "learning_rate": 7.542000000000001e-06, + "loss": 31.7623, + "step": 37710 + }, + { + "epoch": 0.07619678648335265, + "grad_norm": 997.4443969726562, + "learning_rate": 7.544e-06, + "loss": 26.3061, + "step": 37720 + }, + { + "epoch": 0.07621698711603647, + "grad_norm": 659.7255249023438, + "learning_rate": 7.546000000000001e-06, + "loss": 24.0984, + "step": 37730 + }, + { + "epoch": 0.07623718774872029, + "grad_norm": 463.6721496582031, + "learning_rate": 7.548000000000001e-06, + "loss": 29.6272, + "step": 37740 + }, + { + "epoch": 0.07625738838140411, + "grad_norm": 301.8927917480469, + "learning_rate": 7.5500000000000006e-06, + "loss": 23.1385, + "step": 37750 + }, + { + "epoch": 0.07627758901408792, + "grad_norm": 422.1348876953125, + "learning_rate": 7.552000000000001e-06, + "loss": 21.7568, + "step": 37760 + }, + { + "epoch": 0.07629778964677174, + "grad_norm": 290.8968200683594, + "learning_rate": 7.554e-06, + "loss": 26.4895, + "step": 37770 + }, + { + "epoch": 0.07631799027945556, + "grad_norm": 453.95074462890625, + "learning_rate": 7.556000000000001e-06, + "loss": 24.2269, + "step": 37780 + }, + { + "epoch": 0.07633819091213936, + "grad_norm": 1822.8984375, + "learning_rate": 7.558000000000001e-06, + "loss": 32.174, + "step": 37790 + }, + { + "epoch": 0.07635839154482318, + "grad_norm": 482.6763000488281, + "learning_rate": 7.5600000000000005e-06, + "loss": 30.5826, + "step": 37800 + }, + { + "epoch": 0.076378592177507, + "grad_norm": 306.6064147949219, + "learning_rate": 7.562000000000001e-06, + "loss": 25.7277, + "step": 37810 + }, + { + "epoch": 0.07639879281019081, + "grad_norm": 189.5863494873047, + "learning_rate": 7.564e-06, + "loss": 26.8762, + "step": 37820 + }, + { + "epoch": 0.07641899344287463, + 
"grad_norm": 473.7322692871094, + "learning_rate": 7.566000000000001e-06, + "loss": 34.0479, + "step": 37830 + }, + { + "epoch": 0.07643919407555845, + "grad_norm": 84.52055358886719, + "learning_rate": 7.568000000000001e-06, + "loss": 49.771, + "step": 37840 + }, + { + "epoch": 0.07645939470824226, + "grad_norm": 611.2411499023438, + "learning_rate": 7.57e-06, + "loss": 20.2221, + "step": 37850 + }, + { + "epoch": 0.07647959534092608, + "grad_norm": 550.1533813476562, + "learning_rate": 7.5720000000000005e-06, + "loss": 22.9262, + "step": 37860 + }, + { + "epoch": 0.0764997959736099, + "grad_norm": 524.09521484375, + "learning_rate": 7.574e-06, + "loss": 19.1146, + "step": 37870 + }, + { + "epoch": 0.0765199966062937, + "grad_norm": 432.2309265136719, + "learning_rate": 7.576000000000001e-06, + "loss": 19.8361, + "step": 37880 + }, + { + "epoch": 0.07654019723897752, + "grad_norm": 1506.61328125, + "learning_rate": 7.578000000000001e-06, + "loss": 33.7366, + "step": 37890 + }, + { + "epoch": 0.07656039787166134, + "grad_norm": 379.86492919921875, + "learning_rate": 7.58e-06, + "loss": 27.6723, + "step": 37900 + }, + { + "epoch": 0.07658059850434516, + "grad_norm": 290.55078125, + "learning_rate": 7.582e-06, + "loss": 25.5951, + "step": 37910 + }, + { + "epoch": 0.07660079913702897, + "grad_norm": 501.2511291503906, + "learning_rate": 7.5840000000000006e-06, + "loss": 31.3163, + "step": 37920 + }, + { + "epoch": 0.07662099976971279, + "grad_norm": 277.7619934082031, + "learning_rate": 7.586000000000001e-06, + "loss": 25.5508, + "step": 37930 + }, + { + "epoch": 0.07664120040239661, + "grad_norm": 225.18263244628906, + "learning_rate": 7.588000000000001e-06, + "loss": 14.8932, + "step": 37940 + }, + { + "epoch": 0.07666140103508041, + "grad_norm": 688.5034790039062, + "learning_rate": 7.590000000000001e-06, + "loss": 30.3668, + "step": 37950 + }, + { + "epoch": 0.07668160166776423, + "grad_norm": 1253.2955322265625, + "learning_rate": 7.592e-06, + "loss": 25.6947, + "step": 37960 + }, + { + "epoch": 0.07670180230044805, + "grad_norm": 444.8907775878906, + "learning_rate": 7.5940000000000005e-06, + "loss": 31.8752, + "step": 37970 + }, + { + "epoch": 0.07672200293313186, + "grad_norm": 1363.9368896484375, + "learning_rate": 7.5960000000000015e-06, + "loss": 37.3974, + "step": 37980 + }, + { + "epoch": 0.07674220356581568, + "grad_norm": 808.9375, + "learning_rate": 7.598000000000001e-06, + "loss": 44.4144, + "step": 37990 + }, + { + "epoch": 0.0767624041984995, + "grad_norm": 914.0721435546875, + "learning_rate": 7.600000000000001e-06, + "loss": 44.3968, + "step": 38000 + }, + { + "epoch": 0.07678260483118331, + "grad_norm": 372.5037536621094, + "learning_rate": 7.602e-06, + "loss": 11.9006, + "step": 38010 + }, + { + "epoch": 0.07680280546386713, + "grad_norm": 615.7632446289062, + "learning_rate": 7.604e-06, + "loss": 23.4646, + "step": 38020 + }, + { + "epoch": 0.07682300609655095, + "grad_norm": 372.4645690917969, + "learning_rate": 7.606000000000001e-06, + "loss": 23.2694, + "step": 38030 + }, + { + "epoch": 0.07684320672923475, + "grad_norm": 420.320068359375, + "learning_rate": 7.608000000000001e-06, + "loss": 26.0293, + "step": 38040 + }, + { + "epoch": 0.07686340736191857, + "grad_norm": 64.26026153564453, + "learning_rate": 7.610000000000001e-06, + "loss": 14.9356, + "step": 38050 + }, + { + "epoch": 0.0768836079946024, + "grad_norm": 496.4963073730469, + "learning_rate": 7.612e-06, + "loss": 29.7388, + "step": 38060 + }, + { + "epoch": 0.07690380862728621, + "grad_norm": 
507.73486328125, + "learning_rate": 7.614e-06, + "loss": 35.2588, + "step": 38070 + }, + { + "epoch": 0.07692400925997002, + "grad_norm": 749.43505859375, + "learning_rate": 7.616000000000001e-06, + "loss": 22.8871, + "step": 38080 + }, + { + "epoch": 0.07694420989265384, + "grad_norm": 670.1237182617188, + "learning_rate": 7.618000000000001e-06, + "loss": 36.7901, + "step": 38090 + }, + { + "epoch": 0.07696441052533766, + "grad_norm": 603.0145874023438, + "learning_rate": 7.620000000000001e-06, + "loss": 34.3796, + "step": 38100 + }, + { + "epoch": 0.07698461115802147, + "grad_norm": 187.35418701171875, + "learning_rate": 7.622000000000001e-06, + "loss": 51.1629, + "step": 38110 + }, + { + "epoch": 0.07700481179070529, + "grad_norm": 321.2225646972656, + "learning_rate": 7.624e-06, + "loss": 29.4901, + "step": 38120 + }, + { + "epoch": 0.0770250124233891, + "grad_norm": 518.65478515625, + "learning_rate": 7.626e-06, + "loss": 17.8433, + "step": 38130 + }, + { + "epoch": 0.07704521305607291, + "grad_norm": 843.0776977539062, + "learning_rate": 7.628000000000001e-06, + "loss": 29.3418, + "step": 38140 + }, + { + "epoch": 0.07706541368875673, + "grad_norm": 927.7374877929688, + "learning_rate": 7.630000000000001e-06, + "loss": 28.081, + "step": 38150 + }, + { + "epoch": 0.07708561432144055, + "grad_norm": 97.3183364868164, + "learning_rate": 7.632e-06, + "loss": 22.5968, + "step": 38160 + }, + { + "epoch": 0.07710581495412436, + "grad_norm": 285.63079833984375, + "learning_rate": 7.634e-06, + "loss": 30.5677, + "step": 38170 + }, + { + "epoch": 0.07712601558680818, + "grad_norm": 595.7236938476562, + "learning_rate": 7.636e-06, + "loss": 22.4367, + "step": 38180 + }, + { + "epoch": 0.077146216219492, + "grad_norm": 415.005859375, + "learning_rate": 7.638e-06, + "loss": 19.8356, + "step": 38190 + }, + { + "epoch": 0.0771664168521758, + "grad_norm": 306.705322265625, + "learning_rate": 7.640000000000001e-06, + "loss": 28.9574, + "step": 38200 + }, + { + "epoch": 0.07718661748485962, + "grad_norm": 787.25439453125, + "learning_rate": 7.642e-06, + "loss": 30.5896, + "step": 38210 + }, + { + "epoch": 0.07720681811754344, + "grad_norm": 534.6594848632812, + "learning_rate": 7.644e-06, + "loss": 18.7692, + "step": 38220 + }, + { + "epoch": 0.07722701875022726, + "grad_norm": 585.4483032226562, + "learning_rate": 7.646e-06, + "loss": 15.8626, + "step": 38230 + }, + { + "epoch": 0.07724721938291107, + "grad_norm": 363.90533447265625, + "learning_rate": 7.648e-06, + "loss": 25.8909, + "step": 38240 + }, + { + "epoch": 0.07726742001559489, + "grad_norm": 1323.8687744140625, + "learning_rate": 7.650000000000001e-06, + "loss": 19.5509, + "step": 38250 + }, + { + "epoch": 0.07728762064827871, + "grad_norm": 408.2150573730469, + "learning_rate": 7.652e-06, + "loss": 56.9264, + "step": 38260 + }, + { + "epoch": 0.07730782128096252, + "grad_norm": 560.2208862304688, + "learning_rate": 7.654e-06, + "loss": 18.0374, + "step": 38270 + }, + { + "epoch": 0.07732802191364634, + "grad_norm": 481.2164306640625, + "learning_rate": 7.656000000000001e-06, + "loss": 20.3647, + "step": 38280 + }, + { + "epoch": 0.07734822254633016, + "grad_norm": 482.8931579589844, + "learning_rate": 7.658e-06, + "loss": 13.6266, + "step": 38290 + }, + { + "epoch": 0.07736842317901396, + "grad_norm": 443.9976806640625, + "learning_rate": 7.660000000000001e-06, + "loss": 16.6408, + "step": 38300 + }, + { + "epoch": 0.07738862381169778, + "grad_norm": 210.58482360839844, + "learning_rate": 7.662e-06, + "loss": 13.5715, + "step": 38310 + }, 
+ { + "epoch": 0.0774088244443816, + "grad_norm": 504.7493896484375, + "learning_rate": 7.664e-06, + "loss": 21.5304, + "step": 38320 + }, + { + "epoch": 0.07742902507706541, + "grad_norm": 289.60467529296875, + "learning_rate": 7.666e-06, + "loss": 15.3804, + "step": 38330 + }, + { + "epoch": 0.07744922570974923, + "grad_norm": 631.2286376953125, + "learning_rate": 7.668000000000002e-06, + "loss": 14.5907, + "step": 38340 + }, + { + "epoch": 0.07746942634243305, + "grad_norm": 502.9691162109375, + "learning_rate": 7.670000000000001e-06, + "loss": 31.2539, + "step": 38350 + }, + { + "epoch": 0.07748962697511685, + "grad_norm": 769.7857055664062, + "learning_rate": 7.672e-06, + "loss": 26.5301, + "step": 38360 + }, + { + "epoch": 0.07750982760780067, + "grad_norm": 1897.988525390625, + "learning_rate": 7.674e-06, + "loss": 44.4017, + "step": 38370 + }, + { + "epoch": 0.0775300282404845, + "grad_norm": 465.952880859375, + "learning_rate": 7.676e-06, + "loss": 37.5696, + "step": 38380 + }, + { + "epoch": 0.07755022887316831, + "grad_norm": 1166.8406982421875, + "learning_rate": 7.678000000000002e-06, + "loss": 19.16, + "step": 38390 + }, + { + "epoch": 0.07757042950585212, + "grad_norm": 434.853271484375, + "learning_rate": 7.680000000000001e-06, + "loss": 27.5619, + "step": 38400 + }, + { + "epoch": 0.07759063013853594, + "grad_norm": 784.1865844726562, + "learning_rate": 7.682e-06, + "loss": 38.5985, + "step": 38410 + }, + { + "epoch": 0.07761083077121976, + "grad_norm": 383.14056396484375, + "learning_rate": 7.684e-06, + "loss": 24.7993, + "step": 38420 + }, + { + "epoch": 0.07763103140390357, + "grad_norm": 652.2393798828125, + "learning_rate": 7.686e-06, + "loss": 38.6169, + "step": 38430 + }, + { + "epoch": 0.07765123203658739, + "grad_norm": 150.37486267089844, + "learning_rate": 7.688000000000002e-06, + "loss": 31.2974, + "step": 38440 + }, + { + "epoch": 0.0776714326692712, + "grad_norm": 388.88787841796875, + "learning_rate": 7.690000000000001e-06, + "loss": 27.2446, + "step": 38450 + }, + { + "epoch": 0.07769163330195501, + "grad_norm": 366.23748779296875, + "learning_rate": 7.692e-06, + "loss": 18.4109, + "step": 38460 + }, + { + "epoch": 0.07771183393463883, + "grad_norm": 170.95111083984375, + "learning_rate": 7.694e-06, + "loss": 39.7728, + "step": 38470 + }, + { + "epoch": 0.07773203456732265, + "grad_norm": 687.3958129882812, + "learning_rate": 7.696e-06, + "loss": 29.1284, + "step": 38480 + }, + { + "epoch": 0.07775223520000646, + "grad_norm": 922.0366821289062, + "learning_rate": 7.698000000000002e-06, + "loss": 27.472, + "step": 38490 + }, + { + "epoch": 0.07777243583269028, + "grad_norm": 536.3035888671875, + "learning_rate": 7.7e-06, + "loss": 20.7643, + "step": 38500 + }, + { + "epoch": 0.0777926364653741, + "grad_norm": 902.5062866210938, + "learning_rate": 7.702e-06, + "loss": 37.1712, + "step": 38510 + }, + { + "epoch": 0.0778128370980579, + "grad_norm": 434.46832275390625, + "learning_rate": 7.704000000000001e-06, + "loss": 34.8708, + "step": 38520 + }, + { + "epoch": 0.07783303773074172, + "grad_norm": 1345.6851806640625, + "learning_rate": 7.706e-06, + "loss": 26.2673, + "step": 38530 + }, + { + "epoch": 0.07785323836342554, + "grad_norm": 687.81591796875, + "learning_rate": 7.708000000000001e-06, + "loss": 30.508, + "step": 38540 + }, + { + "epoch": 0.07787343899610936, + "grad_norm": 121.17589569091797, + "learning_rate": 7.71e-06, + "loss": 28.7651, + "step": 38550 + }, + { + "epoch": 0.07789363962879317, + "grad_norm": 371.45782470703125, + "learning_rate": 
7.712e-06, + "loss": 10.9951, + "step": 38560 + }, + { + "epoch": 0.07791384026147699, + "grad_norm": 213.26759338378906, + "learning_rate": 7.714000000000001e-06, + "loss": 16.4614, + "step": 38570 + }, + { + "epoch": 0.07793404089416081, + "grad_norm": 198.55184936523438, + "learning_rate": 7.716e-06, + "loss": 12.8972, + "step": 38580 + }, + { + "epoch": 0.07795424152684462, + "grad_norm": 536.1241455078125, + "learning_rate": 7.718000000000001e-06, + "loss": 35.0614, + "step": 38590 + }, + { + "epoch": 0.07797444215952844, + "grad_norm": 51.67824935913086, + "learning_rate": 7.72e-06, + "loss": 20.0434, + "step": 38600 + }, + { + "epoch": 0.07799464279221226, + "grad_norm": 495.4625549316406, + "learning_rate": 7.722e-06, + "loss": 20.3598, + "step": 38610 + }, + { + "epoch": 0.07801484342489606, + "grad_norm": 378.313232421875, + "learning_rate": 7.724000000000001e-06, + "loss": 22.1983, + "step": 38620 + }, + { + "epoch": 0.07803504405757988, + "grad_norm": 619.906005859375, + "learning_rate": 7.726e-06, + "loss": 33.8938, + "step": 38630 + }, + { + "epoch": 0.0780552446902637, + "grad_norm": 631.1979370117188, + "learning_rate": 7.728000000000001e-06, + "loss": 58.8985, + "step": 38640 + }, + { + "epoch": 0.07807544532294751, + "grad_norm": 437.4310607910156, + "learning_rate": 7.73e-06, + "loss": 30.0452, + "step": 38650 + }, + { + "epoch": 0.07809564595563133, + "grad_norm": 698.7709350585938, + "learning_rate": 7.732e-06, + "loss": 40.7639, + "step": 38660 + }, + { + "epoch": 0.07811584658831515, + "grad_norm": 877.6707153320312, + "learning_rate": 7.734e-06, + "loss": 32.1464, + "step": 38670 + }, + { + "epoch": 0.07813604722099896, + "grad_norm": 1028.7607421875, + "learning_rate": 7.736e-06, + "loss": 35.8577, + "step": 38680 + }, + { + "epoch": 0.07815624785368278, + "grad_norm": 375.9439392089844, + "learning_rate": 7.738000000000001e-06, + "loss": 22.3812, + "step": 38690 + }, + { + "epoch": 0.0781764484863666, + "grad_norm": 413.5697021484375, + "learning_rate": 7.74e-06, + "loss": 23.9866, + "step": 38700 + }, + { + "epoch": 0.07819664911905042, + "grad_norm": 441.6208801269531, + "learning_rate": 7.742000000000001e-06, + "loss": 31.8802, + "step": 38710 + }, + { + "epoch": 0.07821684975173422, + "grad_norm": 555.9667358398438, + "learning_rate": 7.744e-06, + "loss": 42.2381, + "step": 38720 + }, + { + "epoch": 0.07823705038441804, + "grad_norm": 448.4810485839844, + "learning_rate": 7.746e-06, + "loss": 25.8219, + "step": 38730 + }, + { + "epoch": 0.07825725101710186, + "grad_norm": 741.141357421875, + "learning_rate": 7.748000000000001e-06, + "loss": 22.2343, + "step": 38740 + }, + { + "epoch": 0.07827745164978567, + "grad_norm": 1138.6595458984375, + "learning_rate": 7.75e-06, + "loss": 34.1904, + "step": 38750 + }, + { + "epoch": 0.07829765228246949, + "grad_norm": 892.67138671875, + "learning_rate": 7.752000000000001e-06, + "loss": 40.0788, + "step": 38760 + }, + { + "epoch": 0.07831785291515331, + "grad_norm": 363.2459411621094, + "learning_rate": 7.754e-06, + "loss": 30.8245, + "step": 38770 + }, + { + "epoch": 0.07833805354783711, + "grad_norm": 301.23675537109375, + "learning_rate": 7.756e-06, + "loss": 40.9477, + "step": 38780 + }, + { + "epoch": 0.07835825418052093, + "grad_norm": 0.0, + "learning_rate": 7.758000000000001e-06, + "loss": 22.1821, + "step": 38790 + }, + { + "epoch": 0.07837845481320475, + "grad_norm": 2011.7015380859375, + "learning_rate": 7.76e-06, + "loss": 50.0491, + "step": 38800 + }, + { + "epoch": 0.07839865544588856, + "grad_norm": 
141.40940856933594, + "learning_rate": 7.762000000000001e-06, + "loss": 27.8431, + "step": 38810 + }, + { + "epoch": 0.07841885607857238, + "grad_norm": 507.6337890625, + "learning_rate": 7.764e-06, + "loss": 37.4765, + "step": 38820 + }, + { + "epoch": 0.0784390567112562, + "grad_norm": 957.7791137695312, + "learning_rate": 7.766e-06, + "loss": 44.3315, + "step": 38830 + }, + { + "epoch": 0.07845925734394, + "grad_norm": 348.3440856933594, + "learning_rate": 7.768e-06, + "loss": 19.1895, + "step": 38840 + }, + { + "epoch": 0.07847945797662383, + "grad_norm": 295.80853271484375, + "learning_rate": 7.77e-06, + "loss": 19.3399, + "step": 38850 + }, + { + "epoch": 0.07849965860930765, + "grad_norm": 333.095947265625, + "learning_rate": 7.772000000000001e-06, + "loss": 40.4358, + "step": 38860 + }, + { + "epoch": 0.07851985924199147, + "grad_norm": 905.6741943359375, + "learning_rate": 7.774e-06, + "loss": 31.8227, + "step": 38870 + }, + { + "epoch": 0.07854005987467527, + "grad_norm": 240.92953491210938, + "learning_rate": 7.776e-06, + "loss": 15.9759, + "step": 38880 + }, + { + "epoch": 0.07856026050735909, + "grad_norm": 402.4757080078125, + "learning_rate": 7.778e-06, + "loss": 29.6348, + "step": 38890 + }, + { + "epoch": 0.07858046114004291, + "grad_norm": 0.0, + "learning_rate": 7.78e-06, + "loss": 30.8198, + "step": 38900 + }, + { + "epoch": 0.07860066177272672, + "grad_norm": 806.3101196289062, + "learning_rate": 7.782000000000001e-06, + "loss": 20.9412, + "step": 38910 + }, + { + "epoch": 0.07862086240541054, + "grad_norm": 77.98436737060547, + "learning_rate": 7.784e-06, + "loss": 18.515, + "step": 38920 + }, + { + "epoch": 0.07864106303809436, + "grad_norm": 394.6458435058594, + "learning_rate": 7.786e-06, + "loss": 30.4633, + "step": 38930 + }, + { + "epoch": 0.07866126367077816, + "grad_norm": 214.89698791503906, + "learning_rate": 7.788e-06, + "loss": 29.3289, + "step": 38940 + }, + { + "epoch": 0.07868146430346198, + "grad_norm": 907.835693359375, + "learning_rate": 7.790000000000002e-06, + "loss": 21.0371, + "step": 38950 + }, + { + "epoch": 0.0787016649361458, + "grad_norm": 1484.2606201171875, + "learning_rate": 7.792000000000001e-06, + "loss": 14.1647, + "step": 38960 + }, + { + "epoch": 0.07872186556882961, + "grad_norm": 491.3181457519531, + "learning_rate": 7.794e-06, + "loss": 27.0969, + "step": 38970 + }, + { + "epoch": 0.07874206620151343, + "grad_norm": 291.0534973144531, + "learning_rate": 7.796e-06, + "loss": 47.8539, + "step": 38980 + }, + { + "epoch": 0.07876226683419725, + "grad_norm": 1481.9825439453125, + "learning_rate": 7.798e-06, + "loss": 29.2362, + "step": 38990 + }, + { + "epoch": 0.07878246746688106, + "grad_norm": 337.2052917480469, + "learning_rate": 7.800000000000002e-06, + "loss": 24.4363, + "step": 39000 + }, + { + "epoch": 0.07880266809956488, + "grad_norm": 1043.8558349609375, + "learning_rate": 7.802000000000001e-06, + "loss": 28.7056, + "step": 39010 + }, + { + "epoch": 0.0788228687322487, + "grad_norm": 354.3676452636719, + "learning_rate": 7.804e-06, + "loss": 13.9814, + "step": 39020 + }, + { + "epoch": 0.07884306936493252, + "grad_norm": 678.2699584960938, + "learning_rate": 7.806e-06, + "loss": 27.087, + "step": 39030 + }, + { + "epoch": 0.07886326999761632, + "grad_norm": 736.5831298828125, + "learning_rate": 7.808e-06, + "loss": 23.6024, + "step": 39040 + }, + { + "epoch": 0.07888347063030014, + "grad_norm": 760.6795654296875, + "learning_rate": 7.810000000000001e-06, + "loss": 22.7807, + "step": 39050 + }, + { + "epoch": 
0.07890367126298396, + "grad_norm": 691.625244140625, + "learning_rate": 7.812e-06, + "loss": 34.0978, + "step": 39060 + }, + { + "epoch": 0.07892387189566777, + "grad_norm": 589.5587768554688, + "learning_rate": 7.814e-06, + "loss": 16.2141, + "step": 39070 + }, + { + "epoch": 0.07894407252835159, + "grad_norm": 801.611328125, + "learning_rate": 7.816000000000001e-06, + "loss": 34.3691, + "step": 39080 + }, + { + "epoch": 0.07896427316103541, + "grad_norm": 787.2601318359375, + "learning_rate": 7.818e-06, + "loss": 24.3882, + "step": 39090 + }, + { + "epoch": 0.07898447379371921, + "grad_norm": 602.1470336914062, + "learning_rate": 7.820000000000001e-06, + "loss": 26.3296, + "step": 39100 + }, + { + "epoch": 0.07900467442640303, + "grad_norm": 12.870512008666992, + "learning_rate": 7.822e-06, + "loss": 33.7834, + "step": 39110 + }, + { + "epoch": 0.07902487505908685, + "grad_norm": 243.6926727294922, + "learning_rate": 7.824e-06, + "loss": 24.5471, + "step": 39120 + }, + { + "epoch": 0.07904507569177066, + "grad_norm": 546.6156616210938, + "learning_rate": 7.826000000000001e-06, + "loss": 35.1595, + "step": 39130 + }, + { + "epoch": 0.07906527632445448, + "grad_norm": 60.834861755371094, + "learning_rate": 7.828000000000002e-06, + "loss": 16.6797, + "step": 39140 + }, + { + "epoch": 0.0790854769571383, + "grad_norm": 789.2298583984375, + "learning_rate": 7.830000000000001e-06, + "loss": 22.5289, + "step": 39150 + }, + { + "epoch": 0.07910567758982211, + "grad_norm": 857.40380859375, + "learning_rate": 7.832e-06, + "loss": 17.3143, + "step": 39160 + }, + { + "epoch": 0.07912587822250593, + "grad_norm": 490.2104797363281, + "learning_rate": 7.834e-06, + "loss": 16.5558, + "step": 39170 + }, + { + "epoch": 0.07914607885518975, + "grad_norm": 827.0983276367188, + "learning_rate": 7.836000000000001e-06, + "loss": 14.9396, + "step": 39180 + }, + { + "epoch": 0.07916627948787357, + "grad_norm": 579.365966796875, + "learning_rate": 7.838000000000002e-06, + "loss": 27.6349, + "step": 39190 + }, + { + "epoch": 0.07918648012055737, + "grad_norm": 382.62188720703125, + "learning_rate": 7.840000000000001e-06, + "loss": 47.7565, + "step": 39200 + }, + { + "epoch": 0.0792066807532412, + "grad_norm": 320.55560302734375, + "learning_rate": 7.842e-06, + "loss": 19.2766, + "step": 39210 + }, + { + "epoch": 0.07922688138592501, + "grad_norm": 399.2356262207031, + "learning_rate": 7.844e-06, + "loss": 32.178, + "step": 39220 + }, + { + "epoch": 0.07924708201860882, + "grad_norm": 368.3410949707031, + "learning_rate": 7.846e-06, + "loss": 17.7341, + "step": 39230 + }, + { + "epoch": 0.07926728265129264, + "grad_norm": 279.0954284667969, + "learning_rate": 7.848000000000002e-06, + "loss": 22.8076, + "step": 39240 + }, + { + "epoch": 0.07928748328397646, + "grad_norm": 974.9163208007812, + "learning_rate": 7.850000000000001e-06, + "loss": 33.6329, + "step": 39250 + }, + { + "epoch": 0.07930768391666027, + "grad_norm": 469.6398010253906, + "learning_rate": 7.852e-06, + "loss": 23.4467, + "step": 39260 + }, + { + "epoch": 0.07932788454934409, + "grad_norm": 507.614990234375, + "learning_rate": 7.854e-06, + "loss": 26.2057, + "step": 39270 + }, + { + "epoch": 0.0793480851820279, + "grad_norm": 944.2115478515625, + "learning_rate": 7.856e-06, + "loss": 30.0214, + "step": 39280 + }, + { + "epoch": 0.07936828581471171, + "grad_norm": 562.4888305664062, + "learning_rate": 7.858000000000002e-06, + "loss": 24.1479, + "step": 39290 + }, + { + "epoch": 0.07938848644739553, + "grad_norm": 451.3738098144531, + 
"learning_rate": 7.860000000000001e-06, + "loss": 19.7802, + "step": 39300 + }, + { + "epoch": 0.07940868708007935, + "grad_norm": 360.2376403808594, + "learning_rate": 7.862e-06, + "loss": 24.9804, + "step": 39310 + }, + { + "epoch": 0.07942888771276316, + "grad_norm": 306.64459228515625, + "learning_rate": 7.864000000000001e-06, + "loss": 20.8103, + "step": 39320 + }, + { + "epoch": 0.07944908834544698, + "grad_norm": 688.0297241210938, + "learning_rate": 7.866e-06, + "loss": 26.7215, + "step": 39330 + }, + { + "epoch": 0.0794692889781308, + "grad_norm": 685.5408325195312, + "learning_rate": 7.868000000000002e-06, + "loss": 33.9052, + "step": 39340 + }, + { + "epoch": 0.07948948961081462, + "grad_norm": 216.55715942382812, + "learning_rate": 7.870000000000001e-06, + "loss": 18.1017, + "step": 39350 + }, + { + "epoch": 0.07950969024349842, + "grad_norm": 402.8645935058594, + "learning_rate": 7.872e-06, + "loss": 24.513, + "step": 39360 + }, + { + "epoch": 0.07952989087618224, + "grad_norm": 946.2855224609375, + "learning_rate": 7.874000000000001e-06, + "loss": 45.8529, + "step": 39370 + }, + { + "epoch": 0.07955009150886606, + "grad_norm": 129.7798614501953, + "learning_rate": 7.876e-06, + "loss": 22.3973, + "step": 39380 + }, + { + "epoch": 0.07957029214154987, + "grad_norm": 50.45730972290039, + "learning_rate": 7.878e-06, + "loss": 25.0302, + "step": 39390 + }, + { + "epoch": 0.07959049277423369, + "grad_norm": 310.8236999511719, + "learning_rate": 7.88e-06, + "loss": 12.803, + "step": 39400 + }, + { + "epoch": 0.07961069340691751, + "grad_norm": 492.551513671875, + "learning_rate": 7.882e-06, + "loss": 21.0392, + "step": 39410 + }, + { + "epoch": 0.07963089403960132, + "grad_norm": 242.3144073486328, + "learning_rate": 7.884000000000001e-06, + "loss": 30.2967, + "step": 39420 + }, + { + "epoch": 0.07965109467228514, + "grad_norm": 1357.2841796875, + "learning_rate": 7.886e-06, + "loss": 27.1293, + "step": 39430 + }, + { + "epoch": 0.07967129530496896, + "grad_norm": 923.7060546875, + "learning_rate": 7.888e-06, + "loss": 18.4986, + "step": 39440 + }, + { + "epoch": 0.07969149593765276, + "grad_norm": 380.8133544921875, + "learning_rate": 7.89e-06, + "loss": 29.5138, + "step": 39450 + }, + { + "epoch": 0.07971169657033658, + "grad_norm": 702.3418579101562, + "learning_rate": 7.892e-06, + "loss": 36.8634, + "step": 39460 + }, + { + "epoch": 0.0797318972030204, + "grad_norm": 795.5443725585938, + "learning_rate": 7.894000000000001e-06, + "loss": 34.3167, + "step": 39470 + }, + { + "epoch": 0.07975209783570421, + "grad_norm": 436.4936218261719, + "learning_rate": 7.896e-06, + "loss": 18.4773, + "step": 39480 + }, + { + "epoch": 0.07977229846838803, + "grad_norm": 289.3657531738281, + "learning_rate": 7.898e-06, + "loss": 18.9045, + "step": 39490 + }, + { + "epoch": 0.07979249910107185, + "grad_norm": 681.8690185546875, + "learning_rate": 7.9e-06, + "loss": 21.1877, + "step": 39500 + }, + { + "epoch": 0.07981269973375565, + "grad_norm": 562.03125, + "learning_rate": 7.902000000000002e-06, + "loss": 22.3586, + "step": 39510 + }, + { + "epoch": 0.07983290036643947, + "grad_norm": 277.8089599609375, + "learning_rate": 7.904000000000001e-06, + "loss": 30.3118, + "step": 39520 + }, + { + "epoch": 0.0798531009991233, + "grad_norm": 378.3243713378906, + "learning_rate": 7.906e-06, + "loss": 35.7515, + "step": 39530 + }, + { + "epoch": 0.07987330163180711, + "grad_norm": 674.659912109375, + "learning_rate": 7.908e-06, + "loss": 24.888, + "step": 39540 + }, + { + "epoch": 0.07989350226449092, + 
"grad_norm": 233.80078125, + "learning_rate": 7.91e-06, + "loss": 21.2106, + "step": 39550 + }, + { + "epoch": 0.07991370289717474, + "grad_norm": 414.6405944824219, + "learning_rate": 7.912000000000001e-06, + "loss": 17.9159, + "step": 39560 + }, + { + "epoch": 0.07993390352985856, + "grad_norm": 317.096435546875, + "learning_rate": 7.914e-06, + "loss": 19.6499, + "step": 39570 + }, + { + "epoch": 0.07995410416254237, + "grad_norm": 554.5511474609375, + "learning_rate": 7.916e-06, + "loss": 19.308, + "step": 39580 + }, + { + "epoch": 0.07997430479522619, + "grad_norm": 446.277587890625, + "learning_rate": 7.918e-06, + "loss": 17.1525, + "step": 39590 + }, + { + "epoch": 0.07999450542791, + "grad_norm": 869.7025756835938, + "learning_rate": 7.92e-06, + "loss": 29.8401, + "step": 39600 + }, + { + "epoch": 0.08001470606059381, + "grad_norm": 195.5814666748047, + "learning_rate": 7.922000000000001e-06, + "loss": 19.0599, + "step": 39610 + }, + { + "epoch": 0.08003490669327763, + "grad_norm": 928.6428833007812, + "learning_rate": 7.924e-06, + "loss": 43.4772, + "step": 39620 + }, + { + "epoch": 0.08005510732596145, + "grad_norm": 448.8506164550781, + "learning_rate": 7.926e-06, + "loss": 27.3181, + "step": 39630 + }, + { + "epoch": 0.08007530795864526, + "grad_norm": 295.6615295410156, + "learning_rate": 7.928e-06, + "loss": 31.1969, + "step": 39640 + }, + { + "epoch": 0.08009550859132908, + "grad_norm": 844.4463500976562, + "learning_rate": 7.93e-06, + "loss": 31.1826, + "step": 39650 + }, + { + "epoch": 0.0801157092240129, + "grad_norm": 1275.2359619140625, + "learning_rate": 7.932000000000001e-06, + "loss": 60.3057, + "step": 39660 + }, + { + "epoch": 0.0801359098566967, + "grad_norm": 669.5518188476562, + "learning_rate": 7.934e-06, + "loss": 32.1826, + "step": 39670 + }, + { + "epoch": 0.08015611048938052, + "grad_norm": 370.1067810058594, + "learning_rate": 7.936e-06, + "loss": 48.7568, + "step": 39680 + }, + { + "epoch": 0.08017631112206434, + "grad_norm": 85.74179077148438, + "learning_rate": 7.938000000000001e-06, + "loss": 32.242, + "step": 39690 + }, + { + "epoch": 0.08019651175474816, + "grad_norm": 449.3262023925781, + "learning_rate": 7.94e-06, + "loss": 35.4012, + "step": 39700 + }, + { + "epoch": 0.08021671238743197, + "grad_norm": 253.4001922607422, + "learning_rate": 7.942000000000001e-06, + "loss": 9.3638, + "step": 39710 + }, + { + "epoch": 0.08023691302011579, + "grad_norm": 427.928955078125, + "learning_rate": 7.944e-06, + "loss": 20.7755, + "step": 39720 + }, + { + "epoch": 0.08025711365279961, + "grad_norm": 963.2529296875, + "learning_rate": 7.946e-06, + "loss": 20.6616, + "step": 39730 + }, + { + "epoch": 0.08027731428548342, + "grad_norm": 557.8382568359375, + "learning_rate": 7.948e-06, + "loss": 19.3359, + "step": 39740 + }, + { + "epoch": 0.08029751491816724, + "grad_norm": 347.09466552734375, + "learning_rate": 7.950000000000002e-06, + "loss": 36.1432, + "step": 39750 + }, + { + "epoch": 0.08031771555085106, + "grad_norm": 204.5667724609375, + "learning_rate": 7.952000000000001e-06, + "loss": 28.8233, + "step": 39760 + }, + { + "epoch": 0.08033791618353486, + "grad_norm": 401.3494873046875, + "learning_rate": 7.954e-06, + "loss": 28.2676, + "step": 39770 + }, + { + "epoch": 0.08035811681621868, + "grad_norm": 305.70440673828125, + "learning_rate": 7.956e-06, + "loss": 37.6146, + "step": 39780 + }, + { + "epoch": 0.0803783174489025, + "grad_norm": 255.9058837890625, + "learning_rate": 7.958e-06, + "loss": 41.6842, + "step": 39790 + }, + { + "epoch": 
0.08039851808158631, + "grad_norm": 766.6397705078125, + "learning_rate": 7.960000000000002e-06, + "loss": 13.5986, + "step": 39800 + }, + { + "epoch": 0.08041871871427013, + "grad_norm": 427.0234069824219, + "learning_rate": 7.962000000000001e-06, + "loss": 40.9575, + "step": 39810 + }, + { + "epoch": 0.08043891934695395, + "grad_norm": 841.5509033203125, + "learning_rate": 7.964e-06, + "loss": 58.3562, + "step": 39820 + }, + { + "epoch": 0.08045911997963776, + "grad_norm": 283.16986083984375, + "learning_rate": 7.966e-06, + "loss": 18.2624, + "step": 39830 + }, + { + "epoch": 0.08047932061232158, + "grad_norm": 504.2042236328125, + "learning_rate": 7.968e-06, + "loss": 22.2934, + "step": 39840 + }, + { + "epoch": 0.0804995212450054, + "grad_norm": 275.0429992675781, + "learning_rate": 7.970000000000002e-06, + "loss": 21.0991, + "step": 39850 + }, + { + "epoch": 0.08051972187768922, + "grad_norm": 710.8739624023438, + "learning_rate": 7.972000000000001e-06, + "loss": 18.4247, + "step": 39860 + }, + { + "epoch": 0.08053992251037302, + "grad_norm": 728.702880859375, + "learning_rate": 7.974e-06, + "loss": 22.7606, + "step": 39870 + }, + { + "epoch": 0.08056012314305684, + "grad_norm": 521.3509521484375, + "learning_rate": 7.976000000000001e-06, + "loss": 21.6127, + "step": 39880 + }, + { + "epoch": 0.08058032377574066, + "grad_norm": 398.2906188964844, + "learning_rate": 7.978e-06, + "loss": 22.7838, + "step": 39890 + }, + { + "epoch": 0.08060052440842447, + "grad_norm": 405.0233154296875, + "learning_rate": 7.980000000000002e-06, + "loss": 20.4245, + "step": 39900 + }, + { + "epoch": 0.08062072504110829, + "grad_norm": 275.5707702636719, + "learning_rate": 7.982e-06, + "loss": 49.7182, + "step": 39910 + }, + { + "epoch": 0.08064092567379211, + "grad_norm": 360.2571716308594, + "learning_rate": 7.984e-06, + "loss": 16.3727, + "step": 39920 + }, + { + "epoch": 0.08066112630647591, + "grad_norm": 792.240478515625, + "learning_rate": 7.986000000000001e-06, + "loss": 40.9525, + "step": 39930 + }, + { + "epoch": 0.08068132693915973, + "grad_norm": 495.4645690917969, + "learning_rate": 7.988e-06, + "loss": 41.8557, + "step": 39940 + }, + { + "epoch": 0.08070152757184355, + "grad_norm": 78.95877838134766, + "learning_rate": 7.990000000000001e-06, + "loss": 31.4824, + "step": 39950 + }, + { + "epoch": 0.08072172820452736, + "grad_norm": 834.5568237304688, + "learning_rate": 7.992e-06, + "loss": 31.6639, + "step": 39960 + }, + { + "epoch": 0.08074192883721118, + "grad_norm": 1125.6707763671875, + "learning_rate": 7.994e-06, + "loss": 22.9925, + "step": 39970 + }, + { + "epoch": 0.080762129469895, + "grad_norm": 341.31854248046875, + "learning_rate": 7.996000000000001e-06, + "loss": 20.0642, + "step": 39980 + }, + { + "epoch": 0.0807823301025788, + "grad_norm": 537.1127319335938, + "learning_rate": 7.998e-06, + "loss": 39.629, + "step": 39990 + }, + { + "epoch": 0.08080253073526263, + "grad_norm": 627.4376831054688, + "learning_rate": 8.000000000000001e-06, + "loss": 10.9614, + "step": 40000 + }, + { + "epoch": 0.08082273136794645, + "grad_norm": 1759.805908203125, + "learning_rate": 8.002e-06, + "loss": 34.1295, + "step": 40010 + }, + { + "epoch": 0.08084293200063027, + "grad_norm": 330.1541748046875, + "learning_rate": 8.004e-06, + "loss": 38.8135, + "step": 40020 + }, + { + "epoch": 0.08086313263331407, + "grad_norm": 339.01983642578125, + "learning_rate": 8.006000000000001e-06, + "loss": 19.9088, + "step": 40030 + }, + { + "epoch": 0.08088333326599789, + "grad_norm": 237.59225463867188, + 
"learning_rate": 8.008e-06, + "loss": 28.8385, + "step": 40040 + }, + { + "epoch": 0.08090353389868171, + "grad_norm": 666.0283813476562, + "learning_rate": 8.010000000000001e-06, + "loss": 27.669, + "step": 40050 + }, + { + "epoch": 0.08092373453136552, + "grad_norm": 650.1888427734375, + "learning_rate": 8.012e-06, + "loss": 34.3803, + "step": 40060 + }, + { + "epoch": 0.08094393516404934, + "grad_norm": 311.5391845703125, + "learning_rate": 8.014e-06, + "loss": 41.4724, + "step": 40070 + }, + { + "epoch": 0.08096413579673316, + "grad_norm": 422.935791015625, + "learning_rate": 8.016e-06, + "loss": 24.8886, + "step": 40080 + }, + { + "epoch": 0.08098433642941696, + "grad_norm": 1059.035400390625, + "learning_rate": 8.018e-06, + "loss": 43.1621, + "step": 40090 + }, + { + "epoch": 0.08100453706210078, + "grad_norm": 480.57464599609375, + "learning_rate": 8.020000000000001e-06, + "loss": 14.7568, + "step": 40100 + }, + { + "epoch": 0.0810247376947846, + "grad_norm": 879.3117065429688, + "learning_rate": 8.022e-06, + "loss": 30.1447, + "step": 40110 + }, + { + "epoch": 0.08104493832746841, + "grad_norm": 210.75437927246094, + "learning_rate": 8.024000000000001e-06, + "loss": 25.4639, + "step": 40120 + }, + { + "epoch": 0.08106513896015223, + "grad_norm": 235.2727813720703, + "learning_rate": 8.026e-06, + "loss": 31.0911, + "step": 40130 + }, + { + "epoch": 0.08108533959283605, + "grad_norm": 977.4114379882812, + "learning_rate": 8.028e-06, + "loss": 55.1275, + "step": 40140 + }, + { + "epoch": 0.08110554022551986, + "grad_norm": 594.6533203125, + "learning_rate": 8.030000000000001e-06, + "loss": 24.6549, + "step": 40150 + }, + { + "epoch": 0.08112574085820368, + "grad_norm": 637.489013671875, + "learning_rate": 8.032e-06, + "loss": 44.8955, + "step": 40160 + }, + { + "epoch": 0.0811459414908875, + "grad_norm": 305.0829162597656, + "learning_rate": 8.034000000000001e-06, + "loss": 27.1901, + "step": 40170 + }, + { + "epoch": 0.08116614212357132, + "grad_norm": 196.19229125976562, + "learning_rate": 8.036e-06, + "loss": 25.8945, + "step": 40180 + }, + { + "epoch": 0.08118634275625512, + "grad_norm": 977.69580078125, + "learning_rate": 8.038e-06, + "loss": 27.8558, + "step": 40190 + }, + { + "epoch": 0.08120654338893894, + "grad_norm": 780.17138671875, + "learning_rate": 8.040000000000001e-06, + "loss": 23.9422, + "step": 40200 + }, + { + "epoch": 0.08122674402162276, + "grad_norm": 611.3806762695312, + "learning_rate": 8.042e-06, + "loss": 28.3511, + "step": 40210 + }, + { + "epoch": 0.08124694465430657, + "grad_norm": 622.1024780273438, + "learning_rate": 8.044000000000001e-06, + "loss": 21.6604, + "step": 40220 + }, + { + "epoch": 0.08126714528699039, + "grad_norm": 918.8472900390625, + "learning_rate": 8.046e-06, + "loss": 26.455, + "step": 40230 + }, + { + "epoch": 0.08128734591967421, + "grad_norm": 341.0751037597656, + "learning_rate": 8.048e-06, + "loss": 28.0223, + "step": 40240 + }, + { + "epoch": 0.08130754655235801, + "grad_norm": 465.3175964355469, + "learning_rate": 8.050000000000001e-06, + "loss": 29.9542, + "step": 40250 + }, + { + "epoch": 0.08132774718504183, + "grad_norm": 136.3564453125, + "learning_rate": 8.052e-06, + "loss": 32.5239, + "step": 40260 + }, + { + "epoch": 0.08134794781772565, + "grad_norm": 725.799072265625, + "learning_rate": 8.054000000000001e-06, + "loss": 43.8185, + "step": 40270 + }, + { + "epoch": 0.08136814845040946, + "grad_norm": 1356.5460205078125, + "learning_rate": 8.056e-06, + "loss": 24.7778, + "step": 40280 + }, + { + "epoch": 
0.08138834908309328, + "grad_norm": 517.8104248046875, + "learning_rate": 8.058e-06, + "loss": 35.1107, + "step": 40290 + }, + { + "epoch": 0.0814085497157771, + "grad_norm": 297.4921875, + "learning_rate": 8.06e-06, + "loss": 25.8174, + "step": 40300 + }, + { + "epoch": 0.08142875034846091, + "grad_norm": 340.6484375, + "learning_rate": 8.062000000000002e-06, + "loss": 25.051, + "step": 40310 + }, + { + "epoch": 0.08144895098114473, + "grad_norm": 443.00933837890625, + "learning_rate": 8.064000000000001e-06, + "loss": 20.8189, + "step": 40320 + }, + { + "epoch": 0.08146915161382855, + "grad_norm": 438.75347900390625, + "learning_rate": 8.066e-06, + "loss": 22.4965, + "step": 40330 + }, + { + "epoch": 0.08148935224651237, + "grad_norm": 481.3415222167969, + "learning_rate": 8.068e-06, + "loss": 23.0569, + "step": 40340 + }, + { + "epoch": 0.08150955287919617, + "grad_norm": 1392.9268798828125, + "learning_rate": 8.07e-06, + "loss": 41.2481, + "step": 40350 + }, + { + "epoch": 0.08152975351188, + "grad_norm": 249.26956176757812, + "learning_rate": 8.072000000000002e-06, + "loss": 20.832, + "step": 40360 + }, + { + "epoch": 0.08154995414456381, + "grad_norm": 21843.982421875, + "learning_rate": 8.074000000000001e-06, + "loss": 163.6426, + "step": 40370 + }, + { + "epoch": 0.08157015477724762, + "grad_norm": 975.552490234375, + "learning_rate": 8.076e-06, + "loss": 36.2325, + "step": 40380 + }, + { + "epoch": 0.08159035540993144, + "grad_norm": 376.71392822265625, + "learning_rate": 8.078e-06, + "loss": 27.1425, + "step": 40390 + }, + { + "epoch": 0.08161055604261526, + "grad_norm": 438.13592529296875, + "learning_rate": 8.08e-06, + "loss": 26.5299, + "step": 40400 + }, + { + "epoch": 0.08163075667529907, + "grad_norm": 700.2650756835938, + "learning_rate": 8.082000000000002e-06, + "loss": 19.8693, + "step": 40410 + }, + { + "epoch": 0.08165095730798289, + "grad_norm": 253.17955017089844, + "learning_rate": 8.084000000000001e-06, + "loss": 27.1577, + "step": 40420 + }, + { + "epoch": 0.0816711579406667, + "grad_norm": 170.56356811523438, + "learning_rate": 8.086e-06, + "loss": 17.2469, + "step": 40430 + }, + { + "epoch": 0.08169135857335051, + "grad_norm": 399.56024169921875, + "learning_rate": 8.088e-06, + "loss": 44.8697, + "step": 40440 + }, + { + "epoch": 0.08171155920603433, + "grad_norm": 262.854736328125, + "learning_rate": 8.09e-06, + "loss": 41.7183, + "step": 40450 + }, + { + "epoch": 0.08173175983871815, + "grad_norm": 2068.342041015625, + "learning_rate": 8.092000000000001e-06, + "loss": 30.9143, + "step": 40460 + }, + { + "epoch": 0.08175196047140196, + "grad_norm": 0.0, + "learning_rate": 8.094e-06, + "loss": 24.21, + "step": 40470 + }, + { + "epoch": 0.08177216110408578, + "grad_norm": 625.5025024414062, + "learning_rate": 8.096e-06, + "loss": 44.1145, + "step": 40480 + }, + { + "epoch": 0.0817923617367696, + "grad_norm": 492.1724853515625, + "learning_rate": 8.098000000000001e-06, + "loss": 18.3544, + "step": 40490 + }, + { + "epoch": 0.08181256236945342, + "grad_norm": 634.8037109375, + "learning_rate": 8.1e-06, + "loss": 24.2937, + "step": 40500 + }, + { + "epoch": 0.08183276300213722, + "grad_norm": 666.6829833984375, + "learning_rate": 8.102000000000001e-06, + "loss": 26.6929, + "step": 40510 + }, + { + "epoch": 0.08185296363482104, + "grad_norm": 754.0609130859375, + "learning_rate": 8.104e-06, + "loss": 28.598, + "step": 40520 + }, + { + "epoch": 0.08187316426750486, + "grad_norm": 714.6564331054688, + "learning_rate": 8.106e-06, + "loss": 29.988, + "step": 40530 + }, + 
{ + "epoch": 0.08189336490018867, + "grad_norm": 269.427001953125, + "learning_rate": 8.108000000000001e-06, + "loss": 21.3572, + "step": 40540 + }, + { + "epoch": 0.08191356553287249, + "grad_norm": 756.2377319335938, + "learning_rate": 8.110000000000002e-06, + "loss": 22.3681, + "step": 40550 + }, + { + "epoch": 0.08193376616555631, + "grad_norm": 526.88427734375, + "learning_rate": 8.112000000000001e-06, + "loss": 44.0786, + "step": 40560 + }, + { + "epoch": 0.08195396679824012, + "grad_norm": 170.62673950195312, + "learning_rate": 8.114e-06, + "loss": 30.5698, + "step": 40570 + }, + { + "epoch": 0.08197416743092394, + "grad_norm": 158.55050659179688, + "learning_rate": 8.116e-06, + "loss": 11.1812, + "step": 40580 + }, + { + "epoch": 0.08199436806360776, + "grad_norm": 1319.344970703125, + "learning_rate": 8.118000000000001e-06, + "loss": 23.4195, + "step": 40590 + }, + { + "epoch": 0.08201456869629156, + "grad_norm": 318.0577392578125, + "learning_rate": 8.120000000000002e-06, + "loss": 15.1838, + "step": 40600 + }, + { + "epoch": 0.08203476932897538, + "grad_norm": 1007.6936645507812, + "learning_rate": 8.122000000000001e-06, + "loss": 42.0716, + "step": 40610 + }, + { + "epoch": 0.0820549699616592, + "grad_norm": 743.8515014648438, + "learning_rate": 8.124e-06, + "loss": 32.5392, + "step": 40620 + }, + { + "epoch": 0.08207517059434301, + "grad_norm": 240.04466247558594, + "learning_rate": 8.126e-06, + "loss": 11.0315, + "step": 40630 + }, + { + "epoch": 0.08209537122702683, + "grad_norm": 562.1881103515625, + "learning_rate": 8.128e-06, + "loss": 33.486, + "step": 40640 + }, + { + "epoch": 0.08211557185971065, + "grad_norm": 497.35400390625, + "learning_rate": 8.13e-06, + "loss": 31.8643, + "step": 40650 + }, + { + "epoch": 0.08213577249239447, + "grad_norm": 762.3814086914062, + "learning_rate": 8.132000000000001e-06, + "loss": 21.6367, + "step": 40660 + }, + { + "epoch": 0.08215597312507827, + "grad_norm": 468.392578125, + "learning_rate": 8.134e-06, + "loss": 23.6273, + "step": 40670 + }, + { + "epoch": 0.0821761737577621, + "grad_norm": 641.936279296875, + "learning_rate": 8.136000000000001e-06, + "loss": 23.3873, + "step": 40680 + }, + { + "epoch": 0.08219637439044591, + "grad_norm": 321.6557922363281, + "learning_rate": 8.138e-06, + "loss": 24.4871, + "step": 40690 + }, + { + "epoch": 0.08221657502312972, + "grad_norm": 459.4052734375, + "learning_rate": 8.14e-06, + "loss": 25.6145, + "step": 40700 + }, + { + "epoch": 0.08223677565581354, + "grad_norm": 196.30247497558594, + "learning_rate": 8.142000000000001e-06, + "loss": 27.1357, + "step": 40710 + }, + { + "epoch": 0.08225697628849736, + "grad_norm": 385.11175537109375, + "learning_rate": 8.144e-06, + "loss": 24.2411, + "step": 40720 + }, + { + "epoch": 0.08227717692118117, + "grad_norm": 504.1175537109375, + "learning_rate": 8.146000000000001e-06, + "loss": 43.324, + "step": 40730 + }, + { + "epoch": 0.08229737755386499, + "grad_norm": 414.9750061035156, + "learning_rate": 8.148e-06, + "loss": 32.9271, + "step": 40740 + }, + { + "epoch": 0.0823175781865488, + "grad_norm": 504.9588623046875, + "learning_rate": 8.15e-06, + "loss": 13.1979, + "step": 40750 + }, + { + "epoch": 0.08233777881923261, + "grad_norm": 1631.643310546875, + "learning_rate": 8.152000000000001e-06, + "loss": 35.6901, + "step": 40760 + }, + { + "epoch": 0.08235797945191643, + "grad_norm": 403.29083251953125, + "learning_rate": 8.154e-06, + "loss": 33.276, + "step": 40770 + }, + { + "epoch": 0.08237818008460025, + "grad_norm": 369.1541748046875, + 
"learning_rate": 8.156000000000001e-06, + "loss": 25.6719, + "step": 40780 + }, + { + "epoch": 0.08239838071728406, + "grad_norm": 772.0247802734375, + "learning_rate": 8.158e-06, + "loss": 32.0478, + "step": 40790 + }, + { + "epoch": 0.08241858134996788, + "grad_norm": 72.48571014404297, + "learning_rate": 8.16e-06, + "loss": 23.0447, + "step": 40800 + }, + { + "epoch": 0.0824387819826517, + "grad_norm": 208.5259246826172, + "learning_rate": 8.162e-06, + "loss": 16.0053, + "step": 40810 + }, + { + "epoch": 0.08245898261533552, + "grad_norm": 603.0857543945312, + "learning_rate": 8.164e-06, + "loss": 23.1499, + "step": 40820 + }, + { + "epoch": 0.08247918324801932, + "grad_norm": 1333.773193359375, + "learning_rate": 8.166000000000001e-06, + "loss": 43.6377, + "step": 40830 + }, + { + "epoch": 0.08249938388070314, + "grad_norm": 736.5938720703125, + "learning_rate": 8.168e-06, + "loss": 29.6003, + "step": 40840 + }, + { + "epoch": 0.08251958451338696, + "grad_norm": 653.9880981445312, + "learning_rate": 8.17e-06, + "loss": 22.1198, + "step": 40850 + }, + { + "epoch": 0.08253978514607077, + "grad_norm": 335.3724365234375, + "learning_rate": 8.172e-06, + "loss": 25.5172, + "step": 40860 + }, + { + "epoch": 0.08255998577875459, + "grad_norm": 306.0758972167969, + "learning_rate": 8.174e-06, + "loss": 22.6132, + "step": 40870 + }, + { + "epoch": 0.08258018641143841, + "grad_norm": 241.45751953125, + "learning_rate": 8.176000000000001e-06, + "loss": 27.1, + "step": 40880 + }, + { + "epoch": 0.08260038704412222, + "grad_norm": 831.9751586914062, + "learning_rate": 8.178e-06, + "loss": 22.5524, + "step": 40890 + }, + { + "epoch": 0.08262058767680604, + "grad_norm": 880.759765625, + "learning_rate": 8.18e-06, + "loss": 29.0195, + "step": 40900 + }, + { + "epoch": 0.08264078830948986, + "grad_norm": 794.4937744140625, + "learning_rate": 8.182e-06, + "loss": 21.1029, + "step": 40910 + }, + { + "epoch": 0.08266098894217366, + "grad_norm": 198.36871337890625, + "learning_rate": 8.184000000000002e-06, + "loss": 35.888, + "step": 40920 + }, + { + "epoch": 0.08268118957485748, + "grad_norm": 371.2920837402344, + "learning_rate": 8.186000000000001e-06, + "loss": 29.9417, + "step": 40930 + }, + { + "epoch": 0.0827013902075413, + "grad_norm": 761.3952026367188, + "learning_rate": 8.188e-06, + "loss": 27.3552, + "step": 40940 + }, + { + "epoch": 0.08272159084022511, + "grad_norm": 397.6307067871094, + "learning_rate": 8.19e-06, + "loss": 25.9441, + "step": 40950 + }, + { + "epoch": 0.08274179147290893, + "grad_norm": 450.3551025390625, + "learning_rate": 8.192e-06, + "loss": 18.6319, + "step": 40960 + }, + { + "epoch": 0.08276199210559275, + "grad_norm": 1369.5721435546875, + "learning_rate": 8.194000000000002e-06, + "loss": 43.8313, + "step": 40970 + }, + { + "epoch": 0.08278219273827657, + "grad_norm": 1892.084716796875, + "learning_rate": 8.196e-06, + "loss": 28.8405, + "step": 40980 + }, + { + "epoch": 0.08280239337096038, + "grad_norm": 391.256103515625, + "learning_rate": 8.198e-06, + "loss": 15.2958, + "step": 40990 + }, + { + "epoch": 0.0828225940036442, + "grad_norm": 531.6309204101562, + "learning_rate": 8.2e-06, + "loss": 55.6875, + "step": 41000 + }, + { + "epoch": 0.08284279463632802, + "grad_norm": 297.855224609375, + "learning_rate": 8.202e-06, + "loss": 57.8646, + "step": 41010 + }, + { + "epoch": 0.08286299526901182, + "grad_norm": 479.3824157714844, + "learning_rate": 8.204000000000001e-06, + "loss": 21.4301, + "step": 41020 + }, + { + "epoch": 0.08288319590169564, + "grad_norm": 
607.3418579101562, + "learning_rate": 8.206e-06, + "loss": 18.8603, + "step": 41030 + }, + { + "epoch": 0.08290339653437946, + "grad_norm": 906.0545043945312, + "learning_rate": 8.208e-06, + "loss": 19.5641, + "step": 41040 + }, + { + "epoch": 0.08292359716706327, + "grad_norm": 424.09490966796875, + "learning_rate": 8.210000000000001e-06, + "loss": 27.5386, + "step": 41050 + }, + { + "epoch": 0.08294379779974709, + "grad_norm": 570.6898803710938, + "learning_rate": 8.212e-06, + "loss": 42.5885, + "step": 41060 + }, + { + "epoch": 0.08296399843243091, + "grad_norm": 418.614990234375, + "learning_rate": 8.214000000000001e-06, + "loss": 33.9309, + "step": 41070 + }, + { + "epoch": 0.08298419906511471, + "grad_norm": 165.93856811523438, + "learning_rate": 8.216e-06, + "loss": 9.762, + "step": 41080 + }, + { + "epoch": 0.08300439969779853, + "grad_norm": 193.1525421142578, + "learning_rate": 8.218e-06, + "loss": 18.7477, + "step": 41090 + }, + { + "epoch": 0.08302460033048235, + "grad_norm": 979.6331787109375, + "learning_rate": 8.220000000000001e-06, + "loss": 25.1848, + "step": 41100 + }, + { + "epoch": 0.08304480096316616, + "grad_norm": 431.3668212890625, + "learning_rate": 8.222000000000002e-06, + "loss": 29.4433, + "step": 41110 + }, + { + "epoch": 0.08306500159584998, + "grad_norm": 455.6502990722656, + "learning_rate": 8.224000000000001e-06, + "loss": 26.0422, + "step": 41120 + }, + { + "epoch": 0.0830852022285338, + "grad_norm": 230.00115966796875, + "learning_rate": 8.226e-06, + "loss": 17.0421, + "step": 41130 + }, + { + "epoch": 0.08310540286121762, + "grad_norm": 495.62384033203125, + "learning_rate": 8.228e-06, + "loss": 9.0154, + "step": 41140 + }, + { + "epoch": 0.08312560349390143, + "grad_norm": 102.5909423828125, + "learning_rate": 8.23e-06, + "loss": 37.1204, + "step": 41150 + }, + { + "epoch": 0.08314580412658525, + "grad_norm": 502.5970764160156, + "learning_rate": 8.232000000000002e-06, + "loss": 39.7973, + "step": 41160 + }, + { + "epoch": 0.08316600475926907, + "grad_norm": 378.26171875, + "learning_rate": 8.234000000000001e-06, + "loss": 16.8596, + "step": 41170 + }, + { + "epoch": 0.08318620539195287, + "grad_norm": 492.6539306640625, + "learning_rate": 8.236e-06, + "loss": 41.4776, + "step": 41180 + }, + { + "epoch": 0.08320640602463669, + "grad_norm": 383.61334228515625, + "learning_rate": 8.238e-06, + "loss": 45.7282, + "step": 41190 + }, + { + "epoch": 0.08322660665732051, + "grad_norm": 531.486083984375, + "learning_rate": 8.24e-06, + "loss": 34.7144, + "step": 41200 + }, + { + "epoch": 0.08324680729000432, + "grad_norm": 308.7742004394531, + "learning_rate": 8.242000000000002e-06, + "loss": 52.3713, + "step": 41210 + }, + { + "epoch": 0.08326700792268814, + "grad_norm": 354.1903991699219, + "learning_rate": 8.244000000000001e-06, + "loss": 37.182, + "step": 41220 + }, + { + "epoch": 0.08328720855537196, + "grad_norm": 454.5882263183594, + "learning_rate": 8.246e-06, + "loss": 38.7506, + "step": 41230 + }, + { + "epoch": 0.08330740918805576, + "grad_norm": 902.9432983398438, + "learning_rate": 8.248e-06, + "loss": 23.8521, + "step": 41240 + }, + { + "epoch": 0.08332760982073958, + "grad_norm": 92.36296081542969, + "learning_rate": 8.25e-06, + "loss": 27.6562, + "step": 41250 + }, + { + "epoch": 0.0833478104534234, + "grad_norm": 790.8438110351562, + "learning_rate": 8.252000000000002e-06, + "loss": 37.9746, + "step": 41260 + }, + { + "epoch": 0.08336801108610721, + "grad_norm": 457.1481628417969, + "learning_rate": 8.254000000000001e-06, + "loss": 28.6681, + 
"step": 41270 + }, + { + "epoch": 0.08338821171879103, + "grad_norm": 494.378173828125, + "learning_rate": 8.256e-06, + "loss": 25.9664, + "step": 41280 + }, + { + "epoch": 0.08340841235147485, + "grad_norm": 270.1199951171875, + "learning_rate": 8.258000000000001e-06, + "loss": 15.3111, + "step": 41290 + }, + { + "epoch": 0.08342861298415867, + "grad_norm": 325.8103942871094, + "learning_rate": 8.26e-06, + "loss": 31.758, + "step": 41300 + }, + { + "epoch": 0.08344881361684248, + "grad_norm": 400.83905029296875, + "learning_rate": 8.262000000000002e-06, + "loss": 31.7576, + "step": 41310 + }, + { + "epoch": 0.0834690142495263, + "grad_norm": 578.0596313476562, + "learning_rate": 8.264e-06, + "loss": 45.9719, + "step": 41320 + }, + { + "epoch": 0.08348921488221012, + "grad_norm": 1000.2903442382812, + "learning_rate": 8.266e-06, + "loss": 41.1022, + "step": 41330 + }, + { + "epoch": 0.08350941551489392, + "grad_norm": 926.373291015625, + "learning_rate": 8.268000000000001e-06, + "loss": 40.61, + "step": 41340 + }, + { + "epoch": 0.08352961614757774, + "grad_norm": 606.2439575195312, + "learning_rate": 8.27e-06, + "loss": 18.0386, + "step": 41350 + }, + { + "epoch": 0.08354981678026156, + "grad_norm": 614.6911010742188, + "learning_rate": 8.272000000000001e-06, + "loss": 31.1184, + "step": 41360 + }, + { + "epoch": 0.08357001741294537, + "grad_norm": 477.9070129394531, + "learning_rate": 8.274e-06, + "loss": 20.5355, + "step": 41370 + }, + { + "epoch": 0.08359021804562919, + "grad_norm": 652.8970947265625, + "learning_rate": 8.276e-06, + "loss": 38.6469, + "step": 41380 + }, + { + "epoch": 0.08361041867831301, + "grad_norm": 843.1055908203125, + "learning_rate": 8.278000000000001e-06, + "loss": 38.3296, + "step": 41390 + }, + { + "epoch": 0.08363061931099681, + "grad_norm": 509.9186096191406, + "learning_rate": 8.28e-06, + "loss": 27.9005, + "step": 41400 + }, + { + "epoch": 0.08365081994368063, + "grad_norm": 819.67041015625, + "learning_rate": 8.282000000000001e-06, + "loss": 46.2935, + "step": 41410 + }, + { + "epoch": 0.08367102057636445, + "grad_norm": 179.84751892089844, + "learning_rate": 8.284e-06, + "loss": 21.5093, + "step": 41420 + }, + { + "epoch": 0.08369122120904826, + "grad_norm": 1020.0496826171875, + "learning_rate": 8.286e-06, + "loss": 26.0462, + "step": 41430 + }, + { + "epoch": 0.08371142184173208, + "grad_norm": 596.0717163085938, + "learning_rate": 8.288000000000001e-06, + "loss": 25.1455, + "step": 41440 + }, + { + "epoch": 0.0837316224744159, + "grad_norm": 339.4415588378906, + "learning_rate": 8.29e-06, + "loss": 37.9272, + "step": 41450 + }, + { + "epoch": 0.08375182310709972, + "grad_norm": 407.4787902832031, + "learning_rate": 8.292000000000001e-06, + "loss": 27.6669, + "step": 41460 + }, + { + "epoch": 0.08377202373978353, + "grad_norm": 475.36175537109375, + "learning_rate": 8.294e-06, + "loss": 28.2954, + "step": 41470 + }, + { + "epoch": 0.08379222437246735, + "grad_norm": 855.4002075195312, + "learning_rate": 8.296000000000002e-06, + "loss": 53.6953, + "step": 41480 + }, + { + "epoch": 0.08381242500515117, + "grad_norm": 402.96697998046875, + "learning_rate": 8.298000000000001e-06, + "loss": 14.4904, + "step": 41490 + }, + { + "epoch": 0.08383262563783497, + "grad_norm": 970.7698974609375, + "learning_rate": 8.3e-06, + "loss": 16.2845, + "step": 41500 + }, + { + "epoch": 0.0838528262705188, + "grad_norm": 15.433819770812988, + "learning_rate": 8.302000000000001e-06, + "loss": 29.1261, + "step": 41510 + }, + { + "epoch": 0.08387302690320261, + "grad_norm": 
988.315673828125, + "learning_rate": 8.304e-06, + "loss": 28.2776, + "step": 41520 + }, + { + "epoch": 0.08389322753588642, + "grad_norm": 191.4070281982422, + "learning_rate": 8.306000000000001e-06, + "loss": 12.7386, + "step": 41530 + }, + { + "epoch": 0.08391342816857024, + "grad_norm": 406.33984375, + "learning_rate": 8.308e-06, + "loss": 30.4329, + "step": 41540 + }, + { + "epoch": 0.08393362880125406, + "grad_norm": 2645.128662109375, + "learning_rate": 8.31e-06, + "loss": 32.924, + "step": 41550 + }, + { + "epoch": 0.08395382943393787, + "grad_norm": 257.9555358886719, + "learning_rate": 8.312000000000001e-06, + "loss": 18.9842, + "step": 41560 + }, + { + "epoch": 0.08397403006662169, + "grad_norm": 1188.1697998046875, + "learning_rate": 8.314e-06, + "loss": 29.7597, + "step": 41570 + }, + { + "epoch": 0.0839942306993055, + "grad_norm": 760.7401733398438, + "learning_rate": 8.316000000000001e-06, + "loss": 26.4646, + "step": 41580 + }, + { + "epoch": 0.08401443133198931, + "grad_norm": 395.8313293457031, + "learning_rate": 8.318e-06, + "loss": 27.8486, + "step": 41590 + }, + { + "epoch": 0.08403463196467313, + "grad_norm": 300.6554870605469, + "learning_rate": 8.32e-06, + "loss": 52.6849, + "step": 41600 + }, + { + "epoch": 0.08405483259735695, + "grad_norm": 352.93463134765625, + "learning_rate": 8.322000000000001e-06, + "loss": 35.2932, + "step": 41610 + }, + { + "epoch": 0.08407503323004077, + "grad_norm": 1065.8187255859375, + "learning_rate": 8.324e-06, + "loss": 21.1793, + "step": 41620 + }, + { + "epoch": 0.08409523386272458, + "grad_norm": 468.0779724121094, + "learning_rate": 8.326000000000001e-06, + "loss": 16.2983, + "step": 41630 + }, + { + "epoch": 0.0841154344954084, + "grad_norm": 404.51318359375, + "learning_rate": 8.328e-06, + "loss": 29.0949, + "step": 41640 + }, + { + "epoch": 0.08413563512809222, + "grad_norm": 540.3251342773438, + "learning_rate": 8.33e-06, + "loss": 29.328, + "step": 41650 + }, + { + "epoch": 0.08415583576077602, + "grad_norm": 335.04888916015625, + "learning_rate": 8.332000000000001e-06, + "loss": 19.6916, + "step": 41660 + }, + { + "epoch": 0.08417603639345984, + "grad_norm": 330.9485168457031, + "learning_rate": 8.334e-06, + "loss": 13.4891, + "step": 41670 + }, + { + "epoch": 0.08419623702614366, + "grad_norm": 615.0405883789062, + "learning_rate": 8.336000000000001e-06, + "loss": 45.3571, + "step": 41680 + }, + { + "epoch": 0.08421643765882747, + "grad_norm": 740.5963745117188, + "learning_rate": 8.338e-06, + "loss": 19.0211, + "step": 41690 + }, + { + "epoch": 0.08423663829151129, + "grad_norm": 243.19345092773438, + "learning_rate": 8.34e-06, + "loss": 15.5291, + "step": 41700 + }, + { + "epoch": 0.08425683892419511, + "grad_norm": 326.7149963378906, + "learning_rate": 8.342e-06, + "loss": 22.8227, + "step": 41710 + }, + { + "epoch": 0.08427703955687892, + "grad_norm": 531.5779418945312, + "learning_rate": 8.344000000000002e-06, + "loss": 17.4885, + "step": 41720 + }, + { + "epoch": 0.08429724018956274, + "grad_norm": 329.50665283203125, + "learning_rate": 8.346000000000001e-06, + "loss": 17.6037, + "step": 41730 + }, + { + "epoch": 0.08431744082224656, + "grad_norm": 404.5074157714844, + "learning_rate": 8.348e-06, + "loss": 21.7302, + "step": 41740 + }, + { + "epoch": 0.08433764145493036, + "grad_norm": 2198.21484375, + "learning_rate": 8.35e-06, + "loss": 39.81, + "step": 41750 + }, + { + "epoch": 0.08435784208761418, + "grad_norm": 482.79779052734375, + "learning_rate": 8.352e-06, + "loss": 27.2771, + "step": 41760 + }, + { + 
"epoch": 0.084378042720298, + "grad_norm": 507.9755859375, + "learning_rate": 8.354000000000002e-06, + "loss": 31.3565, + "step": 41770 + }, + { + "epoch": 0.08439824335298182, + "grad_norm": 497.9572448730469, + "learning_rate": 8.356000000000001e-06, + "loss": 15.4224, + "step": 41780 + }, + { + "epoch": 0.08441844398566563, + "grad_norm": 1225.9915771484375, + "learning_rate": 8.358e-06, + "loss": 31.6797, + "step": 41790 + }, + { + "epoch": 0.08443864461834945, + "grad_norm": 1556.139404296875, + "learning_rate": 8.36e-06, + "loss": 31.5198, + "step": 41800 + }, + { + "epoch": 0.08445884525103327, + "grad_norm": 128.2238006591797, + "learning_rate": 8.362e-06, + "loss": 19.0833, + "step": 41810 + }, + { + "epoch": 0.08447904588371707, + "grad_norm": 409.283935546875, + "learning_rate": 8.364000000000002e-06, + "loss": 66.9144, + "step": 41820 + }, + { + "epoch": 0.0844992465164009, + "grad_norm": 339.4384460449219, + "learning_rate": 8.366000000000001e-06, + "loss": 31.6918, + "step": 41830 + }, + { + "epoch": 0.08451944714908471, + "grad_norm": 182.897216796875, + "learning_rate": 8.368e-06, + "loss": 19.6857, + "step": 41840 + }, + { + "epoch": 0.08453964778176852, + "grad_norm": 490.0975341796875, + "learning_rate": 8.370000000000001e-06, + "loss": 25.7417, + "step": 41850 + }, + { + "epoch": 0.08455984841445234, + "grad_norm": 459.1452331542969, + "learning_rate": 8.372e-06, + "loss": 26.4446, + "step": 41860 + }, + { + "epoch": 0.08458004904713616, + "grad_norm": 581.1602172851562, + "learning_rate": 8.374000000000001e-06, + "loss": 31.0742, + "step": 41870 + }, + { + "epoch": 0.08460024967981997, + "grad_norm": 451.20562744140625, + "learning_rate": 8.376e-06, + "loss": 26.2722, + "step": 41880 + }, + { + "epoch": 0.08462045031250379, + "grad_norm": 299.28204345703125, + "learning_rate": 8.378e-06, + "loss": 36.2561, + "step": 41890 + }, + { + "epoch": 0.0846406509451876, + "grad_norm": 705.5419311523438, + "learning_rate": 8.380000000000001e-06, + "loss": 27.1341, + "step": 41900 + }, + { + "epoch": 0.08466085157787141, + "grad_norm": 602.3165283203125, + "learning_rate": 8.382e-06, + "loss": 17.7116, + "step": 41910 + }, + { + "epoch": 0.08468105221055523, + "grad_norm": 337.2932434082031, + "learning_rate": 8.384000000000001e-06, + "loss": 24.8853, + "step": 41920 + }, + { + "epoch": 0.08470125284323905, + "grad_norm": 604.894775390625, + "learning_rate": 8.386e-06, + "loss": 35.2355, + "step": 41930 + }, + { + "epoch": 0.08472145347592287, + "grad_norm": 403.151123046875, + "learning_rate": 8.388e-06, + "loss": 25.5887, + "step": 41940 + }, + { + "epoch": 0.08474165410860668, + "grad_norm": 304.0711669921875, + "learning_rate": 8.390000000000001e-06, + "loss": 14.1724, + "step": 41950 + }, + { + "epoch": 0.0847618547412905, + "grad_norm": 502.6972961425781, + "learning_rate": 8.392e-06, + "loss": 28.5268, + "step": 41960 + }, + { + "epoch": 0.08478205537397432, + "grad_norm": 705.1716918945312, + "learning_rate": 8.394000000000001e-06, + "loss": 30.1254, + "step": 41970 + }, + { + "epoch": 0.08480225600665812, + "grad_norm": 112.1724853515625, + "learning_rate": 8.396e-06, + "loss": 28.2086, + "step": 41980 + }, + { + "epoch": 0.08482245663934194, + "grad_norm": 685.7117919921875, + "learning_rate": 8.398e-06, + "loss": 25.5067, + "step": 41990 + }, + { + "epoch": 0.08484265727202576, + "grad_norm": 233.19610595703125, + "learning_rate": 8.400000000000001e-06, + "loss": 19.437, + "step": 42000 + }, + { + "epoch": 0.08486285790470957, + "grad_norm": 568.2144775390625, + 
"learning_rate": 8.402e-06, + "loss": 25.4206, + "step": 42010 + }, + { + "epoch": 0.08488305853739339, + "grad_norm": 160.93862915039062, + "learning_rate": 8.404000000000001e-06, + "loss": 67.4517, + "step": 42020 + }, + { + "epoch": 0.08490325917007721, + "grad_norm": 427.6259460449219, + "learning_rate": 8.406e-06, + "loss": 25.9073, + "step": 42030 + }, + { + "epoch": 0.08492345980276102, + "grad_norm": 1021.2572631835938, + "learning_rate": 8.408e-06, + "loss": 48.5253, + "step": 42040 + }, + { + "epoch": 0.08494366043544484, + "grad_norm": 609.3449096679688, + "learning_rate": 8.41e-06, + "loss": 23.9602, + "step": 42050 + }, + { + "epoch": 0.08496386106812866, + "grad_norm": 300.64569091796875, + "learning_rate": 8.412e-06, + "loss": 20.4327, + "step": 42060 + }, + { + "epoch": 0.08498406170081246, + "grad_norm": 641.7628784179688, + "learning_rate": 8.414000000000001e-06, + "loss": 20.2711, + "step": 42070 + }, + { + "epoch": 0.08500426233349628, + "grad_norm": 452.14080810546875, + "learning_rate": 8.416e-06, + "loss": 19.057, + "step": 42080 + }, + { + "epoch": 0.0850244629661801, + "grad_norm": 560.966064453125, + "learning_rate": 8.418000000000001e-06, + "loss": 20.311, + "step": 42090 + }, + { + "epoch": 0.08504466359886392, + "grad_norm": 113.4162826538086, + "learning_rate": 8.42e-06, + "loss": 29.6927, + "step": 42100 + }, + { + "epoch": 0.08506486423154773, + "grad_norm": 379.02099609375, + "learning_rate": 8.422e-06, + "loss": 25.708, + "step": 42110 + }, + { + "epoch": 0.08508506486423155, + "grad_norm": 514.345703125, + "learning_rate": 8.424000000000001e-06, + "loss": 14.1769, + "step": 42120 + }, + { + "epoch": 0.08510526549691537, + "grad_norm": 212.86944580078125, + "learning_rate": 8.426e-06, + "loss": 38.5426, + "step": 42130 + }, + { + "epoch": 0.08512546612959918, + "grad_norm": 208.7589874267578, + "learning_rate": 8.428000000000001e-06, + "loss": 31.9014, + "step": 42140 + }, + { + "epoch": 0.085145666762283, + "grad_norm": 830.7401123046875, + "learning_rate": 8.43e-06, + "loss": 20.2828, + "step": 42150 + }, + { + "epoch": 0.08516586739496682, + "grad_norm": 324.7993469238281, + "learning_rate": 8.432e-06, + "loss": 32.6256, + "step": 42160 + }, + { + "epoch": 0.08518606802765062, + "grad_norm": 223.50123596191406, + "learning_rate": 8.434000000000001e-06, + "loss": 28.1592, + "step": 42170 + }, + { + "epoch": 0.08520626866033444, + "grad_norm": 1005.3001098632812, + "learning_rate": 8.436e-06, + "loss": 29.8392, + "step": 42180 + }, + { + "epoch": 0.08522646929301826, + "grad_norm": 510.07598876953125, + "learning_rate": 8.438000000000001e-06, + "loss": 29.9206, + "step": 42190 + }, + { + "epoch": 0.08524666992570207, + "grad_norm": 157.23410034179688, + "learning_rate": 8.44e-06, + "loss": 20.0351, + "step": 42200 + }, + { + "epoch": 0.08526687055838589, + "grad_norm": 873.9767456054688, + "learning_rate": 8.442e-06, + "loss": 36.1325, + "step": 42210 + }, + { + "epoch": 0.08528707119106971, + "grad_norm": 298.45013427734375, + "learning_rate": 8.444e-06, + "loss": 21.6595, + "step": 42220 + }, + { + "epoch": 0.08530727182375351, + "grad_norm": 1407.209716796875, + "learning_rate": 8.446e-06, + "loss": 37.8436, + "step": 42230 + }, + { + "epoch": 0.08532747245643733, + "grad_norm": 272.6626892089844, + "learning_rate": 8.448000000000001e-06, + "loss": 20.1577, + "step": 42240 + }, + { + "epoch": 0.08534767308912115, + "grad_norm": 602.26513671875, + "learning_rate": 8.45e-06, + "loss": 41.1474, + "step": 42250 + }, + { + "epoch": 0.08536787372180497, + 
"grad_norm": 456.1973571777344, + "learning_rate": 8.452e-06, + "loss": 20.4341, + "step": 42260 + }, + { + "epoch": 0.08538807435448878, + "grad_norm": 117.47479248046875, + "learning_rate": 8.454e-06, + "loss": 42.2305, + "step": 42270 + }, + { + "epoch": 0.0854082749871726, + "grad_norm": 460.8744812011719, + "learning_rate": 8.456000000000002e-06, + "loss": 34.738, + "step": 42280 + }, + { + "epoch": 0.08542847561985642, + "grad_norm": 72.98493194580078, + "learning_rate": 8.458000000000001e-06, + "loss": 28.1704, + "step": 42290 + }, + { + "epoch": 0.08544867625254023, + "grad_norm": 652.7301025390625, + "learning_rate": 8.46e-06, + "loss": 16.1724, + "step": 42300 + }, + { + "epoch": 0.08546887688522405, + "grad_norm": 89.0145492553711, + "learning_rate": 8.462e-06, + "loss": 27.9235, + "step": 42310 + }, + { + "epoch": 0.08548907751790787, + "grad_norm": 408.6658935546875, + "learning_rate": 8.464e-06, + "loss": 15.6284, + "step": 42320 + }, + { + "epoch": 0.08550927815059167, + "grad_norm": 62.5884895324707, + "learning_rate": 8.466000000000002e-06, + "loss": 30.4507, + "step": 42330 + }, + { + "epoch": 0.08552947878327549, + "grad_norm": 414.3277587890625, + "learning_rate": 8.468000000000001e-06, + "loss": 20.8086, + "step": 42340 + }, + { + "epoch": 0.08554967941595931, + "grad_norm": 403.589599609375, + "learning_rate": 8.47e-06, + "loss": 30.4291, + "step": 42350 + }, + { + "epoch": 0.08556988004864312, + "grad_norm": 324.6241149902344, + "learning_rate": 8.472e-06, + "loss": 15.3373, + "step": 42360 + }, + { + "epoch": 0.08559008068132694, + "grad_norm": 705.1282958984375, + "learning_rate": 8.474e-06, + "loss": 34.0037, + "step": 42370 + }, + { + "epoch": 0.08561028131401076, + "grad_norm": 260.4348449707031, + "learning_rate": 8.476000000000002e-06, + "loss": 30.7474, + "step": 42380 + }, + { + "epoch": 0.08563048194669456, + "grad_norm": 382.897705078125, + "learning_rate": 8.478e-06, + "loss": 35.5057, + "step": 42390 + }, + { + "epoch": 0.08565068257937838, + "grad_norm": 1128.849853515625, + "learning_rate": 8.48e-06, + "loss": 21.7381, + "step": 42400 + }, + { + "epoch": 0.0856708832120622, + "grad_norm": 723.4257202148438, + "learning_rate": 8.482e-06, + "loss": 44.8299, + "step": 42410 + }, + { + "epoch": 0.08569108384474602, + "grad_norm": 4.07509183883667, + "learning_rate": 8.484e-06, + "loss": 22.9919, + "step": 42420 + }, + { + "epoch": 0.08571128447742983, + "grad_norm": 623.5599365234375, + "learning_rate": 8.486000000000001e-06, + "loss": 35.1298, + "step": 42430 + }, + { + "epoch": 0.08573148511011365, + "grad_norm": 4151.32666015625, + "learning_rate": 8.488e-06, + "loss": 52.9023, + "step": 42440 + }, + { + "epoch": 0.08575168574279747, + "grad_norm": 431.2698059082031, + "learning_rate": 8.49e-06, + "loss": 27.7349, + "step": 42450 + }, + { + "epoch": 0.08577188637548128, + "grad_norm": 922.9758911132812, + "learning_rate": 8.492000000000001e-06, + "loss": 32.2484, + "step": 42460 + }, + { + "epoch": 0.0857920870081651, + "grad_norm": 564.1514282226562, + "learning_rate": 8.494e-06, + "loss": 16.0605, + "step": 42470 + }, + { + "epoch": 0.08581228764084892, + "grad_norm": 268.5475158691406, + "learning_rate": 8.496000000000001e-06, + "loss": 32.3987, + "step": 42480 + }, + { + "epoch": 0.08583248827353272, + "grad_norm": 47.12990951538086, + "learning_rate": 8.498e-06, + "loss": 24.0045, + "step": 42490 + }, + { + "epoch": 0.08585268890621654, + "grad_norm": 445.939453125, + "learning_rate": 8.5e-06, + "loss": 23.9519, + "step": 42500 + }, + { + "epoch": 
0.08587288953890036, + "grad_norm": 4.202223777770996, + "learning_rate": 8.502000000000001e-06, + "loss": 28.0244, + "step": 42510 + }, + { + "epoch": 0.08589309017158417, + "grad_norm": 554.5347290039062, + "learning_rate": 8.504000000000002e-06, + "loss": 15.4307, + "step": 42520 + }, + { + "epoch": 0.08591329080426799, + "grad_norm": 451.445556640625, + "learning_rate": 8.506000000000001e-06, + "loss": 23.211, + "step": 42530 + }, + { + "epoch": 0.08593349143695181, + "grad_norm": 1390.3988037109375, + "learning_rate": 8.508e-06, + "loss": 25.8337, + "step": 42540 + }, + { + "epoch": 0.08595369206963561, + "grad_norm": 660.3360595703125, + "learning_rate": 8.51e-06, + "loss": 43.6705, + "step": 42550 + }, + { + "epoch": 0.08597389270231943, + "grad_norm": 749.7850952148438, + "learning_rate": 8.512e-06, + "loss": 37.2106, + "step": 42560 + }, + { + "epoch": 0.08599409333500325, + "grad_norm": 137.21722412109375, + "learning_rate": 8.514000000000002e-06, + "loss": 46.5237, + "step": 42570 + }, + { + "epoch": 0.08601429396768706, + "grad_norm": 347.36920166015625, + "learning_rate": 8.516000000000001e-06, + "loss": 30.8775, + "step": 42580 + }, + { + "epoch": 0.08603449460037088, + "grad_norm": 0.0, + "learning_rate": 8.518e-06, + "loss": 15.3735, + "step": 42590 + }, + { + "epoch": 0.0860546952330547, + "grad_norm": 262.8627624511719, + "learning_rate": 8.52e-06, + "loss": 29.8606, + "step": 42600 + }, + { + "epoch": 0.08607489586573852, + "grad_norm": 183.59539794921875, + "learning_rate": 8.522e-06, + "loss": 21.1203, + "step": 42610 + }, + { + "epoch": 0.08609509649842233, + "grad_norm": 1127.3863525390625, + "learning_rate": 8.524000000000002e-06, + "loss": 28.7105, + "step": 42620 + }, + { + "epoch": 0.08611529713110615, + "grad_norm": 848.1657104492188, + "learning_rate": 8.526000000000001e-06, + "loss": 23.0514, + "step": 42630 + }, + { + "epoch": 0.08613549776378997, + "grad_norm": 511.5576477050781, + "learning_rate": 8.528e-06, + "loss": 24.8539, + "step": 42640 + }, + { + "epoch": 0.08615569839647377, + "grad_norm": 261.4380187988281, + "learning_rate": 8.530000000000001e-06, + "loss": 21.1395, + "step": 42650 + }, + { + "epoch": 0.0861758990291576, + "grad_norm": 525.8989868164062, + "learning_rate": 8.532e-06, + "loss": 21.3154, + "step": 42660 + }, + { + "epoch": 0.08619609966184141, + "grad_norm": 393.46014404296875, + "learning_rate": 8.534000000000002e-06, + "loss": 27.8852, + "step": 42670 + }, + { + "epoch": 0.08621630029452522, + "grad_norm": 438.6636047363281, + "learning_rate": 8.536000000000001e-06, + "loss": 37.7932, + "step": 42680 + }, + { + "epoch": 0.08623650092720904, + "grad_norm": 417.7564697265625, + "learning_rate": 8.538e-06, + "loss": 24.7863, + "step": 42690 + }, + { + "epoch": 0.08625670155989286, + "grad_norm": 387.63525390625, + "learning_rate": 8.540000000000001e-06, + "loss": 19.4297, + "step": 42700 + }, + { + "epoch": 0.08627690219257667, + "grad_norm": 145.06236267089844, + "learning_rate": 8.542e-06, + "loss": 28.4741, + "step": 42710 + }, + { + "epoch": 0.08629710282526049, + "grad_norm": 590.2205200195312, + "learning_rate": 8.544000000000002e-06, + "loss": 16.8451, + "step": 42720 + }, + { + "epoch": 0.0863173034579443, + "grad_norm": 425.0194396972656, + "learning_rate": 8.546000000000001e-06, + "loss": 26.8013, + "step": 42730 + }, + { + "epoch": 0.08633750409062811, + "grad_norm": 338.0776062011719, + "learning_rate": 8.548e-06, + "loss": 10.24, + "step": 42740 + }, + { + "epoch": 0.08635770472331193, + "grad_norm": 435.5176086425781, + 
"learning_rate": 8.550000000000001e-06, + "loss": 18.2302, + "step": 42750 + }, + { + "epoch": 0.08637790535599575, + "grad_norm": 366.2986755371094, + "learning_rate": 8.552e-06, + "loss": 27.9482, + "step": 42760 + }, + { + "epoch": 0.08639810598867957, + "grad_norm": 741.6912231445312, + "learning_rate": 8.554000000000001e-06, + "loss": 22.2683, + "step": 42770 + }, + { + "epoch": 0.08641830662136338, + "grad_norm": 501.5782165527344, + "learning_rate": 8.556e-06, + "loss": 27.7317, + "step": 42780 + }, + { + "epoch": 0.0864385072540472, + "grad_norm": 475.4088134765625, + "learning_rate": 8.558e-06, + "loss": 39.8292, + "step": 42790 + }, + { + "epoch": 0.08645870788673102, + "grad_norm": 1028.5577392578125, + "learning_rate": 8.560000000000001e-06, + "loss": 40.9841, + "step": 42800 + }, + { + "epoch": 0.08647890851941482, + "grad_norm": 702.6845703125, + "learning_rate": 8.562e-06, + "loss": 29.6573, + "step": 42810 + }, + { + "epoch": 0.08649910915209864, + "grad_norm": 397.6463317871094, + "learning_rate": 8.564000000000001e-06, + "loss": 39.3842, + "step": 42820 + }, + { + "epoch": 0.08651930978478246, + "grad_norm": 340.2057800292969, + "learning_rate": 8.566e-06, + "loss": 31.5082, + "step": 42830 + }, + { + "epoch": 0.08653951041746627, + "grad_norm": 237.69635009765625, + "learning_rate": 8.568e-06, + "loss": 19.8738, + "step": 42840 + }, + { + "epoch": 0.08655971105015009, + "grad_norm": 706.278076171875, + "learning_rate": 8.570000000000001e-06, + "loss": 33.9298, + "step": 42850 + }, + { + "epoch": 0.08657991168283391, + "grad_norm": 508.830078125, + "learning_rate": 8.572e-06, + "loss": 17.8937, + "step": 42860 + }, + { + "epoch": 0.08660011231551772, + "grad_norm": 683.411865234375, + "learning_rate": 8.574000000000001e-06, + "loss": 30.3102, + "step": 42870 + }, + { + "epoch": 0.08662031294820154, + "grad_norm": 1214.28466796875, + "learning_rate": 8.576e-06, + "loss": 39.2031, + "step": 42880 + }, + { + "epoch": 0.08664051358088536, + "grad_norm": 468.9919738769531, + "learning_rate": 8.578000000000002e-06, + "loss": 15.042, + "step": 42890 + }, + { + "epoch": 0.08666071421356916, + "grad_norm": 392.3851623535156, + "learning_rate": 8.580000000000001e-06, + "loss": 25.493, + "step": 42900 + }, + { + "epoch": 0.08668091484625298, + "grad_norm": 111.14898681640625, + "learning_rate": 8.582e-06, + "loss": 20.3436, + "step": 42910 + }, + { + "epoch": 0.0867011154789368, + "grad_norm": 811.585205078125, + "learning_rate": 8.584000000000001e-06, + "loss": 34.9878, + "step": 42920 + }, + { + "epoch": 0.08672131611162062, + "grad_norm": 531.7177734375, + "learning_rate": 8.586e-06, + "loss": 29.4499, + "step": 42930 + }, + { + "epoch": 0.08674151674430443, + "grad_norm": 1020.8329467773438, + "learning_rate": 8.588000000000001e-06, + "loss": 45.2873, + "step": 42940 + }, + { + "epoch": 0.08676171737698825, + "grad_norm": 693.1109619140625, + "learning_rate": 8.59e-06, + "loss": 15.733, + "step": 42950 + }, + { + "epoch": 0.08678191800967207, + "grad_norm": 175.2774658203125, + "learning_rate": 8.592e-06, + "loss": 12.659, + "step": 42960 + }, + { + "epoch": 0.08680211864235587, + "grad_norm": 235.388427734375, + "learning_rate": 8.594000000000001e-06, + "loss": 27.075, + "step": 42970 + }, + { + "epoch": 0.0868223192750397, + "grad_norm": 85.86357879638672, + "learning_rate": 8.596e-06, + "loss": 56.4613, + "step": 42980 + }, + { + "epoch": 0.08684251990772351, + "grad_norm": 545.3470458984375, + "learning_rate": 8.598000000000001e-06, + "loss": 23.8057, + "step": 42990 + }, + 
{ + "epoch": 0.08686272054040732, + "grad_norm": 344.33355712890625, + "learning_rate": 8.6e-06, + "loss": 19.7577, + "step": 43000 + }, + { + "epoch": 0.08688292117309114, + "grad_norm": 144.74801635742188, + "learning_rate": 8.602e-06, + "loss": 16.6569, + "step": 43010 + }, + { + "epoch": 0.08690312180577496, + "grad_norm": 292.4404296875, + "learning_rate": 8.604000000000001e-06, + "loss": 36.0593, + "step": 43020 + }, + { + "epoch": 0.08692332243845877, + "grad_norm": 145.99012756347656, + "learning_rate": 8.606e-06, + "loss": 22.7391, + "step": 43030 + }, + { + "epoch": 0.08694352307114259, + "grad_norm": 242.26412963867188, + "learning_rate": 8.608000000000001e-06, + "loss": 22.2086, + "step": 43040 + }, + { + "epoch": 0.0869637237038264, + "grad_norm": 297.0873718261719, + "learning_rate": 8.61e-06, + "loss": 28.5126, + "step": 43050 + }, + { + "epoch": 0.08698392433651021, + "grad_norm": 561.925537109375, + "learning_rate": 8.612e-06, + "loss": 30.4806, + "step": 43060 + }, + { + "epoch": 0.08700412496919403, + "grad_norm": 459.6188659667969, + "learning_rate": 8.614000000000001e-06, + "loss": 17.2545, + "step": 43070 + }, + { + "epoch": 0.08702432560187785, + "grad_norm": 876.9536743164062, + "learning_rate": 8.616000000000002e-06, + "loss": 17.9509, + "step": 43080 + }, + { + "epoch": 0.08704452623456167, + "grad_norm": 157.70240783691406, + "learning_rate": 8.618000000000001e-06, + "loss": 27.4339, + "step": 43090 + }, + { + "epoch": 0.08706472686724548, + "grad_norm": 452.8877868652344, + "learning_rate": 8.62e-06, + "loss": 32.1382, + "step": 43100 + }, + { + "epoch": 0.0870849274999293, + "grad_norm": 113.4677963256836, + "learning_rate": 8.622e-06, + "loss": 35.2102, + "step": 43110 + }, + { + "epoch": 0.08710512813261312, + "grad_norm": 404.09381103515625, + "learning_rate": 8.624e-06, + "loss": 20.1554, + "step": 43120 + }, + { + "epoch": 0.08712532876529692, + "grad_norm": 451.4709167480469, + "learning_rate": 8.626000000000002e-06, + "loss": 18.2809, + "step": 43130 + }, + { + "epoch": 0.08714552939798074, + "grad_norm": 440.0563049316406, + "learning_rate": 8.628000000000001e-06, + "loss": 7.4467, + "step": 43140 + }, + { + "epoch": 0.08716573003066456, + "grad_norm": 855.1495361328125, + "learning_rate": 8.63e-06, + "loss": 35.828, + "step": 43150 + }, + { + "epoch": 0.08718593066334837, + "grad_norm": 10.839653968811035, + "learning_rate": 8.632e-06, + "loss": 43.2081, + "step": 43160 + }, + { + "epoch": 0.08720613129603219, + "grad_norm": 614.3381958007812, + "learning_rate": 8.634e-06, + "loss": 17.8169, + "step": 43170 + }, + { + "epoch": 0.08722633192871601, + "grad_norm": 271.685791015625, + "learning_rate": 8.636000000000002e-06, + "loss": 31.9236, + "step": 43180 + }, + { + "epoch": 0.08724653256139982, + "grad_norm": 554.6610717773438, + "learning_rate": 8.638000000000001e-06, + "loss": 30.3433, + "step": 43190 + }, + { + "epoch": 0.08726673319408364, + "grad_norm": 785.8115844726562, + "learning_rate": 8.64e-06, + "loss": 42.5126, + "step": 43200 + }, + { + "epoch": 0.08728693382676746, + "grad_norm": 818.6058959960938, + "learning_rate": 8.642e-06, + "loss": 24.9131, + "step": 43210 + }, + { + "epoch": 0.08730713445945126, + "grad_norm": 333.1800231933594, + "learning_rate": 8.644e-06, + "loss": 19.2756, + "step": 43220 + }, + { + "epoch": 0.08732733509213508, + "grad_norm": 167.22509765625, + "learning_rate": 8.646000000000002e-06, + "loss": 25.0911, + "step": 43230 + }, + { + "epoch": 0.0873475357248189, + "grad_norm": 994.1484375, + "learning_rate": 
8.648000000000001e-06, + "loss": 35.0122, + "step": 43240 + }, + { + "epoch": 0.08736773635750272, + "grad_norm": 844.7338256835938, + "learning_rate": 8.65e-06, + "loss": 18.575, + "step": 43250 + }, + { + "epoch": 0.08738793699018653, + "grad_norm": 176.0022735595703, + "learning_rate": 8.652000000000001e-06, + "loss": 48.7586, + "step": 43260 + }, + { + "epoch": 0.08740813762287035, + "grad_norm": 178.6254119873047, + "learning_rate": 8.654e-06, + "loss": 18.2863, + "step": 43270 + }, + { + "epoch": 0.08742833825555417, + "grad_norm": 541.4852905273438, + "learning_rate": 8.656000000000001e-06, + "loss": 18.9335, + "step": 43280 + }, + { + "epoch": 0.08744853888823798, + "grad_norm": 424.3155822753906, + "learning_rate": 8.658e-06, + "loss": 27.0739, + "step": 43290 + }, + { + "epoch": 0.0874687395209218, + "grad_norm": 598.650390625, + "learning_rate": 8.66e-06, + "loss": 25.5768, + "step": 43300 + }, + { + "epoch": 0.08748894015360562, + "grad_norm": 760.1422729492188, + "learning_rate": 8.662000000000001e-06, + "loss": 28.0227, + "step": 43310 + }, + { + "epoch": 0.08750914078628942, + "grad_norm": 464.99127197265625, + "learning_rate": 8.664e-06, + "loss": 19.4783, + "step": 43320 + }, + { + "epoch": 0.08752934141897324, + "grad_norm": 110.35054016113281, + "learning_rate": 8.666000000000001e-06, + "loss": 26.5671, + "step": 43330 + }, + { + "epoch": 0.08754954205165706, + "grad_norm": 261.06536865234375, + "learning_rate": 8.668e-06, + "loss": 33.3942, + "step": 43340 + }, + { + "epoch": 0.08756974268434087, + "grad_norm": 342.5465393066406, + "learning_rate": 8.67e-06, + "loss": 18.9044, + "step": 43350 + }, + { + "epoch": 0.08758994331702469, + "grad_norm": 270.8699951171875, + "learning_rate": 8.672000000000001e-06, + "loss": 20.0748, + "step": 43360 + }, + { + "epoch": 0.08761014394970851, + "grad_norm": 489.7830810546875, + "learning_rate": 8.674e-06, + "loss": 24.418, + "step": 43370 + }, + { + "epoch": 0.08763034458239231, + "grad_norm": 299.12762451171875, + "learning_rate": 8.676000000000001e-06, + "loss": 31.1323, + "step": 43380 + }, + { + "epoch": 0.08765054521507613, + "grad_norm": 210.87306213378906, + "learning_rate": 8.678e-06, + "loss": 27.3704, + "step": 43390 + }, + { + "epoch": 0.08767074584775995, + "grad_norm": 262.9859313964844, + "learning_rate": 8.68e-06, + "loss": 34.1434, + "step": 43400 + }, + { + "epoch": 0.08769094648044377, + "grad_norm": 426.6127014160156, + "learning_rate": 8.682000000000001e-06, + "loss": 21.96, + "step": 43410 + }, + { + "epoch": 0.08771114711312758, + "grad_norm": 794.215087890625, + "learning_rate": 8.684e-06, + "loss": 36.9314, + "step": 43420 + }, + { + "epoch": 0.0877313477458114, + "grad_norm": 399.9016418457031, + "learning_rate": 8.686000000000001e-06, + "loss": 20.9496, + "step": 43430 + }, + { + "epoch": 0.08775154837849522, + "grad_norm": 691.166259765625, + "learning_rate": 8.688e-06, + "loss": 38.0485, + "step": 43440 + }, + { + "epoch": 0.08777174901117903, + "grad_norm": 711.8749389648438, + "learning_rate": 8.690000000000002e-06, + "loss": 25.7033, + "step": 43450 + }, + { + "epoch": 0.08779194964386285, + "grad_norm": 171.9166717529297, + "learning_rate": 8.692e-06, + "loss": 21.509, + "step": 43460 + }, + { + "epoch": 0.08781215027654667, + "grad_norm": 626.6830444335938, + "learning_rate": 8.694e-06, + "loss": 13.0115, + "step": 43470 + }, + { + "epoch": 0.08783235090923047, + "grad_norm": 851.20654296875, + "learning_rate": 8.696000000000001e-06, + "loss": 18.5017, + "step": 43480 + }, + { + "epoch": 
0.08785255154191429, + "grad_norm": 569.1126708984375, + "learning_rate": 8.698e-06, + "loss": 23.5853, + "step": 43490 + }, + { + "epoch": 0.08787275217459811, + "grad_norm": 538.6649780273438, + "learning_rate": 8.700000000000001e-06, + "loss": 20.7032, + "step": 43500 + }, + { + "epoch": 0.08789295280728192, + "grad_norm": 783.0730590820312, + "learning_rate": 8.702e-06, + "loss": 35.5005, + "step": 43510 + }, + { + "epoch": 0.08791315343996574, + "grad_norm": 371.6290283203125, + "learning_rate": 8.704e-06, + "loss": 17.9829, + "step": 43520 + }, + { + "epoch": 0.08793335407264956, + "grad_norm": 243.0632781982422, + "learning_rate": 8.706000000000001e-06, + "loss": 25.7718, + "step": 43530 + }, + { + "epoch": 0.08795355470533336, + "grad_norm": 428.3433532714844, + "learning_rate": 8.708e-06, + "loss": 22.0723, + "step": 43540 + }, + { + "epoch": 0.08797375533801718, + "grad_norm": 359.037109375, + "learning_rate": 8.710000000000001e-06, + "loss": 38.0554, + "step": 43550 + }, + { + "epoch": 0.087993955970701, + "grad_norm": 123.59037780761719, + "learning_rate": 8.712e-06, + "loss": 17.7862, + "step": 43560 + }, + { + "epoch": 0.08801415660338482, + "grad_norm": 757.7338256835938, + "learning_rate": 8.714e-06, + "loss": 17.6592, + "step": 43570 + }, + { + "epoch": 0.08803435723606863, + "grad_norm": 150.5660858154297, + "learning_rate": 8.716000000000001e-06, + "loss": 22.1696, + "step": 43580 + }, + { + "epoch": 0.08805455786875245, + "grad_norm": 866.910400390625, + "learning_rate": 8.718e-06, + "loss": 30.1427, + "step": 43590 + }, + { + "epoch": 0.08807475850143627, + "grad_norm": 533.5135498046875, + "learning_rate": 8.720000000000001e-06, + "loss": 37.1078, + "step": 43600 + }, + { + "epoch": 0.08809495913412008, + "grad_norm": 1215.2125244140625, + "learning_rate": 8.722e-06, + "loss": 20.1203, + "step": 43610 + }, + { + "epoch": 0.0881151597668039, + "grad_norm": 545.397216796875, + "learning_rate": 8.724e-06, + "loss": 31.156, + "step": 43620 + }, + { + "epoch": 0.08813536039948772, + "grad_norm": 706.6424560546875, + "learning_rate": 8.726e-06, + "loss": 16.5895, + "step": 43630 + }, + { + "epoch": 0.08815556103217152, + "grad_norm": 173.1048583984375, + "learning_rate": 8.728e-06, + "loss": 16.0207, + "step": 43640 + }, + { + "epoch": 0.08817576166485534, + "grad_norm": 1318.9002685546875, + "learning_rate": 8.730000000000001e-06, + "loss": 21.7477, + "step": 43650 + }, + { + "epoch": 0.08819596229753916, + "grad_norm": 1221.095947265625, + "learning_rate": 8.732e-06, + "loss": 41.428, + "step": 43660 + }, + { + "epoch": 0.08821616293022297, + "grad_norm": 424.8775634765625, + "learning_rate": 8.734e-06, + "loss": 33.7021, + "step": 43670 + }, + { + "epoch": 0.08823636356290679, + "grad_norm": 435.27850341796875, + "learning_rate": 8.736e-06, + "loss": 23.2997, + "step": 43680 + }, + { + "epoch": 0.08825656419559061, + "grad_norm": 443.4166259765625, + "learning_rate": 8.738000000000002e-06, + "loss": 12.6361, + "step": 43690 + }, + { + "epoch": 0.08827676482827441, + "grad_norm": 161.66542053222656, + "learning_rate": 8.740000000000001e-06, + "loss": 15.993, + "step": 43700 + }, + { + "epoch": 0.08829696546095823, + "grad_norm": 612.4815063476562, + "learning_rate": 8.742e-06, + "loss": 32.946, + "step": 43710 + }, + { + "epoch": 0.08831716609364205, + "grad_norm": 1040.73681640625, + "learning_rate": 8.744e-06, + "loss": 26.8302, + "step": 43720 + }, + { + "epoch": 0.08833736672632587, + "grad_norm": 60.330177307128906, + "learning_rate": 8.746e-06, + "loss": 25.7003, + 
"step": 43730 + }, + { + "epoch": 0.08835756735900968, + "grad_norm": 190.57351684570312, + "learning_rate": 8.748000000000002e-06, + "loss": 27.0377, + "step": 43740 + }, + { + "epoch": 0.0883777679916935, + "grad_norm": 16.609949111938477, + "learning_rate": 8.750000000000001e-06, + "loss": 25.3611, + "step": 43750 + }, + { + "epoch": 0.08839796862437732, + "grad_norm": 407.3550720214844, + "learning_rate": 8.752e-06, + "loss": 27.6014, + "step": 43760 + }, + { + "epoch": 0.08841816925706113, + "grad_norm": 130.43447875976562, + "learning_rate": 8.754e-06, + "loss": 10.7957, + "step": 43770 + }, + { + "epoch": 0.08843836988974495, + "grad_norm": 827.7708129882812, + "learning_rate": 8.756e-06, + "loss": 31.6983, + "step": 43780 + }, + { + "epoch": 0.08845857052242877, + "grad_norm": 395.01715087890625, + "learning_rate": 8.758000000000002e-06, + "loss": 26.3091, + "step": 43790 + }, + { + "epoch": 0.08847877115511257, + "grad_norm": 301.2219543457031, + "learning_rate": 8.76e-06, + "loss": 28.6031, + "step": 43800 + }, + { + "epoch": 0.0884989717877964, + "grad_norm": 660.8982543945312, + "learning_rate": 8.762e-06, + "loss": 45.3063, + "step": 43810 + }, + { + "epoch": 0.08851917242048021, + "grad_norm": 533.4058227539062, + "learning_rate": 8.764e-06, + "loss": 27.4031, + "step": 43820 + }, + { + "epoch": 0.08853937305316402, + "grad_norm": 337.8182067871094, + "learning_rate": 8.766e-06, + "loss": 14.2368, + "step": 43830 + }, + { + "epoch": 0.08855957368584784, + "grad_norm": 611.1868286132812, + "learning_rate": 8.768000000000001e-06, + "loss": 31.2573, + "step": 43840 + }, + { + "epoch": 0.08857977431853166, + "grad_norm": 775.6557006835938, + "learning_rate": 8.77e-06, + "loss": 20.9313, + "step": 43850 + }, + { + "epoch": 0.08859997495121547, + "grad_norm": 1237.54248046875, + "learning_rate": 8.772e-06, + "loss": 31.3704, + "step": 43860 + }, + { + "epoch": 0.08862017558389929, + "grad_norm": 626.9205932617188, + "learning_rate": 8.774000000000001e-06, + "loss": 39.7188, + "step": 43870 + }, + { + "epoch": 0.0886403762165831, + "grad_norm": 137.8606414794922, + "learning_rate": 8.776e-06, + "loss": 27.5984, + "step": 43880 + }, + { + "epoch": 0.08866057684926693, + "grad_norm": 1082.872802734375, + "learning_rate": 8.778000000000001e-06, + "loss": 18.0964, + "step": 43890 + }, + { + "epoch": 0.08868077748195073, + "grad_norm": 220.36451721191406, + "learning_rate": 8.78e-06, + "loss": 34.1153, + "step": 43900 + }, + { + "epoch": 0.08870097811463455, + "grad_norm": 650.5850219726562, + "learning_rate": 8.782e-06, + "loss": 40.8374, + "step": 43910 + }, + { + "epoch": 0.08872117874731837, + "grad_norm": 736.3362426757812, + "learning_rate": 8.784000000000001e-06, + "loss": 33.0653, + "step": 43920 + }, + { + "epoch": 0.08874137938000218, + "grad_norm": 182.11184692382812, + "learning_rate": 8.786000000000002e-06, + "loss": 36.9502, + "step": 43930 + }, + { + "epoch": 0.088761580012686, + "grad_norm": 204.7152099609375, + "learning_rate": 8.788000000000001e-06, + "loss": 32.2236, + "step": 43940 + }, + { + "epoch": 0.08878178064536982, + "grad_norm": 664.1483154296875, + "learning_rate": 8.79e-06, + "loss": 29.3924, + "step": 43950 + }, + { + "epoch": 0.08880198127805362, + "grad_norm": 474.9621276855469, + "learning_rate": 8.792e-06, + "loss": 16.2539, + "step": 43960 + }, + { + "epoch": 0.08882218191073744, + "grad_norm": 1068.8729248046875, + "learning_rate": 8.794e-06, + "loss": 21.6826, + "step": 43970 + }, + { + "epoch": 0.08884238254342126, + "grad_norm": 1158.564697265625, 
+ "learning_rate": 8.796000000000002e-06, + "loss": 30.4319, + "step": 43980 + }, + { + "epoch": 0.08886258317610507, + "grad_norm": 57.3450927734375, + "learning_rate": 8.798000000000001e-06, + "loss": 22.6598, + "step": 43990 + }, + { + "epoch": 0.08888278380878889, + "grad_norm": 529.1062622070312, + "learning_rate": 8.8e-06, + "loss": 28.3954, + "step": 44000 + }, + { + "epoch": 0.08890298444147271, + "grad_norm": 371.1905517578125, + "learning_rate": 8.802e-06, + "loss": 38.479, + "step": 44010 + }, + { + "epoch": 0.08892318507415652, + "grad_norm": 220.77362060546875, + "learning_rate": 8.804e-06, + "loss": 18.6284, + "step": 44020 + }, + { + "epoch": 0.08894338570684034, + "grad_norm": 539.539794921875, + "learning_rate": 8.806000000000002e-06, + "loss": 12.7973, + "step": 44030 + }, + { + "epoch": 0.08896358633952416, + "grad_norm": 599.9065551757812, + "learning_rate": 8.808000000000001e-06, + "loss": 35.6135, + "step": 44040 + }, + { + "epoch": 0.08898378697220798, + "grad_norm": 423.3573913574219, + "learning_rate": 8.81e-06, + "loss": 24.9562, + "step": 44050 + }, + { + "epoch": 0.08900398760489178, + "grad_norm": 571.5018920898438, + "learning_rate": 8.812000000000001e-06, + "loss": 36.8214, + "step": 44060 + }, + { + "epoch": 0.0890241882375756, + "grad_norm": 542.1673583984375, + "learning_rate": 8.814e-06, + "loss": 18.4148, + "step": 44070 + }, + { + "epoch": 0.08904438887025942, + "grad_norm": 347.5661926269531, + "learning_rate": 8.816000000000002e-06, + "loss": 28.4541, + "step": 44080 + }, + { + "epoch": 0.08906458950294323, + "grad_norm": 434.1124572753906, + "learning_rate": 8.818000000000001e-06, + "loss": 16.2437, + "step": 44090 + }, + { + "epoch": 0.08908479013562705, + "grad_norm": 646.8975830078125, + "learning_rate": 8.82e-06, + "loss": 23.2977, + "step": 44100 + }, + { + "epoch": 0.08910499076831087, + "grad_norm": 631.682861328125, + "learning_rate": 8.822000000000001e-06, + "loss": 30.6054, + "step": 44110 + }, + { + "epoch": 0.08912519140099467, + "grad_norm": 479.34423828125, + "learning_rate": 8.824e-06, + "loss": 27.6769, + "step": 44120 + }, + { + "epoch": 0.0891453920336785, + "grad_norm": 130.42416381835938, + "learning_rate": 8.826000000000002e-06, + "loss": 16.2841, + "step": 44130 + }, + { + "epoch": 0.08916559266636231, + "grad_norm": 679.4235229492188, + "learning_rate": 8.828000000000001e-06, + "loss": 29.5048, + "step": 44140 + }, + { + "epoch": 0.08918579329904612, + "grad_norm": 255.19798278808594, + "learning_rate": 8.83e-06, + "loss": 10.4203, + "step": 44150 + }, + { + "epoch": 0.08920599393172994, + "grad_norm": 2336.6416015625, + "learning_rate": 8.832000000000001e-06, + "loss": 52.0455, + "step": 44160 + }, + { + "epoch": 0.08922619456441376, + "grad_norm": 904.8772583007812, + "learning_rate": 8.834e-06, + "loss": 44.5716, + "step": 44170 + }, + { + "epoch": 0.08924639519709757, + "grad_norm": 1653.8470458984375, + "learning_rate": 8.836000000000001e-06, + "loss": 33.5973, + "step": 44180 + }, + { + "epoch": 0.08926659582978139, + "grad_norm": 799.48583984375, + "learning_rate": 8.838e-06, + "loss": 37.314, + "step": 44190 + }, + { + "epoch": 0.0892867964624652, + "grad_norm": 363.776123046875, + "learning_rate": 8.84e-06, + "loss": 23.4966, + "step": 44200 + }, + { + "epoch": 0.08930699709514903, + "grad_norm": 515.052490234375, + "learning_rate": 8.842000000000001e-06, + "loss": 23.1721, + "step": 44210 + }, + { + "epoch": 0.08932719772783283, + "grad_norm": 1712.2266845703125, + "learning_rate": 8.844e-06, + "loss": 38.6416, + 
"step": 44220 + }, + { + "epoch": 0.08934739836051665, + "grad_norm": 997.6633911132812, + "learning_rate": 8.846000000000001e-06, + "loss": 27.9869, + "step": 44230 + }, + { + "epoch": 0.08936759899320047, + "grad_norm": 181.0132293701172, + "learning_rate": 8.848e-06, + "loss": 22.755, + "step": 44240 + }, + { + "epoch": 0.08938779962588428, + "grad_norm": 417.53985595703125, + "learning_rate": 8.85e-06, + "loss": 28.4482, + "step": 44250 + }, + { + "epoch": 0.0894080002585681, + "grad_norm": 661.7625122070312, + "learning_rate": 8.852000000000001e-06, + "loss": 32.031, + "step": 44260 + }, + { + "epoch": 0.08942820089125192, + "grad_norm": 171.3567352294922, + "learning_rate": 8.854e-06, + "loss": 20.9981, + "step": 44270 + }, + { + "epoch": 0.08944840152393572, + "grad_norm": 565.4457397460938, + "learning_rate": 8.856000000000001e-06, + "loss": 19.9229, + "step": 44280 + }, + { + "epoch": 0.08946860215661954, + "grad_norm": 1008.0512084960938, + "learning_rate": 8.858e-06, + "loss": 23.9828, + "step": 44290 + }, + { + "epoch": 0.08948880278930336, + "grad_norm": 650.6478881835938, + "learning_rate": 8.860000000000002e-06, + "loss": 32.9885, + "step": 44300 + }, + { + "epoch": 0.08950900342198717, + "grad_norm": 236.9464569091797, + "learning_rate": 8.862000000000001e-06, + "loss": 14.9416, + "step": 44310 + }, + { + "epoch": 0.08952920405467099, + "grad_norm": 485.0042419433594, + "learning_rate": 8.864e-06, + "loss": 21.3128, + "step": 44320 + }, + { + "epoch": 0.08954940468735481, + "grad_norm": 665.9822998046875, + "learning_rate": 8.866000000000001e-06, + "loss": 30.5664, + "step": 44330 + }, + { + "epoch": 0.08956960532003862, + "grad_norm": 575.6888427734375, + "learning_rate": 8.868e-06, + "loss": 23.7571, + "step": 44340 + }, + { + "epoch": 0.08958980595272244, + "grad_norm": 369.822265625, + "learning_rate": 8.870000000000001e-06, + "loss": 21.7342, + "step": 44350 + }, + { + "epoch": 0.08961000658540626, + "grad_norm": 434.0180969238281, + "learning_rate": 8.872e-06, + "loss": 22.0393, + "step": 44360 + }, + { + "epoch": 0.08963020721809008, + "grad_norm": 124.84725189208984, + "learning_rate": 8.874e-06, + "loss": 20.3464, + "step": 44370 + }, + { + "epoch": 0.08965040785077388, + "grad_norm": 658.8175048828125, + "learning_rate": 8.876e-06, + "loss": 51.8665, + "step": 44380 + }, + { + "epoch": 0.0896706084834577, + "grad_norm": 400.6201477050781, + "learning_rate": 8.878e-06, + "loss": 36.426, + "step": 44390 + }, + { + "epoch": 0.08969080911614152, + "grad_norm": 450.5768127441406, + "learning_rate": 8.880000000000001e-06, + "loss": 39.3117, + "step": 44400 + }, + { + "epoch": 0.08971100974882533, + "grad_norm": 505.92724609375, + "learning_rate": 8.882e-06, + "loss": 19.8241, + "step": 44410 + }, + { + "epoch": 0.08973121038150915, + "grad_norm": 537.7205200195312, + "learning_rate": 8.884e-06, + "loss": 27.0366, + "step": 44420 + }, + { + "epoch": 0.08975141101419297, + "grad_norm": 1432.2957763671875, + "learning_rate": 8.886000000000001e-06, + "loss": 49.6572, + "step": 44430 + }, + { + "epoch": 0.08977161164687678, + "grad_norm": 87.47107696533203, + "learning_rate": 8.888e-06, + "loss": 40.1563, + "step": 44440 + }, + { + "epoch": 0.0897918122795606, + "grad_norm": 417.109619140625, + "learning_rate": 8.890000000000001e-06, + "loss": 20.4187, + "step": 44450 + }, + { + "epoch": 0.08981201291224442, + "grad_norm": 192.43753051757812, + "learning_rate": 8.892e-06, + "loss": 10.9749, + "step": 44460 + }, + { + "epoch": 0.08983221354492822, + "grad_norm": 
501.7425231933594, + "learning_rate": 8.894e-06, + "loss": 32.2478, + "step": 44470 + }, + { + "epoch": 0.08985241417761204, + "grad_norm": 219.19515991210938, + "learning_rate": 8.896000000000001e-06, + "loss": 41.9935, + "step": 44480 + }, + { + "epoch": 0.08987261481029586, + "grad_norm": 1015.3475952148438, + "learning_rate": 8.898000000000002e-06, + "loss": 28.0073, + "step": 44490 + }, + { + "epoch": 0.08989281544297967, + "grad_norm": 63.530372619628906, + "learning_rate": 8.900000000000001e-06, + "loss": 27.8875, + "step": 44500 + }, + { + "epoch": 0.08991301607566349, + "grad_norm": 193.50613403320312, + "learning_rate": 8.902e-06, + "loss": 17.7265, + "step": 44510 + }, + { + "epoch": 0.08993321670834731, + "grad_norm": 235.31927490234375, + "learning_rate": 8.904e-06, + "loss": 25.7365, + "step": 44520 + }, + { + "epoch": 0.08995341734103113, + "grad_norm": 942.6410522460938, + "learning_rate": 8.906e-06, + "loss": 21.3539, + "step": 44530 + }, + { + "epoch": 0.08997361797371493, + "grad_norm": 334.50787353515625, + "learning_rate": 8.908000000000002e-06, + "loss": 23.9226, + "step": 44540 + }, + { + "epoch": 0.08999381860639875, + "grad_norm": 394.564697265625, + "learning_rate": 8.910000000000001e-06, + "loss": 27.8762, + "step": 44550 + }, + { + "epoch": 0.09001401923908257, + "grad_norm": 385.86102294921875, + "learning_rate": 8.912e-06, + "loss": 12.1461, + "step": 44560 + }, + { + "epoch": 0.09003421987176638, + "grad_norm": 266.55340576171875, + "learning_rate": 8.914e-06, + "loss": 28.2976, + "step": 44570 + }, + { + "epoch": 0.0900544205044502, + "grad_norm": 353.8782958984375, + "learning_rate": 8.916e-06, + "loss": 54.0394, + "step": 44580 + }, + { + "epoch": 0.09007462113713402, + "grad_norm": 118.74738311767578, + "learning_rate": 8.918000000000002e-06, + "loss": 38.6842, + "step": 44590 + }, + { + "epoch": 0.09009482176981783, + "grad_norm": 520.180908203125, + "learning_rate": 8.920000000000001e-06, + "loss": 38.7813, + "step": 44600 + }, + { + "epoch": 0.09011502240250165, + "grad_norm": 1197.62353515625, + "learning_rate": 8.922e-06, + "loss": 28.3833, + "step": 44610 + }, + { + "epoch": 0.09013522303518547, + "grad_norm": 563.7283325195312, + "learning_rate": 8.924e-06, + "loss": 18.2278, + "step": 44620 + }, + { + "epoch": 0.09015542366786927, + "grad_norm": 544.5486450195312, + "learning_rate": 8.926e-06, + "loss": 33.8533, + "step": 44630 + }, + { + "epoch": 0.09017562430055309, + "grad_norm": 568.8088989257812, + "learning_rate": 8.928000000000002e-06, + "loss": 42.4741, + "step": 44640 + }, + { + "epoch": 0.09019582493323691, + "grad_norm": 683.1576538085938, + "learning_rate": 8.930000000000001e-06, + "loss": 28.4008, + "step": 44650 + }, + { + "epoch": 0.09021602556592072, + "grad_norm": 246.2177276611328, + "learning_rate": 8.932e-06, + "loss": 28.559, + "step": 44660 + }, + { + "epoch": 0.09023622619860454, + "grad_norm": 644.2553100585938, + "learning_rate": 8.934000000000001e-06, + "loss": 18.8847, + "step": 44670 + }, + { + "epoch": 0.09025642683128836, + "grad_norm": 1007.45947265625, + "learning_rate": 8.936e-06, + "loss": 33.1327, + "step": 44680 + }, + { + "epoch": 0.09027662746397218, + "grad_norm": 695.0758666992188, + "learning_rate": 8.938000000000001e-06, + "loss": 25.8596, + "step": 44690 + }, + { + "epoch": 0.09029682809665598, + "grad_norm": 173.3174285888672, + "learning_rate": 8.94e-06, + "loss": 27.8275, + "step": 44700 + }, + { + "epoch": 0.0903170287293398, + "grad_norm": 156.8406219482422, + "learning_rate": 8.942e-06, + "loss": 
30.0689, + "step": 44710 + }, + { + "epoch": 0.09033722936202362, + "grad_norm": 312.4314880371094, + "learning_rate": 8.944000000000001e-06, + "loss": 20.6215, + "step": 44720 + }, + { + "epoch": 0.09035742999470743, + "grad_norm": 1031.1641845703125, + "learning_rate": 8.946e-06, + "loss": 30.0523, + "step": 44730 + }, + { + "epoch": 0.09037763062739125, + "grad_norm": 594.5155029296875, + "learning_rate": 8.948000000000001e-06, + "loss": 24.2484, + "step": 44740 + }, + { + "epoch": 0.09039783126007507, + "grad_norm": 305.5896301269531, + "learning_rate": 8.95e-06, + "loss": 26.1327, + "step": 44750 + }, + { + "epoch": 0.09041803189275888, + "grad_norm": 357.19189453125, + "learning_rate": 8.952e-06, + "loss": 23.2766, + "step": 44760 + }, + { + "epoch": 0.0904382325254427, + "grad_norm": 378.5004577636719, + "learning_rate": 8.954000000000001e-06, + "loss": 24.1598, + "step": 44770 + }, + { + "epoch": 0.09045843315812652, + "grad_norm": 571.3589477539062, + "learning_rate": 8.956e-06, + "loss": 43.4971, + "step": 44780 + }, + { + "epoch": 0.09047863379081032, + "grad_norm": 250.39462280273438, + "learning_rate": 8.958000000000001e-06, + "loss": 21.941, + "step": 44790 + }, + { + "epoch": 0.09049883442349414, + "grad_norm": 633.5438232421875, + "learning_rate": 8.96e-06, + "loss": 23.1309, + "step": 44800 + }, + { + "epoch": 0.09051903505617796, + "grad_norm": 424.7410583496094, + "learning_rate": 8.962e-06, + "loss": 23.7086, + "step": 44810 + }, + { + "epoch": 0.09053923568886177, + "grad_norm": 197.0968780517578, + "learning_rate": 8.964000000000001e-06, + "loss": 23.5309, + "step": 44820 + }, + { + "epoch": 0.09055943632154559, + "grad_norm": 293.7751159667969, + "learning_rate": 8.966e-06, + "loss": 28.0905, + "step": 44830 + }, + { + "epoch": 0.09057963695422941, + "grad_norm": 91.09911346435547, + "learning_rate": 8.968000000000001e-06, + "loss": 20.4776, + "step": 44840 + }, + { + "epoch": 0.09059983758691323, + "grad_norm": 668.6318969726562, + "learning_rate": 8.97e-06, + "loss": 25.6164, + "step": 44850 + }, + { + "epoch": 0.09062003821959703, + "grad_norm": 216.250732421875, + "learning_rate": 8.972000000000002e-06, + "loss": 20.6183, + "step": 44860 + }, + { + "epoch": 0.09064023885228085, + "grad_norm": 90.79194641113281, + "learning_rate": 8.974e-06, + "loss": 29.3862, + "step": 44870 + }, + { + "epoch": 0.09066043948496467, + "grad_norm": 927.6483764648438, + "learning_rate": 8.976e-06, + "loss": 20.3835, + "step": 44880 + }, + { + "epoch": 0.09068064011764848, + "grad_norm": 1245.59375, + "learning_rate": 8.978000000000001e-06, + "loss": 36.7677, + "step": 44890 + }, + { + "epoch": 0.0907008407503323, + "grad_norm": 653.4105834960938, + "learning_rate": 8.98e-06, + "loss": 31.7304, + "step": 44900 + }, + { + "epoch": 0.09072104138301612, + "grad_norm": 239.88685607910156, + "learning_rate": 8.982000000000001e-06, + "loss": 30.5177, + "step": 44910 + }, + { + "epoch": 0.09074124201569993, + "grad_norm": 1254.9267578125, + "learning_rate": 8.984e-06, + "loss": 45.9213, + "step": 44920 + }, + { + "epoch": 0.09076144264838375, + "grad_norm": 237.966064453125, + "learning_rate": 8.986e-06, + "loss": 30.8989, + "step": 44930 + }, + { + "epoch": 0.09078164328106757, + "grad_norm": 495.72808837890625, + "learning_rate": 8.988000000000001e-06, + "loss": 33.639, + "step": 44940 + }, + { + "epoch": 0.09080184391375137, + "grad_norm": 653.3446044921875, + "learning_rate": 8.99e-06, + "loss": 36.9416, + "step": 44950 + }, + { + "epoch": 0.0908220445464352, + "grad_norm": 
40.03471755981445, + "learning_rate": 8.992000000000001e-06, + "loss": 15.2221, + "step": 44960 + }, + { + "epoch": 0.09084224517911901, + "grad_norm": 666.4488525390625, + "learning_rate": 8.994e-06, + "loss": 29.3564, + "step": 44970 + }, + { + "epoch": 0.09086244581180282, + "grad_norm": 497.86419677734375, + "learning_rate": 8.996e-06, + "loss": 29.4009, + "step": 44980 + }, + { + "epoch": 0.09088264644448664, + "grad_norm": 230.97999572753906, + "learning_rate": 8.998000000000001e-06, + "loss": 17.3159, + "step": 44990 + }, + { + "epoch": 0.09090284707717046, + "grad_norm": 1031.1046142578125, + "learning_rate": 9e-06, + "loss": 18.6158, + "step": 45000 + }, + { + "epoch": 0.09092304770985428, + "grad_norm": 1001.5123901367188, + "learning_rate": 9.002000000000001e-06, + "loss": 17.3041, + "step": 45010 + }, + { + "epoch": 0.09094324834253809, + "grad_norm": 458.33746337890625, + "learning_rate": 9.004e-06, + "loss": 26.3967, + "step": 45020 + }, + { + "epoch": 0.0909634489752219, + "grad_norm": 464.9259948730469, + "learning_rate": 9.006e-06, + "loss": 18.7797, + "step": 45030 + }, + { + "epoch": 0.09098364960790573, + "grad_norm": 177.4857177734375, + "learning_rate": 9.008e-06, + "loss": 20.0804, + "step": 45040 + }, + { + "epoch": 0.09100385024058953, + "grad_norm": 186.34747314453125, + "learning_rate": 9.01e-06, + "loss": 25.6041, + "step": 45050 + }, + { + "epoch": 0.09102405087327335, + "grad_norm": 156.54200744628906, + "learning_rate": 9.012000000000001e-06, + "loss": 14.926, + "step": 45060 + }, + { + "epoch": 0.09104425150595717, + "grad_norm": 1313.6944580078125, + "learning_rate": 9.014e-06, + "loss": 28.0868, + "step": 45070 + }, + { + "epoch": 0.09106445213864098, + "grad_norm": 1131.9775390625, + "learning_rate": 9.016e-06, + "loss": 32.2958, + "step": 45080 + }, + { + "epoch": 0.0910846527713248, + "grad_norm": 423.86138916015625, + "learning_rate": 9.018e-06, + "loss": 20.0051, + "step": 45090 + }, + { + "epoch": 0.09110485340400862, + "grad_norm": 537.714599609375, + "learning_rate": 9.020000000000002e-06, + "loss": 18.3262, + "step": 45100 + }, + { + "epoch": 0.09112505403669242, + "grad_norm": 1961.3375244140625, + "learning_rate": 9.022000000000001e-06, + "loss": 23.6604, + "step": 45110 + }, + { + "epoch": 0.09114525466937624, + "grad_norm": 424.4392395019531, + "learning_rate": 9.024e-06, + "loss": 45.3459, + "step": 45120 + }, + { + "epoch": 0.09116545530206006, + "grad_norm": 536.6264038085938, + "learning_rate": 9.026e-06, + "loss": 41.8265, + "step": 45130 + }, + { + "epoch": 0.09118565593474387, + "grad_norm": 195.9641571044922, + "learning_rate": 9.028e-06, + "loss": 29.6634, + "step": 45140 + }, + { + "epoch": 0.09120585656742769, + "grad_norm": 561.6903076171875, + "learning_rate": 9.030000000000002e-06, + "loss": 34.5336, + "step": 45150 + }, + { + "epoch": 0.09122605720011151, + "grad_norm": 404.424560546875, + "learning_rate": 9.032000000000001e-06, + "loss": 30.3016, + "step": 45160 + }, + { + "epoch": 0.09124625783279533, + "grad_norm": 404.8960266113281, + "learning_rate": 9.034e-06, + "loss": 23.3106, + "step": 45170 + }, + { + "epoch": 0.09126645846547914, + "grad_norm": 227.19601440429688, + "learning_rate": 9.036e-06, + "loss": 13.8677, + "step": 45180 + }, + { + "epoch": 0.09128665909816296, + "grad_norm": 1348.8074951171875, + "learning_rate": 9.038e-06, + "loss": 40.3152, + "step": 45190 + }, + { + "epoch": 0.09130685973084678, + "grad_norm": 285.83380126953125, + "learning_rate": 9.040000000000002e-06, + "loss": 26.9267, + "step": 45200 + 
}, + { + "epoch": 0.09132706036353058, + "grad_norm": 1111.412109375, + "learning_rate": 9.042e-06, + "loss": 17.8624, + "step": 45210 + }, + { + "epoch": 0.0913472609962144, + "grad_norm": 252.94573974609375, + "learning_rate": 9.044e-06, + "loss": 33.217, + "step": 45220 + }, + { + "epoch": 0.09136746162889822, + "grad_norm": 122.52388763427734, + "learning_rate": 9.046000000000001e-06, + "loss": 27.4571, + "step": 45230 + }, + { + "epoch": 0.09138766226158203, + "grad_norm": 1031.7841796875, + "learning_rate": 9.048e-06, + "loss": 31.5857, + "step": 45240 + }, + { + "epoch": 0.09140786289426585, + "grad_norm": 288.2206726074219, + "learning_rate": 9.050000000000001e-06, + "loss": 34.444, + "step": 45250 + }, + { + "epoch": 0.09142806352694967, + "grad_norm": 503.2816162109375, + "learning_rate": 9.052e-06, + "loss": 23.9389, + "step": 45260 + }, + { + "epoch": 0.09144826415963347, + "grad_norm": 79.8081283569336, + "learning_rate": 9.054e-06, + "loss": 36.626, + "step": 45270 + }, + { + "epoch": 0.0914684647923173, + "grad_norm": 74.55158233642578, + "learning_rate": 9.056000000000001e-06, + "loss": 15.2105, + "step": 45280 + }, + { + "epoch": 0.09148866542500111, + "grad_norm": 408.1317443847656, + "learning_rate": 9.058000000000002e-06, + "loss": 26.4177, + "step": 45290 + }, + { + "epoch": 0.09150886605768492, + "grad_norm": 183.99171447753906, + "learning_rate": 9.060000000000001e-06, + "loss": 28.0801, + "step": 45300 + }, + { + "epoch": 0.09152906669036874, + "grad_norm": 711.5023803710938, + "learning_rate": 9.062e-06, + "loss": 44.5351, + "step": 45310 + }, + { + "epoch": 0.09154926732305256, + "grad_norm": 684.1775512695312, + "learning_rate": 9.064e-06, + "loss": 23.5863, + "step": 45320 + }, + { + "epoch": 0.09156946795573638, + "grad_norm": 288.3268127441406, + "learning_rate": 9.066000000000001e-06, + "loss": 20.7055, + "step": 45330 + }, + { + "epoch": 0.09158966858842019, + "grad_norm": 383.0359802246094, + "learning_rate": 9.068000000000002e-06, + "loss": 32.7892, + "step": 45340 + }, + { + "epoch": 0.091609869221104, + "grad_norm": 602.8131103515625, + "learning_rate": 9.070000000000001e-06, + "loss": 30.4119, + "step": 45350 + }, + { + "epoch": 0.09163006985378783, + "grad_norm": 874.2801513671875, + "learning_rate": 9.072e-06, + "loss": 16.9795, + "step": 45360 + }, + { + "epoch": 0.09165027048647163, + "grad_norm": 43.25394058227539, + "learning_rate": 9.074e-06, + "loss": 23.7794, + "step": 45370 + }, + { + "epoch": 0.09167047111915545, + "grad_norm": 591.4110107421875, + "learning_rate": 9.076000000000001e-06, + "loss": 17.5977, + "step": 45380 + }, + { + "epoch": 0.09169067175183927, + "grad_norm": 294.79351806640625, + "learning_rate": 9.078000000000002e-06, + "loss": 43.4651, + "step": 45390 + }, + { + "epoch": 0.09171087238452308, + "grad_norm": 596.6688232421875, + "learning_rate": 9.080000000000001e-06, + "loss": 21.6313, + "step": 45400 + }, + { + "epoch": 0.0917310730172069, + "grad_norm": 666.8948364257812, + "learning_rate": 9.082e-06, + "loss": 24.9612, + "step": 45410 + }, + { + "epoch": 0.09175127364989072, + "grad_norm": 1062.8319091796875, + "learning_rate": 9.084e-06, + "loss": 35.6636, + "step": 45420 + }, + { + "epoch": 0.09177147428257452, + "grad_norm": 472.1858825683594, + "learning_rate": 9.086e-06, + "loss": 21.0453, + "step": 45430 + }, + { + "epoch": 0.09179167491525834, + "grad_norm": 104.99319458007812, + "learning_rate": 9.088000000000002e-06, + "loss": 22.8547, + "step": 45440 + }, + { + "epoch": 0.09181187554794216, + "grad_norm": 
627.6539916992188, + "learning_rate": 9.090000000000001e-06, + "loss": 39.3272, + "step": 45450 + }, + { + "epoch": 0.09183207618062597, + "grad_norm": 1194.921630859375, + "learning_rate": 9.092e-06, + "loss": 35.9621, + "step": 45460 + }, + { + "epoch": 0.09185227681330979, + "grad_norm": 295.4495849609375, + "learning_rate": 9.094000000000001e-06, + "loss": 33.5286, + "step": 45470 + }, + { + "epoch": 0.09187247744599361, + "grad_norm": 303.76641845703125, + "learning_rate": 9.096e-06, + "loss": 16.3219, + "step": 45480 + }, + { + "epoch": 0.09189267807867743, + "grad_norm": 308.8945617675781, + "learning_rate": 9.098000000000002e-06, + "loss": 11.375, + "step": 45490 + }, + { + "epoch": 0.09191287871136124, + "grad_norm": 264.4830322265625, + "learning_rate": 9.100000000000001e-06, + "loss": 17.8894, + "step": 45500 + }, + { + "epoch": 0.09193307934404506, + "grad_norm": 859.7493896484375, + "learning_rate": 9.102e-06, + "loss": 28.4209, + "step": 45510 + }, + { + "epoch": 0.09195327997672888, + "grad_norm": 277.9159240722656, + "learning_rate": 9.104000000000001e-06, + "loss": 31.0762, + "step": 45520 + }, + { + "epoch": 0.09197348060941268, + "grad_norm": 779.0860595703125, + "learning_rate": 9.106e-06, + "loss": 32.6603, + "step": 45530 + }, + { + "epoch": 0.0919936812420965, + "grad_norm": 305.54998779296875, + "learning_rate": 9.108000000000002e-06, + "loss": 23.2495, + "step": 45540 + }, + { + "epoch": 0.09201388187478032, + "grad_norm": 478.4544982910156, + "learning_rate": 9.110000000000001e-06, + "loss": 25.226, + "step": 45550 + }, + { + "epoch": 0.09203408250746413, + "grad_norm": 795.5175170898438, + "learning_rate": 9.112e-06, + "loss": 24.1799, + "step": 45560 + }, + { + "epoch": 0.09205428314014795, + "grad_norm": 347.9759216308594, + "learning_rate": 9.114000000000001e-06, + "loss": 18.2694, + "step": 45570 + }, + { + "epoch": 0.09207448377283177, + "grad_norm": 580.8516235351562, + "learning_rate": 9.116e-06, + "loss": 14.3688, + "step": 45580 + }, + { + "epoch": 0.09209468440551558, + "grad_norm": 573.6776733398438, + "learning_rate": 9.118000000000001e-06, + "loss": 30.0077, + "step": 45590 + }, + { + "epoch": 0.0921148850381994, + "grad_norm": 477.5769348144531, + "learning_rate": 9.12e-06, + "loss": 21.5053, + "step": 45600 + }, + { + "epoch": 0.09213508567088322, + "grad_norm": 404.3766174316406, + "learning_rate": 9.122e-06, + "loss": 40.801, + "step": 45610 + }, + { + "epoch": 0.09215528630356702, + "grad_norm": 420.88336181640625, + "learning_rate": 9.124000000000001e-06, + "loss": 20.7538, + "step": 45620 + }, + { + "epoch": 0.09217548693625084, + "grad_norm": 280.4460144042969, + "learning_rate": 9.126e-06, + "loss": 29.4988, + "step": 45630 + }, + { + "epoch": 0.09219568756893466, + "grad_norm": 323.4939880371094, + "learning_rate": 9.128e-06, + "loss": 27.4168, + "step": 45640 + }, + { + "epoch": 0.09221588820161847, + "grad_norm": 575.996826171875, + "learning_rate": 9.13e-06, + "loss": 20.6319, + "step": 45650 + }, + { + "epoch": 0.09223608883430229, + "grad_norm": 690.1239013671875, + "learning_rate": 9.132000000000002e-06, + "loss": 21.5123, + "step": 45660 + }, + { + "epoch": 0.09225628946698611, + "grad_norm": 2189.362060546875, + "learning_rate": 9.134000000000001e-06, + "loss": 32.8427, + "step": 45670 + }, + { + "epoch": 0.09227649009966993, + "grad_norm": 559.661865234375, + "learning_rate": 9.136e-06, + "loss": 23.2627, + "step": 45680 + }, + { + "epoch": 0.09229669073235373, + "grad_norm": 852.9085083007812, + "learning_rate": 9.138e-06, + 
"loss": 21.4998, + "step": 45690 + }, + { + "epoch": 0.09231689136503755, + "grad_norm": 159.3247528076172, + "learning_rate": 9.14e-06, + "loss": 23.1891, + "step": 45700 + }, + { + "epoch": 0.09233709199772137, + "grad_norm": 492.49896240234375, + "learning_rate": 9.142000000000002e-06, + "loss": 33.5397, + "step": 45710 + }, + { + "epoch": 0.09235729263040518, + "grad_norm": 287.7591247558594, + "learning_rate": 9.144000000000001e-06, + "loss": 35.0218, + "step": 45720 + }, + { + "epoch": 0.092377493263089, + "grad_norm": 223.2801055908203, + "learning_rate": 9.146e-06, + "loss": 15.031, + "step": 45730 + }, + { + "epoch": 0.09239769389577282, + "grad_norm": 201.246826171875, + "learning_rate": 9.148e-06, + "loss": 27.2919, + "step": 45740 + }, + { + "epoch": 0.09241789452845663, + "grad_norm": 520.645751953125, + "learning_rate": 9.15e-06, + "loss": 20.69, + "step": 45750 + }, + { + "epoch": 0.09243809516114045, + "grad_norm": 602.8200073242188, + "learning_rate": 9.152000000000001e-06, + "loss": 11.4767, + "step": 45760 + }, + { + "epoch": 0.09245829579382427, + "grad_norm": 380.5906982421875, + "learning_rate": 9.154e-06, + "loss": 21.1936, + "step": 45770 + }, + { + "epoch": 0.09247849642650807, + "grad_norm": 442.836181640625, + "learning_rate": 9.156e-06, + "loss": 43.0645, + "step": 45780 + }, + { + "epoch": 0.09249869705919189, + "grad_norm": 434.8324890136719, + "learning_rate": 9.158e-06, + "loss": 22.0832, + "step": 45790 + }, + { + "epoch": 0.09251889769187571, + "grad_norm": 540.1774291992188, + "learning_rate": 9.16e-06, + "loss": 24.301, + "step": 45800 + }, + { + "epoch": 0.09253909832455952, + "grad_norm": 759.1398315429688, + "learning_rate": 9.162000000000001e-06, + "loss": 38.7936, + "step": 45810 + }, + { + "epoch": 0.09255929895724334, + "grad_norm": 196.523681640625, + "learning_rate": 9.164e-06, + "loss": 29.7429, + "step": 45820 + }, + { + "epoch": 0.09257949958992716, + "grad_norm": 374.3005676269531, + "learning_rate": 9.166e-06, + "loss": 28.2329, + "step": 45830 + }, + { + "epoch": 0.09259970022261098, + "grad_norm": 242.35548400878906, + "learning_rate": 9.168000000000001e-06, + "loss": 37.0264, + "step": 45840 + }, + { + "epoch": 0.09261990085529478, + "grad_norm": 605.6915893554688, + "learning_rate": 9.17e-06, + "loss": 51.7208, + "step": 45850 + }, + { + "epoch": 0.0926401014879786, + "grad_norm": 581.0315551757812, + "learning_rate": 9.172000000000001e-06, + "loss": 32.1027, + "step": 45860 + }, + { + "epoch": 0.09266030212066242, + "grad_norm": 395.7412414550781, + "learning_rate": 9.174e-06, + "loss": 31.1553, + "step": 45870 + }, + { + "epoch": 0.09268050275334623, + "grad_norm": 244.5458221435547, + "learning_rate": 9.176e-06, + "loss": 17.0078, + "step": 45880 + }, + { + "epoch": 0.09270070338603005, + "grad_norm": 1080.458984375, + "learning_rate": 9.178000000000001e-06, + "loss": 23.8676, + "step": 45890 + }, + { + "epoch": 0.09272090401871387, + "grad_norm": 505.7313232421875, + "learning_rate": 9.180000000000002e-06, + "loss": 21.9754, + "step": 45900 + }, + { + "epoch": 0.09274110465139768, + "grad_norm": 414.4971618652344, + "learning_rate": 9.182000000000001e-06, + "loss": 24.1529, + "step": 45910 + }, + { + "epoch": 0.0927613052840815, + "grad_norm": 373.2899169921875, + "learning_rate": 9.184e-06, + "loss": 30.3035, + "step": 45920 + }, + { + "epoch": 0.09278150591676532, + "grad_norm": 712.4485473632812, + "learning_rate": 9.186e-06, + "loss": 32.8662, + "step": 45930 + }, + { + "epoch": 0.09280170654944912, + "grad_norm": 
501.9930419921875, + "learning_rate": 9.188e-06, + "loss": 22.7343, + "step": 45940 + }, + { + "epoch": 0.09282190718213294, + "grad_norm": 407.44189453125, + "learning_rate": 9.190000000000002e-06, + "loss": 21.3925, + "step": 45950 + }, + { + "epoch": 0.09284210781481676, + "grad_norm": 778.7462768554688, + "learning_rate": 9.192000000000001e-06, + "loss": 17.9243, + "step": 45960 + }, + { + "epoch": 0.09286230844750057, + "grad_norm": 452.5445251464844, + "learning_rate": 9.194e-06, + "loss": 21.0491, + "step": 45970 + }, + { + "epoch": 0.09288250908018439, + "grad_norm": 513.406005859375, + "learning_rate": 9.196e-06, + "loss": 33.8423, + "step": 45980 + }, + { + "epoch": 0.09290270971286821, + "grad_norm": 500.4651794433594, + "learning_rate": 9.198e-06, + "loss": 26.1923, + "step": 45990 + }, + { + "epoch": 0.09292291034555203, + "grad_norm": 803.8782348632812, + "learning_rate": 9.200000000000002e-06, + "loss": 16.747, + "step": 46000 + }, + { + "epoch": 0.09294311097823584, + "grad_norm": 263.3955078125, + "learning_rate": 9.202000000000001e-06, + "loss": 19.4754, + "step": 46010 + }, + { + "epoch": 0.09296331161091966, + "grad_norm": 863.2661743164062, + "learning_rate": 9.204e-06, + "loss": 26.7707, + "step": 46020 + }, + { + "epoch": 0.09298351224360348, + "grad_norm": 619.3812255859375, + "learning_rate": 9.206000000000001e-06, + "loss": 17.0884, + "step": 46030 + }, + { + "epoch": 0.09300371287628728, + "grad_norm": 764.2350463867188, + "learning_rate": 9.208e-06, + "loss": 19.7395, + "step": 46040 + }, + { + "epoch": 0.0930239135089711, + "grad_norm": 128.36607360839844, + "learning_rate": 9.210000000000002e-06, + "loss": 24.6796, + "step": 46050 + }, + { + "epoch": 0.09304411414165492, + "grad_norm": 430.3720703125, + "learning_rate": 9.212000000000001e-06, + "loss": 36.0097, + "step": 46060 + }, + { + "epoch": 0.09306431477433873, + "grad_norm": 1156.9661865234375, + "learning_rate": 9.214e-06, + "loss": 35.3924, + "step": 46070 + }, + { + "epoch": 0.09308451540702255, + "grad_norm": 588.6331787109375, + "learning_rate": 9.216000000000001e-06, + "loss": 23.5478, + "step": 46080 + }, + { + "epoch": 0.09310471603970637, + "grad_norm": 320.4897766113281, + "learning_rate": 9.218e-06, + "loss": 110.5782, + "step": 46090 + }, + { + "epoch": 0.09312491667239017, + "grad_norm": 401.3931579589844, + "learning_rate": 9.220000000000002e-06, + "loss": 31.4041, + "step": 46100 + }, + { + "epoch": 0.093145117305074, + "grad_norm": 779.8281860351562, + "learning_rate": 9.222e-06, + "loss": 20.6462, + "step": 46110 + }, + { + "epoch": 0.09316531793775781, + "grad_norm": 177.2711944580078, + "learning_rate": 9.224e-06, + "loss": 28.436, + "step": 46120 + }, + { + "epoch": 0.09318551857044162, + "grad_norm": 483.94451904296875, + "learning_rate": 9.226000000000001e-06, + "loss": 23.6505, + "step": 46130 + }, + { + "epoch": 0.09320571920312544, + "grad_norm": 930.6395874023438, + "learning_rate": 9.228e-06, + "loss": 22.6424, + "step": 46140 + }, + { + "epoch": 0.09322591983580926, + "grad_norm": 684.120849609375, + "learning_rate": 9.230000000000001e-06, + "loss": 29.6059, + "step": 46150 + }, + { + "epoch": 0.09324612046849308, + "grad_norm": 332.86712646484375, + "learning_rate": 9.232e-06, + "loss": 33.6476, + "step": 46160 + }, + { + "epoch": 0.09326632110117689, + "grad_norm": 692.9288330078125, + "learning_rate": 9.234e-06, + "loss": 41.6462, + "step": 46170 + }, + { + "epoch": 0.0932865217338607, + "grad_norm": 852.34326171875, + "learning_rate": 9.236000000000001e-06, + "loss": 
29.988, + "step": 46180 + }, + { + "epoch": 0.09330672236654453, + "grad_norm": 610.2581176757812, + "learning_rate": 9.238e-06, + "loss": 35.6251, + "step": 46190 + }, + { + "epoch": 0.09332692299922833, + "grad_norm": 218.29176330566406, + "learning_rate": 9.240000000000001e-06, + "loss": 12.5734, + "step": 46200 + }, + { + "epoch": 0.09334712363191215, + "grad_norm": 421.0220947265625, + "learning_rate": 9.242e-06, + "loss": 28.3077, + "step": 46210 + }, + { + "epoch": 0.09336732426459597, + "grad_norm": 361.57086181640625, + "learning_rate": 9.244e-06, + "loss": 27.5079, + "step": 46220 + }, + { + "epoch": 0.09338752489727978, + "grad_norm": 703.63525390625, + "learning_rate": 9.246000000000001e-06, + "loss": 19.9812, + "step": 46230 + }, + { + "epoch": 0.0934077255299636, + "grad_norm": 85.08383178710938, + "learning_rate": 9.248e-06, + "loss": 17.4826, + "step": 46240 + }, + { + "epoch": 0.09342792616264742, + "grad_norm": 6.185848236083984, + "learning_rate": 9.250000000000001e-06, + "loss": 28.1113, + "step": 46250 + }, + { + "epoch": 0.09344812679533122, + "grad_norm": 1017.949951171875, + "learning_rate": 9.252e-06, + "loss": 33.4018, + "step": 46260 + }, + { + "epoch": 0.09346832742801504, + "grad_norm": 175.9057159423828, + "learning_rate": 9.254000000000002e-06, + "loss": 26.9791, + "step": 46270 + }, + { + "epoch": 0.09348852806069886, + "grad_norm": 471.81842041015625, + "learning_rate": 9.256e-06, + "loss": 18.282, + "step": 46280 + }, + { + "epoch": 0.09350872869338267, + "grad_norm": 694.974609375, + "learning_rate": 9.258e-06, + "loss": 37.9084, + "step": 46290 + }, + { + "epoch": 0.09352892932606649, + "grad_norm": 1199.8182373046875, + "learning_rate": 9.260000000000001e-06, + "loss": 33.2196, + "step": 46300 + }, + { + "epoch": 0.09354912995875031, + "grad_norm": 324.40020751953125, + "learning_rate": 9.262e-06, + "loss": 24.762, + "step": 46310 + }, + { + "epoch": 0.09356933059143413, + "grad_norm": 673.99951171875, + "learning_rate": 9.264000000000001e-06, + "loss": 28.9734, + "step": 46320 + }, + { + "epoch": 0.09358953122411794, + "grad_norm": 576.8762817382812, + "learning_rate": 9.266e-06, + "loss": 30.6695, + "step": 46330 + }, + { + "epoch": 0.09360973185680176, + "grad_norm": 605.3089599609375, + "learning_rate": 9.268e-06, + "loss": 18.2871, + "step": 46340 + }, + { + "epoch": 0.09362993248948558, + "grad_norm": 347.7716979980469, + "learning_rate": 9.270000000000001e-06, + "loss": 25.4771, + "step": 46350 + }, + { + "epoch": 0.09365013312216938, + "grad_norm": 317.8143310546875, + "learning_rate": 9.272e-06, + "loss": 30.3004, + "step": 46360 + }, + { + "epoch": 0.0936703337548532, + "grad_norm": 277.15118408203125, + "learning_rate": 9.274000000000001e-06, + "loss": 16.8318, + "step": 46370 + }, + { + "epoch": 0.09369053438753702, + "grad_norm": 454.1064453125, + "learning_rate": 9.276e-06, + "loss": 47.5036, + "step": 46380 + }, + { + "epoch": 0.09371073502022083, + "grad_norm": 611.6060791015625, + "learning_rate": 9.278e-06, + "loss": 28.1561, + "step": 46390 + }, + { + "epoch": 0.09373093565290465, + "grad_norm": 836.2199096679688, + "learning_rate": 9.280000000000001e-06, + "loss": 34.3666, + "step": 46400 + }, + { + "epoch": 0.09375113628558847, + "grad_norm": 332.7566223144531, + "learning_rate": 9.282e-06, + "loss": 22.9057, + "step": 46410 + }, + { + "epoch": 0.09377133691827227, + "grad_norm": 728.076171875, + "learning_rate": 9.284000000000001e-06, + "loss": 42.9878, + "step": 46420 + }, + { + "epoch": 0.0937915375509561, + "grad_norm": 
1112.4063720703125, + "learning_rate": 9.286e-06, + "loss": 33.806, + "step": 46430 + }, + { + "epoch": 0.09381173818363991, + "grad_norm": 353.80694580078125, + "learning_rate": 9.288e-06, + "loss": 26.4135, + "step": 46440 + }, + { + "epoch": 0.09383193881632372, + "grad_norm": 497.838134765625, + "learning_rate": 9.29e-06, + "loss": 64.492, + "step": 46450 + }, + { + "epoch": 0.09385213944900754, + "grad_norm": 262.32928466796875, + "learning_rate": 9.292000000000002e-06, + "loss": 35.9133, + "step": 46460 + }, + { + "epoch": 0.09387234008169136, + "grad_norm": 176.88645935058594, + "learning_rate": 9.294000000000001e-06, + "loss": 28.2898, + "step": 46470 + }, + { + "epoch": 0.09389254071437518, + "grad_norm": 167.89064025878906, + "learning_rate": 9.296e-06, + "loss": 7.748, + "step": 46480 + }, + { + "epoch": 0.09391274134705899, + "grad_norm": 33.82958221435547, + "learning_rate": 9.298e-06, + "loss": 8.3599, + "step": 46490 + }, + { + "epoch": 0.0939329419797428, + "grad_norm": 691.0979614257812, + "learning_rate": 9.3e-06, + "loss": 17.1772, + "step": 46500 + }, + { + "epoch": 0.09395314261242663, + "grad_norm": 238.71844482421875, + "learning_rate": 9.302000000000002e-06, + "loss": 26.17, + "step": 46510 + }, + { + "epoch": 0.09397334324511043, + "grad_norm": 457.35858154296875, + "learning_rate": 9.304000000000001e-06, + "loss": 23.229, + "step": 46520 + }, + { + "epoch": 0.09399354387779425, + "grad_norm": 219.3545379638672, + "learning_rate": 9.306e-06, + "loss": 13.5186, + "step": 46530 + }, + { + "epoch": 0.09401374451047807, + "grad_norm": 238.89414978027344, + "learning_rate": 9.308e-06, + "loss": 14.7598, + "step": 46540 + }, + { + "epoch": 0.09403394514316188, + "grad_norm": 729.1282348632812, + "learning_rate": 9.31e-06, + "loss": 31.1767, + "step": 46550 + }, + { + "epoch": 0.0940541457758457, + "grad_norm": 317.5440673828125, + "learning_rate": 9.312000000000002e-06, + "loss": 18.8898, + "step": 46560 + }, + { + "epoch": 0.09407434640852952, + "grad_norm": 462.8973083496094, + "learning_rate": 9.314000000000001e-06, + "loss": 20.6372, + "step": 46570 + }, + { + "epoch": 0.09409454704121333, + "grad_norm": 206.12710571289062, + "learning_rate": 9.316e-06, + "loss": 14.3806, + "step": 46580 + }, + { + "epoch": 0.09411474767389715, + "grad_norm": 1026.1854248046875, + "learning_rate": 9.318e-06, + "loss": 29.5411, + "step": 46590 + }, + { + "epoch": 0.09413494830658097, + "grad_norm": 604.3740234375, + "learning_rate": 9.32e-06, + "loss": 15.6268, + "step": 46600 + }, + { + "epoch": 0.09415514893926477, + "grad_norm": 230.26522827148438, + "learning_rate": 9.322000000000002e-06, + "loss": 15.1666, + "step": 46610 + }, + { + "epoch": 0.09417534957194859, + "grad_norm": 204.7043914794922, + "learning_rate": 9.324000000000001e-06, + "loss": 17.7947, + "step": 46620 + }, + { + "epoch": 0.09419555020463241, + "grad_norm": 1054.12255859375, + "learning_rate": 9.326e-06, + "loss": 35.5986, + "step": 46630 + }, + { + "epoch": 0.09421575083731623, + "grad_norm": 1029.0006103515625, + "learning_rate": 9.328000000000001e-06, + "loss": 29.8678, + "step": 46640 + }, + { + "epoch": 0.09423595147000004, + "grad_norm": 311.5683288574219, + "learning_rate": 9.33e-06, + "loss": 35.0463, + "step": 46650 + }, + { + "epoch": 0.09425615210268386, + "grad_norm": 88.19459533691406, + "learning_rate": 9.332000000000001e-06, + "loss": 19.4599, + "step": 46660 + }, + { + "epoch": 0.09427635273536768, + "grad_norm": 1075.1103515625, + "learning_rate": 9.334e-06, + "loss": 35.3525, + "step": 46670 + 
}, + { + "epoch": 0.09429655336805148, + "grad_norm": 277.8233947753906, + "learning_rate": 9.336e-06, + "loss": 9.9575, + "step": 46680 + }, + { + "epoch": 0.0943167540007353, + "grad_norm": 454.8833923339844, + "learning_rate": 9.338000000000001e-06, + "loss": 18.5828, + "step": 46690 + }, + { + "epoch": 0.09433695463341912, + "grad_norm": 599.8738403320312, + "learning_rate": 9.340000000000002e-06, + "loss": 18.3383, + "step": 46700 + }, + { + "epoch": 0.09435715526610293, + "grad_norm": 1908.4217529296875, + "learning_rate": 9.342000000000001e-06, + "loss": 29.3225, + "step": 46710 + }, + { + "epoch": 0.09437735589878675, + "grad_norm": 559.5934448242188, + "learning_rate": 9.344e-06, + "loss": 14.9467, + "step": 46720 + }, + { + "epoch": 0.09439755653147057, + "grad_norm": 132.86239624023438, + "learning_rate": 9.346e-06, + "loss": 28.6968, + "step": 46730 + }, + { + "epoch": 0.09441775716415438, + "grad_norm": 344.94012451171875, + "learning_rate": 9.348000000000001e-06, + "loss": 14.2309, + "step": 46740 + }, + { + "epoch": 0.0944379577968382, + "grad_norm": 701.255859375, + "learning_rate": 9.350000000000002e-06, + "loss": 20.1366, + "step": 46750 + }, + { + "epoch": 0.09445815842952202, + "grad_norm": 436.1933288574219, + "learning_rate": 9.352000000000001e-06, + "loss": 39.7034, + "step": 46760 + }, + { + "epoch": 0.09447835906220582, + "grad_norm": 484.61767578125, + "learning_rate": 9.354e-06, + "loss": 17.1705, + "step": 46770 + }, + { + "epoch": 0.09449855969488964, + "grad_norm": 287.7072448730469, + "learning_rate": 9.356e-06, + "loss": 13.5601, + "step": 46780 + }, + { + "epoch": 0.09451876032757346, + "grad_norm": 528.2890625, + "learning_rate": 9.358000000000001e-06, + "loss": 28.1622, + "step": 46790 + }, + { + "epoch": 0.09453896096025728, + "grad_norm": 734.2177734375, + "learning_rate": 9.360000000000002e-06, + "loss": 42.813, + "step": 46800 + }, + { + "epoch": 0.09455916159294109, + "grad_norm": 280.38079833984375, + "learning_rate": 9.362000000000001e-06, + "loss": 26.2074, + "step": 46810 + }, + { + "epoch": 0.09457936222562491, + "grad_norm": 533.17529296875, + "learning_rate": 9.364e-06, + "loss": 27.4847, + "step": 46820 + }, + { + "epoch": 0.09459956285830873, + "grad_norm": 298.01092529296875, + "learning_rate": 9.366000000000001e-06, + "loss": 20.3961, + "step": 46830 + }, + { + "epoch": 0.09461976349099253, + "grad_norm": 490.7327575683594, + "learning_rate": 9.368e-06, + "loss": 20.4112, + "step": 46840 + }, + { + "epoch": 0.09463996412367635, + "grad_norm": 352.32666015625, + "learning_rate": 9.370000000000002e-06, + "loss": 14.1502, + "step": 46850 + }, + { + "epoch": 0.09466016475636017, + "grad_norm": 244.9453125, + "learning_rate": 9.372000000000001e-06, + "loss": 15.6283, + "step": 46860 + }, + { + "epoch": 0.09468036538904398, + "grad_norm": 1122.0040283203125, + "learning_rate": 9.374e-06, + "loss": 30.7566, + "step": 46870 + }, + { + "epoch": 0.0947005660217278, + "grad_norm": 325.7804870605469, + "learning_rate": 9.376000000000001e-06, + "loss": 28.0684, + "step": 46880 + }, + { + "epoch": 0.09472076665441162, + "grad_norm": 490.7943115234375, + "learning_rate": 9.378e-06, + "loss": 23.9426, + "step": 46890 + }, + { + "epoch": 0.09474096728709543, + "grad_norm": 305.4912414550781, + "learning_rate": 9.38e-06, + "loss": 39.7157, + "step": 46900 + }, + { + "epoch": 0.09476116791977925, + "grad_norm": 268.6631164550781, + "learning_rate": 9.382000000000001e-06, + "loss": 27.5846, + "step": 46910 + }, + { + "epoch": 0.09478136855246307, + 
"grad_norm": 17.4284725189209, + "learning_rate": 9.384e-06, + "loss": 13.5578, + "step": 46920 + }, + { + "epoch": 0.09480156918514687, + "grad_norm": 496.9639587402344, + "learning_rate": 9.386000000000001e-06, + "loss": 16.1514, + "step": 46930 + }, + { + "epoch": 0.09482176981783069, + "grad_norm": 1134.2791748046875, + "learning_rate": 9.388e-06, + "loss": 31.3823, + "step": 46940 + }, + { + "epoch": 0.09484197045051451, + "grad_norm": 445.1515808105469, + "learning_rate": 9.39e-06, + "loss": 17.084, + "step": 46950 + }, + { + "epoch": 0.09486217108319833, + "grad_norm": 311.2286682128906, + "learning_rate": 9.392000000000001e-06, + "loss": 19.7756, + "step": 46960 + }, + { + "epoch": 0.09488237171588214, + "grad_norm": 1450.7786865234375, + "learning_rate": 9.394e-06, + "loss": 33.0113, + "step": 46970 + }, + { + "epoch": 0.09490257234856596, + "grad_norm": 1356.1912841796875, + "learning_rate": 9.396000000000001e-06, + "loss": 40.1539, + "step": 46980 + }, + { + "epoch": 0.09492277298124978, + "grad_norm": 563.09912109375, + "learning_rate": 9.398e-06, + "loss": 32.9458, + "step": 46990 + }, + { + "epoch": 0.09494297361393358, + "grad_norm": 822.3256225585938, + "learning_rate": 9.4e-06, + "loss": 34.4446, + "step": 47000 + }, + { + "epoch": 0.0949631742466174, + "grad_norm": 885.6978759765625, + "learning_rate": 9.402e-06, + "loss": 22.9885, + "step": 47010 + }, + { + "epoch": 0.09498337487930122, + "grad_norm": 535.3980102539062, + "learning_rate": 9.404e-06, + "loss": 27.7092, + "step": 47020 + }, + { + "epoch": 0.09500357551198503, + "grad_norm": 529.09619140625, + "learning_rate": 9.406000000000001e-06, + "loss": 31.665, + "step": 47030 + }, + { + "epoch": 0.09502377614466885, + "grad_norm": 475.2674255371094, + "learning_rate": 9.408e-06, + "loss": 29.4506, + "step": 47040 + }, + { + "epoch": 0.09504397677735267, + "grad_norm": 451.7756652832031, + "learning_rate": 9.41e-06, + "loss": 36.2947, + "step": 47050 + }, + { + "epoch": 0.09506417741003648, + "grad_norm": 88.78792572021484, + "learning_rate": 9.412e-06, + "loss": 11.6324, + "step": 47060 + }, + { + "epoch": 0.0950843780427203, + "grad_norm": 658.9125366210938, + "learning_rate": 9.414000000000002e-06, + "loss": 22.183, + "step": 47070 + }, + { + "epoch": 0.09510457867540412, + "grad_norm": 284.6681823730469, + "learning_rate": 9.416000000000001e-06, + "loss": 13.6106, + "step": 47080 + }, + { + "epoch": 0.09512477930808792, + "grad_norm": 410.4359436035156, + "learning_rate": 9.418e-06, + "loss": 36.8253, + "step": 47090 + }, + { + "epoch": 0.09514497994077174, + "grad_norm": 983.0914306640625, + "learning_rate": 9.42e-06, + "loss": 19.7314, + "step": 47100 + }, + { + "epoch": 0.09516518057345556, + "grad_norm": 245.94956970214844, + "learning_rate": 9.422e-06, + "loss": 28.235, + "step": 47110 + }, + { + "epoch": 0.09518538120613938, + "grad_norm": 606.5446166992188, + "learning_rate": 9.424000000000002e-06, + "loss": 16.4744, + "step": 47120 + }, + { + "epoch": 0.09520558183882319, + "grad_norm": 252.76507568359375, + "learning_rate": 9.426000000000001e-06, + "loss": 18.8907, + "step": 47130 + }, + { + "epoch": 0.09522578247150701, + "grad_norm": 486.3790283203125, + "learning_rate": 9.428e-06, + "loss": 27.5937, + "step": 47140 + }, + { + "epoch": 0.09524598310419083, + "grad_norm": 640.2138671875, + "learning_rate": 9.43e-06, + "loss": 32.7064, + "step": 47150 + }, + { + "epoch": 0.09526618373687464, + "grad_norm": 300.470458984375, + "learning_rate": 9.432e-06, + "loss": 14.2572, + "step": 47160 + }, + { + 
"epoch": 0.09528638436955846, + "grad_norm": 368.8597717285156, + "learning_rate": 9.434000000000001e-06, + "loss": 13.106, + "step": 47170 + }, + { + "epoch": 0.09530658500224228, + "grad_norm": 1.5726017951965332, + "learning_rate": 9.436e-06, + "loss": 20.2675, + "step": 47180 + }, + { + "epoch": 0.09532678563492608, + "grad_norm": 236.24609375, + "learning_rate": 9.438e-06, + "loss": 23.3547, + "step": 47190 + }, + { + "epoch": 0.0953469862676099, + "grad_norm": 131.8438262939453, + "learning_rate": 9.440000000000001e-06, + "loss": 36.904, + "step": 47200 + }, + { + "epoch": 0.09536718690029372, + "grad_norm": 385.8686218261719, + "learning_rate": 9.442e-06, + "loss": 22.8752, + "step": 47210 + }, + { + "epoch": 0.09538738753297753, + "grad_norm": 147.48646545410156, + "learning_rate": 9.444000000000001e-06, + "loss": 27.6462, + "step": 47220 + }, + { + "epoch": 0.09540758816566135, + "grad_norm": 641.9337158203125, + "learning_rate": 9.446e-06, + "loss": 32.553, + "step": 47230 + }, + { + "epoch": 0.09542778879834517, + "grad_norm": 379.5395812988281, + "learning_rate": 9.448e-06, + "loss": 29.307, + "step": 47240 + }, + { + "epoch": 0.09544798943102897, + "grad_norm": 647.9698486328125, + "learning_rate": 9.450000000000001e-06, + "loss": 44.1172, + "step": 47250 + }, + { + "epoch": 0.0954681900637128, + "grad_norm": 380.27032470703125, + "learning_rate": 9.452000000000002e-06, + "loss": 32.1924, + "step": 47260 + }, + { + "epoch": 0.09548839069639661, + "grad_norm": 618.8623657226562, + "learning_rate": 9.454000000000001e-06, + "loss": 20.6941, + "step": 47270 + }, + { + "epoch": 0.09550859132908043, + "grad_norm": 189.7655792236328, + "learning_rate": 9.456e-06, + "loss": 22.5058, + "step": 47280 + }, + { + "epoch": 0.09552879196176424, + "grad_norm": 615.1441650390625, + "learning_rate": 9.458e-06, + "loss": 38.724, + "step": 47290 + }, + { + "epoch": 0.09554899259444806, + "grad_norm": 760.5025634765625, + "learning_rate": 9.460000000000001e-06, + "loss": 42.7108, + "step": 47300 + }, + { + "epoch": 0.09556919322713188, + "grad_norm": 254.2874298095703, + "learning_rate": 9.462000000000002e-06, + "loss": 19.7814, + "step": 47310 + }, + { + "epoch": 0.09558939385981569, + "grad_norm": 364.7778015136719, + "learning_rate": 9.464000000000001e-06, + "loss": 24.5197, + "step": 47320 + }, + { + "epoch": 0.0956095944924995, + "grad_norm": 336.9996643066406, + "learning_rate": 9.466e-06, + "loss": 13.6541, + "step": 47330 + }, + { + "epoch": 0.09562979512518333, + "grad_norm": 495.0628967285156, + "learning_rate": 9.468e-06, + "loss": 36.6066, + "step": 47340 + }, + { + "epoch": 0.09564999575786713, + "grad_norm": 320.690673828125, + "learning_rate": 9.47e-06, + "loss": 41.1295, + "step": 47350 + }, + { + "epoch": 0.09567019639055095, + "grad_norm": 56.88438415527344, + "learning_rate": 9.472000000000002e-06, + "loss": 32.3294, + "step": 47360 + }, + { + "epoch": 0.09569039702323477, + "grad_norm": 865.4175415039062, + "learning_rate": 9.474000000000001e-06, + "loss": 27.6809, + "step": 47370 + }, + { + "epoch": 0.09571059765591858, + "grad_norm": 591.1318359375, + "learning_rate": 9.476e-06, + "loss": 30.3608, + "step": 47380 + }, + { + "epoch": 0.0957307982886024, + "grad_norm": 485.3149108886719, + "learning_rate": 9.478e-06, + "loss": 30.1645, + "step": 47390 + }, + { + "epoch": 0.09575099892128622, + "grad_norm": 582.667236328125, + "learning_rate": 9.48e-06, + "loss": 31.4972, + "step": 47400 + }, + { + "epoch": 0.09577119955397002, + "grad_norm": 919.2569580078125, + 
"learning_rate": 9.482000000000002e-06, + "loss": 37.2458, + "step": 47410 + }, + { + "epoch": 0.09579140018665384, + "grad_norm": 324.873291015625, + "learning_rate": 9.484000000000001e-06, + "loss": 21.6334, + "step": 47420 + }, + { + "epoch": 0.09581160081933766, + "grad_norm": 403.7833251953125, + "learning_rate": 9.486e-06, + "loss": 30.9771, + "step": 47430 + }, + { + "epoch": 0.09583180145202148, + "grad_norm": 845.0510864257812, + "learning_rate": 9.488000000000001e-06, + "loss": 20.9893, + "step": 47440 + }, + { + "epoch": 0.09585200208470529, + "grad_norm": 737.4741821289062, + "learning_rate": 9.49e-06, + "loss": 33.6122, + "step": 47450 + }, + { + "epoch": 0.09587220271738911, + "grad_norm": 3.5464887619018555, + "learning_rate": 9.492000000000002e-06, + "loss": 22.8622, + "step": 47460 + }, + { + "epoch": 0.09589240335007293, + "grad_norm": 250.9477081298828, + "learning_rate": 9.494000000000001e-06, + "loss": 42.5499, + "step": 47470 + }, + { + "epoch": 0.09591260398275674, + "grad_norm": 897.3517456054688, + "learning_rate": 9.496e-06, + "loss": 30.6094, + "step": 47480 + }, + { + "epoch": 0.09593280461544056, + "grad_norm": 1824.970458984375, + "learning_rate": 9.498000000000001e-06, + "loss": 29.0285, + "step": 47490 + }, + { + "epoch": 0.09595300524812438, + "grad_norm": 762.5152587890625, + "learning_rate": 9.5e-06, + "loss": 26.8512, + "step": 47500 + }, + { + "epoch": 0.09597320588080818, + "grad_norm": 190.81185913085938, + "learning_rate": 9.502000000000002e-06, + "loss": 20.1303, + "step": 47510 + }, + { + "epoch": 0.095993406513492, + "grad_norm": 286.0945129394531, + "learning_rate": 9.504e-06, + "loss": 27.1819, + "step": 47520 + }, + { + "epoch": 0.09601360714617582, + "grad_norm": 300.3119812011719, + "learning_rate": 9.506e-06, + "loss": 39.6307, + "step": 47530 + }, + { + "epoch": 0.09603380777885963, + "grad_norm": 256.6836853027344, + "learning_rate": 9.508000000000001e-06, + "loss": 32.6281, + "step": 47540 + }, + { + "epoch": 0.09605400841154345, + "grad_norm": 520.3702392578125, + "learning_rate": 9.51e-06, + "loss": 26.2266, + "step": 47550 + }, + { + "epoch": 0.09607420904422727, + "grad_norm": 511.2731628417969, + "learning_rate": 9.512000000000001e-06, + "loss": 19.3785, + "step": 47560 + }, + { + "epoch": 0.09609440967691107, + "grad_norm": 657.1405029296875, + "learning_rate": 9.514e-06, + "loss": 40.0813, + "step": 47570 + }, + { + "epoch": 0.0961146103095949, + "grad_norm": 645.1378173828125, + "learning_rate": 9.516e-06, + "loss": 30.9545, + "step": 47580 + }, + { + "epoch": 0.09613481094227871, + "grad_norm": 378.3953857421875, + "learning_rate": 9.518000000000001e-06, + "loss": 21.1613, + "step": 47590 + }, + { + "epoch": 0.09615501157496253, + "grad_norm": 228.1564178466797, + "learning_rate": 9.52e-06, + "loss": 34.5154, + "step": 47600 + }, + { + "epoch": 0.09617521220764634, + "grad_norm": 860.0980224609375, + "learning_rate": 9.522000000000001e-06, + "loss": 23.0088, + "step": 47610 + }, + { + "epoch": 0.09619541284033016, + "grad_norm": 343.0500793457031, + "learning_rate": 9.524e-06, + "loss": 24.8239, + "step": 47620 + }, + { + "epoch": 0.09621561347301398, + "grad_norm": 300.5077209472656, + "learning_rate": 9.526000000000002e-06, + "loss": 23.6022, + "step": 47630 + }, + { + "epoch": 0.09623581410569779, + "grad_norm": 355.74566650390625, + "learning_rate": 9.528000000000001e-06, + "loss": 29.7588, + "step": 47640 + }, + { + "epoch": 0.0962560147383816, + "grad_norm": 646.3258056640625, + "learning_rate": 9.53e-06, + "loss": 28.0084, 
+ "step": 47650 + }, + { + "epoch": 0.09627621537106543, + "grad_norm": 455.2993469238281, + "learning_rate": 9.532000000000001e-06, + "loss": 49.2897, + "step": 47660 + }, + { + "epoch": 0.09629641600374923, + "grad_norm": 1240.07080078125, + "learning_rate": 9.534e-06, + "loss": 30.8765, + "step": 47670 + }, + { + "epoch": 0.09631661663643305, + "grad_norm": 223.4470672607422, + "learning_rate": 9.536000000000002e-06, + "loss": 32.9669, + "step": 47680 + }, + { + "epoch": 0.09633681726911687, + "grad_norm": 366.6746826171875, + "learning_rate": 9.538e-06, + "loss": 12.4205, + "step": 47690 + }, + { + "epoch": 0.09635701790180068, + "grad_norm": 607.6683349609375, + "learning_rate": 9.54e-06, + "loss": 17.9753, + "step": 47700 + }, + { + "epoch": 0.0963772185344845, + "grad_norm": 476.2154235839844, + "learning_rate": 9.542000000000001e-06, + "loss": 29.2031, + "step": 47710 + }, + { + "epoch": 0.09639741916716832, + "grad_norm": 614.3005981445312, + "learning_rate": 9.544e-06, + "loss": 32.7907, + "step": 47720 + }, + { + "epoch": 0.09641761979985213, + "grad_norm": 329.02471923828125, + "learning_rate": 9.546000000000001e-06, + "loss": 26.8656, + "step": 47730 + }, + { + "epoch": 0.09643782043253595, + "grad_norm": 46.64715576171875, + "learning_rate": 9.548e-06, + "loss": 19.0038, + "step": 47740 + }, + { + "epoch": 0.09645802106521977, + "grad_norm": 140.94720458984375, + "learning_rate": 9.55e-06, + "loss": 22.3911, + "step": 47750 + }, + { + "epoch": 0.09647822169790359, + "grad_norm": 324.6895751953125, + "learning_rate": 9.552000000000001e-06, + "loss": 14.5786, + "step": 47760 + }, + { + "epoch": 0.09649842233058739, + "grad_norm": 750.6205444335938, + "learning_rate": 9.554e-06, + "loss": 26.3296, + "step": 47770 + }, + { + "epoch": 0.09651862296327121, + "grad_norm": 253.6141815185547, + "learning_rate": 9.556000000000001e-06, + "loss": 24.6459, + "step": 47780 + }, + { + "epoch": 0.09653882359595503, + "grad_norm": 1342.5201416015625, + "learning_rate": 9.558e-06, + "loss": 40.2704, + "step": 47790 + }, + { + "epoch": 0.09655902422863884, + "grad_norm": 89.08769226074219, + "learning_rate": 9.56e-06, + "loss": 30.5612, + "step": 47800 + }, + { + "epoch": 0.09657922486132266, + "grad_norm": 513.7152099609375, + "learning_rate": 9.562000000000001e-06, + "loss": 30.5212, + "step": 47810 + }, + { + "epoch": 0.09659942549400648, + "grad_norm": 381.00433349609375, + "learning_rate": 9.564e-06, + "loss": 48.6877, + "step": 47820 + }, + { + "epoch": 0.09661962612669028, + "grad_norm": 431.2875061035156, + "learning_rate": 9.566000000000001e-06, + "loss": 16.3693, + "step": 47830 + }, + { + "epoch": 0.0966398267593741, + "grad_norm": 1027.247314453125, + "learning_rate": 9.568e-06, + "loss": 19.0051, + "step": 47840 + }, + { + "epoch": 0.09666002739205792, + "grad_norm": 960.1653442382812, + "learning_rate": 9.57e-06, + "loss": 43.8949, + "step": 47850 + }, + { + "epoch": 0.09668022802474173, + "grad_norm": 407.0733642578125, + "learning_rate": 9.572000000000001e-06, + "loss": 25.9826, + "step": 47860 + }, + { + "epoch": 0.09670042865742555, + "grad_norm": 415.5325927734375, + "learning_rate": 9.574000000000002e-06, + "loss": 29.5783, + "step": 47870 + }, + { + "epoch": 0.09672062929010937, + "grad_norm": 389.2325439453125, + "learning_rate": 9.576000000000001e-06, + "loss": 31.4837, + "step": 47880 + }, + { + "epoch": 0.09674082992279318, + "grad_norm": 363.0729064941406, + "learning_rate": 9.578e-06, + "loss": 24.4001, + "step": 47890 + }, + { + "epoch": 0.096761030555477, + 
"grad_norm": 274.0908508300781, + "learning_rate": 9.58e-06, + "loss": 23.3419, + "step": 47900 + }, + { + "epoch": 0.09678123118816082, + "grad_norm": 610.1469116210938, + "learning_rate": 9.582e-06, + "loss": 35.8652, + "step": 47910 + }, + { + "epoch": 0.09680143182084464, + "grad_norm": 581.1511840820312, + "learning_rate": 9.584000000000002e-06, + "loss": 17.0714, + "step": 47920 + }, + { + "epoch": 0.09682163245352844, + "grad_norm": 97.17230224609375, + "learning_rate": 9.586000000000001e-06, + "loss": 31.8159, + "step": 47930 + }, + { + "epoch": 0.09684183308621226, + "grad_norm": 383.70404052734375, + "learning_rate": 9.588e-06, + "loss": 30.4987, + "step": 47940 + }, + { + "epoch": 0.09686203371889608, + "grad_norm": 444.5631103515625, + "learning_rate": 9.59e-06, + "loss": 25.1242, + "step": 47950 + }, + { + "epoch": 0.09688223435157989, + "grad_norm": 145.23377990722656, + "learning_rate": 9.592e-06, + "loss": 10.8214, + "step": 47960 + }, + { + "epoch": 0.09690243498426371, + "grad_norm": 360.3565979003906, + "learning_rate": 9.594000000000002e-06, + "loss": 27.0414, + "step": 47970 + }, + { + "epoch": 0.09692263561694753, + "grad_norm": 454.5389709472656, + "learning_rate": 9.596000000000001e-06, + "loss": 22.0487, + "step": 47980 + }, + { + "epoch": 0.09694283624963133, + "grad_norm": 232.68057250976562, + "learning_rate": 9.598e-06, + "loss": 37.0938, + "step": 47990 + }, + { + "epoch": 0.09696303688231515, + "grad_norm": 759.0354614257812, + "learning_rate": 9.600000000000001e-06, + "loss": 31.7083, + "step": 48000 + }, + { + "epoch": 0.09698323751499897, + "grad_norm": 329.7867431640625, + "learning_rate": 9.602e-06, + "loss": 31.8868, + "step": 48010 + }, + { + "epoch": 0.09700343814768278, + "grad_norm": 214.38702392578125, + "learning_rate": 9.604000000000002e-06, + "loss": 35.0172, + "step": 48020 + }, + { + "epoch": 0.0970236387803666, + "grad_norm": 392.5238342285156, + "learning_rate": 9.606000000000001e-06, + "loss": 46.4812, + "step": 48030 + }, + { + "epoch": 0.09704383941305042, + "grad_norm": 628.2894287109375, + "learning_rate": 9.608e-06, + "loss": 16.1298, + "step": 48040 + }, + { + "epoch": 0.09706404004573423, + "grad_norm": 550.8941650390625, + "learning_rate": 9.610000000000001e-06, + "loss": 28.6235, + "step": 48050 + }, + { + "epoch": 0.09708424067841805, + "grad_norm": 400.3697204589844, + "learning_rate": 9.612000000000002e-06, + "loss": 30.4452, + "step": 48060 + }, + { + "epoch": 0.09710444131110187, + "grad_norm": 95.51148223876953, + "learning_rate": 9.614000000000001e-06, + "loss": 22.6477, + "step": 48070 + }, + { + "epoch": 0.09712464194378569, + "grad_norm": 159.8924560546875, + "learning_rate": 9.616e-06, + "loss": 27.6692, + "step": 48080 + }, + { + "epoch": 0.09714484257646949, + "grad_norm": 180.40538024902344, + "learning_rate": 9.618e-06, + "loss": 18.7072, + "step": 48090 + }, + { + "epoch": 0.09716504320915331, + "grad_norm": 945.0508422851562, + "learning_rate": 9.620000000000001e-06, + "loss": 30.841, + "step": 48100 + }, + { + "epoch": 0.09718524384183713, + "grad_norm": 666.922119140625, + "learning_rate": 9.622000000000002e-06, + "loss": 28.6417, + "step": 48110 + }, + { + "epoch": 0.09720544447452094, + "grad_norm": 468.8777770996094, + "learning_rate": 9.624000000000001e-06, + "loss": 16.8679, + "step": 48120 + }, + { + "epoch": 0.09722564510720476, + "grad_norm": 426.8685302734375, + "learning_rate": 9.626e-06, + "loss": 16.4289, + "step": 48130 + }, + { + "epoch": 0.09724584573988858, + "grad_norm": 123.73139953613281, + 
"learning_rate": 9.628e-06, + "loss": 11.6659, + "step": 48140 + }, + { + "epoch": 0.09726604637257238, + "grad_norm": 1088.898681640625, + "learning_rate": 9.630000000000001e-06, + "loss": 42.8518, + "step": 48150 + }, + { + "epoch": 0.0972862470052562, + "grad_norm": 595.4813842773438, + "learning_rate": 9.632e-06, + "loss": 26.0703, + "step": 48160 + }, + { + "epoch": 0.09730644763794002, + "grad_norm": 534.6911010742188, + "learning_rate": 9.634000000000001e-06, + "loss": 26.4292, + "step": 48170 + }, + { + "epoch": 0.09732664827062383, + "grad_norm": 337.2037658691406, + "learning_rate": 9.636e-06, + "loss": 14.135, + "step": 48180 + }, + { + "epoch": 0.09734684890330765, + "grad_norm": 302.7720642089844, + "learning_rate": 9.638e-06, + "loss": 16.6161, + "step": 48190 + }, + { + "epoch": 0.09736704953599147, + "grad_norm": 383.7428283691406, + "learning_rate": 9.640000000000001e-06, + "loss": 56.4775, + "step": 48200 + }, + { + "epoch": 0.09738725016867528, + "grad_norm": 401.3696594238281, + "learning_rate": 9.642e-06, + "loss": 26.5211, + "step": 48210 + }, + { + "epoch": 0.0974074508013591, + "grad_norm": 501.5325622558594, + "learning_rate": 9.644000000000001e-06, + "loss": 20.5206, + "step": 48220 + }, + { + "epoch": 0.09742765143404292, + "grad_norm": 993.3955688476562, + "learning_rate": 9.646e-06, + "loss": 31.1039, + "step": 48230 + }, + { + "epoch": 0.09744785206672674, + "grad_norm": 249.63485717773438, + "learning_rate": 9.648000000000001e-06, + "loss": 18.1888, + "step": 48240 + }, + { + "epoch": 0.09746805269941054, + "grad_norm": 142.93650817871094, + "learning_rate": 9.65e-06, + "loss": 25.8998, + "step": 48250 + }, + { + "epoch": 0.09748825333209436, + "grad_norm": 346.6017761230469, + "learning_rate": 9.652e-06, + "loss": 28.8821, + "step": 48260 + }, + { + "epoch": 0.09750845396477818, + "grad_norm": 139.22000122070312, + "learning_rate": 9.654000000000001e-06, + "loss": 17.9235, + "step": 48270 + }, + { + "epoch": 0.09752865459746199, + "grad_norm": 592.041015625, + "learning_rate": 9.656e-06, + "loss": 36.8627, + "step": 48280 + }, + { + "epoch": 0.09754885523014581, + "grad_norm": 1026.140625, + "learning_rate": 9.658000000000001e-06, + "loss": 28.4813, + "step": 48290 + }, + { + "epoch": 0.09756905586282963, + "grad_norm": 800.9178466796875, + "learning_rate": 9.66e-06, + "loss": 21.5535, + "step": 48300 + }, + { + "epoch": 0.09758925649551344, + "grad_norm": 222.7467498779297, + "learning_rate": 9.662e-06, + "loss": 22.0214, + "step": 48310 + }, + { + "epoch": 0.09760945712819726, + "grad_norm": 229.6143035888672, + "learning_rate": 9.664000000000001e-06, + "loss": 24.3152, + "step": 48320 + }, + { + "epoch": 0.09762965776088108, + "grad_norm": 124.33726501464844, + "learning_rate": 9.666e-06, + "loss": 23.7983, + "step": 48330 + }, + { + "epoch": 0.09764985839356488, + "grad_norm": 685.0302124023438, + "learning_rate": 9.668000000000001e-06, + "loss": 19.4996, + "step": 48340 + }, + { + "epoch": 0.0976700590262487, + "grad_norm": 650.8430786132812, + "learning_rate": 9.67e-06, + "loss": 38.9991, + "step": 48350 + }, + { + "epoch": 0.09769025965893252, + "grad_norm": 792.5812377929688, + "learning_rate": 9.672e-06, + "loss": 33.9084, + "step": 48360 + }, + { + "epoch": 0.09771046029161633, + "grad_norm": 352.05487060546875, + "learning_rate": 9.674000000000001e-06, + "loss": 24.5777, + "step": 48370 + }, + { + "epoch": 0.09773066092430015, + "grad_norm": 399.4421081542969, + "learning_rate": 9.676e-06, + "loss": 29.6903, + "step": 48380 + }, + { + "epoch": 
0.09775086155698397, + "grad_norm": 282.4613952636719, + "learning_rate": 9.678000000000001e-06, + "loss": 27.1341, + "step": 48390 + }, + { + "epoch": 0.09777106218966779, + "grad_norm": 875.9047241210938, + "learning_rate": 9.68e-06, + "loss": 17.4331, + "step": 48400 + }, + { + "epoch": 0.0977912628223516, + "grad_norm": 238.08753967285156, + "learning_rate": 9.682e-06, + "loss": 41.6776, + "step": 48410 + }, + { + "epoch": 0.09781146345503541, + "grad_norm": 381.57763671875, + "learning_rate": 9.684e-06, + "loss": 28.2582, + "step": 48420 + }, + { + "epoch": 0.09783166408771923, + "grad_norm": 740.5884399414062, + "learning_rate": 9.686000000000002e-06, + "loss": 32.7431, + "step": 48430 + }, + { + "epoch": 0.09785186472040304, + "grad_norm": 491.7377014160156, + "learning_rate": 9.688000000000001e-06, + "loss": 20.9476, + "step": 48440 + }, + { + "epoch": 0.09787206535308686, + "grad_norm": 746.2936401367188, + "learning_rate": 9.69e-06, + "loss": 30.073, + "step": 48450 + }, + { + "epoch": 0.09789226598577068, + "grad_norm": 35.32424545288086, + "learning_rate": 9.692e-06, + "loss": 41.2601, + "step": 48460 + }, + { + "epoch": 0.09791246661845449, + "grad_norm": 313.6356506347656, + "learning_rate": 9.694e-06, + "loss": 15.1328, + "step": 48470 + }, + { + "epoch": 0.0979326672511383, + "grad_norm": 622.7384033203125, + "learning_rate": 9.696000000000002e-06, + "loss": 16.9361, + "step": 48480 + }, + { + "epoch": 0.09795286788382213, + "grad_norm": 444.7352294921875, + "learning_rate": 9.698000000000001e-06, + "loss": 15.3796, + "step": 48490 + }, + { + "epoch": 0.09797306851650593, + "grad_norm": 309.7575378417969, + "learning_rate": 9.7e-06, + "loss": 79.6284, + "step": 48500 + }, + { + "epoch": 0.09799326914918975, + "grad_norm": 1072.44921875, + "learning_rate": 9.702e-06, + "loss": 43.3716, + "step": 48510 + }, + { + "epoch": 0.09801346978187357, + "grad_norm": 436.8084411621094, + "learning_rate": 9.704e-06, + "loss": 39.6096, + "step": 48520 + }, + { + "epoch": 0.09803367041455738, + "grad_norm": 649.1705322265625, + "learning_rate": 9.706000000000002e-06, + "loss": 26.3654, + "step": 48530 + }, + { + "epoch": 0.0980538710472412, + "grad_norm": 322.66290283203125, + "learning_rate": 9.708000000000001e-06, + "loss": 23.6526, + "step": 48540 + }, + { + "epoch": 0.09807407167992502, + "grad_norm": 325.04425048828125, + "learning_rate": 9.71e-06, + "loss": 25.3373, + "step": 48550 + }, + { + "epoch": 0.09809427231260884, + "grad_norm": 1282.7777099609375, + "learning_rate": 9.712e-06, + "loss": 18.9983, + "step": 48560 + }, + { + "epoch": 0.09811447294529264, + "grad_norm": 436.2747802734375, + "learning_rate": 9.714e-06, + "loss": 25.8153, + "step": 48570 + }, + { + "epoch": 0.09813467357797646, + "grad_norm": 450.4263000488281, + "learning_rate": 9.716000000000002e-06, + "loss": 20.8123, + "step": 48580 + }, + { + "epoch": 0.09815487421066028, + "grad_norm": 256.3553771972656, + "learning_rate": 9.718e-06, + "loss": 22.8999, + "step": 48590 + }, + { + "epoch": 0.09817507484334409, + "grad_norm": 310.2727355957031, + "learning_rate": 9.72e-06, + "loss": 20.5452, + "step": 48600 + }, + { + "epoch": 0.09819527547602791, + "grad_norm": 443.9215087890625, + "learning_rate": 9.722000000000001e-06, + "loss": 22.8119, + "step": 48610 + }, + { + "epoch": 0.09821547610871173, + "grad_norm": 273.1202087402344, + "learning_rate": 9.724e-06, + "loss": 26.6364, + "step": 48620 + }, + { + "epoch": 0.09823567674139554, + "grad_norm": 335.86328125, + "learning_rate": 9.726000000000001e-06, + 
"loss": 25.182, + "step": 48630 + }, + { + "epoch": 0.09825587737407936, + "grad_norm": 287.7085266113281, + "learning_rate": 9.728e-06, + "loss": 20.7065, + "step": 48640 + }, + { + "epoch": 0.09827607800676318, + "grad_norm": 658.7157592773438, + "learning_rate": 9.73e-06, + "loss": 14.5838, + "step": 48650 + }, + { + "epoch": 0.09829627863944698, + "grad_norm": 614.1793823242188, + "learning_rate": 9.732000000000001e-06, + "loss": 23.6119, + "step": 48660 + }, + { + "epoch": 0.0983164792721308, + "grad_norm": 591.2699584960938, + "learning_rate": 9.734000000000002e-06, + "loss": 14.9975, + "step": 48670 + }, + { + "epoch": 0.09833667990481462, + "grad_norm": 191.0492401123047, + "learning_rate": 9.736000000000001e-06, + "loss": 14.8718, + "step": 48680 + }, + { + "epoch": 0.09835688053749843, + "grad_norm": 739.43359375, + "learning_rate": 9.738e-06, + "loss": 22.9545, + "step": 48690 + }, + { + "epoch": 0.09837708117018225, + "grad_norm": 464.5688781738281, + "learning_rate": 9.74e-06, + "loss": 13.4298, + "step": 48700 + }, + { + "epoch": 0.09839728180286607, + "grad_norm": 756.2413940429688, + "learning_rate": 9.742000000000001e-06, + "loss": 45.1545, + "step": 48710 + }, + { + "epoch": 0.09841748243554987, + "grad_norm": 219.85523986816406, + "learning_rate": 9.744000000000002e-06, + "loss": 34.1919, + "step": 48720 + }, + { + "epoch": 0.0984376830682337, + "grad_norm": 631.513671875, + "learning_rate": 9.746000000000001e-06, + "loss": 20.2011, + "step": 48730 + }, + { + "epoch": 0.09845788370091751, + "grad_norm": 432.5674743652344, + "learning_rate": 9.748e-06, + "loss": 30.0944, + "step": 48740 + }, + { + "epoch": 0.09847808433360133, + "grad_norm": 647.7390747070312, + "learning_rate": 9.75e-06, + "loss": 38.3661, + "step": 48750 + }, + { + "epoch": 0.09849828496628514, + "grad_norm": 410.4930725097656, + "learning_rate": 9.752e-06, + "loss": 29.9325, + "step": 48760 + }, + { + "epoch": 0.09851848559896896, + "grad_norm": 470.92840576171875, + "learning_rate": 9.754000000000002e-06, + "loss": 35.1719, + "step": 48770 + }, + { + "epoch": 0.09853868623165278, + "grad_norm": 953.4794921875, + "learning_rate": 9.756000000000001e-06, + "loss": 36.2353, + "step": 48780 + }, + { + "epoch": 0.09855888686433659, + "grad_norm": 471.443359375, + "learning_rate": 9.758e-06, + "loss": 33.5306, + "step": 48790 + }, + { + "epoch": 0.0985790874970204, + "grad_norm": 820.6240234375, + "learning_rate": 9.760000000000001e-06, + "loss": 24.8495, + "step": 48800 + }, + { + "epoch": 0.09859928812970423, + "grad_norm": 463.1524658203125, + "learning_rate": 9.762e-06, + "loss": 22.0579, + "step": 48810 + }, + { + "epoch": 0.09861948876238803, + "grad_norm": 1104.513916015625, + "learning_rate": 9.764000000000002e-06, + "loss": 47.9236, + "step": 48820 + }, + { + "epoch": 0.09863968939507185, + "grad_norm": 345.3257141113281, + "learning_rate": 9.766000000000001e-06, + "loss": 20.4036, + "step": 48830 + }, + { + "epoch": 0.09865989002775567, + "grad_norm": 549.0986328125, + "learning_rate": 9.768e-06, + "loss": 34.3462, + "step": 48840 + }, + { + "epoch": 0.09868009066043948, + "grad_norm": 277.656982421875, + "learning_rate": 9.770000000000001e-06, + "loss": 14.8656, + "step": 48850 + }, + { + "epoch": 0.0987002912931233, + "grad_norm": 465.69207763671875, + "learning_rate": 9.772e-06, + "loss": 17.3637, + "step": 48860 + }, + { + "epoch": 0.09872049192580712, + "grad_norm": 1081.8680419921875, + "learning_rate": 9.774000000000002e-06, + "loss": 44.3744, + "step": 48870 + }, + { + "epoch": 
0.09874069255849093, + "grad_norm": 158.52818298339844, + "learning_rate": 9.776000000000001e-06, + "loss": 33.0443, + "step": 48880 + }, + { + "epoch": 0.09876089319117475, + "grad_norm": 1199.7681884765625, + "learning_rate": 9.778e-06, + "loss": 29.7864, + "step": 48890 + }, + { + "epoch": 0.09878109382385857, + "grad_norm": 692.0853881835938, + "learning_rate": 9.780000000000001e-06, + "loss": 40.4721, + "step": 48900 + }, + { + "epoch": 0.09880129445654239, + "grad_norm": 753.58740234375, + "learning_rate": 9.782e-06, + "loss": 28.6318, + "step": 48910 + }, + { + "epoch": 0.09882149508922619, + "grad_norm": 319.7926940917969, + "learning_rate": 9.784000000000002e-06, + "loss": 34.901, + "step": 48920 + }, + { + "epoch": 0.09884169572191001, + "grad_norm": 278.90948486328125, + "learning_rate": 9.786e-06, + "loss": 35.2243, + "step": 48930 + }, + { + "epoch": 0.09886189635459383, + "grad_norm": 888.979736328125, + "learning_rate": 9.788e-06, + "loss": 21.1623, + "step": 48940 + }, + { + "epoch": 0.09888209698727764, + "grad_norm": 50.69482421875, + "learning_rate": 9.790000000000001e-06, + "loss": 19.2865, + "step": 48950 + }, + { + "epoch": 0.09890229761996146, + "grad_norm": 271.34930419921875, + "learning_rate": 9.792e-06, + "loss": 14.6149, + "step": 48960 + }, + { + "epoch": 0.09892249825264528, + "grad_norm": 865.9595336914062, + "learning_rate": 9.794000000000001e-06, + "loss": 22.4335, + "step": 48970 + }, + { + "epoch": 0.09894269888532908, + "grad_norm": 556.2496948242188, + "learning_rate": 9.796e-06, + "loss": 54.3076, + "step": 48980 + }, + { + "epoch": 0.0989628995180129, + "grad_norm": 735.6205444335938, + "learning_rate": 9.798e-06, + "loss": 38.0693, + "step": 48990 + }, + { + "epoch": 0.09898310015069672, + "grad_norm": 426.1153259277344, + "learning_rate": 9.800000000000001e-06, + "loss": 39.0343, + "step": 49000 + }, + { + "epoch": 0.09900330078338053, + "grad_norm": 471.3280334472656, + "learning_rate": 9.802e-06, + "loss": 26.5112, + "step": 49010 + }, + { + "epoch": 0.09902350141606435, + "grad_norm": 684.482421875, + "learning_rate": 9.804000000000001e-06, + "loss": 26.193, + "step": 49020 + }, + { + "epoch": 0.09904370204874817, + "grad_norm": 1121.659423828125, + "learning_rate": 9.806e-06, + "loss": 33.4643, + "step": 49030 + }, + { + "epoch": 0.09906390268143198, + "grad_norm": 200.65771484375, + "learning_rate": 9.808000000000002e-06, + "loss": 16.047, + "step": 49040 + }, + { + "epoch": 0.0990841033141158, + "grad_norm": 884.4088134765625, + "learning_rate": 9.810000000000001e-06, + "loss": 36.1623, + "step": 49050 + }, + { + "epoch": 0.09910430394679962, + "grad_norm": 194.87034606933594, + "learning_rate": 9.812e-06, + "loss": 19.7563, + "step": 49060 + }, + { + "epoch": 0.09912450457948344, + "grad_norm": 558.3735961914062, + "learning_rate": 9.814000000000001e-06, + "loss": 16.634, + "step": 49070 + }, + { + "epoch": 0.09914470521216724, + "grad_norm": 298.5070495605469, + "learning_rate": 9.816e-06, + "loss": 31.8815, + "step": 49080 + }, + { + "epoch": 0.09916490584485106, + "grad_norm": 371.0187072753906, + "learning_rate": 9.818000000000002e-06, + "loss": 22.5085, + "step": 49090 + }, + { + "epoch": 0.09918510647753488, + "grad_norm": 807.2926025390625, + "learning_rate": 9.820000000000001e-06, + "loss": 25.0996, + "step": 49100 + }, + { + "epoch": 0.09920530711021869, + "grad_norm": 630.2256469726562, + "learning_rate": 9.822e-06, + "loss": 27.6292, + "step": 49110 + }, + { + "epoch": 0.09922550774290251, + "grad_norm": 741.7303466796875, + 
"learning_rate": 9.824000000000001e-06, + "loss": 29.3892, + "step": 49120 + }, + { + "epoch": 0.09924570837558633, + "grad_norm": 972.7586059570312, + "learning_rate": 9.826e-06, + "loss": 49.3759, + "step": 49130 + }, + { + "epoch": 0.09926590900827013, + "grad_norm": 659.7006225585938, + "learning_rate": 9.828000000000001e-06, + "loss": 30.6737, + "step": 49140 + }, + { + "epoch": 0.09928610964095395, + "grad_norm": 323.7527160644531, + "learning_rate": 9.83e-06, + "loss": 21.8791, + "step": 49150 + }, + { + "epoch": 0.09930631027363777, + "grad_norm": 181.38067626953125, + "learning_rate": 9.832e-06, + "loss": 15.8764, + "step": 49160 + }, + { + "epoch": 0.09932651090632158, + "grad_norm": 531.7208862304688, + "learning_rate": 9.834000000000001e-06, + "loss": 51.3417, + "step": 49170 + }, + { + "epoch": 0.0993467115390054, + "grad_norm": 471.9506530761719, + "learning_rate": 9.836e-06, + "loss": 33.242, + "step": 49180 + }, + { + "epoch": 0.09936691217168922, + "grad_norm": 215.51206970214844, + "learning_rate": 9.838000000000001e-06, + "loss": 41.7949, + "step": 49190 + }, + { + "epoch": 0.09938711280437303, + "grad_norm": 1006.3677368164062, + "learning_rate": 9.84e-06, + "loss": 22.6436, + "step": 49200 + }, + { + "epoch": 0.09940731343705685, + "grad_norm": 322.4917297363281, + "learning_rate": 9.842e-06, + "loss": 15.0789, + "step": 49210 + }, + { + "epoch": 0.09942751406974067, + "grad_norm": 309.1917724609375, + "learning_rate": 9.844000000000001e-06, + "loss": 29.4168, + "step": 49220 + }, + { + "epoch": 0.09944771470242449, + "grad_norm": 712.6861572265625, + "learning_rate": 9.846000000000002e-06, + "loss": 16.7173, + "step": 49230 + }, + { + "epoch": 0.09946791533510829, + "grad_norm": 174.32557678222656, + "learning_rate": 9.848000000000001e-06, + "loss": 19.2307, + "step": 49240 + }, + { + "epoch": 0.09948811596779211, + "grad_norm": 589.4677734375, + "learning_rate": 9.85e-06, + "loss": 22.0556, + "step": 49250 + }, + { + "epoch": 0.09950831660047593, + "grad_norm": 193.81019592285156, + "learning_rate": 9.852e-06, + "loss": 17.3796, + "step": 49260 + }, + { + "epoch": 0.09952851723315974, + "grad_norm": 647.8886108398438, + "learning_rate": 9.854000000000001e-06, + "loss": 27.9435, + "step": 49270 + }, + { + "epoch": 0.09954871786584356, + "grad_norm": 348.2982177734375, + "learning_rate": 9.856000000000002e-06, + "loss": 25.0838, + "step": 49280 + }, + { + "epoch": 0.09956891849852738, + "grad_norm": 203.4016571044922, + "learning_rate": 9.858000000000001e-06, + "loss": 20.925, + "step": 49290 + }, + { + "epoch": 0.09958911913121118, + "grad_norm": 454.58380126953125, + "learning_rate": 9.86e-06, + "loss": 14.073, + "step": 49300 + }, + { + "epoch": 0.099609319763895, + "grad_norm": 470.5152893066406, + "learning_rate": 9.862e-06, + "loss": 20.2741, + "step": 49310 + }, + { + "epoch": 0.09962952039657882, + "grad_norm": 651.3986206054688, + "learning_rate": 9.864e-06, + "loss": 33.504, + "step": 49320 + }, + { + "epoch": 0.09964972102926263, + "grad_norm": 140.67408752441406, + "learning_rate": 9.866000000000002e-06, + "loss": 24.0097, + "step": 49330 + }, + { + "epoch": 0.09966992166194645, + "grad_norm": 414.4718322753906, + "learning_rate": 9.868000000000001e-06, + "loss": 48.8509, + "step": 49340 + }, + { + "epoch": 0.09969012229463027, + "grad_norm": 113.15275573730469, + "learning_rate": 9.87e-06, + "loss": 28.0613, + "step": 49350 + }, + { + "epoch": 0.09971032292731408, + "grad_norm": 1159.5601806640625, + "learning_rate": 9.872e-06, + "loss": 28.8047, + "step": 
49360 + }, + { + "epoch": 0.0997305235599979, + "grad_norm": 609.1572265625, + "learning_rate": 9.874e-06, + "loss": 35.9614, + "step": 49370 + }, + { + "epoch": 0.09975072419268172, + "grad_norm": 290.0668029785156, + "learning_rate": 9.876000000000002e-06, + "loss": 29.4886, + "step": 49380 + }, + { + "epoch": 0.09977092482536554, + "grad_norm": 580.041015625, + "learning_rate": 9.878000000000001e-06, + "loss": 21.3514, + "step": 49390 + }, + { + "epoch": 0.09979112545804934, + "grad_norm": 476.08868408203125, + "learning_rate": 9.88e-06, + "loss": 12.8899, + "step": 49400 + }, + { + "epoch": 0.09981132609073316, + "grad_norm": 284.8551330566406, + "learning_rate": 9.882000000000001e-06, + "loss": 31.1479, + "step": 49410 + }, + { + "epoch": 0.09983152672341698, + "grad_norm": 244.68206787109375, + "learning_rate": 9.884e-06, + "loss": 23.8315, + "step": 49420 + }, + { + "epoch": 0.09985172735610079, + "grad_norm": 907.8687133789062, + "learning_rate": 9.886000000000002e-06, + "loss": 30.9991, + "step": 49430 + }, + { + "epoch": 0.09987192798878461, + "grad_norm": 513.1172485351562, + "learning_rate": 9.888000000000001e-06, + "loss": 21.5931, + "step": 49440 + }, + { + "epoch": 0.09989212862146843, + "grad_norm": 338.56011962890625, + "learning_rate": 9.89e-06, + "loss": 26.8022, + "step": 49450 + }, + { + "epoch": 0.09991232925415224, + "grad_norm": 185.15675354003906, + "learning_rate": 9.892000000000001e-06, + "loss": 28.307, + "step": 49460 + }, + { + "epoch": 0.09993252988683606, + "grad_norm": 904.0110473632812, + "learning_rate": 9.894e-06, + "loss": 37.4639, + "step": 49470 + }, + { + "epoch": 0.09995273051951988, + "grad_norm": 521.9727172851562, + "learning_rate": 9.896000000000001e-06, + "loss": 25.5981, + "step": 49480 + }, + { + "epoch": 0.09997293115220368, + "grad_norm": 1088.702880859375, + "learning_rate": 9.898e-06, + "loss": 28.4142, + "step": 49490 + }, + { + "epoch": 0.0999931317848875, + "grad_norm": 861.6896362304688, + "learning_rate": 9.9e-06, + "loss": 37.6479, + "step": 49500 + }, + { + "epoch": 0.10001333241757132, + "grad_norm": 1178.4163818359375, + "learning_rate": 9.902000000000001e-06, + "loss": 28.324, + "step": 49510 + }, + { + "epoch": 0.10003353305025513, + "grad_norm": 389.5504150390625, + "learning_rate": 9.904e-06, + "loss": 15.2446, + "step": 49520 + }, + { + "epoch": 0.10005373368293895, + "grad_norm": 87.75468444824219, + "learning_rate": 9.906000000000001e-06, + "loss": 18.3448, + "step": 49530 + }, + { + "epoch": 0.10007393431562277, + "grad_norm": 485.5669250488281, + "learning_rate": 9.908e-06, + "loss": 23.2733, + "step": 49540 + }, + { + "epoch": 0.10009413494830659, + "grad_norm": 325.63507080078125, + "learning_rate": 9.91e-06, + "loss": 14.1606, + "step": 49550 + }, + { + "epoch": 0.1001143355809904, + "grad_norm": 516.7115478515625, + "learning_rate": 9.912000000000001e-06, + "loss": 20.5059, + "step": 49560 + }, + { + "epoch": 0.10013453621367421, + "grad_norm": 232.55116271972656, + "learning_rate": 9.914e-06, + "loss": 40.8113, + "step": 49570 + }, + { + "epoch": 0.10015473684635803, + "grad_norm": 1050.576416015625, + "learning_rate": 9.916000000000001e-06, + "loss": 28.4415, + "step": 49580 + }, + { + "epoch": 0.10017493747904184, + "grad_norm": 224.6217498779297, + "learning_rate": 9.918e-06, + "loss": 21.9504, + "step": 49590 + }, + { + "epoch": 0.10019513811172566, + "grad_norm": 691.5042724609375, + "learning_rate": 9.920000000000002e-06, + "loss": 36.9765, + "step": 49600 + }, + { + "epoch": 0.10021533874440948, + 
"grad_norm": 529.460205078125, + "learning_rate": 9.922000000000001e-06, + "loss": 30.945, + "step": 49610 + }, + { + "epoch": 0.10023553937709329, + "grad_norm": 744.3157348632812, + "learning_rate": 9.924e-06, + "loss": 28.8601, + "step": 49620 + }, + { + "epoch": 0.1002557400097771, + "grad_norm": 417.7217712402344, + "learning_rate": 9.926000000000001e-06, + "loss": 32.3139, + "step": 49630 + }, + { + "epoch": 0.10027594064246093, + "grad_norm": 457.09521484375, + "learning_rate": 9.928e-06, + "loss": 34.9598, + "step": 49640 + }, + { + "epoch": 0.10029614127514473, + "grad_norm": 300.70135498046875, + "learning_rate": 9.930000000000001e-06, + "loss": 36.5921, + "step": 49650 + }, + { + "epoch": 0.10031634190782855, + "grad_norm": 692.780517578125, + "learning_rate": 9.932e-06, + "loss": 51.0492, + "step": 49660 + }, + { + "epoch": 0.10033654254051237, + "grad_norm": 630.0022583007812, + "learning_rate": 9.934e-06, + "loss": 25.7311, + "step": 49670 + }, + { + "epoch": 0.10035674317319618, + "grad_norm": 484.6986999511719, + "learning_rate": 9.936000000000001e-06, + "loss": 38.4831, + "step": 49680 + }, + { + "epoch": 0.10037694380588, + "grad_norm": 374.391357421875, + "learning_rate": 9.938e-06, + "loss": 26.8847, + "step": 49690 + }, + { + "epoch": 0.10039714443856382, + "grad_norm": 257.7834167480469, + "learning_rate": 9.940000000000001e-06, + "loss": 17.869, + "step": 49700 + }, + { + "epoch": 0.10041734507124764, + "grad_norm": 170.61480712890625, + "learning_rate": 9.942e-06, + "loss": 56.4838, + "step": 49710 + }, + { + "epoch": 0.10043754570393144, + "grad_norm": 676.6981201171875, + "learning_rate": 9.944e-06, + "loss": 24.0504, + "step": 49720 + }, + { + "epoch": 0.10045774633661526, + "grad_norm": 375.68365478515625, + "learning_rate": 9.946000000000001e-06, + "loss": 29.4494, + "step": 49730 + }, + { + "epoch": 0.10047794696929908, + "grad_norm": 500.6487121582031, + "learning_rate": 9.948e-06, + "loss": 36.1562, + "step": 49740 + }, + { + "epoch": 0.10049814760198289, + "grad_norm": 536.2442626953125, + "learning_rate": 9.950000000000001e-06, + "loss": 30.0837, + "step": 49750 + }, + { + "epoch": 0.10051834823466671, + "grad_norm": 451.24169921875, + "learning_rate": 9.952e-06, + "loss": 26.6665, + "step": 49760 + }, + { + "epoch": 0.10053854886735053, + "grad_norm": 143.89686584472656, + "learning_rate": 9.954e-06, + "loss": 22.8259, + "step": 49770 + }, + { + "epoch": 0.10055874950003434, + "grad_norm": 847.1919555664062, + "learning_rate": 9.956000000000001e-06, + "loss": 23.5676, + "step": 49780 + }, + { + "epoch": 0.10057895013271816, + "grad_norm": 378.9535827636719, + "learning_rate": 9.958e-06, + "loss": 33.596, + "step": 49790 + }, + { + "epoch": 0.10059915076540198, + "grad_norm": 350.87176513671875, + "learning_rate": 9.960000000000001e-06, + "loss": 21.7925, + "step": 49800 + }, + { + "epoch": 0.10061935139808578, + "grad_norm": 517.0904541015625, + "learning_rate": 9.962e-06, + "loss": 17.3267, + "step": 49810 + }, + { + "epoch": 0.1006395520307696, + "grad_norm": 170.994140625, + "learning_rate": 9.964e-06, + "loss": 23.7536, + "step": 49820 + }, + { + "epoch": 0.10065975266345342, + "grad_norm": 305.552001953125, + "learning_rate": 9.966e-06, + "loss": 18.693, + "step": 49830 + }, + { + "epoch": 0.10067995329613723, + "grad_norm": 470.3461608886719, + "learning_rate": 9.968000000000002e-06, + "loss": 39.8488, + "step": 49840 + }, + { + "epoch": 0.10070015392882105, + "grad_norm": 524.2937622070312, + "learning_rate": 9.970000000000001e-06, + "loss": 
18.4456, + "step": 49850 + }, + { + "epoch": 0.10072035456150487, + "grad_norm": 369.7236633300781, + "learning_rate": 9.972e-06, + "loss": 26.828, + "step": 49860 + }, + { + "epoch": 0.10074055519418869, + "grad_norm": 463.5006408691406, + "learning_rate": 9.974e-06, + "loss": 26.2836, + "step": 49870 + }, + { + "epoch": 0.1007607558268725, + "grad_norm": 629.5526733398438, + "learning_rate": 9.976e-06, + "loss": 24.2549, + "step": 49880 + }, + { + "epoch": 0.10078095645955631, + "grad_norm": 384.2488098144531, + "learning_rate": 9.978000000000002e-06, + "loss": 19.898, + "step": 49890 + }, + { + "epoch": 0.10080115709224013, + "grad_norm": 799.72119140625, + "learning_rate": 9.980000000000001e-06, + "loss": 25.2737, + "step": 49900 + }, + { + "epoch": 0.10082135772492394, + "grad_norm": 441.5334777832031, + "learning_rate": 9.982e-06, + "loss": 31.0727, + "step": 49910 + }, + { + "epoch": 0.10084155835760776, + "grad_norm": 276.5469665527344, + "learning_rate": 9.984e-06, + "loss": 18.8078, + "step": 49920 + }, + { + "epoch": 0.10086175899029158, + "grad_norm": 187.44979858398438, + "learning_rate": 9.986e-06, + "loss": 15.4224, + "step": 49930 + }, + { + "epoch": 0.10088195962297539, + "grad_norm": 361.1304931640625, + "learning_rate": 9.988000000000002e-06, + "loss": 16.9527, + "step": 49940 + }, + { + "epoch": 0.1009021602556592, + "grad_norm": 191.9720916748047, + "learning_rate": 9.990000000000001e-06, + "loss": 34.9568, + "step": 49950 + }, + { + "epoch": 0.10092236088834303, + "grad_norm": 8.192609786987305, + "learning_rate": 9.992e-06, + "loss": 39.4295, + "step": 49960 + }, + { + "epoch": 0.10094256152102683, + "grad_norm": 300.62615966796875, + "learning_rate": 9.994000000000001e-06, + "loss": 12.9227, + "step": 49970 + }, + { + "epoch": 0.10096276215371065, + "grad_norm": 494.94549560546875, + "learning_rate": 9.996e-06, + "loss": 42.9506, + "step": 49980 + }, + { + "epoch": 0.10098296278639447, + "grad_norm": 412.6275634765625, + "learning_rate": 9.998000000000002e-06, + "loss": 28.3969, + "step": 49990 + }, + { + "epoch": 0.10100316341907828, + "grad_norm": 99.5644302368164, + "learning_rate": 1e-05, + "loss": 27.8822, + "step": 50000 + }, + { + "epoch": 0.1010233640517621, + "grad_norm": 362.5364990234375, + "learning_rate": 9.999999987815305e-06, + "loss": 29.5299, + "step": 50010 + }, + { + "epoch": 0.10104356468444592, + "grad_norm": 5027.0400390625, + "learning_rate": 9.999999951261215e-06, + "loss": 62.4795, + "step": 50020 + }, + { + "epoch": 0.10106376531712974, + "grad_norm": 354.19964599609375, + "learning_rate": 9.99999989033773e-06, + "loss": 36.6571, + "step": 50030 + }, + { + "epoch": 0.10108396594981355, + "grad_norm": 725.4144897460938, + "learning_rate": 9.999999805044853e-06, + "loss": 49.3792, + "step": 50040 + }, + { + "epoch": 0.10110416658249737, + "grad_norm": 221.52777099609375, + "learning_rate": 9.999999695382584e-06, + "loss": 31.6822, + "step": 50050 + }, + { + "epoch": 0.10112436721518119, + "grad_norm": 853.5789794921875, + "learning_rate": 9.999999561350923e-06, + "loss": 17.717, + "step": 50060 + }, + { + "epoch": 0.10114456784786499, + "grad_norm": 897.6527709960938, + "learning_rate": 9.99999940294987e-06, + "loss": 21.7384, + "step": 50070 + }, + { + "epoch": 0.10116476848054881, + "grad_norm": 411.7169189453125, + "learning_rate": 9.999999220179426e-06, + "loss": 22.1829, + "step": 50080 + }, + { + "epoch": 0.10118496911323263, + "grad_norm": 250.9145050048828, + "learning_rate": 9.999999013039593e-06, + "loss": 18.3321, + "step": 50090 + 
}, + { + "epoch": 0.10120516974591644, + "grad_norm": 736.3198852539062, + "learning_rate": 9.999998781530372e-06, + "loss": 37.9785, + "step": 50100 + }, + { + "epoch": 0.10122537037860026, + "grad_norm": 568.701904296875, + "learning_rate": 9.999998525651761e-06, + "loss": 25.465, + "step": 50110 + }, + { + "epoch": 0.10124557101128408, + "grad_norm": 870.9944458007812, + "learning_rate": 9.999998245403766e-06, + "loss": 27.752, + "step": 50120 + }, + { + "epoch": 0.10126577164396788, + "grad_norm": 518.2539672851562, + "learning_rate": 9.999997940786385e-06, + "loss": 24.4236, + "step": 50130 + }, + { + "epoch": 0.1012859722766517, + "grad_norm": 261.4921569824219, + "learning_rate": 9.99999761179962e-06, + "loss": 28.2899, + "step": 50140 + }, + { + "epoch": 0.10130617290933552, + "grad_norm": 390.89501953125, + "learning_rate": 9.999997258443473e-06, + "loss": 18.5972, + "step": 50150 + }, + { + "epoch": 0.10132637354201933, + "grad_norm": 375.1386413574219, + "learning_rate": 9.999996880717946e-06, + "loss": 28.7425, + "step": 50160 + }, + { + "epoch": 0.10134657417470315, + "grad_norm": 156.5116424560547, + "learning_rate": 9.999996478623041e-06, + "loss": 41.3474, + "step": 50170 + }, + { + "epoch": 0.10136677480738697, + "grad_norm": 1503.359375, + "learning_rate": 9.99999605215876e-06, + "loss": 22.6597, + "step": 50180 + }, + { + "epoch": 0.10138697544007079, + "grad_norm": 205.34335327148438, + "learning_rate": 9.999995601325104e-06, + "loss": 26.2105, + "step": 50190 + }, + { + "epoch": 0.1014071760727546, + "grad_norm": 1484.55126953125, + "learning_rate": 9.999995126122076e-06, + "loss": 27.1298, + "step": 50200 + }, + { + "epoch": 0.10142737670543842, + "grad_norm": 1214.3726806640625, + "learning_rate": 9.999994626549678e-06, + "loss": 49.6199, + "step": 50210 + }, + { + "epoch": 0.10144757733812224, + "grad_norm": 79.13058471679688, + "learning_rate": 9.999994102607912e-06, + "loss": 19.2485, + "step": 50220 + }, + { + "epoch": 0.10146777797080604, + "grad_norm": 451.7241516113281, + "learning_rate": 9.999993554296783e-06, + "loss": 26.9545, + "step": 50230 + }, + { + "epoch": 0.10148797860348986, + "grad_norm": 281.6031494140625, + "learning_rate": 9.999992981616292e-06, + "loss": 29.894, + "step": 50240 + }, + { + "epoch": 0.10150817923617368, + "grad_norm": 297.3249816894531, + "learning_rate": 9.99999238456644e-06, + "loss": 21.369, + "step": 50250 + }, + { + "epoch": 0.10152837986885749, + "grad_norm": 95.88583374023438, + "learning_rate": 9.999991763147232e-06, + "loss": 24.8606, + "step": 50260 + }, + { + "epoch": 0.10154858050154131, + "grad_norm": 1134.8065185546875, + "learning_rate": 9.99999111735867e-06, + "loss": 55.5175, + "step": 50270 + }, + { + "epoch": 0.10156878113422513, + "grad_norm": 288.083740234375, + "learning_rate": 9.999990447200758e-06, + "loss": 32.7888, + "step": 50280 + }, + { + "epoch": 0.10158898176690893, + "grad_norm": 329.77276611328125, + "learning_rate": 9.9999897526735e-06, + "loss": 28.8469, + "step": 50290 + }, + { + "epoch": 0.10160918239959275, + "grad_norm": 308.0667419433594, + "learning_rate": 9.999989033776898e-06, + "loss": 40.5289, + "step": 50300 + }, + { + "epoch": 0.10162938303227657, + "grad_norm": 1404.3338623046875, + "learning_rate": 9.999988290510955e-06, + "loss": 38.1056, + "step": 50310 + }, + { + "epoch": 0.10164958366496038, + "grad_norm": 679.6793212890625, + "learning_rate": 9.999987522875676e-06, + "loss": 24.7506, + "step": 50320 + }, + { + "epoch": 0.1016697842976442, + "grad_norm": 589.7515869140625, + 
"learning_rate": 9.999986730871065e-06, + "loss": 31.0373, + "step": 50330 + }, + { + "epoch": 0.10168998493032802, + "grad_norm": 1030.587646484375, + "learning_rate": 9.999985914497124e-06, + "loss": 41.3364, + "step": 50340 + }, + { + "epoch": 0.10171018556301184, + "grad_norm": 253.97445678710938, + "learning_rate": 9.999985073753857e-06, + "loss": 21.784, + "step": 50350 + }, + { + "epoch": 0.10173038619569565, + "grad_norm": 803.2162475585938, + "learning_rate": 9.999984208641271e-06, + "loss": 31.6272, + "step": 50360 + }, + { + "epoch": 0.10175058682837947, + "grad_norm": 313.1042175292969, + "learning_rate": 9.999983319159368e-06, + "loss": 24.4944, + "step": 50370 + }, + { + "epoch": 0.10177078746106329, + "grad_norm": 444.9187927246094, + "learning_rate": 9.999982405308154e-06, + "loss": 26.4927, + "step": 50380 + }, + { + "epoch": 0.10179098809374709, + "grad_norm": 1321.82421875, + "learning_rate": 9.999981467087629e-06, + "loss": 35.5587, + "step": 50390 + }, + { + "epoch": 0.10181118872643091, + "grad_norm": 391.11090087890625, + "learning_rate": 9.999980504497803e-06, + "loss": 18.7048, + "step": 50400 + }, + { + "epoch": 0.10183138935911473, + "grad_norm": 337.62158203125, + "learning_rate": 9.999979517538677e-06, + "loss": 22.3681, + "step": 50410 + }, + { + "epoch": 0.10185158999179854, + "grad_norm": 601.7754516601562, + "learning_rate": 9.99997850621026e-06, + "loss": 32.052, + "step": 50420 + }, + { + "epoch": 0.10187179062448236, + "grad_norm": 412.9033203125, + "learning_rate": 9.999977470512551e-06, + "loss": 17.4484, + "step": 50430 + }, + { + "epoch": 0.10189199125716618, + "grad_norm": 209.8957977294922, + "learning_rate": 9.999976410445563e-06, + "loss": 11.9061, + "step": 50440 + }, + { + "epoch": 0.10191219188984998, + "grad_norm": 570.1461181640625, + "learning_rate": 9.999975326009292e-06, + "loss": 26.4283, + "step": 50450 + }, + { + "epoch": 0.1019323925225338, + "grad_norm": 583.421630859375, + "learning_rate": 9.999974217203749e-06, + "loss": 34.1035, + "step": 50460 + }, + { + "epoch": 0.10195259315521762, + "grad_norm": 680.7811889648438, + "learning_rate": 9.999973084028938e-06, + "loss": 44.3453, + "step": 50470 + }, + { + "epoch": 0.10197279378790143, + "grad_norm": 4006.479736328125, + "learning_rate": 9.999971926484865e-06, + "loss": 56.8407, + "step": 50480 + }, + { + "epoch": 0.10199299442058525, + "grad_norm": 249.22564697265625, + "learning_rate": 9.999970744571534e-06, + "loss": 25.6443, + "step": 50490 + }, + { + "epoch": 0.10201319505326907, + "grad_norm": 551.4610595703125, + "learning_rate": 9.999969538288953e-06, + "loss": 16.5304, + "step": 50500 + }, + { + "epoch": 0.10203339568595289, + "grad_norm": 375.3378601074219, + "learning_rate": 9.999968307637127e-06, + "loss": 27.6524, + "step": 50510 + }, + { + "epoch": 0.1020535963186367, + "grad_norm": 436.2530822753906, + "learning_rate": 9.999967052616061e-06, + "loss": 26.4405, + "step": 50520 + }, + { + "epoch": 0.10207379695132052, + "grad_norm": 317.2707214355469, + "learning_rate": 9.999965773225762e-06, + "loss": 48.2838, + "step": 50530 + }, + { + "epoch": 0.10209399758400434, + "grad_norm": 534.0542602539062, + "learning_rate": 9.999964469466236e-06, + "loss": 19.6013, + "step": 50540 + }, + { + "epoch": 0.10211419821668814, + "grad_norm": 306.4703674316406, + "learning_rate": 9.999963141337493e-06, + "loss": 26.0738, + "step": 50550 + }, + { + "epoch": 0.10213439884937196, + "grad_norm": 255.9512176513672, + "learning_rate": 9.999961788839533e-06, + "loss": 25.5566, + "step": 
50560 + }, + { + "epoch": 0.10215459948205578, + "grad_norm": 515.935791015625, + "learning_rate": 9.999960411972366e-06, + "loss": 26.5733, + "step": 50570 + }, + { + "epoch": 0.10217480011473959, + "grad_norm": 255.52371215820312, + "learning_rate": 9.999959010735997e-06, + "loss": 17.7823, + "step": 50580 + }, + { + "epoch": 0.10219500074742341, + "grad_norm": 451.1026916503906, + "learning_rate": 9.999957585130438e-06, + "loss": 26.8705, + "step": 50590 + }, + { + "epoch": 0.10221520138010723, + "grad_norm": 106.65933990478516, + "learning_rate": 9.999956135155688e-06, + "loss": 16.0425, + "step": 50600 + }, + { + "epoch": 0.10223540201279104, + "grad_norm": 511.0063781738281, + "learning_rate": 9.999954660811761e-06, + "loss": 26.8811, + "step": 50610 + }, + { + "epoch": 0.10225560264547486, + "grad_norm": 331.65325927734375, + "learning_rate": 9.99995316209866e-06, + "loss": 28.7006, + "step": 50620 + }, + { + "epoch": 0.10227580327815868, + "grad_norm": 638.6454467773438, + "learning_rate": 9.999951639016396e-06, + "loss": 43.5507, + "step": 50630 + }, + { + "epoch": 0.10229600391084248, + "grad_norm": 381.807861328125, + "learning_rate": 9.999950091564972e-06, + "loss": 29.0359, + "step": 50640 + }, + { + "epoch": 0.1023162045435263, + "grad_norm": 1509.6329345703125, + "learning_rate": 9.999948519744397e-06, + "loss": 29.2152, + "step": 50650 + }, + { + "epoch": 0.10233640517621012, + "grad_norm": 591.4840698242188, + "learning_rate": 9.999946923554681e-06, + "loss": 33.325, + "step": 50660 + }, + { + "epoch": 0.10235660580889394, + "grad_norm": 924.4262084960938, + "learning_rate": 9.99994530299583e-06, + "loss": 45.0304, + "step": 50670 + }, + { + "epoch": 0.10237680644157775, + "grad_norm": 101.5724868774414, + "learning_rate": 9.99994365806785e-06, + "loss": 28.9632, + "step": 50680 + }, + { + "epoch": 0.10239700707426157, + "grad_norm": 1574.7281494140625, + "learning_rate": 9.999941988770754e-06, + "loss": 32.7535, + "step": 50690 + }, + { + "epoch": 0.10241720770694539, + "grad_norm": 59.04460906982422, + "learning_rate": 9.999940295104546e-06, + "loss": 24.2862, + "step": 50700 + }, + { + "epoch": 0.1024374083396292, + "grad_norm": 1890.5078125, + "learning_rate": 9.999938577069235e-06, + "loss": 36.4641, + "step": 50710 + }, + { + "epoch": 0.10245760897231301, + "grad_norm": 362.4627990722656, + "learning_rate": 9.99993683466483e-06, + "loss": 17.6841, + "step": 50720 + }, + { + "epoch": 0.10247780960499683, + "grad_norm": 637.8155517578125, + "learning_rate": 9.999935067891339e-06, + "loss": 36.074, + "step": 50730 + }, + { + "epoch": 0.10249801023768064, + "grad_norm": 397.3134460449219, + "learning_rate": 9.999933276748772e-06, + "loss": 31.947, + "step": 50740 + }, + { + "epoch": 0.10251821087036446, + "grad_norm": 405.8807067871094, + "learning_rate": 9.999931461237135e-06, + "loss": 42.5062, + "step": 50750 + }, + { + "epoch": 0.10253841150304828, + "grad_norm": 437.52545166015625, + "learning_rate": 9.99992962135644e-06, + "loss": 24.7354, + "step": 50760 + }, + { + "epoch": 0.10255861213573209, + "grad_norm": 597.0206298828125, + "learning_rate": 9.999927757106693e-06, + "loss": 22.6693, + "step": 50770 + }, + { + "epoch": 0.1025788127684159, + "grad_norm": 541.7193603515625, + "learning_rate": 9.999925868487905e-06, + "loss": 16.0042, + "step": 50780 + }, + { + "epoch": 0.10259901340109973, + "grad_norm": 262.95062255859375, + "learning_rate": 9.999923955500085e-06, + "loss": 42.3814, + "step": 50790 + }, + { + "epoch": 0.10261921403378353, + "grad_norm": 
9.531939506530762, + "learning_rate": 9.999922018143242e-06, + "loss": 35.0652, + "step": 50800 + }, + { + "epoch": 0.10263941466646735, + "grad_norm": 460.79962158203125, + "learning_rate": 9.999920056417385e-06, + "loss": 22.1603, + "step": 50810 + }, + { + "epoch": 0.10265961529915117, + "grad_norm": 277.33734130859375, + "learning_rate": 9.999918070322525e-06, + "loss": 27.1832, + "step": 50820 + }, + { + "epoch": 0.10267981593183499, + "grad_norm": 694.1124877929688, + "learning_rate": 9.999916059858669e-06, + "loss": 18.5083, + "step": 50830 + }, + { + "epoch": 0.1027000165645188, + "grad_norm": 479.1972961425781, + "learning_rate": 9.999914025025831e-06, + "loss": 40.0288, + "step": 50840 + }, + { + "epoch": 0.10272021719720262, + "grad_norm": 374.9565734863281, + "learning_rate": 9.999911965824018e-06, + "loss": 20.3209, + "step": 50850 + }, + { + "epoch": 0.10274041782988644, + "grad_norm": 387.45050048828125, + "learning_rate": 9.99990988225324e-06, + "loss": 13.5861, + "step": 50860 + }, + { + "epoch": 0.10276061846257024, + "grad_norm": 0.3691152036190033, + "learning_rate": 9.999907774313507e-06, + "loss": 35.716, + "step": 50870 + }, + { + "epoch": 0.10278081909525406, + "grad_norm": 792.8024291992188, + "learning_rate": 9.99990564200483e-06, + "loss": 29.3103, + "step": 50880 + }, + { + "epoch": 0.10280101972793788, + "grad_norm": 0.0, + "learning_rate": 9.999903485327221e-06, + "loss": 24.5385, + "step": 50890 + }, + { + "epoch": 0.10282122036062169, + "grad_norm": 558.761474609375, + "learning_rate": 9.999901304280686e-06, + "loss": 29.9813, + "step": 50900 + }, + { + "epoch": 0.10284142099330551, + "grad_norm": 453.54083251953125, + "learning_rate": 9.99989909886524e-06, + "loss": 25.5813, + "step": 50910 + }, + { + "epoch": 0.10286162162598933, + "grad_norm": 423.0571594238281, + "learning_rate": 9.999896869080893e-06, + "loss": 35.28, + "step": 50920 + }, + { + "epoch": 0.10288182225867314, + "grad_norm": 456.1601257324219, + "learning_rate": 9.999894614927655e-06, + "loss": 38.2854, + "step": 50930 + }, + { + "epoch": 0.10290202289135696, + "grad_norm": 271.1506042480469, + "learning_rate": 9.999892336405534e-06, + "loss": 24.9651, + "step": 50940 + }, + { + "epoch": 0.10292222352404078, + "grad_norm": 677.3641357421875, + "learning_rate": 9.999890033514547e-06, + "loss": 15.468, + "step": 50950 + }, + { + "epoch": 0.10294242415672458, + "grad_norm": 1117.5863037109375, + "learning_rate": 9.999887706254703e-06, + "loss": 23.983, + "step": 50960 + }, + { + "epoch": 0.1029626247894084, + "grad_norm": 292.31304931640625, + "learning_rate": 9.999885354626011e-06, + "loss": 19.6384, + "step": 50970 + }, + { + "epoch": 0.10298282542209222, + "grad_norm": 214.32498168945312, + "learning_rate": 9.999882978628485e-06, + "loss": 16.6767, + "step": 50980 + }, + { + "epoch": 0.10300302605477604, + "grad_norm": 1378.0211181640625, + "learning_rate": 9.999880578262135e-06, + "loss": 36.312, + "step": 50990 + }, + { + "epoch": 0.10302322668745985, + "grad_norm": 745.3373413085938, + "learning_rate": 9.999878153526974e-06, + "loss": 43.9458, + "step": 51000 + }, + { + "epoch": 0.10304342732014367, + "grad_norm": 415.322265625, + "learning_rate": 9.999875704423015e-06, + "loss": 20.623, + "step": 51010 + }, + { + "epoch": 0.10306362795282749, + "grad_norm": 57.3155517578125, + "learning_rate": 9.999873230950265e-06, + "loss": 25.575, + "step": 51020 + }, + { + "epoch": 0.1030838285855113, + "grad_norm": 103.42644500732422, + "learning_rate": 9.99987073310874e-06, + "loss": 41.701, + 
"step": 51030 + }, + { + "epoch": 0.10310402921819511, + "grad_norm": 331.5990295410156, + "learning_rate": 9.999868210898454e-06, + "loss": 20.5087, + "step": 51040 + }, + { + "epoch": 0.10312422985087893, + "grad_norm": 333.0093994140625, + "learning_rate": 9.999865664319414e-06, + "loss": 49.9883, + "step": 51050 + }, + { + "epoch": 0.10314443048356274, + "grad_norm": 185.40383911132812, + "learning_rate": 9.999863093371638e-06, + "loss": 28.2973, + "step": 51060 + }, + { + "epoch": 0.10316463111624656, + "grad_norm": 212.54566955566406, + "learning_rate": 9.999860498055134e-06, + "loss": 25.8497, + "step": 51070 + }, + { + "epoch": 0.10318483174893038, + "grad_norm": 758.4007568359375, + "learning_rate": 9.999857878369917e-06, + "loss": 49.4042, + "step": 51080 + }, + { + "epoch": 0.10320503238161419, + "grad_norm": 395.51055908203125, + "learning_rate": 9.999855234315997e-06, + "loss": 24.5271, + "step": 51090 + }, + { + "epoch": 0.103225233014298, + "grad_norm": 550.81298828125, + "learning_rate": 9.99985256589339e-06, + "loss": 43.8895, + "step": 51100 + }, + { + "epoch": 0.10324543364698183, + "grad_norm": 79.79631805419922, + "learning_rate": 9.999849873102108e-06, + "loss": 16.611, + "step": 51110 + }, + { + "epoch": 0.10326563427966563, + "grad_norm": 463.4939270019531, + "learning_rate": 9.999847155942165e-06, + "loss": 17.6845, + "step": 51120 + }, + { + "epoch": 0.10328583491234945, + "grad_norm": 754.7512817382812, + "learning_rate": 9.999844414413574e-06, + "loss": 25.539, + "step": 51130 + }, + { + "epoch": 0.10330603554503327, + "grad_norm": 0.0, + "learning_rate": 9.999841648516347e-06, + "loss": 14.2443, + "step": 51140 + }, + { + "epoch": 0.10332623617771709, + "grad_norm": 267.25244140625, + "learning_rate": 9.999838858250497e-06, + "loss": 21.5823, + "step": 51150 + }, + { + "epoch": 0.1033464368104009, + "grad_norm": 285.90228271484375, + "learning_rate": 9.99983604361604e-06, + "loss": 22.5378, + "step": 51160 + }, + { + "epoch": 0.10336663744308472, + "grad_norm": 921.9609985351562, + "learning_rate": 9.999833204612988e-06, + "loss": 43.638, + "step": 51170 + }, + { + "epoch": 0.10338683807576854, + "grad_norm": 650.6875610351562, + "learning_rate": 9.999830341241354e-06, + "loss": 23.4214, + "step": 51180 + }, + { + "epoch": 0.10340703870845235, + "grad_norm": 594.7377319335938, + "learning_rate": 9.999827453501156e-06, + "loss": 29.7626, + "step": 51190 + }, + { + "epoch": 0.10342723934113617, + "grad_norm": 707.9284057617188, + "learning_rate": 9.999824541392404e-06, + "loss": 21.3611, + "step": 51200 + }, + { + "epoch": 0.10344743997381999, + "grad_norm": 590.6197509765625, + "learning_rate": 9.999821604915114e-06, + "loss": 26.3745, + "step": 51210 + }, + { + "epoch": 0.10346764060650379, + "grad_norm": 358.73663330078125, + "learning_rate": 9.999818644069299e-06, + "loss": 17.7493, + "step": 51220 + }, + { + "epoch": 0.10348784123918761, + "grad_norm": 354.9080505371094, + "learning_rate": 9.999815658854976e-06, + "loss": 19.0474, + "step": 51230 + }, + { + "epoch": 0.10350804187187143, + "grad_norm": 677.2418823242188, + "learning_rate": 9.999812649272157e-06, + "loss": 24.1071, + "step": 51240 + }, + { + "epoch": 0.10352824250455524, + "grad_norm": 390.850341796875, + "learning_rate": 9.999809615320857e-06, + "loss": 27.0657, + "step": 51250 + }, + { + "epoch": 0.10354844313723906, + "grad_norm": 764.44580078125, + "learning_rate": 9.999806557001092e-06, + "loss": 32.6188, + "step": 51260 + }, + { + "epoch": 0.10356864376992288, + "grad_norm": 
365.0505676269531, + "learning_rate": 9.999803474312877e-06, + "loss": 41.0896, + "step": 51270 + }, + { + "epoch": 0.10358884440260668, + "grad_norm": 994.4592895507812, + "learning_rate": 9.999800367256225e-06, + "loss": 40.0048, + "step": 51280 + }, + { + "epoch": 0.1036090450352905, + "grad_norm": 393.38275146484375, + "learning_rate": 9.999797235831153e-06, + "loss": 34.3346, + "step": 51290 + }, + { + "epoch": 0.10362924566797432, + "grad_norm": 161.1769561767578, + "learning_rate": 9.999794080037675e-06, + "loss": 25.0284, + "step": 51300 + }, + { + "epoch": 0.10364944630065814, + "grad_norm": 1198.1754150390625, + "learning_rate": 9.999790899875807e-06, + "loss": 28.4299, + "step": 51310 + }, + { + "epoch": 0.10366964693334195, + "grad_norm": 631.53759765625, + "learning_rate": 9.999787695345565e-06, + "loss": 27.3778, + "step": 51320 + }, + { + "epoch": 0.10368984756602577, + "grad_norm": 743.1617431640625, + "learning_rate": 9.999784466446965e-06, + "loss": 31.1852, + "step": 51330 + }, + { + "epoch": 0.10371004819870959, + "grad_norm": 666.7760009765625, + "learning_rate": 9.99978121318002e-06, + "loss": 17.261, + "step": 51340 + }, + { + "epoch": 0.1037302488313934, + "grad_norm": 412.00347900390625, + "learning_rate": 9.99977793554475e-06, + "loss": 10.3803, + "step": 51350 + }, + { + "epoch": 0.10375044946407722, + "grad_norm": 355.3598327636719, + "learning_rate": 9.999774633541169e-06, + "loss": 20.7727, + "step": 51360 + }, + { + "epoch": 0.10377065009676104, + "grad_norm": 631.0842895507812, + "learning_rate": 9.999771307169291e-06, + "loss": 28.0426, + "step": 51370 + }, + { + "epoch": 0.10379085072944484, + "grad_norm": 382.2905578613281, + "learning_rate": 9.999767956429135e-06, + "loss": 25.7704, + "step": 51380 + }, + { + "epoch": 0.10381105136212866, + "grad_norm": 224.6883544921875, + "learning_rate": 9.999764581320714e-06, + "loss": 32.9772, + "step": 51390 + }, + { + "epoch": 0.10383125199481248, + "grad_norm": 2052.6513671875, + "learning_rate": 9.99976118184405e-06, + "loss": 28.7195, + "step": 51400 + }, + { + "epoch": 0.10385145262749629, + "grad_norm": 920.0391845703125, + "learning_rate": 9.999757757999155e-06, + "loss": 22.5864, + "step": 51410 + }, + { + "epoch": 0.10387165326018011, + "grad_norm": 1105.635009765625, + "learning_rate": 9.999754309786047e-06, + "loss": 39.2646, + "step": 51420 + }, + { + "epoch": 0.10389185389286393, + "grad_norm": 618.7783813476562, + "learning_rate": 9.999750837204743e-06, + "loss": 23.2488, + "step": 51430 + }, + { + "epoch": 0.10391205452554773, + "grad_norm": 17.139188766479492, + "learning_rate": 9.99974734025526e-06, + "loss": 14.946, + "step": 51440 + }, + { + "epoch": 0.10393225515823155, + "grad_norm": 938.8400268554688, + "learning_rate": 9.999743818937614e-06, + "loss": 32.3364, + "step": 51450 + }, + { + "epoch": 0.10395245579091537, + "grad_norm": 339.00128173828125, + "learning_rate": 9.999740273251824e-06, + "loss": 52.0854, + "step": 51460 + }, + { + "epoch": 0.1039726564235992, + "grad_norm": 901.957275390625, + "learning_rate": 9.999736703197907e-06, + "loss": 23.8989, + "step": 51470 + }, + { + "epoch": 0.103992857056283, + "grad_norm": 620.7359008789062, + "learning_rate": 9.999733108775878e-06, + "loss": 29.3632, + "step": 51480 + }, + { + "epoch": 0.10401305768896682, + "grad_norm": 580.5281372070312, + "learning_rate": 9.999729489985757e-06, + "loss": 19.642, + "step": 51490 + }, + { + "epoch": 0.10403325832165064, + "grad_norm": 936.9527587890625, + "learning_rate": 9.999725846827562e-06, + "loss": 
32.875, + "step": 51500 + }, + { + "epoch": 0.10405345895433445, + "grad_norm": 901.7499389648438, + "learning_rate": 9.999722179301309e-06, + "loss": 29.5425, + "step": 51510 + }, + { + "epoch": 0.10407365958701827, + "grad_norm": 1585.744384765625, + "learning_rate": 9.999718487407015e-06, + "loss": 38.8731, + "step": 51520 + }, + { + "epoch": 0.10409386021970209, + "grad_norm": 512.9431762695312, + "learning_rate": 9.9997147711447e-06, + "loss": 19.3307, + "step": 51530 + }, + { + "epoch": 0.10411406085238589, + "grad_norm": 902.8880615234375, + "learning_rate": 9.999711030514383e-06, + "loss": 33.5718, + "step": 51540 + }, + { + "epoch": 0.10413426148506971, + "grad_norm": 299.1571350097656, + "learning_rate": 9.99970726551608e-06, + "loss": 16.439, + "step": 51550 + }, + { + "epoch": 0.10415446211775353, + "grad_norm": 574.4469604492188, + "learning_rate": 9.999703476149808e-06, + "loss": 23.9682, + "step": 51560 + }, + { + "epoch": 0.10417466275043734, + "grad_norm": 555.4169921875, + "learning_rate": 9.999699662415592e-06, + "loss": 27.2174, + "step": 51570 + }, + { + "epoch": 0.10419486338312116, + "grad_norm": 558.3908081054688, + "learning_rate": 9.999695824313443e-06, + "loss": 39.1974, + "step": 51580 + }, + { + "epoch": 0.10421506401580498, + "grad_norm": 407.7419738769531, + "learning_rate": 9.999691961843385e-06, + "loss": 28.7544, + "step": 51590 + }, + { + "epoch": 0.10423526464848878, + "grad_norm": 324.6638488769531, + "learning_rate": 9.999688075005434e-06, + "loss": 39.8842, + "step": 51600 + }, + { + "epoch": 0.1042554652811726, + "grad_norm": 339.9502258300781, + "learning_rate": 9.999684163799609e-06, + "loss": 21.535, + "step": 51610 + }, + { + "epoch": 0.10427566591385642, + "grad_norm": 338.2267150878906, + "learning_rate": 9.99968022822593e-06, + "loss": 17.7734, + "step": 51620 + }, + { + "epoch": 0.10429586654654023, + "grad_norm": 398.8443603515625, + "learning_rate": 9.999676268284416e-06, + "loss": 17.9831, + "step": 51630 + }, + { + "epoch": 0.10431606717922405, + "grad_norm": 372.1466979980469, + "learning_rate": 9.999672283975085e-06, + "loss": 47.2209, + "step": 51640 + }, + { + "epoch": 0.10433626781190787, + "grad_norm": 289.52178955078125, + "learning_rate": 9.99966827529796e-06, + "loss": 46.3143, + "step": 51650 + }, + { + "epoch": 0.10435646844459169, + "grad_norm": 84.61372375488281, + "learning_rate": 9.999664242253058e-06, + "loss": 20.6101, + "step": 51660 + }, + { + "epoch": 0.1043766690772755, + "grad_norm": 750.5768432617188, + "learning_rate": 9.999660184840398e-06, + "loss": 47.7543, + "step": 51670 + }, + { + "epoch": 0.10439686970995932, + "grad_norm": 398.7869567871094, + "learning_rate": 9.999656103060001e-06, + "loss": 22.8918, + "step": 51680 + }, + { + "epoch": 0.10441707034264314, + "grad_norm": 359.3421325683594, + "learning_rate": 9.999651996911886e-06, + "loss": 31.5459, + "step": 51690 + }, + { + "epoch": 0.10443727097532694, + "grad_norm": 285.4722595214844, + "learning_rate": 9.999647866396073e-06, + "loss": 16.909, + "step": 51700 + }, + { + "epoch": 0.10445747160801076, + "grad_norm": 381.8937072753906, + "learning_rate": 9.999643711512586e-06, + "loss": 27.6456, + "step": 51710 + }, + { + "epoch": 0.10447767224069458, + "grad_norm": 206.19906616210938, + "learning_rate": 9.999639532261438e-06, + "loss": 23.2891, + "step": 51720 + }, + { + "epoch": 0.10449787287337839, + "grad_norm": 1186.94482421875, + "learning_rate": 9.999635328642655e-06, + "loss": 30.3152, + "step": 51730 + }, + { + "epoch": 0.10451807350606221, + 
"grad_norm": 0.0, + "learning_rate": 9.999631100656255e-06, + "loss": 24.8379, + "step": 51740 + }, + { + "epoch": 0.10453827413874603, + "grad_norm": 702.4994506835938, + "learning_rate": 9.999626848302261e-06, + "loss": 25.1743, + "step": 51750 + }, + { + "epoch": 0.10455847477142984, + "grad_norm": 430.71099853515625, + "learning_rate": 9.99962257158069e-06, + "loss": 41.395, + "step": 51760 + }, + { + "epoch": 0.10457867540411366, + "grad_norm": 786.567626953125, + "learning_rate": 9.999618270491567e-06, + "loss": 21.2917, + "step": 51770 + }, + { + "epoch": 0.10459887603679748, + "grad_norm": 536.7110595703125, + "learning_rate": 9.999613945034909e-06, + "loss": 40.1751, + "step": 51780 + }, + { + "epoch": 0.10461907666948128, + "grad_norm": 436.41864013671875, + "learning_rate": 9.999609595210743e-06, + "loss": 11.29, + "step": 51790 + }, + { + "epoch": 0.1046392773021651, + "grad_norm": 682.1253051757812, + "learning_rate": 9.999605221019082e-06, + "loss": 48.1011, + "step": 51800 + }, + { + "epoch": 0.10465947793484892, + "grad_norm": 248.62008666992188, + "learning_rate": 9.999600822459952e-06, + "loss": 15.4513, + "step": 51810 + }, + { + "epoch": 0.10467967856753274, + "grad_norm": 7.036900997161865, + "learning_rate": 9.999596399533375e-06, + "loss": 34.9343, + "step": 51820 + }, + { + "epoch": 0.10469987920021655, + "grad_norm": 166.2686767578125, + "learning_rate": 9.999591952239371e-06, + "loss": 14.706, + "step": 51830 + }, + { + "epoch": 0.10472007983290037, + "grad_norm": 692.9573974609375, + "learning_rate": 9.999587480577964e-06, + "loss": 23.2436, + "step": 51840 + }, + { + "epoch": 0.10474028046558419, + "grad_norm": 597.7660522460938, + "learning_rate": 9.999582984549172e-06, + "loss": 25.2128, + "step": 51850 + }, + { + "epoch": 0.104760481098268, + "grad_norm": 51.638824462890625, + "learning_rate": 9.99957846415302e-06, + "loss": 15.9829, + "step": 51860 + }, + { + "epoch": 0.10478068173095181, + "grad_norm": 1591.6973876953125, + "learning_rate": 9.999573919389527e-06, + "loss": 34.3104, + "step": 51870 + }, + { + "epoch": 0.10480088236363563, + "grad_norm": 325.8369140625, + "learning_rate": 9.999569350258717e-06, + "loss": 35.8628, + "step": 51880 + }, + { + "epoch": 0.10482108299631944, + "grad_norm": 702.3936157226562, + "learning_rate": 9.999564756760616e-06, + "loss": 37.2984, + "step": 51890 + }, + { + "epoch": 0.10484128362900326, + "grad_norm": 442.11151123046875, + "learning_rate": 9.999560138895238e-06, + "loss": 31.7938, + "step": 51900 + }, + { + "epoch": 0.10486148426168708, + "grad_norm": 658.4376831054688, + "learning_rate": 9.999555496662614e-06, + "loss": 24.6095, + "step": 51910 + }, + { + "epoch": 0.10488168489437089, + "grad_norm": 486.6987609863281, + "learning_rate": 9.999550830062762e-06, + "loss": 30.5503, + "step": 51920 + }, + { + "epoch": 0.1049018855270547, + "grad_norm": 486.70977783203125, + "learning_rate": 9.999546139095706e-06, + "loss": 30.3326, + "step": 51930 + }, + { + "epoch": 0.10492208615973853, + "grad_norm": 426.9347839355469, + "learning_rate": 9.999541423761468e-06, + "loss": 26.62, + "step": 51940 + }, + { + "epoch": 0.10494228679242233, + "grad_norm": 310.9057312011719, + "learning_rate": 9.999536684060071e-06, + "loss": 31.6233, + "step": 51950 + }, + { + "epoch": 0.10496248742510615, + "grad_norm": 875.162109375, + "learning_rate": 9.999531919991538e-06, + "loss": 31.4422, + "step": 51960 + }, + { + "epoch": 0.10498268805778997, + "grad_norm": 545.2814331054688, + "learning_rate": 9.999527131555894e-06, + "loss": 
24.9442, + "step": 51970 + }, + { + "epoch": 0.10500288869047379, + "grad_norm": 427.1497802734375, + "learning_rate": 9.99952231875316e-06, + "loss": 25.8614, + "step": 51980 + }, + { + "epoch": 0.1050230893231576, + "grad_norm": 203.9998321533203, + "learning_rate": 9.999517481583363e-06, + "loss": 27.2882, + "step": 51990 + }, + { + "epoch": 0.10504328995584142, + "grad_norm": 637.4127197265625, + "learning_rate": 9.999512620046523e-06, + "loss": 19.4995, + "step": 52000 + }, + { + "epoch": 0.10506349058852524, + "grad_norm": 1.447941541671753, + "learning_rate": 9.999507734142663e-06, + "loss": 24.0484, + "step": 52010 + }, + { + "epoch": 0.10508369122120904, + "grad_norm": 347.05963134765625, + "learning_rate": 9.999502823871809e-06, + "loss": 11.7482, + "step": 52020 + }, + { + "epoch": 0.10510389185389286, + "grad_norm": 711.4403076171875, + "learning_rate": 9.999497889233987e-06, + "loss": 13.4801, + "step": 52030 + }, + { + "epoch": 0.10512409248657668, + "grad_norm": 358.95794677734375, + "learning_rate": 9.999492930229217e-06, + "loss": 19.8398, + "step": 52040 + }, + { + "epoch": 0.10514429311926049, + "grad_norm": 154.77569580078125, + "learning_rate": 9.999487946857526e-06, + "loss": 27.7863, + "step": 52050 + }, + { + "epoch": 0.10516449375194431, + "grad_norm": 385.1726379394531, + "learning_rate": 9.999482939118936e-06, + "loss": 20.5073, + "step": 52060 + }, + { + "epoch": 0.10518469438462813, + "grad_norm": 1505.1348876953125, + "learning_rate": 9.999477907013473e-06, + "loss": 39.0965, + "step": 52070 + }, + { + "epoch": 0.10520489501731194, + "grad_norm": 717.062255859375, + "learning_rate": 9.999472850541161e-06, + "loss": 28.785, + "step": 52080 + }, + { + "epoch": 0.10522509564999576, + "grad_norm": 666.9161987304688, + "learning_rate": 9.999467769702023e-06, + "loss": 24.6605, + "step": 52090 + }, + { + "epoch": 0.10524529628267958, + "grad_norm": 605.6522216796875, + "learning_rate": 9.999462664496088e-06, + "loss": 18.0582, + "step": 52100 + }, + { + "epoch": 0.10526549691536338, + "grad_norm": 1095.1883544921875, + "learning_rate": 9.999457534923377e-06, + "loss": 25.2785, + "step": 52110 + }, + { + "epoch": 0.1052856975480472, + "grad_norm": 414.3034973144531, + "learning_rate": 9.999452380983915e-06, + "loss": 29.4141, + "step": 52120 + }, + { + "epoch": 0.10530589818073102, + "grad_norm": 270.92645263671875, + "learning_rate": 9.999447202677732e-06, + "loss": 25.1118, + "step": 52130 + }, + { + "epoch": 0.10532609881341484, + "grad_norm": 488.3146667480469, + "learning_rate": 9.999442000004848e-06, + "loss": 22.3528, + "step": 52140 + }, + { + "epoch": 0.10534629944609865, + "grad_norm": 535.1305541992188, + "learning_rate": 9.99943677296529e-06, + "loss": 33.6538, + "step": 52150 + }, + { + "epoch": 0.10536650007878247, + "grad_norm": 386.5953063964844, + "learning_rate": 9.999431521559081e-06, + "loss": 33.8812, + "step": 52160 + }, + { + "epoch": 0.10538670071146629, + "grad_norm": 412.7962341308594, + "learning_rate": 9.999426245786253e-06, + "loss": 14.0402, + "step": 52170 + }, + { + "epoch": 0.1054069013441501, + "grad_norm": 908.8006591796875, + "learning_rate": 9.999420945646828e-06, + "loss": 36.5653, + "step": 52180 + }, + { + "epoch": 0.10542710197683391, + "grad_norm": 831.282470703125, + "learning_rate": 9.99941562114083e-06, + "loss": 13.5727, + "step": 52190 + }, + { + "epoch": 0.10544730260951773, + "grad_norm": 193.27737426757812, + "learning_rate": 9.999410272268285e-06, + "loss": 17.8289, + "step": 52200 + }, + { + "epoch": 
0.10546750324220154, + "grad_norm": 293.7889099121094, + "learning_rate": 9.999404899029222e-06, + "loss": 23.4262, + "step": 52210 + }, + { + "epoch": 0.10548770387488536, + "grad_norm": 384.21270751953125, + "learning_rate": 9.999399501423667e-06, + "loss": 22.6356, + "step": 52220 + }, + { + "epoch": 0.10550790450756918, + "grad_norm": 239.36883544921875, + "learning_rate": 9.999394079451643e-06, + "loss": 20.3355, + "step": 52230 + }, + { + "epoch": 0.10552810514025299, + "grad_norm": 570.4447021484375, + "learning_rate": 9.99938863311318e-06, + "loss": 15.0387, + "step": 52240 + }, + { + "epoch": 0.1055483057729368, + "grad_norm": 514.3821411132812, + "learning_rate": 9.999383162408303e-06, + "loss": 19.0036, + "step": 52250 + }, + { + "epoch": 0.10556850640562063, + "grad_norm": 615.8590698242188, + "learning_rate": 9.99937766733704e-06, + "loss": 22.2815, + "step": 52260 + }, + { + "epoch": 0.10558870703830443, + "grad_norm": 272.0684509277344, + "learning_rate": 9.999372147899416e-06, + "loss": 11.1711, + "step": 52270 + }, + { + "epoch": 0.10560890767098825, + "grad_norm": 31.58615493774414, + "learning_rate": 9.999366604095458e-06, + "loss": 44.4439, + "step": 52280 + }, + { + "epoch": 0.10562910830367207, + "grad_norm": 1436.013916015625, + "learning_rate": 9.999361035925193e-06, + "loss": 43.548, + "step": 52290 + }, + { + "epoch": 0.10564930893635589, + "grad_norm": 1929.2962646484375, + "learning_rate": 9.999355443388649e-06, + "loss": 40.7206, + "step": 52300 + }, + { + "epoch": 0.1056695095690397, + "grad_norm": 864.4912109375, + "learning_rate": 9.999349826485854e-06, + "loss": 24.2701, + "step": 52310 + }, + { + "epoch": 0.10568971020172352, + "grad_norm": 441.25213623046875, + "learning_rate": 9.999344185216833e-06, + "loss": 31.1885, + "step": 52320 + }, + { + "epoch": 0.10570991083440734, + "grad_norm": 1056.015380859375, + "learning_rate": 9.999338519581616e-06, + "loss": 31.1782, + "step": 52330 + }, + { + "epoch": 0.10573011146709115, + "grad_norm": 326.104248046875, + "learning_rate": 9.999332829580227e-06, + "loss": 26.628, + "step": 52340 + }, + { + "epoch": 0.10575031209977497, + "grad_norm": 393.9754638671875, + "learning_rate": 9.999327115212698e-06, + "loss": 18.3491, + "step": 52350 + }, + { + "epoch": 0.10577051273245879, + "grad_norm": 493.7698059082031, + "learning_rate": 9.999321376479054e-06, + "loss": 22.4783, + "step": 52360 + }, + { + "epoch": 0.10579071336514259, + "grad_norm": 261.75701904296875, + "learning_rate": 9.999315613379326e-06, + "loss": 20.0119, + "step": 52370 + }, + { + "epoch": 0.10581091399782641, + "grad_norm": 341.0134582519531, + "learning_rate": 9.999309825913538e-06, + "loss": 21.6089, + "step": 52380 + }, + { + "epoch": 0.10583111463051023, + "grad_norm": 845.4539184570312, + "learning_rate": 9.999304014081721e-06, + "loss": 26.0644, + "step": 52390 + }, + { + "epoch": 0.10585131526319404, + "grad_norm": 114.71834564208984, + "learning_rate": 9.999298177883902e-06, + "loss": 22.7964, + "step": 52400 + }, + { + "epoch": 0.10587151589587786, + "grad_norm": 818.751953125, + "learning_rate": 9.999292317320112e-06, + "loss": 12.1773, + "step": 52410 + }, + { + "epoch": 0.10589171652856168, + "grad_norm": 947.8588256835938, + "learning_rate": 9.999286432390376e-06, + "loss": 39.9747, + "step": 52420 + }, + { + "epoch": 0.10591191716124548, + "grad_norm": 605.8237915039062, + "learning_rate": 9.999280523094724e-06, + "loss": 33.8493, + "step": 52430 + }, + { + "epoch": 0.1059321177939293, + "grad_norm": 116.42375183105469, + 
"learning_rate": 9.999274589433186e-06, + "loss": 34.6618, + "step": 52440 + }, + { + "epoch": 0.10595231842661312, + "grad_norm": 577.7481689453125, + "learning_rate": 9.99926863140579e-06, + "loss": 27.7989, + "step": 52450 + }, + { + "epoch": 0.10597251905929694, + "grad_norm": 676.259521484375, + "learning_rate": 9.999262649012564e-06, + "loss": 23.1021, + "step": 52460 + }, + { + "epoch": 0.10599271969198075, + "grad_norm": 573.849853515625, + "learning_rate": 9.99925664225354e-06, + "loss": 11.2289, + "step": 52470 + }, + { + "epoch": 0.10601292032466457, + "grad_norm": 659.0547485351562, + "learning_rate": 9.999250611128743e-06, + "loss": 28.9521, + "step": 52480 + }, + { + "epoch": 0.10603312095734839, + "grad_norm": 242.6114044189453, + "learning_rate": 9.999244555638205e-06, + "loss": 12.3119, + "step": 52490 + }, + { + "epoch": 0.1060533215900322, + "grad_norm": 634.4039306640625, + "learning_rate": 9.999238475781957e-06, + "loss": 11.1968, + "step": 52500 + }, + { + "epoch": 0.10607352222271602, + "grad_norm": 402.59869384765625, + "learning_rate": 9.999232371560027e-06, + "loss": 15.2867, + "step": 52510 + }, + { + "epoch": 0.10609372285539984, + "grad_norm": 417.6640319824219, + "learning_rate": 9.999226242972445e-06, + "loss": 11.4818, + "step": 52520 + }, + { + "epoch": 0.10611392348808364, + "grad_norm": 556.7757568359375, + "learning_rate": 9.999220090019238e-06, + "loss": 28.6786, + "step": 52530 + }, + { + "epoch": 0.10613412412076746, + "grad_norm": 875.1763305664062, + "learning_rate": 9.99921391270044e-06, + "loss": 33.0905, + "step": 52540 + }, + { + "epoch": 0.10615432475345128, + "grad_norm": 915.5540161132812, + "learning_rate": 9.999207711016081e-06, + "loss": 17.076, + "step": 52550 + }, + { + "epoch": 0.10617452538613509, + "grad_norm": 349.79315185546875, + "learning_rate": 9.999201484966188e-06, + "loss": 17.2544, + "step": 52560 + }, + { + "epoch": 0.10619472601881891, + "grad_norm": 351.5311584472656, + "learning_rate": 9.999195234550796e-06, + "loss": 37.0559, + "step": 52570 + }, + { + "epoch": 0.10621492665150273, + "grad_norm": 649.7620849609375, + "learning_rate": 9.99918895976993e-06, + "loss": 39.2075, + "step": 52580 + }, + { + "epoch": 0.10623512728418653, + "grad_norm": 445.8486633300781, + "learning_rate": 9.999182660623625e-06, + "loss": 26.4072, + "step": 52590 + }, + { + "epoch": 0.10625532791687035, + "grad_norm": 693.0591430664062, + "learning_rate": 9.999176337111908e-06, + "loss": 33.9964, + "step": 52600 + }, + { + "epoch": 0.10627552854955417, + "grad_norm": 428.8168640136719, + "learning_rate": 9.999169989234815e-06, + "loss": 19.2667, + "step": 52610 + }, + { + "epoch": 0.106295729182238, + "grad_norm": 706.9661865234375, + "learning_rate": 9.999163616992371e-06, + "loss": 27.4699, + "step": 52620 + }, + { + "epoch": 0.1063159298149218, + "grad_norm": 27.181907653808594, + "learning_rate": 9.999157220384612e-06, + "loss": 11.5111, + "step": 52630 + }, + { + "epoch": 0.10633613044760562, + "grad_norm": 1255.19873046875, + "learning_rate": 9.999150799411565e-06, + "loss": 23.6398, + "step": 52640 + }, + { + "epoch": 0.10635633108028944, + "grad_norm": 1140.2137451171875, + "learning_rate": 9.999144354073264e-06, + "loss": 41.7556, + "step": 52650 + }, + { + "epoch": 0.10637653171297325, + "grad_norm": 892.744384765625, + "learning_rate": 9.999137884369741e-06, + "loss": 36.8008, + "step": 52660 + }, + { + "epoch": 0.10639673234565707, + "grad_norm": 619.9985961914062, + "learning_rate": 9.999131390301027e-06, + "loss": 21.2839, + 
"step": 52670 + }, + { + "epoch": 0.10641693297834089, + "grad_norm": 673.101806640625, + "learning_rate": 9.99912487186715e-06, + "loss": 29.2515, + "step": 52680 + }, + { + "epoch": 0.10643713361102469, + "grad_norm": 835.369384765625, + "learning_rate": 9.999118329068148e-06, + "loss": 24.7376, + "step": 52690 + }, + { + "epoch": 0.10645733424370851, + "grad_norm": 894.3941650390625, + "learning_rate": 9.999111761904046e-06, + "loss": 41.1717, + "step": 52700 + }, + { + "epoch": 0.10647753487639233, + "grad_norm": 427.3544921875, + "learning_rate": 9.999105170374881e-06, + "loss": 19.4941, + "step": 52710 + }, + { + "epoch": 0.10649773550907614, + "grad_norm": 622.7033081054688, + "learning_rate": 9.999098554480685e-06, + "loss": 35.2149, + "step": 52720 + }, + { + "epoch": 0.10651793614175996, + "grad_norm": 0.0, + "learning_rate": 9.999091914221487e-06, + "loss": 21.8452, + "step": 52730 + }, + { + "epoch": 0.10653813677444378, + "grad_norm": 916.897705078125, + "learning_rate": 9.999085249597322e-06, + "loss": 17.9101, + "step": 52740 + }, + { + "epoch": 0.10655833740712758, + "grad_norm": 294.47430419921875, + "learning_rate": 9.999078560608221e-06, + "loss": 30.3524, + "step": 52750 + }, + { + "epoch": 0.1065785380398114, + "grad_norm": 421.6717834472656, + "learning_rate": 9.999071847254219e-06, + "loss": 19.8002, + "step": 52760 + }, + { + "epoch": 0.10659873867249522, + "grad_norm": 1147.113037109375, + "learning_rate": 9.999065109535346e-06, + "loss": 35.8034, + "step": 52770 + }, + { + "epoch": 0.10661893930517904, + "grad_norm": 404.02435302734375, + "learning_rate": 9.999058347451638e-06, + "loss": 37.1147, + "step": 52780 + }, + { + "epoch": 0.10663913993786285, + "grad_norm": 252.00086975097656, + "learning_rate": 9.999051561003124e-06, + "loss": 28.5985, + "step": 52790 + }, + { + "epoch": 0.10665934057054667, + "grad_norm": 530.6477661132812, + "learning_rate": 9.99904475018984e-06, + "loss": 15.8119, + "step": 52800 + }, + { + "epoch": 0.10667954120323049, + "grad_norm": 23.36237144470215, + "learning_rate": 9.999037915011819e-06, + "loss": 26.4043, + "step": 52810 + }, + { + "epoch": 0.1066997418359143, + "grad_norm": 336.9685363769531, + "learning_rate": 9.999031055469091e-06, + "loss": 24.8935, + "step": 52820 + }, + { + "epoch": 0.10671994246859812, + "grad_norm": 243.57879638671875, + "learning_rate": 9.999024171561693e-06, + "loss": 31.3197, + "step": 52830 + }, + { + "epoch": 0.10674014310128194, + "grad_norm": 294.1635437011719, + "learning_rate": 9.999017263289656e-06, + "loss": 52.6235, + "step": 52840 + }, + { + "epoch": 0.10676034373396574, + "grad_norm": 929.2692260742188, + "learning_rate": 9.999010330653019e-06, + "loss": 23.1442, + "step": 52850 + }, + { + "epoch": 0.10678054436664956, + "grad_norm": 376.0462951660156, + "learning_rate": 9.999003373651809e-06, + "loss": 27.5916, + "step": 52860 + }, + { + "epoch": 0.10680074499933338, + "grad_norm": 150.8600616455078, + "learning_rate": 9.998996392286062e-06, + "loss": 25.589, + "step": 52870 + }, + { + "epoch": 0.10682094563201719, + "grad_norm": 474.697265625, + "learning_rate": 9.998989386555815e-06, + "loss": 42.2781, + "step": 52880 + }, + { + "epoch": 0.10684114626470101, + "grad_norm": 419.69866943359375, + "learning_rate": 9.9989823564611e-06, + "loss": 25.1389, + "step": 52890 + }, + { + "epoch": 0.10686134689738483, + "grad_norm": 993.33203125, + "learning_rate": 9.99897530200195e-06, + "loss": 34.6098, + "step": 52900 + }, + { + "epoch": 0.10688154753006864, + "grad_norm": 916.4841918945312, + 
"learning_rate": 9.998968223178402e-06, + "loss": 28.6282, + "step": 52910 + }, + { + "epoch": 0.10690174816275246, + "grad_norm": 377.6288146972656, + "learning_rate": 9.99896111999049e-06, + "loss": 24.0976, + "step": 52920 + }, + { + "epoch": 0.10692194879543628, + "grad_norm": 520.31005859375, + "learning_rate": 9.998953992438245e-06, + "loss": 21.7018, + "step": 52930 + }, + { + "epoch": 0.1069421494281201, + "grad_norm": 13.560802459716797, + "learning_rate": 9.998946840521706e-06, + "loss": 19.4946, + "step": 52940 + }, + { + "epoch": 0.1069623500608039, + "grad_norm": 363.0177001953125, + "learning_rate": 9.998939664240908e-06, + "loss": 28.6526, + "step": 52950 + }, + { + "epoch": 0.10698255069348772, + "grad_norm": 833.1908569335938, + "learning_rate": 9.998932463595882e-06, + "loss": 27.0875, + "step": 52960 + }, + { + "epoch": 0.10700275132617154, + "grad_norm": 210.7505340576172, + "learning_rate": 9.998925238586666e-06, + "loss": 29.3727, + "step": 52970 + }, + { + "epoch": 0.10702295195885535, + "grad_norm": 1198.724853515625, + "learning_rate": 9.998917989213296e-06, + "loss": 35.7954, + "step": 52980 + }, + { + "epoch": 0.10704315259153917, + "grad_norm": 161.1515655517578, + "learning_rate": 9.998910715475804e-06, + "loss": 20.7784, + "step": 52990 + }, + { + "epoch": 0.10706335322422299, + "grad_norm": 343.9988098144531, + "learning_rate": 9.998903417374228e-06, + "loss": 30.7323, + "step": 53000 + }, + { + "epoch": 0.1070835538569068, + "grad_norm": 633.6296997070312, + "learning_rate": 9.998896094908603e-06, + "loss": 41.7541, + "step": 53010 + }, + { + "epoch": 0.10710375448959061, + "grad_norm": 1106.44287109375, + "learning_rate": 9.998888748078966e-06, + "loss": 30.7732, + "step": 53020 + }, + { + "epoch": 0.10712395512227443, + "grad_norm": 206.07020568847656, + "learning_rate": 9.99888137688535e-06, + "loss": 21.4664, + "step": 53030 + }, + { + "epoch": 0.10714415575495824, + "grad_norm": 63.42051315307617, + "learning_rate": 9.998873981327795e-06, + "loss": 15.0017, + "step": 53040 + }, + { + "epoch": 0.10716435638764206, + "grad_norm": 336.19708251953125, + "learning_rate": 9.998866561406331e-06, + "loss": 42.9714, + "step": 53050 + }, + { + "epoch": 0.10718455702032588, + "grad_norm": 1005.9963989257812, + "learning_rate": 9.998859117121e-06, + "loss": 20.2886, + "step": 53060 + }, + { + "epoch": 0.10720475765300969, + "grad_norm": 536.5841674804688, + "learning_rate": 9.998851648471834e-06, + "loss": 25.2781, + "step": 53070 + }, + { + "epoch": 0.1072249582856935, + "grad_norm": 568.0372924804688, + "learning_rate": 9.998844155458873e-06, + "loss": 27.6063, + "step": 53080 + }, + { + "epoch": 0.10724515891837733, + "grad_norm": 491.802490234375, + "learning_rate": 9.998836638082152e-06, + "loss": 18.594, + "step": 53090 + }, + { + "epoch": 0.10726535955106115, + "grad_norm": 441.60296630859375, + "learning_rate": 9.998829096341706e-06, + "loss": 24.2671, + "step": 53100 + }, + { + "epoch": 0.10728556018374495, + "grad_norm": 473.935546875, + "learning_rate": 9.998821530237576e-06, + "loss": 32.2329, + "step": 53110 + }, + { + "epoch": 0.10730576081642877, + "grad_norm": 698.8916015625, + "learning_rate": 9.998813939769794e-06, + "loss": 19.6332, + "step": 53120 + }, + { + "epoch": 0.10732596144911259, + "grad_norm": 762.2566528320312, + "learning_rate": 9.9988063249384e-06, + "loss": 31.1006, + "step": 53130 + }, + { + "epoch": 0.1073461620817964, + "grad_norm": 819.5429077148438, + "learning_rate": 9.99879868574343e-06, + "loss": 35.1782, + "step": 53140 + 
}, + { + "epoch": 0.10736636271448022, + "grad_norm": 969.3384399414062, + "learning_rate": 9.998791022184921e-06, + "loss": 49.248, + "step": 53150 + }, + { + "epoch": 0.10738656334716404, + "grad_norm": 376.130615234375, + "learning_rate": 9.998783334262911e-06, + "loss": 30.3146, + "step": 53160 + }, + { + "epoch": 0.10740676397984784, + "grad_norm": 422.056640625, + "learning_rate": 9.998775621977438e-06, + "loss": 21.0312, + "step": 53170 + }, + { + "epoch": 0.10742696461253166, + "grad_norm": 239.6740264892578, + "learning_rate": 9.998767885328538e-06, + "loss": 48.1807, + "step": 53180 + }, + { + "epoch": 0.10744716524521548, + "grad_norm": 499.3955993652344, + "learning_rate": 9.99876012431625e-06, + "loss": 30.1279, + "step": 53190 + }, + { + "epoch": 0.10746736587789929, + "grad_norm": 1409.747314453125, + "learning_rate": 9.998752338940612e-06, + "loss": 32.9902, + "step": 53200 + }, + { + "epoch": 0.10748756651058311, + "grad_norm": 380.8999328613281, + "learning_rate": 9.99874452920166e-06, + "loss": 22.1534, + "step": 53210 + }, + { + "epoch": 0.10750776714326693, + "grad_norm": 833.2760009765625, + "learning_rate": 9.998736695099434e-06, + "loss": 30.3201, + "step": 53220 + }, + { + "epoch": 0.10752796777595074, + "grad_norm": 20.92523193359375, + "learning_rate": 9.998728836633972e-06, + "loss": 23.5144, + "step": 53230 + }, + { + "epoch": 0.10754816840863456, + "grad_norm": 172.36753845214844, + "learning_rate": 9.998720953805312e-06, + "loss": 9.3381, + "step": 53240 + }, + { + "epoch": 0.10756836904131838, + "grad_norm": 600.3418579101562, + "learning_rate": 9.998713046613492e-06, + "loss": 23.8562, + "step": 53250 + }, + { + "epoch": 0.1075885696740022, + "grad_norm": 426.4175720214844, + "learning_rate": 9.998705115058552e-06, + "loss": 21.8772, + "step": 53260 + }, + { + "epoch": 0.107608770306686, + "grad_norm": 863.323974609375, + "learning_rate": 9.998697159140528e-06, + "loss": 22.8647, + "step": 53270 + }, + { + "epoch": 0.10762897093936982, + "grad_norm": 672.56005859375, + "learning_rate": 9.998689178859461e-06, + "loss": 47.7858, + "step": 53280 + }, + { + "epoch": 0.10764917157205364, + "grad_norm": 302.8929138183594, + "learning_rate": 9.99868117421539e-06, + "loss": 12.609, + "step": 53290 + }, + { + "epoch": 0.10766937220473745, + "grad_norm": 1255.6273193359375, + "learning_rate": 9.998673145208351e-06, + "loss": 22.6348, + "step": 53300 + }, + { + "epoch": 0.10768957283742127, + "grad_norm": 475.1684875488281, + "learning_rate": 9.998665091838386e-06, + "loss": 33.2957, + "step": 53310 + }, + { + "epoch": 0.10770977347010509, + "grad_norm": 845.2681884765625, + "learning_rate": 9.998657014105535e-06, + "loss": 34.4305, + "step": 53320 + }, + { + "epoch": 0.1077299741027889, + "grad_norm": 612.3766479492188, + "learning_rate": 9.998648912009835e-06, + "loss": 29.1685, + "step": 53330 + }, + { + "epoch": 0.10775017473547271, + "grad_norm": 1290.07958984375, + "learning_rate": 9.998640785551327e-06, + "loss": 29.3782, + "step": 53340 + }, + { + "epoch": 0.10777037536815653, + "grad_norm": 537.4452514648438, + "learning_rate": 9.99863263473005e-06, + "loss": 23.3855, + "step": 53350 + }, + { + "epoch": 0.10779057600084034, + "grad_norm": 746.5062866210938, + "learning_rate": 9.998624459546043e-06, + "loss": 16.1965, + "step": 53360 + }, + { + "epoch": 0.10781077663352416, + "grad_norm": 312.06500244140625, + "learning_rate": 9.998616259999348e-06, + "loss": 26.0713, + "step": 53370 + }, + { + "epoch": 0.10783097726620798, + "grad_norm": 1012.008056640625, + 
"learning_rate": 9.998608036090003e-06, + "loss": 22.3234, + "step": 53380 + }, + { + "epoch": 0.10785117789889179, + "grad_norm": 272.9735412597656, + "learning_rate": 9.998599787818048e-06, + "loss": 31.0834, + "step": 53390 + }, + { + "epoch": 0.1078713785315756, + "grad_norm": 673.2590942382812, + "learning_rate": 9.998591515183524e-06, + "loss": 25.4174, + "step": 53400 + }, + { + "epoch": 0.10789157916425943, + "grad_norm": 318.61651611328125, + "learning_rate": 9.998583218186471e-06, + "loss": 17.2329, + "step": 53410 + }, + { + "epoch": 0.10791177979694325, + "grad_norm": 539.6445922851562, + "learning_rate": 9.998574896826931e-06, + "loss": 28.4101, + "step": 53420 + }, + { + "epoch": 0.10793198042962705, + "grad_norm": 844.5818481445312, + "learning_rate": 9.998566551104943e-06, + "loss": 21.043, + "step": 53430 + }, + { + "epoch": 0.10795218106231087, + "grad_norm": 463.2301940917969, + "learning_rate": 9.998558181020547e-06, + "loss": 37.8564, + "step": 53440 + }, + { + "epoch": 0.10797238169499469, + "grad_norm": 87.23060607910156, + "learning_rate": 9.998549786573785e-06, + "loss": 22.3368, + "step": 53450 + }, + { + "epoch": 0.1079925823276785, + "grad_norm": 630.809326171875, + "learning_rate": 9.998541367764699e-06, + "loss": 39.6557, + "step": 53460 + }, + { + "epoch": 0.10801278296036232, + "grad_norm": 790.4534912109375, + "learning_rate": 9.998532924593327e-06, + "loss": 29.6731, + "step": 53470 + }, + { + "epoch": 0.10803298359304614, + "grad_norm": 419.2471008300781, + "learning_rate": 9.99852445705971e-06, + "loss": 36.6505, + "step": 53480 + }, + { + "epoch": 0.10805318422572995, + "grad_norm": 75.427978515625, + "learning_rate": 9.998515965163894e-06, + "loss": 12.1146, + "step": 53490 + }, + { + "epoch": 0.10807338485841377, + "grad_norm": 83.66301727294922, + "learning_rate": 9.998507448905917e-06, + "loss": 39.2989, + "step": 53500 + }, + { + "epoch": 0.10809358549109759, + "grad_norm": 391.13385009765625, + "learning_rate": 9.99849890828582e-06, + "loss": 20.2477, + "step": 53510 + }, + { + "epoch": 0.10811378612378139, + "grad_norm": 490.6471252441406, + "learning_rate": 9.998490343303646e-06, + "loss": 43.6847, + "step": 53520 + }, + { + "epoch": 0.10813398675646521, + "grad_norm": 599.6272583007812, + "learning_rate": 9.998481753959436e-06, + "loss": 37.4203, + "step": 53530 + }, + { + "epoch": 0.10815418738914903, + "grad_norm": 155.78521728515625, + "learning_rate": 9.998473140253234e-06, + "loss": 27.2147, + "step": 53540 + }, + { + "epoch": 0.10817438802183284, + "grad_norm": 479.9679260253906, + "learning_rate": 9.998464502185076e-06, + "loss": 39.7745, + "step": 53550 + }, + { + "epoch": 0.10819458865451666, + "grad_norm": 572.0115356445312, + "learning_rate": 9.998455839755013e-06, + "loss": 26.7621, + "step": 53560 + }, + { + "epoch": 0.10821478928720048, + "grad_norm": 535.1430053710938, + "learning_rate": 9.99844715296308e-06, + "loss": 26.889, + "step": 53570 + }, + { + "epoch": 0.1082349899198843, + "grad_norm": 422.1188049316406, + "learning_rate": 9.998438441809322e-06, + "loss": 33.705, + "step": 53580 + }, + { + "epoch": 0.1082551905525681, + "grad_norm": 357.9285888671875, + "learning_rate": 9.998429706293781e-06, + "loss": 28.6368, + "step": 53590 + }, + { + "epoch": 0.10827539118525192, + "grad_norm": 109.74182891845703, + "learning_rate": 9.9984209464165e-06, + "loss": 20.9994, + "step": 53600 + }, + { + "epoch": 0.10829559181793574, + "grad_norm": 334.81854248046875, + "learning_rate": 9.998412162177523e-06, + "loss": 21.9419, + "step": 
53610 + }, + { + "epoch": 0.10831579245061955, + "grad_norm": 549.0813598632812, + "learning_rate": 9.99840335357689e-06, + "loss": 28.1463, + "step": 53620 + }, + { + "epoch": 0.10833599308330337, + "grad_norm": 441.5652770996094, + "learning_rate": 9.998394520614645e-06, + "loss": 23.2583, + "step": 53630 + }, + { + "epoch": 0.10835619371598719, + "grad_norm": 401.61724853515625, + "learning_rate": 9.998385663290833e-06, + "loss": 24.7862, + "step": 53640 + }, + { + "epoch": 0.108376394348671, + "grad_norm": 549.4804077148438, + "learning_rate": 9.998376781605493e-06, + "loss": 28.995, + "step": 53650 + }, + { + "epoch": 0.10839659498135482, + "grad_norm": 1153.8587646484375, + "learning_rate": 9.998367875558673e-06, + "loss": 35.7791, + "step": 53660 + }, + { + "epoch": 0.10841679561403864, + "grad_norm": 320.6040344238281, + "learning_rate": 9.998358945150412e-06, + "loss": 13.7571, + "step": 53670 + }, + { + "epoch": 0.10843699624672244, + "grad_norm": 478.5079345703125, + "learning_rate": 9.998349990380757e-06, + "loss": 26.84, + "step": 53680 + }, + { + "epoch": 0.10845719687940626, + "grad_norm": 282.97344970703125, + "learning_rate": 9.998341011249752e-06, + "loss": 23.3799, + "step": 53690 + }, + { + "epoch": 0.10847739751209008, + "grad_norm": 136.7860565185547, + "learning_rate": 9.998332007757436e-06, + "loss": 23.2608, + "step": 53700 + }, + { + "epoch": 0.10849759814477389, + "grad_norm": 452.0782775878906, + "learning_rate": 9.998322979903859e-06, + "loss": 18.1503, + "step": 53710 + }, + { + "epoch": 0.10851779877745771, + "grad_norm": 145.5153350830078, + "learning_rate": 9.99831392768906e-06, + "loss": 33.076, + "step": 53720 + }, + { + "epoch": 0.10853799941014153, + "grad_norm": 559.6956787109375, + "learning_rate": 9.998304851113086e-06, + "loss": 19.8302, + "step": 53730 + }, + { + "epoch": 0.10855820004282535, + "grad_norm": 395.3185119628906, + "learning_rate": 9.99829575017598e-06, + "loss": 16.7486, + "step": 53740 + }, + { + "epoch": 0.10857840067550915, + "grad_norm": 1397.56103515625, + "learning_rate": 9.998286624877786e-06, + "loss": 31.4005, + "step": 53750 + }, + { + "epoch": 0.10859860130819297, + "grad_norm": 927.7950439453125, + "learning_rate": 9.998277475218552e-06, + "loss": 21.003, + "step": 53760 + }, + { + "epoch": 0.1086188019408768, + "grad_norm": 304.82440185546875, + "learning_rate": 9.998268301198317e-06, + "loss": 28.2366, + "step": 53770 + }, + { + "epoch": 0.1086390025735606, + "grad_norm": 485.1670837402344, + "learning_rate": 9.99825910281713e-06, + "loss": 20.6205, + "step": 53780 + }, + { + "epoch": 0.10865920320624442, + "grad_norm": 731.8318481445312, + "learning_rate": 9.998249880075033e-06, + "loss": 34.5541, + "step": 53790 + }, + { + "epoch": 0.10867940383892824, + "grad_norm": 604.5245361328125, + "learning_rate": 9.998240632972073e-06, + "loss": 55.5603, + "step": 53800 + }, + { + "epoch": 0.10869960447161205, + "grad_norm": 246.02989196777344, + "learning_rate": 9.998231361508295e-06, + "loss": 45.6582, + "step": 53810 + }, + { + "epoch": 0.10871980510429587, + "grad_norm": 198.7808380126953, + "learning_rate": 9.998222065683743e-06, + "loss": 33.1657, + "step": 53820 + }, + { + "epoch": 0.10874000573697969, + "grad_norm": 500.4750671386719, + "learning_rate": 9.998212745498464e-06, + "loss": 20.7875, + "step": 53830 + }, + { + "epoch": 0.10876020636966349, + "grad_norm": 19.5983943939209, + "learning_rate": 9.9982034009525e-06, + "loss": 23.3247, + "step": 53840 + }, + { + "epoch": 0.10878040700234731, + "grad_norm": 
623.8035888671875, + "learning_rate": 9.9981940320459e-06, + "loss": 39.7996, + "step": 53850 + }, + { + "epoch": 0.10880060763503113, + "grad_norm": 161.05856323242188, + "learning_rate": 9.998184638778708e-06, + "loss": 15.9248, + "step": 53860 + }, + { + "epoch": 0.10882080826771494, + "grad_norm": 612.73681640625, + "learning_rate": 9.99817522115097e-06, + "loss": 26.1079, + "step": 53870 + }, + { + "epoch": 0.10884100890039876, + "grad_norm": 522.5286865234375, + "learning_rate": 9.998165779162734e-06, + "loss": 14.3749, + "step": 53880 + }, + { + "epoch": 0.10886120953308258, + "grad_norm": 1359.5078125, + "learning_rate": 9.998156312814043e-06, + "loss": 32.4587, + "step": 53890 + }, + { + "epoch": 0.1088814101657664, + "grad_norm": 127.57379913330078, + "learning_rate": 9.998146822104943e-06, + "loss": 26.3984, + "step": 53900 + }, + { + "epoch": 0.1089016107984502, + "grad_norm": 600.6903076171875, + "learning_rate": 9.998137307035486e-06, + "loss": 23.3385, + "step": 53910 + }, + { + "epoch": 0.10892181143113402, + "grad_norm": 444.3834533691406, + "learning_rate": 9.99812776760571e-06, + "loss": 16.6408, + "step": 53920 + }, + { + "epoch": 0.10894201206381784, + "grad_norm": 877.210693359375, + "learning_rate": 9.998118203815666e-06, + "loss": 31.8723, + "step": 53930 + }, + { + "epoch": 0.10896221269650165, + "grad_norm": 181.5795135498047, + "learning_rate": 9.9981086156654e-06, + "loss": 24.42, + "step": 53940 + }, + { + "epoch": 0.10898241332918547, + "grad_norm": 507.66552734375, + "learning_rate": 9.99809900315496e-06, + "loss": 19.5684, + "step": 53950 + }, + { + "epoch": 0.10900261396186929, + "grad_norm": 19.269437789916992, + "learning_rate": 9.998089366284392e-06, + "loss": 37.1751, + "step": 53960 + }, + { + "epoch": 0.1090228145945531, + "grad_norm": 168.76394653320312, + "learning_rate": 9.99807970505374e-06, + "loss": 42.0356, + "step": 53970 + }, + { + "epoch": 0.10904301522723692, + "grad_norm": 190.10775756835938, + "learning_rate": 9.998070019463055e-06, + "loss": 18.8945, + "step": 53980 + }, + { + "epoch": 0.10906321585992074, + "grad_norm": 312.4748840332031, + "learning_rate": 9.998060309512384e-06, + "loss": 14.506, + "step": 53990 + }, + { + "epoch": 0.10908341649260454, + "grad_norm": 254.49879455566406, + "learning_rate": 9.998050575201772e-06, + "loss": 22.7319, + "step": 54000 + }, + { + "epoch": 0.10910361712528836, + "grad_norm": 532.451171875, + "learning_rate": 9.99804081653127e-06, + "loss": 26.7482, + "step": 54010 + }, + { + "epoch": 0.10912381775797218, + "grad_norm": 229.19528198242188, + "learning_rate": 9.99803103350092e-06, + "loss": 35.1213, + "step": 54020 + }, + { + "epoch": 0.10914401839065599, + "grad_norm": 440.79376220703125, + "learning_rate": 9.998021226110775e-06, + "loss": 26.4451, + "step": 54030 + }, + { + "epoch": 0.10916421902333981, + "grad_norm": 519.68359375, + "learning_rate": 9.99801139436088e-06, + "loss": 30.7062, + "step": 54040 + }, + { + "epoch": 0.10918441965602363, + "grad_norm": 55.21887969970703, + "learning_rate": 9.998001538251283e-06, + "loss": 42.4805, + "step": 54050 + }, + { + "epoch": 0.10920462028870745, + "grad_norm": 317.5483093261719, + "learning_rate": 9.997991657782033e-06, + "loss": 31.2201, + "step": 54060 + }, + { + "epoch": 0.10922482092139126, + "grad_norm": 345.548828125, + "learning_rate": 9.997981752953179e-06, + "loss": 25.0568, + "step": 54070 + }, + { + "epoch": 0.10924502155407508, + "grad_norm": 359.0416259765625, + "learning_rate": 9.997971823764766e-06, + "loss": 21.3692, + "step": 
54080 + }, + { + "epoch": 0.1092652221867589, + "grad_norm": 595.4906005859375, + "learning_rate": 9.997961870216849e-06, + "loss": 35.6632, + "step": 54090 + }, + { + "epoch": 0.1092854228194427, + "grad_norm": 575.0725708007812, + "learning_rate": 9.997951892309468e-06, + "loss": 28.2221, + "step": 54100 + }, + { + "epoch": 0.10930562345212652, + "grad_norm": 317.45599365234375, + "learning_rate": 9.997941890042677e-06, + "loss": 22.9554, + "step": 54110 + }, + { + "epoch": 0.10932582408481034, + "grad_norm": 75.43408966064453, + "learning_rate": 9.997931863416522e-06, + "loss": 24.0662, + "step": 54120 + }, + { + "epoch": 0.10934602471749415, + "grad_norm": 599.4276733398438, + "learning_rate": 9.997921812431055e-06, + "loss": 31.3656, + "step": 54130 + }, + { + "epoch": 0.10936622535017797, + "grad_norm": 584.6956176757812, + "learning_rate": 9.997911737086322e-06, + "loss": 25.7612, + "step": 54140 + }, + { + "epoch": 0.10938642598286179, + "grad_norm": 881.17529296875, + "learning_rate": 9.997901637382375e-06, + "loss": 35.9875, + "step": 54150 + }, + { + "epoch": 0.1094066266155456, + "grad_norm": 802.3683471679688, + "learning_rate": 9.99789151331926e-06, + "loss": 23.1203, + "step": 54160 + }, + { + "epoch": 0.10942682724822941, + "grad_norm": 1032.8818359375, + "learning_rate": 9.997881364897028e-06, + "loss": 31.3608, + "step": 54170 + }, + { + "epoch": 0.10944702788091323, + "grad_norm": 646.2077026367188, + "learning_rate": 9.99787119211573e-06, + "loss": 13.592, + "step": 54180 + }, + { + "epoch": 0.10946722851359704, + "grad_norm": 403.6358947753906, + "learning_rate": 9.997860994975412e-06, + "loss": 49.6401, + "step": 54190 + }, + { + "epoch": 0.10948742914628086, + "grad_norm": 436.64154052734375, + "learning_rate": 9.997850773476126e-06, + "loss": 56.8283, + "step": 54200 + }, + { + "epoch": 0.10950762977896468, + "grad_norm": 477.2749328613281, + "learning_rate": 9.997840527617921e-06, + "loss": 18.9835, + "step": 54210 + }, + { + "epoch": 0.1095278304116485, + "grad_norm": 686.4449462890625, + "learning_rate": 9.99783025740085e-06, + "loss": 17.5475, + "step": 54220 + }, + { + "epoch": 0.1095480310443323, + "grad_norm": 216.48248291015625, + "learning_rate": 9.997819962824958e-06, + "loss": 8.0905, + "step": 54230 + }, + { + "epoch": 0.10956823167701613, + "grad_norm": 912.3517456054688, + "learning_rate": 9.9978096438903e-06, + "loss": 23.9246, + "step": 54240 + }, + { + "epoch": 0.10958843230969995, + "grad_norm": 164.28170776367188, + "learning_rate": 9.997799300596921e-06, + "loss": 23.017, + "step": 54250 + }, + { + "epoch": 0.10960863294238375, + "grad_norm": 504.7520751953125, + "learning_rate": 9.997788932944877e-06, + "loss": 23.6303, + "step": 54260 + }, + { + "epoch": 0.10962883357506757, + "grad_norm": 206.88621520996094, + "learning_rate": 9.997778540934213e-06, + "loss": 14.847, + "step": 54270 + }, + { + "epoch": 0.10964903420775139, + "grad_norm": 454.28759765625, + "learning_rate": 9.997768124564986e-06, + "loss": 38.5703, + "step": 54280 + }, + { + "epoch": 0.1096692348404352, + "grad_norm": 222.45407104492188, + "learning_rate": 9.997757683837242e-06, + "loss": 19.3966, + "step": 54290 + }, + { + "epoch": 0.10968943547311902, + "grad_norm": 229.19483947753906, + "learning_rate": 9.997747218751032e-06, + "loss": 28.7323, + "step": 54300 + }, + { + "epoch": 0.10970963610580284, + "grad_norm": 463.9140930175781, + "learning_rate": 9.997736729306409e-06, + "loss": 19.2052, + "step": 54310 + }, + { + "epoch": 0.10972983673848664, + "grad_norm": 
145.03977966308594, + "learning_rate": 9.997726215503422e-06, + "loss": 34.755, + "step": 54320 + }, + { + "epoch": 0.10975003737117046, + "grad_norm": 101.87061309814453, + "learning_rate": 9.997715677342126e-06, + "loss": 41.6257, + "step": 54330 + }, + { + "epoch": 0.10977023800385428, + "grad_norm": 725.406494140625, + "learning_rate": 9.99770511482257e-06, + "loss": 33.6832, + "step": 54340 + }, + { + "epoch": 0.10979043863653809, + "grad_norm": 580.8824462890625, + "learning_rate": 9.997694527944804e-06, + "loss": 29.1013, + "step": 54350 + }, + { + "epoch": 0.10981063926922191, + "grad_norm": 512.6138916015625, + "learning_rate": 9.99768391670888e-06, + "loss": 12.6474, + "step": 54360 + }, + { + "epoch": 0.10983083990190573, + "grad_norm": 195.0771942138672, + "learning_rate": 9.997673281114852e-06, + "loss": 18.8358, + "step": 54370 + }, + { + "epoch": 0.10985104053458955, + "grad_norm": 817.9515380859375, + "learning_rate": 9.99766262116277e-06, + "loss": 30.0784, + "step": 54380 + }, + { + "epoch": 0.10987124116727336, + "grad_norm": 674.0523681640625, + "learning_rate": 9.997651936852689e-06, + "loss": 18.0687, + "step": 54390 + }, + { + "epoch": 0.10989144179995718, + "grad_norm": 369.1578674316406, + "learning_rate": 9.997641228184656e-06, + "loss": 19.4317, + "step": 54400 + }, + { + "epoch": 0.109911642432641, + "grad_norm": 512.1553344726562, + "learning_rate": 9.997630495158728e-06, + "loss": 29.9607, + "step": 54410 + }, + { + "epoch": 0.1099318430653248, + "grad_norm": 334.9314270019531, + "learning_rate": 9.997619737774953e-06, + "loss": 22.4586, + "step": 54420 + }, + { + "epoch": 0.10995204369800862, + "grad_norm": 0.5988953113555908, + "learning_rate": 9.997608956033386e-06, + "loss": 15.379, + "step": 54430 + }, + { + "epoch": 0.10997224433069244, + "grad_norm": 1061.28662109375, + "learning_rate": 9.99759814993408e-06, + "loss": 49.6255, + "step": 54440 + }, + { + "epoch": 0.10999244496337625, + "grad_norm": 1052.1668701171875, + "learning_rate": 9.997587319477084e-06, + "loss": 22.5256, + "step": 54450 + }, + { + "epoch": 0.11001264559606007, + "grad_norm": 403.6893005371094, + "learning_rate": 9.997576464662458e-06, + "loss": 17.6924, + "step": 54460 + }, + { + "epoch": 0.11003284622874389, + "grad_norm": 272.0291442871094, + "learning_rate": 9.997565585490247e-06, + "loss": 27.3708, + "step": 54470 + }, + { + "epoch": 0.1100530468614277, + "grad_norm": 632.6046142578125, + "learning_rate": 9.997554681960508e-06, + "loss": 22.4342, + "step": 54480 + }, + { + "epoch": 0.11007324749411151, + "grad_norm": 52.98223114013672, + "learning_rate": 9.997543754073295e-06, + "loss": 13.4893, + "step": 54490 + }, + { + "epoch": 0.11009344812679533, + "grad_norm": 391.6419677734375, + "learning_rate": 9.997532801828659e-06, + "loss": 18.883, + "step": 54500 + }, + { + "epoch": 0.11011364875947914, + "grad_norm": 259.5841369628906, + "learning_rate": 9.997521825226654e-06, + "loss": 37.2853, + "step": 54510 + }, + { + "epoch": 0.11013384939216296, + "grad_norm": 824.7269287109375, + "learning_rate": 9.997510824267334e-06, + "loss": 23.0775, + "step": 54520 + }, + { + "epoch": 0.11015405002484678, + "grad_norm": 314.40289306640625, + "learning_rate": 9.997499798950752e-06, + "loss": 27.897, + "step": 54530 + }, + { + "epoch": 0.1101742506575306, + "grad_norm": 372.8232116699219, + "learning_rate": 9.997488749276962e-06, + "loss": 28.0352, + "step": 54540 + }, + { + "epoch": 0.1101944512902144, + "grad_norm": 238.98486328125, + "learning_rate": 9.997477675246018e-06, + "loss": 
15.9825, + "step": 54550 + }, + { + "epoch": 0.11021465192289823, + "grad_norm": 268.8155822753906, + "learning_rate": 9.997466576857974e-06, + "loss": 28.761, + "step": 54560 + }, + { + "epoch": 0.11023485255558205, + "grad_norm": 372.1516418457031, + "learning_rate": 9.997455454112885e-06, + "loss": 20.2901, + "step": 54570 + }, + { + "epoch": 0.11025505318826585, + "grad_norm": 884.8719482421875, + "learning_rate": 9.997444307010804e-06, + "loss": 30.1917, + "step": 54580 + }, + { + "epoch": 0.11027525382094967, + "grad_norm": 562.89404296875, + "learning_rate": 9.997433135551786e-06, + "loss": 45.1124, + "step": 54590 + }, + { + "epoch": 0.11029545445363349, + "grad_norm": 471.60125732421875, + "learning_rate": 9.997421939735885e-06, + "loss": 16.8417, + "step": 54600 + }, + { + "epoch": 0.1103156550863173, + "grad_norm": 163.50860595703125, + "learning_rate": 9.997410719563155e-06, + "loss": 16.828, + "step": 54610 + }, + { + "epoch": 0.11033585571900112, + "grad_norm": 351.6030578613281, + "learning_rate": 9.997399475033648e-06, + "loss": 18.1173, + "step": 54620 + }, + { + "epoch": 0.11035605635168494, + "grad_norm": 441.3740539550781, + "learning_rate": 9.997388206147427e-06, + "loss": 53.553, + "step": 54630 + }, + { + "epoch": 0.11037625698436875, + "grad_norm": 431.582275390625, + "learning_rate": 9.99737691290454e-06, + "loss": 21.1727, + "step": 54640 + }, + { + "epoch": 0.11039645761705257, + "grad_norm": 929.2691650390625, + "learning_rate": 9.997365595305045e-06, + "loss": 32.7812, + "step": 54650 + }, + { + "epoch": 0.11041665824973639, + "grad_norm": 606.33935546875, + "learning_rate": 9.997354253348994e-06, + "loss": 20.2651, + "step": 54660 + }, + { + "epoch": 0.11043685888242019, + "grad_norm": 936.4393920898438, + "learning_rate": 9.997342887036446e-06, + "loss": 29.7989, + "step": 54670 + }, + { + "epoch": 0.11045705951510401, + "grad_norm": 1194.6771240234375, + "learning_rate": 9.997331496367455e-06, + "loss": 42.4737, + "step": 54680 + }, + { + "epoch": 0.11047726014778783, + "grad_norm": 923.7783813476562, + "learning_rate": 9.997320081342076e-06, + "loss": 34.2305, + "step": 54690 + }, + { + "epoch": 0.11049746078047164, + "grad_norm": 231.43601989746094, + "learning_rate": 9.997308641960365e-06, + "loss": 43.9614, + "step": 54700 + }, + { + "epoch": 0.11051766141315546, + "grad_norm": 400.3169860839844, + "learning_rate": 9.997297178222378e-06, + "loss": 46.262, + "step": 54710 + }, + { + "epoch": 0.11053786204583928, + "grad_norm": 156.56224060058594, + "learning_rate": 9.997285690128172e-06, + "loss": 22.7581, + "step": 54720 + }, + { + "epoch": 0.1105580626785231, + "grad_norm": 332.6772766113281, + "learning_rate": 9.997274177677799e-06, + "loss": 22.8242, + "step": 54730 + }, + { + "epoch": 0.1105782633112069, + "grad_norm": 432.33856201171875, + "learning_rate": 9.997262640871319e-06, + "loss": 45.6695, + "step": 54740 + }, + { + "epoch": 0.11059846394389072, + "grad_norm": 222.8570556640625, + "learning_rate": 9.997251079708788e-06, + "loss": 14.8523, + "step": 54750 + }, + { + "epoch": 0.11061866457657454, + "grad_norm": 334.9029846191406, + "learning_rate": 9.997239494190258e-06, + "loss": 30.4605, + "step": 54760 + }, + { + "epoch": 0.11063886520925835, + "grad_norm": 446.7870178222656, + "learning_rate": 9.997227884315792e-06, + "loss": 29.3506, + "step": 54770 + }, + { + "epoch": 0.11065906584194217, + "grad_norm": 456.64947509765625, + "learning_rate": 9.997216250085441e-06, + "loss": 23.3379, + "step": 54780 + }, + { + "epoch": 
0.11067926647462599, + "grad_norm": 900.9299926757812, + "learning_rate": 9.997204591499266e-06, + "loss": 20.6617, + "step": 54790 + }, + { + "epoch": 0.1106994671073098, + "grad_norm": 494.7242431640625, + "learning_rate": 9.997192908557322e-06, + "loss": 34.7412, + "step": 54800 + }, + { + "epoch": 0.11071966773999362, + "grad_norm": 474.8592529296875, + "learning_rate": 9.997181201259664e-06, + "loss": 28.8738, + "step": 54810 + }, + { + "epoch": 0.11073986837267744, + "grad_norm": 1227.4732666015625, + "learning_rate": 9.997169469606353e-06, + "loss": 31.113, + "step": 54820 + }, + { + "epoch": 0.11076006900536124, + "grad_norm": 375.9781799316406, + "learning_rate": 9.997157713597444e-06, + "loss": 19.6979, + "step": 54830 + }, + { + "epoch": 0.11078026963804506, + "grad_norm": 57.54928207397461, + "learning_rate": 9.997145933232994e-06, + "loss": 51.4527, + "step": 54840 + }, + { + "epoch": 0.11080047027072888, + "grad_norm": 1278.0533447265625, + "learning_rate": 9.99713412851306e-06, + "loss": 25.5548, + "step": 54850 + }, + { + "epoch": 0.11082067090341269, + "grad_norm": 505.0701599121094, + "learning_rate": 9.9971222994377e-06, + "loss": 27.7517, + "step": 54860 + }, + { + "epoch": 0.11084087153609651, + "grad_norm": 332.50238037109375, + "learning_rate": 9.997110446006974e-06, + "loss": 19.6809, + "step": 54870 + }, + { + "epoch": 0.11086107216878033, + "grad_norm": 26.537817001342773, + "learning_rate": 9.997098568220937e-06, + "loss": 21.3314, + "step": 54880 + }, + { + "epoch": 0.11088127280146415, + "grad_norm": 660.31982421875, + "learning_rate": 9.997086666079647e-06, + "loss": 39.6938, + "step": 54890 + }, + { + "epoch": 0.11090147343414795, + "grad_norm": 132.39810180664062, + "learning_rate": 9.997074739583162e-06, + "loss": 26.4871, + "step": 54900 + }, + { + "epoch": 0.11092167406683177, + "grad_norm": 183.94454956054688, + "learning_rate": 9.997062788731541e-06, + "loss": 41.0132, + "step": 54910 + }, + { + "epoch": 0.1109418746995156, + "grad_norm": 370.9226989746094, + "learning_rate": 9.997050813524843e-06, + "loss": 17.3327, + "step": 54920 + }, + { + "epoch": 0.1109620753321994, + "grad_norm": 385.3973083496094, + "learning_rate": 9.997038813963126e-06, + "loss": 33.9201, + "step": 54930 + }, + { + "epoch": 0.11098227596488322, + "grad_norm": 227.87315368652344, + "learning_rate": 9.997026790046446e-06, + "loss": 25.1385, + "step": 54940 + }, + { + "epoch": 0.11100247659756704, + "grad_norm": 280.9164123535156, + "learning_rate": 9.997014741774866e-06, + "loss": 32.4668, + "step": 54950 + }, + { + "epoch": 0.11102267723025085, + "grad_norm": 86.0960693359375, + "learning_rate": 9.99700266914844e-06, + "loss": 24.8734, + "step": 54960 + }, + { + "epoch": 0.11104287786293467, + "grad_norm": 495.7022399902344, + "learning_rate": 9.996990572167229e-06, + "loss": 23.1751, + "step": 54970 + }, + { + "epoch": 0.11106307849561849, + "grad_norm": 260.6387939453125, + "learning_rate": 9.996978450831293e-06, + "loss": 20.1213, + "step": 54980 + }, + { + "epoch": 0.11108327912830229, + "grad_norm": 1072.140380859375, + "learning_rate": 9.99696630514069e-06, + "loss": 45.5128, + "step": 54990 + }, + { + "epoch": 0.11110347976098611, + "grad_norm": 288.9633483886719, + "learning_rate": 9.99695413509548e-06, + "loss": 12.2324, + "step": 55000 + }, + { + "epoch": 0.11112368039366993, + "grad_norm": 443.3056945800781, + "learning_rate": 9.99694194069572e-06, + "loss": 15.8432, + "step": 55010 + }, + { + "epoch": 0.11114388102635374, + "grad_norm": 394.9388427734375, + 
"learning_rate": 9.996929721941472e-06, + "loss": 17.3093, + "step": 55020 + }, + { + "epoch": 0.11116408165903756, + "grad_norm": 652.3133544921875, + "learning_rate": 9.996917478832794e-06, + "loss": 27.1608, + "step": 55030 + }, + { + "epoch": 0.11118428229172138, + "grad_norm": 6.787502288818359, + "learning_rate": 9.996905211369748e-06, + "loss": 23.7932, + "step": 55040 + }, + { + "epoch": 0.1112044829244052, + "grad_norm": 278.0809020996094, + "learning_rate": 9.99689291955239e-06, + "loss": 18.0862, + "step": 55050 + }, + { + "epoch": 0.111224683557089, + "grad_norm": 312.7170104980469, + "learning_rate": 9.996880603380784e-06, + "loss": 29.128, + "step": 55060 + }, + { + "epoch": 0.11124488418977282, + "grad_norm": 130.94741821289062, + "learning_rate": 9.996868262854986e-06, + "loss": 19.5875, + "step": 55070 + }, + { + "epoch": 0.11126508482245664, + "grad_norm": 140.8140106201172, + "learning_rate": 9.996855897975058e-06, + "loss": 34.391, + "step": 55080 + }, + { + "epoch": 0.11128528545514045, + "grad_norm": 270.0653076171875, + "learning_rate": 9.996843508741061e-06, + "loss": 36.159, + "step": 55090 + }, + { + "epoch": 0.11130548608782427, + "grad_norm": 353.35723876953125, + "learning_rate": 9.996831095153054e-06, + "loss": 25.6871, + "step": 55100 + }, + { + "epoch": 0.11132568672050809, + "grad_norm": 827.9155883789062, + "learning_rate": 9.9968186572111e-06, + "loss": 17.5517, + "step": 55110 + }, + { + "epoch": 0.1113458873531919, + "grad_norm": 444.61322021484375, + "learning_rate": 9.996806194915258e-06, + "loss": 17.5781, + "step": 55120 + }, + { + "epoch": 0.11136608798587572, + "grad_norm": 321.96417236328125, + "learning_rate": 9.996793708265586e-06, + "loss": 21.4721, + "step": 55130 + }, + { + "epoch": 0.11138628861855954, + "grad_norm": 632.9940185546875, + "learning_rate": 9.99678119726215e-06, + "loss": 43.9916, + "step": 55140 + }, + { + "epoch": 0.11140648925124334, + "grad_norm": 361.72259521484375, + "learning_rate": 9.996768661905008e-06, + "loss": 26.2405, + "step": 55150 + }, + { + "epoch": 0.11142668988392716, + "grad_norm": 176.63058471679688, + "learning_rate": 9.996756102194222e-06, + "loss": 25.3955, + "step": 55160 + }, + { + "epoch": 0.11144689051661098, + "grad_norm": 364.80621337890625, + "learning_rate": 9.996743518129852e-06, + "loss": 27.8307, + "step": 55170 + }, + { + "epoch": 0.11146709114929479, + "grad_norm": 623.612060546875, + "learning_rate": 9.99673090971196e-06, + "loss": 21.1618, + "step": 55180 + }, + { + "epoch": 0.11148729178197861, + "grad_norm": 511.4942321777344, + "learning_rate": 9.996718276940608e-06, + "loss": 51.635, + "step": 55190 + }, + { + "epoch": 0.11150749241466243, + "grad_norm": 248.29827880859375, + "learning_rate": 9.996705619815857e-06, + "loss": 33.6349, + "step": 55200 + }, + { + "epoch": 0.11152769304734625, + "grad_norm": 206.81057739257812, + "learning_rate": 9.996692938337768e-06, + "loss": 30.27, + "step": 55210 + }, + { + "epoch": 0.11154789368003006, + "grad_norm": 1038.8233642578125, + "learning_rate": 9.996680232506404e-06, + "loss": 27.2952, + "step": 55220 + }, + { + "epoch": 0.11156809431271388, + "grad_norm": 291.82684326171875, + "learning_rate": 9.996667502321829e-06, + "loss": 33.1123, + "step": 55230 + }, + { + "epoch": 0.1115882949453977, + "grad_norm": 355.9356384277344, + "learning_rate": 9.9966547477841e-06, + "loss": 40.046, + "step": 55240 + }, + { + "epoch": 0.1116084955780815, + "grad_norm": 349.36199951171875, + "learning_rate": 9.996641968893281e-06, + "loss": 31.7898, + 
"step": 55250 + }, + { + "epoch": 0.11162869621076532, + "grad_norm": 279.517578125, + "learning_rate": 9.996629165649437e-06, + "loss": 19.8619, + "step": 55260 + }, + { + "epoch": 0.11164889684344914, + "grad_norm": 702.2809448242188, + "learning_rate": 9.996616338052629e-06, + "loss": 19.4479, + "step": 55270 + }, + { + "epoch": 0.11166909747613295, + "grad_norm": 339.9565124511719, + "learning_rate": 9.996603486102918e-06, + "loss": 17.744, + "step": 55280 + }, + { + "epoch": 0.11168929810881677, + "grad_norm": 548.6559448242188, + "learning_rate": 9.996590609800367e-06, + "loss": 36.1667, + "step": 55290 + }, + { + "epoch": 0.11170949874150059, + "grad_norm": 188.1508331298828, + "learning_rate": 9.99657770914504e-06, + "loss": 25.0498, + "step": 55300 + }, + { + "epoch": 0.1117296993741844, + "grad_norm": 133.46206665039062, + "learning_rate": 9.996564784137e-06, + "loss": 21.7022, + "step": 55310 + }, + { + "epoch": 0.11174990000686821, + "grad_norm": 527.6439208984375, + "learning_rate": 9.996551834776309e-06, + "loss": 29.3728, + "step": 55320 + }, + { + "epoch": 0.11177010063955203, + "grad_norm": 766.2464599609375, + "learning_rate": 9.996538861063029e-06, + "loss": 22.1587, + "step": 55330 + }, + { + "epoch": 0.11179030127223584, + "grad_norm": 281.0527038574219, + "learning_rate": 9.996525862997225e-06, + "loss": 14.2926, + "step": 55340 + }, + { + "epoch": 0.11181050190491966, + "grad_norm": 428.09002685546875, + "learning_rate": 9.99651284057896e-06, + "loss": 23.0039, + "step": 55350 + }, + { + "epoch": 0.11183070253760348, + "grad_norm": 353.99432373046875, + "learning_rate": 9.9964997938083e-06, + "loss": 23.1277, + "step": 55360 + }, + { + "epoch": 0.1118509031702873, + "grad_norm": 177.79042053222656, + "learning_rate": 9.996486722685302e-06, + "loss": 27.636, + "step": 55370 + }, + { + "epoch": 0.1118711038029711, + "grad_norm": 402.3406982421875, + "learning_rate": 9.996473627210035e-06, + "loss": 17.3252, + "step": 55380 + }, + { + "epoch": 0.11189130443565493, + "grad_norm": 906.8685302734375, + "learning_rate": 9.996460507382563e-06, + "loss": 21.174, + "step": 55390 + }, + { + "epoch": 0.11191150506833875, + "grad_norm": 865.0765991210938, + "learning_rate": 9.996447363202947e-06, + "loss": 30.8987, + "step": 55400 + }, + { + "epoch": 0.11193170570102255, + "grad_norm": 144.87705993652344, + "learning_rate": 9.996434194671254e-06, + "loss": 21.6505, + "step": 55410 + }, + { + "epoch": 0.11195190633370637, + "grad_norm": 489.4412536621094, + "learning_rate": 9.996421001787545e-06, + "loss": 23.8128, + "step": 55420 + }, + { + "epoch": 0.11197210696639019, + "grad_norm": 224.87892150878906, + "learning_rate": 9.996407784551888e-06, + "loss": 20.8717, + "step": 55430 + }, + { + "epoch": 0.111992307599074, + "grad_norm": 768.6514282226562, + "learning_rate": 9.996394542964343e-06, + "loss": 15.8672, + "step": 55440 + }, + { + "epoch": 0.11201250823175782, + "grad_norm": 360.9457702636719, + "learning_rate": 9.996381277024978e-06, + "loss": 24.7542, + "step": 55450 + }, + { + "epoch": 0.11203270886444164, + "grad_norm": 1402.239990234375, + "learning_rate": 9.996367986733857e-06, + "loss": 45.6634, + "step": 55460 + }, + { + "epoch": 0.11205290949712544, + "grad_norm": 317.9265441894531, + "learning_rate": 9.996354672091044e-06, + "loss": 29.3877, + "step": 55470 + }, + { + "epoch": 0.11207311012980926, + "grad_norm": 490.77606201171875, + "learning_rate": 9.996341333096606e-06, + "loss": 21.8087, + "step": 55480 + }, + { + "epoch": 0.11209331076249308, + "grad_norm": 
421.822509765625, + "learning_rate": 9.996327969750605e-06, + "loss": 34.6557, + "step": 55490 + }, + { + "epoch": 0.11211351139517689, + "grad_norm": 159.26327514648438, + "learning_rate": 9.996314582053106e-06, + "loss": 22.0908, + "step": 55500 + }, + { + "epoch": 0.11213371202786071, + "grad_norm": 531.8242797851562, + "learning_rate": 9.996301170004179e-06, + "loss": 26.5158, + "step": 55510 + }, + { + "epoch": 0.11215391266054453, + "grad_norm": 328.302490234375, + "learning_rate": 9.996287733603883e-06, + "loss": 15.4036, + "step": 55520 + }, + { + "epoch": 0.11217411329322835, + "grad_norm": 70.45991516113281, + "learning_rate": 9.996274272852289e-06, + "loss": 21.8658, + "step": 55530 + }, + { + "epoch": 0.11219431392591216, + "grad_norm": 393.1010437011719, + "learning_rate": 9.996260787749457e-06, + "loss": 15.8029, + "step": 55540 + }, + { + "epoch": 0.11221451455859598, + "grad_norm": 866.2672729492188, + "learning_rate": 9.99624727829546e-06, + "loss": 22.1823, + "step": 55550 + }, + { + "epoch": 0.1122347151912798, + "grad_norm": 341.88909912109375, + "learning_rate": 9.996233744490356e-06, + "loss": 18.3481, + "step": 55560 + }, + { + "epoch": 0.1122549158239636, + "grad_norm": 340.0164489746094, + "learning_rate": 9.996220186334217e-06, + "loss": 16.4936, + "step": 55570 + }, + { + "epoch": 0.11227511645664742, + "grad_norm": 381.4293518066406, + "learning_rate": 9.996206603827105e-06, + "loss": 21.0376, + "step": 55580 + }, + { + "epoch": 0.11229531708933124, + "grad_norm": 1209.41845703125, + "learning_rate": 9.99619299696909e-06, + "loss": 46.2751, + "step": 55590 + }, + { + "epoch": 0.11231551772201505, + "grad_norm": 558.7429809570312, + "learning_rate": 9.996179365760235e-06, + "loss": 32.1158, + "step": 55600 + }, + { + "epoch": 0.11233571835469887, + "grad_norm": 306.48089599609375, + "learning_rate": 9.996165710200607e-06, + "loss": 34.9552, + "step": 55610 + }, + { + "epoch": 0.11235591898738269, + "grad_norm": 435.9257507324219, + "learning_rate": 9.996152030290276e-06, + "loss": 25.7519, + "step": 55620 + }, + { + "epoch": 0.1123761196200665, + "grad_norm": 270.9410095214844, + "learning_rate": 9.996138326029303e-06, + "loss": 37.5972, + "step": 55630 + }, + { + "epoch": 0.11239632025275031, + "grad_norm": 586.7861938476562, + "learning_rate": 9.99612459741776e-06, + "loss": 11.5343, + "step": 55640 + }, + { + "epoch": 0.11241652088543413, + "grad_norm": 965.9428100585938, + "learning_rate": 9.99611084445571e-06, + "loss": 34.3882, + "step": 55650 + }, + { + "epoch": 0.11243672151811794, + "grad_norm": 423.5049133300781, + "learning_rate": 9.996097067143223e-06, + "loss": 19.8324, + "step": 55660 + }, + { + "epoch": 0.11245692215080176, + "grad_norm": 628.5164794921875, + "learning_rate": 9.996083265480366e-06, + "loss": 15.5108, + "step": 55670 + }, + { + "epoch": 0.11247712278348558, + "grad_norm": 750.0355224609375, + "learning_rate": 9.996069439467203e-06, + "loss": 14.4539, + "step": 55680 + }, + { + "epoch": 0.1124973234161694, + "grad_norm": 312.1278991699219, + "learning_rate": 9.996055589103804e-06, + "loss": 41.3886, + "step": 55690 + }, + { + "epoch": 0.1125175240488532, + "grad_norm": 176.88836669921875, + "learning_rate": 9.996041714390235e-06, + "loss": 24.272, + "step": 55700 + }, + { + "epoch": 0.11253772468153703, + "grad_norm": 282.3443908691406, + "learning_rate": 9.996027815326565e-06, + "loss": 29.1884, + "step": 55710 + }, + { + "epoch": 0.11255792531422085, + "grad_norm": 459.432861328125, + "learning_rate": 9.996013891912862e-06, + 
"loss": 30.6273, + "step": 55720 + }, + { + "epoch": 0.11257812594690465, + "grad_norm": 69.33020782470703, + "learning_rate": 9.995999944149192e-06, + "loss": 24.3404, + "step": 55730 + }, + { + "epoch": 0.11259832657958847, + "grad_norm": 468.4703063964844, + "learning_rate": 9.995985972035626e-06, + "loss": 22.6565, + "step": 55740 + }, + { + "epoch": 0.11261852721227229, + "grad_norm": 227.947265625, + "learning_rate": 9.995971975572231e-06, + "loss": 19.853, + "step": 55750 + }, + { + "epoch": 0.1126387278449561, + "grad_norm": 875.2319946289062, + "learning_rate": 9.995957954759073e-06, + "loss": 30.5711, + "step": 55760 + }, + { + "epoch": 0.11265892847763992, + "grad_norm": 507.3000793457031, + "learning_rate": 9.995943909596222e-06, + "loss": 27.3444, + "step": 55770 + }, + { + "epoch": 0.11267912911032374, + "grad_norm": 289.5107421875, + "learning_rate": 9.995929840083746e-06, + "loss": 15.7379, + "step": 55780 + }, + { + "epoch": 0.11269932974300755, + "grad_norm": 469.662353515625, + "learning_rate": 9.995915746221715e-06, + "loss": 40.0659, + "step": 55790 + }, + { + "epoch": 0.11271953037569137, + "grad_norm": 589.2631225585938, + "learning_rate": 9.995901628010196e-06, + "loss": 20.8513, + "step": 55800 + }, + { + "epoch": 0.11273973100837519, + "grad_norm": 828.27001953125, + "learning_rate": 9.995887485449257e-06, + "loss": 25.4181, + "step": 55810 + }, + { + "epoch": 0.11275993164105899, + "grad_norm": 356.7001647949219, + "learning_rate": 9.99587331853897e-06, + "loss": 12.0291, + "step": 55820 + }, + { + "epoch": 0.11278013227374281, + "grad_norm": 361.2005615234375, + "learning_rate": 9.995859127279402e-06, + "loss": 28.8626, + "step": 55830 + }, + { + "epoch": 0.11280033290642663, + "grad_norm": 738.162353515625, + "learning_rate": 9.995844911670623e-06, + "loss": 30.355, + "step": 55840 + }, + { + "epoch": 0.11282053353911045, + "grad_norm": 478.655029296875, + "learning_rate": 9.995830671712701e-06, + "loss": 31.6947, + "step": 55850 + }, + { + "epoch": 0.11284073417179426, + "grad_norm": 278.16937255859375, + "learning_rate": 9.995816407405708e-06, + "loss": 17.1322, + "step": 55860 + }, + { + "epoch": 0.11286093480447808, + "grad_norm": 565.4407958984375, + "learning_rate": 9.995802118749708e-06, + "loss": 13.4129, + "step": 55870 + }, + { + "epoch": 0.1128811354371619, + "grad_norm": 794.0328369140625, + "learning_rate": 9.995787805744778e-06, + "loss": 36.1648, + "step": 55880 + }, + { + "epoch": 0.1129013360698457, + "grad_norm": 404.40130615234375, + "learning_rate": 9.995773468390983e-06, + "loss": 51.3268, + "step": 55890 + }, + { + "epoch": 0.11292153670252952, + "grad_norm": 479.6914978027344, + "learning_rate": 9.995759106688394e-06, + "loss": 42.5575, + "step": 55900 + }, + { + "epoch": 0.11294173733521334, + "grad_norm": 426.2121276855469, + "learning_rate": 9.99574472063708e-06, + "loss": 25.6598, + "step": 55910 + }, + { + "epoch": 0.11296193796789715, + "grad_norm": 378.1146240234375, + "learning_rate": 9.995730310237113e-06, + "loss": 16.542, + "step": 55920 + }, + { + "epoch": 0.11298213860058097, + "grad_norm": 993.0328369140625, + "learning_rate": 9.995715875488563e-06, + "loss": 29.6863, + "step": 55930 + }, + { + "epoch": 0.11300233923326479, + "grad_norm": 480.3172912597656, + "learning_rate": 9.9957014163915e-06, + "loss": 20.4958, + "step": 55940 + }, + { + "epoch": 0.1130225398659486, + "grad_norm": 461.8092346191406, + "learning_rate": 9.995686932945993e-06, + "loss": 21.4223, + "step": 55950 + }, + { + "epoch": 0.11304274049863242, + 
"grad_norm": 1219.12744140625, + "learning_rate": 9.995672425152115e-06, + "loss": 20.2931, + "step": 55960 + }, + { + "epoch": 0.11306294113131624, + "grad_norm": 276.2924499511719, + "learning_rate": 9.995657893009933e-06, + "loss": 18.137, + "step": 55970 + }, + { + "epoch": 0.11308314176400004, + "grad_norm": 500.78607177734375, + "learning_rate": 9.995643336519523e-06, + "loss": 71.622, + "step": 55980 + }, + { + "epoch": 0.11310334239668386, + "grad_norm": 8.226629257202148, + "learning_rate": 9.995628755680952e-06, + "loss": 14.3438, + "step": 55990 + }, + { + "epoch": 0.11312354302936768, + "grad_norm": 1123.1558837890625, + "learning_rate": 9.995614150494293e-06, + "loss": 32.6038, + "step": 56000 + }, + { + "epoch": 0.1131437436620515, + "grad_norm": 306.28533935546875, + "learning_rate": 9.995599520959615e-06, + "loss": 22.0771, + "step": 56010 + }, + { + "epoch": 0.11316394429473531, + "grad_norm": 290.10418701171875, + "learning_rate": 9.995584867076994e-06, + "loss": 33.6507, + "step": 56020 + }, + { + "epoch": 0.11318414492741913, + "grad_norm": 321.5469055175781, + "learning_rate": 9.995570188846495e-06, + "loss": 19.5831, + "step": 56030 + }, + { + "epoch": 0.11320434556010295, + "grad_norm": 2160.419189453125, + "learning_rate": 9.995555486268193e-06, + "loss": 23.1755, + "step": 56040 + }, + { + "epoch": 0.11322454619278675, + "grad_norm": 494.45135498046875, + "learning_rate": 9.995540759342161e-06, + "loss": 24.4019, + "step": 56050 + }, + { + "epoch": 0.11324474682547057, + "grad_norm": 220.35263061523438, + "learning_rate": 9.995526008068469e-06, + "loss": 33.1868, + "step": 56060 + }, + { + "epoch": 0.1132649474581544, + "grad_norm": 943.328369140625, + "learning_rate": 9.995511232447189e-06, + "loss": 39.2561, + "step": 56070 + }, + { + "epoch": 0.1132851480908382, + "grad_norm": 840.2725219726562, + "learning_rate": 9.995496432478392e-06, + "loss": 20.5088, + "step": 56080 + }, + { + "epoch": 0.11330534872352202, + "grad_norm": 215.2857666015625, + "learning_rate": 9.995481608162154e-06, + "loss": 32.3851, + "step": 56090 + }, + { + "epoch": 0.11332554935620584, + "grad_norm": 862.8607177734375, + "learning_rate": 9.995466759498543e-06, + "loss": 40.5288, + "step": 56100 + }, + { + "epoch": 0.11334574998888965, + "grad_norm": 469.4685363769531, + "learning_rate": 9.995451886487632e-06, + "loss": 23.1097, + "step": 56110 + }, + { + "epoch": 0.11336595062157347, + "grad_norm": 288.13287353515625, + "learning_rate": 9.995436989129495e-06, + "loss": 12.7289, + "step": 56120 + }, + { + "epoch": 0.11338615125425729, + "grad_norm": 537.4690551757812, + "learning_rate": 9.995422067424203e-06, + "loss": 19.3094, + "step": 56130 + }, + { + "epoch": 0.11340635188694109, + "grad_norm": 481.13201904296875, + "learning_rate": 9.995407121371832e-06, + "loss": 60.8726, + "step": 56140 + }, + { + "epoch": 0.11342655251962491, + "grad_norm": 704.1107788085938, + "learning_rate": 9.995392150972451e-06, + "loss": 20.7292, + "step": 56150 + }, + { + "epoch": 0.11344675315230873, + "grad_norm": 87.8348159790039, + "learning_rate": 9.995377156226133e-06, + "loss": 16.0251, + "step": 56160 + }, + { + "epoch": 0.11346695378499255, + "grad_norm": 465.2171936035156, + "learning_rate": 9.995362137132956e-06, + "loss": 19.0864, + "step": 56170 + }, + { + "epoch": 0.11348715441767636, + "grad_norm": 502.523193359375, + "learning_rate": 9.995347093692987e-06, + "loss": 51.2079, + "step": 56180 + }, + { + "epoch": 0.11350735505036018, + "grad_norm": 278.1914978027344, + "learning_rate": 
9.995332025906304e-06, + "loss": 17.8252, + "step": 56190 + }, + { + "epoch": 0.113527555683044, + "grad_norm": 663.8987426757812, + "learning_rate": 9.995316933772978e-06, + "loss": 39.5888, + "step": 56200 + }, + { + "epoch": 0.1135477563157278, + "grad_norm": 186.1903076171875, + "learning_rate": 9.995301817293084e-06, + "loss": 22.6794, + "step": 56210 + }, + { + "epoch": 0.11356795694841162, + "grad_norm": 361.7859191894531, + "learning_rate": 9.995286676466694e-06, + "loss": 23.041, + "step": 56220 + }, + { + "epoch": 0.11358815758109544, + "grad_norm": 83.45796203613281, + "learning_rate": 9.995271511293881e-06, + "loss": 28.4525, + "step": 56230 + }, + { + "epoch": 0.11360835821377925, + "grad_norm": 630.6253662109375, + "learning_rate": 9.995256321774722e-06, + "loss": 22.4465, + "step": 56240 + }, + { + "epoch": 0.11362855884646307, + "grad_norm": 1938.2076416015625, + "learning_rate": 9.99524110790929e-06, + "loss": 26.8141, + "step": 56250 + }, + { + "epoch": 0.11364875947914689, + "grad_norm": 202.69126892089844, + "learning_rate": 9.995225869697657e-06, + "loss": 18.5333, + "step": 56260 + }, + { + "epoch": 0.1136689601118307, + "grad_norm": 724.8658447265625, + "learning_rate": 9.9952106071399e-06, + "loss": 31.2706, + "step": 56270 + }, + { + "epoch": 0.11368916074451452, + "grad_norm": 337.84869384765625, + "learning_rate": 9.995195320236093e-06, + "loss": 15.1961, + "step": 56280 + }, + { + "epoch": 0.11370936137719834, + "grad_norm": 447.12237548828125, + "learning_rate": 9.995180008986309e-06, + "loss": 23.9454, + "step": 56290 + }, + { + "epoch": 0.11372956200988214, + "grad_norm": 175.07080078125, + "learning_rate": 9.995164673390624e-06, + "loss": 15.8106, + "step": 56300 + }, + { + "epoch": 0.11374976264256596, + "grad_norm": 708.8894653320312, + "learning_rate": 9.995149313449114e-06, + "loss": 27.8332, + "step": 56310 + }, + { + "epoch": 0.11376996327524978, + "grad_norm": 524.0888671875, + "learning_rate": 9.995133929161848e-06, + "loss": 29.217, + "step": 56320 + }, + { + "epoch": 0.1137901639079336, + "grad_norm": 191.51504516601562, + "learning_rate": 9.995118520528908e-06, + "loss": 24.1241, + "step": 56330 + }, + { + "epoch": 0.11381036454061741, + "grad_norm": 293.5033264160156, + "learning_rate": 9.995103087550366e-06, + "loss": 37.2772, + "step": 56340 + }, + { + "epoch": 0.11383056517330123, + "grad_norm": 759.746337890625, + "learning_rate": 9.995087630226295e-06, + "loss": 20.8201, + "step": 56350 + }, + { + "epoch": 0.11385076580598505, + "grad_norm": 1000.7098388671875, + "learning_rate": 9.995072148556776e-06, + "loss": 37.3082, + "step": 56360 + }, + { + "epoch": 0.11387096643866886, + "grad_norm": 338.8851318359375, + "learning_rate": 9.995056642541879e-06, + "loss": 28.0065, + "step": 56370 + }, + { + "epoch": 0.11389116707135268, + "grad_norm": 513.4716796875, + "learning_rate": 9.995041112181683e-06, + "loss": 23.1701, + "step": 56380 + }, + { + "epoch": 0.1139113677040365, + "grad_norm": 482.89727783203125, + "learning_rate": 9.99502555747626e-06, + "loss": 21.6878, + "step": 56390 + }, + { + "epoch": 0.1139315683367203, + "grad_norm": 181.3020782470703, + "learning_rate": 9.995009978425692e-06, + "loss": 23.6508, + "step": 56400 + }, + { + "epoch": 0.11395176896940412, + "grad_norm": 275.6293029785156, + "learning_rate": 9.994994375030048e-06, + "loss": 16.3618, + "step": 56410 + }, + { + "epoch": 0.11397196960208794, + "grad_norm": 504.3753967285156, + "learning_rate": 9.994978747289408e-06, + "loss": 26.1073, + "step": 56420 + }, + { + 
"epoch": 0.11399217023477175, + "grad_norm": 264.54803466796875, + "learning_rate": 9.994963095203849e-06, + "loss": 21.6772, + "step": 56430 + }, + { + "epoch": 0.11401237086745557, + "grad_norm": 701.0449829101562, + "learning_rate": 9.994947418773445e-06, + "loss": 18.9392, + "step": 56440 + }, + { + "epoch": 0.11403257150013939, + "grad_norm": 58.76970672607422, + "learning_rate": 9.994931717998272e-06, + "loss": 23.6807, + "step": 56450 + }, + { + "epoch": 0.1140527721328232, + "grad_norm": 554.5676879882812, + "learning_rate": 9.99491599287841e-06, + "loss": 25.2369, + "step": 56460 + }, + { + "epoch": 0.11407297276550701, + "grad_norm": 892.4497680664062, + "learning_rate": 9.99490024341393e-06, + "loss": 34.5214, + "step": 56470 + }, + { + "epoch": 0.11409317339819083, + "grad_norm": 662.4153442382812, + "learning_rate": 9.994884469604913e-06, + "loss": 26.8336, + "step": 56480 + }, + { + "epoch": 0.11411337403087465, + "grad_norm": 685.477294921875, + "learning_rate": 9.994868671451436e-06, + "loss": 19.0529, + "step": 56490 + }, + { + "epoch": 0.11413357466355846, + "grad_norm": 465.7191467285156, + "learning_rate": 9.994852848953574e-06, + "loss": 30.9412, + "step": 56500 + }, + { + "epoch": 0.11415377529624228, + "grad_norm": 403.6457824707031, + "learning_rate": 9.994837002111407e-06, + "loss": 24.6486, + "step": 56510 + }, + { + "epoch": 0.1141739759289261, + "grad_norm": 743.2261962890625, + "learning_rate": 9.994821130925007e-06, + "loss": 22.5384, + "step": 56520 + }, + { + "epoch": 0.1141941765616099, + "grad_norm": 684.90576171875, + "learning_rate": 9.994805235394456e-06, + "loss": 26.7867, + "step": 56530 + }, + { + "epoch": 0.11421437719429373, + "grad_norm": 160.59158325195312, + "learning_rate": 9.99478931551983e-06, + "loss": 34.1178, + "step": 56540 + }, + { + "epoch": 0.11423457782697755, + "grad_norm": 474.1269226074219, + "learning_rate": 9.994773371301208e-06, + "loss": 36.5215, + "step": 56550 + }, + { + "epoch": 0.11425477845966135, + "grad_norm": 1074.53271484375, + "learning_rate": 9.994757402738666e-06, + "loss": 42.4489, + "step": 56560 + }, + { + "epoch": 0.11427497909234517, + "grad_norm": 258.2461853027344, + "learning_rate": 9.99474140983228e-06, + "loss": 28.1245, + "step": 56570 + }, + { + "epoch": 0.11429517972502899, + "grad_norm": 411.3861999511719, + "learning_rate": 9.994725392582132e-06, + "loss": 21.3379, + "step": 56580 + }, + { + "epoch": 0.1143153803577128, + "grad_norm": 444.5906677246094, + "learning_rate": 9.994709350988299e-06, + "loss": 13.2252, + "step": 56590 + }, + { + "epoch": 0.11433558099039662, + "grad_norm": 307.0662841796875, + "learning_rate": 9.994693285050858e-06, + "loss": 20.8745, + "step": 56600 + }, + { + "epoch": 0.11435578162308044, + "grad_norm": 433.30780029296875, + "learning_rate": 9.994677194769886e-06, + "loss": 24.0843, + "step": 56610 + }, + { + "epoch": 0.11437598225576424, + "grad_norm": 271.6571960449219, + "learning_rate": 9.994661080145464e-06, + "loss": 33.2398, + "step": 56620 + }, + { + "epoch": 0.11439618288844806, + "grad_norm": 338.8603210449219, + "learning_rate": 9.99464494117767e-06, + "loss": 32.869, + "step": 56630 + }, + { + "epoch": 0.11441638352113188, + "grad_norm": 358.50103759765625, + "learning_rate": 9.994628777866582e-06, + "loss": 21.8971, + "step": 56640 + }, + { + "epoch": 0.1144365841538157, + "grad_norm": 423.21978759765625, + "learning_rate": 9.99461259021228e-06, + "loss": 31.6401, + "step": 56650 + }, + { + "epoch": 0.11445678478649951, + "grad_norm": 133.86451721191406, + 
"learning_rate": 9.99459637821484e-06, + "loss": 21.5203, + "step": 56660 + }, + { + "epoch": 0.11447698541918333, + "grad_norm": 179.37725830078125, + "learning_rate": 9.994580141874345e-06, + "loss": 22.2574, + "step": 56670 + }, + { + "epoch": 0.11449718605186715, + "grad_norm": 458.38720703125, + "learning_rate": 9.994563881190874e-06, + "loss": 16.016, + "step": 56680 + }, + { + "epoch": 0.11451738668455096, + "grad_norm": 234.48512268066406, + "learning_rate": 9.9945475961645e-06, + "loss": 36.4717, + "step": 56690 + }, + { + "epoch": 0.11453758731723478, + "grad_norm": 906.83837890625, + "learning_rate": 9.994531286795309e-06, + "loss": 34.8452, + "step": 56700 + }, + { + "epoch": 0.1145577879499186, + "grad_norm": 293.6820068359375, + "learning_rate": 9.994514953083379e-06, + "loss": 18.3865, + "step": 56710 + }, + { + "epoch": 0.1145779885826024, + "grad_norm": 399.5999450683594, + "learning_rate": 9.994498595028787e-06, + "loss": 15.497, + "step": 56720 + }, + { + "epoch": 0.11459818921528622, + "grad_norm": 321.9813232421875, + "learning_rate": 9.994482212631616e-06, + "loss": 22.6184, + "step": 56730 + }, + { + "epoch": 0.11461838984797004, + "grad_norm": 612.1865844726562, + "learning_rate": 9.994465805891944e-06, + "loss": 33.9304, + "step": 56740 + }, + { + "epoch": 0.11463859048065385, + "grad_norm": 490.2412414550781, + "learning_rate": 9.994449374809851e-06, + "loss": 26.3115, + "step": 56750 + }, + { + "epoch": 0.11465879111333767, + "grad_norm": 564.2010498046875, + "learning_rate": 9.994432919385417e-06, + "loss": 22.0646, + "step": 56760 + }, + { + "epoch": 0.11467899174602149, + "grad_norm": 1255.3232421875, + "learning_rate": 9.994416439618723e-06, + "loss": 32.4541, + "step": 56770 + }, + { + "epoch": 0.1146991923787053, + "grad_norm": 463.0643310546875, + "learning_rate": 9.994399935509851e-06, + "loss": 25.6335, + "step": 56780 + }, + { + "epoch": 0.11471939301138911, + "grad_norm": 411.6350402832031, + "learning_rate": 9.994383407058878e-06, + "loss": 21.3374, + "step": 56790 + }, + { + "epoch": 0.11473959364407293, + "grad_norm": 190.25454711914062, + "learning_rate": 9.994366854265886e-06, + "loss": 35.2417, + "step": 56800 + }, + { + "epoch": 0.11475979427675675, + "grad_norm": 499.181396484375, + "learning_rate": 9.994350277130956e-06, + "loss": 27.2036, + "step": 56810 + }, + { + "epoch": 0.11477999490944056, + "grad_norm": 353.4414367675781, + "learning_rate": 9.994333675654169e-06, + "loss": 31.7005, + "step": 56820 + }, + { + "epoch": 0.11480019554212438, + "grad_norm": 1147.8031005859375, + "learning_rate": 9.994317049835604e-06, + "loss": 36.4782, + "step": 56830 + }, + { + "epoch": 0.1148203961748082, + "grad_norm": 738.7057495117188, + "learning_rate": 9.994300399675342e-06, + "loss": 18.1057, + "step": 56840 + }, + { + "epoch": 0.114840596807492, + "grad_norm": 61.7659797668457, + "learning_rate": 9.994283725173468e-06, + "loss": 14.8604, + "step": 56850 + }, + { + "epoch": 0.11486079744017583, + "grad_norm": 513.6361694335938, + "learning_rate": 9.994267026330063e-06, + "loss": 26.7803, + "step": 56860 + }, + { + "epoch": 0.11488099807285965, + "grad_norm": 395.6575622558594, + "learning_rate": 9.994250303145203e-06, + "loss": 29.9968, + "step": 56870 + }, + { + "epoch": 0.11490119870554345, + "grad_norm": 477.5645446777344, + "learning_rate": 9.994233555618973e-06, + "loss": 31.5064, + "step": 56880 + }, + { + "epoch": 0.11492139933822727, + "grad_norm": 823.677978515625, + "learning_rate": 9.994216783751457e-06, + "loss": 20.4399, + "step": 56890 
+ }, + { + "epoch": 0.11494159997091109, + "grad_norm": 697.7006225585938, + "learning_rate": 9.99419998754273e-06, + "loss": 19.6363, + "step": 56900 + }, + { + "epoch": 0.1149618006035949, + "grad_norm": 309.27020263671875, + "learning_rate": 9.99418316699288e-06, + "loss": 15.9355, + "step": 56910 + }, + { + "epoch": 0.11498200123627872, + "grad_norm": 765.7940063476562, + "learning_rate": 9.994166322101988e-06, + "loss": 23.612, + "step": 56920 + }, + { + "epoch": 0.11500220186896254, + "grad_norm": 422.702392578125, + "learning_rate": 9.994149452870133e-06, + "loss": 21.7339, + "step": 56930 + }, + { + "epoch": 0.11502240250164635, + "grad_norm": 143.65939331054688, + "learning_rate": 9.9941325592974e-06, + "loss": 26.656, + "step": 56940 + }, + { + "epoch": 0.11504260313433017, + "grad_norm": 522.1123657226562, + "learning_rate": 9.994115641383872e-06, + "loss": 16.4517, + "step": 56950 + }, + { + "epoch": 0.11506280376701399, + "grad_norm": 439.05224609375, + "learning_rate": 9.994098699129628e-06, + "loss": 25.0762, + "step": 56960 + }, + { + "epoch": 0.1150830043996978, + "grad_norm": 448.45843505859375, + "learning_rate": 9.994081732534755e-06, + "loss": 31.6349, + "step": 56970 + }, + { + "epoch": 0.11510320503238161, + "grad_norm": 285.8194274902344, + "learning_rate": 9.994064741599332e-06, + "loss": 33.3975, + "step": 56980 + }, + { + "epoch": 0.11512340566506543, + "grad_norm": 248.9784393310547, + "learning_rate": 9.994047726323442e-06, + "loss": 45.9833, + "step": 56990 + }, + { + "epoch": 0.11514360629774925, + "grad_norm": 389.7409362792969, + "learning_rate": 9.994030686707171e-06, + "loss": 21.8294, + "step": 57000 + }, + { + "epoch": 0.11516380693043306, + "grad_norm": 615.904052734375, + "learning_rate": 9.9940136227506e-06, + "loss": 22.9632, + "step": 57010 + }, + { + "epoch": 0.11518400756311688, + "grad_norm": 621.622314453125, + "learning_rate": 9.993996534453812e-06, + "loss": 33.6485, + "step": 57020 + }, + { + "epoch": 0.1152042081958007, + "grad_norm": 725.99365234375, + "learning_rate": 9.993979421816889e-06, + "loss": 24.6697, + "step": 57030 + }, + { + "epoch": 0.1152244088284845, + "grad_norm": 476.81597900390625, + "learning_rate": 9.993962284839918e-06, + "loss": 11.6037, + "step": 57040 + }, + { + "epoch": 0.11524460946116832, + "grad_norm": 514.463623046875, + "learning_rate": 9.99394512352298e-06, + "loss": 17.1566, + "step": 57050 + }, + { + "epoch": 0.11526481009385214, + "grad_norm": 104.52167510986328, + "learning_rate": 9.993927937866158e-06, + "loss": 17.8265, + "step": 57060 + }, + { + "epoch": 0.11528501072653595, + "grad_norm": 642.501220703125, + "learning_rate": 9.993910727869538e-06, + "loss": 11.5884, + "step": 57070 + }, + { + "epoch": 0.11530521135921977, + "grad_norm": 1070.10546875, + "learning_rate": 9.993893493533203e-06, + "loss": 33.6258, + "step": 57080 + }, + { + "epoch": 0.11532541199190359, + "grad_norm": 349.5815124511719, + "learning_rate": 9.993876234857236e-06, + "loss": 26.892, + "step": 57090 + }, + { + "epoch": 0.1153456126245874, + "grad_norm": 879.8796997070312, + "learning_rate": 9.993858951841724e-06, + "loss": 32.2006, + "step": 57100 + }, + { + "epoch": 0.11536581325727122, + "grad_norm": 424.421875, + "learning_rate": 9.993841644486747e-06, + "loss": 21.2678, + "step": 57110 + }, + { + "epoch": 0.11538601388995504, + "grad_norm": 462.0297546386719, + "learning_rate": 9.993824312792393e-06, + "loss": 19.2433, + "step": 57120 + }, + { + "epoch": 0.11540621452263886, + "grad_norm": 856.4939575195312, + 
"learning_rate": 9.993806956758743e-06, + "loss": 31.9198, + "step": 57130 + }, + { + "epoch": 0.11542641515532266, + "grad_norm": 427.4967346191406, + "learning_rate": 9.993789576385884e-06, + "loss": 24.4099, + "step": 57140 + }, + { + "epoch": 0.11544661578800648, + "grad_norm": 440.6112060546875, + "learning_rate": 9.993772171673901e-06, + "loss": 23.8275, + "step": 57150 + }, + { + "epoch": 0.1154668164206903, + "grad_norm": 592.0247192382812, + "learning_rate": 9.993754742622879e-06, + "loss": 22.3119, + "step": 57160 + }, + { + "epoch": 0.11548701705337411, + "grad_norm": 569.0758056640625, + "learning_rate": 9.993737289232902e-06, + "loss": 25.2769, + "step": 57170 + }, + { + "epoch": 0.11550721768605793, + "grad_norm": 488.72186279296875, + "learning_rate": 9.993719811504053e-06, + "loss": 46.9328, + "step": 57180 + }, + { + "epoch": 0.11552741831874175, + "grad_norm": 543.4966430664062, + "learning_rate": 9.993702309436419e-06, + "loss": 32.973, + "step": 57190 + }, + { + "epoch": 0.11554761895142555, + "grad_norm": 421.9209289550781, + "learning_rate": 9.99368478303009e-06, + "loss": 29.8, + "step": 57200 + }, + { + "epoch": 0.11556781958410937, + "grad_norm": 626.4269409179688, + "learning_rate": 9.993667232285142e-06, + "loss": 27.3487, + "step": 57210 + }, + { + "epoch": 0.1155880202167932, + "grad_norm": 692.6672973632812, + "learning_rate": 9.993649657201669e-06, + "loss": 38.2158, + "step": 57220 + }, + { + "epoch": 0.115608220849477, + "grad_norm": 570.1915893554688, + "learning_rate": 9.993632057779752e-06, + "loss": 27.0207, + "step": 57230 + }, + { + "epoch": 0.11562842148216082, + "grad_norm": 467.6327209472656, + "learning_rate": 9.993614434019476e-06, + "loss": 22.598, + "step": 57240 + }, + { + "epoch": 0.11564862211484464, + "grad_norm": 325.28057861328125, + "learning_rate": 9.993596785920932e-06, + "loss": 19.8026, + "step": 57250 + }, + { + "epoch": 0.11566882274752845, + "grad_norm": 511.07354736328125, + "learning_rate": 9.993579113484202e-06, + "loss": 15.4554, + "step": 57260 + }, + { + "epoch": 0.11568902338021227, + "grad_norm": 673.0833740234375, + "learning_rate": 9.993561416709372e-06, + "loss": 20.6629, + "step": 57270 + }, + { + "epoch": 0.11570922401289609, + "grad_norm": 536.3285522460938, + "learning_rate": 9.99354369559653e-06, + "loss": 15.0465, + "step": 57280 + }, + { + "epoch": 0.1157294246455799, + "grad_norm": 496.5762634277344, + "learning_rate": 9.993525950145761e-06, + "loss": 43.6122, + "step": 57290 + }, + { + "epoch": 0.11574962527826371, + "grad_norm": 224.7476806640625, + "learning_rate": 9.993508180357154e-06, + "loss": 30.9923, + "step": 57300 + }, + { + "epoch": 0.11576982591094753, + "grad_norm": 402.7610168457031, + "learning_rate": 9.993490386230793e-06, + "loss": 16.9395, + "step": 57310 + }, + { + "epoch": 0.11579002654363135, + "grad_norm": 367.4598693847656, + "learning_rate": 9.993472567766764e-06, + "loss": 38.8323, + "step": 57320 + }, + { + "epoch": 0.11581022717631516, + "grad_norm": 364.91204833984375, + "learning_rate": 9.993454724965157e-06, + "loss": 41.1314, + "step": 57330 + }, + { + "epoch": 0.11583042780899898, + "grad_norm": 645.4959106445312, + "learning_rate": 9.993436857826058e-06, + "loss": 20.6863, + "step": 57340 + }, + { + "epoch": 0.1158506284416828, + "grad_norm": 1384.5391845703125, + "learning_rate": 9.993418966349551e-06, + "loss": 42.2686, + "step": 57350 + }, + { + "epoch": 0.1158708290743666, + "grad_norm": 139.28945922851562, + "learning_rate": 9.993401050535726e-06, + "loss": 22.8077, + 
"step": 57360 + }, + { + "epoch": 0.11589102970705042, + "grad_norm": 650.4097290039062, + "learning_rate": 9.993383110384673e-06, + "loss": 22.7724, + "step": 57370 + }, + { + "epoch": 0.11591123033973424, + "grad_norm": 778.6038208007812, + "learning_rate": 9.993365145896473e-06, + "loss": 39.9742, + "step": 57380 + }, + { + "epoch": 0.11593143097241805, + "grad_norm": 345.47930908203125, + "learning_rate": 9.993347157071218e-06, + "loss": 39.1161, + "step": 57390 + }, + { + "epoch": 0.11595163160510187, + "grad_norm": 597.97900390625, + "learning_rate": 9.993329143908994e-06, + "loss": 18.4554, + "step": 57400 + }, + { + "epoch": 0.11597183223778569, + "grad_norm": 596.5072021484375, + "learning_rate": 9.993311106409891e-06, + "loss": 42.1834, + "step": 57410 + }, + { + "epoch": 0.1159920328704695, + "grad_norm": 614.6201782226562, + "learning_rate": 9.993293044573995e-06, + "loss": 24.9577, + "step": 57420 + }, + { + "epoch": 0.11601223350315332, + "grad_norm": 279.330078125, + "learning_rate": 9.993274958401392e-06, + "loss": 15.4448, + "step": 57430 + }, + { + "epoch": 0.11603243413583714, + "grad_norm": 958.8892211914062, + "learning_rate": 9.993256847892175e-06, + "loss": 38.1558, + "step": 57440 + }, + { + "epoch": 0.11605263476852096, + "grad_norm": 280.5309753417969, + "learning_rate": 9.993238713046428e-06, + "loss": 24.53, + "step": 57450 + }, + { + "epoch": 0.11607283540120476, + "grad_norm": 600.3644409179688, + "learning_rate": 9.993220553864242e-06, + "loss": 20.4499, + "step": 57460 + }, + { + "epoch": 0.11609303603388858, + "grad_norm": 562.6834106445312, + "learning_rate": 9.993202370345705e-06, + "loss": 33.8889, + "step": 57470 + }, + { + "epoch": 0.1161132366665724, + "grad_norm": 548.0901489257812, + "learning_rate": 9.993184162490903e-06, + "loss": 29.6577, + "step": 57480 + }, + { + "epoch": 0.11613343729925621, + "grad_norm": 2284.564208984375, + "learning_rate": 9.99316593029993e-06, + "loss": 16.7488, + "step": 57490 + }, + { + "epoch": 0.11615363793194003, + "grad_norm": 303.5821838378906, + "learning_rate": 9.993147673772869e-06, + "loss": 12.0639, + "step": 57500 + }, + { + "epoch": 0.11617383856462385, + "grad_norm": 66.35529327392578, + "learning_rate": 9.993129392909814e-06, + "loss": 12.7584, + "step": 57510 + }, + { + "epoch": 0.11619403919730766, + "grad_norm": 562.1385498046875, + "learning_rate": 9.993111087710852e-06, + "loss": 27.5785, + "step": 57520 + }, + { + "epoch": 0.11621423982999148, + "grad_norm": 539.2239990234375, + "learning_rate": 9.993092758176071e-06, + "loss": 27.2716, + "step": 57530 + }, + { + "epoch": 0.1162344404626753, + "grad_norm": 614.6983032226562, + "learning_rate": 9.993074404305563e-06, + "loss": 42.4461, + "step": 57540 + }, + { + "epoch": 0.1162546410953591, + "grad_norm": 294.503662109375, + "learning_rate": 9.993056026099415e-06, + "loss": 19.2669, + "step": 57550 + }, + { + "epoch": 0.11627484172804292, + "grad_norm": 1092.808837890625, + "learning_rate": 9.993037623557716e-06, + "loss": 27.4644, + "step": 57560 + }, + { + "epoch": 0.11629504236072674, + "grad_norm": 371.67474365234375, + "learning_rate": 9.993019196680558e-06, + "loss": 29.2138, + "step": 57570 + }, + { + "epoch": 0.11631524299341055, + "grad_norm": 180.21095275878906, + "learning_rate": 9.993000745468031e-06, + "loss": 31.5071, + "step": 57580 + }, + { + "epoch": 0.11633544362609437, + "grad_norm": 300.315673828125, + "learning_rate": 9.992982269920223e-06, + "loss": 34.1091, + "step": 57590 + }, + { + "epoch": 0.11635564425877819, + "grad_norm": 
456.3833312988281, + "learning_rate": 9.992963770037227e-06, + "loss": 33.3168, + "step": 57600 + }, + { + "epoch": 0.11637584489146201, + "grad_norm": 561.9912719726562, + "learning_rate": 9.99294524581913e-06, + "loss": 24.6334, + "step": 57610 + }, + { + "epoch": 0.11639604552414581, + "grad_norm": 923.3316650390625, + "learning_rate": 9.992926697266023e-06, + "loss": 32.0695, + "step": 57620 + }, + { + "epoch": 0.11641624615682963, + "grad_norm": 271.27056884765625, + "learning_rate": 9.992908124377997e-06, + "loss": 24.4795, + "step": 57630 + }, + { + "epoch": 0.11643644678951345, + "grad_norm": 366.7196960449219, + "learning_rate": 9.992889527155143e-06, + "loss": 12.7371, + "step": 57640 + }, + { + "epoch": 0.11645664742219726, + "grad_norm": 556.1275634765625, + "learning_rate": 9.992870905597549e-06, + "loss": 29.0126, + "step": 57650 + }, + { + "epoch": 0.11647684805488108, + "grad_norm": 286.32623291015625, + "learning_rate": 9.99285225970531e-06, + "loss": 12.455, + "step": 57660 + }, + { + "epoch": 0.1164970486875649, + "grad_norm": 1047.10791015625, + "learning_rate": 9.992833589478513e-06, + "loss": 33.5002, + "step": 57670 + }, + { + "epoch": 0.1165172493202487, + "grad_norm": 158.2174835205078, + "learning_rate": 9.992814894917251e-06, + "loss": 30.4101, + "step": 57680 + }, + { + "epoch": 0.11653744995293253, + "grad_norm": 427.68572998046875, + "learning_rate": 9.992796176021616e-06, + "loss": 29.2073, + "step": 57690 + }, + { + "epoch": 0.11655765058561635, + "grad_norm": 769.3206176757812, + "learning_rate": 9.992777432791697e-06, + "loss": 30.5703, + "step": 57700 + }, + { + "epoch": 0.11657785121830015, + "grad_norm": 341.97418212890625, + "learning_rate": 9.992758665227586e-06, + "loss": 20.2494, + "step": 57710 + }, + { + "epoch": 0.11659805185098397, + "grad_norm": 262.6230773925781, + "learning_rate": 9.992739873329375e-06, + "loss": 19.6372, + "step": 57720 + }, + { + "epoch": 0.11661825248366779, + "grad_norm": 458.0865478515625, + "learning_rate": 9.992721057097157e-06, + "loss": 24.1996, + "step": 57730 + }, + { + "epoch": 0.1166384531163516, + "grad_norm": 0.0, + "learning_rate": 9.99270221653102e-06, + "loss": 26.2471, + "step": 57740 + }, + { + "epoch": 0.11665865374903542, + "grad_norm": 513.4843139648438, + "learning_rate": 9.99268335163106e-06, + "loss": 13.6434, + "step": 57750 + }, + { + "epoch": 0.11667885438171924, + "grad_norm": 678.3419189453125, + "learning_rate": 9.992664462397366e-06, + "loss": 25.4183, + "step": 57760 + }, + { + "epoch": 0.11669905501440304, + "grad_norm": 431.51934814453125, + "learning_rate": 9.99264554883003e-06, + "loss": 39.4392, + "step": 57770 + }, + { + "epoch": 0.11671925564708686, + "grad_norm": 1319.972412109375, + "learning_rate": 9.992626610929146e-06, + "loss": 38.1995, + "step": 57780 + }, + { + "epoch": 0.11673945627977068, + "grad_norm": 961.5303344726562, + "learning_rate": 9.992607648694805e-06, + "loss": 40.5593, + "step": 57790 + }, + { + "epoch": 0.1167596569124545, + "grad_norm": 1148.9764404296875, + "learning_rate": 9.9925886621271e-06, + "loss": 41.0508, + "step": 57800 + }, + { + "epoch": 0.11677985754513831, + "grad_norm": 970.7742309570312, + "learning_rate": 9.992569651226123e-06, + "loss": 33.3816, + "step": 57810 + }, + { + "epoch": 0.11680005817782213, + "grad_norm": 494.38726806640625, + "learning_rate": 9.992550615991968e-06, + "loss": 24.0766, + "step": 57820 + }, + { + "epoch": 0.11682025881050595, + "grad_norm": 767.6143798828125, + "learning_rate": 9.992531556424726e-06, + "loss": 
23.0109, + "step": 57830 + }, + { + "epoch": 0.11684045944318976, + "grad_norm": 1003.689697265625, + "learning_rate": 9.992512472524491e-06, + "loss": 26.832, + "step": 57840 + }, + { + "epoch": 0.11686066007587358, + "grad_norm": 314.8854064941406, + "learning_rate": 9.992493364291356e-06, + "loss": 17.4941, + "step": 57850 + }, + { + "epoch": 0.1168808607085574, + "grad_norm": 2019.5692138671875, + "learning_rate": 9.992474231725412e-06, + "loss": 12.1528, + "step": 57860 + }, + { + "epoch": 0.1169010613412412, + "grad_norm": 562.8046264648438, + "learning_rate": 9.992455074826757e-06, + "loss": 37.6524, + "step": 57870 + }, + { + "epoch": 0.11692126197392502, + "grad_norm": 376.6202392578125, + "learning_rate": 9.99243589359548e-06, + "loss": 20.6366, + "step": 57880 + }, + { + "epoch": 0.11694146260660884, + "grad_norm": 657.8515625, + "learning_rate": 9.992416688031676e-06, + "loss": 59.3229, + "step": 57890 + }, + { + "epoch": 0.11696166323929265, + "grad_norm": 494.7339172363281, + "learning_rate": 9.992397458135438e-06, + "loss": 25.8328, + "step": 57900 + }, + { + "epoch": 0.11698186387197647, + "grad_norm": 543.6424560546875, + "learning_rate": 9.992378203906862e-06, + "loss": 44.8141, + "step": 57910 + }, + { + "epoch": 0.11700206450466029, + "grad_norm": 639.5101928710938, + "learning_rate": 9.99235892534604e-06, + "loss": 17.6983, + "step": 57920 + }, + { + "epoch": 0.1170222651373441, + "grad_norm": 370.078125, + "learning_rate": 9.992339622453065e-06, + "loss": 18.3153, + "step": 57930 + }, + { + "epoch": 0.11704246577002791, + "grad_norm": 717.491943359375, + "learning_rate": 9.992320295228032e-06, + "loss": 17.3251, + "step": 57940 + }, + { + "epoch": 0.11706266640271173, + "grad_norm": 760.3424072265625, + "learning_rate": 9.992300943671035e-06, + "loss": 28.1888, + "step": 57950 + }, + { + "epoch": 0.11708286703539555, + "grad_norm": 713.1371459960938, + "learning_rate": 9.99228156778217e-06, + "loss": 34.6133, + "step": 57960 + }, + { + "epoch": 0.11710306766807936, + "grad_norm": 922.029541015625, + "learning_rate": 9.99226216756153e-06, + "loss": 32.47, + "step": 57970 + }, + { + "epoch": 0.11712326830076318, + "grad_norm": 440.2043762207031, + "learning_rate": 9.99224274300921e-06, + "loss": 23.6248, + "step": 57980 + }, + { + "epoch": 0.117143468933447, + "grad_norm": 435.0998840332031, + "learning_rate": 9.992223294125303e-06, + "loss": 32.5848, + "step": 57990 + }, + { + "epoch": 0.11716366956613081, + "grad_norm": 367.6516418457031, + "learning_rate": 9.992203820909906e-06, + "loss": 15.29, + "step": 58000 + }, + { + "epoch": 0.11718387019881463, + "grad_norm": 1535.766845703125, + "learning_rate": 9.992184323363112e-06, + "loss": 34.3091, + "step": 58010 + }, + { + "epoch": 0.11720407083149845, + "grad_norm": 151.68701171875, + "learning_rate": 9.992164801485018e-06, + "loss": 15.6894, + "step": 58020 + }, + { + "epoch": 0.11722427146418225, + "grad_norm": 664.869140625, + "learning_rate": 9.992145255275718e-06, + "loss": 43.3754, + "step": 58030 + }, + { + "epoch": 0.11724447209686607, + "grad_norm": 286.3050842285156, + "learning_rate": 9.99212568473531e-06, + "loss": 41.8678, + "step": 58040 + }, + { + "epoch": 0.1172646727295499, + "grad_norm": 387.75909423828125, + "learning_rate": 9.992106089863884e-06, + "loss": 20.4063, + "step": 58050 + }, + { + "epoch": 0.1172848733622337, + "grad_norm": 555.0672607421875, + "learning_rate": 9.992086470661537e-06, + "loss": 21.1624, + "step": 58060 + }, + { + "epoch": 0.11730507399491752, + "grad_norm": 
337.3147888183594, + "learning_rate": 9.992066827128368e-06, + "loss": 34.3024, + "step": 58070 + }, + { + "epoch": 0.11732527462760134, + "grad_norm": 368.8452453613281, + "learning_rate": 9.992047159264472e-06, + "loss": 28.2762, + "step": 58080 + }, + { + "epoch": 0.11734547526028515, + "grad_norm": 2197.8544921875, + "learning_rate": 9.992027467069943e-06, + "loss": 50.4354, + "step": 58090 + }, + { + "epoch": 0.11736567589296897, + "grad_norm": 304.2486572265625, + "learning_rate": 9.992007750544876e-06, + "loss": 23.3722, + "step": 58100 + }, + { + "epoch": 0.11738587652565279, + "grad_norm": 185.19680786132812, + "learning_rate": 9.99198800968937e-06, + "loss": 14.933, + "step": 58110 + }, + { + "epoch": 0.1174060771583366, + "grad_norm": 280.1153259277344, + "learning_rate": 9.991968244503519e-06, + "loss": 16.9798, + "step": 58120 + }, + { + "epoch": 0.11742627779102041, + "grad_norm": 76.7415542602539, + "learning_rate": 9.991948454987422e-06, + "loss": 31.7434, + "step": 58130 + }, + { + "epoch": 0.11744647842370423, + "grad_norm": 420.1647033691406, + "learning_rate": 9.99192864114117e-06, + "loss": 24.274, + "step": 58140 + }, + { + "epoch": 0.11746667905638805, + "grad_norm": 495.7421569824219, + "learning_rate": 9.991908802964867e-06, + "loss": 23.7836, + "step": 58150 + }, + { + "epoch": 0.11748687968907186, + "grad_norm": 703.0047607421875, + "learning_rate": 9.991888940458605e-06, + "loss": 49.2084, + "step": 58160 + }, + { + "epoch": 0.11750708032175568, + "grad_norm": 1381.2279052734375, + "learning_rate": 9.99186905362248e-06, + "loss": 26.2378, + "step": 58170 + }, + { + "epoch": 0.1175272809544395, + "grad_norm": 596.7677001953125, + "learning_rate": 9.991849142456593e-06, + "loss": 28.4683, + "step": 58180 + }, + { + "epoch": 0.1175474815871233, + "grad_norm": 591.5968017578125, + "learning_rate": 9.991829206961038e-06, + "loss": 21.3083, + "step": 58190 + }, + { + "epoch": 0.11756768221980712, + "grad_norm": 319.48431396484375, + "learning_rate": 9.991809247135912e-06, + "loss": 17.7571, + "step": 58200 + }, + { + "epoch": 0.11758788285249094, + "grad_norm": 525.16064453125, + "learning_rate": 9.991789262981314e-06, + "loss": 21.6295, + "step": 58210 + }, + { + "epoch": 0.11760808348517475, + "grad_norm": 647.814208984375, + "learning_rate": 9.99176925449734e-06, + "loss": 27.8297, + "step": 58220 + }, + { + "epoch": 0.11762828411785857, + "grad_norm": 604.1672973632812, + "learning_rate": 9.991749221684088e-06, + "loss": 30.7615, + "step": 58230 + }, + { + "epoch": 0.11764848475054239, + "grad_norm": 349.92083740234375, + "learning_rate": 9.991729164541656e-06, + "loss": 12.1612, + "step": 58240 + }, + { + "epoch": 0.1176686853832262, + "grad_norm": 504.90301513671875, + "learning_rate": 9.991709083070143e-06, + "loss": 27.4712, + "step": 58250 + }, + { + "epoch": 0.11768888601591002, + "grad_norm": 342.3115234375, + "learning_rate": 9.991688977269643e-06, + "loss": 13.135, + "step": 58260 + }, + { + "epoch": 0.11770908664859384, + "grad_norm": 734.429443359375, + "learning_rate": 9.991668847140258e-06, + "loss": 27.3773, + "step": 58270 + }, + { + "epoch": 0.11772928728127766, + "grad_norm": 343.8572082519531, + "learning_rate": 9.991648692682083e-06, + "loss": 27.6731, + "step": 58280 + }, + { + "epoch": 0.11774948791396146, + "grad_norm": 927.5995483398438, + "learning_rate": 9.99162851389522e-06, + "loss": 37.6311, + "step": 58290 + }, + { + "epoch": 0.11776968854664528, + "grad_norm": 367.6170654296875, + "learning_rate": 9.991608310779762e-06, + "loss": 
20.4994, + "step": 58300 + }, + { + "epoch": 0.1177898891793291, + "grad_norm": 320.6072998046875, + "learning_rate": 9.991588083335812e-06, + "loss": 19.2313, + "step": 58310 + }, + { + "epoch": 0.11781008981201291, + "grad_norm": 23.609176635742188, + "learning_rate": 9.991567831563468e-06, + "loss": 17.761, + "step": 58320 + }, + { + "epoch": 0.11783029044469673, + "grad_norm": 727.7561645507812, + "learning_rate": 9.991547555462825e-06, + "loss": 49.5363, + "step": 58330 + }, + { + "epoch": 0.11785049107738055, + "grad_norm": 802.432373046875, + "learning_rate": 9.991527255033988e-06, + "loss": 29.4928, + "step": 58340 + }, + { + "epoch": 0.11787069171006435, + "grad_norm": 270.4449768066406, + "learning_rate": 9.99150693027705e-06, + "loss": 21.9631, + "step": 58350 + }, + { + "epoch": 0.11789089234274817, + "grad_norm": 470.1295471191406, + "learning_rate": 9.991486581192115e-06, + "loss": 28.0967, + "step": 58360 + }, + { + "epoch": 0.117911092975432, + "grad_norm": 301.1944885253906, + "learning_rate": 9.991466207779279e-06, + "loss": 33.0549, + "step": 58370 + }, + { + "epoch": 0.1179312936081158, + "grad_norm": 477.31915283203125, + "learning_rate": 9.99144581003864e-06, + "loss": 59.8705, + "step": 58380 + }, + { + "epoch": 0.11795149424079962, + "grad_norm": 351.3213806152344, + "learning_rate": 9.991425387970301e-06, + "loss": 16.9762, + "step": 58390 + }, + { + "epoch": 0.11797169487348344, + "grad_norm": 351.16082763671875, + "learning_rate": 9.99140494157436e-06, + "loss": 20.5696, + "step": 58400 + }, + { + "epoch": 0.11799189550616725, + "grad_norm": 512.7572631835938, + "learning_rate": 9.991384470850918e-06, + "loss": 29.0036, + "step": 58410 + }, + { + "epoch": 0.11801209613885107, + "grad_norm": 1209.859375, + "learning_rate": 9.991363975800073e-06, + "loss": 38.2597, + "step": 58420 + }, + { + "epoch": 0.11803229677153489, + "grad_norm": 84.26258087158203, + "learning_rate": 9.991343456421923e-06, + "loss": 22.3503, + "step": 58430 + }, + { + "epoch": 0.1180524974042187, + "grad_norm": 389.1960144042969, + "learning_rate": 9.991322912716572e-06, + "loss": 24.8009, + "step": 58440 + }, + { + "epoch": 0.11807269803690251, + "grad_norm": 231.02340698242188, + "learning_rate": 9.99130234468412e-06, + "loss": 31.0219, + "step": 58450 + }, + { + "epoch": 0.11809289866958633, + "grad_norm": 334.5686950683594, + "learning_rate": 9.991281752324664e-06, + "loss": 36.9654, + "step": 58460 + }, + { + "epoch": 0.11811309930227015, + "grad_norm": 690.6244506835938, + "learning_rate": 9.991261135638307e-06, + "loss": 33.6558, + "step": 58470 + }, + { + "epoch": 0.11813329993495396, + "grad_norm": 1505.632568359375, + "learning_rate": 9.991240494625147e-06, + "loss": 31.5277, + "step": 58480 + }, + { + "epoch": 0.11815350056763778, + "grad_norm": 481.1512145996094, + "learning_rate": 9.991219829285287e-06, + "loss": 25.099, + "step": 58490 + }, + { + "epoch": 0.1181737012003216, + "grad_norm": 308.86309814453125, + "learning_rate": 9.991199139618828e-06, + "loss": 27.3802, + "step": 58500 + }, + { + "epoch": 0.1181939018330054, + "grad_norm": 517.23974609375, + "learning_rate": 9.991178425625869e-06, + "loss": 20.0147, + "step": 58510 + }, + { + "epoch": 0.11821410246568922, + "grad_norm": 603.149169921875, + "learning_rate": 9.99115768730651e-06, + "loss": 18.7056, + "step": 58520 + }, + { + "epoch": 0.11823430309837304, + "grad_norm": 542.2094116210938, + "learning_rate": 9.991136924660856e-06, + "loss": 25.3252, + "step": 58530 + }, + { + "epoch": 0.11825450373105685, + 
"grad_norm": 313.8913879394531, + "learning_rate": 9.991116137689006e-06, + "loss": 25.2412, + "step": 58540 + }, + { + "epoch": 0.11827470436374067, + "grad_norm": 169.6969451904297, + "learning_rate": 9.991095326391061e-06, + "loss": 25.847, + "step": 58550 + }, + { + "epoch": 0.11829490499642449, + "grad_norm": 474.6405944824219, + "learning_rate": 9.99107449076712e-06, + "loss": 36.8564, + "step": 58560 + }, + { + "epoch": 0.1183151056291083, + "grad_norm": 104.40969848632812, + "learning_rate": 9.99105363081729e-06, + "loss": 31.2089, + "step": 58570 + }, + { + "epoch": 0.11833530626179212, + "grad_norm": 732.682373046875, + "learning_rate": 9.99103274654167e-06, + "loss": 38.1569, + "step": 58580 + }, + { + "epoch": 0.11835550689447594, + "grad_norm": 895.7142944335938, + "learning_rate": 9.99101183794036e-06, + "loss": 28.7475, + "step": 58590 + }, + { + "epoch": 0.11837570752715976, + "grad_norm": 870.6466064453125, + "learning_rate": 9.990990905013466e-06, + "loss": 38.2412, + "step": 58600 + }, + { + "epoch": 0.11839590815984356, + "grad_norm": 542.204345703125, + "learning_rate": 9.990969947761087e-06, + "loss": 44.1003, + "step": 58610 + }, + { + "epoch": 0.11841610879252738, + "grad_norm": 331.6095886230469, + "learning_rate": 9.990948966183324e-06, + "loss": 17.5997, + "step": 58620 + }, + { + "epoch": 0.1184363094252112, + "grad_norm": 720.2861328125, + "learning_rate": 9.990927960280283e-06, + "loss": 18.8987, + "step": 58630 + }, + { + "epoch": 0.11845651005789501, + "grad_norm": 239.30694580078125, + "learning_rate": 9.990906930052065e-06, + "loss": 26.3439, + "step": 58640 + }, + { + "epoch": 0.11847671069057883, + "grad_norm": 426.04010009765625, + "learning_rate": 9.99088587549877e-06, + "loss": 19.9495, + "step": 58650 + }, + { + "epoch": 0.11849691132326265, + "grad_norm": 659.5116577148438, + "learning_rate": 9.990864796620503e-06, + "loss": 34.0379, + "step": 58660 + }, + { + "epoch": 0.11851711195594646, + "grad_norm": 318.29913330078125, + "learning_rate": 9.990843693417366e-06, + "loss": 33.4272, + "step": 58670 + }, + { + "epoch": 0.11853731258863028, + "grad_norm": 1562.17578125, + "learning_rate": 9.990822565889464e-06, + "loss": 39.8931, + "step": 58680 + }, + { + "epoch": 0.1185575132213141, + "grad_norm": 414.65179443359375, + "learning_rate": 9.990801414036896e-06, + "loss": 27.6652, + "step": 58690 + }, + { + "epoch": 0.1185777138539979, + "grad_norm": 619.0970458984375, + "learning_rate": 9.99078023785977e-06, + "loss": 34.0049, + "step": 58700 + }, + { + "epoch": 0.11859791448668172, + "grad_norm": 70.22889709472656, + "learning_rate": 9.990759037358184e-06, + "loss": 21.8346, + "step": 58710 + }, + { + "epoch": 0.11861811511936554, + "grad_norm": 615.2586669921875, + "learning_rate": 9.990737812532245e-06, + "loss": 38.4461, + "step": 58720 + }, + { + "epoch": 0.11863831575204935, + "grad_norm": 209.21034240722656, + "learning_rate": 9.990716563382055e-06, + "loss": 26.0752, + "step": 58730 + }, + { + "epoch": 0.11865851638473317, + "grad_norm": 538.8511352539062, + "learning_rate": 9.990695289907716e-06, + "loss": 23.6841, + "step": 58740 + }, + { + "epoch": 0.11867871701741699, + "grad_norm": 438.6738586425781, + "learning_rate": 9.990673992109335e-06, + "loss": 20.516, + "step": 58750 + }, + { + "epoch": 0.11869891765010081, + "grad_norm": 688.6759033203125, + "learning_rate": 9.990652669987016e-06, + "loss": 29.0761, + "step": 58760 + }, + { + "epoch": 0.11871911828278461, + "grad_norm": 660.4488525390625, + "learning_rate": 9.990631323540858e-06, 
+ "loss": 38.8158, + "step": 58770 + }, + { + "epoch": 0.11873931891546843, + "grad_norm": 245.68350219726562, + "learning_rate": 9.990609952770969e-06, + "loss": 26.0449, + "step": 58780 + }, + { + "epoch": 0.11875951954815225, + "grad_norm": 518.3926391601562, + "learning_rate": 9.990588557677454e-06, + "loss": 21.3319, + "step": 58790 + }, + { + "epoch": 0.11877972018083606, + "grad_norm": 302.8959655761719, + "learning_rate": 9.990567138260414e-06, + "loss": 25.8163, + "step": 58800 + }, + { + "epoch": 0.11879992081351988, + "grad_norm": 538.0042724609375, + "learning_rate": 9.990545694519956e-06, + "loss": 20.2308, + "step": 58810 + }, + { + "epoch": 0.1188201214462037, + "grad_norm": 324.6055603027344, + "learning_rate": 9.990524226456182e-06, + "loss": 17.885, + "step": 58820 + }, + { + "epoch": 0.1188403220788875, + "grad_norm": 277.4670715332031, + "learning_rate": 9.9905027340692e-06, + "loss": 21.5367, + "step": 58830 + }, + { + "epoch": 0.11886052271157133, + "grad_norm": 645.9742431640625, + "learning_rate": 9.990481217359112e-06, + "loss": 28.8911, + "step": 58840 + }, + { + "epoch": 0.11888072334425515, + "grad_norm": 592.8202514648438, + "learning_rate": 9.990459676326025e-06, + "loss": 36.6906, + "step": 58850 + }, + { + "epoch": 0.11890092397693895, + "grad_norm": 239.2350616455078, + "learning_rate": 9.990438110970043e-06, + "loss": 34.2834, + "step": 58860 + }, + { + "epoch": 0.11892112460962277, + "grad_norm": 448.14385986328125, + "learning_rate": 9.990416521291268e-06, + "loss": 30.5244, + "step": 58870 + }, + { + "epoch": 0.11894132524230659, + "grad_norm": 569.7626953125, + "learning_rate": 9.990394907289811e-06, + "loss": 17.9717, + "step": 58880 + }, + { + "epoch": 0.1189615258749904, + "grad_norm": 212.79605102539062, + "learning_rate": 9.990373268965773e-06, + "loss": 24.8917, + "step": 58890 + }, + { + "epoch": 0.11898172650767422, + "grad_norm": 324.95703125, + "learning_rate": 9.990351606319261e-06, + "loss": 23.7205, + "step": 58900 + }, + { + "epoch": 0.11900192714035804, + "grad_norm": 168.9674530029297, + "learning_rate": 9.990329919350382e-06, + "loss": 26.6645, + "step": 58910 + }, + { + "epoch": 0.11902212777304186, + "grad_norm": 445.2579040527344, + "learning_rate": 9.990308208059239e-06, + "loss": 29.4624, + "step": 58920 + }, + { + "epoch": 0.11904232840572566, + "grad_norm": 312.6114501953125, + "learning_rate": 9.990286472445938e-06, + "loss": 33.1878, + "step": 58930 + }, + { + "epoch": 0.11906252903840948, + "grad_norm": 316.3656311035156, + "learning_rate": 9.990264712510586e-06, + "loss": 20.1057, + "step": 58940 + }, + { + "epoch": 0.1190827296710933, + "grad_norm": 761.0764770507812, + "learning_rate": 9.990242928253291e-06, + "loss": 26.4023, + "step": 58950 + }, + { + "epoch": 0.11910293030377711, + "grad_norm": 697.7257690429688, + "learning_rate": 9.990221119674157e-06, + "loss": 26.1063, + "step": 58960 + }, + { + "epoch": 0.11912313093646093, + "grad_norm": 316.9278259277344, + "learning_rate": 9.99019928677329e-06, + "loss": 30.1808, + "step": 58970 + }, + { + "epoch": 0.11914333156914475, + "grad_norm": 1050.027099609375, + "learning_rate": 9.990177429550797e-06, + "loss": 39.0877, + "step": 58980 + }, + { + "epoch": 0.11916353220182856, + "grad_norm": 1752.5679931640625, + "learning_rate": 9.990155548006783e-06, + "loss": 46.105, + "step": 58990 + }, + { + "epoch": 0.11918373283451238, + "grad_norm": 439.0074768066406, + "learning_rate": 9.990133642141359e-06, + "loss": 20.7704, + "step": 59000 + }, + { + "epoch": 
0.1192039334671962, + "grad_norm": 903.7244873046875, + "learning_rate": 9.990111711954626e-06, + "loss": 26.862, + "step": 59010 + }, + { + "epoch": 0.11922413409988, + "grad_norm": 458.92303466796875, + "learning_rate": 9.990089757446697e-06, + "loss": 27.5504, + "step": 59020 + }, + { + "epoch": 0.11924433473256382, + "grad_norm": 698.0676879882812, + "learning_rate": 9.990067778617672e-06, + "loss": 25.8991, + "step": 59030 + }, + { + "epoch": 0.11926453536524764, + "grad_norm": 370.335693359375, + "learning_rate": 9.990045775467664e-06, + "loss": 17.0434, + "step": 59040 + }, + { + "epoch": 0.11928473599793145, + "grad_norm": 358.4537658691406, + "learning_rate": 9.990023747996778e-06, + "loss": 41.7143, + "step": 59050 + }, + { + "epoch": 0.11930493663061527, + "grad_norm": 152.5941619873047, + "learning_rate": 9.990001696205121e-06, + "loss": 28.2509, + "step": 59060 + }, + { + "epoch": 0.11932513726329909, + "grad_norm": 308.4674987792969, + "learning_rate": 9.989979620092802e-06, + "loss": 41.3298, + "step": 59070 + }, + { + "epoch": 0.11934533789598291, + "grad_norm": 1057.538818359375, + "learning_rate": 9.989957519659926e-06, + "loss": 21.2202, + "step": 59080 + }, + { + "epoch": 0.11936553852866671, + "grad_norm": 96.51363372802734, + "learning_rate": 9.989935394906602e-06, + "loss": 15.6763, + "step": 59090 + }, + { + "epoch": 0.11938573916135053, + "grad_norm": 492.85235595703125, + "learning_rate": 9.98991324583294e-06, + "loss": 25.7886, + "step": 59100 + }, + { + "epoch": 0.11940593979403435, + "grad_norm": 351.7673645019531, + "learning_rate": 9.989891072439045e-06, + "loss": 16.7681, + "step": 59110 + }, + { + "epoch": 0.11942614042671816, + "grad_norm": 160.32521057128906, + "learning_rate": 9.989868874725026e-06, + "loss": 27.9819, + "step": 59120 + }, + { + "epoch": 0.11944634105940198, + "grad_norm": 373.262939453125, + "learning_rate": 9.989846652690992e-06, + "loss": 38.1606, + "step": 59130 + }, + { + "epoch": 0.1194665416920858, + "grad_norm": 405.1363220214844, + "learning_rate": 9.989824406337049e-06, + "loss": 16.6895, + "step": 59140 + }, + { + "epoch": 0.11948674232476961, + "grad_norm": 0.0, + "learning_rate": 9.989802135663308e-06, + "loss": 25.7655, + "step": 59150 + }, + { + "epoch": 0.11950694295745343, + "grad_norm": 384.34661865234375, + "learning_rate": 9.989779840669878e-06, + "loss": 16.8252, + "step": 59160 + }, + { + "epoch": 0.11952714359013725, + "grad_norm": 223.25277709960938, + "learning_rate": 9.989757521356864e-06, + "loss": 29.3963, + "step": 59170 + }, + { + "epoch": 0.11954734422282105, + "grad_norm": 473.12060546875, + "learning_rate": 9.989735177724378e-06, + "loss": 27.4276, + "step": 59180 + }, + { + "epoch": 0.11956754485550487, + "grad_norm": 427.2732238769531, + "learning_rate": 9.989712809772528e-06, + "loss": 12.1941, + "step": 59190 + }, + { + "epoch": 0.1195877454881887, + "grad_norm": 679.8904418945312, + "learning_rate": 9.989690417501423e-06, + "loss": 21.767, + "step": 59200 + }, + { + "epoch": 0.1196079461208725, + "grad_norm": 848.4871215820312, + "learning_rate": 9.989668000911173e-06, + "loss": 37.814, + "step": 59210 + }, + { + "epoch": 0.11962814675355632, + "grad_norm": 1310.9892578125, + "learning_rate": 9.989645560001884e-06, + "loss": 34.3109, + "step": 59220 + }, + { + "epoch": 0.11964834738624014, + "grad_norm": 901.0699462890625, + "learning_rate": 9.989623094773669e-06, + "loss": 40.5445, + "step": 59230 + }, + { + "epoch": 0.11966854801892396, + "grad_norm": 418.48486328125, + "learning_rate": 
9.989600605226637e-06, + "loss": 13.6698, + "step": 59240 + }, + { + "epoch": 0.11968874865160777, + "grad_norm": 536.8137817382812, + "learning_rate": 9.989578091360896e-06, + "loss": 27.8379, + "step": 59250 + }, + { + "epoch": 0.11970894928429159, + "grad_norm": 376.47186279296875, + "learning_rate": 9.989555553176556e-06, + "loss": 27.6799, + "step": 59260 + }, + { + "epoch": 0.1197291499169754, + "grad_norm": 207.98965454101562, + "learning_rate": 9.989532990673729e-06, + "loss": 19.1201, + "step": 59270 + }, + { + "epoch": 0.11974935054965921, + "grad_norm": 511.27935791015625, + "learning_rate": 9.989510403852521e-06, + "loss": 16.5542, + "step": 59280 + }, + { + "epoch": 0.11976955118234303, + "grad_norm": 453.9069519042969, + "learning_rate": 9.989487792713045e-06, + "loss": 34.6967, + "step": 59290 + }, + { + "epoch": 0.11978975181502685, + "grad_norm": 347.8300476074219, + "learning_rate": 9.989465157255413e-06, + "loss": 56.5665, + "step": 59300 + }, + { + "epoch": 0.11980995244771066, + "grad_norm": 910.8221435546875, + "learning_rate": 9.98944249747973e-06, + "loss": 23.1538, + "step": 59310 + }, + { + "epoch": 0.11983015308039448, + "grad_norm": 453.1932067871094, + "learning_rate": 9.989419813386112e-06, + "loss": 14.0581, + "step": 59320 + }, + { + "epoch": 0.1198503537130783, + "grad_norm": 909.9956665039062, + "learning_rate": 9.989397104974665e-06, + "loss": 41.8085, + "step": 59330 + }, + { + "epoch": 0.1198705543457621, + "grad_norm": 358.1769714355469, + "learning_rate": 9.989374372245503e-06, + "loss": 17.7476, + "step": 59340 + }, + { + "epoch": 0.11989075497844592, + "grad_norm": 4.939453601837158, + "learning_rate": 9.989351615198734e-06, + "loss": 22.7722, + "step": 59350 + }, + { + "epoch": 0.11991095561112974, + "grad_norm": 536.285400390625, + "learning_rate": 9.989328833834472e-06, + "loss": 35.5165, + "step": 59360 + }, + { + "epoch": 0.11993115624381355, + "grad_norm": 133.91326904296875, + "learning_rate": 9.989306028152825e-06, + "loss": 30.8306, + "step": 59370 + }, + { + "epoch": 0.11995135687649737, + "grad_norm": 473.4412536621094, + "learning_rate": 9.989283198153908e-06, + "loss": 26.01, + "step": 59380 + }, + { + "epoch": 0.11997155750918119, + "grad_norm": 402.10003662109375, + "learning_rate": 9.989260343837827e-06, + "loss": 35.8979, + "step": 59390 + }, + { + "epoch": 0.11999175814186501, + "grad_norm": 619.9006958007812, + "learning_rate": 9.989237465204698e-06, + "loss": 24.5981, + "step": 59400 + }, + { + "epoch": 0.12001195877454882, + "grad_norm": 230.40646362304688, + "learning_rate": 9.989214562254628e-06, + "loss": 20.0228, + "step": 59410 + }, + { + "epoch": 0.12003215940723264, + "grad_norm": 986.6119995117188, + "learning_rate": 9.989191634987734e-06, + "loss": 45.3377, + "step": 59420 + }, + { + "epoch": 0.12005236003991646, + "grad_norm": 708.2041015625, + "learning_rate": 9.989168683404125e-06, + "loss": 31.9151, + "step": 59430 + }, + { + "epoch": 0.12007256067260026, + "grad_norm": 770.5503540039062, + "learning_rate": 9.98914570750391e-06, + "loss": 31.5929, + "step": 59440 + }, + { + "epoch": 0.12009276130528408, + "grad_norm": 893.4546508789062, + "learning_rate": 9.98912270728721e-06, + "loss": 27.7154, + "step": 59450 + }, + { + "epoch": 0.1201129619379679, + "grad_norm": 554.7857666015625, + "learning_rate": 9.989099682754125e-06, + "loss": 31.6928, + "step": 59460 + }, + { + "epoch": 0.12013316257065171, + "grad_norm": 255.66104125976562, + "learning_rate": 9.989076633904775e-06, + "loss": 21.1733, + "step": 59470 + }, + 
{ + "epoch": 0.12015336320333553, + "grad_norm": 572.1908569335938, + "learning_rate": 9.989053560739272e-06, + "loss": 30.5591, + "step": 59480 + }, + { + "epoch": 0.12017356383601935, + "grad_norm": 202.6723175048828, + "learning_rate": 9.989030463257726e-06, + "loss": 23.6414, + "step": 59490 + }, + { + "epoch": 0.12019376446870315, + "grad_norm": 424.7135009765625, + "learning_rate": 9.989007341460251e-06, + "loss": 19.5265, + "step": 59500 + }, + { + "epoch": 0.12021396510138697, + "grad_norm": 145.0359344482422, + "learning_rate": 9.98898419534696e-06, + "loss": 33.8042, + "step": 59510 + }, + { + "epoch": 0.1202341657340708, + "grad_norm": 337.57708740234375, + "learning_rate": 9.988961024917963e-06, + "loss": 19.0491, + "step": 59520 + }, + { + "epoch": 0.1202543663667546, + "grad_norm": 806.2337036132812, + "learning_rate": 9.988937830173376e-06, + "loss": 25.364, + "step": 59530 + }, + { + "epoch": 0.12027456699943842, + "grad_norm": 343.5975036621094, + "learning_rate": 9.988914611113311e-06, + "loss": 34.3596, + "step": 59540 + }, + { + "epoch": 0.12029476763212224, + "grad_norm": 233.7205810546875, + "learning_rate": 9.988891367737882e-06, + "loss": 23.9101, + "step": 59550 + }, + { + "epoch": 0.12031496826480606, + "grad_norm": 336.2087707519531, + "learning_rate": 9.988868100047203e-06, + "loss": 27.152, + "step": 59560 + }, + { + "epoch": 0.12033516889748987, + "grad_norm": 626.9401245117188, + "learning_rate": 9.988844808041382e-06, + "loss": 24.9398, + "step": 59570 + }, + { + "epoch": 0.12035536953017369, + "grad_norm": 83.66886901855469, + "learning_rate": 9.98882149172054e-06, + "loss": 15.7853, + "step": 59580 + }, + { + "epoch": 0.1203755701628575, + "grad_norm": 285.5963439941406, + "learning_rate": 9.988798151084783e-06, + "loss": 20.7018, + "step": 59590 + }, + { + "epoch": 0.12039577079554131, + "grad_norm": 113.7260971069336, + "learning_rate": 9.988774786134235e-06, + "loss": 27.5119, + "step": 59600 + }, + { + "epoch": 0.12041597142822513, + "grad_norm": 674.3125, + "learning_rate": 9.988751396869e-06, + "loss": 21.0328, + "step": 59610 + }, + { + "epoch": 0.12043617206090895, + "grad_norm": 297.72686767578125, + "learning_rate": 9.988727983289195e-06, + "loss": 15.3047, + "step": 59620 + }, + { + "epoch": 0.12045637269359276, + "grad_norm": 1073.702880859375, + "learning_rate": 9.988704545394936e-06, + "loss": 36.0673, + "step": 59630 + }, + { + "epoch": 0.12047657332627658, + "grad_norm": 434.8298034667969, + "learning_rate": 9.988681083186336e-06, + "loss": 22.7055, + "step": 59640 + }, + { + "epoch": 0.1204967739589604, + "grad_norm": 218.23529052734375, + "learning_rate": 9.988657596663509e-06, + "loss": 30.0182, + "step": 59650 + }, + { + "epoch": 0.1205169745916442, + "grad_norm": 245.4269561767578, + "learning_rate": 9.988634085826571e-06, + "loss": 22.8707, + "step": 59660 + }, + { + "epoch": 0.12053717522432802, + "grad_norm": 857.5812377929688, + "learning_rate": 9.988610550675635e-06, + "loss": 22.2752, + "step": 59670 + }, + { + "epoch": 0.12055737585701184, + "grad_norm": 861.4116821289062, + "learning_rate": 9.988586991210816e-06, + "loss": 26.9759, + "step": 59680 + }, + { + "epoch": 0.12057757648969565, + "grad_norm": 266.3482666015625, + "learning_rate": 9.98856340743223e-06, + "loss": 22.2715, + "step": 59690 + }, + { + "epoch": 0.12059777712237947, + "grad_norm": 893.9590454101562, + "learning_rate": 9.988539799339989e-06, + "loss": 19.9834, + "step": 59700 + }, + { + "epoch": 0.12061797775506329, + "grad_norm": 336.1539306640625, + 
"learning_rate": 9.988516166934212e-06, + "loss": 21.4351, + "step": 59710 + }, + { + "epoch": 0.12063817838774711, + "grad_norm": 277.47705078125, + "learning_rate": 9.988492510215011e-06, + "loss": 12.952, + "step": 59720 + }, + { + "epoch": 0.12065837902043092, + "grad_norm": 474.82122802734375, + "learning_rate": 9.988468829182504e-06, + "loss": 17.7442, + "step": 59730 + }, + { + "epoch": 0.12067857965311474, + "grad_norm": 399.83392333984375, + "learning_rate": 9.988445123836804e-06, + "loss": 24.6492, + "step": 59740 + }, + { + "epoch": 0.12069878028579856, + "grad_norm": 361.17340087890625, + "learning_rate": 9.988421394178027e-06, + "loss": 23.9565, + "step": 59750 + }, + { + "epoch": 0.12071898091848236, + "grad_norm": 385.287109375, + "learning_rate": 9.98839764020629e-06, + "loss": 19.6916, + "step": 59760 + }, + { + "epoch": 0.12073918155116618, + "grad_norm": 229.69644165039062, + "learning_rate": 9.988373861921708e-06, + "loss": 18.5205, + "step": 59770 + }, + { + "epoch": 0.12075938218385, + "grad_norm": 1031.325927734375, + "learning_rate": 9.988350059324396e-06, + "loss": 39.4992, + "step": 59780 + }, + { + "epoch": 0.12077958281653381, + "grad_norm": 408.28875732421875, + "learning_rate": 9.988326232414472e-06, + "loss": 32.7715, + "step": 59790 + }, + { + "epoch": 0.12079978344921763, + "grad_norm": 407.96453857421875, + "learning_rate": 9.98830238119205e-06, + "loss": 18.9229, + "step": 59800 + }, + { + "epoch": 0.12081998408190145, + "grad_norm": 478.8912048339844, + "learning_rate": 9.988278505657247e-06, + "loss": 35.969, + "step": 59810 + }, + { + "epoch": 0.12084018471458526, + "grad_norm": 551.2542114257812, + "learning_rate": 9.98825460581018e-06, + "loss": 30.9293, + "step": 59820 + }, + { + "epoch": 0.12086038534726908, + "grad_norm": 959.1485595703125, + "learning_rate": 9.988230681650964e-06, + "loss": 41.9438, + "step": 59830 + }, + { + "epoch": 0.1208805859799529, + "grad_norm": 390.9014587402344, + "learning_rate": 9.988206733179718e-06, + "loss": 20.6948, + "step": 59840 + }, + { + "epoch": 0.1209007866126367, + "grad_norm": 562.6345825195312, + "learning_rate": 9.988182760396557e-06, + "loss": 71.4276, + "step": 59850 + }, + { + "epoch": 0.12092098724532052, + "grad_norm": 431.55218505859375, + "learning_rate": 9.988158763301598e-06, + "loss": 31.5547, + "step": 59860 + }, + { + "epoch": 0.12094118787800434, + "grad_norm": 429.47686767578125, + "learning_rate": 9.988134741894959e-06, + "loss": 25.3229, + "step": 59870 + }, + { + "epoch": 0.12096138851068816, + "grad_norm": 431.9924011230469, + "learning_rate": 9.988110696176756e-06, + "loss": 27.9693, + "step": 59880 + }, + { + "epoch": 0.12098158914337197, + "grad_norm": 1132.8892822265625, + "learning_rate": 9.988086626147107e-06, + "loss": 50.1092, + "step": 59890 + }, + { + "epoch": 0.12100178977605579, + "grad_norm": 326.2806396484375, + "learning_rate": 9.988062531806127e-06, + "loss": 24.4289, + "step": 59900 + }, + { + "epoch": 0.12102199040873961, + "grad_norm": 238.85772705078125, + "learning_rate": 9.988038413153936e-06, + "loss": 34.2657, + "step": 59910 + }, + { + "epoch": 0.12104219104142341, + "grad_norm": 1077.6678466796875, + "learning_rate": 9.988014270190652e-06, + "loss": 31.9235, + "step": 59920 + }, + { + "epoch": 0.12106239167410723, + "grad_norm": 399.2193298339844, + "learning_rate": 9.98799010291639e-06, + "loss": 32.658, + "step": 59930 + }, + { + "epoch": 0.12108259230679105, + "grad_norm": 67.97364044189453, + "learning_rate": 9.987965911331268e-06, + "loss": 22.4463, + 
"step": 59940 + }, + { + "epoch": 0.12110279293947486, + "grad_norm": 682.7759399414062, + "learning_rate": 9.987941695435409e-06, + "loss": 25.9808, + "step": 59950 + }, + { + "epoch": 0.12112299357215868, + "grad_norm": 673.4269409179688, + "learning_rate": 9.987917455228924e-06, + "loss": 26.4, + "step": 59960 + }, + { + "epoch": 0.1211431942048425, + "grad_norm": 634.286865234375, + "learning_rate": 9.987893190711935e-06, + "loss": 13.4664, + "step": 59970 + }, + { + "epoch": 0.1211633948375263, + "grad_norm": 449.27093505859375, + "learning_rate": 9.987868901884558e-06, + "loss": 31.3343, + "step": 59980 + }, + { + "epoch": 0.12118359547021013, + "grad_norm": 37.370670318603516, + "learning_rate": 9.987844588746916e-06, + "loss": 27.4201, + "step": 59990 + }, + { + "epoch": 0.12120379610289395, + "grad_norm": 800.6913452148438, + "learning_rate": 9.987820251299121e-06, + "loss": 45.0441, + "step": 60000 + }, + { + "epoch": 0.12122399673557775, + "grad_norm": 472.5890197753906, + "learning_rate": 9.987795889541298e-06, + "loss": 19.9383, + "step": 60010 + }, + { + "epoch": 0.12124419736826157, + "grad_norm": 215.47775268554688, + "learning_rate": 9.987771503473562e-06, + "loss": 25.8273, + "step": 60020 + }, + { + "epoch": 0.12126439800094539, + "grad_norm": 548.9321899414062, + "learning_rate": 9.987747093096032e-06, + "loss": 32.3031, + "step": 60030 + }, + { + "epoch": 0.12128459863362921, + "grad_norm": 481.9915771484375, + "learning_rate": 9.987722658408828e-06, + "loss": 30.0601, + "step": 60040 + }, + { + "epoch": 0.12130479926631302, + "grad_norm": 268.7188415527344, + "learning_rate": 9.98769819941207e-06, + "loss": 17.8035, + "step": 60050 + }, + { + "epoch": 0.12132499989899684, + "grad_norm": 696.6322021484375, + "learning_rate": 9.987673716105874e-06, + "loss": 40.5163, + "step": 60060 + }, + { + "epoch": 0.12134520053168066, + "grad_norm": 440.85699462890625, + "learning_rate": 9.987649208490361e-06, + "loss": 27.2829, + "step": 60070 + }, + { + "epoch": 0.12136540116436446, + "grad_norm": 438.387451171875, + "learning_rate": 9.987624676565652e-06, + "loss": 24.1422, + "step": 60080 + }, + { + "epoch": 0.12138560179704828, + "grad_norm": 432.260009765625, + "learning_rate": 9.987600120331864e-06, + "loss": 33.1364, + "step": 60090 + }, + { + "epoch": 0.1214058024297321, + "grad_norm": 470.2272033691406, + "learning_rate": 9.987575539789119e-06, + "loss": 32.1774, + "step": 60100 + }, + { + "epoch": 0.12142600306241591, + "grad_norm": 510.7843322753906, + "learning_rate": 9.987550934937536e-06, + "loss": 22.3902, + "step": 60110 + }, + { + "epoch": 0.12144620369509973, + "grad_norm": 294.6884765625, + "learning_rate": 9.987526305777234e-06, + "loss": 45.5376, + "step": 60120 + }, + { + "epoch": 0.12146640432778355, + "grad_norm": 465.04290771484375, + "learning_rate": 9.987501652308333e-06, + "loss": 23.2083, + "step": 60130 + }, + { + "epoch": 0.12148660496046736, + "grad_norm": 308.0381774902344, + "learning_rate": 9.987476974530957e-06, + "loss": 37.3375, + "step": 60140 + }, + { + "epoch": 0.12150680559315118, + "grad_norm": 290.26446533203125, + "learning_rate": 9.98745227244522e-06, + "loss": 24.6941, + "step": 60150 + }, + { + "epoch": 0.121527006225835, + "grad_norm": 9.64521598815918, + "learning_rate": 9.987427546051246e-06, + "loss": 46.0543, + "step": 60160 + }, + { + "epoch": 0.1215472068585188, + "grad_norm": 141.52389526367188, + "learning_rate": 9.987402795349154e-06, + "loss": 18.1899, + "step": 60170 + }, + { + "epoch": 0.12156740749120262, + "grad_norm": 
1335.341552734375, + "learning_rate": 9.987378020339069e-06, + "loss": 33.4883, + "step": 60180 + }, + { + "epoch": 0.12158760812388644, + "grad_norm": 771.3448486328125, + "learning_rate": 9.987353221021106e-06, + "loss": 21.3794, + "step": 60190 + }, + { + "epoch": 0.12160780875657026, + "grad_norm": 264.4566345214844, + "learning_rate": 9.987328397395389e-06, + "loss": 15.827, + "step": 60200 + }, + { + "epoch": 0.12162800938925407, + "grad_norm": 343.1784362792969, + "learning_rate": 9.987303549462038e-06, + "loss": 26.9193, + "step": 60210 + }, + { + "epoch": 0.12164821002193789, + "grad_norm": 879.5808715820312, + "learning_rate": 9.987278677221174e-06, + "loss": 19.3837, + "step": 60220 + }, + { + "epoch": 0.12166841065462171, + "grad_norm": 271.2762451171875, + "learning_rate": 9.987253780672918e-06, + "loss": 25.6372, + "step": 60230 + }, + { + "epoch": 0.12168861128730551, + "grad_norm": 431.32720947265625, + "learning_rate": 9.987228859817395e-06, + "loss": 38.1591, + "step": 60240 + }, + { + "epoch": 0.12170881191998933, + "grad_norm": 414.6575012207031, + "learning_rate": 9.987203914654721e-06, + "loss": 24.6773, + "step": 60250 + }, + { + "epoch": 0.12172901255267315, + "grad_norm": 0.0, + "learning_rate": 9.987178945185019e-06, + "loss": 18.7729, + "step": 60260 + }, + { + "epoch": 0.12174921318535696, + "grad_norm": 275.6715393066406, + "learning_rate": 9.987153951408414e-06, + "loss": 26.2396, + "step": 60270 + }, + { + "epoch": 0.12176941381804078, + "grad_norm": 370.8847961425781, + "learning_rate": 9.987128933325025e-06, + "loss": 20.9022, + "step": 60280 + }, + { + "epoch": 0.1217896144507246, + "grad_norm": 157.69837951660156, + "learning_rate": 9.987103890934974e-06, + "loss": 25.6948, + "step": 60290 + }, + { + "epoch": 0.12180981508340841, + "grad_norm": 628.7579956054688, + "learning_rate": 9.987078824238384e-06, + "loss": 28.3776, + "step": 60300 + }, + { + "epoch": 0.12183001571609223, + "grad_norm": 1117.4095458984375, + "learning_rate": 9.987053733235376e-06, + "loss": 37.893, + "step": 60310 + }, + { + "epoch": 0.12185021634877605, + "grad_norm": 498.21563720703125, + "learning_rate": 9.987028617926074e-06, + "loss": 34.8712, + "step": 60320 + }, + { + "epoch": 0.12187041698145985, + "grad_norm": 13.446024894714355, + "learning_rate": 9.987003478310597e-06, + "loss": 36.502, + "step": 60330 + }, + { + "epoch": 0.12189061761414367, + "grad_norm": 266.8991394042969, + "learning_rate": 9.986978314389071e-06, + "loss": 22.1401, + "step": 60340 + }, + { + "epoch": 0.1219108182468275, + "grad_norm": 373.22625732421875, + "learning_rate": 9.98695312616162e-06, + "loss": 20.9709, + "step": 60350 + }, + { + "epoch": 0.12193101887951131, + "grad_norm": 419.4221496582031, + "learning_rate": 9.986927913628361e-06, + "loss": 27.2048, + "step": 60360 + }, + { + "epoch": 0.12195121951219512, + "grad_norm": 540.9066772460938, + "learning_rate": 9.986902676789421e-06, + "loss": 22.5147, + "step": 60370 + }, + { + "epoch": 0.12197142014487894, + "grad_norm": 384.9314880371094, + "learning_rate": 9.986877415644925e-06, + "loss": 38.0179, + "step": 60380 + }, + { + "epoch": 0.12199162077756276, + "grad_norm": 708.5804443359375, + "learning_rate": 9.98685213019499e-06, + "loss": 21.5257, + "step": 60390 + }, + { + "epoch": 0.12201182141024657, + "grad_norm": 278.7950439453125, + "learning_rate": 9.986826820439743e-06, + "loss": 20.9915, + "step": 60400 + }, + { + "epoch": 0.12203202204293039, + "grad_norm": 258.7886962890625, + "learning_rate": 9.986801486379307e-06, + "loss": 
22.9166, + "step": 60410 + }, + { + "epoch": 0.1220522226756142, + "grad_norm": 470.2723083496094, + "learning_rate": 9.986776128013807e-06, + "loss": 18.895, + "step": 60420 + }, + { + "epoch": 0.12207242330829801, + "grad_norm": 332.79156494140625, + "learning_rate": 9.986750745343363e-06, + "loss": 22.5629, + "step": 60430 + }, + { + "epoch": 0.12209262394098183, + "grad_norm": 585.84814453125, + "learning_rate": 9.986725338368103e-06, + "loss": 21.4241, + "step": 60440 + }, + { + "epoch": 0.12211282457366565, + "grad_norm": 286.4283752441406, + "learning_rate": 9.986699907088147e-06, + "loss": 16.1121, + "step": 60450 + }, + { + "epoch": 0.12213302520634946, + "grad_norm": 5481.30712890625, + "learning_rate": 9.986674451503619e-06, + "loss": 51.051, + "step": 60460 + }, + { + "epoch": 0.12215322583903328, + "grad_norm": 128.4597625732422, + "learning_rate": 9.986648971614646e-06, + "loss": 22.5577, + "step": 60470 + }, + { + "epoch": 0.1221734264717171, + "grad_norm": 968.1907348632812, + "learning_rate": 9.98662346742135e-06, + "loss": 17.0717, + "step": 60480 + }, + { + "epoch": 0.1221936271044009, + "grad_norm": 1540.86474609375, + "learning_rate": 9.986597938923859e-06, + "loss": 22.1102, + "step": 60490 + }, + { + "epoch": 0.12221382773708472, + "grad_norm": 95.4626693725586, + "learning_rate": 9.98657238612229e-06, + "loss": 12.8485, + "step": 60500 + }, + { + "epoch": 0.12223402836976854, + "grad_norm": 428.83221435546875, + "learning_rate": 9.986546809016775e-06, + "loss": 28.4717, + "step": 60510 + }, + { + "epoch": 0.12225422900245236, + "grad_norm": 233.34690856933594, + "learning_rate": 9.986521207607436e-06, + "loss": 29.036, + "step": 60520 + }, + { + "epoch": 0.12227442963513617, + "grad_norm": 314.431396484375, + "learning_rate": 9.986495581894396e-06, + "loss": 17.1531, + "step": 60530 + }, + { + "epoch": 0.12229463026781999, + "grad_norm": 357.11761474609375, + "learning_rate": 9.986469931877781e-06, + "loss": 14.6935, + "step": 60540 + }, + { + "epoch": 0.12231483090050381, + "grad_norm": 377.8204650878906, + "learning_rate": 9.986444257557717e-06, + "loss": 16.1743, + "step": 60550 + }, + { + "epoch": 0.12233503153318762, + "grad_norm": 251.5559539794922, + "learning_rate": 9.986418558934329e-06, + "loss": 18.553, + "step": 60560 + }, + { + "epoch": 0.12235523216587144, + "grad_norm": 278.3482360839844, + "learning_rate": 9.98639283600774e-06, + "loss": 26.0725, + "step": 60570 + }, + { + "epoch": 0.12237543279855526, + "grad_norm": 127.29243469238281, + "learning_rate": 9.98636708877808e-06, + "loss": 15.9861, + "step": 60580 + }, + { + "epoch": 0.12239563343123906, + "grad_norm": 461.56890869140625, + "learning_rate": 9.986341317245469e-06, + "loss": 21.9128, + "step": 60590 + }, + { + "epoch": 0.12241583406392288, + "grad_norm": 438.2317199707031, + "learning_rate": 9.986315521410035e-06, + "loss": 22.8366, + "step": 60600 + }, + { + "epoch": 0.1224360346966067, + "grad_norm": 911.2008056640625, + "learning_rate": 9.986289701271905e-06, + "loss": 41.1762, + "step": 60610 + }, + { + "epoch": 0.12245623532929051, + "grad_norm": 937.158203125, + "learning_rate": 9.986263856831204e-06, + "loss": 39.3684, + "step": 60620 + }, + { + "epoch": 0.12247643596197433, + "grad_norm": 120.9735107421875, + "learning_rate": 9.986237988088059e-06, + "loss": 22.2186, + "step": 60630 + }, + { + "epoch": 0.12249663659465815, + "grad_norm": 405.4740295410156, + "learning_rate": 9.986212095042593e-06, + "loss": 27.3225, + "step": 60640 + }, + { + "epoch": 0.12251683722734195, + 
"grad_norm": 889.656494140625, + "learning_rate": 9.986186177694935e-06, + "loss": 35.7576, + "step": 60650 + }, + { + "epoch": 0.12253703786002577, + "grad_norm": 352.78173828125, + "learning_rate": 9.986160236045207e-06, + "loss": 25.7411, + "step": 60660 + }, + { + "epoch": 0.1225572384927096, + "grad_norm": 382.47137451171875, + "learning_rate": 9.986134270093542e-06, + "loss": 33.1577, + "step": 60670 + }, + { + "epoch": 0.12257743912539341, + "grad_norm": 947.0201416015625, + "learning_rate": 9.986108279840063e-06, + "loss": 28.6087, + "step": 60680 + }, + { + "epoch": 0.12259763975807722, + "grad_norm": 1204.2515869140625, + "learning_rate": 9.986082265284896e-06, + "loss": 25.0962, + "step": 60690 + }, + { + "epoch": 0.12261784039076104, + "grad_norm": 963.8109741210938, + "learning_rate": 9.98605622642817e-06, + "loss": 20.0314, + "step": 60700 + }, + { + "epoch": 0.12263804102344486, + "grad_norm": 320.7643127441406, + "learning_rate": 9.986030163270011e-06, + "loss": 30.8012, + "step": 60710 + }, + { + "epoch": 0.12265824165612867, + "grad_norm": 361.7972412109375, + "learning_rate": 9.986004075810543e-06, + "loss": 40.871, + "step": 60720 + }, + { + "epoch": 0.12267844228881249, + "grad_norm": 666.5037231445312, + "learning_rate": 9.985977964049898e-06, + "loss": 38.1172, + "step": 60730 + }, + { + "epoch": 0.1226986429214963, + "grad_norm": 725.2588500976562, + "learning_rate": 9.9859518279882e-06, + "loss": 32.248, + "step": 60740 + }, + { + "epoch": 0.12271884355418011, + "grad_norm": 773.1401977539062, + "learning_rate": 9.985925667625581e-06, + "loss": 24.3105, + "step": 60750 + }, + { + "epoch": 0.12273904418686393, + "grad_norm": 326.5883483886719, + "learning_rate": 9.98589948296216e-06, + "loss": 18.8559, + "step": 60760 + }, + { + "epoch": 0.12275924481954775, + "grad_norm": 1008.8342895507812, + "learning_rate": 9.985873273998072e-06, + "loss": 28.795, + "step": 60770 + }, + { + "epoch": 0.12277944545223156, + "grad_norm": 273.88427734375, + "learning_rate": 9.985847040733442e-06, + "loss": 20.6629, + "step": 60780 + }, + { + "epoch": 0.12279964608491538, + "grad_norm": 200.18011474609375, + "learning_rate": 9.9858207831684e-06, + "loss": 34.8831, + "step": 60790 + }, + { + "epoch": 0.1228198467175992, + "grad_norm": 386.1703796386719, + "learning_rate": 9.98579450130307e-06, + "loss": 23.9608, + "step": 60800 + }, + { + "epoch": 0.122840047350283, + "grad_norm": 409.2703552246094, + "learning_rate": 9.985768195137585e-06, + "loss": 24.2317, + "step": 60810 + }, + { + "epoch": 0.12286024798296682, + "grad_norm": 231.22320556640625, + "learning_rate": 9.985741864672067e-06, + "loss": 24.0767, + "step": 60820 + }, + { + "epoch": 0.12288044861565064, + "grad_norm": 154.06040954589844, + "learning_rate": 9.985715509906649e-06, + "loss": 19.945, + "step": 60830 + }, + { + "epoch": 0.12290064924833445, + "grad_norm": 389.05035400390625, + "learning_rate": 9.985689130841459e-06, + "loss": 12.9584, + "step": 60840 + }, + { + "epoch": 0.12292084988101827, + "grad_norm": 656.5178833007812, + "learning_rate": 9.985662727476625e-06, + "loss": 27.0256, + "step": 60850 + }, + { + "epoch": 0.12294105051370209, + "grad_norm": 851.6620483398438, + "learning_rate": 9.985636299812275e-06, + "loss": 16.2623, + "step": 60860 + }, + { + "epoch": 0.12296125114638591, + "grad_norm": 497.35540771484375, + "learning_rate": 9.98560984784854e-06, + "loss": 31.88, + "step": 60870 + }, + { + "epoch": 0.12298145177906972, + "grad_norm": 720.6923828125, + "learning_rate": 9.985583371585544e-06, + 
"loss": 22.5373, + "step": 60880 + }, + { + "epoch": 0.12300165241175354, + "grad_norm": 2.283111095428467, + "learning_rate": 9.98555687102342e-06, + "loss": 12.7178, + "step": 60890 + }, + { + "epoch": 0.12302185304443736, + "grad_norm": 215.31703186035156, + "learning_rate": 9.9855303461623e-06, + "loss": 15.6389, + "step": 60900 + }, + { + "epoch": 0.12304205367712116, + "grad_norm": 410.4704284667969, + "learning_rate": 9.985503797002307e-06, + "loss": 22.4629, + "step": 60910 + }, + { + "epoch": 0.12306225430980498, + "grad_norm": 411.4508972167969, + "learning_rate": 9.985477223543574e-06, + "loss": 25.9603, + "step": 60920 + }, + { + "epoch": 0.1230824549424888, + "grad_norm": 344.9388732910156, + "learning_rate": 9.985450625786228e-06, + "loss": 35.1812, + "step": 60930 + }, + { + "epoch": 0.12310265557517261, + "grad_norm": 679.7549438476562, + "learning_rate": 9.985424003730403e-06, + "loss": 21.9696, + "step": 60940 + }, + { + "epoch": 0.12312285620785643, + "grad_norm": 101.80624389648438, + "learning_rate": 9.985397357376224e-06, + "loss": 25.8398, + "step": 60950 + }, + { + "epoch": 0.12314305684054025, + "grad_norm": 431.5050354003906, + "learning_rate": 9.985370686723823e-06, + "loss": 35.3801, + "step": 60960 + }, + { + "epoch": 0.12316325747322406, + "grad_norm": 225.48330688476562, + "learning_rate": 9.985343991773331e-06, + "loss": 52.8736, + "step": 60970 + }, + { + "epoch": 0.12318345810590788, + "grad_norm": 608.2294921875, + "learning_rate": 9.985317272524876e-06, + "loss": 29.2429, + "step": 60980 + }, + { + "epoch": 0.1232036587385917, + "grad_norm": 345.0340881347656, + "learning_rate": 9.98529052897859e-06, + "loss": 40.7269, + "step": 60990 + }, + { + "epoch": 0.1232238593712755, + "grad_norm": 175.9382781982422, + "learning_rate": 9.985263761134602e-06, + "loss": 19.7594, + "step": 61000 + }, + { + "epoch": 0.12324406000395932, + "grad_norm": 617.882080078125, + "learning_rate": 9.985236968993044e-06, + "loss": 27.3067, + "step": 61010 + }, + { + "epoch": 0.12326426063664314, + "grad_norm": 990.5284423828125, + "learning_rate": 9.985210152554045e-06, + "loss": 41.4177, + "step": 61020 + }, + { + "epoch": 0.12328446126932696, + "grad_norm": 390.6434631347656, + "learning_rate": 9.985183311817736e-06, + "loss": 29.425, + "step": 61030 + }, + { + "epoch": 0.12330466190201077, + "grad_norm": 545.95703125, + "learning_rate": 9.985156446784249e-06, + "loss": 27.7012, + "step": 61040 + }, + { + "epoch": 0.12332486253469459, + "grad_norm": 392.1247863769531, + "learning_rate": 9.985129557453714e-06, + "loss": 13.2131, + "step": 61050 + }, + { + "epoch": 0.12334506316737841, + "grad_norm": 934.265625, + "learning_rate": 9.985102643826261e-06, + "loss": 29.2276, + "step": 61060 + }, + { + "epoch": 0.12336526380006221, + "grad_norm": 246.337890625, + "learning_rate": 9.985075705902024e-06, + "loss": 33.959, + "step": 61070 + }, + { + "epoch": 0.12338546443274603, + "grad_norm": 411.26983642578125, + "learning_rate": 9.985048743681131e-06, + "loss": 18.7968, + "step": 61080 + }, + { + "epoch": 0.12340566506542985, + "grad_norm": 231.3650665283203, + "learning_rate": 9.985021757163715e-06, + "loss": 37.9589, + "step": 61090 + }, + { + "epoch": 0.12342586569811366, + "grad_norm": 535.2852172851562, + "learning_rate": 9.98499474634991e-06, + "loss": 26.4339, + "step": 61100 + }, + { + "epoch": 0.12344606633079748, + "grad_norm": 128.99632263183594, + "learning_rate": 9.984967711239844e-06, + "loss": 19.9055, + "step": 61110 + }, + { + "epoch": 0.1234662669634813, + 
"grad_norm": 528.6619262695312, + "learning_rate": 9.984940651833648e-06, + "loss": 21.8287, + "step": 61120 + }, + { + "epoch": 0.1234864675961651, + "grad_norm": 272.9593505859375, + "learning_rate": 9.984913568131458e-06, + "loss": 28.5915, + "step": 61130 + }, + { + "epoch": 0.12350666822884893, + "grad_norm": 574.6172485351562, + "learning_rate": 9.984886460133403e-06, + "loss": 19.0841, + "step": 61140 + }, + { + "epoch": 0.12352686886153275, + "grad_norm": 263.6935729980469, + "learning_rate": 9.984859327839617e-06, + "loss": 26.3418, + "step": 61150 + }, + { + "epoch": 0.12354706949421655, + "grad_norm": 36.87807846069336, + "learning_rate": 9.98483217125023e-06, + "loss": 23.3879, + "step": 61160 + }, + { + "epoch": 0.12356727012690037, + "grad_norm": 109.2344741821289, + "learning_rate": 9.984804990365376e-06, + "loss": 13.9505, + "step": 61170 + }, + { + "epoch": 0.12358747075958419, + "grad_norm": 798.1170654296875, + "learning_rate": 9.984777785185188e-06, + "loss": 32.6037, + "step": 61180 + }, + { + "epoch": 0.12360767139226801, + "grad_norm": 117.23961639404297, + "learning_rate": 9.984750555709797e-06, + "loss": 24.9973, + "step": 61190 + }, + { + "epoch": 0.12362787202495182, + "grad_norm": 25.17947006225586, + "learning_rate": 9.984723301939337e-06, + "loss": 15.0977, + "step": 61200 + }, + { + "epoch": 0.12364807265763564, + "grad_norm": 751.7405395507812, + "learning_rate": 9.984696023873939e-06, + "loss": 24.9332, + "step": 61210 + }, + { + "epoch": 0.12366827329031946, + "grad_norm": 631.7305908203125, + "learning_rate": 9.984668721513737e-06, + "loss": 25.5583, + "step": 61220 + }, + { + "epoch": 0.12368847392300326, + "grad_norm": 189.65731811523438, + "learning_rate": 9.984641394858865e-06, + "loss": 31.7349, + "step": 61230 + }, + { + "epoch": 0.12370867455568708, + "grad_norm": 864.2401733398438, + "learning_rate": 9.984614043909455e-06, + "loss": 41.5382, + "step": 61240 + }, + { + "epoch": 0.1237288751883709, + "grad_norm": 474.04248046875, + "learning_rate": 9.984586668665641e-06, + "loss": 22.3956, + "step": 61250 + }, + { + "epoch": 0.12374907582105471, + "grad_norm": 389.1614685058594, + "learning_rate": 9.984559269127557e-06, + "loss": 37.3944, + "step": 61260 + }, + { + "epoch": 0.12376927645373853, + "grad_norm": 271.9932861328125, + "learning_rate": 9.984531845295333e-06, + "loss": 40.0991, + "step": 61270 + }, + { + "epoch": 0.12378947708642235, + "grad_norm": 236.9116668701172, + "learning_rate": 9.984504397169107e-06, + "loss": 30.0676, + "step": 61280 + }, + { + "epoch": 0.12380967771910616, + "grad_norm": 349.3979187011719, + "learning_rate": 9.984476924749011e-06, + "loss": 25.6765, + "step": 61290 + }, + { + "epoch": 0.12382987835178998, + "grad_norm": 796.3491821289062, + "learning_rate": 9.98444942803518e-06, + "loss": 30.176, + "step": 61300 + }, + { + "epoch": 0.1238500789844738, + "grad_norm": 377.94488525390625, + "learning_rate": 9.984421907027747e-06, + "loss": 17.7391, + "step": 61310 + }, + { + "epoch": 0.1238702796171576, + "grad_norm": 386.1169128417969, + "learning_rate": 9.984394361726844e-06, + "loss": 24.3696, + "step": 61320 + }, + { + "epoch": 0.12389048024984142, + "grad_norm": 349.8980712890625, + "learning_rate": 9.98436679213261e-06, + "loss": 15.9381, + "step": 61330 + }, + { + "epoch": 0.12391068088252524, + "grad_norm": 294.2398986816406, + "learning_rate": 9.984339198245175e-06, + "loss": 20.0689, + "step": 61340 + }, + { + "epoch": 0.12393088151520906, + "grad_norm": 570.2796630859375, + "learning_rate": 
9.984311580064676e-06, + "loss": 19.928, + "step": 61350 + }, + { + "epoch": 0.12395108214789287, + "grad_norm": 382.2208557128906, + "learning_rate": 9.984283937591246e-06, + "loss": 21.401, + "step": 61360 + }, + { + "epoch": 0.12397128278057669, + "grad_norm": 213.10166931152344, + "learning_rate": 9.98425627082502e-06, + "loss": 39.4499, + "step": 61370 + }, + { + "epoch": 0.12399148341326051, + "grad_norm": 287.1283264160156, + "learning_rate": 9.984228579766136e-06, + "loss": 26.1791, + "step": 61380 + }, + { + "epoch": 0.12401168404594431, + "grad_norm": 476.8355712890625, + "learning_rate": 9.984200864414726e-06, + "loss": 17.3141, + "step": 61390 + }, + { + "epoch": 0.12403188467862813, + "grad_norm": 283.53021240234375, + "learning_rate": 9.984173124770924e-06, + "loss": 25.9638, + "step": 61400 + }, + { + "epoch": 0.12405208531131195, + "grad_norm": 553.5846557617188, + "learning_rate": 9.984145360834868e-06, + "loss": 17.2837, + "step": 61410 + }, + { + "epoch": 0.12407228594399576, + "grad_norm": 697.6364135742188, + "learning_rate": 9.984117572606691e-06, + "loss": 22.7716, + "step": 61420 + }, + { + "epoch": 0.12409248657667958, + "grad_norm": 444.0830078125, + "learning_rate": 9.984089760086531e-06, + "loss": 20.1147, + "step": 61430 + }, + { + "epoch": 0.1241126872093634, + "grad_norm": 849.0206298828125, + "learning_rate": 9.98406192327452e-06, + "loss": 21.7118, + "step": 61440 + }, + { + "epoch": 0.12413288784204721, + "grad_norm": 439.1556396484375, + "learning_rate": 9.984034062170796e-06, + "loss": 17.6189, + "step": 61450 + }, + { + "epoch": 0.12415308847473103, + "grad_norm": 125.74227142333984, + "learning_rate": 9.984006176775496e-06, + "loss": 10.3332, + "step": 61460 + }, + { + "epoch": 0.12417328910741485, + "grad_norm": 601.2088623046875, + "learning_rate": 9.983978267088753e-06, + "loss": 28.9244, + "step": 61470 + }, + { + "epoch": 0.12419348974009865, + "grad_norm": 554.8843994140625, + "learning_rate": 9.983950333110705e-06, + "loss": 35.1721, + "step": 61480 + }, + { + "epoch": 0.12421369037278247, + "grad_norm": 937.4196166992188, + "learning_rate": 9.983922374841488e-06, + "loss": 23.9458, + "step": 61490 + }, + { + "epoch": 0.1242338910054663, + "grad_norm": 1045.7427978515625, + "learning_rate": 9.983894392281237e-06, + "loss": 16.7227, + "step": 61500 + }, + { + "epoch": 0.12425409163815011, + "grad_norm": 490.57745361328125, + "learning_rate": 9.98386638543009e-06, + "loss": 21.1804, + "step": 61510 + }, + { + "epoch": 0.12427429227083392, + "grad_norm": 618.8065185546875, + "learning_rate": 9.983838354288181e-06, + "loss": 18.1042, + "step": 61520 + }, + { + "epoch": 0.12429449290351774, + "grad_norm": 363.8397216796875, + "learning_rate": 9.98381029885565e-06, + "loss": 16.5094, + "step": 61530 + }, + { + "epoch": 0.12431469353620156, + "grad_norm": 390.1875, + "learning_rate": 9.983782219132631e-06, + "loss": 30.3959, + "step": 61540 + }, + { + "epoch": 0.12433489416888537, + "grad_norm": 624.716552734375, + "learning_rate": 9.983754115119262e-06, + "loss": 18.5887, + "step": 61550 + }, + { + "epoch": 0.12435509480156919, + "grad_norm": 233.31919860839844, + "learning_rate": 9.983725986815682e-06, + "loss": 29.8712, + "step": 61560 + }, + { + "epoch": 0.124375295434253, + "grad_norm": 617.6629638671875, + "learning_rate": 9.983697834222024e-06, + "loss": 19.6594, + "step": 61570 + }, + { + "epoch": 0.12439549606693681, + "grad_norm": 1028.4312744140625, + "learning_rate": 9.983669657338425e-06, + "loss": 34.8874, + "step": 61580 + }, + { + 
"epoch": 0.12441569669962063, + "grad_norm": 407.6793212890625, + "learning_rate": 9.98364145616503e-06, + "loss": 28.1744, + "step": 61590 + }, + { + "epoch": 0.12443589733230445, + "grad_norm": 901.1602783203125, + "learning_rate": 9.983613230701967e-06, + "loss": 29.0466, + "step": 61600 + }, + { + "epoch": 0.12445609796498826, + "grad_norm": 1.035197138786316, + "learning_rate": 9.98358498094938e-06, + "loss": 9.143, + "step": 61610 + }, + { + "epoch": 0.12447629859767208, + "grad_norm": 651.5478515625, + "learning_rate": 9.983556706907401e-06, + "loss": 29.3895, + "step": 61620 + }, + { + "epoch": 0.1244964992303559, + "grad_norm": 241.96258544921875, + "learning_rate": 9.983528408576173e-06, + "loss": 22.3464, + "step": 61630 + }, + { + "epoch": 0.1245166998630397, + "grad_norm": 303.60888671875, + "learning_rate": 9.983500085955833e-06, + "loss": 20.8418, + "step": 61640 + }, + { + "epoch": 0.12453690049572352, + "grad_norm": 122.80511474609375, + "learning_rate": 9.983471739046515e-06, + "loss": 24.85, + "step": 61650 + }, + { + "epoch": 0.12455710112840734, + "grad_norm": 281.5797424316406, + "learning_rate": 9.983443367848363e-06, + "loss": 26.0335, + "step": 61660 + }, + { + "epoch": 0.12457730176109116, + "grad_norm": 522.9718627929688, + "learning_rate": 9.98341497236151e-06, + "loss": 31.9752, + "step": 61670 + }, + { + "epoch": 0.12459750239377497, + "grad_norm": 588.1405029296875, + "learning_rate": 9.9833865525861e-06, + "loss": 25.7646, + "step": 61680 + }, + { + "epoch": 0.12461770302645879, + "grad_norm": 466.3359069824219, + "learning_rate": 9.983358108522266e-06, + "loss": 16.9239, + "step": 61690 + }, + { + "epoch": 0.12463790365914261, + "grad_norm": 493.8813781738281, + "learning_rate": 9.98332964017015e-06, + "loss": 19.6411, + "step": 61700 + }, + { + "epoch": 0.12465810429182642, + "grad_norm": 640.7128295898438, + "learning_rate": 9.98330114752989e-06, + "loss": 33.6878, + "step": 61710 + }, + { + "epoch": 0.12467830492451024, + "grad_norm": 43.688926696777344, + "learning_rate": 9.983272630601624e-06, + "loss": 20.7413, + "step": 61720 + }, + { + "epoch": 0.12469850555719406, + "grad_norm": 632.6639404296875, + "learning_rate": 9.983244089385491e-06, + "loss": 29.1099, + "step": 61730 + }, + { + "epoch": 0.12471870618987786, + "grad_norm": 153.4970245361328, + "learning_rate": 9.98321552388163e-06, + "loss": 20.4657, + "step": 61740 + }, + { + "epoch": 0.12473890682256168, + "grad_norm": 3450.285888671875, + "learning_rate": 9.983186934090183e-06, + "loss": 25.8551, + "step": 61750 + }, + { + "epoch": 0.1247591074552455, + "grad_norm": 504.5535888671875, + "learning_rate": 9.983158320011288e-06, + "loss": 16.0905, + "step": 61760 + }, + { + "epoch": 0.12477930808792931, + "grad_norm": 757.3421630859375, + "learning_rate": 9.983129681645082e-06, + "loss": 41.4387, + "step": 61770 + }, + { + "epoch": 0.12479950872061313, + "grad_norm": 647.8482055664062, + "learning_rate": 9.983101018991706e-06, + "loss": 52.0292, + "step": 61780 + }, + { + "epoch": 0.12481970935329695, + "grad_norm": 446.3780517578125, + "learning_rate": 9.9830723320513e-06, + "loss": 18.6124, + "step": 61790 + }, + { + "epoch": 0.12483990998598075, + "grad_norm": 786.84619140625, + "learning_rate": 9.983043620824005e-06, + "loss": 18.3116, + "step": 61800 + }, + { + "epoch": 0.12486011061866457, + "grad_norm": 768.5186767578125, + "learning_rate": 9.983014885309959e-06, + "loss": 28.8062, + "step": 61810 + }, + { + "epoch": 0.1248803112513484, + "grad_norm": 104.13092803955078, + 
"learning_rate": 9.982986125509303e-06, + "loss": 23.444, + "step": 61820 + }, + { + "epoch": 0.12490051188403221, + "grad_norm": 358.8445739746094, + "learning_rate": 9.982957341422177e-06, + "loss": 20.2794, + "step": 61830 + }, + { + "epoch": 0.12492071251671602, + "grad_norm": 301.2618103027344, + "learning_rate": 9.982928533048722e-06, + "loss": 23.7256, + "step": 61840 + }, + { + "epoch": 0.12494091314939984, + "grad_norm": 330.6232604980469, + "learning_rate": 9.982899700389077e-06, + "loss": 23.1953, + "step": 61850 + }, + { + "epoch": 0.12496111378208366, + "grad_norm": 403.6541442871094, + "learning_rate": 9.982870843443381e-06, + "loss": 18.7548, + "step": 61860 + }, + { + "epoch": 0.12498131441476747, + "grad_norm": 914.1959838867188, + "learning_rate": 9.98284196221178e-06, + "loss": 52.2723, + "step": 61870 + }, + { + "epoch": 0.12500151504745127, + "grad_norm": 83.53597259521484, + "learning_rate": 9.982813056694411e-06, + "loss": 22.2355, + "step": 61880 + }, + { + "epoch": 0.1250217156801351, + "grad_norm": 223.97230529785156, + "learning_rate": 9.982784126891416e-06, + "loss": 25.4685, + "step": 61890 + }, + { + "epoch": 0.1250419163128189, + "grad_norm": 347.29119873046875, + "learning_rate": 9.982755172802933e-06, + "loss": 16.4986, + "step": 61900 + }, + { + "epoch": 0.12506211694550273, + "grad_norm": 619.5742797851562, + "learning_rate": 9.98272619442911e-06, + "loss": 80.9853, + "step": 61910 + }, + { + "epoch": 0.12508231757818655, + "grad_norm": 507.7166442871094, + "learning_rate": 9.982697191770079e-06, + "loss": 39.1105, + "step": 61920 + }, + { + "epoch": 0.12510251821087037, + "grad_norm": 338.95648193359375, + "learning_rate": 9.982668164825989e-06, + "loss": 36.0701, + "step": 61930 + }, + { + "epoch": 0.1251227188435542, + "grad_norm": 301.35870361328125, + "learning_rate": 9.982639113596978e-06, + "loss": 18.2, + "step": 61940 + }, + { + "epoch": 0.12514291947623798, + "grad_norm": 194.94728088378906, + "learning_rate": 9.982610038083188e-06, + "loss": 20.8123, + "step": 61950 + }, + { + "epoch": 0.1251631201089218, + "grad_norm": 499.0058288574219, + "learning_rate": 9.98258093828476e-06, + "loss": 28.3766, + "step": 61960 + }, + { + "epoch": 0.12518332074160562, + "grad_norm": 532.4949951171875, + "learning_rate": 9.98255181420184e-06, + "loss": 22.134, + "step": 61970 + }, + { + "epoch": 0.12520352137428944, + "grad_norm": 259.8731689453125, + "learning_rate": 9.982522665834565e-06, + "loss": 27.6904, + "step": 61980 + }, + { + "epoch": 0.12522372200697326, + "grad_norm": 523.75927734375, + "learning_rate": 9.982493493183079e-06, + "loss": 33.2929, + "step": 61990 + }, + { + "epoch": 0.12524392263965708, + "grad_norm": 442.4575500488281, + "learning_rate": 9.982464296247523e-06, + "loss": 22.7982, + "step": 62000 + }, + { + "epoch": 0.12526412327234088, + "grad_norm": 357.35003662109375, + "learning_rate": 9.98243507502804e-06, + "loss": 25.7578, + "step": 62010 + }, + { + "epoch": 0.1252843239050247, + "grad_norm": 140.2523651123047, + "learning_rate": 9.982405829524774e-06, + "loss": 15.0906, + "step": 62020 + }, + { + "epoch": 0.12530452453770852, + "grad_norm": 661.8172607421875, + "learning_rate": 9.982376559737866e-06, + "loss": 22.1402, + "step": 62030 + }, + { + "epoch": 0.12532472517039234, + "grad_norm": 529.1638793945312, + "learning_rate": 9.982347265667459e-06, + "loss": 41.1321, + "step": 62040 + }, + { + "epoch": 0.12534492580307616, + "grad_norm": 210.56182861328125, + "learning_rate": 9.982317947313695e-06, + "loss": 21.2127, + "step": 
62050 + }, + { + "epoch": 0.12536512643575998, + "grad_norm": 278.6200866699219, + "learning_rate": 9.982288604676719e-06, + "loss": 30.7348, + "step": 62060 + }, + { + "epoch": 0.1253853270684438, + "grad_norm": 654.5919799804688, + "learning_rate": 9.982259237756674e-06, + "loss": 20.0652, + "step": 62070 + }, + { + "epoch": 0.1254055277011276, + "grad_norm": 1227.6817626953125, + "learning_rate": 9.982229846553698e-06, + "loss": 49.6078, + "step": 62080 + }, + { + "epoch": 0.1254257283338114, + "grad_norm": 528.229736328125, + "learning_rate": 9.982200431067939e-06, + "loss": 17.7291, + "step": 62090 + }, + { + "epoch": 0.12544592896649523, + "grad_norm": 511.2461242675781, + "learning_rate": 9.98217099129954e-06, + "loss": 23.0345, + "step": 62100 + }, + { + "epoch": 0.12546612959917905, + "grad_norm": 539.4857177734375, + "learning_rate": 9.982141527248646e-06, + "loss": 17.4074, + "step": 62110 + }, + { + "epoch": 0.12548633023186287, + "grad_norm": 269.706787109375, + "learning_rate": 9.982112038915394e-06, + "loss": 32.9321, + "step": 62120 + }, + { + "epoch": 0.1255065308645467, + "grad_norm": 617.9381713867188, + "learning_rate": 9.982082526299935e-06, + "loss": 20.269, + "step": 62130 + }, + { + "epoch": 0.12552673149723048, + "grad_norm": 715.280517578125, + "learning_rate": 9.98205298940241e-06, + "loss": 24.2262, + "step": 62140 + }, + { + "epoch": 0.1255469321299143, + "grad_norm": 23.387699127197266, + "learning_rate": 9.982023428222963e-06, + "loss": 17.8983, + "step": 62150 + }, + { + "epoch": 0.12556713276259812, + "grad_norm": 752.984130859375, + "learning_rate": 9.981993842761737e-06, + "loss": 35.768, + "step": 62160 + }, + { + "epoch": 0.12558733339528194, + "grad_norm": 755.310302734375, + "learning_rate": 9.981964233018877e-06, + "loss": 37.8018, + "step": 62170 + }, + { + "epoch": 0.12560753402796576, + "grad_norm": 482.14447021484375, + "learning_rate": 9.981934598994529e-06, + "loss": 31.3451, + "step": 62180 + }, + { + "epoch": 0.12562773466064958, + "grad_norm": 426.26519775390625, + "learning_rate": 9.981904940688836e-06, + "loss": 14.7094, + "step": 62190 + }, + { + "epoch": 0.12564793529333337, + "grad_norm": 317.7471923828125, + "learning_rate": 9.981875258101944e-06, + "loss": 45.4545, + "step": 62200 + }, + { + "epoch": 0.1256681359260172, + "grad_norm": 453.5595397949219, + "learning_rate": 9.981845551233993e-06, + "loss": 26.3621, + "step": 62210 + }, + { + "epoch": 0.125688336558701, + "grad_norm": 694.0244140625, + "learning_rate": 9.981815820085132e-06, + "loss": 20.3197, + "step": 62220 + }, + { + "epoch": 0.12570853719138483, + "grad_norm": 382.5671691894531, + "learning_rate": 9.981786064655505e-06, + "loss": 24.1285, + "step": 62230 + }, + { + "epoch": 0.12572873782406865, + "grad_norm": 571.6986694335938, + "learning_rate": 9.981756284945256e-06, + "loss": 22.9406, + "step": 62240 + }, + { + "epoch": 0.12574893845675247, + "grad_norm": 1450.048583984375, + "learning_rate": 9.981726480954532e-06, + "loss": 26.0896, + "step": 62250 + }, + { + "epoch": 0.1257691390894363, + "grad_norm": 6.673811912536621, + "learning_rate": 9.981696652683479e-06, + "loss": 26.3327, + "step": 62260 + }, + { + "epoch": 0.12578933972212009, + "grad_norm": 528.0861206054688, + "learning_rate": 9.98166680013224e-06, + "loss": 27.3872, + "step": 62270 + }, + { + "epoch": 0.1258095403548039, + "grad_norm": 413.5461120605469, + "learning_rate": 9.981636923300959e-06, + "loss": 23.6593, + "step": 62280 + }, + { + "epoch": 0.12582974098748773, + "grad_norm": 
643.4320068359375, + "learning_rate": 9.981607022189785e-06, + "loss": 21.158, + "step": 62290 + }, + { + "epoch": 0.12584994162017155, + "grad_norm": 147.05723571777344, + "learning_rate": 9.981577096798864e-06, + "loss": 25.1463, + "step": 62300 + }, + { + "epoch": 0.12587014225285537, + "grad_norm": 261.19879150390625, + "learning_rate": 9.981547147128338e-06, + "loss": 29.367, + "step": 62310 + }, + { + "epoch": 0.12589034288553919, + "grad_norm": 550.4620971679688, + "learning_rate": 9.981517173178357e-06, + "loss": 29.0102, + "step": 62320 + }, + { + "epoch": 0.12591054351822298, + "grad_norm": 1298.7689208984375, + "learning_rate": 9.981487174949065e-06, + "loss": 26.1776, + "step": 62330 + }, + { + "epoch": 0.1259307441509068, + "grad_norm": 797.3565063476562, + "learning_rate": 9.98145715244061e-06, + "loss": 24.0866, + "step": 62340 + }, + { + "epoch": 0.12595094478359062, + "grad_norm": 371.12225341796875, + "learning_rate": 9.981427105653135e-06, + "loss": 11.8349, + "step": 62350 + }, + { + "epoch": 0.12597114541627444, + "grad_norm": 324.1166687011719, + "learning_rate": 9.981397034586789e-06, + "loss": 16.4646, + "step": 62360 + }, + { + "epoch": 0.12599134604895826, + "grad_norm": 239.66323852539062, + "learning_rate": 9.981366939241719e-06, + "loss": 18.3616, + "step": 62370 + }, + { + "epoch": 0.12601154668164208, + "grad_norm": 237.30917358398438, + "learning_rate": 9.98133681961807e-06, + "loss": 16.7278, + "step": 62380 + }, + { + "epoch": 0.1260317473143259, + "grad_norm": 569.3565673828125, + "learning_rate": 9.981306675715989e-06, + "loss": 31.6287, + "step": 62390 + }, + { + "epoch": 0.1260519479470097, + "grad_norm": 778.6282958984375, + "learning_rate": 9.981276507535625e-06, + "loss": 28.1746, + "step": 62400 + }, + { + "epoch": 0.1260721485796935, + "grad_norm": 1418.2340087890625, + "learning_rate": 9.981246315077123e-06, + "loss": 22.1584, + "step": 62410 + }, + { + "epoch": 0.12609234921237733, + "grad_norm": 351.3438415527344, + "learning_rate": 9.98121609834063e-06, + "loss": 20.2334, + "step": 62420 + }, + { + "epoch": 0.12611254984506115, + "grad_norm": 361.52587890625, + "learning_rate": 9.981185857326292e-06, + "loss": 29.948, + "step": 62430 + }, + { + "epoch": 0.12613275047774497, + "grad_norm": 1249.1368408203125, + "learning_rate": 9.98115559203426e-06, + "loss": 49.3923, + "step": 62440 + }, + { + "epoch": 0.1261529511104288, + "grad_norm": 133.21397399902344, + "learning_rate": 9.981125302464681e-06, + "loss": 36.9209, + "step": 62450 + }, + { + "epoch": 0.12617315174311258, + "grad_norm": 401.4458923339844, + "learning_rate": 9.9810949886177e-06, + "loss": 15.0456, + "step": 62460 + }, + { + "epoch": 0.1261933523757964, + "grad_norm": 708.7898559570312, + "learning_rate": 9.981064650493466e-06, + "loss": 18.3516, + "step": 62470 + }, + { + "epoch": 0.12621355300848022, + "grad_norm": 446.0447082519531, + "learning_rate": 9.981034288092129e-06, + "loss": 14.9286, + "step": 62480 + }, + { + "epoch": 0.12623375364116404, + "grad_norm": 444.21429443359375, + "learning_rate": 9.981003901413833e-06, + "loss": 27.0375, + "step": 62490 + }, + { + "epoch": 0.12625395427384786, + "grad_norm": 848.4797973632812, + "learning_rate": 9.980973490458728e-06, + "loss": 26.718, + "step": 62500 + }, + { + "epoch": 0.12627415490653168, + "grad_norm": 399.71966552734375, + "learning_rate": 9.980943055226964e-06, + "loss": 35.966, + "step": 62510 + }, + { + "epoch": 0.12629435553921547, + "grad_norm": 382.6466979980469, + "learning_rate": 9.980912595718686e-06, + 
"loss": 49.3784, + "step": 62520 + }, + { + "epoch": 0.1263145561718993, + "grad_norm": 497.79144287109375, + "learning_rate": 9.980882111934046e-06, + "loss": 22.7074, + "step": 62530 + }, + { + "epoch": 0.12633475680458311, + "grad_norm": 277.94525146484375, + "learning_rate": 9.980851603873189e-06, + "loss": 11.5644, + "step": 62540 + }, + { + "epoch": 0.12635495743726693, + "grad_norm": 538.9913330078125, + "learning_rate": 9.980821071536266e-06, + "loss": 16.5249, + "step": 62550 + }, + { + "epoch": 0.12637515806995075, + "grad_norm": 309.7591247558594, + "learning_rate": 9.980790514923425e-06, + "loss": 16.1624, + "step": 62560 + }, + { + "epoch": 0.12639535870263457, + "grad_norm": 750.3809204101562, + "learning_rate": 9.980759934034816e-06, + "loss": 36.466, + "step": 62570 + }, + { + "epoch": 0.1264155593353184, + "grad_norm": 282.3537902832031, + "learning_rate": 9.980729328870586e-06, + "loss": 25.2453, + "step": 62580 + }, + { + "epoch": 0.1264357599680022, + "grad_norm": 766.6907958984375, + "learning_rate": 9.980698699430884e-06, + "loss": 27.8084, + "step": 62590 + }, + { + "epoch": 0.126455960600686, + "grad_norm": 303.53924560546875, + "learning_rate": 9.980668045715864e-06, + "loss": 16.3529, + "step": 62600 + }, + { + "epoch": 0.12647616123336983, + "grad_norm": 388.064453125, + "learning_rate": 9.98063736772567e-06, + "loss": 25.9673, + "step": 62610 + }, + { + "epoch": 0.12649636186605365, + "grad_norm": 583.8735961914062, + "learning_rate": 9.980606665460453e-06, + "loss": 27.4928, + "step": 62620 + }, + { + "epoch": 0.12651656249873747, + "grad_norm": 440.026611328125, + "learning_rate": 9.980575938920364e-06, + "loss": 14.5656, + "step": 62630 + }, + { + "epoch": 0.1265367631314213, + "grad_norm": 146.12265014648438, + "learning_rate": 9.980545188105553e-06, + "loss": 35.6076, + "step": 62640 + }, + { + "epoch": 0.12655696376410508, + "grad_norm": 385.4832458496094, + "learning_rate": 9.980514413016167e-06, + "loss": 14.4751, + "step": 62650 + }, + { + "epoch": 0.1265771643967889, + "grad_norm": 463.3867492675781, + "learning_rate": 9.980483613652359e-06, + "loss": 14.9639, + "step": 62660 + }, + { + "epoch": 0.12659736502947272, + "grad_norm": 459.55169677734375, + "learning_rate": 9.980452790014278e-06, + "loss": 16.1644, + "step": 62670 + }, + { + "epoch": 0.12661756566215654, + "grad_norm": 587.0556640625, + "learning_rate": 9.980421942102075e-06, + "loss": 29.3284, + "step": 62680 + }, + { + "epoch": 0.12663776629484036, + "grad_norm": 805.0811767578125, + "learning_rate": 9.980391069915897e-06, + "loss": 12.8979, + "step": 62690 + }, + { + "epoch": 0.12665796692752418, + "grad_norm": 100.18392944335938, + "learning_rate": 9.980360173455899e-06, + "loss": 25.0022, + "step": 62700 + }, + { + "epoch": 0.126678167560208, + "grad_norm": 463.1624755859375, + "learning_rate": 9.980329252722227e-06, + "loss": 14.9295, + "step": 62710 + }, + { + "epoch": 0.1266983681928918, + "grad_norm": 406.43902587890625, + "learning_rate": 9.980298307715038e-06, + "loss": 29.2952, + "step": 62720 + }, + { + "epoch": 0.1267185688255756, + "grad_norm": 508.7016296386719, + "learning_rate": 9.980267338434477e-06, + "loss": 34.4754, + "step": 62730 + }, + { + "epoch": 0.12673876945825943, + "grad_norm": 99.78828430175781, + "learning_rate": 9.980236344880696e-06, + "loss": 15.8014, + "step": 62740 + }, + { + "epoch": 0.12675897009094325, + "grad_norm": 783.4483032226562, + "learning_rate": 9.98020532705385e-06, + "loss": 32.7593, + "step": 62750 + }, + { + "epoch": 0.12677917072362707, 
+ "grad_norm": 648.4140014648438, + "learning_rate": 9.980174284954084e-06, + "loss": 44.6443, + "step": 62760 + }, + { + "epoch": 0.1267993713563109, + "grad_norm": 341.7405090332031, + "learning_rate": 9.980143218581555e-06, + "loss": 34.7895, + "step": 62770 + }, + { + "epoch": 0.12681957198899468, + "grad_norm": 1091.858154296875, + "learning_rate": 9.98011212793641e-06, + "loss": 33.4063, + "step": 62780 + }, + { + "epoch": 0.1268397726216785, + "grad_norm": 216.94027709960938, + "learning_rate": 9.980081013018804e-06, + "loss": 15.5937, + "step": 62790 + }, + { + "epoch": 0.12685997325436232, + "grad_norm": 202.47886657714844, + "learning_rate": 9.980049873828887e-06, + "loss": 16.3564, + "step": 62800 + }, + { + "epoch": 0.12688017388704614, + "grad_norm": 545.5491333007812, + "learning_rate": 9.98001871036681e-06, + "loss": 34.0013, + "step": 62810 + }, + { + "epoch": 0.12690037451972996, + "grad_norm": 1135.317626953125, + "learning_rate": 9.979987522632727e-06, + "loss": 36.4949, + "step": 62820 + }, + { + "epoch": 0.12692057515241378, + "grad_norm": 727.2149047851562, + "learning_rate": 9.979956310626788e-06, + "loss": 27.9218, + "step": 62830 + }, + { + "epoch": 0.12694077578509758, + "grad_norm": 78.84790802001953, + "learning_rate": 9.979925074349146e-06, + "loss": 26.28, + "step": 62840 + }, + { + "epoch": 0.1269609764177814, + "grad_norm": 331.7414245605469, + "learning_rate": 9.979893813799953e-06, + "loss": 14.5715, + "step": 62850 + }, + { + "epoch": 0.12698117705046522, + "grad_norm": 116.44114685058594, + "learning_rate": 9.979862528979362e-06, + "loss": 50.3006, + "step": 62860 + }, + { + "epoch": 0.12700137768314904, + "grad_norm": 532.8187866210938, + "learning_rate": 9.979831219887526e-06, + "loss": 21.8745, + "step": 62870 + }, + { + "epoch": 0.12702157831583286, + "grad_norm": 470.2156066894531, + "learning_rate": 9.979799886524594e-06, + "loss": 15.5994, + "step": 62880 + }, + { + "epoch": 0.12704177894851668, + "grad_norm": 854.6575317382812, + "learning_rate": 9.979768528890725e-06, + "loss": 35.0675, + "step": 62890 + }, + { + "epoch": 0.1270619795812005, + "grad_norm": 1332.267578125, + "learning_rate": 9.979737146986064e-06, + "loss": 40.4353, + "step": 62900 + }, + { + "epoch": 0.1270821802138843, + "grad_norm": 223.40878295898438, + "learning_rate": 9.979705740810771e-06, + "loss": 21.3494, + "step": 62910 + }, + { + "epoch": 0.1271023808465681, + "grad_norm": 792.7653198242188, + "learning_rate": 9.979674310364996e-06, + "loss": 25.4134, + "step": 62920 + }, + { + "epoch": 0.12712258147925193, + "grad_norm": 511.9507751464844, + "learning_rate": 9.979642855648892e-06, + "loss": 20.9992, + "step": 62930 + }, + { + "epoch": 0.12714278211193575, + "grad_norm": 477.951416015625, + "learning_rate": 9.979611376662613e-06, + "loss": 32.2239, + "step": 62940 + }, + { + "epoch": 0.12716298274461957, + "grad_norm": 362.9677734375, + "learning_rate": 9.97957987340631e-06, + "loss": 19.6855, + "step": 62950 + }, + { + "epoch": 0.1271831833773034, + "grad_norm": 836.57373046875, + "learning_rate": 9.979548345880142e-06, + "loss": 25.1571, + "step": 62960 + }, + { + "epoch": 0.12720338400998718, + "grad_norm": 803.3682250976562, + "learning_rate": 9.979516794084256e-06, + "loss": 24.3883, + "step": 62970 + }, + { + "epoch": 0.127223584642671, + "grad_norm": 614.2709350585938, + "learning_rate": 9.97948521801881e-06, + "loss": 29.8649, + "step": 62980 + }, + { + "epoch": 0.12724378527535482, + "grad_norm": 376.4046936035156, + "learning_rate": 9.979453617683958e-06, + 
"loss": 17.8293, + "step": 62990 + }, + { + "epoch": 0.12726398590803864, + "grad_norm": 491.6141357421875, + "learning_rate": 9.979421993079853e-06, + "loss": 23.357, + "step": 63000 + }, + { + "epoch": 0.12728418654072246, + "grad_norm": 759.8665161132812, + "learning_rate": 9.979390344206648e-06, + "loss": 44.4881, + "step": 63010 + }, + { + "epoch": 0.12730438717340628, + "grad_norm": 520.8623657226562, + "learning_rate": 9.9793586710645e-06, + "loss": 24.3983, + "step": 63020 + }, + { + "epoch": 0.1273245878060901, + "grad_norm": 1215.4954833984375, + "learning_rate": 9.97932697365356e-06, + "loss": 22.5193, + "step": 63030 + }, + { + "epoch": 0.1273447884387739, + "grad_norm": 676.3529663085938, + "learning_rate": 9.979295251973986e-06, + "loss": 21.0263, + "step": 63040 + }, + { + "epoch": 0.1273649890714577, + "grad_norm": 123.64913940429688, + "learning_rate": 9.97926350602593e-06, + "loss": 35.7433, + "step": 63050 + }, + { + "epoch": 0.12738518970414153, + "grad_norm": 516.5016479492188, + "learning_rate": 9.979231735809546e-06, + "loss": 15.5689, + "step": 63060 + }, + { + "epoch": 0.12740539033682535, + "grad_norm": 415.07916259765625, + "learning_rate": 9.979199941324994e-06, + "loss": 25.6038, + "step": 63070 + }, + { + "epoch": 0.12742559096950917, + "grad_norm": 245.9202880859375, + "learning_rate": 9.979168122572422e-06, + "loss": 49.7665, + "step": 63080 + }, + { + "epoch": 0.127445791602193, + "grad_norm": 489.91802978515625, + "learning_rate": 9.97913627955199e-06, + "loss": 36.2062, + "step": 63090 + }, + { + "epoch": 0.12746599223487678, + "grad_norm": 432.1904296875, + "learning_rate": 9.979104412263851e-06, + "loss": 12.2549, + "step": 63100 + }, + { + "epoch": 0.1274861928675606, + "grad_norm": 1187.847900390625, + "learning_rate": 9.979072520708162e-06, + "loss": 49.9682, + "step": 63110 + }, + { + "epoch": 0.12750639350024442, + "grad_norm": 222.1941680908203, + "learning_rate": 9.979040604885077e-06, + "loss": 16.5459, + "step": 63120 + }, + { + "epoch": 0.12752659413292824, + "grad_norm": 416.6627502441406, + "learning_rate": 9.979008664794751e-06, + "loss": 24.3047, + "step": 63130 + }, + { + "epoch": 0.12754679476561206, + "grad_norm": 560.9697875976562, + "learning_rate": 9.978976700437341e-06, + "loss": 42.9497, + "step": 63140 + }, + { + "epoch": 0.12756699539829588, + "grad_norm": 936.8536376953125, + "learning_rate": 9.978944711813003e-06, + "loss": 25.2848, + "step": 63150 + }, + { + "epoch": 0.12758719603097968, + "grad_norm": 513.5052490234375, + "learning_rate": 9.978912698921892e-06, + "loss": 24.7976, + "step": 63160 + }, + { + "epoch": 0.1276073966636635, + "grad_norm": 493.39495849609375, + "learning_rate": 9.978880661764166e-06, + "loss": 27.8852, + "step": 63170 + }, + { + "epoch": 0.12762759729634732, + "grad_norm": 1059.8115234375, + "learning_rate": 9.978848600339978e-06, + "loss": 26.1442, + "step": 63180 + }, + { + "epoch": 0.12764779792903114, + "grad_norm": 272.6844787597656, + "learning_rate": 9.978816514649486e-06, + "loss": 18.1199, + "step": 63190 + }, + { + "epoch": 0.12766799856171496, + "grad_norm": 409.2892150878906, + "learning_rate": 9.978784404692847e-06, + "loss": 15.1925, + "step": 63200 + }, + { + "epoch": 0.12768819919439878, + "grad_norm": 542.8263549804688, + "learning_rate": 9.978752270470216e-06, + "loss": 28.0719, + "step": 63210 + }, + { + "epoch": 0.1277083998270826, + "grad_norm": 213.36155700683594, + "learning_rate": 9.97872011198175e-06, + "loss": 21.9098, + "step": 63220 + }, + { + "epoch": 0.1277286004597664, 
+ "grad_norm": 448.73211669921875, + "learning_rate": 9.978687929227606e-06, + "loss": 12.8645, + "step": 63230 + }, + { + "epoch": 0.1277488010924502, + "grad_norm": 156.6364288330078, + "learning_rate": 9.97865572220794e-06, + "loss": 39.7648, + "step": 63240 + }, + { + "epoch": 0.12776900172513403, + "grad_norm": 245.63671875, + "learning_rate": 9.978623490922913e-06, + "loss": 30.4392, + "step": 63250 + }, + { + "epoch": 0.12778920235781785, + "grad_norm": 526.5847778320312, + "learning_rate": 9.978591235372675e-06, + "loss": 27.034, + "step": 63260 + }, + { + "epoch": 0.12780940299050167, + "grad_norm": 855.2476806640625, + "learning_rate": 9.97855895555739e-06, + "loss": 40.838, + "step": 63270 + }, + { + "epoch": 0.1278296036231855, + "grad_norm": 755.0592041015625, + "learning_rate": 9.978526651477211e-06, + "loss": 21.5641, + "step": 63280 + }, + { + "epoch": 0.12784980425586928, + "grad_norm": 696.832763671875, + "learning_rate": 9.978494323132296e-06, + "loss": 36.0213, + "step": 63290 + }, + { + "epoch": 0.1278700048885531, + "grad_norm": 617.84130859375, + "learning_rate": 9.978461970522807e-06, + "loss": 40.2178, + "step": 63300 + }, + { + "epoch": 0.12789020552123692, + "grad_norm": 611.9417114257812, + "learning_rate": 9.978429593648894e-06, + "loss": 31.6474, + "step": 63310 + }, + { + "epoch": 0.12791040615392074, + "grad_norm": 738.6346435546875, + "learning_rate": 9.978397192510722e-06, + "loss": 44.4837, + "step": 63320 + }, + { + "epoch": 0.12793060678660456, + "grad_norm": 576.538330078125, + "learning_rate": 9.978364767108444e-06, + "loss": 23.9101, + "step": 63330 + }, + { + "epoch": 0.12795080741928838, + "grad_norm": 486.086181640625, + "learning_rate": 9.97833231744222e-06, + "loss": 35.1615, + "step": 63340 + }, + { + "epoch": 0.1279710080519722, + "grad_norm": 596.9443359375, + "learning_rate": 9.97829984351221e-06, + "loss": 16.0704, + "step": 63350 + }, + { + "epoch": 0.127991208684656, + "grad_norm": 423.5945739746094, + "learning_rate": 9.978267345318569e-06, + "loss": 24.7456, + "step": 63360 + }, + { + "epoch": 0.1280114093173398, + "grad_norm": 267.3250732421875, + "learning_rate": 9.978234822861456e-06, + "loss": 17.0896, + "step": 63370 + }, + { + "epoch": 0.12803160995002363, + "grad_norm": 861.3472290039062, + "learning_rate": 9.978202276141032e-06, + "loss": 42.2953, + "step": 63380 + }, + { + "epoch": 0.12805181058270745, + "grad_norm": 272.4585876464844, + "learning_rate": 9.978169705157455e-06, + "loss": 28.3199, + "step": 63390 + }, + { + "epoch": 0.12807201121539127, + "grad_norm": 191.0371551513672, + "learning_rate": 9.97813710991088e-06, + "loss": 12.2191, + "step": 63400 + }, + { + "epoch": 0.1280922118480751, + "grad_norm": 767.1546020507812, + "learning_rate": 9.978104490401468e-06, + "loss": 28.2204, + "step": 63410 + }, + { + "epoch": 0.12811241248075889, + "grad_norm": 939.4093627929688, + "learning_rate": 9.978071846629381e-06, + "loss": 16.1536, + "step": 63420 + }, + { + "epoch": 0.1281326131134427, + "grad_norm": 300.4186096191406, + "learning_rate": 9.978039178594774e-06, + "loss": 25.2184, + "step": 63430 + }, + { + "epoch": 0.12815281374612653, + "grad_norm": 396.7921447753906, + "learning_rate": 9.978006486297808e-06, + "loss": 31.1113, + "step": 63440 + }, + { + "epoch": 0.12817301437881035, + "grad_norm": 381.0914001464844, + "learning_rate": 9.977973769738642e-06, + "loss": 20.6207, + "step": 63450 + }, + { + "epoch": 0.12819321501149417, + "grad_norm": 422.78662109375, + "learning_rate": 9.977941028917436e-06, + "loss": 
30.2763, + "step": 63460 + }, + { + "epoch": 0.12821341564417799, + "grad_norm": 509.16619873046875, + "learning_rate": 9.977908263834348e-06, + "loss": 47.3221, + "step": 63470 + }, + { + "epoch": 0.12823361627686178, + "grad_norm": 5963.0517578125, + "learning_rate": 9.97787547448954e-06, + "loss": 38.5918, + "step": 63480 + }, + { + "epoch": 0.1282538169095456, + "grad_norm": 472.1371765136719, + "learning_rate": 9.977842660883172e-06, + "loss": 17.5362, + "step": 63490 + }, + { + "epoch": 0.12827401754222942, + "grad_norm": 688.972900390625, + "learning_rate": 9.9778098230154e-06, + "loss": 36.3663, + "step": 63500 + }, + { + "epoch": 0.12829421817491324, + "grad_norm": 708.9854736328125, + "learning_rate": 9.97777696088639e-06, + "loss": 36.2051, + "step": 63510 + }, + { + "epoch": 0.12831441880759706, + "grad_norm": 469.16461181640625, + "learning_rate": 9.977744074496297e-06, + "loss": 29.461, + "step": 63520 + }, + { + "epoch": 0.12833461944028088, + "grad_norm": 111.1149673461914, + "learning_rate": 9.97771116384528e-06, + "loss": 16.2335, + "step": 63530 + }, + { + "epoch": 0.1283548200729647, + "grad_norm": 853.3607177734375, + "learning_rate": 9.977678228933508e-06, + "loss": 38.7535, + "step": 63540 + }, + { + "epoch": 0.1283750207056485, + "grad_norm": 373.10369873046875, + "learning_rate": 9.977645269761131e-06, + "loss": 13.5213, + "step": 63550 + }, + { + "epoch": 0.1283952213383323, + "grad_norm": 445.2317810058594, + "learning_rate": 9.977612286328317e-06, + "loss": 18.522, + "step": 63560 + }, + { + "epoch": 0.12841542197101613, + "grad_norm": 432.3799743652344, + "learning_rate": 9.977579278635225e-06, + "loss": 23.454, + "step": 63570 + }, + { + "epoch": 0.12843562260369995, + "grad_norm": 113.14830017089844, + "learning_rate": 9.977546246682015e-06, + "loss": 18.0668, + "step": 63580 + }, + { + "epoch": 0.12845582323638377, + "grad_norm": 644.360595703125, + "learning_rate": 9.977513190468848e-06, + "loss": 30.9097, + "step": 63590 + }, + { + "epoch": 0.1284760238690676, + "grad_norm": 473.98876953125, + "learning_rate": 9.977480109995886e-06, + "loss": 16.7546, + "step": 63600 + }, + { + "epoch": 0.12849622450175138, + "grad_norm": 153.7622833251953, + "learning_rate": 9.977447005263289e-06, + "loss": 22.1556, + "step": 63610 + }, + { + "epoch": 0.1285164251344352, + "grad_norm": 905.6984252929688, + "learning_rate": 9.97741387627122e-06, + "loss": 30.4328, + "step": 63620 + }, + { + "epoch": 0.12853662576711902, + "grad_norm": 665.6696166992188, + "learning_rate": 9.977380723019838e-06, + "loss": 21.1389, + "step": 63630 + }, + { + "epoch": 0.12855682639980284, + "grad_norm": 173.6195526123047, + "learning_rate": 9.977347545509307e-06, + "loss": 29.5608, + "step": 63640 + }, + { + "epoch": 0.12857702703248666, + "grad_norm": 340.8911437988281, + "learning_rate": 9.977314343739785e-06, + "loss": 26.3831, + "step": 63650 + }, + { + "epoch": 0.12859722766517048, + "grad_norm": 622.4423828125, + "learning_rate": 9.97728111771144e-06, + "loss": 19.2024, + "step": 63660 + }, + { + "epoch": 0.1286174282978543, + "grad_norm": 1370.3134765625, + "learning_rate": 9.97724786742443e-06, + "loss": 35.2596, + "step": 63670 + }, + { + "epoch": 0.1286376289305381, + "grad_norm": 574.2199096679688, + "learning_rate": 9.977214592878917e-06, + "loss": 21.3078, + "step": 63680 + }, + { + "epoch": 0.12865782956322191, + "grad_norm": 389.3919677734375, + "learning_rate": 9.977181294075063e-06, + "loss": 26.5428, + "step": 63690 + }, + { + "epoch": 0.12867803019590573, + "grad_norm": 
382.49951171875, + "learning_rate": 9.977147971013033e-06, + "loss": 27.7318, + "step": 63700 + }, + { + "epoch": 0.12869823082858955, + "grad_norm": 169.2600860595703, + "learning_rate": 9.977114623692985e-06, + "loss": 20.7903, + "step": 63710 + }, + { + "epoch": 0.12871843146127337, + "grad_norm": 247.0108642578125, + "learning_rate": 9.977081252115085e-06, + "loss": 19.3662, + "step": 63720 + }, + { + "epoch": 0.1287386320939572, + "grad_norm": 111.87555694580078, + "learning_rate": 9.977047856279496e-06, + "loss": 20.1174, + "step": 63730 + }, + { + "epoch": 0.128758832726641, + "grad_norm": 472.9613952636719, + "learning_rate": 9.977014436186377e-06, + "loss": 14.0777, + "step": 63740 + }, + { + "epoch": 0.1287790333593248, + "grad_norm": 267.59625244140625, + "learning_rate": 9.976980991835896e-06, + "loss": 28.9651, + "step": 63750 + }, + { + "epoch": 0.12879923399200863, + "grad_norm": 580.331298828125, + "learning_rate": 9.97694752322821e-06, + "loss": 14.1914, + "step": 63760 + }, + { + "epoch": 0.12881943462469245, + "grad_norm": 190.30702209472656, + "learning_rate": 9.976914030363488e-06, + "loss": 22.6187, + "step": 63770 + }, + { + "epoch": 0.12883963525737627, + "grad_norm": 502.5259704589844, + "learning_rate": 9.976880513241889e-06, + "loss": 23.2208, + "step": 63780 + }, + { + "epoch": 0.1288598358900601, + "grad_norm": 8.196840286254883, + "learning_rate": 9.976846971863579e-06, + "loss": 24.0969, + "step": 63790 + }, + { + "epoch": 0.12888003652274388, + "grad_norm": 593.9136352539062, + "learning_rate": 9.97681340622872e-06, + "loss": 34.7351, + "step": 63800 + }, + { + "epoch": 0.1289002371554277, + "grad_norm": 153.56385803222656, + "learning_rate": 9.976779816337476e-06, + "loss": 13.862, + "step": 63810 + }, + { + "epoch": 0.12892043778811152, + "grad_norm": 849.2635498046875, + "learning_rate": 9.976746202190012e-06, + "loss": 23.3059, + "step": 63820 + }, + { + "epoch": 0.12894063842079534, + "grad_norm": 373.9201965332031, + "learning_rate": 9.97671256378649e-06, + "loss": 16.4901, + "step": 63830 + }, + { + "epoch": 0.12896083905347916, + "grad_norm": 494.0238342285156, + "learning_rate": 9.976678901127074e-06, + "loss": 34.1576, + "step": 63840 + }, + { + "epoch": 0.12898103968616298, + "grad_norm": 39.347312927246094, + "learning_rate": 9.976645214211929e-06, + "loss": 26.6505, + "step": 63850 + }, + { + "epoch": 0.1290012403188468, + "grad_norm": 414.5834655761719, + "learning_rate": 9.976611503041218e-06, + "loss": 36.6754, + "step": 63860 + }, + { + "epoch": 0.1290214409515306, + "grad_norm": 803.5025024414062, + "learning_rate": 9.976577767615108e-06, + "loss": 22.6195, + "step": 63870 + }, + { + "epoch": 0.1290416415842144, + "grad_norm": 374.4903259277344, + "learning_rate": 9.97654400793376e-06, + "loss": 14.9365, + "step": 63880 + }, + { + "epoch": 0.12906184221689823, + "grad_norm": 419.9744567871094, + "learning_rate": 9.97651022399734e-06, + "loss": 24.2052, + "step": 63890 + }, + { + "epoch": 0.12908204284958205, + "grad_norm": 553.0997314453125, + "learning_rate": 9.976476415806013e-06, + "loss": 24.3125, + "step": 63900 + }, + { + "epoch": 0.12910224348226587, + "grad_norm": 273.9212951660156, + "learning_rate": 9.976442583359944e-06, + "loss": 18.0074, + "step": 63910 + }, + { + "epoch": 0.1291224441149497, + "grad_norm": 304.9971923828125, + "learning_rate": 9.976408726659296e-06, + "loss": 19.2274, + "step": 63920 + }, + { + "epoch": 0.12914264474763348, + "grad_norm": 256.17962646484375, + "learning_rate": 9.976374845704238e-06, + "loss": 
29.1594, + "step": 63930 + }, + { + "epoch": 0.1291628453803173, + "grad_norm": 272.98468017578125, + "learning_rate": 9.976340940494931e-06, + "loss": 15.6594, + "step": 63940 + }, + { + "epoch": 0.12918304601300112, + "grad_norm": 390.11083984375, + "learning_rate": 9.976307011031542e-06, + "loss": 14.2196, + "step": 63950 + }, + { + "epoch": 0.12920324664568494, + "grad_norm": 174.08494567871094, + "learning_rate": 9.976273057314236e-06, + "loss": 14.4673, + "step": 63960 + }, + { + "epoch": 0.12922344727836876, + "grad_norm": 425.7914733886719, + "learning_rate": 9.97623907934318e-06, + "loss": 23.7006, + "step": 63970 + }, + { + "epoch": 0.12924364791105258, + "grad_norm": 1210.1116943359375, + "learning_rate": 9.976205077118536e-06, + "loss": 36.7556, + "step": 63980 + }, + { + "epoch": 0.12926384854373638, + "grad_norm": 631.6767578125, + "learning_rate": 9.976171050640473e-06, + "loss": 14.5007, + "step": 63990 + }, + { + "epoch": 0.1292840491764202, + "grad_norm": 1304.597412109375, + "learning_rate": 9.976136999909156e-06, + "loss": 42.4553, + "step": 64000 + }, + { + "epoch": 0.12930424980910402, + "grad_norm": 374.9949645996094, + "learning_rate": 9.976102924924752e-06, + "loss": 21.9549, + "step": 64010 + }, + { + "epoch": 0.12932445044178784, + "grad_norm": 504.45379638671875, + "learning_rate": 9.976068825687424e-06, + "loss": 53.4435, + "step": 64020 + }, + { + "epoch": 0.12934465107447166, + "grad_norm": 1374.239501953125, + "learning_rate": 9.97603470219734e-06, + "loss": 35.7616, + "step": 64030 + }, + { + "epoch": 0.12936485170715548, + "grad_norm": 1206.2969970703125, + "learning_rate": 9.976000554454668e-06, + "loss": 33.7899, + "step": 64040 + }, + { + "epoch": 0.1293850523398393, + "grad_norm": 494.3724365234375, + "learning_rate": 9.975966382459571e-06, + "loss": 25.3993, + "step": 64050 + }, + { + "epoch": 0.1294052529725231, + "grad_norm": 606.0910034179688, + "learning_rate": 9.975932186212217e-06, + "loss": 21.95, + "step": 64060 + }, + { + "epoch": 0.1294254536052069, + "grad_norm": 281.4843444824219, + "learning_rate": 9.975897965712777e-06, + "loss": 30.7947, + "step": 64070 + }, + { + "epoch": 0.12944565423789073, + "grad_norm": 342.4183654785156, + "learning_rate": 9.975863720961411e-06, + "loss": 25.8221, + "step": 64080 + }, + { + "epoch": 0.12946585487057455, + "grad_norm": 411.2066345214844, + "learning_rate": 9.975829451958288e-06, + "loss": 16.7347, + "step": 64090 + }, + { + "epoch": 0.12948605550325837, + "grad_norm": 1058.9154052734375, + "learning_rate": 9.975795158703576e-06, + "loss": 20.1638, + "step": 64100 + }, + { + "epoch": 0.1295062561359422, + "grad_norm": 440.6236572265625, + "learning_rate": 9.975760841197443e-06, + "loss": 17.8365, + "step": 64110 + }, + { + "epoch": 0.12952645676862598, + "grad_norm": 1042.24072265625, + "learning_rate": 9.975726499440055e-06, + "loss": 33.1461, + "step": 64120 + }, + { + "epoch": 0.1295466574013098, + "grad_norm": 714.8013916015625, + "learning_rate": 9.975692133431579e-06, + "loss": 21.159, + "step": 64130 + }, + { + "epoch": 0.12956685803399362, + "grad_norm": 693.3626098632812, + "learning_rate": 9.975657743172182e-06, + "loss": 33.558, + "step": 64140 + }, + { + "epoch": 0.12958705866667744, + "grad_norm": 453.12518310546875, + "learning_rate": 9.975623328662036e-06, + "loss": 41.4328, + "step": 64150 + }, + { + "epoch": 0.12960725929936126, + "grad_norm": 252.20648193359375, + "learning_rate": 9.975588889901302e-06, + "loss": 44.4843, + "step": 64160 + }, + { + "epoch": 0.12962745993204508, + 
"grad_norm": 361.7718505859375, + "learning_rate": 9.975554426890152e-06, + "loss": 29.4922, + "step": 64170 + }, + { + "epoch": 0.1296476605647289, + "grad_norm": 407.3759460449219, + "learning_rate": 9.975519939628754e-06, + "loss": 18.3133, + "step": 64180 + }, + { + "epoch": 0.1296678611974127, + "grad_norm": 848.3601684570312, + "learning_rate": 9.975485428117276e-06, + "loss": 28.4552, + "step": 64190 + }, + { + "epoch": 0.1296880618300965, + "grad_norm": 806.0908203125, + "learning_rate": 9.975450892355882e-06, + "loss": 25.4261, + "step": 64200 + }, + { + "epoch": 0.12970826246278033, + "grad_norm": 502.7238464355469, + "learning_rate": 9.975416332344747e-06, + "loss": 14.6552, + "step": 64210 + }, + { + "epoch": 0.12972846309546415, + "grad_norm": 248.8984832763672, + "learning_rate": 9.975381748084035e-06, + "loss": 22.231, + "step": 64220 + }, + { + "epoch": 0.12974866372814797, + "grad_norm": 625.7821044921875, + "learning_rate": 9.975347139573917e-06, + "loss": 25.4415, + "step": 64230 + }, + { + "epoch": 0.1297688643608318, + "grad_norm": 1019.3402709960938, + "learning_rate": 9.97531250681456e-06, + "loss": 27.9808, + "step": 64240 + }, + { + "epoch": 0.12978906499351558, + "grad_norm": 89.6280517578125, + "learning_rate": 9.975277849806133e-06, + "loss": 20.8346, + "step": 64250 + }, + { + "epoch": 0.1298092656261994, + "grad_norm": 483.666259765625, + "learning_rate": 9.975243168548804e-06, + "loss": 22.905, + "step": 64260 + }, + { + "epoch": 0.12982946625888322, + "grad_norm": 325.27740478515625, + "learning_rate": 9.975208463042745e-06, + "loss": 15.8422, + "step": 64270 + }, + { + "epoch": 0.12984966689156704, + "grad_norm": 131.23312377929688, + "learning_rate": 9.975173733288122e-06, + "loss": 10.4955, + "step": 64280 + }, + { + "epoch": 0.12986986752425086, + "grad_norm": 442.3735046386719, + "learning_rate": 9.975138979285107e-06, + "loss": 13.1356, + "step": 64290 + }, + { + "epoch": 0.12989006815693468, + "grad_norm": 340.0762634277344, + "learning_rate": 9.975104201033868e-06, + "loss": 23.6785, + "step": 64300 + }, + { + "epoch": 0.12991026878961848, + "grad_norm": 655.1138305664062, + "learning_rate": 9.975069398534574e-06, + "loss": 20.8663, + "step": 64310 + }, + { + "epoch": 0.1299304694223023, + "grad_norm": 326.4592590332031, + "learning_rate": 9.975034571787394e-06, + "loss": 46.7844, + "step": 64320 + }, + { + "epoch": 0.12995067005498612, + "grad_norm": 664.6102905273438, + "learning_rate": 9.9749997207925e-06, + "loss": 52.0087, + "step": 64330 + }, + { + "epoch": 0.12997087068766994, + "grad_norm": 1101.6463623046875, + "learning_rate": 9.974964845550062e-06, + "loss": 36.4772, + "step": 64340 + }, + { + "epoch": 0.12999107132035376, + "grad_norm": 372.61932373046875, + "learning_rate": 9.974929946060246e-06, + "loss": 25.8474, + "step": 64350 + }, + { + "epoch": 0.13001127195303758, + "grad_norm": 405.5989074707031, + "learning_rate": 9.974895022323226e-06, + "loss": 23.0418, + "step": 64360 + }, + { + "epoch": 0.1300314725857214, + "grad_norm": 154.89332580566406, + "learning_rate": 9.974860074339173e-06, + "loss": 25.3169, + "step": 64370 + }, + { + "epoch": 0.1300516732184052, + "grad_norm": 984.13720703125, + "learning_rate": 9.974825102108251e-06, + "loss": 29.0367, + "step": 64380 + }, + { + "epoch": 0.130071873851089, + "grad_norm": 707.0607299804688, + "learning_rate": 9.974790105630639e-06, + "loss": 23.0206, + "step": 64390 + }, + { + "epoch": 0.13009207448377283, + "grad_norm": 607.7217407226562, + "learning_rate": 9.974755084906503e-06, 
+ "loss": 22.579, + "step": 64400 + }, + { + "epoch": 0.13011227511645665, + "grad_norm": 210.3593292236328, + "learning_rate": 9.974720039936012e-06, + "loss": 31.0888, + "step": 64410 + }, + { + "epoch": 0.13013247574914047, + "grad_norm": 390.9085693359375, + "learning_rate": 9.97468497071934e-06, + "loss": 18.0068, + "step": 64420 + }, + { + "epoch": 0.1301526763818243, + "grad_norm": 544.2200317382812, + "learning_rate": 9.974649877256657e-06, + "loss": 33.2558, + "step": 64430 + }, + { + "epoch": 0.13017287701450808, + "grad_norm": 478.2790222167969, + "learning_rate": 9.974614759548133e-06, + "loss": 21.6782, + "step": 64440 + }, + { + "epoch": 0.1301930776471919, + "grad_norm": 1261.015869140625, + "learning_rate": 9.97457961759394e-06, + "loss": 35.9209, + "step": 64450 + }, + { + "epoch": 0.13021327827987572, + "grad_norm": 182.3002166748047, + "learning_rate": 9.97454445139425e-06, + "loss": 28.252, + "step": 64460 + }, + { + "epoch": 0.13023347891255954, + "grad_norm": 259.7107238769531, + "learning_rate": 9.974509260949233e-06, + "loss": 22.5172, + "step": 64470 + }, + { + "epoch": 0.13025367954524336, + "grad_norm": 923.8392944335938, + "learning_rate": 9.97447404625906e-06, + "loss": 24.7331, + "step": 64480 + }, + { + "epoch": 0.13027388017792718, + "grad_norm": 728.4458618164062, + "learning_rate": 9.974438807323907e-06, + "loss": 30.6729, + "step": 64490 + }, + { + "epoch": 0.130294080810611, + "grad_norm": 342.2673645019531, + "learning_rate": 9.974403544143942e-06, + "loss": 27.3647, + "step": 64500 + }, + { + "epoch": 0.1303142814432948, + "grad_norm": 1431.777099609375, + "learning_rate": 9.974368256719335e-06, + "loss": 17.8087, + "step": 64510 + }, + { + "epoch": 0.1303344820759786, + "grad_norm": 767.341796875, + "learning_rate": 9.974332945050263e-06, + "loss": 16.4807, + "step": 64520 + }, + { + "epoch": 0.13035468270866243, + "grad_norm": 351.86724853515625, + "learning_rate": 9.974297609136895e-06, + "loss": 12.8071, + "step": 64530 + }, + { + "epoch": 0.13037488334134625, + "grad_norm": 826.7744750976562, + "learning_rate": 9.974262248979402e-06, + "loss": 22.566, + "step": 64540 + }, + { + "epoch": 0.13039508397403007, + "grad_norm": 854.947021484375, + "learning_rate": 9.97422686457796e-06, + "loss": 27.3139, + "step": 64550 + }, + { + "epoch": 0.1304152846067139, + "grad_norm": 271.3603820800781, + "learning_rate": 9.97419145593274e-06, + "loss": 25.9834, + "step": 64560 + }, + { + "epoch": 0.13043548523939769, + "grad_norm": 244.5535125732422, + "learning_rate": 9.974156023043912e-06, + "loss": 39.2915, + "step": 64570 + }, + { + "epoch": 0.1304556858720815, + "grad_norm": 781.3843994140625, + "learning_rate": 9.974120565911653e-06, + "loss": 30.444, + "step": 64580 + }, + { + "epoch": 0.13047588650476533, + "grad_norm": 391.172119140625, + "learning_rate": 9.974085084536132e-06, + "loss": 19.9876, + "step": 64590 + }, + { + "epoch": 0.13049608713744915, + "grad_norm": 277.1730041503906, + "learning_rate": 9.974049578917524e-06, + "loss": 10.8678, + "step": 64600 + }, + { + "epoch": 0.13051628777013297, + "grad_norm": 488.2147216796875, + "learning_rate": 9.974014049056003e-06, + "loss": 38.7072, + "step": 64610 + }, + { + "epoch": 0.13053648840281679, + "grad_norm": 635.7302856445312, + "learning_rate": 9.973978494951739e-06, + "loss": 23.2721, + "step": 64620 + }, + { + "epoch": 0.13055668903550058, + "grad_norm": 249.53155517578125, + "learning_rate": 9.973942916604907e-06, + "loss": 20.0015, + "step": 64630 + }, + { + "epoch": 0.1305768896681844, + 
"grad_norm": 248.79937744140625, + "learning_rate": 9.973907314015682e-06, + "loss": 33.9802, + "step": 64640 + }, + { + "epoch": 0.13059709030086822, + "grad_norm": 326.5070495605469, + "learning_rate": 9.973871687184234e-06, + "loss": 29.0385, + "step": 64650 + }, + { + "epoch": 0.13061729093355204, + "grad_norm": 515.3168334960938, + "learning_rate": 9.97383603611074e-06, + "loss": 37.1464, + "step": 64660 + }, + { + "epoch": 0.13063749156623586, + "grad_norm": 392.71917724609375, + "learning_rate": 9.973800360795372e-06, + "loss": 20.8531, + "step": 64670 + }, + { + "epoch": 0.13065769219891968, + "grad_norm": 380.54180908203125, + "learning_rate": 9.973764661238306e-06, + "loss": 14.8502, + "step": 64680 + }, + { + "epoch": 0.1306778928316035, + "grad_norm": 634.4970703125, + "learning_rate": 9.973728937439714e-06, + "loss": 19.7428, + "step": 64690 + }, + { + "epoch": 0.1306980934642873, + "grad_norm": 1988.073974609375, + "learning_rate": 9.973693189399767e-06, + "loss": 27.3053, + "step": 64700 + }, + { + "epoch": 0.1307182940969711, + "grad_norm": 496.10345458984375, + "learning_rate": 9.973657417118646e-06, + "loss": 30.6424, + "step": 64710 + }, + { + "epoch": 0.13073849472965493, + "grad_norm": 1122.1827392578125, + "learning_rate": 9.97362162059652e-06, + "loss": 54.9974, + "step": 64720 + }, + { + "epoch": 0.13075869536233875, + "grad_norm": 236.85916137695312, + "learning_rate": 9.973585799833567e-06, + "loss": 19.5452, + "step": 64730 + }, + { + "epoch": 0.13077889599502257, + "grad_norm": 589.6627807617188, + "learning_rate": 9.97354995482996e-06, + "loss": 15.7837, + "step": 64740 + }, + { + "epoch": 0.1307990966277064, + "grad_norm": 297.345703125, + "learning_rate": 9.973514085585871e-06, + "loss": 31.8557, + "step": 64750 + }, + { + "epoch": 0.13081929726039018, + "grad_norm": 140.3568878173828, + "learning_rate": 9.97347819210148e-06, + "loss": 23.0842, + "step": 64760 + }, + { + "epoch": 0.130839497893074, + "grad_norm": 843.7931518554688, + "learning_rate": 9.973442274376958e-06, + "loss": 18.599, + "step": 64770 + }, + { + "epoch": 0.13085969852575782, + "grad_norm": 771.300048828125, + "learning_rate": 9.973406332412484e-06, + "loss": 30.0886, + "step": 64780 + }, + { + "epoch": 0.13087989915844164, + "grad_norm": 551.4595336914062, + "learning_rate": 9.97337036620823e-06, + "loss": 35.5825, + "step": 64790 + }, + { + "epoch": 0.13090009979112546, + "grad_norm": 511.57830810546875, + "learning_rate": 9.973334375764372e-06, + "loss": 29.4059, + "step": 64800 + }, + { + "epoch": 0.13092030042380928, + "grad_norm": 481.27239990234375, + "learning_rate": 9.973298361081083e-06, + "loss": 29.5845, + "step": 64810 + }, + { + "epoch": 0.1309405010564931, + "grad_norm": 1035.55810546875, + "learning_rate": 9.973262322158544e-06, + "loss": 41.9542, + "step": 64820 + }, + { + "epoch": 0.1309607016891769, + "grad_norm": 915.2047729492188, + "learning_rate": 9.973226258996926e-06, + "loss": 19.5643, + "step": 64830 + }, + { + "epoch": 0.13098090232186071, + "grad_norm": 910.8612670898438, + "learning_rate": 9.973190171596407e-06, + "loss": 30.1054, + "step": 64840 + }, + { + "epoch": 0.13100110295454453, + "grad_norm": 542.8687133789062, + "learning_rate": 9.973154059957162e-06, + "loss": 20.7429, + "step": 64850 + }, + { + "epoch": 0.13102130358722835, + "grad_norm": 548.01904296875, + "learning_rate": 9.973117924079367e-06, + "loss": 32.7375, + "step": 64860 + }, + { + "epoch": 0.13104150421991217, + "grad_norm": 599.3499145507812, + "learning_rate": 9.973081763963199e-06, + 
"loss": 18.564, + "step": 64870 + }, + { + "epoch": 0.131061704852596, + "grad_norm": 360.1961975097656, + "learning_rate": 9.973045579608834e-06, + "loss": 23.948, + "step": 64880 + }, + { + "epoch": 0.1310819054852798, + "grad_norm": 4.230323314666748, + "learning_rate": 9.973009371016447e-06, + "loss": 21.289, + "step": 64890 + }, + { + "epoch": 0.1311021061179636, + "grad_norm": 274.7096252441406, + "learning_rate": 9.972973138186217e-06, + "loss": 28.9408, + "step": 64900 + }, + { + "epoch": 0.13112230675064743, + "grad_norm": 829.801025390625, + "learning_rate": 9.972936881118318e-06, + "loss": 48.0795, + "step": 64910 + }, + { + "epoch": 0.13114250738333125, + "grad_norm": 677.6478881835938, + "learning_rate": 9.972900599812928e-06, + "loss": 22.2759, + "step": 64920 + }, + { + "epoch": 0.13116270801601507, + "grad_norm": 723.1565551757812, + "learning_rate": 9.972864294270224e-06, + "loss": 34.0883, + "step": 64930 + }, + { + "epoch": 0.1311829086486989, + "grad_norm": 311.7381591796875, + "learning_rate": 9.972827964490382e-06, + "loss": 29.4282, + "step": 64940 + }, + { + "epoch": 0.13120310928138268, + "grad_norm": 153.3361358642578, + "learning_rate": 9.972791610473578e-06, + "loss": 17.1192, + "step": 64950 + }, + { + "epoch": 0.1312233099140665, + "grad_norm": 489.56915283203125, + "learning_rate": 9.972755232219992e-06, + "loss": 18.815, + "step": 64960 + }, + { + "epoch": 0.13124351054675032, + "grad_norm": 193.79891967773438, + "learning_rate": 9.972718829729802e-06, + "loss": 23.3586, + "step": 64970 + }, + { + "epoch": 0.13126371117943414, + "grad_norm": 437.9062194824219, + "learning_rate": 9.972682403003182e-06, + "loss": 30.5258, + "step": 64980 + }, + { + "epoch": 0.13128391181211796, + "grad_norm": 2628.37548828125, + "learning_rate": 9.972645952040311e-06, + "loss": 43.4292, + "step": 64990 + }, + { + "epoch": 0.13130411244480178, + "grad_norm": 966.9459228515625, + "learning_rate": 9.972609476841368e-06, + "loss": 30.0858, + "step": 65000 + }, + { + "epoch": 0.1313243130774856, + "grad_norm": 589.35888671875, + "learning_rate": 9.972572977406527e-06, + "loss": 18.7885, + "step": 65010 + }, + { + "epoch": 0.1313445137101694, + "grad_norm": 691.9627685546875, + "learning_rate": 9.97253645373597e-06, + "loss": 39.7634, + "step": 65020 + }, + { + "epoch": 0.1313647143428532, + "grad_norm": 296.74078369140625, + "learning_rate": 9.972499905829874e-06, + "loss": 20.8549, + "step": 65030 + }, + { + "epoch": 0.13138491497553703, + "grad_norm": 345.5241394042969, + "learning_rate": 9.972463333688416e-06, + "loss": 22.2938, + "step": 65040 + }, + { + "epoch": 0.13140511560822085, + "grad_norm": 407.33001708984375, + "learning_rate": 9.972426737311775e-06, + "loss": 80.9497, + "step": 65050 + }, + { + "epoch": 0.13142531624090467, + "grad_norm": 222.73822021484375, + "learning_rate": 9.972390116700128e-06, + "loss": 26.0368, + "step": 65060 + }, + { + "epoch": 0.1314455168735885, + "grad_norm": 840.1920776367188, + "learning_rate": 9.972353471853655e-06, + "loss": 39.2992, + "step": 65070 + }, + { + "epoch": 0.13146571750627228, + "grad_norm": 670.8307495117188, + "learning_rate": 9.972316802772536e-06, + "loss": 35.0744, + "step": 65080 + }, + { + "epoch": 0.1314859181389561, + "grad_norm": 267.5872802734375, + "learning_rate": 9.972280109456946e-06, + "loss": 15.6043, + "step": 65090 + }, + { + "epoch": 0.13150611877163992, + "grad_norm": 318.8644104003906, + "learning_rate": 9.972243391907068e-06, + "loss": 22.2779, + "step": 65100 + }, + { + "epoch": 0.13152631940432374, 
+ "grad_norm": 190.1090087890625, + "learning_rate": 9.972206650123077e-06, + "loss": 25.1073, + "step": 65110 + }, + { + "epoch": 0.13154652003700756, + "grad_norm": 1195.5474853515625, + "learning_rate": 9.972169884105155e-06, + "loss": 51.1655, + "step": 65120 + }, + { + "epoch": 0.13156672066969138, + "grad_norm": 401.41473388671875, + "learning_rate": 9.972133093853477e-06, + "loss": 23.0753, + "step": 65130 + }, + { + "epoch": 0.1315869213023752, + "grad_norm": 6.660244941711426, + "learning_rate": 9.972096279368228e-06, + "loss": 24.8062, + "step": 65140 + }, + { + "epoch": 0.131607121935059, + "grad_norm": 705.7124633789062, + "learning_rate": 9.972059440649584e-06, + "loss": 15.3535, + "step": 65150 + }, + { + "epoch": 0.13162732256774282, + "grad_norm": 452.8949890136719, + "learning_rate": 9.972022577697726e-06, + "loss": 22.1648, + "step": 65160 + }, + { + "epoch": 0.13164752320042664, + "grad_norm": 213.5918426513672, + "learning_rate": 9.971985690512834e-06, + "loss": 29.4293, + "step": 65170 + }, + { + "epoch": 0.13166772383311046, + "grad_norm": 435.3036804199219, + "learning_rate": 9.971948779095084e-06, + "loss": 24.4588, + "step": 65180 + }, + { + "epoch": 0.13168792446579428, + "grad_norm": 391.5770568847656, + "learning_rate": 9.97191184344466e-06, + "loss": 16.859, + "step": 65190 + }, + { + "epoch": 0.1317081250984781, + "grad_norm": 375.3039245605469, + "learning_rate": 9.97187488356174e-06, + "loss": 30.387, + "step": 65200 + }, + { + "epoch": 0.1317283257311619, + "grad_norm": 898.590087890625, + "learning_rate": 9.971837899446505e-06, + "loss": 21.8392, + "step": 65210 + }, + { + "epoch": 0.1317485263638457, + "grad_norm": 195.2491912841797, + "learning_rate": 9.971800891099137e-06, + "loss": 34.7816, + "step": 65220 + }, + { + "epoch": 0.13176872699652953, + "grad_norm": 457.738525390625, + "learning_rate": 9.971763858519812e-06, + "loss": 22.7771, + "step": 65230 + }, + { + "epoch": 0.13178892762921335, + "grad_norm": 596.8607177734375, + "learning_rate": 9.971726801708715e-06, + "loss": 37.7583, + "step": 65240 + }, + { + "epoch": 0.13180912826189717, + "grad_norm": 175.02639770507812, + "learning_rate": 9.971689720666024e-06, + "loss": 25.4858, + "step": 65250 + }, + { + "epoch": 0.131829328894581, + "grad_norm": 393.55499267578125, + "learning_rate": 9.97165261539192e-06, + "loss": 34.8065, + "step": 65260 + }, + { + "epoch": 0.13184952952726478, + "grad_norm": 489.8519287109375, + "learning_rate": 9.971615485886583e-06, + "loss": 26.5333, + "step": 65270 + }, + { + "epoch": 0.1318697301599486, + "grad_norm": 23.220930099487305, + "learning_rate": 9.971578332150197e-06, + "loss": 27.3347, + "step": 65280 + }, + { + "epoch": 0.13188993079263242, + "grad_norm": 729.2762451171875, + "learning_rate": 9.97154115418294e-06, + "loss": 31.484, + "step": 65290 + }, + { + "epoch": 0.13191013142531624, + "grad_norm": 443.9441223144531, + "learning_rate": 9.971503951984996e-06, + "loss": 26.0966, + "step": 65300 + }, + { + "epoch": 0.13193033205800006, + "grad_norm": 804.5036010742188, + "learning_rate": 9.971466725556542e-06, + "loss": 29.2958, + "step": 65310 + }, + { + "epoch": 0.13195053269068388, + "grad_norm": 887.5077514648438, + "learning_rate": 9.971429474897765e-06, + "loss": 29.7262, + "step": 65320 + }, + { + "epoch": 0.1319707333233677, + "grad_norm": 244.98841857910156, + "learning_rate": 9.971392200008842e-06, + "loss": 17.1568, + "step": 65330 + }, + { + "epoch": 0.1319909339560515, + "grad_norm": 267.2784423828125, + "learning_rate": 
9.971354900889955e-06, + "loss": 26.1696, + "step": 65340 + }, + { + "epoch": 0.1320111345887353, + "grad_norm": 1662.8267822265625, + "learning_rate": 9.97131757754129e-06, + "loss": 21.3175, + "step": 65350 + }, + { + "epoch": 0.13203133522141913, + "grad_norm": 412.65484619140625, + "learning_rate": 9.971280229963026e-06, + "loss": 20.529, + "step": 65360 + }, + { + "epoch": 0.13205153585410295, + "grad_norm": 1063.288330078125, + "learning_rate": 9.971242858155344e-06, + "loss": 23.6185, + "step": 65370 + }, + { + "epoch": 0.13207173648678677, + "grad_norm": 390.1273498535156, + "learning_rate": 9.971205462118427e-06, + "loss": 27.4855, + "step": 65380 + }, + { + "epoch": 0.1320919371194706, + "grad_norm": 226.97308349609375, + "learning_rate": 9.971168041852456e-06, + "loss": 23.0615, + "step": 65390 + }, + { + "epoch": 0.13211213775215438, + "grad_norm": 207.3575439453125, + "learning_rate": 9.971130597357618e-06, + "loss": 19.5395, + "step": 65400 + }, + { + "epoch": 0.1321323383848382, + "grad_norm": 101.44998931884766, + "learning_rate": 9.97109312863409e-06, + "loss": 15.154, + "step": 65410 + }, + { + "epoch": 0.13215253901752202, + "grad_norm": 793.5848388671875, + "learning_rate": 9.971055635682059e-06, + "loss": 36.1369, + "step": 65420 + }, + { + "epoch": 0.13217273965020584, + "grad_norm": 382.6589660644531, + "learning_rate": 9.971018118501706e-06, + "loss": 31.8379, + "step": 65430 + }, + { + "epoch": 0.13219294028288966, + "grad_norm": 679.6033325195312, + "learning_rate": 9.970980577093212e-06, + "loss": 30.3324, + "step": 65440 + }, + { + "epoch": 0.13221314091557348, + "grad_norm": 203.62864685058594, + "learning_rate": 9.970943011456762e-06, + "loss": 21.4908, + "step": 65450 + }, + { + "epoch": 0.1322333415482573, + "grad_norm": 303.47442626953125, + "learning_rate": 9.970905421592538e-06, + "loss": 24.6235, + "step": 65460 + }, + { + "epoch": 0.1322535421809411, + "grad_norm": 98.95892333984375, + "learning_rate": 9.970867807500725e-06, + "loss": 17.3525, + "step": 65470 + }, + { + "epoch": 0.13227374281362492, + "grad_norm": 339.5857849121094, + "learning_rate": 9.970830169181504e-06, + "loss": 23.0931, + "step": 65480 + }, + { + "epoch": 0.13229394344630874, + "grad_norm": 434.018798828125, + "learning_rate": 9.97079250663506e-06, + "loss": 21.3534, + "step": 65490 + }, + { + "epoch": 0.13231414407899256, + "grad_norm": 775.967041015625, + "learning_rate": 9.970754819861577e-06, + "loss": 38.1361, + "step": 65500 + }, + { + "epoch": 0.13233434471167638, + "grad_norm": 294.7892761230469, + "learning_rate": 9.97071710886124e-06, + "loss": 41.3853, + "step": 65510 + }, + { + "epoch": 0.1323545453443602, + "grad_norm": 288.3537902832031, + "learning_rate": 9.970679373634227e-06, + "loss": 23.1883, + "step": 65520 + }, + { + "epoch": 0.132374745977044, + "grad_norm": 668.0358276367188, + "learning_rate": 9.970641614180727e-06, + "loss": 28.7483, + "step": 65530 + }, + { + "epoch": 0.1323949466097278, + "grad_norm": 594.7171630859375, + "learning_rate": 9.970603830500923e-06, + "loss": 24.1122, + "step": 65540 + }, + { + "epoch": 0.13241514724241163, + "grad_norm": 376.3564453125, + "learning_rate": 9.970566022594996e-06, + "loss": 15.4006, + "step": 65550 + }, + { + "epoch": 0.13243534787509545, + "grad_norm": 929.987548828125, + "learning_rate": 9.970528190463136e-06, + "loss": 26.8913, + "step": 65560 + }, + { + "epoch": 0.13245554850777927, + "grad_norm": 254.50448608398438, + "learning_rate": 9.970490334105525e-06, + "loss": 28.261, + "step": 65570 + }, + { + 
"epoch": 0.1324757491404631, + "grad_norm": 626.42822265625, + "learning_rate": 9.970452453522344e-06, + "loss": 41.1738, + "step": 65580 + }, + { + "epoch": 0.13249594977314688, + "grad_norm": 231.1833953857422, + "learning_rate": 9.970414548713783e-06, + "loss": 19.4747, + "step": 65590 + }, + { + "epoch": 0.1325161504058307, + "grad_norm": 348.5114440917969, + "learning_rate": 9.970376619680024e-06, + "loss": 26.3212, + "step": 65600 + }, + { + "epoch": 0.13253635103851452, + "grad_norm": 442.5945129394531, + "learning_rate": 9.970338666421251e-06, + "loss": 23.4635, + "step": 65610 + }, + { + "epoch": 0.13255655167119834, + "grad_norm": 1400.3162841796875, + "learning_rate": 9.970300688937651e-06, + "loss": 51.6835, + "step": 65620 + }, + { + "epoch": 0.13257675230388216, + "grad_norm": 606.9361572265625, + "learning_rate": 9.970262687229409e-06, + "loss": 30.7905, + "step": 65630 + }, + { + "epoch": 0.13259695293656598, + "grad_norm": 134.6631622314453, + "learning_rate": 9.970224661296708e-06, + "loss": 12.5447, + "step": 65640 + }, + { + "epoch": 0.1326171535692498, + "grad_norm": 940.1969604492188, + "learning_rate": 9.970186611139736e-06, + "loss": 19.8421, + "step": 65650 + }, + { + "epoch": 0.1326373542019336, + "grad_norm": 123.28846740722656, + "learning_rate": 9.970148536758678e-06, + "loss": 29.372, + "step": 65660 + }, + { + "epoch": 0.1326575548346174, + "grad_norm": 271.18011474609375, + "learning_rate": 9.970110438153717e-06, + "loss": 55.829, + "step": 65670 + }, + { + "epoch": 0.13267775546730123, + "grad_norm": 936.2969970703125, + "learning_rate": 9.970072315325041e-06, + "loss": 37.135, + "step": 65680 + }, + { + "epoch": 0.13269795609998505, + "grad_norm": 471.40313720703125, + "learning_rate": 9.970034168272835e-06, + "loss": 30.9649, + "step": 65690 + }, + { + "epoch": 0.13271815673266887, + "grad_norm": 281.6181335449219, + "learning_rate": 9.969995996997285e-06, + "loss": 45.4568, + "step": 65700 + }, + { + "epoch": 0.1327383573653527, + "grad_norm": 494.97882080078125, + "learning_rate": 9.96995780149858e-06, + "loss": 17.6761, + "step": 65710 + }, + { + "epoch": 0.13275855799803649, + "grad_norm": 258.3773193359375, + "learning_rate": 9.969919581776902e-06, + "loss": 58.6749, + "step": 65720 + }, + { + "epoch": 0.1327787586307203, + "grad_norm": 736.1028442382812, + "learning_rate": 9.969881337832437e-06, + "loss": 29.7768, + "step": 65730 + }, + { + "epoch": 0.13279895926340413, + "grad_norm": 171.40736389160156, + "learning_rate": 9.969843069665375e-06, + "loss": 20.6034, + "step": 65740 + }, + { + "epoch": 0.13281915989608795, + "grad_norm": 645.6675415039062, + "learning_rate": 9.9698047772759e-06, + "loss": 25.7872, + "step": 65750 + }, + { + "epoch": 0.13283936052877177, + "grad_norm": 761.6134033203125, + "learning_rate": 9.969766460664199e-06, + "loss": 23.8913, + "step": 65760 + }, + { + "epoch": 0.13285956116145559, + "grad_norm": 463.9010925292969, + "learning_rate": 9.96972811983046e-06, + "loss": 29.2706, + "step": 65770 + }, + { + "epoch": 0.1328797617941394, + "grad_norm": 744.6932983398438, + "learning_rate": 9.969689754774868e-06, + "loss": 41.1497, + "step": 65780 + }, + { + "epoch": 0.1328999624268232, + "grad_norm": 113.17998504638672, + "learning_rate": 9.96965136549761e-06, + "loss": 24.2846, + "step": 65790 + }, + { + "epoch": 0.13292016305950702, + "grad_norm": 502.0255126953125, + "learning_rate": 9.969612951998874e-06, + "loss": 18.2136, + "step": 65800 + }, + { + "epoch": 0.13294036369219084, + "grad_norm": 218.131103515625, + 
"learning_rate": 9.96957451427885e-06, + "loss": 12.6895, + "step": 65810 + }, + { + "epoch": 0.13296056432487466, + "grad_norm": 779.2266845703125, + "learning_rate": 9.96953605233772e-06, + "loss": 29.5333, + "step": 65820 + }, + { + "epoch": 0.13298076495755848, + "grad_norm": 357.7973937988281, + "learning_rate": 9.969497566175675e-06, + "loss": 24.1586, + "step": 65830 + }, + { + "epoch": 0.1330009655902423, + "grad_norm": 298.0308837890625, + "learning_rate": 9.969459055792903e-06, + "loss": 25.5972, + "step": 65840 + }, + { + "epoch": 0.1330211662229261, + "grad_norm": 641.5197143554688, + "learning_rate": 9.969420521189587e-06, + "loss": 49.9216, + "step": 65850 + }, + { + "epoch": 0.1330413668556099, + "grad_norm": 617.6475219726562, + "learning_rate": 9.96938196236592e-06, + "loss": 26.327, + "step": 65860 + }, + { + "epoch": 0.13306156748829373, + "grad_norm": 931.670654296875, + "learning_rate": 9.96934337932209e-06, + "loss": 33.2053, + "step": 65870 + }, + { + "epoch": 0.13308176812097755, + "grad_norm": 334.64727783203125, + "learning_rate": 9.969304772058279e-06, + "loss": 30.9071, + "step": 65880 + }, + { + "epoch": 0.13310196875366137, + "grad_norm": 1005.65576171875, + "learning_rate": 9.969266140574682e-06, + "loss": 18.8181, + "step": 65890 + }, + { + "epoch": 0.1331221693863452, + "grad_norm": 423.1051940917969, + "learning_rate": 9.969227484871485e-06, + "loss": 38.5354, + "step": 65900 + }, + { + "epoch": 0.13314237001902898, + "grad_norm": 172.60694885253906, + "learning_rate": 9.969188804948872e-06, + "loss": 26.1588, + "step": 65910 + }, + { + "epoch": 0.1331625706517128, + "grad_norm": 11.49144172668457, + "learning_rate": 9.969150100807039e-06, + "loss": 23.5129, + "step": 65920 + }, + { + "epoch": 0.13318277128439662, + "grad_norm": 270.6551208496094, + "learning_rate": 9.969111372446171e-06, + "loss": 26.9372, + "step": 65930 + }, + { + "epoch": 0.13320297191708044, + "grad_norm": 278.15399169921875, + "learning_rate": 9.969072619866455e-06, + "loss": 27.4141, + "step": 65940 + }, + { + "epoch": 0.13322317254976426, + "grad_norm": 480.8581848144531, + "learning_rate": 9.969033843068083e-06, + "loss": 18.4563, + "step": 65950 + }, + { + "epoch": 0.13324337318244808, + "grad_norm": 7.052169322967529, + "learning_rate": 9.968995042051244e-06, + "loss": 26.8215, + "step": 65960 + }, + { + "epoch": 0.1332635738151319, + "grad_norm": 3.9065651893615723, + "learning_rate": 9.968956216816123e-06, + "loss": 25.9901, + "step": 65970 + }, + { + "epoch": 0.1332837744478157, + "grad_norm": 306.3456115722656, + "learning_rate": 9.968917367362914e-06, + "loss": 20.442, + "step": 65980 + }, + { + "epoch": 0.13330397508049951, + "grad_norm": 314.388427734375, + "learning_rate": 9.968878493691803e-06, + "loss": 22.7953, + "step": 65990 + }, + { + "epoch": 0.13332417571318333, + "grad_norm": 373.8533020019531, + "learning_rate": 9.968839595802982e-06, + "loss": 40.5255, + "step": 66000 + }, + { + "epoch": 0.13334437634586715, + "grad_norm": 40.60340881347656, + "learning_rate": 9.968800673696638e-06, + "loss": 25.608, + "step": 66010 + }, + { + "epoch": 0.13336457697855097, + "grad_norm": 156.1980438232422, + "learning_rate": 9.968761727372965e-06, + "loss": 19.8583, + "step": 66020 + }, + { + "epoch": 0.1333847776112348, + "grad_norm": 496.1346130371094, + "learning_rate": 9.968722756832148e-06, + "loss": 28.0034, + "step": 66030 + }, + { + "epoch": 0.1334049782439186, + "grad_norm": 536.6175537109375, + "learning_rate": 9.96868376207438e-06, + "loss": 25.7999, + "step": 66040 
+ }, + { + "epoch": 0.1334251788766024, + "grad_norm": 595.9449462890625, + "learning_rate": 9.968644743099848e-06, + "loss": 17.8054, + "step": 66050 + }, + { + "epoch": 0.13344537950928623, + "grad_norm": 683.5084838867188, + "learning_rate": 9.968605699908747e-06, + "loss": 17.1145, + "step": 66060 + }, + { + "epoch": 0.13346558014197005, + "grad_norm": 745.710205078125, + "learning_rate": 9.968566632501262e-06, + "loss": 36.371, + "step": 66070 + }, + { + "epoch": 0.13348578077465387, + "grad_norm": 324.8899230957031, + "learning_rate": 9.968527540877586e-06, + "loss": 27.8732, + "step": 66080 + }, + { + "epoch": 0.1335059814073377, + "grad_norm": 488.1936950683594, + "learning_rate": 9.96848842503791e-06, + "loss": 18.805, + "step": 66090 + }, + { + "epoch": 0.1335261820400215, + "grad_norm": 332.4110412597656, + "learning_rate": 9.968449284982424e-06, + "loss": 25.6441, + "step": 66100 + }, + { + "epoch": 0.1335463826727053, + "grad_norm": 494.11724853515625, + "learning_rate": 9.968410120711321e-06, + "loss": 24.8029, + "step": 66110 + }, + { + "epoch": 0.13356658330538912, + "grad_norm": 397.534912109375, + "learning_rate": 9.968370932224787e-06, + "loss": 35.6343, + "step": 66120 + }, + { + "epoch": 0.13358678393807294, + "grad_norm": 884.9349975585938, + "learning_rate": 9.968331719523015e-06, + "loss": 18.9939, + "step": 66130 + }, + { + "epoch": 0.13360698457075676, + "grad_norm": 329.7498779296875, + "learning_rate": 9.968292482606199e-06, + "loss": 22.9038, + "step": 66140 + }, + { + "epoch": 0.13362718520344058, + "grad_norm": 852.6181030273438, + "learning_rate": 9.968253221474527e-06, + "loss": 42.3823, + "step": 66150 + }, + { + "epoch": 0.1336473858361244, + "grad_norm": 490.7461242675781, + "learning_rate": 9.96821393612819e-06, + "loss": 19.3947, + "step": 66160 + }, + { + "epoch": 0.1336675864688082, + "grad_norm": 0.002593101467937231, + "learning_rate": 9.968174626567382e-06, + "loss": 13.4255, + "step": 66170 + }, + { + "epoch": 0.133687787101492, + "grad_norm": 390.745361328125, + "learning_rate": 9.968135292792294e-06, + "loss": 41.6445, + "step": 66180 + }, + { + "epoch": 0.13370798773417583, + "grad_norm": 767.7255859375, + "learning_rate": 9.968095934803116e-06, + "loss": 36.6521, + "step": 66190 + }, + { + "epoch": 0.13372818836685965, + "grad_norm": 296.0707702636719, + "learning_rate": 9.968056552600043e-06, + "loss": 31.3354, + "step": 66200 + }, + { + "epoch": 0.13374838899954347, + "grad_norm": 317.9017639160156, + "learning_rate": 9.968017146183263e-06, + "loss": 29.1129, + "step": 66210 + }, + { + "epoch": 0.1337685896322273, + "grad_norm": 4.721127986907959, + "learning_rate": 9.967977715552972e-06, + "loss": 34.9939, + "step": 66220 + }, + { + "epoch": 0.13378879026491108, + "grad_norm": 666.0792846679688, + "learning_rate": 9.967938260709357e-06, + "loss": 34.0891, + "step": 66230 + }, + { + "epoch": 0.1338089908975949, + "grad_norm": 283.35662841796875, + "learning_rate": 9.967898781652616e-06, + "loss": 16.9283, + "step": 66240 + }, + { + "epoch": 0.13382919153027872, + "grad_norm": 798.3267822265625, + "learning_rate": 9.967859278382939e-06, + "loss": 18.6469, + "step": 66250 + }, + { + "epoch": 0.13384939216296254, + "grad_norm": 0.20100249350070953, + "learning_rate": 9.967819750900517e-06, + "loss": 33.2351, + "step": 66260 + }, + { + "epoch": 0.13386959279564636, + "grad_norm": 469.26519775390625, + "learning_rate": 9.967780199205544e-06, + "loss": 23.6421, + "step": 66270 + }, + { + "epoch": 0.13388979342833018, + "grad_norm": 
317.48565673828125, + "learning_rate": 9.967740623298214e-06, + "loss": 29.4715, + "step": 66280 + }, + { + "epoch": 0.133909994061014, + "grad_norm": 375.7717590332031, + "learning_rate": 9.967701023178717e-06, + "loss": 22.9257, + "step": 66290 + }, + { + "epoch": 0.1339301946936978, + "grad_norm": 761.9866943359375, + "learning_rate": 9.96766139884725e-06, + "loss": 24.6845, + "step": 66300 + }, + { + "epoch": 0.13395039532638162, + "grad_norm": 102.87804412841797, + "learning_rate": 9.967621750304002e-06, + "loss": 26.4663, + "step": 66310 + }, + { + "epoch": 0.13397059595906544, + "grad_norm": 135.01455688476562, + "learning_rate": 9.96758207754917e-06, + "loss": 22.0731, + "step": 66320 + }, + { + "epoch": 0.13399079659174926, + "grad_norm": 464.4028015136719, + "learning_rate": 9.967542380582944e-06, + "loss": 32.7973, + "step": 66330 + }, + { + "epoch": 0.13401099722443308, + "grad_norm": 1074.724365234375, + "learning_rate": 9.96750265940552e-06, + "loss": 21.3063, + "step": 66340 + }, + { + "epoch": 0.1340311978571169, + "grad_norm": 1100.5205078125, + "learning_rate": 9.967462914017087e-06, + "loss": 26.3388, + "step": 66350 + }, + { + "epoch": 0.1340513984898007, + "grad_norm": 665.2679443359375, + "learning_rate": 9.967423144417847e-06, + "loss": 17.4782, + "step": 66360 + }, + { + "epoch": 0.1340715991224845, + "grad_norm": 401.9553527832031, + "learning_rate": 9.967383350607986e-06, + "loss": 20.9634, + "step": 66370 + }, + { + "epoch": 0.13409179975516833, + "grad_norm": 857.6044921875, + "learning_rate": 9.967343532587701e-06, + "loss": 25.9167, + "step": 66380 + }, + { + "epoch": 0.13411200038785215, + "grad_norm": 273.08056640625, + "learning_rate": 9.967303690357189e-06, + "loss": 22.9494, + "step": 66390 + }, + { + "epoch": 0.13413220102053597, + "grad_norm": 339.6724853515625, + "learning_rate": 9.967263823916638e-06, + "loss": 22.062, + "step": 66400 + }, + { + "epoch": 0.1341524016532198, + "grad_norm": 499.204345703125, + "learning_rate": 9.967223933266247e-06, + "loss": 15.3817, + "step": 66410 + }, + { + "epoch": 0.1341726022859036, + "grad_norm": 450.8316345214844, + "learning_rate": 9.96718401840621e-06, + "loss": 19.4555, + "step": 66420 + }, + { + "epoch": 0.1341928029185874, + "grad_norm": 155.22021484375, + "learning_rate": 9.96714407933672e-06, + "loss": 22.9686, + "step": 66430 + }, + { + "epoch": 0.13421300355127122, + "grad_norm": 405.15850830078125, + "learning_rate": 9.96710411605797e-06, + "loss": 12.4172, + "step": 66440 + }, + { + "epoch": 0.13423320418395504, + "grad_norm": 691.965087890625, + "learning_rate": 9.96706412857016e-06, + "loss": 33.4339, + "step": 66450 + }, + { + "epoch": 0.13425340481663886, + "grad_norm": 133.9887237548828, + "learning_rate": 9.967024116873481e-06, + "loss": 13.0019, + "step": 66460 + }, + { + "epoch": 0.13427360544932268, + "grad_norm": 767.7424926757812, + "learning_rate": 9.966984080968128e-06, + "loss": 23.3655, + "step": 66470 + }, + { + "epoch": 0.1342938060820065, + "grad_norm": 147.04881286621094, + "learning_rate": 9.966944020854297e-06, + "loss": 19.7351, + "step": 66480 + }, + { + "epoch": 0.1343140067146903, + "grad_norm": 57.78780746459961, + "learning_rate": 9.966903936532184e-06, + "loss": 25.1124, + "step": 66490 + }, + { + "epoch": 0.1343342073473741, + "grad_norm": 1054.9471435546875, + "learning_rate": 9.966863828001982e-06, + "loss": 30.6405, + "step": 66500 + }, + { + "epoch": 0.13435440798005793, + "grad_norm": 239.92677307128906, + "learning_rate": 9.96682369526389e-06, + "loss": 21.5139, + 
"step": 66510 + }, + { + "epoch": 0.13437460861274175, + "grad_norm": 739.2789306640625, + "learning_rate": 9.966783538318101e-06, + "loss": 20.7183, + "step": 66520 + }, + { + "epoch": 0.13439480924542557, + "grad_norm": 720.2469482421875, + "learning_rate": 9.966743357164812e-06, + "loss": 41.5077, + "step": 66530 + }, + { + "epoch": 0.1344150098781094, + "grad_norm": 863.4821166992188, + "learning_rate": 9.966703151804219e-06, + "loss": 20.7125, + "step": 66540 + }, + { + "epoch": 0.13443521051079318, + "grad_norm": 230.131591796875, + "learning_rate": 9.966662922236515e-06, + "loss": 25.8556, + "step": 66550 + }, + { + "epoch": 0.134455411143477, + "grad_norm": 183.84738159179688, + "learning_rate": 9.966622668461899e-06, + "loss": 26.4966, + "step": 66560 + }, + { + "epoch": 0.13447561177616082, + "grad_norm": 575.3276977539062, + "learning_rate": 9.966582390480567e-06, + "loss": 18.8949, + "step": 66570 + }, + { + "epoch": 0.13449581240884464, + "grad_norm": 283.83489990234375, + "learning_rate": 9.966542088292714e-06, + "loss": 32.3473, + "step": 66580 + }, + { + "epoch": 0.13451601304152846, + "grad_norm": 466.7230529785156, + "learning_rate": 9.96650176189854e-06, + "loss": 23.7041, + "step": 66590 + }, + { + "epoch": 0.13453621367421228, + "grad_norm": 392.8020324707031, + "learning_rate": 9.966461411298235e-06, + "loss": 22.3975, + "step": 66600 + }, + { + "epoch": 0.1345564143068961, + "grad_norm": 194.2777557373047, + "learning_rate": 9.966421036492003e-06, + "loss": 10.7699, + "step": 66610 + }, + { + "epoch": 0.1345766149395799, + "grad_norm": 1023.65185546875, + "learning_rate": 9.966380637480034e-06, + "loss": 32.0953, + "step": 66620 + }, + { + "epoch": 0.13459681557226372, + "grad_norm": 158.03370666503906, + "learning_rate": 9.96634021426253e-06, + "loss": 24.5801, + "step": 66630 + }, + { + "epoch": 0.13461701620494754, + "grad_norm": 229.6029510498047, + "learning_rate": 9.966299766839685e-06, + "loss": 42.3127, + "step": 66640 + }, + { + "epoch": 0.13463721683763136, + "grad_norm": 418.8670654296875, + "learning_rate": 9.966259295211698e-06, + "loss": 14.8902, + "step": 66650 + }, + { + "epoch": 0.13465741747031518, + "grad_norm": 314.76324462890625, + "learning_rate": 9.966218799378766e-06, + "loss": 35.0342, + "step": 66660 + }, + { + "epoch": 0.134677618102999, + "grad_norm": 417.505615234375, + "learning_rate": 9.966178279341084e-06, + "loss": 50.4991, + "step": 66670 + }, + { + "epoch": 0.1346978187356828, + "grad_norm": 613.02197265625, + "learning_rate": 9.966137735098853e-06, + "loss": 39.8462, + "step": 66680 + }, + { + "epoch": 0.1347180193683666, + "grad_norm": 2.145066261291504, + "learning_rate": 9.966097166652268e-06, + "loss": 27.7255, + "step": 66690 + }, + { + "epoch": 0.13473822000105043, + "grad_norm": 361.9964904785156, + "learning_rate": 9.966056574001528e-06, + "loss": 23.7623, + "step": 66700 + }, + { + "epoch": 0.13475842063373425, + "grad_norm": 327.3876037597656, + "learning_rate": 9.966015957146832e-06, + "loss": 28.5767, + "step": 66710 + }, + { + "epoch": 0.13477862126641807, + "grad_norm": 237.48236083984375, + "learning_rate": 9.965975316088377e-06, + "loss": 16.9549, + "step": 66720 + }, + { + "epoch": 0.1347988218991019, + "grad_norm": 196.621337890625, + "learning_rate": 9.96593465082636e-06, + "loss": 26.1039, + "step": 66730 + }, + { + "epoch": 0.1348190225317857, + "grad_norm": 177.80996704101562, + "learning_rate": 9.965893961360977e-06, + "loss": 18.6695, + "step": 66740 + }, + { + "epoch": 0.1348392231644695, + "grad_norm": 
460.58953857421875, + "learning_rate": 9.965853247692433e-06, + "loss": 30.0823, + "step": 66750 + }, + { + "epoch": 0.13485942379715332, + "grad_norm": 277.8708801269531, + "learning_rate": 9.965812509820918e-06, + "loss": 13.5059, + "step": 66760 + }, + { + "epoch": 0.13487962442983714, + "grad_norm": 351.21954345703125, + "learning_rate": 9.965771747746638e-06, + "loss": 29.4535, + "step": 66770 + }, + { + "epoch": 0.13489982506252096, + "grad_norm": 301.07952880859375, + "learning_rate": 9.96573096146979e-06, + "loss": 11.7858, + "step": 66780 + }, + { + "epoch": 0.13492002569520478, + "grad_norm": 314.43585205078125, + "learning_rate": 9.96569015099057e-06, + "loss": 24.5155, + "step": 66790 + }, + { + "epoch": 0.1349402263278886, + "grad_norm": 499.5002746582031, + "learning_rate": 9.965649316309178e-06, + "loss": 13.3254, + "step": 66800 + }, + { + "epoch": 0.1349604269605724, + "grad_norm": 196.3819580078125, + "learning_rate": 9.965608457425813e-06, + "loss": 16.0215, + "step": 66810 + }, + { + "epoch": 0.1349806275932562, + "grad_norm": 305.2103576660156, + "learning_rate": 9.965567574340676e-06, + "loss": 20.4752, + "step": 66820 + }, + { + "epoch": 0.13500082822594003, + "grad_norm": 798.6466064453125, + "learning_rate": 9.965526667053964e-06, + "loss": 36.875, + "step": 66830 + }, + { + "epoch": 0.13502102885862385, + "grad_norm": 1179.1925048828125, + "learning_rate": 9.965485735565878e-06, + "loss": 32.7117, + "step": 66840 + }, + { + "epoch": 0.13504122949130767, + "grad_norm": 803.5941772460938, + "learning_rate": 9.965444779876618e-06, + "loss": 26.9367, + "step": 66850 + }, + { + "epoch": 0.1350614301239915, + "grad_norm": 504.97418212890625, + "learning_rate": 9.96540379998638e-06, + "loss": 46.2716, + "step": 66860 + }, + { + "epoch": 0.13508163075667529, + "grad_norm": 356.00054931640625, + "learning_rate": 9.965362795895368e-06, + "loss": 26.5936, + "step": 66870 + }, + { + "epoch": 0.1351018313893591, + "grad_norm": 173.8934783935547, + "learning_rate": 9.965321767603778e-06, + "loss": 24.5727, + "step": 66880 + }, + { + "epoch": 0.13512203202204293, + "grad_norm": 603.3222045898438, + "learning_rate": 9.965280715111814e-06, + "loss": 22.2474, + "step": 66890 + }, + { + "epoch": 0.13514223265472675, + "grad_norm": 259.8135986328125, + "learning_rate": 9.965239638419673e-06, + "loss": 37.9913, + "step": 66900 + }, + { + "epoch": 0.13516243328741057, + "grad_norm": 754.8200073242188, + "learning_rate": 9.965198537527556e-06, + "loss": 24.2608, + "step": 66910 + }, + { + "epoch": 0.13518263392009439, + "grad_norm": 705.2947387695312, + "learning_rate": 9.965157412435663e-06, + "loss": 18.6022, + "step": 66920 + }, + { + "epoch": 0.1352028345527782, + "grad_norm": 501.9771728515625, + "learning_rate": 9.965116263144196e-06, + "loss": 19.7913, + "step": 66930 + }, + { + "epoch": 0.135223035185462, + "grad_norm": 1352.078369140625, + "learning_rate": 9.965075089653354e-06, + "loss": 37.3073, + "step": 66940 + }, + { + "epoch": 0.13524323581814582, + "grad_norm": 667.0462646484375, + "learning_rate": 9.965033891963338e-06, + "loss": 16.9303, + "step": 66950 + }, + { + "epoch": 0.13526343645082964, + "grad_norm": 216.81114196777344, + "learning_rate": 9.96499267007435e-06, + "loss": 19.2859, + "step": 66960 + }, + { + "epoch": 0.13528363708351346, + "grad_norm": 225.5086212158203, + "learning_rate": 9.964951423986588e-06, + "loss": 11.4455, + "step": 66970 + }, + { + "epoch": 0.13530383771619728, + "grad_norm": 167.20376586914062, + "learning_rate": 9.964910153700258e-06, + 
"loss": 36.076, + "step": 66980 + }, + { + "epoch": 0.1353240383488811, + "grad_norm": 449.9119567871094, + "learning_rate": 9.964868859215555e-06, + "loss": 14.487, + "step": 66990 + }, + { + "epoch": 0.1353442389815649, + "grad_norm": 344.0535888671875, + "learning_rate": 9.964827540532685e-06, + "loss": 21.6166, + "step": 67000 + }, + { + "epoch": 0.1353644396142487, + "grad_norm": 555.558837890625, + "learning_rate": 9.964786197651848e-06, + "loss": 40.5872, + "step": 67010 + }, + { + "epoch": 0.13538464024693253, + "grad_norm": 415.7837219238281, + "learning_rate": 9.964744830573245e-06, + "loss": 29.0629, + "step": 67020 + }, + { + "epoch": 0.13540484087961635, + "grad_norm": 190.95452880859375, + "learning_rate": 9.964703439297076e-06, + "loss": 38.2105, + "step": 67030 + }, + { + "epoch": 0.13542504151230017, + "grad_norm": 420.1988830566406, + "learning_rate": 9.964662023823548e-06, + "loss": 39.1838, + "step": 67040 + }, + { + "epoch": 0.135445242144984, + "grad_norm": 630.7103881835938, + "learning_rate": 9.964620584152858e-06, + "loss": 21.7209, + "step": 67050 + }, + { + "epoch": 0.13546544277766778, + "grad_norm": 555.09765625, + "learning_rate": 9.964579120285208e-06, + "loss": 38.1503, + "step": 67060 + }, + { + "epoch": 0.1354856434103516, + "grad_norm": 399.41827392578125, + "learning_rate": 9.964537632220801e-06, + "loss": 16.5488, + "step": 67070 + }, + { + "epoch": 0.13550584404303542, + "grad_norm": 541.5283203125, + "learning_rate": 9.964496119959842e-06, + "loss": 24.1675, + "step": 67080 + }, + { + "epoch": 0.13552604467571924, + "grad_norm": 724.1858520507812, + "learning_rate": 9.96445458350253e-06, + "loss": 18.6799, + "step": 67090 + }, + { + "epoch": 0.13554624530840306, + "grad_norm": 88.37238311767578, + "learning_rate": 9.964413022849069e-06, + "loss": 15.1643, + "step": 67100 + }, + { + "epoch": 0.13556644594108688, + "grad_norm": 329.0193176269531, + "learning_rate": 9.964371437999661e-06, + "loss": 16.1981, + "step": 67110 + }, + { + "epoch": 0.1355866465737707, + "grad_norm": 1264.38818359375, + "learning_rate": 9.96432982895451e-06, + "loss": 26.0559, + "step": 67120 + }, + { + "epoch": 0.1356068472064545, + "grad_norm": 255.5403289794922, + "learning_rate": 9.964288195713814e-06, + "loss": 18.634, + "step": 67130 + }, + { + "epoch": 0.13562704783913831, + "grad_norm": 536.2504272460938, + "learning_rate": 9.964246538277782e-06, + "loss": 21.1002, + "step": 67140 + }, + { + "epoch": 0.13564724847182213, + "grad_norm": 286.3037109375, + "learning_rate": 9.964204856646613e-06, + "loss": 33.4804, + "step": 67150 + }, + { + "epoch": 0.13566744910450595, + "grad_norm": 717.703369140625, + "learning_rate": 9.964163150820512e-06, + "loss": 27.5898, + "step": 67160 + }, + { + "epoch": 0.13568764973718977, + "grad_norm": 630.5046997070312, + "learning_rate": 9.964121420799682e-06, + "loss": 21.9463, + "step": 67170 + }, + { + "epoch": 0.1357078503698736, + "grad_norm": 434.26190185546875, + "learning_rate": 9.964079666584327e-06, + "loss": 28.9183, + "step": 67180 + }, + { + "epoch": 0.1357280510025574, + "grad_norm": 521.5183715820312, + "learning_rate": 9.96403788817465e-06, + "loss": 27.702, + "step": 67190 + }, + { + "epoch": 0.1357482516352412, + "grad_norm": 243.92129516601562, + "learning_rate": 9.963996085570854e-06, + "loss": 20.1031, + "step": 67200 + }, + { + "epoch": 0.13576845226792503, + "grad_norm": 271.4411926269531, + "learning_rate": 9.963954258773143e-06, + "loss": 30.0734, + "step": 67210 + }, + { + "epoch": 0.13578865290060885, + 
"grad_norm": 767.510009765625, + "learning_rate": 9.963912407781721e-06, + "loss": 40.2711, + "step": 67220 + }, + { + "epoch": 0.13580885353329267, + "grad_norm": 553.5968627929688, + "learning_rate": 9.963870532596791e-06, + "loss": 24.5075, + "step": 67230 + }, + { + "epoch": 0.1358290541659765, + "grad_norm": 15.026395797729492, + "learning_rate": 9.96382863321856e-06, + "loss": 34.9113, + "step": 67240 + }, + { + "epoch": 0.1358492547986603, + "grad_norm": 391.4563293457031, + "learning_rate": 9.963786709647228e-06, + "loss": 21.575, + "step": 67250 + }, + { + "epoch": 0.1358694554313441, + "grad_norm": 361.42437744140625, + "learning_rate": 9.963744761883003e-06, + "loss": 30.4524, + "step": 67260 + }, + { + "epoch": 0.13588965606402792, + "grad_norm": 395.6061096191406, + "learning_rate": 9.963702789926089e-06, + "loss": 26.6612, + "step": 67270 + }, + { + "epoch": 0.13590985669671174, + "grad_norm": 1625.7520751953125, + "learning_rate": 9.963660793776689e-06, + "loss": 28.8253, + "step": 67280 + }, + { + "epoch": 0.13593005732939556, + "grad_norm": 562.817626953125, + "learning_rate": 9.963618773435006e-06, + "loss": 28.3572, + "step": 67290 + }, + { + "epoch": 0.13595025796207938, + "grad_norm": 445.7143859863281, + "learning_rate": 9.96357672890125e-06, + "loss": 55.1681, + "step": 67300 + }, + { + "epoch": 0.1359704585947632, + "grad_norm": 429.44384765625, + "learning_rate": 9.963534660175622e-06, + "loss": 18.6562, + "step": 67310 + }, + { + "epoch": 0.135990659227447, + "grad_norm": 275.9927062988281, + "learning_rate": 9.963492567258327e-06, + "loss": 30.0211, + "step": 67320 + }, + { + "epoch": 0.1360108598601308, + "grad_norm": 510.2088928222656, + "learning_rate": 9.963450450149572e-06, + "loss": 19.8133, + "step": 67330 + }, + { + "epoch": 0.13603106049281463, + "grad_norm": 1253.2021484375, + "learning_rate": 9.963408308849563e-06, + "loss": 38.2961, + "step": 67340 + }, + { + "epoch": 0.13605126112549845, + "grad_norm": 325.6735534667969, + "learning_rate": 9.963366143358502e-06, + "loss": 30.1534, + "step": 67350 + }, + { + "epoch": 0.13607146175818227, + "grad_norm": 878.3908081054688, + "learning_rate": 9.963323953676599e-06, + "loss": 31.5037, + "step": 67360 + }, + { + "epoch": 0.1360916623908661, + "grad_norm": 10.928342819213867, + "learning_rate": 9.963281739804054e-06, + "loss": 39.6315, + "step": 67370 + }, + { + "epoch": 0.13611186302354988, + "grad_norm": 400.7942810058594, + "learning_rate": 9.963239501741076e-06, + "loss": 27.5854, + "step": 67380 + }, + { + "epoch": 0.1361320636562337, + "grad_norm": 572.9030151367188, + "learning_rate": 9.963197239487871e-06, + "loss": 14.9577, + "step": 67390 + }, + { + "epoch": 0.13615226428891752, + "grad_norm": 394.2555236816406, + "learning_rate": 9.963154953044646e-06, + "loss": 21.3841, + "step": 67400 + }, + { + "epoch": 0.13617246492160134, + "grad_norm": 450.9844055175781, + "learning_rate": 9.963112642411606e-06, + "loss": 17.7634, + "step": 67410 + }, + { + "epoch": 0.13619266555428516, + "grad_norm": 335.7803649902344, + "learning_rate": 9.963070307588955e-06, + "loss": 16.0571, + "step": 67420 + }, + { + "epoch": 0.13621286618696898, + "grad_norm": 833.5035400390625, + "learning_rate": 9.963027948576902e-06, + "loss": 41.7651, + "step": 67430 + }, + { + "epoch": 0.1362330668196528, + "grad_norm": 54.94221115112305, + "learning_rate": 9.96298556537565e-06, + "loss": 18.6648, + "step": 67440 + }, + { + "epoch": 0.1362532674523366, + "grad_norm": 461.9586181640625, + "learning_rate": 9.962943157985412e-06, + 
"loss": 54.466, + "step": 67450 + }, + { + "epoch": 0.13627346808502042, + "grad_norm": 416.55413818359375, + "learning_rate": 9.96290072640639e-06, + "loss": 25.2187, + "step": 67460 + }, + { + "epoch": 0.13629366871770424, + "grad_norm": 100.7286148071289, + "learning_rate": 9.962858270638793e-06, + "loss": 27.7973, + "step": 67470 + }, + { + "epoch": 0.13631386935038806, + "grad_norm": 581.2256469726562, + "learning_rate": 9.962815790682825e-06, + "loss": 18.5809, + "step": 67480 + }, + { + "epoch": 0.13633406998307188, + "grad_norm": 106.12641906738281, + "learning_rate": 9.962773286538696e-06, + "loss": 12.7785, + "step": 67490 + }, + { + "epoch": 0.1363542706157557, + "grad_norm": 380.1458740234375, + "learning_rate": 9.962730758206612e-06, + "loss": 23.1059, + "step": 67500 + }, + { + "epoch": 0.1363744712484395, + "grad_norm": 300.8750305175781, + "learning_rate": 9.962688205686778e-06, + "loss": 22.584, + "step": 67510 + }, + { + "epoch": 0.1363946718811233, + "grad_norm": 13.970352172851562, + "learning_rate": 9.962645628979406e-06, + "loss": 30.345, + "step": 67520 + }, + { + "epoch": 0.13641487251380713, + "grad_norm": 391.0465393066406, + "learning_rate": 9.962603028084699e-06, + "loss": 15.2943, + "step": 67530 + }, + { + "epoch": 0.13643507314649095, + "grad_norm": 475.7255859375, + "learning_rate": 9.962560403002868e-06, + "loss": 33.4532, + "step": 67540 + }, + { + "epoch": 0.13645527377917477, + "grad_norm": 274.9440612792969, + "learning_rate": 9.96251775373412e-06, + "loss": 27.1277, + "step": 67550 + }, + { + "epoch": 0.1364754744118586, + "grad_norm": 575.31396484375, + "learning_rate": 9.962475080278662e-06, + "loss": 19.352, + "step": 67560 + }, + { + "epoch": 0.1364956750445424, + "grad_norm": 339.14263916015625, + "learning_rate": 9.9624323826367e-06, + "loss": 12.814, + "step": 67570 + }, + { + "epoch": 0.1365158756772262, + "grad_norm": 429.7268371582031, + "learning_rate": 9.962389660808447e-06, + "loss": 25.122, + "step": 67580 + }, + { + "epoch": 0.13653607630991002, + "grad_norm": 527.354736328125, + "learning_rate": 9.96234691479411e-06, + "loss": 38.8722, + "step": 67590 + }, + { + "epoch": 0.13655627694259384, + "grad_norm": 259.0694274902344, + "learning_rate": 9.962304144593893e-06, + "loss": 24.0669, + "step": 67600 + }, + { + "epoch": 0.13657647757527766, + "grad_norm": 2309.948974609375, + "learning_rate": 9.962261350208008e-06, + "loss": 27.341, + "step": 67610 + }, + { + "epoch": 0.13659667820796148, + "grad_norm": 381.55816650390625, + "learning_rate": 9.962218531636664e-06, + "loss": 51.6446, + "step": 67620 + }, + { + "epoch": 0.1366168788406453, + "grad_norm": 0.0, + "learning_rate": 9.962175688880067e-06, + "loss": 24.9652, + "step": 67630 + }, + { + "epoch": 0.1366370794733291, + "grad_norm": 463.18341064453125, + "learning_rate": 9.96213282193843e-06, + "loss": 11.8149, + "step": 67640 + }, + { + "epoch": 0.1366572801060129, + "grad_norm": 388.6172180175781, + "learning_rate": 9.962089930811959e-06, + "loss": 29.2907, + "step": 67650 + }, + { + "epoch": 0.13667748073869673, + "grad_norm": 566.1581420898438, + "learning_rate": 9.962047015500861e-06, + "loss": 38.5838, + "step": 67660 + }, + { + "epoch": 0.13669768137138055, + "grad_norm": 1256.208984375, + "learning_rate": 9.96200407600535e-06, + "loss": 28.8538, + "step": 67670 + }, + { + "epoch": 0.13671788200406437, + "grad_norm": 573.28955078125, + "learning_rate": 9.961961112325633e-06, + "loss": 22.7216, + "step": 67680 + }, + { + "epoch": 0.1367380826367482, + "grad_norm": 
205.13772583007812, + "learning_rate": 9.961918124461918e-06, + "loss": 26.4684, + "step": 67690 + }, + { + "epoch": 0.13675828326943198, + "grad_norm": 352.84588623046875, + "learning_rate": 9.961875112414417e-06, + "loss": 22.3335, + "step": 67700 + }, + { + "epoch": 0.1367784839021158, + "grad_norm": 215.87574768066406, + "learning_rate": 9.961832076183337e-06, + "loss": 30.561, + "step": 67710 + }, + { + "epoch": 0.13679868453479962, + "grad_norm": 438.83349609375, + "learning_rate": 9.96178901576889e-06, + "loss": 18.9253, + "step": 67720 + }, + { + "epoch": 0.13681888516748344, + "grad_norm": 502.7414245605469, + "learning_rate": 9.961745931171288e-06, + "loss": 24.2419, + "step": 67730 + }, + { + "epoch": 0.13683908580016726, + "grad_norm": 333.0059509277344, + "learning_rate": 9.961702822390735e-06, + "loss": 11.7589, + "step": 67740 + }, + { + "epoch": 0.13685928643285108, + "grad_norm": 112.25717163085938, + "learning_rate": 9.961659689427444e-06, + "loss": 37.3995, + "step": 67750 + }, + { + "epoch": 0.1368794870655349, + "grad_norm": 359.86376953125, + "learning_rate": 9.961616532281626e-06, + "loss": 34.1306, + "step": 67760 + }, + { + "epoch": 0.1368996876982187, + "grad_norm": 108.73788452148438, + "learning_rate": 9.961573350953491e-06, + "loss": 18.536, + "step": 67770 + }, + { + "epoch": 0.13691988833090252, + "grad_norm": 442.3098449707031, + "learning_rate": 9.96153014544325e-06, + "loss": 38.8789, + "step": 67780 + }, + { + "epoch": 0.13694008896358634, + "grad_norm": 553.655029296875, + "learning_rate": 9.961486915751114e-06, + "loss": 22.1211, + "step": 67790 + }, + { + "epoch": 0.13696028959627016, + "grad_norm": 427.0609436035156, + "learning_rate": 9.96144366187729e-06, + "loss": 22.2904, + "step": 67800 + }, + { + "epoch": 0.13698049022895398, + "grad_norm": 374.5810852050781, + "learning_rate": 9.961400383821992e-06, + "loss": 18.3437, + "step": 67810 + }, + { + "epoch": 0.1370006908616378, + "grad_norm": 619.0362548828125, + "learning_rate": 9.96135708158543e-06, + "loss": 28.4649, + "step": 67820 + }, + { + "epoch": 0.1370208914943216, + "grad_norm": 703.8502197265625, + "learning_rate": 9.961313755167816e-06, + "loss": 21.8278, + "step": 67830 + }, + { + "epoch": 0.1370410921270054, + "grad_norm": 651.2317504882812, + "learning_rate": 9.961270404569358e-06, + "loss": 40.7999, + "step": 67840 + }, + { + "epoch": 0.13706129275968923, + "grad_norm": 545.5133666992188, + "learning_rate": 9.961227029790272e-06, + "loss": 17.8611, + "step": 67850 + }, + { + "epoch": 0.13708149339237305, + "grad_norm": 575.947265625, + "learning_rate": 9.961183630830768e-06, + "loss": 22.0308, + "step": 67860 + }, + { + "epoch": 0.13710169402505687, + "grad_norm": 690.7603759765625, + "learning_rate": 9.961140207691055e-06, + "loss": 34.1975, + "step": 67870 + }, + { + "epoch": 0.1371218946577407, + "grad_norm": 673.443115234375, + "learning_rate": 9.961096760371349e-06, + "loss": 31.6265, + "step": 67880 + }, + { + "epoch": 0.1371420952904245, + "grad_norm": 657.22900390625, + "learning_rate": 9.961053288871855e-06, + "loss": 28.9402, + "step": 67890 + }, + { + "epoch": 0.1371622959231083, + "grad_norm": 200.78326416015625, + "learning_rate": 9.961009793192793e-06, + "loss": 20.546, + "step": 67900 + }, + { + "epoch": 0.13718249655579212, + "grad_norm": 388.554931640625, + "learning_rate": 9.96096627333437e-06, + "loss": 35.5744, + "step": 67910 + }, + { + "epoch": 0.13720269718847594, + "grad_norm": 165.8963623046875, + "learning_rate": 9.960922729296797e-06, + "loss": 33.9824, + 
"step": 67920 + }, + { + "epoch": 0.13722289782115976, + "grad_norm": 621.302978515625, + "learning_rate": 9.96087916108029e-06, + "loss": 36.0635, + "step": 67930 + }, + { + "epoch": 0.13724309845384358, + "grad_norm": 466.5898742675781, + "learning_rate": 9.960835568685058e-06, + "loss": 20.2203, + "step": 67940 + }, + { + "epoch": 0.1372632990865274, + "grad_norm": 539.1677856445312, + "learning_rate": 9.960791952111318e-06, + "loss": 28.2711, + "step": 67950 + }, + { + "epoch": 0.1372834997192112, + "grad_norm": 535.0186767578125, + "learning_rate": 9.960748311359278e-06, + "loss": 26.4194, + "step": 67960 + }, + { + "epoch": 0.137303700351895, + "grad_norm": 375.3293762207031, + "learning_rate": 9.96070464642915e-06, + "loss": 21.6461, + "step": 67970 + }, + { + "epoch": 0.13732390098457883, + "grad_norm": 550.9193725585938, + "learning_rate": 9.960660957321153e-06, + "loss": 41.7613, + "step": 67980 + }, + { + "epoch": 0.13734410161726265, + "grad_norm": 271.54925537109375, + "learning_rate": 9.960617244035495e-06, + "loss": 40.3926, + "step": 67990 + }, + { + "epoch": 0.13736430224994647, + "grad_norm": 734.3870239257812, + "learning_rate": 9.960573506572391e-06, + "loss": 30.0268, + "step": 68000 + }, + { + "epoch": 0.1373845028826303, + "grad_norm": 708.679443359375, + "learning_rate": 9.960529744932051e-06, + "loss": 23.1979, + "step": 68010 + }, + { + "epoch": 0.13740470351531409, + "grad_norm": 49.49264144897461, + "learning_rate": 9.960485959114693e-06, + "loss": 11.3027, + "step": 68020 + }, + { + "epoch": 0.1374249041479979, + "grad_norm": 423.9950256347656, + "learning_rate": 9.960442149120527e-06, + "loss": 39.6836, + "step": 68030 + }, + { + "epoch": 0.13744510478068173, + "grad_norm": 415.1904602050781, + "learning_rate": 9.960398314949767e-06, + "loss": 30.8742, + "step": 68040 + }, + { + "epoch": 0.13746530541336555, + "grad_norm": 331.5396423339844, + "learning_rate": 9.960354456602627e-06, + "loss": 18.4558, + "step": 68050 + }, + { + "epoch": 0.13748550604604937, + "grad_norm": 432.09698486328125, + "learning_rate": 9.960310574079324e-06, + "loss": 20.5393, + "step": 68060 + }, + { + "epoch": 0.13750570667873319, + "grad_norm": 296.25335693359375, + "learning_rate": 9.960266667380065e-06, + "loss": 18.6186, + "step": 68070 + }, + { + "epoch": 0.137525907311417, + "grad_norm": 695.0523071289062, + "learning_rate": 9.96022273650507e-06, + "loss": 22.6743, + "step": 68080 + }, + { + "epoch": 0.1375461079441008, + "grad_norm": 382.1102294921875, + "learning_rate": 9.96017878145455e-06, + "loss": 35.8567, + "step": 68090 + }, + { + "epoch": 0.13756630857678462, + "grad_norm": 228.53292846679688, + "learning_rate": 9.960134802228722e-06, + "loss": 22.0488, + "step": 68100 + }, + { + "epoch": 0.13758650920946844, + "grad_norm": 258.3619384765625, + "learning_rate": 9.960090798827798e-06, + "loss": 21.9386, + "step": 68110 + }, + { + "epoch": 0.13760670984215226, + "grad_norm": 43.63883972167969, + "learning_rate": 9.960046771251991e-06, + "loss": 30.7622, + "step": 68120 + }, + { + "epoch": 0.13762691047483608, + "grad_norm": 0.0, + "learning_rate": 9.96000271950152e-06, + "loss": 19.6476, + "step": 68130 + }, + { + "epoch": 0.1376471111075199, + "grad_norm": 451.0689392089844, + "learning_rate": 9.959958643576597e-06, + "loss": 18.3354, + "step": 68140 + }, + { + "epoch": 0.1376673117402037, + "grad_norm": 537.6965942382812, + "learning_rate": 9.959914543477436e-06, + "loss": 17.0985, + "step": 68150 + }, + { + "epoch": 0.1376875123728875, + "grad_norm": 
432.7866516113281, + "learning_rate": 9.959870419204253e-06, + "loss": 32.9352, + "step": 68160 + }, + { + "epoch": 0.13770771300557133, + "grad_norm": 923.4307250976562, + "learning_rate": 9.959826270757265e-06, + "loss": 22.7099, + "step": 68170 + }, + { + "epoch": 0.13772791363825515, + "grad_norm": 331.7381896972656, + "learning_rate": 9.959782098136683e-06, + "loss": 32.172, + "step": 68180 + }, + { + "epoch": 0.13774811427093897, + "grad_norm": 315.8378601074219, + "learning_rate": 9.959737901342725e-06, + "loss": 20.4848, + "step": 68190 + }, + { + "epoch": 0.1377683149036228, + "grad_norm": 580.3365478515625, + "learning_rate": 9.959693680375608e-06, + "loss": 17.3681, + "step": 68200 + }, + { + "epoch": 0.1377885155363066, + "grad_norm": 297.5736389160156, + "learning_rate": 9.959649435235543e-06, + "loss": 24.8473, + "step": 68210 + }, + { + "epoch": 0.1378087161689904, + "grad_norm": 836.2515869140625, + "learning_rate": 9.95960516592275e-06, + "loss": 30.2962, + "step": 68220 + }, + { + "epoch": 0.13782891680167422, + "grad_norm": 164.470458984375, + "learning_rate": 9.959560872437443e-06, + "loss": 23.1598, + "step": 68230 + }, + { + "epoch": 0.13784911743435804, + "grad_norm": 827.936767578125, + "learning_rate": 9.959516554779838e-06, + "loss": 28.8193, + "step": 68240 + }, + { + "epoch": 0.13786931806704186, + "grad_norm": 363.9051513671875, + "learning_rate": 9.95947221295015e-06, + "loss": 27.7335, + "step": 68250 + }, + { + "epoch": 0.13788951869972568, + "grad_norm": 777.4404907226562, + "learning_rate": 9.959427846948595e-06, + "loss": 14.0809, + "step": 68260 + }, + { + "epoch": 0.1379097193324095, + "grad_norm": 699.2410888671875, + "learning_rate": 9.959383456775392e-06, + "loss": 44.7876, + "step": 68270 + }, + { + "epoch": 0.1379299199650933, + "grad_norm": 1100.128662109375, + "learning_rate": 9.959339042430753e-06, + "loss": 46.4882, + "step": 68280 + }, + { + "epoch": 0.13795012059777711, + "grad_norm": 350.7457275390625, + "learning_rate": 9.9592946039149e-06, + "loss": 22.3283, + "step": 68290 + }, + { + "epoch": 0.13797032123046093, + "grad_norm": 496.0047607421875, + "learning_rate": 9.959250141228046e-06, + "loss": 14.4258, + "step": 68300 + }, + { + "epoch": 0.13799052186314475, + "grad_norm": 721.7589721679688, + "learning_rate": 9.959205654370406e-06, + "loss": 41.2446, + "step": 68310 + }, + { + "epoch": 0.13801072249582857, + "grad_norm": 502.5924377441406, + "learning_rate": 9.959161143342201e-06, + "loss": 31.4467, + "step": 68320 + }, + { + "epoch": 0.1380309231285124, + "grad_norm": 656.1293334960938, + "learning_rate": 9.959116608143647e-06, + "loss": 31.2924, + "step": 68330 + }, + { + "epoch": 0.1380511237611962, + "grad_norm": 314.748046875, + "learning_rate": 9.959072048774958e-06, + "loss": 22.5767, + "step": 68340 + }, + { + "epoch": 0.13807132439388, + "grad_norm": 367.8751525878906, + "learning_rate": 9.959027465236354e-06, + "loss": 16.0496, + "step": 68350 + }, + { + "epoch": 0.13809152502656383, + "grad_norm": 26.35764503479004, + "learning_rate": 9.958982857528053e-06, + "loss": 30.7503, + "step": 68360 + }, + { + "epoch": 0.13811172565924765, + "grad_norm": 167.81228637695312, + "learning_rate": 9.958938225650268e-06, + "loss": 35.3317, + "step": 68370 + }, + { + "epoch": 0.13813192629193147, + "grad_norm": 604.2232055664062, + "learning_rate": 9.958893569603222e-06, + "loss": 39.864, + "step": 68380 + }, + { + "epoch": 0.1381521269246153, + "grad_norm": 809.530517578125, + "learning_rate": 9.958848889387129e-06, + "loss": 20.9284, + 
"step": 68390 + }, + { + "epoch": 0.1381723275572991, + "grad_norm": 674.8519287109375, + "learning_rate": 9.958804185002209e-06, + "loss": 21.7976, + "step": 68400 + }, + { + "epoch": 0.1381925281899829, + "grad_norm": 393.74920654296875, + "learning_rate": 9.958759456448677e-06, + "loss": 34.8823, + "step": 68410 + }, + { + "epoch": 0.13821272882266672, + "grad_norm": 949.1607055664062, + "learning_rate": 9.958714703726755e-06, + "loss": 37.0131, + "step": 68420 + }, + { + "epoch": 0.13823292945535054, + "grad_norm": 193.75955200195312, + "learning_rate": 9.958669926836658e-06, + "loss": 15.5887, + "step": 68430 + }, + { + "epoch": 0.13825313008803436, + "grad_norm": 171.90570068359375, + "learning_rate": 9.958625125778606e-06, + "loss": 25.8418, + "step": 68440 + }, + { + "epoch": 0.13827333072071818, + "grad_norm": 347.1898193359375, + "learning_rate": 9.958580300552816e-06, + "loss": 22.348, + "step": 68450 + }, + { + "epoch": 0.138293531353402, + "grad_norm": 556.2518920898438, + "learning_rate": 9.958535451159506e-06, + "loss": 23.4541, + "step": 68460 + }, + { + "epoch": 0.1383137319860858, + "grad_norm": 35.548519134521484, + "learning_rate": 9.958490577598896e-06, + "loss": 21.8669, + "step": 68470 + }, + { + "epoch": 0.1383339326187696, + "grad_norm": 303.63336181640625, + "learning_rate": 9.958445679871204e-06, + "loss": 20.6764, + "step": 68480 + }, + { + "epoch": 0.13835413325145343, + "grad_norm": 724.085205078125, + "learning_rate": 9.958400757976651e-06, + "loss": 23.8251, + "step": 68490 + }, + { + "epoch": 0.13837433388413725, + "grad_norm": 848.7816772460938, + "learning_rate": 9.958355811915452e-06, + "loss": 27.7101, + "step": 68500 + }, + { + "epoch": 0.13839453451682107, + "grad_norm": 732.7776489257812, + "learning_rate": 9.95831084168783e-06, + "loss": 53.3678, + "step": 68510 + }, + { + "epoch": 0.1384147351495049, + "grad_norm": 470.7287292480469, + "learning_rate": 9.958265847294001e-06, + "loss": 25.9891, + "step": 68520 + }, + { + "epoch": 0.1384349357821887, + "grad_norm": 185.62628173828125, + "learning_rate": 9.958220828734187e-06, + "loss": 34.1429, + "step": 68530 + }, + { + "epoch": 0.1384551364148725, + "grad_norm": 449.30059814453125, + "learning_rate": 9.958175786008605e-06, + "loss": 12.2641, + "step": 68540 + }, + { + "epoch": 0.13847533704755632, + "grad_norm": 734.6017456054688, + "learning_rate": 9.958130719117476e-06, + "loss": 29.0336, + "step": 68550 + }, + { + "epoch": 0.13849553768024014, + "grad_norm": 243.384765625, + "learning_rate": 9.958085628061018e-06, + "loss": 21.049, + "step": 68560 + }, + { + "epoch": 0.13851573831292396, + "grad_norm": 220.47532653808594, + "learning_rate": 9.958040512839453e-06, + "loss": 22.1039, + "step": 68570 + }, + { + "epoch": 0.13853593894560778, + "grad_norm": 515.5274047851562, + "learning_rate": 9.957995373453e-06, + "loss": 21.835, + "step": 68580 + }, + { + "epoch": 0.1385561395782916, + "grad_norm": 538.095947265625, + "learning_rate": 9.95795020990188e-06, + "loss": 33.4923, + "step": 68590 + }, + { + "epoch": 0.1385763402109754, + "grad_norm": 402.7262268066406, + "learning_rate": 9.957905022186309e-06, + "loss": 18.5955, + "step": 68600 + }, + { + "epoch": 0.13859654084365922, + "grad_norm": 315.2361755371094, + "learning_rate": 9.957859810306511e-06, + "loss": 31.8788, + "step": 68610 + }, + { + "epoch": 0.13861674147634304, + "grad_norm": 1102.16552734375, + "learning_rate": 9.957814574262707e-06, + "loss": 35.5344, + "step": 68620 + }, + { + "epoch": 0.13863694210902686, + "grad_norm": 
865.6183471679688, + "learning_rate": 9.957769314055117e-06, + "loss": 30.1209, + "step": 68630 + }, + { + "epoch": 0.13865714274171068, + "grad_norm": 912.3616333007812, + "learning_rate": 9.957724029683958e-06, + "loss": 13.6061, + "step": 68640 + }, + { + "epoch": 0.1386773433743945, + "grad_norm": 1288.3985595703125, + "learning_rate": 9.957678721149454e-06, + "loss": 32.3121, + "step": 68650 + }, + { + "epoch": 0.1386975440070783, + "grad_norm": 875.0906372070312, + "learning_rate": 9.957633388451827e-06, + "loss": 33.2735, + "step": 68660 + }, + { + "epoch": 0.1387177446397621, + "grad_norm": 260.54937744140625, + "learning_rate": 9.957588031591295e-06, + "loss": 23.7336, + "step": 68670 + }, + { + "epoch": 0.13873794527244593, + "grad_norm": 270.64227294921875, + "learning_rate": 9.957542650568079e-06, + "loss": 28.7163, + "step": 68680 + }, + { + "epoch": 0.13875814590512975, + "grad_norm": 886.39306640625, + "learning_rate": 9.957497245382403e-06, + "loss": 20.3497, + "step": 68690 + }, + { + "epoch": 0.13877834653781357, + "grad_norm": 486.9722900390625, + "learning_rate": 9.957451816034487e-06, + "loss": 21.0285, + "step": 68700 + }, + { + "epoch": 0.1387985471704974, + "grad_norm": 477.1606750488281, + "learning_rate": 9.95740636252455e-06, + "loss": 36.446, + "step": 68710 + }, + { + "epoch": 0.1388187478031812, + "grad_norm": 369.5108337402344, + "learning_rate": 9.957360884852819e-06, + "loss": 18.5451, + "step": 68720 + }, + { + "epoch": 0.138838948435865, + "grad_norm": 984.824951171875, + "learning_rate": 9.95731538301951e-06, + "loss": 33.7099, + "step": 68730 + }, + { + "epoch": 0.13885914906854882, + "grad_norm": 458.1757507324219, + "learning_rate": 9.957269857024847e-06, + "loss": 24.3889, + "step": 68740 + }, + { + "epoch": 0.13887934970123264, + "grad_norm": 206.4055633544922, + "learning_rate": 9.957224306869053e-06, + "loss": 23.9538, + "step": 68750 + }, + { + "epoch": 0.13889955033391646, + "grad_norm": 405.8059387207031, + "learning_rate": 9.957178732552348e-06, + "loss": 22.2457, + "step": 68760 + }, + { + "epoch": 0.13891975096660028, + "grad_norm": 348.5886535644531, + "learning_rate": 9.957133134074955e-06, + "loss": 16.4888, + "step": 68770 + }, + { + "epoch": 0.1389399515992841, + "grad_norm": 384.4114685058594, + "learning_rate": 9.957087511437099e-06, + "loss": 45.6042, + "step": 68780 + }, + { + "epoch": 0.1389601522319679, + "grad_norm": 197.05648803710938, + "learning_rate": 9.957041864638997e-06, + "loss": 14.7395, + "step": 68790 + }, + { + "epoch": 0.1389803528646517, + "grad_norm": 393.7522277832031, + "learning_rate": 9.956996193680874e-06, + "loss": 24.0284, + "step": 68800 + }, + { + "epoch": 0.13900055349733553, + "grad_norm": 796.6821899414062, + "learning_rate": 9.956950498562954e-06, + "loss": 25.859, + "step": 68810 + }, + { + "epoch": 0.13902075413001935, + "grad_norm": 257.58404541015625, + "learning_rate": 9.956904779285457e-06, + "loss": 20.2082, + "step": 68820 + }, + { + "epoch": 0.13904095476270317, + "grad_norm": 1210.4019775390625, + "learning_rate": 9.956859035848608e-06, + "loss": 34.5147, + "step": 68830 + }, + { + "epoch": 0.139061155395387, + "grad_norm": 212.5049591064453, + "learning_rate": 9.95681326825263e-06, + "loss": 24.8414, + "step": 68840 + }, + { + "epoch": 0.1390813560280708, + "grad_norm": 134.4169158935547, + "learning_rate": 9.956767476497745e-06, + "loss": 30.2395, + "step": 68850 + }, + { + "epoch": 0.1391015566607546, + "grad_norm": 624.7702026367188, + "learning_rate": 9.956721660584175e-06, + "loss": 
40.1395, + "step": 68860 + }, + { + "epoch": 0.13912175729343843, + "grad_norm": 300.9224548339844, + "learning_rate": 9.956675820512146e-06, + "loss": 27.4624, + "step": 68870 + }, + { + "epoch": 0.13914195792612225, + "grad_norm": 674.11083984375, + "learning_rate": 9.956629956281881e-06, + "loss": 44.8466, + "step": 68880 + }, + { + "epoch": 0.13916215855880607, + "grad_norm": 552.4646606445312, + "learning_rate": 9.956584067893602e-06, + "loss": 17.5639, + "step": 68890 + }, + { + "epoch": 0.13918235919148988, + "grad_norm": 289.3154296875, + "learning_rate": 9.956538155347534e-06, + "loss": 19.0497, + "step": 68900 + }, + { + "epoch": 0.1392025598241737, + "grad_norm": 791.2666625976562, + "learning_rate": 9.9564922186439e-06, + "loss": 23.6453, + "step": 68910 + }, + { + "epoch": 0.1392227604568575, + "grad_norm": 387.15289306640625, + "learning_rate": 9.956446257782923e-06, + "loss": 18.8502, + "step": 68920 + }, + { + "epoch": 0.13924296108954132, + "grad_norm": 713.1915893554688, + "learning_rate": 9.95640027276483e-06, + "loss": 43.7808, + "step": 68930 + }, + { + "epoch": 0.13926316172222514, + "grad_norm": 234.76963806152344, + "learning_rate": 9.95635426358984e-06, + "loss": 27.3004, + "step": 68940 + }, + { + "epoch": 0.13928336235490896, + "grad_norm": 646.1390380859375, + "learning_rate": 9.956308230258182e-06, + "loss": 20.7643, + "step": 68950 + }, + { + "epoch": 0.13930356298759278, + "grad_norm": 410.85760498046875, + "learning_rate": 9.956262172770082e-06, + "loss": 21.7578, + "step": 68960 + }, + { + "epoch": 0.1393237636202766, + "grad_norm": 160.68807983398438, + "learning_rate": 9.956216091125756e-06, + "loss": 25.8317, + "step": 68970 + }, + { + "epoch": 0.1393439642529604, + "grad_norm": 418.814208984375, + "learning_rate": 9.956169985325438e-06, + "loss": 22.5675, + "step": 68980 + }, + { + "epoch": 0.1393641648856442, + "grad_norm": 477.0711975097656, + "learning_rate": 9.956123855369346e-06, + "loss": 36.7583, + "step": 68990 + }, + { + "epoch": 0.13938436551832803, + "grad_norm": 489.3850402832031, + "learning_rate": 9.95607770125771e-06, + "loss": 12.5654, + "step": 69000 + }, + { + "epoch": 0.13940456615101185, + "grad_norm": 42.41978073120117, + "learning_rate": 9.95603152299075e-06, + "loss": 35.2543, + "step": 69010 + }, + { + "epoch": 0.13942476678369567, + "grad_norm": 299.6903381347656, + "learning_rate": 9.955985320568696e-06, + "loss": 17.0381, + "step": 69020 + }, + { + "epoch": 0.1394449674163795, + "grad_norm": 323.9222717285156, + "learning_rate": 9.955939093991767e-06, + "loss": 20.292, + "step": 69030 + }, + { + "epoch": 0.1394651680490633, + "grad_norm": 148.71099853515625, + "learning_rate": 9.955892843260195e-06, + "loss": 41.621, + "step": 69040 + }, + { + "epoch": 0.1394853686817471, + "grad_norm": 461.989013671875, + "learning_rate": 9.955846568374201e-06, + "loss": 23.9631, + "step": 69050 + }, + { + "epoch": 0.13950556931443092, + "grad_norm": 942.498291015625, + "learning_rate": 9.955800269334013e-06, + "loss": 31.5033, + "step": 69060 + }, + { + "epoch": 0.13952576994711474, + "grad_norm": 210.36659240722656, + "learning_rate": 9.955753946139855e-06, + "loss": 28.8701, + "step": 69070 + }, + { + "epoch": 0.13954597057979856, + "grad_norm": 200.81434631347656, + "learning_rate": 9.955707598791952e-06, + "loss": 15.0929, + "step": 69080 + }, + { + "epoch": 0.13956617121248238, + "grad_norm": 317.3502197265625, + "learning_rate": 9.955661227290531e-06, + "loss": 43.9322, + "step": 69090 + }, + { + "epoch": 0.1395863718451662, + 
"grad_norm": 581.02978515625, + "learning_rate": 9.95561483163582e-06, + "loss": 21.4542, + "step": 69100 + }, + { + "epoch": 0.13960657247785, + "grad_norm": 909.7908325195312, + "learning_rate": 9.955568411828043e-06, + "loss": 26.0563, + "step": 69110 + }, + { + "epoch": 0.13962677311053381, + "grad_norm": 326.35675048828125, + "learning_rate": 9.955521967867427e-06, + "loss": 34.8543, + "step": 69120 + }, + { + "epoch": 0.13964697374321763, + "grad_norm": 180.7159881591797, + "learning_rate": 9.955475499754197e-06, + "loss": 10.9506, + "step": 69130 + }, + { + "epoch": 0.13966717437590145, + "grad_norm": 64.47581481933594, + "learning_rate": 9.955429007488582e-06, + "loss": 26.2207, + "step": 69140 + }, + { + "epoch": 0.13968737500858527, + "grad_norm": 541.9278564453125, + "learning_rate": 9.955382491070806e-06, + "loss": 27.692, + "step": 69150 + }, + { + "epoch": 0.1397075756412691, + "grad_norm": 188.29824829101562, + "learning_rate": 9.955335950501097e-06, + "loss": 10.1276, + "step": 69160 + }, + { + "epoch": 0.13972777627395291, + "grad_norm": 1394.490478515625, + "learning_rate": 9.955289385779681e-06, + "loss": 36.9296, + "step": 69170 + }, + { + "epoch": 0.1397479769066367, + "grad_norm": 520.42138671875, + "learning_rate": 9.955242796906785e-06, + "loss": 16.4732, + "step": 69180 + }, + { + "epoch": 0.13976817753932053, + "grad_norm": 236.06822204589844, + "learning_rate": 9.955196183882637e-06, + "loss": 33.4686, + "step": 69190 + }, + { + "epoch": 0.13978837817200435, + "grad_norm": 565.5700073242188, + "learning_rate": 9.955149546707465e-06, + "loss": 23.4184, + "step": 69200 + }, + { + "epoch": 0.13980857880468817, + "grad_norm": 150.66734313964844, + "learning_rate": 9.955102885381494e-06, + "loss": 22.63, + "step": 69210 + }, + { + "epoch": 0.13982877943737199, + "grad_norm": 470.6201477050781, + "learning_rate": 9.955056199904953e-06, + "loss": 16.4032, + "step": 69220 + }, + { + "epoch": 0.1398489800700558, + "grad_norm": 483.5701904296875, + "learning_rate": 9.955009490278069e-06, + "loss": 23.6988, + "step": 69230 + }, + { + "epoch": 0.1398691807027396, + "grad_norm": 386.0276184082031, + "learning_rate": 9.95496275650107e-06, + "loss": 34.1396, + "step": 69240 + }, + { + "epoch": 0.13988938133542342, + "grad_norm": 470.1758728027344, + "learning_rate": 9.954915998574182e-06, + "loss": 7.9019, + "step": 69250 + }, + { + "epoch": 0.13990958196810724, + "grad_norm": 269.1112060546875, + "learning_rate": 9.954869216497636e-06, + "loss": 18.3955, + "step": 69260 + }, + { + "epoch": 0.13992978260079106, + "grad_norm": 4.423580169677734, + "learning_rate": 9.954822410271657e-06, + "loss": 27.5181, + "step": 69270 + }, + { + "epoch": 0.13994998323347488, + "grad_norm": 875.4022216796875, + "learning_rate": 9.954775579896476e-06, + "loss": 37.5965, + "step": 69280 + }, + { + "epoch": 0.1399701838661587, + "grad_norm": 188.47088623046875, + "learning_rate": 9.954728725372319e-06, + "loss": 15.6562, + "step": 69290 + }, + { + "epoch": 0.1399903844988425, + "grad_norm": 453.6847839355469, + "learning_rate": 9.954681846699414e-06, + "loss": 14.6812, + "step": 69300 + }, + { + "epoch": 0.1400105851315263, + "grad_norm": 286.9229431152344, + "learning_rate": 9.954634943877993e-06, + "loss": 26.4569, + "step": 69310 + }, + { + "epoch": 0.14003078576421013, + "grad_norm": 224.12432861328125, + "learning_rate": 9.95458801690828e-06, + "loss": 37.0338, + "step": 69320 + }, + { + "epoch": 0.14005098639689395, + "grad_norm": 250.95457458496094, + "learning_rate": 
9.954541065790509e-06, + "loss": 19.3025, + "step": 69330 + }, + { + "epoch": 0.14007118702957777, + "grad_norm": 204.41136169433594, + "learning_rate": 9.954494090524903e-06, + "loss": 29.5032, + "step": 69340 + }, + { + "epoch": 0.1400913876622616, + "grad_norm": 1198.9293212890625, + "learning_rate": 9.954447091111695e-06, + "loss": 29.9783, + "step": 69350 + }, + { + "epoch": 0.1401115882949454, + "grad_norm": 605.0090942382812, + "learning_rate": 9.95440006755111e-06, + "loss": 28.5639, + "step": 69360 + }, + { + "epoch": 0.1401317889276292, + "grad_norm": 203.74388122558594, + "learning_rate": 9.954353019843384e-06, + "loss": 29.1512, + "step": 69370 + }, + { + "epoch": 0.14015198956031302, + "grad_norm": 1235.79541015625, + "learning_rate": 9.95430594798874e-06, + "loss": 43.7828, + "step": 69380 + }, + { + "epoch": 0.14017219019299684, + "grad_norm": 1251.275390625, + "learning_rate": 9.954258851987411e-06, + "loss": 28.2353, + "step": 69390 + }, + { + "epoch": 0.14019239082568066, + "grad_norm": 473.9927062988281, + "learning_rate": 9.954211731839623e-06, + "loss": 31.9145, + "step": 69400 + }, + { + "epoch": 0.14021259145836448, + "grad_norm": 120.4296875, + "learning_rate": 9.95416458754561e-06, + "loss": 24.6197, + "step": 69410 + }, + { + "epoch": 0.1402327920910483, + "grad_norm": 1342.9423828125, + "learning_rate": 9.954117419105599e-06, + "loss": 31.6074, + "step": 69420 + }, + { + "epoch": 0.1402529927237321, + "grad_norm": 377.57598876953125, + "learning_rate": 9.95407022651982e-06, + "loss": 42.855, + "step": 69430 + }, + { + "epoch": 0.14027319335641592, + "grad_norm": 127.66248321533203, + "learning_rate": 9.954023009788505e-06, + "loss": 16.1247, + "step": 69440 + }, + { + "epoch": 0.14029339398909974, + "grad_norm": 824.560791015625, + "learning_rate": 9.953975768911881e-06, + "loss": 28.6148, + "step": 69450 + }, + { + "epoch": 0.14031359462178356, + "grad_norm": 1145.84765625, + "learning_rate": 9.953928503890181e-06, + "loss": 23.5062, + "step": 69460 + }, + { + "epoch": 0.14033379525446738, + "grad_norm": 68.81543731689453, + "learning_rate": 9.953881214723636e-06, + "loss": 29.8865, + "step": 69470 + }, + { + "epoch": 0.1403539958871512, + "grad_norm": 387.13104248046875, + "learning_rate": 9.95383390141247e-06, + "loss": 23.7702, + "step": 69480 + }, + { + "epoch": 0.14037419651983502, + "grad_norm": 235.42279052734375, + "learning_rate": 9.953786563956923e-06, + "loss": 27.2501, + "step": 69490 + }, + { + "epoch": 0.1403943971525188, + "grad_norm": 868.7579956054688, + "learning_rate": 9.953739202357219e-06, + "loss": 38.222, + "step": 69500 + }, + { + "epoch": 0.14041459778520263, + "grad_norm": 819.7193603515625, + "learning_rate": 9.953691816613592e-06, + "loss": 28.7001, + "step": 69510 + }, + { + "epoch": 0.14043479841788645, + "grad_norm": 698.81201171875, + "learning_rate": 9.95364440672627e-06, + "loss": 26.3605, + "step": 69520 + }, + { + "epoch": 0.14045499905057027, + "grad_norm": 539.9085083007812, + "learning_rate": 9.953596972695487e-06, + "loss": 20.6521, + "step": 69530 + }, + { + "epoch": 0.1404751996832541, + "grad_norm": 578.3551635742188, + "learning_rate": 9.953549514521474e-06, + "loss": 30.846, + "step": 69540 + }, + { + "epoch": 0.1404954003159379, + "grad_norm": 356.5929260253906, + "learning_rate": 9.953502032204461e-06, + "loss": 26.0084, + "step": 69550 + }, + { + "epoch": 0.1405156009486217, + "grad_norm": 0.0, + "learning_rate": 9.95345452574468e-06, + "loss": 30.3256, + "step": 69560 + }, + { + "epoch": 0.14053580158130552, + 
"grad_norm": 183.98497009277344, + "learning_rate": 9.95340699514236e-06, + "loss": 26.8322, + "step": 69570 + }, + { + "epoch": 0.14055600221398934, + "grad_norm": 624.8582763671875, + "learning_rate": 9.953359440397738e-06, + "loss": 25.6515, + "step": 69580 + }, + { + "epoch": 0.14057620284667316, + "grad_norm": 1048.4591064453125, + "learning_rate": 9.953311861511043e-06, + "loss": 17.7401, + "step": 69590 + }, + { + "epoch": 0.14059640347935698, + "grad_norm": 491.6831970214844, + "learning_rate": 9.953264258482505e-06, + "loss": 23.7325, + "step": 69600 + }, + { + "epoch": 0.1406166041120408, + "grad_norm": 396.37847900390625, + "learning_rate": 9.953216631312358e-06, + "loss": 19.6475, + "step": 69610 + }, + { + "epoch": 0.1406368047447246, + "grad_norm": 236.17860412597656, + "learning_rate": 9.953168980000836e-06, + "loss": 25.2312, + "step": 69620 + }, + { + "epoch": 0.1406570053774084, + "grad_norm": 310.13165283203125, + "learning_rate": 9.953121304548167e-06, + "loss": 19.4689, + "step": 69630 + }, + { + "epoch": 0.14067720601009223, + "grad_norm": 410.7505187988281, + "learning_rate": 9.953073604954586e-06, + "loss": 41.4177, + "step": 69640 + }, + { + "epoch": 0.14069740664277605, + "grad_norm": 237.97850036621094, + "learning_rate": 9.953025881220325e-06, + "loss": 23.2995, + "step": 69650 + }, + { + "epoch": 0.14071760727545987, + "grad_norm": 510.98211669921875, + "learning_rate": 9.952978133345616e-06, + "loss": 26.9472, + "step": 69660 + }, + { + "epoch": 0.1407378079081437, + "grad_norm": 132.69935607910156, + "learning_rate": 9.952930361330694e-06, + "loss": 19.6408, + "step": 69670 + }, + { + "epoch": 0.1407580085408275, + "grad_norm": 272.2227783203125, + "learning_rate": 9.952882565175788e-06, + "loss": 38.1983, + "step": 69680 + }, + { + "epoch": 0.1407782091735113, + "grad_norm": 784.8576049804688, + "learning_rate": 9.952834744881135e-06, + "loss": 18.8075, + "step": 69690 + }, + { + "epoch": 0.14079840980619512, + "grad_norm": 1082.32763671875, + "learning_rate": 9.952786900446964e-06, + "loss": 32.6177, + "step": 69700 + }, + { + "epoch": 0.14081861043887894, + "grad_norm": 709.037109375, + "learning_rate": 9.952739031873513e-06, + "loss": 19.6137, + "step": 69710 + }, + { + "epoch": 0.14083881107156276, + "grad_norm": 991.0155029296875, + "learning_rate": 9.952691139161012e-06, + "loss": 27.8896, + "step": 69720 + }, + { + "epoch": 0.14085901170424658, + "grad_norm": 575.1040649414062, + "learning_rate": 9.952643222309694e-06, + "loss": 14.214, + "step": 69730 + }, + { + "epoch": 0.1408792123369304, + "grad_norm": 414.91455078125, + "learning_rate": 9.952595281319794e-06, + "loss": 25.0494, + "step": 69740 + }, + { + "epoch": 0.1408994129696142, + "grad_norm": 277.9414367675781, + "learning_rate": 9.952547316191545e-06, + "loss": 11.4466, + "step": 69750 + }, + { + "epoch": 0.14091961360229802, + "grad_norm": 530.8134765625, + "learning_rate": 9.95249932692518e-06, + "loss": 30.588, + "step": 69760 + }, + { + "epoch": 0.14093981423498184, + "grad_norm": 419.50445556640625, + "learning_rate": 9.952451313520937e-06, + "loss": 33.3972, + "step": 69770 + }, + { + "epoch": 0.14096001486766566, + "grad_norm": 332.69232177734375, + "learning_rate": 9.952403275979046e-06, + "loss": 36.2613, + "step": 69780 + }, + { + "epoch": 0.14098021550034948, + "grad_norm": 544.991455078125, + "learning_rate": 9.95235521429974e-06, + "loss": 30.0454, + "step": 69790 + }, + { + "epoch": 0.1410004161330333, + "grad_norm": 213.3559112548828, + "learning_rate": 9.952307128483257e-06, 
+ "loss": 12.5632, + "step": 69800 + }, + { + "epoch": 0.14102061676571712, + "grad_norm": 528.3657836914062, + "learning_rate": 9.952259018529829e-06, + "loss": 30.6922, + "step": 69810 + }, + { + "epoch": 0.1410408173984009, + "grad_norm": 367.0792236328125, + "learning_rate": 9.952210884439693e-06, + "loss": 23.4962, + "step": 69820 + }, + { + "epoch": 0.14106101803108473, + "grad_norm": 598.0098876953125, + "learning_rate": 9.95216272621308e-06, + "loss": 16.1818, + "step": 69830 + }, + { + "epoch": 0.14108121866376855, + "grad_norm": 490.101318359375, + "learning_rate": 9.952114543850227e-06, + "loss": 23.8804, + "step": 69840 + }, + { + "epoch": 0.14110141929645237, + "grad_norm": 424.291015625, + "learning_rate": 9.952066337351367e-06, + "loss": 14.4883, + "step": 69850 + }, + { + "epoch": 0.1411216199291362, + "grad_norm": 195.29574584960938, + "learning_rate": 9.952018106716737e-06, + "loss": 21.0854, + "step": 69860 + }, + { + "epoch": 0.14114182056182, + "grad_norm": 383.5818786621094, + "learning_rate": 9.951969851946573e-06, + "loss": 32.4189, + "step": 69870 + }, + { + "epoch": 0.1411620211945038, + "grad_norm": 73.61356353759766, + "learning_rate": 9.951921573041107e-06, + "loss": 35.9182, + "step": 69880 + }, + { + "epoch": 0.14118222182718762, + "grad_norm": 420.78076171875, + "learning_rate": 9.951873270000576e-06, + "loss": 25.4562, + "step": 69890 + }, + { + "epoch": 0.14120242245987144, + "grad_norm": 511.9713134765625, + "learning_rate": 9.951824942825215e-06, + "loss": 37.8071, + "step": 69900 + }, + { + "epoch": 0.14122262309255526, + "grad_norm": 323.9211730957031, + "learning_rate": 9.951776591515262e-06, + "loss": 32.4247, + "step": 69910 + }, + { + "epoch": 0.14124282372523908, + "grad_norm": 487.0621032714844, + "learning_rate": 9.951728216070949e-06, + "loss": 13.8481, + "step": 69920 + }, + { + "epoch": 0.1412630243579229, + "grad_norm": 356.62457275390625, + "learning_rate": 9.951679816492513e-06, + "loss": 20.0213, + "step": 69930 + }, + { + "epoch": 0.1412832249906067, + "grad_norm": 377.02972412109375, + "learning_rate": 9.951631392780189e-06, + "loss": 18.7376, + "step": 69940 + }, + { + "epoch": 0.1413034256232905, + "grad_norm": 738.23046875, + "learning_rate": 9.951582944934215e-06, + "loss": 37.3979, + "step": 69950 + }, + { + "epoch": 0.14132362625597433, + "grad_norm": 892.313232421875, + "learning_rate": 9.951534472954826e-06, + "loss": 31.2392, + "step": 69960 + }, + { + "epoch": 0.14134382688865815, + "grad_norm": 460.5330505371094, + "learning_rate": 9.95148597684226e-06, + "loss": 35.134, + "step": 69970 + }, + { + "epoch": 0.14136402752134197, + "grad_norm": 384.5951232910156, + "learning_rate": 9.951437456596751e-06, + "loss": 14.3294, + "step": 69980 + }, + { + "epoch": 0.1413842281540258, + "grad_norm": 486.7411804199219, + "learning_rate": 9.951388912218536e-06, + "loss": 34.7231, + "step": 69990 + }, + { + "epoch": 0.1414044287867096, + "grad_norm": 677.1619873046875, + "learning_rate": 9.951340343707852e-06, + "loss": 17.5728, + "step": 70000 + }, + { + "epoch": 0.1414246294193934, + "grad_norm": 442.0053405761719, + "learning_rate": 9.951291751064937e-06, + "loss": 22.1191, + "step": 70010 + }, + { + "epoch": 0.14144483005207723, + "grad_norm": 257.58514404296875, + "learning_rate": 9.951243134290025e-06, + "loss": 19.5074, + "step": 70020 + }, + { + "epoch": 0.14146503068476105, + "grad_norm": 800.439697265625, + "learning_rate": 9.951194493383355e-06, + "loss": 32.0352, + "step": 70030 + }, + { + "epoch": 0.14148523131744487, + 
"grad_norm": 539.307861328125, + "learning_rate": 9.951145828345163e-06, + "loss": 27.1896, + "step": 70040 + }, + { + "epoch": 0.14150543195012869, + "grad_norm": 671.9888916015625, + "learning_rate": 9.951097139175688e-06, + "loss": 30.1305, + "step": 70050 + }, + { + "epoch": 0.1415256325828125, + "grad_norm": 693.0454711914062, + "learning_rate": 9.951048425875165e-06, + "loss": 30.7239, + "step": 70060 + }, + { + "epoch": 0.1415458332154963, + "grad_norm": 144.39990234375, + "learning_rate": 9.950999688443833e-06, + "loss": 10.8088, + "step": 70070 + }, + { + "epoch": 0.14156603384818012, + "grad_norm": 645.5604858398438, + "learning_rate": 9.950950926881928e-06, + "loss": 25.8998, + "step": 70080 + }, + { + "epoch": 0.14158623448086394, + "grad_norm": 876.4896240234375, + "learning_rate": 9.950902141189691e-06, + "loss": 28.263, + "step": 70090 + }, + { + "epoch": 0.14160643511354776, + "grad_norm": 988.877197265625, + "learning_rate": 9.950853331367356e-06, + "loss": 28.0873, + "step": 70100 + }, + { + "epoch": 0.14162663574623158, + "grad_norm": 166.33299255371094, + "learning_rate": 9.95080449741516e-06, + "loss": 25.429, + "step": 70110 + }, + { + "epoch": 0.1416468363789154, + "grad_norm": 656.4375, + "learning_rate": 9.950755639333347e-06, + "loss": 29.118, + "step": 70120 + }, + { + "epoch": 0.1416670370115992, + "grad_norm": 502.52630615234375, + "learning_rate": 9.95070675712215e-06, + "loss": 24.3436, + "step": 70130 + }, + { + "epoch": 0.141687237644283, + "grad_norm": 465.3695983886719, + "learning_rate": 9.950657850781809e-06, + "loss": 16.0229, + "step": 70140 + }, + { + "epoch": 0.14170743827696683, + "grad_norm": 240.92152404785156, + "learning_rate": 9.95060892031256e-06, + "loss": 34.4966, + "step": 70150 + }, + { + "epoch": 0.14172763890965065, + "grad_norm": 497.15380859375, + "learning_rate": 9.950559965714647e-06, + "loss": 51.5669, + "step": 70160 + }, + { + "epoch": 0.14174783954233447, + "grad_norm": 919.5972900390625, + "learning_rate": 9.950510986988304e-06, + "loss": 26.8649, + "step": 70170 + }, + { + "epoch": 0.1417680401750183, + "grad_norm": 525.713134765625, + "learning_rate": 9.95046198413377e-06, + "loss": 20.6903, + "step": 70180 + }, + { + "epoch": 0.1417882408077021, + "grad_norm": 516.0868530273438, + "learning_rate": 9.950412957151286e-06, + "loss": 32.3086, + "step": 70190 + }, + { + "epoch": 0.1418084414403859, + "grad_norm": 0.0, + "learning_rate": 9.950363906041089e-06, + "loss": 22.1381, + "step": 70200 + }, + { + "epoch": 0.14182864207306972, + "grad_norm": 357.0196228027344, + "learning_rate": 9.950314830803418e-06, + "loss": 23.64, + "step": 70210 + }, + { + "epoch": 0.14184884270575354, + "grad_norm": 1040.35546875, + "learning_rate": 9.950265731438513e-06, + "loss": 32.8609, + "step": 70220 + }, + { + "epoch": 0.14186904333843736, + "grad_norm": 1001.8739013671875, + "learning_rate": 9.950216607946614e-06, + "loss": 35.8748, + "step": 70230 + }, + { + "epoch": 0.14188924397112118, + "grad_norm": 714.5865478515625, + "learning_rate": 9.95016746032796e-06, + "loss": 40.7987, + "step": 70240 + }, + { + "epoch": 0.141909444603805, + "grad_norm": 114.6450424194336, + "learning_rate": 9.95011828858279e-06, + "loss": 28.8126, + "step": 70250 + }, + { + "epoch": 0.1419296452364888, + "grad_norm": 127.16027069091797, + "learning_rate": 9.950069092711342e-06, + "loss": 17.4822, + "step": 70260 + }, + { + "epoch": 0.14194984586917261, + "grad_norm": 243.0860137939453, + "learning_rate": 9.950019872713858e-06, + "loss": 35.47, + "step": 70270 + 
}, + { + "epoch": 0.14197004650185643, + "grad_norm": 204.92752075195312, + "learning_rate": 9.94997062859058e-06, + "loss": 14.5843, + "step": 70280 + }, + { + "epoch": 0.14199024713454025, + "grad_norm": 654.3239135742188, + "learning_rate": 9.949921360341743e-06, + "loss": 21.8702, + "step": 70290 + }, + { + "epoch": 0.14201044776722407, + "grad_norm": 19.094406127929688, + "learning_rate": 9.94987206796759e-06, + "loss": 26.9252, + "step": 70300 + }, + { + "epoch": 0.1420306483999079, + "grad_norm": 92.65380096435547, + "learning_rate": 9.949822751468364e-06, + "loss": 23.4914, + "step": 70310 + }, + { + "epoch": 0.14205084903259171, + "grad_norm": 118.98175811767578, + "learning_rate": 9.949773410844299e-06, + "loss": 31.5911, + "step": 70320 + }, + { + "epoch": 0.1420710496652755, + "grad_norm": 509.1815185546875, + "learning_rate": 9.94972404609564e-06, + "loss": 30.0535, + "step": 70330 + }, + { + "epoch": 0.14209125029795933, + "grad_norm": 457.3714904785156, + "learning_rate": 9.949674657222624e-06, + "loss": 32.8641, + "step": 70340 + }, + { + "epoch": 0.14211145093064315, + "grad_norm": 552.8912353515625, + "learning_rate": 9.949625244225496e-06, + "loss": 21.6767, + "step": 70350 + }, + { + "epoch": 0.14213165156332697, + "grad_norm": 760.5970458984375, + "learning_rate": 9.949575807104494e-06, + "loss": 22.7619, + "step": 70360 + }, + { + "epoch": 0.1421518521960108, + "grad_norm": 1111.3450927734375, + "learning_rate": 9.94952634585986e-06, + "loss": 56.025, + "step": 70370 + }, + { + "epoch": 0.1421720528286946, + "grad_norm": 27.02530860900879, + "learning_rate": 9.949476860491836e-06, + "loss": 48.3702, + "step": 70380 + }, + { + "epoch": 0.1421922534613784, + "grad_norm": 394.2063903808594, + "learning_rate": 9.949427351000662e-06, + "loss": 27.2819, + "step": 70390 + }, + { + "epoch": 0.14221245409406222, + "grad_norm": 257.6773681640625, + "learning_rate": 9.94937781738658e-06, + "loss": 19.1424, + "step": 70400 + }, + { + "epoch": 0.14223265472674604, + "grad_norm": 160.51309204101562, + "learning_rate": 9.949328259649828e-06, + "loss": 12.8866, + "step": 70410 + }, + { + "epoch": 0.14225285535942986, + "grad_norm": 356.33544921875, + "learning_rate": 9.949278677790653e-06, + "loss": 20.0134, + "step": 70420 + }, + { + "epoch": 0.14227305599211368, + "grad_norm": 252.20248413085938, + "learning_rate": 9.949229071809294e-06, + "loss": 19.4008, + "step": 70430 + }, + { + "epoch": 0.1422932566247975, + "grad_norm": 359.1466064453125, + "learning_rate": 9.949179441705992e-06, + "loss": 37.4052, + "step": 70440 + }, + { + "epoch": 0.1423134572574813, + "grad_norm": 66.66751861572266, + "learning_rate": 9.949129787480988e-06, + "loss": 19.6264, + "step": 70450 + }, + { + "epoch": 0.1423336578901651, + "grad_norm": 571.6493530273438, + "learning_rate": 9.949080109134528e-06, + "loss": 29.6699, + "step": 70460 + }, + { + "epoch": 0.14235385852284893, + "grad_norm": 299.482666015625, + "learning_rate": 9.949030406666852e-06, + "loss": 34.4904, + "step": 70470 + }, + { + "epoch": 0.14237405915553275, + "grad_norm": 201.146728515625, + "learning_rate": 9.948980680078199e-06, + "loss": 30.5541, + "step": 70480 + }, + { + "epoch": 0.14239425978821657, + "grad_norm": 637.2625732421875, + "learning_rate": 9.948930929368818e-06, + "loss": 19.0271, + "step": 70490 + }, + { + "epoch": 0.1424144604209004, + "grad_norm": 1033.426025390625, + "learning_rate": 9.948881154538946e-06, + "loss": 32.5386, + "step": 70500 + }, + { + "epoch": 0.1424346610535842, + "grad_norm": 1215.341796875, + 
"learning_rate": 9.948831355588828e-06, + "loss": 30.3841, + "step": 70510 + }, + { + "epoch": 0.142454861686268, + "grad_norm": 822.8902587890625, + "learning_rate": 9.948781532518706e-06, + "loss": 24.5997, + "step": 70520 + }, + { + "epoch": 0.14247506231895182, + "grad_norm": 214.73452758789062, + "learning_rate": 9.948731685328823e-06, + "loss": 18.9409, + "step": 70530 + }, + { + "epoch": 0.14249526295163564, + "grad_norm": 299.24285888671875, + "learning_rate": 9.948681814019421e-06, + "loss": 33.5172, + "step": 70540 + }, + { + "epoch": 0.14251546358431946, + "grad_norm": 396.3786926269531, + "learning_rate": 9.948631918590746e-06, + "loss": 27.0587, + "step": 70550 + }, + { + "epoch": 0.14253566421700328, + "grad_norm": 928.9906616210938, + "learning_rate": 9.948581999043038e-06, + "loss": 32.9953, + "step": 70560 + }, + { + "epoch": 0.1425558648496871, + "grad_norm": 1428.0191650390625, + "learning_rate": 9.948532055376541e-06, + "loss": 37.54, + "step": 70570 + }, + { + "epoch": 0.1425760654823709, + "grad_norm": 709.8215942382812, + "learning_rate": 9.9484820875915e-06, + "loss": 23.8771, + "step": 70580 + }, + { + "epoch": 0.14259626611505472, + "grad_norm": 489.7835693359375, + "learning_rate": 9.948432095688157e-06, + "loss": 22.3415, + "step": 70590 + }, + { + "epoch": 0.14261646674773854, + "grad_norm": 374.5968322753906, + "learning_rate": 9.948382079666756e-06, + "loss": 16.0797, + "step": 70600 + }, + { + "epoch": 0.14263666738042236, + "grad_norm": 396.25579833984375, + "learning_rate": 9.948332039527541e-06, + "loss": 36.8543, + "step": 70610 + }, + { + "epoch": 0.14265686801310618, + "grad_norm": 680.112060546875, + "learning_rate": 9.948281975270758e-06, + "loss": 25.1519, + "step": 70620 + }, + { + "epoch": 0.14267706864579, + "grad_norm": 302.9621276855469, + "learning_rate": 9.948231886896646e-06, + "loss": 13.1773, + "step": 70630 + }, + { + "epoch": 0.14269726927847382, + "grad_norm": 1454.6026611328125, + "learning_rate": 9.948181774405453e-06, + "loss": 44.4787, + "step": 70640 + }, + { + "epoch": 0.1427174699111576, + "grad_norm": 295.6230773925781, + "learning_rate": 9.94813163779742e-06, + "loss": 21.5774, + "step": 70650 + }, + { + "epoch": 0.14273767054384143, + "grad_norm": 382.83721923828125, + "learning_rate": 9.948081477072797e-06, + "loss": 22.5638, + "step": 70660 + }, + { + "epoch": 0.14275787117652525, + "grad_norm": 109.67560577392578, + "learning_rate": 9.948031292231823e-06, + "loss": 38.8435, + "step": 70670 + }, + { + "epoch": 0.14277807180920907, + "grad_norm": 414.8904113769531, + "learning_rate": 9.947981083274747e-06, + "loss": 20.7894, + "step": 70680 + }, + { + "epoch": 0.1427982724418929, + "grad_norm": 94.47399139404297, + "learning_rate": 9.947930850201808e-06, + "loss": 27.4957, + "step": 70690 + }, + { + "epoch": 0.1428184730745767, + "grad_norm": 349.111328125, + "learning_rate": 9.947880593013256e-06, + "loss": 17.7162, + "step": 70700 + }, + { + "epoch": 0.1428386737072605, + "grad_norm": 327.14337158203125, + "learning_rate": 9.947830311709333e-06, + "loss": 17.6172, + "step": 70710 + }, + { + "epoch": 0.14285887433994432, + "grad_norm": 281.6408386230469, + "learning_rate": 9.947780006290287e-06, + "loss": 31.3466, + "step": 70720 + }, + { + "epoch": 0.14287907497262814, + "grad_norm": 466.6632080078125, + "learning_rate": 9.947729676756359e-06, + "loss": 18.6596, + "step": 70730 + }, + { + "epoch": 0.14289927560531196, + "grad_norm": 386.74041748046875, + "learning_rate": 9.947679323107798e-06, + "loss": 29.0378, + "step": 
70740 + }, + { + "epoch": 0.14291947623799578, + "grad_norm": 254.9665069580078, + "learning_rate": 9.947628945344849e-06, + "loss": 38.1556, + "step": 70750 + }, + { + "epoch": 0.1429396768706796, + "grad_norm": 189.6139373779297, + "learning_rate": 9.947578543467755e-06, + "loss": 21.6816, + "step": 70760 + }, + { + "epoch": 0.1429598775033634, + "grad_norm": 496.5278625488281, + "learning_rate": 9.947528117476764e-06, + "loss": 31.7688, + "step": 70770 + }, + { + "epoch": 0.1429800781360472, + "grad_norm": 716.6107788085938, + "learning_rate": 9.94747766737212e-06, + "loss": 31.3266, + "step": 70780 + }, + { + "epoch": 0.14300027876873103, + "grad_norm": 0.0, + "learning_rate": 9.94742719315407e-06, + "loss": 28.7017, + "step": 70790 + }, + { + "epoch": 0.14302047940141485, + "grad_norm": 351.0498962402344, + "learning_rate": 9.947376694822861e-06, + "loss": 25.1587, + "step": 70800 + }, + { + "epoch": 0.14304068003409867, + "grad_norm": 553.6657104492188, + "learning_rate": 9.947326172378736e-06, + "loss": 24.1454, + "step": 70810 + }, + { + "epoch": 0.1430608806667825, + "grad_norm": 856.5927124023438, + "learning_rate": 9.947275625821947e-06, + "loss": 33.1798, + "step": 70820 + }, + { + "epoch": 0.1430810812994663, + "grad_norm": 38.8865852355957, + "learning_rate": 9.947225055152735e-06, + "loss": 27.3439, + "step": 70830 + }, + { + "epoch": 0.1431012819321501, + "grad_norm": 1.1428660154342651, + "learning_rate": 9.947174460371347e-06, + "loss": 31.6444, + "step": 70840 + }, + { + "epoch": 0.14312148256483392, + "grad_norm": 246.42755126953125, + "learning_rate": 9.947123841478032e-06, + "loss": 37.5845, + "step": 70850 + }, + { + "epoch": 0.14314168319751774, + "grad_norm": 471.3725280761719, + "learning_rate": 9.947073198473034e-06, + "loss": 29.1606, + "step": 70860 + }, + { + "epoch": 0.14316188383020156, + "grad_norm": 591.9281616210938, + "learning_rate": 9.947022531356602e-06, + "loss": 23.9603, + "step": 70870 + }, + { + "epoch": 0.14318208446288538, + "grad_norm": 134.68898010253906, + "learning_rate": 9.946971840128982e-06, + "loss": 21.785, + "step": 70880 + }, + { + "epoch": 0.1432022850955692, + "grad_norm": 1408.50439453125, + "learning_rate": 9.94692112479042e-06, + "loss": 35.7125, + "step": 70890 + }, + { + "epoch": 0.143222485728253, + "grad_norm": 923.0642700195312, + "learning_rate": 9.946870385341167e-06, + "loss": 22.5851, + "step": 70900 + }, + { + "epoch": 0.14324268636093682, + "grad_norm": 491.4736633300781, + "learning_rate": 9.946819621781467e-06, + "loss": 25.2908, + "step": 70910 + }, + { + "epoch": 0.14326288699362064, + "grad_norm": 0.0, + "learning_rate": 9.946768834111568e-06, + "loss": 21.2509, + "step": 70920 + }, + { + "epoch": 0.14328308762630446, + "grad_norm": 561.29443359375, + "learning_rate": 9.946718022331715e-06, + "loss": 24.0213, + "step": 70930 + }, + { + "epoch": 0.14330328825898828, + "grad_norm": 606.1071166992188, + "learning_rate": 9.946667186442162e-06, + "loss": 23.4115, + "step": 70940 + }, + { + "epoch": 0.1433234888916721, + "grad_norm": 1087.3997802734375, + "learning_rate": 9.946616326443153e-06, + "loss": 30.4881, + "step": 70950 + }, + { + "epoch": 0.14334368952435592, + "grad_norm": 187.8196563720703, + "learning_rate": 9.946565442334935e-06, + "loss": 21.7119, + "step": 70960 + }, + { + "epoch": 0.1433638901570397, + "grad_norm": 473.32635498046875, + "learning_rate": 9.946514534117755e-06, + "loss": 24.639, + "step": 70970 + }, + { + "epoch": 0.14338409078972353, + "grad_norm": 752.272216796875, + "learning_rate": 
9.946463601791865e-06, + "loss": 34.8138, + "step": 70980 + }, + { + "epoch": 0.14340429142240735, + "grad_norm": 426.7176208496094, + "learning_rate": 9.94641264535751e-06, + "loss": 26.9315, + "step": 70990 + }, + { + "epoch": 0.14342449205509117, + "grad_norm": 397.26904296875, + "learning_rate": 9.946361664814942e-06, + "loss": 20.7427, + "step": 71000 + }, + { + "epoch": 0.143444692687775, + "grad_norm": 177.0984649658203, + "learning_rate": 9.946310660164407e-06, + "loss": 18.6685, + "step": 71010 + }, + { + "epoch": 0.1434648933204588, + "grad_norm": 419.7292175292969, + "learning_rate": 9.946259631406153e-06, + "loss": 18.3675, + "step": 71020 + }, + { + "epoch": 0.1434850939531426, + "grad_norm": 580.2398681640625, + "learning_rate": 9.946208578540428e-06, + "loss": 25.4747, + "step": 71030 + }, + { + "epoch": 0.14350529458582642, + "grad_norm": 718.4306030273438, + "learning_rate": 9.946157501567484e-06, + "loss": 23.6021, + "step": 71040 + }, + { + "epoch": 0.14352549521851024, + "grad_norm": 560.6592407226562, + "learning_rate": 9.946106400487568e-06, + "loss": 17.248, + "step": 71050 + }, + { + "epoch": 0.14354569585119406, + "grad_norm": 392.8415222167969, + "learning_rate": 9.946055275300929e-06, + "loss": 19.7624, + "step": 71060 + }, + { + "epoch": 0.14356589648387788, + "grad_norm": 362.4658203125, + "learning_rate": 9.946004126007817e-06, + "loss": 14.2599, + "step": 71070 + }, + { + "epoch": 0.1435860971165617, + "grad_norm": 1601.98583984375, + "learning_rate": 9.94595295260848e-06, + "loss": 37.1649, + "step": 71080 + }, + { + "epoch": 0.1436062977492455, + "grad_norm": 0.0, + "learning_rate": 9.945901755103169e-06, + "loss": 31.0146, + "step": 71090 + }, + { + "epoch": 0.1436264983819293, + "grad_norm": 408.6718444824219, + "learning_rate": 9.945850533492132e-06, + "loss": 23.8855, + "step": 71100 + }, + { + "epoch": 0.14364669901461313, + "grad_norm": 375.17816162109375, + "learning_rate": 9.94579928777562e-06, + "loss": 59.7405, + "step": 71110 + }, + { + "epoch": 0.14366689964729695, + "grad_norm": 420.8493957519531, + "learning_rate": 9.94574801795388e-06, + "loss": 21.973, + "step": 71120 + }, + { + "epoch": 0.14368710027998077, + "grad_norm": 337.4616394042969, + "learning_rate": 9.945696724027166e-06, + "loss": 26.9458, + "step": 71130 + }, + { + "epoch": 0.1437073009126646, + "grad_norm": 458.1142883300781, + "learning_rate": 9.945645405995726e-06, + "loss": 16.4347, + "step": 71140 + }, + { + "epoch": 0.1437275015453484, + "grad_norm": 811.0916748046875, + "learning_rate": 9.94559406385981e-06, + "loss": 21.0274, + "step": 71150 + }, + { + "epoch": 0.1437477021780322, + "grad_norm": 215.50169372558594, + "learning_rate": 9.945542697619667e-06, + "loss": 22.2922, + "step": 71160 + }, + { + "epoch": 0.14376790281071603, + "grad_norm": 829.2427368164062, + "learning_rate": 9.94549130727555e-06, + "loss": 26.2956, + "step": 71170 + }, + { + "epoch": 0.14378810344339985, + "grad_norm": 208.13414001464844, + "learning_rate": 9.945439892827709e-06, + "loss": 18.2897, + "step": 71180 + }, + { + "epoch": 0.14380830407608367, + "grad_norm": 497.759521484375, + "learning_rate": 9.945388454276392e-06, + "loss": 24.3056, + "step": 71190 + }, + { + "epoch": 0.14382850470876749, + "grad_norm": 440.09063720703125, + "learning_rate": 9.945336991621854e-06, + "loss": 45.3409, + "step": 71200 + }, + { + "epoch": 0.1438487053414513, + "grad_norm": 448.5332946777344, + "learning_rate": 9.945285504864342e-06, + "loss": 25.1452, + "step": 71210 + }, + { + "epoch": 
0.1438689059741351, + "grad_norm": 254.63189697265625, + "learning_rate": 9.945233994004107e-06, + "loss": 12.5257, + "step": 71220 + }, + { + "epoch": 0.14388910660681892, + "grad_norm": 229.0892791748047, + "learning_rate": 9.945182459041403e-06, + "loss": 20.8888, + "step": 71230 + }, + { + "epoch": 0.14390930723950274, + "grad_norm": 329.3419189453125, + "learning_rate": 9.945130899976477e-06, + "loss": 26.5256, + "step": 71240 + }, + { + "epoch": 0.14392950787218656, + "grad_norm": 468.9582824707031, + "learning_rate": 9.945079316809585e-06, + "loss": 19.1805, + "step": 71250 + }, + { + "epoch": 0.14394970850487038, + "grad_norm": 264.82318115234375, + "learning_rate": 9.945027709540975e-06, + "loss": 26.3839, + "step": 71260 + }, + { + "epoch": 0.1439699091375542, + "grad_norm": 438.0096130371094, + "learning_rate": 9.9449760781709e-06, + "loss": 23.1159, + "step": 71270 + }, + { + "epoch": 0.14399010977023802, + "grad_norm": 433.0907897949219, + "learning_rate": 9.944924422699613e-06, + "loss": 23.6146, + "step": 71280 + }, + { + "epoch": 0.1440103104029218, + "grad_norm": 108.1781005859375, + "learning_rate": 9.944872743127363e-06, + "loss": 18.6154, + "step": 71290 + }, + { + "epoch": 0.14403051103560563, + "grad_norm": 152.50469970703125, + "learning_rate": 9.944821039454403e-06, + "loss": 25.8473, + "step": 71300 + }, + { + "epoch": 0.14405071166828945, + "grad_norm": 565.368896484375, + "learning_rate": 9.944769311680984e-06, + "loss": 22.8616, + "step": 71310 + }, + { + "epoch": 0.14407091230097327, + "grad_norm": 292.283447265625, + "learning_rate": 9.94471755980736e-06, + "loss": 25.7549, + "step": 71320 + }, + { + "epoch": 0.1440911129336571, + "grad_norm": 400.43670654296875, + "learning_rate": 9.944665783833782e-06, + "loss": 37.9263, + "step": 71330 + }, + { + "epoch": 0.1441113135663409, + "grad_norm": 591.67431640625, + "learning_rate": 9.944613983760503e-06, + "loss": 40.6577, + "step": 71340 + }, + { + "epoch": 0.1441315141990247, + "grad_norm": 323.1944580078125, + "learning_rate": 9.944562159587774e-06, + "loss": 20.8895, + "step": 71350 + }, + { + "epoch": 0.14415171483170852, + "grad_norm": 226.9590606689453, + "learning_rate": 9.94451031131585e-06, + "loss": 20.5786, + "step": 71360 + }, + { + "epoch": 0.14417191546439234, + "grad_norm": 200.12771606445312, + "learning_rate": 9.944458438944983e-06, + "loss": 22.3802, + "step": 71370 + }, + { + "epoch": 0.14419211609707616, + "grad_norm": 198.01441955566406, + "learning_rate": 9.944406542475425e-06, + "loss": 23.1966, + "step": 71380 + }, + { + "epoch": 0.14421231672975998, + "grad_norm": 760.1091918945312, + "learning_rate": 9.944354621907428e-06, + "loss": 29.3378, + "step": 71390 + }, + { + "epoch": 0.1442325173624438, + "grad_norm": 270.2275085449219, + "learning_rate": 9.944302677241247e-06, + "loss": 17.9261, + "step": 71400 + }, + { + "epoch": 0.1442527179951276, + "grad_norm": 1244.6324462890625, + "learning_rate": 9.944250708477135e-06, + "loss": 39.5314, + "step": 71410 + }, + { + "epoch": 0.14427291862781141, + "grad_norm": 1194.732666015625, + "learning_rate": 9.944198715615343e-06, + "loss": 38.33, + "step": 71420 + }, + { + "epoch": 0.14429311926049523, + "grad_norm": 353.905517578125, + "learning_rate": 9.944146698656127e-06, + "loss": 19.8253, + "step": 71430 + }, + { + "epoch": 0.14431331989317905, + "grad_norm": 589.7752075195312, + "learning_rate": 9.94409465759974e-06, + "loss": 30.6325, + "step": 71440 + }, + { + "epoch": 0.14433352052586287, + "grad_norm": 605.8091430664062, + 
"learning_rate": 9.944042592446434e-06, + "loss": 12.5729, + "step": 71450 + }, + { + "epoch": 0.1443537211585467, + "grad_norm": 256.3218078613281, + "learning_rate": 9.943990503196466e-06, + "loss": 24.6645, + "step": 71460 + }, + { + "epoch": 0.14437392179123051, + "grad_norm": 366.6457214355469, + "learning_rate": 9.943938389850087e-06, + "loss": 27.4997, + "step": 71470 + }, + { + "epoch": 0.1443941224239143, + "grad_norm": 243.17288208007812, + "learning_rate": 9.943886252407551e-06, + "loss": 28.5592, + "step": 71480 + }, + { + "epoch": 0.14441432305659813, + "grad_norm": 215.53054809570312, + "learning_rate": 9.943834090869116e-06, + "loss": 31.3464, + "step": 71490 + }, + { + "epoch": 0.14443452368928195, + "grad_norm": 322.67962646484375, + "learning_rate": 9.94378190523503e-06, + "loss": 32.2169, + "step": 71500 + }, + { + "epoch": 0.14445472432196577, + "grad_norm": 447.6349792480469, + "learning_rate": 9.943729695505552e-06, + "loss": 20.0924, + "step": 71510 + }, + { + "epoch": 0.1444749249546496, + "grad_norm": 477.0517883300781, + "learning_rate": 9.943677461680935e-06, + "loss": 47.1084, + "step": 71520 + }, + { + "epoch": 0.1444951255873334, + "grad_norm": 384.2557067871094, + "learning_rate": 9.943625203761434e-06, + "loss": 11.8908, + "step": 71530 + }, + { + "epoch": 0.1445153262200172, + "grad_norm": 230.05831909179688, + "learning_rate": 9.943572921747302e-06, + "loss": 34.5417, + "step": 71540 + }, + { + "epoch": 0.14453552685270102, + "grad_norm": 277.23028564453125, + "learning_rate": 9.943520615638796e-06, + "loss": 40.2084, + "step": 71550 + }, + { + "epoch": 0.14455572748538484, + "grad_norm": 230.21156311035156, + "learning_rate": 9.943468285436171e-06, + "loss": 29.1958, + "step": 71560 + }, + { + "epoch": 0.14457592811806866, + "grad_norm": 643.342529296875, + "learning_rate": 9.94341593113968e-06, + "loss": 35.6655, + "step": 71570 + }, + { + "epoch": 0.14459612875075248, + "grad_norm": 253.8309326171875, + "learning_rate": 9.943363552749579e-06, + "loss": 25.6384, + "step": 71580 + }, + { + "epoch": 0.1446163293834363, + "grad_norm": 603.95263671875, + "learning_rate": 9.943311150266124e-06, + "loss": 27.2352, + "step": 71590 + }, + { + "epoch": 0.14463653001612012, + "grad_norm": 601.6875, + "learning_rate": 9.94325872368957e-06, + "loss": 34.9108, + "step": 71600 + }, + { + "epoch": 0.1446567306488039, + "grad_norm": 501.0145263671875, + "learning_rate": 9.943206273020174e-06, + "loss": 37.0522, + "step": 71610 + }, + { + "epoch": 0.14467693128148773, + "grad_norm": 282.2042541503906, + "learning_rate": 9.943153798258188e-06, + "loss": 25.7665, + "step": 71620 + }, + { + "epoch": 0.14469713191417155, + "grad_norm": 731.9998168945312, + "learning_rate": 9.94310129940387e-06, + "loss": 35.901, + "step": 71630 + }, + { + "epoch": 0.14471733254685537, + "grad_norm": 175.04879760742188, + "learning_rate": 9.943048776457479e-06, + "loss": 19.0858, + "step": 71640 + }, + { + "epoch": 0.1447375331795392, + "grad_norm": 203.5707244873047, + "learning_rate": 9.942996229419264e-06, + "loss": 24.6141, + "step": 71650 + }, + { + "epoch": 0.144757733812223, + "grad_norm": 112.2118911743164, + "learning_rate": 9.942943658289487e-06, + "loss": 15.6003, + "step": 71660 + }, + { + "epoch": 0.1447779344449068, + "grad_norm": 635.341552734375, + "learning_rate": 9.942891063068401e-06, + "loss": 22.3218, + "step": 71670 + }, + { + "epoch": 0.14479813507759062, + "grad_norm": 1005.9336547851562, + "learning_rate": 9.942838443756265e-06, + "loss": 30.0047, + "step": 71680 + }, 
+ { + "epoch": 0.14481833571027444, + "grad_norm": 518.338623046875, + "learning_rate": 9.942785800353332e-06, + "loss": 21.7113, + "step": 71690 + }, + { + "epoch": 0.14483853634295826, + "grad_norm": 1058.7803955078125, + "learning_rate": 9.942733132859861e-06, + "loss": 31.9251, + "step": 71700 + }, + { + "epoch": 0.14485873697564208, + "grad_norm": 773.3302612304688, + "learning_rate": 9.94268044127611e-06, + "loss": 23.9225, + "step": 71710 + }, + { + "epoch": 0.1448789376083259, + "grad_norm": 324.1346435546875, + "learning_rate": 9.942627725602332e-06, + "loss": 27.4762, + "step": 71720 + }, + { + "epoch": 0.1448991382410097, + "grad_norm": 994.0797119140625, + "learning_rate": 9.942574985838785e-06, + "loss": 30.1357, + "step": 71730 + }, + { + "epoch": 0.14491933887369352, + "grad_norm": 356.2742004394531, + "learning_rate": 9.942522221985728e-06, + "loss": 12.7751, + "step": 71740 + }, + { + "epoch": 0.14493953950637734, + "grad_norm": 1333.2322998046875, + "learning_rate": 9.942469434043418e-06, + "loss": 56.2553, + "step": 71750 + }, + { + "epoch": 0.14495974013906116, + "grad_norm": 566.2124633789062, + "learning_rate": 9.942416622012113e-06, + "loss": 33.4768, + "step": 71760 + }, + { + "epoch": 0.14497994077174498, + "grad_norm": 307.00054931640625, + "learning_rate": 9.942363785892065e-06, + "loss": 27.0711, + "step": 71770 + }, + { + "epoch": 0.1450001414044288, + "grad_norm": 560.7721557617188, + "learning_rate": 9.942310925683538e-06, + "loss": 30.7037, + "step": 71780 + }, + { + "epoch": 0.14502034203711262, + "grad_norm": 684.5990600585938, + "learning_rate": 9.942258041386785e-06, + "loss": 22.6475, + "step": 71790 + }, + { + "epoch": 0.1450405426697964, + "grad_norm": 727.7120971679688, + "learning_rate": 9.942205133002067e-06, + "loss": 21.8784, + "step": 71800 + }, + { + "epoch": 0.14506074330248023, + "grad_norm": 561.783447265625, + "learning_rate": 9.94215220052964e-06, + "loss": 25.0401, + "step": 71810 + }, + { + "epoch": 0.14508094393516405, + "grad_norm": 275.6103820800781, + "learning_rate": 9.942099243969765e-06, + "loss": 18.0501, + "step": 71820 + }, + { + "epoch": 0.14510114456784787, + "grad_norm": 422.5455322265625, + "learning_rate": 9.942046263322694e-06, + "loss": 26.8563, + "step": 71830 + }, + { + "epoch": 0.1451213452005317, + "grad_norm": 210.2841796875, + "learning_rate": 9.941993258588691e-06, + "loss": 18.4556, + "step": 71840 + }, + { + "epoch": 0.1451415458332155, + "grad_norm": 1116.8270263671875, + "learning_rate": 9.941940229768012e-06, + "loss": 24.2325, + "step": 71850 + }, + { + "epoch": 0.1451617464658993, + "grad_norm": 1234.160400390625, + "learning_rate": 9.941887176860916e-06, + "loss": 41.3825, + "step": 71860 + }, + { + "epoch": 0.14518194709858312, + "grad_norm": 401.3832092285156, + "learning_rate": 9.94183409986766e-06, + "loss": 25.3118, + "step": 71870 + }, + { + "epoch": 0.14520214773126694, + "grad_norm": 474.1400146484375, + "learning_rate": 9.941780998788506e-06, + "loss": 20.4632, + "step": 71880 + }, + { + "epoch": 0.14522234836395076, + "grad_norm": 840.2498168945312, + "learning_rate": 9.941727873623709e-06, + "loss": 23.0177, + "step": 71890 + }, + { + "epoch": 0.14524254899663458, + "grad_norm": 611.0426635742188, + "learning_rate": 9.94167472437353e-06, + "loss": 17.3159, + "step": 71900 + }, + { + "epoch": 0.1452627496293184, + "grad_norm": 185.4032745361328, + "learning_rate": 9.941621551038228e-06, + "loss": 22.2357, + "step": 71910 + }, + { + "epoch": 0.14528295026200222, + "grad_norm": 188.64749145507812, 
+ "learning_rate": 9.941568353618064e-06, + "loss": 19.7696, + "step": 71920 + }, + { + "epoch": 0.145303150894686, + "grad_norm": 486.74554443359375, + "learning_rate": 9.941515132113291e-06, + "loss": 20.4263, + "step": 71930 + }, + { + "epoch": 0.14532335152736983, + "grad_norm": 1020.122314453125, + "learning_rate": 9.941461886524176e-06, + "loss": 24.4576, + "step": 71940 + }, + { + "epoch": 0.14534355216005365, + "grad_norm": 264.3952331542969, + "learning_rate": 9.941408616850974e-06, + "loss": 24.2844, + "step": 71950 + }, + { + "epoch": 0.14536375279273747, + "grad_norm": 311.9317932128906, + "learning_rate": 9.941355323093944e-06, + "loss": 25.734, + "step": 71960 + }, + { + "epoch": 0.1453839534254213, + "grad_norm": 331.9508056640625, + "learning_rate": 9.94130200525335e-06, + "loss": 21.6463, + "step": 71970 + }, + { + "epoch": 0.1454041540581051, + "grad_norm": 280.59490966796875, + "learning_rate": 9.941248663329448e-06, + "loss": 23.8914, + "step": 71980 + }, + { + "epoch": 0.1454243546907889, + "grad_norm": 605.7194213867188, + "learning_rate": 9.941195297322498e-06, + "loss": 43.5715, + "step": 71990 + }, + { + "epoch": 0.14544455532347272, + "grad_norm": 524.6144409179688, + "learning_rate": 9.941141907232766e-06, + "loss": 45.0033, + "step": 72000 + }, + { + "epoch": 0.14546475595615654, + "grad_norm": 158.6700897216797, + "learning_rate": 9.941088493060504e-06, + "loss": 9.7627, + "step": 72010 + }, + { + "epoch": 0.14548495658884036, + "grad_norm": 221.95321655273438, + "learning_rate": 9.941035054805977e-06, + "loss": 22.0498, + "step": 72020 + }, + { + "epoch": 0.14550515722152418, + "grad_norm": 384.6241760253906, + "learning_rate": 9.940981592469443e-06, + "loss": 25.4921, + "step": 72030 + }, + { + "epoch": 0.145525357854208, + "grad_norm": 591.4114990234375, + "learning_rate": 9.940928106051166e-06, + "loss": 21.4154, + "step": 72040 + }, + { + "epoch": 0.1455455584868918, + "grad_norm": 499.0342102050781, + "learning_rate": 9.940874595551403e-06, + "loss": 23.9025, + "step": 72050 + }, + { + "epoch": 0.14556575911957562, + "grad_norm": 80.63325500488281, + "learning_rate": 9.940821060970418e-06, + "loss": 13.7926, + "step": 72060 + }, + { + "epoch": 0.14558595975225944, + "grad_norm": 888.7040405273438, + "learning_rate": 9.940767502308469e-06, + "loss": 45.9061, + "step": 72070 + }, + { + "epoch": 0.14560616038494326, + "grad_norm": 318.607666015625, + "learning_rate": 9.940713919565819e-06, + "loss": 28.0343, + "step": 72080 + }, + { + "epoch": 0.14562636101762708, + "grad_norm": 273.0870666503906, + "learning_rate": 9.94066031274273e-06, + "loss": 22.2069, + "step": 72090 + }, + { + "epoch": 0.1456465616503109, + "grad_norm": 448.2008361816406, + "learning_rate": 9.94060668183946e-06, + "loss": 31.4073, + "step": 72100 + }, + { + "epoch": 0.14566676228299472, + "grad_norm": 479.724365234375, + "learning_rate": 9.940553026856273e-06, + "loss": 36.0503, + "step": 72110 + }, + { + "epoch": 0.1456869629156785, + "grad_norm": 589.8539428710938, + "learning_rate": 9.940499347793429e-06, + "loss": 33.8547, + "step": 72120 + }, + { + "epoch": 0.14570716354836233, + "grad_norm": 3227.3447265625, + "learning_rate": 9.940445644651191e-06, + "loss": 34.8153, + "step": 72130 + }, + { + "epoch": 0.14572736418104615, + "grad_norm": 115.9288101196289, + "learning_rate": 9.94039191742982e-06, + "loss": 19.407, + "step": 72140 + }, + { + "epoch": 0.14574756481372997, + "grad_norm": 314.36505126953125, + "learning_rate": 9.940338166129578e-06, + "loss": 22.6938, + "step": 
72150 + }, + { + "epoch": 0.1457677654464138, + "grad_norm": 200.53411865234375, + "learning_rate": 9.940284390750727e-06, + "loss": 13.9298, + "step": 72160 + }, + { + "epoch": 0.1457879660790976, + "grad_norm": 94.67725372314453, + "learning_rate": 9.94023059129353e-06, + "loss": 29.4556, + "step": 72170 + }, + { + "epoch": 0.1458081667117814, + "grad_norm": 327.7362365722656, + "learning_rate": 9.940176767758247e-06, + "loss": 22.0037, + "step": 72180 + }, + { + "epoch": 0.14582836734446522, + "grad_norm": 446.6937561035156, + "learning_rate": 9.940122920145142e-06, + "loss": 19.2912, + "step": 72190 + }, + { + "epoch": 0.14584856797714904, + "grad_norm": 612.349365234375, + "learning_rate": 9.940069048454478e-06, + "loss": 29.6461, + "step": 72200 + }, + { + "epoch": 0.14586876860983286, + "grad_norm": 673.0333862304688, + "learning_rate": 9.940015152686514e-06, + "loss": 17.071, + "step": 72210 + }, + { + "epoch": 0.14588896924251668, + "grad_norm": 688.1871337890625, + "learning_rate": 9.939961232841517e-06, + "loss": 21.6557, + "step": 72220 + }, + { + "epoch": 0.1459091698752005, + "grad_norm": 392.2479553222656, + "learning_rate": 9.939907288919749e-06, + "loss": 50.7839, + "step": 72230 + }, + { + "epoch": 0.14592937050788432, + "grad_norm": 220.849609375, + "learning_rate": 9.93985332092147e-06, + "loss": 36.6879, + "step": 72240 + }, + { + "epoch": 0.1459495711405681, + "grad_norm": 191.9847412109375, + "learning_rate": 9.939799328846947e-06, + "loss": 31.7834, + "step": 72250 + }, + { + "epoch": 0.14596977177325193, + "grad_norm": 1537.3671875, + "learning_rate": 9.93974531269644e-06, + "loss": 40.3923, + "step": 72260 + }, + { + "epoch": 0.14598997240593575, + "grad_norm": 484.6485900878906, + "learning_rate": 9.939691272470214e-06, + "loss": 33.6825, + "step": 72270 + }, + { + "epoch": 0.14601017303861957, + "grad_norm": 132.58514404296875, + "learning_rate": 9.939637208168532e-06, + "loss": 21.2194, + "step": 72280 + }, + { + "epoch": 0.1460303736713034, + "grad_norm": 546.3110961914062, + "learning_rate": 9.939583119791656e-06, + "loss": 22.5165, + "step": 72290 + }, + { + "epoch": 0.1460505743039872, + "grad_norm": 688.3536376953125, + "learning_rate": 9.939529007339852e-06, + "loss": 14.9624, + "step": 72300 + }, + { + "epoch": 0.146070774936671, + "grad_norm": 384.6953430175781, + "learning_rate": 9.939474870813383e-06, + "loss": 18.2546, + "step": 72310 + }, + { + "epoch": 0.14609097556935483, + "grad_norm": 402.7142028808594, + "learning_rate": 9.939420710212511e-06, + "loss": 25.0478, + "step": 72320 + }, + { + "epoch": 0.14611117620203865, + "grad_norm": 438.6862487792969, + "learning_rate": 9.939366525537503e-06, + "loss": 22.4429, + "step": 72330 + }, + { + "epoch": 0.14613137683472247, + "grad_norm": 558.4066162109375, + "learning_rate": 9.939312316788622e-06, + "loss": 35.3318, + "step": 72340 + }, + { + "epoch": 0.14615157746740629, + "grad_norm": 403.58538818359375, + "learning_rate": 9.93925808396613e-06, + "loss": 23.4264, + "step": 72350 + }, + { + "epoch": 0.1461717781000901, + "grad_norm": 608.3269653320312, + "learning_rate": 9.939203827070296e-06, + "loss": 24.3604, + "step": 72360 + }, + { + "epoch": 0.1461919787327739, + "grad_norm": 530.1038208007812, + "learning_rate": 9.939149546101379e-06, + "loss": 30.2006, + "step": 72370 + }, + { + "epoch": 0.14621217936545772, + "grad_norm": 136.15496826171875, + "learning_rate": 9.939095241059648e-06, + "loss": 54.0793, + "step": 72380 + }, + { + "epoch": 0.14623237999814154, + "grad_norm": 891.3991088867188, 
+ "learning_rate": 9.939040911945365e-06, + "loss": 22.0462, + "step": 72390 + }, + { + "epoch": 0.14625258063082536, + "grad_norm": 911.6983642578125, + "learning_rate": 9.938986558758795e-06, + "loss": 24.5087, + "step": 72400 + }, + { + "epoch": 0.14627278126350918, + "grad_norm": 462.1217956542969, + "learning_rate": 9.938932181500206e-06, + "loss": 15.7559, + "step": 72410 + }, + { + "epoch": 0.146292981896193, + "grad_norm": 341.0543518066406, + "learning_rate": 9.938877780169858e-06, + "loss": 22.3165, + "step": 72420 + }, + { + "epoch": 0.14631318252887682, + "grad_norm": 263.682373046875, + "learning_rate": 9.938823354768019e-06, + "loss": 29.8192, + "step": 72430 + }, + { + "epoch": 0.1463333831615606, + "grad_norm": 1487.048583984375, + "learning_rate": 9.938768905294954e-06, + "loss": 27.3454, + "step": 72440 + }, + { + "epoch": 0.14635358379424443, + "grad_norm": 643.3409423828125, + "learning_rate": 9.938714431750928e-06, + "loss": 13.5408, + "step": 72450 + }, + { + "epoch": 0.14637378442692825, + "grad_norm": 90.78771209716797, + "learning_rate": 9.938659934136208e-06, + "loss": 45.4623, + "step": 72460 + }, + { + "epoch": 0.14639398505961207, + "grad_norm": 196.9699249267578, + "learning_rate": 9.93860541245106e-06, + "loss": 21.7251, + "step": 72470 + }, + { + "epoch": 0.1464141856922959, + "grad_norm": 958.56005859375, + "learning_rate": 9.938550866695745e-06, + "loss": 30.8501, + "step": 72480 + }, + { + "epoch": 0.1464343863249797, + "grad_norm": 260.7809753417969, + "learning_rate": 9.938496296870532e-06, + "loss": 13.5617, + "step": 72490 + }, + { + "epoch": 0.1464545869576635, + "grad_norm": 315.475830078125, + "learning_rate": 9.938441702975689e-06, + "loss": 28.4831, + "step": 72500 + }, + { + "epoch": 0.14647478759034732, + "grad_norm": 295.13983154296875, + "learning_rate": 9.93838708501148e-06, + "loss": 16.444, + "step": 72510 + }, + { + "epoch": 0.14649498822303114, + "grad_norm": 232.6532440185547, + "learning_rate": 9.93833244297817e-06, + "loss": 27.1728, + "step": 72520 + }, + { + "epoch": 0.14651518885571496, + "grad_norm": 648.509033203125, + "learning_rate": 9.938277776876029e-06, + "loss": 49.6133, + "step": 72530 + }, + { + "epoch": 0.14653538948839878, + "grad_norm": 0.0, + "learning_rate": 9.938223086705318e-06, + "loss": 35.7085, + "step": 72540 + }, + { + "epoch": 0.1465555901210826, + "grad_norm": 210.35365295410156, + "learning_rate": 9.938168372466308e-06, + "loss": 26.5235, + "step": 72550 + }, + { + "epoch": 0.14657579075376642, + "grad_norm": 806.7994384765625, + "learning_rate": 9.938113634159266e-06, + "loss": 30.8781, + "step": 72560 + }, + { + "epoch": 0.14659599138645021, + "grad_norm": 231.79620361328125, + "learning_rate": 9.938058871784453e-06, + "loss": 17.0388, + "step": 72570 + }, + { + "epoch": 0.14661619201913403, + "grad_norm": 658.1468505859375, + "learning_rate": 9.938004085342144e-06, + "loss": 32.5382, + "step": 72580 + }, + { + "epoch": 0.14663639265181785, + "grad_norm": 463.2548828125, + "learning_rate": 9.9379492748326e-06, + "loss": 18.3364, + "step": 72590 + }, + { + "epoch": 0.14665659328450167, + "grad_norm": 674.02099609375, + "learning_rate": 9.937894440256091e-06, + "loss": 22.3668, + "step": 72600 + }, + { + "epoch": 0.1466767939171855, + "grad_norm": 305.38836669921875, + "learning_rate": 9.937839581612883e-06, + "loss": 21.051, + "step": 72610 + }, + { + "epoch": 0.14669699454986931, + "grad_norm": 644.2742309570312, + "learning_rate": 9.937784698903244e-06, + "loss": 41.5936, + "step": 72620 + }, + { + 
"epoch": 0.1467171951825531, + "grad_norm": 472.70849609375, + "learning_rate": 9.937729792127439e-06, + "loss": 24.1044, + "step": 72630 + }, + { + "epoch": 0.14673739581523693, + "grad_norm": 102.48870086669922, + "learning_rate": 9.93767486128574e-06, + "loss": 25.1896, + "step": 72640 + }, + { + "epoch": 0.14675759644792075, + "grad_norm": 204.5181121826172, + "learning_rate": 9.937619906378413e-06, + "loss": 20.637, + "step": 72650 + }, + { + "epoch": 0.14677779708060457, + "grad_norm": 694.7501220703125, + "learning_rate": 9.937564927405724e-06, + "loss": 53.5922, + "step": 72660 + }, + { + "epoch": 0.1467979977132884, + "grad_norm": 728.1456909179688, + "learning_rate": 9.937509924367944e-06, + "loss": 25.4128, + "step": 72670 + }, + { + "epoch": 0.1468181983459722, + "grad_norm": 596.4130859375, + "learning_rate": 9.937454897265338e-06, + "loss": 20.684, + "step": 72680 + }, + { + "epoch": 0.146838398978656, + "grad_norm": 476.9186706542969, + "learning_rate": 9.937399846098177e-06, + "loss": 24.8751, + "step": 72690 + }, + { + "epoch": 0.14685859961133982, + "grad_norm": 375.2703552246094, + "learning_rate": 9.937344770866727e-06, + "loss": 23.6126, + "step": 72700 + }, + { + "epoch": 0.14687880024402364, + "grad_norm": 0.0, + "learning_rate": 9.937289671571257e-06, + "loss": 13.9453, + "step": 72710 + }, + { + "epoch": 0.14689900087670746, + "grad_norm": 703.7996215820312, + "learning_rate": 9.937234548212038e-06, + "loss": 35.1447, + "step": 72720 + }, + { + "epoch": 0.14691920150939128, + "grad_norm": 388.9770202636719, + "learning_rate": 9.937179400789336e-06, + "loss": 25.3248, + "step": 72730 + }, + { + "epoch": 0.1469394021420751, + "grad_norm": 896.0739135742188, + "learning_rate": 9.937124229303419e-06, + "loss": 35.4272, + "step": 72740 + }, + { + "epoch": 0.14695960277475892, + "grad_norm": 51.91160583496094, + "learning_rate": 9.937069033754558e-06, + "loss": 16.3871, + "step": 72750 + }, + { + "epoch": 0.1469798034074427, + "grad_norm": 319.04730224609375, + "learning_rate": 9.937013814143021e-06, + "loss": 20.4597, + "step": 72760 + }, + { + "epoch": 0.14700000404012653, + "grad_norm": 857.5437622070312, + "learning_rate": 9.936958570469077e-06, + "loss": 26.6086, + "step": 72770 + }, + { + "epoch": 0.14702020467281035, + "grad_norm": 318.8205261230469, + "learning_rate": 9.936903302732997e-06, + "loss": 32.8497, + "step": 72780 + }, + { + "epoch": 0.14704040530549417, + "grad_norm": 105.94303131103516, + "learning_rate": 9.936848010935049e-06, + "loss": 18.9716, + "step": 72790 + }, + { + "epoch": 0.147060605938178, + "grad_norm": 305.947265625, + "learning_rate": 9.936792695075502e-06, + "loss": 20.7573, + "step": 72800 + }, + { + "epoch": 0.1470808065708618, + "grad_norm": 627.2362670898438, + "learning_rate": 9.936737355154627e-06, + "loss": 22.1974, + "step": 72810 + }, + { + "epoch": 0.1471010072035456, + "grad_norm": 54.313289642333984, + "learning_rate": 9.936681991172692e-06, + "loss": 51.2659, + "step": 72820 + }, + { + "epoch": 0.14712120783622942, + "grad_norm": 207.916015625, + "learning_rate": 9.936626603129968e-06, + "loss": 21.5755, + "step": 72830 + }, + { + "epoch": 0.14714140846891324, + "grad_norm": 37.225521087646484, + "learning_rate": 9.936571191026726e-06, + "loss": 13.9551, + "step": 72840 + }, + { + "epoch": 0.14716160910159706, + "grad_norm": 607.05322265625, + "learning_rate": 9.936515754863231e-06, + "loss": 20.6456, + "step": 72850 + }, + { + "epoch": 0.14718180973428088, + "grad_norm": 301.5707702636719, + "learning_rate": 
9.93646029463976e-06, + "loss": 40.3485, + "step": 72860 + }, + { + "epoch": 0.1472020103669647, + "grad_norm": 448.8086853027344, + "learning_rate": 9.93640481035658e-06, + "loss": 21.5936, + "step": 72870 + }, + { + "epoch": 0.14722221099964852, + "grad_norm": 683.2775268554688, + "learning_rate": 9.936349302013962e-06, + "loss": 23.9059, + "step": 72880 + }, + { + "epoch": 0.14724241163233232, + "grad_norm": 495.9053649902344, + "learning_rate": 9.936293769612175e-06, + "loss": 23.7701, + "step": 72890 + }, + { + "epoch": 0.14726261226501614, + "grad_norm": 631.983642578125, + "learning_rate": 9.936238213151491e-06, + "loss": 25.8129, + "step": 72900 + }, + { + "epoch": 0.14728281289769996, + "grad_norm": 886.5300903320312, + "learning_rate": 9.93618263263218e-06, + "loss": 22.7748, + "step": 72910 + }, + { + "epoch": 0.14730301353038378, + "grad_norm": 615.3167114257812, + "learning_rate": 9.936127028054516e-06, + "loss": 31.9266, + "step": 72920 + }, + { + "epoch": 0.1473232141630676, + "grad_norm": 245.50645446777344, + "learning_rate": 9.936071399418764e-06, + "loss": 29.3315, + "step": 72930 + }, + { + "epoch": 0.14734341479575142, + "grad_norm": 393.6664123535156, + "learning_rate": 9.936015746725202e-06, + "loss": 19.3517, + "step": 72940 + }, + { + "epoch": 0.1473636154284352, + "grad_norm": 333.4764099121094, + "learning_rate": 9.935960069974096e-06, + "loss": 12.023, + "step": 72950 + }, + { + "epoch": 0.14738381606111903, + "grad_norm": 338.2967224121094, + "learning_rate": 9.93590436916572e-06, + "loss": 32.581, + "step": 72960 + }, + { + "epoch": 0.14740401669380285, + "grad_norm": 234.2685089111328, + "learning_rate": 9.935848644300345e-06, + "loss": 21.2717, + "step": 72970 + }, + { + "epoch": 0.14742421732648667, + "grad_norm": 1314.709716796875, + "learning_rate": 9.935792895378243e-06, + "loss": 38.1685, + "step": 72980 + }, + { + "epoch": 0.1474444179591705, + "grad_norm": 145.0713653564453, + "learning_rate": 9.935737122399683e-06, + "loss": 18.8477, + "step": 72990 + }, + { + "epoch": 0.1474646185918543, + "grad_norm": 231.71591186523438, + "learning_rate": 9.93568132536494e-06, + "loss": 15.7926, + "step": 73000 + }, + { + "epoch": 0.1474848192245381, + "grad_norm": 409.4983825683594, + "learning_rate": 9.935625504274284e-06, + "loss": 22.0732, + "step": 73010 + }, + { + "epoch": 0.14750501985722192, + "grad_norm": 316.9377746582031, + "learning_rate": 9.93556965912799e-06, + "loss": 30.9234, + "step": 73020 + }, + { + "epoch": 0.14752522048990574, + "grad_norm": 176.481201171875, + "learning_rate": 9.935513789926327e-06, + "loss": 33.495, + "step": 73030 + }, + { + "epoch": 0.14754542112258956, + "grad_norm": 481.75, + "learning_rate": 9.935457896669568e-06, + "loss": 26.2654, + "step": 73040 + }, + { + "epoch": 0.14756562175527338, + "grad_norm": 501.1618347167969, + "learning_rate": 9.935401979357985e-06, + "loss": 14.997, + "step": 73050 + }, + { + "epoch": 0.1475858223879572, + "grad_norm": 503.587646484375, + "learning_rate": 9.935346037991854e-06, + "loss": 27.9058, + "step": 73060 + }, + { + "epoch": 0.14760602302064102, + "grad_norm": 726.5780029296875, + "learning_rate": 9.935290072571442e-06, + "loss": 20.1439, + "step": 73070 + }, + { + "epoch": 0.1476262236533248, + "grad_norm": 296.281982421875, + "learning_rate": 9.935234083097028e-06, + "loss": 25.1859, + "step": 73080 + }, + { + "epoch": 0.14764642428600863, + "grad_norm": 439.1563415527344, + "learning_rate": 9.935178069568878e-06, + "loss": 41.3575, + "step": 73090 + }, + { + "epoch": 
0.14766662491869245, + "grad_norm": 594.9931640625, + "learning_rate": 9.93512203198727e-06, + "loss": 19.4409, + "step": 73100 + }, + { + "epoch": 0.14768682555137627, + "grad_norm": 542.8465576171875, + "learning_rate": 9.935065970352477e-06, + "loss": 13.9351, + "step": 73110 + }, + { + "epoch": 0.1477070261840601, + "grad_norm": 549.0725708007812, + "learning_rate": 9.93500988466477e-06, + "loss": 28.5047, + "step": 73120 + }, + { + "epoch": 0.1477272268167439, + "grad_norm": 386.43927001953125, + "learning_rate": 9.934953774924425e-06, + "loss": 35.2264, + "step": 73130 + }, + { + "epoch": 0.1477474274494277, + "grad_norm": 102.36702728271484, + "learning_rate": 9.934897641131712e-06, + "loss": 29.2037, + "step": 73140 + }, + { + "epoch": 0.14776762808211152, + "grad_norm": 502.3369445800781, + "learning_rate": 9.934841483286907e-06, + "loss": 34.0425, + "step": 73150 + }, + { + "epoch": 0.14778782871479534, + "grad_norm": 143.2417449951172, + "learning_rate": 9.934785301390282e-06, + "loss": 35.4966, + "step": 73160 + }, + { + "epoch": 0.14780802934747916, + "grad_norm": 582.600830078125, + "learning_rate": 9.934729095442113e-06, + "loss": 26.3404, + "step": 73170 + }, + { + "epoch": 0.14782822998016298, + "grad_norm": 16.06324005126953, + "learning_rate": 9.934672865442673e-06, + "loss": 11.684, + "step": 73180 + }, + { + "epoch": 0.1478484306128468, + "grad_norm": 486.680419921875, + "learning_rate": 9.934616611392235e-06, + "loss": 22.8405, + "step": 73190 + }, + { + "epoch": 0.1478686312455306, + "grad_norm": 1008.5476684570312, + "learning_rate": 9.934560333291077e-06, + "loss": 34.9215, + "step": 73200 + }, + { + "epoch": 0.14788883187821442, + "grad_norm": 362.5773620605469, + "learning_rate": 9.934504031139468e-06, + "loss": 35.7853, + "step": 73210 + }, + { + "epoch": 0.14790903251089824, + "grad_norm": 552.7269897460938, + "learning_rate": 9.934447704937684e-06, + "loss": 27.2318, + "step": 73220 + }, + { + "epoch": 0.14792923314358206, + "grad_norm": 1780.918212890625, + "learning_rate": 9.934391354686002e-06, + "loss": 46.4913, + "step": 73230 + }, + { + "epoch": 0.14794943377626588, + "grad_norm": 63.38420867919922, + "learning_rate": 9.934334980384694e-06, + "loss": 18.7989, + "step": 73240 + }, + { + "epoch": 0.1479696344089497, + "grad_norm": 250.62246704101562, + "learning_rate": 9.934278582034037e-06, + "loss": 30.9458, + "step": 73250 + }, + { + "epoch": 0.14798983504163352, + "grad_norm": 820.0814208984375, + "learning_rate": 9.934222159634303e-06, + "loss": 25.7737, + "step": 73260 + }, + { + "epoch": 0.1480100356743173, + "grad_norm": 222.3558807373047, + "learning_rate": 9.93416571318577e-06, + "loss": 19.4418, + "step": 73270 + }, + { + "epoch": 0.14803023630700113, + "grad_norm": 472.84735107421875, + "learning_rate": 9.934109242688712e-06, + "loss": 23.9828, + "step": 73280 + }, + { + "epoch": 0.14805043693968495, + "grad_norm": 714.0300903320312, + "learning_rate": 9.934052748143403e-06, + "loss": 52.5531, + "step": 73290 + }, + { + "epoch": 0.14807063757236877, + "grad_norm": 398.2305908203125, + "learning_rate": 9.93399622955012e-06, + "loss": 20.6784, + "step": 73300 + }, + { + "epoch": 0.1480908382050526, + "grad_norm": 104.2729263305664, + "learning_rate": 9.933939686909137e-06, + "loss": 24.4492, + "step": 73310 + }, + { + "epoch": 0.1481110388377364, + "grad_norm": 1208.980224609375, + "learning_rate": 9.933883120220731e-06, + "loss": 34.2655, + "step": 73320 + }, + { + "epoch": 0.1481312394704202, + "grad_norm": 200.7018280029297, + "learning_rate": 
9.933826529485178e-06, + "loss": 40.8249, + "step": 73330 + }, + { + "epoch": 0.14815144010310402, + "grad_norm": 139.2305450439453, + "learning_rate": 9.933769914702751e-06, + "loss": 18.0279, + "step": 73340 + }, + { + "epoch": 0.14817164073578784, + "grad_norm": 537.5657348632812, + "learning_rate": 9.933713275873728e-06, + "loss": 30.6076, + "step": 73350 + }, + { + "epoch": 0.14819184136847166, + "grad_norm": 290.5312805175781, + "learning_rate": 9.933656612998387e-06, + "loss": 20.4049, + "step": 73360 + }, + { + "epoch": 0.14821204200115548, + "grad_norm": 461.2494812011719, + "learning_rate": 9.933599926077e-06, + "loss": 25.0895, + "step": 73370 + }, + { + "epoch": 0.1482322426338393, + "grad_norm": 345.025634765625, + "learning_rate": 9.933543215109846e-06, + "loss": 22.5119, + "step": 73380 + }, + { + "epoch": 0.14825244326652312, + "grad_norm": 417.30206298828125, + "learning_rate": 9.933486480097201e-06, + "loss": 31.8335, + "step": 73390 + }, + { + "epoch": 0.1482726438992069, + "grad_norm": 17.376859664916992, + "learning_rate": 9.93342972103934e-06, + "loss": 18.072, + "step": 73400 + }, + { + "epoch": 0.14829284453189073, + "grad_norm": 380.94287109375, + "learning_rate": 9.933372937936542e-06, + "loss": 39.921, + "step": 73410 + }, + { + "epoch": 0.14831304516457455, + "grad_norm": 909.1221313476562, + "learning_rate": 9.933316130789084e-06, + "loss": 38.6447, + "step": 73420 + }, + { + "epoch": 0.14833324579725837, + "grad_norm": 382.06170654296875, + "learning_rate": 9.93325929959724e-06, + "loss": 34.3546, + "step": 73430 + }, + { + "epoch": 0.1483534464299422, + "grad_norm": 166.60946655273438, + "learning_rate": 9.933202444361288e-06, + "loss": 18.317, + "step": 73440 + }, + { + "epoch": 0.148373647062626, + "grad_norm": 561.4780883789062, + "learning_rate": 9.933145565081506e-06, + "loss": 21.7824, + "step": 73450 + }, + { + "epoch": 0.1483938476953098, + "grad_norm": 776.0906982421875, + "learning_rate": 9.933088661758172e-06, + "loss": 23.8071, + "step": 73460 + }, + { + "epoch": 0.14841404832799363, + "grad_norm": 535.42431640625, + "learning_rate": 9.933031734391561e-06, + "loss": 24.7226, + "step": 73470 + }, + { + "epoch": 0.14843424896067745, + "grad_norm": 219.21336364746094, + "learning_rate": 9.93297478298195e-06, + "loss": 16.1967, + "step": 73480 + }, + { + "epoch": 0.14845444959336127, + "grad_norm": 315.6730041503906, + "learning_rate": 9.93291780752962e-06, + "loss": 10.5076, + "step": 73490 + }, + { + "epoch": 0.14847465022604509, + "grad_norm": 326.9447937011719, + "learning_rate": 9.932860808034847e-06, + "loss": 22.2159, + "step": 73500 + }, + { + "epoch": 0.1484948508587289, + "grad_norm": 153.1551055908203, + "learning_rate": 9.93280378449791e-06, + "loss": 45.4165, + "step": 73510 + }, + { + "epoch": 0.1485150514914127, + "grad_norm": 309.3044128417969, + "learning_rate": 9.932746736919084e-06, + "loss": 36.2212, + "step": 73520 + }, + { + "epoch": 0.14853525212409652, + "grad_norm": 96.15229034423828, + "learning_rate": 9.93268966529865e-06, + "loss": 27.7137, + "step": 73530 + }, + { + "epoch": 0.14855545275678034, + "grad_norm": 145.47523498535156, + "learning_rate": 9.932632569636882e-06, + "loss": 18.7185, + "step": 73540 + }, + { + "epoch": 0.14857565338946416, + "grad_norm": 754.7882690429688, + "learning_rate": 9.932575449934063e-06, + "loss": 17.8452, + "step": 73550 + }, + { + "epoch": 0.14859585402214798, + "grad_norm": 1249.251953125, + "learning_rate": 9.93251830619047e-06, + "loss": 34.2317, + "step": 73560 + }, + { + "epoch": 
0.1486160546548318, + "grad_norm": 577.043701171875, + "learning_rate": 9.93246113840638e-06, + "loss": 31.0139, + "step": 73570 + }, + { + "epoch": 0.14863625528751562, + "grad_norm": 778.6085205078125, + "learning_rate": 9.932403946582071e-06, + "loss": 30.8692, + "step": 73580 + }, + { + "epoch": 0.1486564559201994, + "grad_norm": 703.0812377929688, + "learning_rate": 9.932346730717828e-06, + "loss": 18.4887, + "step": 73590 + }, + { + "epoch": 0.14867665655288323, + "grad_norm": 294.0816345214844, + "learning_rate": 9.932289490813922e-06, + "loss": 26.0219, + "step": 73600 + }, + { + "epoch": 0.14869685718556705, + "grad_norm": 444.4838562011719, + "learning_rate": 9.932232226870635e-06, + "loss": 17.5626, + "step": 73610 + }, + { + "epoch": 0.14871705781825087, + "grad_norm": 185.8114013671875, + "learning_rate": 9.932174938888248e-06, + "loss": 20.0357, + "step": 73620 + }, + { + "epoch": 0.1487372584509347, + "grad_norm": 641.9573974609375, + "learning_rate": 9.932117626867037e-06, + "loss": 20.9796, + "step": 73630 + }, + { + "epoch": 0.1487574590836185, + "grad_norm": 871.9342041015625, + "learning_rate": 9.932060290807283e-06, + "loss": 23.5767, + "step": 73640 + }, + { + "epoch": 0.1487776597163023, + "grad_norm": 996.84326171875, + "learning_rate": 9.932002930709268e-06, + "loss": 25.4256, + "step": 73650 + }, + { + "epoch": 0.14879786034898612, + "grad_norm": 1495.176025390625, + "learning_rate": 9.931945546573266e-06, + "loss": 35.1073, + "step": 73660 + }, + { + "epoch": 0.14881806098166994, + "grad_norm": 597.5657348632812, + "learning_rate": 9.931888138399562e-06, + "loss": 22.791, + "step": 73670 + }, + { + "epoch": 0.14883826161435376, + "grad_norm": 597.6026611328125, + "learning_rate": 9.93183070618843e-06, + "loss": 40.5281, + "step": 73680 + }, + { + "epoch": 0.14885846224703758, + "grad_norm": 557.2459716796875, + "learning_rate": 9.931773249940156e-06, + "loss": 14.6514, + "step": 73690 + }, + { + "epoch": 0.1488786628797214, + "grad_norm": 438.7613525390625, + "learning_rate": 9.931715769655017e-06, + "loss": 26.2284, + "step": 73700 + }, + { + "epoch": 0.14889886351240522, + "grad_norm": 747.023193359375, + "learning_rate": 9.931658265333293e-06, + "loss": 32.2668, + "step": 73710 + }, + { + "epoch": 0.14891906414508901, + "grad_norm": 264.25408935546875, + "learning_rate": 9.931600736975264e-06, + "loss": 23.192, + "step": 73720 + }, + { + "epoch": 0.14893926477777283, + "grad_norm": 806.9119262695312, + "learning_rate": 9.93154318458121e-06, + "loss": 31.4644, + "step": 73730 + }, + { + "epoch": 0.14895946541045665, + "grad_norm": 129.6310272216797, + "learning_rate": 9.931485608151416e-06, + "loss": 30.5778, + "step": 73740 + }, + { + "epoch": 0.14897966604314047, + "grad_norm": 556.3810424804688, + "learning_rate": 9.931428007686158e-06, + "loss": 33.2141, + "step": 73750 + }, + { + "epoch": 0.1489998666758243, + "grad_norm": 291.308349609375, + "learning_rate": 9.931370383185717e-06, + "loss": 22.1474, + "step": 73760 + }, + { + "epoch": 0.14902006730850811, + "grad_norm": 460.9413146972656, + "learning_rate": 9.931312734650376e-06, + "loss": 33.3643, + "step": 73770 + }, + { + "epoch": 0.1490402679411919, + "grad_norm": 124.01321411132812, + "learning_rate": 9.931255062080415e-06, + "loss": 18.3147, + "step": 73780 + }, + { + "epoch": 0.14906046857387573, + "grad_norm": 175.38063049316406, + "learning_rate": 9.931197365476113e-06, + "loss": 14.1293, + "step": 73790 + }, + { + "epoch": 0.14908066920655955, + "grad_norm": 290.3349914550781, + "learning_rate": 
9.931139644837755e-06, + "loss": 14.0846, + "step": 73800 + }, + { + "epoch": 0.14910086983924337, + "grad_norm": 833.2294311523438, + "learning_rate": 9.93108190016562e-06, + "loss": 30.1313, + "step": 73810 + }, + { + "epoch": 0.1491210704719272, + "grad_norm": 368.4813537597656, + "learning_rate": 9.93102413145999e-06, + "loss": 18.284, + "step": 73820 + }, + { + "epoch": 0.149141271104611, + "grad_norm": 315.0820007324219, + "learning_rate": 9.930966338721146e-06, + "loss": 32.4391, + "step": 73830 + }, + { + "epoch": 0.1491614717372948, + "grad_norm": 446.75750732421875, + "learning_rate": 9.930908521949371e-06, + "loss": 18.0605, + "step": 73840 + }, + { + "epoch": 0.14918167236997862, + "grad_norm": 471.6275634765625, + "learning_rate": 9.930850681144946e-06, + "loss": 22.9185, + "step": 73850 + }, + { + "epoch": 0.14920187300266244, + "grad_norm": 325.270263671875, + "learning_rate": 9.930792816308151e-06, + "loss": 29.7445, + "step": 73860 + }, + { + "epoch": 0.14922207363534626, + "grad_norm": 479.4029235839844, + "learning_rate": 9.930734927439272e-06, + "loss": 24.8208, + "step": 73870 + }, + { + "epoch": 0.14924227426803008, + "grad_norm": 184.0373992919922, + "learning_rate": 9.930677014538587e-06, + "loss": 31.5938, + "step": 73880 + }, + { + "epoch": 0.1492624749007139, + "grad_norm": 830.44921875, + "learning_rate": 9.93061907760638e-06, + "loss": 22.3806, + "step": 73890 + }, + { + "epoch": 0.14928267553339772, + "grad_norm": 287.38494873046875, + "learning_rate": 9.930561116642936e-06, + "loss": 33.4672, + "step": 73900 + }, + { + "epoch": 0.1493028761660815, + "grad_norm": 918.8916015625, + "learning_rate": 9.930503131648535e-06, + "loss": 46.5241, + "step": 73910 + }, + { + "epoch": 0.14932307679876533, + "grad_norm": 1375.9710693359375, + "learning_rate": 9.930445122623458e-06, + "loss": 19.428, + "step": 73920 + }, + { + "epoch": 0.14934327743144915, + "grad_norm": 59.05356979370117, + "learning_rate": 9.93038708956799e-06, + "loss": 31.6225, + "step": 73930 + }, + { + "epoch": 0.14936347806413297, + "grad_norm": 588.1260986328125, + "learning_rate": 9.930329032482412e-06, + "loss": 33.2808, + "step": 73940 + }, + { + "epoch": 0.1493836786968168, + "grad_norm": 764.7052001953125, + "learning_rate": 9.930270951367012e-06, + "loss": 36.3644, + "step": 73950 + }, + { + "epoch": 0.1494038793295006, + "grad_norm": 249.54364013671875, + "learning_rate": 9.930212846222065e-06, + "loss": 19.8232, + "step": 73960 + }, + { + "epoch": 0.1494240799621844, + "grad_norm": 973.8547973632812, + "learning_rate": 9.930154717047862e-06, + "loss": 22.1961, + "step": 73970 + }, + { + "epoch": 0.14944428059486822, + "grad_norm": 854.751220703125, + "learning_rate": 9.930096563844682e-06, + "loss": 57.2152, + "step": 73980 + }, + { + "epoch": 0.14946448122755204, + "grad_norm": 548.9900512695312, + "learning_rate": 9.930038386612809e-06, + "loss": 26.7725, + "step": 73990 + }, + { + "epoch": 0.14948468186023586, + "grad_norm": 520.6438598632812, + "learning_rate": 9.929980185352525e-06, + "loss": 21.2265, + "step": 74000 + }, + { + "epoch": 0.14950488249291968, + "grad_norm": 46.74055480957031, + "learning_rate": 9.929921960064117e-06, + "loss": 15.4381, + "step": 74010 + }, + { + "epoch": 0.1495250831256035, + "grad_norm": 67.48046875, + "learning_rate": 9.929863710747869e-06, + "loss": 34.7771, + "step": 74020 + }, + { + "epoch": 0.14954528375828732, + "grad_norm": 496.70318603515625, + "learning_rate": 9.929805437404061e-06, + "loss": 22.7784, + "step": 74030 + }, + { + "epoch": 
0.14956548439097112, + "grad_norm": 363.4870910644531, + "learning_rate": 9.929747140032979e-06, + "loss": 30.567, + "step": 74040 + }, + { + "epoch": 0.14958568502365494, + "grad_norm": 459.33447265625, + "learning_rate": 9.929688818634909e-06, + "loss": 27.3487, + "step": 74050 + }, + { + "epoch": 0.14960588565633876, + "grad_norm": 586.962646484375, + "learning_rate": 9.929630473210132e-06, + "loss": 31.9464, + "step": 74060 + }, + { + "epoch": 0.14962608628902258, + "grad_norm": 106.21631622314453, + "learning_rate": 9.929572103758935e-06, + "loss": 14.6552, + "step": 74070 + }, + { + "epoch": 0.1496462869217064, + "grad_norm": 579.7979125976562, + "learning_rate": 9.929513710281602e-06, + "loss": 25.5261, + "step": 74080 + }, + { + "epoch": 0.14966648755439022, + "grad_norm": 310.973876953125, + "learning_rate": 9.929455292778416e-06, + "loss": 16.3713, + "step": 74090 + }, + { + "epoch": 0.149686688187074, + "grad_norm": 586.092529296875, + "learning_rate": 9.929396851249661e-06, + "loss": 32.1962, + "step": 74100 + }, + { + "epoch": 0.14970688881975783, + "grad_norm": 509.2660217285156, + "learning_rate": 9.929338385695626e-06, + "loss": 38.1233, + "step": 74110 + }, + { + "epoch": 0.14972708945244165, + "grad_norm": 702.5281372070312, + "learning_rate": 9.929279896116595e-06, + "loss": 22.2846, + "step": 74120 + }, + { + "epoch": 0.14974729008512547, + "grad_norm": 1263.78515625, + "learning_rate": 9.92922138251285e-06, + "loss": 27.9118, + "step": 74130 + }, + { + "epoch": 0.1497674907178093, + "grad_norm": 309.19757080078125, + "learning_rate": 9.929162844884676e-06, + "loss": 23.4226, + "step": 74140 + }, + { + "epoch": 0.1497876913504931, + "grad_norm": 427.94464111328125, + "learning_rate": 9.929104283232363e-06, + "loss": 19.4286, + "step": 74150 + }, + { + "epoch": 0.1498078919831769, + "grad_norm": 336.3860778808594, + "learning_rate": 9.929045697556192e-06, + "loss": 17.9172, + "step": 74160 + }, + { + "epoch": 0.14982809261586072, + "grad_norm": 173.4564666748047, + "learning_rate": 9.92898708785645e-06, + "loss": 14.8733, + "step": 74170 + }, + { + "epoch": 0.14984829324854454, + "grad_norm": 299.27276611328125, + "learning_rate": 9.928928454133424e-06, + "loss": 11.8068, + "step": 74180 + }, + { + "epoch": 0.14986849388122836, + "grad_norm": 350.9851989746094, + "learning_rate": 9.928869796387396e-06, + "loss": 20.769, + "step": 74190 + }, + { + "epoch": 0.14988869451391218, + "grad_norm": 254.02316284179688, + "learning_rate": 9.928811114618658e-06, + "loss": 44.699, + "step": 74200 + }, + { + "epoch": 0.149908895146596, + "grad_norm": 1146.9010009765625, + "learning_rate": 9.92875240882749e-06, + "loss": 18.2647, + "step": 74210 + }, + { + "epoch": 0.14992909577927982, + "grad_norm": 595.7329711914062, + "learning_rate": 9.92869367901418e-06, + "loss": 28.4935, + "step": 74220 + }, + { + "epoch": 0.1499492964119636, + "grad_norm": 61.0308837890625, + "learning_rate": 9.928634925179018e-06, + "loss": 26.1481, + "step": 74230 + }, + { + "epoch": 0.14996949704464743, + "grad_norm": 545.187255859375, + "learning_rate": 9.928576147322283e-06, + "loss": 26.7562, + "step": 74240 + }, + { + "epoch": 0.14998969767733125, + "grad_norm": 535.875, + "learning_rate": 9.92851734544427e-06, + "loss": 44.9036, + "step": 74250 + }, + { + "epoch": 0.15000989831001507, + "grad_norm": 366.1502380371094, + "learning_rate": 9.928458519545258e-06, + "loss": 26.5014, + "step": 74260 + }, + { + "epoch": 0.1500300989426989, + "grad_norm": 377.4646301269531, + "learning_rate": 
9.928399669625537e-06, + "loss": 19.756, + "step": 74270 + }, + { + "epoch": 0.1500502995753827, + "grad_norm": 847.5916748046875, + "learning_rate": 9.928340795685396e-06, + "loss": 45.6313, + "step": 74280 + }, + { + "epoch": 0.1500705002080665, + "grad_norm": 234.44790649414062, + "learning_rate": 9.928281897725117e-06, + "loss": 29.406, + "step": 74290 + }, + { + "epoch": 0.15009070084075032, + "grad_norm": 174.0032958984375, + "learning_rate": 9.928222975744992e-06, + "loss": 24.4992, + "step": 74300 + }, + { + "epoch": 0.15011090147343414, + "grad_norm": 289.6244812011719, + "learning_rate": 9.928164029745304e-06, + "loss": 21.2528, + "step": 74310 + }, + { + "epoch": 0.15013110210611796, + "grad_norm": 644.0867309570312, + "learning_rate": 9.928105059726342e-06, + "loss": 33.9758, + "step": 74320 + }, + { + "epoch": 0.15015130273880178, + "grad_norm": 418.8067932128906, + "learning_rate": 9.928046065688396e-06, + "loss": 25.9401, + "step": 74330 + }, + { + "epoch": 0.1501715033714856, + "grad_norm": 609.118896484375, + "learning_rate": 9.927987047631749e-06, + "loss": 20.8019, + "step": 74340 + }, + { + "epoch": 0.15019170400416942, + "grad_norm": 737.2074584960938, + "learning_rate": 9.927928005556692e-06, + "loss": 25.2103, + "step": 74350 + }, + { + "epoch": 0.15021190463685322, + "grad_norm": 372.0144348144531, + "learning_rate": 9.927868939463511e-06, + "loss": 12.7893, + "step": 74360 + }, + { + "epoch": 0.15023210526953704, + "grad_norm": 196.4942626953125, + "learning_rate": 9.927809849352496e-06, + "loss": 27.7522, + "step": 74370 + }, + { + "epoch": 0.15025230590222086, + "grad_norm": 516.6409301757812, + "learning_rate": 9.927750735223932e-06, + "loss": 31.0842, + "step": 74380 + }, + { + "epoch": 0.15027250653490468, + "grad_norm": 370.11346435546875, + "learning_rate": 9.927691597078109e-06, + "loss": 29.7255, + "step": 74390 + }, + { + "epoch": 0.1502927071675885, + "grad_norm": 707.3624267578125, + "learning_rate": 9.927632434915315e-06, + "loss": 38.3532, + "step": 74400 + }, + { + "epoch": 0.15031290780027232, + "grad_norm": 684.619384765625, + "learning_rate": 9.927573248735839e-06, + "loss": 21.7652, + "step": 74410 + }, + { + "epoch": 0.1503331084329561, + "grad_norm": 706.516845703125, + "learning_rate": 9.927514038539966e-06, + "loss": 42.4786, + "step": 74420 + }, + { + "epoch": 0.15035330906563993, + "grad_norm": 441.7760009765625, + "learning_rate": 9.927454804327989e-06, + "loss": 34.0491, + "step": 74430 + }, + { + "epoch": 0.15037350969832375, + "grad_norm": 835.3612060546875, + "learning_rate": 9.927395546100195e-06, + "loss": 28.9184, + "step": 74440 + }, + { + "epoch": 0.15039371033100757, + "grad_norm": 351.7720947265625, + "learning_rate": 9.927336263856873e-06, + "loss": 19.5311, + "step": 74450 + }, + { + "epoch": 0.1504139109636914, + "grad_norm": 395.46429443359375, + "learning_rate": 9.92727695759831e-06, + "loss": 18.5734, + "step": 74460 + }, + { + "epoch": 0.1504341115963752, + "grad_norm": 179.93319702148438, + "learning_rate": 9.927217627324798e-06, + "loss": 19.2748, + "step": 74470 + }, + { + "epoch": 0.150454312229059, + "grad_norm": 140.97605895996094, + "learning_rate": 9.927158273036624e-06, + "loss": 12.4965, + "step": 74480 + }, + { + "epoch": 0.15047451286174282, + "grad_norm": 530.7149658203125, + "learning_rate": 9.92709889473408e-06, + "loss": 36.4391, + "step": 74490 + }, + { + "epoch": 0.15049471349442664, + "grad_norm": 322.33831787109375, + "learning_rate": 9.927039492417452e-06, + "loss": 27.7319, + "step": 74500 + }, + { + 
"epoch": 0.15051491412711046, + "grad_norm": 355.8292236328125, + "learning_rate": 9.92698006608703e-06, + "loss": 22.9936, + "step": 74510 + }, + { + "epoch": 0.15053511475979428, + "grad_norm": 345.5832214355469, + "learning_rate": 9.926920615743108e-06, + "loss": 29.3384, + "step": 74520 + }, + { + "epoch": 0.1505553153924781, + "grad_norm": 1088.1964111328125, + "learning_rate": 9.92686114138597e-06, + "loss": 41.6574, + "step": 74530 + }, + { + "epoch": 0.15057551602516192, + "grad_norm": 448.7097473144531, + "learning_rate": 9.926801643015908e-06, + "loss": 30.9739, + "step": 74540 + }, + { + "epoch": 0.1505957166578457, + "grad_norm": 222.37721252441406, + "learning_rate": 9.926742120633215e-06, + "loss": 20.0155, + "step": 74550 + }, + { + "epoch": 0.15061591729052953, + "grad_norm": 391.6602783203125, + "learning_rate": 9.926682574238175e-06, + "loss": 35.0277, + "step": 74560 + }, + { + "epoch": 0.15063611792321335, + "grad_norm": 212.23800659179688, + "learning_rate": 9.926623003831085e-06, + "loss": 24.9151, + "step": 74570 + }, + { + "epoch": 0.15065631855589717, + "grad_norm": 699.8584594726562, + "learning_rate": 9.92656340941223e-06, + "loss": 46.0956, + "step": 74580 + }, + { + "epoch": 0.150676519188581, + "grad_norm": 381.85382080078125, + "learning_rate": 9.926503790981903e-06, + "loss": 16.9223, + "step": 74590 + }, + { + "epoch": 0.1506967198212648, + "grad_norm": 648.5833129882812, + "learning_rate": 9.926444148540394e-06, + "loss": 18.0826, + "step": 74600 + }, + { + "epoch": 0.1507169204539486, + "grad_norm": 681.515869140625, + "learning_rate": 9.926384482087994e-06, + "loss": 19.9235, + "step": 74610 + }, + { + "epoch": 0.15073712108663243, + "grad_norm": 418.3777160644531, + "learning_rate": 9.926324791624993e-06, + "loss": 18.0033, + "step": 74620 + }, + { + "epoch": 0.15075732171931625, + "grad_norm": 308.60699462890625, + "learning_rate": 9.926265077151682e-06, + "loss": 10.9382, + "step": 74630 + }, + { + "epoch": 0.15077752235200007, + "grad_norm": 316.04754638671875, + "learning_rate": 9.926205338668353e-06, + "loss": 18.1846, + "step": 74640 + }, + { + "epoch": 0.15079772298468389, + "grad_norm": 419.26593017578125, + "learning_rate": 9.926145576175297e-06, + "loss": 39.5158, + "step": 74650 + }, + { + "epoch": 0.1508179236173677, + "grad_norm": 415.845947265625, + "learning_rate": 9.926085789672806e-06, + "loss": 31.3318, + "step": 74660 + }, + { + "epoch": 0.15083812425005153, + "grad_norm": 338.5599365234375, + "learning_rate": 9.926025979161169e-06, + "loss": 15.7716, + "step": 74670 + }, + { + "epoch": 0.15085832488273532, + "grad_norm": 396.2326965332031, + "learning_rate": 9.925966144640677e-06, + "loss": 17.2184, + "step": 74680 + }, + { + "epoch": 0.15087852551541914, + "grad_norm": 300.29656982421875, + "learning_rate": 9.925906286111627e-06, + "loss": 13.1868, + "step": 74690 + }, + { + "epoch": 0.15089872614810296, + "grad_norm": 389.8894348144531, + "learning_rate": 9.925846403574306e-06, + "loss": 29.3188, + "step": 74700 + }, + { + "epoch": 0.15091892678078678, + "grad_norm": 262.18408203125, + "learning_rate": 9.925786497029007e-06, + "loss": 11.778, + "step": 74710 + }, + { + "epoch": 0.1509391274134706, + "grad_norm": 382.22821044921875, + "learning_rate": 9.925726566476021e-06, + "loss": 34.0867, + "step": 74720 + }, + { + "epoch": 0.15095932804615442, + "grad_norm": 244.11781311035156, + "learning_rate": 9.925666611915642e-06, + "loss": 38.6311, + "step": 74730 + }, + { + "epoch": 0.1509795286788382, + "grad_norm": 415.68048095703125, 
+ "learning_rate": 9.925606633348161e-06, + "loss": 15.0911, + "step": 74740 + }, + { + "epoch": 0.15099972931152203, + "grad_norm": 376.7303771972656, + "learning_rate": 9.92554663077387e-06, + "loss": 21.868, + "step": 74750 + }, + { + "epoch": 0.15101992994420585, + "grad_norm": 1667.0445556640625, + "learning_rate": 9.925486604193064e-06, + "loss": 34.2437, + "step": 74760 + }, + { + "epoch": 0.15104013057688967, + "grad_norm": 507.23333740234375, + "learning_rate": 9.925426553606033e-06, + "loss": 22.3991, + "step": 74770 + }, + { + "epoch": 0.1510603312095735, + "grad_norm": 404.0858154296875, + "learning_rate": 9.92536647901307e-06, + "loss": 20.0867, + "step": 74780 + }, + { + "epoch": 0.1510805318422573, + "grad_norm": 406.8843994140625, + "learning_rate": 9.925306380414468e-06, + "loss": 27.0763, + "step": 74790 + }, + { + "epoch": 0.1511007324749411, + "grad_norm": 584.16796875, + "learning_rate": 9.925246257810519e-06, + "loss": 24.6141, + "step": 74800 + }, + { + "epoch": 0.15112093310762492, + "grad_norm": 369.7292175292969, + "learning_rate": 9.925186111201519e-06, + "loss": 28.5047, + "step": 74810 + }, + { + "epoch": 0.15114113374030874, + "grad_norm": 138.77566528320312, + "learning_rate": 9.92512594058776e-06, + "loss": 18.0816, + "step": 74820 + }, + { + "epoch": 0.15116133437299256, + "grad_norm": 340.2200012207031, + "learning_rate": 9.925065745969531e-06, + "loss": 39.3574, + "step": 74830 + }, + { + "epoch": 0.15118153500567638, + "grad_norm": 531.7938842773438, + "learning_rate": 9.925005527347132e-06, + "loss": 21.366, + "step": 74840 + }, + { + "epoch": 0.1512017356383602, + "grad_norm": 245.169921875, + "learning_rate": 9.924945284720852e-06, + "loss": 13.2756, + "step": 74850 + }, + { + "epoch": 0.15122193627104402, + "grad_norm": 468.15594482421875, + "learning_rate": 9.924885018090987e-06, + "loss": 24.177, + "step": 74860 + }, + { + "epoch": 0.15124213690372781, + "grad_norm": 570.9451293945312, + "learning_rate": 9.924824727457829e-06, + "loss": 15.2152, + "step": 74870 + }, + { + "epoch": 0.15126233753641163, + "grad_norm": 418.68328857421875, + "learning_rate": 9.924764412821673e-06, + "loss": 27.3847, + "step": 74880 + }, + { + "epoch": 0.15128253816909545, + "grad_norm": 413.4746398925781, + "learning_rate": 9.924704074182811e-06, + "loss": 33.3847, + "step": 74890 + }, + { + "epoch": 0.15130273880177927, + "grad_norm": 563.0431518554688, + "learning_rate": 9.92464371154154e-06, + "loss": 9.7395, + "step": 74900 + }, + { + "epoch": 0.1513229394344631, + "grad_norm": 581.7094116210938, + "learning_rate": 9.924583324898152e-06, + "loss": 26.6438, + "step": 74910 + }, + { + "epoch": 0.15134314006714691, + "grad_norm": 756.6255493164062, + "learning_rate": 9.924522914252943e-06, + "loss": 39.0042, + "step": 74920 + }, + { + "epoch": 0.1513633406998307, + "grad_norm": 233.01658630371094, + "learning_rate": 9.924462479606207e-06, + "loss": 20.5177, + "step": 74930 + }, + { + "epoch": 0.15138354133251453, + "grad_norm": 268.5835876464844, + "learning_rate": 9.924402020958238e-06, + "loss": 15.2923, + "step": 74940 + }, + { + "epoch": 0.15140374196519835, + "grad_norm": 182.87258911132812, + "learning_rate": 9.92434153830933e-06, + "loss": 11.0032, + "step": 74950 + }, + { + "epoch": 0.15142394259788217, + "grad_norm": 632.199951171875, + "learning_rate": 9.92428103165978e-06, + "loss": 23.4439, + "step": 74960 + }, + { + "epoch": 0.151444143230566, + "grad_norm": 280.0271911621094, + "learning_rate": 9.92422050100988e-06, + "loss": 16.7607, + "step": 74970 + 
}, + { + "epoch": 0.1514643438632498, + "grad_norm": 266.1084899902344, + "learning_rate": 9.924159946359927e-06, + "loss": 22.0468, + "step": 74980 + }, + { + "epoch": 0.15148454449593363, + "grad_norm": 336.4967956542969, + "learning_rate": 9.924099367710215e-06, + "loss": 28.806, + "step": 74990 + }, + { + "epoch": 0.15150474512861742, + "grad_norm": 474.88555908203125, + "learning_rate": 9.924038765061042e-06, + "loss": 39.5052, + "step": 75000 + }, + { + "epoch": 0.15152494576130124, + "grad_norm": 489.7330627441406, + "learning_rate": 9.923978138412698e-06, + "loss": 16.7885, + "step": 75010 + }, + { + "epoch": 0.15154514639398506, + "grad_norm": 597.09130859375, + "learning_rate": 9.923917487765484e-06, + "loss": 19.9639, + "step": 75020 + }, + { + "epoch": 0.15156534702666888, + "grad_norm": 377.1841125488281, + "learning_rate": 9.923856813119694e-06, + "loss": 26.6894, + "step": 75030 + }, + { + "epoch": 0.1515855476593527, + "grad_norm": 311.2860107421875, + "learning_rate": 9.92379611447562e-06, + "loss": 26.1113, + "step": 75040 + }, + { + "epoch": 0.15160574829203652, + "grad_norm": 455.06207275390625, + "learning_rate": 9.923735391833564e-06, + "loss": 28.5179, + "step": 75050 + }, + { + "epoch": 0.1516259489247203, + "grad_norm": 607.7901611328125, + "learning_rate": 9.923674645193819e-06, + "loss": 22.8817, + "step": 75060 + }, + { + "epoch": 0.15164614955740413, + "grad_norm": 356.8912353515625, + "learning_rate": 9.92361387455668e-06, + "loss": 29.9606, + "step": 75070 + }, + { + "epoch": 0.15166635019008795, + "grad_norm": 312.0567321777344, + "learning_rate": 9.923553079922443e-06, + "loss": 16.3842, + "step": 75080 + }, + { + "epoch": 0.15168655082277177, + "grad_norm": 487.7376708984375, + "learning_rate": 9.923492261291406e-06, + "loss": 15.1316, + "step": 75090 + }, + { + "epoch": 0.1517067514554556, + "grad_norm": 1110.7755126953125, + "learning_rate": 9.923431418663866e-06, + "loss": 31.4992, + "step": 75100 + }, + { + "epoch": 0.1517269520881394, + "grad_norm": 140.07437133789062, + "learning_rate": 9.923370552040117e-06, + "loss": 18.2737, + "step": 75110 + }, + { + "epoch": 0.1517471527208232, + "grad_norm": 594.5069580078125, + "learning_rate": 9.923309661420458e-06, + "loss": 30.8502, + "step": 75120 + }, + { + "epoch": 0.15176735335350702, + "grad_norm": 526.073486328125, + "learning_rate": 9.923248746805185e-06, + "loss": 17.2189, + "step": 75130 + }, + { + "epoch": 0.15178755398619084, + "grad_norm": 359.8941955566406, + "learning_rate": 9.923187808194594e-06, + "loss": 32.6762, + "step": 75140 + }, + { + "epoch": 0.15180775461887466, + "grad_norm": 660.1754760742188, + "learning_rate": 9.923126845588982e-06, + "loss": 25.885, + "step": 75150 + }, + { + "epoch": 0.15182795525155848, + "grad_norm": 89.28858947753906, + "learning_rate": 9.92306585898865e-06, + "loss": 16.3939, + "step": 75160 + }, + { + "epoch": 0.1518481558842423, + "grad_norm": 153.34735107421875, + "learning_rate": 9.92300484839389e-06, + "loss": 18.2618, + "step": 75170 + }, + { + "epoch": 0.15186835651692612, + "grad_norm": 660.1924438476562, + "learning_rate": 9.922943813805e-06, + "loss": 18.8986, + "step": 75180 + }, + { + "epoch": 0.15188855714960992, + "grad_norm": 723.8023681640625, + "learning_rate": 9.92288275522228e-06, + "loss": 20.5952, + "step": 75190 + }, + { + "epoch": 0.15190875778229374, + "grad_norm": 915.8523559570312, + "learning_rate": 9.922821672646028e-06, + "loss": 43.1564, + "step": 75200 + }, + { + "epoch": 0.15192895841497756, + "grad_norm": 453.2025146484375, 
+ "learning_rate": 9.922760566076538e-06, + "loss": 13.1289, + "step": 75210 + }, + { + "epoch": 0.15194915904766138, + "grad_norm": 555.0697021484375, + "learning_rate": 9.922699435514112e-06, + "loss": 19.823, + "step": 75220 + }, + { + "epoch": 0.1519693596803452, + "grad_norm": 1103.8321533203125, + "learning_rate": 9.922638280959044e-06, + "loss": 31.5298, + "step": 75230 + }, + { + "epoch": 0.15198956031302902, + "grad_norm": 458.439208984375, + "learning_rate": 9.922577102411638e-06, + "loss": 13.7083, + "step": 75240 + }, + { + "epoch": 0.1520097609457128, + "grad_norm": 1403.2596435546875, + "learning_rate": 9.922515899872184e-06, + "loss": 31.9748, + "step": 75250 + }, + { + "epoch": 0.15202996157839663, + "grad_norm": 774.9822387695312, + "learning_rate": 9.922454673340987e-06, + "loss": 38.4883, + "step": 75260 + }, + { + "epoch": 0.15205016221108045, + "grad_norm": 397.8044128417969, + "learning_rate": 9.922393422818342e-06, + "loss": 29.8237, + "step": 75270 + }, + { + "epoch": 0.15207036284376427, + "grad_norm": 441.2273864746094, + "learning_rate": 9.922332148304548e-06, + "loss": 36.652, + "step": 75280 + }, + { + "epoch": 0.1520905634764481, + "grad_norm": 610.16064453125, + "learning_rate": 9.922270849799903e-06, + "loss": 30.548, + "step": 75290 + }, + { + "epoch": 0.1521107641091319, + "grad_norm": 347.32177734375, + "learning_rate": 9.922209527304709e-06, + "loss": 27.3734, + "step": 75300 + }, + { + "epoch": 0.15213096474181573, + "grad_norm": 383.2518005371094, + "learning_rate": 9.922148180819261e-06, + "loss": 10.6325, + "step": 75310 + }, + { + "epoch": 0.15215116537449952, + "grad_norm": 634.7061767578125, + "learning_rate": 9.922086810343862e-06, + "loss": 30.0669, + "step": 75320 + }, + { + "epoch": 0.15217136600718334, + "grad_norm": 825.0751342773438, + "learning_rate": 9.922025415878809e-06, + "loss": 19.7906, + "step": 75330 + }, + { + "epoch": 0.15219156663986716, + "grad_norm": 240.3961944580078, + "learning_rate": 9.9219639974244e-06, + "loss": 19.2792, + "step": 75340 + }, + { + "epoch": 0.15221176727255098, + "grad_norm": 708.1145629882812, + "learning_rate": 9.921902554980935e-06, + "loss": 31.2332, + "step": 75350 + }, + { + "epoch": 0.1522319679052348, + "grad_norm": 205.85398864746094, + "learning_rate": 9.921841088548713e-06, + "loss": 28.1157, + "step": 75360 + }, + { + "epoch": 0.15225216853791862, + "grad_norm": 699.2012939453125, + "learning_rate": 9.921779598128036e-06, + "loss": 16.0434, + "step": 75370 + }, + { + "epoch": 0.1522723691706024, + "grad_norm": 511.19268798828125, + "learning_rate": 9.921718083719203e-06, + "loss": 15.3861, + "step": 75380 + }, + { + "epoch": 0.15229256980328623, + "grad_norm": 991.4080200195312, + "learning_rate": 9.921656545322512e-06, + "loss": 29.8642, + "step": 75390 + }, + { + "epoch": 0.15231277043597005, + "grad_norm": 498.2525634765625, + "learning_rate": 9.921594982938262e-06, + "loss": 36.8228, + "step": 75400 + }, + { + "epoch": 0.15233297106865387, + "grad_norm": 452.8070068359375, + "learning_rate": 9.921533396566758e-06, + "loss": 24.1474, + "step": 75410 + }, + { + "epoch": 0.1523531717013377, + "grad_norm": 188.76637268066406, + "learning_rate": 9.921471786208296e-06, + "loss": 22.8066, + "step": 75420 + }, + { + "epoch": 0.1523733723340215, + "grad_norm": 500.168212890625, + "learning_rate": 9.921410151863177e-06, + "loss": 26.2489, + "step": 75430 + }, + { + "epoch": 0.1523935729667053, + "grad_norm": 485.8915710449219, + "learning_rate": 9.921348493531701e-06, + "loss": 24.2653, + "step": 
75440 + }, + { + "epoch": 0.15241377359938912, + "grad_norm": 66.25651550292969, + "learning_rate": 9.921286811214173e-06, + "loss": 11.0651, + "step": 75450 + }, + { + "epoch": 0.15243397423207294, + "grad_norm": 338.6200256347656, + "learning_rate": 9.921225104910886e-06, + "loss": 13.1423, + "step": 75460 + }, + { + "epoch": 0.15245417486475676, + "grad_norm": 302.8039245605469, + "learning_rate": 9.921163374622147e-06, + "loss": 11.354, + "step": 75470 + }, + { + "epoch": 0.15247437549744058, + "grad_norm": 645.382080078125, + "learning_rate": 9.921101620348252e-06, + "loss": 29.3993, + "step": 75480 + }, + { + "epoch": 0.1524945761301244, + "grad_norm": 650.5449829101562, + "learning_rate": 9.921039842089508e-06, + "loss": 29.5309, + "step": 75490 + }, + { + "epoch": 0.15251477676280822, + "grad_norm": 434.36199951171875, + "learning_rate": 9.92097803984621e-06, + "loss": 8.5805, + "step": 75500 + }, + { + "epoch": 0.15253497739549202, + "grad_norm": 535.7971801757812, + "learning_rate": 9.920916213618664e-06, + "loss": 27.8635, + "step": 75510 + }, + { + "epoch": 0.15255517802817584, + "grad_norm": 709.4111328125, + "learning_rate": 9.920854363407168e-06, + "loss": 36.1847, + "step": 75520 + }, + { + "epoch": 0.15257537866085966, + "grad_norm": 454.9409484863281, + "learning_rate": 9.920792489212023e-06, + "loss": 35.0518, + "step": 75530 + }, + { + "epoch": 0.15259557929354348, + "grad_norm": 222.44839477539062, + "learning_rate": 9.920730591033534e-06, + "loss": 29.2773, + "step": 75540 + }, + { + "epoch": 0.1526157799262273, + "grad_norm": 240.63441467285156, + "learning_rate": 9.920668668872002e-06, + "loss": 21.3169, + "step": 75550 + }, + { + "epoch": 0.15263598055891112, + "grad_norm": 293.8035583496094, + "learning_rate": 9.920606722727726e-06, + "loss": 20.815, + "step": 75560 + }, + { + "epoch": 0.1526561811915949, + "grad_norm": 701.8116455078125, + "learning_rate": 9.920544752601011e-06, + "loss": 25.9134, + "step": 75570 + }, + { + "epoch": 0.15267638182427873, + "grad_norm": 493.2641906738281, + "learning_rate": 9.920482758492156e-06, + "loss": 19.1189, + "step": 75580 + }, + { + "epoch": 0.15269658245696255, + "grad_norm": 305.3404235839844, + "learning_rate": 9.920420740401466e-06, + "loss": 19.7217, + "step": 75590 + }, + { + "epoch": 0.15271678308964637, + "grad_norm": 363.524658203125, + "learning_rate": 9.920358698329242e-06, + "loss": 16.3366, + "step": 75600 + }, + { + "epoch": 0.1527369837223302, + "grad_norm": 82.39474487304688, + "learning_rate": 9.920296632275785e-06, + "loss": 18.9301, + "step": 75610 + }, + { + "epoch": 0.152757184355014, + "grad_norm": 452.2556457519531, + "learning_rate": 9.9202345422414e-06, + "loss": 26.0476, + "step": 75620 + }, + { + "epoch": 0.15277738498769783, + "grad_norm": 23.63207244873047, + "learning_rate": 9.92017242822639e-06, + "loss": 12.7449, + "step": 75630 + }, + { + "epoch": 0.15279758562038162, + "grad_norm": 771.0640869140625, + "learning_rate": 9.920110290231056e-06, + "loss": 43.8966, + "step": 75640 + }, + { + "epoch": 0.15281778625306544, + "grad_norm": 504.03155517578125, + "learning_rate": 9.920048128255699e-06, + "loss": 29.4398, + "step": 75650 + }, + { + "epoch": 0.15283798688574926, + "grad_norm": 937.1925048828125, + "learning_rate": 9.919985942300625e-06, + "loss": 27.1426, + "step": 75660 + }, + { + "epoch": 0.15285818751843308, + "grad_norm": 121.6279067993164, + "learning_rate": 9.919923732366137e-06, + "loss": 42.1627, + "step": 75670 + }, + { + "epoch": 0.1528783881511169, + "grad_norm": 
428.188232421875, + "learning_rate": 9.919861498452538e-06, + "loss": 32.3342, + "step": 75680 + }, + { + "epoch": 0.15289858878380072, + "grad_norm": 225.9127197265625, + "learning_rate": 9.91979924056013e-06, + "loss": 24.5659, + "step": 75690 + }, + { + "epoch": 0.1529187894164845, + "grad_norm": 1178.7598876953125, + "learning_rate": 9.919736958689216e-06, + "loss": 31.9912, + "step": 75700 + }, + { + "epoch": 0.15293899004916833, + "grad_norm": 442.5712585449219, + "learning_rate": 9.919674652840103e-06, + "loss": 21.1682, + "step": 75710 + }, + { + "epoch": 0.15295919068185215, + "grad_norm": 509.2165832519531, + "learning_rate": 9.91961232301309e-06, + "loss": 15.8996, + "step": 75720 + }, + { + "epoch": 0.15297939131453597, + "grad_norm": 304.9468994140625, + "learning_rate": 9.919549969208486e-06, + "loss": 23.0404, + "step": 75730 + }, + { + "epoch": 0.1529995919472198, + "grad_norm": 488.9942626953125, + "learning_rate": 9.919487591426591e-06, + "loss": 15.6335, + "step": 75740 + }, + { + "epoch": 0.1530197925799036, + "grad_norm": 564.2071533203125, + "learning_rate": 9.91942518966771e-06, + "loss": 35.4118, + "step": 75750 + }, + { + "epoch": 0.1530399932125874, + "grad_norm": 462.4146728515625, + "learning_rate": 9.919362763932145e-06, + "loss": 27.5123, + "step": 75760 + }, + { + "epoch": 0.15306019384527123, + "grad_norm": 478.4244384765625, + "learning_rate": 9.919300314220206e-06, + "loss": 18.8904, + "step": 75770 + }, + { + "epoch": 0.15308039447795505, + "grad_norm": 644.9933471679688, + "learning_rate": 9.919237840532192e-06, + "loss": 17.6179, + "step": 75780 + }, + { + "epoch": 0.15310059511063887, + "grad_norm": 502.4374084472656, + "learning_rate": 9.91917534286841e-06, + "loss": 17.1634, + "step": 75790 + }, + { + "epoch": 0.15312079574332269, + "grad_norm": 602.4114990234375, + "learning_rate": 9.919112821229165e-06, + "loss": 16.3323, + "step": 75800 + }, + { + "epoch": 0.1531409963760065, + "grad_norm": 299.54547119140625, + "learning_rate": 9.91905027561476e-06, + "loss": 401.1151, + "step": 75810 + }, + { + "epoch": 0.15316119700869033, + "grad_norm": 134.6005859375, + "learning_rate": 9.918987706025498e-06, + "loss": 17.0332, + "step": 75820 + }, + { + "epoch": 0.15318139764137412, + "grad_norm": 676.94970703125, + "learning_rate": 9.918925112461688e-06, + "loss": 138.785, + "step": 75830 + }, + { + "epoch": 0.15320159827405794, + "grad_norm": 81.3394775390625, + "learning_rate": 9.918862494923635e-06, + "loss": 17.716, + "step": 75840 + }, + { + "epoch": 0.15322179890674176, + "grad_norm": 814.876953125, + "learning_rate": 9.918799853411642e-06, + "loss": 137.1458, + "step": 75850 + }, + { + "epoch": 0.15324199953942558, + "grad_norm": 340.6568908691406, + "learning_rate": 9.918737187926014e-06, + "loss": 325.2914, + "step": 75860 + }, + { + "epoch": 0.1532622001721094, + "grad_norm": 693.6303100585938, + "learning_rate": 9.91867449846706e-06, + "loss": 172.8194, + "step": 75870 + }, + { + "epoch": 0.15328240080479322, + "grad_norm": 308.6283874511719, + "learning_rate": 9.91861178503508e-06, + "loss": 35.7975, + "step": 75880 + }, + { + "epoch": 0.153302601437477, + "grad_norm": 1901.94873046875, + "learning_rate": 9.918549047630386e-06, + "loss": 26.413, + "step": 75890 + }, + { + "epoch": 0.15332280207016083, + "grad_norm": 418.302978515625, + "learning_rate": 9.918486286253279e-06, + "loss": 19.2579, + "step": 75900 + }, + { + "epoch": 0.15334300270284465, + "grad_norm": 296.1881103515625, + "learning_rate": 9.918423500904066e-06, + "loss": 34.349, + 
"step": 75910 + }, + { + "epoch": 0.15336320333552847, + "grad_norm": 217.81080627441406, + "learning_rate": 9.918360691583056e-06, + "loss": 17.4354, + "step": 75920 + }, + { + "epoch": 0.1533834039682123, + "grad_norm": 393.1789855957031, + "learning_rate": 9.918297858290548e-06, + "loss": 20.4026, + "step": 75930 + }, + { + "epoch": 0.1534036046008961, + "grad_norm": 217.10507202148438, + "learning_rate": 9.918235001026856e-06, + "loss": 39.3795, + "step": 75940 + }, + { + "epoch": 0.1534238052335799, + "grad_norm": 469.1217956542969, + "learning_rate": 9.918172119792283e-06, + "loss": 27.6138, + "step": 75950 + }, + { + "epoch": 0.15344400586626372, + "grad_norm": 576.5799560546875, + "learning_rate": 9.918109214587134e-06, + "loss": 18.6139, + "step": 75960 + }, + { + "epoch": 0.15346420649894754, + "grad_norm": 478.5797424316406, + "learning_rate": 9.918046285411717e-06, + "loss": 24.1646, + "step": 75970 + }, + { + "epoch": 0.15348440713163136, + "grad_norm": 94.12849426269531, + "learning_rate": 9.917983332266342e-06, + "loss": 28.6062, + "step": 75980 + }, + { + "epoch": 0.15350460776431518, + "grad_norm": 63.8079719543457, + "learning_rate": 9.91792035515131e-06, + "loss": 13.2191, + "step": 75990 + }, + { + "epoch": 0.153524808396999, + "grad_norm": 1298.6837158203125, + "learning_rate": 9.91785735406693e-06, + "loss": 19.2591, + "step": 76000 + }, + { + "epoch": 0.15354500902968282, + "grad_norm": 159.49261474609375, + "learning_rate": 9.917794329013511e-06, + "loss": 24.4185, + "step": 76010 + }, + { + "epoch": 0.15356520966236661, + "grad_norm": 543.8831176757812, + "learning_rate": 9.917731279991358e-06, + "loss": 25.5307, + "step": 76020 + }, + { + "epoch": 0.15358541029505043, + "grad_norm": 165.6639404296875, + "learning_rate": 9.91766820700078e-06, + "loss": 33.5344, + "step": 76030 + }, + { + "epoch": 0.15360561092773425, + "grad_norm": 179.58657836914062, + "learning_rate": 9.917605110042084e-06, + "loss": 19.2109, + "step": 76040 + }, + { + "epoch": 0.15362581156041807, + "grad_norm": 240.4688262939453, + "learning_rate": 9.917541989115579e-06, + "loss": 24.1171, + "step": 76050 + }, + { + "epoch": 0.1536460121931019, + "grad_norm": 550.3084716796875, + "learning_rate": 9.917478844221566e-06, + "loss": 17.9788, + "step": 76060 + }, + { + "epoch": 0.15366621282578571, + "grad_norm": 469.7898254394531, + "learning_rate": 9.91741567536036e-06, + "loss": 10.9789, + "step": 76070 + }, + { + "epoch": 0.1536864134584695, + "grad_norm": 687.9810791015625, + "learning_rate": 9.917352482532267e-06, + "loss": 29.3961, + "step": 76080 + }, + { + "epoch": 0.15370661409115333, + "grad_norm": 198.01539611816406, + "learning_rate": 9.917289265737594e-06, + "loss": 21.9126, + "step": 76090 + }, + { + "epoch": 0.15372681472383715, + "grad_norm": 140.68165588378906, + "learning_rate": 9.91722602497665e-06, + "loss": 16.0029, + "step": 76100 + }, + { + "epoch": 0.15374701535652097, + "grad_norm": 246.17007446289062, + "learning_rate": 9.917162760249741e-06, + "loss": 14.9653, + "step": 76110 + }, + { + "epoch": 0.1537672159892048, + "grad_norm": 525.4667358398438, + "learning_rate": 9.91709947155718e-06, + "loss": 42.5954, + "step": 76120 + }, + { + "epoch": 0.1537874166218886, + "grad_norm": 499.7342224121094, + "learning_rate": 9.91703615889927e-06, + "loss": 18.207, + "step": 76130 + }, + { + "epoch": 0.15380761725457243, + "grad_norm": 0.0, + "learning_rate": 9.916972822276322e-06, + "loss": 16.8293, + "step": 76140 + }, + { + "epoch": 0.15382781788725622, + "grad_norm": 
606.3645629882812, + "learning_rate": 9.916909461688646e-06, + "loss": 32.2728, + "step": 76150 + }, + { + "epoch": 0.15384801851994004, + "grad_norm": 433.4953918457031, + "learning_rate": 9.916846077136548e-06, + "loss": 25.9946, + "step": 76160 + }, + { + "epoch": 0.15386821915262386, + "grad_norm": 642.1217651367188, + "learning_rate": 9.916782668620341e-06, + "loss": 29.4844, + "step": 76170 + }, + { + "epoch": 0.15388841978530768, + "grad_norm": 12.11685562133789, + "learning_rate": 9.91671923614033e-06, + "loss": 20.6094, + "step": 76180 + }, + { + "epoch": 0.1539086204179915, + "grad_norm": 974.4423217773438, + "learning_rate": 9.916655779696826e-06, + "loss": 31.4484, + "step": 76190 + }, + { + "epoch": 0.15392882105067532, + "grad_norm": 656.9561767578125, + "learning_rate": 9.91659229929014e-06, + "loss": 28.2891, + "step": 76200 + }, + { + "epoch": 0.1539490216833591, + "grad_norm": 680.5543823242188, + "learning_rate": 9.916528794920577e-06, + "loss": 22.4223, + "step": 76210 + }, + { + "epoch": 0.15396922231604293, + "grad_norm": 487.59283447265625, + "learning_rate": 9.916465266588448e-06, + "loss": 26.6795, + "step": 76220 + }, + { + "epoch": 0.15398942294872675, + "grad_norm": 547.8593139648438, + "learning_rate": 9.916401714294067e-06, + "loss": 32.8007, + "step": 76230 + }, + { + "epoch": 0.15400962358141057, + "grad_norm": 372.6311950683594, + "learning_rate": 9.916338138037738e-06, + "loss": 30.9446, + "step": 76240 + }, + { + "epoch": 0.1540298242140944, + "grad_norm": 315.0910949707031, + "learning_rate": 9.916274537819774e-06, + "loss": 27.6902, + "step": 76250 + }, + { + "epoch": 0.1540500248467782, + "grad_norm": 1071.88720703125, + "learning_rate": 9.916210913640483e-06, + "loss": 27.2452, + "step": 76260 + }, + { + "epoch": 0.154070225479462, + "grad_norm": 520.8048095703125, + "learning_rate": 9.916147265500179e-06, + "loss": 37.0092, + "step": 76270 + }, + { + "epoch": 0.15409042611214582, + "grad_norm": 297.08050537109375, + "learning_rate": 9.916083593399167e-06, + "loss": 19.7901, + "step": 76280 + }, + { + "epoch": 0.15411062674482964, + "grad_norm": 264.24761962890625, + "learning_rate": 9.916019897337761e-06, + "loss": 36.8133, + "step": 76290 + }, + { + "epoch": 0.15413082737751346, + "grad_norm": 252.61801147460938, + "learning_rate": 9.915956177316269e-06, + "loss": 11.0623, + "step": 76300 + }, + { + "epoch": 0.15415102801019728, + "grad_norm": 211.3620147705078, + "learning_rate": 9.915892433335004e-06, + "loss": 21.1611, + "step": 76310 + }, + { + "epoch": 0.1541712286428811, + "grad_norm": 372.51190185546875, + "learning_rate": 9.915828665394274e-06, + "loss": 26.7633, + "step": 76320 + }, + { + "epoch": 0.15419142927556492, + "grad_norm": 1148.5885009765625, + "learning_rate": 9.915764873494393e-06, + "loss": 35.1224, + "step": 76330 + }, + { + "epoch": 0.15421162990824872, + "grad_norm": 257.689697265625, + "learning_rate": 9.915701057635669e-06, + "loss": 20.7373, + "step": 76340 + }, + { + "epoch": 0.15423183054093254, + "grad_norm": 776.6898803710938, + "learning_rate": 9.915637217818415e-06, + "loss": 15.584, + "step": 76350 + }, + { + "epoch": 0.15425203117361636, + "grad_norm": 365.4017333984375, + "learning_rate": 9.915573354042943e-06, + "loss": 24.7629, + "step": 76360 + }, + { + "epoch": 0.15427223180630018, + "grad_norm": 526.2894287109375, + "learning_rate": 9.91550946630956e-06, + "loss": 20.6592, + "step": 76370 + }, + { + "epoch": 0.154292432438984, + "grad_norm": 499.64520263671875, + "learning_rate": 9.915445554618581e-06, + 
"loss": 23.5198, + "step": 76380 + }, + { + "epoch": 0.15431263307166782, + "grad_norm": 267.19219970703125, + "learning_rate": 9.915381618970317e-06, + "loss": 17.8118, + "step": 76390 + }, + { + "epoch": 0.1543328337043516, + "grad_norm": 146.3233184814453, + "learning_rate": 9.915317659365078e-06, + "loss": 16.3571, + "step": 76400 + }, + { + "epoch": 0.15435303433703543, + "grad_norm": 4946.3212890625, + "learning_rate": 9.915253675803178e-06, + "loss": 25.2522, + "step": 76410 + }, + { + "epoch": 0.15437323496971925, + "grad_norm": 288.2351989746094, + "learning_rate": 9.915189668284927e-06, + "loss": 23.2986, + "step": 76420 + }, + { + "epoch": 0.15439343560240307, + "grad_norm": 218.5893096923828, + "learning_rate": 9.915125636810638e-06, + "loss": 27.1564, + "step": 76430 + }, + { + "epoch": 0.1544136362350869, + "grad_norm": 391.52838134765625, + "learning_rate": 9.915061581380622e-06, + "loss": 25.8432, + "step": 76440 + }, + { + "epoch": 0.1544338368677707, + "grad_norm": 594.8270874023438, + "learning_rate": 9.914997501995193e-06, + "loss": 27.4184, + "step": 76450 + }, + { + "epoch": 0.15445403750045453, + "grad_norm": 498.0327453613281, + "learning_rate": 9.914933398654663e-06, + "loss": 32.746, + "step": 76460 + }, + { + "epoch": 0.15447423813313832, + "grad_norm": 1847.38525390625, + "learning_rate": 9.914869271359342e-06, + "loss": 55.4857, + "step": 76470 + }, + { + "epoch": 0.15449443876582214, + "grad_norm": 1521.9986572265625, + "learning_rate": 9.914805120109545e-06, + "loss": 49.2495, + "step": 76480 + }, + { + "epoch": 0.15451463939850596, + "grad_norm": 898.3662109375, + "learning_rate": 9.914740944905585e-06, + "loss": 34.2419, + "step": 76490 + }, + { + "epoch": 0.15453484003118978, + "grad_norm": 201742.3125, + "learning_rate": 9.914676745747772e-06, + "loss": 55.974, + "step": 76500 + }, + { + "epoch": 0.1545550406638736, + "grad_norm": 1071.6346435546875, + "learning_rate": 9.914612522636423e-06, + "loss": 25.4427, + "step": 76510 + }, + { + "epoch": 0.15457524129655742, + "grad_norm": 874.8290405273438, + "learning_rate": 9.914548275571845e-06, + "loss": 29.754, + "step": 76520 + }, + { + "epoch": 0.1545954419292412, + "grad_norm": 1176.8680419921875, + "learning_rate": 9.914484004554356e-06, + "loss": 31.7233, + "step": 76530 + }, + { + "epoch": 0.15461564256192503, + "grad_norm": 422.1765441894531, + "learning_rate": 9.91441970958427e-06, + "loss": 31.054, + "step": 76540 + }, + { + "epoch": 0.15463584319460885, + "grad_norm": 934.604248046875, + "learning_rate": 9.914355390661897e-06, + "loss": 23.3269, + "step": 76550 + }, + { + "epoch": 0.15465604382729267, + "grad_norm": 196.0285186767578, + "learning_rate": 9.914291047787552e-06, + "loss": 34.0607, + "step": 76560 + }, + { + "epoch": 0.1546762444599765, + "grad_norm": 294.592041015625, + "learning_rate": 9.914226680961549e-06, + "loss": 32.4687, + "step": 76570 + }, + { + "epoch": 0.1546964450926603, + "grad_norm": 706.431640625, + "learning_rate": 9.9141622901842e-06, + "loss": 22.9893, + "step": 76580 + }, + { + "epoch": 0.1547166457253441, + "grad_norm": 534.7452392578125, + "learning_rate": 9.914097875455821e-06, + "loss": 29.5437, + "step": 76590 + }, + { + "epoch": 0.15473684635802792, + "grad_norm": 2935.35498046875, + "learning_rate": 9.914033436776724e-06, + "loss": 18.9412, + "step": 76600 + }, + { + "epoch": 0.15475704699071174, + "grad_norm": 543.9201049804688, + "learning_rate": 9.913968974147225e-06, + "loss": 18.4595, + "step": 76610 + }, + { + "epoch": 0.15477724762339556, + 
"grad_norm": 767.2598266601562, + "learning_rate": 9.913904487567636e-06, + "loss": 35.2977, + "step": 76620 + }, + { + "epoch": 0.15479744825607938, + "grad_norm": 1048.299560546875, + "learning_rate": 9.913839977038274e-06, + "loss": 37.5103, + "step": 76630 + }, + { + "epoch": 0.1548176488887632, + "grad_norm": 85.23568725585938, + "learning_rate": 9.913775442559451e-06, + "loss": 42.2107, + "step": 76640 + }, + { + "epoch": 0.15483784952144702, + "grad_norm": 466.147216796875, + "learning_rate": 9.913710884131483e-06, + "loss": 24.6358, + "step": 76650 + }, + { + "epoch": 0.15485805015413082, + "grad_norm": 731.3047485351562, + "learning_rate": 9.913646301754685e-06, + "loss": 18.8588, + "step": 76660 + }, + { + "epoch": 0.15487825078681464, + "grad_norm": 63.4068603515625, + "learning_rate": 9.913581695429368e-06, + "loss": 26.315, + "step": 76670 + }, + { + "epoch": 0.15489845141949846, + "grad_norm": 304.8154602050781, + "learning_rate": 9.913517065155852e-06, + "loss": 19.8241, + "step": 76680 + }, + { + "epoch": 0.15491865205218228, + "grad_norm": 497.0758056640625, + "learning_rate": 9.91345241093445e-06, + "loss": 17.4634, + "step": 76690 + }, + { + "epoch": 0.1549388526848661, + "grad_norm": 625.6817626953125, + "learning_rate": 9.913387732765475e-06, + "loss": 18.1652, + "step": 76700 + }, + { + "epoch": 0.15495905331754992, + "grad_norm": 552.4562377929688, + "learning_rate": 9.913323030649247e-06, + "loss": 25.8852, + "step": 76710 + }, + { + "epoch": 0.1549792539502337, + "grad_norm": 316.5075378417969, + "learning_rate": 9.913258304586076e-06, + "loss": 18.0775, + "step": 76720 + }, + { + "epoch": 0.15499945458291753, + "grad_norm": 488.21954345703125, + "learning_rate": 9.91319355457628e-06, + "loss": 20.0942, + "step": 76730 + }, + { + "epoch": 0.15501965521560135, + "grad_norm": 557.0736083984375, + "learning_rate": 9.913128780620175e-06, + "loss": 19.5888, + "step": 76740 + }, + { + "epoch": 0.15503985584828517, + "grad_norm": 99.08460235595703, + "learning_rate": 9.913063982718076e-06, + "loss": 34.2454, + "step": 76750 + }, + { + "epoch": 0.155060056480969, + "grad_norm": 432.3857727050781, + "learning_rate": 9.9129991608703e-06, + "loss": 24.6678, + "step": 76760 + }, + { + "epoch": 0.1550802571136528, + "grad_norm": 114.72826385498047, + "learning_rate": 9.912934315077162e-06, + "loss": 24.7001, + "step": 76770 + }, + { + "epoch": 0.15510045774633663, + "grad_norm": 290.5341796875, + "learning_rate": 9.912869445338978e-06, + "loss": 25.8757, + "step": 76780 + }, + { + "epoch": 0.15512065837902042, + "grad_norm": 732.5404663085938, + "learning_rate": 9.912804551656064e-06, + "loss": 24.0103, + "step": 76790 + }, + { + "epoch": 0.15514085901170424, + "grad_norm": 145.4403533935547, + "learning_rate": 9.912739634028734e-06, + "loss": 33.1736, + "step": 76800 + }, + { + "epoch": 0.15516105964438806, + "grad_norm": 846.7334594726562, + "learning_rate": 9.91267469245731e-06, + "loss": 41.3353, + "step": 76810 + }, + { + "epoch": 0.15518126027707188, + "grad_norm": 392.328857421875, + "learning_rate": 9.912609726942104e-06, + "loss": 73.3939, + "step": 76820 + }, + { + "epoch": 0.1552014609097557, + "grad_norm": 452.7477111816406, + "learning_rate": 9.912544737483434e-06, + "loss": 17.1838, + "step": 76830 + }, + { + "epoch": 0.15522166154243952, + "grad_norm": 541.6780395507812, + "learning_rate": 9.912479724081617e-06, + "loss": 23.2453, + "step": 76840 + }, + { + "epoch": 0.1552418621751233, + "grad_norm": 394.6813659667969, + "learning_rate": 9.912414686736971e-06, + 
"loss": 24.0498, + "step": 76850 + }, + { + "epoch": 0.15526206280780713, + "grad_norm": 977.437744140625, + "learning_rate": 9.912349625449808e-06, + "loss": 27.9861, + "step": 76860 + }, + { + "epoch": 0.15528226344049095, + "grad_norm": 970.5541381835938, + "learning_rate": 9.912284540220452e-06, + "loss": 41.6399, + "step": 76870 + }, + { + "epoch": 0.15530246407317477, + "grad_norm": 234.07644653320312, + "learning_rate": 9.912219431049217e-06, + "loss": 18.4895, + "step": 76880 + }, + { + "epoch": 0.1553226647058586, + "grad_norm": 215.9114532470703, + "learning_rate": 9.912154297936418e-06, + "loss": 17.8177, + "step": 76890 + }, + { + "epoch": 0.1553428653385424, + "grad_norm": 476.1218566894531, + "learning_rate": 9.912089140882377e-06, + "loss": 45.933, + "step": 76900 + }, + { + "epoch": 0.1553630659712262, + "grad_norm": 321.398193359375, + "learning_rate": 9.912023959887408e-06, + "loss": 18.5507, + "step": 76910 + }, + { + "epoch": 0.15538326660391003, + "grad_norm": 664.8775024414062, + "learning_rate": 9.91195875495183e-06, + "loss": 24.0288, + "step": 76920 + }, + { + "epoch": 0.15540346723659385, + "grad_norm": 143.9698944091797, + "learning_rate": 9.911893526075961e-06, + "loss": 34.0271, + "step": 76930 + }, + { + "epoch": 0.15542366786927767, + "grad_norm": 165.63645935058594, + "learning_rate": 9.911828273260119e-06, + "loss": 18.4847, + "step": 76940 + }, + { + "epoch": 0.15544386850196149, + "grad_norm": 630.33984375, + "learning_rate": 9.911762996504621e-06, + "loss": 37.2778, + "step": 76950 + }, + { + "epoch": 0.1554640691346453, + "grad_norm": 330.86572265625, + "learning_rate": 9.911697695809787e-06, + "loss": 30.9341, + "step": 76960 + }, + { + "epoch": 0.15548426976732913, + "grad_norm": 773.8388061523438, + "learning_rate": 9.911632371175934e-06, + "loss": 34.7768, + "step": 76970 + }, + { + "epoch": 0.15550447040001292, + "grad_norm": 658.99853515625, + "learning_rate": 9.911567022603379e-06, + "loss": 19.9509, + "step": 76980 + }, + { + "epoch": 0.15552467103269674, + "grad_norm": 29.142122268676758, + "learning_rate": 9.911501650092443e-06, + "loss": 33.9231, + "step": 76990 + }, + { + "epoch": 0.15554487166538056, + "grad_norm": 688.2086791992188, + "learning_rate": 9.911436253643445e-06, + "loss": 41.0092, + "step": 77000 + }, + { + "epoch": 0.15556507229806438, + "grad_norm": 366.0467224121094, + "learning_rate": 9.911370833256701e-06, + "loss": 29.3616, + "step": 77010 + }, + { + "epoch": 0.1555852729307482, + "grad_norm": 1323.949462890625, + "learning_rate": 9.91130538893253e-06, + "loss": 34.4818, + "step": 77020 + }, + { + "epoch": 0.15560547356343202, + "grad_norm": 302.8736572265625, + "learning_rate": 9.911239920671253e-06, + "loss": 20.0606, + "step": 77030 + }, + { + "epoch": 0.1556256741961158, + "grad_norm": 256.28668212890625, + "learning_rate": 9.91117442847319e-06, + "loss": 41.563, + "step": 77040 + }, + { + "epoch": 0.15564587482879963, + "grad_norm": 155.36709594726562, + "learning_rate": 9.911108912338656e-06, + "loss": 21.108, + "step": 77050 + }, + { + "epoch": 0.15566607546148345, + "grad_norm": 400.1911926269531, + "learning_rate": 9.911043372267975e-06, + "loss": 23.0878, + "step": 77060 + }, + { + "epoch": 0.15568627609416727, + "grad_norm": 868.4791259765625, + "learning_rate": 9.910977808261463e-06, + "loss": 32.3664, + "step": 77070 + }, + { + "epoch": 0.1557064767268511, + "grad_norm": 157.62985229492188, + "learning_rate": 9.910912220319443e-06, + "loss": 30.454, + "step": 77080 + }, + { + "epoch": 0.1557266773595349, + 
"grad_norm": 215.0827178955078, + "learning_rate": 9.910846608442229e-06, + "loss": 16.5634, + "step": 77090 + }, + { + "epoch": 0.15574687799221873, + "grad_norm": 794.7200927734375, + "learning_rate": 9.910780972630146e-06, + "loss": 25.2269, + "step": 77100 + }, + { + "epoch": 0.15576707862490252, + "grad_norm": 849.30419921875, + "learning_rate": 9.910715312883512e-06, + "loss": 26.416, + "step": 77110 + }, + { + "epoch": 0.15578727925758634, + "grad_norm": 707.9942016601562, + "learning_rate": 9.910649629202648e-06, + "loss": 23.5251, + "step": 77120 + }, + { + "epoch": 0.15580747989027016, + "grad_norm": 254.89849853515625, + "learning_rate": 9.910583921587872e-06, + "loss": 18.0644, + "step": 77130 + }, + { + "epoch": 0.15582768052295398, + "grad_norm": 4.354783535003662, + "learning_rate": 9.910518190039506e-06, + "loss": 41.0595, + "step": 77140 + }, + { + "epoch": 0.1558478811556378, + "grad_norm": 247.84661865234375, + "learning_rate": 9.91045243455787e-06, + "loss": 17.9057, + "step": 77150 + }, + { + "epoch": 0.15586808178832162, + "grad_norm": 324.16680908203125, + "learning_rate": 9.910386655143285e-06, + "loss": 22.0415, + "step": 77160 + }, + { + "epoch": 0.15588828242100541, + "grad_norm": 178.23960876464844, + "learning_rate": 9.91032085179607e-06, + "loss": 18.2775, + "step": 77170 + }, + { + "epoch": 0.15590848305368923, + "grad_norm": 926.4631958007812, + "learning_rate": 9.910255024516546e-06, + "loss": 26.7787, + "step": 77180 + }, + { + "epoch": 0.15592868368637305, + "grad_norm": 1258.066650390625, + "learning_rate": 9.910189173305035e-06, + "loss": 28.3574, + "step": 77190 + }, + { + "epoch": 0.15594888431905687, + "grad_norm": 583.8325805664062, + "learning_rate": 9.91012329816186e-06, + "loss": 42.5556, + "step": 77200 + }, + { + "epoch": 0.1559690849517407, + "grad_norm": 1802.2276611328125, + "learning_rate": 9.910057399087338e-06, + "loss": 54.5202, + "step": 77210 + }, + { + "epoch": 0.15598928558442451, + "grad_norm": 827.5545043945312, + "learning_rate": 9.90999147608179e-06, + "loss": 12.4443, + "step": 77220 + }, + { + "epoch": 0.1560094862171083, + "grad_norm": 375.8981628417969, + "learning_rate": 9.909925529145541e-06, + "loss": 16.1195, + "step": 77230 + }, + { + "epoch": 0.15602968684979213, + "grad_norm": 313.28118896484375, + "learning_rate": 9.90985955827891e-06, + "loss": 29.6728, + "step": 77240 + }, + { + "epoch": 0.15604988748247595, + "grad_norm": 215.35768127441406, + "learning_rate": 9.90979356348222e-06, + "loss": 50.7093, + "step": 77250 + }, + { + "epoch": 0.15607008811515977, + "grad_norm": 261.1755065917969, + "learning_rate": 9.909727544755789e-06, + "loss": 17.7893, + "step": 77260 + }, + { + "epoch": 0.1560902887478436, + "grad_norm": 696.5534057617188, + "learning_rate": 9.909661502099943e-06, + "loss": 26.2884, + "step": 77270 + }, + { + "epoch": 0.1561104893805274, + "grad_norm": 255.1345977783203, + "learning_rate": 9.909595435515002e-06, + "loss": 18.1933, + "step": 77280 + }, + { + "epoch": 0.15613069001321123, + "grad_norm": 17.61561393737793, + "learning_rate": 9.90952934500129e-06, + "loss": 31.0577, + "step": 77290 + }, + { + "epoch": 0.15615089064589502, + "grad_norm": 395.40673828125, + "learning_rate": 9.909463230559127e-06, + "loss": 19.4631, + "step": 77300 + }, + { + "epoch": 0.15617109127857884, + "grad_norm": 1012.7659301757812, + "learning_rate": 9.909397092188834e-06, + "loss": 21.1493, + "step": 77310 + }, + { + "epoch": 0.15619129191126266, + "grad_norm": 131.47860717773438, + "learning_rate": 
9.909330929890734e-06, + "loss": 13.5247, + "step": 77320 + }, + { + "epoch": 0.15621149254394648, + "grad_norm": 621.2780151367188, + "learning_rate": 9.909264743665153e-06, + "loss": 31.8489, + "step": 77330 + }, + { + "epoch": 0.1562316931766303, + "grad_norm": 0.0, + "learning_rate": 9.90919853351241e-06, + "loss": 34.2143, + "step": 77340 + }, + { + "epoch": 0.15625189380931412, + "grad_norm": 1121.5819091796875, + "learning_rate": 9.90913229943283e-06, + "loss": 43.6555, + "step": 77350 + }, + { + "epoch": 0.1562720944419979, + "grad_norm": 268.9812316894531, + "learning_rate": 9.909066041426733e-06, + "loss": 25.7186, + "step": 77360 + }, + { + "epoch": 0.15629229507468173, + "grad_norm": 447.2286376953125, + "learning_rate": 9.908999759494444e-06, + "loss": 29.3143, + "step": 77370 + }, + { + "epoch": 0.15631249570736555, + "grad_norm": 381.0235595703125, + "learning_rate": 9.908933453636287e-06, + "loss": 25.825, + "step": 77380 + }, + { + "epoch": 0.15633269634004937, + "grad_norm": 327.3936462402344, + "learning_rate": 9.90886712385258e-06, + "loss": 19.0007, + "step": 77390 + }, + { + "epoch": 0.1563528969727332, + "grad_norm": 329.6006774902344, + "learning_rate": 9.908800770143654e-06, + "loss": 35.5917, + "step": 77400 + }, + { + "epoch": 0.156373097605417, + "grad_norm": 445.0928039550781, + "learning_rate": 9.908734392509827e-06, + "loss": 25.8085, + "step": 77410 + }, + { + "epoch": 0.15639329823810083, + "grad_norm": 307.52618408203125, + "learning_rate": 9.908667990951424e-06, + "loss": 12.7383, + "step": 77420 + }, + { + "epoch": 0.15641349887078462, + "grad_norm": 266.4269104003906, + "learning_rate": 9.908601565468768e-06, + "loss": 10.2761, + "step": 77430 + }, + { + "epoch": 0.15643369950346844, + "grad_norm": 706.94921875, + "learning_rate": 9.908535116062185e-06, + "loss": 22.4348, + "step": 77440 + }, + { + "epoch": 0.15645390013615226, + "grad_norm": 533.7908935546875, + "learning_rate": 9.908468642731996e-06, + "loss": 25.5123, + "step": 77450 + }, + { + "epoch": 0.15647410076883608, + "grad_norm": 943.0638427734375, + "learning_rate": 9.908402145478526e-06, + "loss": 30.1085, + "step": 77460 + }, + { + "epoch": 0.1564943014015199, + "grad_norm": 375.0356750488281, + "learning_rate": 9.908335624302099e-06, + "loss": 18.2726, + "step": 77470 + }, + { + "epoch": 0.15651450203420372, + "grad_norm": 1029.725341796875, + "learning_rate": 9.908269079203039e-06, + "loss": 34.551, + "step": 77480 + }, + { + "epoch": 0.15653470266688752, + "grad_norm": 944.670166015625, + "learning_rate": 9.908202510181673e-06, + "loss": 36.7944, + "step": 77490 + }, + { + "epoch": 0.15655490329957134, + "grad_norm": 336.8741760253906, + "learning_rate": 9.908135917238321e-06, + "loss": 20.5199, + "step": 77500 + }, + { + "epoch": 0.15657510393225516, + "grad_norm": 336.6961364746094, + "learning_rate": 9.90806930037331e-06, + "loss": 21.9893, + "step": 77510 + }, + { + "epoch": 0.15659530456493898, + "grad_norm": 1216.4962158203125, + "learning_rate": 9.908002659586966e-06, + "loss": 20.7643, + "step": 77520 + }, + { + "epoch": 0.1566155051976228, + "grad_norm": 611.2420043945312, + "learning_rate": 9.907935994879612e-06, + "loss": 25.8953, + "step": 77530 + }, + { + "epoch": 0.15663570583030662, + "grad_norm": 165.66566467285156, + "learning_rate": 9.907869306251571e-06, + "loss": 32.8533, + "step": 77540 + }, + { + "epoch": 0.1566559064629904, + "grad_norm": 209.29922485351562, + "learning_rate": 9.907802593703173e-06, + "loss": 15.847, + "step": 77550 + }, + { + "epoch": 
0.15667610709567423, + "grad_norm": 831.6998291015625, + "learning_rate": 9.90773585723474e-06, + "loss": 27.0138, + "step": 77560 + }, + { + "epoch": 0.15669630772835805, + "grad_norm": 424.3438720703125, + "learning_rate": 9.907669096846596e-06, + "loss": 29.0207, + "step": 77570 + }, + { + "epoch": 0.15671650836104187, + "grad_norm": 574.2207641601562, + "learning_rate": 9.90760231253907e-06, + "loss": 21.9935, + "step": 77580 + }, + { + "epoch": 0.1567367089937257, + "grad_norm": 306.4346923828125, + "learning_rate": 9.907535504312484e-06, + "loss": 10.5125, + "step": 77590 + }, + { + "epoch": 0.1567569096264095, + "grad_norm": 223.48089599609375, + "learning_rate": 9.907468672167165e-06, + "loss": 25.2818, + "step": 77600 + }, + { + "epoch": 0.15677711025909333, + "grad_norm": 1125.790771484375, + "learning_rate": 9.90740181610344e-06, + "loss": 31.856, + "step": 77610 + }, + { + "epoch": 0.15679731089177712, + "grad_norm": 347.9830017089844, + "learning_rate": 9.907334936121634e-06, + "loss": 21.6079, + "step": 77620 + }, + { + "epoch": 0.15681751152446094, + "grad_norm": 1014.19140625, + "learning_rate": 9.907268032222072e-06, + "loss": 34.0749, + "step": 77630 + }, + { + "epoch": 0.15683771215714476, + "grad_norm": 373.791748046875, + "learning_rate": 9.90720110440508e-06, + "loss": 29.7491, + "step": 77640 + }, + { + "epoch": 0.15685791278982858, + "grad_norm": 263.6556396484375, + "learning_rate": 9.907134152670987e-06, + "loss": 21.2426, + "step": 77650 + }, + { + "epoch": 0.1568781134225124, + "grad_norm": 543.62890625, + "learning_rate": 9.907067177020115e-06, + "loss": 33.8, + "step": 77660 + }, + { + "epoch": 0.15689831405519622, + "grad_norm": 467.43670654296875, + "learning_rate": 9.907000177452794e-06, + "loss": 13.0608, + "step": 77670 + }, + { + "epoch": 0.15691851468788, + "grad_norm": 398.6751708984375, + "learning_rate": 9.90693315396935e-06, + "loss": 21.4861, + "step": 77680 + }, + { + "epoch": 0.15693871532056383, + "grad_norm": 741.5259399414062, + "learning_rate": 9.906866106570108e-06, + "loss": 27.6681, + "step": 77690 + }, + { + "epoch": 0.15695891595324765, + "grad_norm": 310.8424377441406, + "learning_rate": 9.906799035255395e-06, + "loss": 22.0539, + "step": 77700 + }, + { + "epoch": 0.15697911658593147, + "grad_norm": 357.97467041015625, + "learning_rate": 9.90673194002554e-06, + "loss": 15.8134, + "step": 77710 + }, + { + "epoch": 0.1569993172186153, + "grad_norm": 987.2659912109375, + "learning_rate": 9.906664820880869e-06, + "loss": 27.9395, + "step": 77720 + }, + { + "epoch": 0.1570195178512991, + "grad_norm": 219.6345672607422, + "learning_rate": 9.906597677821708e-06, + "loss": 25.4969, + "step": 77730 + }, + { + "epoch": 0.15703971848398293, + "grad_norm": 298.4275817871094, + "learning_rate": 9.906530510848384e-06, + "loss": 25.9595, + "step": 77740 + }, + { + "epoch": 0.15705991911666672, + "grad_norm": 527.123291015625, + "learning_rate": 9.906463319961225e-06, + "loss": 33.0071, + "step": 77750 + }, + { + "epoch": 0.15708011974935054, + "grad_norm": 114.7584457397461, + "learning_rate": 9.906396105160561e-06, + "loss": 16.7282, + "step": 77760 + }, + { + "epoch": 0.15710032038203436, + "grad_norm": 606.5748901367188, + "learning_rate": 9.906328866446717e-06, + "loss": 19.7913, + "step": 77770 + }, + { + "epoch": 0.15712052101471818, + "grad_norm": 411.9849853515625, + "learning_rate": 9.906261603820022e-06, + "loss": 16.8656, + "step": 77780 + }, + { + "epoch": 0.157140721647402, + "grad_norm": 129.91851806640625, + "learning_rate": 
9.906194317280802e-06, + "loss": 24.2333, + "step": 77790 + }, + { + "epoch": 0.15716092228008582, + "grad_norm": 140.0809326171875, + "learning_rate": 9.906127006829385e-06, + "loss": 11.4515, + "step": 77800 + }, + { + "epoch": 0.15718112291276962, + "grad_norm": 596.6380004882812, + "learning_rate": 9.9060596724661e-06, + "loss": 19.8775, + "step": 77810 + }, + { + "epoch": 0.15720132354545344, + "grad_norm": 184.2483367919922, + "learning_rate": 9.905992314191277e-06, + "loss": 25.3137, + "step": 77820 + }, + { + "epoch": 0.15722152417813726, + "grad_norm": 447.89453125, + "learning_rate": 9.905924932005241e-06, + "loss": 42.2161, + "step": 77830 + }, + { + "epoch": 0.15724172481082108, + "grad_norm": 356.61834716796875, + "learning_rate": 9.905857525908322e-06, + "loss": 17.7926, + "step": 77840 + }, + { + "epoch": 0.1572619254435049, + "grad_norm": 294.5441589355469, + "learning_rate": 9.905790095900849e-06, + "loss": 23.3443, + "step": 77850 + }, + { + "epoch": 0.15728212607618872, + "grad_norm": 754.86279296875, + "learning_rate": 9.905722641983151e-06, + "loss": 19.1399, + "step": 77860 + }, + { + "epoch": 0.1573023267088725, + "grad_norm": 366.54058837890625, + "learning_rate": 9.905655164155554e-06, + "loss": 35.2363, + "step": 77870 + }, + { + "epoch": 0.15732252734155633, + "grad_norm": 971.9962158203125, + "learning_rate": 9.90558766241839e-06, + "loss": 29.3012, + "step": 77880 + }, + { + "epoch": 0.15734272797424015, + "grad_norm": 175.7159881591797, + "learning_rate": 9.905520136771985e-06, + "loss": 25.0469, + "step": 77890 + }, + { + "epoch": 0.15736292860692397, + "grad_norm": 540.7525024414062, + "learning_rate": 9.90545258721667e-06, + "loss": 24.6414, + "step": 77900 + }, + { + "epoch": 0.1573831292396078, + "grad_norm": 341.110107421875, + "learning_rate": 9.905385013752777e-06, + "loss": 16.2405, + "step": 77910 + }, + { + "epoch": 0.1574033298722916, + "grad_norm": 381.3741455078125, + "learning_rate": 9.905317416380629e-06, + "loss": 40.9647, + "step": 77920 + }, + { + "epoch": 0.15742353050497543, + "grad_norm": 1155.7586669921875, + "learning_rate": 9.905249795100561e-06, + "loss": 30.6699, + "step": 77930 + }, + { + "epoch": 0.15744373113765922, + "grad_norm": 485.991943359375, + "learning_rate": 9.905182149912899e-06, + "loss": 30.4669, + "step": 77940 + }, + { + "epoch": 0.15746393177034304, + "grad_norm": 553.8409423828125, + "learning_rate": 9.905114480817976e-06, + "loss": 35.3405, + "step": 77950 + }, + { + "epoch": 0.15748413240302686, + "grad_norm": 347.72711181640625, + "learning_rate": 9.905046787816118e-06, + "loss": 17.575, + "step": 77960 + }, + { + "epoch": 0.15750433303571068, + "grad_norm": 490.9544982910156, + "learning_rate": 9.904979070907657e-06, + "loss": 21.9032, + "step": 77970 + }, + { + "epoch": 0.1575245336683945, + "grad_norm": 809.31494140625, + "learning_rate": 9.904911330092923e-06, + "loss": 28.7668, + "step": 77980 + }, + { + "epoch": 0.15754473430107832, + "grad_norm": 347.6751708984375, + "learning_rate": 9.904843565372249e-06, + "loss": 43.1143, + "step": 77990 + }, + { + "epoch": 0.1575649349337621, + "grad_norm": 299.9636535644531, + "learning_rate": 9.904775776745959e-06, + "loss": 24.741, + "step": 78000 + }, + { + "epoch": 0.15758513556644593, + "grad_norm": 677.6216430664062, + "learning_rate": 9.904707964214386e-06, + "loss": 17.0624, + "step": 78010 + }, + { + "epoch": 0.15760533619912975, + "grad_norm": 403.5384826660156, + "learning_rate": 9.904640127777865e-06, + "loss": 36.389, + "step": 78020 + }, + { + "epoch": 
0.15762553683181357, + "grad_norm": 339.7036437988281, + "learning_rate": 9.904572267436721e-06, + "loss": 14.2183, + "step": 78030 + }, + { + "epoch": 0.1576457374644974, + "grad_norm": 675.6530151367188, + "learning_rate": 9.904504383191286e-06, + "loss": 13.3383, + "step": 78040 + }, + { + "epoch": 0.1576659380971812, + "grad_norm": 551.7474365234375, + "learning_rate": 9.904436475041892e-06, + "loss": 26.4035, + "step": 78050 + }, + { + "epoch": 0.15768613872986503, + "grad_norm": 467.4062805175781, + "learning_rate": 9.904368542988869e-06, + "loss": 32.9935, + "step": 78060 + }, + { + "epoch": 0.15770633936254883, + "grad_norm": 215.01026916503906, + "learning_rate": 9.90430058703255e-06, + "loss": 25.5886, + "step": 78070 + }, + { + "epoch": 0.15772653999523265, + "grad_norm": 233.78195190429688, + "learning_rate": 9.904232607173262e-06, + "loss": 24.0233, + "step": 78080 + }, + { + "epoch": 0.15774674062791647, + "grad_norm": 7.0149664878845215, + "learning_rate": 9.90416460341134e-06, + "loss": 22.3333, + "step": 78090 + }, + { + "epoch": 0.15776694126060029, + "grad_norm": 793.04150390625, + "learning_rate": 9.904096575747117e-06, + "loss": 33.6149, + "step": 78100 + }, + { + "epoch": 0.1577871418932841, + "grad_norm": 748.5648803710938, + "learning_rate": 9.90402852418092e-06, + "loss": 31.4978, + "step": 78110 + }, + { + "epoch": 0.15780734252596793, + "grad_norm": 306.300537109375, + "learning_rate": 9.903960448713084e-06, + "loss": 20.0952, + "step": 78120 + }, + { + "epoch": 0.15782754315865172, + "grad_norm": 211.0547332763672, + "learning_rate": 9.903892349343938e-06, + "loss": 45.8324, + "step": 78130 + }, + { + "epoch": 0.15784774379133554, + "grad_norm": 253.32325744628906, + "learning_rate": 9.903824226073816e-06, + "loss": 24.4214, + "step": 78140 + }, + { + "epoch": 0.15786794442401936, + "grad_norm": 349.3773193359375, + "learning_rate": 9.90375607890305e-06, + "loss": 18.1157, + "step": 78150 + }, + { + "epoch": 0.15788814505670318, + "grad_norm": 328.2897033691406, + "learning_rate": 9.903687907831972e-06, + "loss": 16.9796, + "step": 78160 + }, + { + "epoch": 0.157908345689387, + "grad_norm": 980.4244384765625, + "learning_rate": 9.903619712860912e-06, + "loss": 35.0202, + "step": 78170 + }, + { + "epoch": 0.15792854632207082, + "grad_norm": 1009.1373291015625, + "learning_rate": 9.903551493990205e-06, + "loss": 41.671, + "step": 78180 + }, + { + "epoch": 0.1579487469547546, + "grad_norm": 41.043155670166016, + "learning_rate": 9.903483251220183e-06, + "loss": 8.9685, + "step": 78190 + }, + { + "epoch": 0.15796894758743843, + "grad_norm": 442.794189453125, + "learning_rate": 9.903414984551178e-06, + "loss": 24.7748, + "step": 78200 + }, + { + "epoch": 0.15798914822012225, + "grad_norm": 415.76959228515625, + "learning_rate": 9.903346693983524e-06, + "loss": 24.3408, + "step": 78210 + }, + { + "epoch": 0.15800934885280607, + "grad_norm": 367.5481262207031, + "learning_rate": 9.903278379517554e-06, + "loss": 23.2312, + "step": 78220 + }, + { + "epoch": 0.1580295494854899, + "grad_norm": 825.7837524414062, + "learning_rate": 9.903210041153597e-06, + "loss": 34.7608, + "step": 78230 + }, + { + "epoch": 0.1580497501181737, + "grad_norm": 370.3226318359375, + "learning_rate": 9.90314167889199e-06, + "loss": 18.5017, + "step": 78240 + }, + { + "epoch": 0.15806995075085753, + "grad_norm": 147.1715850830078, + "learning_rate": 9.903073292733065e-06, + "loss": 16.7317, + "step": 78250 + }, + { + "epoch": 0.15809015138354132, + "grad_norm": 1859.4921875, + "learning_rate": 
9.903004882677157e-06, + "loss": 26.7355, + "step": 78260 + }, + { + "epoch": 0.15811035201622514, + "grad_norm": 334.5677185058594, + "learning_rate": 9.902936448724596e-06, + "loss": 32.5608, + "step": 78270 + }, + { + "epoch": 0.15813055264890896, + "grad_norm": 192.8003692626953, + "learning_rate": 9.90286799087572e-06, + "loss": 17.3901, + "step": 78280 + }, + { + "epoch": 0.15815075328159278, + "grad_norm": 165.76022338867188, + "learning_rate": 9.902799509130857e-06, + "loss": 18.9962, + "step": 78290 + }, + { + "epoch": 0.1581709539142766, + "grad_norm": 381.4460144042969, + "learning_rate": 9.902731003490344e-06, + "loss": 19.4837, + "step": 78300 + }, + { + "epoch": 0.15819115454696042, + "grad_norm": 466.5162048339844, + "learning_rate": 9.902662473954516e-06, + "loss": 24.59, + "step": 78310 + }, + { + "epoch": 0.15821135517964421, + "grad_norm": 919.7655029296875, + "learning_rate": 9.902593920523706e-06, + "loss": 38.7198, + "step": 78320 + }, + { + "epoch": 0.15823155581232803, + "grad_norm": 211.0843963623047, + "learning_rate": 9.902525343198249e-06, + "loss": 17.7991, + "step": 78330 + }, + { + "epoch": 0.15825175644501185, + "grad_norm": 894.8384399414062, + "learning_rate": 9.902456741978475e-06, + "loss": 23.4159, + "step": 78340 + }, + { + "epoch": 0.15827195707769567, + "grad_norm": 917.4749755859375, + "learning_rate": 9.902388116864723e-06, + "loss": 28.902, + "step": 78350 + }, + { + "epoch": 0.1582921577103795, + "grad_norm": 173.82229614257812, + "learning_rate": 9.902319467857326e-06, + "loss": 34.2183, + "step": 78360 + }, + { + "epoch": 0.15831235834306331, + "grad_norm": 616.9852905273438, + "learning_rate": 9.902250794956618e-06, + "loss": 19.6543, + "step": 78370 + }, + { + "epoch": 0.15833255897574713, + "grad_norm": 374.2037658691406, + "learning_rate": 9.902182098162933e-06, + "loss": 10.8244, + "step": 78380 + }, + { + "epoch": 0.15835275960843093, + "grad_norm": 563.2325439453125, + "learning_rate": 9.90211337747661e-06, + "loss": 21.4827, + "step": 78390 + }, + { + "epoch": 0.15837296024111475, + "grad_norm": 328.37738037109375, + "learning_rate": 9.90204463289798e-06, + "loss": 20.2847, + "step": 78400 + }, + { + "epoch": 0.15839316087379857, + "grad_norm": 261.7314453125, + "learning_rate": 9.901975864427378e-06, + "loss": 17.9665, + "step": 78410 + }, + { + "epoch": 0.1584133615064824, + "grad_norm": 357.9438781738281, + "learning_rate": 9.90190707206514e-06, + "loss": 33.0531, + "step": 78420 + }, + { + "epoch": 0.1584335621391662, + "grad_norm": 505.8860168457031, + "learning_rate": 9.901838255811602e-06, + "loss": 21.0291, + "step": 78430 + }, + { + "epoch": 0.15845376277185003, + "grad_norm": 987.0386962890625, + "learning_rate": 9.9017694156671e-06, + "loss": 27.4556, + "step": 78440 + }, + { + "epoch": 0.15847396340453382, + "grad_norm": 564.0770263671875, + "learning_rate": 9.901700551631966e-06, + "loss": 22.7123, + "step": 78450 + }, + { + "epoch": 0.15849416403721764, + "grad_norm": 160.28030395507812, + "learning_rate": 9.901631663706539e-06, + "loss": 21.0695, + "step": 78460 + }, + { + "epoch": 0.15851436466990146, + "grad_norm": 0.0, + "learning_rate": 9.901562751891155e-06, + "loss": 17.3257, + "step": 78470 + }, + { + "epoch": 0.15853456530258528, + "grad_norm": 255.56430053710938, + "learning_rate": 9.901493816186148e-06, + "loss": 28.4382, + "step": 78480 + }, + { + "epoch": 0.1585547659352691, + "grad_norm": 261.3754577636719, + "learning_rate": 9.901424856591855e-06, + "loss": 15.6467, + "step": 78490 + }, + { + "epoch": 
0.15857496656795292, + "grad_norm": 113.56884002685547, + "learning_rate": 9.901355873108611e-06, + "loss": 15.9694, + "step": 78500 + }, + { + "epoch": 0.1585951672006367, + "grad_norm": 428.04351806640625, + "learning_rate": 9.901286865736752e-06, + "loss": 25.3237, + "step": 78510 + }, + { + "epoch": 0.15861536783332053, + "grad_norm": 272.8048400878906, + "learning_rate": 9.901217834476616e-06, + "loss": 23.434, + "step": 78520 + }, + { + "epoch": 0.15863556846600435, + "grad_norm": 240.38743591308594, + "learning_rate": 9.90114877932854e-06, + "loss": 25.2228, + "step": 78530 + }, + { + "epoch": 0.15865576909868817, + "grad_norm": 931.5643920898438, + "learning_rate": 9.901079700292858e-06, + "loss": 25.6073, + "step": 78540 + }, + { + "epoch": 0.158675969731372, + "grad_norm": 1009.3199462890625, + "learning_rate": 9.901010597369908e-06, + "loss": 25.051, + "step": 78550 + }, + { + "epoch": 0.1586961703640558, + "grad_norm": 98.7724838256836, + "learning_rate": 9.900941470560025e-06, + "loss": 19.5606, + "step": 78560 + }, + { + "epoch": 0.15871637099673963, + "grad_norm": 404.62738037109375, + "learning_rate": 9.900872319863551e-06, + "loss": 12.841, + "step": 78570 + }, + { + "epoch": 0.15873657162942342, + "grad_norm": 814.7490234375, + "learning_rate": 9.90080314528082e-06, + "loss": 21.3623, + "step": 78580 + }, + { + "epoch": 0.15875677226210724, + "grad_norm": 500.2481384277344, + "learning_rate": 9.900733946812167e-06, + "loss": 25.9193, + "step": 78590 + }, + { + "epoch": 0.15877697289479106, + "grad_norm": 202.12466430664062, + "learning_rate": 9.900664724457932e-06, + "loss": 32.4658, + "step": 78600 + }, + { + "epoch": 0.15879717352747488, + "grad_norm": 304.1930236816406, + "learning_rate": 9.900595478218449e-06, + "loss": 16.5442, + "step": 78610 + }, + { + "epoch": 0.1588173741601587, + "grad_norm": 462.53460693359375, + "learning_rate": 9.900526208094061e-06, + "loss": 18.6229, + "step": 78620 + }, + { + "epoch": 0.15883757479284252, + "grad_norm": 747.231201171875, + "learning_rate": 9.900456914085101e-06, + "loss": 25.9071, + "step": 78630 + }, + { + "epoch": 0.15885777542552632, + "grad_norm": 685.5018310546875, + "learning_rate": 9.90038759619191e-06, + "loss": 17.1709, + "step": 78640 + }, + { + "epoch": 0.15887797605821014, + "grad_norm": 176.51881408691406, + "learning_rate": 9.900318254414823e-06, + "loss": 13.024, + "step": 78650 + }, + { + "epoch": 0.15889817669089396, + "grad_norm": 309.40802001953125, + "learning_rate": 9.900248888754179e-06, + "loss": 39.5304, + "step": 78660 + }, + { + "epoch": 0.15891837732357778, + "grad_norm": 299.6368713378906, + "learning_rate": 9.900179499210316e-06, + "loss": 25.6999, + "step": 78670 + }, + { + "epoch": 0.1589385779562616, + "grad_norm": 795.1394653320312, + "learning_rate": 9.900110085783573e-06, + "loss": 18.9494, + "step": 78680 + }, + { + "epoch": 0.15895877858894542, + "grad_norm": 306.2209777832031, + "learning_rate": 9.900040648474287e-06, + "loss": 37.5444, + "step": 78690 + }, + { + "epoch": 0.15897897922162924, + "grad_norm": 913.7659301757812, + "learning_rate": 9.899971187282799e-06, + "loss": 41.746, + "step": 78700 + }, + { + "epoch": 0.15899917985431303, + "grad_norm": 345.07769775390625, + "learning_rate": 9.899901702209445e-06, + "loss": 20.4043, + "step": 78710 + }, + { + "epoch": 0.15901938048699685, + "grad_norm": 782.451416015625, + "learning_rate": 9.899832193254564e-06, + "loss": 31.8815, + "step": 78720 + }, + { + "epoch": 0.15903958111968067, + "grad_norm": 756.5360107421875, + 
"learning_rate": 9.899762660418495e-06, + "loss": 39.7848, + "step": 78730 + }, + { + "epoch": 0.1590597817523645, + "grad_norm": 521.2705078125, + "learning_rate": 9.899693103701577e-06, + "loss": 27.9996, + "step": 78740 + }, + { + "epoch": 0.1590799823850483, + "grad_norm": 410.418701171875, + "learning_rate": 9.899623523104149e-06, + "loss": 17.4492, + "step": 78750 + }, + { + "epoch": 0.15910018301773213, + "grad_norm": 274.18798828125, + "learning_rate": 9.89955391862655e-06, + "loss": 22.3157, + "step": 78760 + }, + { + "epoch": 0.15912038365041592, + "grad_norm": 649.7140502929688, + "learning_rate": 9.89948429026912e-06, + "loss": 29.439, + "step": 78770 + }, + { + "epoch": 0.15914058428309974, + "grad_norm": 510.0090637207031, + "learning_rate": 9.8994146380322e-06, + "loss": 23.7447, + "step": 78780 + }, + { + "epoch": 0.15916078491578356, + "grad_norm": 239.9370880126953, + "learning_rate": 9.899344961916123e-06, + "loss": 23.484, + "step": 78790 + }, + { + "epoch": 0.15918098554846738, + "grad_norm": 328.5083923339844, + "learning_rate": 9.899275261921236e-06, + "loss": 15.7193, + "step": 78800 + }, + { + "epoch": 0.1592011861811512, + "grad_norm": 220.7934112548828, + "learning_rate": 9.899205538047873e-06, + "loss": 26.5226, + "step": 78810 + }, + { + "epoch": 0.15922138681383502, + "grad_norm": 187.09764099121094, + "learning_rate": 9.899135790296379e-06, + "loss": 26.8904, + "step": 78820 + }, + { + "epoch": 0.1592415874465188, + "grad_norm": 310.1947021484375, + "learning_rate": 9.89906601866709e-06, + "loss": 19.6561, + "step": 78830 + }, + { + "epoch": 0.15926178807920263, + "grad_norm": 543.0774536132812, + "learning_rate": 9.898996223160348e-06, + "loss": 26.4144, + "step": 78840 + }, + { + "epoch": 0.15928198871188645, + "grad_norm": 652.3689575195312, + "learning_rate": 9.898926403776492e-06, + "loss": 36.7921, + "step": 78850 + }, + { + "epoch": 0.15930218934457027, + "grad_norm": 535.2853393554688, + "learning_rate": 9.898856560515864e-06, + "loss": 37.836, + "step": 78860 + }, + { + "epoch": 0.1593223899772541, + "grad_norm": 407.8519592285156, + "learning_rate": 9.898786693378801e-06, + "loss": 18.7181, + "step": 78870 + }, + { + "epoch": 0.1593425906099379, + "grad_norm": 126.90837097167969, + "learning_rate": 9.898716802365648e-06, + "loss": 26.3485, + "step": 78880 + }, + { + "epoch": 0.15936279124262173, + "grad_norm": 841.9174194335938, + "learning_rate": 9.898646887476742e-06, + "loss": 41.1984, + "step": 78890 + }, + { + "epoch": 0.15938299187530552, + "grad_norm": 364.14984130859375, + "learning_rate": 9.898576948712427e-06, + "loss": 13.7308, + "step": 78900 + }, + { + "epoch": 0.15940319250798934, + "grad_norm": 1718.7718505859375, + "learning_rate": 9.89850698607304e-06, + "loss": 63.4012, + "step": 78910 + }, + { + "epoch": 0.15942339314067316, + "grad_norm": 320.6148986816406, + "learning_rate": 9.898436999558924e-06, + "loss": 30.7214, + "step": 78920 + }, + { + "epoch": 0.15944359377335698, + "grad_norm": 392.3424377441406, + "learning_rate": 9.898366989170423e-06, + "loss": 26.3176, + "step": 78930 + }, + { + "epoch": 0.1594637944060408, + "grad_norm": 726.4696044921875, + "learning_rate": 9.898296954907874e-06, + "loss": 33.6014, + "step": 78940 + }, + { + "epoch": 0.15948399503872462, + "grad_norm": 577.5730590820312, + "learning_rate": 9.898226896771619e-06, + "loss": 31.6721, + "step": 78950 + }, + { + "epoch": 0.15950419567140842, + "grad_norm": 1348.5142822265625, + "learning_rate": 9.898156814762e-06, + "loss": 24.0305, + "step": 78960 + 
}, + { + "epoch": 0.15952439630409224, + "grad_norm": 515.8429565429688, + "learning_rate": 9.898086708879359e-06, + "loss": 20.5706, + "step": 78970 + }, + { + "epoch": 0.15954459693677606, + "grad_norm": 455.5331115722656, + "learning_rate": 9.898016579124039e-06, + "loss": 26.9815, + "step": 78980 + }, + { + "epoch": 0.15956479756945988, + "grad_norm": 233.70321655273438, + "learning_rate": 9.897946425496379e-06, + "loss": 20.0229, + "step": 78990 + }, + { + "epoch": 0.1595849982021437, + "grad_norm": 485.7436828613281, + "learning_rate": 9.89787624799672e-06, + "loss": 17.0227, + "step": 79000 + }, + { + "epoch": 0.15960519883482752, + "grad_norm": 206.46237182617188, + "learning_rate": 9.897806046625408e-06, + "loss": 24.8934, + "step": 79010 + }, + { + "epoch": 0.1596253994675113, + "grad_norm": 575.5479125976562, + "learning_rate": 9.897735821382786e-06, + "loss": 25.1718, + "step": 79020 + }, + { + "epoch": 0.15964560010019513, + "grad_norm": 186.63671875, + "learning_rate": 9.89766557226919e-06, + "loss": 20.1794, + "step": 79030 + }, + { + "epoch": 0.15966580073287895, + "grad_norm": 283.55096435546875, + "learning_rate": 9.897595299284968e-06, + "loss": 25.0277, + "step": 79040 + }, + { + "epoch": 0.15968600136556277, + "grad_norm": 308.6726989746094, + "learning_rate": 9.897525002430459e-06, + "loss": 36.4611, + "step": 79050 + }, + { + "epoch": 0.1597062019982466, + "grad_norm": 326.8775329589844, + "learning_rate": 9.89745468170601e-06, + "loss": 21.8554, + "step": 79060 + }, + { + "epoch": 0.1597264026309304, + "grad_norm": 462.3061218261719, + "learning_rate": 9.897384337111956e-06, + "loss": 27.4429, + "step": 79070 + }, + { + "epoch": 0.15974660326361423, + "grad_norm": 472.64935302734375, + "learning_rate": 9.89731396864865e-06, + "loss": 22.6224, + "step": 79080 + }, + { + "epoch": 0.15976680389629802, + "grad_norm": 454.6688537597656, + "learning_rate": 9.897243576316426e-06, + "loss": 32.4297, + "step": 79090 + }, + { + "epoch": 0.15978700452898184, + "grad_norm": 139.9843292236328, + "learning_rate": 9.897173160115633e-06, + "loss": 19.9118, + "step": 79100 + }, + { + "epoch": 0.15980720516166566, + "grad_norm": 404.95819091796875, + "learning_rate": 9.89710272004661e-06, + "loss": 27.7085, + "step": 79110 + }, + { + "epoch": 0.15982740579434948, + "grad_norm": 372.12335205078125, + "learning_rate": 9.897032256109705e-06, + "loss": 19.7024, + "step": 79120 + }, + { + "epoch": 0.1598476064270333, + "grad_norm": 99.77437591552734, + "learning_rate": 9.896961768305255e-06, + "loss": 37.5838, + "step": 79130 + }, + { + "epoch": 0.15986780705971712, + "grad_norm": 16.08628273010254, + "learning_rate": 9.89689125663361e-06, + "loss": 15.0803, + "step": 79140 + }, + { + "epoch": 0.1598880076924009, + "grad_norm": 142.9248809814453, + "learning_rate": 9.89682072109511e-06, + "loss": 38.2178, + "step": 79150 + }, + { + "epoch": 0.15990820832508473, + "grad_norm": 1037.6297607421875, + "learning_rate": 9.8967501616901e-06, + "loss": 36.9221, + "step": 79160 + }, + { + "epoch": 0.15992840895776855, + "grad_norm": 433.88153076171875, + "learning_rate": 9.896679578418924e-06, + "loss": 46.2312, + "step": 79170 + }, + { + "epoch": 0.15994860959045237, + "grad_norm": 305.0263671875, + "learning_rate": 9.896608971281926e-06, + "loss": 14.7309, + "step": 79180 + }, + { + "epoch": 0.1599688102231362, + "grad_norm": 538.8518676757812, + "learning_rate": 9.896538340279449e-06, + "loss": 36.7984, + "step": 79190 + }, + { + "epoch": 0.15998901085582, + "grad_norm": 184.4010009765625, + 
"learning_rate": 9.896467685411838e-06, + "loss": 16.8615, + "step": 79200 + }, + { + "epoch": 0.16000921148850383, + "grad_norm": 405.7578125, + "learning_rate": 9.896397006679437e-06, + "loss": 26.13, + "step": 79210 + }, + { + "epoch": 0.16002941212118763, + "grad_norm": 516.3880004882812, + "learning_rate": 9.89632630408259e-06, + "loss": 22.1688, + "step": 79220 + }, + { + "epoch": 0.16004961275387145, + "grad_norm": 382.6693115234375, + "learning_rate": 9.896255577621646e-06, + "loss": 39.9827, + "step": 79230 + }, + { + "epoch": 0.16006981338655527, + "grad_norm": 166.48707580566406, + "learning_rate": 9.896184827296942e-06, + "loss": 20.4572, + "step": 79240 + }, + { + "epoch": 0.16009001401923909, + "grad_norm": 1007.0042724609375, + "learning_rate": 9.89611405310883e-06, + "loss": 37.7073, + "step": 79250 + }, + { + "epoch": 0.1601102146519229, + "grad_norm": 357.4544677734375, + "learning_rate": 9.89604325505765e-06, + "loss": 26.8339, + "step": 79260 + }, + { + "epoch": 0.16013041528460673, + "grad_norm": 221.9705810546875, + "learning_rate": 9.89597243314375e-06, + "loss": 23.2171, + "step": 79270 + }, + { + "epoch": 0.16015061591729052, + "grad_norm": 1382.4564208984375, + "learning_rate": 9.895901587367473e-06, + "loss": 34.2562, + "step": 79280 + }, + { + "epoch": 0.16017081654997434, + "grad_norm": 156.50645446777344, + "learning_rate": 9.895830717729166e-06, + "loss": 24.7322, + "step": 79290 + }, + { + "epoch": 0.16019101718265816, + "grad_norm": 394.5559997558594, + "learning_rate": 9.895759824229176e-06, + "loss": 25.0025, + "step": 79300 + }, + { + "epoch": 0.16021121781534198, + "grad_norm": 498.4718322753906, + "learning_rate": 9.895688906867844e-06, + "loss": 21.523, + "step": 79310 + }, + { + "epoch": 0.1602314184480258, + "grad_norm": 883.3118896484375, + "learning_rate": 9.89561796564552e-06, + "loss": 38.3339, + "step": 79320 + }, + { + "epoch": 0.16025161908070962, + "grad_norm": 458.0317687988281, + "learning_rate": 9.895547000562546e-06, + "loss": 20.0624, + "step": 79330 + }, + { + "epoch": 0.1602718197133934, + "grad_norm": 438.5228576660156, + "learning_rate": 9.895476011619269e-06, + "loss": 35.2709, + "step": 79340 + }, + { + "epoch": 0.16029202034607723, + "grad_norm": 300.7026062011719, + "learning_rate": 9.895404998816038e-06, + "loss": 21.5623, + "step": 79350 + }, + { + "epoch": 0.16031222097876105, + "grad_norm": 396.0194091796875, + "learning_rate": 9.895333962153195e-06, + "loss": 18.2463, + "step": 79360 + }, + { + "epoch": 0.16033242161144487, + "grad_norm": 136.9529266357422, + "learning_rate": 9.895262901631088e-06, + "loss": 26.6099, + "step": 79370 + }, + { + "epoch": 0.1603526222441287, + "grad_norm": 322.3371276855469, + "learning_rate": 9.895191817250064e-06, + "loss": 25.9525, + "step": 79380 + }, + { + "epoch": 0.1603728228768125, + "grad_norm": 151.4694366455078, + "learning_rate": 9.89512070901047e-06, + "loss": 15.6631, + "step": 79390 + }, + { + "epoch": 0.16039302350949633, + "grad_norm": 291.7964172363281, + "learning_rate": 9.89504957691265e-06, + "loss": 16.5786, + "step": 79400 + }, + { + "epoch": 0.16041322414218012, + "grad_norm": 261.4203186035156, + "learning_rate": 9.894978420956953e-06, + "loss": 14.5822, + "step": 79410 + }, + { + "epoch": 0.16043342477486394, + "grad_norm": 631.08642578125, + "learning_rate": 9.894907241143722e-06, + "loss": 21.6464, + "step": 79420 + }, + { + "epoch": 0.16045362540754776, + "grad_norm": 390.1617736816406, + "learning_rate": 9.89483603747331e-06, + "loss": 34.0947, + "step": 79430 + 
}, + { + "epoch": 0.16047382604023158, + "grad_norm": 240.8678741455078, + "learning_rate": 9.89476480994606e-06, + "loss": 47.7836, + "step": 79440 + }, + { + "epoch": 0.1604940266729154, + "grad_norm": 767.7398071289062, + "learning_rate": 9.894693558562319e-06, + "loss": 19.2485, + "step": 79450 + }, + { + "epoch": 0.16051422730559922, + "grad_norm": 880.9559326171875, + "learning_rate": 9.894622283322436e-06, + "loss": 28.4394, + "step": 79460 + }, + { + "epoch": 0.16053442793828301, + "grad_norm": 347.03765869140625, + "learning_rate": 9.894550984226759e-06, + "loss": 22.7121, + "step": 79470 + }, + { + "epoch": 0.16055462857096683, + "grad_norm": 497.994384765625, + "learning_rate": 9.894479661275631e-06, + "loss": 22.3754, + "step": 79480 + }, + { + "epoch": 0.16057482920365065, + "grad_norm": 418.9917297363281, + "learning_rate": 9.894408314469404e-06, + "loss": 29.0265, + "step": 79490 + }, + { + "epoch": 0.16059502983633447, + "grad_norm": 813.7963256835938, + "learning_rate": 9.894336943808426e-06, + "loss": 31.7485, + "step": 79500 + }, + { + "epoch": 0.1606152304690183, + "grad_norm": 968.6171875, + "learning_rate": 9.894265549293043e-06, + "loss": 19.6763, + "step": 79510 + }, + { + "epoch": 0.16063543110170211, + "grad_norm": 200.5768280029297, + "learning_rate": 9.894194130923602e-06, + "loss": 20.3647, + "step": 79520 + }, + { + "epoch": 0.16065563173438593, + "grad_norm": 1075.686767578125, + "learning_rate": 9.894122688700452e-06, + "loss": 49.7737, + "step": 79530 + }, + { + "epoch": 0.16067583236706973, + "grad_norm": 535.426513671875, + "learning_rate": 9.894051222623943e-06, + "loss": 16.3721, + "step": 79540 + }, + { + "epoch": 0.16069603299975355, + "grad_norm": 645.5509033203125, + "learning_rate": 9.893979732694422e-06, + "loss": 35.6814, + "step": 79550 + }, + { + "epoch": 0.16071623363243737, + "grad_norm": 494.3453063964844, + "learning_rate": 9.893908218912237e-06, + "loss": 30.173, + "step": 79560 + }, + { + "epoch": 0.1607364342651212, + "grad_norm": 543.2445068359375, + "learning_rate": 9.893836681277736e-06, + "loss": 16.8465, + "step": 79570 + }, + { + "epoch": 0.160756634897805, + "grad_norm": 445.7519226074219, + "learning_rate": 9.89376511979127e-06, + "loss": 17.2944, + "step": 79580 + }, + { + "epoch": 0.16077683553048883, + "grad_norm": 349.07275390625, + "learning_rate": 9.893693534453186e-06, + "loss": 23.8222, + "step": 79590 + }, + { + "epoch": 0.16079703616317262, + "grad_norm": 555.302001953125, + "learning_rate": 9.893621925263832e-06, + "loss": 24.3078, + "step": 79600 + }, + { + "epoch": 0.16081723679585644, + "grad_norm": 283.0042419433594, + "learning_rate": 9.89355029222356e-06, + "loss": 17.526, + "step": 79610 + }, + { + "epoch": 0.16083743742854026, + "grad_norm": 327.09649658203125, + "learning_rate": 9.893478635332716e-06, + "loss": 20.9916, + "step": 79620 + }, + { + "epoch": 0.16085763806122408, + "grad_norm": 710.9197998046875, + "learning_rate": 9.893406954591651e-06, + "loss": 27.72, + "step": 79630 + }, + { + "epoch": 0.1608778386939079, + "grad_norm": 314.8814392089844, + "learning_rate": 9.893335250000715e-06, + "loss": 15.2672, + "step": 79640 + }, + { + "epoch": 0.16089803932659172, + "grad_norm": 287.74749755859375, + "learning_rate": 9.893263521560255e-06, + "loss": 35.9944, + "step": 79650 + }, + { + "epoch": 0.1609182399592755, + "grad_norm": 526.1190185546875, + "learning_rate": 9.893191769270624e-06, + "loss": 17.8103, + "step": 79660 + }, + { + "epoch": 0.16093844059195933, + "grad_norm": 195.8773956298828, + 
"learning_rate": 9.893119993132167e-06, + "loss": 31.6414, + "step": 79670 + }, + { + "epoch": 0.16095864122464315, + "grad_norm": 404.8184814453125, + "learning_rate": 9.89304819314524e-06, + "loss": 29.388, + "step": 79680 + }, + { + "epoch": 0.16097884185732697, + "grad_norm": 266.7315979003906, + "learning_rate": 9.892976369310188e-06, + "loss": 18.0616, + "step": 79690 + }, + { + "epoch": 0.1609990424900108, + "grad_norm": 986.9926147460938, + "learning_rate": 9.89290452162736e-06, + "loss": 19.9799, + "step": 79700 + }, + { + "epoch": 0.1610192431226946, + "grad_norm": 558.1566772460938, + "learning_rate": 9.892832650097113e-06, + "loss": 12.9845, + "step": 79710 + }, + { + "epoch": 0.16103944375537843, + "grad_norm": 499.2138366699219, + "learning_rate": 9.89276075471979e-06, + "loss": 20.4387, + "step": 79720 + }, + { + "epoch": 0.16105964438806222, + "grad_norm": 318.83428955078125, + "learning_rate": 9.892688835495747e-06, + "loss": 37.4188, + "step": 79730 + }, + { + "epoch": 0.16107984502074604, + "grad_norm": 922.7367553710938, + "learning_rate": 9.89261689242533e-06, + "loss": 24.9707, + "step": 79740 + }, + { + "epoch": 0.16110004565342986, + "grad_norm": 407.1906433105469, + "learning_rate": 9.892544925508894e-06, + "loss": 17.5345, + "step": 79750 + }, + { + "epoch": 0.16112024628611368, + "grad_norm": 346.2884826660156, + "learning_rate": 9.892472934746784e-06, + "loss": 27.4917, + "step": 79760 + }, + { + "epoch": 0.1611404469187975, + "grad_norm": 190.8533172607422, + "learning_rate": 9.892400920139357e-06, + "loss": 36.435, + "step": 79770 + }, + { + "epoch": 0.16116064755148132, + "grad_norm": 59.34263229370117, + "learning_rate": 9.892328881686961e-06, + "loss": 14.9362, + "step": 79780 + }, + { + "epoch": 0.16118084818416512, + "grad_norm": 558.2163696289062, + "learning_rate": 9.892256819389947e-06, + "loss": 30.6207, + "step": 79790 + }, + { + "epoch": 0.16120104881684894, + "grad_norm": 229.86813354492188, + "learning_rate": 9.892184733248666e-06, + "loss": 19.285, + "step": 79800 + }, + { + "epoch": 0.16122124944953276, + "grad_norm": 801.639404296875, + "learning_rate": 9.89211262326347e-06, + "loss": 29.1693, + "step": 79810 + }, + { + "epoch": 0.16124145008221658, + "grad_norm": 560.8685302734375, + "learning_rate": 9.892040489434711e-06, + "loss": 34.4534, + "step": 79820 + }, + { + "epoch": 0.1612616507149004, + "grad_norm": 351.6724853515625, + "learning_rate": 9.89196833176274e-06, + "loss": 35.787, + "step": 79830 + }, + { + "epoch": 0.16128185134758422, + "grad_norm": 287.583984375, + "learning_rate": 9.891896150247909e-06, + "loss": 11.5023, + "step": 79840 + }, + { + "epoch": 0.16130205198026804, + "grad_norm": 695.3501586914062, + "learning_rate": 9.891823944890569e-06, + "loss": 31.0794, + "step": 79850 + }, + { + "epoch": 0.16132225261295183, + "grad_norm": 909.6913452148438, + "learning_rate": 9.891751715691071e-06, + "loss": 40.7071, + "step": 79860 + }, + { + "epoch": 0.16134245324563565, + "grad_norm": 70.47769927978516, + "learning_rate": 9.89167946264977e-06, + "loss": 15.011, + "step": 79870 + }, + { + "epoch": 0.16136265387831947, + "grad_norm": 341.2173767089844, + "learning_rate": 9.891607185767018e-06, + "loss": 29.7133, + "step": 79880 + }, + { + "epoch": 0.1613828545110033, + "grad_norm": 1443.01708984375, + "learning_rate": 9.891534885043164e-06, + "loss": 40.5355, + "step": 79890 + }, + { + "epoch": 0.1614030551436871, + "grad_norm": 685.8870239257812, + "learning_rate": 9.891462560478562e-06, + "loss": 23.2146, + "step": 79900 + }, 
+ { + "epoch": 0.16142325577637093, + "grad_norm": 180.77066040039062, + "learning_rate": 9.891390212073566e-06, + "loss": 17.5765, + "step": 79910 + }, + { + "epoch": 0.16144345640905472, + "grad_norm": 525.450927734375, + "learning_rate": 9.891317839828527e-06, + "loss": 44.7903, + "step": 79920 + }, + { + "epoch": 0.16146365704173854, + "grad_norm": 477.0588073730469, + "learning_rate": 9.891245443743797e-06, + "loss": 46.1729, + "step": 79930 + }, + { + "epoch": 0.16148385767442236, + "grad_norm": 689.93505859375, + "learning_rate": 9.891173023819731e-06, + "loss": 36.2425, + "step": 79940 + }, + { + "epoch": 0.16150405830710618, + "grad_norm": 440.6839904785156, + "learning_rate": 9.891100580056681e-06, + "loss": 32.2712, + "step": 79950 + }, + { + "epoch": 0.16152425893979, + "grad_norm": 283.39691162109375, + "learning_rate": 9.891028112454998e-06, + "loss": 30.0116, + "step": 79960 + }, + { + "epoch": 0.16154445957247382, + "grad_norm": 135.4347381591797, + "learning_rate": 9.890955621015039e-06, + "loss": 36.9487, + "step": 79970 + }, + { + "epoch": 0.1615646602051576, + "grad_norm": 114.75359344482422, + "learning_rate": 9.890883105737156e-06, + "loss": 29.5158, + "step": 79980 + }, + { + "epoch": 0.16158486083784143, + "grad_norm": 703.3095703125, + "learning_rate": 9.890810566621702e-06, + "loss": 32.883, + "step": 79990 + }, + { + "epoch": 0.16160506147052525, + "grad_norm": 276.4881286621094, + "learning_rate": 9.890738003669029e-06, + "loss": 13.6305, + "step": 80000 + }, + { + "epoch": 0.16162526210320907, + "grad_norm": 756.2501831054688, + "learning_rate": 9.890665416879492e-06, + "loss": 30.3908, + "step": 80010 + }, + { + "epoch": 0.1616454627358929, + "grad_norm": 63.37554168701172, + "learning_rate": 9.890592806253447e-06, + "loss": 10.6785, + "step": 80020 + }, + { + "epoch": 0.1616656633685767, + "grad_norm": 669.55859375, + "learning_rate": 9.890520171791244e-06, + "loss": 27.5863, + "step": 80030 + }, + { + "epoch": 0.16168586400126053, + "grad_norm": 300.28253173828125, + "learning_rate": 9.89044751349324e-06, + "loss": 18.5517, + "step": 80040 + }, + { + "epoch": 0.16170606463394432, + "grad_norm": 853.7218627929688, + "learning_rate": 9.890374831359787e-06, + "loss": 25.1595, + "step": 80050 + }, + { + "epoch": 0.16172626526662814, + "grad_norm": 80.81767272949219, + "learning_rate": 9.89030212539124e-06, + "loss": 20.0168, + "step": 80060 + }, + { + "epoch": 0.16174646589931196, + "grad_norm": 334.1025085449219, + "learning_rate": 9.890229395587954e-06, + "loss": 21.4223, + "step": 80070 + }, + { + "epoch": 0.16176666653199578, + "grad_norm": 420.7502136230469, + "learning_rate": 9.890156641950284e-06, + "loss": 19.9016, + "step": 80080 + }, + { + "epoch": 0.1617868671646796, + "grad_norm": 312.5161437988281, + "learning_rate": 9.890083864478584e-06, + "loss": 33.7502, + "step": 80090 + }, + { + "epoch": 0.16180706779736342, + "grad_norm": 563.5485229492188, + "learning_rate": 9.890011063173207e-06, + "loss": 35.707, + "step": 80100 + }, + { + "epoch": 0.16182726843004722, + "grad_norm": 328.59368896484375, + "learning_rate": 9.889938238034509e-06, + "loss": 23.2849, + "step": 80110 + }, + { + "epoch": 0.16184746906273104, + "grad_norm": 311.5693359375, + "learning_rate": 9.889865389062845e-06, + "loss": 22.8267, + "step": 80120 + }, + { + "epoch": 0.16186766969541486, + "grad_norm": 353.86895751953125, + "learning_rate": 9.889792516258571e-06, + "loss": 21.1397, + "step": 80130 + }, + { + "epoch": 0.16188787032809868, + "grad_norm": 381.8929138183594, + 
"learning_rate": 9.88971961962204e-06, + "loss": 16.9112, + "step": 80140 + }, + { + "epoch": 0.1619080709607825, + "grad_norm": 780.3839721679688, + "learning_rate": 9.88964669915361e-06, + "loss": 19.9509, + "step": 80150 + }, + { + "epoch": 0.16192827159346632, + "grad_norm": 838.907470703125, + "learning_rate": 9.889573754853633e-06, + "loss": 28.3124, + "step": 80160 + }, + { + "epoch": 0.16194847222615014, + "grad_norm": 416.4563903808594, + "learning_rate": 9.889500786722471e-06, + "loss": 17.7988, + "step": 80170 + }, + { + "epoch": 0.16196867285883393, + "grad_norm": 491.28765869140625, + "learning_rate": 9.889427794760472e-06, + "loss": 17.411, + "step": 80180 + }, + { + "epoch": 0.16198887349151775, + "grad_norm": 208.65689086914062, + "learning_rate": 9.889354778967995e-06, + "loss": 18.2061, + "step": 80190 + }, + { + "epoch": 0.16200907412420157, + "grad_norm": 373.3161926269531, + "learning_rate": 9.889281739345395e-06, + "loss": 23.4229, + "step": 80200 + }, + { + "epoch": 0.1620292747568854, + "grad_norm": 343.974365234375, + "learning_rate": 9.88920867589303e-06, + "loss": 11.5185, + "step": 80210 + }, + { + "epoch": 0.1620494753895692, + "grad_norm": 240.10968017578125, + "learning_rate": 9.889135588611254e-06, + "loss": 29.3327, + "step": 80220 + }, + { + "epoch": 0.16206967602225303, + "grad_norm": 447.5523681640625, + "learning_rate": 9.889062477500425e-06, + "loss": 33.0841, + "step": 80230 + }, + { + "epoch": 0.16208987665493682, + "grad_norm": 107.09142303466797, + "learning_rate": 9.8889893425609e-06, + "loss": 20.7825, + "step": 80240 + }, + { + "epoch": 0.16211007728762064, + "grad_norm": 541.6611328125, + "learning_rate": 9.88891618379303e-06, + "loss": 35.7423, + "step": 80250 + }, + { + "epoch": 0.16213027792030446, + "grad_norm": 749.7138671875, + "learning_rate": 9.88884300119718e-06, + "loss": 29.4236, + "step": 80260 + }, + { + "epoch": 0.16215047855298828, + "grad_norm": 336.53924560546875, + "learning_rate": 9.888769794773699e-06, + "loss": 51.9439, + "step": 80270 + }, + { + "epoch": 0.1621706791856721, + "grad_norm": 143.0902557373047, + "learning_rate": 9.888696564522948e-06, + "loss": 29.0381, + "step": 80280 + }, + { + "epoch": 0.16219087981835592, + "grad_norm": 1135.908203125, + "learning_rate": 9.888623310445282e-06, + "loss": 44.4468, + "step": 80290 + }, + { + "epoch": 0.1622110804510397, + "grad_norm": 355.0585632324219, + "learning_rate": 9.88855003254106e-06, + "loss": 12.7337, + "step": 80300 + }, + { + "epoch": 0.16223128108372353, + "grad_norm": 822.2958984375, + "learning_rate": 9.88847673081064e-06, + "loss": 29.0946, + "step": 80310 + }, + { + "epoch": 0.16225148171640735, + "grad_norm": 459.4270935058594, + "learning_rate": 9.888403405254374e-06, + "loss": 31.1366, + "step": 80320 + }, + { + "epoch": 0.16227168234909117, + "grad_norm": 378.10626220703125, + "learning_rate": 9.888330055872623e-06, + "loss": 28.2985, + "step": 80330 + }, + { + "epoch": 0.162291882981775, + "grad_norm": 463.220458984375, + "learning_rate": 9.888256682665744e-06, + "loss": 38.7636, + "step": 80340 + }, + { + "epoch": 0.1623120836144588, + "grad_norm": 752.4970703125, + "learning_rate": 9.888183285634097e-06, + "loss": 23.502, + "step": 80350 + }, + { + "epoch": 0.16233228424714263, + "grad_norm": 398.5994567871094, + "learning_rate": 9.888109864778036e-06, + "loss": 19.4908, + "step": 80360 + }, + { + "epoch": 0.16235248487982643, + "grad_norm": 648.8282470703125, + "learning_rate": 9.88803642009792e-06, + "loss": 22.1598, + "step": 80370 + }, + { + 
"epoch": 0.16237268551251025, + "grad_norm": 574.5921020507812, + "learning_rate": 9.887962951594108e-06, + "loss": 27.2408, + "step": 80380 + }, + { + "epoch": 0.16239288614519407, + "grad_norm": 213.2351837158203, + "learning_rate": 9.887889459266957e-06, + "loss": 30.9682, + "step": 80390 + }, + { + "epoch": 0.16241308677787789, + "grad_norm": 579.56298828125, + "learning_rate": 9.887815943116827e-06, + "loss": 35.2533, + "step": 80400 + }, + { + "epoch": 0.1624332874105617, + "grad_norm": 317.9184265136719, + "learning_rate": 9.887742403144074e-06, + "loss": 21.8071, + "step": 80410 + }, + { + "epoch": 0.16245348804324553, + "grad_norm": 1311.876708984375, + "learning_rate": 9.887668839349057e-06, + "loss": 28.1026, + "step": 80420 + }, + { + "epoch": 0.16247368867592932, + "grad_norm": 919.910888671875, + "learning_rate": 9.887595251732135e-06, + "loss": 31.4523, + "step": 80430 + }, + { + "epoch": 0.16249388930861314, + "grad_norm": 147.07989501953125, + "learning_rate": 9.887521640293668e-06, + "loss": 15.9144, + "step": 80440 + }, + { + "epoch": 0.16251408994129696, + "grad_norm": 426.0009765625, + "learning_rate": 9.887448005034011e-06, + "loss": 20.6647, + "step": 80450 + }, + { + "epoch": 0.16253429057398078, + "grad_norm": 85.75631713867188, + "learning_rate": 9.887374345953526e-06, + "loss": 15.8915, + "step": 80460 + }, + { + "epoch": 0.1625544912066646, + "grad_norm": 407.2970275878906, + "learning_rate": 9.88730066305257e-06, + "loss": 22.5537, + "step": 80470 + }, + { + "epoch": 0.16257469183934842, + "grad_norm": 560.683837890625, + "learning_rate": 9.887226956331506e-06, + "loss": 26.6558, + "step": 80480 + }, + { + "epoch": 0.16259489247203224, + "grad_norm": 315.2942199707031, + "learning_rate": 9.887153225790688e-06, + "loss": 27.6426, + "step": 80490 + }, + { + "epoch": 0.16261509310471603, + "grad_norm": 193.90476989746094, + "learning_rate": 9.887079471430481e-06, + "loss": 11.5378, + "step": 80500 + }, + { + "epoch": 0.16263529373739985, + "grad_norm": 291.6756896972656, + "learning_rate": 9.88700569325124e-06, + "loss": 28.5151, + "step": 80510 + }, + { + "epoch": 0.16265549437008367, + "grad_norm": 414.41900634765625, + "learning_rate": 9.886931891253324e-06, + "loss": 12.7147, + "step": 80520 + }, + { + "epoch": 0.1626756950027675, + "grad_norm": 742.55078125, + "learning_rate": 9.886858065437097e-06, + "loss": 55.4787, + "step": 80530 + }, + { + "epoch": 0.1626958956354513, + "grad_norm": 539.345947265625, + "learning_rate": 9.886784215802915e-06, + "loss": 43.4888, + "step": 80540 + }, + { + "epoch": 0.16271609626813513, + "grad_norm": 87.77115631103516, + "learning_rate": 9.88671034235114e-06, + "loss": 23.1452, + "step": 80550 + }, + { + "epoch": 0.16273629690081892, + "grad_norm": 287.9833984375, + "learning_rate": 9.886636445082132e-06, + "loss": 29.6764, + "step": 80560 + }, + { + "epoch": 0.16275649753350274, + "grad_norm": 16.543699264526367, + "learning_rate": 9.88656252399625e-06, + "loss": 19.3727, + "step": 80570 + }, + { + "epoch": 0.16277669816618656, + "grad_norm": 58.35026550292969, + "learning_rate": 9.886488579093856e-06, + "loss": 23.0495, + "step": 80580 + }, + { + "epoch": 0.16279689879887038, + "grad_norm": 266.4010314941406, + "learning_rate": 9.886414610375309e-06, + "loss": 22.8869, + "step": 80590 + }, + { + "epoch": 0.1628170994315542, + "grad_norm": 634.8953857421875, + "learning_rate": 9.886340617840968e-06, + "loss": 22.1367, + "step": 80600 + }, + { + "epoch": 0.16283730006423802, + "grad_norm": 189.2071075439453, + 
"learning_rate": 9.886266601491197e-06, + "loss": 32.8526, + "step": 80610 + }, + { + "epoch": 0.16285750069692181, + "grad_norm": 0.0, + "learning_rate": 9.886192561326356e-06, + "loss": 25.8318, + "step": 80620 + }, + { + "epoch": 0.16287770132960563, + "grad_norm": 89.25527954101562, + "learning_rate": 9.886118497346804e-06, + "loss": 26.2434, + "step": 80630 + }, + { + "epoch": 0.16289790196228945, + "grad_norm": 630.5790405273438, + "learning_rate": 9.886044409552902e-06, + "loss": 22.8639, + "step": 80640 + }, + { + "epoch": 0.16291810259497327, + "grad_norm": 0.0, + "learning_rate": 9.885970297945013e-06, + "loss": 28.5499, + "step": 80650 + }, + { + "epoch": 0.1629383032276571, + "grad_norm": 559.54736328125, + "learning_rate": 9.885896162523498e-06, + "loss": 32.4067, + "step": 80660 + }, + { + "epoch": 0.16295850386034091, + "grad_norm": 158.8244171142578, + "learning_rate": 9.885822003288717e-06, + "loss": 29.8507, + "step": 80670 + }, + { + "epoch": 0.16297870449302473, + "grad_norm": 467.65557861328125, + "learning_rate": 9.885747820241032e-06, + "loss": 31.5979, + "step": 80680 + }, + { + "epoch": 0.16299890512570853, + "grad_norm": 497.6941223144531, + "learning_rate": 9.885673613380806e-06, + "loss": 15.9469, + "step": 80690 + }, + { + "epoch": 0.16301910575839235, + "grad_norm": 263.04931640625, + "learning_rate": 9.8855993827084e-06, + "loss": 15.349, + "step": 80700 + }, + { + "epoch": 0.16303930639107617, + "grad_norm": 274.8655090332031, + "learning_rate": 9.885525128224173e-06, + "loss": 20.3094, + "step": 80710 + }, + { + "epoch": 0.16305950702376, + "grad_norm": 378.819091796875, + "learning_rate": 9.885450849928489e-06, + "loss": 35.3756, + "step": 80720 + }, + { + "epoch": 0.1630797076564438, + "grad_norm": 501.4461364746094, + "learning_rate": 9.885376547821711e-06, + "loss": 22.8929, + "step": 80730 + }, + { + "epoch": 0.16309990828912763, + "grad_norm": 536.461669921875, + "learning_rate": 9.885302221904201e-06, + "loss": 15.1673, + "step": 80740 + }, + { + "epoch": 0.16312010892181142, + "grad_norm": 455.5689697265625, + "learning_rate": 9.88522787217632e-06, + "loss": 34.1152, + "step": 80750 + }, + { + "epoch": 0.16314030955449524, + "grad_norm": 263.53814697265625, + "learning_rate": 9.88515349863843e-06, + "loss": 35.1738, + "step": 80760 + }, + { + "epoch": 0.16316051018717906, + "grad_norm": 345.0772705078125, + "learning_rate": 9.885079101290894e-06, + "loss": 20.5357, + "step": 80770 + }, + { + "epoch": 0.16318071081986288, + "grad_norm": 328.6314697265625, + "learning_rate": 9.885004680134075e-06, + "loss": 18.8966, + "step": 80780 + }, + { + "epoch": 0.1632009114525467, + "grad_norm": 366.3641662597656, + "learning_rate": 9.884930235168338e-06, + "loss": 48.2096, + "step": 80790 + }, + { + "epoch": 0.16322111208523052, + "grad_norm": 729.0635375976562, + "learning_rate": 9.884855766394041e-06, + "loss": 37.265, + "step": 80800 + }, + { + "epoch": 0.16324131271791434, + "grad_norm": 574.7216796875, + "learning_rate": 9.88478127381155e-06, + "loss": 20.4683, + "step": 80810 + }, + { + "epoch": 0.16326151335059813, + "grad_norm": 457.9243469238281, + "learning_rate": 9.884706757421229e-06, + "loss": 17.3267, + "step": 80820 + }, + { + "epoch": 0.16328171398328195, + "grad_norm": 305.1440734863281, + "learning_rate": 9.884632217223438e-06, + "loss": 28.1274, + "step": 80830 + }, + { + "epoch": 0.16330191461596577, + "grad_norm": 612.5533447265625, + "learning_rate": 9.884557653218544e-06, + "loss": 32.4094, + "step": 80840 + }, + { + "epoch": 
0.1633221152486496, + "grad_norm": 1437.5220947265625, + "learning_rate": 9.884483065406905e-06, + "loss": 26.99, + "step": 80850 + }, + { + "epoch": 0.1633423158813334, + "grad_norm": 757.4486083984375, + "learning_rate": 9.88440845378889e-06, + "loss": 33.4293, + "step": 80860 + }, + { + "epoch": 0.16336251651401723, + "grad_norm": 713.0037841796875, + "learning_rate": 9.884333818364861e-06, + "loss": 32.334, + "step": 80870 + }, + { + "epoch": 0.16338271714670102, + "grad_norm": 450.5823669433594, + "learning_rate": 9.88425915913518e-06, + "loss": 29.5672, + "step": 80880 + }, + { + "epoch": 0.16340291777938484, + "grad_norm": 496.3887023925781, + "learning_rate": 9.884184476100215e-06, + "loss": 20.1971, + "step": 80890 + }, + { + "epoch": 0.16342311841206866, + "grad_norm": 98.01220703125, + "learning_rate": 9.884109769260326e-06, + "loss": 23.402, + "step": 80900 + }, + { + "epoch": 0.16344331904475248, + "grad_norm": 253.8908233642578, + "learning_rate": 9.884035038615876e-06, + "loss": 15.4084, + "step": 80910 + }, + { + "epoch": 0.1634635196774363, + "grad_norm": 272.7874450683594, + "learning_rate": 9.883960284167234e-06, + "loss": 25.7597, + "step": 80920 + }, + { + "epoch": 0.16348372031012012, + "grad_norm": 736.2428588867188, + "learning_rate": 9.88388550591476e-06, + "loss": 21.9517, + "step": 80930 + }, + { + "epoch": 0.16350392094280392, + "grad_norm": 453.32965087890625, + "learning_rate": 9.883810703858823e-06, + "loss": 21.7236, + "step": 80940 + }, + { + "epoch": 0.16352412157548774, + "grad_norm": 694.5930786132812, + "learning_rate": 9.883735877999785e-06, + "loss": 32.9681, + "step": 80950 + }, + { + "epoch": 0.16354432220817156, + "grad_norm": 423.9303894042969, + "learning_rate": 9.883661028338009e-06, + "loss": 21.3099, + "step": 80960 + }, + { + "epoch": 0.16356452284085538, + "grad_norm": 305.148681640625, + "learning_rate": 9.88358615487386e-06, + "loss": 20.2351, + "step": 80970 + }, + { + "epoch": 0.1635847234735392, + "grad_norm": 310.5696105957031, + "learning_rate": 9.883511257607708e-06, + "loss": 25.3677, + "step": 80980 + }, + { + "epoch": 0.16360492410622302, + "grad_norm": 546.5814208984375, + "learning_rate": 9.883436336539913e-06, + "loss": 33.1325, + "step": 80990 + }, + { + "epoch": 0.16362512473890684, + "grad_norm": 1494.5396728515625, + "learning_rate": 9.883361391670841e-06, + "loss": 46.9954, + "step": 81000 + }, + { + "epoch": 0.16364532537159063, + "grad_norm": 241.7465362548828, + "learning_rate": 9.883286423000857e-06, + "loss": 10.2596, + "step": 81010 + }, + { + "epoch": 0.16366552600427445, + "grad_norm": 234.1220245361328, + "learning_rate": 9.883211430530329e-06, + "loss": 14.028, + "step": 81020 + }, + { + "epoch": 0.16368572663695827, + "grad_norm": 305.42034912109375, + "learning_rate": 9.88313641425962e-06, + "loss": 30.106, + "step": 81030 + }, + { + "epoch": 0.1637059272696421, + "grad_norm": 110.0243148803711, + "learning_rate": 9.883061374189095e-06, + "loss": 31.9587, + "step": 81040 + }, + { + "epoch": 0.1637261279023259, + "grad_norm": 0.0, + "learning_rate": 9.882986310319124e-06, + "loss": 32.0352, + "step": 81050 + }, + { + "epoch": 0.16374632853500973, + "grad_norm": 217.22923278808594, + "learning_rate": 9.882911222650069e-06, + "loss": 18.9343, + "step": 81060 + }, + { + "epoch": 0.16376652916769352, + "grad_norm": 297.120361328125, + "learning_rate": 9.882836111182295e-06, + "loss": 26.9097, + "step": 81070 + }, + { + "epoch": 0.16378672980037734, + "grad_norm": 95.64879608154297, + "learning_rate": 
9.882760975916173e-06, + "loss": 16.0606, + "step": 81080 + }, + { + "epoch": 0.16380693043306116, + "grad_norm": 724.5462036132812, + "learning_rate": 9.882685816852064e-06, + "loss": 32.3865, + "step": 81090 + }, + { + "epoch": 0.16382713106574498, + "grad_norm": 648.523681640625, + "learning_rate": 9.882610633990337e-06, + "loss": 27.8499, + "step": 81100 + }, + { + "epoch": 0.1638473316984288, + "grad_norm": 352.72210693359375, + "learning_rate": 9.882535427331357e-06, + "loss": 17.0921, + "step": 81110 + }, + { + "epoch": 0.16386753233111262, + "grad_norm": 106.14461517333984, + "learning_rate": 9.882460196875495e-06, + "loss": 37.6977, + "step": 81120 + }, + { + "epoch": 0.16388773296379644, + "grad_norm": 161.5916748046875, + "learning_rate": 9.88238494262311e-06, + "loss": 30.4489, + "step": 81130 + }, + { + "epoch": 0.16390793359648023, + "grad_norm": 852.1343383789062, + "learning_rate": 9.882309664574576e-06, + "loss": 18.6942, + "step": 81140 + }, + { + "epoch": 0.16392813422916405, + "grad_norm": 269.9232177734375, + "learning_rate": 9.882234362730255e-06, + "loss": 16.7037, + "step": 81150 + }, + { + "epoch": 0.16394833486184787, + "grad_norm": 572.7348022460938, + "learning_rate": 9.882159037090517e-06, + "loss": 22.5891, + "step": 81160 + }, + { + "epoch": 0.1639685354945317, + "grad_norm": 443.86895751953125, + "learning_rate": 9.882083687655728e-06, + "loss": 27.8898, + "step": 81170 + }, + { + "epoch": 0.1639887361272155, + "grad_norm": 812.183837890625, + "learning_rate": 9.882008314426253e-06, + "loss": 39.9703, + "step": 81180 + }, + { + "epoch": 0.16400893675989933, + "grad_norm": 632.869140625, + "learning_rate": 9.881932917402464e-06, + "loss": 19.4012, + "step": 81190 + }, + { + "epoch": 0.16402913739258312, + "grad_norm": 374.23455810546875, + "learning_rate": 9.881857496584726e-06, + "loss": 24.5433, + "step": 81200 + }, + { + "epoch": 0.16404933802526694, + "grad_norm": 323.980712890625, + "learning_rate": 9.881782051973405e-06, + "loss": 17.1854, + "step": 81210 + }, + { + "epoch": 0.16406953865795076, + "grad_norm": 148.46449279785156, + "learning_rate": 9.88170658356887e-06, + "loss": 35.3014, + "step": 81220 + }, + { + "epoch": 0.16408973929063458, + "grad_norm": 500.7608947753906, + "learning_rate": 9.881631091371492e-06, + "loss": 29.5488, + "step": 81230 + }, + { + "epoch": 0.1641099399233184, + "grad_norm": 1187.115966796875, + "learning_rate": 9.881555575381635e-06, + "loss": 27.2644, + "step": 81240 + }, + { + "epoch": 0.16413014055600222, + "grad_norm": 544.7838134765625, + "learning_rate": 9.881480035599667e-06, + "loss": 43.3424, + "step": 81250 + }, + { + "epoch": 0.16415034118868602, + "grad_norm": 867.3125610351562, + "learning_rate": 9.88140447202596e-06, + "loss": 34.9676, + "step": 81260 + }, + { + "epoch": 0.16417054182136984, + "grad_norm": 1001.7696533203125, + "learning_rate": 9.881328884660876e-06, + "loss": 22.9481, + "step": 81270 + }, + { + "epoch": 0.16419074245405366, + "grad_norm": 254.9727020263672, + "learning_rate": 9.88125327350479e-06, + "loss": 25.8817, + "step": 81280 + }, + { + "epoch": 0.16421094308673748, + "grad_norm": 839.4126586914062, + "learning_rate": 9.881177638558066e-06, + "loss": 29.3466, + "step": 81290 + }, + { + "epoch": 0.1642311437194213, + "grad_norm": 311.6737060546875, + "learning_rate": 9.881101979821075e-06, + "loss": 42.4605, + "step": 81300 + }, + { + "epoch": 0.16425134435210512, + "grad_norm": 202.302001953125, + "learning_rate": 9.881026297294185e-06, + "loss": 38.1463, + "step": 81310 + }, + { + 
"epoch": 0.16427154498478894, + "grad_norm": 401.0799560546875, + "learning_rate": 9.880950590977764e-06, + "loss": 19.8945, + "step": 81320 + }, + { + "epoch": 0.16429174561747273, + "grad_norm": 120.13053131103516, + "learning_rate": 9.880874860872183e-06, + "loss": 9.1085, + "step": 81330 + }, + { + "epoch": 0.16431194625015655, + "grad_norm": 621.6092529296875, + "learning_rate": 9.88079910697781e-06, + "loss": 21.6557, + "step": 81340 + }, + { + "epoch": 0.16433214688284037, + "grad_norm": 434.39892578125, + "learning_rate": 9.880723329295012e-06, + "loss": 20.388, + "step": 81350 + }, + { + "epoch": 0.1643523475155242, + "grad_norm": 413.6324768066406, + "learning_rate": 9.880647527824161e-06, + "loss": 19.689, + "step": 81360 + }, + { + "epoch": 0.164372548148208, + "grad_norm": 903.112548828125, + "learning_rate": 9.880571702565627e-06, + "loss": 37.867, + "step": 81370 + }, + { + "epoch": 0.16439274878089183, + "grad_norm": 723.8510131835938, + "learning_rate": 9.880495853519777e-06, + "loss": 24.6308, + "step": 81380 + }, + { + "epoch": 0.16441294941357562, + "grad_norm": 371.6813659667969, + "learning_rate": 9.880419980686986e-06, + "loss": 24.4824, + "step": 81390 + }, + { + "epoch": 0.16443315004625944, + "grad_norm": 267.4392395019531, + "learning_rate": 9.880344084067616e-06, + "loss": 37.8947, + "step": 81400 + }, + { + "epoch": 0.16445335067894326, + "grad_norm": 477.8439636230469, + "learning_rate": 9.880268163662043e-06, + "loss": 24.8996, + "step": 81410 + }, + { + "epoch": 0.16447355131162708, + "grad_norm": 916.473388671875, + "learning_rate": 9.880192219470633e-06, + "loss": 31.2848, + "step": 81420 + }, + { + "epoch": 0.1644937519443109, + "grad_norm": 298.04296875, + "learning_rate": 9.88011625149376e-06, + "loss": 17.4741, + "step": 81430 + }, + { + "epoch": 0.16451395257699472, + "grad_norm": 724.1027221679688, + "learning_rate": 9.88004025973179e-06, + "loss": 25.3127, + "step": 81440 + }, + { + "epoch": 0.16453415320967854, + "grad_norm": 766.1636352539062, + "learning_rate": 9.879964244185098e-06, + "loss": 26.2444, + "step": 81450 + }, + { + "epoch": 0.16455435384236233, + "grad_norm": 117.17510223388672, + "learning_rate": 9.87988820485405e-06, + "loss": 35.3773, + "step": 81460 + }, + { + "epoch": 0.16457455447504615, + "grad_norm": 576.5032958984375, + "learning_rate": 9.87981214173902e-06, + "loss": 18.0257, + "step": 81470 + }, + { + "epoch": 0.16459475510772997, + "grad_norm": 90.32857513427734, + "learning_rate": 9.879736054840377e-06, + "loss": 53.1879, + "step": 81480 + }, + { + "epoch": 0.1646149557404138, + "grad_norm": 663.2149658203125, + "learning_rate": 9.879659944158493e-06, + "loss": 25.8617, + "step": 81490 + }, + { + "epoch": 0.1646351563730976, + "grad_norm": 1498.1995849609375, + "learning_rate": 9.879583809693737e-06, + "loss": 32.2337, + "step": 81500 + }, + { + "epoch": 0.16465535700578143, + "grad_norm": 509.119384765625, + "learning_rate": 9.879507651446482e-06, + "loss": 30.5044, + "step": 81510 + }, + { + "epoch": 0.16467555763846523, + "grad_norm": 205.88491821289062, + "learning_rate": 9.8794314694171e-06, + "loss": 22.2007, + "step": 81520 + }, + { + "epoch": 0.16469575827114905, + "grad_norm": 599.6121215820312, + "learning_rate": 9.879355263605958e-06, + "loss": 19.7081, + "step": 81530 + }, + { + "epoch": 0.16471595890383287, + "grad_norm": 666.6627807617188, + "learning_rate": 9.879279034013434e-06, + "loss": 18.6181, + "step": 81540 + }, + { + "epoch": 0.16473615953651669, + "grad_norm": 0.0, + "learning_rate": 
9.879202780639893e-06, + "loss": 14.5669, + "step": 81550 + }, + { + "epoch": 0.1647563601692005, + "grad_norm": 116.70372009277344, + "learning_rate": 9.879126503485709e-06, + "loss": 31.6664, + "step": 81560 + }, + { + "epoch": 0.16477656080188433, + "grad_norm": 255.89442443847656, + "learning_rate": 9.879050202551256e-06, + "loss": 29.5966, + "step": 81570 + }, + { + "epoch": 0.16479676143456812, + "grad_norm": 287.06884765625, + "learning_rate": 9.878973877836902e-06, + "loss": 27.2852, + "step": 81580 + }, + { + "epoch": 0.16481696206725194, + "grad_norm": 426.3962707519531, + "learning_rate": 9.878897529343023e-06, + "loss": 11.7752, + "step": 81590 + }, + { + "epoch": 0.16483716269993576, + "grad_norm": 426.3056335449219, + "learning_rate": 9.878821157069988e-06, + "loss": 21.1229, + "step": 81600 + }, + { + "epoch": 0.16485736333261958, + "grad_norm": 374.24920654296875, + "learning_rate": 9.87874476101817e-06, + "loss": 34.8238, + "step": 81610 + }, + { + "epoch": 0.1648775639653034, + "grad_norm": 382.10540771484375, + "learning_rate": 9.878668341187944e-06, + "loss": 32.4983, + "step": 81620 + }, + { + "epoch": 0.16489776459798722, + "grad_norm": 496.6434020996094, + "learning_rate": 9.878591897579678e-06, + "loss": 28.0837, + "step": 81630 + }, + { + "epoch": 0.16491796523067104, + "grad_norm": 538.9864501953125, + "learning_rate": 9.87851543019375e-06, + "loss": 40.5745, + "step": 81640 + }, + { + "epoch": 0.16493816586335483, + "grad_norm": 407.2173156738281, + "learning_rate": 9.878438939030526e-06, + "loss": 18.182, + "step": 81650 + }, + { + "epoch": 0.16495836649603865, + "grad_norm": 507.8926086425781, + "learning_rate": 9.878362424090384e-06, + "loss": 36.1084, + "step": 81660 + }, + { + "epoch": 0.16497856712872247, + "grad_norm": 423.6439514160156, + "learning_rate": 9.878285885373693e-06, + "loss": 26.5332, + "step": 81670 + }, + { + "epoch": 0.1649987677614063, + "grad_norm": 518.3607788085938, + "learning_rate": 9.87820932288083e-06, + "loss": 21.0788, + "step": 81680 + }, + { + "epoch": 0.1650189683940901, + "grad_norm": 309.38507080078125, + "learning_rate": 9.878132736612167e-06, + "loss": 19.2194, + "step": 81690 + }, + { + "epoch": 0.16503916902677393, + "grad_norm": 469.4150390625, + "learning_rate": 9.878056126568077e-06, + "loss": 14.9641, + "step": 81700 + }, + { + "epoch": 0.16505936965945772, + "grad_norm": 728.9360961914062, + "learning_rate": 9.87797949274893e-06, + "loss": 27.921, + "step": 81710 + }, + { + "epoch": 0.16507957029214154, + "grad_norm": 347.7802734375, + "learning_rate": 9.877902835155105e-06, + "loss": 23.0371, + "step": 81720 + }, + { + "epoch": 0.16509977092482536, + "grad_norm": 257.8112487792969, + "learning_rate": 9.877826153786973e-06, + "loss": 19.6124, + "step": 81730 + }, + { + "epoch": 0.16511997155750918, + "grad_norm": 758.5030517578125, + "learning_rate": 9.877749448644908e-06, + "loss": 15.3159, + "step": 81740 + }, + { + "epoch": 0.165140172190193, + "grad_norm": 244.9395294189453, + "learning_rate": 9.877672719729283e-06, + "loss": 25.8874, + "step": 81750 + }, + { + "epoch": 0.16516037282287682, + "grad_norm": 468.4497985839844, + "learning_rate": 9.877595967040475e-06, + "loss": 24.9234, + "step": 81760 + }, + { + "epoch": 0.16518057345556064, + "grad_norm": 390.4100341796875, + "learning_rate": 9.877519190578852e-06, + "loss": 22.5945, + "step": 81770 + }, + { + "epoch": 0.16520077408824443, + "grad_norm": 483.197265625, + "learning_rate": 9.877442390344796e-06, + "loss": 39.905, + "step": 81780 + }, + { + "epoch": 
0.16522097472092825, + "grad_norm": 181.7025146484375, + "learning_rate": 9.877365566338675e-06, + "loss": 18.6734, + "step": 81790 + }, + { + "epoch": 0.16524117535361207, + "grad_norm": 202.7747039794922, + "learning_rate": 9.877288718560866e-06, + "loss": 12.8252, + "step": 81800 + }, + { + "epoch": 0.1652613759862959, + "grad_norm": 253.33004760742188, + "learning_rate": 9.877211847011744e-06, + "loss": 23.8899, + "step": 81810 + }, + { + "epoch": 0.16528157661897971, + "grad_norm": 387.10028076171875, + "learning_rate": 9.877134951691683e-06, + "loss": 34.4809, + "step": 81820 + }, + { + "epoch": 0.16530177725166353, + "grad_norm": 625.840576171875, + "learning_rate": 9.877058032601057e-06, + "loss": 44.1464, + "step": 81830 + }, + { + "epoch": 0.16532197788434733, + "grad_norm": 832.5033569335938, + "learning_rate": 9.876981089740242e-06, + "loss": 29.491, + "step": 81840 + }, + { + "epoch": 0.16534217851703115, + "grad_norm": 116.0150375366211, + "learning_rate": 9.876904123109613e-06, + "loss": 36.1661, + "step": 81850 + }, + { + "epoch": 0.16536237914971497, + "grad_norm": 0.0, + "learning_rate": 9.876827132709545e-06, + "loss": 16.6707, + "step": 81860 + }, + { + "epoch": 0.1653825797823988, + "grad_norm": 1153.5323486328125, + "learning_rate": 9.876750118540413e-06, + "loss": 36.7919, + "step": 81870 + }, + { + "epoch": 0.1654027804150826, + "grad_norm": 457.7942810058594, + "learning_rate": 9.87667308060259e-06, + "loss": 26.9531, + "step": 81880 + }, + { + "epoch": 0.16542298104776643, + "grad_norm": 236.09178161621094, + "learning_rate": 9.876596018896457e-06, + "loss": 17.4196, + "step": 81890 + }, + { + "epoch": 0.16544318168045022, + "grad_norm": 166.97422790527344, + "learning_rate": 9.876518933422385e-06, + "loss": 18.2988, + "step": 81900 + }, + { + "epoch": 0.16546338231313404, + "grad_norm": 558.3728637695312, + "learning_rate": 9.876441824180752e-06, + "loss": 24.043, + "step": 81910 + }, + { + "epoch": 0.16548358294581786, + "grad_norm": 605.8748779296875, + "learning_rate": 9.876364691171933e-06, + "loss": 23.0312, + "step": 81920 + }, + { + "epoch": 0.16550378357850168, + "grad_norm": 450.554931640625, + "learning_rate": 9.876287534396304e-06, + "loss": 22.7319, + "step": 81930 + }, + { + "epoch": 0.1655239842111855, + "grad_norm": 1077.175537109375, + "learning_rate": 9.876210353854239e-06, + "loss": 18.6651, + "step": 81940 + }, + { + "epoch": 0.16554418484386932, + "grad_norm": 187.02159118652344, + "learning_rate": 9.876133149546117e-06, + "loss": 11.5557, + "step": 81950 + }, + { + "epoch": 0.16556438547655314, + "grad_norm": 768.2868041992188, + "learning_rate": 9.876055921472316e-06, + "loss": 33.242, + "step": 81960 + }, + { + "epoch": 0.16558458610923693, + "grad_norm": 138.95401000976562, + "learning_rate": 9.875978669633206e-06, + "loss": 13.3355, + "step": 81970 + }, + { + "epoch": 0.16560478674192075, + "grad_norm": 640.6097412109375, + "learning_rate": 9.87590139402917e-06, + "loss": 25.534, + "step": 81980 + }, + { + "epoch": 0.16562498737460457, + "grad_norm": 97.48204803466797, + "learning_rate": 9.87582409466058e-06, + "loss": 16.6026, + "step": 81990 + }, + { + "epoch": 0.1656451880072884, + "grad_norm": 296.3551330566406, + "learning_rate": 9.875746771527817e-06, + "loss": 26.2501, + "step": 82000 + }, + { + "epoch": 0.1656653886399722, + "grad_norm": 633.5289306640625, + "learning_rate": 9.875669424631255e-06, + "loss": 17.8387, + "step": 82010 + }, + { + "epoch": 0.16568558927265603, + "grad_norm": 295.5682373046875, + "learning_rate": 
9.87559205397127e-06, + "loss": 23.7444, + "step": 82020 + }, + { + "epoch": 0.16570578990533982, + "grad_norm": 200.01937866210938, + "learning_rate": 9.875514659548243e-06, + "loss": 27.2707, + "step": 82030 + }, + { + "epoch": 0.16572599053802364, + "grad_norm": 190.04364013671875, + "learning_rate": 9.875437241362546e-06, + "loss": 16.2927, + "step": 82040 + }, + { + "epoch": 0.16574619117070746, + "grad_norm": 53.213069915771484, + "learning_rate": 9.87535979941456e-06, + "loss": 5.1306, + "step": 82050 + }, + { + "epoch": 0.16576639180339128, + "grad_norm": 455.74365234375, + "learning_rate": 9.875282333704665e-06, + "loss": 38.2995, + "step": 82060 + }, + { + "epoch": 0.1657865924360751, + "grad_norm": 678.8593139648438, + "learning_rate": 9.875204844233231e-06, + "loss": 34.142, + "step": 82070 + }, + { + "epoch": 0.16580679306875892, + "grad_norm": 179.88424682617188, + "learning_rate": 9.875127331000642e-06, + "loss": 20.9054, + "step": 82080 + }, + { + "epoch": 0.16582699370144272, + "grad_norm": 832.3750610351562, + "learning_rate": 9.875049794007274e-06, + "loss": 48.829, + "step": 82090 + }, + { + "epoch": 0.16584719433412654, + "grad_norm": 434.0812072753906, + "learning_rate": 9.874972233253503e-06, + "loss": 27.954, + "step": 82100 + }, + { + "epoch": 0.16586739496681036, + "grad_norm": 407.52557373046875, + "learning_rate": 9.87489464873971e-06, + "loss": 7.0023, + "step": 82110 + }, + { + "epoch": 0.16588759559949418, + "grad_norm": 364.2360534667969, + "learning_rate": 9.874817040466271e-06, + "loss": 31.6499, + "step": 82120 + }, + { + "epoch": 0.165907796232178, + "grad_norm": 694.1449584960938, + "learning_rate": 9.874739408433565e-06, + "loss": 23.6916, + "step": 82130 + }, + { + "epoch": 0.16592799686486182, + "grad_norm": 261.3028869628906, + "learning_rate": 9.87466175264197e-06, + "loss": 34.8582, + "step": 82140 + }, + { + "epoch": 0.16594819749754564, + "grad_norm": 430.9696044921875, + "learning_rate": 9.874584073091867e-06, + "loss": 30.5337, + "step": 82150 + }, + { + "epoch": 0.16596839813022943, + "grad_norm": 384.80712890625, + "learning_rate": 9.874506369783629e-06, + "loss": 17.2107, + "step": 82160 + }, + { + "epoch": 0.16598859876291325, + "grad_norm": 303.382568359375, + "learning_rate": 9.874428642717641e-06, + "loss": 26.3623, + "step": 82170 + }, + { + "epoch": 0.16600879939559707, + "grad_norm": 392.7908630371094, + "learning_rate": 9.874350891894278e-06, + "loss": 28.234, + "step": 82180 + }, + { + "epoch": 0.1660290000282809, + "grad_norm": 172.56654357910156, + "learning_rate": 9.87427311731392e-06, + "loss": 23.7759, + "step": 82190 + }, + { + "epoch": 0.1660492006609647, + "grad_norm": 345.6696472167969, + "learning_rate": 9.874195318976945e-06, + "loss": 19.3664, + "step": 82200 + }, + { + "epoch": 0.16606940129364853, + "grad_norm": 517.3274536132812, + "learning_rate": 9.874117496883734e-06, + "loss": 21.3027, + "step": 82210 + }, + { + "epoch": 0.16608960192633232, + "grad_norm": 373.7959289550781, + "learning_rate": 9.874039651034665e-06, + "loss": 8.4066, + "step": 82220 + }, + { + "epoch": 0.16610980255901614, + "grad_norm": 1242.808349609375, + "learning_rate": 9.873961781430119e-06, + "loss": 41.4418, + "step": 82230 + }, + { + "epoch": 0.16613000319169996, + "grad_norm": 491.39044189453125, + "learning_rate": 9.873883888070474e-06, + "loss": 38.7835, + "step": 82240 + }, + { + "epoch": 0.16615020382438378, + "grad_norm": 216.55055236816406, + "learning_rate": 9.87380597095611e-06, + "loss": 10.1805, + "step": 82250 + }, + { + 
"epoch": 0.1661704044570676, + "grad_norm": 617.9553833007812, + "learning_rate": 9.873728030087406e-06, + "loss": 18.4007, + "step": 82260 + }, + { + "epoch": 0.16619060508975142, + "grad_norm": 355.6474304199219, + "learning_rate": 9.873650065464744e-06, + "loss": 15.7755, + "step": 82270 + }, + { + "epoch": 0.16621080572243524, + "grad_norm": 379.7162170410156, + "learning_rate": 9.873572077088502e-06, + "loss": 22.0875, + "step": 82280 + }, + { + "epoch": 0.16623100635511903, + "grad_norm": 303.56890869140625, + "learning_rate": 9.87349406495906e-06, + "loss": 28.2835, + "step": 82290 + }, + { + "epoch": 0.16625120698780285, + "grad_norm": 4.648748397827148, + "learning_rate": 9.873416029076801e-06, + "loss": 28.6749, + "step": 82300 + }, + { + "epoch": 0.16627140762048667, + "grad_norm": 34.346946716308594, + "learning_rate": 9.873337969442102e-06, + "loss": 28.4306, + "step": 82310 + }, + { + "epoch": 0.1662916082531705, + "grad_norm": 302.66461181640625, + "learning_rate": 9.873259886055344e-06, + "loss": 31.9182, + "step": 82320 + }, + { + "epoch": 0.1663118088858543, + "grad_norm": 184.1270294189453, + "learning_rate": 9.873181778916911e-06, + "loss": 23.1773, + "step": 82330 + }, + { + "epoch": 0.16633200951853813, + "grad_norm": 417.1488037109375, + "learning_rate": 9.873103648027178e-06, + "loss": 16.6926, + "step": 82340 + }, + { + "epoch": 0.16635221015122192, + "grad_norm": 770.2700805664062, + "learning_rate": 9.873025493386531e-06, + "loss": 28.0675, + "step": 82350 + }, + { + "epoch": 0.16637241078390574, + "grad_norm": 227.6401824951172, + "learning_rate": 9.872947314995348e-06, + "loss": 39.1034, + "step": 82360 + }, + { + "epoch": 0.16639261141658956, + "grad_norm": 410.4468078613281, + "learning_rate": 9.872869112854011e-06, + "loss": 18.9919, + "step": 82370 + }, + { + "epoch": 0.16641281204927338, + "grad_norm": 435.20819091796875, + "learning_rate": 9.872790886962901e-06, + "loss": 28.2852, + "step": 82380 + }, + { + "epoch": 0.1664330126819572, + "grad_norm": 421.23089599609375, + "learning_rate": 9.8727126373224e-06, + "loss": 32.7884, + "step": 82390 + }, + { + "epoch": 0.16645321331464102, + "grad_norm": 1861.697265625, + "learning_rate": 9.872634363932887e-06, + "loss": 36.1657, + "step": 82400 + }, + { + "epoch": 0.16647341394732482, + "grad_norm": 545.474609375, + "learning_rate": 9.872556066794745e-06, + "loss": 38.8971, + "step": 82410 + }, + { + "epoch": 0.16649361458000864, + "grad_norm": 429.2677001953125, + "learning_rate": 9.872477745908356e-06, + "loss": 25.0905, + "step": 82420 + }, + { + "epoch": 0.16651381521269246, + "grad_norm": 464.4039306640625, + "learning_rate": 9.872399401274103e-06, + "loss": 19.8081, + "step": 82430 + }, + { + "epoch": 0.16653401584537628, + "grad_norm": 215.3997344970703, + "learning_rate": 9.872321032892364e-06, + "loss": 27.133, + "step": 82440 + }, + { + "epoch": 0.1665542164780601, + "grad_norm": 328.7926330566406, + "learning_rate": 9.872242640763525e-06, + "loss": 18.479, + "step": 82450 + }, + { + "epoch": 0.16657441711074392, + "grad_norm": 259.94964599609375, + "learning_rate": 9.872164224887966e-06, + "loss": 19.8865, + "step": 82460 + }, + { + "epoch": 0.16659461774342774, + "grad_norm": 429.4085388183594, + "learning_rate": 9.872085785266069e-06, + "loss": 28.6425, + "step": 82470 + }, + { + "epoch": 0.16661481837611153, + "grad_norm": 427.0818786621094, + "learning_rate": 9.872007321898218e-06, + "loss": 21.0934, + "step": 82480 + }, + { + "epoch": 0.16663501900879535, + "grad_norm": 203.4080810546875, + 
"learning_rate": 9.871928834784793e-06, + "loss": 26.3094, + "step": 82490 + }, + { + "epoch": 0.16665521964147917, + "grad_norm": 421.71295166015625, + "learning_rate": 9.871850323926178e-06, + "loss": 20.1551, + "step": 82500 + }, + { + "epoch": 0.166675420274163, + "grad_norm": 173.3473663330078, + "learning_rate": 9.871771789322754e-06, + "loss": 27.9404, + "step": 82510 + }, + { + "epoch": 0.1666956209068468, + "grad_norm": 32.80507278442383, + "learning_rate": 9.871693230974907e-06, + "loss": 19.7537, + "step": 82520 + }, + { + "epoch": 0.16671582153953063, + "grad_norm": 810.5501708984375, + "learning_rate": 9.871614648883017e-06, + "loss": 14.9206, + "step": 82530 + }, + { + "epoch": 0.16673602217221442, + "grad_norm": 326.1086120605469, + "learning_rate": 9.87153604304747e-06, + "loss": 28.2606, + "step": 82540 + }, + { + "epoch": 0.16675622280489824, + "grad_norm": 569.2630004882812, + "learning_rate": 9.871457413468645e-06, + "loss": 81.8506, + "step": 82550 + }, + { + "epoch": 0.16677642343758206, + "grad_norm": 306.8781433105469, + "learning_rate": 9.871378760146928e-06, + "loss": 36.3098, + "step": 82560 + }, + { + "epoch": 0.16679662407026588, + "grad_norm": 834.5847778320312, + "learning_rate": 9.871300083082702e-06, + "loss": 31.8916, + "step": 82570 + }, + { + "epoch": 0.1668168247029497, + "grad_norm": 424.6417541503906, + "learning_rate": 9.87122138227635e-06, + "loss": 24.0622, + "step": 82580 + }, + { + "epoch": 0.16683702533563352, + "grad_norm": 218.0953826904297, + "learning_rate": 9.871142657728257e-06, + "loss": 34.0845, + "step": 82590 + }, + { + "epoch": 0.16685722596831734, + "grad_norm": 509.2384338378906, + "learning_rate": 9.871063909438803e-06, + "loss": 32.4494, + "step": 82600 + }, + { + "epoch": 0.16687742660100113, + "grad_norm": 303.0262756347656, + "learning_rate": 9.870985137408375e-06, + "loss": 20.673, + "step": 82610 + }, + { + "epoch": 0.16689762723368495, + "grad_norm": 487.12994384765625, + "learning_rate": 9.870906341637358e-06, + "loss": 23.9176, + "step": 82620 + }, + { + "epoch": 0.16691782786636877, + "grad_norm": 731.3406982421875, + "learning_rate": 9.870827522126134e-06, + "loss": 29.0172, + "step": 82630 + }, + { + "epoch": 0.1669380284990526, + "grad_norm": 364.0166320800781, + "learning_rate": 9.870748678875086e-06, + "loss": 18.3425, + "step": 82640 + }, + { + "epoch": 0.1669582291317364, + "grad_norm": 671.7162475585938, + "learning_rate": 9.8706698118846e-06, + "loss": 28.2177, + "step": 82650 + }, + { + "epoch": 0.16697842976442023, + "grad_norm": 827.7516479492188, + "learning_rate": 9.870590921155062e-06, + "loss": 26.0711, + "step": 82660 + }, + { + "epoch": 0.16699863039710403, + "grad_norm": 1016.3685913085938, + "learning_rate": 9.870512006686852e-06, + "loss": 32.4713, + "step": 82670 + }, + { + "epoch": 0.16701883102978785, + "grad_norm": 374.1891784667969, + "learning_rate": 9.870433068480359e-06, + "loss": 26.4627, + "step": 82680 + }, + { + "epoch": 0.16703903166247167, + "grad_norm": 416.0177917480469, + "learning_rate": 9.870354106535964e-06, + "loss": 26.2241, + "step": 82690 + }, + { + "epoch": 0.16705923229515549, + "grad_norm": 863.1328735351562, + "learning_rate": 9.870275120854055e-06, + "loss": 25.0358, + "step": 82700 + }, + { + "epoch": 0.1670794329278393, + "grad_norm": 165.13653564453125, + "learning_rate": 9.870196111435016e-06, + "loss": 23.1991, + "step": 82710 + }, + { + "epoch": 0.16709963356052313, + "grad_norm": 409.42364501953125, + "learning_rate": 9.870117078279231e-06, + "loss": 48.5756, + 
"step": 82720 + }, + { + "epoch": 0.16711983419320692, + "grad_norm": 535.4275512695312, + "learning_rate": 9.870038021387087e-06, + "loss": 24.4176, + "step": 82730 + }, + { + "epoch": 0.16714003482589074, + "grad_norm": 231.93536376953125, + "learning_rate": 9.869958940758968e-06, + "loss": 20.2856, + "step": 82740 + }, + { + "epoch": 0.16716023545857456, + "grad_norm": 324.0728759765625, + "learning_rate": 9.86987983639526e-06, + "loss": 24.347, + "step": 82750 + }, + { + "epoch": 0.16718043609125838, + "grad_norm": 577.8886108398438, + "learning_rate": 9.869800708296347e-06, + "loss": 24.2615, + "step": 82760 + }, + { + "epoch": 0.1672006367239422, + "grad_norm": 141.15060424804688, + "learning_rate": 9.869721556462617e-06, + "loss": 37.6809, + "step": 82770 + }, + { + "epoch": 0.16722083735662602, + "grad_norm": 227.20361328125, + "learning_rate": 9.869642380894454e-06, + "loss": 30.4997, + "step": 82780 + }, + { + "epoch": 0.16724103798930984, + "grad_norm": 1045.2889404296875, + "learning_rate": 9.869563181592246e-06, + "loss": 29.1216, + "step": 82790 + }, + { + "epoch": 0.16726123862199363, + "grad_norm": 494.2144775390625, + "learning_rate": 9.869483958556376e-06, + "loss": 24.3889, + "step": 82800 + }, + { + "epoch": 0.16728143925467745, + "grad_norm": 361.6865539550781, + "learning_rate": 9.869404711787234e-06, + "loss": 20.2709, + "step": 82810 + }, + { + "epoch": 0.16730163988736127, + "grad_norm": 480.38104248046875, + "learning_rate": 9.869325441285203e-06, + "loss": 24.9362, + "step": 82820 + }, + { + "epoch": 0.1673218405200451, + "grad_norm": 289.5055236816406, + "learning_rate": 9.869246147050669e-06, + "loss": 28.7482, + "step": 82830 + }, + { + "epoch": 0.1673420411527289, + "grad_norm": 259.9599914550781, + "learning_rate": 9.869166829084023e-06, + "loss": 23.9636, + "step": 82840 + }, + { + "epoch": 0.16736224178541273, + "grad_norm": 172.7774200439453, + "learning_rate": 9.869087487385644e-06, + "loss": 26.7328, + "step": 82850 + }, + { + "epoch": 0.16738244241809652, + "grad_norm": 15.006548881530762, + "learning_rate": 9.869008121955928e-06, + "loss": 18.947, + "step": 82860 + }, + { + "epoch": 0.16740264305078034, + "grad_norm": 1037.4061279296875, + "learning_rate": 9.868928732795253e-06, + "loss": 22.4189, + "step": 82870 + }, + { + "epoch": 0.16742284368346416, + "grad_norm": 732.235595703125, + "learning_rate": 9.868849319904012e-06, + "loss": 31.7228, + "step": 82880 + }, + { + "epoch": 0.16744304431614798, + "grad_norm": 758.1985473632812, + "learning_rate": 9.86876988328259e-06, + "loss": 18.0874, + "step": 82890 + }, + { + "epoch": 0.1674632449488318, + "grad_norm": 448.0339660644531, + "learning_rate": 9.868690422931372e-06, + "loss": 40.1267, + "step": 82900 + }, + { + "epoch": 0.16748344558151562, + "grad_norm": 440.24273681640625, + "learning_rate": 9.86861093885075e-06, + "loss": 24.2678, + "step": 82910 + }, + { + "epoch": 0.16750364621419944, + "grad_norm": 140.989990234375, + "learning_rate": 9.868531431041108e-06, + "loss": 19.4625, + "step": 82920 + }, + { + "epoch": 0.16752384684688323, + "grad_norm": 434.0629577636719, + "learning_rate": 9.868451899502833e-06, + "loss": 21.7049, + "step": 82930 + }, + { + "epoch": 0.16754404747956705, + "grad_norm": 254.76785278320312, + "learning_rate": 9.868372344236314e-06, + "loss": 58.1585, + "step": 82940 + }, + { + "epoch": 0.16756424811225087, + "grad_norm": 459.30084228515625, + "learning_rate": 9.86829276524194e-06, + "loss": 30.7155, + "step": 82950 + }, + { + "epoch": 0.1675844487449347, + 
"grad_norm": 901.2047729492188, + "learning_rate": 9.868213162520097e-06, + "loss": 29.0213, + "step": 82960 + }, + { + "epoch": 0.16760464937761851, + "grad_norm": 219.8902587890625, + "learning_rate": 9.868133536071174e-06, + "loss": 19.2086, + "step": 82970 + }, + { + "epoch": 0.16762485001030233, + "grad_norm": 344.43682861328125, + "learning_rate": 9.868053885895559e-06, + "loss": 22.8623, + "step": 82980 + }, + { + "epoch": 0.16764505064298613, + "grad_norm": 97.78935241699219, + "learning_rate": 9.867974211993639e-06, + "loss": 24.2089, + "step": 82990 + }, + { + "epoch": 0.16766525127566995, + "grad_norm": 214.8300323486328, + "learning_rate": 9.867894514365802e-06, + "loss": 20.0944, + "step": 83000 + }, + { + "epoch": 0.16768545190835377, + "grad_norm": 73.76973724365234, + "learning_rate": 9.867814793012437e-06, + "loss": 15.5302, + "step": 83010 + }, + { + "epoch": 0.1677056525410376, + "grad_norm": 976.6002197265625, + "learning_rate": 9.867735047933936e-06, + "loss": 24.5506, + "step": 83020 + }, + { + "epoch": 0.1677258531737214, + "grad_norm": 73.62681579589844, + "learning_rate": 9.867655279130684e-06, + "loss": 23.3989, + "step": 83030 + }, + { + "epoch": 0.16774605380640523, + "grad_norm": 818.6986083984375, + "learning_rate": 9.86757548660307e-06, + "loss": 18.849, + "step": 83040 + }, + { + "epoch": 0.16776625443908902, + "grad_norm": 716.3596801757812, + "learning_rate": 9.867495670351483e-06, + "loss": 35.8794, + "step": 83050 + }, + { + "epoch": 0.16778645507177284, + "grad_norm": 481.1494140625, + "learning_rate": 9.867415830376313e-06, + "loss": 22.0284, + "step": 83060 + }, + { + "epoch": 0.16780665570445666, + "grad_norm": 236.226806640625, + "learning_rate": 9.867335966677949e-06, + "loss": 28.4822, + "step": 83070 + }, + { + "epoch": 0.16782685633714048, + "grad_norm": 537.2672729492188, + "learning_rate": 9.867256079256779e-06, + "loss": 22.5255, + "step": 83080 + }, + { + "epoch": 0.1678470569698243, + "grad_norm": 465.3406066894531, + "learning_rate": 9.867176168113193e-06, + "loss": 31.2277, + "step": 83090 + }, + { + "epoch": 0.16786725760250812, + "grad_norm": 162.4087371826172, + "learning_rate": 9.867096233247581e-06, + "loss": 27.0187, + "step": 83100 + }, + { + "epoch": 0.16788745823519194, + "grad_norm": 204.7646026611328, + "learning_rate": 9.867016274660333e-06, + "loss": 26.6123, + "step": 83110 + }, + { + "epoch": 0.16790765886787573, + "grad_norm": 476.49517822265625, + "learning_rate": 9.866936292351837e-06, + "loss": 33.0041, + "step": 83120 + }, + { + "epoch": 0.16792785950055955, + "grad_norm": 310.9793701171875, + "learning_rate": 9.866856286322484e-06, + "loss": 13.6093, + "step": 83130 + }, + { + "epoch": 0.16794806013324337, + "grad_norm": 1082.086181640625, + "learning_rate": 9.866776256572662e-06, + "loss": 47.0691, + "step": 83140 + }, + { + "epoch": 0.1679682607659272, + "grad_norm": 771.9020385742188, + "learning_rate": 9.866696203102765e-06, + "loss": 32.4807, + "step": 83150 + }, + { + "epoch": 0.167988461398611, + "grad_norm": 454.5968017578125, + "learning_rate": 9.866616125913182e-06, + "loss": 25.2858, + "step": 83160 + }, + { + "epoch": 0.16800866203129483, + "grad_norm": 537.4826049804688, + "learning_rate": 9.8665360250043e-06, + "loss": 21.0879, + "step": 83170 + }, + { + "epoch": 0.16802886266397862, + "grad_norm": 755.97802734375, + "learning_rate": 9.866455900376514e-06, + "loss": 30.0066, + "step": 83180 + }, + { + "epoch": 0.16804906329666244, + "grad_norm": 447.1153869628906, + "learning_rate": 9.86637575203021e-06, 
+ "loss": 21.2045, + "step": 83190 + }, + { + "epoch": 0.16806926392934626, + "grad_norm": 423.9645080566406, + "learning_rate": 9.866295579965782e-06, + "loss": 26.5368, + "step": 83200 + }, + { + "epoch": 0.16808946456203008, + "grad_norm": 630.846923828125, + "learning_rate": 9.86621538418362e-06, + "loss": 37.2785, + "step": 83210 + }, + { + "epoch": 0.1681096651947139, + "grad_norm": 443.00054931640625, + "learning_rate": 9.866135164684112e-06, + "loss": 32.5187, + "step": 83220 + }, + { + "epoch": 0.16812986582739772, + "grad_norm": 267.3347473144531, + "learning_rate": 9.866054921467654e-06, + "loss": 27.728, + "step": 83230 + }, + { + "epoch": 0.16815006646008154, + "grad_norm": 499.41632080078125, + "learning_rate": 9.865974654534634e-06, + "loss": 22.1323, + "step": 83240 + }, + { + "epoch": 0.16817026709276534, + "grad_norm": 564.2894897460938, + "learning_rate": 9.865894363885442e-06, + "loss": 40.6049, + "step": 83250 + }, + { + "epoch": 0.16819046772544916, + "grad_norm": 87.25096893310547, + "learning_rate": 9.865814049520473e-06, + "loss": 14.6281, + "step": 83260 + }, + { + "epoch": 0.16821066835813298, + "grad_norm": 268.3941345214844, + "learning_rate": 9.865733711440116e-06, + "loss": 16.5669, + "step": 83270 + }, + { + "epoch": 0.1682308689908168, + "grad_norm": 258.07379150390625, + "learning_rate": 9.865653349644761e-06, + "loss": 17.1536, + "step": 83280 + }, + { + "epoch": 0.16825106962350062, + "grad_norm": 564.84912109375, + "learning_rate": 9.865572964134804e-06, + "loss": 38.7723, + "step": 83290 + }, + { + "epoch": 0.16827127025618444, + "grad_norm": 198.5872802734375, + "learning_rate": 9.865492554910634e-06, + "loss": 11.8511, + "step": 83300 + }, + { + "epoch": 0.16829147088886823, + "grad_norm": 267.9917297363281, + "learning_rate": 9.865412121972643e-06, + "loss": 18.4249, + "step": 83310 + }, + { + "epoch": 0.16831167152155205, + "grad_norm": 197.87791442871094, + "learning_rate": 9.865331665321222e-06, + "loss": 25.7429, + "step": 83320 + }, + { + "epoch": 0.16833187215423587, + "grad_norm": 877.0123901367188, + "learning_rate": 9.865251184956767e-06, + "loss": 31.5932, + "step": 83330 + }, + { + "epoch": 0.1683520727869197, + "grad_norm": 295.51324462890625, + "learning_rate": 9.865170680879667e-06, + "loss": 26.722, + "step": 83340 + }, + { + "epoch": 0.1683722734196035, + "grad_norm": 13.86534309387207, + "learning_rate": 9.865090153090315e-06, + "loss": 30.3432, + "step": 83350 + }, + { + "epoch": 0.16839247405228733, + "grad_norm": 317.31341552734375, + "learning_rate": 9.865009601589105e-06, + "loss": 27.8171, + "step": 83360 + }, + { + "epoch": 0.16841267468497112, + "grad_norm": 425.7890930175781, + "learning_rate": 9.864929026376427e-06, + "loss": 25.6833, + "step": 83370 + }, + { + "epoch": 0.16843287531765494, + "grad_norm": 769.4445190429688, + "learning_rate": 9.864848427452675e-06, + "loss": 28.4851, + "step": 83380 + }, + { + "epoch": 0.16845307595033876, + "grad_norm": 336.91571044921875, + "learning_rate": 9.864767804818242e-06, + "loss": 16.6119, + "step": 83390 + }, + { + "epoch": 0.16847327658302258, + "grad_norm": 524.8754272460938, + "learning_rate": 9.86468715847352e-06, + "loss": 22.7018, + "step": 83400 + }, + { + "epoch": 0.1684934772157064, + "grad_norm": 401.87353515625, + "learning_rate": 9.864606488418905e-06, + "loss": 39.302, + "step": 83410 + }, + { + "epoch": 0.16851367784839022, + "grad_norm": 365.1914367675781, + "learning_rate": 9.864525794654786e-06, + "loss": 19.2479, + "step": 83420 + }, + { + "epoch": 
0.16853387848107404, + "grad_norm": 745.577880859375, + "learning_rate": 9.864445077181559e-06, + "loss": 38.7108, + "step": 83430 + }, + { + "epoch": 0.16855407911375783, + "grad_norm": 112.34915924072266, + "learning_rate": 9.864364335999615e-06, + "loss": 23.4639, + "step": 83440 + }, + { + "epoch": 0.16857427974644165, + "grad_norm": 464.5140075683594, + "learning_rate": 9.864283571109352e-06, + "loss": 35.8791, + "step": 83450 + }, + { + "epoch": 0.16859448037912547, + "grad_norm": 132.9011688232422, + "learning_rate": 9.864202782511158e-06, + "loss": 40.9821, + "step": 83460 + }, + { + "epoch": 0.1686146810118093, + "grad_norm": 201.52503967285156, + "learning_rate": 9.864121970205431e-06, + "loss": 25.3062, + "step": 83470 + }, + { + "epoch": 0.1686348816444931, + "grad_norm": 233.94447326660156, + "learning_rate": 9.864041134192563e-06, + "loss": 13.6422, + "step": 83480 + }, + { + "epoch": 0.16865508227717693, + "grad_norm": 336.04986572265625, + "learning_rate": 9.86396027447295e-06, + "loss": 31.3297, + "step": 83490 + }, + { + "epoch": 0.16867528290986072, + "grad_norm": 519.1437377929688, + "learning_rate": 9.863879391046985e-06, + "loss": 18.7927, + "step": 83500 + }, + { + "epoch": 0.16869548354254454, + "grad_norm": 271.48291015625, + "learning_rate": 9.863798483915059e-06, + "loss": 24.2579, + "step": 83510 + }, + { + "epoch": 0.16871568417522836, + "grad_norm": 389.02093505859375, + "learning_rate": 9.86371755307757e-06, + "loss": 29.0493, + "step": 83520 + }, + { + "epoch": 0.16873588480791218, + "grad_norm": 696.2223510742188, + "learning_rate": 9.863636598534912e-06, + "loss": 24.1301, + "step": 83530 + }, + { + "epoch": 0.168756085440596, + "grad_norm": 234.34986877441406, + "learning_rate": 9.863555620287479e-06, + "loss": 14.363, + "step": 83540 + }, + { + "epoch": 0.16877628607327982, + "grad_norm": 279.8359680175781, + "learning_rate": 9.863474618335666e-06, + "loss": 21.0213, + "step": 83550 + }, + { + "epoch": 0.16879648670596364, + "grad_norm": 299.2847595214844, + "learning_rate": 9.863393592679867e-06, + "loss": 30.3096, + "step": 83560 + }, + { + "epoch": 0.16881668733864744, + "grad_norm": 665.0737915039062, + "learning_rate": 9.863312543320479e-06, + "loss": 19.6344, + "step": 83570 + }, + { + "epoch": 0.16883688797133126, + "grad_norm": 608.3574829101562, + "learning_rate": 9.863231470257893e-06, + "loss": 17.4846, + "step": 83580 + }, + { + "epoch": 0.16885708860401508, + "grad_norm": 632.60400390625, + "learning_rate": 9.863150373492509e-06, + "loss": 18.2631, + "step": 83590 + }, + { + "epoch": 0.1688772892366989, + "grad_norm": 88.76485443115234, + "learning_rate": 9.863069253024719e-06, + "loss": 26.1825, + "step": 83600 + }, + { + "epoch": 0.16889748986938272, + "grad_norm": 438.93951416015625, + "learning_rate": 9.862988108854919e-06, + "loss": 17.7443, + "step": 83610 + }, + { + "epoch": 0.16891769050206654, + "grad_norm": 496.48602294921875, + "learning_rate": 9.862906940983505e-06, + "loss": 33.4956, + "step": 83620 + }, + { + "epoch": 0.16893789113475033, + "grad_norm": 391.3303527832031, + "learning_rate": 9.862825749410872e-06, + "loss": 21.2826, + "step": 83630 + }, + { + "epoch": 0.16895809176743415, + "grad_norm": 329.953369140625, + "learning_rate": 9.862744534137416e-06, + "loss": 14.2922, + "step": 83640 + }, + { + "epoch": 0.16897829240011797, + "grad_norm": 408.1158752441406, + "learning_rate": 9.862663295163533e-06, + "loss": 35.4676, + "step": 83650 + }, + { + "epoch": 0.1689984930328018, + "grad_norm": 244.37925720214844, + 
"learning_rate": 9.862582032489621e-06, + "loss": 15.8363, + "step": 83660 + }, + { + "epoch": 0.1690186936654856, + "grad_norm": 428.01556396484375, + "learning_rate": 9.86250074611607e-06, + "loss": 15.2981, + "step": 83670 + }, + { + "epoch": 0.16903889429816943, + "grad_norm": 282.2440185546875, + "learning_rate": 9.862419436043284e-06, + "loss": 40.3479, + "step": 83680 + }, + { + "epoch": 0.16905909493085322, + "grad_norm": 425.94366455078125, + "learning_rate": 9.862338102271654e-06, + "loss": 18.5903, + "step": 83690 + }, + { + "epoch": 0.16907929556353704, + "grad_norm": 337.7959899902344, + "learning_rate": 9.862256744801576e-06, + "loss": 19.8282, + "step": 83700 + }, + { + "epoch": 0.16909949619622086, + "grad_norm": 844.557373046875, + "learning_rate": 9.86217536363345e-06, + "loss": 29.688, + "step": 83710 + }, + { + "epoch": 0.16911969682890468, + "grad_norm": 194.49513244628906, + "learning_rate": 9.862093958767671e-06, + "loss": 36.4832, + "step": 83720 + }, + { + "epoch": 0.1691398974615885, + "grad_norm": 537.4203491210938, + "learning_rate": 9.862012530204636e-06, + "loss": 26.5721, + "step": 83730 + }, + { + "epoch": 0.16916009809427232, + "grad_norm": 731.2578125, + "learning_rate": 9.86193107794474e-06, + "loss": 17.8927, + "step": 83740 + }, + { + "epoch": 0.16918029872695614, + "grad_norm": 87.39258575439453, + "learning_rate": 9.861849601988384e-06, + "loss": 14.7109, + "step": 83750 + }, + { + "epoch": 0.16920049935963993, + "grad_norm": 188.87950134277344, + "learning_rate": 9.861768102335961e-06, + "loss": 34.3287, + "step": 83760 + }, + { + "epoch": 0.16922069999232375, + "grad_norm": 165.73243713378906, + "learning_rate": 9.861686578987871e-06, + "loss": 33.7244, + "step": 83770 + }, + { + "epoch": 0.16924090062500757, + "grad_norm": 314.24139404296875, + "learning_rate": 9.86160503194451e-06, + "loss": 13.688, + "step": 83780 + }, + { + "epoch": 0.1692611012576914, + "grad_norm": 441.1898193359375, + "learning_rate": 9.861523461206275e-06, + "loss": 21.0881, + "step": 83790 + }, + { + "epoch": 0.1692813018903752, + "grad_norm": 375.0849914550781, + "learning_rate": 9.861441866773564e-06, + "loss": 29.8344, + "step": 83800 + }, + { + "epoch": 0.16930150252305903, + "grad_norm": 280.3564147949219, + "learning_rate": 9.861360248646777e-06, + "loss": 30.6994, + "step": 83810 + }, + { + "epoch": 0.16932170315574283, + "grad_norm": 456.1604919433594, + "learning_rate": 9.861278606826307e-06, + "loss": 44.3701, + "step": 83820 + }, + { + "epoch": 0.16934190378842665, + "grad_norm": 213.81643676757812, + "learning_rate": 9.861196941312556e-06, + "loss": 16.4747, + "step": 83830 + }, + { + "epoch": 0.16936210442111047, + "grad_norm": 500.5413818359375, + "learning_rate": 9.861115252105922e-06, + "loss": 25.5642, + "step": 83840 + }, + { + "epoch": 0.16938230505379429, + "grad_norm": 475.6985778808594, + "learning_rate": 9.8610335392068e-06, + "loss": 19.0535, + "step": 83850 + }, + { + "epoch": 0.1694025056864781, + "grad_norm": 351.562744140625, + "learning_rate": 9.86095180261559e-06, + "loss": 24.703, + "step": 83860 + }, + { + "epoch": 0.16942270631916193, + "grad_norm": 373.62286376953125, + "learning_rate": 9.860870042332693e-06, + "loss": 17.4584, + "step": 83870 + }, + { + "epoch": 0.16944290695184575, + "grad_norm": 230.38662719726562, + "learning_rate": 9.860788258358503e-06, + "loss": 23.5072, + "step": 83880 + }, + { + "epoch": 0.16946310758452954, + "grad_norm": 180.81906127929688, + "learning_rate": 9.86070645069342e-06, + "loss": 17.0741, + "step": 
83890 + }, + { + "epoch": 0.16948330821721336, + "grad_norm": 378.0179748535156, + "learning_rate": 9.860624619337844e-06, + "loss": 56.2685, + "step": 83900 + }, + { + "epoch": 0.16950350884989718, + "grad_norm": 320.8547668457031, + "learning_rate": 9.860542764292173e-06, + "loss": 19.7221, + "step": 83910 + }, + { + "epoch": 0.169523709482581, + "grad_norm": 245.2682647705078, + "learning_rate": 9.860460885556806e-06, + "loss": 22.3827, + "step": 83920 + }, + { + "epoch": 0.16954391011526482, + "grad_norm": 551.7291870117188, + "learning_rate": 9.860378983132144e-06, + "loss": 19.6086, + "step": 83930 + }, + { + "epoch": 0.16956411074794864, + "grad_norm": 2057.81494140625, + "learning_rate": 9.860297057018581e-06, + "loss": 44.4755, + "step": 83940 + }, + { + "epoch": 0.16958431138063243, + "grad_norm": 314.1662902832031, + "learning_rate": 9.860215107216523e-06, + "loss": 33.9747, + "step": 83950 + }, + { + "epoch": 0.16960451201331625, + "grad_norm": 595.001953125, + "learning_rate": 9.860133133726364e-06, + "loss": 18.0649, + "step": 83960 + }, + { + "epoch": 0.16962471264600007, + "grad_norm": 651.8950805664062, + "learning_rate": 9.860051136548506e-06, + "loss": 25.674, + "step": 83970 + }, + { + "epoch": 0.1696449132786839, + "grad_norm": 7985.6484375, + "learning_rate": 9.859969115683348e-06, + "loss": 54.6429, + "step": 83980 + }, + { + "epoch": 0.1696651139113677, + "grad_norm": 529.5379638671875, + "learning_rate": 9.85988707113129e-06, + "loss": 20.52, + "step": 83990 + }, + { + "epoch": 0.16968531454405153, + "grad_norm": 249.68833923339844, + "learning_rate": 9.859805002892733e-06, + "loss": 16.4941, + "step": 84000 + }, + { + "epoch": 0.16970551517673532, + "grad_norm": 438.7927551269531, + "learning_rate": 9.859722910968073e-06, + "loss": 22.7703, + "step": 84010 + }, + { + "epoch": 0.16972571580941914, + "grad_norm": 330.3010559082031, + "learning_rate": 9.859640795357716e-06, + "loss": 17.3347, + "step": 84020 + }, + { + "epoch": 0.16974591644210296, + "grad_norm": 682.3013305664062, + "learning_rate": 9.859558656062057e-06, + "loss": 19.3317, + "step": 84030 + }, + { + "epoch": 0.16976611707478678, + "grad_norm": 76.0897445678711, + "learning_rate": 9.8594764930815e-06, + "loss": 11.554, + "step": 84040 + }, + { + "epoch": 0.1697863177074706, + "grad_norm": 654.0238647460938, + "learning_rate": 9.859394306416443e-06, + "loss": 15.4549, + "step": 84050 + }, + { + "epoch": 0.16980651834015442, + "grad_norm": 983.010009765625, + "learning_rate": 9.859312096067289e-06, + "loss": 25.9158, + "step": 84060 + }, + { + "epoch": 0.16982671897283824, + "grad_norm": 451.46307373046875, + "learning_rate": 9.859229862034436e-06, + "loss": 22.6454, + "step": 84070 + }, + { + "epoch": 0.16984691960552203, + "grad_norm": 1029.73291015625, + "learning_rate": 9.859147604318286e-06, + "loss": 21.3787, + "step": 84080 + }, + { + "epoch": 0.16986712023820585, + "grad_norm": 1070.5137939453125, + "learning_rate": 9.859065322919239e-06, + "loss": 20.8518, + "step": 84090 + }, + { + "epoch": 0.16988732087088967, + "grad_norm": 217.96324157714844, + "learning_rate": 9.8589830178377e-06, + "loss": 25.6919, + "step": 84100 + }, + { + "epoch": 0.1699075215035735, + "grad_norm": 310.8514404296875, + "learning_rate": 9.858900689074065e-06, + "loss": 18.1398, + "step": 84110 + }, + { + "epoch": 0.16992772213625731, + "grad_norm": 168.74844360351562, + "learning_rate": 9.858818336628737e-06, + "loss": 15.2742, + "step": 84120 + }, + { + "epoch": 0.16994792276894113, + "grad_norm": 485.7269592285156, 
+ "learning_rate": 9.858735960502118e-06, + "loss": 27.0941, + "step": 84130 + }, + { + "epoch": 0.16996812340162493, + "grad_norm": 488.62548828125, + "learning_rate": 9.858653560694609e-06, + "loss": 18.2037, + "step": 84140 + }, + { + "epoch": 0.16998832403430875, + "grad_norm": 385.607421875, + "learning_rate": 9.858571137206611e-06, + "loss": 23.4072, + "step": 84150 + }, + { + "epoch": 0.17000852466699257, + "grad_norm": 844.4804077148438, + "learning_rate": 9.858488690038529e-06, + "loss": 25.7212, + "step": 84160 + }, + { + "epoch": 0.1700287252996764, + "grad_norm": 77.7420425415039, + "learning_rate": 9.858406219190761e-06, + "loss": 21.6422, + "step": 84170 + }, + { + "epoch": 0.1700489259323602, + "grad_norm": 814.97509765625, + "learning_rate": 9.858323724663712e-06, + "loss": 35.8841, + "step": 84180 + }, + { + "epoch": 0.17006912656504403, + "grad_norm": 508.8529357910156, + "learning_rate": 9.85824120645778e-06, + "loss": 21.9199, + "step": 84190 + }, + { + "epoch": 0.17008932719772785, + "grad_norm": 136.87261962890625, + "learning_rate": 9.85815866457337e-06, + "loss": 18.0118, + "step": 84200 + }, + { + "epoch": 0.17010952783041164, + "grad_norm": 156.2366180419922, + "learning_rate": 9.858076099010885e-06, + "loss": 18.8808, + "step": 84210 + }, + { + "epoch": 0.17012972846309546, + "grad_norm": 429.79193115234375, + "learning_rate": 9.857993509770725e-06, + "loss": 12.0462, + "step": 84220 + }, + { + "epoch": 0.17014992909577928, + "grad_norm": 380.55255126953125, + "learning_rate": 9.857910896853296e-06, + "loss": 26.2462, + "step": 84230 + }, + { + "epoch": 0.1701701297284631, + "grad_norm": 642.4622802734375, + "learning_rate": 9.857828260258997e-06, + "loss": 26.4779, + "step": 84240 + }, + { + "epoch": 0.17019033036114692, + "grad_norm": 443.7367858886719, + "learning_rate": 9.857745599988231e-06, + "loss": 23.3827, + "step": 84250 + }, + { + "epoch": 0.17021053099383074, + "grad_norm": 489.1470947265625, + "learning_rate": 9.857662916041404e-06, + "loss": 25.824, + "step": 84260 + }, + { + "epoch": 0.17023073162651453, + "grad_norm": 832.2505493164062, + "learning_rate": 9.857580208418917e-06, + "loss": 21.9109, + "step": 84270 + }, + { + "epoch": 0.17025093225919835, + "grad_norm": 132.86337280273438, + "learning_rate": 9.857497477121172e-06, + "loss": 21.6228, + "step": 84280 + }, + { + "epoch": 0.17027113289188217, + "grad_norm": 149.47938537597656, + "learning_rate": 9.857414722148574e-06, + "loss": 28.1589, + "step": 84290 + }, + { + "epoch": 0.170291333524566, + "grad_norm": 294.6853942871094, + "learning_rate": 9.857331943501527e-06, + "loss": 20.025, + "step": 84300 + }, + { + "epoch": 0.1703115341572498, + "grad_norm": 504.4954833984375, + "learning_rate": 9.857249141180431e-06, + "loss": 22.9865, + "step": 84310 + }, + { + "epoch": 0.17033173478993363, + "grad_norm": 406.8207092285156, + "learning_rate": 9.857166315185693e-06, + "loss": 14.5563, + "step": 84320 + }, + { + "epoch": 0.17035193542261742, + "grad_norm": 973.2086791992188, + "learning_rate": 9.857083465517716e-06, + "loss": 32.8805, + "step": 84330 + }, + { + "epoch": 0.17037213605530124, + "grad_norm": 667.1934814453125, + "learning_rate": 9.857000592176902e-06, + "loss": 18.7556, + "step": 84340 + }, + { + "epoch": 0.17039233668798506, + "grad_norm": 49.66745376586914, + "learning_rate": 9.856917695163659e-06, + "loss": 21.303, + "step": 84350 + }, + { + "epoch": 0.17041253732066888, + "grad_norm": 467.7310791015625, + "learning_rate": 9.856834774478385e-06, + "loss": 31.2778, + "step": 
84360 + }, + { + "epoch": 0.1704327379533527, + "grad_norm": 1260.0576171875, + "learning_rate": 9.85675183012149e-06, + "loss": 35.1867, + "step": 84370 + }, + { + "epoch": 0.17045293858603652, + "grad_norm": 439.5983581542969, + "learning_rate": 9.856668862093372e-06, + "loss": 32.415, + "step": 84380 + }, + { + "epoch": 0.17047313921872034, + "grad_norm": 0.0, + "learning_rate": 9.856585870394442e-06, + "loss": 26.6695, + "step": 84390 + }, + { + "epoch": 0.17049333985140414, + "grad_norm": 512.7556762695312, + "learning_rate": 9.856502855025101e-06, + "loss": 26.631, + "step": 84400 + }, + { + "epoch": 0.17051354048408796, + "grad_norm": 551.2488403320312, + "learning_rate": 9.856419815985754e-06, + "loss": 29.4239, + "step": 84410 + }, + { + "epoch": 0.17053374111677178, + "grad_norm": 490.7176818847656, + "learning_rate": 9.856336753276804e-06, + "loss": 32.1564, + "step": 84420 + }, + { + "epoch": 0.1705539417494556, + "grad_norm": 487.49810791015625, + "learning_rate": 9.85625366689866e-06, + "loss": 31.1072, + "step": 84430 + }, + { + "epoch": 0.17057414238213942, + "grad_norm": 600.7901000976562, + "learning_rate": 9.856170556851725e-06, + "loss": 28.5669, + "step": 84440 + }, + { + "epoch": 0.17059434301482324, + "grad_norm": 98.74726104736328, + "learning_rate": 9.856087423136403e-06, + "loss": 15.7651, + "step": 84450 + }, + { + "epoch": 0.17061454364750703, + "grad_norm": 922.817138671875, + "learning_rate": 9.856004265753099e-06, + "loss": 24.1547, + "step": 84460 + }, + { + "epoch": 0.17063474428019085, + "grad_norm": 493.76019287109375, + "learning_rate": 9.85592108470222e-06, + "loss": 23.3009, + "step": 84470 + }, + { + "epoch": 0.17065494491287467, + "grad_norm": 383.5968017578125, + "learning_rate": 9.85583787998417e-06, + "loss": 30.4717, + "step": 84480 + }, + { + "epoch": 0.1706751455455585, + "grad_norm": 350.8125305175781, + "learning_rate": 9.855754651599355e-06, + "loss": 18.2192, + "step": 84490 + }, + { + "epoch": 0.1706953461782423, + "grad_norm": 498.9764709472656, + "learning_rate": 9.85567139954818e-06, + "loss": 30.5408, + "step": 84500 + }, + { + "epoch": 0.17071554681092613, + "grad_norm": 915.3928833007812, + "learning_rate": 9.855588123831053e-06, + "loss": 26.5895, + "step": 84510 + }, + { + "epoch": 0.17073574744360995, + "grad_norm": 261.22625732421875, + "learning_rate": 9.855504824448379e-06, + "loss": 21.6851, + "step": 84520 + }, + { + "epoch": 0.17075594807629374, + "grad_norm": 1028.3553466796875, + "learning_rate": 9.855421501400562e-06, + "loss": 32.9727, + "step": 84530 + }, + { + "epoch": 0.17077614870897756, + "grad_norm": 632.3440551757812, + "learning_rate": 9.85533815468801e-06, + "loss": 42.8606, + "step": 84540 + }, + { + "epoch": 0.17079634934166138, + "grad_norm": 410.7352294921875, + "learning_rate": 9.85525478431113e-06, + "loss": 35.3848, + "step": 84550 + }, + { + "epoch": 0.1708165499743452, + "grad_norm": 219.40965270996094, + "learning_rate": 9.855171390270325e-06, + "loss": 16.5595, + "step": 84560 + }, + { + "epoch": 0.17083675060702902, + "grad_norm": 847.7810668945312, + "learning_rate": 9.855087972566004e-06, + "loss": 24.7424, + "step": 84570 + }, + { + "epoch": 0.17085695123971284, + "grad_norm": 189.41087341308594, + "learning_rate": 9.855004531198573e-06, + "loss": 19.8264, + "step": 84580 + }, + { + "epoch": 0.17087715187239663, + "grad_norm": 429.26263427734375, + "learning_rate": 9.854921066168439e-06, + "loss": 32.3948, + "step": 84590 + }, + { + "epoch": 0.17089735250508045, + "grad_norm": 938.6051025390625, + 
"learning_rate": 9.854837577476008e-06, + "loss": 48.2536, + "step": 84600 + }, + { + "epoch": 0.17091755313776427, + "grad_norm": 858.9446411132812, + "learning_rate": 9.854754065121689e-06, + "loss": 27.1367, + "step": 84610 + }, + { + "epoch": 0.1709377537704481, + "grad_norm": 395.283935546875, + "learning_rate": 9.854670529105887e-06, + "loss": 23.5078, + "step": 84620 + }, + { + "epoch": 0.1709579544031319, + "grad_norm": 509.2507629394531, + "learning_rate": 9.854586969429009e-06, + "loss": 21.7217, + "step": 84630 + }, + { + "epoch": 0.17097815503581573, + "grad_norm": 197.88796997070312, + "learning_rate": 9.854503386091463e-06, + "loss": 24.1366, + "step": 84640 + }, + { + "epoch": 0.17099835566849952, + "grad_norm": 116.27680206298828, + "learning_rate": 9.854419779093656e-06, + "loss": 22.4091, + "step": 84650 + }, + { + "epoch": 0.17101855630118334, + "grad_norm": 419.3030700683594, + "learning_rate": 9.854336148435997e-06, + "loss": 21.6782, + "step": 84660 + }, + { + "epoch": 0.17103875693386716, + "grad_norm": 330.1683654785156, + "learning_rate": 9.85425249411889e-06, + "loss": 18.3274, + "step": 84670 + }, + { + "epoch": 0.17105895756655098, + "grad_norm": 273.65057373046875, + "learning_rate": 9.854168816142747e-06, + "loss": 31.5305, + "step": 84680 + }, + { + "epoch": 0.1710791581992348, + "grad_norm": 835.4115600585938, + "learning_rate": 9.854085114507974e-06, + "loss": 18.9734, + "step": 84690 + }, + { + "epoch": 0.17109935883191862, + "grad_norm": 1398.6064453125, + "learning_rate": 9.854001389214979e-06, + "loss": 35.758, + "step": 84700 + }, + { + "epoch": 0.17111955946460244, + "grad_norm": 1547.03466796875, + "learning_rate": 9.853917640264169e-06, + "loss": 37.8532, + "step": 84710 + }, + { + "epoch": 0.17113976009728624, + "grad_norm": 406.7312316894531, + "learning_rate": 9.853833867655954e-06, + "loss": 21.9945, + "step": 84720 + }, + { + "epoch": 0.17115996072997006, + "grad_norm": 250.5803680419922, + "learning_rate": 9.853750071390739e-06, + "loss": 31.857, + "step": 84730 + }, + { + "epoch": 0.17118016136265388, + "grad_norm": 1100.2838134765625, + "learning_rate": 9.853666251468938e-06, + "loss": 35.0792, + "step": 84740 + }, + { + "epoch": 0.1712003619953377, + "grad_norm": 960.6127319335938, + "learning_rate": 9.853582407890954e-06, + "loss": 38.3057, + "step": 84750 + }, + { + "epoch": 0.17122056262802152, + "grad_norm": 284.38946533203125, + "learning_rate": 9.853498540657201e-06, + "loss": 29.5066, + "step": 84760 + }, + { + "epoch": 0.17124076326070534, + "grad_norm": 342.55877685546875, + "learning_rate": 9.853414649768082e-06, + "loss": 27.3454, + "step": 84770 + }, + { + "epoch": 0.17126096389338913, + "grad_norm": 345.8613586425781, + "learning_rate": 9.85333073522401e-06, + "loss": 21.7276, + "step": 84780 + }, + { + "epoch": 0.17128116452607295, + "grad_norm": 391.4869689941406, + "learning_rate": 9.853246797025391e-06, + "loss": 25.0733, + "step": 84790 + }, + { + "epoch": 0.17130136515875677, + "grad_norm": 260.3005676269531, + "learning_rate": 9.853162835172638e-06, + "loss": 27.0298, + "step": 84800 + }, + { + "epoch": 0.1713215657914406, + "grad_norm": 1010.074951171875, + "learning_rate": 9.853078849666156e-06, + "loss": 43.123, + "step": 84810 + }, + { + "epoch": 0.1713417664241244, + "grad_norm": 565.1976318359375, + "learning_rate": 9.852994840506357e-06, + "loss": 20.6687, + "step": 84820 + }, + { + "epoch": 0.17136196705680823, + "grad_norm": 293.1611022949219, + "learning_rate": 9.85291080769365e-06, + "loss": 25.6356, + "step": 
84830 + }, + { + "epoch": 0.17138216768949205, + "grad_norm": 373.0630798339844, + "learning_rate": 9.852826751228445e-06, + "loss": 20.0546, + "step": 84840 + }, + { + "epoch": 0.17140236832217584, + "grad_norm": 450.2943115234375, + "learning_rate": 9.852742671111151e-06, + "loss": 24.5589, + "step": 84850 + }, + { + "epoch": 0.17142256895485966, + "grad_norm": 859.8757934570312, + "learning_rate": 9.852658567342177e-06, + "loss": 23.4677, + "step": 84860 + }, + { + "epoch": 0.17144276958754348, + "grad_norm": 847.817138671875, + "learning_rate": 9.852574439921933e-06, + "loss": 38.1542, + "step": 84870 + }, + { + "epoch": 0.1714629702202273, + "grad_norm": 113.4184341430664, + "learning_rate": 9.85249028885083e-06, + "loss": 14.9155, + "step": 84880 + }, + { + "epoch": 0.17148317085291112, + "grad_norm": 198.20477294921875, + "learning_rate": 9.852406114129277e-06, + "loss": 11.3782, + "step": 84890 + }, + { + "epoch": 0.17150337148559494, + "grad_norm": 649.3355712890625, + "learning_rate": 9.852321915757688e-06, + "loss": 40.6609, + "step": 84900 + }, + { + "epoch": 0.17152357211827873, + "grad_norm": 476.6934814453125, + "learning_rate": 9.852237693736469e-06, + "loss": 37.8541, + "step": 84910 + }, + { + "epoch": 0.17154377275096255, + "grad_norm": 325.90057373046875, + "learning_rate": 9.852153448066031e-06, + "loss": 30.1682, + "step": 84920 + }, + { + "epoch": 0.17156397338364637, + "grad_norm": 440.6416931152344, + "learning_rate": 9.852069178746786e-06, + "loss": 27.6828, + "step": 84930 + }, + { + "epoch": 0.1715841740163302, + "grad_norm": 1373.3228759765625, + "learning_rate": 9.851984885779147e-06, + "loss": 24.4666, + "step": 84940 + }, + { + "epoch": 0.171604374649014, + "grad_norm": 297.64324951171875, + "learning_rate": 9.85190056916352e-06, + "loss": 31.457, + "step": 84950 + }, + { + "epoch": 0.17162457528169783, + "grad_norm": 559.4208374023438, + "learning_rate": 9.851816228900317e-06, + "loss": 16.3533, + "step": 84960 + }, + { + "epoch": 0.17164477591438163, + "grad_norm": 535.56201171875, + "learning_rate": 9.85173186498995e-06, + "loss": 9.1985, + "step": 84970 + }, + { + "epoch": 0.17166497654706545, + "grad_norm": 35.89240264892578, + "learning_rate": 9.851647477432834e-06, + "loss": 15.0027, + "step": 84980 + }, + { + "epoch": 0.17168517717974927, + "grad_norm": 407.8486022949219, + "learning_rate": 9.851563066229373e-06, + "loss": 41.9, + "step": 84990 + }, + { + "epoch": 0.17170537781243309, + "grad_norm": 476.017333984375, + "learning_rate": 9.851478631379982e-06, + "loss": 30.2493, + "step": 85000 + }, + { + "epoch": 0.1717255784451169, + "grad_norm": 438.6534118652344, + "learning_rate": 9.851394172885075e-06, + "loss": 32.9419, + "step": 85010 + }, + { + "epoch": 0.17174577907780073, + "grad_norm": 263.57464599609375, + "learning_rate": 9.85130969074506e-06, + "loss": 27.4015, + "step": 85020 + }, + { + "epoch": 0.17176597971048455, + "grad_norm": 369.37799072265625, + "learning_rate": 9.851225184960349e-06, + "loss": 34.6385, + "step": 85030 + }, + { + "epoch": 0.17178618034316834, + "grad_norm": 590.7777099609375, + "learning_rate": 9.851140655531357e-06, + "loss": 21.8534, + "step": 85040 + }, + { + "epoch": 0.17180638097585216, + "grad_norm": 953.6223754882812, + "learning_rate": 9.851056102458492e-06, + "loss": 23.6818, + "step": 85050 + }, + { + "epoch": 0.17182658160853598, + "grad_norm": 306.7108459472656, + "learning_rate": 9.85097152574217e-06, + "loss": 17.0797, + "step": 85060 + }, + { + "epoch": 0.1718467822412198, + "grad_norm": 
671.138671875, + "learning_rate": 9.8508869253828e-06, + "loss": 30.8856, + "step": 85070 + }, + { + "epoch": 0.17186698287390362, + "grad_norm": 736.0115356445312, + "learning_rate": 9.850802301380793e-06, + "loss": 18.4232, + "step": 85080 + }, + { + "epoch": 0.17188718350658744, + "grad_norm": 24.5797061920166, + "learning_rate": 9.850717653736566e-06, + "loss": 20.5339, + "step": 85090 + }, + { + "epoch": 0.17190738413927123, + "grad_norm": 314.4891662597656, + "learning_rate": 9.85063298245053e-06, + "loss": 29.7254, + "step": 85100 + }, + { + "epoch": 0.17192758477195505, + "grad_norm": 233.59913635253906, + "learning_rate": 9.850548287523096e-06, + "loss": 33.0526, + "step": 85110 + }, + { + "epoch": 0.17194778540463887, + "grad_norm": 505.6223449707031, + "learning_rate": 9.850463568954679e-06, + "loss": 11.4925, + "step": 85120 + }, + { + "epoch": 0.1719679860373227, + "grad_norm": 657.2791137695312, + "learning_rate": 9.85037882674569e-06, + "loss": 20.6603, + "step": 85130 + }, + { + "epoch": 0.1719881866700065, + "grad_norm": 478.3403015136719, + "learning_rate": 9.850294060896544e-06, + "loss": 25.793, + "step": 85140 + }, + { + "epoch": 0.17200838730269033, + "grad_norm": 396.8893127441406, + "learning_rate": 9.850209271407653e-06, + "loss": 18.696, + "step": 85150 + }, + { + "epoch": 0.17202858793537412, + "grad_norm": 3137.254150390625, + "learning_rate": 9.850124458279429e-06, + "loss": 47.7481, + "step": 85160 + }, + { + "epoch": 0.17204878856805794, + "grad_norm": 836.9934692382812, + "learning_rate": 9.850039621512287e-06, + "loss": 23.6223, + "step": 85170 + }, + { + "epoch": 0.17206898920074176, + "grad_norm": 274.5120849609375, + "learning_rate": 9.849954761106642e-06, + "loss": 17.3965, + "step": 85180 + }, + { + "epoch": 0.17208918983342558, + "grad_norm": 487.4518737792969, + "learning_rate": 9.849869877062903e-06, + "loss": 29.7293, + "step": 85190 + }, + { + "epoch": 0.1721093904661094, + "grad_norm": 590.492431640625, + "learning_rate": 9.849784969381488e-06, + "loss": 27.5779, + "step": 85200 + }, + { + "epoch": 0.17212959109879322, + "grad_norm": 1912.063720703125, + "learning_rate": 9.849700038062808e-06, + "loss": 36.5601, + "step": 85210 + }, + { + "epoch": 0.17214979173147704, + "grad_norm": 219.05401611328125, + "learning_rate": 9.849615083107279e-06, + "loss": 26.0575, + "step": 85220 + }, + { + "epoch": 0.17216999236416083, + "grad_norm": 635.5755615234375, + "learning_rate": 9.849530104515314e-06, + "loss": 18.6673, + "step": 85230 + }, + { + "epoch": 0.17219019299684465, + "grad_norm": 503.1160583496094, + "learning_rate": 9.849445102287328e-06, + "loss": 12.5884, + "step": 85240 + }, + { + "epoch": 0.17221039362952847, + "grad_norm": 421.8708801269531, + "learning_rate": 9.849360076423736e-06, + "loss": 31.2692, + "step": 85250 + }, + { + "epoch": 0.1722305942622123, + "grad_norm": 826.088134765625, + "learning_rate": 9.849275026924949e-06, + "loss": 31.0778, + "step": 85260 + }, + { + "epoch": 0.17225079489489611, + "grad_norm": 1230.977783203125, + "learning_rate": 9.849189953791385e-06, + "loss": 39.1126, + "step": 85270 + }, + { + "epoch": 0.17227099552757993, + "grad_norm": 291.8015441894531, + "learning_rate": 9.849104857023455e-06, + "loss": 18.5041, + "step": 85280 + }, + { + "epoch": 0.17229119616026373, + "grad_norm": 243.10191345214844, + "learning_rate": 9.849019736621578e-06, + "loss": 34.2812, + "step": 85290 + }, + { + "epoch": 0.17231139679294755, + "grad_norm": 4.916224002838135, + "learning_rate": 9.848934592586165e-06, + "loss": 
23.2434, + "step": 85300 + }, + { + "epoch": 0.17233159742563137, + "grad_norm": 353.7478942871094, + "learning_rate": 9.848849424917636e-06, + "loss": 25.1351, + "step": 85310 + }, + { + "epoch": 0.1723517980583152, + "grad_norm": 230.8633270263672, + "learning_rate": 9.848764233616401e-06, + "loss": 33.0791, + "step": 85320 + }, + { + "epoch": 0.172371998690999, + "grad_norm": 1136.6898193359375, + "learning_rate": 9.848679018682879e-06, + "loss": 25.9801, + "step": 85330 + }, + { + "epoch": 0.17239219932368283, + "grad_norm": 288.8282775878906, + "learning_rate": 9.848593780117482e-06, + "loss": 27.5867, + "step": 85340 + }, + { + "epoch": 0.17241239995636665, + "grad_norm": 211.8262176513672, + "learning_rate": 9.848508517920626e-06, + "loss": 26.1852, + "step": 85350 + }, + { + "epoch": 0.17243260058905044, + "grad_norm": 443.3017272949219, + "learning_rate": 9.84842323209273e-06, + "loss": 20.2442, + "step": 85360 + }, + { + "epoch": 0.17245280122173426, + "grad_norm": 618.8023071289062, + "learning_rate": 9.848337922634205e-06, + "loss": 22.1683, + "step": 85370 + }, + { + "epoch": 0.17247300185441808, + "grad_norm": 587.0542602539062, + "learning_rate": 9.84825258954547e-06, + "loss": 25.7737, + "step": 85380 + }, + { + "epoch": 0.1724932024871019, + "grad_norm": 109.8345947265625, + "learning_rate": 9.84816723282694e-06, + "loss": 24.3801, + "step": 85390 + }, + { + "epoch": 0.17251340311978572, + "grad_norm": 1000.1812133789062, + "learning_rate": 9.84808185247903e-06, + "loss": 37.4684, + "step": 85400 + }, + { + "epoch": 0.17253360375246954, + "grad_norm": 652.8297729492188, + "learning_rate": 9.847996448502159e-06, + "loss": 36.792, + "step": 85410 + }, + { + "epoch": 0.17255380438515333, + "grad_norm": 1045.991455078125, + "learning_rate": 9.84791102089674e-06, + "loss": 38.8997, + "step": 85420 + }, + { + "epoch": 0.17257400501783715, + "grad_norm": 268.00225830078125, + "learning_rate": 9.84782556966319e-06, + "loss": 26.6358, + "step": 85430 + }, + { + "epoch": 0.17259420565052097, + "grad_norm": 119.07523345947266, + "learning_rate": 9.847740094801928e-06, + "loss": 19.3829, + "step": 85440 + }, + { + "epoch": 0.1726144062832048, + "grad_norm": 970.8414916992188, + "learning_rate": 9.847654596313368e-06, + "loss": 34.3435, + "step": 85450 + }, + { + "epoch": 0.1726346069158886, + "grad_norm": 625.1595458984375, + "learning_rate": 9.847569074197927e-06, + "loss": 16.7644, + "step": 85460 + }, + { + "epoch": 0.17265480754857243, + "grad_norm": 464.2474060058594, + "learning_rate": 9.847483528456021e-06, + "loss": 29.0098, + "step": 85470 + }, + { + "epoch": 0.17267500818125622, + "grad_norm": 1429.7886962890625, + "learning_rate": 9.84739795908807e-06, + "loss": 23.6252, + "step": 85480 + }, + { + "epoch": 0.17269520881394004, + "grad_norm": 201.17111206054688, + "learning_rate": 9.84731236609449e-06, + "loss": 28.2237, + "step": 85490 + }, + { + "epoch": 0.17271540944662386, + "grad_norm": 727.03466796875, + "learning_rate": 9.847226749475696e-06, + "loss": 17.4406, + "step": 85500 + }, + { + "epoch": 0.17273561007930768, + "grad_norm": 18.44334602355957, + "learning_rate": 9.847141109232105e-06, + "loss": 50.6119, + "step": 85510 + }, + { + "epoch": 0.1727558107119915, + "grad_norm": 290.5135498046875, + "learning_rate": 9.84705544536414e-06, + "loss": 23.2972, + "step": 85520 + }, + { + "epoch": 0.17277601134467532, + "grad_norm": 268.5848083496094, + "learning_rate": 9.846969757872212e-06, + "loss": 32.5315, + "step": 85530 + }, + { + "epoch": 0.17279621197735914, + 
"grad_norm": 365.1296081542969, + "learning_rate": 9.846884046756742e-06, + "loss": 28.6295, + "step": 85540 + }, + { + "epoch": 0.17281641261004294, + "grad_norm": 184.7109375, + "learning_rate": 9.846798312018147e-06, + "loss": 21.2583, + "step": 85550 + }, + { + "epoch": 0.17283661324272676, + "grad_norm": 487.54742431640625, + "learning_rate": 9.846712553656845e-06, + "loss": 20.1115, + "step": 85560 + }, + { + "epoch": 0.17285681387541058, + "grad_norm": 369.90167236328125, + "learning_rate": 9.846626771673254e-06, + "loss": 30.3869, + "step": 85570 + }, + { + "epoch": 0.1728770145080944, + "grad_norm": 373.9340515136719, + "learning_rate": 9.846540966067793e-06, + "loss": 23.7879, + "step": 85580 + }, + { + "epoch": 0.17289721514077822, + "grad_norm": 461.4269104003906, + "learning_rate": 9.846455136840876e-06, + "loss": 32.9735, + "step": 85590 + }, + { + "epoch": 0.17291741577346204, + "grad_norm": 240.84036254882812, + "learning_rate": 9.846369283992927e-06, + "loss": 12.2622, + "step": 85600 + }, + { + "epoch": 0.17293761640614583, + "grad_norm": 375.2400207519531, + "learning_rate": 9.846283407524362e-06, + "loss": 18.1758, + "step": 85610 + }, + { + "epoch": 0.17295781703882965, + "grad_norm": 439.8492736816406, + "learning_rate": 9.846197507435598e-06, + "loss": 28.5939, + "step": 85620 + }, + { + "epoch": 0.17297801767151347, + "grad_norm": 313.9316101074219, + "learning_rate": 9.846111583727056e-06, + "loss": 12.9351, + "step": 85630 + }, + { + "epoch": 0.1729982183041973, + "grad_norm": 320.2373352050781, + "learning_rate": 9.846025636399152e-06, + "loss": 34.522, + "step": 85640 + }, + { + "epoch": 0.1730184189368811, + "grad_norm": 392.8552551269531, + "learning_rate": 9.845939665452309e-06, + "loss": 20.1833, + "step": 85650 + }, + { + "epoch": 0.17303861956956493, + "grad_norm": 406.9341735839844, + "learning_rate": 9.845853670886945e-06, + "loss": 30.3154, + "step": 85660 + }, + { + "epoch": 0.17305882020224875, + "grad_norm": 1090.675048828125, + "learning_rate": 9.845767652703475e-06, + "loss": 44.2072, + "step": 85670 + }, + { + "epoch": 0.17307902083493254, + "grad_norm": 434.3168029785156, + "learning_rate": 9.845681610902323e-06, + "loss": 40.6966, + "step": 85680 + }, + { + "epoch": 0.17309922146761636, + "grad_norm": 528.4317626953125, + "learning_rate": 9.845595545483906e-06, + "loss": 13.6988, + "step": 85690 + }, + { + "epoch": 0.17311942210030018, + "grad_norm": 299.3660583496094, + "learning_rate": 9.845509456448642e-06, + "loss": 21.5124, + "step": 85700 + }, + { + "epoch": 0.173139622732984, + "grad_norm": 355.94366455078125, + "learning_rate": 9.845423343796957e-06, + "loss": 35.3705, + "step": 85710 + }, + { + "epoch": 0.17315982336566782, + "grad_norm": 252.6102752685547, + "learning_rate": 9.845337207529264e-06, + "loss": 14.7852, + "step": 85720 + }, + { + "epoch": 0.17318002399835164, + "grad_norm": 317.8782958984375, + "learning_rate": 9.845251047645984e-06, + "loss": 15.0958, + "step": 85730 + }, + { + "epoch": 0.17320022463103543, + "grad_norm": 671.6204223632812, + "learning_rate": 9.84516486414754e-06, + "loss": 39.5295, + "step": 85740 + }, + { + "epoch": 0.17322042526371925, + "grad_norm": 388.7713623046875, + "learning_rate": 9.845078657034348e-06, + "loss": 30.8397, + "step": 85750 + }, + { + "epoch": 0.17324062589640307, + "grad_norm": 567.0277099609375, + "learning_rate": 9.844992426306832e-06, + "loss": 18.9498, + "step": 85760 + }, + { + "epoch": 0.1732608265290869, + "grad_norm": 291.2276611328125, + "learning_rate": 
9.84490617196541e-06, + "loss": 14.8765, + "step": 85770 + }, + { + "epoch": 0.1732810271617707, + "grad_norm": 170.3280792236328, + "learning_rate": 9.844819894010502e-06, + "loss": 11.7575, + "step": 85780 + }, + { + "epoch": 0.17330122779445453, + "grad_norm": 353.33306884765625, + "learning_rate": 9.84473359244253e-06, + "loss": 9.9312, + "step": 85790 + }, + { + "epoch": 0.17332142842713832, + "grad_norm": 567.3309326171875, + "learning_rate": 9.844647267261915e-06, + "loss": 16.3679, + "step": 85800 + }, + { + "epoch": 0.17334162905982214, + "grad_norm": 172.38259887695312, + "learning_rate": 9.844560918469076e-06, + "loss": 35.616, + "step": 85810 + }, + { + "epoch": 0.17336182969250596, + "grad_norm": 507.5289611816406, + "learning_rate": 9.844474546064436e-06, + "loss": 39.5978, + "step": 85820 + }, + { + "epoch": 0.17338203032518978, + "grad_norm": 293.0918884277344, + "learning_rate": 9.844388150048413e-06, + "loss": 27.7895, + "step": 85830 + }, + { + "epoch": 0.1734022309578736, + "grad_norm": 375.8603515625, + "learning_rate": 9.844301730421431e-06, + "loss": 17.5738, + "step": 85840 + }, + { + "epoch": 0.17342243159055742, + "grad_norm": 121.36817932128906, + "learning_rate": 9.84421528718391e-06, + "loss": 25.1292, + "step": 85850 + }, + { + "epoch": 0.17344263222324124, + "grad_norm": 51.05403137207031, + "learning_rate": 9.844128820336269e-06, + "loss": 27.508, + "step": 85860 + }, + { + "epoch": 0.17346283285592504, + "grad_norm": 210.55831909179688, + "learning_rate": 9.844042329878934e-06, + "loss": 27.2181, + "step": 85870 + }, + { + "epoch": 0.17348303348860886, + "grad_norm": 695.690673828125, + "learning_rate": 9.843955815812322e-06, + "loss": 27.5437, + "step": 85880 + }, + { + "epoch": 0.17350323412129268, + "grad_norm": 936.8657836914062, + "learning_rate": 9.843869278136857e-06, + "loss": 35.3513, + "step": 85890 + }, + { + "epoch": 0.1735234347539765, + "grad_norm": 485.0730895996094, + "learning_rate": 9.843782716852963e-06, + "loss": 32.5886, + "step": 85900 + }, + { + "epoch": 0.17354363538666032, + "grad_norm": 853.9366455078125, + "learning_rate": 9.843696131961058e-06, + "loss": 33.1918, + "step": 85910 + }, + { + "epoch": 0.17356383601934414, + "grad_norm": 295.1344909667969, + "learning_rate": 9.843609523461565e-06, + "loss": 20.3857, + "step": 85920 + }, + { + "epoch": 0.17358403665202793, + "grad_norm": 115.91895294189453, + "learning_rate": 9.843522891354908e-06, + "loss": 14.5028, + "step": 85930 + }, + { + "epoch": 0.17360423728471175, + "grad_norm": 5.01973295211792, + "learning_rate": 9.843436235641506e-06, + "loss": 16.2039, + "step": 85940 + }, + { + "epoch": 0.17362443791739557, + "grad_norm": 747.1133422851562, + "learning_rate": 9.843349556321787e-06, + "loss": 23.1308, + "step": 85950 + }, + { + "epoch": 0.1736446385500794, + "grad_norm": 187.75865173339844, + "learning_rate": 9.843262853396164e-06, + "loss": 24.8493, + "step": 85960 + }, + { + "epoch": 0.1736648391827632, + "grad_norm": 302.3079528808594, + "learning_rate": 9.84317612686507e-06, + "loss": 21.0599, + "step": 85970 + }, + { + "epoch": 0.17368503981544703, + "grad_norm": 604.0231323242188, + "learning_rate": 9.843089376728922e-06, + "loss": 36.2197, + "step": 85980 + }, + { + "epoch": 0.17370524044813085, + "grad_norm": 125.68768310546875, + "learning_rate": 9.843002602988143e-06, + "loss": 24.3327, + "step": 85990 + }, + { + "epoch": 0.17372544108081464, + "grad_norm": 15.420726776123047, + "learning_rate": 9.842915805643156e-06, + "loss": 23.0155, + "step": 86000 + }, + { 
+ "epoch": 0.17374564171349846, + "grad_norm": 621.4620361328125, + "learning_rate": 9.842828984694385e-06, + "loss": 15.173, + "step": 86010 + }, + { + "epoch": 0.17376584234618228, + "grad_norm": 630.4441528320312, + "learning_rate": 9.842742140142255e-06, + "loss": 31.3845, + "step": 86020 + }, + { + "epoch": 0.1737860429788661, + "grad_norm": 446.2193603515625, + "learning_rate": 9.842655271987185e-06, + "loss": 19.4188, + "step": 86030 + }, + { + "epoch": 0.17380624361154992, + "grad_norm": 628.043212890625, + "learning_rate": 9.8425683802296e-06, + "loss": 32.6731, + "step": 86040 + }, + { + "epoch": 0.17382644424423374, + "grad_norm": 191.02517700195312, + "learning_rate": 9.842481464869926e-06, + "loss": 32.1473, + "step": 86050 + }, + { + "epoch": 0.17384664487691753, + "grad_norm": 886.7763671875, + "learning_rate": 9.842394525908585e-06, + "loss": 46.2831, + "step": 86060 + }, + { + "epoch": 0.17386684550960135, + "grad_norm": 503.0904235839844, + "learning_rate": 9.842307563345999e-06, + "loss": 18.0938, + "step": 86070 + }, + { + "epoch": 0.17388704614228517, + "grad_norm": 584.3623657226562, + "learning_rate": 9.842220577182592e-06, + "loss": 23.6258, + "step": 86080 + }, + { + "epoch": 0.173907246774969, + "grad_norm": 102.97643280029297, + "learning_rate": 9.842133567418793e-06, + "loss": 28.5246, + "step": 86090 + }, + { + "epoch": 0.1739274474076528, + "grad_norm": 96.3404312133789, + "learning_rate": 9.84204653405502e-06, + "loss": 38.0371, + "step": 86100 + }, + { + "epoch": 0.17394764804033663, + "grad_norm": 542.3681030273438, + "learning_rate": 9.841959477091698e-06, + "loss": 12.9829, + "step": 86110 + }, + { + "epoch": 0.17396784867302043, + "grad_norm": 432.1968994140625, + "learning_rate": 9.841872396529255e-06, + "loss": 42.8921, + "step": 86120 + }, + { + "epoch": 0.17398804930570425, + "grad_norm": 363.61370849609375, + "learning_rate": 9.841785292368113e-06, + "loss": 31.3374, + "step": 86130 + }, + { + "epoch": 0.17400824993838807, + "grad_norm": 1183.536865234375, + "learning_rate": 9.841698164608696e-06, + "loss": 31.7024, + "step": 86140 + }, + { + "epoch": 0.17402845057107189, + "grad_norm": 323.79974365234375, + "learning_rate": 9.841611013251428e-06, + "loss": 25.19, + "step": 86150 + }, + { + "epoch": 0.1740486512037557, + "grad_norm": 659.388427734375, + "learning_rate": 9.841523838296738e-06, + "loss": 27.4956, + "step": 86160 + }, + { + "epoch": 0.17406885183643953, + "grad_norm": 655.215576171875, + "learning_rate": 9.841436639745046e-06, + "loss": 21.8667, + "step": 86170 + }, + { + "epoch": 0.17408905246912335, + "grad_norm": 12.534440994262695, + "learning_rate": 9.84134941759678e-06, + "loss": 23.089, + "step": 86180 + }, + { + "epoch": 0.17410925310180714, + "grad_norm": 508.0062561035156, + "learning_rate": 9.841262171852364e-06, + "loss": 32.2547, + "step": 86190 + }, + { + "epoch": 0.17412945373449096, + "grad_norm": 829.3807373046875, + "learning_rate": 9.841174902512223e-06, + "loss": 18.1947, + "step": 86200 + }, + { + "epoch": 0.17414965436717478, + "grad_norm": 103.1199951171875, + "learning_rate": 9.841087609576782e-06, + "loss": 22.1132, + "step": 86210 + }, + { + "epoch": 0.1741698549998586, + "grad_norm": 655.1348266601562, + "learning_rate": 9.841000293046469e-06, + "loss": 17.729, + "step": 86220 + }, + { + "epoch": 0.17419005563254242, + "grad_norm": 319.682861328125, + "learning_rate": 9.840912952921707e-06, + "loss": 24.888, + "step": 86230 + }, + { + "epoch": 0.17421025626522624, + "grad_norm": 664.3706665039062, + 
"learning_rate": 9.840825589202922e-06, + "loss": 35.6744, + "step": 86240 + }, + { + "epoch": 0.17423045689791003, + "grad_norm": 440.5725402832031, + "learning_rate": 9.84073820189054e-06, + "loss": 27.2891, + "step": 86250 + }, + { + "epoch": 0.17425065753059385, + "grad_norm": 304.0477600097656, + "learning_rate": 9.840650790984988e-06, + "loss": 31.555, + "step": 86260 + }, + { + "epoch": 0.17427085816327767, + "grad_norm": 708.5603637695312, + "learning_rate": 9.84056335648669e-06, + "loss": 23.6433, + "step": 86270 + }, + { + "epoch": 0.1742910587959615, + "grad_norm": 193.69650268554688, + "learning_rate": 9.840475898396073e-06, + "loss": 44.6928, + "step": 86280 + }, + { + "epoch": 0.1743112594286453, + "grad_norm": 157.66673278808594, + "learning_rate": 9.840388416713564e-06, + "loss": 13.4153, + "step": 86290 + }, + { + "epoch": 0.17433146006132913, + "grad_norm": 291.8675537109375, + "learning_rate": 9.84030091143959e-06, + "loss": 22.2489, + "step": 86300 + }, + { + "epoch": 0.17435166069401295, + "grad_norm": 516.598388671875, + "learning_rate": 9.840213382574575e-06, + "loss": 29.6473, + "step": 86310 + }, + { + "epoch": 0.17437186132669674, + "grad_norm": 529.6154174804688, + "learning_rate": 9.840125830118949e-06, + "loss": 29.525, + "step": 86320 + }, + { + "epoch": 0.17439206195938056, + "grad_norm": 835.8650512695312, + "learning_rate": 9.840038254073136e-06, + "loss": 27.0934, + "step": 86330 + }, + { + "epoch": 0.17441226259206438, + "grad_norm": 285.62811279296875, + "learning_rate": 9.839950654437563e-06, + "loss": 28.5629, + "step": 86340 + }, + { + "epoch": 0.1744324632247482, + "grad_norm": 452.54388427734375, + "learning_rate": 9.839863031212657e-06, + "loss": 21.8137, + "step": 86350 + }, + { + "epoch": 0.17445266385743202, + "grad_norm": 604.9288330078125, + "learning_rate": 9.839775384398846e-06, + "loss": 19.8672, + "step": 86360 + }, + { + "epoch": 0.17447286449011584, + "grad_norm": 336.3262023925781, + "learning_rate": 9.839687713996558e-06, + "loss": 17.3938, + "step": 86370 + }, + { + "epoch": 0.17449306512279963, + "grad_norm": 377.794189453125, + "learning_rate": 9.839600020006217e-06, + "loss": 37.7098, + "step": 86380 + }, + { + "epoch": 0.17451326575548345, + "grad_norm": 1111.11083984375, + "learning_rate": 9.839512302428254e-06, + "loss": 27.9636, + "step": 86390 + }, + { + "epoch": 0.17453346638816727, + "grad_norm": 551.1155395507812, + "learning_rate": 9.839424561263094e-06, + "loss": 44.3898, + "step": 86400 + }, + { + "epoch": 0.1745536670208511, + "grad_norm": 305.22747802734375, + "learning_rate": 9.839336796511167e-06, + "loss": 25.7225, + "step": 86410 + }, + { + "epoch": 0.17457386765353491, + "grad_norm": 426.5642395019531, + "learning_rate": 9.839249008172897e-06, + "loss": 25.0607, + "step": 86420 + }, + { + "epoch": 0.17459406828621873, + "grad_norm": 484.6824645996094, + "learning_rate": 9.839161196248717e-06, + "loss": 30.5871, + "step": 86430 + }, + { + "epoch": 0.17461426891890253, + "grad_norm": 225.3271942138672, + "learning_rate": 9.839073360739052e-06, + "loss": 22.7973, + "step": 86440 + }, + { + "epoch": 0.17463446955158635, + "grad_norm": 238.68386840820312, + "learning_rate": 9.838985501644329e-06, + "loss": 8.8989, + "step": 86450 + }, + { + "epoch": 0.17465467018427017, + "grad_norm": 643.7196655273438, + "learning_rate": 9.838897618964978e-06, + "loss": 34.3993, + "step": 86460 + }, + { + "epoch": 0.174674870816954, + "grad_norm": 505.0343933105469, + "learning_rate": 9.838809712701426e-06, + "loss": 34.1341, + 
"step": 86470 + }, + { + "epoch": 0.1746950714496378, + "grad_norm": 531.5932006835938, + "learning_rate": 9.838721782854103e-06, + "loss": 22.0213, + "step": 86480 + }, + { + "epoch": 0.17471527208232163, + "grad_norm": 166.92123413085938, + "learning_rate": 9.838633829423437e-06, + "loss": 20.5236, + "step": 86490 + }, + { + "epoch": 0.17473547271500545, + "grad_norm": 543.1427612304688, + "learning_rate": 9.838545852409857e-06, + "loss": 35.5636, + "step": 86500 + }, + { + "epoch": 0.17475567334768924, + "grad_norm": 595.4486083984375, + "learning_rate": 9.83845785181379e-06, + "loss": 27.4291, + "step": 86510 + }, + { + "epoch": 0.17477587398037306, + "grad_norm": 0.0, + "learning_rate": 9.838369827635668e-06, + "loss": 13.164, + "step": 86520 + }, + { + "epoch": 0.17479607461305688, + "grad_norm": 188.305419921875, + "learning_rate": 9.838281779875918e-06, + "loss": 24.5203, + "step": 86530 + }, + { + "epoch": 0.1748162752457407, + "grad_norm": 354.55157470703125, + "learning_rate": 9.838193708534969e-06, + "loss": 28.7658, + "step": 86540 + }, + { + "epoch": 0.17483647587842452, + "grad_norm": 219.94261169433594, + "learning_rate": 9.83810561361325e-06, + "loss": 30.0866, + "step": 86550 + }, + { + "epoch": 0.17485667651110834, + "grad_norm": 526.7067260742188, + "learning_rate": 9.838017495111191e-06, + "loss": 33.0553, + "step": 86560 + }, + { + "epoch": 0.17487687714379213, + "grad_norm": 427.62835693359375, + "learning_rate": 9.837929353029223e-06, + "loss": 16.9799, + "step": 86570 + }, + { + "epoch": 0.17489707777647595, + "grad_norm": 706.796630859375, + "learning_rate": 9.837841187367774e-06, + "loss": 26.0387, + "step": 86580 + }, + { + "epoch": 0.17491727840915977, + "grad_norm": 691.537109375, + "learning_rate": 9.837752998127272e-06, + "loss": 13.7285, + "step": 86590 + }, + { + "epoch": 0.1749374790418436, + "grad_norm": 211.5023651123047, + "learning_rate": 9.83766478530815e-06, + "loss": 22.2743, + "step": 86600 + }, + { + "epoch": 0.1749576796745274, + "grad_norm": 992.1719360351562, + "learning_rate": 9.837576548910836e-06, + "loss": 31.9646, + "step": 86610 + }, + { + "epoch": 0.17497788030721123, + "grad_norm": 596.32666015625, + "learning_rate": 9.837488288935761e-06, + "loss": 31.6708, + "step": 86620 + }, + { + "epoch": 0.17499808093989505, + "grad_norm": 527.4917602539062, + "learning_rate": 9.837400005383355e-06, + "loss": 23.7666, + "step": 86630 + }, + { + "epoch": 0.17501828157257884, + "grad_norm": 297.6175842285156, + "learning_rate": 9.837311698254048e-06, + "loss": 22.8128, + "step": 86640 + }, + { + "epoch": 0.17503848220526266, + "grad_norm": 765.5863037109375, + "learning_rate": 9.837223367548271e-06, + "loss": 52.1946, + "step": 86650 + }, + { + "epoch": 0.17505868283794648, + "grad_norm": 0.0, + "learning_rate": 9.837135013266452e-06, + "loss": 12.7448, + "step": 86660 + }, + { + "epoch": 0.1750788834706303, + "grad_norm": 52.100852966308594, + "learning_rate": 9.837046635409026e-06, + "loss": 22.3195, + "step": 86670 + }, + { + "epoch": 0.17509908410331412, + "grad_norm": 118.32286071777344, + "learning_rate": 9.83695823397642e-06, + "loss": 20.3009, + "step": 86680 + }, + { + "epoch": 0.17511928473599794, + "grad_norm": 68.52686309814453, + "learning_rate": 9.836869808969068e-06, + "loss": 18.1231, + "step": 86690 + }, + { + "epoch": 0.17513948536868174, + "grad_norm": 98.37853240966797, + "learning_rate": 9.836781360387396e-06, + "loss": 22.4627, + "step": 86700 + }, + { + "epoch": 0.17515968600136556, + "grad_norm": 633.7237548828125, + 
"learning_rate": 9.83669288823184e-06, + "loss": 23.0583, + "step": 86710 + }, + { + "epoch": 0.17517988663404938, + "grad_norm": 349.5272216796875, + "learning_rate": 9.836604392502829e-06, + "loss": 47.9853, + "step": 86720 + }, + { + "epoch": 0.1752000872667332, + "grad_norm": 461.8108825683594, + "learning_rate": 9.836515873200796e-06, + "loss": 29.9733, + "step": 86730 + }, + { + "epoch": 0.17522028789941702, + "grad_norm": 2400.55908203125, + "learning_rate": 9.83642733032617e-06, + "loss": 46.5731, + "step": 86740 + }, + { + "epoch": 0.17524048853210084, + "grad_norm": 109.9241714477539, + "learning_rate": 9.836338763879386e-06, + "loss": 15.8482, + "step": 86750 + }, + { + "epoch": 0.17526068916478463, + "grad_norm": 322.31573486328125, + "learning_rate": 9.83625017386087e-06, + "loss": 27.7555, + "step": 86760 + }, + { + "epoch": 0.17528088979746845, + "grad_norm": 241.99822998046875, + "learning_rate": 9.836161560271058e-06, + "loss": 24.6157, + "step": 86770 + }, + { + "epoch": 0.17530109043015227, + "grad_norm": 371.5823669433594, + "learning_rate": 9.836072923110384e-06, + "loss": 27.911, + "step": 86780 + }, + { + "epoch": 0.1753212910628361, + "grad_norm": 419.4341125488281, + "learning_rate": 9.835984262379275e-06, + "loss": 35.3021, + "step": 86790 + }, + { + "epoch": 0.1753414916955199, + "grad_norm": 201.52525329589844, + "learning_rate": 9.835895578078165e-06, + "loss": 30.3006, + "step": 86800 + }, + { + "epoch": 0.17536169232820373, + "grad_norm": 648.3505249023438, + "learning_rate": 9.835806870207487e-06, + "loss": 13.3086, + "step": 86810 + }, + { + "epoch": 0.17538189296088755, + "grad_norm": 518.4767456054688, + "learning_rate": 9.835718138767672e-06, + "loss": 32.3735, + "step": 86820 + }, + { + "epoch": 0.17540209359357134, + "grad_norm": 358.6706848144531, + "learning_rate": 9.835629383759155e-06, + "loss": 12.7865, + "step": 86830 + }, + { + "epoch": 0.17542229422625516, + "grad_norm": 423.8340148925781, + "learning_rate": 9.835540605182366e-06, + "loss": 24.5132, + "step": 86840 + }, + { + "epoch": 0.17544249485893898, + "grad_norm": 1237.3050537109375, + "learning_rate": 9.835451803037738e-06, + "loss": 34.7186, + "step": 86850 + }, + { + "epoch": 0.1754626954916228, + "grad_norm": 684.8671875, + "learning_rate": 9.835362977325703e-06, + "loss": 21.6767, + "step": 86860 + }, + { + "epoch": 0.17548289612430662, + "grad_norm": 247.02529907226562, + "learning_rate": 9.835274128046698e-06, + "loss": 20.9051, + "step": 86870 + }, + { + "epoch": 0.17550309675699044, + "grad_norm": 740.8329467773438, + "learning_rate": 9.835185255201153e-06, + "loss": 31.4438, + "step": 86880 + }, + { + "epoch": 0.17552329738967423, + "grad_norm": 230.60369873046875, + "learning_rate": 9.835096358789501e-06, + "loss": 28.1067, + "step": 86890 + }, + { + "epoch": 0.17554349802235805, + "grad_norm": 291.94354248046875, + "learning_rate": 9.835007438812177e-06, + "loss": 26.0546, + "step": 86900 + }, + { + "epoch": 0.17556369865504187, + "grad_norm": 477.5041198730469, + "learning_rate": 9.834918495269611e-06, + "loss": 20.2462, + "step": 86910 + }, + { + "epoch": 0.1755838992877257, + "grad_norm": 305.2869873046875, + "learning_rate": 9.83482952816224e-06, + "loss": 19.1239, + "step": 86920 + }, + { + "epoch": 0.1756040999204095, + "grad_norm": 289.0022888183594, + "learning_rate": 9.834740537490495e-06, + "loss": 45.3907, + "step": 86930 + }, + { + "epoch": 0.17562430055309333, + "grad_norm": 197.3190155029297, + "learning_rate": 9.834651523254812e-06, + "loss": 23.1044, + "step": 
86940 + }, + { + "epoch": 0.17564450118577715, + "grad_norm": 97.71805572509766, + "learning_rate": 9.834562485455622e-06, + "loss": 10.2724, + "step": 86950 + }, + { + "epoch": 0.17566470181846094, + "grad_norm": 300.4330139160156, + "learning_rate": 9.834473424093364e-06, + "loss": 18.5511, + "step": 86960 + }, + { + "epoch": 0.17568490245114476, + "grad_norm": 543.9212646484375, + "learning_rate": 9.834384339168468e-06, + "loss": 20.8691, + "step": 86970 + }, + { + "epoch": 0.17570510308382858, + "grad_norm": 623.2275390625, + "learning_rate": 9.834295230681368e-06, + "loss": 22.9184, + "step": 86980 + }, + { + "epoch": 0.1757253037165124, + "grad_norm": 573.5552368164062, + "learning_rate": 9.834206098632499e-06, + "loss": 19.0464, + "step": 86990 + }, + { + "epoch": 0.17574550434919622, + "grad_norm": 536.0875854492188, + "learning_rate": 9.834116943022299e-06, + "loss": 28.9614, + "step": 87000 + }, + { + "epoch": 0.17576570498188004, + "grad_norm": 142.19651794433594, + "learning_rate": 9.834027763851196e-06, + "loss": 21.2632, + "step": 87010 + }, + { + "epoch": 0.17578590561456384, + "grad_norm": 276.2485656738281, + "learning_rate": 9.833938561119629e-06, + "loss": 35.1163, + "step": 87020 + }, + { + "epoch": 0.17580610624724766, + "grad_norm": 258.3413391113281, + "learning_rate": 9.833849334828033e-06, + "loss": 22.3229, + "step": 87030 + }, + { + "epoch": 0.17582630687993148, + "grad_norm": 228.56153869628906, + "learning_rate": 9.833760084976838e-06, + "loss": 11.6619, + "step": 87040 + }, + { + "epoch": 0.1758465075126153, + "grad_norm": 462.5628662109375, + "learning_rate": 9.833670811566485e-06, + "loss": 18.0792, + "step": 87050 + }, + { + "epoch": 0.17586670814529912, + "grad_norm": 403.3442687988281, + "learning_rate": 9.833581514597408e-06, + "loss": 26.6091, + "step": 87060 + }, + { + "epoch": 0.17588690877798294, + "grad_norm": 405.4480895996094, + "learning_rate": 9.833492194070039e-06, + "loss": 14.3622, + "step": 87070 + }, + { + "epoch": 0.17590710941066673, + "grad_norm": 356.7472839355469, + "learning_rate": 9.833402849984815e-06, + "loss": 33.6278, + "step": 87080 + }, + { + "epoch": 0.17592731004335055, + "grad_norm": 224.11256408691406, + "learning_rate": 9.833313482342173e-06, + "loss": 6.896, + "step": 87090 + }, + { + "epoch": 0.17594751067603437, + "grad_norm": 257.6687927246094, + "learning_rate": 9.833224091142548e-06, + "loss": 22.3012, + "step": 87100 + }, + { + "epoch": 0.1759677113087182, + "grad_norm": 246.01637268066406, + "learning_rate": 9.833134676386373e-06, + "loss": 31.0415, + "step": 87110 + }, + { + "epoch": 0.175987911941402, + "grad_norm": 257.9366455078125, + "learning_rate": 9.833045238074085e-06, + "loss": 34.6887, + "step": 87120 + }, + { + "epoch": 0.17600811257408583, + "grad_norm": 670.2232666015625, + "learning_rate": 9.832955776206123e-06, + "loss": 26.4223, + "step": 87130 + }, + { + "epoch": 0.17602831320676965, + "grad_norm": 462.1443786621094, + "learning_rate": 9.832866290782922e-06, + "loss": 17.0104, + "step": 87140 + }, + { + "epoch": 0.17604851383945344, + "grad_norm": 312.8056640625, + "learning_rate": 9.832776781804913e-06, + "loss": 25.8889, + "step": 87150 + }, + { + "epoch": 0.17606871447213726, + "grad_norm": 519.9537963867188, + "learning_rate": 9.83268724927254e-06, + "loss": 53.274, + "step": 87160 + }, + { + "epoch": 0.17608891510482108, + "grad_norm": 492.2060852050781, + "learning_rate": 9.832597693186233e-06, + "loss": 25.1681, + "step": 87170 + }, + { + "epoch": 0.1761091157375049, + "grad_norm": 
442.8360290527344, + "learning_rate": 9.83250811354643e-06, + "loss": 18.9079, + "step": 87180 + }, + { + "epoch": 0.17612931637018872, + "grad_norm": 935.154052734375, + "learning_rate": 9.832418510353572e-06, + "loss": 30.2418, + "step": 87190 + }, + { + "epoch": 0.17614951700287254, + "grad_norm": 156.57376098632812, + "learning_rate": 9.832328883608088e-06, + "loss": 24.3359, + "step": 87200 + }, + { + "epoch": 0.17616971763555633, + "grad_norm": 440.4377746582031, + "learning_rate": 9.832239233310421e-06, + "loss": 23.9274, + "step": 87210 + }, + { + "epoch": 0.17618991826824015, + "grad_norm": 296.2905578613281, + "learning_rate": 9.832149559461009e-06, + "loss": 21.4329, + "step": 87220 + }, + { + "epoch": 0.17621011890092397, + "grad_norm": 57.15469741821289, + "learning_rate": 9.832059862060282e-06, + "loss": 35.4871, + "step": 87230 + }, + { + "epoch": 0.1762303195336078, + "grad_norm": 461.4469909667969, + "learning_rate": 9.831970141108684e-06, + "loss": 35.2258, + "step": 87240 + }, + { + "epoch": 0.1762505201662916, + "grad_norm": 459.0410461425781, + "learning_rate": 9.831880396606649e-06, + "loss": 15.5546, + "step": 87250 + }, + { + "epoch": 0.17627072079897543, + "grad_norm": 633.9259643554688, + "learning_rate": 9.831790628554613e-06, + "loss": 28.6745, + "step": 87260 + }, + { + "epoch": 0.17629092143165925, + "grad_norm": 545.2411499023438, + "learning_rate": 9.831700836953017e-06, + "loss": 25.3021, + "step": 87270 + }, + { + "epoch": 0.17631112206434305, + "grad_norm": 389.36090087890625, + "learning_rate": 9.831611021802297e-06, + "loss": 33.1733, + "step": 87280 + }, + { + "epoch": 0.17633132269702687, + "grad_norm": 383.3346252441406, + "learning_rate": 9.83152118310289e-06, + "loss": 26.8975, + "step": 87290 + }, + { + "epoch": 0.17635152332971069, + "grad_norm": 550.85791015625, + "learning_rate": 9.831431320855235e-06, + "loss": 28.9455, + "step": 87300 + }, + { + "epoch": 0.1763717239623945, + "grad_norm": 721.927978515625, + "learning_rate": 9.831341435059772e-06, + "loss": 17.3499, + "step": 87310 + }, + { + "epoch": 0.17639192459507833, + "grad_norm": 742.4193725585938, + "learning_rate": 9.831251525716934e-06, + "loss": 23.8338, + "step": 87320 + }, + { + "epoch": 0.17641212522776215, + "grad_norm": 216.81134033203125, + "learning_rate": 9.831161592827164e-06, + "loss": 30.7533, + "step": 87330 + }, + { + "epoch": 0.17643232586044594, + "grad_norm": 440.5499267578125, + "learning_rate": 9.831071636390899e-06, + "loss": 19.2126, + "step": 87340 + }, + { + "epoch": 0.17645252649312976, + "grad_norm": 222.04574584960938, + "learning_rate": 9.830981656408575e-06, + "loss": 28.4134, + "step": 87350 + }, + { + "epoch": 0.17647272712581358, + "grad_norm": 326.693603515625, + "learning_rate": 9.830891652880632e-06, + "loss": 23.4, + "step": 87360 + }, + { + "epoch": 0.1764929277584974, + "grad_norm": 380.5382385253906, + "learning_rate": 9.83080162580751e-06, + "loss": 21.9362, + "step": 87370 + }, + { + "epoch": 0.17651312839118122, + "grad_norm": 392.95794677734375, + "learning_rate": 9.830711575189646e-06, + "loss": 22.7161, + "step": 87380 + }, + { + "epoch": 0.17653332902386504, + "grad_norm": 91.82284545898438, + "learning_rate": 9.83062150102748e-06, + "loss": 15.9523, + "step": 87390 + }, + { + "epoch": 0.17655352965654883, + "grad_norm": 1855.065185546875, + "learning_rate": 9.830531403321451e-06, + "loss": 21.3149, + "step": 87400 + }, + { + "epoch": 0.17657373028923265, + "grad_norm": 487.7505798339844, + "learning_rate": 9.830441282071999e-06, + 
"loss": 30.5719, + "step": 87410 + }, + { + "epoch": 0.17659393092191647, + "grad_norm": 454.8311462402344, + "learning_rate": 9.830351137279559e-06, + "loss": 17.6413, + "step": 87420 + }, + { + "epoch": 0.1766141315546003, + "grad_norm": 1592.8018798828125, + "learning_rate": 9.830260968944577e-06, + "loss": 26.3831, + "step": 87430 + }, + { + "epoch": 0.1766343321872841, + "grad_norm": 863.373046875, + "learning_rate": 9.830170777067486e-06, + "loss": 34.066, + "step": 87440 + }, + { + "epoch": 0.17665453281996793, + "grad_norm": 207.10821533203125, + "learning_rate": 9.83008056164873e-06, + "loss": 14.8623, + "step": 87450 + }, + { + "epoch": 0.17667473345265175, + "grad_norm": 600.4519653320312, + "learning_rate": 9.829990322688746e-06, + "loss": 28.3211, + "step": 87460 + }, + { + "epoch": 0.17669493408533554, + "grad_norm": 930.9231567382812, + "learning_rate": 9.829900060187976e-06, + "loss": 25.3523, + "step": 87470 + }, + { + "epoch": 0.17671513471801936, + "grad_norm": 200.3104248046875, + "learning_rate": 9.82980977414686e-06, + "loss": 17.3831, + "step": 87480 + }, + { + "epoch": 0.17673533535070318, + "grad_norm": 138.60247802734375, + "learning_rate": 9.829719464565834e-06, + "loss": 34.9849, + "step": 87490 + }, + { + "epoch": 0.176755535983387, + "grad_norm": 104.7635726928711, + "learning_rate": 9.829629131445342e-06, + "loss": 29.9168, + "step": 87500 + }, + { + "epoch": 0.17677573661607082, + "grad_norm": 356.3173828125, + "learning_rate": 9.829538774785825e-06, + "loss": 36.6058, + "step": 87510 + }, + { + "epoch": 0.17679593724875464, + "grad_norm": 366.2814636230469, + "learning_rate": 9.82944839458772e-06, + "loss": 23.335, + "step": 87520 + }, + { + "epoch": 0.17681613788143843, + "grad_norm": 445.0623474121094, + "learning_rate": 9.82935799085147e-06, + "loss": 20.7442, + "step": 87530 + }, + { + "epoch": 0.17683633851412225, + "grad_norm": 332.32598876953125, + "learning_rate": 9.829267563577514e-06, + "loss": 26.1064, + "step": 87540 + }, + { + "epoch": 0.17685653914680607, + "grad_norm": 294.8311767578125, + "learning_rate": 9.829177112766295e-06, + "loss": 19.3694, + "step": 87550 + }, + { + "epoch": 0.1768767397794899, + "grad_norm": 957.253662109375, + "learning_rate": 9.829086638418252e-06, + "loss": 19.1206, + "step": 87560 + }, + { + "epoch": 0.17689694041217371, + "grad_norm": 521.346923828125, + "learning_rate": 9.828996140533826e-06, + "loss": 28.4056, + "step": 87570 + }, + { + "epoch": 0.17691714104485753, + "grad_norm": 913.6758422851562, + "learning_rate": 9.82890561911346e-06, + "loss": 33.756, + "step": 87580 + }, + { + "epoch": 0.17693734167754135, + "grad_norm": 236.279052734375, + "learning_rate": 9.828815074157591e-06, + "loss": 17.5507, + "step": 87590 + }, + { + "epoch": 0.17695754231022515, + "grad_norm": 142.71974182128906, + "learning_rate": 9.828724505666664e-06, + "loss": 29.2634, + "step": 87600 + }, + { + "epoch": 0.17697774294290897, + "grad_norm": 328.37646484375, + "learning_rate": 9.82863391364112e-06, + "loss": 39.9557, + "step": 87610 + }, + { + "epoch": 0.1769979435755928, + "grad_norm": 189.7563018798828, + "learning_rate": 9.828543298081401e-06, + "loss": 37.644, + "step": 87620 + }, + { + "epoch": 0.1770181442082766, + "grad_norm": 751.918701171875, + "learning_rate": 9.828452658987946e-06, + "loss": 29.8027, + "step": 87630 + }, + { + "epoch": 0.17703834484096043, + "grad_norm": 619.622802734375, + "learning_rate": 9.828361996361199e-06, + "loss": 16.2425, + "step": 87640 + }, + { + "epoch": 0.17705854547364425, + 
"grad_norm": 768.6655883789062, + "learning_rate": 9.828271310201601e-06, + "loss": 22.6406, + "step": 87650 + }, + { + "epoch": 0.17707874610632804, + "grad_norm": 528.748291015625, + "learning_rate": 9.828180600509595e-06, + "loss": 15.7845, + "step": 87660 + }, + { + "epoch": 0.17709894673901186, + "grad_norm": 166.1478729248047, + "learning_rate": 9.828089867285622e-06, + "loss": 12.502, + "step": 87670 + }, + { + "epoch": 0.17711914737169568, + "grad_norm": 480.45355224609375, + "learning_rate": 9.827999110530124e-06, + "loss": 34.4591, + "step": 87680 + }, + { + "epoch": 0.1771393480043795, + "grad_norm": 447.4638671875, + "learning_rate": 9.827908330243545e-06, + "loss": 32.2473, + "step": 87690 + }, + { + "epoch": 0.17715954863706332, + "grad_norm": 569.7048950195312, + "learning_rate": 9.827817526426324e-06, + "loss": 25.3609, + "step": 87700 + }, + { + "epoch": 0.17717974926974714, + "grad_norm": 0.0, + "learning_rate": 9.827726699078907e-06, + "loss": 21.6572, + "step": 87710 + }, + { + "epoch": 0.17719994990243093, + "grad_norm": 465.3253479003906, + "learning_rate": 9.827635848201737e-06, + "loss": 21.0944, + "step": 87720 + }, + { + "epoch": 0.17722015053511475, + "grad_norm": 316.3619689941406, + "learning_rate": 9.827544973795254e-06, + "loss": 18.0882, + "step": 87730 + }, + { + "epoch": 0.17724035116779857, + "grad_norm": 660.8316040039062, + "learning_rate": 9.827454075859904e-06, + "loss": 45.2601, + "step": 87740 + }, + { + "epoch": 0.1772605518004824, + "grad_norm": 550.6033325195312, + "learning_rate": 9.827363154396126e-06, + "loss": 37.1864, + "step": 87750 + }, + { + "epoch": 0.1772807524331662, + "grad_norm": 219.22695922851562, + "learning_rate": 9.827272209404366e-06, + "loss": 25.944, + "step": 87760 + }, + { + "epoch": 0.17730095306585003, + "grad_norm": 482.16094970703125, + "learning_rate": 9.827181240885068e-06, + "loss": 25.122, + "step": 87770 + }, + { + "epoch": 0.17732115369853385, + "grad_norm": 550.360107421875, + "learning_rate": 9.827090248838673e-06, + "loss": 25.6232, + "step": 87780 + }, + { + "epoch": 0.17734135433121764, + "grad_norm": 585.909912109375, + "learning_rate": 9.826999233265626e-06, + "loss": 20.155, + "step": 87790 + }, + { + "epoch": 0.17736155496390146, + "grad_norm": 283.1487121582031, + "learning_rate": 9.82690819416637e-06, + "loss": 17.7344, + "step": 87800 + }, + { + "epoch": 0.17738175559658528, + "grad_norm": 601.101806640625, + "learning_rate": 9.826817131541349e-06, + "loss": 15.1215, + "step": 87810 + }, + { + "epoch": 0.1774019562292691, + "grad_norm": 805.8764038085938, + "learning_rate": 9.826726045391006e-06, + "loss": 15.1918, + "step": 87820 + }, + { + "epoch": 0.17742215686195292, + "grad_norm": 193.71456909179688, + "learning_rate": 9.826634935715787e-06, + "loss": 19.3605, + "step": 87830 + }, + { + "epoch": 0.17744235749463674, + "grad_norm": 60.50779724121094, + "learning_rate": 9.826543802516135e-06, + "loss": 19.2635, + "step": 87840 + }, + { + "epoch": 0.17746255812732054, + "grad_norm": 445.96173095703125, + "learning_rate": 9.826452645792493e-06, + "loss": 26.2762, + "step": 87850 + }, + { + "epoch": 0.17748275876000436, + "grad_norm": 608.7047119140625, + "learning_rate": 9.826361465545306e-06, + "loss": 22.6092, + "step": 87860 + }, + { + "epoch": 0.17750295939268818, + "grad_norm": 211.99295043945312, + "learning_rate": 9.826270261775018e-06, + "loss": 27.7095, + "step": 87870 + }, + { + "epoch": 0.177523160025372, + "grad_norm": 932.4351806640625, + "learning_rate": 9.826179034482074e-06, + "loss": 
31.0207, + "step": 87880 + }, + { + "epoch": 0.17754336065805582, + "grad_norm": 1075.3310546875, + "learning_rate": 9.82608778366692e-06, + "loss": 18.1641, + "step": 87890 + }, + { + "epoch": 0.17756356129073964, + "grad_norm": 385.71209716796875, + "learning_rate": 9.825996509330001e-06, + "loss": 21.5776, + "step": 87900 + }, + { + "epoch": 0.17758376192342346, + "grad_norm": 556.7341918945312, + "learning_rate": 9.825905211471757e-06, + "loss": 40.6338, + "step": 87910 + }, + { + "epoch": 0.17760396255610725, + "grad_norm": 19.428258895874023, + "learning_rate": 9.825813890092639e-06, + "loss": 20.1369, + "step": 87920 + }, + { + "epoch": 0.17762416318879107, + "grad_norm": 601.0648193359375, + "learning_rate": 9.825722545193087e-06, + "loss": 18.9756, + "step": 87930 + }, + { + "epoch": 0.1776443638214749, + "grad_norm": 681.321044921875, + "learning_rate": 9.82563117677355e-06, + "loss": 23.8888, + "step": 87940 + }, + { + "epoch": 0.1776645644541587, + "grad_norm": 507.18701171875, + "learning_rate": 9.825539784834472e-06, + "loss": 15.3194, + "step": 87950 + }, + { + "epoch": 0.17768476508684253, + "grad_norm": 523.6488037109375, + "learning_rate": 9.825448369376298e-06, + "loss": 27.7807, + "step": 87960 + }, + { + "epoch": 0.17770496571952635, + "grad_norm": 644.4873657226562, + "learning_rate": 9.825356930399474e-06, + "loss": 17.1149, + "step": 87970 + }, + { + "epoch": 0.17772516635221014, + "grad_norm": 595.9818115234375, + "learning_rate": 9.825265467904446e-06, + "loss": 29.5429, + "step": 87980 + }, + { + "epoch": 0.17774536698489396, + "grad_norm": 385.09405517578125, + "learning_rate": 9.825173981891658e-06, + "loss": 24.3628, + "step": 87990 + }, + { + "epoch": 0.17776556761757778, + "grad_norm": 382.9449768066406, + "learning_rate": 9.825082472361558e-06, + "loss": 30.5227, + "step": 88000 + }, + { + "epoch": 0.1777857682502616, + "grad_norm": 999.642578125, + "learning_rate": 9.82499093931459e-06, + "loss": 19.8723, + "step": 88010 + }, + { + "epoch": 0.17780596888294542, + "grad_norm": 609.13232421875, + "learning_rate": 9.824899382751204e-06, + "loss": 17.872, + "step": 88020 + }, + { + "epoch": 0.17782616951562924, + "grad_norm": 479.62408447265625, + "learning_rate": 9.824807802671843e-06, + "loss": 37.9173, + "step": 88030 + }, + { + "epoch": 0.17784637014831303, + "grad_norm": 755.3995971679688, + "learning_rate": 9.824716199076952e-06, + "loss": 28.8684, + "step": 88040 + }, + { + "epoch": 0.17786657078099685, + "grad_norm": 777.1272583007812, + "learning_rate": 9.824624571966982e-06, + "loss": 31.0329, + "step": 88050 + }, + { + "epoch": 0.17788677141368067, + "grad_norm": 42.348876953125, + "learning_rate": 9.824532921342375e-06, + "loss": 15.062, + "step": 88060 + }, + { + "epoch": 0.1779069720463645, + "grad_norm": 1899.2667236328125, + "learning_rate": 9.82444124720358e-06, + "loss": 47.0505, + "step": 88070 + }, + { + "epoch": 0.1779271726790483, + "grad_norm": 200.7368621826172, + "learning_rate": 9.824349549551045e-06, + "loss": 37.6947, + "step": 88080 + }, + { + "epoch": 0.17794737331173213, + "grad_norm": 446.684814453125, + "learning_rate": 9.824257828385213e-06, + "loss": 11.5831, + "step": 88090 + }, + { + "epoch": 0.17796757394441595, + "grad_norm": 406.3067626953125, + "learning_rate": 9.824166083706534e-06, + "loss": 12.2547, + "step": 88100 + }, + { + "epoch": 0.17798777457709974, + "grad_norm": 197.05783081054688, + "learning_rate": 9.824074315515457e-06, + "loss": 21.8061, + "step": 88110 + }, + { + "epoch": 0.17800797520978356, + 
"grad_norm": 577.2730102539062, + "learning_rate": 9.823982523812424e-06, + "loss": 41.0802, + "step": 88120 + }, + { + "epoch": 0.17802817584246738, + "grad_norm": 256.77215576171875, + "learning_rate": 9.823890708597887e-06, + "loss": 19.7997, + "step": 88130 + }, + { + "epoch": 0.1780483764751512, + "grad_norm": 455.495361328125, + "learning_rate": 9.823798869872291e-06, + "loss": 20.599, + "step": 88140 + }, + { + "epoch": 0.17806857710783502, + "grad_norm": 376.7768859863281, + "learning_rate": 9.823707007636085e-06, + "loss": 25.4198, + "step": 88150 + }, + { + "epoch": 0.17808877774051884, + "grad_norm": 84.9360580444336, + "learning_rate": 9.823615121889716e-06, + "loss": 34.0157, + "step": 88160 + }, + { + "epoch": 0.17810897837320264, + "grad_norm": 159.77825927734375, + "learning_rate": 9.82352321263363e-06, + "loss": 18.454, + "step": 88170 + }, + { + "epoch": 0.17812917900588646, + "grad_norm": 162.8426055908203, + "learning_rate": 9.823431279868278e-06, + "loss": 16.558, + "step": 88180 + }, + { + "epoch": 0.17814937963857028, + "grad_norm": 417.8797302246094, + "learning_rate": 9.823339323594107e-06, + "loss": 103.8009, + "step": 88190 + }, + { + "epoch": 0.1781695802712541, + "grad_norm": 139.08123779296875, + "learning_rate": 9.823247343811567e-06, + "loss": 31.3308, + "step": 88200 + }, + { + "epoch": 0.17818978090393792, + "grad_norm": 346.410888671875, + "learning_rate": 9.823155340521104e-06, + "loss": 22.9895, + "step": 88210 + }, + { + "epoch": 0.17820998153662174, + "grad_norm": 328.610595703125, + "learning_rate": 9.823063313723165e-06, + "loss": 21.5902, + "step": 88220 + }, + { + "epoch": 0.17823018216930553, + "grad_norm": 645.4741821289062, + "learning_rate": 9.822971263418202e-06, + "loss": 20.7911, + "step": 88230 + }, + { + "epoch": 0.17825038280198935, + "grad_norm": 455.9669494628906, + "learning_rate": 9.82287918960666e-06, + "loss": 19.1658, + "step": 88240 + }, + { + "epoch": 0.17827058343467317, + "grad_norm": 309.32366943359375, + "learning_rate": 9.822787092288991e-06, + "loss": 41.181, + "step": 88250 + }, + { + "epoch": 0.178290784067357, + "grad_norm": 429.23919677734375, + "learning_rate": 9.822694971465643e-06, + "loss": 24.2197, + "step": 88260 + }, + { + "epoch": 0.1783109847000408, + "grad_norm": 884.8764038085938, + "learning_rate": 9.822602827137065e-06, + "loss": 29.4798, + "step": 88270 + }, + { + "epoch": 0.17833118533272463, + "grad_norm": 259.2513732910156, + "learning_rate": 9.822510659303704e-06, + "loss": 17.1712, + "step": 88280 + }, + { + "epoch": 0.17835138596540845, + "grad_norm": 692.7817993164062, + "learning_rate": 9.822418467966013e-06, + "loss": 17.3893, + "step": 88290 + }, + { + "epoch": 0.17837158659809224, + "grad_norm": 1016.9531860351562, + "learning_rate": 9.822326253124436e-06, + "loss": 36.6533, + "step": 88300 + }, + { + "epoch": 0.17839178723077606, + "grad_norm": 80.7690200805664, + "learning_rate": 9.82223401477943e-06, + "loss": 21.8245, + "step": 88310 + }, + { + "epoch": 0.17841198786345988, + "grad_norm": 291.8055114746094, + "learning_rate": 9.822141752931438e-06, + "loss": 20.9248, + "step": 88320 + }, + { + "epoch": 0.1784321884961437, + "grad_norm": 444.70440673828125, + "learning_rate": 9.822049467580912e-06, + "loss": 17.7563, + "step": 88330 + }, + { + "epoch": 0.17845238912882752, + "grad_norm": 520.4324951171875, + "learning_rate": 9.821957158728302e-06, + "loss": 31.1346, + "step": 88340 + }, + { + "epoch": 0.17847258976151134, + "grad_norm": 109.189208984375, + "learning_rate": 
9.821864826374057e-06, + "loss": 21.6773, + "step": 88350 + }, + { + "epoch": 0.17849279039419513, + "grad_norm": 528.0753173828125, + "learning_rate": 9.82177247051863e-06, + "loss": 14.649, + "step": 88360 + }, + { + "epoch": 0.17851299102687895, + "grad_norm": 852.3883666992188, + "learning_rate": 9.821680091162466e-06, + "loss": 38.5516, + "step": 88370 + }, + { + "epoch": 0.17853319165956277, + "grad_norm": 429.7975769042969, + "learning_rate": 9.821587688306017e-06, + "loss": 15.0936, + "step": 88380 + }, + { + "epoch": 0.1785533922922466, + "grad_norm": 220.61557006835938, + "learning_rate": 9.821495261949739e-06, + "loss": 20.1851, + "step": 88390 + }, + { + "epoch": 0.1785735929249304, + "grad_norm": 531.4949951171875, + "learning_rate": 9.821402812094074e-06, + "loss": 13.8314, + "step": 88400 + }, + { + "epoch": 0.17859379355761423, + "grad_norm": 411.6770324707031, + "learning_rate": 9.821310338739478e-06, + "loss": 21.5172, + "step": 88410 + }, + { + "epoch": 0.17861399419029805, + "grad_norm": 462.91461181640625, + "learning_rate": 9.821217841886399e-06, + "loss": 35.1949, + "step": 88420 + }, + { + "epoch": 0.17863419482298185, + "grad_norm": 1127.7225341796875, + "learning_rate": 9.82112532153529e-06, + "loss": 50.0871, + "step": 88430 + }, + { + "epoch": 0.17865439545566567, + "grad_norm": 321.0269775390625, + "learning_rate": 9.821032777686601e-06, + "loss": 25.3275, + "step": 88440 + }, + { + "epoch": 0.17867459608834949, + "grad_norm": 183.1136016845703, + "learning_rate": 9.820940210340784e-06, + "loss": 21.9095, + "step": 88450 + }, + { + "epoch": 0.1786947967210333, + "grad_norm": 133.33180236816406, + "learning_rate": 9.820847619498288e-06, + "loss": 16.5137, + "step": 88460 + }, + { + "epoch": 0.17871499735371713, + "grad_norm": 566.6614379882812, + "learning_rate": 9.820755005159565e-06, + "loss": 26.651, + "step": 88470 + }, + { + "epoch": 0.17873519798640095, + "grad_norm": 843.701904296875, + "learning_rate": 9.820662367325067e-06, + "loss": 18.4092, + "step": 88480 + }, + { + "epoch": 0.17875539861908474, + "grad_norm": 132.54522705078125, + "learning_rate": 9.820569705995244e-06, + "loss": 19.0154, + "step": 88490 + }, + { + "epoch": 0.17877559925176856, + "grad_norm": 1032.7620849609375, + "learning_rate": 9.82047702117055e-06, + "loss": 21.4827, + "step": 88500 + }, + { + "epoch": 0.17879579988445238, + "grad_norm": 151.09715270996094, + "learning_rate": 9.820384312851437e-06, + "loss": 16.5228, + "step": 88510 + }, + { + "epoch": 0.1788160005171362, + "grad_norm": 358.14984130859375, + "learning_rate": 9.820291581038354e-06, + "loss": 36.5084, + "step": 88520 + }, + { + "epoch": 0.17883620114982002, + "grad_norm": 475.3742370605469, + "learning_rate": 9.820198825731757e-06, + "loss": 40.8406, + "step": 88530 + }, + { + "epoch": 0.17885640178250384, + "grad_norm": 721.9351806640625, + "learning_rate": 9.820106046932092e-06, + "loss": 60.1918, + "step": 88540 + }, + { + "epoch": 0.17887660241518763, + "grad_norm": 575.8406982421875, + "learning_rate": 9.820013244639817e-06, + "loss": 20.3473, + "step": 88550 + }, + { + "epoch": 0.17889680304787145, + "grad_norm": 486.1315612792969, + "learning_rate": 9.81992041885538e-06, + "loss": 27.2605, + "step": 88560 + }, + { + "epoch": 0.17891700368055527, + "grad_norm": 229.42684936523438, + "learning_rate": 9.819827569579237e-06, + "loss": 21.3187, + "step": 88570 + }, + { + "epoch": 0.1789372043132391, + "grad_norm": 234.98072814941406, + "learning_rate": 9.819734696811839e-06, + "loss": 12.1782, + "step": 88580 + 
}, + { + "epoch": 0.1789574049459229, + "grad_norm": 808.8175048828125, + "learning_rate": 9.81964180055364e-06, + "loss": 23.4937, + "step": 88590 + }, + { + "epoch": 0.17897760557860673, + "grad_norm": 585.3851318359375, + "learning_rate": 9.819548880805087e-06, + "loss": 36.2704, + "step": 88600 + }, + { + "epoch": 0.17899780621129055, + "grad_norm": 439.37286376953125, + "learning_rate": 9.819455937566642e-06, + "loss": 22.6001, + "step": 88610 + }, + { + "epoch": 0.17901800684397434, + "grad_norm": 212.2778778076172, + "learning_rate": 9.819362970838751e-06, + "loss": 22.1894, + "step": 88620 + }, + { + "epoch": 0.17903820747665816, + "grad_norm": 1028.92236328125, + "learning_rate": 9.819269980621869e-06, + "loss": 17.4052, + "step": 88630 + }, + { + "epoch": 0.17905840810934198, + "grad_norm": 368.2625732421875, + "learning_rate": 9.819176966916451e-06, + "loss": 23.653, + "step": 88640 + }, + { + "epoch": 0.1790786087420258, + "grad_norm": 1294.4925537109375, + "learning_rate": 9.819083929722947e-06, + "loss": 33.3029, + "step": 88650 + }, + { + "epoch": 0.17909880937470962, + "grad_norm": 354.2082214355469, + "learning_rate": 9.818990869041816e-06, + "loss": 18.8248, + "step": 88660 + }, + { + "epoch": 0.17911901000739344, + "grad_norm": 1367.2764892578125, + "learning_rate": 9.818897784873504e-06, + "loss": 28.3457, + "step": 88670 + }, + { + "epoch": 0.17913921064007723, + "grad_norm": 373.48748779296875, + "learning_rate": 9.818804677218472e-06, + "loss": 27.0261, + "step": 88680 + }, + { + "epoch": 0.17915941127276105, + "grad_norm": 182.73428344726562, + "learning_rate": 9.818711546077169e-06, + "loss": 15.2134, + "step": 88690 + }, + { + "epoch": 0.17917961190544487, + "grad_norm": 119.61083221435547, + "learning_rate": 9.81861839145005e-06, + "loss": 58.4868, + "step": 88700 + }, + { + "epoch": 0.1791998125381287, + "grad_norm": 682.3272705078125, + "learning_rate": 9.818525213337568e-06, + "loss": 38.5422, + "step": 88710 + }, + { + "epoch": 0.17922001317081251, + "grad_norm": 180.2148895263672, + "learning_rate": 9.818432011740181e-06, + "loss": 10.1237, + "step": 88720 + }, + { + "epoch": 0.17924021380349633, + "grad_norm": 307.799072265625, + "learning_rate": 9.81833878665834e-06, + "loss": 18.1421, + "step": 88730 + }, + { + "epoch": 0.17926041443618015, + "grad_norm": 378.252197265625, + "learning_rate": 9.8182455380925e-06, + "loss": 18.8232, + "step": 88740 + }, + { + "epoch": 0.17928061506886395, + "grad_norm": 1313.838623046875, + "learning_rate": 9.818152266043115e-06, + "loss": 35.5768, + "step": 88750 + }, + { + "epoch": 0.17930081570154777, + "grad_norm": 365.5284729003906, + "learning_rate": 9.818058970510642e-06, + "loss": 17.2489, + "step": 88760 + }, + { + "epoch": 0.1793210163342316, + "grad_norm": 168.6874237060547, + "learning_rate": 9.817965651495533e-06, + "loss": 26.2463, + "step": 88770 + }, + { + "epoch": 0.1793412169669154, + "grad_norm": 953.0068969726562, + "learning_rate": 9.817872308998242e-06, + "loss": 15.8511, + "step": 88780 + }, + { + "epoch": 0.17936141759959923, + "grad_norm": 717.1253662109375, + "learning_rate": 9.817778943019228e-06, + "loss": 12.767, + "step": 88790 + }, + { + "epoch": 0.17938161823228305, + "grad_norm": 737.7371826171875, + "learning_rate": 9.817685553558945e-06, + "loss": 23.686, + "step": 88800 + }, + { + "epoch": 0.17940181886496684, + "grad_norm": 143.72450256347656, + "learning_rate": 9.817592140617844e-06, + "loss": 33.5139, + "step": 88810 + }, + { + "epoch": 0.17942201949765066, + "grad_norm": 
466.5359191894531, + "learning_rate": 9.817498704196384e-06, + "loss": 23.8166, + "step": 88820 + }, + { + "epoch": 0.17944222013033448, + "grad_norm": 761.9892578125, + "learning_rate": 9.81740524429502e-06, + "loss": 37.5548, + "step": 88830 + }, + { + "epoch": 0.1794624207630183, + "grad_norm": 608.7931518554688, + "learning_rate": 9.817311760914206e-06, + "loss": 28.2609, + "step": 88840 + }, + { + "epoch": 0.17948262139570212, + "grad_norm": 334.7822265625, + "learning_rate": 9.8172182540544e-06, + "loss": 27.8392, + "step": 88850 + }, + { + "epoch": 0.17950282202838594, + "grad_norm": 542.93896484375, + "learning_rate": 9.817124723716057e-06, + "loss": 17.527, + "step": 88860 + }, + { + "epoch": 0.17952302266106973, + "grad_norm": 809.5488891601562, + "learning_rate": 9.817031169899631e-06, + "loss": 37.7099, + "step": 88870 + }, + { + "epoch": 0.17954322329375355, + "grad_norm": 817.4562377929688, + "learning_rate": 9.81693759260558e-06, + "loss": 30.6387, + "step": 88880 + }, + { + "epoch": 0.17956342392643737, + "grad_norm": 724.6182861328125, + "learning_rate": 9.81684399183436e-06, + "loss": 22.2492, + "step": 88890 + }, + { + "epoch": 0.1795836245591212, + "grad_norm": 162.0093536376953, + "learning_rate": 9.816750367586424e-06, + "loss": 27.2942, + "step": 88900 + }, + { + "epoch": 0.179603825191805, + "grad_norm": 408.91680908203125, + "learning_rate": 9.816656719862234e-06, + "loss": 20.7502, + "step": 88910 + }, + { + "epoch": 0.17962402582448883, + "grad_norm": 360.9241943359375, + "learning_rate": 9.816563048662242e-06, + "loss": 15.3958, + "step": 88920 + }, + { + "epoch": 0.17964422645717265, + "grad_norm": 928.7029418945312, + "learning_rate": 9.816469353986905e-06, + "loss": 20.9076, + "step": 88930 + }, + { + "epoch": 0.17966442708985644, + "grad_norm": 352.09735107421875, + "learning_rate": 9.816375635836683e-06, + "loss": 27.9983, + "step": 88940 + }, + { + "epoch": 0.17968462772254026, + "grad_norm": 349.9088439941406, + "learning_rate": 9.816281894212028e-06, + "loss": 22.8719, + "step": 88950 + }, + { + "epoch": 0.17970482835522408, + "grad_norm": 554.7149047851562, + "learning_rate": 9.8161881291134e-06, + "loss": 17.0414, + "step": 88960 + }, + { + "epoch": 0.1797250289879079, + "grad_norm": 437.630126953125, + "learning_rate": 9.816094340541256e-06, + "loss": 27.4759, + "step": 88970 + }, + { + "epoch": 0.17974522962059172, + "grad_norm": 253.888916015625, + "learning_rate": 9.81600052849605e-06, + "loss": 27.4915, + "step": 88980 + }, + { + "epoch": 0.17976543025327554, + "grad_norm": 387.9095153808594, + "learning_rate": 9.815906692978244e-06, + "loss": 17.6506, + "step": 88990 + }, + { + "epoch": 0.17978563088595934, + "grad_norm": 330.9479675292969, + "learning_rate": 9.815812833988292e-06, + "loss": 21.3976, + "step": 89000 + }, + { + "epoch": 0.17980583151864316, + "grad_norm": 349.994140625, + "learning_rate": 9.815718951526651e-06, + "loss": 15.974, + "step": 89010 + }, + { + "epoch": 0.17982603215132698, + "grad_norm": 630.7244262695312, + "learning_rate": 9.815625045593783e-06, + "loss": 49.8131, + "step": 89020 + }, + { + "epoch": 0.1798462327840108, + "grad_norm": 129.8878631591797, + "learning_rate": 9.81553111619014e-06, + "loss": 35.3881, + "step": 89030 + }, + { + "epoch": 0.17986643341669462, + "grad_norm": 807.0062866210938, + "learning_rate": 9.815437163316182e-06, + "loss": 24.98, + "step": 89040 + }, + { + "epoch": 0.17988663404937844, + "grad_norm": 23.817771911621094, + "learning_rate": 9.815343186972369e-06, + "loss": 17.7969, + 
"step": 89050 + }, + { + "epoch": 0.17990683468206226, + "grad_norm": 278.3724060058594, + "learning_rate": 9.815249187159158e-06, + "loss": 18.6364, + "step": 89060 + }, + { + "epoch": 0.17992703531474605, + "grad_norm": 723.9588012695312, + "learning_rate": 9.815155163877003e-06, + "loss": 37.0394, + "step": 89070 + }, + { + "epoch": 0.17994723594742987, + "grad_norm": 426.5888977050781, + "learning_rate": 9.81506111712637e-06, + "loss": 16.4161, + "step": 89080 + }, + { + "epoch": 0.1799674365801137, + "grad_norm": 156.51239013671875, + "learning_rate": 9.81496704690771e-06, + "loss": 11.2356, + "step": 89090 + }, + { + "epoch": 0.1799876372127975, + "grad_norm": 427.3960266113281, + "learning_rate": 9.814872953221487e-06, + "loss": 12.5643, + "step": 89100 + }, + { + "epoch": 0.18000783784548133, + "grad_norm": 302.4227294921875, + "learning_rate": 9.814778836068154e-06, + "loss": 19.3252, + "step": 89110 + }, + { + "epoch": 0.18002803847816515, + "grad_norm": 263.3395690917969, + "learning_rate": 9.814684695448176e-06, + "loss": 21.6315, + "step": 89120 + }, + { + "epoch": 0.18004823911084894, + "grad_norm": 1128.4725341796875, + "learning_rate": 9.814590531362006e-06, + "loss": 29.6173, + "step": 89130 + }, + { + "epoch": 0.18006843974353276, + "grad_norm": 845.773681640625, + "learning_rate": 9.814496343810109e-06, + "loss": 28.2154, + "step": 89140 + }, + { + "epoch": 0.18008864037621658, + "grad_norm": 837.7940673828125, + "learning_rate": 9.814402132792939e-06, + "loss": 22.2783, + "step": 89150 + }, + { + "epoch": 0.1801088410089004, + "grad_norm": 536.5330200195312, + "learning_rate": 9.814307898310957e-06, + "loss": 49.601, + "step": 89160 + }, + { + "epoch": 0.18012904164158422, + "grad_norm": 122.6075668334961, + "learning_rate": 9.814213640364623e-06, + "loss": 24.3462, + "step": 89170 + }, + { + "epoch": 0.18014924227426804, + "grad_norm": 360.3377380371094, + "learning_rate": 9.814119358954394e-06, + "loss": 20.5534, + "step": 89180 + }, + { + "epoch": 0.18016944290695183, + "grad_norm": 262.70587158203125, + "learning_rate": 9.81402505408073e-06, + "loss": 26.6822, + "step": 89190 + }, + { + "epoch": 0.18018964353963565, + "grad_norm": 150.52392578125, + "learning_rate": 9.813930725744095e-06, + "loss": 16.4466, + "step": 89200 + }, + { + "epoch": 0.18020984417231947, + "grad_norm": 428.7914733886719, + "learning_rate": 9.813836373944945e-06, + "loss": 23.3214, + "step": 89210 + }, + { + "epoch": 0.1802300448050033, + "grad_norm": 731.9844360351562, + "learning_rate": 9.813741998683738e-06, + "loss": 33.7192, + "step": 89220 + }, + { + "epoch": 0.1802502454376871, + "grad_norm": 50.36422348022461, + "learning_rate": 9.813647599960938e-06, + "loss": 42.3862, + "step": 89230 + }, + { + "epoch": 0.18027044607037093, + "grad_norm": 710.2027587890625, + "learning_rate": 9.813553177777005e-06, + "loss": 28.604, + "step": 89240 + }, + { + "epoch": 0.18029064670305475, + "grad_norm": 1012.0244750976562, + "learning_rate": 9.813458732132395e-06, + "loss": 32.8651, + "step": 89250 + }, + { + "epoch": 0.18031084733573854, + "grad_norm": 124.10336303710938, + "learning_rate": 9.813364263027572e-06, + "loss": 34.6443, + "step": 89260 + }, + { + "epoch": 0.18033104796842236, + "grad_norm": 935.6665649414062, + "learning_rate": 9.813269770462995e-06, + "loss": 32.9984, + "step": 89270 + }, + { + "epoch": 0.18035124860110618, + "grad_norm": 374.593017578125, + "learning_rate": 9.813175254439125e-06, + "loss": 17.0858, + "step": 89280 + }, + { + "epoch": 0.18037144923379, + "grad_norm": 
374.7408752441406, + "learning_rate": 9.813080714956422e-06, + "loss": 15.3942, + "step": 89290 + }, + { + "epoch": 0.18039164986647382, + "grad_norm": 636.0196533203125, + "learning_rate": 9.812986152015349e-06, + "loss": 22.3878, + "step": 89300 + }, + { + "epoch": 0.18041185049915764, + "grad_norm": 249.57730102539062, + "learning_rate": 9.812891565616363e-06, + "loss": 34.4672, + "step": 89310 + }, + { + "epoch": 0.18043205113184144, + "grad_norm": 470.1400146484375, + "learning_rate": 9.812796955759929e-06, + "loss": 39.6037, + "step": 89320 + }, + { + "epoch": 0.18045225176452526, + "grad_norm": 389.9975891113281, + "learning_rate": 9.812702322446506e-06, + "loss": 16.4977, + "step": 89330 + }, + { + "epoch": 0.18047245239720908, + "grad_norm": 350.7633972167969, + "learning_rate": 9.812607665676555e-06, + "loss": 10.3507, + "step": 89340 + }, + { + "epoch": 0.1804926530298929, + "grad_norm": 139.79766845703125, + "learning_rate": 9.812512985450539e-06, + "loss": 15.6512, + "step": 89350 + }, + { + "epoch": 0.18051285366257672, + "grad_norm": 127.40026092529297, + "learning_rate": 9.812418281768919e-06, + "loss": 26.3492, + "step": 89360 + }, + { + "epoch": 0.18053305429526054, + "grad_norm": 243.89901733398438, + "learning_rate": 9.812323554632153e-06, + "loss": 18.9652, + "step": 89370 + }, + { + "epoch": 0.18055325492794436, + "grad_norm": 1298.3121337890625, + "learning_rate": 9.812228804040708e-06, + "loss": 34.2148, + "step": 89380 + }, + { + "epoch": 0.18057345556062815, + "grad_norm": 759.9421997070312, + "learning_rate": 9.812134029995043e-06, + "loss": 18.2996, + "step": 89390 + }, + { + "epoch": 0.18059365619331197, + "grad_norm": 328.4620666503906, + "learning_rate": 9.81203923249562e-06, + "loss": 30.062, + "step": 89400 + }, + { + "epoch": 0.1806138568259958, + "grad_norm": 777.0197143554688, + "learning_rate": 9.811944411542903e-06, + "loss": 24.0067, + "step": 89410 + }, + { + "epoch": 0.1806340574586796, + "grad_norm": 524.9449462890625, + "learning_rate": 9.811849567137351e-06, + "loss": 22.9762, + "step": 89420 + }, + { + "epoch": 0.18065425809136343, + "grad_norm": 234.361572265625, + "learning_rate": 9.811754699279428e-06, + "loss": 35.5237, + "step": 89430 + }, + { + "epoch": 0.18067445872404725, + "grad_norm": 306.3564453125, + "learning_rate": 9.811659807969596e-06, + "loss": 29.2511, + "step": 89440 + }, + { + "epoch": 0.18069465935673104, + "grad_norm": 303.1343994140625, + "learning_rate": 9.811564893208317e-06, + "loss": 29.7451, + "step": 89450 + }, + { + "epoch": 0.18071485998941486, + "grad_norm": 231.79466247558594, + "learning_rate": 9.811469954996056e-06, + "loss": 13.9908, + "step": 89460 + }, + { + "epoch": 0.18073506062209868, + "grad_norm": 29.039527893066406, + "learning_rate": 9.811374993333274e-06, + "loss": 19.4468, + "step": 89470 + }, + { + "epoch": 0.1807552612547825, + "grad_norm": 379.2695007324219, + "learning_rate": 9.811280008220432e-06, + "loss": 30.9259, + "step": 89480 + }, + { + "epoch": 0.18077546188746632, + "grad_norm": 372.0519104003906, + "learning_rate": 9.811184999657996e-06, + "loss": 14.7546, + "step": 89490 + }, + { + "epoch": 0.18079566252015014, + "grad_norm": 386.9299621582031, + "learning_rate": 9.811089967646427e-06, + "loss": 23.1313, + "step": 89500 + }, + { + "epoch": 0.18081586315283393, + "grad_norm": 1153.033935546875, + "learning_rate": 9.81099491218619e-06, + "loss": 19.5238, + "step": 89510 + }, + { + "epoch": 0.18083606378551775, + "grad_norm": 239.7135772705078, + "learning_rate": 9.810899833277747e-06, + 
"loss": 28.7153, + "step": 89520 + }, + { + "epoch": 0.18085626441820157, + "grad_norm": 295.32598876953125, + "learning_rate": 9.810804730921561e-06, + "loss": 37.923, + "step": 89530 + }, + { + "epoch": 0.1808764650508854, + "grad_norm": 395.2212219238281, + "learning_rate": 9.810709605118098e-06, + "loss": 27.4408, + "step": 89540 + }, + { + "epoch": 0.1808966656835692, + "grad_norm": 380.6080017089844, + "learning_rate": 9.810614455867818e-06, + "loss": 12.1637, + "step": 89550 + }, + { + "epoch": 0.18091686631625303, + "grad_norm": 397.90911865234375, + "learning_rate": 9.810519283171189e-06, + "loss": 12.555, + "step": 89560 + }, + { + "epoch": 0.18093706694893685, + "grad_norm": 233.5554656982422, + "learning_rate": 9.810424087028669e-06, + "loss": 22.0574, + "step": 89570 + }, + { + "epoch": 0.18095726758162065, + "grad_norm": 426.50872802734375, + "learning_rate": 9.810328867440729e-06, + "loss": 26.7537, + "step": 89580 + }, + { + "epoch": 0.18097746821430447, + "grad_norm": 319.06915283203125, + "learning_rate": 9.810233624407827e-06, + "loss": 35.952, + "step": 89590 + }, + { + "epoch": 0.18099766884698829, + "grad_norm": 293.732666015625, + "learning_rate": 9.81013835793043e-06, + "loss": 25.265, + "step": 89600 + }, + { + "epoch": 0.1810178694796721, + "grad_norm": 351.9346618652344, + "learning_rate": 9.810043068009002e-06, + "loss": 23.6208, + "step": 89610 + }, + { + "epoch": 0.18103807011235593, + "grad_norm": 253.8270263671875, + "learning_rate": 9.809947754644009e-06, + "loss": 25.0661, + "step": 89620 + }, + { + "epoch": 0.18105827074503975, + "grad_norm": 482.05511474609375, + "learning_rate": 9.809852417835913e-06, + "loss": 18.5693, + "step": 89630 + }, + { + "epoch": 0.18107847137772354, + "grad_norm": 305.4869079589844, + "learning_rate": 9.80975705758518e-06, + "loss": 13.1552, + "step": 89640 + }, + { + "epoch": 0.18109867201040736, + "grad_norm": 352.21221923828125, + "learning_rate": 9.809661673892274e-06, + "loss": 37.0356, + "step": 89650 + }, + { + "epoch": 0.18111887264309118, + "grad_norm": 162.22671508789062, + "learning_rate": 9.80956626675766e-06, + "loss": 12.715, + "step": 89660 + }, + { + "epoch": 0.181139073275775, + "grad_norm": 542.3357543945312, + "learning_rate": 9.809470836181804e-06, + "loss": 16.3945, + "step": 89670 + }, + { + "epoch": 0.18115927390845882, + "grad_norm": 395.6851501464844, + "learning_rate": 9.80937538216517e-06, + "loss": 19.242, + "step": 89680 + }, + { + "epoch": 0.18117947454114264, + "grad_norm": 454.8094482421875, + "learning_rate": 9.809279904708224e-06, + "loss": 31.3309, + "step": 89690 + }, + { + "epoch": 0.18119967517382646, + "grad_norm": 687.8006591796875, + "learning_rate": 9.809184403811432e-06, + "loss": 24.0704, + "step": 89700 + }, + { + "epoch": 0.18121987580651025, + "grad_norm": 309.10076904296875, + "learning_rate": 9.809088879475257e-06, + "loss": 29.3016, + "step": 89710 + }, + { + "epoch": 0.18124007643919407, + "grad_norm": 542.1755981445312, + "learning_rate": 9.808993331700167e-06, + "loss": 18.7698, + "step": 89720 + }, + { + "epoch": 0.1812602770718779, + "grad_norm": 402.2753601074219, + "learning_rate": 9.808897760486626e-06, + "loss": 33.7752, + "step": 89730 + }, + { + "epoch": 0.1812804777045617, + "grad_norm": 206.4251708984375, + "learning_rate": 9.808802165835101e-06, + "loss": 21.0911, + "step": 89740 + }, + { + "epoch": 0.18130067833724553, + "grad_norm": 654.4439697265625, + "learning_rate": 9.808706547746057e-06, + "loss": 28.3269, + "step": 89750 + }, + { + "epoch": 
0.18132087896992935, + "grad_norm": 571.3541870117188, + "learning_rate": 9.808610906219963e-06, + "loss": 19.0851, + "step": 89760 + }, + { + "epoch": 0.18134107960261314, + "grad_norm": 350.6704406738281, + "learning_rate": 9.80851524125728e-06, + "loss": 31.8791, + "step": 89770 + }, + { + "epoch": 0.18136128023529696, + "grad_norm": 519.6353149414062, + "learning_rate": 9.808419552858477e-06, + "loss": 19.224, + "step": 89780 + }, + { + "epoch": 0.18138148086798078, + "grad_norm": 337.0629577636719, + "learning_rate": 9.808323841024021e-06, + "loss": 34.6968, + "step": 89790 + }, + { + "epoch": 0.1814016815006646, + "grad_norm": 672.872314453125, + "learning_rate": 9.808228105754378e-06, + "loss": 16.8094, + "step": 89800 + }, + { + "epoch": 0.18142188213334842, + "grad_norm": 459.8106994628906, + "learning_rate": 9.808132347050013e-06, + "loss": 10.5876, + "step": 89810 + }, + { + "epoch": 0.18144208276603224, + "grad_norm": 525.3077392578125, + "learning_rate": 9.808036564911396e-06, + "loss": 16.7304, + "step": 89820 + }, + { + "epoch": 0.18146228339871603, + "grad_norm": 145.01712036132812, + "learning_rate": 9.80794075933899e-06, + "loss": 16.1975, + "step": 89830 + }, + { + "epoch": 0.18148248403139985, + "grad_norm": 625.13720703125, + "learning_rate": 9.807844930333266e-06, + "loss": 30.531, + "step": 89840 + }, + { + "epoch": 0.18150268466408367, + "grad_norm": 589.8753662109375, + "learning_rate": 9.807749077894686e-06, + "loss": 19.9581, + "step": 89850 + }, + { + "epoch": 0.1815228852967675, + "grad_norm": 324.2210998535156, + "learning_rate": 9.807653202023723e-06, + "loss": 22.9263, + "step": 89860 + }, + { + "epoch": 0.18154308592945131, + "grad_norm": 408.2608642578125, + "learning_rate": 9.80755730272084e-06, + "loss": 29.5001, + "step": 89870 + }, + { + "epoch": 0.18156328656213513, + "grad_norm": 316.8686828613281, + "learning_rate": 9.807461379986506e-06, + "loss": 17.0876, + "step": 89880 + }, + { + "epoch": 0.18158348719481895, + "grad_norm": 310.5201110839844, + "learning_rate": 9.807365433821188e-06, + "loss": 10.0434, + "step": 89890 + }, + { + "epoch": 0.18160368782750275, + "grad_norm": 289.92010498046875, + "learning_rate": 9.807269464225355e-06, + "loss": 12.7067, + "step": 89900 + }, + { + "epoch": 0.18162388846018657, + "grad_norm": 1168.183837890625, + "learning_rate": 9.807173471199474e-06, + "loss": 30.2959, + "step": 89910 + }, + { + "epoch": 0.1816440890928704, + "grad_norm": 705.6151123046875, + "learning_rate": 9.80707745474401e-06, + "loss": 18.0212, + "step": 89920 + }, + { + "epoch": 0.1816642897255542, + "grad_norm": 508.00830078125, + "learning_rate": 9.806981414859435e-06, + "loss": 17.4614, + "step": 89930 + }, + { + "epoch": 0.18168449035823803, + "grad_norm": 589.3351440429688, + "learning_rate": 9.806885351546215e-06, + "loss": 25.1383, + "step": 89940 + }, + { + "epoch": 0.18170469099092185, + "grad_norm": 211.0186004638672, + "learning_rate": 9.806789264804821e-06, + "loss": 17.8697, + "step": 89950 + }, + { + "epoch": 0.18172489162360564, + "grad_norm": 612.8939819335938, + "learning_rate": 9.806693154635719e-06, + "loss": 32.0525, + "step": 89960 + }, + { + "epoch": 0.18174509225628946, + "grad_norm": 294.01751708984375, + "learning_rate": 9.806597021039374e-06, + "loss": 17.3785, + "step": 89970 + }, + { + "epoch": 0.18176529288897328, + "grad_norm": 539.4071655273438, + "learning_rate": 9.806500864016261e-06, + "loss": 19.5875, + "step": 89980 + }, + { + "epoch": 0.1817854935216571, + "grad_norm": 535.8582153320312, + 
"learning_rate": 9.806404683566845e-06, + "loss": 22.0834, + "step": 89990 + }, + { + "epoch": 0.18180569415434092, + "grad_norm": 371.3377990722656, + "learning_rate": 9.806308479691595e-06, + "loss": 30.617, + "step": 90000 + }, + { + "epoch": 0.18182589478702474, + "grad_norm": 166.52806091308594, + "learning_rate": 9.80621225239098e-06, + "loss": 23.92, + "step": 90010 + }, + { + "epoch": 0.18184609541970856, + "grad_norm": 512.3137817382812, + "learning_rate": 9.806116001665471e-06, + "loss": 31.545, + "step": 90020 + }, + { + "epoch": 0.18186629605239235, + "grad_norm": 316.3475036621094, + "learning_rate": 9.806019727515534e-06, + "loss": 31.4052, + "step": 90030 + }, + { + "epoch": 0.18188649668507617, + "grad_norm": 916.9949340820312, + "learning_rate": 9.805923429941642e-06, + "loss": 33.257, + "step": 90040 + }, + { + "epoch": 0.18190669731776, + "grad_norm": 545.9802856445312, + "learning_rate": 9.80582710894426e-06, + "loss": 36.0621, + "step": 90050 + }, + { + "epoch": 0.1819268979504438, + "grad_norm": 194.6088409423828, + "learning_rate": 9.805730764523861e-06, + "loss": 19.7219, + "step": 90060 + }, + { + "epoch": 0.18194709858312763, + "grad_norm": 461.10498046875, + "learning_rate": 9.805634396680912e-06, + "loss": 17.4369, + "step": 90070 + }, + { + "epoch": 0.18196729921581145, + "grad_norm": 412.11981201171875, + "learning_rate": 9.805538005415885e-06, + "loss": 20.6825, + "step": 90080 + }, + { + "epoch": 0.18198749984849524, + "grad_norm": 813.4285278320312, + "learning_rate": 9.805441590729246e-06, + "loss": 28.1719, + "step": 90090 + }, + { + "epoch": 0.18200770048117906, + "grad_norm": 612.4114990234375, + "learning_rate": 9.80534515262147e-06, + "loss": 26.6351, + "step": 90100 + }, + { + "epoch": 0.18202790111386288, + "grad_norm": 145.53575134277344, + "learning_rate": 9.805248691093023e-06, + "loss": 17.3572, + "step": 90110 + }, + { + "epoch": 0.1820481017465467, + "grad_norm": 852.3433837890625, + "learning_rate": 9.805152206144378e-06, + "loss": 40.7627, + "step": 90120 + }, + { + "epoch": 0.18206830237923052, + "grad_norm": 646.241943359375, + "learning_rate": 9.805055697776003e-06, + "loss": 32.0512, + "step": 90130 + }, + { + "epoch": 0.18208850301191434, + "grad_norm": 575.9727172851562, + "learning_rate": 9.80495916598837e-06, + "loss": 31.3801, + "step": 90140 + }, + { + "epoch": 0.18210870364459814, + "grad_norm": 536.2017211914062, + "learning_rate": 9.804862610781949e-06, + "loss": 19.398, + "step": 90150 + }, + { + "epoch": 0.18212890427728196, + "grad_norm": 325.2167663574219, + "learning_rate": 9.80476603215721e-06, + "loss": 27.9279, + "step": 90160 + }, + { + "epoch": 0.18214910490996578, + "grad_norm": 813.2481079101562, + "learning_rate": 9.804669430114625e-06, + "loss": 29.9856, + "step": 90170 + }, + { + "epoch": 0.1821693055426496, + "grad_norm": 301.8158874511719, + "learning_rate": 9.804572804654662e-06, + "loss": 16.7911, + "step": 90180 + }, + { + "epoch": 0.18218950617533342, + "grad_norm": 705.236572265625, + "learning_rate": 9.804476155777796e-06, + "loss": 33.4667, + "step": 90190 + }, + { + "epoch": 0.18220970680801724, + "grad_norm": 207.18588256835938, + "learning_rate": 9.804379483484493e-06, + "loss": 18.2618, + "step": 90200 + }, + { + "epoch": 0.18222990744070106, + "grad_norm": 989.70849609375, + "learning_rate": 9.80428278777523e-06, + "loss": 25.4147, + "step": 90210 + }, + { + "epoch": 0.18225010807338485, + "grad_norm": 1044.6300048828125, + "learning_rate": 9.804186068650474e-06, + "loss": 23.8518, + "step": 90220 + 
}, + { + "epoch": 0.18227030870606867, + "grad_norm": 677.1602783203125, + "learning_rate": 9.804089326110697e-06, + "loss": 40.6791, + "step": 90230 + }, + { + "epoch": 0.1822905093387525, + "grad_norm": 532.63623046875, + "learning_rate": 9.803992560156372e-06, + "loss": 30.1999, + "step": 90240 + }, + { + "epoch": 0.1823107099714363, + "grad_norm": 689.9764404296875, + "learning_rate": 9.803895770787972e-06, + "loss": 24.7726, + "step": 90250 + }, + { + "epoch": 0.18233091060412013, + "grad_norm": 449.429443359375, + "learning_rate": 9.803798958005965e-06, + "loss": 13.6979, + "step": 90260 + }, + { + "epoch": 0.18235111123680395, + "grad_norm": 417.9967956542969, + "learning_rate": 9.803702121810823e-06, + "loss": 27.424, + "step": 90270 + }, + { + "epoch": 0.18237131186948774, + "grad_norm": 115.34632110595703, + "learning_rate": 9.803605262203022e-06, + "loss": 11.9526, + "step": 90280 + }, + { + "epoch": 0.18239151250217156, + "grad_norm": 300.5381774902344, + "learning_rate": 9.80350837918303e-06, + "loss": 24.2963, + "step": 90290 + }, + { + "epoch": 0.18241171313485538, + "grad_norm": 199.52664184570312, + "learning_rate": 9.803411472751321e-06, + "loss": 23.4315, + "step": 90300 + }, + { + "epoch": 0.1824319137675392, + "grad_norm": 760.7015991210938, + "learning_rate": 9.803314542908368e-06, + "loss": 19.409, + "step": 90310 + }, + { + "epoch": 0.18245211440022302, + "grad_norm": 476.14093017578125, + "learning_rate": 9.803217589654642e-06, + "loss": 39.1428, + "step": 90320 + }, + { + "epoch": 0.18247231503290684, + "grad_norm": 352.9468078613281, + "learning_rate": 9.803120612990616e-06, + "loss": 16.5901, + "step": 90330 + }, + { + "epoch": 0.18249251566559066, + "grad_norm": 260.7500915527344, + "learning_rate": 9.803023612916763e-06, + "loss": 15.6792, + "step": 90340 + }, + { + "epoch": 0.18251271629827445, + "grad_norm": 676.7280883789062, + "learning_rate": 9.802926589433553e-06, + "loss": 22.615, + "step": 90350 + }, + { + "epoch": 0.18253291693095827, + "grad_norm": 551.415771484375, + "learning_rate": 9.802829542541463e-06, + "loss": 20.0372, + "step": 90360 + }, + { + "epoch": 0.1825531175636421, + "grad_norm": 414.0419006347656, + "learning_rate": 9.802732472240966e-06, + "loss": 24.8075, + "step": 90370 + }, + { + "epoch": 0.1825733181963259, + "grad_norm": 239.91038513183594, + "learning_rate": 9.802635378532531e-06, + "loss": 22.1132, + "step": 90380 + }, + { + "epoch": 0.18259351882900973, + "grad_norm": 322.6324462890625, + "learning_rate": 9.802538261416635e-06, + "loss": 46.8077, + "step": 90390 + }, + { + "epoch": 0.18261371946169355, + "grad_norm": 348.8457946777344, + "learning_rate": 9.80244112089375e-06, + "loss": 21.1164, + "step": 90400 + }, + { + "epoch": 0.18263392009437734, + "grad_norm": 693.45703125, + "learning_rate": 9.802343956964348e-06, + "loss": 23.1028, + "step": 90410 + }, + { + "epoch": 0.18265412072706116, + "grad_norm": 544.8148193359375, + "learning_rate": 9.802246769628906e-06, + "loss": 22.4803, + "step": 90420 + }, + { + "epoch": 0.18267432135974498, + "grad_norm": 928.405029296875, + "learning_rate": 9.802149558887895e-06, + "loss": 11.8932, + "step": 90430 + }, + { + "epoch": 0.1826945219924288, + "grad_norm": 257.213134765625, + "learning_rate": 9.802052324741789e-06, + "loss": 14.1029, + "step": 90440 + }, + { + "epoch": 0.18271472262511262, + "grad_norm": 680.4622802734375, + "learning_rate": 9.801955067191062e-06, + "loss": 27.1829, + "step": 90450 + }, + { + "epoch": 0.18273492325779644, + "grad_norm": 267.8209228515625, + 
"learning_rate": 9.80185778623619e-06, + "loss": 19.5032, + "step": 90460 + }, + { + "epoch": 0.18275512389048024, + "grad_norm": 437.3539123535156, + "learning_rate": 9.801760481877644e-06, + "loss": 24.0697, + "step": 90470 + }, + { + "epoch": 0.18277532452316406, + "grad_norm": 499.927978515625, + "learning_rate": 9.8016631541159e-06, + "loss": 17.041, + "step": 90480 + }, + { + "epoch": 0.18279552515584788, + "grad_norm": 323.8553771972656, + "learning_rate": 9.801565802951432e-06, + "loss": 15.5036, + "step": 90490 + }, + { + "epoch": 0.1828157257885317, + "grad_norm": 264.6095886230469, + "learning_rate": 9.801468428384716e-06, + "loss": 18.6817, + "step": 90500 + }, + { + "epoch": 0.18283592642121552, + "grad_norm": 80.06328582763672, + "learning_rate": 9.801371030416224e-06, + "loss": 14.7219, + "step": 90510 + }, + { + "epoch": 0.18285612705389934, + "grad_norm": 997.8451538085938, + "learning_rate": 9.801273609046433e-06, + "loss": 24.1481, + "step": 90520 + }, + { + "epoch": 0.18287632768658316, + "grad_norm": 342.7030334472656, + "learning_rate": 9.801176164275816e-06, + "loss": 32.648, + "step": 90530 + }, + { + "epoch": 0.18289652831926695, + "grad_norm": 1081.7420654296875, + "learning_rate": 9.801078696104849e-06, + "loss": 46.3561, + "step": 90540 + }, + { + "epoch": 0.18291672895195077, + "grad_norm": 320.661865234375, + "learning_rate": 9.800981204534006e-06, + "loss": 34.9884, + "step": 90550 + }, + { + "epoch": 0.1829369295846346, + "grad_norm": 914.1555786132812, + "learning_rate": 9.800883689563764e-06, + "loss": 21.4398, + "step": 90560 + }, + { + "epoch": 0.1829571302173184, + "grad_norm": 350.35943603515625, + "learning_rate": 9.800786151194596e-06, + "loss": 19.3436, + "step": 90570 + }, + { + "epoch": 0.18297733085000223, + "grad_norm": 423.799560546875, + "learning_rate": 9.800688589426978e-06, + "loss": 20.0453, + "step": 90580 + }, + { + "epoch": 0.18299753148268605, + "grad_norm": 466.1945495605469, + "learning_rate": 9.800591004261388e-06, + "loss": 13.6574, + "step": 90590 + }, + { + "epoch": 0.18301773211536984, + "grad_norm": 284.81573486328125, + "learning_rate": 9.8004933956983e-06, + "loss": 32.2185, + "step": 90600 + }, + { + "epoch": 0.18303793274805366, + "grad_norm": 342.5842590332031, + "learning_rate": 9.800395763738189e-06, + "loss": 11.7302, + "step": 90610 + }, + { + "epoch": 0.18305813338073748, + "grad_norm": 291.6769104003906, + "learning_rate": 9.80029810838153e-06, + "loss": 24.1659, + "step": 90620 + }, + { + "epoch": 0.1830783340134213, + "grad_norm": 502.8605651855469, + "learning_rate": 9.8002004296288e-06, + "loss": 16.5568, + "step": 90630 + }, + { + "epoch": 0.18309853464610512, + "grad_norm": 381.07281494140625, + "learning_rate": 9.800102727480476e-06, + "loss": 25.4679, + "step": 90640 + }, + { + "epoch": 0.18311873527878894, + "grad_norm": 352.68731689453125, + "learning_rate": 9.800005001937034e-06, + "loss": 16.953, + "step": 90650 + }, + { + "epoch": 0.18313893591147276, + "grad_norm": 1023.3972778320312, + "learning_rate": 9.79990725299895e-06, + "loss": 34.6754, + "step": 90660 + }, + { + "epoch": 0.18315913654415655, + "grad_norm": 210.64784240722656, + "learning_rate": 9.7998094806667e-06, + "loss": 21.666, + "step": 90670 + }, + { + "epoch": 0.18317933717684037, + "grad_norm": 272.3280029296875, + "learning_rate": 9.79971168494076e-06, + "loss": 25.4315, + "step": 90680 + }, + { + "epoch": 0.1831995378095242, + "grad_norm": 656.0177001953125, + "learning_rate": 9.799613865821608e-06, + "loss": 30.1317, + "step": 90690 
+ }, + { + "epoch": 0.183219738442208, + "grad_norm": 432.247802734375, + "learning_rate": 9.799516023309719e-06, + "loss": 25.4692, + "step": 90700 + }, + { + "epoch": 0.18323993907489183, + "grad_norm": 402.7593994140625, + "learning_rate": 9.799418157405571e-06, + "loss": 33.9424, + "step": 90710 + }, + { + "epoch": 0.18326013970757565, + "grad_norm": 1191.7305908203125, + "learning_rate": 9.799320268109644e-06, + "loss": 46.6512, + "step": 90720 + }, + { + "epoch": 0.18328034034025945, + "grad_norm": 299.83856201171875, + "learning_rate": 9.799222355422409e-06, + "loss": 15.2029, + "step": 90730 + }, + { + "epoch": 0.18330054097294327, + "grad_norm": 253.31137084960938, + "learning_rate": 9.799124419344348e-06, + "loss": 21.3899, + "step": 90740 + }, + { + "epoch": 0.18332074160562709, + "grad_norm": 243.34938049316406, + "learning_rate": 9.799026459875935e-06, + "loss": 16.0306, + "step": 90750 + }, + { + "epoch": 0.1833409422383109, + "grad_norm": 365.17510986328125, + "learning_rate": 9.798928477017651e-06, + "loss": 25.4496, + "step": 90760 + }, + { + "epoch": 0.18336114287099473, + "grad_norm": 239.15045166015625, + "learning_rate": 9.79883047076997e-06, + "loss": 25.564, + "step": 90770 + }, + { + "epoch": 0.18338134350367855, + "grad_norm": 797.671630859375, + "learning_rate": 9.798732441133372e-06, + "loss": 40.2145, + "step": 90780 + }, + { + "epoch": 0.18340154413636234, + "grad_norm": 653.4474487304688, + "learning_rate": 9.798634388108334e-06, + "loss": 32.0455, + "step": 90790 + }, + { + "epoch": 0.18342174476904616, + "grad_norm": 1001.5005493164062, + "learning_rate": 9.798536311695334e-06, + "loss": 32.0787, + "step": 90800 + }, + { + "epoch": 0.18344194540172998, + "grad_norm": 325.3088073730469, + "learning_rate": 9.79843821189485e-06, + "loss": 26.1869, + "step": 90810 + }, + { + "epoch": 0.1834621460344138, + "grad_norm": 540.4581298828125, + "learning_rate": 9.79834008870736e-06, + "loss": 18.94, + "step": 90820 + }, + { + "epoch": 0.18348234666709762, + "grad_norm": 318.7337341308594, + "learning_rate": 9.798241942133344e-06, + "loss": 32.0472, + "step": 90830 + }, + { + "epoch": 0.18350254729978144, + "grad_norm": 279.1095886230469, + "learning_rate": 9.798143772173276e-06, + "loss": 25.8265, + "step": 90840 + }, + { + "epoch": 0.18352274793246526, + "grad_norm": 270.6580505371094, + "learning_rate": 9.79804557882764e-06, + "loss": 18.2445, + "step": 90850 + }, + { + "epoch": 0.18354294856514905, + "grad_norm": 370.2752685546875, + "learning_rate": 9.797947362096909e-06, + "loss": 25.2333, + "step": 90860 + }, + { + "epoch": 0.18356314919783287, + "grad_norm": 352.0126037597656, + "learning_rate": 9.797849121981566e-06, + "loss": 16.6814, + "step": 90870 + }, + { + "epoch": 0.1835833498305167, + "grad_norm": 201.3636932373047, + "learning_rate": 9.797750858482088e-06, + "loss": 17.3969, + "step": 90880 + }, + { + "epoch": 0.1836035504632005, + "grad_norm": 564.9591064453125, + "learning_rate": 9.797652571598954e-06, + "loss": 25.868, + "step": 90890 + }, + { + "epoch": 0.18362375109588433, + "grad_norm": 35.203826904296875, + "learning_rate": 9.797554261332644e-06, + "loss": 25.6218, + "step": 90900 + }, + { + "epoch": 0.18364395172856815, + "grad_norm": 226.03759765625, + "learning_rate": 9.797455927683637e-06, + "loss": 28.4551, + "step": 90910 + }, + { + "epoch": 0.18366415236125194, + "grad_norm": 231.2720184326172, + "learning_rate": 9.79735757065241e-06, + "loss": 33.2498, + "step": 90920 + }, + { + "epoch": 0.18368435299393576, + "grad_norm": 
515.2210693359375, + "learning_rate": 9.797259190239444e-06, + "loss": 27.7162, + "step": 90930 + }, + { + "epoch": 0.18370455362661958, + "grad_norm": 429.1297607421875, + "learning_rate": 9.797160786445218e-06, + "loss": 25.1077, + "step": 90940 + }, + { + "epoch": 0.1837247542593034, + "grad_norm": 789.713134765625, + "learning_rate": 9.797062359270215e-06, + "loss": 30.9357, + "step": 90950 + }, + { + "epoch": 0.18374495489198722, + "grad_norm": 1423.0126953125, + "learning_rate": 9.79696390871491e-06, + "loss": 32.7171, + "step": 90960 + }, + { + "epoch": 0.18376515552467104, + "grad_norm": 95.33780670166016, + "learning_rate": 9.796865434779786e-06, + "loss": 26.9618, + "step": 90970 + }, + { + "epoch": 0.18378535615735486, + "grad_norm": 562.197021484375, + "learning_rate": 9.79676693746532e-06, + "loss": 26.0074, + "step": 90980 + }, + { + "epoch": 0.18380555679003865, + "grad_norm": 549.0386962890625, + "learning_rate": 9.796668416771996e-06, + "loss": 47.5324, + "step": 90990 + }, + { + "epoch": 0.18382575742272247, + "grad_norm": 720.4099731445312, + "learning_rate": 9.796569872700287e-06, + "loss": 20.1496, + "step": 91000 + }, + { + "epoch": 0.1838459580554063, + "grad_norm": 769.2049560546875, + "learning_rate": 9.796471305250683e-06, + "loss": 24.3029, + "step": 91010 + }, + { + "epoch": 0.18386615868809011, + "grad_norm": 400.9169616699219, + "learning_rate": 9.79637271442366e-06, + "loss": 19.8763, + "step": 91020 + }, + { + "epoch": 0.18388635932077393, + "grad_norm": 239.54544067382812, + "learning_rate": 9.796274100219693e-06, + "loss": 19.1498, + "step": 91030 + }, + { + "epoch": 0.18390655995345775, + "grad_norm": 306.354248046875, + "learning_rate": 9.796175462639273e-06, + "loss": 19.2447, + "step": 91040 + }, + { + "epoch": 0.18392676058614155, + "grad_norm": 604.0792846679688, + "learning_rate": 9.796076801682873e-06, + "loss": 33.2674, + "step": 91050 + }, + { + "epoch": 0.18394696121882537, + "grad_norm": 831.4409790039062, + "learning_rate": 9.795978117350976e-06, + "loss": 28.5236, + "step": 91060 + }, + { + "epoch": 0.1839671618515092, + "grad_norm": 374.80401611328125, + "learning_rate": 9.795879409644064e-06, + "loss": 25.0916, + "step": 91070 + }, + { + "epoch": 0.183987362484193, + "grad_norm": 621.6986083984375, + "learning_rate": 9.795780678562618e-06, + "loss": 24.8459, + "step": 91080 + }, + { + "epoch": 0.18400756311687683, + "grad_norm": 171.66006469726562, + "learning_rate": 9.79568192410712e-06, + "loss": 15.5722, + "step": 91090 + }, + { + "epoch": 0.18402776374956065, + "grad_norm": 483.9534606933594, + "learning_rate": 9.795583146278047e-06, + "loss": 34.7736, + "step": 91100 + }, + { + "epoch": 0.18404796438224444, + "grad_norm": 390.1487731933594, + "learning_rate": 9.795484345075882e-06, + "loss": 48.1763, + "step": 91110 + }, + { + "epoch": 0.18406816501492826, + "grad_norm": 468.73345947265625, + "learning_rate": 9.795385520501113e-06, + "loss": 28.4409, + "step": 91120 + }, + { + "epoch": 0.18408836564761208, + "grad_norm": 434.3601379394531, + "learning_rate": 9.795286672554214e-06, + "loss": 20.1435, + "step": 91130 + }, + { + "epoch": 0.1841085662802959, + "grad_norm": 322.37896728515625, + "learning_rate": 9.795187801235668e-06, + "loss": 28.5537, + "step": 91140 + }, + { + "epoch": 0.18412876691297972, + "grad_norm": 804.7830200195312, + "learning_rate": 9.795088906545959e-06, + "loss": 37.5352, + "step": 91150 + }, + { + "epoch": 0.18414896754566354, + "grad_norm": 487.69134521484375, + "learning_rate": 9.794989988485571e-06, + 
"loss": 26.9165, + "step": 91160 + }, + { + "epoch": 0.18416916817834736, + "grad_norm": 270.79864501953125, + "learning_rate": 9.79489104705498e-06, + "loss": 23.71, + "step": 91170 + }, + { + "epoch": 0.18418936881103115, + "grad_norm": 1238.611328125, + "learning_rate": 9.794792082254673e-06, + "loss": 23.848, + "step": 91180 + }, + { + "epoch": 0.18420956944371497, + "grad_norm": 356.26678466796875, + "learning_rate": 9.79469309408513e-06, + "loss": 29.2174, + "step": 91190 + }, + { + "epoch": 0.1842297700763988, + "grad_norm": 645.976318359375, + "learning_rate": 9.794594082546835e-06, + "loss": 21.5411, + "step": 91200 + }, + { + "epoch": 0.1842499707090826, + "grad_norm": 25.679527282714844, + "learning_rate": 9.794495047640271e-06, + "loss": 21.4862, + "step": 91210 + }, + { + "epoch": 0.18427017134176643, + "grad_norm": 502.16778564453125, + "learning_rate": 9.79439598936592e-06, + "loss": 19.6671, + "step": 91220 + }, + { + "epoch": 0.18429037197445025, + "grad_norm": 235.72203063964844, + "learning_rate": 9.794296907724262e-06, + "loss": 27.9619, + "step": 91230 + }, + { + "epoch": 0.18431057260713404, + "grad_norm": 429.992431640625, + "learning_rate": 9.794197802715784e-06, + "loss": 73.0211, + "step": 91240 + }, + { + "epoch": 0.18433077323981786, + "grad_norm": 625.5643920898438, + "learning_rate": 9.794098674340966e-06, + "loss": 33.1416, + "step": 91250 + }, + { + "epoch": 0.18435097387250168, + "grad_norm": 484.22747802734375, + "learning_rate": 9.793999522600293e-06, + "loss": 45.421, + "step": 91260 + }, + { + "epoch": 0.1843711745051855, + "grad_norm": 443.10504150390625, + "learning_rate": 9.793900347494248e-06, + "loss": 29.7148, + "step": 91270 + }, + { + "epoch": 0.18439137513786932, + "grad_norm": 676.413818359375, + "learning_rate": 9.793801149023315e-06, + "loss": 30.5974, + "step": 91280 + }, + { + "epoch": 0.18441157577055314, + "grad_norm": 449.8853454589844, + "learning_rate": 9.793701927187975e-06, + "loss": 24.3086, + "step": 91290 + }, + { + "epoch": 0.18443177640323694, + "grad_norm": 237.00990295410156, + "learning_rate": 9.793602681988714e-06, + "loss": 23.9471, + "step": 91300 + }, + { + "epoch": 0.18445197703592076, + "grad_norm": 2.009406805038452, + "learning_rate": 9.793503413426016e-06, + "loss": 25.3265, + "step": 91310 + }, + { + "epoch": 0.18447217766860458, + "grad_norm": 442.2948913574219, + "learning_rate": 9.793404121500362e-06, + "loss": 30.3534, + "step": 91320 + }, + { + "epoch": 0.1844923783012884, + "grad_norm": 292.3894958496094, + "learning_rate": 9.79330480621224e-06, + "loss": 22.5941, + "step": 91330 + }, + { + "epoch": 0.18451257893397222, + "grad_norm": 198.1782989501953, + "learning_rate": 9.793205467562131e-06, + "loss": 27.5463, + "step": 91340 + }, + { + "epoch": 0.18453277956665604, + "grad_norm": 285.3832092285156, + "learning_rate": 9.793106105550518e-06, + "loss": 11.2666, + "step": 91350 + }, + { + "epoch": 0.18455298019933986, + "grad_norm": 914.3174438476562, + "learning_rate": 9.793006720177887e-06, + "loss": 23.5066, + "step": 91360 + }, + { + "epoch": 0.18457318083202365, + "grad_norm": 302.8216857910156, + "learning_rate": 9.792907311444724e-06, + "loss": 26.5094, + "step": 91370 + }, + { + "epoch": 0.18459338146470747, + "grad_norm": 455.9059143066406, + "learning_rate": 9.792807879351513e-06, + "loss": 33.6376, + "step": 91380 + }, + { + "epoch": 0.1846135820973913, + "grad_norm": 337.9946594238281, + "learning_rate": 9.792708423898735e-06, + "loss": 12.2292, + "step": 91390 + }, + { + "epoch": 
0.1846337827300751, + "grad_norm": 432.6836242675781, + "learning_rate": 9.79260894508688e-06, + "loss": 21.2961, + "step": 91400 + }, + { + "epoch": 0.18465398336275893, + "grad_norm": 822.6380615234375, + "learning_rate": 9.79250944291643e-06, + "loss": 20.3763, + "step": 91410 + }, + { + "epoch": 0.18467418399544275, + "grad_norm": 714.7791137695312, + "learning_rate": 9.792409917387869e-06, + "loss": 18.4543, + "step": 91420 + }, + { + "epoch": 0.18469438462812654, + "grad_norm": 433.1051330566406, + "learning_rate": 9.792310368501684e-06, + "loss": 13.5443, + "step": 91430 + }, + { + "epoch": 0.18471458526081036, + "grad_norm": 684.8148803710938, + "learning_rate": 9.792210796258358e-06, + "loss": 21.7222, + "step": 91440 + }, + { + "epoch": 0.18473478589349418, + "grad_norm": 287.2908630371094, + "learning_rate": 9.79211120065838e-06, + "loss": 13.8875, + "step": 91450 + }, + { + "epoch": 0.184754986526178, + "grad_norm": 425.7033386230469, + "learning_rate": 9.792011581702234e-06, + "loss": 24.8602, + "step": 91460 + }, + { + "epoch": 0.18477518715886182, + "grad_norm": 231.6212158203125, + "learning_rate": 9.791911939390401e-06, + "loss": 11.3502, + "step": 91470 + }, + { + "epoch": 0.18479538779154564, + "grad_norm": 249.83753967285156, + "learning_rate": 9.791812273723374e-06, + "loss": 23.6826, + "step": 91480 + }, + { + "epoch": 0.18481558842422946, + "grad_norm": 499.1154479980469, + "learning_rate": 9.791712584701634e-06, + "loss": 30.1655, + "step": 91490 + }, + { + "epoch": 0.18483578905691325, + "grad_norm": 38.84739685058594, + "learning_rate": 9.791612872325667e-06, + "loss": 30.1333, + "step": 91500 + }, + { + "epoch": 0.18485598968959707, + "grad_norm": 517.4888305664062, + "learning_rate": 9.79151313659596e-06, + "loss": 29.1216, + "step": 91510 + }, + { + "epoch": 0.1848761903222809, + "grad_norm": 113.28861236572266, + "learning_rate": 9.791413377513001e-06, + "loss": 34.6165, + "step": 91520 + }, + { + "epoch": 0.1848963909549647, + "grad_norm": 360.3495788574219, + "learning_rate": 9.791313595077272e-06, + "loss": 19.8101, + "step": 91530 + }, + { + "epoch": 0.18491659158764853, + "grad_norm": 255.13893127441406, + "learning_rate": 9.791213789289264e-06, + "loss": 16.1137, + "step": 91540 + }, + { + "epoch": 0.18493679222033235, + "grad_norm": 652.9347534179688, + "learning_rate": 9.791113960149458e-06, + "loss": 15.4775, + "step": 91550 + }, + { + "epoch": 0.18495699285301614, + "grad_norm": 849.7731323242188, + "learning_rate": 9.791014107658348e-06, + "loss": 37.3864, + "step": 91560 + }, + { + "epoch": 0.18497719348569996, + "grad_norm": 361.5867614746094, + "learning_rate": 9.790914231816414e-06, + "loss": 13.7115, + "step": 91570 + }, + { + "epoch": 0.18499739411838378, + "grad_norm": 182.37893676757812, + "learning_rate": 9.790814332624144e-06, + "loss": 18.676, + "step": 91580 + }, + { + "epoch": 0.1850175947510676, + "grad_norm": 256.94537353515625, + "learning_rate": 9.790714410082027e-06, + "loss": 37.3471, + "step": 91590 + }, + { + "epoch": 0.18503779538375142, + "grad_norm": 177.5472412109375, + "learning_rate": 9.79061446419055e-06, + "loss": 11.4554, + "step": 91600 + }, + { + "epoch": 0.18505799601643524, + "grad_norm": 836.4344482421875, + "learning_rate": 9.790514494950196e-06, + "loss": 41.3177, + "step": 91610 + }, + { + "epoch": 0.18507819664911904, + "grad_norm": 246.44635009765625, + "learning_rate": 9.790414502361458e-06, + "loss": 36.0105, + "step": 91620 + }, + { + "epoch": 0.18509839728180286, + "grad_norm": 1101.7769775390625, + 
"learning_rate": 9.790314486424821e-06, + "loss": 25.5152, + "step": 91630 + }, + { + "epoch": 0.18511859791448668, + "grad_norm": 561.969970703125, + "learning_rate": 9.790214447140771e-06, + "loss": 35.5113, + "step": 91640 + }, + { + "epoch": 0.1851387985471705, + "grad_norm": 431.95269775390625, + "learning_rate": 9.790114384509796e-06, + "loss": 22.5607, + "step": 91650 + }, + { + "epoch": 0.18515899917985432, + "grad_norm": 326.224609375, + "learning_rate": 9.790014298532386e-06, + "loss": 10.0795, + "step": 91660 + }, + { + "epoch": 0.18517919981253814, + "grad_norm": 463.1079406738281, + "learning_rate": 9.789914189209028e-06, + "loss": 16.5434, + "step": 91670 + }, + { + "epoch": 0.18519940044522196, + "grad_norm": 89.60653686523438, + "learning_rate": 9.789814056540207e-06, + "loss": 35.3536, + "step": 91680 + }, + { + "epoch": 0.18521960107790575, + "grad_norm": 919.5565185546875, + "learning_rate": 9.789713900526415e-06, + "loss": 28.1427, + "step": 91690 + }, + { + "epoch": 0.18523980171058957, + "grad_norm": 409.40673828125, + "learning_rate": 9.789613721168138e-06, + "loss": 22.8103, + "step": 91700 + }, + { + "epoch": 0.1852600023432734, + "grad_norm": 670.8854370117188, + "learning_rate": 9.789513518465866e-06, + "loss": 30.5297, + "step": 91710 + }, + { + "epoch": 0.1852802029759572, + "grad_norm": 773.198486328125, + "learning_rate": 9.789413292420082e-06, + "loss": 23.2327, + "step": 91720 + }, + { + "epoch": 0.18530040360864103, + "grad_norm": 641.0538330078125, + "learning_rate": 9.789313043031281e-06, + "loss": 27.2788, + "step": 91730 + }, + { + "epoch": 0.18532060424132485, + "grad_norm": 253.89642333984375, + "learning_rate": 9.78921277029995e-06, + "loss": 14.6494, + "step": 91740 + }, + { + "epoch": 0.18534080487400864, + "grad_norm": 114.03929901123047, + "learning_rate": 9.789112474226575e-06, + "loss": 16.255, + "step": 91750 + }, + { + "epoch": 0.18536100550669246, + "grad_norm": 450.802001953125, + "learning_rate": 9.789012154811648e-06, + "loss": 24.0678, + "step": 91760 + }, + { + "epoch": 0.18538120613937628, + "grad_norm": 444.7924499511719, + "learning_rate": 9.788911812055656e-06, + "loss": 14.7521, + "step": 91770 + }, + { + "epoch": 0.1854014067720601, + "grad_norm": 41.541664123535156, + "learning_rate": 9.788811445959088e-06, + "loss": 21.4242, + "step": 91780 + }, + { + "epoch": 0.18542160740474392, + "grad_norm": 677.9859008789062, + "learning_rate": 9.788711056522436e-06, + "loss": 25.3031, + "step": 91790 + }, + { + "epoch": 0.18544180803742774, + "grad_norm": 320.2901916503906, + "learning_rate": 9.788610643746184e-06, + "loss": 19.8867, + "step": 91800 + }, + { + "epoch": 0.18546200867011156, + "grad_norm": 442.32427978515625, + "learning_rate": 9.788510207630825e-06, + "loss": 19.3791, + "step": 91810 + }, + { + "epoch": 0.18548220930279535, + "grad_norm": 373.7599792480469, + "learning_rate": 9.78840974817685e-06, + "loss": 27.9548, + "step": 91820 + }, + { + "epoch": 0.18550240993547917, + "grad_norm": 512.506591796875, + "learning_rate": 9.788309265384745e-06, + "loss": 25.047, + "step": 91830 + }, + { + "epoch": 0.185522610568163, + "grad_norm": 629.9691772460938, + "learning_rate": 9.788208759255003e-06, + "loss": 23.8249, + "step": 91840 + }, + { + "epoch": 0.1855428112008468, + "grad_norm": 461.40625, + "learning_rate": 9.788108229788111e-06, + "loss": 19.3849, + "step": 91850 + }, + { + "epoch": 0.18556301183353063, + "grad_norm": 686.3657836914062, + "learning_rate": 9.788007676984562e-06, + "loss": 29.3324, + "step": 91860 + }, + 
{ + "epoch": 0.18558321246621445, + "grad_norm": 797.767578125, + "learning_rate": 9.787907100844842e-06, + "loss": 36.3265, + "step": 91870 + }, + { + "epoch": 0.18560341309889825, + "grad_norm": 852.8424072265625, + "learning_rate": 9.787806501369446e-06, + "loss": 24.1865, + "step": 91880 + }, + { + "epoch": 0.18562361373158207, + "grad_norm": 404.8200988769531, + "learning_rate": 9.78770587855886e-06, + "loss": 25.7485, + "step": 91890 + }, + { + "epoch": 0.18564381436426589, + "grad_norm": 539.3692626953125, + "learning_rate": 9.787605232413575e-06, + "loss": 30.4667, + "step": 91900 + }, + { + "epoch": 0.1856640149969497, + "grad_norm": 1060.843017578125, + "learning_rate": 9.787504562934085e-06, + "loss": 32.7073, + "step": 91910 + }, + { + "epoch": 0.18568421562963353, + "grad_norm": 341.1324768066406, + "learning_rate": 9.787403870120877e-06, + "loss": 21.3345, + "step": 91920 + }, + { + "epoch": 0.18570441626231735, + "grad_norm": 225.0143280029297, + "learning_rate": 9.787303153974444e-06, + "loss": 25.7457, + "step": 91930 + }, + { + "epoch": 0.18572461689500114, + "grad_norm": 486.2966613769531, + "learning_rate": 9.787202414495275e-06, + "loss": 20.8555, + "step": 91940 + }, + { + "epoch": 0.18574481752768496, + "grad_norm": 128.24383544921875, + "learning_rate": 9.787101651683864e-06, + "loss": 15.369, + "step": 91950 + }, + { + "epoch": 0.18576501816036878, + "grad_norm": 1446.906005859375, + "learning_rate": 9.787000865540698e-06, + "loss": 27.5928, + "step": 91960 + }, + { + "epoch": 0.1857852187930526, + "grad_norm": 408.8646545410156, + "learning_rate": 9.786900056066272e-06, + "loss": 15.0458, + "step": 91970 + }, + { + "epoch": 0.18580541942573642, + "grad_norm": 128.8407745361328, + "learning_rate": 9.786799223261076e-06, + "loss": 30.458, + "step": 91980 + }, + { + "epoch": 0.18582562005842024, + "grad_norm": 135.77200317382812, + "learning_rate": 9.7866983671256e-06, + "loss": 22.3848, + "step": 91990 + }, + { + "epoch": 0.18584582069110406, + "grad_norm": 841.3544921875, + "learning_rate": 9.786597487660336e-06, + "loss": 20.4153, + "step": 92000 + }, + { + "epoch": 0.18586602132378785, + "grad_norm": 507.96136474609375, + "learning_rate": 9.786496584865778e-06, + "loss": 13.7371, + "step": 92010 + }, + { + "epoch": 0.18588622195647167, + "grad_norm": 196.4841766357422, + "learning_rate": 9.786395658742415e-06, + "loss": 17.8674, + "step": 92020 + }, + { + "epoch": 0.1859064225891555, + "grad_norm": 211.14559936523438, + "learning_rate": 9.786294709290741e-06, + "loss": 11.653, + "step": 92030 + }, + { + "epoch": 0.1859266232218393, + "grad_norm": 312.5054931640625, + "learning_rate": 9.786193736511247e-06, + "loss": 18.0755, + "step": 92040 + }, + { + "epoch": 0.18594682385452313, + "grad_norm": 338.7178955078125, + "learning_rate": 9.786092740404424e-06, + "loss": 18.7011, + "step": 92050 + }, + { + "epoch": 0.18596702448720695, + "grad_norm": 194.91444396972656, + "learning_rate": 9.78599172097077e-06, + "loss": 12.2531, + "step": 92060 + }, + { + "epoch": 0.18598722511989074, + "grad_norm": 577.6099853515625, + "learning_rate": 9.785890678210768e-06, + "loss": 27.4933, + "step": 92070 + }, + { + "epoch": 0.18600742575257456, + "grad_norm": 270.8672790527344, + "learning_rate": 9.785789612124916e-06, + "loss": 31.445, + "step": 92080 + }, + { + "epoch": 0.18602762638525838, + "grad_norm": 643.9032592773438, + "learning_rate": 9.785688522713707e-06, + "loss": 20.8608, + "step": 92090 + }, + { + "epoch": 0.1860478270179422, + "grad_norm": 533.7869262695312, + 
"learning_rate": 9.785587409977632e-06, + "loss": 47.6427, + "step": 92100 + }, + { + "epoch": 0.18606802765062602, + "grad_norm": 524.4956665039062, + "learning_rate": 9.785486273917184e-06, + "loss": 23.4507, + "step": 92110 + }, + { + "epoch": 0.18608822828330984, + "grad_norm": 647.6858520507812, + "learning_rate": 9.785385114532858e-06, + "loss": 25.3778, + "step": 92120 + }, + { + "epoch": 0.18610842891599366, + "grad_norm": 497.45330810546875, + "learning_rate": 9.785283931825143e-06, + "loss": 22.2115, + "step": 92130 + }, + { + "epoch": 0.18612862954867745, + "grad_norm": 124.12263488769531, + "learning_rate": 9.785182725794535e-06, + "loss": 51.5499, + "step": 92140 + }, + { + "epoch": 0.18614883018136127, + "grad_norm": 167.64231872558594, + "learning_rate": 9.785081496441528e-06, + "loss": 31.4775, + "step": 92150 + }, + { + "epoch": 0.1861690308140451, + "grad_norm": 205.71829223632812, + "learning_rate": 9.784980243766613e-06, + "loss": 15.2482, + "step": 92160 + }, + { + "epoch": 0.18618923144672891, + "grad_norm": 320.18206787109375, + "learning_rate": 9.784878967770286e-06, + "loss": 23.7468, + "step": 92170 + }, + { + "epoch": 0.18620943207941273, + "grad_norm": 1295.7110595703125, + "learning_rate": 9.784777668453039e-06, + "loss": 35.8464, + "step": 92180 + }, + { + "epoch": 0.18622963271209655, + "grad_norm": 475.7550964355469, + "learning_rate": 9.784676345815364e-06, + "loss": 29.358, + "step": 92190 + }, + { + "epoch": 0.18624983334478035, + "grad_norm": 651.8758544921875, + "learning_rate": 9.784574999857757e-06, + "loss": 18.2877, + "step": 92200 + }, + { + "epoch": 0.18627003397746417, + "grad_norm": 489.3287353515625, + "learning_rate": 9.784473630580713e-06, + "loss": 38.393, + "step": 92210 + }, + { + "epoch": 0.186290234610148, + "grad_norm": 632.679931640625, + "learning_rate": 9.784372237984726e-06, + "loss": 29.1181, + "step": 92220 + }, + { + "epoch": 0.1863104352428318, + "grad_norm": 826.5474243164062, + "learning_rate": 9.784270822070288e-06, + "loss": 27.4359, + "step": 92230 + }, + { + "epoch": 0.18633063587551563, + "grad_norm": 282.6408386230469, + "learning_rate": 9.784169382837893e-06, + "loss": 34.9363, + "step": 92240 + }, + { + "epoch": 0.18635083650819945, + "grad_norm": 435.7392883300781, + "learning_rate": 9.78406792028804e-06, + "loss": 15.9484, + "step": 92250 + }, + { + "epoch": 0.18637103714088324, + "grad_norm": 780.05419921875, + "learning_rate": 9.783966434421215e-06, + "loss": 19.4334, + "step": 92260 + }, + { + "epoch": 0.18639123777356706, + "grad_norm": 383.31512451171875, + "learning_rate": 9.783864925237922e-06, + "loss": 21.3674, + "step": 92270 + }, + { + "epoch": 0.18641143840625088, + "grad_norm": 221.4168701171875, + "learning_rate": 9.78376339273865e-06, + "loss": 23.3482, + "step": 92280 + }, + { + "epoch": 0.1864316390389347, + "grad_norm": 748.03125, + "learning_rate": 9.783661836923894e-06, + "loss": 29.0127, + "step": 92290 + }, + { + "epoch": 0.18645183967161852, + "grad_norm": 366.36187744140625, + "learning_rate": 9.783560257794153e-06, + "loss": 16.3667, + "step": 92300 + }, + { + "epoch": 0.18647204030430234, + "grad_norm": 933.26318359375, + "learning_rate": 9.783458655349919e-06, + "loss": 17.0001, + "step": 92310 + }, + { + "epoch": 0.18649224093698616, + "grad_norm": 792.078369140625, + "learning_rate": 9.783357029591686e-06, + "loss": 39.9801, + "step": 92320 + }, + { + "epoch": 0.18651244156966995, + "grad_norm": 556.2901611328125, + "learning_rate": 9.783255380519953e-06, + "loss": 39.2964, + "step": 
92330 + }, + { + "epoch": 0.18653264220235377, + "grad_norm": 847.1810913085938, + "learning_rate": 9.783153708135214e-06, + "loss": 37.5978, + "step": 92340 + }, + { + "epoch": 0.1865528428350376, + "grad_norm": 396.2494812011719, + "learning_rate": 9.783052012437962e-06, + "loss": 22.5996, + "step": 92350 + }, + { + "epoch": 0.1865730434677214, + "grad_norm": 538.60400390625, + "learning_rate": 9.782950293428695e-06, + "loss": 18.3907, + "step": 92360 + }, + { + "epoch": 0.18659324410040523, + "grad_norm": 394.4925842285156, + "learning_rate": 9.782848551107908e-06, + "loss": 16.942, + "step": 92370 + }, + { + "epoch": 0.18661344473308905, + "grad_norm": 597.7157592773438, + "learning_rate": 9.782746785476098e-06, + "loss": 20.4589, + "step": 92380 + }, + { + "epoch": 0.18663364536577284, + "grad_norm": 847.0343017578125, + "learning_rate": 9.78264499653376e-06, + "loss": 38.7538, + "step": 92390 + }, + { + "epoch": 0.18665384599845666, + "grad_norm": 930.4030151367188, + "learning_rate": 9.78254318428139e-06, + "loss": 24.3005, + "step": 92400 + }, + { + "epoch": 0.18667404663114048, + "grad_norm": 1564.1207275390625, + "learning_rate": 9.782441348719485e-06, + "loss": 54.1396, + "step": 92410 + }, + { + "epoch": 0.1866942472638243, + "grad_norm": 529.907470703125, + "learning_rate": 9.782339489848541e-06, + "loss": 40.0633, + "step": 92420 + }, + { + "epoch": 0.18671444789650812, + "grad_norm": 451.3714294433594, + "learning_rate": 9.782237607669053e-06, + "loss": 22.0001, + "step": 92430 + }, + { + "epoch": 0.18673464852919194, + "grad_norm": 470.5815124511719, + "learning_rate": 9.782135702181521e-06, + "loss": 14.8972, + "step": 92440 + }, + { + "epoch": 0.18675484916187576, + "grad_norm": 280.4551086425781, + "learning_rate": 9.782033773386439e-06, + "loss": 22.6161, + "step": 92450 + }, + { + "epoch": 0.18677504979455956, + "grad_norm": 862.764404296875, + "learning_rate": 9.781931821284305e-06, + "loss": 26.2604, + "step": 92460 + }, + { + "epoch": 0.18679525042724338, + "grad_norm": 690.3392944335938, + "learning_rate": 9.781829845875613e-06, + "loss": 44.8535, + "step": 92470 + }, + { + "epoch": 0.1868154510599272, + "grad_norm": 308.93902587890625, + "learning_rate": 9.781727847160865e-06, + "loss": 19.226, + "step": 92480 + }, + { + "epoch": 0.18683565169261102, + "grad_norm": 1005.1414794921875, + "learning_rate": 9.781625825140552e-06, + "loss": 34.8473, + "step": 92490 + }, + { + "epoch": 0.18685585232529484, + "grad_norm": 709.4344482421875, + "learning_rate": 9.781523779815178e-06, + "loss": 18.6231, + "step": 92500 + }, + { + "epoch": 0.18687605295797866, + "grad_norm": 551.372802734375, + "learning_rate": 9.781421711185236e-06, + "loss": 17.6513, + "step": 92510 + }, + { + "epoch": 0.18689625359066245, + "grad_norm": 771.9329223632812, + "learning_rate": 9.781319619251223e-06, + "loss": 49.4025, + "step": 92520 + }, + { + "epoch": 0.18691645422334627, + "grad_norm": 474.9455871582031, + "learning_rate": 9.78121750401364e-06, + "loss": 20.0667, + "step": 92530 + }, + { + "epoch": 0.1869366548560301, + "grad_norm": 354.9248962402344, + "learning_rate": 9.781115365472983e-06, + "loss": 29.108, + "step": 92540 + }, + { + "epoch": 0.1869568554887139, + "grad_norm": 910.9464111328125, + "learning_rate": 9.781013203629748e-06, + "loss": 17.285, + "step": 92550 + }, + { + "epoch": 0.18697705612139773, + "grad_norm": 612.3464965820312, + "learning_rate": 9.780911018484436e-06, + "loss": 23.8168, + "step": 92560 + }, + { + "epoch": 0.18699725675408155, + "grad_norm": 
194.9729461669922, + "learning_rate": 9.780808810037543e-06, + "loss": 19.419, + "step": 92570 + }, + { + "epoch": 0.18701745738676534, + "grad_norm": 665.943359375, + "learning_rate": 9.780706578289567e-06, + "loss": 29.7412, + "step": 92580 + }, + { + "epoch": 0.18703765801944916, + "grad_norm": 522.5048217773438, + "learning_rate": 9.780604323241007e-06, + "loss": 24.8991, + "step": 92590 + }, + { + "epoch": 0.18705785865213298, + "grad_norm": 104.87069702148438, + "learning_rate": 9.780502044892363e-06, + "loss": 12.9761, + "step": 92600 + }, + { + "epoch": 0.1870780592848168, + "grad_norm": 1118.92822265625, + "learning_rate": 9.78039974324413e-06, + "loss": 30.0879, + "step": 92610 + }, + { + "epoch": 0.18709825991750062, + "grad_norm": 348.06439208984375, + "learning_rate": 9.78029741829681e-06, + "loss": 26.8473, + "step": 92620 + }, + { + "epoch": 0.18711846055018444, + "grad_norm": 249.37655639648438, + "learning_rate": 9.780195070050898e-06, + "loss": 14.2755, + "step": 92630 + }, + { + "epoch": 0.18713866118286826, + "grad_norm": 492.69427490234375, + "learning_rate": 9.780092698506897e-06, + "loss": 26.6387, + "step": 92640 + }, + { + "epoch": 0.18715886181555205, + "grad_norm": 268.4568176269531, + "learning_rate": 9.779990303665303e-06, + "loss": 13.0258, + "step": 92650 + }, + { + "epoch": 0.18717906244823587, + "grad_norm": 641.7935791015625, + "learning_rate": 9.779887885526616e-06, + "loss": 17.125, + "step": 92660 + }, + { + "epoch": 0.1871992630809197, + "grad_norm": 342.4461975097656, + "learning_rate": 9.779785444091336e-06, + "loss": 12.7438, + "step": 92670 + }, + { + "epoch": 0.1872194637136035, + "grad_norm": 17.85383415222168, + "learning_rate": 9.779682979359961e-06, + "loss": 21.3807, + "step": 92680 + }, + { + "epoch": 0.18723966434628733, + "grad_norm": 701.517578125, + "learning_rate": 9.77958049133299e-06, + "loss": 26.6243, + "step": 92690 + }, + { + "epoch": 0.18725986497897115, + "grad_norm": 257.793212890625, + "learning_rate": 9.779477980010924e-06, + "loss": 37.1666, + "step": 92700 + }, + { + "epoch": 0.18728006561165494, + "grad_norm": 485.3106689453125, + "learning_rate": 9.779375445394262e-06, + "loss": 20.7584, + "step": 92710 + }, + { + "epoch": 0.18730026624433876, + "grad_norm": 204.590087890625, + "learning_rate": 9.779272887483503e-06, + "loss": 28.2374, + "step": 92720 + }, + { + "epoch": 0.18732046687702258, + "grad_norm": 116.32440185546875, + "learning_rate": 9.77917030627915e-06, + "loss": 29.7355, + "step": 92730 + }, + { + "epoch": 0.1873406675097064, + "grad_norm": 208.10128784179688, + "learning_rate": 9.779067701781698e-06, + "loss": 26.3415, + "step": 92740 + }, + { + "epoch": 0.18736086814239022, + "grad_norm": 862.18505859375, + "learning_rate": 9.778965073991652e-06, + "loss": 39.0226, + "step": 92750 + }, + { + "epoch": 0.18738106877507404, + "grad_norm": 312.6378479003906, + "learning_rate": 9.778862422909507e-06, + "loss": 19.8167, + "step": 92760 + }, + { + "epoch": 0.18740126940775786, + "grad_norm": 520.6155395507812, + "learning_rate": 9.778759748535768e-06, + "loss": 27.1971, + "step": 92770 + }, + { + "epoch": 0.18742147004044166, + "grad_norm": 31.449466705322266, + "learning_rate": 9.778657050870934e-06, + "loss": 22.8948, + "step": 92780 + }, + { + "epoch": 0.18744167067312548, + "grad_norm": 405.7028503417969, + "learning_rate": 9.778554329915503e-06, + "loss": 32.0095, + "step": 92790 + }, + { + "epoch": 0.1874618713058093, + "grad_norm": 389.78369140625, + "learning_rate": 9.778451585669982e-06, + "loss": 
12.5171, + "step": 92800 + }, + { + "epoch": 0.18748207193849312, + "grad_norm": 260.9162902832031, + "learning_rate": 9.778348818134864e-06, + "loss": 32.6285, + "step": 92810 + }, + { + "epoch": 0.18750227257117694, + "grad_norm": 938.2512817382812, + "learning_rate": 9.778246027310654e-06, + "loss": 46.6952, + "step": 92820 + }, + { + "epoch": 0.18752247320386076, + "grad_norm": 9.68444538116455, + "learning_rate": 9.778143213197852e-06, + "loss": 25.1359, + "step": 92830 + }, + { + "epoch": 0.18754267383654455, + "grad_norm": 164.32061767578125, + "learning_rate": 9.77804037579696e-06, + "loss": 22.7962, + "step": 92840 + }, + { + "epoch": 0.18756287446922837, + "grad_norm": 472.6469421386719, + "learning_rate": 9.777937515108478e-06, + "loss": 17.5599, + "step": 92850 + }, + { + "epoch": 0.1875830751019122, + "grad_norm": 123.95637512207031, + "learning_rate": 9.77783463113291e-06, + "loss": 26.1313, + "step": 92860 + }, + { + "epoch": 0.187603275734596, + "grad_norm": 231.21205139160156, + "learning_rate": 9.777731723870753e-06, + "loss": 15.3762, + "step": 92870 + }, + { + "epoch": 0.18762347636727983, + "grad_norm": 223.0507354736328, + "learning_rate": 9.777628793322513e-06, + "loss": 23.4335, + "step": 92880 + }, + { + "epoch": 0.18764367699996365, + "grad_norm": 430.06610107421875, + "learning_rate": 9.777525839488688e-06, + "loss": 28.6041, + "step": 92890 + }, + { + "epoch": 0.18766387763264744, + "grad_norm": 294.2397766113281, + "learning_rate": 9.777422862369782e-06, + "loss": 27.0556, + "step": 92900 + }, + { + "epoch": 0.18768407826533126, + "grad_norm": 972.4275512695312, + "learning_rate": 9.777319861966298e-06, + "loss": 26.982, + "step": 92910 + }, + { + "epoch": 0.18770427889801508, + "grad_norm": 198.08999633789062, + "learning_rate": 9.777216838278735e-06, + "loss": 13.3893, + "step": 92920 + }, + { + "epoch": 0.1877244795306989, + "grad_norm": 604.3071899414062, + "learning_rate": 9.777113791307597e-06, + "loss": 32.195, + "step": 92930 + }, + { + "epoch": 0.18774468016338272, + "grad_norm": 148.30014038085938, + "learning_rate": 9.777010721053387e-06, + "loss": 26.3873, + "step": 92940 + }, + { + "epoch": 0.18776488079606654, + "grad_norm": 605.6109619140625, + "learning_rate": 9.776907627516604e-06, + "loss": 36.4842, + "step": 92950 + }, + { + "epoch": 0.18778508142875036, + "grad_norm": 257.2639465332031, + "learning_rate": 9.776804510697753e-06, + "loss": 23.2307, + "step": 92960 + }, + { + "epoch": 0.18780528206143415, + "grad_norm": 351.9004821777344, + "learning_rate": 9.776701370597337e-06, + "loss": 18.8501, + "step": 92970 + }, + { + "epoch": 0.18782548269411797, + "grad_norm": 55.3215446472168, + "learning_rate": 9.776598207215857e-06, + "loss": 30.9671, + "step": 92980 + }, + { + "epoch": 0.1878456833268018, + "grad_norm": 394.4371337890625, + "learning_rate": 9.776495020553817e-06, + "loss": 17.9072, + "step": 92990 + }, + { + "epoch": 0.1878658839594856, + "grad_norm": 586.047119140625, + "learning_rate": 9.776391810611719e-06, + "loss": 12.5296, + "step": 93000 + }, + { + "epoch": 0.18788608459216943, + "grad_norm": 338.12921142578125, + "learning_rate": 9.776288577390067e-06, + "loss": 13.8105, + "step": 93010 + }, + { + "epoch": 0.18790628522485325, + "grad_norm": 266.59466552734375, + "learning_rate": 9.776185320889364e-06, + "loss": 15.234, + "step": 93020 + }, + { + "epoch": 0.18792648585753705, + "grad_norm": 553.8460693359375, + "learning_rate": 9.776082041110112e-06, + "loss": 9.0351, + "step": 93030 + }, + { + "epoch": 0.18794668649022087, 
+ "grad_norm": 422.61676025390625, + "learning_rate": 9.775978738052818e-06, + "loss": 24.472, + "step": 93040 + }, + { + "epoch": 0.18796688712290469, + "grad_norm": 215.39242553710938, + "learning_rate": 9.775875411717981e-06, + "loss": 22.547, + "step": 93050 + }, + { + "epoch": 0.1879870877555885, + "grad_norm": 240.72828674316406, + "learning_rate": 9.775772062106106e-06, + "loss": 28.4686, + "step": 93060 + }, + { + "epoch": 0.18800728838827233, + "grad_norm": 452.4482727050781, + "learning_rate": 9.775668689217698e-06, + "loss": 15.161, + "step": 93070 + }, + { + "epoch": 0.18802748902095615, + "grad_norm": 371.5116882324219, + "learning_rate": 9.775565293053262e-06, + "loss": 24.6782, + "step": 93080 + }, + { + "epoch": 0.18804768965363997, + "grad_norm": 82.33761596679688, + "learning_rate": 9.775461873613297e-06, + "loss": 20.9432, + "step": 93090 + }, + { + "epoch": 0.18806789028632376, + "grad_norm": 297.91217041015625, + "learning_rate": 9.775358430898311e-06, + "loss": 26.2282, + "step": 93100 + }, + { + "epoch": 0.18808809091900758, + "grad_norm": 194.85494995117188, + "learning_rate": 9.775254964908807e-06, + "loss": 19.049, + "step": 93110 + }, + { + "epoch": 0.1881082915516914, + "grad_norm": 302.1685791015625, + "learning_rate": 9.77515147564529e-06, + "loss": 17.2638, + "step": 93120 + }, + { + "epoch": 0.18812849218437522, + "grad_norm": 424.76251220703125, + "learning_rate": 9.775047963108264e-06, + "loss": 14.7522, + "step": 93130 + }, + { + "epoch": 0.18814869281705904, + "grad_norm": 386.8235168457031, + "learning_rate": 9.774944427298232e-06, + "loss": 22.5074, + "step": 93140 + }, + { + "epoch": 0.18816889344974286, + "grad_norm": 451.16705322265625, + "learning_rate": 9.7748408682157e-06, + "loss": 40.7772, + "step": 93150 + }, + { + "epoch": 0.18818909408242665, + "grad_norm": 280.4852600097656, + "learning_rate": 9.774737285861176e-06, + "loss": 32.5479, + "step": 93160 + }, + { + "epoch": 0.18820929471511047, + "grad_norm": 634.6343994140625, + "learning_rate": 9.774633680235158e-06, + "loss": 22.3752, + "step": 93170 + }, + { + "epoch": 0.1882294953477943, + "grad_norm": 345.7625732421875, + "learning_rate": 9.774530051338155e-06, + "loss": 15.5305, + "step": 93180 + }, + { + "epoch": 0.1882496959804781, + "grad_norm": 328.84136962890625, + "learning_rate": 9.774426399170673e-06, + "loss": 22.2794, + "step": 93190 + }, + { + "epoch": 0.18826989661316193, + "grad_norm": 306.8594055175781, + "learning_rate": 9.774322723733216e-06, + "loss": 18.519, + "step": 93200 + }, + { + "epoch": 0.18829009724584575, + "grad_norm": 433.818603515625, + "learning_rate": 9.774219025026289e-06, + "loss": 24.7969, + "step": 93210 + }, + { + "epoch": 0.18831029787852954, + "grad_norm": 208.9978485107422, + "learning_rate": 9.774115303050395e-06, + "loss": 34.9355, + "step": 93220 + }, + { + "epoch": 0.18833049851121336, + "grad_norm": 384.2287292480469, + "learning_rate": 9.774011557806044e-06, + "loss": 13.7821, + "step": 93230 + }, + { + "epoch": 0.18835069914389718, + "grad_norm": 235.39218139648438, + "learning_rate": 9.773907789293739e-06, + "loss": 14.9591, + "step": 93240 + }, + { + "epoch": 0.188370899776581, + "grad_norm": 140.73678588867188, + "learning_rate": 9.77380399751399e-06, + "loss": 16.6937, + "step": 93250 + }, + { + "epoch": 0.18839110040926482, + "grad_norm": 756.7713623046875, + "learning_rate": 9.773700182467295e-06, + "loss": 28.5085, + "step": 93260 + }, + { + "epoch": 0.18841130104194864, + "grad_norm": 630.3973999023438, + "learning_rate": 
9.773596344154165e-06, + "loss": 14.0977, + "step": 93270 + }, + { + "epoch": 0.18843150167463246, + "grad_norm": 329.23321533203125, + "learning_rate": 9.773492482575106e-06, + "loss": 36.9815, + "step": 93280 + }, + { + "epoch": 0.18845170230731625, + "grad_norm": 262.5638732910156, + "learning_rate": 9.773388597730623e-06, + "loss": 21.7334, + "step": 93290 + }, + { + "epoch": 0.18847190294000007, + "grad_norm": 658.1475219726562, + "learning_rate": 9.773284689621223e-06, + "loss": 31.7606, + "step": 93300 + }, + { + "epoch": 0.1884921035726839, + "grad_norm": 292.66546630859375, + "learning_rate": 9.773180758247413e-06, + "loss": 37.4666, + "step": 93310 + }, + { + "epoch": 0.18851230420536771, + "grad_norm": 109.0897445678711, + "learning_rate": 9.773076803609699e-06, + "loss": 10.9353, + "step": 93320 + }, + { + "epoch": 0.18853250483805153, + "grad_norm": 136.06051635742188, + "learning_rate": 9.772972825708587e-06, + "loss": 9.4173, + "step": 93330 + }, + { + "epoch": 0.18855270547073535, + "grad_norm": 190.45758056640625, + "learning_rate": 9.772868824544585e-06, + "loss": 28.1177, + "step": 93340 + }, + { + "epoch": 0.18857290610341915, + "grad_norm": 313.33502197265625, + "learning_rate": 9.7727648001182e-06, + "loss": 31.6668, + "step": 93350 + }, + { + "epoch": 0.18859310673610297, + "grad_norm": 445.6346740722656, + "learning_rate": 9.772660752429937e-06, + "loss": 21.3606, + "step": 93360 + }, + { + "epoch": 0.1886133073687868, + "grad_norm": 88.0118408203125, + "learning_rate": 9.772556681480303e-06, + "loss": 10.1299, + "step": 93370 + }, + { + "epoch": 0.1886335080014706, + "grad_norm": 262.5257263183594, + "learning_rate": 9.772452587269808e-06, + "loss": 22.9904, + "step": 93380 + }, + { + "epoch": 0.18865370863415443, + "grad_norm": 371.00152587890625, + "learning_rate": 9.772348469798958e-06, + "loss": 27.2311, + "step": 93390 + }, + { + "epoch": 0.18867390926683825, + "grad_norm": 501.8653564453125, + "learning_rate": 9.772244329068261e-06, + "loss": 23.2194, + "step": 93400 + }, + { + "epoch": 0.18869410989952207, + "grad_norm": 358.61175537109375, + "learning_rate": 9.772140165078223e-06, + "loss": 30.7846, + "step": 93410 + }, + { + "epoch": 0.18871431053220586, + "grad_norm": 191.6844482421875, + "learning_rate": 9.772035977829352e-06, + "loss": 52.7518, + "step": 93420 + }, + { + "epoch": 0.18873451116488968, + "grad_norm": 513.1759643554688, + "learning_rate": 9.771931767322158e-06, + "loss": 37.1406, + "step": 93430 + }, + { + "epoch": 0.1887547117975735, + "grad_norm": 528.90087890625, + "learning_rate": 9.771827533557147e-06, + "loss": 21.9348, + "step": 93440 + }, + { + "epoch": 0.18877491243025732, + "grad_norm": 364.3177185058594, + "learning_rate": 9.771723276534825e-06, + "loss": 19.5551, + "step": 93450 + }, + { + "epoch": 0.18879511306294114, + "grad_norm": 203.65122985839844, + "learning_rate": 9.771618996255704e-06, + "loss": 26.3584, + "step": 93460 + }, + { + "epoch": 0.18881531369562496, + "grad_norm": 411.6865539550781, + "learning_rate": 9.771514692720293e-06, + "loss": 13.1048, + "step": 93470 + }, + { + "epoch": 0.18883551432830875, + "grad_norm": 280.50433349609375, + "learning_rate": 9.771410365929097e-06, + "loss": 19.4224, + "step": 93480 + }, + { + "epoch": 0.18885571496099257, + "grad_norm": 96.84584045410156, + "learning_rate": 9.771306015882624e-06, + "loss": 20.947, + "step": 93490 + }, + { + "epoch": 0.1888759155936764, + "grad_norm": 635.2235107421875, + "learning_rate": 9.771201642581384e-06, + "loss": 30.9145, + "step": 93500 + 
}, + { + "epoch": 0.1888961162263602, + "grad_norm": 374.6190185546875, + "learning_rate": 9.771097246025889e-06, + "loss": 21.1268, + "step": 93510 + }, + { + "epoch": 0.18891631685904403, + "grad_norm": 412.28717041015625, + "learning_rate": 9.770992826216642e-06, + "loss": 19.0786, + "step": 93520 + }, + { + "epoch": 0.18893651749172785, + "grad_norm": 507.44390869140625, + "learning_rate": 9.770888383154156e-06, + "loss": 26.3149, + "step": 93530 + }, + { + "epoch": 0.18895671812441164, + "grad_norm": 651.7052001953125, + "learning_rate": 9.770783916838938e-06, + "loss": 39.9074, + "step": 93540 + }, + { + "epoch": 0.18897691875709546, + "grad_norm": 290.569091796875, + "learning_rate": 9.770679427271496e-06, + "loss": 24.4273, + "step": 93550 + }, + { + "epoch": 0.18899711938977928, + "grad_norm": 2062.24560546875, + "learning_rate": 9.770574914452343e-06, + "loss": 38.4955, + "step": 93560 + }, + { + "epoch": 0.1890173200224631, + "grad_norm": 887.5333251953125, + "learning_rate": 9.770470378381986e-06, + "loss": 25.2377, + "step": 93570 + }, + { + "epoch": 0.18903752065514692, + "grad_norm": 755.4471435546875, + "learning_rate": 9.770365819060936e-06, + "loss": 40.5245, + "step": 93580 + }, + { + "epoch": 0.18905772128783074, + "grad_norm": 604.2175903320312, + "learning_rate": 9.7702612364897e-06, + "loss": 20.7595, + "step": 93590 + }, + { + "epoch": 0.18907792192051456, + "grad_norm": 23.88644027709961, + "learning_rate": 9.77015663066879e-06, + "loss": 17.105, + "step": 93600 + }, + { + "epoch": 0.18909812255319836, + "grad_norm": 13.520434379577637, + "learning_rate": 9.770052001598716e-06, + "loss": 17.5936, + "step": 93610 + }, + { + "epoch": 0.18911832318588218, + "grad_norm": 192.54385375976562, + "learning_rate": 9.769947349279987e-06, + "loss": 19.7351, + "step": 93620 + }, + { + "epoch": 0.189138523818566, + "grad_norm": 564.4246215820312, + "learning_rate": 9.769842673713112e-06, + "loss": 30.1727, + "step": 93630 + }, + { + "epoch": 0.18915872445124982, + "grad_norm": 751.2989501953125, + "learning_rate": 9.769737974898602e-06, + "loss": 20.6267, + "step": 93640 + }, + { + "epoch": 0.18917892508393364, + "grad_norm": 257.06866455078125, + "learning_rate": 9.769633252836969e-06, + "loss": 31.7081, + "step": 93650 + }, + { + "epoch": 0.18919912571661746, + "grad_norm": 168.4287567138672, + "learning_rate": 9.76952850752872e-06, + "loss": 9.743, + "step": 93660 + }, + { + "epoch": 0.18921932634930125, + "grad_norm": 414.6647644042969, + "learning_rate": 9.76942373897437e-06, + "loss": 23.1347, + "step": 93670 + }, + { + "epoch": 0.18923952698198507, + "grad_norm": 330.8674621582031, + "learning_rate": 9.769318947174426e-06, + "loss": 30.5585, + "step": 93680 + }, + { + "epoch": 0.1892597276146689, + "grad_norm": 100.80751037597656, + "learning_rate": 9.769214132129399e-06, + "loss": 29.8406, + "step": 93690 + }, + { + "epoch": 0.1892799282473527, + "grad_norm": 278.685302734375, + "learning_rate": 9.769109293839803e-06, + "loss": 21.5444, + "step": 93700 + }, + { + "epoch": 0.18930012888003653, + "grad_norm": 263.3114318847656, + "learning_rate": 9.769004432306145e-06, + "loss": 20.2435, + "step": 93710 + }, + { + "epoch": 0.18932032951272035, + "grad_norm": 523.59326171875, + "learning_rate": 9.768899547528939e-06, + "loss": 23.3171, + "step": 93720 + }, + { + "epoch": 0.18934053014540417, + "grad_norm": 618.2498168945312, + "learning_rate": 9.768794639508693e-06, + "loss": 26.2153, + "step": 93730 + }, + { + "epoch": 0.18936073077808796, + "grad_norm": 
273.69769287109375, + "learning_rate": 9.768689708245921e-06, + "loss": 24.2667, + "step": 93740 + }, + { + "epoch": 0.18938093141077178, + "grad_norm": 696.7081298828125, + "learning_rate": 9.768584753741134e-06, + "loss": 21.3982, + "step": 93750 + }, + { + "epoch": 0.1894011320434556, + "grad_norm": 820.3089599609375, + "learning_rate": 9.768479775994846e-06, + "loss": 20.8917, + "step": 93760 + }, + { + "epoch": 0.18942133267613942, + "grad_norm": 160.27163696289062, + "learning_rate": 9.768374775007562e-06, + "loss": 17.6037, + "step": 93770 + }, + { + "epoch": 0.18944153330882324, + "grad_norm": 167.3863067626953, + "learning_rate": 9.7682697507798e-06, + "loss": 36.3596, + "step": 93780 + }, + { + "epoch": 0.18946173394150706, + "grad_norm": 382.2495422363281, + "learning_rate": 9.768164703312068e-06, + "loss": 33.2881, + "step": 93790 + }, + { + "epoch": 0.18948193457419085, + "grad_norm": 129.846435546875, + "learning_rate": 9.768059632604881e-06, + "loss": 22.1885, + "step": 93800 + }, + { + "epoch": 0.18950213520687467, + "grad_norm": 328.9864501953125, + "learning_rate": 9.767954538658749e-06, + "loss": 33.0496, + "step": 93810 + }, + { + "epoch": 0.1895223358395585, + "grad_norm": 207.57334899902344, + "learning_rate": 9.767849421474185e-06, + "loss": 17.3691, + "step": 93820 + }, + { + "epoch": 0.1895425364722423, + "grad_norm": 0.0, + "learning_rate": 9.767744281051702e-06, + "loss": 11.1974, + "step": 93830 + }, + { + "epoch": 0.18956273710492613, + "grad_norm": 599.1231689453125, + "learning_rate": 9.76763911739181e-06, + "loss": 31.3294, + "step": 93840 + }, + { + "epoch": 0.18958293773760995, + "grad_norm": 425.5417175292969, + "learning_rate": 9.767533930495023e-06, + "loss": 14.5931, + "step": 93850 + }, + { + "epoch": 0.18960313837029374, + "grad_norm": 454.7153625488281, + "learning_rate": 9.767428720361854e-06, + "loss": 21.8906, + "step": 93860 + }, + { + "epoch": 0.18962333900297756, + "grad_norm": 1088.3084716796875, + "learning_rate": 9.767323486992816e-06, + "loss": 34.9469, + "step": 93870 + }, + { + "epoch": 0.18964353963566138, + "grad_norm": 281.15972900390625, + "learning_rate": 9.767218230388423e-06, + "loss": 21.4637, + "step": 93880 + }, + { + "epoch": 0.1896637402683452, + "grad_norm": 267.9508361816406, + "learning_rate": 9.767112950549184e-06, + "loss": 20.8022, + "step": 93890 + }, + { + "epoch": 0.18968394090102902, + "grad_norm": 424.75604248046875, + "learning_rate": 9.767007647475618e-06, + "loss": 30.6686, + "step": 93900 + }, + { + "epoch": 0.18970414153371284, + "grad_norm": 584.2125854492188, + "learning_rate": 9.766902321168232e-06, + "loss": 22.9802, + "step": 93910 + }, + { + "epoch": 0.18972434216639666, + "grad_norm": 344.3031311035156, + "learning_rate": 9.766796971627543e-06, + "loss": 19.7187, + "step": 93920 + }, + { + "epoch": 0.18974454279908046, + "grad_norm": 334.32958984375, + "learning_rate": 9.766691598854064e-06, + "loss": 15.5833, + "step": 93930 + }, + { + "epoch": 0.18976474343176428, + "grad_norm": 393.0152587890625, + "learning_rate": 9.766586202848306e-06, + "loss": 22.1252, + "step": 93940 + }, + { + "epoch": 0.1897849440644481, + "grad_norm": 976.2451782226562, + "learning_rate": 9.766480783610789e-06, + "loss": 16.7059, + "step": 93950 + }, + { + "epoch": 0.18980514469713192, + "grad_norm": 655.0115356445312, + "learning_rate": 9.76637534114202e-06, + "loss": 21.6289, + "step": 93960 + }, + { + "epoch": 0.18982534532981574, + "grad_norm": 672.9705810546875, + "learning_rate": 9.766269875442517e-06, + "loss": 
26.8585, + "step": 93970 + }, + { + "epoch": 0.18984554596249956, + "grad_norm": 481.5218200683594, + "learning_rate": 9.766164386512794e-06, + "loss": 19.6168, + "step": 93980 + }, + { + "epoch": 0.18986574659518335, + "grad_norm": 416.34222412109375, + "learning_rate": 9.766058874353361e-06, + "loss": 15.9359, + "step": 93990 + }, + { + "epoch": 0.18988594722786717, + "grad_norm": 1040.742919921875, + "learning_rate": 9.765953338964736e-06, + "loss": 39.0241, + "step": 94000 + }, + { + "epoch": 0.189906147860551, + "grad_norm": 472.5757141113281, + "learning_rate": 9.765847780347433e-06, + "loss": 26.1622, + "step": 94010 + }, + { + "epoch": 0.1899263484932348, + "grad_norm": 55.61189270019531, + "learning_rate": 9.765742198501965e-06, + "loss": 32.746, + "step": 94020 + }, + { + "epoch": 0.18994654912591863, + "grad_norm": 818.5496826171875, + "learning_rate": 9.765636593428849e-06, + "loss": 21.8989, + "step": 94030 + }, + { + "epoch": 0.18996674975860245, + "grad_norm": 689.7357788085938, + "learning_rate": 9.765530965128597e-06, + "loss": 31.6282, + "step": 94040 + }, + { + "epoch": 0.18998695039128627, + "grad_norm": 745.34814453125, + "learning_rate": 9.765425313601726e-06, + "loss": 22.1681, + "step": 94050 + }, + { + "epoch": 0.19000715102397006, + "grad_norm": 268.80755615234375, + "learning_rate": 9.765319638848749e-06, + "loss": 13.777, + "step": 94060 + }, + { + "epoch": 0.19002735165665388, + "grad_norm": 488.5350341796875, + "learning_rate": 9.765213940870183e-06, + "loss": 31.8917, + "step": 94070 + }, + { + "epoch": 0.1900475522893377, + "grad_norm": 175.02919006347656, + "learning_rate": 9.765108219666542e-06, + "loss": 21.4682, + "step": 94080 + }, + { + "epoch": 0.19006775292202152, + "grad_norm": 109.80484008789062, + "learning_rate": 9.76500247523834e-06, + "loss": 15.6488, + "step": 94090 + }, + { + "epoch": 0.19008795355470534, + "grad_norm": 369.6401672363281, + "learning_rate": 9.764896707586095e-06, + "loss": 21.3393, + "step": 94100 + }, + { + "epoch": 0.19010815418738916, + "grad_norm": 887.25634765625, + "learning_rate": 9.76479091671032e-06, + "loss": 15.4807, + "step": 94110 + }, + { + "epoch": 0.19012835482007295, + "grad_norm": 321.5193176269531, + "learning_rate": 9.764685102611535e-06, + "loss": 30.7634, + "step": 94120 + }, + { + "epoch": 0.19014855545275677, + "grad_norm": 418.4538879394531, + "learning_rate": 9.76457926529025e-06, + "loss": 11.3235, + "step": 94130 + }, + { + "epoch": 0.1901687560854406, + "grad_norm": 872.686279296875, + "learning_rate": 9.764473404746986e-06, + "loss": 30.4777, + "step": 94140 + }, + { + "epoch": 0.1901889567181244, + "grad_norm": 894.0331420898438, + "learning_rate": 9.764367520982255e-06, + "loss": 28.57, + "step": 94150 + }, + { + "epoch": 0.19020915735080823, + "grad_norm": 164.6750030517578, + "learning_rate": 9.764261613996574e-06, + "loss": 16.8684, + "step": 94160 + }, + { + "epoch": 0.19022935798349205, + "grad_norm": 317.54217529296875, + "learning_rate": 9.764155683790461e-06, + "loss": 12.447, + "step": 94170 + }, + { + "epoch": 0.19024955861617585, + "grad_norm": 338.030517578125, + "learning_rate": 9.76404973036443e-06, + "loss": 28.7921, + "step": 94180 + }, + { + "epoch": 0.19026975924885967, + "grad_norm": 258.0565185546875, + "learning_rate": 9.763943753719e-06, + "loss": 35.6858, + "step": 94190 + }, + { + "epoch": 0.19028995988154349, + "grad_norm": 303.80841064453125, + "learning_rate": 9.763837753854684e-06, + "loss": 18.3098, + "step": 94200 + }, + { + "epoch": 0.1903101605142273, + 
"grad_norm": 817.4520874023438, + "learning_rate": 9.763731730772001e-06, + "loss": 22.2052, + "step": 94210 + }, + { + "epoch": 0.19033036114691113, + "grad_norm": 1681.8250732421875, + "learning_rate": 9.763625684471467e-06, + "loss": 10.2072, + "step": 94220 + }, + { + "epoch": 0.19035056177959495, + "grad_norm": 275.3839416503906, + "learning_rate": 9.7635196149536e-06, + "loss": 28.9352, + "step": 94230 + }, + { + "epoch": 0.19037076241227877, + "grad_norm": 642.3046875, + "learning_rate": 9.763413522218917e-06, + "loss": 17.2307, + "step": 94240 + }, + { + "epoch": 0.19039096304496256, + "grad_norm": 534.1638793945312, + "learning_rate": 9.763307406267933e-06, + "loss": 44.2703, + "step": 94250 + }, + { + "epoch": 0.19041116367764638, + "grad_norm": 419.8201599121094, + "learning_rate": 9.763201267101165e-06, + "loss": 13.8423, + "step": 94260 + }, + { + "epoch": 0.1904313643103302, + "grad_norm": 297.29913330078125, + "learning_rate": 9.763095104719133e-06, + "loss": 18.2041, + "step": 94270 + }, + { + "epoch": 0.19045156494301402, + "grad_norm": 283.5640869140625, + "learning_rate": 9.762988919122354e-06, + "loss": 23.7125, + "step": 94280 + }, + { + "epoch": 0.19047176557569784, + "grad_norm": 276.22174072265625, + "learning_rate": 9.762882710311345e-06, + "loss": 20.0406, + "step": 94290 + }, + { + "epoch": 0.19049196620838166, + "grad_norm": 158.86599731445312, + "learning_rate": 9.762776478286622e-06, + "loss": 33.5239, + "step": 94300 + }, + { + "epoch": 0.19051216684106545, + "grad_norm": 921.1744995117188, + "learning_rate": 9.762670223048705e-06, + "loss": 35.5041, + "step": 94310 + }, + { + "epoch": 0.19053236747374927, + "grad_norm": 422.2298278808594, + "learning_rate": 9.76256394459811e-06, + "loss": 16.1197, + "step": 94320 + }, + { + "epoch": 0.1905525681064331, + "grad_norm": 611.1220092773438, + "learning_rate": 9.762457642935357e-06, + "loss": 13.1988, + "step": 94330 + }, + { + "epoch": 0.1905727687391169, + "grad_norm": 116.6915054321289, + "learning_rate": 9.762351318060962e-06, + "loss": 19.1604, + "step": 94340 + }, + { + "epoch": 0.19059296937180073, + "grad_norm": 87.05732727050781, + "learning_rate": 9.762244969975446e-06, + "loss": 18.2294, + "step": 94350 + }, + { + "epoch": 0.19061317000448455, + "grad_norm": 305.0800476074219, + "learning_rate": 9.762138598679324e-06, + "loss": 30.1884, + "step": 94360 + }, + { + "epoch": 0.19063337063716834, + "grad_norm": 272.8003845214844, + "learning_rate": 9.762032204173116e-06, + "loss": 25.5305, + "step": 94370 + }, + { + "epoch": 0.19065357126985216, + "grad_norm": 310.4867248535156, + "learning_rate": 9.761925786457343e-06, + "loss": 21.7396, + "step": 94380 + }, + { + "epoch": 0.19067377190253598, + "grad_norm": 22.4447021484375, + "learning_rate": 9.761819345532519e-06, + "loss": 15.6707, + "step": 94390 + }, + { + "epoch": 0.1906939725352198, + "grad_norm": 212.0104522705078, + "learning_rate": 9.761712881399164e-06, + "loss": 18.4564, + "step": 94400 + }, + { + "epoch": 0.19071417316790362, + "grad_norm": 913.8582153320312, + "learning_rate": 9.7616063940578e-06, + "loss": 53.2364, + "step": 94410 + }, + { + "epoch": 0.19073437380058744, + "grad_norm": 381.44964599609375, + "learning_rate": 9.761499883508942e-06, + "loss": 24.3057, + "step": 94420 + }, + { + "epoch": 0.19075457443327126, + "grad_norm": 361.863037109375, + "learning_rate": 9.761393349753115e-06, + "loss": 23.973, + "step": 94430 + }, + { + "epoch": 0.19077477506595505, + "grad_norm": 201.03997802734375, + "learning_rate": 
9.76128679279083e-06, + "loss": 27.5303, + "step": 94440 + }, + { + "epoch": 0.19079497569863887, + "grad_norm": 598.7647094726562, + "learning_rate": 9.761180212622613e-06, + "loss": 36.8382, + "step": 94450 + }, + { + "epoch": 0.1908151763313227, + "grad_norm": 612.81201171875, + "learning_rate": 9.761073609248981e-06, + "loss": 31.6944, + "step": 94460 + }, + { + "epoch": 0.19083537696400651, + "grad_norm": 308.01251220703125, + "learning_rate": 9.760966982670453e-06, + "loss": 38.2548, + "step": 94470 + }, + { + "epoch": 0.19085557759669033, + "grad_norm": 214.9204864501953, + "learning_rate": 9.760860332887549e-06, + "loss": 27.0995, + "step": 94480 + }, + { + "epoch": 0.19087577822937415, + "grad_norm": 2.1772916316986084, + "learning_rate": 9.76075365990079e-06, + "loss": 19.2659, + "step": 94490 + }, + { + "epoch": 0.19089597886205795, + "grad_norm": 801.9741821289062, + "learning_rate": 9.760646963710694e-06, + "loss": 26.7987, + "step": 94500 + }, + { + "epoch": 0.19091617949474177, + "grad_norm": 192.08648681640625, + "learning_rate": 9.760540244317784e-06, + "loss": 23.6551, + "step": 94510 + }, + { + "epoch": 0.1909363801274256, + "grad_norm": 372.5327453613281, + "learning_rate": 9.760433501722576e-06, + "loss": 12.4313, + "step": 94520 + }, + { + "epoch": 0.1909565807601094, + "grad_norm": 80.73799133300781, + "learning_rate": 9.760326735925594e-06, + "loss": 33.106, + "step": 94530 + }, + { + "epoch": 0.19097678139279323, + "grad_norm": 569.3877563476562, + "learning_rate": 9.760219946927357e-06, + "loss": 29.0983, + "step": 94540 + }, + { + "epoch": 0.19099698202547705, + "grad_norm": 188.4160919189453, + "learning_rate": 9.760113134728383e-06, + "loss": 26.0278, + "step": 94550 + }, + { + "epoch": 0.19101718265816087, + "grad_norm": 145.1660614013672, + "learning_rate": 9.760006299329198e-06, + "loss": 19.5478, + "step": 94560 + }, + { + "epoch": 0.19103738329084466, + "grad_norm": 516.3740234375, + "learning_rate": 9.759899440730318e-06, + "loss": 36.2998, + "step": 94570 + }, + { + "epoch": 0.19105758392352848, + "grad_norm": 302.2230529785156, + "learning_rate": 9.759792558932267e-06, + "loss": 20.6328, + "step": 94580 + }, + { + "epoch": 0.1910777845562123, + "grad_norm": 321.30389404296875, + "learning_rate": 9.759685653935563e-06, + "loss": 20.3567, + "step": 94590 + }, + { + "epoch": 0.19109798518889612, + "grad_norm": 408.89434814453125, + "learning_rate": 9.759578725740726e-06, + "loss": 24.1724, + "step": 94600 + }, + { + "epoch": 0.19111818582157994, + "grad_norm": 154.68870544433594, + "learning_rate": 9.759471774348284e-06, + "loss": 31.3046, + "step": 94610 + }, + { + "epoch": 0.19113838645426376, + "grad_norm": 371.37109375, + "learning_rate": 9.759364799758751e-06, + "loss": 16.1947, + "step": 94620 + }, + { + "epoch": 0.19115858708694755, + "grad_norm": 300.44744873046875, + "learning_rate": 9.759257801972652e-06, + "loss": 21.571, + "step": 94630 + }, + { + "epoch": 0.19117878771963137, + "grad_norm": 366.66973876953125, + "learning_rate": 9.759150780990508e-06, + "loss": 21.162, + "step": 94640 + }, + { + "epoch": 0.1911989883523152, + "grad_norm": 301.9317321777344, + "learning_rate": 9.75904373681284e-06, + "loss": 14.5093, + "step": 94650 + }, + { + "epoch": 0.191219188984999, + "grad_norm": 397.9039611816406, + "learning_rate": 9.75893666944017e-06, + "loss": 22.7078, + "step": 94660 + }, + { + "epoch": 0.19123938961768283, + "grad_norm": 398.2569580078125, + "learning_rate": 9.758829578873019e-06, + "loss": 19.4191, + "step": 94670 + }, + { + 
"epoch": 0.19125959025036665, + "grad_norm": 628.5740966796875, + "learning_rate": 9.758722465111912e-06, + "loss": 35.5664, + "step": 94680 + }, + { + "epoch": 0.19127979088305044, + "grad_norm": 290.260986328125, + "learning_rate": 9.758615328157367e-06, + "loss": 27.4729, + "step": 94690 + }, + { + "epoch": 0.19129999151573426, + "grad_norm": 401.98455810546875, + "learning_rate": 9.758508168009908e-06, + "loss": 24.0101, + "step": 94700 + }, + { + "epoch": 0.19132019214841808, + "grad_norm": 363.4964599609375, + "learning_rate": 9.75840098467006e-06, + "loss": 23.6348, + "step": 94710 + }, + { + "epoch": 0.1913403927811019, + "grad_norm": 368.26922607421875, + "learning_rate": 9.758293778138339e-06, + "loss": 32.6702, + "step": 94720 + }, + { + "epoch": 0.19136059341378572, + "grad_norm": 542.6871337890625, + "learning_rate": 9.758186548415274e-06, + "loss": 22.5824, + "step": 94730 + }, + { + "epoch": 0.19138079404646954, + "grad_norm": 34.795433044433594, + "learning_rate": 9.758079295501384e-06, + "loss": 21.9126, + "step": 94740 + }, + { + "epoch": 0.19140099467915336, + "grad_norm": 397.8806457519531, + "learning_rate": 9.757972019397192e-06, + "loss": 26.8283, + "step": 94750 + }, + { + "epoch": 0.19142119531183716, + "grad_norm": 424.9023132324219, + "learning_rate": 9.757864720103222e-06, + "loss": 14.3381, + "step": 94760 + }, + { + "epoch": 0.19144139594452098, + "grad_norm": 753.5485229492188, + "learning_rate": 9.757757397619995e-06, + "loss": 18.7168, + "step": 94770 + }, + { + "epoch": 0.1914615965772048, + "grad_norm": 286.5726623535156, + "learning_rate": 9.757650051948037e-06, + "loss": 21.9167, + "step": 94780 + }, + { + "epoch": 0.19148179720988862, + "grad_norm": 217.6724853515625, + "learning_rate": 9.757542683087871e-06, + "loss": 27.2246, + "step": 94790 + }, + { + "epoch": 0.19150199784257244, + "grad_norm": 490.37493896484375, + "learning_rate": 9.757435291040016e-06, + "loss": 19.4853, + "step": 94800 + }, + { + "epoch": 0.19152219847525626, + "grad_norm": 557.5157470703125, + "learning_rate": 9.757327875805e-06, + "loss": 30.4812, + "step": 94810 + }, + { + "epoch": 0.19154239910794005, + "grad_norm": 337.1351318359375, + "learning_rate": 9.757220437383345e-06, + "loss": 13.7743, + "step": 94820 + }, + { + "epoch": 0.19156259974062387, + "grad_norm": 311.22943115234375, + "learning_rate": 9.757112975775575e-06, + "loss": 18.5559, + "step": 94830 + }, + { + "epoch": 0.1915828003733077, + "grad_norm": 293.1070861816406, + "learning_rate": 9.757005490982213e-06, + "loss": 12.0658, + "step": 94840 + }, + { + "epoch": 0.1916030010059915, + "grad_norm": 217.46958923339844, + "learning_rate": 9.756897983003782e-06, + "loss": 18.6618, + "step": 94850 + }, + { + "epoch": 0.19162320163867533, + "grad_norm": 1383.739990234375, + "learning_rate": 9.756790451840807e-06, + "loss": 24.5521, + "step": 94860 + }, + { + "epoch": 0.19164340227135915, + "grad_norm": 584.2903442382812, + "learning_rate": 9.756682897493814e-06, + "loss": 25.5006, + "step": 94870 + }, + { + "epoch": 0.19166360290404297, + "grad_norm": 286.12554931640625, + "learning_rate": 9.756575319963325e-06, + "loss": 24.2221, + "step": 94880 + }, + { + "epoch": 0.19168380353672676, + "grad_norm": 365.3943786621094, + "learning_rate": 9.756467719249865e-06, + "loss": 44.9083, + "step": 94890 + }, + { + "epoch": 0.19170400416941058, + "grad_norm": 476.6283264160156, + "learning_rate": 9.756360095353957e-06, + "loss": 28.985, + "step": 94900 + }, + { + "epoch": 0.1917242048020944, + "grad_norm": 
223.5682373046875, + "learning_rate": 9.756252448276128e-06, + "loss": 16.9192, + "step": 94910 + }, + { + "epoch": 0.19174440543477822, + "grad_norm": 282.4645690917969, + "learning_rate": 9.756144778016901e-06, + "loss": 16.1518, + "step": 94920 + }, + { + "epoch": 0.19176460606746204, + "grad_norm": 993.4703979492188, + "learning_rate": 9.756037084576801e-06, + "loss": 44.8314, + "step": 94930 + }, + { + "epoch": 0.19178480670014586, + "grad_norm": 162.4573974609375, + "learning_rate": 9.755929367956354e-06, + "loss": 19.7513, + "step": 94940 + }, + { + "epoch": 0.19180500733282965, + "grad_norm": 597.595703125, + "learning_rate": 9.755821628156083e-06, + "loss": 31.1724, + "step": 94950 + }, + { + "epoch": 0.19182520796551347, + "grad_norm": 607.3621826171875, + "learning_rate": 9.755713865176514e-06, + "loss": 32.0112, + "step": 94960 + }, + { + "epoch": 0.1918454085981973, + "grad_norm": 535.0570068359375, + "learning_rate": 9.755606079018174e-06, + "loss": 29.5479, + "step": 94970 + }, + { + "epoch": 0.1918656092308811, + "grad_norm": 1022.1478881835938, + "learning_rate": 9.755498269681585e-06, + "loss": 35.6381, + "step": 94980 + }, + { + "epoch": 0.19188580986356493, + "grad_norm": 348.110595703125, + "learning_rate": 9.755390437167274e-06, + "loss": 23.0737, + "step": 94990 + }, + { + "epoch": 0.19190601049624875, + "grad_norm": 402.2424011230469, + "learning_rate": 9.755282581475769e-06, + "loss": 14.4856, + "step": 95000 + }, + { + "epoch": 0.19192621112893254, + "grad_norm": 306.0251159667969, + "learning_rate": 9.755174702607592e-06, + "loss": 37.8608, + "step": 95010 + }, + { + "epoch": 0.19194641176161636, + "grad_norm": 547.3987426757812, + "learning_rate": 9.75506680056327e-06, + "loss": 28.8134, + "step": 95020 + }, + { + "epoch": 0.19196661239430018, + "grad_norm": 538.8976440429688, + "learning_rate": 9.75495887534333e-06, + "loss": 25.2399, + "step": 95030 + }, + { + "epoch": 0.191986813026984, + "grad_norm": 484.52056884765625, + "learning_rate": 9.754850926948295e-06, + "loss": 18.9739, + "step": 95040 + }, + { + "epoch": 0.19200701365966782, + "grad_norm": 610.3802490234375, + "learning_rate": 9.754742955378697e-06, + "loss": 38.8773, + "step": 95050 + }, + { + "epoch": 0.19202721429235164, + "grad_norm": 204.48780822753906, + "learning_rate": 9.754634960635057e-06, + "loss": 21.8397, + "step": 95060 + }, + { + "epoch": 0.19204741492503546, + "grad_norm": 552.1886596679688, + "learning_rate": 9.754526942717901e-06, + "loss": 24.1843, + "step": 95070 + }, + { + "epoch": 0.19206761555771926, + "grad_norm": 544.3379516601562, + "learning_rate": 9.75441890162776e-06, + "loss": 31.6727, + "step": 95080 + }, + { + "epoch": 0.19208781619040308, + "grad_norm": 191.04501342773438, + "learning_rate": 9.754310837365155e-06, + "loss": 18.3352, + "step": 95090 + }, + { + "epoch": 0.1921080168230869, + "grad_norm": 423.8003845214844, + "learning_rate": 9.754202749930618e-06, + "loss": 19.5345, + "step": 95100 + }, + { + "epoch": 0.19212821745577072, + "grad_norm": 854.3909912109375, + "learning_rate": 9.754094639324672e-06, + "loss": 30.5475, + "step": 95110 + }, + { + "epoch": 0.19214841808845454, + "grad_norm": 485.14215087890625, + "learning_rate": 9.753986505547845e-06, + "loss": 25.7641, + "step": 95120 + }, + { + "epoch": 0.19216861872113836, + "grad_norm": 108.00077819824219, + "learning_rate": 9.753878348600666e-06, + "loss": 20.8481, + "step": 95130 + }, + { + "epoch": 0.19218881935382215, + "grad_norm": 814.5859985351562, + "learning_rate": 9.75377016848366e-06, + 
"loss": 26.7563, + "step": 95140 + }, + { + "epoch": 0.19220901998650597, + "grad_norm": 388.3916320800781, + "learning_rate": 9.753661965197355e-06, + "loss": 12.6434, + "step": 95150 + }, + { + "epoch": 0.1922292206191898, + "grad_norm": 669.4876708984375, + "learning_rate": 9.753553738742278e-06, + "loss": 44.7474, + "step": 95160 + }, + { + "epoch": 0.1922494212518736, + "grad_norm": 152.13226318359375, + "learning_rate": 9.753445489118955e-06, + "loss": 20.3763, + "step": 95170 + }, + { + "epoch": 0.19226962188455743, + "grad_norm": 1334.5699462890625, + "learning_rate": 9.753337216327917e-06, + "loss": 37.9783, + "step": 95180 + }, + { + "epoch": 0.19228982251724125, + "grad_norm": 2598.234375, + "learning_rate": 9.75322892036969e-06, + "loss": 35.7785, + "step": 95190 + }, + { + "epoch": 0.19231002314992507, + "grad_norm": 279.7922668457031, + "learning_rate": 9.7531206012448e-06, + "loss": 19.9184, + "step": 95200 + }, + { + "epoch": 0.19233022378260886, + "grad_norm": 500.36456298828125, + "learning_rate": 9.753012258953778e-06, + "loss": 40.2961, + "step": 95210 + }, + { + "epoch": 0.19235042441529268, + "grad_norm": 418.6767578125, + "learning_rate": 9.752903893497152e-06, + "loss": 28.6601, + "step": 95220 + }, + { + "epoch": 0.1923706250479765, + "grad_norm": 190.2279815673828, + "learning_rate": 9.752795504875447e-06, + "loss": 21.5785, + "step": 95230 + }, + { + "epoch": 0.19239082568066032, + "grad_norm": 433.3700866699219, + "learning_rate": 9.752687093089192e-06, + "loss": 34.0436, + "step": 95240 + }, + { + "epoch": 0.19241102631334414, + "grad_norm": 500.3661804199219, + "learning_rate": 9.75257865813892e-06, + "loss": 37.865, + "step": 95250 + }, + { + "epoch": 0.19243122694602796, + "grad_norm": 405.3109130859375, + "learning_rate": 9.752470200025153e-06, + "loss": 19.5941, + "step": 95260 + }, + { + "epoch": 0.19245142757871175, + "grad_norm": 543.3590698242188, + "learning_rate": 9.752361718748425e-06, + "loss": 11.6947, + "step": 95270 + }, + { + "epoch": 0.19247162821139557, + "grad_norm": 229.73873901367188, + "learning_rate": 9.75225321430926e-06, + "loss": 13.0533, + "step": 95280 + }, + { + "epoch": 0.1924918288440794, + "grad_norm": 268.1575012207031, + "learning_rate": 9.752144686708192e-06, + "loss": 38.0653, + "step": 95290 + }, + { + "epoch": 0.1925120294767632, + "grad_norm": 301.10516357421875, + "learning_rate": 9.752036135945743e-06, + "loss": 27.4795, + "step": 95300 + }, + { + "epoch": 0.19253223010944703, + "grad_norm": 665.0114135742188, + "learning_rate": 9.75192756202245e-06, + "loss": 29.2695, + "step": 95310 + }, + { + "epoch": 0.19255243074213085, + "grad_norm": 1296.9481201171875, + "learning_rate": 9.751818964938837e-06, + "loss": 23.5781, + "step": 95320 + }, + { + "epoch": 0.19257263137481465, + "grad_norm": 25.395132064819336, + "learning_rate": 9.751710344695436e-06, + "loss": 18.2023, + "step": 95330 + }, + { + "epoch": 0.19259283200749847, + "grad_norm": 367.8702697753906, + "learning_rate": 9.751601701292773e-06, + "loss": 21.1432, + "step": 95340 + }, + { + "epoch": 0.19261303264018229, + "grad_norm": 228.0545654296875, + "learning_rate": 9.75149303473138e-06, + "loss": 31.7488, + "step": 95350 + }, + { + "epoch": 0.1926332332728661, + "grad_norm": 15.69963264465332, + "learning_rate": 9.751384345011787e-06, + "loss": 15.3159, + "step": 95360 + }, + { + "epoch": 0.19265343390554993, + "grad_norm": 105.42630004882812, + "learning_rate": 9.751275632134523e-06, + "loss": 27.5233, + "step": 95370 + }, + { + "epoch": 0.19267363453823375, 
+ "grad_norm": 21.262840270996094, + "learning_rate": 9.751166896100119e-06, + "loss": 20.9626, + "step": 95380 + }, + { + "epoch": 0.19269383517091757, + "grad_norm": 1015.583984375, + "learning_rate": 9.751058136909102e-06, + "loss": 40.2617, + "step": 95390 + }, + { + "epoch": 0.19271403580360136, + "grad_norm": 753.014892578125, + "learning_rate": 9.750949354562006e-06, + "loss": 25.4126, + "step": 95400 + }, + { + "epoch": 0.19273423643628518, + "grad_norm": 545.4683227539062, + "learning_rate": 9.750840549059354e-06, + "loss": 14.7867, + "step": 95410 + }, + { + "epoch": 0.192754437068969, + "grad_norm": 358.6993408203125, + "learning_rate": 9.750731720401685e-06, + "loss": 22.2825, + "step": 95420 + }, + { + "epoch": 0.19277463770165282, + "grad_norm": 828.7424926757812, + "learning_rate": 9.750622868589527e-06, + "loss": 13.9393, + "step": 95430 + }, + { + "epoch": 0.19279483833433664, + "grad_norm": 485.7572937011719, + "learning_rate": 9.750513993623406e-06, + "loss": 24.9899, + "step": 95440 + }, + { + "epoch": 0.19281503896702046, + "grad_norm": 842.6419677734375, + "learning_rate": 9.750405095503859e-06, + "loss": 59.4863, + "step": 95450 + }, + { + "epoch": 0.19283523959970425, + "grad_norm": 436.1926574707031, + "learning_rate": 9.750296174231412e-06, + "loss": 17.983, + "step": 95460 + }, + { + "epoch": 0.19285544023238807, + "grad_norm": 438.8775939941406, + "learning_rate": 9.7501872298066e-06, + "loss": 18.8818, + "step": 95470 + }, + { + "epoch": 0.1928756408650719, + "grad_norm": 370.5317077636719, + "learning_rate": 9.75007826222995e-06, + "loss": 16.6989, + "step": 95480 + }, + { + "epoch": 0.1928958414977557, + "grad_norm": 649.5635375976562, + "learning_rate": 9.749969271501993e-06, + "loss": 18.2181, + "step": 95490 + }, + { + "epoch": 0.19291604213043953, + "grad_norm": 126.58338165283203, + "learning_rate": 9.749860257623262e-06, + "loss": 33.4575, + "step": 95500 + }, + { + "epoch": 0.19293624276312335, + "grad_norm": 20.53302001953125, + "learning_rate": 9.74975122059429e-06, + "loss": 23.0558, + "step": 95510 + }, + { + "epoch": 0.19295644339580717, + "grad_norm": 582.2803955078125, + "learning_rate": 9.749642160415606e-06, + "loss": 18.6158, + "step": 95520 + }, + { + "epoch": 0.19297664402849096, + "grad_norm": 469.9850769042969, + "learning_rate": 9.749533077087742e-06, + "loss": 21.8119, + "step": 95530 + }, + { + "epoch": 0.19299684466117478, + "grad_norm": 116.4792251586914, + "learning_rate": 9.749423970611232e-06, + "loss": 27.1043, + "step": 95540 + }, + { + "epoch": 0.1930170452938586, + "grad_norm": 452.0263977050781, + "learning_rate": 9.749314840986604e-06, + "loss": 15.7223, + "step": 95550 + }, + { + "epoch": 0.19303724592654242, + "grad_norm": 567.0089111328125, + "learning_rate": 9.74920568821439e-06, + "loss": 17.3021, + "step": 95560 + }, + { + "epoch": 0.19305744655922624, + "grad_norm": 454.8739929199219, + "learning_rate": 9.749096512295124e-06, + "loss": 18.5041, + "step": 95570 + }, + { + "epoch": 0.19307764719191006, + "grad_norm": 406.47967529296875, + "learning_rate": 9.748987313229339e-06, + "loss": 26.3245, + "step": 95580 + }, + { + "epoch": 0.19309784782459385, + "grad_norm": 250.55763244628906, + "learning_rate": 9.748878091017565e-06, + "loss": 17.7021, + "step": 95590 + }, + { + "epoch": 0.19311804845727767, + "grad_norm": 342.03485107421875, + "learning_rate": 9.748768845660335e-06, + "loss": 16.6775, + "step": 95600 + }, + { + "epoch": 0.1931382490899615, + "grad_norm": 299.2231140136719, + "learning_rate": 
9.748659577158182e-06, + "loss": 37.2432, + "step": 95610 + }, + { + "epoch": 0.19315844972264531, + "grad_norm": 442.600341796875, + "learning_rate": 9.748550285511637e-06, + "loss": 21.9335, + "step": 95620 + }, + { + "epoch": 0.19317865035532913, + "grad_norm": 274.6854553222656, + "learning_rate": 9.748440970721236e-06, + "loss": 9.4645, + "step": 95630 + }, + { + "epoch": 0.19319885098801295, + "grad_norm": 571.7615356445312, + "learning_rate": 9.74833163278751e-06, + "loss": 14.4993, + "step": 95640 + }, + { + "epoch": 0.19321905162069675, + "grad_norm": 540.5484619140625, + "learning_rate": 9.748222271710988e-06, + "loss": 26.3743, + "step": 95650 + }, + { + "epoch": 0.19323925225338057, + "grad_norm": 285.39373779296875, + "learning_rate": 9.74811288749221e-06, + "loss": 9.7227, + "step": 95660 + }, + { + "epoch": 0.1932594528860644, + "grad_norm": 42.79730987548828, + "learning_rate": 9.748003480131702e-06, + "loss": 32.0558, + "step": 95670 + }, + { + "epoch": 0.1932796535187482, + "grad_norm": 460.7587890625, + "learning_rate": 9.747894049630004e-06, + "loss": 52.6809, + "step": 95680 + }, + { + "epoch": 0.19329985415143203, + "grad_norm": 56.83159255981445, + "learning_rate": 9.747784595987645e-06, + "loss": 41.3574, + "step": 95690 + }, + { + "epoch": 0.19332005478411585, + "grad_norm": 405.26861572265625, + "learning_rate": 9.74767511920516e-06, + "loss": 23.6188, + "step": 95700 + }, + { + "epoch": 0.19334025541679967, + "grad_norm": 215.51358032226562, + "learning_rate": 9.747565619283083e-06, + "loss": 16.9533, + "step": 95710 + }, + { + "epoch": 0.19336045604948346, + "grad_norm": 393.44281005859375, + "learning_rate": 9.747456096221946e-06, + "loss": 43.6118, + "step": 95720 + }, + { + "epoch": 0.19338065668216728, + "grad_norm": 40.6806526184082, + "learning_rate": 9.747346550022282e-06, + "loss": 18.7496, + "step": 95730 + }, + { + "epoch": 0.1934008573148511, + "grad_norm": 110.1174545288086, + "learning_rate": 9.747236980684632e-06, + "loss": 28.4388, + "step": 95740 + }, + { + "epoch": 0.19342105794753492, + "grad_norm": 525.581298828125, + "learning_rate": 9.74712738820952e-06, + "loss": 21.0295, + "step": 95750 + }, + { + "epoch": 0.19344125858021874, + "grad_norm": 476.88739013671875, + "learning_rate": 9.747017772597487e-06, + "loss": 20.7958, + "step": 95760 + }, + { + "epoch": 0.19346145921290256, + "grad_norm": 373.5235900878906, + "learning_rate": 9.746908133849065e-06, + "loss": 23.7376, + "step": 95770 + }, + { + "epoch": 0.19348165984558635, + "grad_norm": 460.3899230957031, + "learning_rate": 9.746798471964787e-06, + "loss": 24.5617, + "step": 95780 + }, + { + "epoch": 0.19350186047827017, + "grad_norm": 30.116121292114258, + "learning_rate": 9.74668878694519e-06, + "loss": 19.1157, + "step": 95790 + }, + { + "epoch": 0.193522061110954, + "grad_norm": 968.26513671875, + "learning_rate": 9.746579078790808e-06, + "loss": 34.6997, + "step": 95800 + }, + { + "epoch": 0.1935422617436378, + "grad_norm": 499.2196960449219, + "learning_rate": 9.746469347502174e-06, + "loss": 40.1381, + "step": 95810 + }, + { + "epoch": 0.19356246237632163, + "grad_norm": 335.9527893066406, + "learning_rate": 9.746359593079825e-06, + "loss": 23.6027, + "step": 95820 + }, + { + "epoch": 0.19358266300900545, + "grad_norm": 130.8729705810547, + "learning_rate": 9.746249815524295e-06, + "loss": 20.722, + "step": 95830 + }, + { + "epoch": 0.19360286364168927, + "grad_norm": 212.45811462402344, + "learning_rate": 9.746140014836118e-06, + "loss": 31.2415, + "step": 95840 + }, + { + 
"epoch": 0.19362306427437306, + "grad_norm": 1760.1175537109375, + "learning_rate": 9.746030191015831e-06, + "loss": 53.4861, + "step": 95850 + }, + { + "epoch": 0.19364326490705688, + "grad_norm": 475.439697265625, + "learning_rate": 9.745920344063969e-06, + "loss": 22.7386, + "step": 95860 + }, + { + "epoch": 0.1936634655397407, + "grad_norm": 463.5982360839844, + "learning_rate": 9.745810473981067e-06, + "loss": 24.8727, + "step": 95870 + }, + { + "epoch": 0.19368366617242452, + "grad_norm": 113.54488372802734, + "learning_rate": 9.74570058076766e-06, + "loss": 30.2602, + "step": 95880 + }, + { + "epoch": 0.19370386680510834, + "grad_norm": 267.89654541015625, + "learning_rate": 9.745590664424283e-06, + "loss": 17.2591, + "step": 95890 + }, + { + "epoch": 0.19372406743779216, + "grad_norm": 1579.3568115234375, + "learning_rate": 9.745480724951473e-06, + "loss": 42.5363, + "step": 95900 + }, + { + "epoch": 0.19374426807047596, + "grad_norm": 379.80615234375, + "learning_rate": 9.745370762349766e-06, + "loss": 28.0911, + "step": 95910 + }, + { + "epoch": 0.19376446870315978, + "grad_norm": 407.65985107421875, + "learning_rate": 9.745260776619698e-06, + "loss": 23.3073, + "step": 95920 + }, + { + "epoch": 0.1937846693358436, + "grad_norm": 409.89239501953125, + "learning_rate": 9.745150767761805e-06, + "loss": 18.4817, + "step": 95930 + }, + { + "epoch": 0.19380486996852742, + "grad_norm": 40.69590377807617, + "learning_rate": 9.745040735776622e-06, + "loss": 19.7151, + "step": 95940 + }, + { + "epoch": 0.19382507060121124, + "grad_norm": 309.7251281738281, + "learning_rate": 9.744930680664685e-06, + "loss": 30.5486, + "step": 95950 + }, + { + "epoch": 0.19384527123389506, + "grad_norm": 777.8043823242188, + "learning_rate": 9.74482060242653e-06, + "loss": 19.8186, + "step": 95960 + }, + { + "epoch": 0.19386547186657885, + "grad_norm": 407.1781921386719, + "learning_rate": 9.744710501062698e-06, + "loss": 32.2383, + "step": 95970 + }, + { + "epoch": 0.19388567249926267, + "grad_norm": 952.85693359375, + "learning_rate": 9.744600376573721e-06, + "loss": 26.6892, + "step": 95980 + }, + { + "epoch": 0.1939058731319465, + "grad_norm": 416.1173095703125, + "learning_rate": 9.744490228960137e-06, + "loss": 27.1903, + "step": 95990 + }, + { + "epoch": 0.1939260737646303, + "grad_norm": 254.97972106933594, + "learning_rate": 9.744380058222483e-06, + "loss": 27.8831, + "step": 96000 + }, + { + "epoch": 0.19394627439731413, + "grad_norm": 429.0074768066406, + "learning_rate": 9.744269864361298e-06, + "loss": 14.0351, + "step": 96010 + }, + { + "epoch": 0.19396647502999795, + "grad_norm": 221.5646209716797, + "learning_rate": 9.744159647377114e-06, + "loss": 17.8883, + "step": 96020 + }, + { + "epoch": 0.19398667566268177, + "grad_norm": 503.8222351074219, + "learning_rate": 9.744049407270472e-06, + "loss": 19.1898, + "step": 96030 + }, + { + "epoch": 0.19400687629536556, + "grad_norm": 324.0685729980469, + "learning_rate": 9.74393914404191e-06, + "loss": 23.3817, + "step": 96040 + }, + { + "epoch": 0.19402707692804938, + "grad_norm": 36.85819625854492, + "learning_rate": 9.743828857691964e-06, + "loss": 30.638, + "step": 96050 + }, + { + "epoch": 0.1940472775607332, + "grad_norm": 365.65386962890625, + "learning_rate": 9.74371854822117e-06, + "loss": 40.5744, + "step": 96060 + }, + { + "epoch": 0.19406747819341702, + "grad_norm": 223.74566650390625, + "learning_rate": 9.74360821563007e-06, + "loss": 20.3652, + "step": 96070 + }, + { + "epoch": 0.19408767882610084, + "grad_norm": 382.7676086425781, + 
"learning_rate": 9.743497859919196e-06, + "loss": 29.6577, + "step": 96080 + }, + { + "epoch": 0.19410787945878466, + "grad_norm": 482.62103271484375, + "learning_rate": 9.743387481089091e-06, + "loss": 24.2073, + "step": 96090 + }, + { + "epoch": 0.19412808009146845, + "grad_norm": 332.37127685546875, + "learning_rate": 9.743277079140288e-06, + "loss": 12.8352, + "step": 96100 + }, + { + "epoch": 0.19414828072415227, + "grad_norm": 868.508056640625, + "learning_rate": 9.74316665407333e-06, + "loss": 31.6455, + "step": 96110 + }, + { + "epoch": 0.1941684813568361, + "grad_norm": 450.9184265136719, + "learning_rate": 9.743056205888752e-06, + "loss": 24.8632, + "step": 96120 + }, + { + "epoch": 0.1941886819895199, + "grad_norm": 588.66650390625, + "learning_rate": 9.742945734587093e-06, + "loss": 25.9008, + "step": 96130 + }, + { + "epoch": 0.19420888262220373, + "grad_norm": 377.9995422363281, + "learning_rate": 9.742835240168893e-06, + "loss": 23.8725, + "step": 96140 + }, + { + "epoch": 0.19422908325488755, + "grad_norm": 280.8848571777344, + "learning_rate": 9.742724722634688e-06, + "loss": 38.8778, + "step": 96150 + }, + { + "epoch": 0.19424928388757137, + "grad_norm": 392.6344909667969, + "learning_rate": 9.742614181985019e-06, + "loss": 22.6172, + "step": 96160 + }, + { + "epoch": 0.19426948452025516, + "grad_norm": 363.85845947265625, + "learning_rate": 9.742503618220422e-06, + "loss": 30.3061, + "step": 96170 + }, + { + "epoch": 0.19428968515293898, + "grad_norm": 328.5239562988281, + "learning_rate": 9.74239303134144e-06, + "loss": 18.4416, + "step": 96180 + }, + { + "epoch": 0.1943098857856228, + "grad_norm": 277.4551696777344, + "learning_rate": 9.742282421348607e-06, + "loss": 20.9246, + "step": 96190 + }, + { + "epoch": 0.19433008641830662, + "grad_norm": 283.2068176269531, + "learning_rate": 9.742171788242468e-06, + "loss": 40.5074, + "step": 96200 + }, + { + "epoch": 0.19435028705099044, + "grad_norm": 925.0282592773438, + "learning_rate": 9.742061132023555e-06, + "loss": 17.0826, + "step": 96210 + }, + { + "epoch": 0.19437048768367426, + "grad_norm": 1141.78173828125, + "learning_rate": 9.741950452692414e-06, + "loss": 20.104, + "step": 96220 + }, + { + "epoch": 0.19439068831635806, + "grad_norm": 378.7456359863281, + "learning_rate": 9.741839750249579e-06, + "loss": 15.3081, + "step": 96230 + }, + { + "epoch": 0.19441088894904188, + "grad_norm": 860.4197387695312, + "learning_rate": 9.741729024695594e-06, + "loss": 26.3532, + "step": 96240 + }, + { + "epoch": 0.1944310895817257, + "grad_norm": 415.7444152832031, + "learning_rate": 9.741618276030998e-06, + "loss": 22.4038, + "step": 96250 + }, + { + "epoch": 0.19445129021440952, + "grad_norm": 434.64569091796875, + "learning_rate": 9.741507504256327e-06, + "loss": 18.588, + "step": 96260 + }, + { + "epoch": 0.19447149084709334, + "grad_norm": 231.0457305908203, + "learning_rate": 9.741396709372126e-06, + "loss": 29.817, + "step": 96270 + }, + { + "epoch": 0.19449169147977716, + "grad_norm": 269.8516540527344, + "learning_rate": 9.74128589137893e-06, + "loss": 24.8359, + "step": 96280 + }, + { + "epoch": 0.19451189211246095, + "grad_norm": 492.55133056640625, + "learning_rate": 9.741175050277283e-06, + "loss": 15.1692, + "step": 96290 + }, + { + "epoch": 0.19453209274514477, + "grad_norm": 410.5207824707031, + "learning_rate": 9.741064186067723e-06, + "loss": 19.2975, + "step": 96300 + }, + { + "epoch": 0.1945522933778286, + "grad_norm": 1291.794921875, + "learning_rate": 9.740953298750792e-06, + "loss": 44.1921, + "step": 
96310 + }, + { + "epoch": 0.1945724940105124, + "grad_norm": 336.7740173339844, + "learning_rate": 9.74084238832703e-06, + "loss": 35.6793, + "step": 96320 + }, + { + "epoch": 0.19459269464319623, + "grad_norm": 725.1658325195312, + "learning_rate": 9.740731454796976e-06, + "loss": 42.7733, + "step": 96330 + }, + { + "epoch": 0.19461289527588005, + "grad_norm": 732.6233520507812, + "learning_rate": 9.740620498161173e-06, + "loss": 25.4401, + "step": 96340 + }, + { + "epoch": 0.19463309590856387, + "grad_norm": 445.3772277832031, + "learning_rate": 9.74050951842016e-06, + "loss": 16.6677, + "step": 96350 + }, + { + "epoch": 0.19465329654124766, + "grad_norm": 463.70745849609375, + "learning_rate": 9.74039851557448e-06, + "loss": 19.5565, + "step": 96360 + }, + { + "epoch": 0.19467349717393148, + "grad_norm": 484.64068603515625, + "learning_rate": 9.740287489624671e-06, + "loss": 28.0441, + "step": 96370 + }, + { + "epoch": 0.1946936978066153, + "grad_norm": 24.487995147705078, + "learning_rate": 9.740176440571277e-06, + "loss": 18.9507, + "step": 96380 + }, + { + "epoch": 0.19471389843929912, + "grad_norm": 11.86369800567627, + "learning_rate": 9.740065368414837e-06, + "loss": 15.8156, + "step": 96390 + }, + { + "epoch": 0.19473409907198294, + "grad_norm": 582.7681274414062, + "learning_rate": 9.739954273155892e-06, + "loss": 22.7781, + "step": 96400 + }, + { + "epoch": 0.19475429970466676, + "grad_norm": 271.8302917480469, + "learning_rate": 9.739843154794985e-06, + "loss": 17.154, + "step": 96410 + }, + { + "epoch": 0.19477450033735055, + "grad_norm": 1138.83544921875, + "learning_rate": 9.73973201333266e-06, + "loss": 32.6356, + "step": 96420 + }, + { + "epoch": 0.19479470097003437, + "grad_norm": 168.5606689453125, + "learning_rate": 9.739620848769455e-06, + "loss": 22.8429, + "step": 96430 + }, + { + "epoch": 0.1948149016027182, + "grad_norm": 0.0, + "learning_rate": 9.739509661105912e-06, + "loss": 24.7045, + "step": 96440 + }, + { + "epoch": 0.194835102235402, + "grad_norm": 365.2082824707031, + "learning_rate": 9.739398450342573e-06, + "loss": 41.4875, + "step": 96450 + }, + { + "epoch": 0.19485530286808583, + "grad_norm": 128.98590087890625, + "learning_rate": 9.739287216479983e-06, + "loss": 13.2018, + "step": 96460 + }, + { + "epoch": 0.19487550350076965, + "grad_norm": 512.8473510742188, + "learning_rate": 9.73917595951868e-06, + "loss": 31.5895, + "step": 96470 + }, + { + "epoch": 0.19489570413345347, + "grad_norm": 239.5601043701172, + "learning_rate": 9.73906467945921e-06, + "loss": 13.3083, + "step": 96480 + }, + { + "epoch": 0.19491590476613727, + "grad_norm": 397.5332336425781, + "learning_rate": 9.738953376302111e-06, + "loss": 29.3499, + "step": 96490 + }, + { + "epoch": 0.19493610539882109, + "grad_norm": 246.18441772460938, + "learning_rate": 9.73884205004793e-06, + "loss": 25.9016, + "step": 96500 + }, + { + "epoch": 0.1949563060315049, + "grad_norm": 417.99932861328125, + "learning_rate": 9.738730700697207e-06, + "loss": 9.6361, + "step": 96510 + }, + { + "epoch": 0.19497650666418873, + "grad_norm": 947.2726440429688, + "learning_rate": 9.738619328250485e-06, + "loss": 25.0256, + "step": 96520 + }, + { + "epoch": 0.19499670729687255, + "grad_norm": 952.1075439453125, + "learning_rate": 9.738507932708308e-06, + "loss": 12.2096, + "step": 96530 + }, + { + "epoch": 0.19501690792955637, + "grad_norm": 175.35069274902344, + "learning_rate": 9.738396514071216e-06, + "loss": 13.2193, + "step": 96540 + }, + { + "epoch": 0.19503710856224016, + "grad_norm": 633.6536865234375, 
+ "learning_rate": 9.738285072339756e-06, + "loss": 15.0969, + "step": 96550 + }, + { + "epoch": 0.19505730919492398, + "grad_norm": 682.6355590820312, + "learning_rate": 9.73817360751447e-06, + "loss": 27.0863, + "step": 96560 + }, + { + "epoch": 0.1950775098276078, + "grad_norm": 637.1114501953125, + "learning_rate": 9.738062119595897e-06, + "loss": 29.1412, + "step": 96570 + }, + { + "epoch": 0.19509771046029162, + "grad_norm": 172.63792419433594, + "learning_rate": 9.737950608584588e-06, + "loss": 19.8485, + "step": 96580 + }, + { + "epoch": 0.19511791109297544, + "grad_norm": 390.24969482421875, + "learning_rate": 9.73783907448108e-06, + "loss": 20.4416, + "step": 96590 + }, + { + "epoch": 0.19513811172565926, + "grad_norm": 390.1633605957031, + "learning_rate": 9.73772751728592e-06, + "loss": 26.4586, + "step": 96600 + }, + { + "epoch": 0.19515831235834305, + "grad_norm": 128.56723022460938, + "learning_rate": 9.73761593699965e-06, + "loss": 18.1316, + "step": 96610 + }, + { + "epoch": 0.19517851299102687, + "grad_norm": 603.45166015625, + "learning_rate": 9.737504333622814e-06, + "loss": 35.423, + "step": 96620 + }, + { + "epoch": 0.1951987136237107, + "grad_norm": 2.215932607650757, + "learning_rate": 9.737392707155957e-06, + "loss": 28.0613, + "step": 96630 + }, + { + "epoch": 0.1952189142563945, + "grad_norm": 342.9813537597656, + "learning_rate": 9.737281057599623e-06, + "loss": 37.2923, + "step": 96640 + }, + { + "epoch": 0.19523911488907833, + "grad_norm": 309.3840637207031, + "learning_rate": 9.737169384954356e-06, + "loss": 19.6012, + "step": 96650 + }, + { + "epoch": 0.19525931552176215, + "grad_norm": 247.97494506835938, + "learning_rate": 9.7370576892207e-06, + "loss": 18.5051, + "step": 96660 + }, + { + "epoch": 0.19527951615444597, + "grad_norm": 644.9800415039062, + "learning_rate": 9.736945970399198e-06, + "loss": 16.9285, + "step": 96670 + }, + { + "epoch": 0.19529971678712976, + "grad_norm": 696.183349609375, + "learning_rate": 9.736834228490398e-06, + "loss": 21.4221, + "step": 96680 + }, + { + "epoch": 0.19531991741981358, + "grad_norm": 339.5610046386719, + "learning_rate": 9.73672246349484e-06, + "loss": 28.4233, + "step": 96690 + }, + { + "epoch": 0.1953401180524974, + "grad_norm": 488.5916748046875, + "learning_rate": 9.736610675413073e-06, + "loss": 44.2945, + "step": 96700 + }, + { + "epoch": 0.19536031868518122, + "grad_norm": 341.2404479980469, + "learning_rate": 9.736498864245638e-06, + "loss": 29.4299, + "step": 96710 + }, + { + "epoch": 0.19538051931786504, + "grad_norm": 580.4157104492188, + "learning_rate": 9.736387029993084e-06, + "loss": 12.2115, + "step": 96720 + }, + { + "epoch": 0.19540071995054886, + "grad_norm": 527.8916625976562, + "learning_rate": 9.736275172655954e-06, + "loss": 29.7147, + "step": 96730 + }, + { + "epoch": 0.19542092058323265, + "grad_norm": 625.5405883789062, + "learning_rate": 9.736163292234792e-06, + "loss": 17.8191, + "step": 96740 + }, + { + "epoch": 0.19544112121591647, + "grad_norm": 1239.732421875, + "learning_rate": 9.736051388730146e-06, + "loss": 37.3495, + "step": 96750 + }, + { + "epoch": 0.1954613218486003, + "grad_norm": 236.486572265625, + "learning_rate": 9.735939462142558e-06, + "loss": 22.7968, + "step": 96760 + }, + { + "epoch": 0.19548152248128411, + "grad_norm": 587.5750732421875, + "learning_rate": 9.735827512472576e-06, + "loss": 32.5167, + "step": 96770 + }, + { + "epoch": 0.19550172311396793, + "grad_norm": 643.1697998046875, + "learning_rate": 9.735715539720747e-06, + "loss": 24.2916, + "step": 
96780 + }, + { + "epoch": 0.19552192374665175, + "grad_norm": 213.8444366455078, + "learning_rate": 9.735603543887613e-06, + "loss": 16.9851, + "step": 96790 + }, + { + "epoch": 0.19554212437933557, + "grad_norm": 202.58883666992188, + "learning_rate": 9.735491524973723e-06, + "loss": 26.4842, + "step": 96800 + }, + { + "epoch": 0.19556232501201937, + "grad_norm": 362.2386474609375, + "learning_rate": 9.73537948297962e-06, + "loss": 20.7448, + "step": 96810 + }, + { + "epoch": 0.1955825256447032, + "grad_norm": 477.868408203125, + "learning_rate": 9.735267417905852e-06, + "loss": 28.062, + "step": 96820 + }, + { + "epoch": 0.195602726277387, + "grad_norm": 176.94195556640625, + "learning_rate": 9.735155329752965e-06, + "loss": 22.0477, + "step": 96830 + }, + { + "epoch": 0.19562292691007083, + "grad_norm": 415.7734069824219, + "learning_rate": 9.735043218521507e-06, + "loss": 21.3873, + "step": 96840 + }, + { + "epoch": 0.19564312754275465, + "grad_norm": 303.6124572753906, + "learning_rate": 9.734931084212021e-06, + "loss": 22.8141, + "step": 96850 + }, + { + "epoch": 0.19566332817543847, + "grad_norm": 612.8478393554688, + "learning_rate": 9.734818926825056e-06, + "loss": 15.9648, + "step": 96860 + }, + { + "epoch": 0.19568352880812226, + "grad_norm": 190.95999145507812, + "learning_rate": 9.734706746361157e-06, + "loss": 23.0034, + "step": 96870 + }, + { + "epoch": 0.19570372944080608, + "grad_norm": 344.77801513671875, + "learning_rate": 9.734594542820871e-06, + "loss": 17.1716, + "step": 96880 + }, + { + "epoch": 0.1957239300734899, + "grad_norm": 399.13232421875, + "learning_rate": 9.734482316204747e-06, + "loss": 21.2854, + "step": 96890 + }, + { + "epoch": 0.19574413070617372, + "grad_norm": 147.22430419921875, + "learning_rate": 9.73437006651333e-06, + "loss": 19.4687, + "step": 96900 + }, + { + "epoch": 0.19576433133885754, + "grad_norm": 35.441829681396484, + "learning_rate": 9.734257793747168e-06, + "loss": 27.392, + "step": 96910 + }, + { + "epoch": 0.19578453197154136, + "grad_norm": 1310.0384521484375, + "learning_rate": 9.734145497906807e-06, + "loss": 34.6572, + "step": 96920 + }, + { + "epoch": 0.19580473260422515, + "grad_norm": 201.7588348388672, + "learning_rate": 9.734033178992794e-06, + "loss": 38.5917, + "step": 96930 + }, + { + "epoch": 0.19582493323690897, + "grad_norm": 1403.2457275390625, + "learning_rate": 9.73392083700568e-06, + "loss": 28.0674, + "step": 96940 + }, + { + "epoch": 0.1958451338695928, + "grad_norm": 224.3494873046875, + "learning_rate": 9.73380847194601e-06, + "loss": 35.1673, + "step": 96950 + }, + { + "epoch": 0.1958653345022766, + "grad_norm": 416.7572021484375, + "learning_rate": 9.733696083814327e-06, + "loss": 21.4552, + "step": 96960 + }, + { + "epoch": 0.19588553513496043, + "grad_norm": 394.698486328125, + "learning_rate": 9.733583672611189e-06, + "loss": 25.4069, + "step": 96970 + }, + { + "epoch": 0.19590573576764425, + "grad_norm": 1013.8325805664062, + "learning_rate": 9.733471238337136e-06, + "loss": 27.7805, + "step": 96980 + }, + { + "epoch": 0.19592593640032807, + "grad_norm": 440.7705993652344, + "learning_rate": 9.733358780992717e-06, + "loss": 33.2662, + "step": 96990 + }, + { + "epoch": 0.19594613703301186, + "grad_norm": 307.0263977050781, + "learning_rate": 9.733246300578482e-06, + "loss": 24.355, + "step": 97000 + }, + { + "epoch": 0.19596633766569568, + "grad_norm": 583.3680419921875, + "learning_rate": 9.73313379709498e-06, + "loss": 32.2298, + "step": 97010 + }, + { + "epoch": 0.1959865382983795, + "grad_norm": 
1420.5535888671875, + "learning_rate": 9.733021270542758e-06, + "loss": 28.7911, + "step": 97020 + }, + { + "epoch": 0.19600673893106332, + "grad_norm": 624.2348022460938, + "learning_rate": 9.732908720922362e-06, + "loss": 30.9689, + "step": 97030 + }, + { + "epoch": 0.19602693956374714, + "grad_norm": 129.958740234375, + "learning_rate": 9.732796148234345e-06, + "loss": 12.3031, + "step": 97040 + }, + { + "epoch": 0.19604714019643096, + "grad_norm": 145.7042694091797, + "learning_rate": 9.732683552479253e-06, + "loss": 17.4793, + "step": 97050 + }, + { + "epoch": 0.19606734082911476, + "grad_norm": 711.9461669921875, + "learning_rate": 9.732570933657635e-06, + "loss": 16.8873, + "step": 97060 + }, + { + "epoch": 0.19608754146179858, + "grad_norm": 85.0526123046875, + "learning_rate": 9.73245829177004e-06, + "loss": 16.1001, + "step": 97070 + }, + { + "epoch": 0.1961077420944824, + "grad_norm": 536.3442993164062, + "learning_rate": 9.732345626817018e-06, + "loss": 32.5822, + "step": 97080 + }, + { + "epoch": 0.19612794272716622, + "grad_norm": 243.9501190185547, + "learning_rate": 9.732232938799118e-06, + "loss": 25.1747, + "step": 97090 + }, + { + "epoch": 0.19614814335985004, + "grad_norm": 345.52655029296875, + "learning_rate": 9.732120227716887e-06, + "loss": 32.5119, + "step": 97100 + }, + { + "epoch": 0.19616834399253386, + "grad_norm": 300.8923645019531, + "learning_rate": 9.732007493570877e-06, + "loss": 29.92, + "step": 97110 + }, + { + "epoch": 0.19618854462521768, + "grad_norm": 419.56195068359375, + "learning_rate": 9.731894736361636e-06, + "loss": 20.8166, + "step": 97120 + }, + { + "epoch": 0.19620874525790147, + "grad_norm": 330.9029235839844, + "learning_rate": 9.731781956089713e-06, + "loss": 19.3806, + "step": 97130 + }, + { + "epoch": 0.1962289458905853, + "grad_norm": 264.5019836425781, + "learning_rate": 9.731669152755662e-06, + "loss": 10.3985, + "step": 97140 + }, + { + "epoch": 0.1962491465232691, + "grad_norm": 609.105712890625, + "learning_rate": 9.731556326360027e-06, + "loss": 33.6437, + "step": 97150 + }, + { + "epoch": 0.19626934715595293, + "grad_norm": 287.99896240234375, + "learning_rate": 9.73144347690336e-06, + "loss": 35.8379, + "step": 97160 + }, + { + "epoch": 0.19628954778863675, + "grad_norm": 981.0174560546875, + "learning_rate": 9.731330604386214e-06, + "loss": 23.1507, + "step": 97170 + }, + { + "epoch": 0.19630974842132057, + "grad_norm": 1168.6593017578125, + "learning_rate": 9.731217708809133e-06, + "loss": 56.7481, + "step": 97180 + }, + { + "epoch": 0.19632994905400436, + "grad_norm": 288.0087890625, + "learning_rate": 9.731104790172672e-06, + "loss": 24.3511, + "step": 97190 + }, + { + "epoch": 0.19635014968668818, + "grad_norm": 321.56048583984375, + "learning_rate": 9.73099184847738e-06, + "loss": 21.5144, + "step": 97200 + }, + { + "epoch": 0.196370350319372, + "grad_norm": 570.0701904296875, + "learning_rate": 9.73087888372381e-06, + "loss": 35.149, + "step": 97210 + }, + { + "epoch": 0.19639055095205582, + "grad_norm": 635.6535034179688, + "learning_rate": 9.730765895912506e-06, + "loss": 24.0599, + "step": 97220 + }, + { + "epoch": 0.19641075158473964, + "grad_norm": 212.3668670654297, + "learning_rate": 9.730652885044025e-06, + "loss": 10.3039, + "step": 97230 + }, + { + "epoch": 0.19643095221742346, + "grad_norm": 515.6719970703125, + "learning_rate": 9.730539851118913e-06, + "loss": 33.4415, + "step": 97240 + }, + { + "epoch": 0.19645115285010725, + "grad_norm": 306.0253601074219, + "learning_rate": 9.730426794137727e-06, + "loss": 
37.0226, + "step": 97250 + }, + { + "epoch": 0.19647135348279107, + "grad_norm": 149.7908172607422, + "learning_rate": 9.730313714101014e-06, + "loss": 22.73, + "step": 97260 + }, + { + "epoch": 0.1964915541154749, + "grad_norm": 458.1486511230469, + "learning_rate": 9.730200611009324e-06, + "loss": 26.1247, + "step": 97270 + }, + { + "epoch": 0.1965117547481587, + "grad_norm": 208.75621032714844, + "learning_rate": 9.73008748486321e-06, + "loss": 24.2153, + "step": 97280 + }, + { + "epoch": 0.19653195538084253, + "grad_norm": 245.51007080078125, + "learning_rate": 9.729974335663227e-06, + "loss": 18.0914, + "step": 97290 + }, + { + "epoch": 0.19655215601352635, + "grad_norm": 275.8787841796875, + "learning_rate": 9.72986116340992e-06, + "loss": 27.147, + "step": 97300 + }, + { + "epoch": 0.19657235664621017, + "grad_norm": 427.34661865234375, + "learning_rate": 9.729747968103842e-06, + "loss": 36.523, + "step": 97310 + }, + { + "epoch": 0.19659255727889396, + "grad_norm": 842.1649169921875, + "learning_rate": 9.729634749745547e-06, + "loss": 28.4887, + "step": 97320 + }, + { + "epoch": 0.19661275791157778, + "grad_norm": 261.8681640625, + "learning_rate": 9.729521508335586e-06, + "loss": 10.9931, + "step": 97330 + }, + { + "epoch": 0.1966329585442616, + "grad_norm": 317.2580871582031, + "learning_rate": 9.729408243874511e-06, + "loss": 18.809, + "step": 97340 + }, + { + "epoch": 0.19665315917694542, + "grad_norm": 347.6241760253906, + "learning_rate": 9.729294956362873e-06, + "loss": 13.2427, + "step": 97350 + }, + { + "epoch": 0.19667335980962924, + "grad_norm": 109.27238464355469, + "learning_rate": 9.729181645801227e-06, + "loss": 15.1256, + "step": 97360 + }, + { + "epoch": 0.19669356044231306, + "grad_norm": 319.5441589355469, + "learning_rate": 9.729068312190122e-06, + "loss": 10.9112, + "step": 97370 + }, + { + "epoch": 0.19671376107499686, + "grad_norm": 296.64691162109375, + "learning_rate": 9.728954955530113e-06, + "loss": 17.7924, + "step": 97380 + }, + { + "epoch": 0.19673396170768068, + "grad_norm": 476.6488037109375, + "learning_rate": 9.728841575821749e-06, + "loss": 17.7629, + "step": 97390 + }, + { + "epoch": 0.1967541623403645, + "grad_norm": 240.47398376464844, + "learning_rate": 9.728728173065584e-06, + "loss": 14.8832, + "step": 97400 + }, + { + "epoch": 0.19677436297304832, + "grad_norm": 1217.341552734375, + "learning_rate": 9.728614747262173e-06, + "loss": 43.1087, + "step": 97410 + }, + { + "epoch": 0.19679456360573214, + "grad_norm": 737.4136962890625, + "learning_rate": 9.728501298412067e-06, + "loss": 25.4208, + "step": 97420 + }, + { + "epoch": 0.19681476423841596, + "grad_norm": 375.95751953125, + "learning_rate": 9.72838782651582e-06, + "loss": 21.8538, + "step": 97430 + }, + { + "epoch": 0.19683496487109975, + "grad_norm": 702.6217041015625, + "learning_rate": 9.728274331573983e-06, + "loss": 23.9543, + "step": 97440 + }, + { + "epoch": 0.19685516550378357, + "grad_norm": 699.86572265625, + "learning_rate": 9.728160813587111e-06, + "loss": 24.3078, + "step": 97450 + }, + { + "epoch": 0.1968753661364674, + "grad_norm": 314.34027099609375, + "learning_rate": 9.728047272555756e-06, + "loss": 11.6271, + "step": 97460 + }, + { + "epoch": 0.1968955667691512, + "grad_norm": 1008.5680541992188, + "learning_rate": 9.727933708480474e-06, + "loss": 39.3888, + "step": 97470 + }, + { + "epoch": 0.19691576740183503, + "grad_norm": 335.6155090332031, + "learning_rate": 9.727820121361815e-06, + "loss": 21.5971, + "step": 97480 + }, + { + "epoch": 0.19693596803451885, + 
"grad_norm": 174.36721801757812, + "learning_rate": 9.727706511200335e-06, + "loss": 18.7552, + "step": 97490 + }, + { + "epoch": 0.19695616866720267, + "grad_norm": 650.0670166015625, + "learning_rate": 9.727592877996585e-06, + "loss": 37.8017, + "step": 97500 + }, + { + "epoch": 0.19697636929988646, + "grad_norm": 397.67364501953125, + "learning_rate": 9.727479221751122e-06, + "loss": 17.679, + "step": 97510 + }, + { + "epoch": 0.19699656993257028, + "grad_norm": 348.5740661621094, + "learning_rate": 9.727365542464498e-06, + "loss": 52.2058, + "step": 97520 + }, + { + "epoch": 0.1970167705652541, + "grad_norm": 512.7111206054688, + "learning_rate": 9.727251840137269e-06, + "loss": 39.8835, + "step": 97530 + }, + { + "epoch": 0.19703697119793792, + "grad_norm": 163.2144012451172, + "learning_rate": 9.727138114769986e-06, + "loss": 16.9144, + "step": 97540 + }, + { + "epoch": 0.19705717183062174, + "grad_norm": 581.782470703125, + "learning_rate": 9.727024366363208e-06, + "loss": 27.117, + "step": 97550 + }, + { + "epoch": 0.19707737246330556, + "grad_norm": 631.5810546875, + "learning_rate": 9.726910594917482e-06, + "loss": 19.6229, + "step": 97560 + }, + { + "epoch": 0.19709757309598935, + "grad_norm": 462.9996643066406, + "learning_rate": 9.726796800433371e-06, + "loss": 24.7656, + "step": 97570 + }, + { + "epoch": 0.19711777372867317, + "grad_norm": 326.8007507324219, + "learning_rate": 9.726682982911423e-06, + "loss": 43.7528, + "step": 97580 + }, + { + "epoch": 0.197137974361357, + "grad_norm": 467.9544982910156, + "learning_rate": 9.726569142352197e-06, + "loss": 23.2517, + "step": 97590 + }, + { + "epoch": 0.1971581749940408, + "grad_norm": 318.44317626953125, + "learning_rate": 9.726455278756249e-06, + "loss": 48.4266, + "step": 97600 + }, + { + "epoch": 0.19717837562672463, + "grad_norm": 432.2263488769531, + "learning_rate": 9.726341392124127e-06, + "loss": 13.2294, + "step": 97610 + }, + { + "epoch": 0.19719857625940845, + "grad_norm": 417.3358459472656, + "learning_rate": 9.726227482456391e-06, + "loss": 19.9933, + "step": 97620 + }, + { + "epoch": 0.19721877689209227, + "grad_norm": 764.5451049804688, + "learning_rate": 9.726113549753597e-06, + "loss": 32.1456, + "step": 97630 + }, + { + "epoch": 0.19723897752477607, + "grad_norm": 241.2354736328125, + "learning_rate": 9.725999594016298e-06, + "loss": 23.6431, + "step": 97640 + }, + { + "epoch": 0.19725917815745989, + "grad_norm": 2175.685302734375, + "learning_rate": 9.72588561524505e-06, + "loss": 42.8542, + "step": 97650 + }, + { + "epoch": 0.1972793787901437, + "grad_norm": 268.6827697753906, + "learning_rate": 9.725771613440408e-06, + "loss": 28.908, + "step": 97660 + }, + { + "epoch": 0.19729957942282753, + "grad_norm": 387.532470703125, + "learning_rate": 9.72565758860293e-06, + "loss": 16.1734, + "step": 97670 + }, + { + "epoch": 0.19731978005551135, + "grad_norm": 1250.755126953125, + "learning_rate": 9.725543540733168e-06, + "loss": 40.3066, + "step": 97680 + }, + { + "epoch": 0.19733998068819517, + "grad_norm": 258.7558288574219, + "learning_rate": 9.725429469831682e-06, + "loss": 18.6515, + "step": 97690 + }, + { + "epoch": 0.19736018132087896, + "grad_norm": 299.046142578125, + "learning_rate": 9.725315375899025e-06, + "loss": 17.033, + "step": 97700 + }, + { + "epoch": 0.19738038195356278, + "grad_norm": 594.5548095703125, + "learning_rate": 9.725201258935755e-06, + "loss": 12.3045, + "step": 97710 + }, + { + "epoch": 0.1974005825862466, + "grad_norm": 630.0167236328125, + "learning_rate": 9.725087118942425e-06, 
+ "loss": 16.1351, + "step": 97720 + }, + { + "epoch": 0.19742078321893042, + "grad_norm": 6.42670202255249, + "learning_rate": 9.724972955919594e-06, + "loss": 16.2627, + "step": 97730 + }, + { + "epoch": 0.19744098385161424, + "grad_norm": 352.1437683105469, + "learning_rate": 9.72485876986782e-06, + "loss": 17.0874, + "step": 97740 + }, + { + "epoch": 0.19746118448429806, + "grad_norm": 789.1223754882812, + "learning_rate": 9.724744560787655e-06, + "loss": 28.869, + "step": 97750 + }, + { + "epoch": 0.19748138511698185, + "grad_norm": 324.92718505859375, + "learning_rate": 9.724630328679658e-06, + "loss": 23.7328, + "step": 97760 + }, + { + "epoch": 0.19750158574966567, + "grad_norm": 484.04547119140625, + "learning_rate": 9.724516073544388e-06, + "loss": 22.9338, + "step": 97770 + }, + { + "epoch": 0.1975217863823495, + "grad_norm": 312.01007080078125, + "learning_rate": 9.724401795382398e-06, + "loss": 13.2022, + "step": 97780 + }, + { + "epoch": 0.1975419870150333, + "grad_norm": 243.39907836914062, + "learning_rate": 9.724287494194247e-06, + "loss": 17.0577, + "step": 97790 + }, + { + "epoch": 0.19756218764771713, + "grad_norm": 160.3676300048828, + "learning_rate": 9.724173169980492e-06, + "loss": 29.0538, + "step": 97800 + }, + { + "epoch": 0.19758238828040095, + "grad_norm": 116.18144226074219, + "learning_rate": 9.72405882274169e-06, + "loss": 16.0328, + "step": 97810 + }, + { + "epoch": 0.19760258891308477, + "grad_norm": 112.05309295654297, + "learning_rate": 9.723944452478398e-06, + "loss": 19.876, + "step": 97820 + }, + { + "epoch": 0.19762278954576856, + "grad_norm": 861.3955688476562, + "learning_rate": 9.723830059191173e-06, + "loss": 19.4862, + "step": 97830 + }, + { + "epoch": 0.19764299017845238, + "grad_norm": 411.5660705566406, + "learning_rate": 9.723715642880574e-06, + "loss": 22.4642, + "step": 97840 + }, + { + "epoch": 0.1976631908111362, + "grad_norm": 153.12356567382812, + "learning_rate": 9.723601203547158e-06, + "loss": 18.7759, + "step": 97850 + }, + { + "epoch": 0.19768339144382002, + "grad_norm": 228.43202209472656, + "learning_rate": 9.723486741191482e-06, + "loss": 23.4711, + "step": 97860 + }, + { + "epoch": 0.19770359207650384, + "grad_norm": 827.0603637695312, + "learning_rate": 9.723372255814105e-06, + "loss": 31.2898, + "step": 97870 + }, + { + "epoch": 0.19772379270918766, + "grad_norm": 681.8280029296875, + "learning_rate": 9.723257747415584e-06, + "loss": 26.4491, + "step": 97880 + }, + { + "epoch": 0.19774399334187145, + "grad_norm": 408.0483703613281, + "learning_rate": 9.723143215996479e-06, + "loss": 33.7071, + "step": 97890 + }, + { + "epoch": 0.19776419397455527, + "grad_norm": 165.12281799316406, + "learning_rate": 9.723028661557345e-06, + "loss": 16.8155, + "step": 97900 + }, + { + "epoch": 0.1977843946072391, + "grad_norm": 450.7340087890625, + "learning_rate": 9.722914084098745e-06, + "loss": 40.1197, + "step": 97910 + }, + { + "epoch": 0.19780459523992291, + "grad_norm": 111.25831604003906, + "learning_rate": 9.722799483621232e-06, + "loss": 15.7333, + "step": 97920 + }, + { + "epoch": 0.19782479587260673, + "grad_norm": 479.22613525390625, + "learning_rate": 9.722684860125367e-06, + "loss": 13.8798, + "step": 97930 + }, + { + "epoch": 0.19784499650529055, + "grad_norm": 267.47003173828125, + "learning_rate": 9.72257021361171e-06, + "loss": 20.3385, + "step": 97940 + }, + { + "epoch": 0.19786519713797437, + "grad_norm": 33.6751823425293, + "learning_rate": 9.722455544080818e-06, + "loss": 19.3102, + "step": 97950 + }, + { + "epoch": 
0.19788539777065817, + "grad_norm": 6.024238586425781, + "learning_rate": 9.72234085153325e-06, + "loss": 23.8579, + "step": 97960 + }, + { + "epoch": 0.197905598403342, + "grad_norm": 360.9849853515625, + "learning_rate": 9.722226135969565e-06, + "loss": 34.6989, + "step": 97970 + }, + { + "epoch": 0.1979257990360258, + "grad_norm": 739.1018676757812, + "learning_rate": 9.722111397390325e-06, + "loss": 24.8314, + "step": 97980 + }, + { + "epoch": 0.19794599966870963, + "grad_norm": 1417.043212890625, + "learning_rate": 9.721996635796085e-06, + "loss": 28.6388, + "step": 97990 + }, + { + "epoch": 0.19796620030139345, + "grad_norm": 329.4474792480469, + "learning_rate": 9.721881851187406e-06, + "loss": 38.0278, + "step": 98000 + }, + { + "epoch": 0.19798640093407727, + "grad_norm": 484.0167236328125, + "learning_rate": 9.721767043564848e-06, + "loss": 29.8133, + "step": 98010 + }, + { + "epoch": 0.19800660156676106, + "grad_norm": 521.02880859375, + "learning_rate": 9.72165221292897e-06, + "loss": 26.0634, + "step": 98020 + }, + { + "epoch": 0.19802680219944488, + "grad_norm": 632.4464721679688, + "learning_rate": 9.721537359280332e-06, + "loss": 21.9816, + "step": 98030 + }, + { + "epoch": 0.1980470028321287, + "grad_norm": 591.2565307617188, + "learning_rate": 9.721422482619493e-06, + "loss": 37.4917, + "step": 98040 + }, + { + "epoch": 0.19806720346481252, + "grad_norm": 677.9862670898438, + "learning_rate": 9.721307582947014e-06, + "loss": 23.1645, + "step": 98050 + }, + { + "epoch": 0.19808740409749634, + "grad_norm": 252.03517150878906, + "learning_rate": 9.721192660263454e-06, + "loss": 24.8406, + "step": 98060 + }, + { + "epoch": 0.19810760473018016, + "grad_norm": 333.40264892578125, + "learning_rate": 9.721077714569374e-06, + "loss": 13.3066, + "step": 98070 + }, + { + "epoch": 0.19812780536286395, + "grad_norm": 600.9188842773438, + "learning_rate": 9.720962745865334e-06, + "loss": 27.6109, + "step": 98080 + }, + { + "epoch": 0.19814800599554777, + "grad_norm": 392.2532958984375, + "learning_rate": 9.720847754151894e-06, + "loss": 16.8962, + "step": 98090 + }, + { + "epoch": 0.1981682066282316, + "grad_norm": 157.43276977539062, + "learning_rate": 9.720732739429614e-06, + "loss": 41.7694, + "step": 98100 + }, + { + "epoch": 0.1981884072609154, + "grad_norm": 521.4472045898438, + "learning_rate": 9.720617701699056e-06, + "loss": 31.5027, + "step": 98110 + }, + { + "epoch": 0.19820860789359923, + "grad_norm": 312.29608154296875, + "learning_rate": 9.72050264096078e-06, + "loss": 27.9765, + "step": 98120 + }, + { + "epoch": 0.19822880852628305, + "grad_norm": 198.32687377929688, + "learning_rate": 9.720387557215344e-06, + "loss": 38.0568, + "step": 98130 + }, + { + "epoch": 0.19824900915896687, + "grad_norm": 119.37037658691406, + "learning_rate": 9.720272450463315e-06, + "loss": 16.5318, + "step": 98140 + }, + { + "epoch": 0.19826920979165066, + "grad_norm": 274.2486572265625, + "learning_rate": 9.72015732070525e-06, + "loss": 13.2182, + "step": 98150 + }, + { + "epoch": 0.19828941042433448, + "grad_norm": 392.5074462890625, + "learning_rate": 9.72004216794171e-06, + "loss": 36.3018, + "step": 98160 + }, + { + "epoch": 0.1983096110570183, + "grad_norm": 458.8179016113281, + "learning_rate": 9.719926992173257e-06, + "loss": 23.7772, + "step": 98170 + }, + { + "epoch": 0.19832981168970212, + "grad_norm": 103.11629486083984, + "learning_rate": 9.71981179340045e-06, + "loss": 25.5153, + "step": 98180 + }, + { + "epoch": 0.19835001232238594, + "grad_norm": 970.4147338867188, + 
"learning_rate": 9.719696571623857e-06, + "loss": 29.5578, + "step": 98190 + }, + { + "epoch": 0.19837021295506976, + "grad_norm": 337.5794677734375, + "learning_rate": 9.719581326844033e-06, + "loss": 12.8689, + "step": 98200 + }, + { + "epoch": 0.19839041358775356, + "grad_norm": 122.60509490966797, + "learning_rate": 9.719466059061542e-06, + "loss": 14.564, + "step": 98210 + }, + { + "epoch": 0.19841061422043738, + "grad_norm": 18.19044303894043, + "learning_rate": 9.719350768276947e-06, + "loss": 16.1531, + "step": 98220 + }, + { + "epoch": 0.1984308148531212, + "grad_norm": 448.45513916015625, + "learning_rate": 9.719235454490807e-06, + "loss": 25.5586, + "step": 98230 + }, + { + "epoch": 0.19845101548580502, + "grad_norm": 534.3065185546875, + "learning_rate": 9.719120117703688e-06, + "loss": 12.0728, + "step": 98240 + }, + { + "epoch": 0.19847121611848884, + "grad_norm": 436.68438720703125, + "learning_rate": 9.719004757916149e-06, + "loss": 24.4262, + "step": 98250 + }, + { + "epoch": 0.19849141675117266, + "grad_norm": 595.8948364257812, + "learning_rate": 9.718889375128752e-06, + "loss": 20.5172, + "step": 98260 + }, + { + "epoch": 0.19851161738385648, + "grad_norm": 229.6000213623047, + "learning_rate": 9.71877396934206e-06, + "loss": 13.0411, + "step": 98270 + }, + { + "epoch": 0.19853181801654027, + "grad_norm": 1049.194580078125, + "learning_rate": 9.718658540556638e-06, + "loss": 31.0035, + "step": 98280 + }, + { + "epoch": 0.1985520186492241, + "grad_norm": 431.2273254394531, + "learning_rate": 9.718543088773047e-06, + "loss": 22.3374, + "step": 98290 + }, + { + "epoch": 0.1985722192819079, + "grad_norm": 641.8602905273438, + "learning_rate": 9.718427613991848e-06, + "loss": 38.678, + "step": 98300 + }, + { + "epoch": 0.19859241991459173, + "grad_norm": 642.9496459960938, + "learning_rate": 9.718312116213604e-06, + "loss": 27.4916, + "step": 98310 + }, + { + "epoch": 0.19861262054727555, + "grad_norm": 349.74932861328125, + "learning_rate": 9.71819659543888e-06, + "loss": 30.3614, + "step": 98320 + }, + { + "epoch": 0.19863282117995937, + "grad_norm": 258.68194580078125, + "learning_rate": 9.71808105166824e-06, + "loss": 31.4953, + "step": 98330 + }, + { + "epoch": 0.19865302181264316, + "grad_norm": 456.8066101074219, + "learning_rate": 9.717965484902244e-06, + "loss": 24.9158, + "step": 98340 + }, + { + "epoch": 0.19867322244532698, + "grad_norm": 534.912353515625, + "learning_rate": 9.717849895141455e-06, + "loss": 24.7208, + "step": 98350 + }, + { + "epoch": 0.1986934230780108, + "grad_norm": 2246.787841796875, + "learning_rate": 9.717734282386439e-06, + "loss": 28.4148, + "step": 98360 + }, + { + "epoch": 0.19871362371069462, + "grad_norm": 269.0535583496094, + "learning_rate": 9.717618646637758e-06, + "loss": 9.2332, + "step": 98370 + }, + { + "epoch": 0.19873382434337844, + "grad_norm": 445.5294494628906, + "learning_rate": 9.717502987895975e-06, + "loss": 19.4302, + "step": 98380 + }, + { + "epoch": 0.19875402497606226, + "grad_norm": 763.8333129882812, + "learning_rate": 9.717387306161657e-06, + "loss": 39.5839, + "step": 98390 + }, + { + "epoch": 0.19877422560874605, + "grad_norm": 832.6790161132812, + "learning_rate": 9.717271601435363e-06, + "loss": 27.792, + "step": 98400 + }, + { + "epoch": 0.19879442624142987, + "grad_norm": 334.370849609375, + "learning_rate": 9.71715587371766e-06, + "loss": 23.0572, + "step": 98410 + }, + { + "epoch": 0.1988146268741137, + "grad_norm": 202.83169555664062, + "learning_rate": 9.717040123009111e-06, + "loss": 17.9318, + 
"step": 98420 + }, + { + "epoch": 0.1988348275067975, + "grad_norm": 636.1160278320312, + "learning_rate": 9.716924349310281e-06, + "loss": 27.7571, + "step": 98430 + }, + { + "epoch": 0.19885502813948133, + "grad_norm": 256.3330078125, + "learning_rate": 9.716808552621735e-06, + "loss": 10.47, + "step": 98440 + }, + { + "epoch": 0.19887522877216515, + "grad_norm": 734.7747802734375, + "learning_rate": 9.716692732944036e-06, + "loss": 35.1714, + "step": 98450 + }, + { + "epoch": 0.19889542940484897, + "grad_norm": 385.6474609375, + "learning_rate": 9.716576890277747e-06, + "loss": 34.7533, + "step": 98460 + }, + { + "epoch": 0.19891563003753276, + "grad_norm": 1101.65478515625, + "learning_rate": 9.716461024623437e-06, + "loss": 43.279, + "step": 98470 + }, + { + "epoch": 0.19893583067021658, + "grad_norm": 751.535400390625, + "learning_rate": 9.716345135981663e-06, + "loss": 47.3784, + "step": 98480 + }, + { + "epoch": 0.1989560313029004, + "grad_norm": 713.7888793945312, + "learning_rate": 9.716229224353e-06, + "loss": 20.0859, + "step": 98490 + }, + { + "epoch": 0.19897623193558422, + "grad_norm": 149.56085205078125, + "learning_rate": 9.716113289738005e-06, + "loss": 22.1564, + "step": 98500 + }, + { + "epoch": 0.19899643256826804, + "grad_norm": 529.3665161132812, + "learning_rate": 9.715997332137248e-06, + "loss": 20.9888, + "step": 98510 + }, + { + "epoch": 0.19901663320095186, + "grad_norm": 286.5312194824219, + "learning_rate": 9.71588135155129e-06, + "loss": 18.9513, + "step": 98520 + }, + { + "epoch": 0.19903683383363566, + "grad_norm": 541.6201782226562, + "learning_rate": 9.7157653479807e-06, + "loss": 14.7998, + "step": 98530 + }, + { + "epoch": 0.19905703446631948, + "grad_norm": 286.26678466796875, + "learning_rate": 9.71564932142604e-06, + "loss": 16.4403, + "step": 98540 + }, + { + "epoch": 0.1990772350990033, + "grad_norm": 446.2434997558594, + "learning_rate": 9.715533271887877e-06, + "loss": 24.1389, + "step": 98550 + }, + { + "epoch": 0.19909743573168712, + "grad_norm": 625.5509033203125, + "learning_rate": 9.715417199366778e-06, + "loss": 23.0274, + "step": 98560 + }, + { + "epoch": 0.19911763636437094, + "grad_norm": 141.72207641601562, + "learning_rate": 9.715301103863306e-06, + "loss": 15.1728, + "step": 98570 + }, + { + "epoch": 0.19913783699705476, + "grad_norm": 324.7051696777344, + "learning_rate": 9.71518498537803e-06, + "loss": 21.8118, + "step": 98580 + }, + { + "epoch": 0.19915803762973858, + "grad_norm": 571.4437866210938, + "learning_rate": 9.715068843911513e-06, + "loss": 18.9153, + "step": 98590 + }, + { + "epoch": 0.19917823826242237, + "grad_norm": 219.9671173095703, + "learning_rate": 9.714952679464324e-06, + "loss": 16.8028, + "step": 98600 + }, + { + "epoch": 0.1991984388951062, + "grad_norm": 1232.3668212890625, + "learning_rate": 9.714836492037025e-06, + "loss": 26.0865, + "step": 98610 + }, + { + "epoch": 0.19921863952779, + "grad_norm": 366.5797119140625, + "learning_rate": 9.714720281630186e-06, + "loss": 16.8544, + "step": 98620 + }, + { + "epoch": 0.19923884016047383, + "grad_norm": 872.5311889648438, + "learning_rate": 9.714604048244372e-06, + "loss": 22.2088, + "step": 98630 + }, + { + "epoch": 0.19925904079315765, + "grad_norm": 67.38678741455078, + "learning_rate": 9.714487791880151e-06, + "loss": 12.5812, + "step": 98640 + }, + { + "epoch": 0.19927924142584147, + "grad_norm": 625.0254516601562, + "learning_rate": 9.714371512538088e-06, + "loss": 30.3685, + "step": 98650 + }, + { + "epoch": 0.19929944205852526, + "grad_norm": 
464.5794677734375, + "learning_rate": 9.714255210218747e-06, + "loss": 15.5791, + "step": 98660 + }, + { + "epoch": 0.19931964269120908, + "grad_norm": 428.2454833984375, + "learning_rate": 9.7141388849227e-06, + "loss": 31.7191, + "step": 98670 + }, + { + "epoch": 0.1993398433238929, + "grad_norm": 324.87799072265625, + "learning_rate": 9.714022536650513e-06, + "loss": 29.7944, + "step": 98680 + }, + { + "epoch": 0.19936004395657672, + "grad_norm": 193.9474639892578, + "learning_rate": 9.713906165402751e-06, + "loss": 38.0959, + "step": 98690 + }, + { + "epoch": 0.19938024458926054, + "grad_norm": 253.7535858154297, + "learning_rate": 9.713789771179983e-06, + "loss": 21.0864, + "step": 98700 + }, + { + "epoch": 0.19940044522194436, + "grad_norm": 604.3446044921875, + "learning_rate": 9.713673353982773e-06, + "loss": 21.4508, + "step": 98710 + }, + { + "epoch": 0.19942064585462815, + "grad_norm": 290.3115234375, + "learning_rate": 9.713556913811693e-06, + "loss": 21.0728, + "step": 98720 + }, + { + "epoch": 0.19944084648731197, + "grad_norm": 224.96560668945312, + "learning_rate": 9.713440450667307e-06, + "loss": 14.6751, + "step": 98730 + }, + { + "epoch": 0.1994610471199958, + "grad_norm": 182.87301635742188, + "learning_rate": 9.713323964550185e-06, + "loss": 16.7221, + "step": 98740 + }, + { + "epoch": 0.1994812477526796, + "grad_norm": 535.5203247070312, + "learning_rate": 9.713207455460893e-06, + "loss": 24.1277, + "step": 98750 + }, + { + "epoch": 0.19950144838536343, + "grad_norm": 439.2107238769531, + "learning_rate": 9.713090923399999e-06, + "loss": 16.6916, + "step": 98760 + }, + { + "epoch": 0.19952164901804725, + "grad_norm": 792.3186645507812, + "learning_rate": 9.712974368368072e-06, + "loss": 34.1692, + "step": 98770 + }, + { + "epoch": 0.19954184965073107, + "grad_norm": 1346.2783203125, + "learning_rate": 9.71285779036568e-06, + "loss": 47.4253, + "step": 98780 + }, + { + "epoch": 0.19956205028341487, + "grad_norm": 530.707763671875, + "learning_rate": 9.71274118939339e-06, + "loss": 18.1747, + "step": 98790 + }, + { + "epoch": 0.19958225091609869, + "grad_norm": 330.8648986816406, + "learning_rate": 9.712624565451772e-06, + "loss": 29.0284, + "step": 98800 + }, + { + "epoch": 0.1996024515487825, + "grad_norm": 570.5589599609375, + "learning_rate": 9.712507918541391e-06, + "loss": 21.0312, + "step": 98810 + }, + { + "epoch": 0.19962265218146633, + "grad_norm": 453.2312316894531, + "learning_rate": 9.712391248662821e-06, + "loss": 15.446, + "step": 98820 + }, + { + "epoch": 0.19964285281415015, + "grad_norm": 307.1821594238281, + "learning_rate": 9.712274555816626e-06, + "loss": 23.4843, + "step": 98830 + }, + { + "epoch": 0.19966305344683397, + "grad_norm": 216.52488708496094, + "learning_rate": 9.712157840003377e-06, + "loss": 14.731, + "step": 98840 + }, + { + "epoch": 0.19968325407951776, + "grad_norm": 635.91552734375, + "learning_rate": 9.71204110122364e-06, + "loss": 58.3714, + "step": 98850 + }, + { + "epoch": 0.19970345471220158, + "grad_norm": 182.4932403564453, + "learning_rate": 9.71192433947799e-06, + "loss": 18.5096, + "step": 98860 + }, + { + "epoch": 0.1997236553448854, + "grad_norm": 1478.6046142578125, + "learning_rate": 9.71180755476699e-06, + "loss": 33.0487, + "step": 98870 + }, + { + "epoch": 0.19974385597756922, + "grad_norm": 123.76673126220703, + "learning_rate": 9.711690747091211e-06, + "loss": 14.9674, + "step": 98880 + }, + { + "epoch": 0.19976405661025304, + "grad_norm": 254.47056579589844, + "learning_rate": 9.711573916451224e-06, + "loss": 
13.8541, + "step": 98890 + }, + { + "epoch": 0.19978425724293686, + "grad_norm": 104.44090270996094, + "learning_rate": 9.711457062847596e-06, + "loss": 22.3526, + "step": 98900 + }, + { + "epoch": 0.19980445787562068, + "grad_norm": 213.63504028320312, + "learning_rate": 9.7113401862809e-06, + "loss": 25.3043, + "step": 98910 + }, + { + "epoch": 0.19982465850830447, + "grad_norm": 228.15830993652344, + "learning_rate": 9.7112232867517e-06, + "loss": 26.1682, + "step": 98920 + }, + { + "epoch": 0.1998448591409883, + "grad_norm": 359.323974609375, + "learning_rate": 9.711106364260572e-06, + "loss": 17.7793, + "step": 98930 + }, + { + "epoch": 0.1998650597736721, + "grad_norm": 767.1118774414062, + "learning_rate": 9.71098941880808e-06, + "loss": 13.5829, + "step": 98940 + }, + { + "epoch": 0.19988526040635593, + "grad_norm": 600.9185180664062, + "learning_rate": 9.7108724503948e-06, + "loss": 16.578, + "step": 98950 + }, + { + "epoch": 0.19990546103903975, + "grad_norm": 394.9085998535156, + "learning_rate": 9.710755459021297e-06, + "loss": 20.4923, + "step": 98960 + }, + { + "epoch": 0.19992566167172357, + "grad_norm": 724.1331176757812, + "learning_rate": 9.710638444688146e-06, + "loss": 16.3844, + "step": 98970 + }, + { + "epoch": 0.19994586230440736, + "grad_norm": 536.1177368164062, + "learning_rate": 9.71052140739591e-06, + "loss": 10.3771, + "step": 98980 + }, + { + "epoch": 0.19996606293709118, + "grad_norm": 622.7116088867188, + "learning_rate": 9.710404347145168e-06, + "loss": 18.9236, + "step": 98990 + }, + { + "epoch": 0.199986263569775, + "grad_norm": 475.79736328125, + "learning_rate": 9.710287263936485e-06, + "loss": 16.2164, + "step": 99000 + }, + { + "epoch": 0.20000646420245882, + "grad_norm": 354.1842041015625, + "learning_rate": 9.710170157770434e-06, + "loss": 25.4532, + "step": 99010 + }, + { + "epoch": 0.20002666483514264, + "grad_norm": 844.4014282226562, + "learning_rate": 9.710053028647583e-06, + "loss": 46.8019, + "step": 99020 + }, + { + "epoch": 0.20004686546782646, + "grad_norm": 470.49542236328125, + "learning_rate": 9.709935876568506e-06, + "loss": 24.6423, + "step": 99030 + }, + { + "epoch": 0.20006706610051025, + "grad_norm": 480.843017578125, + "learning_rate": 9.709818701533774e-06, + "loss": 23.2238, + "step": 99040 + }, + { + "epoch": 0.20008726673319407, + "grad_norm": 650.1700439453125, + "learning_rate": 9.709701503543954e-06, + "loss": 13.175, + "step": 99050 + }, + { + "epoch": 0.2001074673658779, + "grad_norm": 546.8789672851562, + "learning_rate": 9.709584282599623e-06, + "loss": 17.9362, + "step": 99060 + }, + { + "epoch": 0.20012766799856171, + "grad_norm": 526.370849609375, + "learning_rate": 9.709467038701348e-06, + "loss": 29.2563, + "step": 99070 + }, + { + "epoch": 0.20014786863124553, + "grad_norm": 271.8720703125, + "learning_rate": 9.709349771849701e-06, + "loss": 14.2697, + "step": 99080 + }, + { + "epoch": 0.20016806926392935, + "grad_norm": 366.5167236328125, + "learning_rate": 9.709232482045254e-06, + "loss": 23.3278, + "step": 99090 + }, + { + "epoch": 0.20018826989661317, + "grad_norm": 118.59703063964844, + "learning_rate": 9.709115169288582e-06, + "loss": 29.0655, + "step": 99100 + }, + { + "epoch": 0.20020847052929697, + "grad_norm": 1124.3302001953125, + "learning_rate": 9.708997833580251e-06, + "loss": 41.9663, + "step": 99110 + }, + { + "epoch": 0.2002286711619808, + "grad_norm": 450.16778564453125, + "learning_rate": 9.708880474920836e-06, + "loss": 31.6968, + "step": 99120 + }, + { + "epoch": 0.2002488717946646, + 
"grad_norm": 1278.3619384765625, + "learning_rate": 9.708763093310911e-06, + "loss": 25.3642, + "step": 99130 + }, + { + "epoch": 0.20026907242734843, + "grad_norm": 964.25048828125, + "learning_rate": 9.708645688751043e-06, + "loss": 36.2234, + "step": 99140 + }, + { + "epoch": 0.20028927306003225, + "grad_norm": 585.2734375, + "learning_rate": 9.70852826124181e-06, + "loss": 26.9971, + "step": 99150 + }, + { + "epoch": 0.20030947369271607, + "grad_norm": 511.50244140625, + "learning_rate": 9.70841081078378e-06, + "loss": 21.3564, + "step": 99160 + }, + { + "epoch": 0.20032967432539986, + "grad_norm": 108.579833984375, + "learning_rate": 9.708293337377525e-06, + "loss": 17.6321, + "step": 99170 + }, + { + "epoch": 0.20034987495808368, + "grad_norm": 605.6941528320312, + "learning_rate": 9.70817584102362e-06, + "loss": 17.3211, + "step": 99180 + }, + { + "epoch": 0.2003700755907675, + "grad_norm": 528.8156127929688, + "learning_rate": 9.70805832172264e-06, + "loss": 27.5643, + "step": 99190 + }, + { + "epoch": 0.20039027622345132, + "grad_norm": 906.8600463867188, + "learning_rate": 9.707940779475151e-06, + "loss": 33.9335, + "step": 99200 + }, + { + "epoch": 0.20041047685613514, + "grad_norm": 714.3448486328125, + "learning_rate": 9.707823214281733e-06, + "loss": 33.7857, + "step": 99210 + }, + { + "epoch": 0.20043067748881896, + "grad_norm": 255.84066772460938, + "learning_rate": 9.707705626142952e-06, + "loss": 18.3717, + "step": 99220 + }, + { + "epoch": 0.20045087812150278, + "grad_norm": 512.1227416992188, + "learning_rate": 9.707588015059387e-06, + "loss": 20.0914, + "step": 99230 + }, + { + "epoch": 0.20047107875418657, + "grad_norm": 481.708251953125, + "learning_rate": 9.707470381031608e-06, + "loss": 44.6957, + "step": 99240 + }, + { + "epoch": 0.2004912793868704, + "grad_norm": 492.8053283691406, + "learning_rate": 9.70735272406019e-06, + "loss": 38.131, + "step": 99250 + }, + { + "epoch": 0.2005114800195542, + "grad_norm": 952.0032958984375, + "learning_rate": 9.707235044145707e-06, + "loss": 12.7383, + "step": 99260 + }, + { + "epoch": 0.20053168065223803, + "grad_norm": 206.84864807128906, + "learning_rate": 9.707117341288728e-06, + "loss": 17.0316, + "step": 99270 + }, + { + "epoch": 0.20055188128492185, + "grad_norm": 471.7038269042969, + "learning_rate": 9.706999615489833e-06, + "loss": 22.6234, + "step": 99280 + }, + { + "epoch": 0.20057208191760567, + "grad_norm": 219.2476806640625, + "learning_rate": 9.70688186674959e-06, + "loss": 25.6948, + "step": 99290 + }, + { + "epoch": 0.20059228255028946, + "grad_norm": 506.0567321777344, + "learning_rate": 9.706764095068579e-06, + "loss": 19.0954, + "step": 99300 + }, + { + "epoch": 0.20061248318297328, + "grad_norm": 414.366455078125, + "learning_rate": 9.706646300447369e-06, + "loss": 20.1565, + "step": 99310 + }, + { + "epoch": 0.2006326838156571, + "grad_norm": 3225.341796875, + "learning_rate": 9.706528482886535e-06, + "loss": 35.9637, + "step": 99320 + }, + { + "epoch": 0.20065288444834092, + "grad_norm": 493.09442138671875, + "learning_rate": 9.706410642386653e-06, + "loss": 25.6267, + "step": 99330 + }, + { + "epoch": 0.20067308508102474, + "grad_norm": 505.97161865234375, + "learning_rate": 9.706292778948297e-06, + "loss": 27.1495, + "step": 99340 + }, + { + "epoch": 0.20069328571370856, + "grad_norm": 230.9978485107422, + "learning_rate": 9.706174892572038e-06, + "loss": 19.038, + "step": 99350 + }, + { + "epoch": 0.20071348634639236, + "grad_norm": 768.5806884765625, + "learning_rate": 9.706056983258456e-06, + 
"loss": 21.1466, + "step": 99360 + }, + { + "epoch": 0.20073368697907618, + "grad_norm": 572.5496826171875, + "learning_rate": 9.705939051008124e-06, + "loss": 17.4252, + "step": 99370 + }, + { + "epoch": 0.20075388761176, + "grad_norm": 402.9242858886719, + "learning_rate": 9.705821095821612e-06, + "loss": 15.5765, + "step": 99380 + }, + { + "epoch": 0.20077408824444382, + "grad_norm": 325.3233947753906, + "learning_rate": 9.705703117699501e-06, + "loss": 26.812, + "step": 99390 + }, + { + "epoch": 0.20079428887712764, + "grad_norm": 592.6520385742188, + "learning_rate": 9.705585116642364e-06, + "loss": 24.3809, + "step": 99400 + }, + { + "epoch": 0.20081448950981146, + "grad_norm": 264.4601135253906, + "learning_rate": 9.705467092650775e-06, + "loss": 28.0144, + "step": 99410 + }, + { + "epoch": 0.20083469014249528, + "grad_norm": 221.36004638671875, + "learning_rate": 9.705349045725313e-06, + "loss": 20.65, + "step": 99420 + }, + { + "epoch": 0.20085489077517907, + "grad_norm": 306.7693786621094, + "learning_rate": 9.705230975866547e-06, + "loss": 23.898, + "step": 99430 + }, + { + "epoch": 0.2008750914078629, + "grad_norm": 323.7845153808594, + "learning_rate": 9.705112883075055e-06, + "loss": 21.4143, + "step": 99440 + }, + { + "epoch": 0.2008952920405467, + "grad_norm": 193.15858459472656, + "learning_rate": 9.704994767351417e-06, + "loss": 16.4971, + "step": 99450 + }, + { + "epoch": 0.20091549267323053, + "grad_norm": 1528.001220703125, + "learning_rate": 9.704876628696202e-06, + "loss": 43.906, + "step": 99460 + }, + { + "epoch": 0.20093569330591435, + "grad_norm": 119.73884582519531, + "learning_rate": 9.70475846710999e-06, + "loss": 37.649, + "step": 99470 + }, + { + "epoch": 0.20095589393859817, + "grad_norm": 131.5335693359375, + "learning_rate": 9.704640282593359e-06, + "loss": 21.3057, + "step": 99480 + }, + { + "epoch": 0.20097609457128196, + "grad_norm": 433.1357421875, + "learning_rate": 9.704522075146878e-06, + "loss": 30.4396, + "step": 99490 + }, + { + "epoch": 0.20099629520396578, + "grad_norm": 187.129638671875, + "learning_rate": 9.704403844771128e-06, + "loss": 24.8694, + "step": 99500 + }, + { + "epoch": 0.2010164958366496, + "grad_norm": 192.597900390625, + "learning_rate": 9.704285591466685e-06, + "loss": 20.0425, + "step": 99510 + }, + { + "epoch": 0.20103669646933342, + "grad_norm": 327.7987060546875, + "learning_rate": 9.704167315234124e-06, + "loss": 48.091, + "step": 99520 + }, + { + "epoch": 0.20105689710201724, + "grad_norm": 192.59085083007812, + "learning_rate": 9.704049016074022e-06, + "loss": 38.102, + "step": 99530 + }, + { + "epoch": 0.20107709773470106, + "grad_norm": 448.1267395019531, + "learning_rate": 9.703930693986956e-06, + "loss": 23.2346, + "step": 99540 + }, + { + "epoch": 0.20109729836738488, + "grad_norm": 184.4168701171875, + "learning_rate": 9.703812348973501e-06, + "loss": 33.5095, + "step": 99550 + }, + { + "epoch": 0.20111749900006867, + "grad_norm": 636.1046142578125, + "learning_rate": 9.703693981034236e-06, + "loss": 19.6154, + "step": 99560 + }, + { + "epoch": 0.2011376996327525, + "grad_norm": 508.3456726074219, + "learning_rate": 9.703575590169738e-06, + "loss": 25.7803, + "step": 99570 + }, + { + "epoch": 0.2011579002654363, + "grad_norm": 108.27706909179688, + "learning_rate": 9.703457176380581e-06, + "loss": 34.7553, + "step": 99580 + }, + { + "epoch": 0.20117810089812013, + "grad_norm": 348.802734375, + "learning_rate": 9.703338739667347e-06, + "loss": 23.5485, + "step": 99590 + }, + { + "epoch": 0.20119830153080395, + 
"grad_norm": 340.8241271972656, + "learning_rate": 9.703220280030607e-06, + "loss": 31.4257, + "step": 99600 + }, + { + "epoch": 0.20121850216348777, + "grad_norm": 675.7904052734375, + "learning_rate": 9.703101797470944e-06, + "loss": 29.3283, + "step": 99610 + }, + { + "epoch": 0.20123870279617156, + "grad_norm": 134.0491180419922, + "learning_rate": 9.702983291988934e-06, + "loss": 48.8887, + "step": 99620 + }, + { + "epoch": 0.20125890342885538, + "grad_norm": 39.543617248535156, + "learning_rate": 9.702864763585152e-06, + "loss": 32.0166, + "step": 99630 + }, + { + "epoch": 0.2012791040615392, + "grad_norm": 569.5358276367188, + "learning_rate": 9.702746212260179e-06, + "loss": 27.353, + "step": 99640 + }, + { + "epoch": 0.20129930469422302, + "grad_norm": 528.3767700195312, + "learning_rate": 9.70262763801459e-06, + "loss": 19.063, + "step": 99650 + }, + { + "epoch": 0.20131950532690684, + "grad_norm": 269.0294189453125, + "learning_rate": 9.702509040848964e-06, + "loss": 11.7937, + "step": 99660 + }, + { + "epoch": 0.20133970595959066, + "grad_norm": 395.1287841796875, + "learning_rate": 9.70239042076388e-06, + "loss": 17.4976, + "step": 99670 + }, + { + "epoch": 0.20135990659227446, + "grad_norm": 430.4999694824219, + "learning_rate": 9.702271777759915e-06, + "loss": 16.4138, + "step": 99680 + }, + { + "epoch": 0.20138010722495828, + "grad_norm": 642.3116455078125, + "learning_rate": 9.70215311183765e-06, + "loss": 33.394, + "step": 99690 + }, + { + "epoch": 0.2014003078576421, + "grad_norm": 56.426029205322266, + "learning_rate": 9.702034422997658e-06, + "loss": 34.9607, + "step": 99700 + }, + { + "epoch": 0.20142050849032592, + "grad_norm": 195.6506805419922, + "learning_rate": 9.701915711240522e-06, + "loss": 25.5187, + "step": 99710 + }, + { + "epoch": 0.20144070912300974, + "grad_norm": 295.8341369628906, + "learning_rate": 9.70179697656682e-06, + "loss": 21.67, + "step": 99720 + }, + { + "epoch": 0.20146090975569356, + "grad_norm": 313.3680419921875, + "learning_rate": 9.701678218977128e-06, + "loss": 14.2844, + "step": 99730 + }, + { + "epoch": 0.20148111038837738, + "grad_norm": 195.22238159179688, + "learning_rate": 9.701559438472026e-06, + "loss": 8.5286, + "step": 99740 + }, + { + "epoch": 0.20150131102106117, + "grad_norm": 242.9795379638672, + "learning_rate": 9.701440635052094e-06, + "loss": 17.6431, + "step": 99750 + }, + { + "epoch": 0.201521511653745, + "grad_norm": 630.5758056640625, + "learning_rate": 9.701321808717912e-06, + "loss": 17.8251, + "step": 99760 + }, + { + "epoch": 0.2015417122864288, + "grad_norm": 228.25241088867188, + "learning_rate": 9.701202959470057e-06, + "loss": 21.8293, + "step": 99770 + }, + { + "epoch": 0.20156191291911263, + "grad_norm": 413.8714599609375, + "learning_rate": 9.70108408730911e-06, + "loss": 21.084, + "step": 99780 + }, + { + "epoch": 0.20158211355179645, + "grad_norm": 391.77960205078125, + "learning_rate": 9.700965192235647e-06, + "loss": 20.4074, + "step": 99790 + }, + { + "epoch": 0.20160231418448027, + "grad_norm": 48.752891540527344, + "learning_rate": 9.700846274250252e-06, + "loss": 9.2901, + "step": 99800 + }, + { + "epoch": 0.20162251481716406, + "grad_norm": 676.621826171875, + "learning_rate": 9.700727333353502e-06, + "loss": 18.9555, + "step": 99810 + }, + { + "epoch": 0.20164271544984788, + "grad_norm": 235.96255493164062, + "learning_rate": 9.700608369545976e-06, + "loss": 24.6128, + "step": 99820 + }, + { + "epoch": 0.2016629160825317, + "grad_norm": 1.0006201267242432, + "learning_rate": 
9.700489382828255e-06, + "loss": 23.243, + "step": 99830 + }, + { + "epoch": 0.20168311671521552, + "grad_norm": 202.7542724609375, + "learning_rate": 9.70037037320092e-06, + "loss": 66.303, + "step": 99840 + }, + { + "epoch": 0.20170331734789934, + "grad_norm": 396.8049011230469, + "learning_rate": 9.70025134066455e-06, + "loss": 20.0084, + "step": 99850 + }, + { + "epoch": 0.20172351798058316, + "grad_norm": 101.11209869384766, + "learning_rate": 9.700132285219724e-06, + "loss": 20.172, + "step": 99860 + }, + { + "epoch": 0.20174371861326698, + "grad_norm": 130.86679077148438, + "learning_rate": 9.700013206867022e-06, + "loss": 23.5345, + "step": 99870 + }, + { + "epoch": 0.20176391924595077, + "grad_norm": 274.3761901855469, + "learning_rate": 9.699894105607028e-06, + "loss": 22.9851, + "step": 99880 + }, + { + "epoch": 0.2017841198786346, + "grad_norm": 12.90697956085205, + "learning_rate": 9.69977498144032e-06, + "loss": 43.7818, + "step": 99890 + }, + { + "epoch": 0.2018043205113184, + "grad_norm": 152.14195251464844, + "learning_rate": 9.699655834367479e-06, + "loss": 20.2141, + "step": 99900 + }, + { + "epoch": 0.20182452114400223, + "grad_norm": 338.20098876953125, + "learning_rate": 9.699536664389084e-06, + "loss": 17.3666, + "step": 99910 + }, + { + "epoch": 0.20184472177668605, + "grad_norm": 704.154052734375, + "learning_rate": 9.699417471505717e-06, + "loss": 39.9503, + "step": 99920 + }, + { + "epoch": 0.20186492240936987, + "grad_norm": 210.18307495117188, + "learning_rate": 9.699298255717961e-06, + "loss": 21.9548, + "step": 99930 + }, + { + "epoch": 0.20188512304205367, + "grad_norm": 363.80230712890625, + "learning_rate": 9.699179017026395e-06, + "loss": 22.7939, + "step": 99940 + }, + { + "epoch": 0.20190532367473749, + "grad_norm": 642.005126953125, + "learning_rate": 9.699059755431599e-06, + "loss": 29.3214, + "step": 99950 + }, + { + "epoch": 0.2019255243074213, + "grad_norm": 624.2991943359375, + "learning_rate": 9.698940470934158e-06, + "loss": 28.8318, + "step": 99960 + }, + { + "epoch": 0.20194572494010513, + "grad_norm": 628.0044555664062, + "learning_rate": 9.698821163534649e-06, + "loss": 29.6279, + "step": 99970 + }, + { + "epoch": 0.20196592557278895, + "grad_norm": 647.2896728515625, + "learning_rate": 9.698701833233654e-06, + "loss": 55.589, + "step": 99980 + }, + { + "epoch": 0.20198612620547277, + "grad_norm": 250.62294006347656, + "learning_rate": 9.69858248003176e-06, + "loss": 18.9179, + "step": 99990 + }, + { + "epoch": 0.20200632683815656, + "grad_norm": 320.8274230957031, + "learning_rate": 9.698463103929542e-06, + "loss": 23.6085, + "step": 100000 + }, + { + "epoch": 0.20202652747084038, + "grad_norm": 446.7718505859375, + "learning_rate": 9.698343704927586e-06, + "loss": 34.3639, + "step": 100010 + }, + { + "epoch": 0.2020467281035242, + "grad_norm": 735.5264892578125, + "learning_rate": 9.698224283026473e-06, + "loss": 26.1571, + "step": 100020 + }, + { + "epoch": 0.20206692873620802, + "grad_norm": 922.5233154296875, + "learning_rate": 9.698104838226783e-06, + "loss": 33.7224, + "step": 100030 + }, + { + "epoch": 0.20208712936889184, + "grad_norm": 366.1944580078125, + "learning_rate": 9.697985370529101e-06, + "loss": 16.7596, + "step": 100040 + }, + { + "epoch": 0.20210733000157566, + "grad_norm": 334.633544921875, + "learning_rate": 9.69786587993401e-06, + "loss": 13.3622, + "step": 100050 + }, + { + "epoch": 0.20212753063425948, + "grad_norm": 456.33074951171875, + "learning_rate": 9.697746366442087e-06, + "loss": 32.405, + "step": 100060 + 
}, + { + "epoch": 0.20214773126694327, + "grad_norm": 1194.3699951171875, + "learning_rate": 9.69762683005392e-06, + "loss": 47.251, + "step": 100070 + }, + { + "epoch": 0.2021679318996271, + "grad_norm": 388.7727966308594, + "learning_rate": 9.69750727077009e-06, + "loss": 28.373, + "step": 100080 + }, + { + "epoch": 0.2021881325323109, + "grad_norm": 24.773212432861328, + "learning_rate": 9.697387688591178e-06, + "loss": 22.8887, + "step": 100090 + }, + { + "epoch": 0.20220833316499473, + "grad_norm": 359.620361328125, + "learning_rate": 9.697268083517767e-06, + "loss": 31.703, + "step": 100100 + }, + { + "epoch": 0.20222853379767855, + "grad_norm": 653.7416381835938, + "learning_rate": 9.697148455550444e-06, + "loss": 20.0855, + "step": 100110 + }, + { + "epoch": 0.20224873443036237, + "grad_norm": 284.58673095703125, + "learning_rate": 9.697028804689788e-06, + "loss": 15.7838, + "step": 100120 + }, + { + "epoch": 0.20226893506304616, + "grad_norm": 984.1806030273438, + "learning_rate": 9.696909130936382e-06, + "loss": 14.0852, + "step": 100130 + }, + { + "epoch": 0.20228913569572998, + "grad_norm": 153.69190979003906, + "learning_rate": 9.696789434290812e-06, + "loss": 14.096, + "step": 100140 + }, + { + "epoch": 0.2023093363284138, + "grad_norm": 258.2850646972656, + "learning_rate": 9.696669714753658e-06, + "loss": 22.492, + "step": 100150 + }, + { + "epoch": 0.20232953696109762, + "grad_norm": 748.961669921875, + "learning_rate": 9.696549972325509e-06, + "loss": 19.6018, + "step": 100160 + }, + { + "epoch": 0.20234973759378144, + "grad_norm": 287.2615051269531, + "learning_rate": 9.696430207006942e-06, + "loss": 21.6377, + "step": 100170 + }, + { + "epoch": 0.20236993822646526, + "grad_norm": 339.321044921875, + "learning_rate": 9.696310418798544e-06, + "loss": 24.7975, + "step": 100180 + }, + { + "epoch": 0.20239013885914908, + "grad_norm": 459.3150939941406, + "learning_rate": 9.696190607700901e-06, + "loss": 16.3885, + "step": 100190 + }, + { + "epoch": 0.20241033949183287, + "grad_norm": 278.3927917480469, + "learning_rate": 9.696070773714592e-06, + "loss": 33.2706, + "step": 100200 + }, + { + "epoch": 0.2024305401245167, + "grad_norm": 347.731689453125, + "learning_rate": 9.695950916840204e-06, + "loss": 21.2646, + "step": 100210 + }, + { + "epoch": 0.20245074075720051, + "grad_norm": 593.0798950195312, + "learning_rate": 9.695831037078323e-06, + "loss": 30.1376, + "step": 100220 + }, + { + "epoch": 0.20247094138988433, + "grad_norm": 543.983154296875, + "learning_rate": 9.695711134429529e-06, + "loss": 42.1279, + "step": 100230 + }, + { + "epoch": 0.20249114202256815, + "grad_norm": 389.13482666015625, + "learning_rate": 9.695591208894408e-06, + "loss": 23.1487, + "step": 100240 + }, + { + "epoch": 0.20251134265525197, + "grad_norm": 370.58160400390625, + "learning_rate": 9.695471260473546e-06, + "loss": 60.422, + "step": 100250 + }, + { + "epoch": 0.20253154328793577, + "grad_norm": 518.9379272460938, + "learning_rate": 9.695351289167527e-06, + "loss": 24.1456, + "step": 100260 + }, + { + "epoch": 0.2025517439206196, + "grad_norm": 840.3296508789062, + "learning_rate": 9.695231294976935e-06, + "loss": 24.2454, + "step": 100270 + }, + { + "epoch": 0.2025719445533034, + "grad_norm": 737.5658569335938, + "learning_rate": 9.695111277902353e-06, + "loss": 29.3572, + "step": 100280 + }, + { + "epoch": 0.20259214518598723, + "grad_norm": 433.4263000488281, + "learning_rate": 9.69499123794437e-06, + "loss": 34.5794, + "step": 100290 + }, + { + "epoch": 0.20261234581867105, + 
"grad_norm": 123.38919067382812, + "learning_rate": 9.69487117510357e-06, + "loss": 22.7723, + "step": 100300 + }, + { + "epoch": 0.20263254645135487, + "grad_norm": 452.074462890625, + "learning_rate": 9.694751089380536e-06, + "loss": 30.7951, + "step": 100310 + }, + { + "epoch": 0.20265274708403866, + "grad_norm": 0.0, + "learning_rate": 9.694630980775856e-06, + "loss": 20.6811, + "step": 100320 + }, + { + "epoch": 0.20267294771672248, + "grad_norm": 229.5910186767578, + "learning_rate": 9.694510849290113e-06, + "loss": 29.4656, + "step": 100330 + }, + { + "epoch": 0.2026931483494063, + "grad_norm": 228.48316955566406, + "learning_rate": 9.694390694923893e-06, + "loss": 17.5072, + "step": 100340 + }, + { + "epoch": 0.20271334898209012, + "grad_norm": 553.59521484375, + "learning_rate": 9.694270517677782e-06, + "loss": 20.3764, + "step": 100350 + }, + { + "epoch": 0.20273354961477394, + "grad_norm": 463.97491455078125, + "learning_rate": 9.694150317552367e-06, + "loss": 18.7534, + "step": 100360 + }, + { + "epoch": 0.20275375024745776, + "grad_norm": 281.0998840332031, + "learning_rate": 9.694030094548233e-06, + "loss": 31.498, + "step": 100370 + }, + { + "epoch": 0.20277395088014158, + "grad_norm": 256.85919189453125, + "learning_rate": 9.693909848665962e-06, + "loss": 13.6459, + "step": 100380 + }, + { + "epoch": 0.20279415151282537, + "grad_norm": 281.70880126953125, + "learning_rate": 9.693789579906147e-06, + "loss": 22.8798, + "step": 100390 + }, + { + "epoch": 0.2028143521455092, + "grad_norm": 316.8731994628906, + "learning_rate": 9.693669288269371e-06, + "loss": 15.9722, + "step": 100400 + }, + { + "epoch": 0.202834552778193, + "grad_norm": 503.97418212890625, + "learning_rate": 9.69354897375622e-06, + "loss": 25.7422, + "step": 100410 + }, + { + "epoch": 0.20285475341087683, + "grad_norm": 351.5521240234375, + "learning_rate": 9.693428636367279e-06, + "loss": 22.8897, + "step": 100420 + }, + { + "epoch": 0.20287495404356065, + "grad_norm": 259.8196716308594, + "learning_rate": 9.693308276103136e-06, + "loss": 16.2709, + "step": 100430 + }, + { + "epoch": 0.20289515467624447, + "grad_norm": 556.3468017578125, + "learning_rate": 9.693187892964381e-06, + "loss": 32.2715, + "step": 100440 + }, + { + "epoch": 0.20291535530892826, + "grad_norm": 577.9406127929688, + "learning_rate": 9.693067486951595e-06, + "loss": 44.8964, + "step": 100450 + }, + { + "epoch": 0.20293555594161208, + "grad_norm": 76.00013732910156, + "learning_rate": 9.692947058065367e-06, + "loss": 31.2101, + "step": 100460 + }, + { + "epoch": 0.2029557565742959, + "grad_norm": 668.4077758789062, + "learning_rate": 9.692826606306284e-06, + "loss": 33.4007, + "step": 100470 + }, + { + "epoch": 0.20297595720697972, + "grad_norm": 514.4503173828125, + "learning_rate": 9.692706131674935e-06, + "loss": 31.504, + "step": 100480 + }, + { + "epoch": 0.20299615783966354, + "grad_norm": 886.607666015625, + "learning_rate": 9.692585634171906e-06, + "loss": 20.9908, + "step": 100490 + }, + { + "epoch": 0.20301635847234736, + "grad_norm": 360.0110778808594, + "learning_rate": 9.69246511379778e-06, + "loss": 19.564, + "step": 100500 + }, + { + "epoch": 0.20303655910503116, + "grad_norm": 254.813232421875, + "learning_rate": 9.692344570553152e-06, + "loss": 27.0873, + "step": 100510 + }, + { + "epoch": 0.20305675973771498, + "grad_norm": 321.7149963378906, + "learning_rate": 9.692224004438603e-06, + "loss": 15.984, + "step": 100520 + }, + { + "epoch": 0.2030769603703988, + "grad_norm": 233.30462646484375, + "learning_rate": 
9.692103415454724e-06, + "loss": 29.4682, + "step": 100530 + }, + { + "epoch": 0.20309716100308262, + "grad_norm": 619.5001220703125, + "learning_rate": 9.691982803602102e-06, + "loss": 24.5903, + "step": 100540 + }, + { + "epoch": 0.20311736163576644, + "grad_norm": 566.3729858398438, + "learning_rate": 9.691862168881325e-06, + "loss": 26.2647, + "step": 100550 + }, + { + "epoch": 0.20313756226845026, + "grad_norm": 412.0101318359375, + "learning_rate": 9.691741511292983e-06, + "loss": 16.6728, + "step": 100560 + }, + { + "epoch": 0.20315776290113408, + "grad_norm": 261.8351135253906, + "learning_rate": 9.691620830837659e-06, + "loss": 22.1806, + "step": 100570 + }, + { + "epoch": 0.20317796353381787, + "grad_norm": 200.07791137695312, + "learning_rate": 9.691500127515945e-06, + "loss": 11.2046, + "step": 100580 + }, + { + "epoch": 0.2031981641665017, + "grad_norm": 426.7507629394531, + "learning_rate": 9.69137940132843e-06, + "loss": 16.9677, + "step": 100590 + }, + { + "epoch": 0.2032183647991855, + "grad_norm": 332.6185302734375, + "learning_rate": 9.691258652275698e-06, + "loss": 19.8274, + "step": 100600 + }, + { + "epoch": 0.20323856543186933, + "grad_norm": 490.4311218261719, + "learning_rate": 9.691137880358341e-06, + "loss": 28.4095, + "step": 100610 + }, + { + "epoch": 0.20325876606455315, + "grad_norm": 701.2317504882812, + "learning_rate": 9.691017085576947e-06, + "loss": 26.9267, + "step": 100620 + }, + { + "epoch": 0.20327896669723697, + "grad_norm": 328.32666015625, + "learning_rate": 9.690896267932106e-06, + "loss": 16.1199, + "step": 100630 + }, + { + "epoch": 0.20329916732992076, + "grad_norm": 273.16168212890625, + "learning_rate": 9.690775427424406e-06, + "loss": 36.8842, + "step": 100640 + }, + { + "epoch": 0.20331936796260458, + "grad_norm": 854.030517578125, + "learning_rate": 9.690654564054433e-06, + "loss": 30.4505, + "step": 100650 + }, + { + "epoch": 0.2033395685952884, + "grad_norm": 509.2734069824219, + "learning_rate": 9.69053367782278e-06, + "loss": 32.0603, + "step": 100660 + }, + { + "epoch": 0.20335976922797222, + "grad_norm": 215.01947021484375, + "learning_rate": 9.690412768730036e-06, + "loss": 11.925, + "step": 100670 + }, + { + "epoch": 0.20337996986065604, + "grad_norm": 149.21881103515625, + "learning_rate": 9.690291836776786e-06, + "loss": 15.3797, + "step": 100680 + }, + { + "epoch": 0.20340017049333986, + "grad_norm": 383.1336669921875, + "learning_rate": 9.690170881963624e-06, + "loss": 37.7917, + "step": 100690 + }, + { + "epoch": 0.20342037112602368, + "grad_norm": 0.5593068599700928, + "learning_rate": 9.690049904291139e-06, + "loss": 22.0224, + "step": 100700 + }, + { + "epoch": 0.20344057175870747, + "grad_norm": 202.04061889648438, + "learning_rate": 9.689928903759918e-06, + "loss": 11.615, + "step": 100710 + }, + { + "epoch": 0.2034607723913913, + "grad_norm": 342.6385498046875, + "learning_rate": 9.689807880370554e-06, + "loss": 19.7127, + "step": 100720 + }, + { + "epoch": 0.2034809730240751, + "grad_norm": 284.9398193359375, + "learning_rate": 9.689686834123633e-06, + "loss": 19.4428, + "step": 100730 + }, + { + "epoch": 0.20350117365675893, + "grad_norm": 267.9001159667969, + "learning_rate": 9.689565765019748e-06, + "loss": 21.0667, + "step": 100740 + }, + { + "epoch": 0.20352137428944275, + "grad_norm": 150.65304565429688, + "learning_rate": 9.68944467305949e-06, + "loss": 10.7642, + "step": 100750 + }, + { + "epoch": 0.20354157492212657, + "grad_norm": 760.1095581054688, + "learning_rate": 9.689323558243446e-06, + "loss": 25.7059, 
+ "step": 100760 + }, + { + "epoch": 0.20356177555481036, + "grad_norm": 219.99559020996094, + "learning_rate": 9.689202420572207e-06, + "loss": 25.8721, + "step": 100770 + }, + { + "epoch": 0.20358197618749418, + "grad_norm": 428.0672912597656, + "learning_rate": 9.689081260046365e-06, + "loss": 16.5707, + "step": 100780 + }, + { + "epoch": 0.203602176820178, + "grad_norm": 288.5916442871094, + "learning_rate": 9.68896007666651e-06, + "loss": 9.0285, + "step": 100790 + }, + { + "epoch": 0.20362237745286182, + "grad_norm": 189.73159790039062, + "learning_rate": 9.68883887043323e-06, + "loss": 31.2634, + "step": 100800 + }, + { + "epoch": 0.20364257808554564, + "grad_norm": 354.1461181640625, + "learning_rate": 9.688717641347121e-06, + "loss": 30.3951, + "step": 100810 + }, + { + "epoch": 0.20366277871822946, + "grad_norm": 472.4429626464844, + "learning_rate": 9.688596389408769e-06, + "loss": 23.1525, + "step": 100820 + }, + { + "epoch": 0.20368297935091326, + "grad_norm": 579.1621704101562, + "learning_rate": 9.688475114618768e-06, + "loss": 11.8348, + "step": 100830 + }, + { + "epoch": 0.20370317998359708, + "grad_norm": 408.7290954589844, + "learning_rate": 9.688353816977708e-06, + "loss": 26.5684, + "step": 100840 + }, + { + "epoch": 0.2037233806162809, + "grad_norm": 438.94097900390625, + "learning_rate": 9.688232496486179e-06, + "loss": 21.479, + "step": 100850 + }, + { + "epoch": 0.20374358124896472, + "grad_norm": 315.06683349609375, + "learning_rate": 9.688111153144775e-06, + "loss": 18.5876, + "step": 100860 + }, + { + "epoch": 0.20376378188164854, + "grad_norm": 368.37176513671875, + "learning_rate": 9.687989786954084e-06, + "loss": 25.1675, + "step": 100870 + }, + { + "epoch": 0.20378398251433236, + "grad_norm": 408.96441650390625, + "learning_rate": 9.687868397914701e-06, + "loss": 28.6644, + "step": 100880 + }, + { + "epoch": 0.20380418314701618, + "grad_norm": 83.42842864990234, + "learning_rate": 9.687746986027215e-06, + "loss": 15.1172, + "step": 100890 + }, + { + "epoch": 0.20382438377969997, + "grad_norm": 778.487548828125, + "learning_rate": 9.687625551292219e-06, + "loss": 25.876, + "step": 100900 + }, + { + "epoch": 0.2038445844123838, + "grad_norm": 555.4335327148438, + "learning_rate": 9.687504093710304e-06, + "loss": 23.7801, + "step": 100910 + }, + { + "epoch": 0.2038647850450676, + "grad_norm": 340.75274658203125, + "learning_rate": 9.687382613282063e-06, + "loss": 18.6781, + "step": 100920 + }, + { + "epoch": 0.20388498567775143, + "grad_norm": 315.389404296875, + "learning_rate": 9.687261110008088e-06, + "loss": 15.0097, + "step": 100930 + }, + { + "epoch": 0.20390518631043525, + "grad_norm": 344.5794982910156, + "learning_rate": 9.687139583888971e-06, + "loss": 41.7478, + "step": 100940 + }, + { + "epoch": 0.20392538694311907, + "grad_norm": 367.5317687988281, + "learning_rate": 9.687018034925304e-06, + "loss": 36.2405, + "step": 100950 + }, + { + "epoch": 0.20394558757580286, + "grad_norm": 930.07080078125, + "learning_rate": 9.686896463117679e-06, + "loss": 24.3968, + "step": 100960 + }, + { + "epoch": 0.20396578820848668, + "grad_norm": 676.9625854492188, + "learning_rate": 9.68677486846669e-06, + "loss": 34.5976, + "step": 100970 + }, + { + "epoch": 0.2039859888411705, + "grad_norm": 263.13287353515625, + "learning_rate": 9.686653250972928e-06, + "loss": 13.8814, + "step": 100980 + }, + { + "epoch": 0.20400618947385432, + "grad_norm": 263.9481201171875, + "learning_rate": 9.686531610636986e-06, + "loss": 14.7206, + "step": 100990 + }, + { + "epoch": 
0.20402639010653814, + "grad_norm": 411.3809509277344, + "learning_rate": 9.68640994745946e-06, + "loss": 24.1162, + "step": 101000 + }, + { + "epoch": 0.20404659073922196, + "grad_norm": 208.5432891845703, + "learning_rate": 9.686288261440937e-06, + "loss": 20.4968, + "step": 101010 + }, + { + "epoch": 0.20406679137190578, + "grad_norm": 348.7425231933594, + "learning_rate": 9.686166552582015e-06, + "loss": 18.1232, + "step": 101020 + }, + { + "epoch": 0.20408699200458957, + "grad_norm": 688.8912353515625, + "learning_rate": 9.686044820883284e-06, + "loss": 17.6353, + "step": 101030 + }, + { + "epoch": 0.2041071926372734, + "grad_norm": 166.44342041015625, + "learning_rate": 9.68592306634534e-06, + "loss": 10.5638, + "step": 101040 + }, + { + "epoch": 0.2041273932699572, + "grad_norm": 343.3615417480469, + "learning_rate": 9.685801288968777e-06, + "loss": 31.3551, + "step": 101050 + }, + { + "epoch": 0.20414759390264103, + "grad_norm": 363.5527038574219, + "learning_rate": 9.685679488754184e-06, + "loss": 16.4418, + "step": 101060 + }, + { + "epoch": 0.20416779453532485, + "grad_norm": 626.4178466796875, + "learning_rate": 9.685557665702158e-06, + "loss": 25.6591, + "step": 101070 + }, + { + "epoch": 0.20418799516800867, + "grad_norm": 433.0784912109375, + "learning_rate": 9.685435819813294e-06, + "loss": 25.233, + "step": 101080 + }, + { + "epoch": 0.20420819580069247, + "grad_norm": 107.19353485107422, + "learning_rate": 9.685313951088184e-06, + "loss": 15.3305, + "step": 101090 + }, + { + "epoch": 0.20422839643337629, + "grad_norm": 315.6756286621094, + "learning_rate": 9.68519205952742e-06, + "loss": 37.633, + "step": 101100 + }, + { + "epoch": 0.2042485970660601, + "grad_norm": 483.7098083496094, + "learning_rate": 9.6850701451316e-06, + "loss": 26.4755, + "step": 101110 + }, + { + "epoch": 0.20426879769874393, + "grad_norm": 349.6911926269531, + "learning_rate": 9.684948207901315e-06, + "loss": 20.3785, + "step": 101120 + }, + { + "epoch": 0.20428899833142775, + "grad_norm": 40.62013244628906, + "learning_rate": 9.684826247837162e-06, + "loss": 16.121, + "step": 101130 + }, + { + "epoch": 0.20430919896411157, + "grad_norm": 1106.7093505859375, + "learning_rate": 9.684704264939734e-06, + "loss": 29.3836, + "step": 101140 + }, + { + "epoch": 0.20432939959679536, + "grad_norm": 663.3167114257812, + "learning_rate": 9.684582259209625e-06, + "loss": 14.8486, + "step": 101150 + }, + { + "epoch": 0.20434960022947918, + "grad_norm": 268.1558532714844, + "learning_rate": 9.68446023064743e-06, + "loss": 16.3439, + "step": 101160 + }, + { + "epoch": 0.204369800862163, + "grad_norm": 866.4666137695312, + "learning_rate": 9.684338179253744e-06, + "loss": 16.8831, + "step": 101170 + }, + { + "epoch": 0.20439000149484682, + "grad_norm": 120.70707702636719, + "learning_rate": 9.684216105029163e-06, + "loss": 21.0449, + "step": 101180 + }, + { + "epoch": 0.20441020212753064, + "grad_norm": 683.2108764648438, + "learning_rate": 9.684094007974278e-06, + "loss": 18.4854, + "step": 101190 + }, + { + "epoch": 0.20443040276021446, + "grad_norm": 653.0943603515625, + "learning_rate": 9.68397188808969e-06, + "loss": 26.5146, + "step": 101200 + }, + { + "epoch": 0.20445060339289828, + "grad_norm": 258.76812744140625, + "learning_rate": 9.683849745375991e-06, + "loss": 28.7401, + "step": 101210 + }, + { + "epoch": 0.20447080402558207, + "grad_norm": 331.1708984375, + "learning_rate": 9.683727579833776e-06, + "loss": 23.6947, + "step": 101220 + }, + { + "epoch": 0.2044910046582659, + "grad_norm": 
329.5214538574219, + "learning_rate": 9.68360539146364e-06, + "loss": 22.2485, + "step": 101230 + }, + { + "epoch": 0.2045112052909497, + "grad_norm": 253.5193328857422, + "learning_rate": 9.683483180266179e-06, + "loss": 27.205, + "step": 101240 + }, + { + "epoch": 0.20453140592363353, + "grad_norm": 1097.482177734375, + "learning_rate": 9.683360946241988e-06, + "loss": 31.314, + "step": 101250 + }, + { + "epoch": 0.20455160655631735, + "grad_norm": 208.3839111328125, + "learning_rate": 9.683238689391667e-06, + "loss": 17.2174, + "step": 101260 + }, + { + "epoch": 0.20457180718900117, + "grad_norm": 184.15036010742188, + "learning_rate": 9.683116409715807e-06, + "loss": 35.4615, + "step": 101270 + }, + { + "epoch": 0.20459200782168496, + "grad_norm": 486.5890197753906, + "learning_rate": 9.682994107215005e-06, + "loss": 22.9579, + "step": 101280 + }, + { + "epoch": 0.20461220845436878, + "grad_norm": 431.9108581542969, + "learning_rate": 9.682871781889858e-06, + "loss": 21.2137, + "step": 101290 + }, + { + "epoch": 0.2046324090870526, + "grad_norm": 182.5166015625, + "learning_rate": 9.682749433740963e-06, + "loss": 37.8259, + "step": 101300 + }, + { + "epoch": 0.20465260971973642, + "grad_norm": 438.4036865234375, + "learning_rate": 9.682627062768914e-06, + "loss": 17.6295, + "step": 101310 + }, + { + "epoch": 0.20467281035242024, + "grad_norm": 309.7554931640625, + "learning_rate": 9.682504668974308e-06, + "loss": 16.9148, + "step": 101320 + }, + { + "epoch": 0.20469301098510406, + "grad_norm": 393.6847229003906, + "learning_rate": 9.682382252357745e-06, + "loss": 18.2264, + "step": 101330 + }, + { + "epoch": 0.20471321161778788, + "grad_norm": 294.20904541015625, + "learning_rate": 9.682259812919817e-06, + "loss": 25.2018, + "step": 101340 + }, + { + "epoch": 0.20473341225047167, + "grad_norm": 300.732421875, + "learning_rate": 9.682137350661123e-06, + "loss": 17.3774, + "step": 101350 + }, + { + "epoch": 0.2047536128831555, + "grad_norm": 423.7529602050781, + "learning_rate": 9.682014865582259e-06, + "loss": 17.0439, + "step": 101360 + }, + { + "epoch": 0.20477381351583931, + "grad_norm": 279.9488220214844, + "learning_rate": 9.681892357683822e-06, + "loss": 30.9309, + "step": 101370 + }, + { + "epoch": 0.20479401414852313, + "grad_norm": 1027.737060546875, + "learning_rate": 9.68176982696641e-06, + "loss": 30.3881, + "step": 101380 + }, + { + "epoch": 0.20481421478120695, + "grad_norm": 660.5496215820312, + "learning_rate": 9.681647273430618e-06, + "loss": 41.3444, + "step": 101390 + }, + { + "epoch": 0.20483441541389077, + "grad_norm": 818.9313354492188, + "learning_rate": 9.681524697077047e-06, + "loss": 19.0959, + "step": 101400 + }, + { + "epoch": 0.20485461604657457, + "grad_norm": 647.1447143554688, + "learning_rate": 9.681402097906293e-06, + "loss": 19.1615, + "step": 101410 + }, + { + "epoch": 0.2048748166792584, + "grad_norm": 243.00503540039062, + "learning_rate": 9.681279475918952e-06, + "loss": 24.1584, + "step": 101420 + }, + { + "epoch": 0.2048950173119422, + "grad_norm": 49.23984146118164, + "learning_rate": 9.681156831115622e-06, + "loss": 29.7425, + "step": 101430 + }, + { + "epoch": 0.20491521794462603, + "grad_norm": 252.69985961914062, + "learning_rate": 9.681034163496902e-06, + "loss": 26.8188, + "step": 101440 + }, + { + "epoch": 0.20493541857730985, + "grad_norm": 206.2428741455078, + "learning_rate": 9.68091147306339e-06, + "loss": 29.8481, + "step": 101450 + }, + { + "epoch": 0.20495561920999367, + "grad_norm": 156.89279174804688, + "learning_rate": 
9.680788759815682e-06, + "loss": 8.749, + "step": 101460 + }, + { + "epoch": 0.20497581984267746, + "grad_norm": 670.9077758789062, + "learning_rate": 9.680666023754377e-06, + "loss": 21.0357, + "step": 101470 + }, + { + "epoch": 0.20499602047536128, + "grad_norm": 494.2296447753906, + "learning_rate": 9.680543264880075e-06, + "loss": 14.5555, + "step": 101480 + }, + { + "epoch": 0.2050162211080451, + "grad_norm": 604.6879272460938, + "learning_rate": 9.680420483193371e-06, + "loss": 25.0197, + "step": 101490 + }, + { + "epoch": 0.20503642174072892, + "grad_norm": 15.526825904846191, + "learning_rate": 9.680297678694867e-06, + "loss": 21.2223, + "step": 101500 + }, + { + "epoch": 0.20505662237341274, + "grad_norm": 267.8724060058594, + "learning_rate": 9.680174851385158e-06, + "loss": 16.4051, + "step": 101510 + }, + { + "epoch": 0.20507682300609656, + "grad_norm": 316.9556579589844, + "learning_rate": 9.680052001264847e-06, + "loss": 24.9228, + "step": 101520 + }, + { + "epoch": 0.20509702363878038, + "grad_norm": 198.80479431152344, + "learning_rate": 9.679929128334529e-06, + "loss": 41.2172, + "step": 101530 + }, + { + "epoch": 0.20511722427146417, + "grad_norm": 244.0272674560547, + "learning_rate": 9.679806232594803e-06, + "loss": 10.5038, + "step": 101540 + }, + { + "epoch": 0.205137424904148, + "grad_norm": 1116.754150390625, + "learning_rate": 9.67968331404627e-06, + "loss": 19.753, + "step": 101550 + }, + { + "epoch": 0.2051576255368318, + "grad_norm": 508.11761474609375, + "learning_rate": 9.679560372689527e-06, + "loss": 40.9566, + "step": 101560 + }, + { + "epoch": 0.20517782616951563, + "grad_norm": 520.5548095703125, + "learning_rate": 9.679437408525175e-06, + "loss": 21.2994, + "step": 101570 + }, + { + "epoch": 0.20519802680219945, + "grad_norm": 441.4461975097656, + "learning_rate": 9.679314421553814e-06, + "loss": 22.9699, + "step": 101580 + }, + { + "epoch": 0.20521822743488327, + "grad_norm": 219.525146484375, + "learning_rate": 9.67919141177604e-06, + "loss": 22.78, + "step": 101590 + }, + { + "epoch": 0.20523842806756706, + "grad_norm": 224.53749084472656, + "learning_rate": 9.679068379192455e-06, + "loss": 20.2164, + "step": 101600 + }, + { + "epoch": 0.20525862870025088, + "grad_norm": 228.8566131591797, + "learning_rate": 9.67894532380366e-06, + "loss": 32.5503, + "step": 101610 + }, + { + "epoch": 0.2052788293329347, + "grad_norm": 283.5226745605469, + "learning_rate": 9.67882224561025e-06, + "loss": 22.461, + "step": 101620 + }, + { + "epoch": 0.20529902996561852, + "grad_norm": 385.9081115722656, + "learning_rate": 9.678699144612829e-06, + "loss": 20.0961, + "step": 101630 + }, + { + "epoch": 0.20531923059830234, + "grad_norm": 375.8223876953125, + "learning_rate": 9.678576020811996e-06, + "loss": 13.825, + "step": 101640 + }, + { + "epoch": 0.20533943123098616, + "grad_norm": 580.4359130859375, + "learning_rate": 9.678452874208352e-06, + "loss": 17.9582, + "step": 101650 + }, + { + "epoch": 0.20535963186366998, + "grad_norm": 652.5196533203125, + "learning_rate": 9.678329704802495e-06, + "loss": 18.9625, + "step": 101660 + }, + { + "epoch": 0.20537983249635378, + "grad_norm": 501.9729309082031, + "learning_rate": 9.678206512595027e-06, + "loss": 32.7783, + "step": 101670 + }, + { + "epoch": 0.2054000331290376, + "grad_norm": 892.1295776367188, + "learning_rate": 9.678083297586547e-06, + "loss": 28.2075, + "step": 101680 + }, + { + "epoch": 0.20542023376172142, + "grad_norm": 307.83026123046875, + "learning_rate": 9.677960059777656e-06, + "loss": 28.0779, + 
"step": 101690 + }, + { + "epoch": 0.20544043439440524, + "grad_norm": 390.3979187011719, + "learning_rate": 9.677836799168958e-06, + "loss": 36.6936, + "step": 101700 + }, + { + "epoch": 0.20546063502708906, + "grad_norm": 764.1943969726562, + "learning_rate": 9.677713515761046e-06, + "loss": 36.1618, + "step": 101710 + }, + { + "epoch": 0.20548083565977288, + "grad_norm": 767.7238159179688, + "learning_rate": 9.677590209554531e-06, + "loss": 29.0246, + "step": 101720 + }, + { + "epoch": 0.20550103629245667, + "grad_norm": 894.7352905273438, + "learning_rate": 9.677466880550004e-06, + "loss": 29.4374, + "step": 101730 + }, + { + "epoch": 0.2055212369251405, + "grad_norm": 228.73587036132812, + "learning_rate": 9.677343528748073e-06, + "loss": 23.9688, + "step": 101740 + }, + { + "epoch": 0.2055414375578243, + "grad_norm": 550.2158203125, + "learning_rate": 9.677220154149338e-06, + "loss": 35.1127, + "step": 101750 + }, + { + "epoch": 0.20556163819050813, + "grad_norm": 912.5822143554688, + "learning_rate": 9.677096756754397e-06, + "loss": 26.836, + "step": 101760 + }, + { + "epoch": 0.20558183882319195, + "grad_norm": 1875.5404052734375, + "learning_rate": 9.676973336563856e-06, + "loss": 32.0896, + "step": 101770 + }, + { + "epoch": 0.20560203945587577, + "grad_norm": 644.2416381835938, + "learning_rate": 9.676849893578312e-06, + "loss": 23.7709, + "step": 101780 + }, + { + "epoch": 0.20562224008855956, + "grad_norm": 529.8448486328125, + "learning_rate": 9.67672642779837e-06, + "loss": 20.3778, + "step": 101790 + }, + { + "epoch": 0.20564244072124338, + "grad_norm": 184.81747436523438, + "learning_rate": 9.67660293922463e-06, + "loss": 19.5761, + "step": 101800 + }, + { + "epoch": 0.2056626413539272, + "grad_norm": 372.7962341308594, + "learning_rate": 9.676479427857694e-06, + "loss": 34.0516, + "step": 101810 + }, + { + "epoch": 0.20568284198661102, + "grad_norm": 780.0181274414062, + "learning_rate": 9.676355893698165e-06, + "loss": 27.3943, + "step": 101820 + }, + { + "epoch": 0.20570304261929484, + "grad_norm": 439.5355224609375, + "learning_rate": 9.676232336746645e-06, + "loss": 23.2605, + "step": 101830 + }, + { + "epoch": 0.20572324325197866, + "grad_norm": 213.00759887695312, + "learning_rate": 9.676108757003735e-06, + "loss": 20.3967, + "step": 101840 + }, + { + "epoch": 0.20574344388466248, + "grad_norm": 1251.4813232421875, + "learning_rate": 9.67598515447004e-06, + "loss": 47.155, + "step": 101850 + }, + { + "epoch": 0.20576364451734627, + "grad_norm": 398.27423095703125, + "learning_rate": 9.67586152914616e-06, + "loss": 36.8861, + "step": 101860 + }, + { + "epoch": 0.2057838451500301, + "grad_norm": 123.7984848022461, + "learning_rate": 9.675737881032696e-06, + "loss": 14.2424, + "step": 101870 + }, + { + "epoch": 0.2058040457827139, + "grad_norm": 408.1427001953125, + "learning_rate": 9.675614210130252e-06, + "loss": 46.8987, + "step": 101880 + }, + { + "epoch": 0.20582424641539773, + "grad_norm": 766.0629272460938, + "learning_rate": 9.675490516439434e-06, + "loss": 31.8348, + "step": 101890 + }, + { + "epoch": 0.20584444704808155, + "grad_norm": 284.36663818359375, + "learning_rate": 9.675366799960842e-06, + "loss": 23.4975, + "step": 101900 + }, + { + "epoch": 0.20586464768076537, + "grad_norm": 284.55548095703125, + "learning_rate": 9.675243060695079e-06, + "loss": 24.3463, + "step": 101910 + }, + { + "epoch": 0.20588484831344916, + "grad_norm": 422.52703857421875, + "learning_rate": 9.675119298642748e-06, + "loss": 38.6667, + "step": 101920 + }, + { + "epoch": 
0.20590504894613298, + "grad_norm": 430.506103515625, + "learning_rate": 9.674995513804452e-06, + "loss": 23.379, + "step": 101930 + }, + { + "epoch": 0.2059252495788168, + "grad_norm": 910.5701904296875, + "learning_rate": 9.674871706180796e-06, + "loss": 22.0203, + "step": 101940 + }, + { + "epoch": 0.20594545021150062, + "grad_norm": 1329.03125, + "learning_rate": 9.674747875772381e-06, + "loss": 19.5187, + "step": 101950 + }, + { + "epoch": 0.20596565084418444, + "grad_norm": 2294.890625, + "learning_rate": 9.674624022579814e-06, + "loss": 26.2692, + "step": 101960 + }, + { + "epoch": 0.20598585147686826, + "grad_norm": 377.37701416015625, + "learning_rate": 9.674500146603695e-06, + "loss": 17.2762, + "step": 101970 + }, + { + "epoch": 0.20600605210955208, + "grad_norm": 270.7948303222656, + "learning_rate": 9.674376247844628e-06, + "loss": 16.6206, + "step": 101980 + }, + { + "epoch": 0.20602625274223588, + "grad_norm": 708.5576782226562, + "learning_rate": 9.67425232630322e-06, + "loss": 27.1679, + "step": 101990 + }, + { + "epoch": 0.2060464533749197, + "grad_norm": 372.044677734375, + "learning_rate": 9.674128381980073e-06, + "loss": 29.2218, + "step": 102000 + }, + { + "epoch": 0.20606665400760352, + "grad_norm": 515.59228515625, + "learning_rate": 9.67400441487579e-06, + "loss": 18.0636, + "step": 102010 + }, + { + "epoch": 0.20608685464028734, + "grad_norm": 312.6241760253906, + "learning_rate": 9.673880424990978e-06, + "loss": 33.0065, + "step": 102020 + }, + { + "epoch": 0.20610705527297116, + "grad_norm": 420.17803955078125, + "learning_rate": 9.673756412326238e-06, + "loss": 17.6985, + "step": 102030 + }, + { + "epoch": 0.20612725590565498, + "grad_norm": 106.03390502929688, + "learning_rate": 9.673632376882178e-06, + "loss": 20.5482, + "step": 102040 + }, + { + "epoch": 0.20614745653833877, + "grad_norm": 212.96438598632812, + "learning_rate": 9.673508318659399e-06, + "loss": 15.4148, + "step": 102050 + }, + { + "epoch": 0.2061676571710226, + "grad_norm": 926.5879516601562, + "learning_rate": 9.673384237658508e-06, + "loss": 27.9721, + "step": 102060 + }, + { + "epoch": 0.2061878578037064, + "grad_norm": 431.341064453125, + "learning_rate": 9.67326013388011e-06, + "loss": 13.3387, + "step": 102070 + }, + { + "epoch": 0.20620805843639023, + "grad_norm": 318.4190979003906, + "learning_rate": 9.673136007324806e-06, + "loss": 25.5216, + "step": 102080 + }, + { + "epoch": 0.20622825906907405, + "grad_norm": 886.5811157226562, + "learning_rate": 9.673011857993207e-06, + "loss": 25.061, + "step": 102090 + }, + { + "epoch": 0.20624845970175787, + "grad_norm": 642.0043334960938, + "learning_rate": 9.672887685885913e-06, + "loss": 24.7124, + "step": 102100 + }, + { + "epoch": 0.20626866033444166, + "grad_norm": 0.0, + "learning_rate": 9.672763491003531e-06, + "loss": 26.6625, + "step": 102110 + }, + { + "epoch": 0.20628886096712548, + "grad_norm": 323.4170227050781, + "learning_rate": 9.672639273346668e-06, + "loss": 19.3366, + "step": 102120 + }, + { + "epoch": 0.2063090615998093, + "grad_norm": 511.27362060546875, + "learning_rate": 9.672515032915926e-06, + "loss": 29.8732, + "step": 102130 + }, + { + "epoch": 0.20632926223249312, + "grad_norm": 131.7635498046875, + "learning_rate": 9.672390769711914e-06, + "loss": 16.1435, + "step": 102140 + }, + { + "epoch": 0.20634946286517694, + "grad_norm": 455.59521484375, + "learning_rate": 9.672266483735235e-06, + "loss": 26.799, + "step": 102150 + }, + { + "epoch": 0.20636966349786076, + "grad_norm": 781.8112182617188, + "learning_rate": 
9.672142174986497e-06, + "loss": 32.1946, + "step": 102160 + }, + { + "epoch": 0.20638986413054458, + "grad_norm": 287.041748046875, + "learning_rate": 9.672017843466305e-06, + "loss": 49.6158, + "step": 102170 + }, + { + "epoch": 0.20641006476322837, + "grad_norm": 883.7597045898438, + "learning_rate": 9.671893489175263e-06, + "loss": 38.0351, + "step": 102180 + }, + { + "epoch": 0.2064302653959122, + "grad_norm": 295.4107666015625, + "learning_rate": 9.67176911211398e-06, + "loss": 42.8857, + "step": 102190 + }, + { + "epoch": 0.206450466028596, + "grad_norm": 913.2815551757812, + "learning_rate": 9.671644712283061e-06, + "loss": 26.6924, + "step": 102200 + }, + { + "epoch": 0.20647066666127983, + "grad_norm": 803.0249633789062, + "learning_rate": 9.671520289683112e-06, + "loss": 13.8448, + "step": 102210 + }, + { + "epoch": 0.20649086729396365, + "grad_norm": 93.17762756347656, + "learning_rate": 9.671395844314739e-06, + "loss": 18.475, + "step": 102220 + }, + { + "epoch": 0.20651106792664747, + "grad_norm": 508.1399230957031, + "learning_rate": 9.67127137617855e-06, + "loss": 26.1959, + "step": 102230 + }, + { + "epoch": 0.20653126855933127, + "grad_norm": 118.68540954589844, + "learning_rate": 9.67114688527515e-06, + "loss": 18.8665, + "step": 102240 + }, + { + "epoch": 0.20655146919201509, + "grad_norm": 76.24480438232422, + "learning_rate": 9.671022371605148e-06, + "loss": 26.302, + "step": 102250 + }, + { + "epoch": 0.2065716698246989, + "grad_norm": 771.3875122070312, + "learning_rate": 9.670897835169149e-06, + "loss": 26.3433, + "step": 102260 + }, + { + "epoch": 0.20659187045738273, + "grad_norm": 698.0466918945312, + "learning_rate": 9.67077327596776e-06, + "loss": 21.2148, + "step": 102270 + }, + { + "epoch": 0.20661207109006655, + "grad_norm": 216.55682373046875, + "learning_rate": 9.67064869400159e-06, + "loss": 30.4819, + "step": 102280 + }, + { + "epoch": 0.20663227172275037, + "grad_norm": 1432.4974365234375, + "learning_rate": 9.670524089271242e-06, + "loss": 48.6451, + "step": 102290 + }, + { + "epoch": 0.20665247235543419, + "grad_norm": 580.0415649414062, + "learning_rate": 9.670399461777328e-06, + "loss": 17.0973, + "step": 102300 + }, + { + "epoch": 0.20667267298811798, + "grad_norm": 731.4429321289062, + "learning_rate": 9.670274811520454e-06, + "loss": 18.9645, + "step": 102310 + }, + { + "epoch": 0.2066928736208018, + "grad_norm": 345.7098388671875, + "learning_rate": 9.670150138501226e-06, + "loss": 23.997, + "step": 102320 + }, + { + "epoch": 0.20671307425348562, + "grad_norm": 614.6451416015625, + "learning_rate": 9.670025442720253e-06, + "loss": 22.9857, + "step": 102330 + }, + { + "epoch": 0.20673327488616944, + "grad_norm": 97.4102783203125, + "learning_rate": 9.669900724178142e-06, + "loss": 16.508, + "step": 102340 + }, + { + "epoch": 0.20675347551885326, + "grad_norm": 477.67340087890625, + "learning_rate": 9.6697759828755e-06, + "loss": 39.7115, + "step": 102350 + }, + { + "epoch": 0.20677367615153708, + "grad_norm": 302.7767639160156, + "learning_rate": 9.669651218812938e-06, + "loss": 29.2609, + "step": 102360 + }, + { + "epoch": 0.20679387678422087, + "grad_norm": 336.0804138183594, + "learning_rate": 9.669526431991063e-06, + "loss": 19.4055, + "step": 102370 + }, + { + "epoch": 0.2068140774169047, + "grad_norm": 560.2570190429688, + "learning_rate": 9.669401622410481e-06, + "loss": 23.5082, + "step": 102380 + }, + { + "epoch": 0.2068342780495885, + "grad_norm": 868.6182861328125, + "learning_rate": 9.669276790071803e-06, + "loss": 40.4677, + "step": 
102390 + }, + { + "epoch": 0.20685447868227233, + "grad_norm": 604.2676391601562, + "learning_rate": 9.669151934975635e-06, + "loss": 20.6695, + "step": 102400 + }, + { + "epoch": 0.20687467931495615, + "grad_norm": 791.1727905273438, + "learning_rate": 9.669027057122586e-06, + "loss": 29.302, + "step": 102410 + }, + { + "epoch": 0.20689487994763997, + "grad_norm": 75.31989288330078, + "learning_rate": 9.668902156513268e-06, + "loss": 12.0986, + "step": 102420 + }, + { + "epoch": 0.20691508058032376, + "grad_norm": 500.6473693847656, + "learning_rate": 9.668777233148285e-06, + "loss": 19.4095, + "step": 102430 + }, + { + "epoch": 0.20693528121300758, + "grad_norm": 233.5106201171875, + "learning_rate": 9.668652287028249e-06, + "loss": 31.9816, + "step": 102440 + }, + { + "epoch": 0.2069554818456914, + "grad_norm": 2282.65478515625, + "learning_rate": 9.668527318153769e-06, + "loss": 35.4047, + "step": 102450 + }, + { + "epoch": 0.20697568247837522, + "grad_norm": 301.068359375, + "learning_rate": 9.66840232652545e-06, + "loss": 37.4903, + "step": 102460 + }, + { + "epoch": 0.20699588311105904, + "grad_norm": 380.9482421875, + "learning_rate": 9.668277312143908e-06, + "loss": 18.6953, + "step": 102470 + }, + { + "epoch": 0.20701608374374286, + "grad_norm": 339.640380859375, + "learning_rate": 9.668152275009747e-06, + "loss": 26.4746, + "step": 102480 + }, + { + "epoch": 0.20703628437642668, + "grad_norm": 325.6827087402344, + "learning_rate": 9.668027215123577e-06, + "loss": 26.1166, + "step": 102490 + }, + { + "epoch": 0.20705648500911047, + "grad_norm": 452.2302551269531, + "learning_rate": 9.667902132486009e-06, + "loss": 27.4258, + "step": 102500 + }, + { + "epoch": 0.2070766856417943, + "grad_norm": 462.70391845703125, + "learning_rate": 9.667777027097654e-06, + "loss": 20.7429, + "step": 102510 + }, + { + "epoch": 0.20709688627447811, + "grad_norm": 277.04974365234375, + "learning_rate": 9.667651898959118e-06, + "loss": 22.5922, + "step": 102520 + }, + { + "epoch": 0.20711708690716193, + "grad_norm": 276.8566589355469, + "learning_rate": 9.667526748071013e-06, + "loss": 17.4676, + "step": 102530 + }, + { + "epoch": 0.20713728753984575, + "grad_norm": 312.2072448730469, + "learning_rate": 9.667401574433948e-06, + "loss": 24.4108, + "step": 102540 + }, + { + "epoch": 0.20715748817252957, + "grad_norm": 303.51275634765625, + "learning_rate": 9.667276378048535e-06, + "loss": 17.7054, + "step": 102550 + }, + { + "epoch": 0.20717768880521337, + "grad_norm": 516.8126220703125, + "learning_rate": 9.667151158915382e-06, + "loss": 21.9469, + "step": 102560 + }, + { + "epoch": 0.2071978894378972, + "grad_norm": 469.1775817871094, + "learning_rate": 9.667025917035102e-06, + "loss": 16.2955, + "step": 102570 + }, + { + "epoch": 0.207218090070581, + "grad_norm": 346.0306701660156, + "learning_rate": 9.666900652408302e-06, + "loss": 17.62, + "step": 102580 + }, + { + "epoch": 0.20723829070326483, + "grad_norm": 314.55615234375, + "learning_rate": 9.666775365035596e-06, + "loss": 25.255, + "step": 102590 + }, + { + "epoch": 0.20725849133594865, + "grad_norm": 311.3092956542969, + "learning_rate": 9.666650054917591e-06, + "loss": 27.1351, + "step": 102600 + }, + { + "epoch": 0.20727869196863247, + "grad_norm": 420.1705322265625, + "learning_rate": 9.666524722054902e-06, + "loss": 22.8531, + "step": 102610 + }, + { + "epoch": 0.2072988926013163, + "grad_norm": 86.36851501464844, + "learning_rate": 9.666399366448135e-06, + "loss": 12.1727, + "step": 102620 + }, + { + "epoch": 0.20731909323400008, + 
"grad_norm": 659.5982055664062, + "learning_rate": 9.666273988097904e-06, + "loss": 30.465, + "step": 102630 + }, + { + "epoch": 0.2073392938666839, + "grad_norm": 325.5073547363281, + "learning_rate": 9.66614858700482e-06, + "loss": 22.9632, + "step": 102640 + }, + { + "epoch": 0.20735949449936772, + "grad_norm": 755.0143432617188, + "learning_rate": 9.666023163169493e-06, + "loss": 30.1206, + "step": 102650 + }, + { + "epoch": 0.20737969513205154, + "grad_norm": 72.33990478515625, + "learning_rate": 9.665897716592537e-06, + "loss": 13.5545, + "step": 102660 + }, + { + "epoch": 0.20739989576473536, + "grad_norm": 255.3713836669922, + "learning_rate": 9.66577224727456e-06, + "loss": 35.5251, + "step": 102670 + }, + { + "epoch": 0.20742009639741918, + "grad_norm": 615.615478515625, + "learning_rate": 9.665646755216175e-06, + "loss": 23.9089, + "step": 102680 + }, + { + "epoch": 0.20744029703010297, + "grad_norm": 419.4078369140625, + "learning_rate": 9.665521240417993e-06, + "loss": 23.9347, + "step": 102690 + }, + { + "epoch": 0.2074604976627868, + "grad_norm": 367.4272766113281, + "learning_rate": 9.665395702880627e-06, + "loss": 24.4317, + "step": 102700 + }, + { + "epoch": 0.2074806982954706, + "grad_norm": 671.7747192382812, + "learning_rate": 9.665270142604688e-06, + "loss": 13.4263, + "step": 102710 + }, + { + "epoch": 0.20750089892815443, + "grad_norm": 589.3701171875, + "learning_rate": 9.665144559590789e-06, + "loss": 18.6563, + "step": 102720 + }, + { + "epoch": 0.20752109956083825, + "grad_norm": 583.2495727539062, + "learning_rate": 9.66501895383954e-06, + "loss": 22.839, + "step": 102730 + }, + { + "epoch": 0.20754130019352207, + "grad_norm": 1090.4007568359375, + "learning_rate": 9.664893325351556e-06, + "loss": 30.8803, + "step": 102740 + }, + { + "epoch": 0.20756150082620586, + "grad_norm": 237.1215057373047, + "learning_rate": 9.664767674127447e-06, + "loss": 31.7639, + "step": 102750 + }, + { + "epoch": 0.20758170145888968, + "grad_norm": 187.4674072265625, + "learning_rate": 9.664642000167825e-06, + "loss": 19.2407, + "step": 102760 + }, + { + "epoch": 0.2076019020915735, + "grad_norm": 650.6720581054688, + "learning_rate": 9.664516303473305e-06, + "loss": 27.2862, + "step": 102770 + }, + { + "epoch": 0.20762210272425732, + "grad_norm": 462.8194885253906, + "learning_rate": 9.664390584044497e-06, + "loss": 31.5226, + "step": 102780 + }, + { + "epoch": 0.20764230335694114, + "grad_norm": 520.2129516601562, + "learning_rate": 9.664264841882016e-06, + "loss": 20.4408, + "step": 102790 + }, + { + "epoch": 0.20766250398962496, + "grad_norm": 231.10311889648438, + "learning_rate": 9.664139076986473e-06, + "loss": 16.1323, + "step": 102800 + }, + { + "epoch": 0.20768270462230878, + "grad_norm": 754.71533203125, + "learning_rate": 9.664013289358483e-06, + "loss": 30.6725, + "step": 102810 + }, + { + "epoch": 0.20770290525499258, + "grad_norm": 328.50787353515625, + "learning_rate": 9.663887478998657e-06, + "loss": 23.7939, + "step": 102820 + }, + { + "epoch": 0.2077231058876764, + "grad_norm": 608.3097534179688, + "learning_rate": 9.66376164590761e-06, + "loss": 26.8249, + "step": 102830 + }, + { + "epoch": 0.20774330652036022, + "grad_norm": 566.4948120117188, + "learning_rate": 9.663635790085954e-06, + "loss": 34.519, + "step": 102840 + }, + { + "epoch": 0.20776350715304404, + "grad_norm": 39.48759841918945, + "learning_rate": 9.663509911534302e-06, + "loss": 21.2754, + "step": 102850 + }, + { + "epoch": 0.20778370778572786, + "grad_norm": 789.2013549804688, + "learning_rate": 
9.663384010253269e-06, + "loss": 25.6627, + "step": 102860 + }, + { + "epoch": 0.20780390841841168, + "grad_norm": 213.62619018554688, + "learning_rate": 9.663258086243468e-06, + "loss": 14.5341, + "step": 102870 + }, + { + "epoch": 0.20782410905109547, + "grad_norm": 358.7830505371094, + "learning_rate": 9.663132139505513e-06, + "loss": 14.7057, + "step": 102880 + }, + { + "epoch": 0.2078443096837793, + "grad_norm": 541.4920043945312, + "learning_rate": 9.663006170040016e-06, + "loss": 17.3757, + "step": 102890 + }, + { + "epoch": 0.2078645103164631, + "grad_norm": 871.7129516601562, + "learning_rate": 9.662880177847595e-06, + "loss": 22.9835, + "step": 102900 + }, + { + "epoch": 0.20788471094914693, + "grad_norm": 420.2844543457031, + "learning_rate": 9.66275416292886e-06, + "loss": 17.0057, + "step": 102910 + }, + { + "epoch": 0.20790491158183075, + "grad_norm": 362.6865234375, + "learning_rate": 9.662628125284426e-06, + "loss": 27.016, + "step": 102920 + }, + { + "epoch": 0.20792511221451457, + "grad_norm": 25.914304733276367, + "learning_rate": 9.66250206491491e-06, + "loss": 29.3879, + "step": 102930 + }, + { + "epoch": 0.2079453128471984, + "grad_norm": 1035.3394775390625, + "learning_rate": 9.662375981820921e-06, + "loss": 46.7387, + "step": 102940 + }, + { + "epoch": 0.20796551347988218, + "grad_norm": 455.292236328125, + "learning_rate": 9.66224987600308e-06, + "loss": 14.5877, + "step": 102950 + }, + { + "epoch": 0.207985714112566, + "grad_norm": 517.5321655273438, + "learning_rate": 9.662123747461998e-06, + "loss": 24.5114, + "step": 102960 + }, + { + "epoch": 0.20800591474524982, + "grad_norm": 438.7643737792969, + "learning_rate": 9.66199759619829e-06, + "loss": 39.6918, + "step": 102970 + }, + { + "epoch": 0.20802611537793364, + "grad_norm": 243.06369018554688, + "learning_rate": 9.661871422212572e-06, + "loss": 22.8764, + "step": 102980 + }, + { + "epoch": 0.20804631601061746, + "grad_norm": 571.2363891601562, + "learning_rate": 9.661745225505457e-06, + "loss": 20.3634, + "step": 102990 + }, + { + "epoch": 0.20806651664330128, + "grad_norm": 16.093774795532227, + "learning_rate": 9.661619006077562e-06, + "loss": 13.9841, + "step": 103000 + }, + { + "epoch": 0.20808671727598507, + "grad_norm": 394.1217041015625, + "learning_rate": 9.661492763929502e-06, + "loss": 29.1695, + "step": 103010 + }, + { + "epoch": 0.2081069179086689, + "grad_norm": 463.3786926269531, + "learning_rate": 9.66136649906189e-06, + "loss": 17.631, + "step": 103020 + }, + { + "epoch": 0.2081271185413527, + "grad_norm": 218.5435333251953, + "learning_rate": 9.661240211475342e-06, + "loss": 24.5859, + "step": 103030 + }, + { + "epoch": 0.20814731917403653, + "grad_norm": 322.4661560058594, + "learning_rate": 9.661113901170477e-06, + "loss": 28.7746, + "step": 103040 + }, + { + "epoch": 0.20816751980672035, + "grad_norm": 739.1019287109375, + "learning_rate": 9.660987568147907e-06, + "loss": 32.986, + "step": 103050 + }, + { + "epoch": 0.20818772043940417, + "grad_norm": 206.95220947265625, + "learning_rate": 9.66086121240825e-06, + "loss": 36.8508, + "step": 103060 + }, + { + "epoch": 0.20820792107208796, + "grad_norm": 174.70111083984375, + "learning_rate": 9.66073483395212e-06, + "loss": 15.9502, + "step": 103070 + }, + { + "epoch": 0.20822812170477178, + "grad_norm": 437.3995666503906, + "learning_rate": 9.660608432780133e-06, + "loss": 12.2834, + "step": 103080 + }, + { + "epoch": 0.2082483223374556, + "grad_norm": 288.0544128417969, + "learning_rate": 9.660482008892907e-06, + "loss": 13.4444, + 
"step": 103090 + }, + { + "epoch": 0.20826852297013942, + "grad_norm": 543.4057006835938, + "learning_rate": 9.660355562291055e-06, + "loss": 32.2566, + "step": 103100 + }, + { + "epoch": 0.20828872360282324, + "grad_norm": 509.3530578613281, + "learning_rate": 9.660229092975197e-06, + "loss": 15.5038, + "step": 103110 + }, + { + "epoch": 0.20830892423550706, + "grad_norm": 367.4169921875, + "learning_rate": 9.660102600945947e-06, + "loss": 16.8844, + "step": 103120 + }, + { + "epoch": 0.20832912486819088, + "grad_norm": 319.7675476074219, + "learning_rate": 9.659976086203922e-06, + "loss": 14.6273, + "step": 103130 + }, + { + "epoch": 0.20834932550087468, + "grad_norm": 290.2574157714844, + "learning_rate": 9.65984954874974e-06, + "loss": 29.0676, + "step": 103140 + }, + { + "epoch": 0.2083695261335585, + "grad_norm": 267.9770812988281, + "learning_rate": 9.659722988584015e-06, + "loss": 34.6491, + "step": 103150 + }, + { + "epoch": 0.20838972676624232, + "grad_norm": 745.9423217773438, + "learning_rate": 9.659596405707366e-06, + "loss": 32.7889, + "step": 103160 + }, + { + "epoch": 0.20840992739892614, + "grad_norm": 438.6147766113281, + "learning_rate": 9.659469800120408e-06, + "loss": 24.7799, + "step": 103170 + }, + { + "epoch": 0.20843012803160996, + "grad_norm": 74.18256378173828, + "learning_rate": 9.65934317182376e-06, + "loss": 12.9358, + "step": 103180 + }, + { + "epoch": 0.20845032866429378, + "grad_norm": 146.2864227294922, + "learning_rate": 9.65921652081804e-06, + "loss": 19.3974, + "step": 103190 + }, + { + "epoch": 0.20847052929697757, + "grad_norm": 320.2649230957031, + "learning_rate": 9.659089847103863e-06, + "loss": 18.9058, + "step": 103200 + }, + { + "epoch": 0.2084907299296614, + "grad_norm": 916.2047119140625, + "learning_rate": 9.658963150681848e-06, + "loss": 35.5412, + "step": 103210 + }, + { + "epoch": 0.2085109305623452, + "grad_norm": 359.853515625, + "learning_rate": 9.658836431552609e-06, + "loss": 24.8434, + "step": 103220 + }, + { + "epoch": 0.20853113119502903, + "grad_norm": 867.36376953125, + "learning_rate": 9.658709689716768e-06, + "loss": 20.0789, + "step": 103230 + }, + { + "epoch": 0.20855133182771285, + "grad_norm": 513.3993530273438, + "learning_rate": 9.65858292517494e-06, + "loss": 16.9374, + "step": 103240 + }, + { + "epoch": 0.20857153246039667, + "grad_norm": 599.906982421875, + "learning_rate": 9.658456137927745e-06, + "loss": 31.0198, + "step": 103250 + }, + { + "epoch": 0.20859173309308046, + "grad_norm": 230.84130859375, + "learning_rate": 9.6583293279758e-06, + "loss": 18.9309, + "step": 103260 + }, + { + "epoch": 0.20861193372576428, + "grad_norm": 403.97515869140625, + "learning_rate": 9.658202495319721e-06, + "loss": 13.8265, + "step": 103270 + }, + { + "epoch": 0.2086321343584481, + "grad_norm": 620.8816528320312, + "learning_rate": 9.65807563996013e-06, + "loss": 13.429, + "step": 103280 + }, + { + "epoch": 0.20865233499113192, + "grad_norm": 287.4898986816406, + "learning_rate": 9.657948761897643e-06, + "loss": 22.3302, + "step": 103290 + }, + { + "epoch": 0.20867253562381574, + "grad_norm": 122.81134033203125, + "learning_rate": 9.65782186113288e-06, + "loss": 15.098, + "step": 103300 + }, + { + "epoch": 0.20869273625649956, + "grad_norm": 0.0, + "learning_rate": 9.657694937666454e-06, + "loss": 13.8586, + "step": 103310 + }, + { + "epoch": 0.20871293688918338, + "grad_norm": 328.1044616699219, + "learning_rate": 9.65756799149899e-06, + "loss": 25.5796, + "step": 103320 + }, + { + "epoch": 0.20873313752186717, + "grad_norm": 
190.67489624023438, + "learning_rate": 9.657441022631105e-06, + "loss": 32.4643, + "step": 103330 + }, + { + "epoch": 0.208753338154551, + "grad_norm": 411.0813903808594, + "learning_rate": 9.657314031063419e-06, + "loss": 17.2426, + "step": 103340 + }, + { + "epoch": 0.2087735387872348, + "grad_norm": 391.1584777832031, + "learning_rate": 9.657187016796546e-06, + "loss": 12.9593, + "step": 103350 + }, + { + "epoch": 0.20879373941991863, + "grad_norm": 313.9464416503906, + "learning_rate": 9.657059979831109e-06, + "loss": 18.2481, + "step": 103360 + }, + { + "epoch": 0.20881394005260245, + "grad_norm": 771.1447143554688, + "learning_rate": 9.656932920167727e-06, + "loss": 34.8314, + "step": 103370 + }, + { + "epoch": 0.20883414068528627, + "grad_norm": 607.5734252929688, + "learning_rate": 9.65680583780702e-06, + "loss": 21.0373, + "step": 103380 + }, + { + "epoch": 0.20885434131797007, + "grad_norm": 123.56460571289062, + "learning_rate": 9.656678732749605e-06, + "loss": 12.7128, + "step": 103390 + }, + { + "epoch": 0.20887454195065389, + "grad_norm": 103.83605194091797, + "learning_rate": 9.656551604996102e-06, + "loss": 29.9846, + "step": 103400 + }, + { + "epoch": 0.2088947425833377, + "grad_norm": 339.4877014160156, + "learning_rate": 9.656424454547131e-06, + "loss": 21.3867, + "step": 103410 + }, + { + "epoch": 0.20891494321602153, + "grad_norm": 695.9169921875, + "learning_rate": 9.656297281403315e-06, + "loss": 16.8246, + "step": 103420 + }, + { + "epoch": 0.20893514384870535, + "grad_norm": 800.305419921875, + "learning_rate": 9.656170085565268e-06, + "loss": 22.556, + "step": 103430 + }, + { + "epoch": 0.20895534448138917, + "grad_norm": 80.22218322753906, + "learning_rate": 9.656042867033613e-06, + "loss": 16.0496, + "step": 103440 + }, + { + "epoch": 0.20897554511407299, + "grad_norm": 492.98284912109375, + "learning_rate": 9.655915625808971e-06, + "loss": 26.8057, + "step": 103450 + }, + { + "epoch": 0.20899574574675678, + "grad_norm": 252.5966796875, + "learning_rate": 9.65578836189196e-06, + "loss": 26.845, + "step": 103460 + }, + { + "epoch": 0.2090159463794406, + "grad_norm": 609.3297729492188, + "learning_rate": 9.6556610752832e-06, + "loss": 25.3696, + "step": 103470 + }, + { + "epoch": 0.20903614701212442, + "grad_norm": 268.52362060546875, + "learning_rate": 9.655533765983315e-06, + "loss": 18.3948, + "step": 103480 + }, + { + "epoch": 0.20905634764480824, + "grad_norm": 547.3050537109375, + "learning_rate": 9.655406433992922e-06, + "loss": 40.6968, + "step": 103490 + }, + { + "epoch": 0.20907654827749206, + "grad_norm": 429.0570983886719, + "learning_rate": 9.655279079312643e-06, + "loss": 25.0425, + "step": 103500 + }, + { + "epoch": 0.20909674891017588, + "grad_norm": 451.627197265625, + "learning_rate": 9.655151701943098e-06, + "loss": 30.1036, + "step": 103510 + }, + { + "epoch": 0.20911694954285967, + "grad_norm": 289.7569885253906, + "learning_rate": 9.655024301884908e-06, + "loss": 22.6742, + "step": 103520 + }, + { + "epoch": 0.2091371501755435, + "grad_norm": 350.6484375, + "learning_rate": 9.654896879138693e-06, + "loss": 13.1503, + "step": 103530 + }, + { + "epoch": 0.2091573508082273, + "grad_norm": 286.00164794921875, + "learning_rate": 9.654769433705079e-06, + "loss": 18.3662, + "step": 103540 + }, + { + "epoch": 0.20917755144091113, + "grad_norm": 205.3463592529297, + "learning_rate": 9.65464196558468e-06, + "loss": 90.5848, + "step": 103550 + }, + { + "epoch": 0.20919775207359495, + "grad_norm": 372.85943603515625, + "learning_rate": 
9.65451447477812e-06, + "loss": 25.0431, + "step": 103560 + }, + { + "epoch": 0.20921795270627877, + "grad_norm": 272.0143127441406, + "learning_rate": 9.654386961286023e-06, + "loss": 15.6093, + "step": 103570 + }, + { + "epoch": 0.20923815333896256, + "grad_norm": 347.99725341796875, + "learning_rate": 9.654259425109009e-06, + "loss": 20.3958, + "step": 103580 + }, + { + "epoch": 0.20925835397164638, + "grad_norm": 339.36376953125, + "learning_rate": 9.654131866247698e-06, + "loss": 23.5363, + "step": 103590 + }, + { + "epoch": 0.2092785546043302, + "grad_norm": 473.11767578125, + "learning_rate": 9.654004284702712e-06, + "loss": 11.7759, + "step": 103600 + }, + { + "epoch": 0.20929875523701402, + "grad_norm": 551.1157836914062, + "learning_rate": 9.653876680474674e-06, + "loss": 28.9522, + "step": 103610 + }, + { + "epoch": 0.20931895586969784, + "grad_norm": 370.058349609375, + "learning_rate": 9.653749053564206e-06, + "loss": 25.6404, + "step": 103620 + }, + { + "epoch": 0.20933915650238166, + "grad_norm": 590.947265625, + "learning_rate": 9.65362140397193e-06, + "loss": 18.4211, + "step": 103630 + }, + { + "epoch": 0.20935935713506548, + "grad_norm": 429.77056884765625, + "learning_rate": 9.653493731698467e-06, + "loss": 22.7104, + "step": 103640 + }, + { + "epoch": 0.20937955776774927, + "grad_norm": 330.9519348144531, + "learning_rate": 9.65336603674444e-06, + "loss": 22.992, + "step": 103650 + }, + { + "epoch": 0.2093997584004331, + "grad_norm": 993.672119140625, + "learning_rate": 9.653238319110473e-06, + "loss": 20.435, + "step": 103660 + }, + { + "epoch": 0.20941995903311691, + "grad_norm": 217.466552734375, + "learning_rate": 9.653110578797183e-06, + "loss": 35.0793, + "step": 103670 + }, + { + "epoch": 0.20944015966580073, + "grad_norm": 529.8526000976562, + "learning_rate": 9.652982815805199e-06, + "loss": 18.7878, + "step": 103680 + }, + { + "epoch": 0.20946036029848455, + "grad_norm": 683.9608764648438, + "learning_rate": 9.652855030135139e-06, + "loss": 23.9079, + "step": 103690 + }, + { + "epoch": 0.20948056093116837, + "grad_norm": 495.77899169921875, + "learning_rate": 9.65272722178763e-06, + "loss": 32.7164, + "step": 103700 + }, + { + "epoch": 0.20950076156385217, + "grad_norm": 44.03288269042969, + "learning_rate": 9.652599390763294e-06, + "loss": 52.573, + "step": 103710 + }, + { + "epoch": 0.209520962196536, + "grad_norm": 154.39260864257812, + "learning_rate": 9.652471537062751e-06, + "loss": 13.7941, + "step": 103720 + }, + { + "epoch": 0.2095411628292198, + "grad_norm": 417.5998840332031, + "learning_rate": 9.652343660686626e-06, + "loss": 14.2793, + "step": 103730 + }, + { + "epoch": 0.20956136346190363, + "grad_norm": 491.297607421875, + "learning_rate": 9.652215761635541e-06, + "loss": 23.3906, + "step": 103740 + }, + { + "epoch": 0.20958156409458745, + "grad_norm": 415.0970153808594, + "learning_rate": 9.652087839910123e-06, + "loss": 32.32, + "step": 103750 + }, + { + "epoch": 0.20960176472727127, + "grad_norm": 91.13452911376953, + "learning_rate": 9.651959895510992e-06, + "loss": 29.1928, + "step": 103760 + }, + { + "epoch": 0.2096219653599551, + "grad_norm": 332.85137939453125, + "learning_rate": 9.651831928438773e-06, + "loss": 23.4254, + "step": 103770 + }, + { + "epoch": 0.20964216599263888, + "grad_norm": 323.9944152832031, + "learning_rate": 9.65170393869409e-06, + "loss": 12.4928, + "step": 103780 + }, + { + "epoch": 0.2096623666253227, + "grad_norm": 322.8095397949219, + "learning_rate": 9.651575926277566e-06, + "loss": 16.172, + "step": 103790 + 
}, + { + "epoch": 0.20968256725800652, + "grad_norm": 219.4324493408203, + "learning_rate": 9.651447891189824e-06, + "loss": 19.0577, + "step": 103800 + }, + { + "epoch": 0.20970276789069034, + "grad_norm": 46.137203216552734, + "learning_rate": 9.65131983343149e-06, + "loss": 30.1033, + "step": 103810 + }, + { + "epoch": 0.20972296852337416, + "grad_norm": 839.6621704101562, + "learning_rate": 9.651191753003187e-06, + "loss": 26.9795, + "step": 103820 + }, + { + "epoch": 0.20974316915605798, + "grad_norm": 665.8576049804688, + "learning_rate": 9.65106364990554e-06, + "loss": 35.2283, + "step": 103830 + }, + { + "epoch": 0.20976336978874177, + "grad_norm": 543.6165161132812, + "learning_rate": 9.650935524139172e-06, + "loss": 32.0244, + "step": 103840 + }, + { + "epoch": 0.2097835704214256, + "grad_norm": 555.6519165039062, + "learning_rate": 9.650807375704708e-06, + "loss": 30.0249, + "step": 103850 + }, + { + "epoch": 0.2098037710541094, + "grad_norm": 261.15020751953125, + "learning_rate": 9.650679204602773e-06, + "loss": 18.333, + "step": 103860 + }, + { + "epoch": 0.20982397168679323, + "grad_norm": 203.61585998535156, + "learning_rate": 9.650551010833993e-06, + "loss": 17.2493, + "step": 103870 + }, + { + "epoch": 0.20984417231947705, + "grad_norm": 465.2434997558594, + "learning_rate": 9.650422794398991e-06, + "loss": 26.312, + "step": 103880 + }, + { + "epoch": 0.20986437295216087, + "grad_norm": 907.5621337890625, + "learning_rate": 9.650294555298392e-06, + "loss": 35.4733, + "step": 103890 + }, + { + "epoch": 0.20988457358484466, + "grad_norm": 1396.000244140625, + "learning_rate": 9.650166293532822e-06, + "loss": 21.6182, + "step": 103900 + }, + { + "epoch": 0.20990477421752848, + "grad_norm": 572.03369140625, + "learning_rate": 9.650038009102905e-06, + "loss": 16.3848, + "step": 103910 + }, + { + "epoch": 0.2099249748502123, + "grad_norm": 485.6903991699219, + "learning_rate": 9.649909702009265e-06, + "loss": 19.6476, + "step": 103920 + }, + { + "epoch": 0.20994517548289612, + "grad_norm": 650.719482421875, + "learning_rate": 9.649781372252532e-06, + "loss": 13.2238, + "step": 103930 + }, + { + "epoch": 0.20996537611557994, + "grad_norm": 160.12863159179688, + "learning_rate": 9.649653019833327e-06, + "loss": 20.9152, + "step": 103940 + }, + { + "epoch": 0.20998557674826376, + "grad_norm": 271.49151611328125, + "learning_rate": 9.649524644752278e-06, + "loss": 24.1985, + "step": 103950 + }, + { + "epoch": 0.21000577738094758, + "grad_norm": 562.4899291992188, + "learning_rate": 9.649396247010008e-06, + "loss": 27.713, + "step": 103960 + }, + { + "epoch": 0.21002597801363138, + "grad_norm": 695.3604125976562, + "learning_rate": 9.649267826607145e-06, + "loss": 25.0402, + "step": 103970 + }, + { + "epoch": 0.2100461786463152, + "grad_norm": 761.8713989257812, + "learning_rate": 9.649139383544315e-06, + "loss": 23.7212, + "step": 103980 + }, + { + "epoch": 0.21006637927899902, + "grad_norm": 214.13221740722656, + "learning_rate": 9.649010917822145e-06, + "loss": 23.0139, + "step": 103990 + }, + { + "epoch": 0.21008657991168284, + "grad_norm": 342.4206237792969, + "learning_rate": 9.648882429441258e-06, + "loss": 30.4913, + "step": 104000 + }, + { + "epoch": 0.21010678054436666, + "grad_norm": 317.7519226074219, + "learning_rate": 9.648753918402283e-06, + "loss": 18.0373, + "step": 104010 + }, + { + "epoch": 0.21012698117705048, + "grad_norm": 742.5284423828125, + "learning_rate": 9.648625384705844e-06, + "loss": 27.8134, + "step": 104020 + }, + { + "epoch": 0.21014718180973427, + 
"grad_norm": 424.1588439941406, + "learning_rate": 9.648496828352569e-06, + "loss": 20.2973, + "step": 104030 + }, + { + "epoch": 0.2101673824424181, + "grad_norm": 455.66943359375, + "learning_rate": 9.648368249343084e-06, + "loss": 23.7883, + "step": 104040 + }, + { + "epoch": 0.2101875830751019, + "grad_norm": 163.74085998535156, + "learning_rate": 9.648239647678017e-06, + "loss": 11.1417, + "step": 104050 + }, + { + "epoch": 0.21020778370778573, + "grad_norm": 190.27513122558594, + "learning_rate": 9.648111023357994e-06, + "loss": 29.0521, + "step": 104060 + }, + { + "epoch": 0.21022798434046955, + "grad_norm": 625.2974243164062, + "learning_rate": 9.64798237638364e-06, + "loss": 17.1522, + "step": 104070 + }, + { + "epoch": 0.21024818497315337, + "grad_norm": 596.1205444335938, + "learning_rate": 9.647853706755583e-06, + "loss": 31.4544, + "step": 104080 + }, + { + "epoch": 0.2102683856058372, + "grad_norm": 511.16522216796875, + "learning_rate": 9.647725014474452e-06, + "loss": 23.0785, + "step": 104090 + }, + { + "epoch": 0.21028858623852098, + "grad_norm": 153.3165740966797, + "learning_rate": 9.647596299540874e-06, + "loss": 17.5788, + "step": 104100 + }, + { + "epoch": 0.2103087868712048, + "grad_norm": 352.26904296875, + "learning_rate": 9.647467561955474e-06, + "loss": 15.7225, + "step": 104110 + }, + { + "epoch": 0.21032898750388862, + "grad_norm": 783.6790771484375, + "learning_rate": 9.647338801718882e-06, + "loss": 29.8876, + "step": 104120 + }, + { + "epoch": 0.21034918813657244, + "grad_norm": 620.0906372070312, + "learning_rate": 9.647210018831723e-06, + "loss": 27.5526, + "step": 104130 + }, + { + "epoch": 0.21036938876925626, + "grad_norm": 190.4215087890625, + "learning_rate": 9.647081213294627e-06, + "loss": 21.3827, + "step": 104140 + }, + { + "epoch": 0.21038958940194008, + "grad_norm": 20.406620025634766, + "learning_rate": 9.64695238510822e-06, + "loss": 17.9227, + "step": 104150 + }, + { + "epoch": 0.21040979003462387, + "grad_norm": 314.3428649902344, + "learning_rate": 9.646823534273131e-06, + "loss": 24.0574, + "step": 104160 + }, + { + "epoch": 0.2104299906673077, + "grad_norm": 370.3890686035156, + "learning_rate": 9.646694660789986e-06, + "loss": 19.9015, + "step": 104170 + }, + { + "epoch": 0.2104501912999915, + "grad_norm": 474.7753601074219, + "learning_rate": 9.646565764659418e-06, + "loss": 29.9416, + "step": 104180 + }, + { + "epoch": 0.21047039193267533, + "grad_norm": 284.2029724121094, + "learning_rate": 9.64643684588205e-06, + "loss": 26.8573, + "step": 104190 + }, + { + "epoch": 0.21049059256535915, + "grad_norm": 695.9445190429688, + "learning_rate": 9.646307904458513e-06, + "loss": 29.3759, + "step": 104200 + }, + { + "epoch": 0.21051079319804297, + "grad_norm": 174.19967651367188, + "learning_rate": 9.646178940389435e-06, + "loss": 12.2988, + "step": 104210 + }, + { + "epoch": 0.21053099383072676, + "grad_norm": 449.9962158203125, + "learning_rate": 9.646049953675443e-06, + "loss": 20.4931, + "step": 104220 + }, + { + "epoch": 0.21055119446341058, + "grad_norm": 227.7549285888672, + "learning_rate": 9.645920944317168e-06, + "loss": 17.4696, + "step": 104230 + }, + { + "epoch": 0.2105713950960944, + "grad_norm": 317.04510498046875, + "learning_rate": 9.645791912315239e-06, + "loss": 30.6065, + "step": 104240 + }, + { + "epoch": 0.21059159572877822, + "grad_norm": 964.8438110351562, + "learning_rate": 9.645662857670281e-06, + "loss": 33.8488, + "step": 104250 + }, + { + "epoch": 0.21061179636146204, + "grad_norm": 360.98028564453125, + 
"learning_rate": 9.645533780382928e-06, + "loss": 43.4764, + "step": 104260 + }, + { + "epoch": 0.21063199699414586, + "grad_norm": 304.18011474609375, + "learning_rate": 9.645404680453805e-06, + "loss": 18.507, + "step": 104270 + }, + { + "epoch": 0.21065219762682968, + "grad_norm": 519.6947631835938, + "learning_rate": 9.645275557883545e-06, + "loss": 10.1371, + "step": 104280 + }, + { + "epoch": 0.21067239825951348, + "grad_norm": 346.36541748046875, + "learning_rate": 9.645146412672774e-06, + "loss": 18.4591, + "step": 104290 + }, + { + "epoch": 0.2106925988921973, + "grad_norm": 30.78389549255371, + "learning_rate": 9.645017244822124e-06, + "loss": 38.215, + "step": 104300 + }, + { + "epoch": 0.21071279952488112, + "grad_norm": 523.0682983398438, + "learning_rate": 9.644888054332222e-06, + "loss": 29.042, + "step": 104310 + }, + { + "epoch": 0.21073300015756494, + "grad_norm": 576.9373779296875, + "learning_rate": 9.644758841203698e-06, + "loss": 30.983, + "step": 104320 + }, + { + "epoch": 0.21075320079024876, + "grad_norm": 955.591796875, + "learning_rate": 9.644629605437186e-06, + "loss": 32.7122, + "step": 104330 + }, + { + "epoch": 0.21077340142293258, + "grad_norm": 510.2848205566406, + "learning_rate": 9.64450034703331e-06, + "loss": 31.5209, + "step": 104340 + }, + { + "epoch": 0.21079360205561637, + "grad_norm": 509.8435363769531, + "learning_rate": 9.644371065992704e-06, + "loss": 23.3649, + "step": 104350 + }, + { + "epoch": 0.2108138026883002, + "grad_norm": 662.75341796875, + "learning_rate": 9.644241762315995e-06, + "loss": 21.6943, + "step": 104360 + }, + { + "epoch": 0.210834003320984, + "grad_norm": 392.33282470703125, + "learning_rate": 9.644112436003817e-06, + "loss": 18.6042, + "step": 104370 + }, + { + "epoch": 0.21085420395366783, + "grad_norm": 277.52398681640625, + "learning_rate": 9.643983087056796e-06, + "loss": 48.5873, + "step": 104380 + }, + { + "epoch": 0.21087440458635165, + "grad_norm": 558.379638671875, + "learning_rate": 9.643853715475567e-06, + "loss": 8.8655, + "step": 104390 + }, + { + "epoch": 0.21089460521903547, + "grad_norm": 912.6490478515625, + "learning_rate": 9.643724321260757e-06, + "loss": 39.9867, + "step": 104400 + }, + { + "epoch": 0.2109148058517193, + "grad_norm": 196.27220153808594, + "learning_rate": 9.643594904412998e-06, + "loss": 36.5609, + "step": 104410 + }, + { + "epoch": 0.21093500648440308, + "grad_norm": 399.4701843261719, + "learning_rate": 9.64346546493292e-06, + "loss": 29.8103, + "step": 104420 + }, + { + "epoch": 0.2109552071170869, + "grad_norm": 390.3523254394531, + "learning_rate": 9.643336002821155e-06, + "loss": 21.7872, + "step": 104430 + }, + { + "epoch": 0.21097540774977072, + "grad_norm": 307.52020263671875, + "learning_rate": 9.643206518078335e-06, + "loss": 24.2028, + "step": 104440 + }, + { + "epoch": 0.21099560838245454, + "grad_norm": 150.6370849609375, + "learning_rate": 9.643077010705088e-06, + "loss": 15.9814, + "step": 104450 + }, + { + "epoch": 0.21101580901513836, + "grad_norm": 29.605432510375977, + "learning_rate": 9.642947480702047e-06, + "loss": 17.2623, + "step": 104460 + }, + { + "epoch": 0.21103600964782218, + "grad_norm": 521.399658203125, + "learning_rate": 9.642817928069843e-06, + "loss": 29.4208, + "step": 104470 + }, + { + "epoch": 0.21105621028050597, + "grad_norm": 389.67401123046875, + "learning_rate": 9.642688352809108e-06, + "loss": 12.6442, + "step": 104480 + }, + { + "epoch": 0.2110764109131898, + "grad_norm": 1051.3519287109375, + "learning_rate": 9.642558754920472e-06, + 
"loss": 27.8617, + "step": 104490 + }, + { + "epoch": 0.2110966115458736, + "grad_norm": 242.0317840576172, + "learning_rate": 9.642429134404568e-06, + "loss": 19.388, + "step": 104500 + }, + { + "epoch": 0.21111681217855743, + "grad_norm": 183.55413818359375, + "learning_rate": 9.642299491262028e-06, + "loss": 66.1441, + "step": 104510 + }, + { + "epoch": 0.21113701281124125, + "grad_norm": 182.1591796875, + "learning_rate": 9.642169825493483e-06, + "loss": 15.527, + "step": 104520 + }, + { + "epoch": 0.21115721344392507, + "grad_norm": 760.7169189453125, + "learning_rate": 9.642040137099567e-06, + "loss": 23.7755, + "step": 104530 + }, + { + "epoch": 0.21117741407660887, + "grad_norm": 69.65254974365234, + "learning_rate": 9.641910426080909e-06, + "loss": 24.1172, + "step": 104540 + }, + { + "epoch": 0.21119761470929269, + "grad_norm": 109.78113555908203, + "learning_rate": 9.641780692438142e-06, + "loss": 20.1373, + "step": 104550 + }, + { + "epoch": 0.2112178153419765, + "grad_norm": 258.04345703125, + "learning_rate": 9.641650936171899e-06, + "loss": 21.6432, + "step": 104560 + }, + { + "epoch": 0.21123801597466033, + "grad_norm": 295.4312438964844, + "learning_rate": 9.641521157282812e-06, + "loss": 40.5421, + "step": 104570 + }, + { + "epoch": 0.21125821660734415, + "grad_norm": 88.19575500488281, + "learning_rate": 9.641391355771515e-06, + "loss": 41.9684, + "step": 104580 + }, + { + "epoch": 0.21127841724002797, + "grad_norm": 446.5942687988281, + "learning_rate": 9.641261531638639e-06, + "loss": 24.5742, + "step": 104590 + }, + { + "epoch": 0.21129861787271179, + "grad_norm": 541.0067138671875, + "learning_rate": 9.641131684884817e-06, + "loss": 24.1824, + "step": 104600 + }, + { + "epoch": 0.21131881850539558, + "grad_norm": 721.6463012695312, + "learning_rate": 9.641001815510683e-06, + "loss": 21.8882, + "step": 104610 + }, + { + "epoch": 0.2113390191380794, + "grad_norm": 232.47772216796875, + "learning_rate": 9.640871923516868e-06, + "loss": 13.5951, + "step": 104620 + }, + { + "epoch": 0.21135921977076322, + "grad_norm": 381.5535583496094, + "learning_rate": 9.640742008904006e-06, + "loss": 19.7345, + "step": 104630 + }, + { + "epoch": 0.21137942040344704, + "grad_norm": 406.0082092285156, + "learning_rate": 9.64061207167273e-06, + "loss": 33.6257, + "step": 104640 + }, + { + "epoch": 0.21139962103613086, + "grad_norm": 450.4580383300781, + "learning_rate": 9.640482111823675e-06, + "loss": 15.2805, + "step": 104650 + }, + { + "epoch": 0.21141982166881468, + "grad_norm": 348.883544921875, + "learning_rate": 9.640352129357473e-06, + "loss": 17.2821, + "step": 104660 + }, + { + "epoch": 0.21144002230149847, + "grad_norm": 259.6910095214844, + "learning_rate": 9.640222124274756e-06, + "loss": 33.3449, + "step": 104670 + }, + { + "epoch": 0.2114602229341823, + "grad_norm": 760.593994140625, + "learning_rate": 9.64009209657616e-06, + "loss": 25.1494, + "step": 104680 + }, + { + "epoch": 0.2114804235668661, + "grad_norm": 487.2242126464844, + "learning_rate": 9.639962046262319e-06, + "loss": 20.9965, + "step": 104690 + }, + { + "epoch": 0.21150062419954993, + "grad_norm": 369.1243591308594, + "learning_rate": 9.639831973333864e-06, + "loss": 18.845, + "step": 104700 + }, + { + "epoch": 0.21152082483223375, + "grad_norm": 826.623779296875, + "learning_rate": 9.63970187779143e-06, + "loss": 29.863, + "step": 104710 + }, + { + "epoch": 0.21154102546491757, + "grad_norm": 679.8445434570312, + "learning_rate": 9.639571759635655e-06, + "loss": 29.9109, + "step": 104720 + }, + { + 
"epoch": 0.2115612260976014, + "grad_norm": 602.4418334960938, + "learning_rate": 9.639441618867167e-06, + "loss": 35.8549, + "step": 104730 + }, + { + "epoch": 0.21158142673028518, + "grad_norm": 916.6395263671875, + "learning_rate": 9.639311455486603e-06, + "loss": 21.7738, + "step": 104740 + }, + { + "epoch": 0.211601627362969, + "grad_norm": 480.5182189941406, + "learning_rate": 9.6391812694946e-06, + "loss": 69.9435, + "step": 104750 + }, + { + "epoch": 0.21162182799565282, + "grad_norm": 460.5019836425781, + "learning_rate": 9.639051060891789e-06, + "loss": 29.9818, + "step": 104760 + }, + { + "epoch": 0.21164202862833664, + "grad_norm": 467.3058166503906, + "learning_rate": 9.638920829678806e-06, + "loss": 16.7311, + "step": 104770 + }, + { + "epoch": 0.21166222926102046, + "grad_norm": 313.87542724609375, + "learning_rate": 9.638790575856285e-06, + "loss": 25.4375, + "step": 104780 + }, + { + "epoch": 0.21168242989370428, + "grad_norm": 298.5509338378906, + "learning_rate": 9.638660299424863e-06, + "loss": 13.7837, + "step": 104790 + }, + { + "epoch": 0.21170263052638807, + "grad_norm": 480.658935546875, + "learning_rate": 9.638530000385171e-06, + "loss": 27.33, + "step": 104800 + }, + { + "epoch": 0.2117228311590719, + "grad_norm": 1329.874755859375, + "learning_rate": 9.63839967873785e-06, + "loss": 20.4955, + "step": 104810 + }, + { + "epoch": 0.21174303179175571, + "grad_norm": 118.67254638671875, + "learning_rate": 9.638269334483528e-06, + "loss": 37.3196, + "step": 104820 + }, + { + "epoch": 0.21176323242443953, + "grad_norm": 560.5855102539062, + "learning_rate": 9.638138967622845e-06, + "loss": 15.966, + "step": 104830 + }, + { + "epoch": 0.21178343305712335, + "grad_norm": 391.7400817871094, + "learning_rate": 9.638008578156435e-06, + "loss": 30.7358, + "step": 104840 + }, + { + "epoch": 0.21180363368980717, + "grad_norm": 334.184326171875, + "learning_rate": 9.637878166084932e-06, + "loss": 21.3462, + "step": 104850 + }, + { + "epoch": 0.21182383432249097, + "grad_norm": 481.4829406738281, + "learning_rate": 9.637747731408975e-06, + "loss": 39.2185, + "step": 104860 + }, + { + "epoch": 0.2118440349551748, + "grad_norm": 2150.810546875, + "learning_rate": 9.637617274129198e-06, + "loss": 40.7387, + "step": 104870 + }, + { + "epoch": 0.2118642355878586, + "grad_norm": 714.3262939453125, + "learning_rate": 9.637486794246237e-06, + "loss": 26.9667, + "step": 104880 + }, + { + "epoch": 0.21188443622054243, + "grad_norm": 362.8081359863281, + "learning_rate": 9.637356291760729e-06, + "loss": 10.2341, + "step": 104890 + }, + { + "epoch": 0.21190463685322625, + "grad_norm": 989.9890747070312, + "learning_rate": 9.637225766673309e-06, + "loss": 19.2879, + "step": 104900 + }, + { + "epoch": 0.21192483748591007, + "grad_norm": 469.71044921875, + "learning_rate": 9.63709521898461e-06, + "loss": 22.5119, + "step": 104910 + }, + { + "epoch": 0.2119450381185939, + "grad_norm": 477.49322509765625, + "learning_rate": 9.636964648695272e-06, + "loss": 27.2541, + "step": 104920 + }, + { + "epoch": 0.21196523875127768, + "grad_norm": 767.3585205078125, + "learning_rate": 9.636834055805933e-06, + "loss": 16.5624, + "step": 104930 + }, + { + "epoch": 0.2119854393839615, + "grad_norm": 575.4326171875, + "learning_rate": 9.636703440317225e-06, + "loss": 19.2976, + "step": 104940 + }, + { + "epoch": 0.21200564001664532, + "grad_norm": 352.4774169921875, + "learning_rate": 9.636572802229789e-06, + "loss": 19.7583, + "step": 104950 + }, + { + "epoch": 0.21202584064932914, + "grad_norm": 
234.3727569580078, + "learning_rate": 9.636442141544259e-06, + "loss": 22.481, + "step": 104960 + }, + { + "epoch": 0.21204604128201296, + "grad_norm": 460.0948486328125, + "learning_rate": 9.63631145826127e-06, + "loss": 19.744, + "step": 104970 + }, + { + "epoch": 0.21206624191469678, + "grad_norm": 248.75135803222656, + "learning_rate": 9.636180752381464e-06, + "loss": 27.15, + "step": 104980 + }, + { + "epoch": 0.21208644254738057, + "grad_norm": 683.0396118164062, + "learning_rate": 9.636050023905473e-06, + "loss": 21.9629, + "step": 104990 + }, + { + "epoch": 0.2121066431800644, + "grad_norm": 541.5690307617188, + "learning_rate": 9.635919272833938e-06, + "loss": 15.2957, + "step": 105000 + }, + { + "epoch": 0.2121268438127482, + "grad_norm": 623.2774047851562, + "learning_rate": 9.635788499167494e-06, + "loss": 33.0525, + "step": 105010 + }, + { + "epoch": 0.21214704444543203, + "grad_norm": 456.1404113769531, + "learning_rate": 9.63565770290678e-06, + "loss": 28.6321, + "step": 105020 + }, + { + "epoch": 0.21216724507811585, + "grad_norm": 51.168670654296875, + "learning_rate": 9.635526884052431e-06, + "loss": 27.4311, + "step": 105030 + }, + { + "epoch": 0.21218744571079967, + "grad_norm": 110.73509979248047, + "learning_rate": 9.635396042605088e-06, + "loss": 21.2991, + "step": 105040 + }, + { + "epoch": 0.2122076463434835, + "grad_norm": 334.5740051269531, + "learning_rate": 9.635265178565386e-06, + "loss": 12.511, + "step": 105050 + }, + { + "epoch": 0.21222784697616728, + "grad_norm": 317.1121520996094, + "learning_rate": 9.635134291933964e-06, + "loss": 13.7092, + "step": 105060 + }, + { + "epoch": 0.2122480476088511, + "grad_norm": 395.42852783203125, + "learning_rate": 9.63500338271146e-06, + "loss": 16.8476, + "step": 105070 + }, + { + "epoch": 0.21226824824153492, + "grad_norm": 185.52284240722656, + "learning_rate": 9.634872450898511e-06, + "loss": 19.4594, + "step": 105080 + }, + { + "epoch": 0.21228844887421874, + "grad_norm": 442.63720703125, + "learning_rate": 9.634741496495755e-06, + "loss": 18.2281, + "step": 105090 + }, + { + "epoch": 0.21230864950690256, + "grad_norm": 454.5132751464844, + "learning_rate": 9.634610519503833e-06, + "loss": 19.671, + "step": 105100 + }, + { + "epoch": 0.21232885013958638, + "grad_norm": 175.4168243408203, + "learning_rate": 9.63447951992338e-06, + "loss": 14.6504, + "step": 105110 + }, + { + "epoch": 0.21234905077227018, + "grad_norm": 506.148681640625, + "learning_rate": 9.634348497755035e-06, + "loss": 32.2731, + "step": 105120 + }, + { + "epoch": 0.212369251404954, + "grad_norm": 191.395751953125, + "learning_rate": 9.63421745299944e-06, + "loss": 25.8174, + "step": 105130 + }, + { + "epoch": 0.21238945203763782, + "grad_norm": 549.5165405273438, + "learning_rate": 9.634086385657231e-06, + "loss": 18.8579, + "step": 105140 + }, + { + "epoch": 0.21240965267032164, + "grad_norm": 558.5545654296875, + "learning_rate": 9.633955295729044e-06, + "loss": 28.8999, + "step": 105150 + }, + { + "epoch": 0.21242985330300546, + "grad_norm": 726.7172241210938, + "learning_rate": 9.633824183215525e-06, + "loss": 34.1918, + "step": 105160 + }, + { + "epoch": 0.21245005393568928, + "grad_norm": 396.98297119140625, + "learning_rate": 9.633693048117306e-06, + "loss": 14.727, + "step": 105170 + }, + { + "epoch": 0.21247025456837307, + "grad_norm": 205.2148895263672, + "learning_rate": 9.633561890435031e-06, + "loss": 35.7451, + "step": 105180 + }, + { + "epoch": 0.2124904552010569, + "grad_norm": 32.1508674621582, + "learning_rate": 
9.633430710169335e-06, + "loss": 30.3772, + "step": 105190 + }, + { + "epoch": 0.2125106558337407, + "grad_norm": 188.542724609375, + "learning_rate": 9.633299507320862e-06, + "loss": 23.9483, + "step": 105200 + }, + { + "epoch": 0.21253085646642453, + "grad_norm": 673.6744384765625, + "learning_rate": 9.633168281890248e-06, + "loss": 26.4373, + "step": 105210 + }, + { + "epoch": 0.21255105709910835, + "grad_norm": 564.9224853515625, + "learning_rate": 9.633037033878135e-06, + "loss": 26.3376, + "step": 105220 + }, + { + "epoch": 0.21257125773179217, + "grad_norm": 307.643798828125, + "learning_rate": 9.632905763285159e-06, + "loss": 16.5857, + "step": 105230 + }, + { + "epoch": 0.212591458364476, + "grad_norm": 520.0028076171875, + "learning_rate": 9.632774470111964e-06, + "loss": 19.2623, + "step": 105240 + }, + { + "epoch": 0.21261165899715978, + "grad_norm": 1217.56494140625, + "learning_rate": 9.632643154359187e-06, + "loss": 27.696, + "step": 105250 + }, + { + "epoch": 0.2126318596298436, + "grad_norm": 1328.8448486328125, + "learning_rate": 9.63251181602747e-06, + "loss": 42.2831, + "step": 105260 + }, + { + "epoch": 0.21265206026252742, + "grad_norm": 274.86114501953125, + "learning_rate": 9.632380455117452e-06, + "loss": 34.2978, + "step": 105270 + }, + { + "epoch": 0.21267226089521124, + "grad_norm": 877.2543334960938, + "learning_rate": 9.632249071629773e-06, + "loss": 29.2062, + "step": 105280 + }, + { + "epoch": 0.21269246152789506, + "grad_norm": 161.43724060058594, + "learning_rate": 9.632117665565075e-06, + "loss": 17.8729, + "step": 105290 + }, + { + "epoch": 0.21271266216057888, + "grad_norm": 9.050535202026367, + "learning_rate": 9.631986236923998e-06, + "loss": 32.4674, + "step": 105300 + }, + { + "epoch": 0.21273286279326267, + "grad_norm": 378.3053894042969, + "learning_rate": 9.63185478570718e-06, + "loss": 30.5205, + "step": 105310 + }, + { + "epoch": 0.2127530634259465, + "grad_norm": 639.4319458007812, + "learning_rate": 9.631723311915264e-06, + "loss": 19.0381, + "step": 105320 + }, + { + "epoch": 0.2127732640586303, + "grad_norm": 526.9933471679688, + "learning_rate": 9.63159181554889e-06, + "loss": 47.6003, + "step": 105330 + }, + { + "epoch": 0.21279346469131413, + "grad_norm": 470.7127990722656, + "learning_rate": 9.6314602966087e-06, + "loss": 17.5264, + "step": 105340 + }, + { + "epoch": 0.21281366532399795, + "grad_norm": 807.1245727539062, + "learning_rate": 9.631328755095334e-06, + "loss": 43.1902, + "step": 105350 + }, + { + "epoch": 0.21283386595668177, + "grad_norm": 495.2669677734375, + "learning_rate": 9.631197191009433e-06, + "loss": 28.6787, + "step": 105360 + }, + { + "epoch": 0.2128540665893656, + "grad_norm": 254.3119354248047, + "learning_rate": 9.631065604351639e-06, + "loss": 16.0459, + "step": 105370 + }, + { + "epoch": 0.21287426722204938, + "grad_norm": 184.9499969482422, + "learning_rate": 9.630933995122594e-06, + "loss": 15.4068, + "step": 105380 + }, + { + "epoch": 0.2128944678547332, + "grad_norm": 687.2736206054688, + "learning_rate": 9.630802363322936e-06, + "loss": 21.8519, + "step": 105390 + }, + { + "epoch": 0.21291466848741702, + "grad_norm": 505.1662292480469, + "learning_rate": 9.630670708953311e-06, + "loss": 24.3643, + "step": 105400 + }, + { + "epoch": 0.21293486912010084, + "grad_norm": 440.841064453125, + "learning_rate": 9.630539032014358e-06, + "loss": 23.5284, + "step": 105410 + }, + { + "epoch": 0.21295506975278466, + "grad_norm": 113.52627563476562, + "learning_rate": 9.630407332506718e-06, + "loss": 13.9981, + 
"step": 105420 + }, + { + "epoch": 0.21297527038546848, + "grad_norm": 316.8888244628906, + "learning_rate": 9.630275610431036e-06, + "loss": 16.4941, + "step": 105430 + }, + { + "epoch": 0.21299547101815228, + "grad_norm": 431.79595947265625, + "learning_rate": 9.630143865787951e-06, + "loss": 16.3349, + "step": 105440 + }, + { + "epoch": 0.2130156716508361, + "grad_norm": 161.92481994628906, + "learning_rate": 9.630012098578108e-06, + "loss": 21.3761, + "step": 105450 + }, + { + "epoch": 0.21303587228351992, + "grad_norm": 124.17230224609375, + "learning_rate": 9.629880308802147e-06, + "loss": 40.3034, + "step": 105460 + }, + { + "epoch": 0.21305607291620374, + "grad_norm": 501.4466247558594, + "learning_rate": 9.62974849646071e-06, + "loss": 22.316, + "step": 105470 + }, + { + "epoch": 0.21307627354888756, + "grad_norm": 614.591796875, + "learning_rate": 9.62961666155444e-06, + "loss": 16.7649, + "step": 105480 + }, + { + "epoch": 0.21309647418157138, + "grad_norm": 315.0533142089844, + "learning_rate": 9.629484804083982e-06, + "loss": 37.166, + "step": 105490 + }, + { + "epoch": 0.21311667481425517, + "grad_norm": 338.9149169921875, + "learning_rate": 9.629352924049975e-06, + "loss": 30.9103, + "step": 105500 + }, + { + "epoch": 0.213136875446939, + "grad_norm": 229.65621948242188, + "learning_rate": 9.629221021453063e-06, + "loss": 20.4643, + "step": 105510 + }, + { + "epoch": 0.2131570760796228, + "grad_norm": 604.4810180664062, + "learning_rate": 9.62908909629389e-06, + "loss": 18.4634, + "step": 105520 + }, + { + "epoch": 0.21317727671230663, + "grad_norm": 126.59526824951172, + "learning_rate": 9.628957148573099e-06, + "loss": 23.5431, + "step": 105530 + }, + { + "epoch": 0.21319747734499045, + "grad_norm": 320.6859436035156, + "learning_rate": 9.62882517829133e-06, + "loss": 28.3505, + "step": 105540 + }, + { + "epoch": 0.21321767797767427, + "grad_norm": 29.92398452758789, + "learning_rate": 9.62869318544923e-06, + "loss": 24.5034, + "step": 105550 + }, + { + "epoch": 0.2132378786103581, + "grad_norm": 320.8935546875, + "learning_rate": 9.62856117004744e-06, + "loss": 24.5576, + "step": 105560 + }, + { + "epoch": 0.21325807924304188, + "grad_norm": 777.5241088867188, + "learning_rate": 9.628429132086606e-06, + "loss": 22.4324, + "step": 105570 + }, + { + "epoch": 0.2132782798757257, + "grad_norm": 405.69134521484375, + "learning_rate": 9.628297071567368e-06, + "loss": 49.801, + "step": 105580 + }, + { + "epoch": 0.21329848050840952, + "grad_norm": 335.9660339355469, + "learning_rate": 9.628164988490372e-06, + "loss": 19.5176, + "step": 105590 + }, + { + "epoch": 0.21331868114109334, + "grad_norm": 292.7089538574219, + "learning_rate": 9.628032882856262e-06, + "loss": 17.4636, + "step": 105600 + }, + { + "epoch": 0.21333888177377716, + "grad_norm": 362.6543273925781, + "learning_rate": 9.62790075466568e-06, + "loss": 18.5619, + "step": 105610 + }, + { + "epoch": 0.21335908240646098, + "grad_norm": 341.5017395019531, + "learning_rate": 9.62776860391927e-06, + "loss": 11.2724, + "step": 105620 + }, + { + "epoch": 0.21337928303914477, + "grad_norm": 832.9976806640625, + "learning_rate": 9.62763643061768e-06, + "loss": 22.233, + "step": 105630 + }, + { + "epoch": 0.2133994836718286, + "grad_norm": 602.6585083007812, + "learning_rate": 9.62750423476155e-06, + "loss": 28.1702, + "step": 105640 + }, + { + "epoch": 0.2134196843045124, + "grad_norm": 556.5542602539062, + "learning_rate": 9.627372016351524e-06, + "loss": 31.1442, + "step": 105650 + }, + { + "epoch": 0.21343988493719623, + 
"grad_norm": 845.3563842773438, + "learning_rate": 9.62723977538825e-06, + "loss": 25.2998, + "step": 105660 + }, + { + "epoch": 0.21346008556988005, + "grad_norm": 510.3428649902344, + "learning_rate": 9.62710751187237e-06, + "loss": 21.3298, + "step": 105670 + }, + { + "epoch": 0.21348028620256387, + "grad_norm": 346.706787109375, + "learning_rate": 9.62697522580453e-06, + "loss": 25.0908, + "step": 105680 + }, + { + "epoch": 0.2135004868352477, + "grad_norm": 319.75732421875, + "learning_rate": 9.62684291718537e-06, + "loss": 23.8758, + "step": 105690 + }, + { + "epoch": 0.21352068746793149, + "grad_norm": 439.9209899902344, + "learning_rate": 9.626710586015543e-06, + "loss": 24.0023, + "step": 105700 + }, + { + "epoch": 0.2135408881006153, + "grad_norm": 706.7144775390625, + "learning_rate": 9.626578232295689e-06, + "loss": 18.1785, + "step": 105710 + }, + { + "epoch": 0.21356108873329913, + "grad_norm": 255.9178009033203, + "learning_rate": 9.626445856026453e-06, + "loss": 13.3758, + "step": 105720 + }, + { + "epoch": 0.21358128936598295, + "grad_norm": 352.71368408203125, + "learning_rate": 9.626313457208482e-06, + "loss": 11.3927, + "step": 105730 + }, + { + "epoch": 0.21360148999866677, + "grad_norm": 337.35638427734375, + "learning_rate": 9.626181035842418e-06, + "loss": 18.072, + "step": 105740 + }, + { + "epoch": 0.21362169063135059, + "grad_norm": 141.04266357421875, + "learning_rate": 9.626048591928911e-06, + "loss": 22.8306, + "step": 105750 + }, + { + "epoch": 0.21364189126403438, + "grad_norm": 573.4781494140625, + "learning_rate": 9.625916125468604e-06, + "loss": 33.1273, + "step": 105760 + }, + { + "epoch": 0.2136620918967182, + "grad_norm": 515.8070068359375, + "learning_rate": 9.625783636462143e-06, + "loss": 18.2064, + "step": 105770 + }, + { + "epoch": 0.21368229252940202, + "grad_norm": 241.1742401123047, + "learning_rate": 9.625651124910173e-06, + "loss": 36.4317, + "step": 105780 + }, + { + "epoch": 0.21370249316208584, + "grad_norm": 453.4202880859375, + "learning_rate": 9.625518590813342e-06, + "loss": 25.3025, + "step": 105790 + }, + { + "epoch": 0.21372269379476966, + "grad_norm": 113.76762390136719, + "learning_rate": 9.62538603417229e-06, + "loss": 30.8774, + "step": 105800 + }, + { + "epoch": 0.21374289442745348, + "grad_norm": 673.5302124023438, + "learning_rate": 9.625253454987672e-06, + "loss": 11.2741, + "step": 105810 + }, + { + "epoch": 0.21376309506013727, + "grad_norm": 469.35845947265625, + "learning_rate": 9.625120853260129e-06, + "loss": 27.6385, + "step": 105820 + }, + { + "epoch": 0.2137832956928211, + "grad_norm": 146.98387145996094, + "learning_rate": 9.624988228990305e-06, + "loss": 42.1381, + "step": 105830 + }, + { + "epoch": 0.2138034963255049, + "grad_norm": 372.7845153808594, + "learning_rate": 9.624855582178852e-06, + "loss": 30.9867, + "step": 105840 + }, + { + "epoch": 0.21382369695818873, + "grad_norm": 253.32186889648438, + "learning_rate": 9.624722912826412e-06, + "loss": 10.2385, + "step": 105850 + }, + { + "epoch": 0.21384389759087255, + "grad_norm": 419.5494384765625, + "learning_rate": 9.624590220933635e-06, + "loss": 29.3135, + "step": 105860 + }, + { + "epoch": 0.21386409822355637, + "grad_norm": 1056.16748046875, + "learning_rate": 9.624457506501165e-06, + "loss": 32.2349, + "step": 105870 + }, + { + "epoch": 0.2138842988562402, + "grad_norm": 172.6009979248047, + "learning_rate": 9.624324769529652e-06, + "loss": 20.3206, + "step": 105880 + }, + { + "epoch": 0.21390449948892398, + "grad_norm": 40.16970443725586, + 
"learning_rate": 9.624192010019739e-06, + "loss": 18.4412, + "step": 105890 + }, + { + "epoch": 0.2139247001216078, + "grad_norm": 598.6088256835938, + "learning_rate": 9.624059227972077e-06, + "loss": 26.2211, + "step": 105900 + }, + { + "epoch": 0.21394490075429162, + "grad_norm": 228.47137451171875, + "learning_rate": 9.623926423387308e-06, + "loss": 39.7224, + "step": 105910 + }, + { + "epoch": 0.21396510138697544, + "grad_norm": 910.9898071289062, + "learning_rate": 9.623793596266084e-06, + "loss": 29.0457, + "step": 105920 + }, + { + "epoch": 0.21398530201965926, + "grad_norm": 181.25929260253906, + "learning_rate": 9.623660746609051e-06, + "loss": 28.0292, + "step": 105930 + }, + { + "epoch": 0.21400550265234308, + "grad_norm": 11.92809772491455, + "learning_rate": 9.623527874416857e-06, + "loss": 26.4717, + "step": 105940 + }, + { + "epoch": 0.21402570328502687, + "grad_norm": 529.3529052734375, + "learning_rate": 9.623394979690149e-06, + "loss": 17.4593, + "step": 105950 + }, + { + "epoch": 0.2140459039177107, + "grad_norm": 454.60321044921875, + "learning_rate": 9.623262062429573e-06, + "loss": 22.5844, + "step": 105960 + }, + { + "epoch": 0.21406610455039451, + "grad_norm": 906.16748046875, + "learning_rate": 9.623129122635778e-06, + "loss": 50.2058, + "step": 105970 + }, + { + "epoch": 0.21408630518307833, + "grad_norm": 182.6000213623047, + "learning_rate": 9.622996160309415e-06, + "loss": 16.7407, + "step": 105980 + }, + { + "epoch": 0.21410650581576215, + "grad_norm": 469.909423828125, + "learning_rate": 9.622863175451128e-06, + "loss": 14.0903, + "step": 105990 + }, + { + "epoch": 0.21412670644844597, + "grad_norm": 177.44290161132812, + "learning_rate": 9.622730168061568e-06, + "loss": 24.2696, + "step": 106000 + }, + { + "epoch": 0.2141469070811298, + "grad_norm": 913.941162109375, + "learning_rate": 9.622597138141379e-06, + "loss": 25.4878, + "step": 106010 + }, + { + "epoch": 0.2141671077138136, + "grad_norm": 214.55035400390625, + "learning_rate": 9.622464085691214e-06, + "loss": 32.1625, + "step": 106020 + }, + { + "epoch": 0.2141873083464974, + "grad_norm": 640.6804809570312, + "learning_rate": 9.622331010711718e-06, + "loss": 13.401, + "step": 106030 + }, + { + "epoch": 0.21420750897918123, + "grad_norm": 70.52410125732422, + "learning_rate": 9.622197913203543e-06, + "loss": 18.1005, + "step": 106040 + }, + { + "epoch": 0.21422770961186505, + "grad_norm": 778.2262573242188, + "learning_rate": 9.622064793167336e-06, + "loss": 28.411, + "step": 106050 + }, + { + "epoch": 0.21424791024454887, + "grad_norm": 225.76895141601562, + "learning_rate": 9.621931650603747e-06, + "loss": 21.2988, + "step": 106060 + }, + { + "epoch": 0.2142681108772327, + "grad_norm": 330.450927734375, + "learning_rate": 9.62179848551342e-06, + "loss": 21.3016, + "step": 106070 + }, + { + "epoch": 0.21428831150991648, + "grad_norm": 333.67413330078125, + "learning_rate": 9.62166529789701e-06, + "loss": 19.7966, + "step": 106080 + }, + { + "epoch": 0.2143085121426003, + "grad_norm": 299.3463439941406, + "learning_rate": 9.621532087755163e-06, + "loss": 18.3854, + "step": 106090 + }, + { + "epoch": 0.21432871277528412, + "grad_norm": 495.6922302246094, + "learning_rate": 9.62139885508853e-06, + "loss": 28.7322, + "step": 106100 + }, + { + "epoch": 0.21434891340796794, + "grad_norm": 150.03817749023438, + "learning_rate": 9.621265599897759e-06, + "loss": 23.8521, + "step": 106110 + }, + { + "epoch": 0.21436911404065176, + "grad_norm": 287.9223327636719, + "learning_rate": 9.621132322183502e-06, + 
"loss": 32.3816, + "step": 106120 + }, + { + "epoch": 0.21438931467333558, + "grad_norm": 168.50302124023438, + "learning_rate": 9.620999021946404e-06, + "loss": 20.1759, + "step": 106130 + }, + { + "epoch": 0.21440951530601937, + "grad_norm": 804.5635375976562, + "learning_rate": 9.620865699187118e-06, + "loss": 37.1679, + "step": 106140 + }, + { + "epoch": 0.2144297159387032, + "grad_norm": 235.29754638671875, + "learning_rate": 9.620732353906293e-06, + "loss": 23.3458, + "step": 106150 + }, + { + "epoch": 0.214449916571387, + "grad_norm": 150.1128692626953, + "learning_rate": 9.620598986104578e-06, + "loss": 30.2106, + "step": 106160 + }, + { + "epoch": 0.21447011720407083, + "grad_norm": 453.7975769042969, + "learning_rate": 9.620465595782626e-06, + "loss": 17.6138, + "step": 106170 + }, + { + "epoch": 0.21449031783675465, + "grad_norm": 352.6557922363281, + "learning_rate": 9.620332182941084e-06, + "loss": 21.9908, + "step": 106180 + }, + { + "epoch": 0.21451051846943847, + "grad_norm": 643.4624633789062, + "learning_rate": 9.620198747580604e-06, + "loss": 14.7946, + "step": 106190 + }, + { + "epoch": 0.2145307191021223, + "grad_norm": 365.9803161621094, + "learning_rate": 9.620065289701835e-06, + "loss": 14.0779, + "step": 106200 + }, + { + "epoch": 0.21455091973480608, + "grad_norm": 322.1335754394531, + "learning_rate": 9.619931809305428e-06, + "loss": 22.869, + "step": 106210 + }, + { + "epoch": 0.2145711203674899, + "grad_norm": 450.4367370605469, + "learning_rate": 9.619798306392034e-06, + "loss": 50.7252, + "step": 106220 + }, + { + "epoch": 0.21459132100017372, + "grad_norm": 520.1192626953125, + "learning_rate": 9.619664780962304e-06, + "loss": 20.1309, + "step": 106230 + }, + { + "epoch": 0.21461152163285754, + "grad_norm": 496.409423828125, + "learning_rate": 9.619531233016885e-06, + "loss": 23.2359, + "step": 106240 + }, + { + "epoch": 0.21463172226554136, + "grad_norm": 200.6538543701172, + "learning_rate": 9.619397662556434e-06, + "loss": 22.8069, + "step": 106250 + }, + { + "epoch": 0.21465192289822518, + "grad_norm": 465.4859313964844, + "learning_rate": 9.6192640695816e-06, + "loss": 19.7823, + "step": 106260 + }, + { + "epoch": 0.21467212353090898, + "grad_norm": 682.9042358398438, + "learning_rate": 9.619130454093031e-06, + "loss": 18.6066, + "step": 106270 + }, + { + "epoch": 0.2146923241635928, + "grad_norm": 605.0374145507812, + "learning_rate": 9.618996816091382e-06, + "loss": 23.5346, + "step": 106280 + }, + { + "epoch": 0.21471252479627662, + "grad_norm": 757.7998657226562, + "learning_rate": 9.6188631555773e-06, + "loss": 37.1718, + "step": 106290 + }, + { + "epoch": 0.21473272542896044, + "grad_norm": 583.7636108398438, + "learning_rate": 9.61872947255144e-06, + "loss": 42.9624, + "step": 106300 + }, + { + "epoch": 0.21475292606164426, + "grad_norm": 506.0607604980469, + "learning_rate": 9.618595767014456e-06, + "loss": 29.5447, + "step": 106310 + }, + { + "epoch": 0.21477312669432808, + "grad_norm": 388.08837890625, + "learning_rate": 9.618462038966994e-06, + "loss": 18.6144, + "step": 106320 + }, + { + "epoch": 0.21479332732701187, + "grad_norm": 468.95562744140625, + "learning_rate": 9.618328288409708e-06, + "loss": 23.4604, + "step": 106330 + }, + { + "epoch": 0.2148135279596957, + "grad_norm": 284.74102783203125, + "learning_rate": 9.61819451534325e-06, + "loss": 18.2775, + "step": 106340 + }, + { + "epoch": 0.2148337285923795, + "grad_norm": 279.8360595703125, + "learning_rate": 9.618060719768272e-06, + "loss": 21.2849, + "step": 106350 + }, + { + 
"epoch": 0.21485392922506333, + "grad_norm": 351.17144775390625, + "learning_rate": 9.617926901685427e-06, + "loss": 18.6386, + "step": 106360 + }, + { + "epoch": 0.21487412985774715, + "grad_norm": 30.238037109375, + "learning_rate": 9.617793061095366e-06, + "loss": 35.902, + "step": 106370 + }, + { + "epoch": 0.21489433049043097, + "grad_norm": 218.7053985595703, + "learning_rate": 9.617659197998741e-06, + "loss": 14.3084, + "step": 106380 + }, + { + "epoch": 0.2149145311231148, + "grad_norm": 165.39132690429688, + "learning_rate": 9.617525312396206e-06, + "loss": 25.3413, + "step": 106390 + }, + { + "epoch": 0.21493473175579858, + "grad_norm": 350.9925231933594, + "learning_rate": 9.617391404288412e-06, + "loss": 41.2889, + "step": 106400 + }, + { + "epoch": 0.2149549323884824, + "grad_norm": 345.8832702636719, + "learning_rate": 9.617257473676014e-06, + "loss": 12.9189, + "step": 106410 + }, + { + "epoch": 0.21497513302116622, + "grad_norm": 357.4729919433594, + "learning_rate": 9.617123520559662e-06, + "loss": 24.3934, + "step": 106420 + }, + { + "epoch": 0.21499533365385004, + "grad_norm": 186.6416778564453, + "learning_rate": 9.616989544940008e-06, + "loss": 26.8619, + "step": 106430 + }, + { + "epoch": 0.21501553428653386, + "grad_norm": 170.92840576171875, + "learning_rate": 9.61685554681771e-06, + "loss": 20.7272, + "step": 106440 + }, + { + "epoch": 0.21503573491921768, + "grad_norm": 466.51190185546875, + "learning_rate": 9.616721526193416e-06, + "loss": 25.6284, + "step": 106450 + }, + { + "epoch": 0.21505593555190147, + "grad_norm": 732.600830078125, + "learning_rate": 9.616587483067782e-06, + "loss": 34.207, + "step": 106460 + }, + { + "epoch": 0.2150761361845853, + "grad_norm": 262.58404541015625, + "learning_rate": 9.61645341744146e-06, + "loss": 16.7664, + "step": 106470 + }, + { + "epoch": 0.2150963368172691, + "grad_norm": 200.99440002441406, + "learning_rate": 9.616319329315105e-06, + "loss": 19.5444, + "step": 106480 + }, + { + "epoch": 0.21511653744995293, + "grad_norm": 504.94146728515625, + "learning_rate": 9.616185218689369e-06, + "loss": 17.5292, + "step": 106490 + }, + { + "epoch": 0.21513673808263675, + "grad_norm": 345.2034606933594, + "learning_rate": 9.616051085564905e-06, + "loss": 24.942, + "step": 106500 + }, + { + "epoch": 0.21515693871532057, + "grad_norm": 491.8196105957031, + "learning_rate": 9.61591692994237e-06, + "loss": 24.6979, + "step": 106510 + }, + { + "epoch": 0.2151771393480044, + "grad_norm": 0.0, + "learning_rate": 9.615782751822413e-06, + "loss": 18.6308, + "step": 106520 + }, + { + "epoch": 0.21519733998068818, + "grad_norm": 261.9867858886719, + "learning_rate": 9.615648551205692e-06, + "loss": 12.4077, + "step": 106530 + }, + { + "epoch": 0.215217540613372, + "grad_norm": 394.6444396972656, + "learning_rate": 9.61551432809286e-06, + "loss": 18.7726, + "step": 106540 + }, + { + "epoch": 0.21523774124605582, + "grad_norm": 223.36647033691406, + "learning_rate": 9.615380082484573e-06, + "loss": 18.6529, + "step": 106550 + }, + { + "epoch": 0.21525794187873964, + "grad_norm": 515.4940795898438, + "learning_rate": 9.61524581438148e-06, + "loss": 22.5348, + "step": 106560 + }, + { + "epoch": 0.21527814251142346, + "grad_norm": 606.5992431640625, + "learning_rate": 9.61511152378424e-06, + "loss": 25.2943, + "step": 106570 + }, + { + "epoch": 0.21529834314410728, + "grad_norm": 405.7440185546875, + "learning_rate": 9.614977210693505e-06, + "loss": 30.6357, + "step": 106580 + }, + { + "epoch": 0.21531854377679108, + "grad_norm": 
481.6194152832031, + "learning_rate": 9.614842875109933e-06, + "loss": 26.367, + "step": 106590 + }, + { + "epoch": 0.2153387444094749, + "grad_norm": 341.5598449707031, + "learning_rate": 9.614708517034176e-06, + "loss": 15.5823, + "step": 106600 + }, + { + "epoch": 0.21535894504215872, + "grad_norm": 346.8001708984375, + "learning_rate": 9.61457413646689e-06, + "loss": 33.0959, + "step": 106610 + }, + { + "epoch": 0.21537914567484254, + "grad_norm": 271.7821960449219, + "learning_rate": 9.614439733408726e-06, + "loss": 57.9604, + "step": 106620 + }, + { + "epoch": 0.21539934630752636, + "grad_norm": 79.5567398071289, + "learning_rate": 9.614305307860346e-06, + "loss": 22.1122, + "step": 106630 + }, + { + "epoch": 0.21541954694021018, + "grad_norm": 5.893133163452148, + "learning_rate": 9.6141708598224e-06, + "loss": 23.4139, + "step": 106640 + }, + { + "epoch": 0.21543974757289397, + "grad_norm": 450.4199523925781, + "learning_rate": 9.614036389295545e-06, + "loss": 35.7561, + "step": 106650 + }, + { + "epoch": 0.2154599482055778, + "grad_norm": 395.6968078613281, + "learning_rate": 9.613901896280436e-06, + "loss": 31.719, + "step": 106660 + }, + { + "epoch": 0.2154801488382616, + "grad_norm": 14.778434753417969, + "learning_rate": 9.61376738077773e-06, + "loss": 19.4138, + "step": 106670 + }, + { + "epoch": 0.21550034947094543, + "grad_norm": 81.33409118652344, + "learning_rate": 9.613632842788079e-06, + "loss": 19.4228, + "step": 106680 + }, + { + "epoch": 0.21552055010362925, + "grad_norm": 655.814697265625, + "learning_rate": 9.613498282312143e-06, + "loss": 37.7355, + "step": 106690 + }, + { + "epoch": 0.21554075073631307, + "grad_norm": 945.9786376953125, + "learning_rate": 9.613363699350575e-06, + "loss": 33.2514, + "step": 106700 + }, + { + "epoch": 0.2155609513689969, + "grad_norm": 613.876708984375, + "learning_rate": 9.613229093904033e-06, + "loss": 20.2544, + "step": 106710 + }, + { + "epoch": 0.21558115200168068, + "grad_norm": 347.0071716308594, + "learning_rate": 9.613094465973171e-06, + "loss": 19.2405, + "step": 106720 + }, + { + "epoch": 0.2156013526343645, + "grad_norm": 391.2256774902344, + "learning_rate": 9.612959815558645e-06, + "loss": 21.5669, + "step": 106730 + }, + { + "epoch": 0.21562155326704832, + "grad_norm": 262.7676696777344, + "learning_rate": 9.612825142661115e-06, + "loss": 20.3953, + "step": 106740 + }, + { + "epoch": 0.21564175389973214, + "grad_norm": 215.56687927246094, + "learning_rate": 9.612690447281233e-06, + "loss": 18.0082, + "step": 106750 + }, + { + "epoch": 0.21566195453241596, + "grad_norm": 419.5194396972656, + "learning_rate": 9.612555729419656e-06, + "loss": 32.976, + "step": 106760 + }, + { + "epoch": 0.21568215516509978, + "grad_norm": 352.706298828125, + "learning_rate": 9.612420989077045e-06, + "loss": 15.573, + "step": 106770 + }, + { + "epoch": 0.21570235579778357, + "grad_norm": 788.6929321289062, + "learning_rate": 9.61228622625405e-06, + "loss": 40.8067, + "step": 106780 + }, + { + "epoch": 0.2157225564304674, + "grad_norm": 252.27297973632812, + "learning_rate": 9.612151440951334e-06, + "loss": 28.4294, + "step": 106790 + }, + { + "epoch": 0.2157427570631512, + "grad_norm": 272.7535400390625, + "learning_rate": 9.612016633169552e-06, + "loss": 23.2759, + "step": 106800 + }, + { + "epoch": 0.21576295769583503, + "grad_norm": 226.66220092773438, + "learning_rate": 9.611881802909358e-06, + "loss": 46.624, + "step": 106810 + }, + { + "epoch": 0.21578315832851885, + "grad_norm": 302.8719177246094, + "learning_rate": 
9.611746950171412e-06, + "loss": 29.6137, + "step": 106820 + }, + { + "epoch": 0.21580335896120267, + "grad_norm": 571.4275512695312, + "learning_rate": 9.61161207495637e-06, + "loss": 23.2898, + "step": 106830 + }, + { + "epoch": 0.2158235595938865, + "grad_norm": 164.63987731933594, + "learning_rate": 9.611477177264892e-06, + "loss": 27.7975, + "step": 106840 + }, + { + "epoch": 0.21584376022657029, + "grad_norm": 535.705810546875, + "learning_rate": 9.611342257097632e-06, + "loss": 41.7002, + "step": 106850 + }, + { + "epoch": 0.2158639608592541, + "grad_norm": 590.9491577148438, + "learning_rate": 9.61120731445525e-06, + "loss": 17.6121, + "step": 106860 + }, + { + "epoch": 0.21588416149193793, + "grad_norm": 6.254391670227051, + "learning_rate": 9.611072349338402e-06, + "loss": 22.8545, + "step": 106870 + }, + { + "epoch": 0.21590436212462175, + "grad_norm": 286.39208984375, + "learning_rate": 9.610937361747747e-06, + "loss": 14.3659, + "step": 106880 + }, + { + "epoch": 0.21592456275730557, + "grad_norm": 967.7152099609375, + "learning_rate": 9.610802351683943e-06, + "loss": 34.7506, + "step": 106890 + }, + { + "epoch": 0.21594476338998939, + "grad_norm": 17.1362247467041, + "learning_rate": 9.610667319147648e-06, + "loss": 15.6267, + "step": 106900 + }, + { + "epoch": 0.21596496402267318, + "grad_norm": 359.0757751464844, + "learning_rate": 9.610532264139518e-06, + "loss": 21.3726, + "step": 106910 + }, + { + "epoch": 0.215985164655357, + "grad_norm": 388.51324462890625, + "learning_rate": 9.610397186660214e-06, + "loss": 16.1625, + "step": 106920 + }, + { + "epoch": 0.21600536528804082, + "grad_norm": 455.5050048828125, + "learning_rate": 9.610262086710393e-06, + "loss": 15.9479, + "step": 106930 + }, + { + "epoch": 0.21602556592072464, + "grad_norm": 376.7058410644531, + "learning_rate": 9.610126964290714e-06, + "loss": 16.8989, + "step": 106940 + }, + { + "epoch": 0.21604576655340846, + "grad_norm": 345.0117492675781, + "learning_rate": 9.609991819401836e-06, + "loss": 31.7803, + "step": 106950 + }, + { + "epoch": 0.21606596718609228, + "grad_norm": 252.3141632080078, + "learning_rate": 9.609856652044417e-06, + "loss": 19.7048, + "step": 106960 + }, + { + "epoch": 0.21608616781877607, + "grad_norm": 678.9373779296875, + "learning_rate": 9.609721462219115e-06, + "loss": 26.4886, + "step": 106970 + }, + { + "epoch": 0.2161063684514599, + "grad_norm": 409.2751159667969, + "learning_rate": 9.609586249926589e-06, + "loss": 30.0331, + "step": 106980 + }, + { + "epoch": 0.2161265690841437, + "grad_norm": 473.8980407714844, + "learning_rate": 9.609451015167498e-06, + "loss": 16.8544, + "step": 106990 + }, + { + "epoch": 0.21614676971682753, + "grad_norm": 390.9248046875, + "learning_rate": 9.609315757942504e-06, + "loss": 27.1546, + "step": 107000 + }, + { + "epoch": 0.21616697034951135, + "grad_norm": 29.839492797851562, + "learning_rate": 9.609180478252262e-06, + "loss": 25.4001, + "step": 107010 + }, + { + "epoch": 0.21618717098219517, + "grad_norm": 298.6227111816406, + "learning_rate": 9.609045176097435e-06, + "loss": 25.3501, + "step": 107020 + }, + { + "epoch": 0.216207371614879, + "grad_norm": 784.2269287109375, + "learning_rate": 9.608909851478681e-06, + "loss": 17.4117, + "step": 107030 + }, + { + "epoch": 0.21622757224756278, + "grad_norm": 347.4877014160156, + "learning_rate": 9.608774504396659e-06, + "loss": 17.0232, + "step": 107040 + }, + { + "epoch": 0.2162477728802466, + "grad_norm": 592.7592163085938, + "learning_rate": 9.608639134852028e-06, + "loss": 35.3994, + 
"step": 107050 + }, + { + "epoch": 0.21626797351293042, + "grad_norm": 275.7370300292969, + "learning_rate": 9.60850374284545e-06, + "loss": 29.4426, + "step": 107060 + }, + { + "epoch": 0.21628817414561424, + "grad_norm": 101.7520751953125, + "learning_rate": 9.608368328377582e-06, + "loss": 24.4883, + "step": 107070 + }, + { + "epoch": 0.21630837477829806, + "grad_norm": 369.6875, + "learning_rate": 9.608232891449088e-06, + "loss": 22.2167, + "step": 107080 + }, + { + "epoch": 0.21632857541098188, + "grad_norm": 453.6797180175781, + "learning_rate": 9.608097432060626e-06, + "loss": 28.343, + "step": 107090 + }, + { + "epoch": 0.21634877604366567, + "grad_norm": 128.98529052734375, + "learning_rate": 9.607961950212855e-06, + "loss": 31.0087, + "step": 107100 + }, + { + "epoch": 0.2163689766763495, + "grad_norm": 189.78399658203125, + "learning_rate": 9.607826445906434e-06, + "loss": 28.4285, + "step": 107110 + }, + { + "epoch": 0.21638917730903331, + "grad_norm": 274.7174072265625, + "learning_rate": 9.607690919142028e-06, + "loss": 24.8506, + "step": 107120 + }, + { + "epoch": 0.21640937794171713, + "grad_norm": 725.8607788085938, + "learning_rate": 9.607555369920296e-06, + "loss": 18.3541, + "step": 107130 + }, + { + "epoch": 0.21642957857440095, + "grad_norm": 507.2761535644531, + "learning_rate": 9.607419798241897e-06, + "loss": 22.0484, + "step": 107140 + }, + { + "epoch": 0.21644977920708477, + "grad_norm": 570.9094848632812, + "learning_rate": 9.607284204107493e-06, + "loss": 20.056, + "step": 107150 + }, + { + "epoch": 0.2164699798397686, + "grad_norm": 208.45387268066406, + "learning_rate": 9.607148587517746e-06, + "loss": 18.9086, + "step": 107160 + }, + { + "epoch": 0.2164901804724524, + "grad_norm": 378.5279846191406, + "learning_rate": 9.607012948473314e-06, + "loss": 28.591, + "step": 107170 + }, + { + "epoch": 0.2165103811051362, + "grad_norm": 308.3681945800781, + "learning_rate": 9.606877286974859e-06, + "loss": 28.9336, + "step": 107180 + }, + { + "epoch": 0.21653058173782003, + "grad_norm": 543.411865234375, + "learning_rate": 9.606741603023043e-06, + "loss": 25.0842, + "step": 107190 + }, + { + "epoch": 0.21655078237050385, + "grad_norm": 321.71380615234375, + "learning_rate": 9.606605896618528e-06, + "loss": 19.3353, + "step": 107200 + }, + { + "epoch": 0.21657098300318767, + "grad_norm": 296.8135070800781, + "learning_rate": 9.606470167761975e-06, + "loss": 29.9971, + "step": 107210 + }, + { + "epoch": 0.2165911836358715, + "grad_norm": 807.8027954101562, + "learning_rate": 9.606334416454045e-06, + "loss": 21.1007, + "step": 107220 + }, + { + "epoch": 0.21661138426855528, + "grad_norm": 192.43112182617188, + "learning_rate": 9.606198642695398e-06, + "loss": 17.6655, + "step": 107230 + }, + { + "epoch": 0.2166315849012391, + "grad_norm": 502.1242370605469, + "learning_rate": 9.606062846486698e-06, + "loss": 26.5737, + "step": 107240 + }, + { + "epoch": 0.21665178553392292, + "grad_norm": 261.8995666503906, + "learning_rate": 9.605927027828608e-06, + "loss": 29.288, + "step": 107250 + }, + { + "epoch": 0.21667198616660674, + "grad_norm": 581.6419067382812, + "learning_rate": 9.605791186721787e-06, + "loss": 55.1319, + "step": 107260 + }, + { + "epoch": 0.21669218679929056, + "grad_norm": 714.2874145507812, + "learning_rate": 9.605655323166899e-06, + "loss": 22.0499, + "step": 107270 + }, + { + "epoch": 0.21671238743197438, + "grad_norm": 754.3344116210938, + "learning_rate": 9.605519437164604e-06, + "loss": 18.609, + "step": 107280 + }, + { + "epoch": 
0.21673258806465817, + "grad_norm": 246.62757873535156, + "learning_rate": 9.605383528715567e-06, + "loss": 25.0839, + "step": 107290 + }, + { + "epoch": 0.216752788697342, + "grad_norm": 207.9958953857422, + "learning_rate": 9.605247597820448e-06, + "loss": 16.158, + "step": 107300 + }, + { + "epoch": 0.2167729893300258, + "grad_norm": 906.9510498046875, + "learning_rate": 9.605111644479913e-06, + "loss": 24.2108, + "step": 107310 + }, + { + "epoch": 0.21679318996270963, + "grad_norm": 886.7114868164062, + "learning_rate": 9.604975668694621e-06, + "loss": 20.522, + "step": 107320 + }, + { + "epoch": 0.21681339059539345, + "grad_norm": 625.944091796875, + "learning_rate": 9.604839670465236e-06, + "loss": 27.3642, + "step": 107330 + }, + { + "epoch": 0.21683359122807727, + "grad_norm": 304.8418884277344, + "learning_rate": 9.604703649792421e-06, + "loss": 23.6104, + "step": 107340 + }, + { + "epoch": 0.2168537918607611, + "grad_norm": 721.0968017578125, + "learning_rate": 9.60456760667684e-06, + "loss": 19.9008, + "step": 107350 + }, + { + "epoch": 0.21687399249344488, + "grad_norm": 625.4487915039062, + "learning_rate": 9.604431541119155e-06, + "loss": 29.7023, + "step": 107360 + }, + { + "epoch": 0.2168941931261287, + "grad_norm": 378.6073913574219, + "learning_rate": 9.604295453120028e-06, + "loss": 18.9232, + "step": 107370 + }, + { + "epoch": 0.21691439375881252, + "grad_norm": 556.75439453125, + "learning_rate": 9.604159342680125e-06, + "loss": 18.6585, + "step": 107380 + }, + { + "epoch": 0.21693459439149634, + "grad_norm": 1016.4850463867188, + "learning_rate": 9.604023209800106e-06, + "loss": 50.3389, + "step": 107390 + }, + { + "epoch": 0.21695479502418016, + "grad_norm": 438.6138916015625, + "learning_rate": 9.603887054480636e-06, + "loss": 14.1566, + "step": 107400 + }, + { + "epoch": 0.21697499565686398, + "grad_norm": 349.15924072265625, + "learning_rate": 9.60375087672238e-06, + "loss": 37.0312, + "step": 107410 + }, + { + "epoch": 0.21699519628954778, + "grad_norm": 451.6742248535156, + "learning_rate": 9.603614676526e-06, + "loss": 17.6007, + "step": 107420 + }, + { + "epoch": 0.2170153969222316, + "grad_norm": 675.8563842773438, + "learning_rate": 9.603478453892161e-06, + "loss": 51.1016, + "step": 107430 + }, + { + "epoch": 0.21703559755491542, + "grad_norm": 371.1186828613281, + "learning_rate": 9.603342208821526e-06, + "loss": 21.7236, + "step": 107440 + }, + { + "epoch": 0.21705579818759924, + "grad_norm": 523.7366943359375, + "learning_rate": 9.60320594131476e-06, + "loss": 32.1254, + "step": 107450 + }, + { + "epoch": 0.21707599882028306, + "grad_norm": 411.5473327636719, + "learning_rate": 9.603069651372526e-06, + "loss": 25.7108, + "step": 107460 + }, + { + "epoch": 0.21709619945296688, + "grad_norm": 419.90386962890625, + "learning_rate": 9.602933338995489e-06, + "loss": 31.5363, + "step": 107470 + }, + { + "epoch": 0.2171164000856507, + "grad_norm": 211.2162628173828, + "learning_rate": 9.602797004184311e-06, + "loss": 17.8802, + "step": 107480 + }, + { + "epoch": 0.2171366007183345, + "grad_norm": 178.4124755859375, + "learning_rate": 9.602660646939663e-06, + "loss": 20.6594, + "step": 107490 + }, + { + "epoch": 0.2171568013510183, + "grad_norm": 436.7868347167969, + "learning_rate": 9.602524267262202e-06, + "loss": 17.0319, + "step": 107500 + }, + { + "epoch": 0.21717700198370213, + "grad_norm": 613.3228149414062, + "learning_rate": 9.602387865152597e-06, + "loss": 19.3349, + "step": 107510 + }, + { + "epoch": 0.21719720261638595, + "grad_norm": 
271.5887145996094, + "learning_rate": 9.602251440611512e-06, + "loss": 14.5195, + "step": 107520 + }, + { + "epoch": 0.21721740324906977, + "grad_norm": 226.7190399169922, + "learning_rate": 9.602114993639611e-06, + "loss": 30.9531, + "step": 107530 + }, + { + "epoch": 0.2172376038817536, + "grad_norm": 1274.6104736328125, + "learning_rate": 9.60197852423756e-06, + "loss": 24.8917, + "step": 107540 + }, + { + "epoch": 0.21725780451443738, + "grad_norm": 257.9808349609375, + "learning_rate": 9.601842032406024e-06, + "loss": 14.5826, + "step": 107550 + }, + { + "epoch": 0.2172780051471212, + "grad_norm": 138.10923767089844, + "learning_rate": 9.601705518145668e-06, + "loss": 18.0225, + "step": 107560 + }, + { + "epoch": 0.21729820577980502, + "grad_norm": 652.5715942382812, + "learning_rate": 9.601568981457156e-06, + "loss": 20.4635, + "step": 107570 + }, + { + "epoch": 0.21731840641248884, + "grad_norm": 677.6094970703125, + "learning_rate": 9.601432422341156e-06, + "loss": 38.3076, + "step": 107580 + }, + { + "epoch": 0.21733860704517266, + "grad_norm": 172.53482055664062, + "learning_rate": 9.601295840798333e-06, + "loss": 27.0276, + "step": 107590 + }, + { + "epoch": 0.21735880767785648, + "grad_norm": 790.575439453125, + "learning_rate": 9.601159236829353e-06, + "loss": 35.0712, + "step": 107600 + }, + { + "epoch": 0.21737900831054027, + "grad_norm": 952.8884887695312, + "learning_rate": 9.60102261043488e-06, + "loss": 32.1461, + "step": 107610 + }, + { + "epoch": 0.2173992089432241, + "grad_norm": 219.7374725341797, + "learning_rate": 9.60088596161558e-06, + "loss": 29.0019, + "step": 107620 + }, + { + "epoch": 0.2174194095759079, + "grad_norm": 458.9838562011719, + "learning_rate": 9.60074929037212e-06, + "loss": 23.6285, + "step": 107630 + }, + { + "epoch": 0.21743961020859173, + "grad_norm": 432.98138427734375, + "learning_rate": 9.600612596705167e-06, + "loss": 21.9855, + "step": 107640 + }, + { + "epoch": 0.21745981084127555, + "grad_norm": 760.4342651367188, + "learning_rate": 9.600475880615385e-06, + "loss": 34.3365, + "step": 107650 + }, + { + "epoch": 0.21748001147395937, + "grad_norm": 322.08953857421875, + "learning_rate": 9.600339142103441e-06, + "loss": 23.2254, + "step": 107660 + }, + { + "epoch": 0.2175002121066432, + "grad_norm": 737.721923828125, + "learning_rate": 9.600202381170004e-06, + "loss": 53.575, + "step": 107670 + }, + { + "epoch": 0.21752041273932698, + "grad_norm": 923.2410888671875, + "learning_rate": 9.600065597815738e-06, + "loss": 50.5392, + "step": 107680 + }, + { + "epoch": 0.2175406133720108, + "grad_norm": 271.1661376953125, + "learning_rate": 9.59992879204131e-06, + "loss": 37.8647, + "step": 107690 + }, + { + "epoch": 0.21756081400469462, + "grad_norm": 338.381591796875, + "learning_rate": 9.599791963847388e-06, + "loss": 27.4579, + "step": 107700 + }, + { + "epoch": 0.21758101463737844, + "grad_norm": 256.3115234375, + "learning_rate": 9.599655113234635e-06, + "loss": 13.811, + "step": 107710 + }, + { + "epoch": 0.21760121527006226, + "grad_norm": 391.17803955078125, + "learning_rate": 9.599518240203722e-06, + "loss": 15.3174, + "step": 107720 + }, + { + "epoch": 0.21762141590274608, + "grad_norm": 588.9943237304688, + "learning_rate": 9.599381344755315e-06, + "loss": 13.929, + "step": 107730 + }, + { + "epoch": 0.21764161653542988, + "grad_norm": 257.79913330078125, + "learning_rate": 9.599244426890082e-06, + "loss": 9.7967, + "step": 107740 + }, + { + "epoch": 0.2176618171681137, + "grad_norm": 558.75244140625, + "learning_rate": 
9.59910748660869e-06, + "loss": 18.7975, + "step": 107750 + }, + { + "epoch": 0.21768201780079752, + "grad_norm": 470.57305908203125, + "learning_rate": 9.598970523911803e-06, + "loss": 40.6686, + "step": 107760 + }, + { + "epoch": 0.21770221843348134, + "grad_norm": 563.8414916992188, + "learning_rate": 9.598833538800094e-06, + "loss": 27.8138, + "step": 107770 + }, + { + "epoch": 0.21772241906616516, + "grad_norm": 31.067607879638672, + "learning_rate": 9.598696531274227e-06, + "loss": 22.4565, + "step": 107780 + }, + { + "epoch": 0.21774261969884898, + "grad_norm": 517.5081787109375, + "learning_rate": 9.598559501334872e-06, + "loss": 24.7159, + "step": 107790 + }, + { + "epoch": 0.2177628203315328, + "grad_norm": 511.7974548339844, + "learning_rate": 9.598422448982697e-06, + "loss": 25.7823, + "step": 107800 + }, + { + "epoch": 0.2177830209642166, + "grad_norm": 656.6521606445312, + "learning_rate": 9.598285374218367e-06, + "loss": 17.8043, + "step": 107810 + }, + { + "epoch": 0.2178032215969004, + "grad_norm": 465.1502380371094, + "learning_rate": 9.59814827704255e-06, + "loss": 12.7316, + "step": 107820 + }, + { + "epoch": 0.21782342222958423, + "grad_norm": 231.67138671875, + "learning_rate": 9.598011157455917e-06, + "loss": 15.0623, + "step": 107830 + }, + { + "epoch": 0.21784362286226805, + "grad_norm": 346.3719787597656, + "learning_rate": 9.597874015459136e-06, + "loss": 25.4132, + "step": 107840 + }, + { + "epoch": 0.21786382349495187, + "grad_norm": 277.23211669921875, + "learning_rate": 9.597736851052874e-06, + "loss": 24.0558, + "step": 107850 + }, + { + "epoch": 0.2178840241276357, + "grad_norm": 830.926513671875, + "learning_rate": 9.597599664237801e-06, + "loss": 16.0927, + "step": 107860 + }, + { + "epoch": 0.21790422476031948, + "grad_norm": 175.6227264404297, + "learning_rate": 9.597462455014585e-06, + "loss": 35.8062, + "step": 107870 + }, + { + "epoch": 0.2179244253930033, + "grad_norm": 2479.54052734375, + "learning_rate": 9.597325223383895e-06, + "loss": 31.0808, + "step": 107880 + }, + { + "epoch": 0.21794462602568712, + "grad_norm": 187.91407775878906, + "learning_rate": 9.597187969346398e-06, + "loss": 13.4602, + "step": 107890 + }, + { + "epoch": 0.21796482665837094, + "grad_norm": 132.35665893554688, + "learning_rate": 9.597050692902765e-06, + "loss": 21.4644, + "step": 107900 + }, + { + "epoch": 0.21798502729105476, + "grad_norm": 367.8532409667969, + "learning_rate": 9.596913394053664e-06, + "loss": 24.8335, + "step": 107910 + }, + { + "epoch": 0.21800522792373858, + "grad_norm": 546.89306640625, + "learning_rate": 9.596776072799767e-06, + "loss": 19.5862, + "step": 107920 + }, + { + "epoch": 0.21802542855642237, + "grad_norm": 348.8690185546875, + "learning_rate": 9.596638729141739e-06, + "loss": 10.1862, + "step": 107930 + }, + { + "epoch": 0.2180456291891062, + "grad_norm": 241.12767028808594, + "learning_rate": 9.59650136308025e-06, + "loss": 17.195, + "step": 107940 + }, + { + "epoch": 0.21806582982179, + "grad_norm": 635.4271240234375, + "learning_rate": 9.596363974615973e-06, + "loss": 25.7954, + "step": 107950 + }, + { + "epoch": 0.21808603045447383, + "grad_norm": 255.62783813476562, + "learning_rate": 9.596226563749575e-06, + "loss": 28.5505, + "step": 107960 + }, + { + "epoch": 0.21810623108715765, + "grad_norm": 165.38198852539062, + "learning_rate": 9.596089130481727e-06, + "loss": 13.5765, + "step": 107970 + }, + { + "epoch": 0.21812643171984147, + "grad_norm": 506.2337951660156, + "learning_rate": 9.595951674813096e-06, + "loss": 24.8279, + 
"step": 107980 + }, + { + "epoch": 0.2181466323525253, + "grad_norm": 225.37368774414062, + "learning_rate": 9.595814196744355e-06, + "loss": 12.9455, + "step": 107990 + }, + { + "epoch": 0.21816683298520909, + "grad_norm": 201.5785675048828, + "learning_rate": 9.595676696276173e-06, + "loss": 21.0967, + "step": 108000 + }, + { + "epoch": 0.2181870336178929, + "grad_norm": 1118.9906005859375, + "learning_rate": 9.595539173409222e-06, + "loss": 26.1802, + "step": 108010 + }, + { + "epoch": 0.21820723425057673, + "grad_norm": 380.7827453613281, + "learning_rate": 9.595401628144166e-06, + "loss": 24.0537, + "step": 108020 + }, + { + "epoch": 0.21822743488326055, + "grad_norm": 0.0, + "learning_rate": 9.595264060481684e-06, + "loss": 19.8214, + "step": 108030 + }, + { + "epoch": 0.21824763551594437, + "grad_norm": 196.51121520996094, + "learning_rate": 9.59512647042244e-06, + "loss": 19.1049, + "step": 108040 + }, + { + "epoch": 0.21826783614862819, + "grad_norm": 146.11227416992188, + "learning_rate": 9.594988857967107e-06, + "loss": 23.5608, + "step": 108050 + }, + { + "epoch": 0.21828803678131198, + "grad_norm": 262.90264892578125, + "learning_rate": 9.594851223116357e-06, + "loss": 13.5468, + "step": 108060 + }, + { + "epoch": 0.2183082374139958, + "grad_norm": 575.9906616210938, + "learning_rate": 9.594713565870859e-06, + "loss": 15.4239, + "step": 108070 + }, + { + "epoch": 0.21832843804667962, + "grad_norm": 368.9980163574219, + "learning_rate": 9.594575886231284e-06, + "loss": 18.8937, + "step": 108080 + }, + { + "epoch": 0.21834863867936344, + "grad_norm": 323.3814392089844, + "learning_rate": 9.594438184198302e-06, + "loss": 50.8597, + "step": 108090 + }, + { + "epoch": 0.21836883931204726, + "grad_norm": 762.1126098632812, + "learning_rate": 9.594300459772588e-06, + "loss": 16.905, + "step": 108100 + }, + { + "epoch": 0.21838903994473108, + "grad_norm": 782.7981567382812, + "learning_rate": 9.59416271295481e-06, + "loss": 22.6836, + "step": 108110 + }, + { + "epoch": 0.2184092405774149, + "grad_norm": 413.7884521484375, + "learning_rate": 9.594024943745638e-06, + "loss": 34.0252, + "step": 108120 + }, + { + "epoch": 0.2184294412100987, + "grad_norm": 502.3115539550781, + "learning_rate": 9.59388715214575e-06, + "loss": 27.0354, + "step": 108130 + }, + { + "epoch": 0.2184496418427825, + "grad_norm": 6.615095138549805, + "learning_rate": 9.593749338155809e-06, + "loss": 12.11, + "step": 108140 + }, + { + "epoch": 0.21846984247546633, + "grad_norm": 203.57705688476562, + "learning_rate": 9.593611501776493e-06, + "loss": 25.7498, + "step": 108150 + }, + { + "epoch": 0.21849004310815015, + "grad_norm": 627.1359252929688, + "learning_rate": 9.59347364300847e-06, + "loss": 23.113, + "step": 108160 + }, + { + "epoch": 0.21851024374083397, + "grad_norm": 1197.2982177734375, + "learning_rate": 9.593335761852416e-06, + "loss": 27.1871, + "step": 108170 + }, + { + "epoch": 0.2185304443735178, + "grad_norm": 403.3395080566406, + "learning_rate": 9.593197858309e-06, + "loss": 30.3373, + "step": 108180 + }, + { + "epoch": 0.21855064500620158, + "grad_norm": 379.2122802734375, + "learning_rate": 9.593059932378894e-06, + "loss": 27.3166, + "step": 108190 + }, + { + "epoch": 0.2185708456388854, + "grad_norm": 180.17523193359375, + "learning_rate": 9.592921984062771e-06, + "loss": 20.1945, + "step": 108200 + }, + { + "epoch": 0.21859104627156922, + "grad_norm": 225.83389282226562, + "learning_rate": 9.592784013361303e-06, + "loss": 33.6536, + "step": 108210 + }, + { + "epoch": 0.21861124690425304, + 
"grad_norm": 355.9768371582031, + "learning_rate": 9.592646020275165e-06, + "loss": 29.3099, + "step": 108220 + }, + { + "epoch": 0.21863144753693686, + "grad_norm": 580.0260009765625, + "learning_rate": 9.592508004805023e-06, + "loss": 28.8732, + "step": 108230 + }, + { + "epoch": 0.21865164816962068, + "grad_norm": 102.36605834960938, + "learning_rate": 9.592369966951557e-06, + "loss": 61.1071, + "step": 108240 + }, + { + "epoch": 0.21867184880230447, + "grad_norm": 735.1100463867188, + "learning_rate": 9.592231906715437e-06, + "loss": 31.9127, + "step": 108250 + }, + { + "epoch": 0.2186920494349883, + "grad_norm": 1059.560546875, + "learning_rate": 9.592093824097335e-06, + "loss": 37.0552, + "step": 108260 + }, + { + "epoch": 0.21871225006767211, + "grad_norm": 342.75823974609375, + "learning_rate": 9.591955719097924e-06, + "loss": 29.1568, + "step": 108270 + }, + { + "epoch": 0.21873245070035593, + "grad_norm": 442.01934814453125, + "learning_rate": 9.591817591717878e-06, + "loss": 20.9597, + "step": 108280 + }, + { + "epoch": 0.21875265133303975, + "grad_norm": 566.12548828125, + "learning_rate": 9.591679441957871e-06, + "loss": 25.3353, + "step": 108290 + }, + { + "epoch": 0.21877285196572357, + "grad_norm": 371.56494140625, + "learning_rate": 9.591541269818574e-06, + "loss": 22.3805, + "step": 108300 + }, + { + "epoch": 0.2187930525984074, + "grad_norm": 259.03778076171875, + "learning_rate": 9.591403075300662e-06, + "loss": 20.911, + "step": 108310 + }, + { + "epoch": 0.2188132532310912, + "grad_norm": 588.49462890625, + "learning_rate": 9.591264858404809e-06, + "loss": 26.2738, + "step": 108320 + }, + { + "epoch": 0.218833453863775, + "grad_norm": 344.5282897949219, + "learning_rate": 9.591126619131687e-06, + "loss": 21.055, + "step": 108330 + }, + { + "epoch": 0.21885365449645883, + "grad_norm": 162.2899627685547, + "learning_rate": 9.590988357481972e-06, + "loss": 8.9625, + "step": 108340 + }, + { + "epoch": 0.21887385512914265, + "grad_norm": 599.2951049804688, + "learning_rate": 9.590850073456335e-06, + "loss": 29.8846, + "step": 108350 + }, + { + "epoch": 0.21889405576182647, + "grad_norm": 317.268798828125, + "learning_rate": 9.590711767055454e-06, + "loss": 17.3586, + "step": 108360 + }, + { + "epoch": 0.2189142563945103, + "grad_norm": 329.30364990234375, + "learning_rate": 9.590573438279997e-06, + "loss": 19.8385, + "step": 108370 + }, + { + "epoch": 0.21893445702719408, + "grad_norm": 664.9000854492188, + "learning_rate": 9.590435087130645e-06, + "loss": 36.2991, + "step": 108380 + }, + { + "epoch": 0.2189546576598779, + "grad_norm": 495.962890625, + "learning_rate": 9.590296713608068e-06, + "loss": 16.7955, + "step": 108390 + }, + { + "epoch": 0.21897485829256172, + "grad_norm": 474.8290100097656, + "learning_rate": 9.590158317712941e-06, + "loss": 20.0439, + "step": 108400 + }, + { + "epoch": 0.21899505892524554, + "grad_norm": 149.83889770507812, + "learning_rate": 9.59001989944594e-06, + "loss": 22.0663, + "step": 108410 + }, + { + "epoch": 0.21901525955792936, + "grad_norm": 467.04229736328125, + "learning_rate": 9.589881458807738e-06, + "loss": 26.8434, + "step": 108420 + }, + { + "epoch": 0.21903546019061318, + "grad_norm": 445.6435241699219, + "learning_rate": 9.589742995799011e-06, + "loss": 25.06, + "step": 108430 + }, + { + "epoch": 0.219055660823297, + "grad_norm": 739.6090698242188, + "learning_rate": 9.589604510420434e-06, + "loss": 22.3647, + "step": 108440 + }, + { + "epoch": 0.2190758614559808, + "grad_norm": 485.4827575683594, + "learning_rate": 
9.58946600267268e-06, + "loss": 15.544, + "step": 108450 + }, + { + "epoch": 0.2190960620886646, + "grad_norm": 1479.1304931640625, + "learning_rate": 9.589327472556424e-06, + "loss": 47.4186, + "step": 108460 + }, + { + "epoch": 0.21911626272134843, + "grad_norm": 283.0572204589844, + "learning_rate": 9.589188920072346e-06, + "loss": 18.6419, + "step": 108470 + }, + { + "epoch": 0.21913646335403225, + "grad_norm": 187.07830810546875, + "learning_rate": 9.589050345221116e-06, + "loss": 19.724, + "step": 108480 + }, + { + "epoch": 0.21915666398671607, + "grad_norm": 414.17437744140625, + "learning_rate": 9.58891174800341e-06, + "loss": 21.7915, + "step": 108490 + }, + { + "epoch": 0.2191768646193999, + "grad_norm": 627.4232177734375, + "learning_rate": 9.588773128419907e-06, + "loss": 30.9095, + "step": 108500 + }, + { + "epoch": 0.21919706525208368, + "grad_norm": 338.1419372558594, + "learning_rate": 9.588634486471279e-06, + "loss": 17.5366, + "step": 108510 + }, + { + "epoch": 0.2192172658847675, + "grad_norm": 997.3385009765625, + "learning_rate": 9.588495822158203e-06, + "loss": 29.2473, + "step": 108520 + }, + { + "epoch": 0.21923746651745132, + "grad_norm": 406.22979736328125, + "learning_rate": 9.588357135481355e-06, + "loss": 26.403, + "step": 108530 + }, + { + "epoch": 0.21925766715013514, + "grad_norm": 400.2430419921875, + "learning_rate": 9.58821842644141e-06, + "loss": 21.4057, + "step": 108540 + }, + { + "epoch": 0.21927786778281896, + "grad_norm": 607.7499389648438, + "learning_rate": 9.588079695039048e-06, + "loss": 36.6994, + "step": 108550 + }, + { + "epoch": 0.21929806841550278, + "grad_norm": 506.59906005859375, + "learning_rate": 9.587940941274939e-06, + "loss": 25.7518, + "step": 108560 + }, + { + "epoch": 0.21931826904818658, + "grad_norm": 186.6425323486328, + "learning_rate": 9.587802165149763e-06, + "loss": 21.7186, + "step": 108570 + }, + { + "epoch": 0.2193384696808704, + "grad_norm": 369.2998046875, + "learning_rate": 9.587663366664197e-06, + "loss": 22.8296, + "step": 108580 + }, + { + "epoch": 0.21935867031355422, + "grad_norm": 115.20941162109375, + "learning_rate": 9.587524545818914e-06, + "loss": 23.3687, + "step": 108590 + }, + { + "epoch": 0.21937887094623804, + "grad_norm": 725.7109985351562, + "learning_rate": 9.587385702614593e-06, + "loss": 20.6851, + "step": 108600 + }, + { + "epoch": 0.21939907157892186, + "grad_norm": 943.8353271484375, + "learning_rate": 9.587246837051912e-06, + "loss": 28.3232, + "step": 108610 + }, + { + "epoch": 0.21941927221160568, + "grad_norm": 109.12712860107422, + "learning_rate": 9.587107949131545e-06, + "loss": 10.8042, + "step": 108620 + }, + { + "epoch": 0.2194394728442895, + "grad_norm": 523.9343872070312, + "learning_rate": 9.586969038854172e-06, + "loss": 22.0542, + "step": 108630 + }, + { + "epoch": 0.2194596734769733, + "grad_norm": 389.647705078125, + "learning_rate": 9.586830106220466e-06, + "loss": 8.5891, + "step": 108640 + }, + { + "epoch": 0.2194798741096571, + "grad_norm": 480.90130615234375, + "learning_rate": 9.586691151231109e-06, + "loss": 14.0215, + "step": 108650 + }, + { + "epoch": 0.21950007474234093, + "grad_norm": 517.3112182617188, + "learning_rate": 9.586552173886773e-06, + "loss": 27.6435, + "step": 108660 + }, + { + "epoch": 0.21952027537502475, + "grad_norm": 735.51953125, + "learning_rate": 9.586413174188139e-06, + "loss": 17.1136, + "step": 108670 + }, + { + "epoch": 0.21954047600770857, + "grad_norm": 401.6905822753906, + "learning_rate": 9.586274152135883e-06, + "loss": 23.0881, + 
"step": 108680 + }, + { + "epoch": 0.2195606766403924, + "grad_norm": 68.62858581542969, + "learning_rate": 9.586135107730684e-06, + "loss": 29.8915, + "step": 108690 + }, + { + "epoch": 0.21958087727307618, + "grad_norm": 491.77764892578125, + "learning_rate": 9.585996040973218e-06, + "loss": 17.5886, + "step": 108700 + }, + { + "epoch": 0.21960107790576, + "grad_norm": 234.37025451660156, + "learning_rate": 9.585856951864163e-06, + "loss": 28.1159, + "step": 108710 + }, + { + "epoch": 0.21962127853844382, + "grad_norm": 604.2092895507812, + "learning_rate": 9.585717840404198e-06, + "loss": 15.2642, + "step": 108720 + }, + { + "epoch": 0.21964147917112764, + "grad_norm": 735.1883544921875, + "learning_rate": 9.585578706594e-06, + "loss": 39.425, + "step": 108730 + }, + { + "epoch": 0.21966167980381146, + "grad_norm": 660.2123413085938, + "learning_rate": 9.585439550434249e-06, + "loss": 34.6851, + "step": 108740 + }, + { + "epoch": 0.21968188043649528, + "grad_norm": 500.07293701171875, + "learning_rate": 9.58530037192562e-06, + "loss": 33.1483, + "step": 108750 + }, + { + "epoch": 0.2197020810691791, + "grad_norm": 293.2021484375, + "learning_rate": 9.585161171068796e-06, + "loss": 23.5174, + "step": 108760 + }, + { + "epoch": 0.2197222817018629, + "grad_norm": 100.20855712890625, + "learning_rate": 9.58502194786445e-06, + "loss": 13.2763, + "step": 108770 + }, + { + "epoch": 0.2197424823345467, + "grad_norm": 335.36566162109375, + "learning_rate": 9.584882702313263e-06, + "loss": 14.1443, + "step": 108780 + }, + { + "epoch": 0.21976268296723053, + "grad_norm": 293.823974609375, + "learning_rate": 9.584743434415915e-06, + "loss": 19.8771, + "step": 108790 + }, + { + "epoch": 0.21978288359991435, + "grad_norm": 374.0918273925781, + "learning_rate": 9.584604144173084e-06, + "loss": 16.1451, + "step": 108800 + }, + { + "epoch": 0.21980308423259817, + "grad_norm": 234.1194305419922, + "learning_rate": 9.584464831585446e-06, + "loss": 19.1597, + "step": 108810 + }, + { + "epoch": 0.219823284865282, + "grad_norm": 288.78277587890625, + "learning_rate": 9.584325496653683e-06, + "loss": 44.7845, + "step": 108820 + }, + { + "epoch": 0.21984348549796578, + "grad_norm": 258.80322265625, + "learning_rate": 9.584186139378476e-06, + "loss": 18.7545, + "step": 108830 + }, + { + "epoch": 0.2198636861306496, + "grad_norm": 431.63507080078125, + "learning_rate": 9.5840467597605e-06, + "loss": 33.6674, + "step": 108840 + }, + { + "epoch": 0.21988388676333342, + "grad_norm": 25.456167221069336, + "learning_rate": 9.583907357800436e-06, + "loss": 24.8607, + "step": 108850 + }, + { + "epoch": 0.21990408739601724, + "grad_norm": 363.77581787109375, + "learning_rate": 9.583767933498964e-06, + "loss": 32.6472, + "step": 108860 + }, + { + "epoch": 0.21992428802870106, + "grad_norm": 593.9925537109375, + "learning_rate": 9.583628486856761e-06, + "loss": 29.4518, + "step": 108870 + }, + { + "epoch": 0.21994448866138488, + "grad_norm": 221.8987274169922, + "learning_rate": 9.583489017874512e-06, + "loss": 22.8747, + "step": 108880 + }, + { + "epoch": 0.21996468929406868, + "grad_norm": 495.2400207519531, + "learning_rate": 9.583349526552893e-06, + "loss": 19.2236, + "step": 108890 + }, + { + "epoch": 0.2199848899267525, + "grad_norm": 414.316162109375, + "learning_rate": 9.583210012892582e-06, + "loss": 19.9706, + "step": 108900 + }, + { + "epoch": 0.22000509055943632, + "grad_norm": 664.9085083007812, + "learning_rate": 9.583070476894263e-06, + "loss": 24.9737, + "step": 108910 + }, + { + "epoch": 
0.22002529119212014, + "grad_norm": 620.5914916992188, + "learning_rate": 9.582930918558613e-06, + "loss": 22.0299, + "step": 108920 + }, + { + "epoch": 0.22004549182480396, + "grad_norm": 344.242431640625, + "learning_rate": 9.582791337886314e-06, + "loss": 23.7912, + "step": 108930 + }, + { + "epoch": 0.22006569245748778, + "grad_norm": 258.2727355957031, + "learning_rate": 9.582651734878048e-06, + "loss": 12.6917, + "step": 108940 + }, + { + "epoch": 0.2200858930901716, + "grad_norm": 711.8133544921875, + "learning_rate": 9.58251210953449e-06, + "loss": 26.5246, + "step": 108950 + }, + { + "epoch": 0.2201060937228554, + "grad_norm": 313.27752685546875, + "learning_rate": 9.582372461856326e-06, + "loss": 26.1567, + "step": 108960 + }, + { + "epoch": 0.2201262943555392, + "grad_norm": 722.0053100585938, + "learning_rate": 9.582232791844233e-06, + "loss": 41.2081, + "step": 108970 + }, + { + "epoch": 0.22014649498822303, + "grad_norm": 487.4915771484375, + "learning_rate": 9.582093099498895e-06, + "loss": 38.4402, + "step": 108980 + }, + { + "epoch": 0.22016669562090685, + "grad_norm": 604.3970947265625, + "learning_rate": 9.581953384820989e-06, + "loss": 20.8286, + "step": 108990 + }, + { + "epoch": 0.22018689625359067, + "grad_norm": 136.74668884277344, + "learning_rate": 9.581813647811199e-06, + "loss": 19.8767, + "step": 109000 + }, + { + "epoch": 0.2202070968862745, + "grad_norm": 489.6549987792969, + "learning_rate": 9.581673888470203e-06, + "loss": 9.5, + "step": 109010 + }, + { + "epoch": 0.22022729751895828, + "grad_norm": 349.5637512207031, + "learning_rate": 9.581534106798687e-06, + "loss": 11.5665, + "step": 109020 + }, + { + "epoch": 0.2202474981516421, + "grad_norm": 837.957275390625, + "learning_rate": 9.58139430279733e-06, + "loss": 33.0956, + "step": 109030 + }, + { + "epoch": 0.22026769878432592, + "grad_norm": 520.0826416015625, + "learning_rate": 9.58125447646681e-06, + "loss": 13.8629, + "step": 109040 + }, + { + "epoch": 0.22028789941700974, + "grad_norm": 252.03729248046875, + "learning_rate": 9.581114627807812e-06, + "loss": 31.939, + "step": 109050 + }, + { + "epoch": 0.22030810004969356, + "grad_norm": 1100.6593017578125, + "learning_rate": 9.580974756821017e-06, + "loss": 23.3874, + "step": 109060 + }, + { + "epoch": 0.22032830068237738, + "grad_norm": 381.92694091796875, + "learning_rate": 9.580834863507107e-06, + "loss": 15.2103, + "step": 109070 + }, + { + "epoch": 0.2203485013150612, + "grad_norm": 143.5346221923828, + "learning_rate": 9.580694947866765e-06, + "loss": 16.3813, + "step": 109080 + }, + { + "epoch": 0.220368701947745, + "grad_norm": 438.9550476074219, + "learning_rate": 9.58055500990067e-06, + "loss": 32.5477, + "step": 109090 + }, + { + "epoch": 0.2203889025804288, + "grad_norm": 772.0028686523438, + "learning_rate": 9.580415049609503e-06, + "loss": 22.2763, + "step": 109100 + }, + { + "epoch": 0.22040910321311263, + "grad_norm": 436.929931640625, + "learning_rate": 9.580275066993952e-06, + "loss": 19.3724, + "step": 109110 + }, + { + "epoch": 0.22042930384579645, + "grad_norm": 693.9835205078125, + "learning_rate": 9.580135062054693e-06, + "loss": 23.5535, + "step": 109120 + }, + { + "epoch": 0.22044950447848027, + "grad_norm": 506.35821533203125, + "learning_rate": 9.579995034792415e-06, + "loss": 17.582, + "step": 109130 + }, + { + "epoch": 0.2204697051111641, + "grad_norm": 585.5834350585938, + "learning_rate": 9.579854985207794e-06, + "loss": 21.895, + "step": 109140 + }, + { + "epoch": 0.22048990574384789, + "grad_norm": 
91.59484100341797, + "learning_rate": 9.579714913301516e-06, + "loss": 16.7803, + "step": 109150 + }, + { + "epoch": 0.2205101063765317, + "grad_norm": 233.09005737304688, + "learning_rate": 9.579574819074263e-06, + "loss": 21.4606, + "step": 109160 + }, + { + "epoch": 0.22053030700921553, + "grad_norm": 554.2740478515625, + "learning_rate": 9.579434702526717e-06, + "loss": 39.7076, + "step": 109170 + }, + { + "epoch": 0.22055050764189935, + "grad_norm": 418.4535217285156, + "learning_rate": 9.579294563659562e-06, + "loss": 29.9358, + "step": 109180 + }, + { + "epoch": 0.22057070827458317, + "grad_norm": 220.78564453125, + "learning_rate": 9.579154402473482e-06, + "loss": 44.5528, + "step": 109190 + }, + { + "epoch": 0.22059090890726699, + "grad_norm": 6.106456279754639, + "learning_rate": 9.579014218969158e-06, + "loss": 27.3099, + "step": 109200 + }, + { + "epoch": 0.22061110953995078, + "grad_norm": 783.509521484375, + "learning_rate": 9.578874013147274e-06, + "loss": 17.8169, + "step": 109210 + }, + { + "epoch": 0.2206313101726346, + "grad_norm": 419.3775939941406, + "learning_rate": 9.578733785008513e-06, + "loss": 17.4024, + "step": 109220 + }, + { + "epoch": 0.22065151080531842, + "grad_norm": 181.3259735107422, + "learning_rate": 9.578593534553558e-06, + "loss": 17.975, + "step": 109230 + }, + { + "epoch": 0.22067171143800224, + "grad_norm": 487.99969482421875, + "learning_rate": 9.578453261783096e-06, + "loss": 30.2975, + "step": 109240 + }, + { + "epoch": 0.22069191207068606, + "grad_norm": 329.6623229980469, + "learning_rate": 9.578312966697807e-06, + "loss": 16.3705, + "step": 109250 + }, + { + "epoch": 0.22071211270336988, + "grad_norm": 591.29150390625, + "learning_rate": 9.578172649298374e-06, + "loss": 34.7328, + "step": 109260 + }, + { + "epoch": 0.2207323133360537, + "grad_norm": 383.66900634765625, + "learning_rate": 9.578032309585485e-06, + "loss": 21.4675, + "step": 109270 + }, + { + "epoch": 0.2207525139687375, + "grad_norm": 910.4567260742188, + "learning_rate": 9.577891947559821e-06, + "loss": 34.6472, + "step": 109280 + }, + { + "epoch": 0.2207727146014213, + "grad_norm": 248.37745666503906, + "learning_rate": 9.577751563222067e-06, + "loss": 29.2419, + "step": 109290 + }, + { + "epoch": 0.22079291523410513, + "grad_norm": 304.04144287109375, + "learning_rate": 9.577611156572908e-06, + "loss": 16.7587, + "step": 109300 + }, + { + "epoch": 0.22081311586678895, + "grad_norm": 96.64410400390625, + "learning_rate": 9.577470727613025e-06, + "loss": 15.9196, + "step": 109310 + }, + { + "epoch": 0.22083331649947277, + "grad_norm": 576.2413940429688, + "learning_rate": 9.577330276343106e-06, + "loss": 24.888, + "step": 109320 + }, + { + "epoch": 0.2208535171321566, + "grad_norm": 207.7760009765625, + "learning_rate": 9.577189802763836e-06, + "loss": 25.2301, + "step": 109330 + }, + { + "epoch": 0.22087371776484038, + "grad_norm": 104.967041015625, + "learning_rate": 9.577049306875895e-06, + "loss": 49.4909, + "step": 109340 + }, + { + "epoch": 0.2208939183975242, + "grad_norm": 209.85760498046875, + "learning_rate": 9.576908788679975e-06, + "loss": 20.2544, + "step": 109350 + }, + { + "epoch": 0.22091411903020802, + "grad_norm": 465.70330810546875, + "learning_rate": 9.576768248176752e-06, + "loss": 20.1185, + "step": 109360 + }, + { + "epoch": 0.22093431966289184, + "grad_norm": 786.9861450195312, + "learning_rate": 9.576627685366919e-06, + "loss": 17.1637, + "step": 109370 + }, + { + "epoch": 0.22095452029557566, + "grad_norm": 336.6026306152344, + "learning_rate": 
9.576487100251157e-06, + "loss": 21.1663, + "step": 109380 + }, + { + "epoch": 0.22097472092825948, + "grad_norm": 354.6676330566406, + "learning_rate": 9.576346492830151e-06, + "loss": 19.0305, + "step": 109390 + }, + { + "epoch": 0.22099492156094327, + "grad_norm": 1076.187744140625, + "learning_rate": 9.576205863104588e-06, + "loss": 21.228, + "step": 109400 + }, + { + "epoch": 0.2210151221936271, + "grad_norm": 614.33203125, + "learning_rate": 9.576065211075153e-06, + "loss": 30.4912, + "step": 109410 + }, + { + "epoch": 0.22103532282631091, + "grad_norm": 833.4633178710938, + "learning_rate": 9.575924536742532e-06, + "loss": 22.7512, + "step": 109420 + }, + { + "epoch": 0.22105552345899473, + "grad_norm": 281.7948303222656, + "learning_rate": 9.575783840107407e-06, + "loss": 30.2963, + "step": 109430 + }, + { + "epoch": 0.22107572409167855, + "grad_norm": 354.9866943359375, + "learning_rate": 9.575643121170468e-06, + "loss": 16.2715, + "step": 109440 + }, + { + "epoch": 0.22109592472436237, + "grad_norm": 432.01953125, + "learning_rate": 9.5755023799324e-06, + "loss": 33.8724, + "step": 109450 + }, + { + "epoch": 0.2211161253570462, + "grad_norm": 1179.9603271484375, + "learning_rate": 9.575361616393888e-06, + "loss": 27.342, + "step": 109460 + }, + { + "epoch": 0.22113632598973, + "grad_norm": 109.8602523803711, + "learning_rate": 9.575220830555618e-06, + "loss": 20.4152, + "step": 109470 + }, + { + "epoch": 0.2211565266224138, + "grad_norm": 637.301025390625, + "learning_rate": 9.575080022418276e-06, + "loss": 15.0124, + "step": 109480 + }, + { + "epoch": 0.22117672725509763, + "grad_norm": 335.2337341308594, + "learning_rate": 9.574939191982549e-06, + "loss": 17.1853, + "step": 109490 + }, + { + "epoch": 0.22119692788778145, + "grad_norm": 272.91522216796875, + "learning_rate": 9.574798339249124e-06, + "loss": 24.9664, + "step": 109500 + }, + { + "epoch": 0.22121712852046527, + "grad_norm": 354.1695556640625, + "learning_rate": 9.574657464218688e-06, + "loss": 42.1583, + "step": 109510 + }, + { + "epoch": 0.2212373291531491, + "grad_norm": 758.5416259765625, + "learning_rate": 9.574516566891925e-06, + "loss": 29.1923, + "step": 109520 + }, + { + "epoch": 0.22125752978583288, + "grad_norm": 67.76472473144531, + "learning_rate": 9.574375647269523e-06, + "loss": 34.9052, + "step": 109530 + }, + { + "epoch": 0.2212777304185167, + "grad_norm": 564.2835693359375, + "learning_rate": 9.574234705352167e-06, + "loss": 16.1783, + "step": 109540 + }, + { + "epoch": 0.22129793105120052, + "grad_norm": 139.2586669921875, + "learning_rate": 9.574093741140549e-06, + "loss": 18.3567, + "step": 109550 + }, + { + "epoch": 0.22131813168388434, + "grad_norm": 230.43524169921875, + "learning_rate": 9.573952754635351e-06, + "loss": 15.0595, + "step": 109560 + }, + { + "epoch": 0.22133833231656816, + "grad_norm": 1182.6629638671875, + "learning_rate": 9.573811745837262e-06, + "loss": 34.0825, + "step": 109570 + }, + { + "epoch": 0.22135853294925198, + "grad_norm": 317.4469909667969, + "learning_rate": 9.573670714746973e-06, + "loss": 16.955, + "step": 109580 + }, + { + "epoch": 0.2213787335819358, + "grad_norm": 520.1302490234375, + "learning_rate": 9.573529661365163e-06, + "loss": 26.3998, + "step": 109590 + }, + { + "epoch": 0.2213989342146196, + "grad_norm": 367.7105407714844, + "learning_rate": 9.573388585692525e-06, + "loss": 15.3604, + "step": 109600 + }, + { + "epoch": 0.2214191348473034, + "grad_norm": 625.9605102539062, + "learning_rate": 9.573247487729747e-06, + "loss": 23.8124, + "step": 109610 
+ }, + { + "epoch": 0.22143933547998723, + "grad_norm": 314.0114440917969, + "learning_rate": 9.573106367477515e-06, + "loss": 21.104, + "step": 109620 + }, + { + "epoch": 0.22145953611267105, + "grad_norm": 25.299861907958984, + "learning_rate": 9.572965224936517e-06, + "loss": 24.232, + "step": 109630 + }, + { + "epoch": 0.22147973674535487, + "grad_norm": 476.4053649902344, + "learning_rate": 9.572824060107442e-06, + "loss": 16.1803, + "step": 109640 + }, + { + "epoch": 0.2214999373780387, + "grad_norm": 353.9746398925781, + "learning_rate": 9.572682872990975e-06, + "loss": 28.4468, + "step": 109650 + }, + { + "epoch": 0.22152013801072248, + "grad_norm": 579.5936279296875, + "learning_rate": 9.572541663587809e-06, + "loss": 14.2607, + "step": 109660 + }, + { + "epoch": 0.2215403386434063, + "grad_norm": 81.84984588623047, + "learning_rate": 9.572400431898627e-06, + "loss": 28.0746, + "step": 109670 + }, + { + "epoch": 0.22156053927609012, + "grad_norm": 371.1416931152344, + "learning_rate": 9.572259177924122e-06, + "loss": 27.2328, + "step": 109680 + }, + { + "epoch": 0.22158073990877394, + "grad_norm": 247.1607208251953, + "learning_rate": 9.572117901664979e-06, + "loss": 25.4636, + "step": 109690 + }, + { + "epoch": 0.22160094054145776, + "grad_norm": 362.3720397949219, + "learning_rate": 9.571976603121889e-06, + "loss": 38.2739, + "step": 109700 + }, + { + "epoch": 0.22162114117414158, + "grad_norm": 306.3987731933594, + "learning_rate": 9.571835282295539e-06, + "loss": 23.3747, + "step": 109710 + }, + { + "epoch": 0.22164134180682538, + "grad_norm": 220.51995849609375, + "learning_rate": 9.571693939186618e-06, + "loss": 15.8963, + "step": 109720 + }, + { + "epoch": 0.2216615424395092, + "grad_norm": 876.3275756835938, + "learning_rate": 9.571552573795814e-06, + "loss": 20.8997, + "step": 109730 + }, + { + "epoch": 0.22168174307219302, + "grad_norm": 891.5435791015625, + "learning_rate": 9.571411186123818e-06, + "loss": 27.2309, + "step": 109740 + }, + { + "epoch": 0.22170194370487684, + "grad_norm": 642.4542236328125, + "learning_rate": 9.571269776171319e-06, + "loss": 29.3748, + "step": 109750 + }, + { + "epoch": 0.22172214433756066, + "grad_norm": 42.82487869262695, + "learning_rate": 9.571128343939006e-06, + "loss": 11.178, + "step": 109760 + }, + { + "epoch": 0.22174234497024448, + "grad_norm": 449.6181945800781, + "learning_rate": 9.570986889427567e-06, + "loss": 23.855, + "step": 109770 + }, + { + "epoch": 0.2217625456029283, + "grad_norm": 321.8900146484375, + "learning_rate": 9.570845412637692e-06, + "loss": 36.1449, + "step": 109780 + }, + { + "epoch": 0.2217827462356121, + "grad_norm": 322.9187927246094, + "learning_rate": 9.570703913570072e-06, + "loss": 22.747, + "step": 109790 + }, + { + "epoch": 0.2218029468682959, + "grad_norm": 166.61514282226562, + "learning_rate": 9.570562392225395e-06, + "loss": 7.5422, + "step": 109800 + }, + { + "epoch": 0.22182314750097973, + "grad_norm": 532.9989013671875, + "learning_rate": 9.570420848604351e-06, + "loss": 18.9286, + "step": 109810 + }, + { + "epoch": 0.22184334813366355, + "grad_norm": 379.5887756347656, + "learning_rate": 9.57027928270763e-06, + "loss": 21.7228, + "step": 109820 + }, + { + "epoch": 0.22186354876634737, + "grad_norm": 145.89117431640625, + "learning_rate": 9.570137694535922e-06, + "loss": 29.015, + "step": 109830 + }, + { + "epoch": 0.2218837493990312, + "grad_norm": 511.4658508300781, + "learning_rate": 9.569996084089918e-06, + "loss": 17.8443, + "step": 109840 + }, + { + "epoch": 0.22190395003171498, + 
"grad_norm": 989.8679809570312, + "learning_rate": 9.569854451370306e-06, + "loss": 39.1116, + "step": 109850 + }, + { + "epoch": 0.2219241506643988, + "grad_norm": 792.4555053710938, + "learning_rate": 9.569712796377779e-06, + "loss": 35.5257, + "step": 109860 + }, + { + "epoch": 0.22194435129708262, + "grad_norm": 370.8437194824219, + "learning_rate": 9.569571119113025e-06, + "loss": 27.1154, + "step": 109870 + }, + { + "epoch": 0.22196455192976644, + "grad_norm": 341.374755859375, + "learning_rate": 9.569429419576737e-06, + "loss": 33.4845, + "step": 109880 + }, + { + "epoch": 0.22198475256245026, + "grad_norm": 71.42806243896484, + "learning_rate": 9.569287697769602e-06, + "loss": 14.3483, + "step": 109890 + }, + { + "epoch": 0.22200495319513408, + "grad_norm": 348.98779296875, + "learning_rate": 9.569145953692316e-06, + "loss": 19.8431, + "step": 109900 + }, + { + "epoch": 0.2220251538278179, + "grad_norm": 178.1228485107422, + "learning_rate": 9.569004187345565e-06, + "loss": 25.9737, + "step": 109910 + }, + { + "epoch": 0.2220453544605017, + "grad_norm": 129.86044311523438, + "learning_rate": 9.568862398730042e-06, + "loss": 24.8706, + "step": 109920 + }, + { + "epoch": 0.2220655550931855, + "grad_norm": 377.76983642578125, + "learning_rate": 9.568720587846437e-06, + "loss": 17.8867, + "step": 109930 + }, + { + "epoch": 0.22208575572586933, + "grad_norm": 641.7771606445312, + "learning_rate": 9.568578754695443e-06, + "loss": 34.9395, + "step": 109940 + }, + { + "epoch": 0.22210595635855315, + "grad_norm": 422.3647766113281, + "learning_rate": 9.56843689927775e-06, + "loss": 24.1158, + "step": 109950 + }, + { + "epoch": 0.22212615699123697, + "grad_norm": 449.8459167480469, + "learning_rate": 9.568295021594049e-06, + "loss": 30.3078, + "step": 109960 + }, + { + "epoch": 0.2221463576239208, + "grad_norm": 323.5790100097656, + "learning_rate": 9.568153121645033e-06, + "loss": 14.0972, + "step": 109970 + }, + { + "epoch": 0.22216655825660458, + "grad_norm": 792.2882080078125, + "learning_rate": 9.568011199431392e-06, + "loss": 16.4563, + "step": 109980 + }, + { + "epoch": 0.2221867588892884, + "grad_norm": 425.24822998046875, + "learning_rate": 9.56786925495382e-06, + "loss": 45.4373, + "step": 109990 + }, + { + "epoch": 0.22220695952197222, + "grad_norm": 1241.8756103515625, + "learning_rate": 9.567727288213005e-06, + "loss": 38.42, + "step": 110000 + }, + { + "epoch": 0.22222716015465604, + "grad_norm": 217.55137634277344, + "learning_rate": 9.567585299209642e-06, + "loss": 15.3444, + "step": 110010 + }, + { + "epoch": 0.22224736078733986, + "grad_norm": 413.5858154296875, + "learning_rate": 9.567443287944422e-06, + "loss": 23.6093, + "step": 110020 + }, + { + "epoch": 0.22226756142002368, + "grad_norm": 497.9522705078125, + "learning_rate": 9.567301254418038e-06, + "loss": 24.472, + "step": 110030 + }, + { + "epoch": 0.22228776205270748, + "grad_norm": 576.4864501953125, + "learning_rate": 9.567159198631181e-06, + "loss": 27.0471, + "step": 110040 + }, + { + "epoch": 0.2223079626853913, + "grad_norm": 466.5008850097656, + "learning_rate": 9.567017120584545e-06, + "loss": 35.0435, + "step": 110050 + }, + { + "epoch": 0.22232816331807512, + "grad_norm": 166.7375946044922, + "learning_rate": 9.566875020278822e-06, + "loss": 33.9176, + "step": 110060 + }, + { + "epoch": 0.22234836395075894, + "grad_norm": 338.6051025390625, + "learning_rate": 9.566732897714703e-06, + "loss": 25.5896, + "step": 110070 + }, + { + "epoch": 0.22236856458344276, + "grad_norm": 231.14297485351562, + 
"learning_rate": 9.56659075289288e-06, + "loss": 21.4084, + "step": 110080 + }, + { + "epoch": 0.22238876521612658, + "grad_norm": 303.2173767089844, + "learning_rate": 9.566448585814051e-06, + "loss": 21.725, + "step": 110090 + }, + { + "epoch": 0.2224089658488104, + "grad_norm": 16.235166549682617, + "learning_rate": 9.566306396478904e-06, + "loss": 18.9133, + "step": 110100 + }, + { + "epoch": 0.2224291664814942, + "grad_norm": 314.7041015625, + "learning_rate": 9.566164184888134e-06, + "loss": 25.7187, + "step": 110110 + }, + { + "epoch": 0.222449367114178, + "grad_norm": 408.3448181152344, + "learning_rate": 9.566021951042432e-06, + "loss": 21.4768, + "step": 110120 + }, + { + "epoch": 0.22246956774686183, + "grad_norm": 354.4145202636719, + "learning_rate": 9.565879694942493e-06, + "loss": 18.8502, + "step": 110130 + }, + { + "epoch": 0.22248976837954565, + "grad_norm": 399.0462951660156, + "learning_rate": 9.565737416589011e-06, + "loss": 11.6276, + "step": 110140 + }, + { + "epoch": 0.22250996901222947, + "grad_norm": 413.0665588378906, + "learning_rate": 9.565595115982678e-06, + "loss": 18.873, + "step": 110150 + }, + { + "epoch": 0.2225301696449133, + "grad_norm": 591.3002319335938, + "learning_rate": 9.56545279312419e-06, + "loss": 22.1303, + "step": 110160 + }, + { + "epoch": 0.22255037027759708, + "grad_norm": 359.10064697265625, + "learning_rate": 9.565310448014236e-06, + "loss": 24.5434, + "step": 110170 + }, + { + "epoch": 0.2225705709102809, + "grad_norm": 223.58663940429688, + "learning_rate": 9.565168080653514e-06, + "loss": 22.9331, + "step": 110180 + }, + { + "epoch": 0.22259077154296472, + "grad_norm": 206.38140869140625, + "learning_rate": 9.565025691042717e-06, + "loss": 42.607, + "step": 110190 + }, + { + "epoch": 0.22261097217564854, + "grad_norm": 312.63934326171875, + "learning_rate": 9.564883279182538e-06, + "loss": 12.663, + "step": 110200 + }, + { + "epoch": 0.22263117280833236, + "grad_norm": 666.5807495117188, + "learning_rate": 9.56474084507367e-06, + "loss": 17.0194, + "step": 110210 + }, + { + "epoch": 0.22265137344101618, + "grad_norm": 292.62994384765625, + "learning_rate": 9.56459838871681e-06, + "loss": 27.7475, + "step": 110220 + }, + { + "epoch": 0.2226715740737, + "grad_norm": 348.21539306640625, + "learning_rate": 9.564455910112651e-06, + "loss": 14.2087, + "step": 110230 + }, + { + "epoch": 0.2226917747063838, + "grad_norm": 287.2896423339844, + "learning_rate": 9.564313409261888e-06, + "loss": 36.3649, + "step": 110240 + }, + { + "epoch": 0.2227119753390676, + "grad_norm": 256.77691650390625, + "learning_rate": 9.564170886165214e-06, + "loss": 27.8815, + "step": 110250 + }, + { + "epoch": 0.22273217597175143, + "grad_norm": 72.4677963256836, + "learning_rate": 9.564028340823325e-06, + "loss": 17.3781, + "step": 110260 + }, + { + "epoch": 0.22275237660443525, + "grad_norm": 508.245361328125, + "learning_rate": 9.563885773236917e-06, + "loss": 17.9092, + "step": 110270 + }, + { + "epoch": 0.22277257723711907, + "grad_norm": 628.5128784179688, + "learning_rate": 9.563743183406683e-06, + "loss": 23.3545, + "step": 110280 + }, + { + "epoch": 0.2227927778698029, + "grad_norm": 436.4483337402344, + "learning_rate": 9.563600571333316e-06, + "loss": 24.9046, + "step": 110290 + }, + { + "epoch": 0.22281297850248669, + "grad_norm": 4.252965450286865, + "learning_rate": 9.563457937017514e-06, + "loss": 32.6382, + "step": 110300 + }, + { + "epoch": 0.2228331791351705, + "grad_norm": 171.72998046875, + "learning_rate": 9.563315280459973e-06, + "loss": 
34.4523, + "step": 110310 + }, + { + "epoch": 0.22285337976785433, + "grad_norm": 510.9077453613281, + "learning_rate": 9.563172601661386e-06, + "loss": 41.5938, + "step": 110320 + }, + { + "epoch": 0.22287358040053815, + "grad_norm": 221.45542907714844, + "learning_rate": 9.56302990062245e-06, + "loss": 21.51, + "step": 110330 + }, + { + "epoch": 0.22289378103322197, + "grad_norm": 317.7682800292969, + "learning_rate": 9.562887177343858e-06, + "loss": 17.6238, + "step": 110340 + }, + { + "epoch": 0.22291398166590579, + "grad_norm": 966.6275634765625, + "learning_rate": 9.562744431826307e-06, + "loss": 27.6289, + "step": 110350 + }, + { + "epoch": 0.22293418229858958, + "grad_norm": 270.346435546875, + "learning_rate": 9.562601664070495e-06, + "loss": 14.6713, + "step": 110360 + }, + { + "epoch": 0.2229543829312734, + "grad_norm": 817.4618530273438, + "learning_rate": 9.562458874077114e-06, + "loss": 33.7798, + "step": 110370 + }, + { + "epoch": 0.22297458356395722, + "grad_norm": 566.02294921875, + "learning_rate": 9.562316061846863e-06, + "loss": 23.0801, + "step": 110380 + }, + { + "epoch": 0.22299478419664104, + "grad_norm": 520.538818359375, + "learning_rate": 9.562173227380437e-06, + "loss": 20.5937, + "step": 110390 + }, + { + "epoch": 0.22301498482932486, + "grad_norm": 469.1568298339844, + "learning_rate": 9.562030370678533e-06, + "loss": 18.5103, + "step": 110400 + }, + { + "epoch": 0.22303518546200868, + "grad_norm": 220.47048950195312, + "learning_rate": 9.561887491741844e-06, + "loss": 22.8271, + "step": 110410 + }, + { + "epoch": 0.2230553860946925, + "grad_norm": 63.98490905761719, + "learning_rate": 9.56174459057107e-06, + "loss": 13.5171, + "step": 110420 + }, + { + "epoch": 0.2230755867273763, + "grad_norm": 1099.8052978515625, + "learning_rate": 9.561601667166905e-06, + "loss": 26.0947, + "step": 110430 + }, + { + "epoch": 0.2230957873600601, + "grad_norm": 251.17816162109375, + "learning_rate": 9.561458721530047e-06, + "loss": 20.2432, + "step": 110440 + }, + { + "epoch": 0.22311598799274393, + "grad_norm": 168.3972930908203, + "learning_rate": 9.561315753661194e-06, + "loss": 20.0466, + "step": 110450 + }, + { + "epoch": 0.22313618862542775, + "grad_norm": 699.1183471679688, + "learning_rate": 9.56117276356104e-06, + "loss": 24.6622, + "step": 110460 + }, + { + "epoch": 0.22315638925811157, + "grad_norm": 579.03759765625, + "learning_rate": 9.561029751230282e-06, + "loss": 24.6237, + "step": 110470 + }, + { + "epoch": 0.2231765898907954, + "grad_norm": 277.62646484375, + "learning_rate": 9.56088671666962e-06, + "loss": 11.2887, + "step": 110480 + }, + { + "epoch": 0.22319679052347918, + "grad_norm": 620.6677856445312, + "learning_rate": 9.56074365987975e-06, + "loss": 15.1699, + "step": 110490 + }, + { + "epoch": 0.223216991156163, + "grad_norm": 936.4269409179688, + "learning_rate": 9.560600580861366e-06, + "loss": 35.6911, + "step": 110500 + }, + { + "epoch": 0.22323719178884682, + "grad_norm": 552.4637451171875, + "learning_rate": 9.560457479615168e-06, + "loss": 24.9559, + "step": 110510 + }, + { + "epoch": 0.22325739242153064, + "grad_norm": 576.7377319335938, + "learning_rate": 9.560314356141856e-06, + "loss": 22.4393, + "step": 110520 + }, + { + "epoch": 0.22327759305421446, + "grad_norm": 358.72589111328125, + "learning_rate": 9.560171210442123e-06, + "loss": 20.9439, + "step": 110530 + }, + { + "epoch": 0.22329779368689828, + "grad_norm": 249.48648071289062, + "learning_rate": 9.56002804251667e-06, + "loss": 15.8705, + "step": 110540 + }, + { + "epoch": 
0.2233179943195821, + "grad_norm": 308.5624694824219, + "learning_rate": 9.559884852366191e-06, + "loss": 27.0888, + "step": 110550 + }, + { + "epoch": 0.2233381949522659, + "grad_norm": 187.78858947753906, + "learning_rate": 9.55974163999139e-06, + "loss": 35.8462, + "step": 110560 + }, + { + "epoch": 0.22335839558494971, + "grad_norm": 439.64764404296875, + "learning_rate": 9.559598405392959e-06, + "loss": 17.1107, + "step": 110570 + }, + { + "epoch": 0.22337859621763353, + "grad_norm": 146.07711791992188, + "learning_rate": 9.559455148571597e-06, + "loss": 28.0377, + "step": 110580 + }, + { + "epoch": 0.22339879685031735, + "grad_norm": 270.3213806152344, + "learning_rate": 9.559311869528005e-06, + "loss": 28.6909, + "step": 110590 + }, + { + "epoch": 0.22341899748300117, + "grad_norm": 200.6590576171875, + "learning_rate": 9.55916856826288e-06, + "loss": 25.6525, + "step": 110600 + }, + { + "epoch": 0.223439198115685, + "grad_norm": 612.1190185546875, + "learning_rate": 9.55902524477692e-06, + "loss": 15.2635, + "step": 110610 + }, + { + "epoch": 0.2234593987483688, + "grad_norm": 322.1373291015625, + "learning_rate": 9.558881899070824e-06, + "loss": 33.6983, + "step": 110620 + }, + { + "epoch": 0.2234795993810526, + "grad_norm": 134.87986755371094, + "learning_rate": 9.55873853114529e-06, + "loss": 24.8128, + "step": 110630 + }, + { + "epoch": 0.22349980001373643, + "grad_norm": 1127.307373046875, + "learning_rate": 9.55859514100102e-06, + "loss": 29.5784, + "step": 110640 + }, + { + "epoch": 0.22352000064642025, + "grad_norm": 471.3376770019531, + "learning_rate": 9.558451728638706e-06, + "loss": 17.0943, + "step": 110650 + }, + { + "epoch": 0.22354020127910407, + "grad_norm": 368.75640869140625, + "learning_rate": 9.558308294059055e-06, + "loss": 8.737, + "step": 110660 + }, + { + "epoch": 0.2235604019117879, + "grad_norm": 265.4960632324219, + "learning_rate": 9.55816483726276e-06, + "loss": 29.3541, + "step": 110670 + }, + { + "epoch": 0.22358060254447168, + "grad_norm": 170.4871063232422, + "learning_rate": 9.558021358250523e-06, + "loss": 40.0966, + "step": 110680 + }, + { + "epoch": 0.2236008031771555, + "grad_norm": 931.2318725585938, + "learning_rate": 9.557877857023043e-06, + "loss": 21.5192, + "step": 110690 + }, + { + "epoch": 0.22362100380983932, + "grad_norm": 1998.2803955078125, + "learning_rate": 9.557734333581019e-06, + "loss": 49.9292, + "step": 110700 + }, + { + "epoch": 0.22364120444252314, + "grad_norm": 155.32125854492188, + "learning_rate": 9.557590787925151e-06, + "loss": 40.1303, + "step": 110710 + }, + { + "epoch": 0.22366140507520696, + "grad_norm": 454.46685791015625, + "learning_rate": 9.557447220056137e-06, + "loss": 23.0269, + "step": 110720 + }, + { + "epoch": 0.22368160570789078, + "grad_norm": 303.2588195800781, + "learning_rate": 9.557303629974679e-06, + "loss": 19.352, + "step": 110730 + }, + { + "epoch": 0.2237018063405746, + "grad_norm": 259.6453552246094, + "learning_rate": 9.557160017681475e-06, + "loss": 16.9695, + "step": 110740 + }, + { + "epoch": 0.2237220069732584, + "grad_norm": 341.01123046875, + "learning_rate": 9.557016383177226e-06, + "loss": 34.446, + "step": 110750 + }, + { + "epoch": 0.2237422076059422, + "grad_norm": 861.7747192382812, + "learning_rate": 9.556872726462634e-06, + "loss": 16.9156, + "step": 110760 + }, + { + "epoch": 0.22376240823862603, + "grad_norm": 589.8540649414062, + "learning_rate": 9.556729047538395e-06, + "loss": 35.2068, + "step": 110770 + }, + { + "epoch": 0.22378260887130985, + "grad_norm": 
404.4129638671875, + "learning_rate": 9.556585346405211e-06, + "loss": 21.4091, + "step": 110780 + }, + { + "epoch": 0.22380280950399367, + "grad_norm": 726.181640625, + "learning_rate": 9.556441623063783e-06, + "loss": 26.9133, + "step": 110790 + }, + { + "epoch": 0.2238230101366775, + "grad_norm": 148.09535217285156, + "learning_rate": 9.556297877514812e-06, + "loss": 19.4471, + "step": 110800 + }, + { + "epoch": 0.22384321076936128, + "grad_norm": 372.90972900390625, + "learning_rate": 9.556154109758998e-06, + "loss": 23.5338, + "step": 110810 + }, + { + "epoch": 0.2238634114020451, + "grad_norm": 181.201171875, + "learning_rate": 9.55601031979704e-06, + "loss": 15.8666, + "step": 110820 + }, + { + "epoch": 0.22388361203472892, + "grad_norm": 878.591796875, + "learning_rate": 9.55586650762964e-06, + "loss": 44.4969, + "step": 110830 + }, + { + "epoch": 0.22390381266741274, + "grad_norm": 212.8592987060547, + "learning_rate": 9.555722673257502e-06, + "loss": 21.3121, + "step": 110840 + }, + { + "epoch": 0.22392401330009656, + "grad_norm": 12.053426742553711, + "learning_rate": 9.555578816681321e-06, + "loss": 22.3437, + "step": 110850 + }, + { + "epoch": 0.22394421393278038, + "grad_norm": 232.9084930419922, + "learning_rate": 9.555434937901801e-06, + "loss": 29.2265, + "step": 110860 + }, + { + "epoch": 0.2239644145654642, + "grad_norm": 596.0853271484375, + "learning_rate": 9.555291036919647e-06, + "loss": 28.1046, + "step": 110870 + }, + { + "epoch": 0.223984615198148, + "grad_norm": 362.330322265625, + "learning_rate": 9.555147113735555e-06, + "loss": 17.273, + "step": 110880 + }, + { + "epoch": 0.22400481583083182, + "grad_norm": 537.8699951171875, + "learning_rate": 9.55500316835023e-06, + "loss": 31.301, + "step": 110890 + }, + { + "epoch": 0.22402501646351564, + "grad_norm": 532.1046142578125, + "learning_rate": 9.554859200764371e-06, + "loss": 14.2249, + "step": 110900 + }, + { + "epoch": 0.22404521709619946, + "grad_norm": 143.5594024658203, + "learning_rate": 9.55471521097868e-06, + "loss": 13.2904, + "step": 110910 + }, + { + "epoch": 0.22406541772888328, + "grad_norm": 9.103543281555176, + "learning_rate": 9.554571198993858e-06, + "loss": 15.9683, + "step": 110920 + }, + { + "epoch": 0.2240856183615671, + "grad_norm": 0.0, + "learning_rate": 9.55442716481061e-06, + "loss": 38.8312, + "step": 110930 + }, + { + "epoch": 0.2241058189942509, + "grad_norm": 301.2005310058594, + "learning_rate": 9.554283108429639e-06, + "loss": 13.2248, + "step": 110940 + }, + { + "epoch": 0.2241260196269347, + "grad_norm": 341.2347412109375, + "learning_rate": 9.55413902985164e-06, + "loss": 22.6971, + "step": 110950 + }, + { + "epoch": 0.22414622025961853, + "grad_norm": 544.2408447265625, + "learning_rate": 9.553994929077323e-06, + "loss": 30.891, + "step": 110960 + }, + { + "epoch": 0.22416642089230235, + "grad_norm": 157.01345825195312, + "learning_rate": 9.553850806107387e-06, + "loss": 17.0911, + "step": 110970 + }, + { + "epoch": 0.22418662152498617, + "grad_norm": 134.6532440185547, + "learning_rate": 9.553706660942532e-06, + "loss": 25.8211, + "step": 110980 + }, + { + "epoch": 0.22420682215767, + "grad_norm": 1391.4884033203125, + "learning_rate": 9.553562493583466e-06, + "loss": 25.441, + "step": 110990 + }, + { + "epoch": 0.22422702279035378, + "grad_norm": 791.5779418945312, + "learning_rate": 9.553418304030886e-06, + "loss": 28.1263, + "step": 111000 + }, + { + "epoch": 0.2242472234230376, + "grad_norm": 667.532470703125, + "learning_rate": 9.553274092285498e-06, + "loss": 20.6861, 
+ "step": 111010 + }, + { + "epoch": 0.22426742405572142, + "grad_norm": 198.34072875976562, + "learning_rate": 9.553129858348006e-06, + "loss": 27.3397, + "step": 111020 + }, + { + "epoch": 0.22428762468840524, + "grad_norm": 674.15625, + "learning_rate": 9.55298560221911e-06, + "loss": 32.4214, + "step": 111030 + }, + { + "epoch": 0.22430782532108906, + "grad_norm": 243.1038360595703, + "learning_rate": 9.552841323899515e-06, + "loss": 24.5726, + "step": 111040 + }, + { + "epoch": 0.22432802595377288, + "grad_norm": 359.1784362792969, + "learning_rate": 9.552697023389923e-06, + "loss": 20.5158, + "step": 111050 + }, + { + "epoch": 0.2243482265864567, + "grad_norm": 272.7997131347656, + "learning_rate": 9.552552700691037e-06, + "loss": 28.2356, + "step": 111060 + }, + { + "epoch": 0.2243684272191405, + "grad_norm": 947.9569702148438, + "learning_rate": 9.552408355803564e-06, + "loss": 30.3974, + "step": 111070 + }, + { + "epoch": 0.2243886278518243, + "grad_norm": 199.59693908691406, + "learning_rate": 9.552263988728204e-06, + "loss": 22.4818, + "step": 111080 + }, + { + "epoch": 0.22440882848450813, + "grad_norm": 400.09222412109375, + "learning_rate": 9.552119599465659e-06, + "loss": 36.573, + "step": 111090 + }, + { + "epoch": 0.22442902911719195, + "grad_norm": 525.7420043945312, + "learning_rate": 9.551975188016638e-06, + "loss": 29.9474, + "step": 111100 + }, + { + "epoch": 0.22444922974987577, + "grad_norm": 146.42710876464844, + "learning_rate": 9.55183075438184e-06, + "loss": 12.032, + "step": 111110 + }, + { + "epoch": 0.2244694303825596, + "grad_norm": 440.4906005859375, + "learning_rate": 9.551686298561972e-06, + "loss": 23.1909, + "step": 111120 + }, + { + "epoch": 0.22448963101524338, + "grad_norm": 417.2303771972656, + "learning_rate": 9.551541820557737e-06, + "loss": 24.6403, + "step": 111130 + }, + { + "epoch": 0.2245098316479272, + "grad_norm": 379.7185974121094, + "learning_rate": 9.551397320369838e-06, + "loss": 18.976, + "step": 111140 + }, + { + "epoch": 0.22453003228061102, + "grad_norm": 175.88348388671875, + "learning_rate": 9.551252797998982e-06, + "loss": 11.4578, + "step": 111150 + }, + { + "epoch": 0.22455023291329484, + "grad_norm": 307.9947814941406, + "learning_rate": 9.55110825344587e-06, + "loss": 14.3487, + "step": 111160 + }, + { + "epoch": 0.22457043354597866, + "grad_norm": 229.0950927734375, + "learning_rate": 9.550963686711212e-06, + "loss": 31.3663, + "step": 111170 + }, + { + "epoch": 0.22459063417866248, + "grad_norm": 649.1748657226562, + "learning_rate": 9.550819097795706e-06, + "loss": 28.7961, + "step": 111180 + }, + { + "epoch": 0.2246108348113463, + "grad_norm": 199.32424926757812, + "learning_rate": 9.55067448670006e-06, + "loss": 7.9647, + "step": 111190 + }, + { + "epoch": 0.2246310354440301, + "grad_norm": 837.6842041015625, + "learning_rate": 9.550529853424979e-06, + "loss": 24.2685, + "step": 111200 + }, + { + "epoch": 0.22465123607671392, + "grad_norm": 598.1119384765625, + "learning_rate": 9.550385197971168e-06, + "loss": 20.1005, + "step": 111210 + }, + { + "epoch": 0.22467143670939774, + "grad_norm": 620.2124633789062, + "learning_rate": 9.55024052033933e-06, + "loss": 30.4976, + "step": 111220 + }, + { + "epoch": 0.22469163734208156, + "grad_norm": 495.8719177246094, + "learning_rate": 9.550095820530172e-06, + "loss": 15.5378, + "step": 111230 + }, + { + "epoch": 0.22471183797476538, + "grad_norm": 118.84636688232422, + "learning_rate": 9.549951098544399e-06, + "loss": 13.2547, + "step": 111240 + }, + { + "epoch": 
0.2247320386074492, + "grad_norm": 369.2759704589844, + "learning_rate": 9.549806354382716e-06, + "loss": 36.1086, + "step": 111250 + }, + { + "epoch": 0.224752239240133, + "grad_norm": 393.2582092285156, + "learning_rate": 9.54966158804583e-06, + "loss": 21.1021, + "step": 111260 + }, + { + "epoch": 0.2247724398728168, + "grad_norm": 916.140869140625, + "learning_rate": 9.549516799534444e-06, + "loss": 20.0107, + "step": 111270 + }, + { + "epoch": 0.22479264050550063, + "grad_norm": 274.2108154296875, + "learning_rate": 9.549371988849266e-06, + "loss": 27.7032, + "step": 111280 + }, + { + "epoch": 0.22481284113818445, + "grad_norm": 323.6493225097656, + "learning_rate": 9.549227155991e-06, + "loss": 27.2792, + "step": 111290 + }, + { + "epoch": 0.22483304177086827, + "grad_norm": 489.6667785644531, + "learning_rate": 9.549082300960351e-06, + "loss": 15.4463, + "step": 111300 + }, + { + "epoch": 0.2248532424035521, + "grad_norm": 504.2126159667969, + "learning_rate": 9.54893742375803e-06, + "loss": 16.0059, + "step": 111310 + }, + { + "epoch": 0.22487344303623588, + "grad_norm": 856.1909790039062, + "learning_rate": 9.548792524384735e-06, + "loss": 38.3172, + "step": 111320 + }, + { + "epoch": 0.2248936436689197, + "grad_norm": 282.4917297363281, + "learning_rate": 9.54864760284118e-06, + "loss": 19.7568, + "step": 111330 + }, + { + "epoch": 0.22491384430160352, + "grad_norm": 543.8866577148438, + "learning_rate": 9.548502659128069e-06, + "loss": 21.8816, + "step": 111340 + }, + { + "epoch": 0.22493404493428734, + "grad_norm": 304.6163635253906, + "learning_rate": 9.548357693246107e-06, + "loss": 31.6129, + "step": 111350 + }, + { + "epoch": 0.22495424556697116, + "grad_norm": 413.11669921875, + "learning_rate": 9.548212705196e-06, + "loss": 17.6464, + "step": 111360 + }, + { + "epoch": 0.22497444619965498, + "grad_norm": 845.2156982421875, + "learning_rate": 9.548067694978457e-06, + "loss": 49.6162, + "step": 111370 + }, + { + "epoch": 0.2249946468323388, + "grad_norm": 89.01980590820312, + "learning_rate": 9.547922662594183e-06, + "loss": 25.6655, + "step": 111380 + }, + { + "epoch": 0.2250148474650226, + "grad_norm": 380.6142578125, + "learning_rate": 9.547777608043886e-06, + "loss": 24.6077, + "step": 111390 + }, + { + "epoch": 0.2250350480977064, + "grad_norm": 460.5263671875, + "learning_rate": 9.547632531328273e-06, + "loss": 31.8443, + "step": 111400 + }, + { + "epoch": 0.22505524873039023, + "grad_norm": 510.1470031738281, + "learning_rate": 9.54748743244805e-06, + "loss": 33.939, + "step": 111410 + }, + { + "epoch": 0.22507544936307405, + "grad_norm": 775.4287719726562, + "learning_rate": 9.547342311403924e-06, + "loss": 23.3685, + "step": 111420 + }, + { + "epoch": 0.22509564999575787, + "grad_norm": 920.58544921875, + "learning_rate": 9.547197168196605e-06, + "loss": 27.1471, + "step": 111430 + }, + { + "epoch": 0.2251158506284417, + "grad_norm": 850.289306640625, + "learning_rate": 9.547052002826797e-06, + "loss": 16.5851, + "step": 111440 + }, + { + "epoch": 0.22513605126112549, + "grad_norm": 235.76002502441406, + "learning_rate": 9.546906815295209e-06, + "loss": 15.4825, + "step": 111450 + }, + { + "epoch": 0.2251562518938093, + "grad_norm": 392.30694580078125, + "learning_rate": 9.54676160560255e-06, + "loss": 28.6752, + "step": 111460 + }, + { + "epoch": 0.22517645252649313, + "grad_norm": 790.8316650390625, + "learning_rate": 9.546616373749525e-06, + "loss": 22.8385, + "step": 111470 + }, + { + "epoch": 0.22519665315917695, + "grad_norm": 321.32208251953125, + 
"learning_rate": 9.546471119736845e-06, + "loss": 22.9641, + "step": 111480 + }, + { + "epoch": 0.22521685379186077, + "grad_norm": 283.69757080078125, + "learning_rate": 9.546325843565213e-06, + "loss": 22.2612, + "step": 111490 + }, + { + "epoch": 0.22523705442454459, + "grad_norm": 750.2130737304688, + "learning_rate": 9.546180545235344e-06, + "loss": 35.2801, + "step": 111500 + }, + { + "epoch": 0.2252572550572284, + "grad_norm": 676.2631225585938, + "learning_rate": 9.54603522474794e-06, + "loss": 36.5525, + "step": 111510 + }, + { + "epoch": 0.2252774556899122, + "grad_norm": 475.3135986328125, + "learning_rate": 9.545889882103712e-06, + "loss": 21.8077, + "step": 111520 + }, + { + "epoch": 0.22529765632259602, + "grad_norm": 381.10906982421875, + "learning_rate": 9.545744517303368e-06, + "loss": 18.8951, + "step": 111530 + }, + { + "epoch": 0.22531785695527984, + "grad_norm": 235.5245819091797, + "learning_rate": 9.545599130347618e-06, + "loss": 20.7448, + "step": 111540 + }, + { + "epoch": 0.22533805758796366, + "grad_norm": 642.7045288085938, + "learning_rate": 9.545453721237167e-06, + "loss": 40.0942, + "step": 111550 + }, + { + "epoch": 0.22535825822064748, + "grad_norm": 204.07562255859375, + "learning_rate": 9.545308289972727e-06, + "loss": 26.127, + "step": 111560 + }, + { + "epoch": 0.2253784588533313, + "grad_norm": 640.2852783203125, + "learning_rate": 9.545162836555006e-06, + "loss": 31.8185, + "step": 111570 + }, + { + "epoch": 0.2253986594860151, + "grad_norm": 175.4241180419922, + "learning_rate": 9.545017360984713e-06, + "loss": 13.45, + "step": 111580 + }, + { + "epoch": 0.2254188601186989, + "grad_norm": 404.0137023925781, + "learning_rate": 9.544871863262556e-06, + "loss": 22.8184, + "step": 111590 + }, + { + "epoch": 0.22543906075138273, + "grad_norm": 473.1271667480469, + "learning_rate": 9.544726343389245e-06, + "loss": 17.9111, + "step": 111600 + }, + { + "epoch": 0.22545926138406655, + "grad_norm": 268.3240661621094, + "learning_rate": 9.544580801365488e-06, + "loss": 22.2862, + "step": 111610 + }, + { + "epoch": 0.22547946201675037, + "grad_norm": 25.966659545898438, + "learning_rate": 9.544435237191996e-06, + "loss": 8.5848, + "step": 111620 + }, + { + "epoch": 0.2254996626494342, + "grad_norm": 814.0604858398438, + "learning_rate": 9.544289650869477e-06, + "loss": 37.6229, + "step": 111630 + }, + { + "epoch": 0.22551986328211798, + "grad_norm": 266.2414855957031, + "learning_rate": 9.544144042398643e-06, + "loss": 17.7856, + "step": 111640 + }, + { + "epoch": 0.2255400639148018, + "grad_norm": 972.8816528320312, + "learning_rate": 9.543998411780202e-06, + "loss": 21.9517, + "step": 111650 + }, + { + "epoch": 0.22556026454748562, + "grad_norm": 123.94517517089844, + "learning_rate": 9.543852759014863e-06, + "loss": 27.2369, + "step": 111660 + }, + { + "epoch": 0.22558046518016944, + "grad_norm": 396.4852294921875, + "learning_rate": 9.543707084103337e-06, + "loss": 18.2719, + "step": 111670 + }, + { + "epoch": 0.22560066581285326, + "grad_norm": 260.7420959472656, + "learning_rate": 9.543561387046333e-06, + "loss": 22.495, + "step": 111680 + }, + { + "epoch": 0.22562086644553708, + "grad_norm": 660.4745483398438, + "learning_rate": 9.543415667844562e-06, + "loss": 14.4435, + "step": 111690 + }, + { + "epoch": 0.2256410670782209, + "grad_norm": 599.4949951171875, + "learning_rate": 9.543269926498735e-06, + "loss": 21.1961, + "step": 111700 + }, + { + "epoch": 0.2256612677109047, + "grad_norm": 991.4645385742188, + "learning_rate": 9.54312416300956e-06, + 
"loss": 29.988, + "step": 111710 + }, + { + "epoch": 0.22568146834358851, + "grad_norm": 1059.73779296875, + "learning_rate": 9.542978377377752e-06, + "loss": 19.3108, + "step": 111720 + }, + { + "epoch": 0.22570166897627233, + "grad_norm": 53.69286346435547, + "learning_rate": 9.542832569604014e-06, + "loss": 19.8365, + "step": 111730 + }, + { + "epoch": 0.22572186960895615, + "grad_norm": 449.1264343261719, + "learning_rate": 9.542686739689064e-06, + "loss": 31.5003, + "step": 111740 + }, + { + "epoch": 0.22574207024163997, + "grad_norm": 1351.2528076171875, + "learning_rate": 9.54254088763361e-06, + "loss": 38.6739, + "step": 111750 + }, + { + "epoch": 0.2257622708743238, + "grad_norm": 235.5785675048828, + "learning_rate": 9.54239501343836e-06, + "loss": 12.7008, + "step": 111760 + }, + { + "epoch": 0.2257824715070076, + "grad_norm": 852.9555053710938, + "learning_rate": 9.542249117104032e-06, + "loss": 26.4823, + "step": 111770 + }, + { + "epoch": 0.2258026721396914, + "grad_norm": 936.4501342773438, + "learning_rate": 9.54210319863133e-06, + "loss": 28.6118, + "step": 111780 + }, + { + "epoch": 0.22582287277237523, + "grad_norm": 168.53668212890625, + "learning_rate": 9.541957258020967e-06, + "loss": 20.6891, + "step": 111790 + }, + { + "epoch": 0.22584307340505905, + "grad_norm": 417.7174072265625, + "learning_rate": 9.541811295273657e-06, + "loss": 19.725, + "step": 111800 + }, + { + "epoch": 0.22586327403774287, + "grad_norm": 171.90603637695312, + "learning_rate": 9.541665310390109e-06, + "loss": 25.9919, + "step": 111810 + }, + { + "epoch": 0.2258834746704267, + "grad_norm": 428.6181335449219, + "learning_rate": 9.541519303371034e-06, + "loss": 14.9894, + "step": 111820 + }, + { + "epoch": 0.2259036753031105, + "grad_norm": 104.92010498046875, + "learning_rate": 9.541373274217145e-06, + "loss": 19.1093, + "step": 111830 + }, + { + "epoch": 0.2259238759357943, + "grad_norm": 191.61997985839844, + "learning_rate": 9.541227222929155e-06, + "loss": 20.3224, + "step": 111840 + }, + { + "epoch": 0.22594407656847812, + "grad_norm": 79.96380615234375, + "learning_rate": 9.541081149507774e-06, + "loss": 25.1008, + "step": 111850 + }, + { + "epoch": 0.22596427720116194, + "grad_norm": 336.1278076171875, + "learning_rate": 9.540935053953713e-06, + "loss": 29.8053, + "step": 111860 + }, + { + "epoch": 0.22598447783384576, + "grad_norm": 9.312488555908203, + "learning_rate": 9.540788936267686e-06, + "loss": 26.7509, + "step": 111870 + }, + { + "epoch": 0.22600467846652958, + "grad_norm": 24.288253784179688, + "learning_rate": 9.540642796450403e-06, + "loss": 18.424, + "step": 111880 + }, + { + "epoch": 0.2260248790992134, + "grad_norm": 492.887451171875, + "learning_rate": 9.540496634502581e-06, + "loss": 17.8159, + "step": 111890 + }, + { + "epoch": 0.2260450797318972, + "grad_norm": 566.306396484375, + "learning_rate": 9.540350450424927e-06, + "loss": 16.7127, + "step": 111900 + }, + { + "epoch": 0.226065280364581, + "grad_norm": 519.557861328125, + "learning_rate": 9.540204244218155e-06, + "loss": 26.8343, + "step": 111910 + }, + { + "epoch": 0.22608548099726483, + "grad_norm": 24.523590087890625, + "learning_rate": 9.54005801588298e-06, + "loss": 22.0106, + "step": 111920 + }, + { + "epoch": 0.22610568162994865, + "grad_norm": 392.7301940917969, + "learning_rate": 9.539911765420112e-06, + "loss": 17.0939, + "step": 111930 + }, + { + "epoch": 0.22612588226263247, + "grad_norm": 214.16812133789062, + "learning_rate": 9.539765492830265e-06, + "loss": 18.5989, + "step": 111940 + }, + { + 
"epoch": 0.2261460828953163, + "grad_norm": 410.8782043457031, + "learning_rate": 9.53961919811415e-06, + "loss": 17.4185, + "step": 111950 + }, + { + "epoch": 0.22616628352800008, + "grad_norm": 475.45269775390625, + "learning_rate": 9.539472881272483e-06, + "loss": 24.886, + "step": 111960 + }, + { + "epoch": 0.2261864841606839, + "grad_norm": 322.9455261230469, + "learning_rate": 9.539326542305975e-06, + "loss": 15.8871, + "step": 111970 + }, + { + "epoch": 0.22620668479336772, + "grad_norm": 180.3836669921875, + "learning_rate": 9.53918018121534e-06, + "loss": 23.3022, + "step": 111980 + }, + { + "epoch": 0.22622688542605154, + "grad_norm": 707.36669921875, + "learning_rate": 9.539033798001293e-06, + "loss": 26.9615, + "step": 111990 + }, + { + "epoch": 0.22624708605873536, + "grad_norm": 3.039818525314331, + "learning_rate": 9.538887392664544e-06, + "loss": 17.2103, + "step": 112000 + }, + { + "epoch": 0.22626728669141918, + "grad_norm": 361.5117492675781, + "learning_rate": 9.538740965205809e-06, + "loss": 32.8633, + "step": 112010 + }, + { + "epoch": 0.226287487324103, + "grad_norm": 146.8135986328125, + "learning_rate": 9.538594515625802e-06, + "loss": 18.9709, + "step": 112020 + }, + { + "epoch": 0.2263076879567868, + "grad_norm": 872.6524658203125, + "learning_rate": 9.538448043925234e-06, + "loss": 24.574, + "step": 112030 + }, + { + "epoch": 0.22632788858947062, + "grad_norm": 659.1326293945312, + "learning_rate": 9.538301550104822e-06, + "loss": 15.0339, + "step": 112040 + }, + { + "epoch": 0.22634808922215444, + "grad_norm": 143.9538116455078, + "learning_rate": 9.538155034165277e-06, + "loss": 16.5316, + "step": 112050 + }, + { + "epoch": 0.22636828985483826, + "grad_norm": 904.4579467773438, + "learning_rate": 9.538008496107317e-06, + "loss": 21.554, + "step": 112060 + }, + { + "epoch": 0.22638849048752208, + "grad_norm": 102.9317626953125, + "learning_rate": 9.537861935931651e-06, + "loss": 18.2125, + "step": 112070 + }, + { + "epoch": 0.2264086911202059, + "grad_norm": 378.7657165527344, + "learning_rate": 9.537715353639e-06, + "loss": 21.2663, + "step": 112080 + }, + { + "epoch": 0.2264288917528897, + "grad_norm": 641.4647827148438, + "learning_rate": 9.537568749230074e-06, + "loss": 21.2919, + "step": 112090 + }, + { + "epoch": 0.2264490923855735, + "grad_norm": 263.9422302246094, + "learning_rate": 9.537422122705585e-06, + "loss": 32.0067, + "step": 112100 + }, + { + "epoch": 0.22646929301825733, + "grad_norm": 938.2413940429688, + "learning_rate": 9.537275474066254e-06, + "loss": 33.6979, + "step": 112110 + }, + { + "epoch": 0.22648949365094115, + "grad_norm": 665.4387817382812, + "learning_rate": 9.537128803312792e-06, + "loss": 26.8343, + "step": 112120 + }, + { + "epoch": 0.22650969428362497, + "grad_norm": 271.7997741699219, + "learning_rate": 9.536982110445913e-06, + "loss": 32.9056, + "step": 112130 + }, + { + "epoch": 0.2265298949163088, + "grad_norm": 257.39288330078125, + "learning_rate": 9.536835395466334e-06, + "loss": 12.9188, + "step": 112140 + }, + { + "epoch": 0.2265500955489926, + "grad_norm": 767.7734985351562, + "learning_rate": 9.536688658374771e-06, + "loss": 33.6596, + "step": 112150 + }, + { + "epoch": 0.2265702961816764, + "grad_norm": 471.4989929199219, + "learning_rate": 9.536541899171936e-06, + "loss": 23.1471, + "step": 112160 + }, + { + "epoch": 0.22659049681436022, + "grad_norm": 717.8173217773438, + "learning_rate": 9.536395117858548e-06, + "loss": 18.662, + "step": 112170 + }, + { + "epoch": 0.22661069744704404, + "grad_norm": 
242.91160583496094, + "learning_rate": 9.53624831443532e-06, + "loss": 29.4914, + "step": 112180 + }, + { + "epoch": 0.22663089807972786, + "grad_norm": 279.1715087890625, + "learning_rate": 9.536101488902966e-06, + "loss": 19.7264, + "step": 112190 + }, + { + "epoch": 0.22665109871241168, + "grad_norm": 438.48895263671875, + "learning_rate": 9.535954641262206e-06, + "loss": 30.4032, + "step": 112200 + }, + { + "epoch": 0.2266712993450955, + "grad_norm": 118.90886688232422, + "learning_rate": 9.535807771513751e-06, + "loss": 12.928, + "step": 112210 + }, + { + "epoch": 0.2266914999777793, + "grad_norm": 493.9158935546875, + "learning_rate": 9.53566087965832e-06, + "loss": 18.6299, + "step": 112220 + }, + { + "epoch": 0.2267117006104631, + "grad_norm": 15.68353271484375, + "learning_rate": 9.535513965696628e-06, + "loss": 13.0776, + "step": 112230 + }, + { + "epoch": 0.22673190124314693, + "grad_norm": 35.68951416015625, + "learning_rate": 9.535367029629392e-06, + "loss": 18.3588, + "step": 112240 + }, + { + "epoch": 0.22675210187583075, + "grad_norm": 236.45184326171875, + "learning_rate": 9.535220071457325e-06, + "loss": 12.6726, + "step": 112250 + }, + { + "epoch": 0.22677230250851457, + "grad_norm": 247.4632110595703, + "learning_rate": 9.535073091181148e-06, + "loss": 32.0151, + "step": 112260 + }, + { + "epoch": 0.2267925031411984, + "grad_norm": 541.4338989257812, + "learning_rate": 9.534926088801572e-06, + "loss": 16.6592, + "step": 112270 + }, + { + "epoch": 0.22681270377388218, + "grad_norm": 585.0704345703125, + "learning_rate": 9.534779064319318e-06, + "loss": 21.0367, + "step": 112280 + }, + { + "epoch": 0.226832904406566, + "grad_norm": 198.61337280273438, + "learning_rate": 9.5346320177351e-06, + "loss": 18.1143, + "step": 112290 + }, + { + "epoch": 0.22685310503924982, + "grad_norm": 369.2171325683594, + "learning_rate": 9.534484949049636e-06, + "loss": 13.4298, + "step": 112300 + }, + { + "epoch": 0.22687330567193364, + "grad_norm": 388.0574951171875, + "learning_rate": 9.534337858263643e-06, + "loss": 44.5849, + "step": 112310 + }, + { + "epoch": 0.22689350630461746, + "grad_norm": 365.3808898925781, + "learning_rate": 9.534190745377837e-06, + "loss": 15.5231, + "step": 112320 + }, + { + "epoch": 0.22691370693730128, + "grad_norm": 521.5186767578125, + "learning_rate": 9.534043610392934e-06, + "loss": 41.7762, + "step": 112330 + }, + { + "epoch": 0.2269339075699851, + "grad_norm": 520.6649169921875, + "learning_rate": 9.533896453309654e-06, + "loss": 22.0477, + "step": 112340 + }, + { + "epoch": 0.2269541082026689, + "grad_norm": 124.58840942382812, + "learning_rate": 9.533749274128712e-06, + "loss": 15.6566, + "step": 112350 + }, + { + "epoch": 0.22697430883535272, + "grad_norm": 254.6902618408203, + "learning_rate": 9.533602072850826e-06, + "loss": 21.6187, + "step": 112360 + }, + { + "epoch": 0.22699450946803654, + "grad_norm": 391.0091247558594, + "learning_rate": 9.533454849476712e-06, + "loss": 28.8004, + "step": 112370 + }, + { + "epoch": 0.22701471010072036, + "grad_norm": 491.1453552246094, + "learning_rate": 9.533307604007089e-06, + "loss": 11.2923, + "step": 112380 + }, + { + "epoch": 0.22703491073340418, + "grad_norm": 175.738525390625, + "learning_rate": 9.533160336442677e-06, + "loss": 36.2641, + "step": 112390 + }, + { + "epoch": 0.227055111366088, + "grad_norm": 795.9658203125, + "learning_rate": 9.53301304678419e-06, + "loss": 27.7926, + "step": 112400 + }, + { + "epoch": 0.2270753119987718, + "grad_norm": 3.568608283996582, + "learning_rate": 
9.532865735032346e-06, + "loss": 12.5416, + "step": 112410 + }, + { + "epoch": 0.2270955126314556, + "grad_norm": 279.2919006347656, + "learning_rate": 9.532718401187866e-06, + "loss": 16.607, + "step": 112420 + }, + { + "epoch": 0.22711571326413943, + "grad_norm": 816.3533935546875, + "learning_rate": 9.532571045251465e-06, + "loss": 21.4067, + "step": 112430 + }, + { + "epoch": 0.22713591389682325, + "grad_norm": 700.81884765625, + "learning_rate": 9.532423667223863e-06, + "loss": 32.1505, + "step": 112440 + }, + { + "epoch": 0.22715611452950707, + "grad_norm": 813.9895629882812, + "learning_rate": 9.532276267105778e-06, + "loss": 14.7046, + "step": 112450 + }, + { + "epoch": 0.2271763151621909, + "grad_norm": 821.1465454101562, + "learning_rate": 9.532128844897928e-06, + "loss": 28.3573, + "step": 112460 + }, + { + "epoch": 0.22719651579487468, + "grad_norm": 235.25338745117188, + "learning_rate": 9.531981400601032e-06, + "loss": 13.526, + "step": 112470 + }, + { + "epoch": 0.2272167164275585, + "grad_norm": 904.8468017578125, + "learning_rate": 9.531833934215807e-06, + "loss": 28.7744, + "step": 112480 + }, + { + "epoch": 0.22723691706024232, + "grad_norm": 13.919620513916016, + "learning_rate": 9.531686445742973e-06, + "loss": 14.4584, + "step": 112490 + }, + { + "epoch": 0.22725711769292614, + "grad_norm": 431.6491394042969, + "learning_rate": 9.531538935183252e-06, + "loss": 27.3013, + "step": 112500 + }, + { + "epoch": 0.22727731832560996, + "grad_norm": 325.6951904296875, + "learning_rate": 9.531391402537355e-06, + "loss": 20.2664, + "step": 112510 + }, + { + "epoch": 0.22729751895829378, + "grad_norm": 207.41888427734375, + "learning_rate": 9.53124384780601e-06, + "loss": 32.2594, + "step": 112520 + }, + { + "epoch": 0.2273177195909776, + "grad_norm": 669.0628051757812, + "learning_rate": 9.53109627098993e-06, + "loss": 22.2678, + "step": 112530 + }, + { + "epoch": 0.2273379202236614, + "grad_norm": 403.3323974609375, + "learning_rate": 9.530948672089837e-06, + "loss": 16.2452, + "step": 112540 + }, + { + "epoch": 0.2273581208563452, + "grad_norm": 529.3194580078125, + "learning_rate": 9.530801051106449e-06, + "loss": 16.2755, + "step": 112550 + }, + { + "epoch": 0.22737832148902903, + "grad_norm": 595.4201049804688, + "learning_rate": 9.530653408040487e-06, + "loss": 33.8954, + "step": 112560 + }, + { + "epoch": 0.22739852212171285, + "grad_norm": 463.9022521972656, + "learning_rate": 9.53050574289267e-06, + "loss": 10.3893, + "step": 112570 + }, + { + "epoch": 0.22741872275439667, + "grad_norm": 379.9695129394531, + "learning_rate": 9.530358055663718e-06, + "loss": 45.6644, + "step": 112580 + }, + { + "epoch": 0.2274389233870805, + "grad_norm": 393.81353759765625, + "learning_rate": 9.53021034635435e-06, + "loss": 16.6902, + "step": 112590 + }, + { + "epoch": 0.22745912401976429, + "grad_norm": 434.11199951171875, + "learning_rate": 9.530062614965286e-06, + "loss": 18.362, + "step": 112600 + }, + { + "epoch": 0.2274793246524481, + "grad_norm": 171.09864807128906, + "learning_rate": 9.529914861497244e-06, + "loss": 21.0003, + "step": 112610 + }, + { + "epoch": 0.22749952528513193, + "grad_norm": 449.3027038574219, + "learning_rate": 9.52976708595095e-06, + "loss": 32.4687, + "step": 112620 + }, + { + "epoch": 0.22751972591781575, + "grad_norm": 510.5466613769531, + "learning_rate": 9.529619288327118e-06, + "loss": 23.3475, + "step": 112630 + }, + { + "epoch": 0.22753992655049957, + "grad_norm": 550.9493408203125, + "learning_rate": 9.529471468626472e-06, + "loss": 16.5837, + 
"step": 112640 + }, + { + "epoch": 0.22756012718318339, + "grad_norm": 85.97711944580078, + "learning_rate": 9.529323626849731e-06, + "loss": 23.2321, + "step": 112650 + }, + { + "epoch": 0.2275803278158672, + "grad_norm": 622.16650390625, + "learning_rate": 9.529175762997616e-06, + "loss": 29.7396, + "step": 112660 + }, + { + "epoch": 0.227600528448551, + "grad_norm": 308.7371826171875, + "learning_rate": 9.529027877070848e-06, + "loss": 16.2574, + "step": 112670 + }, + { + "epoch": 0.22762072908123482, + "grad_norm": 538.2896728515625, + "learning_rate": 9.528879969070148e-06, + "loss": 14.8027, + "step": 112680 + }, + { + "epoch": 0.22764092971391864, + "grad_norm": 121.7942123413086, + "learning_rate": 9.528732038996236e-06, + "loss": 16.3837, + "step": 112690 + }, + { + "epoch": 0.22766113034660246, + "grad_norm": 317.30841064453125, + "learning_rate": 9.528584086849832e-06, + "loss": 15.4871, + "step": 112700 + }, + { + "epoch": 0.22768133097928628, + "grad_norm": 615.7967529296875, + "learning_rate": 9.528436112631659e-06, + "loss": 20.6995, + "step": 112710 + }, + { + "epoch": 0.2277015316119701, + "grad_norm": 272.8873596191406, + "learning_rate": 9.528288116342439e-06, + "loss": 20.939, + "step": 112720 + }, + { + "epoch": 0.2277217322446539, + "grad_norm": 316.3938903808594, + "learning_rate": 9.52814009798289e-06, + "loss": 30.0582, + "step": 112730 + }, + { + "epoch": 0.2277419328773377, + "grad_norm": 345.8398132324219, + "learning_rate": 9.527992057553736e-06, + "loss": 23.2847, + "step": 112740 + }, + { + "epoch": 0.22776213351002153, + "grad_norm": 638.2636108398438, + "learning_rate": 9.527843995055698e-06, + "loss": 19.8952, + "step": 112750 + }, + { + "epoch": 0.22778233414270535, + "grad_norm": 160.67169189453125, + "learning_rate": 9.527695910489498e-06, + "loss": 22.0684, + "step": 112760 + }, + { + "epoch": 0.22780253477538917, + "grad_norm": 281.597900390625, + "learning_rate": 9.527547803855858e-06, + "loss": 26.0992, + "step": 112770 + }, + { + "epoch": 0.227822735408073, + "grad_norm": 515.9508056640625, + "learning_rate": 9.527399675155497e-06, + "loss": 21.5287, + "step": 112780 + }, + { + "epoch": 0.22784293604075678, + "grad_norm": 284.7909851074219, + "learning_rate": 9.52725152438914e-06, + "loss": 53.4256, + "step": 112790 + }, + { + "epoch": 0.2278631366734406, + "grad_norm": 876.8087158203125, + "learning_rate": 9.52710335155751e-06, + "loss": 24.4455, + "step": 112800 + }, + { + "epoch": 0.22788333730612442, + "grad_norm": 279.994140625, + "learning_rate": 9.526955156661324e-06, + "loss": 13.2789, + "step": 112810 + }, + { + "epoch": 0.22790353793880824, + "grad_norm": 479.1186218261719, + "learning_rate": 9.52680693970131e-06, + "loss": 32.3463, + "step": 112820 + }, + { + "epoch": 0.22792373857149206, + "grad_norm": 243.9750213623047, + "learning_rate": 9.526658700678188e-06, + "loss": 43.7144, + "step": 112830 + }, + { + "epoch": 0.22794393920417588, + "grad_norm": 639.2210083007812, + "learning_rate": 9.52651043959268e-06, + "loss": 16.2402, + "step": 112840 + }, + { + "epoch": 0.2279641398368597, + "grad_norm": 250.73939514160156, + "learning_rate": 9.526362156445508e-06, + "loss": 18.0421, + "step": 112850 + }, + { + "epoch": 0.2279843404695435, + "grad_norm": 760.5935668945312, + "learning_rate": 9.526213851237396e-06, + "loss": 25.3497, + "step": 112860 + }, + { + "epoch": 0.22800454110222731, + "grad_norm": 196.826904296875, + "learning_rate": 9.526065523969068e-06, + "loss": 21.9941, + "step": 112870 + }, + { + "epoch": 0.22802474173491113, + 
"grad_norm": 480.93963623046875, + "learning_rate": 9.525917174641246e-06, + "loss": 33.3459, + "step": 112880 + }, + { + "epoch": 0.22804494236759495, + "grad_norm": 358.2469177246094, + "learning_rate": 9.525768803254651e-06, + "loss": 20.38, + "step": 112890 + }, + { + "epoch": 0.22806514300027877, + "grad_norm": 581.6536254882812, + "learning_rate": 9.525620409810009e-06, + "loss": 19.1054, + "step": 112900 + }, + { + "epoch": 0.2280853436329626, + "grad_norm": 692.8551635742188, + "learning_rate": 9.52547199430804e-06, + "loss": 32.9187, + "step": 112910 + }, + { + "epoch": 0.2281055442656464, + "grad_norm": 148.31716918945312, + "learning_rate": 9.525323556749472e-06, + "loss": 23.5628, + "step": 112920 + }, + { + "epoch": 0.2281257448983302, + "grad_norm": 1818.9072265625, + "learning_rate": 9.525175097135024e-06, + "loss": 25.2567, + "step": 112930 + }, + { + "epoch": 0.22814594553101403, + "grad_norm": 522.2452392578125, + "learning_rate": 9.525026615465423e-06, + "loss": 27.8579, + "step": 112940 + }, + { + "epoch": 0.22816614616369785, + "grad_norm": 467.10198974609375, + "learning_rate": 9.524878111741388e-06, + "loss": 11.4076, + "step": 112950 + }, + { + "epoch": 0.22818634679638167, + "grad_norm": 308.8968505859375, + "learning_rate": 9.52472958596365e-06, + "loss": 38.7424, + "step": 112960 + }, + { + "epoch": 0.2282065474290655, + "grad_norm": 1003.9118041992188, + "learning_rate": 9.524581038132927e-06, + "loss": 52.8455, + "step": 112970 + }, + { + "epoch": 0.2282267480617493, + "grad_norm": 84.62493896484375, + "learning_rate": 9.524432468249944e-06, + "loss": 29.7125, + "step": 112980 + }, + { + "epoch": 0.2282469486944331, + "grad_norm": 271.1474609375, + "learning_rate": 9.524283876315427e-06, + "loss": 51.8876, + "step": 112990 + }, + { + "epoch": 0.22826714932711692, + "grad_norm": 345.8349609375, + "learning_rate": 9.524135262330098e-06, + "loss": 29.9309, + "step": 113000 + }, + { + "epoch": 0.22828734995980074, + "grad_norm": 87.12869262695312, + "learning_rate": 9.523986626294683e-06, + "loss": 30.1322, + "step": 113010 + }, + { + "epoch": 0.22830755059248456, + "grad_norm": 341.88641357421875, + "learning_rate": 9.523837968209906e-06, + "loss": 20.9121, + "step": 113020 + }, + { + "epoch": 0.22832775122516838, + "grad_norm": 96.60131072998047, + "learning_rate": 9.523689288076491e-06, + "loss": 26.9941, + "step": 113030 + }, + { + "epoch": 0.2283479518578522, + "grad_norm": 236.14979553222656, + "learning_rate": 9.523540585895164e-06, + "loss": 28.8258, + "step": 113040 + }, + { + "epoch": 0.228368152490536, + "grad_norm": 124.12235260009766, + "learning_rate": 9.523391861666649e-06, + "loss": 15.0267, + "step": 113050 + }, + { + "epoch": 0.2283883531232198, + "grad_norm": 914.7811889648438, + "learning_rate": 9.52324311539167e-06, + "loss": 31.5337, + "step": 113060 + }, + { + "epoch": 0.22840855375590363, + "grad_norm": 457.1055603027344, + "learning_rate": 9.523094347070951e-06, + "loss": 28.5129, + "step": 113070 + }, + { + "epoch": 0.22842875438858745, + "grad_norm": 285.68121337890625, + "learning_rate": 9.522945556705221e-06, + "loss": 25.1645, + "step": 113080 + }, + { + "epoch": 0.22844895502127127, + "grad_norm": 228.176025390625, + "learning_rate": 9.522796744295202e-06, + "loss": 15.6948, + "step": 113090 + }, + { + "epoch": 0.2284691556539551, + "grad_norm": 198.18495178222656, + "learning_rate": 9.52264790984162e-06, + "loss": 21.6688, + "step": 113100 + }, + { + "epoch": 0.22848935628663888, + "grad_norm": 365.2386169433594, + "learning_rate": 
9.522499053345203e-06, + "loss": 21.5408, + "step": 113110 + }, + { + "epoch": 0.2285095569193227, + "grad_norm": 750.3790893554688, + "learning_rate": 9.522350174806672e-06, + "loss": 29.4794, + "step": 113120 + }, + { + "epoch": 0.22852975755200652, + "grad_norm": 326.8194580078125, + "learning_rate": 9.522201274226755e-06, + "loss": 17.8257, + "step": 113130 + }, + { + "epoch": 0.22854995818469034, + "grad_norm": 378.5831604003906, + "learning_rate": 9.522052351606177e-06, + "loss": 17.2237, + "step": 113140 + }, + { + "epoch": 0.22857015881737416, + "grad_norm": 251.2360076904297, + "learning_rate": 9.521903406945666e-06, + "loss": 25.7183, + "step": 113150 + }, + { + "epoch": 0.22859035945005798, + "grad_norm": 592.8699951171875, + "learning_rate": 9.521754440245944e-06, + "loss": 16.1047, + "step": 113160 + }, + { + "epoch": 0.2286105600827418, + "grad_norm": 464.73004150390625, + "learning_rate": 9.52160545150774e-06, + "loss": 34.6223, + "step": 113170 + }, + { + "epoch": 0.2286307607154256, + "grad_norm": 495.97625732421875, + "learning_rate": 9.52145644073178e-06, + "loss": 18.1199, + "step": 113180 + }, + { + "epoch": 0.22865096134810942, + "grad_norm": 785.1619262695312, + "learning_rate": 9.52130740791879e-06, + "loss": 20.4017, + "step": 113190 + }, + { + "epoch": 0.22867116198079324, + "grad_norm": 369.74822998046875, + "learning_rate": 9.521158353069494e-06, + "loss": 21.0754, + "step": 113200 + }, + { + "epoch": 0.22869136261347706, + "grad_norm": 249.15182495117188, + "learning_rate": 9.521009276184624e-06, + "loss": 11.2801, + "step": 113210 + }, + { + "epoch": 0.22871156324616088, + "grad_norm": 665.263671875, + "learning_rate": 9.520860177264898e-06, + "loss": 11.3987, + "step": 113220 + }, + { + "epoch": 0.2287317638788447, + "grad_norm": 1030.80078125, + "learning_rate": 9.520711056311052e-06, + "loss": 33.1406, + "step": 113230 + }, + { + "epoch": 0.2287519645115285, + "grad_norm": 1151.8558349609375, + "learning_rate": 9.520561913323807e-06, + "loss": 30.7249, + "step": 113240 + }, + { + "epoch": 0.2287721651442123, + "grad_norm": 860.5791015625, + "learning_rate": 9.520412748303894e-06, + "loss": 33.5727, + "step": 113250 + }, + { + "epoch": 0.22879236577689613, + "grad_norm": 982.9208984375, + "learning_rate": 9.520263561252035e-06, + "loss": 29.1938, + "step": 113260 + }, + { + "epoch": 0.22881256640957995, + "grad_norm": 307.06475830078125, + "learning_rate": 9.52011435216896e-06, + "loss": 25.6368, + "step": 113270 + }, + { + "epoch": 0.22883276704226377, + "grad_norm": 247.4782257080078, + "learning_rate": 9.519965121055395e-06, + "loss": 25.1266, + "step": 113280 + }, + { + "epoch": 0.2288529676749476, + "grad_norm": 870.2067260742188, + "learning_rate": 9.51981586791207e-06, + "loss": 32.7014, + "step": 113290 + }, + { + "epoch": 0.2288731683076314, + "grad_norm": 69.65391540527344, + "learning_rate": 9.51966659273971e-06, + "loss": 28.6963, + "step": 113300 + }, + { + "epoch": 0.2288933689403152, + "grad_norm": 327.62384033203125, + "learning_rate": 9.519517295539042e-06, + "loss": 33.9486, + "step": 113310 + }, + { + "epoch": 0.22891356957299902, + "grad_norm": 345.7291259765625, + "learning_rate": 9.519367976310796e-06, + "loss": 19.4765, + "step": 113320 + }, + { + "epoch": 0.22893377020568284, + "grad_norm": 516.2244262695312, + "learning_rate": 9.5192186350557e-06, + "loss": 22.1515, + "step": 113330 + }, + { + "epoch": 0.22895397083836666, + "grad_norm": 175.62525939941406, + "learning_rate": 9.519069271774479e-06, + "loss": 17.5981, + "step": 113340 
+ }, + { + "epoch": 0.22897417147105048, + "grad_norm": 374.07574462890625, + "learning_rate": 9.51891988646786e-06, + "loss": 29.519, + "step": 113350 + }, + { + "epoch": 0.2289943721037343, + "grad_norm": 430.2989807128906, + "learning_rate": 9.51877047913658e-06, + "loss": 24.8094, + "step": 113360 + }, + { + "epoch": 0.2290145727364181, + "grad_norm": 630.27392578125, + "learning_rate": 9.518621049781356e-06, + "loss": 20.1694, + "step": 113370 + }, + { + "epoch": 0.2290347733691019, + "grad_norm": 365.74542236328125, + "learning_rate": 9.51847159840292e-06, + "loss": 21.1702, + "step": 113380 + }, + { + "epoch": 0.22905497400178573, + "grad_norm": 506.5300598144531, + "learning_rate": 9.518322125002004e-06, + "loss": 15.3547, + "step": 113390 + }, + { + "epoch": 0.22907517463446955, + "grad_norm": 395.3695983886719, + "learning_rate": 9.518172629579334e-06, + "loss": 17.4382, + "step": 113400 + }, + { + "epoch": 0.22909537526715337, + "grad_norm": 669.5462036132812, + "learning_rate": 9.518023112135636e-06, + "loss": 23.1995, + "step": 113410 + }, + { + "epoch": 0.2291155758998372, + "grad_norm": 181.2217254638672, + "learning_rate": 9.517873572671646e-06, + "loss": 25.9407, + "step": 113420 + }, + { + "epoch": 0.22913577653252098, + "grad_norm": 992.484619140625, + "learning_rate": 9.517724011188083e-06, + "loss": 29.2899, + "step": 113430 + }, + { + "epoch": 0.2291559771652048, + "grad_norm": 842.7274169921875, + "learning_rate": 9.517574427685686e-06, + "loss": 32.8349, + "step": 113440 + }, + { + "epoch": 0.22917617779788862, + "grad_norm": 422.0934753417969, + "learning_rate": 9.517424822165175e-06, + "loss": 26.1276, + "step": 113450 + }, + { + "epoch": 0.22919637843057244, + "grad_norm": 738.347412109375, + "learning_rate": 9.517275194627285e-06, + "loss": 27.2194, + "step": 113460 + }, + { + "epoch": 0.22921657906325626, + "grad_norm": 692.671875, + "learning_rate": 9.517125545072743e-06, + "loss": 17.9071, + "step": 113470 + }, + { + "epoch": 0.22923677969594008, + "grad_norm": 302.58978271484375, + "learning_rate": 9.51697587350228e-06, + "loss": 22.6542, + "step": 113480 + }, + { + "epoch": 0.2292569803286239, + "grad_norm": 560.926025390625, + "learning_rate": 9.516826179916625e-06, + "loss": 28.4785, + "step": 113490 + }, + { + "epoch": 0.2292771809613077, + "grad_norm": 296.8754577636719, + "learning_rate": 9.516676464316505e-06, + "loss": 26.4145, + "step": 113500 + }, + { + "epoch": 0.22929738159399152, + "grad_norm": 276.0968933105469, + "learning_rate": 9.516526726702653e-06, + "loss": 18.9407, + "step": 113510 + }, + { + "epoch": 0.22931758222667534, + "grad_norm": 711.6101684570312, + "learning_rate": 9.516376967075797e-06, + "loss": 45.2008, + "step": 113520 + }, + { + "epoch": 0.22933778285935916, + "grad_norm": 378.4603576660156, + "learning_rate": 9.516227185436667e-06, + "loss": 25.7412, + "step": 113530 + }, + { + "epoch": 0.22935798349204298, + "grad_norm": 275.92950439453125, + "learning_rate": 9.516077381785995e-06, + "loss": 11.1533, + "step": 113540 + }, + { + "epoch": 0.2293781841247268, + "grad_norm": 602.3226928710938, + "learning_rate": 9.515927556124508e-06, + "loss": 21.1007, + "step": 113550 + }, + { + "epoch": 0.2293983847574106, + "grad_norm": 15.636978149414062, + "learning_rate": 9.515777708452938e-06, + "loss": 35.1794, + "step": 113560 + }, + { + "epoch": 0.2294185853900944, + "grad_norm": 364.009521484375, + "learning_rate": 9.515627838772016e-06, + "loss": 11.867, + "step": 113570 + }, + { + "epoch": 0.22943878602277823, + "grad_norm": 
677.7067260742188, + "learning_rate": 9.515477947082473e-06, + "loss": 18.9565, + "step": 113580 + }, + { + "epoch": 0.22945898665546205, + "grad_norm": 423.3853759765625, + "learning_rate": 9.515328033385035e-06, + "loss": 17.6075, + "step": 113590 + }, + { + "epoch": 0.22947918728814587, + "grad_norm": 252.23199462890625, + "learning_rate": 9.515178097680437e-06, + "loss": 18.3802, + "step": 113600 + }, + { + "epoch": 0.2294993879208297, + "grad_norm": 440.6402893066406, + "learning_rate": 9.515028139969409e-06, + "loss": 28.2897, + "step": 113610 + }, + { + "epoch": 0.2295195885535135, + "grad_norm": 836.3695068359375, + "learning_rate": 9.514878160252681e-06, + "loss": 28.0076, + "step": 113620 + }, + { + "epoch": 0.2295397891861973, + "grad_norm": 387.6072082519531, + "learning_rate": 9.514728158530983e-06, + "loss": 22.3299, + "step": 113630 + }, + { + "epoch": 0.22955998981888112, + "grad_norm": 1034.2071533203125, + "learning_rate": 9.51457813480505e-06, + "loss": 23.7812, + "step": 113640 + }, + { + "epoch": 0.22958019045156494, + "grad_norm": 415.7701721191406, + "learning_rate": 9.514428089075611e-06, + "loss": 31.8249, + "step": 113650 + }, + { + "epoch": 0.22960039108424876, + "grad_norm": 496.410888671875, + "learning_rate": 9.514278021343395e-06, + "loss": 12.046, + "step": 113660 + }, + { + "epoch": 0.22962059171693258, + "grad_norm": 748.201171875, + "learning_rate": 9.514127931609136e-06, + "loss": 28.4757, + "step": 113670 + }, + { + "epoch": 0.2296407923496164, + "grad_norm": 80.744140625, + "learning_rate": 9.513977819873565e-06, + "loss": 25.2247, + "step": 113680 + }, + { + "epoch": 0.2296609929823002, + "grad_norm": 148.37989807128906, + "learning_rate": 9.513827686137415e-06, + "loss": 43.3999, + "step": 113690 + }, + { + "epoch": 0.229681193614984, + "grad_norm": 455.69915771484375, + "learning_rate": 9.513677530401415e-06, + "loss": 23.1143, + "step": 113700 + }, + { + "epoch": 0.22970139424766783, + "grad_norm": 338.1597595214844, + "learning_rate": 9.513527352666298e-06, + "loss": 26.9409, + "step": 113710 + }, + { + "epoch": 0.22972159488035165, + "grad_norm": 375.36627197265625, + "learning_rate": 9.513377152932796e-06, + "loss": 16.2152, + "step": 113720 + }, + { + "epoch": 0.22974179551303547, + "grad_norm": 55.9153938293457, + "learning_rate": 9.513226931201642e-06, + "loss": 30.1076, + "step": 113730 + }, + { + "epoch": 0.2297619961457193, + "grad_norm": 522.5823974609375, + "learning_rate": 9.513076687473568e-06, + "loss": 31.5706, + "step": 113740 + }, + { + "epoch": 0.22978219677840309, + "grad_norm": 474.190185546875, + "learning_rate": 9.512926421749305e-06, + "loss": 28.7402, + "step": 113750 + }, + { + "epoch": 0.2298023974110869, + "grad_norm": 568.06005859375, + "learning_rate": 9.512776134029585e-06, + "loss": 26.3172, + "step": 113760 + }, + { + "epoch": 0.22982259804377073, + "grad_norm": 829.3409423828125, + "learning_rate": 9.512625824315142e-06, + "loss": 25.2735, + "step": 113770 + }, + { + "epoch": 0.22984279867645455, + "grad_norm": 744.4042358398438, + "learning_rate": 9.512475492606707e-06, + "loss": 31.0177, + "step": 113780 + }, + { + "epoch": 0.22986299930913837, + "grad_norm": 757.982177734375, + "learning_rate": 9.512325138905015e-06, + "loss": 19.734, + "step": 113790 + }, + { + "epoch": 0.22988319994182219, + "grad_norm": 524.1359252929688, + "learning_rate": 9.512174763210798e-06, + "loss": 19.0216, + "step": 113800 + }, + { + "epoch": 0.229903400574506, + "grad_norm": 266.0520324707031, + "learning_rate": 
9.512024365524788e-06, + "loss": 29.473, + "step": 113810 + }, + { + "epoch": 0.2299236012071898, + "grad_norm": 283.6138916015625, + "learning_rate": 9.511873945847718e-06, + "loss": 20.3686, + "step": 113820 + }, + { + "epoch": 0.22994380183987362, + "grad_norm": 327.4731750488281, + "learning_rate": 9.511723504180321e-06, + "loss": 10.5312, + "step": 113830 + }, + { + "epoch": 0.22996400247255744, + "grad_norm": 85.706298828125, + "learning_rate": 9.511573040523332e-06, + "loss": 29.0429, + "step": 113840 + }, + { + "epoch": 0.22998420310524126, + "grad_norm": 209.13125610351562, + "learning_rate": 9.511422554877482e-06, + "loss": 52.1279, + "step": 113850 + }, + { + "epoch": 0.23000440373792508, + "grad_norm": 306.1725158691406, + "learning_rate": 9.511272047243507e-06, + "loss": 23.8104, + "step": 113860 + }, + { + "epoch": 0.2300246043706089, + "grad_norm": 101.19133758544922, + "learning_rate": 9.51112151762214e-06, + "loss": 23.7251, + "step": 113870 + }, + { + "epoch": 0.2300448050032927, + "grad_norm": 710.2012329101562, + "learning_rate": 9.510970966014112e-06, + "loss": 20.1284, + "step": 113880 + }, + { + "epoch": 0.2300650056359765, + "grad_norm": 807.711181640625, + "learning_rate": 9.51082039242016e-06, + "loss": 16.4794, + "step": 113890 + }, + { + "epoch": 0.23008520626866033, + "grad_norm": 560.6378784179688, + "learning_rate": 9.510669796841014e-06, + "loss": 38.8351, + "step": 113900 + }, + { + "epoch": 0.23010540690134415, + "grad_norm": 3355.491943359375, + "learning_rate": 9.510519179277414e-06, + "loss": 40.4513, + "step": 113910 + }, + { + "epoch": 0.23012560753402797, + "grad_norm": 583.879638671875, + "learning_rate": 9.510368539730089e-06, + "loss": 26.1312, + "step": 113920 + }, + { + "epoch": 0.2301458081667118, + "grad_norm": 146.33290100097656, + "learning_rate": 9.510217878199773e-06, + "loss": 30.3424, + "step": 113930 + }, + { + "epoch": 0.2301660087993956, + "grad_norm": 609.0533447265625, + "learning_rate": 9.510067194687205e-06, + "loss": 26.2492, + "step": 113940 + }, + { + "epoch": 0.2301862094320794, + "grad_norm": 240.16952514648438, + "learning_rate": 9.509916489193114e-06, + "loss": 28.5853, + "step": 113950 + }, + { + "epoch": 0.23020641006476322, + "grad_norm": 862.90234375, + "learning_rate": 9.50976576171824e-06, + "loss": 21.525, + "step": 113960 + }, + { + "epoch": 0.23022661069744704, + "grad_norm": 549.5601806640625, + "learning_rate": 9.509615012263311e-06, + "loss": 21.0239, + "step": 113970 + }, + { + "epoch": 0.23024681133013086, + "grad_norm": 185.5392303466797, + "learning_rate": 9.509464240829067e-06, + "loss": 23.9225, + "step": 113980 + }, + { + "epoch": 0.23026701196281468, + "grad_norm": 881.9199829101562, + "learning_rate": 9.509313447416241e-06, + "loss": 22.6354, + "step": 113990 + }, + { + "epoch": 0.2302872125954985, + "grad_norm": 229.3199462890625, + "learning_rate": 9.50916263202557e-06, + "loss": 15.036, + "step": 114000 + }, + { + "epoch": 0.2303074132281823, + "grad_norm": 208.56417846679688, + "learning_rate": 9.509011794657785e-06, + "loss": 18.8227, + "step": 114010 + }, + { + "epoch": 0.23032761386086611, + "grad_norm": 432.16339111328125, + "learning_rate": 9.508860935313623e-06, + "loss": 19.2313, + "step": 114020 + }, + { + "epoch": 0.23034781449354993, + "grad_norm": 704.3341064453125, + "learning_rate": 9.508710053993822e-06, + "loss": 25.9992, + "step": 114030 + }, + { + "epoch": 0.23036801512623375, + "grad_norm": 382.67144775390625, + "learning_rate": 9.508559150699115e-06, + "loss": 12.15, + "step": 
114040 + }, + { + "epoch": 0.23038821575891757, + "grad_norm": 115.54539489746094, + "learning_rate": 9.508408225430237e-06, + "loss": 25.6531, + "step": 114050 + }, + { + "epoch": 0.2304084163916014, + "grad_norm": 557.89892578125, + "learning_rate": 9.508257278187923e-06, + "loss": 25.4176, + "step": 114060 + }, + { + "epoch": 0.2304286170242852, + "grad_norm": 154.92672729492188, + "learning_rate": 9.50810630897291e-06, + "loss": 14.019, + "step": 114070 + }, + { + "epoch": 0.230448817656969, + "grad_norm": 530.8345336914062, + "learning_rate": 9.507955317785935e-06, + "loss": 21.4318, + "step": 114080 + }, + { + "epoch": 0.23046901828965283, + "grad_norm": 230.95797729492188, + "learning_rate": 9.50780430462773e-06, + "loss": 28.5689, + "step": 114090 + }, + { + "epoch": 0.23048921892233665, + "grad_norm": 389.99969482421875, + "learning_rate": 9.507653269499035e-06, + "loss": 27.033, + "step": 114100 + }, + { + "epoch": 0.23050941955502047, + "grad_norm": 452.2672424316406, + "learning_rate": 9.507502212400585e-06, + "loss": 25.4473, + "step": 114110 + }, + { + "epoch": 0.2305296201877043, + "grad_norm": 111.10558319091797, + "learning_rate": 9.507351133333116e-06, + "loss": 23.7443, + "step": 114120 + }, + { + "epoch": 0.2305498208203881, + "grad_norm": 7.091848850250244, + "learning_rate": 9.507200032297364e-06, + "loss": 42.0829, + "step": 114130 + }, + { + "epoch": 0.2305700214530719, + "grad_norm": 282.49462890625, + "learning_rate": 9.507048909294065e-06, + "loss": 30.353, + "step": 114140 + }, + { + "epoch": 0.23059022208575572, + "grad_norm": 221.93344116210938, + "learning_rate": 9.506897764323957e-06, + "loss": 29.1922, + "step": 114150 + }, + { + "epoch": 0.23061042271843954, + "grad_norm": 432.72589111328125, + "learning_rate": 9.506746597387776e-06, + "loss": 23.8334, + "step": 114160 + }, + { + "epoch": 0.23063062335112336, + "grad_norm": 385.8450012207031, + "learning_rate": 9.50659540848626e-06, + "loss": 13.5499, + "step": 114170 + }, + { + "epoch": 0.23065082398380718, + "grad_norm": 153.2083740234375, + "learning_rate": 9.506444197620142e-06, + "loss": 13.4851, + "step": 114180 + }, + { + "epoch": 0.230671024616491, + "grad_norm": 226.3197479248047, + "learning_rate": 9.506292964790162e-06, + "loss": 15.1022, + "step": 114190 + }, + { + "epoch": 0.2306912252491748, + "grad_norm": 421.44403076171875, + "learning_rate": 9.506141709997058e-06, + "loss": 17.9024, + "step": 114200 + }, + { + "epoch": 0.2307114258818586, + "grad_norm": 657.6426391601562, + "learning_rate": 9.505990433241565e-06, + "loss": 17.4907, + "step": 114210 + }, + { + "epoch": 0.23073162651454243, + "grad_norm": 714.735595703125, + "learning_rate": 9.50583913452442e-06, + "loss": 33.5603, + "step": 114220 + }, + { + "epoch": 0.23075182714722625, + "grad_norm": 419.1542053222656, + "learning_rate": 9.505687813846363e-06, + "loss": 8.4052, + "step": 114230 + }, + { + "epoch": 0.23077202777991007, + "grad_norm": 719.9962158203125, + "learning_rate": 9.50553647120813e-06, + "loss": 36.5737, + "step": 114240 + }, + { + "epoch": 0.2307922284125939, + "grad_norm": 463.834228515625, + "learning_rate": 9.505385106610458e-06, + "loss": 25.4381, + "step": 114250 + }, + { + "epoch": 0.2308124290452777, + "grad_norm": 253.3015899658203, + "learning_rate": 9.505233720054086e-06, + "loss": 25.6091, + "step": 114260 + }, + { + "epoch": 0.2308326296779615, + "grad_norm": 309.06396484375, + "learning_rate": 9.505082311539752e-06, + "loss": 32.5767, + "step": 114270 + }, + { + "epoch": 0.23085283031064532, + 
"grad_norm": 387.4048156738281, + "learning_rate": 9.504930881068193e-06, + "loss": 33.0901, + "step": 114280 + }, + { + "epoch": 0.23087303094332914, + "grad_norm": 509.7787170410156, + "learning_rate": 9.504779428640146e-06, + "loss": 15.3536, + "step": 114290 + }, + { + "epoch": 0.23089323157601296, + "grad_norm": 419.689208984375, + "learning_rate": 9.504627954256352e-06, + "loss": 22.8543, + "step": 114300 + }, + { + "epoch": 0.23091343220869678, + "grad_norm": 524.3058471679688, + "learning_rate": 9.504476457917546e-06, + "loss": 21.437, + "step": 114310 + }, + { + "epoch": 0.2309336328413806, + "grad_norm": 582.3783569335938, + "learning_rate": 9.504324939624469e-06, + "loss": 27.2149, + "step": 114320 + }, + { + "epoch": 0.2309538334740644, + "grad_norm": 572.2953491210938, + "learning_rate": 9.504173399377858e-06, + "loss": 13.3763, + "step": 114330 + }, + { + "epoch": 0.23097403410674822, + "grad_norm": 831.9983520507812, + "learning_rate": 9.504021837178452e-06, + "loss": 20.36, + "step": 114340 + }, + { + "epoch": 0.23099423473943204, + "grad_norm": 1011.7919921875, + "learning_rate": 9.503870253026992e-06, + "loss": 37.3983, + "step": 114350 + }, + { + "epoch": 0.23101443537211586, + "grad_norm": 662.2227783203125, + "learning_rate": 9.503718646924211e-06, + "loss": 24.7651, + "step": 114360 + }, + { + "epoch": 0.23103463600479968, + "grad_norm": 674.4508056640625, + "learning_rate": 9.503567018870855e-06, + "loss": 25.7229, + "step": 114370 + }, + { + "epoch": 0.2310548366374835, + "grad_norm": 447.40557861328125, + "learning_rate": 9.503415368867658e-06, + "loss": 16.263, + "step": 114380 + }, + { + "epoch": 0.2310750372701673, + "grad_norm": 412.7706298828125, + "learning_rate": 9.503263696915361e-06, + "loss": 20.0842, + "step": 114390 + }, + { + "epoch": 0.2310952379028511, + "grad_norm": 934.6969604492188, + "learning_rate": 9.503112003014702e-06, + "loss": 33.9432, + "step": 114400 + }, + { + "epoch": 0.23111543853553493, + "grad_norm": 325.6151123046875, + "learning_rate": 9.502960287166423e-06, + "loss": 20.7496, + "step": 114410 + }, + { + "epoch": 0.23113563916821875, + "grad_norm": 72.51821899414062, + "learning_rate": 9.50280854937126e-06, + "loss": 21.2566, + "step": 114420 + }, + { + "epoch": 0.23115583980090257, + "grad_norm": 172.7029266357422, + "learning_rate": 9.502656789629956e-06, + "loss": 18.6805, + "step": 114430 + }, + { + "epoch": 0.2311760404335864, + "grad_norm": 1268.697265625, + "learning_rate": 9.502505007943248e-06, + "loss": 28.5747, + "step": 114440 + }, + { + "epoch": 0.2311962410662702, + "grad_norm": 383.0333251953125, + "learning_rate": 9.502353204311876e-06, + "loss": 25.1674, + "step": 114450 + }, + { + "epoch": 0.231216441698954, + "grad_norm": 565.036865234375, + "learning_rate": 9.50220137873658e-06, + "loss": 26.0444, + "step": 114460 + }, + { + "epoch": 0.23123664233163782, + "grad_norm": 174.50863647460938, + "learning_rate": 9.502049531218103e-06, + "loss": 25.0231, + "step": 114470 + }, + { + "epoch": 0.23125684296432164, + "grad_norm": 396.9252014160156, + "learning_rate": 9.501897661757182e-06, + "loss": 27.3269, + "step": 114480 + }, + { + "epoch": 0.23127704359700546, + "grad_norm": 202.41270446777344, + "learning_rate": 9.501745770354555e-06, + "loss": 17.6822, + "step": 114490 + }, + { + "epoch": 0.23129724422968928, + "grad_norm": 246.49981689453125, + "learning_rate": 9.501593857010968e-06, + "loss": 20.3343, + "step": 114500 + }, + { + "epoch": 0.2313174448623731, + "grad_norm": 0.0, + "learning_rate": 
9.501441921727158e-06, + "loss": 20.5623, + "step": 114510 + }, + { + "epoch": 0.2313376454950569, + "grad_norm": 356.1559143066406, + "learning_rate": 9.501289964503866e-06, + "loss": 13.547, + "step": 114520 + }, + { + "epoch": 0.2313578461277407, + "grad_norm": 146.77247619628906, + "learning_rate": 9.501137985341832e-06, + "loss": 23.4428, + "step": 114530 + }, + { + "epoch": 0.23137804676042453, + "grad_norm": 193.9114227294922, + "learning_rate": 9.500985984241797e-06, + "loss": 40.4239, + "step": 114540 + }, + { + "epoch": 0.23139824739310835, + "grad_norm": 916.8033447265625, + "learning_rate": 9.500833961204504e-06, + "loss": 27.8038, + "step": 114550 + }, + { + "epoch": 0.23141844802579217, + "grad_norm": 481.2568359375, + "learning_rate": 9.500681916230692e-06, + "loss": 16.5165, + "step": 114560 + }, + { + "epoch": 0.231438648658476, + "grad_norm": 603.3992309570312, + "learning_rate": 9.500529849321101e-06, + "loss": 30.3873, + "step": 114570 + }, + { + "epoch": 0.2314588492911598, + "grad_norm": 615.4487915039062, + "learning_rate": 9.500377760476473e-06, + "loss": 29.7213, + "step": 114580 + }, + { + "epoch": 0.2314790499238436, + "grad_norm": 243.64566040039062, + "learning_rate": 9.500225649697551e-06, + "loss": 15.1665, + "step": 114590 + }, + { + "epoch": 0.23149925055652743, + "grad_norm": 710.8563232421875, + "learning_rate": 9.500073516985074e-06, + "loss": 35.1212, + "step": 114600 + }, + { + "epoch": 0.23151945118921125, + "grad_norm": 417.65704345703125, + "learning_rate": 9.499921362339786e-06, + "loss": 23.8463, + "step": 114610 + }, + { + "epoch": 0.23153965182189506, + "grad_norm": 76.81388854980469, + "learning_rate": 9.499769185762425e-06, + "loss": 34.4666, + "step": 114620 + }, + { + "epoch": 0.23155985245457888, + "grad_norm": 228.5997314453125, + "learning_rate": 9.499616987253734e-06, + "loss": 39.6991, + "step": 114630 + }, + { + "epoch": 0.2315800530872627, + "grad_norm": 573.35498046875, + "learning_rate": 9.49946476681446e-06, + "loss": 34.7777, + "step": 114640 + }, + { + "epoch": 0.2316002537199465, + "grad_norm": 261.21356201171875, + "learning_rate": 9.499312524445337e-06, + "loss": 10.3347, + "step": 114650 + }, + { + "epoch": 0.23162045435263032, + "grad_norm": 0.0, + "learning_rate": 9.499160260147111e-06, + "loss": 21.4552, + "step": 114660 + }, + { + "epoch": 0.23164065498531414, + "grad_norm": 466.75244140625, + "learning_rate": 9.499007973920523e-06, + "loss": 24.9358, + "step": 114670 + }, + { + "epoch": 0.23166085561799796, + "grad_norm": 160.8385772705078, + "learning_rate": 9.498855665766316e-06, + "loss": 15.6375, + "step": 114680 + }, + { + "epoch": 0.23168105625068178, + "grad_norm": 639.6903076171875, + "learning_rate": 9.498703335685233e-06, + "loss": 28.7835, + "step": 114690 + }, + { + "epoch": 0.2317012568833656, + "grad_norm": 398.8724060058594, + "learning_rate": 9.498550983678016e-06, + "loss": 25.5717, + "step": 114700 + }, + { + "epoch": 0.2317214575160494, + "grad_norm": 409.29437255859375, + "learning_rate": 9.498398609745405e-06, + "loss": 27.9308, + "step": 114710 + }, + { + "epoch": 0.2317416581487332, + "grad_norm": 391.4109802246094, + "learning_rate": 9.498246213888148e-06, + "loss": 16.9435, + "step": 114720 + }, + { + "epoch": 0.23176185878141703, + "grad_norm": 312.2937316894531, + "learning_rate": 9.498093796106982e-06, + "loss": 31.0777, + "step": 114730 + }, + { + "epoch": 0.23178205941410085, + "grad_norm": 407.076171875, + "learning_rate": 9.497941356402653e-06, + "loss": 16.9753, + "step": 114740 + }, + { 
+ "epoch": 0.23180226004678467, + "grad_norm": 283.9637756347656, + "learning_rate": 9.497788894775903e-06, + "loss": 19.82, + "step": 114750 + }, + { + "epoch": 0.2318224606794685, + "grad_norm": 124.15992736816406, + "learning_rate": 9.497636411227476e-06, + "loss": 23.3035, + "step": 114760 + }, + { + "epoch": 0.2318426613121523, + "grad_norm": 662.7410888671875, + "learning_rate": 9.497483905758114e-06, + "loss": 27.8046, + "step": 114770 + }, + { + "epoch": 0.2318628619448361, + "grad_norm": 737.6326293945312, + "learning_rate": 9.49733137836856e-06, + "loss": 24.7341, + "step": 114780 + }, + { + "epoch": 0.23188306257751992, + "grad_norm": 245.0981903076172, + "learning_rate": 9.497178829059561e-06, + "loss": 25.5373, + "step": 114790 + }, + { + "epoch": 0.23190326321020374, + "grad_norm": 300.2393798828125, + "learning_rate": 9.497026257831856e-06, + "loss": 23.1748, + "step": 114800 + }, + { + "epoch": 0.23192346384288756, + "grad_norm": 321.494873046875, + "learning_rate": 9.49687366468619e-06, + "loss": 40.4303, + "step": 114810 + }, + { + "epoch": 0.23194366447557138, + "grad_norm": 557.5247802734375, + "learning_rate": 9.49672104962331e-06, + "loss": 26.8173, + "step": 114820 + }, + { + "epoch": 0.2319638651082552, + "grad_norm": 1040.2940673828125, + "learning_rate": 9.496568412643954e-06, + "loss": 45.3982, + "step": 114830 + }, + { + "epoch": 0.231984065740939, + "grad_norm": 247.10780334472656, + "learning_rate": 9.49641575374887e-06, + "loss": 17.9488, + "step": 114840 + }, + { + "epoch": 0.23200426637362281, + "grad_norm": 348.88427734375, + "learning_rate": 9.496263072938801e-06, + "loss": 16.5253, + "step": 114850 + }, + { + "epoch": 0.23202446700630663, + "grad_norm": 170.89480590820312, + "learning_rate": 9.49611037021449e-06, + "loss": 18.8638, + "step": 114860 + }, + { + "epoch": 0.23204466763899045, + "grad_norm": 639.7593383789062, + "learning_rate": 9.495957645576685e-06, + "loss": 32.4327, + "step": 114870 + }, + { + "epoch": 0.23206486827167427, + "grad_norm": 160.24334716796875, + "learning_rate": 9.495804899026126e-06, + "loss": 21.9294, + "step": 114880 + }, + { + "epoch": 0.2320850689043581, + "grad_norm": 84.15010070800781, + "learning_rate": 9.49565213056356e-06, + "loss": 25.3117, + "step": 114890 + }, + { + "epoch": 0.23210526953704191, + "grad_norm": 325.7503967285156, + "learning_rate": 9.495499340189729e-06, + "loss": 37.906, + "step": 114900 + }, + { + "epoch": 0.2321254701697257, + "grad_norm": 611.479248046875, + "learning_rate": 9.495346527905381e-06, + "loss": 32.5419, + "step": 114910 + }, + { + "epoch": 0.23214567080240953, + "grad_norm": 536.5656127929688, + "learning_rate": 9.495193693711259e-06, + "loss": 21.853, + "step": 114920 + }, + { + "epoch": 0.23216587143509335, + "grad_norm": 714.2391357421875, + "learning_rate": 9.495040837608107e-06, + "loss": 37.9951, + "step": 114930 + }, + { + "epoch": 0.23218607206777717, + "grad_norm": 343.0403137207031, + "learning_rate": 9.494887959596672e-06, + "loss": 24.4986, + "step": 114940 + }, + { + "epoch": 0.23220627270046099, + "grad_norm": 463.78173828125, + "learning_rate": 9.4947350596777e-06, + "loss": 22.8133, + "step": 114950 + }, + { + "epoch": 0.2322264733331448, + "grad_norm": 211.89515686035156, + "learning_rate": 9.494582137851932e-06, + "loss": 23.9647, + "step": 114960 + }, + { + "epoch": 0.2322466739658286, + "grad_norm": 44.057308197021484, + "learning_rate": 9.494429194120117e-06, + "loss": 26.3957, + "step": 114970 + }, + { + "epoch": 0.23226687459851242, + "grad_norm": 
346.5908203125, + "learning_rate": 9.494276228482998e-06, + "loss": 21.106, + "step": 114980 + }, + { + "epoch": 0.23228707523119624, + "grad_norm": 586.0257568359375, + "learning_rate": 9.494123240941321e-06, + "loss": 25.8126, + "step": 114990 + }, + { + "epoch": 0.23230727586388006, + "grad_norm": 766.7527465820312, + "learning_rate": 9.493970231495836e-06, + "loss": 28.4578, + "step": 115000 + }, + { + "epoch": 0.23232747649656388, + "grad_norm": 518.5211181640625, + "learning_rate": 9.493817200147282e-06, + "loss": 32.7023, + "step": 115010 + }, + { + "epoch": 0.2323476771292477, + "grad_norm": 354.9346008300781, + "learning_rate": 9.493664146896411e-06, + "loss": 20.4563, + "step": 115020 + }, + { + "epoch": 0.2323678777619315, + "grad_norm": 339.10443115234375, + "learning_rate": 9.493511071743963e-06, + "loss": 24.1566, + "step": 115030 + }, + { + "epoch": 0.2323880783946153, + "grad_norm": 731.3485107421875, + "learning_rate": 9.493357974690689e-06, + "loss": 30.5574, + "step": 115040 + }, + { + "epoch": 0.23240827902729913, + "grad_norm": 395.5550231933594, + "learning_rate": 9.493204855737332e-06, + "loss": 27.9317, + "step": 115050 + }, + { + "epoch": 0.23242847965998295, + "grad_norm": 115.83318328857422, + "learning_rate": 9.49305171488464e-06, + "loss": 15.4804, + "step": 115060 + }, + { + "epoch": 0.23244868029266677, + "grad_norm": 0.0, + "learning_rate": 9.492898552133358e-06, + "loss": 13.9627, + "step": 115070 + }, + { + "epoch": 0.2324688809253506, + "grad_norm": 338.2238464355469, + "learning_rate": 9.492745367484234e-06, + "loss": 10.4695, + "step": 115080 + }, + { + "epoch": 0.2324890815580344, + "grad_norm": 604.24560546875, + "learning_rate": 9.492592160938014e-06, + "loss": 18.8031, + "step": 115090 + }, + { + "epoch": 0.2325092821907182, + "grad_norm": 497.5995178222656, + "learning_rate": 9.492438932495444e-06, + "loss": 15.7976, + "step": 115100 + }, + { + "epoch": 0.23252948282340202, + "grad_norm": 740.5108032226562, + "learning_rate": 9.492285682157274e-06, + "loss": 24.4216, + "step": 115110 + }, + { + "epoch": 0.23254968345608584, + "grad_norm": 571.9484252929688, + "learning_rate": 9.492132409924247e-06, + "loss": 33.1764, + "step": 115120 + }, + { + "epoch": 0.23256988408876966, + "grad_norm": 461.05108642578125, + "learning_rate": 9.49197911579711e-06, + "loss": 24.2657, + "step": 115130 + }, + { + "epoch": 0.23259008472145348, + "grad_norm": 32.682518005371094, + "learning_rate": 9.491825799776613e-06, + "loss": 29.9697, + "step": 115140 + }, + { + "epoch": 0.2326102853541373, + "grad_norm": 244.05482482910156, + "learning_rate": 9.491672461863501e-06, + "loss": 31.8098, + "step": 115150 + }, + { + "epoch": 0.2326304859868211, + "grad_norm": 21.306440353393555, + "learning_rate": 9.491519102058523e-06, + "loss": 33.1123, + "step": 115160 + }, + { + "epoch": 0.23265068661950492, + "grad_norm": 324.6059265136719, + "learning_rate": 9.491365720362427e-06, + "loss": 18.2033, + "step": 115170 + }, + { + "epoch": 0.23267088725218874, + "grad_norm": 190.6021270751953, + "learning_rate": 9.491212316775956e-06, + "loss": 22.1311, + "step": 115180 + }, + { + "epoch": 0.23269108788487256, + "grad_norm": 506.2206115722656, + "learning_rate": 9.491058891299863e-06, + "loss": 29.1972, + "step": 115190 + }, + { + "epoch": 0.23271128851755638, + "grad_norm": 89.00489044189453, + "learning_rate": 9.490905443934892e-06, + "loss": 24.539, + "step": 115200 + }, + { + "epoch": 0.2327314891502402, + "grad_norm": 453.9803771972656, + "learning_rate": 9.490751974681795e-06, 
+ "loss": 14.0995, + "step": 115210 + }, + { + "epoch": 0.23275168978292402, + "grad_norm": 576.4542236328125, + "learning_rate": 9.490598483541316e-06, + "loss": 38.7763, + "step": 115220 + }, + { + "epoch": 0.2327718904156078, + "grad_norm": 409.8153381347656, + "learning_rate": 9.490444970514205e-06, + "loss": 21.8848, + "step": 115230 + }, + { + "epoch": 0.23279209104829163, + "grad_norm": 360.01007080078125, + "learning_rate": 9.49029143560121e-06, + "loss": 29.0496, + "step": 115240 + }, + { + "epoch": 0.23281229168097545, + "grad_norm": 32.68125534057617, + "learning_rate": 9.490137878803078e-06, + "loss": 19.6349, + "step": 115250 + }, + { + "epoch": 0.23283249231365927, + "grad_norm": 586.7561645507812, + "learning_rate": 9.48998430012056e-06, + "loss": 16.7389, + "step": 115260 + }, + { + "epoch": 0.2328526929463431, + "grad_norm": 709.1950073242188, + "learning_rate": 9.489830699554403e-06, + "loss": 26.743, + "step": 115270 + }, + { + "epoch": 0.2328728935790269, + "grad_norm": 39.28255844116211, + "learning_rate": 9.489677077105355e-06, + "loss": 19.1558, + "step": 115280 + }, + { + "epoch": 0.2328930942117107, + "grad_norm": 488.423095703125, + "learning_rate": 9.489523432774165e-06, + "loss": 28.9454, + "step": 115290 + }, + { + "epoch": 0.23291329484439452, + "grad_norm": 145.56295776367188, + "learning_rate": 9.489369766561584e-06, + "loss": 22.6579, + "step": 115300 + }, + { + "epoch": 0.23293349547707834, + "grad_norm": 838.90966796875, + "learning_rate": 9.489216078468359e-06, + "loss": 13.8322, + "step": 115310 + }, + { + "epoch": 0.23295369610976216, + "grad_norm": 531.090576171875, + "learning_rate": 9.48906236849524e-06, + "loss": 46.4313, + "step": 115320 + }, + { + "epoch": 0.23297389674244598, + "grad_norm": 238.19915771484375, + "learning_rate": 9.488908636642972e-06, + "loss": 17.5851, + "step": 115330 + }, + { + "epoch": 0.2329940973751298, + "grad_norm": 351.15106201171875, + "learning_rate": 9.48875488291231e-06, + "loss": 27.8192, + "step": 115340 + }, + { + "epoch": 0.2330142980078136, + "grad_norm": 323.8350524902344, + "learning_rate": 9.488601107304001e-06, + "loss": 25.9669, + "step": 115350 + }, + { + "epoch": 0.2330344986404974, + "grad_norm": 376.9538879394531, + "learning_rate": 9.488447309818795e-06, + "loss": 37.1908, + "step": 115360 + }, + { + "epoch": 0.23305469927318123, + "grad_norm": 79.04154968261719, + "learning_rate": 9.488293490457441e-06, + "loss": 16.2454, + "step": 115370 + }, + { + "epoch": 0.23307489990586505, + "grad_norm": 664.3616943359375, + "learning_rate": 9.48813964922069e-06, + "loss": 34.9776, + "step": 115380 + }, + { + "epoch": 0.23309510053854887, + "grad_norm": 836.3585205078125, + "learning_rate": 9.487985786109288e-06, + "loss": 18.6744, + "step": 115390 + }, + { + "epoch": 0.2331153011712327, + "grad_norm": 612.2698364257812, + "learning_rate": 9.487831901123989e-06, + "loss": 24.8608, + "step": 115400 + }, + { + "epoch": 0.2331355018039165, + "grad_norm": 100.1115493774414, + "learning_rate": 9.48767799426554e-06, + "loss": 11.9513, + "step": 115410 + }, + { + "epoch": 0.2331557024366003, + "grad_norm": 516.9166259765625, + "learning_rate": 9.487524065534696e-06, + "loss": 16.9859, + "step": 115420 + }, + { + "epoch": 0.23317590306928412, + "grad_norm": 773.397705078125, + "learning_rate": 9.487370114932201e-06, + "loss": 24.2073, + "step": 115430 + }, + { + "epoch": 0.23319610370196794, + "grad_norm": 295.19964599609375, + "learning_rate": 9.48721614245881e-06, + "loss": 35.1301, + "step": 115440 + }, + { + 
"epoch": 0.23321630433465176, + "grad_norm": 247.9727020263672, + "learning_rate": 9.487062148115272e-06, + "loss": 28.4932, + "step": 115450 + }, + { + "epoch": 0.23323650496733558, + "grad_norm": 312.9109802246094, + "learning_rate": 9.486908131902336e-06, + "loss": 26.1924, + "step": 115460 + }, + { + "epoch": 0.2332567056000194, + "grad_norm": 381.10455322265625, + "learning_rate": 9.486754093820755e-06, + "loss": 20.8769, + "step": 115470 + }, + { + "epoch": 0.2332769062327032, + "grad_norm": 469.810302734375, + "learning_rate": 9.486600033871279e-06, + "loss": 46.3613, + "step": 115480 + }, + { + "epoch": 0.23329710686538702, + "grad_norm": 277.09747314453125, + "learning_rate": 9.486445952054658e-06, + "loss": 15.3754, + "step": 115490 + }, + { + "epoch": 0.23331730749807084, + "grad_norm": 954.3956298828125, + "learning_rate": 9.486291848371642e-06, + "loss": 34.4752, + "step": 115500 + }, + { + "epoch": 0.23333750813075466, + "grad_norm": 231.85748291015625, + "learning_rate": 9.486137722822986e-06, + "loss": 21.8481, + "step": 115510 + }, + { + "epoch": 0.23335770876343848, + "grad_norm": 199.6193389892578, + "learning_rate": 9.48598357540944e-06, + "loss": 11.9974, + "step": 115520 + }, + { + "epoch": 0.2333779093961223, + "grad_norm": 422.9839172363281, + "learning_rate": 9.48582940613175e-06, + "loss": 27.3381, + "step": 115530 + }, + { + "epoch": 0.2333981100288061, + "grad_norm": 173.09454345703125, + "learning_rate": 9.485675214990673e-06, + "loss": 20.8606, + "step": 115540 + }, + { + "epoch": 0.2334183106614899, + "grad_norm": 368.62884521484375, + "learning_rate": 9.485521001986961e-06, + "loss": 24.6399, + "step": 115550 + }, + { + "epoch": 0.23343851129417373, + "grad_norm": 1182.2962646484375, + "learning_rate": 9.485366767121363e-06, + "loss": 54.2244, + "step": 115560 + }, + { + "epoch": 0.23345871192685755, + "grad_norm": 276.2958679199219, + "learning_rate": 9.48521251039463e-06, + "loss": 27.9185, + "step": 115570 + }, + { + "epoch": 0.23347891255954137, + "grad_norm": 435.47625732421875, + "learning_rate": 9.485058231807515e-06, + "loss": 21.1052, + "step": 115580 + }, + { + "epoch": 0.2334991131922252, + "grad_norm": 682.1556396484375, + "learning_rate": 9.484903931360772e-06, + "loss": 39.5824, + "step": 115590 + }, + { + "epoch": 0.233519313824909, + "grad_norm": 487.5298156738281, + "learning_rate": 9.484749609055151e-06, + "loss": 24.5813, + "step": 115600 + }, + { + "epoch": 0.2335395144575928, + "grad_norm": 287.06866455078125, + "learning_rate": 9.484595264891403e-06, + "loss": 23.584, + "step": 115610 + }, + { + "epoch": 0.23355971509027662, + "grad_norm": 1571.0631103515625, + "learning_rate": 9.484440898870282e-06, + "loss": 31.6812, + "step": 115620 + }, + { + "epoch": 0.23357991572296044, + "grad_norm": 50.13099670410156, + "learning_rate": 9.484286510992541e-06, + "loss": 17.1068, + "step": 115630 + }, + { + "epoch": 0.23360011635564426, + "grad_norm": 374.7189636230469, + "learning_rate": 9.48413210125893e-06, + "loss": 26.3826, + "step": 115640 + }, + { + "epoch": 0.23362031698832808, + "grad_norm": 165.21685791015625, + "learning_rate": 9.483977669670204e-06, + "loss": 17.2194, + "step": 115650 + }, + { + "epoch": 0.2336405176210119, + "grad_norm": 311.05792236328125, + "learning_rate": 9.483823216227115e-06, + "loss": 29.0539, + "step": 115660 + }, + { + "epoch": 0.2336607182536957, + "grad_norm": 547.1766357421875, + "learning_rate": 9.483668740930413e-06, + "loss": 25.1947, + "step": 115670 + }, + { + "epoch": 0.2336809188863795, + 
"grad_norm": 82.5296630859375, + "learning_rate": 9.483514243780856e-06, + "loss": 17.5631, + "step": 115680 + }, + { + "epoch": 0.23370111951906333, + "grad_norm": 161.6363525390625, + "learning_rate": 9.483359724779194e-06, + "loss": 35.9948, + "step": 115690 + }, + { + "epoch": 0.23372132015174715, + "grad_norm": 442.1202697753906, + "learning_rate": 9.48320518392618e-06, + "loss": 30.1484, + "step": 115700 + }, + { + "epoch": 0.23374152078443097, + "grad_norm": 950.1380615234375, + "learning_rate": 9.483050621222567e-06, + "loss": 19.3751, + "step": 115710 + }, + { + "epoch": 0.2337617214171148, + "grad_norm": 682.4646606445312, + "learning_rate": 9.482896036669111e-06, + "loss": 50.0991, + "step": 115720 + }, + { + "epoch": 0.2337819220497986, + "grad_norm": 361.4498596191406, + "learning_rate": 9.482741430266563e-06, + "loss": 25.1781, + "step": 115730 + }, + { + "epoch": 0.2338021226824824, + "grad_norm": 712.1513061523438, + "learning_rate": 9.482586802015673e-06, + "loss": 20.1124, + "step": 115740 + }, + { + "epoch": 0.23382232331516623, + "grad_norm": 644.6577758789062, + "learning_rate": 9.482432151917204e-06, + "loss": 30.7717, + "step": 115750 + }, + { + "epoch": 0.23384252394785005, + "grad_norm": 428.7471618652344, + "learning_rate": 9.482277479971902e-06, + "loss": 20.5207, + "step": 115760 + }, + { + "epoch": 0.23386272458053387, + "grad_norm": 1580.9759521484375, + "learning_rate": 9.482122786180524e-06, + "loss": 39.2061, + "step": 115770 + }, + { + "epoch": 0.23388292521321769, + "grad_norm": 819.4335327148438, + "learning_rate": 9.481968070543822e-06, + "loss": 23.9595, + "step": 115780 + }, + { + "epoch": 0.2339031258459015, + "grad_norm": 222.69732666015625, + "learning_rate": 9.48181333306255e-06, + "loss": 22.0195, + "step": 115790 + }, + { + "epoch": 0.2339233264785853, + "grad_norm": 786.6558837890625, + "learning_rate": 9.481658573737465e-06, + "loss": 19.2621, + "step": 115800 + }, + { + "epoch": 0.23394352711126912, + "grad_norm": 129.64132690429688, + "learning_rate": 9.48150379256932e-06, + "loss": 25.3186, + "step": 115810 + }, + { + "epoch": 0.23396372774395294, + "grad_norm": 769.839599609375, + "learning_rate": 9.48134898955887e-06, + "loss": 19.8686, + "step": 115820 + }, + { + "epoch": 0.23398392837663676, + "grad_norm": 260.1893615722656, + "learning_rate": 9.481194164706866e-06, + "loss": 26.4367, + "step": 115830 + }, + { + "epoch": 0.23400412900932058, + "grad_norm": 141.46632385253906, + "learning_rate": 9.481039318014068e-06, + "loss": 20.8942, + "step": 115840 + }, + { + "epoch": 0.2340243296420044, + "grad_norm": 426.383056640625, + "learning_rate": 9.480884449481224e-06, + "loss": 30.8326, + "step": 115850 + }, + { + "epoch": 0.2340445302746882, + "grad_norm": 944.14208984375, + "learning_rate": 9.480729559109096e-06, + "loss": 45.3746, + "step": 115860 + }, + { + "epoch": 0.234064730907372, + "grad_norm": 54.95215606689453, + "learning_rate": 9.480574646898434e-06, + "loss": 13.8032, + "step": 115870 + }, + { + "epoch": 0.23408493154005583, + "grad_norm": 1639.69677734375, + "learning_rate": 9.480419712849996e-06, + "loss": 21.4958, + "step": 115880 + }, + { + "epoch": 0.23410513217273965, + "grad_norm": 222.08889770507812, + "learning_rate": 9.480264756964535e-06, + "loss": 23.5316, + "step": 115890 + }, + { + "epoch": 0.23412533280542347, + "grad_norm": 242.14276123046875, + "learning_rate": 9.480109779242805e-06, + "loss": 16.9658, + "step": 115900 + }, + { + "epoch": 0.2341455334381073, + "grad_norm": 95.41576385498047, + 
"learning_rate": 9.479954779685566e-06, + "loss": 16.2513, + "step": 115910 + }, + { + "epoch": 0.2341657340707911, + "grad_norm": 364.3272399902344, + "learning_rate": 9.47979975829357e-06, + "loss": 19.5354, + "step": 115920 + }, + { + "epoch": 0.2341859347034749, + "grad_norm": 590.6675415039062, + "learning_rate": 9.479644715067572e-06, + "loss": 16.4956, + "step": 115930 + }, + { + "epoch": 0.23420613533615872, + "grad_norm": 351.85833740234375, + "learning_rate": 9.479489650008331e-06, + "loss": 32.4146, + "step": 115940 + }, + { + "epoch": 0.23422633596884254, + "grad_norm": 371.4349670410156, + "learning_rate": 9.4793345631166e-06, + "loss": 14.2059, + "step": 115950 + }, + { + "epoch": 0.23424653660152636, + "grad_norm": 194.46673583984375, + "learning_rate": 9.479179454393135e-06, + "loss": 15.5061, + "step": 115960 + }, + { + "epoch": 0.23426673723421018, + "grad_norm": 223.16856384277344, + "learning_rate": 9.479024323838694e-06, + "loss": 29.0022, + "step": 115970 + }, + { + "epoch": 0.234286937866894, + "grad_norm": 44.55643081665039, + "learning_rate": 9.478869171454031e-06, + "loss": 16.332, + "step": 115980 + }, + { + "epoch": 0.2343071384995778, + "grad_norm": 376.872314453125, + "learning_rate": 9.478713997239902e-06, + "loss": 33.6666, + "step": 115990 + }, + { + "epoch": 0.23432733913226161, + "grad_norm": 296.7268981933594, + "learning_rate": 9.478558801197065e-06, + "loss": 21.58, + "step": 116000 + }, + { + "epoch": 0.23434753976494543, + "grad_norm": 426.90374755859375, + "learning_rate": 9.478403583326275e-06, + "loss": 16.5128, + "step": 116010 + }, + { + "epoch": 0.23436774039762925, + "grad_norm": 207.19375610351562, + "learning_rate": 9.47824834362829e-06, + "loss": 16.7943, + "step": 116020 + }, + { + "epoch": 0.23438794103031307, + "grad_norm": 73.24336242675781, + "learning_rate": 9.478093082103865e-06, + "loss": 30.4707, + "step": 116030 + }, + { + "epoch": 0.2344081416629969, + "grad_norm": 968.002197265625, + "learning_rate": 9.477937798753757e-06, + "loss": 29.8604, + "step": 116040 + }, + { + "epoch": 0.23442834229568071, + "grad_norm": 612.5117797851562, + "learning_rate": 9.477782493578724e-06, + "loss": 14.9429, + "step": 116050 + }, + { + "epoch": 0.2344485429283645, + "grad_norm": 384.3153076171875, + "learning_rate": 9.477627166579523e-06, + "loss": 26.4373, + "step": 116060 + }, + { + "epoch": 0.23446874356104833, + "grad_norm": 462.5488586425781, + "learning_rate": 9.47747181775691e-06, + "loss": 37.0143, + "step": 116070 + }, + { + "epoch": 0.23448894419373215, + "grad_norm": 1343.555908203125, + "learning_rate": 9.477316447111642e-06, + "loss": 29.2987, + "step": 116080 + }, + { + "epoch": 0.23450914482641597, + "grad_norm": 382.63250732421875, + "learning_rate": 9.477161054644475e-06, + "loss": 18.1374, + "step": 116090 + }, + { + "epoch": 0.2345293454590998, + "grad_norm": 262.3546447753906, + "learning_rate": 9.47700564035617e-06, + "loss": 26.7119, + "step": 116100 + }, + { + "epoch": 0.2345495460917836, + "grad_norm": 261.574951171875, + "learning_rate": 9.476850204247483e-06, + "loss": 43.5217, + "step": 116110 + }, + { + "epoch": 0.2345697467244674, + "grad_norm": 491.40167236328125, + "learning_rate": 9.47669474631917e-06, + "loss": 18.6674, + "step": 116120 + }, + { + "epoch": 0.23458994735715122, + "grad_norm": 467.8964538574219, + "learning_rate": 9.476539266571988e-06, + "loss": 21.3494, + "step": 116130 + }, + { + "epoch": 0.23461014798983504, + "grad_norm": 394.40240478515625, + "learning_rate": 9.4763837650067e-06, + "loss": 
32.0296, + "step": 116140 + }, + { + "epoch": 0.23463034862251886, + "grad_norm": 301.9280090332031, + "learning_rate": 9.476228241624059e-06, + "loss": 28.1887, + "step": 116150 + }, + { + "epoch": 0.23465054925520268, + "grad_norm": 350.1358947753906, + "learning_rate": 9.476072696424825e-06, + "loss": 29.1411, + "step": 116160 + }, + { + "epoch": 0.2346707498878865, + "grad_norm": 184.35031127929688, + "learning_rate": 9.475917129409755e-06, + "loss": 24.4947, + "step": 116170 + }, + { + "epoch": 0.2346909505205703, + "grad_norm": 555.5595703125, + "learning_rate": 9.475761540579607e-06, + "loss": 24.8404, + "step": 116180 + }, + { + "epoch": 0.2347111511532541, + "grad_norm": 301.1944885253906, + "learning_rate": 9.475605929935142e-06, + "loss": 39.6237, + "step": 116190 + }, + { + "epoch": 0.23473135178593793, + "grad_norm": 88.20915985107422, + "learning_rate": 9.475450297477113e-06, + "loss": 19.002, + "step": 116200 + }, + { + "epoch": 0.23475155241862175, + "grad_norm": 466.5421447753906, + "learning_rate": 9.475294643206285e-06, + "loss": 15.9766, + "step": 116210 + }, + { + "epoch": 0.23477175305130557, + "grad_norm": 6.774113655090332, + "learning_rate": 9.475138967123414e-06, + "loss": 12.3952, + "step": 116220 + }, + { + "epoch": 0.2347919536839894, + "grad_norm": 230.72752380371094, + "learning_rate": 9.474983269229256e-06, + "loss": 12.4458, + "step": 116230 + }, + { + "epoch": 0.2348121543166732, + "grad_norm": 268.2113037109375, + "learning_rate": 9.474827549524574e-06, + "loss": 14.7699, + "step": 116240 + }, + { + "epoch": 0.234832354949357, + "grad_norm": 178.18630981445312, + "learning_rate": 9.474671808010126e-06, + "loss": 13.323, + "step": 116250 + }, + { + "epoch": 0.23485255558204082, + "grad_norm": 458.60760498046875, + "learning_rate": 9.47451604468667e-06, + "loss": 19.6697, + "step": 116260 + }, + { + "epoch": 0.23487275621472464, + "grad_norm": 317.4723205566406, + "learning_rate": 9.474360259554965e-06, + "loss": 22.8527, + "step": 116270 + }, + { + "epoch": 0.23489295684740846, + "grad_norm": 272.6497802734375, + "learning_rate": 9.47420445261577e-06, + "loss": 26.309, + "step": 116280 + }, + { + "epoch": 0.23491315748009228, + "grad_norm": 630.2426147460938, + "learning_rate": 9.474048623869846e-06, + "loss": 13.5902, + "step": 116290 + }, + { + "epoch": 0.2349333581127761, + "grad_norm": 2414.823486328125, + "learning_rate": 9.473892773317952e-06, + "loss": 33.2801, + "step": 116300 + }, + { + "epoch": 0.2349535587454599, + "grad_norm": 948.0327758789062, + "learning_rate": 9.473736900960845e-06, + "loss": 26.3757, + "step": 116310 + }, + { + "epoch": 0.23497375937814372, + "grad_norm": 824.4536743164062, + "learning_rate": 9.47358100679929e-06, + "loss": 20.7127, + "step": 116320 + }, + { + "epoch": 0.23499396001082754, + "grad_norm": 274.6886901855469, + "learning_rate": 9.473425090834041e-06, + "loss": 28.9095, + "step": 116330 + }, + { + "epoch": 0.23501416064351136, + "grad_norm": 557.03466796875, + "learning_rate": 9.473269153065863e-06, + "loss": 34.9061, + "step": 116340 + }, + { + "epoch": 0.23503436127619518, + "grad_norm": 184.921142578125, + "learning_rate": 9.473113193495513e-06, + "loss": 24.6152, + "step": 116350 + }, + { + "epoch": 0.235054561908879, + "grad_norm": 229.9757843017578, + "learning_rate": 9.472957212123751e-06, + "loss": 10.6152, + "step": 116360 + }, + { + "epoch": 0.23507476254156282, + "grad_norm": 450.23809814453125, + "learning_rate": 9.472801208951339e-06, + "loss": 17.6649, + "step": 116370 + }, + { + "epoch": 
0.2350949631742466, + "grad_norm": 337.5602722167969, + "learning_rate": 9.472645183979037e-06, + "loss": 25.5706, + "step": 116380 + }, + { + "epoch": 0.23511516380693043, + "grad_norm": 638.6205444335938, + "learning_rate": 9.472489137207604e-06, + "loss": 17.8359, + "step": 116390 + }, + { + "epoch": 0.23513536443961425, + "grad_norm": 721.173583984375, + "learning_rate": 9.4723330686378e-06, + "loss": 23.6118, + "step": 116400 + }, + { + "epoch": 0.23515556507229807, + "grad_norm": 544.9619140625, + "learning_rate": 9.472176978270389e-06, + "loss": 25.2253, + "step": 116410 + }, + { + "epoch": 0.2351757657049819, + "grad_norm": 738.7373657226562, + "learning_rate": 9.472020866106128e-06, + "loss": 34.294, + "step": 116420 + }, + { + "epoch": 0.2351959663376657, + "grad_norm": 765.0400390625, + "learning_rate": 9.47186473214578e-06, + "loss": 20.823, + "step": 116430 + }, + { + "epoch": 0.2352161669703495, + "grad_norm": 480.0275573730469, + "learning_rate": 9.471708576390108e-06, + "loss": 22.5498, + "step": 116440 + }, + { + "epoch": 0.23523636760303332, + "grad_norm": 566.5287475585938, + "learning_rate": 9.47155239883987e-06, + "loss": 21.7083, + "step": 116450 + }, + { + "epoch": 0.23525656823571714, + "grad_norm": 627.3939208984375, + "learning_rate": 9.471396199495825e-06, + "loss": 22.2768, + "step": 116460 + }, + { + "epoch": 0.23527676886840096, + "grad_norm": 845.6305541992188, + "learning_rate": 9.471239978358741e-06, + "loss": 25.5389, + "step": 116470 + }, + { + "epoch": 0.23529696950108478, + "grad_norm": 421.12469482421875, + "learning_rate": 9.471083735429374e-06, + "loss": 43.7774, + "step": 116480 + }, + { + "epoch": 0.2353171701337686, + "grad_norm": 229.1207733154297, + "learning_rate": 9.470927470708486e-06, + "loss": 52.3167, + "step": 116490 + }, + { + "epoch": 0.2353373707664524, + "grad_norm": 416.5636901855469, + "learning_rate": 9.470771184196842e-06, + "loss": 65.1665, + "step": 116500 + }, + { + "epoch": 0.2353575713991362, + "grad_norm": 441.8174743652344, + "learning_rate": 9.4706148758952e-06, + "loss": 18.8124, + "step": 116510 + }, + { + "epoch": 0.23537777203182003, + "grad_norm": 510.05340576171875, + "learning_rate": 9.470458545804325e-06, + "loss": 34.6727, + "step": 116520 + }, + { + "epoch": 0.23539797266450385, + "grad_norm": 434.70062255859375, + "learning_rate": 9.470302193924975e-06, + "loss": 22.0199, + "step": 116530 + }, + { + "epoch": 0.23541817329718767, + "grad_norm": 411.9683837890625, + "learning_rate": 9.470145820257915e-06, + "loss": 32.3334, + "step": 116540 + }, + { + "epoch": 0.2354383739298715, + "grad_norm": 462.7821350097656, + "learning_rate": 9.469989424803907e-06, + "loss": 12.0787, + "step": 116550 + }, + { + "epoch": 0.2354585745625553, + "grad_norm": 46.51176834106445, + "learning_rate": 9.469833007563712e-06, + "loss": 14.8491, + "step": 116560 + }, + { + "epoch": 0.2354787751952391, + "grad_norm": 171.05972290039062, + "learning_rate": 9.469676568538094e-06, + "loss": 31.8924, + "step": 116570 + }, + { + "epoch": 0.23549897582792292, + "grad_norm": 293.23077392578125, + "learning_rate": 9.469520107727815e-06, + "loss": 24.2429, + "step": 116580 + }, + { + "epoch": 0.23551917646060674, + "grad_norm": 632.8076782226562, + "learning_rate": 9.469363625133634e-06, + "loss": 15.9344, + "step": 116590 + }, + { + "epoch": 0.23553937709329056, + "grad_norm": 186.47857666015625, + "learning_rate": 9.46920712075632e-06, + "loss": 31.9356, + "step": 116600 + }, + { + "epoch": 0.23555957772597438, + "grad_norm": 368.3692626953125, 
+ "learning_rate": 9.469050594596631e-06, + "loss": 15.9615, + "step": 116610 + }, + { + "epoch": 0.2355797783586582, + "grad_norm": 337.5687255859375, + "learning_rate": 9.468894046655332e-06, + "loss": 22.172, + "step": 116620 + }, + { + "epoch": 0.235599978991342, + "grad_norm": 372.5582580566406, + "learning_rate": 9.468737476933186e-06, + "loss": 17.3395, + "step": 116630 + }, + { + "epoch": 0.23562017962402582, + "grad_norm": 458.6085205078125, + "learning_rate": 9.468580885430953e-06, + "loss": 17.8977, + "step": 116640 + }, + { + "epoch": 0.23564038025670964, + "grad_norm": 527.5307006835938, + "learning_rate": 9.468424272149402e-06, + "loss": 31.6, + "step": 116650 + }, + { + "epoch": 0.23566058088939346, + "grad_norm": 0.0, + "learning_rate": 9.46826763708929e-06, + "loss": 15.7834, + "step": 116660 + }, + { + "epoch": 0.23568078152207728, + "grad_norm": 751.5640258789062, + "learning_rate": 9.468110980251386e-06, + "loss": 25.4928, + "step": 116670 + }, + { + "epoch": 0.2357009821547611, + "grad_norm": 623.1112060546875, + "learning_rate": 9.467954301636451e-06, + "loss": 24.9689, + "step": 116680 + }, + { + "epoch": 0.23572118278744492, + "grad_norm": 209.68223571777344, + "learning_rate": 9.467797601245246e-06, + "loss": 21.5656, + "step": 116690 + }, + { + "epoch": 0.2357413834201287, + "grad_norm": 492.0634460449219, + "learning_rate": 9.46764087907854e-06, + "loss": 16.8456, + "step": 116700 + }, + { + "epoch": 0.23576158405281253, + "grad_norm": 536.7673950195312, + "learning_rate": 9.467484135137093e-06, + "loss": 32.4083, + "step": 116710 + }, + { + "epoch": 0.23578178468549635, + "grad_norm": 1734.8011474609375, + "learning_rate": 9.46732736942167e-06, + "loss": 30.417, + "step": 116720 + }, + { + "epoch": 0.23580198531818017, + "grad_norm": 538.3679809570312, + "learning_rate": 9.467170581933037e-06, + "loss": 31.2273, + "step": 116730 + }, + { + "epoch": 0.235822185950864, + "grad_norm": 232.93023681640625, + "learning_rate": 9.467013772671953e-06, + "loss": 12.2296, + "step": 116740 + }, + { + "epoch": 0.2358423865835478, + "grad_norm": 820.4888305664062, + "learning_rate": 9.46685694163919e-06, + "loss": 20.1756, + "step": 116750 + }, + { + "epoch": 0.2358625872162316, + "grad_norm": 378.5989990234375, + "learning_rate": 9.466700088835505e-06, + "loss": 24.1628, + "step": 116760 + }, + { + "epoch": 0.23588278784891542, + "grad_norm": 0.0, + "learning_rate": 9.466543214261666e-06, + "loss": 19.2676, + "step": 116770 + }, + { + "epoch": 0.23590298848159924, + "grad_norm": 143.71340942382812, + "learning_rate": 9.466386317918436e-06, + "loss": 19.0917, + "step": 116780 + }, + { + "epoch": 0.23592318911428306, + "grad_norm": 734.0843505859375, + "learning_rate": 9.466229399806583e-06, + "loss": 24.1006, + "step": 116790 + }, + { + "epoch": 0.23594338974696688, + "grad_norm": 654.5097045898438, + "learning_rate": 9.46607245992687e-06, + "loss": 30.1238, + "step": 116800 + }, + { + "epoch": 0.2359635903796507, + "grad_norm": 198.99844360351562, + "learning_rate": 9.465915498280058e-06, + "loss": 18.8972, + "step": 116810 + }, + { + "epoch": 0.2359837910123345, + "grad_norm": 635.9158935546875, + "learning_rate": 9.465758514866919e-06, + "loss": 33.3588, + "step": 116820 + }, + { + "epoch": 0.2360039916450183, + "grad_norm": 253.74960327148438, + "learning_rate": 9.465601509688212e-06, + "loss": 21.6923, + "step": 116830 + }, + { + "epoch": 0.23602419227770213, + "grad_norm": 268.11517333984375, + "learning_rate": 9.465444482744708e-06, + "loss": 20.9421, + "step": 116840 
+ }, + { + "epoch": 0.23604439291038595, + "grad_norm": 155.50680541992188, + "learning_rate": 9.465287434037167e-06, + "loss": 21.0103, + "step": 116850 + }, + { + "epoch": 0.23606459354306977, + "grad_norm": 969.3081665039062, + "learning_rate": 9.465130363566357e-06, + "loss": 24.801, + "step": 116860 + }, + { + "epoch": 0.2360847941757536, + "grad_norm": 793.8674926757812, + "learning_rate": 9.464973271333042e-06, + "loss": 15.4893, + "step": 116870 + }, + { + "epoch": 0.2361049948084374, + "grad_norm": 338.47918701171875, + "learning_rate": 9.464816157337991e-06, + "loss": 29.3621, + "step": 116880 + }, + { + "epoch": 0.2361251954411212, + "grad_norm": 641.8385009765625, + "learning_rate": 9.464659021581966e-06, + "loss": 21.0026, + "step": 116890 + }, + { + "epoch": 0.23614539607380503, + "grad_norm": 145.12539672851562, + "learning_rate": 9.464501864065735e-06, + "loss": 20.4296, + "step": 116900 + }, + { + "epoch": 0.23616559670648885, + "grad_norm": 417.63482666015625, + "learning_rate": 9.464344684790063e-06, + "loss": 27.0664, + "step": 116910 + }, + { + "epoch": 0.23618579733917267, + "grad_norm": 582.3400268554688, + "learning_rate": 9.464187483755718e-06, + "loss": 33.1719, + "step": 116920 + }, + { + "epoch": 0.23620599797185649, + "grad_norm": 356.63433837890625, + "learning_rate": 9.464030260963463e-06, + "loss": 16.0722, + "step": 116930 + }, + { + "epoch": 0.2362261986045403, + "grad_norm": 431.3008728027344, + "learning_rate": 9.463873016414066e-06, + "loss": 18.8018, + "step": 116940 + }, + { + "epoch": 0.2362463992372241, + "grad_norm": 230.9427490234375, + "learning_rate": 9.463715750108293e-06, + "loss": 19.6771, + "step": 116950 + }, + { + "epoch": 0.23626659986990792, + "grad_norm": 4518.6083984375, + "learning_rate": 9.463558462046912e-06, + "loss": 30.278, + "step": 116960 + }, + { + "epoch": 0.23628680050259174, + "grad_norm": 729.4716796875, + "learning_rate": 9.463401152230688e-06, + "loss": 18.7407, + "step": 116970 + }, + { + "epoch": 0.23630700113527556, + "grad_norm": 413.8880920410156, + "learning_rate": 9.463243820660389e-06, + "loss": 23.859, + "step": 116980 + }, + { + "epoch": 0.23632720176795938, + "grad_norm": 14.609228134155273, + "learning_rate": 9.463086467336779e-06, + "loss": 22.6864, + "step": 116990 + }, + { + "epoch": 0.2363474024006432, + "grad_norm": 275.2424011230469, + "learning_rate": 9.46292909226063e-06, + "loss": 20.4147, + "step": 117000 + }, + { + "epoch": 0.23636760303332702, + "grad_norm": 254.34274291992188, + "learning_rate": 9.462771695432702e-06, + "loss": 15.3926, + "step": 117010 + }, + { + "epoch": 0.2363878036660108, + "grad_norm": 345.982421875, + "learning_rate": 9.462614276853767e-06, + "loss": 31.9936, + "step": 117020 + }, + { + "epoch": 0.23640800429869463, + "grad_norm": 89.97821807861328, + "learning_rate": 9.462456836524593e-06, + "loss": 24.8761, + "step": 117030 + }, + { + "epoch": 0.23642820493137845, + "grad_norm": 177.04977416992188, + "learning_rate": 9.462299374445944e-06, + "loss": 21.204, + "step": 117040 + }, + { + "epoch": 0.23644840556406227, + "grad_norm": 456.4011535644531, + "learning_rate": 9.46214189061859e-06, + "loss": 35.3761, + "step": 117050 + }, + { + "epoch": 0.2364686061967461, + "grad_norm": 292.1644287109375, + "learning_rate": 9.461984385043297e-06, + "loss": 20.2125, + "step": 117060 + }, + { + "epoch": 0.2364888068294299, + "grad_norm": 743.0880126953125, + "learning_rate": 9.461826857720835e-06, + "loss": 34.9542, + "step": 117070 + }, + { + "epoch": 0.2365090074621137, + 
"grad_norm": 0.0, + "learning_rate": 9.461669308651968e-06, + "loss": 25.9587, + "step": 117080 + }, + { + "epoch": 0.23652920809479752, + "grad_norm": 336.8008117675781, + "learning_rate": 9.461511737837467e-06, + "loss": 17.3046, + "step": 117090 + }, + { + "epoch": 0.23654940872748134, + "grad_norm": 651.3720092773438, + "learning_rate": 9.461354145278098e-06, + "loss": 23.1151, + "step": 117100 + }, + { + "epoch": 0.23656960936016516, + "grad_norm": 353.6770935058594, + "learning_rate": 9.46119653097463e-06, + "loss": 20.6156, + "step": 117110 + }, + { + "epoch": 0.23658980999284898, + "grad_norm": 910.6161499023438, + "learning_rate": 9.461038894927833e-06, + "loss": 36.3535, + "step": 117120 + }, + { + "epoch": 0.2366100106255328, + "grad_norm": 339.4697265625, + "learning_rate": 9.460881237138472e-06, + "loss": 13.3867, + "step": 117130 + }, + { + "epoch": 0.2366302112582166, + "grad_norm": 675.8087158203125, + "learning_rate": 9.460723557607317e-06, + "loss": 29.5929, + "step": 117140 + }, + { + "epoch": 0.23665041189090041, + "grad_norm": 351.864501953125, + "learning_rate": 9.460565856335136e-06, + "loss": 22.8608, + "step": 117150 + }, + { + "epoch": 0.23667061252358423, + "grad_norm": 287.1529541015625, + "learning_rate": 9.460408133322698e-06, + "loss": 37.3685, + "step": 117160 + }, + { + "epoch": 0.23669081315626805, + "grad_norm": 905.3443603515625, + "learning_rate": 9.460250388570772e-06, + "loss": 51.5777, + "step": 117170 + }, + { + "epoch": 0.23671101378895187, + "grad_norm": 815.42626953125, + "learning_rate": 9.460092622080128e-06, + "loss": 40.0098, + "step": 117180 + }, + { + "epoch": 0.2367312144216357, + "grad_norm": 6.8798604011535645, + "learning_rate": 9.459934833851531e-06, + "loss": 15.9156, + "step": 117190 + }, + { + "epoch": 0.23675141505431951, + "grad_norm": 303.4665222167969, + "learning_rate": 9.459777023885754e-06, + "loss": 32.0095, + "step": 117200 + }, + { + "epoch": 0.2367716156870033, + "grad_norm": 525.3986206054688, + "learning_rate": 9.459619192183565e-06, + "loss": 28.3864, + "step": 117210 + }, + { + "epoch": 0.23679181631968713, + "grad_norm": 805.0297241210938, + "learning_rate": 9.459461338745733e-06, + "loss": 12.2511, + "step": 117220 + }, + { + "epoch": 0.23681201695237095, + "grad_norm": 108.30651092529297, + "learning_rate": 9.459303463573027e-06, + "loss": 29.9991, + "step": 117230 + }, + { + "epoch": 0.23683221758505477, + "grad_norm": 545.5927734375, + "learning_rate": 9.459145566666216e-06, + "loss": 27.7245, + "step": 117240 + }, + { + "epoch": 0.2368524182177386, + "grad_norm": 832.1116333007812, + "learning_rate": 9.458987648026071e-06, + "loss": 17.0151, + "step": 117250 + }, + { + "epoch": 0.2368726188504224, + "grad_norm": 125.13150024414062, + "learning_rate": 9.458829707653362e-06, + "loss": 33.1239, + "step": 117260 + }, + { + "epoch": 0.2368928194831062, + "grad_norm": 186.43370056152344, + "learning_rate": 9.458671745548855e-06, + "loss": 16.7115, + "step": 117270 + }, + { + "epoch": 0.23691302011579002, + "grad_norm": 708.531005859375, + "learning_rate": 9.458513761713324e-06, + "loss": 21.2725, + "step": 117280 + }, + { + "epoch": 0.23693322074847384, + "grad_norm": 810.84375, + "learning_rate": 9.45835575614754e-06, + "loss": 29.8557, + "step": 117290 + }, + { + "epoch": 0.23695342138115766, + "grad_norm": 277.344970703125, + "learning_rate": 9.458197728852268e-06, + "loss": 16.7018, + "step": 117300 + }, + { + "epoch": 0.23697362201384148, + "grad_norm": 448.7570495605469, + "learning_rate": 9.458039679828281e-06, 
+ "loss": 24.4709, + "step": 117310 + }, + { + "epoch": 0.2369938226465253, + "grad_norm": 330.569091796875, + "learning_rate": 9.457881609076352e-06, + "loss": 16.9305, + "step": 117320 + }, + { + "epoch": 0.23701402327920912, + "grad_norm": 63.78606414794922, + "learning_rate": 9.457723516597247e-06, + "loss": 27.3925, + "step": 117330 + }, + { + "epoch": 0.2370342239118929, + "grad_norm": 265.64031982421875, + "learning_rate": 9.457565402391738e-06, + "loss": 8.668, + "step": 117340 + }, + { + "epoch": 0.23705442454457673, + "grad_norm": 503.2449645996094, + "learning_rate": 9.457407266460595e-06, + "loss": 23.1339, + "step": 117350 + }, + { + "epoch": 0.23707462517726055, + "grad_norm": 528.9575805664062, + "learning_rate": 9.45724910880459e-06, + "loss": 24.2766, + "step": 117360 + }, + { + "epoch": 0.23709482580994437, + "grad_norm": 511.1745910644531, + "learning_rate": 9.457090929424495e-06, + "loss": 27.5194, + "step": 117370 + }, + { + "epoch": 0.2371150264426282, + "grad_norm": 244.69229125976562, + "learning_rate": 9.456932728321078e-06, + "loss": 17.6802, + "step": 117380 + }, + { + "epoch": 0.237135227075312, + "grad_norm": 433.9642028808594, + "learning_rate": 9.456774505495112e-06, + "loss": 11.6906, + "step": 117390 + }, + { + "epoch": 0.2371554277079958, + "grad_norm": 514.8110961914062, + "learning_rate": 9.456616260947367e-06, + "loss": 20.3018, + "step": 117400 + }, + { + "epoch": 0.23717562834067962, + "grad_norm": 294.82403564453125, + "learning_rate": 9.456457994678616e-06, + "loss": 19.9833, + "step": 117410 + }, + { + "epoch": 0.23719582897336344, + "grad_norm": 672.5086669921875, + "learning_rate": 9.456299706689627e-06, + "loss": 31.9594, + "step": 117420 + }, + { + "epoch": 0.23721602960604726, + "grad_norm": 38.29913330078125, + "learning_rate": 9.456141396981176e-06, + "loss": 19.7459, + "step": 117430 + }, + { + "epoch": 0.23723623023873108, + "grad_norm": 317.1351318359375, + "learning_rate": 9.455983065554032e-06, + "loss": 25.9537, + "step": 117440 + }, + { + "epoch": 0.2372564308714149, + "grad_norm": 591.7095947265625, + "learning_rate": 9.455824712408967e-06, + "loss": 25.0482, + "step": 117450 + }, + { + "epoch": 0.2372766315040987, + "grad_norm": 1024.886962890625, + "learning_rate": 9.455666337546751e-06, + "loss": 53.5332, + "step": 117460 + }, + { + "epoch": 0.23729683213678252, + "grad_norm": 530.8159790039062, + "learning_rate": 9.45550794096816e-06, + "loss": 19.6555, + "step": 117470 + }, + { + "epoch": 0.23731703276946634, + "grad_norm": 665.9408569335938, + "learning_rate": 9.455349522673962e-06, + "loss": 39.3288, + "step": 117480 + }, + { + "epoch": 0.23733723340215016, + "grad_norm": 522.2376098632812, + "learning_rate": 9.455191082664931e-06, + "loss": 20.3337, + "step": 117490 + }, + { + "epoch": 0.23735743403483398, + "grad_norm": 383.4013977050781, + "learning_rate": 9.45503262094184e-06, + "loss": 29.2384, + "step": 117500 + }, + { + "epoch": 0.2373776346675178, + "grad_norm": 446.2220153808594, + "learning_rate": 9.45487413750546e-06, + "loss": 33.0276, + "step": 117510 + }, + { + "epoch": 0.23739783530020162, + "grad_norm": 641.8443603515625, + "learning_rate": 9.454715632356564e-06, + "loss": 23.3312, + "step": 117520 + }, + { + "epoch": 0.2374180359328854, + "grad_norm": 696.3156127929688, + "learning_rate": 9.454557105495922e-06, + "loss": 21.8034, + "step": 117530 + }, + { + "epoch": 0.23743823656556923, + "grad_norm": 398.4124450683594, + "learning_rate": 9.454398556924312e-06, + "loss": 29.9253, + "step": 117540 + }, + { + 
"epoch": 0.23745843719825305, + "grad_norm": 1015.231689453125, + "learning_rate": 9.454239986642503e-06, + "loss": 20.2801, + "step": 117550 + }, + { + "epoch": 0.23747863783093687, + "grad_norm": 324.8863525390625, + "learning_rate": 9.454081394651267e-06, + "loss": 15.2749, + "step": 117560 + }, + { + "epoch": 0.2374988384636207, + "grad_norm": 670.9252319335938, + "learning_rate": 9.453922780951382e-06, + "loss": 22.9192, + "step": 117570 + }, + { + "epoch": 0.2375190390963045, + "grad_norm": 64.24264526367188, + "learning_rate": 9.453764145543614e-06, + "loss": 24.5582, + "step": 117580 + }, + { + "epoch": 0.2375392397289883, + "grad_norm": 348.1638488769531, + "learning_rate": 9.453605488428741e-06, + "loss": 15.0601, + "step": 117590 + }, + { + "epoch": 0.23755944036167212, + "grad_norm": 530.2588500976562, + "learning_rate": 9.453446809607534e-06, + "loss": 32.7515, + "step": 117600 + }, + { + "epoch": 0.23757964099435594, + "grad_norm": 1041.0078125, + "learning_rate": 9.453288109080768e-06, + "loss": 29.4589, + "step": 117610 + }, + { + "epoch": 0.23759984162703976, + "grad_norm": 236.34274291992188, + "learning_rate": 9.453129386849216e-06, + "loss": 29.0242, + "step": 117620 + }, + { + "epoch": 0.23762004225972358, + "grad_norm": 474.6619873046875, + "learning_rate": 9.452970642913652e-06, + "loss": 13.05, + "step": 117630 + }, + { + "epoch": 0.2376402428924074, + "grad_norm": 379.6215515136719, + "learning_rate": 9.452811877274848e-06, + "loss": 15.9084, + "step": 117640 + }, + { + "epoch": 0.23766044352509122, + "grad_norm": 482.5552673339844, + "learning_rate": 9.45265308993358e-06, + "loss": 14.5373, + "step": 117650 + }, + { + "epoch": 0.237680644157775, + "grad_norm": 498.3376159667969, + "learning_rate": 9.452494280890621e-06, + "loss": 37.3605, + "step": 117660 + }, + { + "epoch": 0.23770084479045883, + "grad_norm": 502.6356201171875, + "learning_rate": 9.452335450146744e-06, + "loss": 24.045, + "step": 117670 + }, + { + "epoch": 0.23772104542314265, + "grad_norm": 480.24542236328125, + "learning_rate": 9.452176597702724e-06, + "loss": 26.7962, + "step": 117680 + }, + { + "epoch": 0.23774124605582647, + "grad_norm": 397.1744079589844, + "learning_rate": 9.452017723559337e-06, + "loss": 24.1105, + "step": 117690 + }, + { + "epoch": 0.2377614466885103, + "grad_norm": 0.5460114479064941, + "learning_rate": 9.451858827717354e-06, + "loss": 24.0886, + "step": 117700 + }, + { + "epoch": 0.2377816473211941, + "grad_norm": 29.42403221130371, + "learning_rate": 9.451699910177551e-06, + "loss": 23.887, + "step": 117710 + }, + { + "epoch": 0.2378018479538779, + "grad_norm": 366.1441955566406, + "learning_rate": 9.451540970940703e-06, + "loss": 14.9171, + "step": 117720 + }, + { + "epoch": 0.23782204858656172, + "grad_norm": 42.585025787353516, + "learning_rate": 9.451382010007584e-06, + "loss": 26.0252, + "step": 117730 + }, + { + "epoch": 0.23784224921924554, + "grad_norm": 572.30419921875, + "learning_rate": 9.45122302737897e-06, + "loss": 22.0163, + "step": 117740 + }, + { + "epoch": 0.23786244985192936, + "grad_norm": 498.8094787597656, + "learning_rate": 9.451064023055634e-06, + "loss": 18.0851, + "step": 117750 + }, + { + "epoch": 0.23788265048461318, + "grad_norm": 382.15118408203125, + "learning_rate": 9.450904997038351e-06, + "loss": 20.9569, + "step": 117760 + }, + { + "epoch": 0.237902851117297, + "grad_norm": 671.4730834960938, + "learning_rate": 9.450745949327897e-06, + "loss": 15.1307, + "step": 117770 + }, + { + "epoch": 0.2379230517499808, + "grad_norm": 
457.44891357421875, + "learning_rate": 9.450586879925048e-06, + "loss": 17.5412, + "step": 117780 + }, + { + "epoch": 0.23794325238266462, + "grad_norm": 503.4642333984375, + "learning_rate": 9.450427788830578e-06, + "loss": 20.7526, + "step": 117790 + }, + { + "epoch": 0.23796345301534844, + "grad_norm": 639.3961181640625, + "learning_rate": 9.450268676045261e-06, + "loss": 31.7642, + "step": 117800 + }, + { + "epoch": 0.23798365364803226, + "grad_norm": 872.4010620117188, + "learning_rate": 9.450109541569879e-06, + "loss": 25.558, + "step": 117810 + }, + { + "epoch": 0.23800385428071608, + "grad_norm": 937.5293579101562, + "learning_rate": 9.4499503854052e-06, + "loss": 25.0143, + "step": 117820 + }, + { + "epoch": 0.2380240549133999, + "grad_norm": 587.4414672851562, + "learning_rate": 9.449791207552001e-06, + "loss": 15.5493, + "step": 117830 + }, + { + "epoch": 0.23804425554608372, + "grad_norm": 441.7513122558594, + "learning_rate": 9.44963200801106e-06, + "loss": 14.2472, + "step": 117840 + }, + { + "epoch": 0.2380644561787675, + "grad_norm": 603.2429809570312, + "learning_rate": 9.449472786783153e-06, + "loss": 22.1237, + "step": 117850 + }, + { + "epoch": 0.23808465681145133, + "grad_norm": 518.7314453125, + "learning_rate": 9.449313543869056e-06, + "loss": 28.391, + "step": 117860 + }, + { + "epoch": 0.23810485744413515, + "grad_norm": 611.9623413085938, + "learning_rate": 9.449154279269543e-06, + "loss": 25.6668, + "step": 117870 + }, + { + "epoch": 0.23812505807681897, + "grad_norm": 445.8013000488281, + "learning_rate": 9.448994992985393e-06, + "loss": 16.1492, + "step": 117880 + }, + { + "epoch": 0.2381452587095028, + "grad_norm": 466.94317626953125, + "learning_rate": 9.44883568501738e-06, + "loss": 12.4663, + "step": 117890 + }, + { + "epoch": 0.2381654593421866, + "grad_norm": 209.4217529296875, + "learning_rate": 9.448676355366282e-06, + "loss": 26.3823, + "step": 117900 + }, + { + "epoch": 0.2381856599748704, + "grad_norm": 551.5216674804688, + "learning_rate": 9.448517004032876e-06, + "loss": 14.8769, + "step": 117910 + }, + { + "epoch": 0.23820586060755422, + "grad_norm": 620.2489013671875, + "learning_rate": 9.448357631017934e-06, + "loss": 20.2437, + "step": 117920 + }, + { + "epoch": 0.23822606124023804, + "grad_norm": 348.79095458984375, + "learning_rate": 9.44819823632224e-06, + "loss": 23.0824, + "step": 117930 + }, + { + "epoch": 0.23824626187292186, + "grad_norm": 201.4728546142578, + "learning_rate": 9.448038819946566e-06, + "loss": 16.3949, + "step": 117940 + }, + { + "epoch": 0.23826646250560568, + "grad_norm": 835.5105590820312, + "learning_rate": 9.447879381891691e-06, + "loss": 34.25, + "step": 117950 + }, + { + "epoch": 0.2382866631382895, + "grad_norm": 349.479736328125, + "learning_rate": 9.447719922158391e-06, + "loss": 15.4773, + "step": 117960 + }, + { + "epoch": 0.23830686377097332, + "grad_norm": 626.60205078125, + "learning_rate": 9.447560440747443e-06, + "loss": 31.5524, + "step": 117970 + }, + { + "epoch": 0.2383270644036571, + "grad_norm": 294.943359375, + "learning_rate": 9.447400937659625e-06, + "loss": 48.2904, + "step": 117980 + }, + { + "epoch": 0.23834726503634093, + "grad_norm": 209.84666442871094, + "learning_rate": 9.447241412895714e-06, + "loss": 23.8491, + "step": 117990 + }, + { + "epoch": 0.23836746566902475, + "grad_norm": 969.4122924804688, + "learning_rate": 9.44708186645649e-06, + "loss": 37.3985, + "step": 118000 + }, + { + "epoch": 0.23838766630170857, + "grad_norm": 94.50203704833984, + "learning_rate": 
9.446922298342725e-06, + "loss": 27.4426, + "step": 118010 + }, + { + "epoch": 0.2384078669343924, + "grad_norm": 392.3721618652344, + "learning_rate": 9.446762708555202e-06, + "loss": 21.8015, + "step": 118020 + }, + { + "epoch": 0.2384280675670762, + "grad_norm": 428.6141052246094, + "learning_rate": 9.446603097094696e-06, + "loss": 26.3162, + "step": 118030 + }, + { + "epoch": 0.23844826819976, + "grad_norm": 472.3879699707031, + "learning_rate": 9.446443463961986e-06, + "loss": 10.2355, + "step": 118040 + }, + { + "epoch": 0.23846846883244383, + "grad_norm": 272.6233215332031, + "learning_rate": 9.44628380915785e-06, + "loss": 7.925, + "step": 118050 + }, + { + "epoch": 0.23848866946512765, + "grad_norm": 1400.2432861328125, + "learning_rate": 9.446124132683066e-06, + "loss": 30.2269, + "step": 118060 + }, + { + "epoch": 0.23850887009781147, + "grad_norm": 491.30865478515625, + "learning_rate": 9.445964434538412e-06, + "loss": 26.4811, + "step": 118070 + }, + { + "epoch": 0.23852907073049529, + "grad_norm": 474.2497863769531, + "learning_rate": 9.445804714724667e-06, + "loss": 30.6656, + "step": 118080 + }, + { + "epoch": 0.2385492713631791, + "grad_norm": 9.611652374267578, + "learning_rate": 9.44564497324261e-06, + "loss": 28.9517, + "step": 118090 + }, + { + "epoch": 0.2385694719958629, + "grad_norm": 590.2717895507812, + "learning_rate": 9.445485210093018e-06, + "loss": 19.1242, + "step": 118100 + }, + { + "epoch": 0.23858967262854672, + "grad_norm": 244.5931854248047, + "learning_rate": 9.445325425276668e-06, + "loss": 14.1212, + "step": 118110 + }, + { + "epoch": 0.23860987326123054, + "grad_norm": 709.9424438476562, + "learning_rate": 9.445165618794343e-06, + "loss": 21.64, + "step": 118120 + }, + { + "epoch": 0.23863007389391436, + "grad_norm": 316.25927734375, + "learning_rate": 9.44500579064682e-06, + "loss": 18.8832, + "step": 118130 + }, + { + "epoch": 0.23865027452659818, + "grad_norm": 217.62611389160156, + "learning_rate": 9.444845940834876e-06, + "loss": 18.0076, + "step": 118140 + }, + { + "epoch": 0.238670475159282, + "grad_norm": 251.6680145263672, + "learning_rate": 9.444686069359294e-06, + "loss": 27.4444, + "step": 118150 + }, + { + "epoch": 0.23869067579196582, + "grad_norm": 362.588623046875, + "learning_rate": 9.444526176220851e-06, + "loss": 34.5831, + "step": 118160 + }, + { + "epoch": 0.2387108764246496, + "grad_norm": 298.76190185546875, + "learning_rate": 9.444366261420328e-06, + "loss": 12.2111, + "step": 118170 + }, + { + "epoch": 0.23873107705733343, + "grad_norm": 59.43899917602539, + "learning_rate": 9.4442063249585e-06, + "loss": 26.4651, + "step": 118180 + }, + { + "epoch": 0.23875127769001725, + "grad_norm": 327.2350769042969, + "learning_rate": 9.44404636683615e-06, + "loss": 28.7903, + "step": 118190 + }, + { + "epoch": 0.23877147832270107, + "grad_norm": 409.3868103027344, + "learning_rate": 9.443886387054058e-06, + "loss": 33.7824, + "step": 118200 + }, + { + "epoch": 0.2387916789553849, + "grad_norm": 632.2372436523438, + "learning_rate": 9.443726385613003e-06, + "loss": 14.7928, + "step": 118210 + }, + { + "epoch": 0.2388118795880687, + "grad_norm": 587.41357421875, + "learning_rate": 9.443566362513763e-06, + "loss": 23.6732, + "step": 118220 + }, + { + "epoch": 0.2388320802207525, + "grad_norm": 1263.5318603515625, + "learning_rate": 9.44340631775712e-06, + "loss": 46.6303, + "step": 118230 + }, + { + "epoch": 0.23885228085343632, + "grad_norm": 158.19912719726562, + "learning_rate": 9.443246251343855e-06, + "loss": 19.1064, + "step": 118240 
+ }, + { + "epoch": 0.23887248148612014, + "grad_norm": 843.1229248046875, + "learning_rate": 9.443086163274745e-06, + "loss": 38.6345, + "step": 118250 + }, + { + "epoch": 0.23889268211880396, + "grad_norm": 163.65879821777344, + "learning_rate": 9.442926053550572e-06, + "loss": 18.5162, + "step": 118260 + }, + { + "epoch": 0.23891288275148778, + "grad_norm": 1377.5970458984375, + "learning_rate": 9.442765922172117e-06, + "loss": 23.9794, + "step": 118270 + }, + { + "epoch": 0.2389330833841716, + "grad_norm": 83.8387680053711, + "learning_rate": 9.442605769140159e-06, + "loss": 18.4275, + "step": 118280 + }, + { + "epoch": 0.23895328401685542, + "grad_norm": 246.1664581298828, + "learning_rate": 9.44244559445548e-06, + "loss": 19.1218, + "step": 118290 + }, + { + "epoch": 0.23897348464953921, + "grad_norm": 283.3323974609375, + "learning_rate": 9.44228539811886e-06, + "loss": 34.5833, + "step": 118300 + }, + { + "epoch": 0.23899368528222303, + "grad_norm": 349.25634765625, + "learning_rate": 9.44212518013108e-06, + "loss": 23.3768, + "step": 118310 + }, + { + "epoch": 0.23901388591490685, + "grad_norm": 243.878173828125, + "learning_rate": 9.44196494049292e-06, + "loss": 25.8638, + "step": 118320 + }, + { + "epoch": 0.23903408654759067, + "grad_norm": 558.2456665039062, + "learning_rate": 9.44180467920516e-06, + "loss": 25.0371, + "step": 118330 + }, + { + "epoch": 0.2390542871802745, + "grad_norm": 84.71350860595703, + "learning_rate": 9.441644396268586e-06, + "loss": 18.0213, + "step": 118340 + }, + { + "epoch": 0.23907448781295831, + "grad_norm": 7.51152229309082, + "learning_rate": 9.441484091683975e-06, + "loss": 14.0777, + "step": 118350 + }, + { + "epoch": 0.2390946884456421, + "grad_norm": 434.8461608886719, + "learning_rate": 9.441323765452107e-06, + "loss": 12.1847, + "step": 118360 + }, + { + "epoch": 0.23911488907832593, + "grad_norm": 642.7026977539062, + "learning_rate": 9.441163417573768e-06, + "loss": 23.7775, + "step": 118370 + }, + { + "epoch": 0.23913508971100975, + "grad_norm": 748.6219482421875, + "learning_rate": 9.441003048049734e-06, + "loss": 27.3327, + "step": 118380 + }, + { + "epoch": 0.23915529034369357, + "grad_norm": 625.2716674804688, + "learning_rate": 9.440842656880792e-06, + "loss": 22.5399, + "step": 118390 + }, + { + "epoch": 0.2391754909763774, + "grad_norm": 180.45635986328125, + "learning_rate": 9.440682244067724e-06, + "loss": 23.0293, + "step": 118400 + }, + { + "epoch": 0.2391956916090612, + "grad_norm": 221.6510772705078, + "learning_rate": 9.440521809611307e-06, + "loss": 18.2394, + "step": 118410 + }, + { + "epoch": 0.239215892241745, + "grad_norm": 1343.8333740234375, + "learning_rate": 9.440361353512325e-06, + "loss": 33.6273, + "step": 118420 + }, + { + "epoch": 0.23923609287442882, + "grad_norm": 658.0180053710938, + "learning_rate": 9.44020087577156e-06, + "loss": 21.0594, + "step": 118430 + }, + { + "epoch": 0.23925629350711264, + "grad_norm": 436.8914794921875, + "learning_rate": 9.440040376389795e-06, + "loss": 20.5959, + "step": 118440 + }, + { + "epoch": 0.23927649413979646, + "grad_norm": 496.1158142089844, + "learning_rate": 9.439879855367813e-06, + "loss": 21.0232, + "step": 118450 + }, + { + "epoch": 0.23929669477248028, + "grad_norm": 676.6805419921875, + "learning_rate": 9.439719312706393e-06, + "loss": 15.3883, + "step": 118460 + }, + { + "epoch": 0.2393168954051641, + "grad_norm": 498.2493591308594, + "learning_rate": 9.43955874840632e-06, + "loss": 34.6778, + "step": 118470 + }, + { + "epoch": 0.23933709603784792, + 
"grad_norm": 743.6807861328125, + "learning_rate": 9.439398162468376e-06, + "loss": 26.3646, + "step": 118480 + }, + { + "epoch": 0.2393572966705317, + "grad_norm": 175.42564392089844, + "learning_rate": 9.439237554893344e-06, + "loss": 18.911, + "step": 118490 + }, + { + "epoch": 0.23937749730321553, + "grad_norm": 300.9667663574219, + "learning_rate": 9.439076925682006e-06, + "loss": 31.7426, + "step": 118500 + }, + { + "epoch": 0.23939769793589935, + "grad_norm": 286.52227783203125, + "learning_rate": 9.438916274835148e-06, + "loss": 37.3805, + "step": 118510 + }, + { + "epoch": 0.23941789856858317, + "grad_norm": 279.5403747558594, + "learning_rate": 9.438755602353549e-06, + "loss": 8.7395, + "step": 118520 + }, + { + "epoch": 0.239438099201267, + "grad_norm": 511.781005859375, + "learning_rate": 9.438594908237993e-06, + "loss": 13.8962, + "step": 118530 + }, + { + "epoch": 0.2394582998339508, + "grad_norm": 442.2402038574219, + "learning_rate": 9.438434192489263e-06, + "loss": 16.4463, + "step": 118540 + }, + { + "epoch": 0.2394785004666346, + "grad_norm": 737.2451171875, + "learning_rate": 9.438273455108145e-06, + "loss": 21.8331, + "step": 118550 + }, + { + "epoch": 0.23949870109931842, + "grad_norm": 558.0885009765625, + "learning_rate": 9.43811269609542e-06, + "loss": 17.9035, + "step": 118560 + }, + { + "epoch": 0.23951890173200224, + "grad_norm": 815.43798828125, + "learning_rate": 9.43795191545187e-06, + "loss": 16.2661, + "step": 118570 + }, + { + "epoch": 0.23953910236468606, + "grad_norm": 859.9912719726562, + "learning_rate": 9.437791113178283e-06, + "loss": 17.6522, + "step": 118580 + }, + { + "epoch": 0.23955930299736988, + "grad_norm": 528.0261840820312, + "learning_rate": 9.43763028927544e-06, + "loss": 20.2724, + "step": 118590 + }, + { + "epoch": 0.2395795036300537, + "grad_norm": 719.481201171875, + "learning_rate": 9.437469443744124e-06, + "loss": 29.6069, + "step": 118600 + }, + { + "epoch": 0.2395997042627375, + "grad_norm": 405.55743408203125, + "learning_rate": 9.437308576585121e-06, + "loss": 24.4762, + "step": 118610 + }, + { + "epoch": 0.23961990489542132, + "grad_norm": 184.1614227294922, + "learning_rate": 9.437147687799213e-06, + "loss": 26.7506, + "step": 118620 + }, + { + "epoch": 0.23964010552810514, + "grad_norm": 239.3774871826172, + "learning_rate": 9.436986777387187e-06, + "loss": 18.1325, + "step": 118630 + }, + { + "epoch": 0.23966030616078896, + "grad_norm": 587.8845825195312, + "learning_rate": 9.436825845349826e-06, + "loss": 26.7064, + "step": 118640 + }, + { + "epoch": 0.23968050679347278, + "grad_norm": 848.9060668945312, + "learning_rate": 9.436664891687911e-06, + "loss": 29.1183, + "step": 118650 + }, + { + "epoch": 0.2397007074261566, + "grad_norm": 127.81281280517578, + "learning_rate": 9.436503916402234e-06, + "loss": 21.8435, + "step": 118660 + }, + { + "epoch": 0.23972090805884042, + "grad_norm": 753.8478393554688, + "learning_rate": 9.436342919493571e-06, + "loss": 31.7494, + "step": 118670 + }, + { + "epoch": 0.2397411086915242, + "grad_norm": 482.82415771484375, + "learning_rate": 9.436181900962713e-06, + "loss": 28.5088, + "step": 118680 + }, + { + "epoch": 0.23976130932420803, + "grad_norm": 286.2489318847656, + "learning_rate": 9.43602086081044e-06, + "loss": 15.6981, + "step": 118690 + }, + { + "epoch": 0.23978150995689185, + "grad_norm": 228.73312377929688, + "learning_rate": 9.435859799037541e-06, + "loss": 15.802, + "step": 118700 + }, + { + "epoch": 0.23980171058957567, + "grad_norm": 841.956298828125, + "learning_rate": 
9.4356987156448e-06, + "loss": 22.0875, + "step": 118710 + }, + { + "epoch": 0.2398219112222595, + "grad_norm": 455.1575012207031, + "learning_rate": 9.435537610633002e-06, + "loss": 25.2311, + "step": 118720 + }, + { + "epoch": 0.2398421118549433, + "grad_norm": 403.8409729003906, + "learning_rate": 9.435376484002927e-06, + "loss": 21.4939, + "step": 118730 + }, + { + "epoch": 0.2398623124876271, + "grad_norm": 1453.3387451171875, + "learning_rate": 9.43521533575537e-06, + "loss": 37.6629, + "step": 118740 + }, + { + "epoch": 0.23988251312031092, + "grad_norm": 278.9483337402344, + "learning_rate": 9.43505416589111e-06, + "loss": 15.3473, + "step": 118750 + }, + { + "epoch": 0.23990271375299474, + "grad_norm": 823.5610961914062, + "learning_rate": 9.434892974410932e-06, + "loss": 31.414, + "step": 118760 + }, + { + "epoch": 0.23992291438567856, + "grad_norm": 374.9403381347656, + "learning_rate": 9.434731761315625e-06, + "loss": 28.9586, + "step": 118770 + }, + { + "epoch": 0.23994311501836238, + "grad_norm": 307.4246520996094, + "learning_rate": 9.434570526605974e-06, + "loss": 15.3941, + "step": 118780 + }, + { + "epoch": 0.2399633156510462, + "grad_norm": 424.46905517578125, + "learning_rate": 9.434409270282762e-06, + "loss": 26.6051, + "step": 118790 + }, + { + "epoch": 0.23998351628373002, + "grad_norm": 362.89013671875, + "learning_rate": 9.43424799234678e-06, + "loss": 28.606, + "step": 118800 + }, + { + "epoch": 0.2400037169164138, + "grad_norm": 671.39501953125, + "learning_rate": 9.43408669279881e-06, + "loss": 12.6954, + "step": 118810 + }, + { + "epoch": 0.24002391754909763, + "grad_norm": 168.6937255859375, + "learning_rate": 9.433925371639639e-06, + "loss": 19.6997, + "step": 118820 + }, + { + "epoch": 0.24004411818178145, + "grad_norm": 288.7983703613281, + "learning_rate": 9.433764028870053e-06, + "loss": 25.7118, + "step": 118830 + }, + { + "epoch": 0.24006431881446527, + "grad_norm": 322.3669738769531, + "learning_rate": 9.433602664490838e-06, + "loss": 10.3236, + "step": 118840 + }, + { + "epoch": 0.2400845194471491, + "grad_norm": 250.17198181152344, + "learning_rate": 9.433441278502784e-06, + "loss": 19.0235, + "step": 118850 + }, + { + "epoch": 0.2401047200798329, + "grad_norm": 538.3212890625, + "learning_rate": 9.433279870906673e-06, + "loss": 23.5239, + "step": 118860 + }, + { + "epoch": 0.2401249207125167, + "grad_norm": 864.250732421875, + "learning_rate": 9.433118441703293e-06, + "loss": 26.127, + "step": 118870 + }, + { + "epoch": 0.24014512134520052, + "grad_norm": 504.7637939453125, + "learning_rate": 9.432956990893434e-06, + "loss": 26.1027, + "step": 118880 + }, + { + "epoch": 0.24016532197788434, + "grad_norm": 338.23028564453125, + "learning_rate": 9.432795518477878e-06, + "loss": 12.1233, + "step": 118890 + }, + { + "epoch": 0.24018552261056816, + "grad_norm": 782.1828002929688, + "learning_rate": 9.432634024457414e-06, + "loss": 17.7826, + "step": 118900 + }, + { + "epoch": 0.24020572324325198, + "grad_norm": 355.4798278808594, + "learning_rate": 9.43247250883283e-06, + "loss": 31.2056, + "step": 118910 + }, + { + "epoch": 0.2402259238759358, + "grad_norm": 203.5426483154297, + "learning_rate": 9.432310971604914e-06, + "loss": 19.9125, + "step": 118920 + }, + { + "epoch": 0.2402461245086196, + "grad_norm": 439.1073303222656, + "learning_rate": 9.432149412774452e-06, + "loss": 21.6993, + "step": 118930 + }, + { + "epoch": 0.24026632514130342, + "grad_norm": 28.7581787109375, + "learning_rate": 9.431987832342228e-06, + "loss": 7.5493, + "step": 118940 + 
}, + { + "epoch": 0.24028652577398724, + "grad_norm": 434.7237243652344, + "learning_rate": 9.431826230309035e-06, + "loss": 24.2173, + "step": 118950 + }, + { + "epoch": 0.24030672640667106, + "grad_norm": 136.76669311523438, + "learning_rate": 9.431664606675659e-06, + "loss": 33.7794, + "step": 118960 + }, + { + "epoch": 0.24032692703935488, + "grad_norm": 550.2783203125, + "learning_rate": 9.431502961442887e-06, + "loss": 19.0586, + "step": 118970 + }, + { + "epoch": 0.2403471276720387, + "grad_norm": 470.1292724609375, + "learning_rate": 9.431341294611506e-06, + "loss": 23.7827, + "step": 118980 + }, + { + "epoch": 0.24036732830472252, + "grad_norm": 397.0516662597656, + "learning_rate": 9.431179606182306e-06, + "loss": 33.7048, + "step": 118990 + }, + { + "epoch": 0.2403875289374063, + "grad_norm": 419.5294494628906, + "learning_rate": 9.431017896156074e-06, + "loss": 21.6062, + "step": 119000 + }, + { + "epoch": 0.24040772957009013, + "grad_norm": 433.27874755859375, + "learning_rate": 9.430856164533598e-06, + "loss": 29.5763, + "step": 119010 + }, + { + "epoch": 0.24042793020277395, + "grad_norm": 420.1417236328125, + "learning_rate": 9.430694411315667e-06, + "loss": 21.6851, + "step": 119020 + }, + { + "epoch": 0.24044813083545777, + "grad_norm": 90.33391571044922, + "learning_rate": 9.430532636503067e-06, + "loss": 13.6489, + "step": 119030 + }, + { + "epoch": 0.2404683314681416, + "grad_norm": 480.8397216796875, + "learning_rate": 9.43037084009659e-06, + "loss": 16.3171, + "step": 119040 + }, + { + "epoch": 0.2404885321008254, + "grad_norm": 325.5364685058594, + "learning_rate": 9.430209022097024e-06, + "loss": 25.2489, + "step": 119050 + }, + { + "epoch": 0.2405087327335092, + "grad_norm": 162.39744567871094, + "learning_rate": 9.430047182505152e-06, + "loss": 18.4027, + "step": 119060 + }, + { + "epoch": 0.24052893336619302, + "grad_norm": 401.30877685546875, + "learning_rate": 9.429885321321772e-06, + "loss": 24.318, + "step": 119070 + }, + { + "epoch": 0.24054913399887684, + "grad_norm": 362.2718505859375, + "learning_rate": 9.429723438547666e-06, + "loss": 27.725, + "step": 119080 + }, + { + "epoch": 0.24056933463156066, + "grad_norm": 667.8294677734375, + "learning_rate": 9.429561534183627e-06, + "loss": 21.12, + "step": 119090 + }, + { + "epoch": 0.24058953526424448, + "grad_norm": 592.030029296875, + "learning_rate": 9.429399608230441e-06, + "loss": 33.3193, + "step": 119100 + }, + { + "epoch": 0.2406097358969283, + "grad_norm": 246.8064727783203, + "learning_rate": 9.429237660688896e-06, + "loss": 17.4129, + "step": 119110 + }, + { + "epoch": 0.24062993652961212, + "grad_norm": 442.88714599609375, + "learning_rate": 9.429075691559788e-06, + "loss": 14.079, + "step": 119120 + }, + { + "epoch": 0.2406501371622959, + "grad_norm": 565.0037841796875, + "learning_rate": 9.4289137008439e-06, + "loss": 29.0246, + "step": 119130 + }, + { + "epoch": 0.24067033779497973, + "grad_norm": 500.62115478515625, + "learning_rate": 9.428751688542025e-06, + "loss": 18.6905, + "step": 119140 + }, + { + "epoch": 0.24069053842766355, + "grad_norm": 153.9894256591797, + "learning_rate": 9.428589654654951e-06, + "loss": 25.6751, + "step": 119150 + }, + { + "epoch": 0.24071073906034737, + "grad_norm": 24.473669052124023, + "learning_rate": 9.428427599183467e-06, + "loss": 10.0738, + "step": 119160 + }, + { + "epoch": 0.2407309396930312, + "grad_norm": 326.5309753417969, + "learning_rate": 9.428265522128366e-06, + "loss": 14.4481, + "step": 119170 + }, + { + "epoch": 0.240751140325715, + 
"grad_norm": 373.0992126464844, + "learning_rate": 9.428103423490434e-06, + "loss": 27.2887, + "step": 119180 + }, + { + "epoch": 0.2407713409583988, + "grad_norm": 8.004453659057617, + "learning_rate": 9.427941303270464e-06, + "loss": 11.0246, + "step": 119190 + }, + { + "epoch": 0.24079154159108263, + "grad_norm": 314.0025329589844, + "learning_rate": 9.427779161469246e-06, + "loss": 24.9284, + "step": 119200 + }, + { + "epoch": 0.24081174222376645, + "grad_norm": 228.9098358154297, + "learning_rate": 9.427616998087568e-06, + "loss": 36.9234, + "step": 119210 + }, + { + "epoch": 0.24083194285645027, + "grad_norm": 47.23506164550781, + "learning_rate": 9.427454813126222e-06, + "loss": 30.2929, + "step": 119220 + }, + { + "epoch": 0.24085214348913409, + "grad_norm": 195.05516052246094, + "learning_rate": 9.427292606585998e-06, + "loss": 17.1163, + "step": 119230 + }, + { + "epoch": 0.2408723441218179, + "grad_norm": 244.37149047851562, + "learning_rate": 9.427130378467689e-06, + "loss": 29.368, + "step": 119240 + }, + { + "epoch": 0.2408925447545017, + "grad_norm": 667.91455078125, + "learning_rate": 9.42696812877208e-06, + "loss": 21.5214, + "step": 119250 + }, + { + "epoch": 0.24091274538718552, + "grad_norm": 169.0312957763672, + "learning_rate": 9.426805857499968e-06, + "loss": 17.9621, + "step": 119260 + }, + { + "epoch": 0.24093294601986934, + "grad_norm": 0.0, + "learning_rate": 9.426643564652139e-06, + "loss": 20.2011, + "step": 119270 + }, + { + "epoch": 0.24095314665255316, + "grad_norm": 694.5930786132812, + "learning_rate": 9.426481250229387e-06, + "loss": 19.3448, + "step": 119280 + }, + { + "epoch": 0.24097334728523698, + "grad_norm": 168.7473602294922, + "learning_rate": 9.426318914232503e-06, + "loss": 20.6237, + "step": 119290 + }, + { + "epoch": 0.2409935479179208, + "grad_norm": 223.9930877685547, + "learning_rate": 9.426156556662276e-06, + "loss": 37.335, + "step": 119300 + }, + { + "epoch": 0.24101374855060462, + "grad_norm": 709.3331298828125, + "learning_rate": 9.425994177519501e-06, + "loss": 35.3504, + "step": 119310 + }, + { + "epoch": 0.2410339491832884, + "grad_norm": 148.9415740966797, + "learning_rate": 9.425831776804966e-06, + "loss": 20.1295, + "step": 119320 + }, + { + "epoch": 0.24105414981597223, + "grad_norm": 130.87818908691406, + "learning_rate": 9.425669354519464e-06, + "loss": 10.6013, + "step": 119330 + }, + { + "epoch": 0.24107435044865605, + "grad_norm": 484.9830627441406, + "learning_rate": 9.425506910663785e-06, + "loss": 27.6242, + "step": 119340 + }, + { + "epoch": 0.24109455108133987, + "grad_norm": 152.82427978515625, + "learning_rate": 9.425344445238723e-06, + "loss": 11.1665, + "step": 119350 + }, + { + "epoch": 0.2411147517140237, + "grad_norm": 394.40301513671875, + "learning_rate": 9.425181958245069e-06, + "loss": 26.7533, + "step": 119360 + }, + { + "epoch": 0.2411349523467075, + "grad_norm": 547.2018432617188, + "learning_rate": 9.425019449683614e-06, + "loss": 29.0329, + "step": 119370 + }, + { + "epoch": 0.2411551529793913, + "grad_norm": 49.635162353515625, + "learning_rate": 9.424856919555152e-06, + "loss": 17.9029, + "step": 119380 + }, + { + "epoch": 0.24117535361207512, + "grad_norm": 227.09913635253906, + "learning_rate": 9.424694367860475e-06, + "loss": 15.4209, + "step": 119390 + }, + { + "epoch": 0.24119555424475894, + "grad_norm": 470.6295166015625, + "learning_rate": 9.424531794600372e-06, + "loss": 15.6939, + "step": 119400 + }, + { + "epoch": 0.24121575487744276, + "grad_norm": 377.6591796875, + "learning_rate": 
9.424369199775639e-06, + "loss": 28.1742, + "step": 119410 + }, + { + "epoch": 0.24123595551012658, + "grad_norm": 477.1115417480469, + "learning_rate": 9.424206583387066e-06, + "loss": 28.7678, + "step": 119420 + }, + { + "epoch": 0.2412561561428104, + "grad_norm": 404.892822265625, + "learning_rate": 9.424043945435449e-06, + "loss": 32.6534, + "step": 119430 + }, + { + "epoch": 0.24127635677549422, + "grad_norm": 431.056884765625, + "learning_rate": 9.423881285921576e-06, + "loss": 16.7084, + "step": 119440 + }, + { + "epoch": 0.24129655740817801, + "grad_norm": 587.3072509765625, + "learning_rate": 9.423718604846243e-06, + "loss": 28.7116, + "step": 119450 + }, + { + "epoch": 0.24131675804086183, + "grad_norm": 286.3149719238281, + "learning_rate": 9.423555902210241e-06, + "loss": 29.833, + "step": 119460 + }, + { + "epoch": 0.24133695867354565, + "grad_norm": 488.9961853027344, + "learning_rate": 9.423393178014366e-06, + "loss": 23.0982, + "step": 119470 + }, + { + "epoch": 0.24135715930622947, + "grad_norm": 1299.418701171875, + "learning_rate": 9.423230432259409e-06, + "loss": 19.3942, + "step": 119480 + }, + { + "epoch": 0.2413773599389133, + "grad_norm": 489.6849060058594, + "learning_rate": 9.423067664946162e-06, + "loss": 15.4288, + "step": 119490 + }, + { + "epoch": 0.24139756057159711, + "grad_norm": 479.0276794433594, + "learning_rate": 9.42290487607542e-06, + "loss": 21.8808, + "step": 119500 + }, + { + "epoch": 0.2414177612042809, + "grad_norm": 454.5273742675781, + "learning_rate": 9.422742065647976e-06, + "loss": 21.8085, + "step": 119510 + }, + { + "epoch": 0.24143796183696473, + "grad_norm": 1.8563333749771118, + "learning_rate": 9.422579233664624e-06, + "loss": 15.3194, + "step": 119520 + }, + { + "epoch": 0.24145816246964855, + "grad_norm": 672.4547119140625, + "learning_rate": 9.422416380126157e-06, + "loss": 31.2583, + "step": 119530 + }, + { + "epoch": 0.24147836310233237, + "grad_norm": 327.16064453125, + "learning_rate": 9.42225350503337e-06, + "loss": 23.3336, + "step": 119540 + }, + { + "epoch": 0.2414985637350162, + "grad_norm": 458.59814453125, + "learning_rate": 9.422090608387056e-06, + "loss": 41.3047, + "step": 119550 + }, + { + "epoch": 0.2415187643677, + "grad_norm": 359.75482177734375, + "learning_rate": 9.421927690188006e-06, + "loss": 23.0521, + "step": 119560 + }, + { + "epoch": 0.2415389650003838, + "grad_norm": 286.3177795410156, + "learning_rate": 9.421764750437019e-06, + "loss": 19.8835, + "step": 119570 + }, + { + "epoch": 0.24155916563306762, + "grad_norm": 489.7880554199219, + "learning_rate": 9.421601789134887e-06, + "loss": 29.9945, + "step": 119580 + }, + { + "epoch": 0.24157936626575144, + "grad_norm": 194.4234619140625, + "learning_rate": 9.421438806282402e-06, + "loss": 17.1849, + "step": 119590 + }, + { + "epoch": 0.24159956689843526, + "grad_norm": 0.0, + "learning_rate": 9.421275801880363e-06, + "loss": 28.6513, + "step": 119600 + }, + { + "epoch": 0.24161976753111908, + "grad_norm": 222.85772705078125, + "learning_rate": 9.42111277592956e-06, + "loss": 16.1013, + "step": 119610 + }, + { + "epoch": 0.2416399681638029, + "grad_norm": 537.223388671875, + "learning_rate": 9.42094972843079e-06, + "loss": 32.1397, + "step": 119620 + }, + { + "epoch": 0.24166016879648672, + "grad_norm": 357.1800231933594, + "learning_rate": 9.420786659384849e-06, + "loss": 13.7829, + "step": 119630 + }, + { + "epoch": 0.2416803694291705, + "grad_norm": 674.0662841796875, + "learning_rate": 9.420623568792528e-06, + "loss": 9.7583, + "step": 119640 + }, + { + 
"epoch": 0.24170057006185433, + "grad_norm": 674.779296875, + "learning_rate": 9.420460456654625e-06, + "loss": 43.7078, + "step": 119650 + }, + { + "epoch": 0.24172077069453815, + "grad_norm": 269.4070129394531, + "learning_rate": 9.420297322971934e-06, + "loss": 42.7253, + "step": 119660 + }, + { + "epoch": 0.24174097132722197, + "grad_norm": 273.6134948730469, + "learning_rate": 9.420134167745249e-06, + "loss": 17.3662, + "step": 119670 + }, + { + "epoch": 0.2417611719599058, + "grad_norm": 364.1605224609375, + "learning_rate": 9.419970990975366e-06, + "loss": 33.5243, + "step": 119680 + }, + { + "epoch": 0.2417813725925896, + "grad_norm": 81.76612854003906, + "learning_rate": 9.41980779266308e-06, + "loss": 25.4665, + "step": 119690 + }, + { + "epoch": 0.2418015732252734, + "grad_norm": 155.71640014648438, + "learning_rate": 9.419644572809189e-06, + "loss": 25.1629, + "step": 119700 + }, + { + "epoch": 0.24182177385795722, + "grad_norm": 261.5838623046875, + "learning_rate": 9.419481331414485e-06, + "loss": 29.5781, + "step": 119710 + }, + { + "epoch": 0.24184197449064104, + "grad_norm": 535.605224609375, + "learning_rate": 9.419318068479765e-06, + "loss": 26.7493, + "step": 119720 + }, + { + "epoch": 0.24186217512332486, + "grad_norm": 575.5614624023438, + "learning_rate": 9.419154784005826e-06, + "loss": 24.8415, + "step": 119730 + }, + { + "epoch": 0.24188237575600868, + "grad_norm": 468.4418029785156, + "learning_rate": 9.418991477993461e-06, + "loss": 30.0817, + "step": 119740 + }, + { + "epoch": 0.2419025763886925, + "grad_norm": 400.21563720703125, + "learning_rate": 9.418828150443469e-06, + "loss": 32.9474, + "step": 119750 + }, + { + "epoch": 0.24192277702137632, + "grad_norm": 758.7032470703125, + "learning_rate": 9.418664801356643e-06, + "loss": 23.37, + "step": 119760 + }, + { + "epoch": 0.24194297765406012, + "grad_norm": 685.31591796875, + "learning_rate": 9.418501430733781e-06, + "loss": 35.0462, + "step": 119770 + }, + { + "epoch": 0.24196317828674394, + "grad_norm": 859.9779663085938, + "learning_rate": 9.418338038575678e-06, + "loss": 21.0738, + "step": 119780 + }, + { + "epoch": 0.24198337891942776, + "grad_norm": 309.0888977050781, + "learning_rate": 9.418174624883134e-06, + "loss": 18.5668, + "step": 119790 + }, + { + "epoch": 0.24200357955211158, + "grad_norm": 359.8492431640625, + "learning_rate": 9.418011189656942e-06, + "loss": 34.0436, + "step": 119800 + }, + { + "epoch": 0.2420237801847954, + "grad_norm": 372.9681091308594, + "learning_rate": 9.417847732897897e-06, + "loss": 26.2594, + "step": 119810 + }, + { + "epoch": 0.24204398081747922, + "grad_norm": 289.66851806640625, + "learning_rate": 9.4176842546068e-06, + "loss": 19.6268, + "step": 119820 + }, + { + "epoch": 0.242064181450163, + "grad_norm": 827.5708618164062, + "learning_rate": 9.417520754784445e-06, + "loss": 30.1527, + "step": 119830 + }, + { + "epoch": 0.24208438208284683, + "grad_norm": 680.2725219726562, + "learning_rate": 9.41735723343163e-06, + "loss": 34.8086, + "step": 119840 + }, + { + "epoch": 0.24210458271553065, + "grad_norm": 652.0809326171875, + "learning_rate": 9.417193690549151e-06, + "loss": 27.441, + "step": 119850 + }, + { + "epoch": 0.24212478334821447, + "grad_norm": 362.1368103027344, + "learning_rate": 9.417030126137807e-06, + "loss": 20.5512, + "step": 119860 + }, + { + "epoch": 0.2421449839808983, + "grad_norm": 154.55960083007812, + "learning_rate": 9.416866540198393e-06, + "loss": 26.823, + "step": 119870 + }, + { + "epoch": 0.2421651846135821, + "grad_norm": 
470.00567626953125, + "learning_rate": 9.416702932731707e-06, + "loss": 44.4742, + "step": 119880 + }, + { + "epoch": 0.2421853852462659, + "grad_norm": 920.4050903320312, + "learning_rate": 9.416539303738546e-06, + "loss": 26.894, + "step": 119890 + }, + { + "epoch": 0.24220558587894972, + "grad_norm": 629.051025390625, + "learning_rate": 9.41637565321971e-06, + "loss": 26.9012, + "step": 119900 + }, + { + "epoch": 0.24222578651163354, + "grad_norm": 538.2350463867188, + "learning_rate": 9.416211981175993e-06, + "loss": 39.9106, + "step": 119910 + }, + { + "epoch": 0.24224598714431736, + "grad_norm": 532.4452514648438, + "learning_rate": 9.416048287608195e-06, + "loss": 13.5495, + "step": 119920 + }, + { + "epoch": 0.24226618777700118, + "grad_norm": 203.54214477539062, + "learning_rate": 9.415884572517113e-06, + "loss": 17.1925, + "step": 119930 + }, + { + "epoch": 0.242286388409685, + "grad_norm": 463.0719909667969, + "learning_rate": 9.415720835903546e-06, + "loss": 28.3784, + "step": 119940 + }, + { + "epoch": 0.24230658904236882, + "grad_norm": 352.2186584472656, + "learning_rate": 9.41555707776829e-06, + "loss": 30.7774, + "step": 119950 + }, + { + "epoch": 0.2423267896750526, + "grad_norm": 860.6155395507812, + "learning_rate": 9.415393298112145e-06, + "loss": 60.4257, + "step": 119960 + }, + { + "epoch": 0.24234699030773643, + "grad_norm": 319.4090576171875, + "learning_rate": 9.415229496935909e-06, + "loss": 28.4482, + "step": 119970 + }, + { + "epoch": 0.24236719094042025, + "grad_norm": 486.388916015625, + "learning_rate": 9.41506567424038e-06, + "loss": 23.7481, + "step": 119980 + }, + { + "epoch": 0.24238739157310407, + "grad_norm": 812.5471801757812, + "learning_rate": 9.414901830026355e-06, + "loss": 36.6252, + "step": 119990 + }, + { + "epoch": 0.2424075922057879, + "grad_norm": 485.1556091308594, + "learning_rate": 9.414737964294636e-06, + "loss": 25.9231, + "step": 120000 + }, + { + "epoch": 0.2424277928384717, + "grad_norm": 369.75054931640625, + "learning_rate": 9.414574077046019e-06, + "loss": 11.3446, + "step": 120010 + }, + { + "epoch": 0.2424479934711555, + "grad_norm": 216.30418395996094, + "learning_rate": 9.414410168281303e-06, + "loss": 28.49, + "step": 120020 + }, + { + "epoch": 0.24246819410383932, + "grad_norm": 412.84197998046875, + "learning_rate": 9.414246238001286e-06, + "loss": 21.8082, + "step": 120030 + }, + { + "epoch": 0.24248839473652314, + "grad_norm": 370.929443359375, + "learning_rate": 9.414082286206769e-06, + "loss": 21.473, + "step": 120040 + }, + { + "epoch": 0.24250859536920696, + "grad_norm": 827.7217407226562, + "learning_rate": 9.41391831289855e-06, + "loss": 21.2329, + "step": 120050 + }, + { + "epoch": 0.24252879600189078, + "grad_norm": 259.69818115234375, + "learning_rate": 9.41375431807743e-06, + "loss": 38.319, + "step": 120060 + }, + { + "epoch": 0.2425489966345746, + "grad_norm": 640.1962280273438, + "learning_rate": 9.413590301744207e-06, + "loss": 19.2748, + "step": 120070 + }, + { + "epoch": 0.24256919726725842, + "grad_norm": 366.4822998046875, + "learning_rate": 9.413426263899677e-06, + "loss": 15.9771, + "step": 120080 + }, + { + "epoch": 0.24258939789994222, + "grad_norm": 819.957763671875, + "learning_rate": 9.413262204544645e-06, + "loss": 13.7612, + "step": 120090 + }, + { + "epoch": 0.24260959853262604, + "grad_norm": 292.8883361816406, + "learning_rate": 9.41309812367991e-06, + "loss": 17.4417, + "step": 120100 + }, + { + "epoch": 0.24262979916530986, + "grad_norm": 463.2019958496094, + "learning_rate": 
9.412934021306267e-06, + "loss": 30.2767, + "step": 120110 + }, + { + "epoch": 0.24264999979799368, + "grad_norm": 233.68606567382812, + "learning_rate": 9.412769897424519e-06, + "loss": 17.6851, + "step": 120120 + }, + { + "epoch": 0.2426702004306775, + "grad_norm": 2112.52294921875, + "learning_rate": 9.412605752035467e-06, + "loss": 32.6304, + "step": 120130 + }, + { + "epoch": 0.24269040106336132, + "grad_norm": 206.77734375, + "learning_rate": 9.412441585139908e-06, + "loss": 25.8283, + "step": 120140 + }, + { + "epoch": 0.2427106016960451, + "grad_norm": 519.0888671875, + "learning_rate": 9.412277396738647e-06, + "loss": 23.6317, + "step": 120150 + }, + { + "epoch": 0.24273080232872893, + "grad_norm": 284.4267272949219, + "learning_rate": 9.41211318683248e-06, + "loss": 25.1953, + "step": 120160 + }, + { + "epoch": 0.24275100296141275, + "grad_norm": 525.6731567382812, + "learning_rate": 9.411948955422207e-06, + "loss": 28.4889, + "step": 120170 + }, + { + "epoch": 0.24277120359409657, + "grad_norm": 1001.0545043945312, + "learning_rate": 9.411784702508631e-06, + "loss": 26.5903, + "step": 120180 + }, + { + "epoch": 0.2427914042267804, + "grad_norm": 105.27946472167969, + "learning_rate": 9.41162042809255e-06, + "loss": 8.426, + "step": 120190 + }, + { + "epoch": 0.2428116048594642, + "grad_norm": 738.8587646484375, + "learning_rate": 9.411456132174768e-06, + "loss": 37.9383, + "step": 120200 + }, + { + "epoch": 0.242831805492148, + "grad_norm": 464.0848693847656, + "learning_rate": 9.411291814756082e-06, + "loss": 24.2003, + "step": 120210 + }, + { + "epoch": 0.24285200612483182, + "grad_norm": 1074.05517578125, + "learning_rate": 9.411127475837297e-06, + "loss": 23.8898, + "step": 120220 + }, + { + "epoch": 0.24287220675751564, + "grad_norm": 698.5719604492188, + "learning_rate": 9.410963115419209e-06, + "loss": 31.1965, + "step": 120230 + }, + { + "epoch": 0.24289240739019946, + "grad_norm": 708.5223999023438, + "learning_rate": 9.410798733502624e-06, + "loss": 27.1567, + "step": 120240 + }, + { + "epoch": 0.24291260802288328, + "grad_norm": 225.2885284423828, + "learning_rate": 9.41063433008834e-06, + "loss": 26.5465, + "step": 120250 + }, + { + "epoch": 0.2429328086555671, + "grad_norm": 242.16827392578125, + "learning_rate": 9.410469905177159e-06, + "loss": 23.8723, + "step": 120260 + }, + { + "epoch": 0.24295300928825092, + "grad_norm": 489.69720458984375, + "learning_rate": 9.410305458769882e-06, + "loss": 27.86, + "step": 120270 + }, + { + "epoch": 0.2429732099209347, + "grad_norm": 210.8108673095703, + "learning_rate": 9.410140990867313e-06, + "loss": 18.1833, + "step": 120280 + }, + { + "epoch": 0.24299341055361853, + "grad_norm": 102.53578186035156, + "learning_rate": 9.40997650147025e-06, + "loss": 11.5105, + "step": 120290 + }, + { + "epoch": 0.24301361118630235, + "grad_norm": 482.7730712890625, + "learning_rate": 9.409811990579498e-06, + "loss": 38.5142, + "step": 120300 + }, + { + "epoch": 0.24303381181898617, + "grad_norm": 496.66717529296875, + "learning_rate": 9.409647458195857e-06, + "loss": 19.2649, + "step": 120310 + }, + { + "epoch": 0.24305401245167, + "grad_norm": 339.0413818359375, + "learning_rate": 9.409482904320128e-06, + "loss": 14.2474, + "step": 120320 + }, + { + "epoch": 0.2430742130843538, + "grad_norm": 334.78167724609375, + "learning_rate": 9.409318328953115e-06, + "loss": 18.835, + "step": 120330 + }, + { + "epoch": 0.2430944137170376, + "grad_norm": 608.3538208007812, + "learning_rate": 9.409153732095617e-06, + "loss": 20.304, + "step": 120340 + 
}, + { + "epoch": 0.24311461434972143, + "grad_norm": 52.93294143676758, + "learning_rate": 9.408989113748442e-06, + "loss": 22.8354, + "step": 120350 + }, + { + "epoch": 0.24313481498240525, + "grad_norm": 441.4234619140625, + "learning_rate": 9.408824473912387e-06, + "loss": 13.467, + "step": 120360 + }, + { + "epoch": 0.24315501561508907, + "grad_norm": 390.3205261230469, + "learning_rate": 9.408659812588257e-06, + "loss": 26.7265, + "step": 120370 + }, + { + "epoch": 0.24317521624777289, + "grad_norm": 246.2841339111328, + "learning_rate": 9.408495129776851e-06, + "loss": 21.4439, + "step": 120380 + }, + { + "epoch": 0.2431954168804567, + "grad_norm": 686.0664672851562, + "learning_rate": 9.408330425478978e-06, + "loss": 35.6674, + "step": 120390 + }, + { + "epoch": 0.24321561751314053, + "grad_norm": 456.25872802734375, + "learning_rate": 9.408165699695435e-06, + "loss": 26.4402, + "step": 120400 + }, + { + "epoch": 0.24323581814582432, + "grad_norm": 776.8385620117188, + "learning_rate": 9.408000952427028e-06, + "loss": 24.7706, + "step": 120410 + }, + { + "epoch": 0.24325601877850814, + "grad_norm": 295.9007263183594, + "learning_rate": 9.40783618367456e-06, + "loss": 12.6116, + "step": 120420 + }, + { + "epoch": 0.24327621941119196, + "grad_norm": 102.0827407836914, + "learning_rate": 9.40767139343883e-06, + "loss": 14.6564, + "step": 120430 + }, + { + "epoch": 0.24329642004387578, + "grad_norm": 109.94143676757812, + "learning_rate": 9.407506581720647e-06, + "loss": 20.5629, + "step": 120440 + }, + { + "epoch": 0.2433166206765596, + "grad_norm": 721.9949951171875, + "learning_rate": 9.407341748520811e-06, + "loss": 37.1303, + "step": 120450 + }, + { + "epoch": 0.24333682130924342, + "grad_norm": 252.1715087890625, + "learning_rate": 9.407176893840125e-06, + "loss": 17.8643, + "step": 120460 + }, + { + "epoch": 0.2433570219419272, + "grad_norm": 108.42328643798828, + "learning_rate": 9.407012017679393e-06, + "loss": 18.0431, + "step": 120470 + }, + { + "epoch": 0.24337722257461103, + "grad_norm": 608.70947265625, + "learning_rate": 9.40684712003942e-06, + "loss": 27.4288, + "step": 120480 + }, + { + "epoch": 0.24339742320729485, + "grad_norm": 679.6499633789062, + "learning_rate": 9.40668220092101e-06, + "loss": 22.7119, + "step": 120490 + }, + { + "epoch": 0.24341762383997867, + "grad_norm": 501.53582763671875, + "learning_rate": 9.406517260324962e-06, + "loss": 25.1944, + "step": 120500 + }, + { + "epoch": 0.2434378244726625, + "grad_norm": 473.3844299316406, + "learning_rate": 9.406352298252085e-06, + "loss": 37.3264, + "step": 120510 + }, + { + "epoch": 0.2434580251053463, + "grad_norm": 537.971923828125, + "learning_rate": 9.406187314703182e-06, + "loss": 28.715, + "step": 120520 + }, + { + "epoch": 0.2434782257380301, + "grad_norm": 425.8844909667969, + "learning_rate": 9.406022309679055e-06, + "loss": 18.4034, + "step": 120530 + }, + { + "epoch": 0.24349842637071392, + "grad_norm": 538.1511840820312, + "learning_rate": 9.40585728318051e-06, + "loss": 17.8412, + "step": 120540 + }, + { + "epoch": 0.24351862700339774, + "grad_norm": 154.53443908691406, + "learning_rate": 9.405692235208353e-06, + "loss": 24.7516, + "step": 120550 + }, + { + "epoch": 0.24353882763608156, + "grad_norm": 360.3779602050781, + "learning_rate": 9.405527165763384e-06, + "loss": 26.9894, + "step": 120560 + }, + { + "epoch": 0.24355902826876538, + "grad_norm": 817.6304321289062, + "learning_rate": 9.40536207484641e-06, + "loss": 21.4723, + "step": 120570 + }, + { + "epoch": 0.2435792289014492, + 
"grad_norm": 281.68701171875, + "learning_rate": 9.405196962458235e-06, + "loss": 12.8594, + "step": 120580 + }, + { + "epoch": 0.24359942953413302, + "grad_norm": 612.75634765625, + "learning_rate": 9.405031828599666e-06, + "loss": 39.7872, + "step": 120590 + }, + { + "epoch": 0.24361963016681681, + "grad_norm": 75.3757095336914, + "learning_rate": 9.404866673271506e-06, + "loss": 7.8291, + "step": 120600 + }, + { + "epoch": 0.24363983079950063, + "grad_norm": 498.9643249511719, + "learning_rate": 9.40470149647456e-06, + "loss": 20.3929, + "step": 120610 + }, + { + "epoch": 0.24366003143218445, + "grad_norm": 166.3463134765625, + "learning_rate": 9.404536298209633e-06, + "loss": 20.5384, + "step": 120620 + }, + { + "epoch": 0.24368023206486827, + "grad_norm": 358.24346923828125, + "learning_rate": 9.40437107847753e-06, + "loss": 18.2434, + "step": 120630 + }, + { + "epoch": 0.2437004326975521, + "grad_norm": 562.0745849609375, + "learning_rate": 9.404205837279057e-06, + "loss": 15.5326, + "step": 120640 + }, + { + "epoch": 0.24372063333023591, + "grad_norm": 441.2648010253906, + "learning_rate": 9.404040574615018e-06, + "loss": 24.7184, + "step": 120650 + }, + { + "epoch": 0.2437408339629197, + "grad_norm": 253.6767578125, + "learning_rate": 9.40387529048622e-06, + "loss": 19.4828, + "step": 120660 + }, + { + "epoch": 0.24376103459560353, + "grad_norm": 235.23439025878906, + "learning_rate": 9.403709984893469e-06, + "loss": 20.0599, + "step": 120670 + }, + { + "epoch": 0.24378123522828735, + "grad_norm": 363.85443115234375, + "learning_rate": 9.403544657837569e-06, + "loss": 16.9035, + "step": 120680 + }, + { + "epoch": 0.24380143586097117, + "grad_norm": 475.27203369140625, + "learning_rate": 9.403379309319325e-06, + "loss": 17.1978, + "step": 120690 + }, + { + "epoch": 0.243821636493655, + "grad_norm": 208.4928741455078, + "learning_rate": 9.403213939339546e-06, + "loss": 26.1766, + "step": 120700 + }, + { + "epoch": 0.2438418371263388, + "grad_norm": 297.894775390625, + "learning_rate": 9.403048547899034e-06, + "loss": 19.4729, + "step": 120710 + }, + { + "epoch": 0.24386203775902263, + "grad_norm": 898.6644897460938, + "learning_rate": 9.402883134998601e-06, + "loss": 29.4924, + "step": 120720 + }, + { + "epoch": 0.24388223839170642, + "grad_norm": 350.4821472167969, + "learning_rate": 9.402717700639047e-06, + "loss": 17.4218, + "step": 120730 + }, + { + "epoch": 0.24390243902439024, + "grad_norm": 542.610595703125, + "learning_rate": 9.402552244821181e-06, + "loss": 23.77, + "step": 120740 + }, + { + "epoch": 0.24392263965707406, + "grad_norm": 475.7983093261719, + "learning_rate": 9.40238676754581e-06, + "loss": 10.7983, + "step": 120750 + }, + { + "epoch": 0.24394284028975788, + "grad_norm": 776.457275390625, + "learning_rate": 9.402221268813741e-06, + "loss": 29.9458, + "step": 120760 + }, + { + "epoch": 0.2439630409224417, + "grad_norm": 663.1685791015625, + "learning_rate": 9.402055748625779e-06, + "loss": 16.0243, + "step": 120770 + }, + { + "epoch": 0.24398324155512552, + "grad_norm": 687.3117065429688, + "learning_rate": 9.40189020698273e-06, + "loss": 18.4069, + "step": 120780 + }, + { + "epoch": 0.2440034421878093, + "grad_norm": 642.6618041992188, + "learning_rate": 9.4017246438854e-06, + "loss": 20.7356, + "step": 120790 + }, + { + "epoch": 0.24402364282049313, + "grad_norm": 304.1505432128906, + "learning_rate": 9.401559059334601e-06, + "loss": 16.3073, + "step": 120800 + }, + { + "epoch": 0.24404384345317695, + "grad_norm": 92.07533264160156, + "learning_rate": 
9.401393453331138e-06, + "loss": 15.306, + "step": 120810 + }, + { + "epoch": 0.24406404408586077, + "grad_norm": 159.5988311767578, + "learning_rate": 9.401227825875814e-06, + "loss": 27.5869, + "step": 120820 + }, + { + "epoch": 0.2440842447185446, + "grad_norm": 394.03057861328125, + "learning_rate": 9.401062176969442e-06, + "loss": 17.108, + "step": 120830 + }, + { + "epoch": 0.2441044453512284, + "grad_norm": 89.00894165039062, + "learning_rate": 9.400896506612824e-06, + "loss": 32.4733, + "step": 120840 + }, + { + "epoch": 0.2441246459839122, + "grad_norm": 211.0887908935547, + "learning_rate": 9.400730814806774e-06, + "loss": 34.7247, + "step": 120850 + }, + { + "epoch": 0.24414484661659602, + "grad_norm": 295.0898132324219, + "learning_rate": 9.400565101552093e-06, + "loss": 27.0561, + "step": 120860 + }, + { + "epoch": 0.24416504724927984, + "grad_norm": 402.59906005859375, + "learning_rate": 9.400399366849591e-06, + "loss": 25.1451, + "step": 120870 + }, + { + "epoch": 0.24418524788196366, + "grad_norm": 315.1714782714844, + "learning_rate": 9.400233610700078e-06, + "loss": 13.9015, + "step": 120880 + }, + { + "epoch": 0.24420544851464748, + "grad_norm": 17.67019271850586, + "learning_rate": 9.400067833104358e-06, + "loss": 41.4506, + "step": 120890 + }, + { + "epoch": 0.2442256491473313, + "grad_norm": 234.94468688964844, + "learning_rate": 9.399902034063244e-06, + "loss": 18.8753, + "step": 120900 + }, + { + "epoch": 0.24424584978001512, + "grad_norm": 137.276123046875, + "learning_rate": 9.399736213577537e-06, + "loss": 17.0852, + "step": 120910 + }, + { + "epoch": 0.24426605041269892, + "grad_norm": 332.58282470703125, + "learning_rate": 9.399570371648052e-06, + "loss": 28.0115, + "step": 120920 + }, + { + "epoch": 0.24428625104538274, + "grad_norm": 497.4787902832031, + "learning_rate": 9.399404508275596e-06, + "loss": 21.4659, + "step": 120930 + }, + { + "epoch": 0.24430645167806656, + "grad_norm": 437.379638671875, + "learning_rate": 9.399238623460973e-06, + "loss": 10.7566, + "step": 120940 + }, + { + "epoch": 0.24432665231075038, + "grad_norm": 479.4838562011719, + "learning_rate": 9.399072717204995e-06, + "loss": 21.467, + "step": 120950 + }, + { + "epoch": 0.2443468529434342, + "grad_norm": 0.0, + "learning_rate": 9.398906789508474e-06, + "loss": 12.9235, + "step": 120960 + }, + { + "epoch": 0.24436705357611802, + "grad_norm": 714.1256103515625, + "learning_rate": 9.39874084037221e-06, + "loss": 32.9326, + "step": 120970 + }, + { + "epoch": 0.2443872542088018, + "grad_norm": 171.83334350585938, + "learning_rate": 9.39857486979702e-06, + "loss": 16.5461, + "step": 120980 + }, + { + "epoch": 0.24440745484148563, + "grad_norm": 504.98468017578125, + "learning_rate": 9.398408877783707e-06, + "loss": 17.338, + "step": 120990 + }, + { + "epoch": 0.24442765547416945, + "grad_norm": 526.178955078125, + "learning_rate": 9.398242864333084e-06, + "loss": 23.0164, + "step": 121000 + }, + { + "epoch": 0.24444785610685327, + "grad_norm": 173.3816375732422, + "learning_rate": 9.398076829445958e-06, + "loss": 20.6809, + "step": 121010 + }, + { + "epoch": 0.2444680567395371, + "grad_norm": 407.1698913574219, + "learning_rate": 9.397910773123139e-06, + "loss": 7.2511, + "step": 121020 + }, + { + "epoch": 0.2444882573722209, + "grad_norm": 299.86993408203125, + "learning_rate": 9.397744695365435e-06, + "loss": 24.5106, + "step": 121030 + }, + { + "epoch": 0.24450845800490473, + "grad_norm": 1071.1138916015625, + "learning_rate": 9.39757859617366e-06, + "loss": 34.3683, + "step": 121040 + 
}, + { + "epoch": 0.24452865863758852, + "grad_norm": 149.59170532226562, + "learning_rate": 9.397412475548619e-06, + "loss": 12.2644, + "step": 121050 + }, + { + "epoch": 0.24454885927027234, + "grad_norm": 867.1926879882812, + "learning_rate": 9.397246333491121e-06, + "loss": 26.0245, + "step": 121060 + }, + { + "epoch": 0.24456905990295616, + "grad_norm": 202.2417449951172, + "learning_rate": 9.39708017000198e-06, + "loss": 46.8165, + "step": 121070 + }, + { + "epoch": 0.24458926053563998, + "grad_norm": 95.05743408203125, + "learning_rate": 9.396913985082003e-06, + "loss": 17.9895, + "step": 121080 + }, + { + "epoch": 0.2446094611683238, + "grad_norm": 987.2474365234375, + "learning_rate": 9.396747778732001e-06, + "loss": 23.2907, + "step": 121090 + }, + { + "epoch": 0.24462966180100762, + "grad_norm": 492.4799499511719, + "learning_rate": 9.396581550952781e-06, + "loss": 42.5354, + "step": 121100 + }, + { + "epoch": 0.2446498624336914, + "grad_norm": 224.4016876220703, + "learning_rate": 9.396415301745158e-06, + "loss": 29.2263, + "step": 121110 + }, + { + "epoch": 0.24467006306637523, + "grad_norm": 596.227294921875, + "learning_rate": 9.39624903110994e-06, + "loss": 22.3733, + "step": 121120 + }, + { + "epoch": 0.24469026369905905, + "grad_norm": 203.60916137695312, + "learning_rate": 9.396082739047938e-06, + "loss": 14.8306, + "step": 121130 + }, + { + "epoch": 0.24471046433174287, + "grad_norm": 499.0995178222656, + "learning_rate": 9.39591642555996e-06, + "loss": 11.4995, + "step": 121140 + }, + { + "epoch": 0.2447306649644267, + "grad_norm": 265.4461975097656, + "learning_rate": 9.39575009064682e-06, + "loss": 19.1836, + "step": 121150 + }, + { + "epoch": 0.2447508655971105, + "grad_norm": 507.4720153808594, + "learning_rate": 9.395583734309327e-06, + "loss": 13.8517, + "step": 121160 + }, + { + "epoch": 0.2447710662297943, + "grad_norm": 416.8929748535156, + "learning_rate": 9.39541735654829e-06, + "loss": 38.9064, + "step": 121170 + }, + { + "epoch": 0.24479126686247812, + "grad_norm": 363.3443298339844, + "learning_rate": 9.395250957364526e-06, + "loss": 18.1596, + "step": 121180 + }, + { + "epoch": 0.24481146749516194, + "grad_norm": 585.190185546875, + "learning_rate": 9.395084536758838e-06, + "loss": 13.4733, + "step": 121190 + }, + { + "epoch": 0.24483166812784576, + "grad_norm": 266.3756103515625, + "learning_rate": 9.394918094732044e-06, + "loss": 15.6169, + "step": 121200 + }, + { + "epoch": 0.24485186876052958, + "grad_norm": 80.72816467285156, + "learning_rate": 9.394751631284951e-06, + "loss": 14.0201, + "step": 121210 + }, + { + "epoch": 0.2448720693932134, + "grad_norm": 325.4801330566406, + "learning_rate": 9.39458514641837e-06, + "loss": 27.5059, + "step": 121220 + }, + { + "epoch": 0.24489227002589722, + "grad_norm": 257.91668701171875, + "learning_rate": 9.394418640133116e-06, + "loss": 8.8437, + "step": 121230 + }, + { + "epoch": 0.24491247065858102, + "grad_norm": 191.24307250976562, + "learning_rate": 9.394252112429998e-06, + "loss": 13.821, + "step": 121240 + }, + { + "epoch": 0.24493267129126484, + "grad_norm": 654.3187255859375, + "learning_rate": 9.394085563309827e-06, + "loss": 39.5012, + "step": 121250 + }, + { + "epoch": 0.24495287192394866, + "grad_norm": 134.41928100585938, + "learning_rate": 9.393918992773418e-06, + "loss": 30.6244, + "step": 121260 + }, + { + "epoch": 0.24497307255663248, + "grad_norm": 289.5005798339844, + "learning_rate": 9.393752400821578e-06, + "loss": 34.7754, + "step": 121270 + }, + { + "epoch": 0.2449932731893163, + 
"grad_norm": 314.14031982421875, + "learning_rate": 9.393585787455125e-06, + "loss": 27.0609, + "step": 121280 + }, + { + "epoch": 0.24501347382200012, + "grad_norm": 803.166259765625, + "learning_rate": 9.393419152674866e-06, + "loss": 15.494, + "step": 121290 + }, + { + "epoch": 0.2450336744546839, + "grad_norm": 485.0401611328125, + "learning_rate": 9.393252496481615e-06, + "loss": 19.0768, + "step": 121300 + }, + { + "epoch": 0.24505387508736773, + "grad_norm": 142.10533142089844, + "learning_rate": 9.393085818876184e-06, + "loss": 16.7439, + "step": 121310 + }, + { + "epoch": 0.24507407572005155, + "grad_norm": 271.49945068359375, + "learning_rate": 9.392919119859387e-06, + "loss": 9.3258, + "step": 121320 + }, + { + "epoch": 0.24509427635273537, + "grad_norm": 1029.8131103515625, + "learning_rate": 9.392752399432032e-06, + "loss": 30.2744, + "step": 121330 + }, + { + "epoch": 0.2451144769854192, + "grad_norm": 349.37835693359375, + "learning_rate": 9.392585657594938e-06, + "loss": 17.3984, + "step": 121340 + }, + { + "epoch": 0.245134677618103, + "grad_norm": 407.53228759765625, + "learning_rate": 9.392418894348912e-06, + "loss": 24.1279, + "step": 121350 + }, + { + "epoch": 0.24515487825078683, + "grad_norm": 308.86920166015625, + "learning_rate": 9.39225210969477e-06, + "loss": 24.4006, + "step": 121360 + }, + { + "epoch": 0.24517507888347062, + "grad_norm": 78.63629150390625, + "learning_rate": 9.392085303633322e-06, + "loss": 21.8307, + "step": 121370 + }, + { + "epoch": 0.24519527951615444, + "grad_norm": 640.35888671875, + "learning_rate": 9.391918476165385e-06, + "loss": 27.6322, + "step": 121380 + }, + { + "epoch": 0.24521548014883826, + "grad_norm": 98.9591064453125, + "learning_rate": 9.39175162729177e-06, + "loss": 17.5419, + "step": 121390 + }, + { + "epoch": 0.24523568078152208, + "grad_norm": 1276.994140625, + "learning_rate": 9.39158475701329e-06, + "loss": 32.9141, + "step": 121400 + }, + { + "epoch": 0.2452558814142059, + "grad_norm": 712.490478515625, + "learning_rate": 9.391417865330759e-06, + "loss": 26.0464, + "step": 121410 + }, + { + "epoch": 0.24527608204688972, + "grad_norm": 421.4114685058594, + "learning_rate": 9.391250952244987e-06, + "loss": 15.0933, + "step": 121420 + }, + { + "epoch": 0.2452962826795735, + "grad_norm": 512.715576171875, + "learning_rate": 9.391084017756794e-06, + "loss": 31.4504, + "step": 121430 + }, + { + "epoch": 0.24531648331225733, + "grad_norm": 669.548583984375, + "learning_rate": 9.390917061866988e-06, + "loss": 32.6564, + "step": 121440 + }, + { + "epoch": 0.24533668394494115, + "grad_norm": 459.4366760253906, + "learning_rate": 9.390750084576387e-06, + "loss": 36.9348, + "step": 121450 + }, + { + "epoch": 0.24535688457762497, + "grad_norm": 218.767822265625, + "learning_rate": 9.3905830858858e-06, + "loss": 17.6133, + "step": 121460 + }, + { + "epoch": 0.2453770852103088, + "grad_norm": 693.1647338867188, + "learning_rate": 9.390416065796045e-06, + "loss": 29.3941, + "step": 121470 + }, + { + "epoch": 0.2453972858429926, + "grad_norm": 509.7262878417969, + "learning_rate": 9.390249024307934e-06, + "loss": 17.0121, + "step": 121480 + }, + { + "epoch": 0.2454174864756764, + "grad_norm": 480.4788513183594, + "learning_rate": 9.390081961422283e-06, + "loss": 16.8307, + "step": 121490 + }, + { + "epoch": 0.24543768710836023, + "grad_norm": 754.36474609375, + "learning_rate": 9.389914877139903e-06, + "loss": 28.9384, + "step": 121500 + }, + { + "epoch": 0.24545788774104405, + "grad_norm": 223.61109924316406, + "learning_rate": 
9.389747771461612e-06, + "loss": 20.6062, + "step": 121510 + }, + { + "epoch": 0.24547808837372787, + "grad_norm": 598.3352661132812, + "learning_rate": 9.389580644388222e-06, + "loss": 15.6587, + "step": 121520 + }, + { + "epoch": 0.24549828900641169, + "grad_norm": 1525.0599365234375, + "learning_rate": 9.38941349592055e-06, + "loss": 52.0996, + "step": 121530 + }, + { + "epoch": 0.2455184896390955, + "grad_norm": 81.1236572265625, + "learning_rate": 9.389246326059406e-06, + "loss": 20.4734, + "step": 121540 + }, + { + "epoch": 0.24553869027177933, + "grad_norm": 391.3033142089844, + "learning_rate": 9.38907913480561e-06, + "loss": 23.5889, + "step": 121550 + }, + { + "epoch": 0.24555889090446312, + "grad_norm": 527.8965454101562, + "learning_rate": 9.388911922159973e-06, + "loss": 21.0002, + "step": 121560 + }, + { + "epoch": 0.24557909153714694, + "grad_norm": 635.4254760742188, + "learning_rate": 9.388744688123313e-06, + "loss": 20.7065, + "step": 121570 + }, + { + "epoch": 0.24559929216983076, + "grad_norm": 361.8155517578125, + "learning_rate": 9.388577432696441e-06, + "loss": 16.9074, + "step": 121580 + }, + { + "epoch": 0.24561949280251458, + "grad_norm": 853.1696166992188, + "learning_rate": 9.388410155880178e-06, + "loss": 23.3569, + "step": 121590 + }, + { + "epoch": 0.2456396934351984, + "grad_norm": 162.77798461914062, + "learning_rate": 9.388242857675336e-06, + "loss": 19.8373, + "step": 121600 + }, + { + "epoch": 0.24565989406788222, + "grad_norm": 18.374544143676758, + "learning_rate": 9.388075538082729e-06, + "loss": 17.2537, + "step": 121610 + }, + { + "epoch": 0.245680094700566, + "grad_norm": 259.9162292480469, + "learning_rate": 9.387908197103175e-06, + "loss": 14.7752, + "step": 121620 + }, + { + "epoch": 0.24570029533324983, + "grad_norm": 359.5132141113281, + "learning_rate": 9.38774083473749e-06, + "loss": 20.5081, + "step": 121630 + }, + { + "epoch": 0.24572049596593365, + "grad_norm": 135.332275390625, + "learning_rate": 9.387573450986485e-06, + "loss": 33.3474, + "step": 121640 + }, + { + "epoch": 0.24574069659861747, + "grad_norm": 75.51333618164062, + "learning_rate": 9.38740604585098e-06, + "loss": 28.3052, + "step": 121650 + }, + { + "epoch": 0.2457608972313013, + "grad_norm": 229.2150421142578, + "learning_rate": 9.387238619331791e-06, + "loss": 25.0102, + "step": 121660 + }, + { + "epoch": 0.2457810978639851, + "grad_norm": 449.738037109375, + "learning_rate": 9.387071171429734e-06, + "loss": 21.5454, + "step": 121670 + }, + { + "epoch": 0.2458012984966689, + "grad_norm": 23.67981719970703, + "learning_rate": 9.386903702145622e-06, + "loss": 23.7492, + "step": 121680 + }, + { + "epoch": 0.24582149912935272, + "grad_norm": 591.4840698242188, + "learning_rate": 9.386736211480276e-06, + "loss": 20.9584, + "step": 121690 + }, + { + "epoch": 0.24584169976203654, + "grad_norm": 771.146728515625, + "learning_rate": 9.386568699434509e-06, + "loss": 16.8184, + "step": 121700 + }, + { + "epoch": 0.24586190039472036, + "grad_norm": 372.983154296875, + "learning_rate": 9.386401166009135e-06, + "loss": 21.9896, + "step": 121710 + }, + { + "epoch": 0.24588210102740418, + "grad_norm": 2.5158207416534424, + "learning_rate": 9.386233611204979e-06, + "loss": 17.8203, + "step": 121720 + }, + { + "epoch": 0.245902301660088, + "grad_norm": 437.3326721191406, + "learning_rate": 9.386066035022849e-06, + "loss": 32.8128, + "step": 121730 + }, + { + "epoch": 0.24592250229277182, + "grad_norm": 124.55708312988281, + "learning_rate": 9.385898437463565e-06, + "loss": 15.7339, + 
"step": 121740 + }, + { + "epoch": 0.24594270292545561, + "grad_norm": 569.1013793945312, + "learning_rate": 9.385730818527945e-06, + "loss": 21.1294, + "step": 121750 + }, + { + "epoch": 0.24596290355813943, + "grad_norm": 438.2088928222656, + "learning_rate": 9.385563178216804e-06, + "loss": 42.4981, + "step": 121760 + }, + { + "epoch": 0.24598310419082325, + "grad_norm": 476.3517150878906, + "learning_rate": 9.38539551653096e-06, + "loss": 33.1509, + "step": 121770 + }, + { + "epoch": 0.24600330482350707, + "grad_norm": 438.19091796875, + "learning_rate": 9.385227833471232e-06, + "loss": 16.9427, + "step": 121780 + }, + { + "epoch": 0.2460235054561909, + "grad_norm": 627.431396484375, + "learning_rate": 9.385060129038434e-06, + "loss": 23.1106, + "step": 121790 + }, + { + "epoch": 0.24604370608887471, + "grad_norm": 863.0065307617188, + "learning_rate": 9.384892403233384e-06, + "loss": 35.0544, + "step": 121800 + }, + { + "epoch": 0.2460639067215585, + "grad_norm": 797.5457763671875, + "learning_rate": 9.384724656056902e-06, + "loss": 32.2254, + "step": 121810 + }, + { + "epoch": 0.24608410735424233, + "grad_norm": 339.25347900390625, + "learning_rate": 9.384556887509802e-06, + "loss": 25.5886, + "step": 121820 + }, + { + "epoch": 0.24610430798692615, + "grad_norm": 117.08829498291016, + "learning_rate": 9.384389097592904e-06, + "loss": 18.4469, + "step": 121830 + }, + { + "epoch": 0.24612450861960997, + "grad_norm": 681.5288696289062, + "learning_rate": 9.384221286307028e-06, + "loss": 18.808, + "step": 121840 + }, + { + "epoch": 0.2461447092522938, + "grad_norm": 913.95703125, + "learning_rate": 9.384053453652986e-06, + "loss": 37.2574, + "step": 121850 + }, + { + "epoch": 0.2461649098849776, + "grad_norm": 273.5444030761719, + "learning_rate": 9.3838855996316e-06, + "loss": 17.6005, + "step": 121860 + }, + { + "epoch": 0.24618511051766143, + "grad_norm": 410.3824462890625, + "learning_rate": 9.383717724243688e-06, + "loss": 16.0778, + "step": 121870 + }, + { + "epoch": 0.24620531115034522, + "grad_norm": 311.57318115234375, + "learning_rate": 9.383549827490066e-06, + "loss": 41.0608, + "step": 121880 + }, + { + "epoch": 0.24622551178302904, + "grad_norm": 471.83331298828125, + "learning_rate": 9.383381909371555e-06, + "loss": 26.9694, + "step": 121890 + }, + { + "epoch": 0.24624571241571286, + "grad_norm": 129.34288024902344, + "learning_rate": 9.383213969888972e-06, + "loss": 26.0415, + "step": 121900 + }, + { + "epoch": 0.24626591304839668, + "grad_norm": 260.0030517578125, + "learning_rate": 9.383046009043134e-06, + "loss": 26.3524, + "step": 121910 + }, + { + "epoch": 0.2462861136810805, + "grad_norm": 292.06964111328125, + "learning_rate": 9.382878026834865e-06, + "loss": 16.6176, + "step": 121920 + }, + { + "epoch": 0.24630631431376432, + "grad_norm": 267.7262268066406, + "learning_rate": 9.382710023264978e-06, + "loss": 10.9908, + "step": 121930 + }, + { + "epoch": 0.2463265149464481, + "grad_norm": 552.8212280273438, + "learning_rate": 9.382541998334293e-06, + "loss": 24.4882, + "step": 121940 + }, + { + "epoch": 0.24634671557913193, + "grad_norm": 392.6438903808594, + "learning_rate": 9.382373952043631e-06, + "loss": 23.7668, + "step": 121950 + }, + { + "epoch": 0.24636691621181575, + "grad_norm": 157.22264099121094, + "learning_rate": 9.38220588439381e-06, + "loss": 18.662, + "step": 121960 + }, + { + "epoch": 0.24638711684449957, + "grad_norm": 660.43310546875, + "learning_rate": 9.38203779538565e-06, + "loss": 15.036, + "step": 121970 + }, + { + "epoch": 
0.2464073174771834, + "grad_norm": 428.3371887207031, + "learning_rate": 9.381869685019967e-06, + "loss": 25.8523, + "step": 121980 + }, + { + "epoch": 0.2464275181098672, + "grad_norm": 245.2991943359375, + "learning_rate": 9.381701553297584e-06, + "loss": 11.6478, + "step": 121990 + }, + { + "epoch": 0.246447718742551, + "grad_norm": 1.7876890897750854, + "learning_rate": 9.381533400219319e-06, + "loss": 32.3656, + "step": 122000 + }, + { + "epoch": 0.24646791937523482, + "grad_norm": 264.4454040527344, + "learning_rate": 9.38136522578599e-06, + "loss": 26.1722, + "step": 122010 + }, + { + "epoch": 0.24648812000791864, + "grad_norm": 1047.522705078125, + "learning_rate": 9.381197029998422e-06, + "loss": 60.2953, + "step": 122020 + }, + { + "epoch": 0.24650832064060246, + "grad_norm": 1142.136474609375, + "learning_rate": 9.381028812857426e-06, + "loss": 33.6777, + "step": 122030 + }, + { + "epoch": 0.24652852127328628, + "grad_norm": 295.67657470703125, + "learning_rate": 9.38086057436383e-06, + "loss": 26.6534, + "step": 122040 + }, + { + "epoch": 0.2465487219059701, + "grad_norm": 701.2647705078125, + "learning_rate": 9.38069231451845e-06, + "loss": 26.1704, + "step": 122050 + }, + { + "epoch": 0.24656892253865392, + "grad_norm": 89.219970703125, + "learning_rate": 9.380524033322108e-06, + "loss": 17.7089, + "step": 122060 + }, + { + "epoch": 0.24658912317133772, + "grad_norm": 161.54380798339844, + "learning_rate": 9.380355730775623e-06, + "loss": 26.5697, + "step": 122070 + }, + { + "epoch": 0.24660932380402154, + "grad_norm": 879.8580322265625, + "learning_rate": 9.380187406879815e-06, + "loss": 44.7753, + "step": 122080 + }, + { + "epoch": 0.24662952443670536, + "grad_norm": 297.9383544921875, + "learning_rate": 9.380019061635506e-06, + "loss": 18.0576, + "step": 122090 + }, + { + "epoch": 0.24664972506938918, + "grad_norm": 502.1921691894531, + "learning_rate": 9.379850695043513e-06, + "loss": 15.0395, + "step": 122100 + }, + { + "epoch": 0.246669925702073, + "grad_norm": 479.7265319824219, + "learning_rate": 9.37968230710466e-06, + "loss": 12.569, + "step": 122110 + }, + { + "epoch": 0.24669012633475682, + "grad_norm": 135.19354248046875, + "learning_rate": 9.379513897819768e-06, + "loss": 24.4919, + "step": 122120 + }, + { + "epoch": 0.2467103269674406, + "grad_norm": 159.18887329101562, + "learning_rate": 9.379345467189655e-06, + "loss": 14.196, + "step": 122130 + }, + { + "epoch": 0.24673052760012443, + "grad_norm": 530.904052734375, + "learning_rate": 9.379177015215145e-06, + "loss": 22.6805, + "step": 122140 + }, + { + "epoch": 0.24675072823280825, + "grad_norm": 529.093017578125, + "learning_rate": 9.379008541897054e-06, + "loss": 25.6806, + "step": 122150 + }, + { + "epoch": 0.24677092886549207, + "grad_norm": 518.6043701171875, + "learning_rate": 9.378840047236209e-06, + "loss": 21.9906, + "step": 122160 + }, + { + "epoch": 0.2467911294981759, + "grad_norm": 847.4872436523438, + "learning_rate": 9.378671531233428e-06, + "loss": 32.1995, + "step": 122170 + }, + { + "epoch": 0.2468113301308597, + "grad_norm": 609.507080078125, + "learning_rate": 9.378502993889533e-06, + "loss": 16.1266, + "step": 122180 + }, + { + "epoch": 0.24683153076354353, + "grad_norm": 308.99468994140625, + "learning_rate": 9.378334435205345e-06, + "loss": 19.4652, + "step": 122190 + }, + { + "epoch": 0.24685173139622732, + "grad_norm": 234.73680114746094, + "learning_rate": 9.378165855181687e-06, + "loss": 28.5967, + "step": 122200 + }, + { + "epoch": 0.24687193202891114, + "grad_norm": 
882.1802978515625, + "learning_rate": 9.377997253819378e-06, + "loss": 30.3689, + "step": 122210 + }, + { + "epoch": 0.24689213266159496, + "grad_norm": 647.518798828125, + "learning_rate": 9.377828631119243e-06, + "loss": 29.7149, + "step": 122220 + }, + { + "epoch": 0.24691233329427878, + "grad_norm": 196.408203125, + "learning_rate": 9.377659987082101e-06, + "loss": 21.2224, + "step": 122230 + }, + { + "epoch": 0.2469325339269626, + "grad_norm": 687.9680786132812, + "learning_rate": 9.377491321708777e-06, + "loss": 42.0738, + "step": 122240 + }, + { + "epoch": 0.24695273455964642, + "grad_norm": 221.20986938476562, + "learning_rate": 9.37732263500009e-06, + "loss": 34.4007, + "step": 122250 + }, + { + "epoch": 0.2469729351923302, + "grad_norm": 421.7748107910156, + "learning_rate": 9.377153926956864e-06, + "loss": 11.6844, + "step": 122260 + }, + { + "epoch": 0.24699313582501403, + "grad_norm": 608.4430541992188, + "learning_rate": 9.376985197579919e-06, + "loss": 38.0833, + "step": 122270 + }, + { + "epoch": 0.24701333645769785, + "grad_norm": 37.26884841918945, + "learning_rate": 9.37681644687008e-06, + "loss": 13.1641, + "step": 122280 + }, + { + "epoch": 0.24703353709038167, + "grad_norm": 871.173828125, + "learning_rate": 9.37664767482817e-06, + "loss": 29.0666, + "step": 122290 + }, + { + "epoch": 0.2470537377230655, + "grad_norm": 231.88839721679688, + "learning_rate": 9.376478881455008e-06, + "loss": 50.6193, + "step": 122300 + }, + { + "epoch": 0.2470739383557493, + "grad_norm": 58.269229888916016, + "learning_rate": 9.37631006675142e-06, + "loss": 27.0972, + "step": 122310 + }, + { + "epoch": 0.2470941389884331, + "grad_norm": 155.5174102783203, + "learning_rate": 9.376141230718228e-06, + "loss": 34.523, + "step": 122320 + }, + { + "epoch": 0.24711433962111692, + "grad_norm": 484.3507995605469, + "learning_rate": 9.375972373356253e-06, + "loss": 36.5884, + "step": 122330 + }, + { + "epoch": 0.24713454025380074, + "grad_norm": 343.76629638671875, + "learning_rate": 9.375803494666319e-06, + "loss": 18.2555, + "step": 122340 + }, + { + "epoch": 0.24715474088648456, + "grad_norm": 183.2716064453125, + "learning_rate": 9.37563459464925e-06, + "loss": 33.7194, + "step": 122350 + }, + { + "epoch": 0.24717494151916838, + "grad_norm": 570.3797607421875, + "learning_rate": 9.37546567330587e-06, + "loss": 28.8831, + "step": 122360 + }, + { + "epoch": 0.2471951421518522, + "grad_norm": 331.79754638671875, + "learning_rate": 9.375296730636999e-06, + "loss": 19.3407, + "step": 122370 + }, + { + "epoch": 0.24721534278453602, + "grad_norm": 270.3473815917969, + "learning_rate": 9.375127766643464e-06, + "loss": 25.9224, + "step": 122380 + }, + { + "epoch": 0.24723554341721982, + "grad_norm": 555.08349609375, + "learning_rate": 9.374958781326085e-06, + "loss": 20.2447, + "step": 122390 + }, + { + "epoch": 0.24725574404990364, + "grad_norm": 406.95281982421875, + "learning_rate": 9.37478977468569e-06, + "loss": 22.6607, + "step": 122400 + }, + { + "epoch": 0.24727594468258746, + "grad_norm": 434.1924743652344, + "learning_rate": 9.374620746723097e-06, + "loss": 14.5743, + "step": 122410 + }, + { + "epoch": 0.24729614531527128, + "grad_norm": 72.60065460205078, + "learning_rate": 9.374451697439137e-06, + "loss": 28.7614, + "step": 122420 + }, + { + "epoch": 0.2473163459479551, + "grad_norm": 458.9649963378906, + "learning_rate": 9.374282626834627e-06, + "loss": 29.1018, + "step": 122430 + }, + { + "epoch": 0.24733654658063892, + "grad_norm": 328.2051086425781, + "learning_rate": 
9.374113534910396e-06, + "loss": 12.9496, + "step": 122440 + }, + { + "epoch": 0.2473567472133227, + "grad_norm": 322.018798828125, + "learning_rate": 9.373944421667264e-06, + "loss": 16.3701, + "step": 122450 + }, + { + "epoch": 0.24737694784600653, + "grad_norm": 463.0480651855469, + "learning_rate": 9.37377528710606e-06, + "loss": 33.54, + "step": 122460 + }, + { + "epoch": 0.24739714847869035, + "grad_norm": 196.13043212890625, + "learning_rate": 9.373606131227604e-06, + "loss": 29.395, + "step": 122470 + }, + { + "epoch": 0.24741734911137417, + "grad_norm": 521.9775390625, + "learning_rate": 9.373436954032722e-06, + "loss": 22.9881, + "step": 122480 + }, + { + "epoch": 0.247437549744058, + "grad_norm": 784.3359985351562, + "learning_rate": 9.373267755522239e-06, + "loss": 23.227, + "step": 122490 + }, + { + "epoch": 0.2474577503767418, + "grad_norm": 486.8966064453125, + "learning_rate": 9.37309853569698e-06, + "loss": 14.8526, + "step": 122500 + }, + { + "epoch": 0.24747795100942563, + "grad_norm": 637.5076904296875, + "learning_rate": 9.372929294557768e-06, + "loss": 25.1422, + "step": 122510 + }, + { + "epoch": 0.24749815164210942, + "grad_norm": 1082.929443359375, + "learning_rate": 9.37276003210543e-06, + "loss": 28.4446, + "step": 122520 + }, + { + "epoch": 0.24751835227479324, + "grad_norm": 687.2918701171875, + "learning_rate": 9.37259074834079e-06, + "loss": 25.4553, + "step": 122530 + }, + { + "epoch": 0.24753855290747706, + "grad_norm": 328.588134765625, + "learning_rate": 9.372421443264672e-06, + "loss": 22.607, + "step": 122540 + }, + { + "epoch": 0.24755875354016088, + "grad_norm": 397.86151123046875, + "learning_rate": 9.372252116877904e-06, + "loss": 19.548, + "step": 122550 + }, + { + "epoch": 0.2475789541728447, + "grad_norm": 283.01861572265625, + "learning_rate": 9.372082769181307e-06, + "loss": 27.8385, + "step": 122560 + }, + { + "epoch": 0.24759915480552852, + "grad_norm": 219.72633361816406, + "learning_rate": 9.371913400175711e-06, + "loss": 13.4904, + "step": 122570 + }, + { + "epoch": 0.2476193554382123, + "grad_norm": 226.8003692626953, + "learning_rate": 9.371744009861938e-06, + "loss": 20.0402, + "step": 122580 + }, + { + "epoch": 0.24763955607089613, + "grad_norm": 317.58880615234375, + "learning_rate": 9.371574598240816e-06, + "loss": 16.201, + "step": 122590 + }, + { + "epoch": 0.24765975670357995, + "grad_norm": 123.96427917480469, + "learning_rate": 9.371405165313169e-06, + "loss": 12.6909, + "step": 122600 + }, + { + "epoch": 0.24767995733626377, + "grad_norm": 538.021240234375, + "learning_rate": 9.371235711079824e-06, + "loss": 27.7117, + "step": 122610 + }, + { + "epoch": 0.2477001579689476, + "grad_norm": 649.9598388671875, + "learning_rate": 9.371066235541607e-06, + "loss": 19.2145, + "step": 122620 + }, + { + "epoch": 0.2477203586016314, + "grad_norm": 320.6629943847656, + "learning_rate": 9.37089673869934e-06, + "loss": 30.1592, + "step": 122630 + }, + { + "epoch": 0.2477405592343152, + "grad_norm": 710.37548828125, + "learning_rate": 9.370727220553854e-06, + "loss": 33.0565, + "step": 122640 + }, + { + "epoch": 0.24776075986699903, + "grad_norm": 1021.4251098632812, + "learning_rate": 9.370557681105975e-06, + "loss": 23.931, + "step": 122650 + }, + { + "epoch": 0.24778096049968285, + "grad_norm": 335.6628112792969, + "learning_rate": 9.370388120356527e-06, + "loss": 27.4605, + "step": 122660 + }, + { + "epoch": 0.24780116113236667, + "grad_norm": 286.7644958496094, + "learning_rate": 9.370218538306338e-06, + "loss": 21.0476, + "step": 122670 
+ }, + { + "epoch": 0.24782136176505049, + "grad_norm": 302.48785400390625, + "learning_rate": 9.370048934956232e-06, + "loss": 26.8166, + "step": 122680 + }, + { + "epoch": 0.2478415623977343, + "grad_norm": 848.8372802734375, + "learning_rate": 9.36987931030704e-06, + "loss": 35.0775, + "step": 122690 + }, + { + "epoch": 0.24786176303041813, + "grad_norm": 293.80426025390625, + "learning_rate": 9.369709664359585e-06, + "loss": 27.3197, + "step": 122700 + }, + { + "epoch": 0.24788196366310192, + "grad_norm": 5.1223015785217285, + "learning_rate": 9.369539997114694e-06, + "loss": 23.6149, + "step": 122710 + }, + { + "epoch": 0.24790216429578574, + "grad_norm": 380.6856384277344, + "learning_rate": 9.369370308573198e-06, + "loss": 17.1666, + "step": 122720 + }, + { + "epoch": 0.24792236492846956, + "grad_norm": 444.42645263671875, + "learning_rate": 9.36920059873592e-06, + "loss": 18.0811, + "step": 122730 + }, + { + "epoch": 0.24794256556115338, + "grad_norm": 275.97589111328125, + "learning_rate": 9.369030867603686e-06, + "loss": 18.1146, + "step": 122740 + }, + { + "epoch": 0.2479627661938372, + "grad_norm": 192.14076232910156, + "learning_rate": 9.368861115177327e-06, + "loss": 20.6096, + "step": 122750 + }, + { + "epoch": 0.24798296682652102, + "grad_norm": 1214.2664794921875, + "learning_rate": 9.36869134145767e-06, + "loss": 37.8635, + "step": 122760 + }, + { + "epoch": 0.2480031674592048, + "grad_norm": 194.01011657714844, + "learning_rate": 9.36852154644554e-06, + "loss": 25.4198, + "step": 122770 + }, + { + "epoch": 0.24802336809188863, + "grad_norm": 304.0776672363281, + "learning_rate": 9.368351730141764e-06, + "loss": 18.8762, + "step": 122780 + }, + { + "epoch": 0.24804356872457245, + "grad_norm": 606.027587890625, + "learning_rate": 9.368181892547174e-06, + "loss": 29.1206, + "step": 122790 + }, + { + "epoch": 0.24806376935725627, + "grad_norm": 583.8778686523438, + "learning_rate": 9.368012033662594e-06, + "loss": 26.1224, + "step": 122800 + }, + { + "epoch": 0.2480839699899401, + "grad_norm": 216.15966796875, + "learning_rate": 9.367842153488853e-06, + "loss": 14.5165, + "step": 122810 + }, + { + "epoch": 0.2481041706226239, + "grad_norm": 140.9263916015625, + "learning_rate": 9.36767225202678e-06, + "loss": 29.1583, + "step": 122820 + }, + { + "epoch": 0.24812437125530773, + "grad_norm": 489.19024658203125, + "learning_rate": 9.367502329277203e-06, + "loss": 16.5625, + "step": 122830 + }, + { + "epoch": 0.24814457188799152, + "grad_norm": 784.4625854492188, + "learning_rate": 9.367332385240949e-06, + "loss": 17.7571, + "step": 122840 + }, + { + "epoch": 0.24816477252067534, + "grad_norm": 1200.139892578125, + "learning_rate": 9.367162419918845e-06, + "loss": 31.9097, + "step": 122850 + }, + { + "epoch": 0.24818497315335916, + "grad_norm": 1665.3638916015625, + "learning_rate": 9.366992433311722e-06, + "loss": 26.635, + "step": 122860 + }, + { + "epoch": 0.24820517378604298, + "grad_norm": 333.7507019042969, + "learning_rate": 9.366822425420407e-06, + "loss": 23.2256, + "step": 122870 + }, + { + "epoch": 0.2482253744187268, + "grad_norm": 670.1807861328125, + "learning_rate": 9.36665239624573e-06, + "loss": 36.5895, + "step": 122880 + }, + { + "epoch": 0.24824557505141062, + "grad_norm": 127.76399993896484, + "learning_rate": 9.366482345788519e-06, + "loss": 25.4427, + "step": 122890 + }, + { + "epoch": 0.24826577568409441, + "grad_norm": 239.83836364746094, + "learning_rate": 9.366312274049602e-06, + "loss": 21.146, + "step": 122900 + }, + { + "epoch": 0.24828597631677823, 
+ "grad_norm": 47.40636444091797, + "learning_rate": 9.366142181029808e-06, + "loss": 17.1439, + "step": 122910 + }, + { + "epoch": 0.24830617694946205, + "grad_norm": 257.0614013671875, + "learning_rate": 9.365972066729967e-06, + "loss": 24.4304, + "step": 122920 + }, + { + "epoch": 0.24832637758214587, + "grad_norm": 262.1944885253906, + "learning_rate": 9.365801931150909e-06, + "loss": 12.9803, + "step": 122930 + }, + { + "epoch": 0.2483465782148297, + "grad_norm": 593.2332763671875, + "learning_rate": 9.36563177429346e-06, + "loss": 23.0006, + "step": 122940 + }, + { + "epoch": 0.24836677884751351, + "grad_norm": 267.6748046875, + "learning_rate": 9.365461596158451e-06, + "loss": 18.7587, + "step": 122950 + }, + { + "epoch": 0.2483869794801973, + "grad_norm": 326.822021484375, + "learning_rate": 9.365291396746714e-06, + "loss": 17.3896, + "step": 122960 + }, + { + "epoch": 0.24840718011288113, + "grad_norm": 348.2276916503906, + "learning_rate": 9.365121176059075e-06, + "loss": 14.8069, + "step": 122970 + }, + { + "epoch": 0.24842738074556495, + "grad_norm": 256.3633117675781, + "learning_rate": 9.364950934096365e-06, + "loss": 15.0468, + "step": 122980 + }, + { + "epoch": 0.24844758137824877, + "grad_norm": 666.3392944335938, + "learning_rate": 9.364780670859412e-06, + "loss": 25.8556, + "step": 122990 + }, + { + "epoch": 0.2484677820109326, + "grad_norm": 477.95025634765625, + "learning_rate": 9.364610386349048e-06, + "loss": 27.4389, + "step": 123000 + }, + { + "epoch": 0.2484879826436164, + "grad_norm": 380.0039367675781, + "learning_rate": 9.364440080566104e-06, + "loss": 16.6239, + "step": 123010 + }, + { + "epoch": 0.24850818327630023, + "grad_norm": 472.1536560058594, + "learning_rate": 9.364269753511407e-06, + "loss": 37.1813, + "step": 123020 + }, + { + "epoch": 0.24852838390898402, + "grad_norm": 291.4718322753906, + "learning_rate": 9.36409940518579e-06, + "loss": 33.4035, + "step": 123030 + }, + { + "epoch": 0.24854858454166784, + "grad_norm": 438.97589111328125, + "learning_rate": 9.363929035590081e-06, + "loss": 27.887, + "step": 123040 + }, + { + "epoch": 0.24856878517435166, + "grad_norm": 222.2158203125, + "learning_rate": 9.36375864472511e-06, + "loss": 39.406, + "step": 123050 + }, + { + "epoch": 0.24858898580703548, + "grad_norm": 177.656005859375, + "learning_rate": 9.363588232591709e-06, + "loss": 37.6204, + "step": 123060 + }, + { + "epoch": 0.2486091864397193, + "grad_norm": 834.9160766601562, + "learning_rate": 9.363417799190708e-06, + "loss": 30.6445, + "step": 123070 + }, + { + "epoch": 0.24862938707240312, + "grad_norm": 207.18080139160156, + "learning_rate": 9.363247344522939e-06, + "loss": 17.49, + "step": 123080 + }, + { + "epoch": 0.2486495877050869, + "grad_norm": 387.3349914550781, + "learning_rate": 9.363076868589232e-06, + "loss": 12.6426, + "step": 123090 + }, + { + "epoch": 0.24866978833777073, + "grad_norm": 316.5187683105469, + "learning_rate": 9.362906371390416e-06, + "loss": 18.9102, + "step": 123100 + }, + { + "epoch": 0.24868998897045455, + "grad_norm": 1221.362060546875, + "learning_rate": 9.362735852927324e-06, + "loss": 41.7353, + "step": 123110 + }, + { + "epoch": 0.24871018960313837, + "grad_norm": 835.676025390625, + "learning_rate": 9.362565313200786e-06, + "loss": 30.4273, + "step": 123120 + }, + { + "epoch": 0.2487303902358222, + "grad_norm": 370.47784423828125, + "learning_rate": 9.362394752211636e-06, + "loss": 8.4214, + "step": 123130 + }, + { + "epoch": 0.248750590868506, + "grad_norm": 692.7476806640625, + "learning_rate": 
9.3622241699607e-06, + "loss": 20.3629, + "step": 123140 + }, + { + "epoch": 0.24877079150118983, + "grad_norm": 908.5383911132812, + "learning_rate": 9.362053566448816e-06, + "loss": 24.5112, + "step": 123150 + }, + { + "epoch": 0.24879099213387362, + "grad_norm": 200.86033630371094, + "learning_rate": 9.36188294167681e-06, + "loss": 15.0903, + "step": 123160 + }, + { + "epoch": 0.24881119276655744, + "grad_norm": 141.50022888183594, + "learning_rate": 9.361712295645515e-06, + "loss": 20.2606, + "step": 123170 + }, + { + "epoch": 0.24883139339924126, + "grad_norm": 587.0171508789062, + "learning_rate": 9.361541628355763e-06, + "loss": 25.998, + "step": 123180 + }, + { + "epoch": 0.24885159403192508, + "grad_norm": 357.0301818847656, + "learning_rate": 9.361370939808387e-06, + "loss": 12.2195, + "step": 123190 + }, + { + "epoch": 0.2488717946646089, + "grad_norm": 718.8137817382812, + "learning_rate": 9.361200230004219e-06, + "loss": 34.9343, + "step": 123200 + }, + { + "epoch": 0.24889199529729272, + "grad_norm": 170.9770965576172, + "learning_rate": 9.36102949894409e-06, + "loss": 13.2904, + "step": 123210 + }, + { + "epoch": 0.24891219592997652, + "grad_norm": 665.4484252929688, + "learning_rate": 9.36085874662883e-06, + "loss": 26.2902, + "step": 123220 + }, + { + "epoch": 0.24893239656266034, + "grad_norm": 572.6903076171875, + "learning_rate": 9.360687973059274e-06, + "loss": 29.4998, + "step": 123230 + }, + { + "epoch": 0.24895259719534416, + "grad_norm": 405.1957092285156, + "learning_rate": 9.360517178236255e-06, + "loss": 24.1565, + "step": 123240 + }, + { + "epoch": 0.24897279782802798, + "grad_norm": 488.4898376464844, + "learning_rate": 9.360346362160604e-06, + "loss": 21.1325, + "step": 123250 + }, + { + "epoch": 0.2489929984607118, + "grad_norm": 320.693603515625, + "learning_rate": 9.360175524833153e-06, + "loss": 34.7406, + "step": 123260 + }, + { + "epoch": 0.24901319909339562, + "grad_norm": 60.05967330932617, + "learning_rate": 9.360004666254735e-06, + "loss": 30.0965, + "step": 123270 + }, + { + "epoch": 0.2490333997260794, + "grad_norm": 175.448486328125, + "learning_rate": 9.359833786426183e-06, + "loss": 15.8951, + "step": 123280 + }, + { + "epoch": 0.24905360035876323, + "grad_norm": 278.36834716796875, + "learning_rate": 9.35966288534833e-06, + "loss": 12.8098, + "step": 123290 + }, + { + "epoch": 0.24907380099144705, + "grad_norm": 524.27392578125, + "learning_rate": 9.35949196302201e-06, + "loss": 25.2071, + "step": 123300 + }, + { + "epoch": 0.24909400162413087, + "grad_norm": 353.5171203613281, + "learning_rate": 9.359321019448054e-06, + "loss": 18.8964, + "step": 123310 + }, + { + "epoch": 0.2491142022568147, + "grad_norm": 693.8262939453125, + "learning_rate": 9.359150054627298e-06, + "loss": 17.0254, + "step": 123320 + }, + { + "epoch": 0.2491344028894985, + "grad_norm": 403.408935546875, + "learning_rate": 9.35897906856057e-06, + "loss": 11.0137, + "step": 123330 + }, + { + "epoch": 0.24915460352218233, + "grad_norm": 1126.0416259765625, + "learning_rate": 9.358808061248708e-06, + "loss": 32.328, + "step": 123340 + }, + { + "epoch": 0.24917480415486612, + "grad_norm": 249.3668670654297, + "learning_rate": 9.358637032692546e-06, + "loss": 12.516, + "step": 123350 + }, + { + "epoch": 0.24919500478754994, + "grad_norm": 617.3007202148438, + "learning_rate": 9.358465982892913e-06, + "loss": 23.9673, + "step": 123360 + }, + { + "epoch": 0.24921520542023376, + "grad_norm": 24.10763931274414, + "learning_rate": 9.358294911850648e-06, + "loss": 10.9903, + "step": 
123370 + }, + { + "epoch": 0.24923540605291758, + "grad_norm": 424.51885986328125, + "learning_rate": 9.35812381956658e-06, + "loss": 10.8367, + "step": 123380 + }, + { + "epoch": 0.2492556066856014, + "grad_norm": 338.3468322753906, + "learning_rate": 9.357952706041545e-06, + "loss": 47.2815, + "step": 123390 + }, + { + "epoch": 0.24927580731828522, + "grad_norm": 124.84664154052734, + "learning_rate": 9.357781571276379e-06, + "loss": 34.763, + "step": 123400 + }, + { + "epoch": 0.249296007950969, + "grad_norm": 654.6494750976562, + "learning_rate": 9.357610415271913e-06, + "loss": 26.2736, + "step": 123410 + }, + { + "epoch": 0.24931620858365283, + "grad_norm": 230.8025360107422, + "learning_rate": 9.357439238028982e-06, + "loss": 24.442, + "step": 123420 + }, + { + "epoch": 0.24933640921633665, + "grad_norm": 689.7447509765625, + "learning_rate": 9.357268039548422e-06, + "loss": 25.5089, + "step": 123430 + }, + { + "epoch": 0.24935660984902047, + "grad_norm": 71.48304748535156, + "learning_rate": 9.357096819831065e-06, + "loss": 14.6578, + "step": 123440 + }, + { + "epoch": 0.2493768104817043, + "grad_norm": 1084.497802734375, + "learning_rate": 9.356925578877748e-06, + "loss": 25.6389, + "step": 123450 + }, + { + "epoch": 0.2493970111143881, + "grad_norm": 68.59516143798828, + "learning_rate": 9.3567543166893e-06, + "loss": 28.2204, + "step": 123460 + }, + { + "epoch": 0.24941721174707193, + "grad_norm": 457.4917297363281, + "learning_rate": 9.356583033266565e-06, + "loss": 27.1434, + "step": 123470 + }, + { + "epoch": 0.24943741237975572, + "grad_norm": 414.35943603515625, + "learning_rate": 9.356411728610368e-06, + "loss": 22.7297, + "step": 123480 + }, + { + "epoch": 0.24945761301243954, + "grad_norm": 540.7085571289062, + "learning_rate": 9.356240402721552e-06, + "loss": 25.5983, + "step": 123490 + }, + { + "epoch": 0.24947781364512336, + "grad_norm": 24.585803985595703, + "learning_rate": 9.356069055600949e-06, + "loss": 20.0636, + "step": 123500 + }, + { + "epoch": 0.24949801427780718, + "grad_norm": 492.99542236328125, + "learning_rate": 9.35589768724939e-06, + "loss": 25.6995, + "step": 123510 + }, + { + "epoch": 0.249518214910491, + "grad_norm": 631.908447265625, + "learning_rate": 9.355726297667717e-06, + "loss": 34.1041, + "step": 123520 + }, + { + "epoch": 0.24953841554317482, + "grad_norm": 354.6398620605469, + "learning_rate": 9.355554886856762e-06, + "loss": 20.5488, + "step": 123530 + }, + { + "epoch": 0.24955861617585862, + "grad_norm": 272.0207214355469, + "learning_rate": 9.355383454817362e-06, + "loss": 19.0143, + "step": 123540 + }, + { + "epoch": 0.24957881680854244, + "grad_norm": 254.9385528564453, + "learning_rate": 9.355212001550349e-06, + "loss": 32.9845, + "step": 123550 + }, + { + "epoch": 0.24959901744122626, + "grad_norm": 309.18115234375, + "learning_rate": 9.35504052705656e-06, + "loss": 25.1024, + "step": 123560 + }, + { + "epoch": 0.24961921807391008, + "grad_norm": 370.5217590332031, + "learning_rate": 9.354869031336835e-06, + "loss": 22.137, + "step": 123570 + }, + { + "epoch": 0.2496394187065939, + "grad_norm": 237.80308532714844, + "learning_rate": 9.354697514392005e-06, + "loss": 37.1087, + "step": 123580 + }, + { + "epoch": 0.24965961933927772, + "grad_norm": 332.1741027832031, + "learning_rate": 9.354525976222907e-06, + "loss": 23.1131, + "step": 123590 + }, + { + "epoch": 0.2496798199719615, + "grad_norm": 216.78390502929688, + "learning_rate": 9.354354416830377e-06, + "loss": 32.0582, + "step": 123600 + }, + { + "epoch": 0.24970002060464533, 
+ "grad_norm": 353.84637451171875, + "learning_rate": 9.354182836215252e-06, + "loss": 20.9798, + "step": 123610 + }, + { + "epoch": 0.24972022123732915, + "grad_norm": 361.56280517578125, + "learning_rate": 9.35401123437837e-06, + "loss": 28.2848, + "step": 123620 + }, + { + "epoch": 0.24974042187001297, + "grad_norm": 722.0396118164062, + "learning_rate": 9.353839611320563e-06, + "loss": 35.3713, + "step": 123630 + }, + { + "epoch": 0.2497606225026968, + "grad_norm": 759.531982421875, + "learning_rate": 9.35366796704267e-06, + "loss": 68.8292, + "step": 123640 + }, + { + "epoch": 0.2497808231353806, + "grad_norm": 540.5938720703125, + "learning_rate": 9.353496301545529e-06, + "loss": 36.0932, + "step": 123650 + }, + { + "epoch": 0.24980102376806443, + "grad_norm": 1160.757080078125, + "learning_rate": 9.353324614829974e-06, + "loss": 30.2268, + "step": 123660 + }, + { + "epoch": 0.24982122440074822, + "grad_norm": 377.4170837402344, + "learning_rate": 9.353152906896842e-06, + "loss": 10.1157, + "step": 123670 + }, + { + "epoch": 0.24984142503343204, + "grad_norm": 586.1427001953125, + "learning_rate": 9.352981177746972e-06, + "loss": 30.9146, + "step": 123680 + }, + { + "epoch": 0.24986162566611586, + "grad_norm": 490.35498046875, + "learning_rate": 9.3528094273812e-06, + "loss": 16.9506, + "step": 123690 + }, + { + "epoch": 0.24988182629879968, + "grad_norm": 178.823974609375, + "learning_rate": 9.352637655800362e-06, + "loss": 7.6823, + "step": 123700 + }, + { + "epoch": 0.2499020269314835, + "grad_norm": 260.7527770996094, + "learning_rate": 9.352465863005295e-06, + "loss": 27.2347, + "step": 123710 + }, + { + "epoch": 0.24992222756416732, + "grad_norm": 626.5932006835938, + "learning_rate": 9.35229404899684e-06, + "loss": 21.2371, + "step": 123720 + }, + { + "epoch": 0.2499424281968511, + "grad_norm": 637.3138427734375, + "learning_rate": 9.352122213775829e-06, + "loss": 27.2617, + "step": 123730 + }, + { + "epoch": 0.24996262882953493, + "grad_norm": 320.4120178222656, + "learning_rate": 9.351950357343103e-06, + "loss": 6.5722, + "step": 123740 + }, + { + "epoch": 0.24998282946221875, + "grad_norm": 1096.346435546875, + "learning_rate": 9.351778479699499e-06, + "loss": 36.8681, + "step": 123750 + }, + { + "epoch": 0.25000303009490255, + "grad_norm": 263.7565002441406, + "learning_rate": 9.351606580845854e-06, + "loss": 15.5306, + "step": 123760 + }, + { + "epoch": 0.25002323072758637, + "grad_norm": 611.5940551757812, + "learning_rate": 9.351434660783007e-06, + "loss": 17.9882, + "step": 123770 + }, + { + "epoch": 0.2500434313602702, + "grad_norm": 200.8699493408203, + "learning_rate": 9.351262719511796e-06, + "loss": 11.1526, + "step": 123780 + }, + { + "epoch": 0.250063631992954, + "grad_norm": 279.4588928222656, + "learning_rate": 9.351090757033056e-06, + "loss": 21.3229, + "step": 123790 + }, + { + "epoch": 0.2500838326256378, + "grad_norm": 146.48106384277344, + "learning_rate": 9.35091877334763e-06, + "loss": 30.026, + "step": 123800 + }, + { + "epoch": 0.25010403325832165, + "grad_norm": 462.09674072265625, + "learning_rate": 9.350746768456351e-06, + "loss": 17.7745, + "step": 123810 + }, + { + "epoch": 0.25012423389100547, + "grad_norm": 535.4681396484375, + "learning_rate": 9.350574742360062e-06, + "loss": 21.889, + "step": 123820 + }, + { + "epoch": 0.2501444345236893, + "grad_norm": 523.9403686523438, + "learning_rate": 9.350402695059597e-06, + "loss": 28.2628, + "step": 123830 + }, + { + "epoch": 0.2501646351563731, + "grad_norm": 716.45166015625, + "learning_rate": 
9.3502306265558e-06, + "loss": 36.8315, + "step": 123840 + }, + { + "epoch": 0.2501848357890569, + "grad_norm": 176.59716796875, + "learning_rate": 9.350058536849505e-06, + "loss": 14.8663, + "step": 123850 + }, + { + "epoch": 0.25020503642174075, + "grad_norm": 352.4153137207031, + "learning_rate": 9.349886425941553e-06, + "loss": 16.6689, + "step": 123860 + }, + { + "epoch": 0.25022523705442457, + "grad_norm": 814.4611206054688, + "learning_rate": 9.34971429383278e-06, + "loss": 49.609, + "step": 123870 + }, + { + "epoch": 0.2502454376871084, + "grad_norm": 696.630859375, + "learning_rate": 9.349542140524029e-06, + "loss": 31.2652, + "step": 123880 + }, + { + "epoch": 0.25026563831979215, + "grad_norm": 253.35569763183594, + "learning_rate": 9.349369966016135e-06, + "loss": 19.0699, + "step": 123890 + }, + { + "epoch": 0.25028583895247597, + "grad_norm": 776.6314086914062, + "learning_rate": 9.349197770309942e-06, + "loss": 22.1644, + "step": 123900 + }, + { + "epoch": 0.2503060395851598, + "grad_norm": 338.4505310058594, + "learning_rate": 9.349025553406286e-06, + "loss": 18.2984, + "step": 123910 + }, + { + "epoch": 0.2503262402178436, + "grad_norm": 505.5835876464844, + "learning_rate": 9.348853315306006e-06, + "loss": 16.0944, + "step": 123920 + }, + { + "epoch": 0.25034644085052743, + "grad_norm": 213.57562255859375, + "learning_rate": 9.348681056009942e-06, + "loss": 17.9506, + "step": 123930 + }, + { + "epoch": 0.25036664148321125, + "grad_norm": 196.11744689941406, + "learning_rate": 9.348508775518935e-06, + "loss": 13.4443, + "step": 123940 + }, + { + "epoch": 0.25038684211589507, + "grad_norm": 486.9748229980469, + "learning_rate": 9.348336473833824e-06, + "loss": 21.9713, + "step": 123950 + }, + { + "epoch": 0.2504070427485789, + "grad_norm": 202.58990478515625, + "learning_rate": 9.348164150955448e-06, + "loss": 18.8582, + "step": 123960 + }, + { + "epoch": 0.2504272433812627, + "grad_norm": 446.57952880859375, + "learning_rate": 9.347991806884646e-06, + "loss": 19.8669, + "step": 123970 + }, + { + "epoch": 0.25044744401394653, + "grad_norm": 685.4554443359375, + "learning_rate": 9.347819441622261e-06, + "loss": 8.7122, + "step": 123980 + }, + { + "epoch": 0.25046764464663035, + "grad_norm": 937.5147094726562, + "learning_rate": 9.347647055169132e-06, + "loss": 33.5275, + "step": 123990 + }, + { + "epoch": 0.25048784527931417, + "grad_norm": 887.2894897460938, + "learning_rate": 9.347474647526095e-06, + "loss": 24.8595, + "step": 124000 + }, + { + "epoch": 0.250508045911998, + "grad_norm": 0.0, + "learning_rate": 9.347302218693997e-06, + "loss": 18.8776, + "step": 124010 + }, + { + "epoch": 0.25052824654468175, + "grad_norm": 3751.490478515625, + "learning_rate": 9.347129768673675e-06, + "loss": 37.2533, + "step": 124020 + }, + { + "epoch": 0.2505484471773656, + "grad_norm": 355.5738525390625, + "learning_rate": 9.346957297465968e-06, + "loss": 15.4733, + "step": 124030 + }, + { + "epoch": 0.2505686478100494, + "grad_norm": 49.08369445800781, + "learning_rate": 9.34678480507172e-06, + "loss": 20.9675, + "step": 124040 + }, + { + "epoch": 0.2505888484427332, + "grad_norm": 257.855224609375, + "learning_rate": 9.34661229149177e-06, + "loss": 11.4117, + "step": 124050 + }, + { + "epoch": 0.25060904907541703, + "grad_norm": 481.6152038574219, + "learning_rate": 9.346439756726959e-06, + "loss": 29.9393, + "step": 124060 + }, + { + "epoch": 0.25062924970810085, + "grad_norm": 562.333984375, + "learning_rate": 9.346267200778127e-06, + "loss": 25.6238, + "step": 124070 + }, + { + 
"epoch": 0.2506494503407847, + "grad_norm": 262.5706787109375, + "learning_rate": 9.346094623646116e-06, + "loss": 36.1854, + "step": 124080 + }, + { + "epoch": 0.2506696509734685, + "grad_norm": 798.1765747070312, + "learning_rate": 9.345922025331765e-06, + "loss": 18.6843, + "step": 124090 + }, + { + "epoch": 0.2506898516061523, + "grad_norm": 656.6163940429688, + "learning_rate": 9.34574940583592e-06, + "loss": 50.7797, + "step": 124100 + }, + { + "epoch": 0.25071005223883613, + "grad_norm": 737.9656372070312, + "learning_rate": 9.345576765159419e-06, + "loss": 14.9178, + "step": 124110 + }, + { + "epoch": 0.25073025287151995, + "grad_norm": 159.78631591796875, + "learning_rate": 9.345404103303104e-06, + "loss": 20.8397, + "step": 124120 + }, + { + "epoch": 0.2507504535042038, + "grad_norm": 250.57003784179688, + "learning_rate": 9.345231420267816e-06, + "loss": 30.9404, + "step": 124130 + }, + { + "epoch": 0.2507706541368876, + "grad_norm": 323.52801513671875, + "learning_rate": 9.345058716054396e-06, + "loss": 29.3399, + "step": 124140 + }, + { + "epoch": 0.25079085476957136, + "grad_norm": 485.9049377441406, + "learning_rate": 9.344885990663689e-06, + "loss": 18.454, + "step": 124150 + }, + { + "epoch": 0.2508110554022552, + "grad_norm": 185.63893127441406, + "learning_rate": 9.344713244096533e-06, + "loss": 28.1718, + "step": 124160 + }, + { + "epoch": 0.250831256034939, + "grad_norm": 491.8743591308594, + "learning_rate": 9.344540476353772e-06, + "loss": 27.9859, + "step": 124170 + }, + { + "epoch": 0.2508514566676228, + "grad_norm": 513.3983764648438, + "learning_rate": 9.344367687436246e-06, + "loss": 17.946, + "step": 124180 + }, + { + "epoch": 0.25087165730030664, + "grad_norm": 173.89114379882812, + "learning_rate": 9.344194877344802e-06, + "loss": 16.4617, + "step": 124190 + }, + { + "epoch": 0.25089185793299046, + "grad_norm": 221.052001953125, + "learning_rate": 9.344022046080277e-06, + "loss": 12.404, + "step": 124200 + }, + { + "epoch": 0.2509120585656743, + "grad_norm": 184.27113342285156, + "learning_rate": 9.343849193643517e-06, + "loss": 25.8977, + "step": 124210 + }, + { + "epoch": 0.2509322591983581, + "grad_norm": 214.9903564453125, + "learning_rate": 9.343676320035362e-06, + "loss": 15.2502, + "step": 124220 + }, + { + "epoch": 0.2509524598310419, + "grad_norm": 739.7628784179688, + "learning_rate": 9.343503425256655e-06, + "loss": 29.346, + "step": 124230 + }, + { + "epoch": 0.25097266046372574, + "grad_norm": 279.92120361328125, + "learning_rate": 9.343330509308239e-06, + "loss": 18.9458, + "step": 124240 + }, + { + "epoch": 0.25099286109640956, + "grad_norm": 189.4911346435547, + "learning_rate": 9.343157572190957e-06, + "loss": 20.5485, + "step": 124250 + }, + { + "epoch": 0.2510130617290934, + "grad_norm": 346.7785339355469, + "learning_rate": 9.342984613905653e-06, + "loss": 23.0963, + "step": 124260 + }, + { + "epoch": 0.2510332623617772, + "grad_norm": 107.39372253417969, + "learning_rate": 9.342811634453168e-06, + "loss": 10.2146, + "step": 124270 + }, + { + "epoch": 0.25105346299446096, + "grad_norm": 411.5723571777344, + "learning_rate": 9.342638633834344e-06, + "loss": 17.3026, + "step": 124280 + }, + { + "epoch": 0.2510736636271448, + "grad_norm": 599.7190551757812, + "learning_rate": 9.342465612050028e-06, + "loss": 21.0134, + "step": 124290 + }, + { + "epoch": 0.2510938642598286, + "grad_norm": 708.0547485351562, + "learning_rate": 9.342292569101061e-06, + "loss": 30.2688, + "step": 124300 + }, + { + "epoch": 0.2511140648925124, + "grad_norm": 
707.1171264648438, + "learning_rate": 9.342119504988287e-06, + "loss": 29.5308, + "step": 124310 + }, + { + "epoch": 0.25113426552519624, + "grad_norm": 379.90093994140625, + "learning_rate": 9.341946419712549e-06, + "loss": 23.1947, + "step": 124320 + }, + { + "epoch": 0.25115446615788006, + "grad_norm": 552.807861328125, + "learning_rate": 9.341773313274689e-06, + "loss": 12.9583, + "step": 124330 + }, + { + "epoch": 0.2511746667905639, + "grad_norm": 649.8052368164062, + "learning_rate": 9.341600185675555e-06, + "loss": 31.7203, + "step": 124340 + }, + { + "epoch": 0.2511948674232477, + "grad_norm": 326.8381042480469, + "learning_rate": 9.341427036915987e-06, + "loss": 29.4605, + "step": 124350 + }, + { + "epoch": 0.2512150680559315, + "grad_norm": 362.11376953125, + "learning_rate": 9.34125386699683e-06, + "loss": 21.7367, + "step": 124360 + }, + { + "epoch": 0.25123526868861534, + "grad_norm": 742.8120727539062, + "learning_rate": 9.341080675918927e-06, + "loss": 14.9553, + "step": 124370 + }, + { + "epoch": 0.25125546932129916, + "grad_norm": 259.9765930175781, + "learning_rate": 9.340907463683126e-06, + "loss": 33.5456, + "step": 124380 + }, + { + "epoch": 0.251275669953983, + "grad_norm": 598.995849609375, + "learning_rate": 9.340734230290267e-06, + "loss": 28.3483, + "step": 124390 + }, + { + "epoch": 0.25129587058666675, + "grad_norm": 745.3855590820312, + "learning_rate": 9.340560975741198e-06, + "loss": 24.1055, + "step": 124400 + }, + { + "epoch": 0.25131607121935057, + "grad_norm": 18.715286254882812, + "learning_rate": 9.340387700036758e-06, + "loss": 20.7292, + "step": 124410 + }, + { + "epoch": 0.2513362718520344, + "grad_norm": 491.3661804199219, + "learning_rate": 9.340214403177797e-06, + "loss": 20.2669, + "step": 124420 + }, + { + "epoch": 0.2513564724847182, + "grad_norm": 192.1144256591797, + "learning_rate": 9.340041085165157e-06, + "loss": 20.6693, + "step": 124430 + }, + { + "epoch": 0.251376673117402, + "grad_norm": 286.21240234375, + "learning_rate": 9.339867745999682e-06, + "loss": 18.1965, + "step": 124440 + }, + { + "epoch": 0.25139687375008585, + "grad_norm": 494.5196228027344, + "learning_rate": 9.339694385682219e-06, + "loss": 12.9852, + "step": 124450 + }, + { + "epoch": 0.25141707438276967, + "grad_norm": 502.58599853515625, + "learning_rate": 9.339521004213611e-06, + "loss": 18.5937, + "step": 124460 + }, + { + "epoch": 0.2514372750154535, + "grad_norm": 535.0, + "learning_rate": 9.339347601594704e-06, + "loss": 25.7947, + "step": 124470 + }, + { + "epoch": 0.2514574756481373, + "grad_norm": 841.1640014648438, + "learning_rate": 9.339174177826345e-06, + "loss": 15.6669, + "step": 124480 + }, + { + "epoch": 0.2514776762808211, + "grad_norm": 568.3220825195312, + "learning_rate": 9.339000732909376e-06, + "loss": 17.6957, + "step": 124490 + }, + { + "epoch": 0.25149787691350495, + "grad_norm": 348.40625, + "learning_rate": 9.338827266844643e-06, + "loss": 22.942, + "step": 124500 + }, + { + "epoch": 0.25151807754618877, + "grad_norm": 431.1079406738281, + "learning_rate": 9.338653779632993e-06, + "loss": 18.2831, + "step": 124510 + }, + { + "epoch": 0.2515382781788726, + "grad_norm": 1128.4862060546875, + "learning_rate": 9.33848027127527e-06, + "loss": 31.4034, + "step": 124520 + }, + { + "epoch": 0.25155847881155635, + "grad_norm": 49.614112854003906, + "learning_rate": 9.33830674177232e-06, + "loss": 14.1368, + "step": 124530 + }, + { + "epoch": 0.25157867944424017, + "grad_norm": 329.799560546875, + "learning_rate": 9.338133191124992e-06, + "loss": 
52.1327, + "step": 124540 + }, + { + "epoch": 0.251598880076924, + "grad_norm": 521.410888671875, + "learning_rate": 9.337959619334125e-06, + "loss": 25.7208, + "step": 124550 + }, + { + "epoch": 0.2516190807096078, + "grad_norm": 449.039794921875, + "learning_rate": 9.337786026400573e-06, + "loss": 20.2129, + "step": 124560 + }, + { + "epoch": 0.25163928134229163, + "grad_norm": 72.53112030029297, + "learning_rate": 9.337612412325174e-06, + "loss": 28.8025, + "step": 124570 + }, + { + "epoch": 0.25165948197497545, + "grad_norm": 627.7360229492188, + "learning_rate": 9.33743877710878e-06, + "loss": 21.4003, + "step": 124580 + }, + { + "epoch": 0.25167968260765927, + "grad_norm": 595.9915771484375, + "learning_rate": 9.337265120752236e-06, + "loss": 25.925, + "step": 124590 + }, + { + "epoch": 0.2516998832403431, + "grad_norm": 402.636474609375, + "learning_rate": 9.337091443256388e-06, + "loss": 13.7396, + "step": 124600 + }, + { + "epoch": 0.2517200838730269, + "grad_norm": 628.917724609375, + "learning_rate": 9.336917744622083e-06, + "loss": 26.3352, + "step": 124610 + }, + { + "epoch": 0.25174028450571073, + "grad_norm": 202.30224609375, + "learning_rate": 9.336744024850165e-06, + "loss": 28.0125, + "step": 124620 + }, + { + "epoch": 0.25176048513839455, + "grad_norm": 97.6500473022461, + "learning_rate": 9.336570283941483e-06, + "loss": 19.9929, + "step": 124630 + }, + { + "epoch": 0.25178068577107837, + "grad_norm": 143.10940551757812, + "learning_rate": 9.336396521896884e-06, + "loss": 19.2747, + "step": 124640 + }, + { + "epoch": 0.2518008864037622, + "grad_norm": 663.7200317382812, + "learning_rate": 9.336222738717214e-06, + "loss": 22.4514, + "step": 124650 + }, + { + "epoch": 0.25182108703644596, + "grad_norm": 553.7361450195312, + "learning_rate": 9.33604893440332e-06, + "loss": 23.7986, + "step": 124660 + }, + { + "epoch": 0.2518412876691298, + "grad_norm": 317.4814758300781, + "learning_rate": 9.33587510895605e-06, + "loss": 30.1541, + "step": 124670 + }, + { + "epoch": 0.2518614883018136, + "grad_norm": 479.4104919433594, + "learning_rate": 9.335701262376249e-06, + "loss": 38.8416, + "step": 124680 + }, + { + "epoch": 0.2518816889344974, + "grad_norm": 298.1420593261719, + "learning_rate": 9.335527394664767e-06, + "loss": 29.4055, + "step": 124690 + }, + { + "epoch": 0.25190188956718124, + "grad_norm": 212.33372497558594, + "learning_rate": 9.33535350582245e-06, + "loss": 12.1949, + "step": 124700 + }, + { + "epoch": 0.25192209019986506, + "grad_norm": 507.652099609375, + "learning_rate": 9.335179595850147e-06, + "loss": 34.6706, + "step": 124710 + }, + { + "epoch": 0.2519422908325489, + "grad_norm": 91.93364715576172, + "learning_rate": 9.335005664748704e-06, + "loss": 15.2404, + "step": 124720 + }, + { + "epoch": 0.2519624914652327, + "grad_norm": 547.1591186523438, + "learning_rate": 9.33483171251897e-06, + "loss": 21.646, + "step": 124730 + }, + { + "epoch": 0.2519826920979165, + "grad_norm": 287.1502380371094, + "learning_rate": 9.33465773916179e-06, + "loss": 15.2263, + "step": 124740 + }, + { + "epoch": 0.25200289273060034, + "grad_norm": 267.3254699707031, + "learning_rate": 9.334483744678015e-06, + "loss": 14.3156, + "step": 124750 + }, + { + "epoch": 0.25202309336328416, + "grad_norm": 462.9267883300781, + "learning_rate": 9.33430972906849e-06, + "loss": 22.0699, + "step": 124760 + }, + { + "epoch": 0.252043293995968, + "grad_norm": 1060.0771484375, + "learning_rate": 9.334135692334067e-06, + "loss": 29.6059, + "step": 124770 + }, + { + "epoch": 0.2520634946286518, 
+ "grad_norm": 1192.935791015625, + "learning_rate": 9.333961634475593e-06, + "loss": 31.8512, + "step": 124780 + }, + { + "epoch": 0.25208369526133556, + "grad_norm": 671.791748046875, + "learning_rate": 9.333787555493915e-06, + "loss": 20.1712, + "step": 124790 + }, + { + "epoch": 0.2521038958940194, + "grad_norm": 606.4688720703125, + "learning_rate": 9.333613455389883e-06, + "loss": 14.9991, + "step": 124800 + }, + { + "epoch": 0.2521240965267032, + "grad_norm": 583.3821411132812, + "learning_rate": 9.333439334164343e-06, + "loss": 18.1324, + "step": 124810 + }, + { + "epoch": 0.252144297159387, + "grad_norm": 616.3452758789062, + "learning_rate": 9.333265191818145e-06, + "loss": 25.5548, + "step": 124820 + }, + { + "epoch": 0.25216449779207084, + "grad_norm": 96.70596313476562, + "learning_rate": 9.333091028352138e-06, + "loss": 53.8161, + "step": 124830 + }, + { + "epoch": 0.25218469842475466, + "grad_norm": 740.4271240234375, + "learning_rate": 9.332916843767173e-06, + "loss": 23.7683, + "step": 124840 + }, + { + "epoch": 0.2522048990574385, + "grad_norm": 479.810302734375, + "learning_rate": 9.332742638064094e-06, + "loss": 27.6112, + "step": 124850 + }, + { + "epoch": 0.2522250996901223, + "grad_norm": 184.2608184814453, + "learning_rate": 9.332568411243754e-06, + "loss": 19.7329, + "step": 124860 + }, + { + "epoch": 0.2522453003228061, + "grad_norm": 362.44598388671875, + "learning_rate": 9.332394163307003e-06, + "loss": 27.9859, + "step": 124870 + }, + { + "epoch": 0.25226550095548994, + "grad_norm": 198.32406616210938, + "learning_rate": 9.332219894254686e-06, + "loss": 19.4701, + "step": 124880 + }, + { + "epoch": 0.25228570158817376, + "grad_norm": 43.71586227416992, + "learning_rate": 9.332045604087656e-06, + "loss": 21.2054, + "step": 124890 + }, + { + "epoch": 0.2523059022208576, + "grad_norm": 165.63328552246094, + "learning_rate": 9.33187129280676e-06, + "loss": 11.1213, + "step": 124900 + }, + { + "epoch": 0.25232610285354135, + "grad_norm": 0.5600117444992065, + "learning_rate": 9.331696960412849e-06, + "loss": 23.7717, + "step": 124910 + }, + { + "epoch": 0.25234630348622517, + "grad_norm": 377.10498046875, + "learning_rate": 9.331522606906773e-06, + "loss": 16.4109, + "step": 124920 + }, + { + "epoch": 0.252366504118909, + "grad_norm": 45.88465118408203, + "learning_rate": 9.331348232289382e-06, + "loss": 15.0914, + "step": 124930 + }, + { + "epoch": 0.2523867047515928, + "grad_norm": 474.8992919921875, + "learning_rate": 9.331173836561522e-06, + "loss": 51.2465, + "step": 124940 + }, + { + "epoch": 0.2524069053842766, + "grad_norm": 520.2603759765625, + "learning_rate": 9.330999419724048e-06, + "loss": 30.8441, + "step": 124950 + }, + { + "epoch": 0.25242710601696045, + "grad_norm": 631.8528442382812, + "learning_rate": 9.330824981777808e-06, + "loss": 20.9879, + "step": 124960 + }, + { + "epoch": 0.25244730664964427, + "grad_norm": 455.13140869140625, + "learning_rate": 9.330650522723653e-06, + "loss": 29.3983, + "step": 124970 + }, + { + "epoch": 0.2524675072823281, + "grad_norm": 847.7002563476562, + "learning_rate": 9.33047604256243e-06, + "loss": 30.7107, + "step": 124980 + }, + { + "epoch": 0.2524877079150119, + "grad_norm": 339.4563903808594, + "learning_rate": 9.330301541294994e-06, + "loss": 33.6187, + "step": 124990 + }, + { + "epoch": 0.2525079085476957, + "grad_norm": 318.11883544921875, + "learning_rate": 9.330127018922195e-06, + "loss": 17.0702, + "step": 125000 + }, + { + "epoch": 0.25252810918037955, + "grad_norm": 336.95672607421875, + 
"learning_rate": 9.32995247544488e-06, + "loss": 23.9262, + "step": 125010 + }, + { + "epoch": 0.25254830981306337, + "grad_norm": 269.0525817871094, + "learning_rate": 9.329777910863902e-06, + "loss": 13.5515, + "step": 125020 + }, + { + "epoch": 0.2525685104457472, + "grad_norm": 295.6583251953125, + "learning_rate": 9.32960332518011e-06, + "loss": 23.0723, + "step": 125030 + }, + { + "epoch": 0.25258871107843095, + "grad_norm": 90.92164611816406, + "learning_rate": 9.32942871839436e-06, + "loss": 15.5435, + "step": 125040 + }, + { + "epoch": 0.25260891171111477, + "grad_norm": 1134.6337890625, + "learning_rate": 9.329254090507498e-06, + "loss": 18.1013, + "step": 125050 + }, + { + "epoch": 0.2526291123437986, + "grad_norm": 399.2601318359375, + "learning_rate": 9.329079441520377e-06, + "loss": 28.3305, + "step": 125060 + }, + { + "epoch": 0.2526493129764824, + "grad_norm": 131.95816040039062, + "learning_rate": 9.328904771433846e-06, + "loss": 51.0927, + "step": 125070 + }, + { + "epoch": 0.25266951360916623, + "grad_norm": 6.422798156738281, + "learning_rate": 9.32873008024876e-06, + "loss": 15.6498, + "step": 125080 + }, + { + "epoch": 0.25268971424185005, + "grad_norm": 252.98773193359375, + "learning_rate": 9.328555367965969e-06, + "loss": 16.4012, + "step": 125090 + }, + { + "epoch": 0.25270991487453387, + "grad_norm": 435.1174011230469, + "learning_rate": 9.328380634586322e-06, + "loss": 45.6191, + "step": 125100 + }, + { + "epoch": 0.2527301155072177, + "grad_norm": 405.8626708984375, + "learning_rate": 9.328205880110675e-06, + "loss": 27.0458, + "step": 125110 + }, + { + "epoch": 0.2527503161399015, + "grad_norm": 456.6902160644531, + "learning_rate": 9.328031104539876e-06, + "loss": 40.6759, + "step": 125120 + }, + { + "epoch": 0.25277051677258533, + "grad_norm": 5385.3447265625, + "learning_rate": 9.32785630787478e-06, + "loss": 39.5671, + "step": 125130 + }, + { + "epoch": 0.25279071740526915, + "grad_norm": 1896.0120849609375, + "learning_rate": 9.327681490116233e-06, + "loss": 34.2355, + "step": 125140 + }, + { + "epoch": 0.25281091803795297, + "grad_norm": 389.5846252441406, + "learning_rate": 9.327506651265096e-06, + "loss": 18.4686, + "step": 125150 + }, + { + "epoch": 0.2528311186706368, + "grad_norm": 295.4189453125, + "learning_rate": 9.327331791322214e-06, + "loss": 32.8881, + "step": 125160 + }, + { + "epoch": 0.25285131930332055, + "grad_norm": 358.54852294921875, + "learning_rate": 9.327156910288444e-06, + "loss": 49.5071, + "step": 125170 + }, + { + "epoch": 0.2528715199360044, + "grad_norm": 456.89776611328125, + "learning_rate": 9.326982008164633e-06, + "loss": 11.1545, + "step": 125180 + }, + { + "epoch": 0.2528917205686882, + "grad_norm": 459.1197204589844, + "learning_rate": 9.326807084951639e-06, + "loss": 26.3985, + "step": 125190 + }, + { + "epoch": 0.252911921201372, + "grad_norm": 272.07696533203125, + "learning_rate": 9.326632140650311e-06, + "loss": 15.2639, + "step": 125200 + }, + { + "epoch": 0.25293212183405583, + "grad_norm": 327.7573547363281, + "learning_rate": 9.326457175261503e-06, + "loss": 43.4969, + "step": 125210 + }, + { + "epoch": 0.25295232246673965, + "grad_norm": 609.4105224609375, + "learning_rate": 9.326282188786066e-06, + "loss": 28.7113, + "step": 125220 + }, + { + "epoch": 0.2529725230994235, + "grad_norm": 177.5101318359375, + "learning_rate": 9.326107181224857e-06, + "loss": 33.3363, + "step": 125230 + }, + { + "epoch": 0.2529927237321073, + "grad_norm": 382.63250732421875, + "learning_rate": 9.325932152578726e-06, + "loss": 
33.746, + "step": 125240 + }, + { + "epoch": 0.2530129243647911, + "grad_norm": 206.65513610839844, + "learning_rate": 9.325757102848523e-06, + "loss": 25.7514, + "step": 125250 + }, + { + "epoch": 0.25303312499747493, + "grad_norm": 332.5850524902344, + "learning_rate": 9.325582032035108e-06, + "loss": 19.6042, + "step": 125260 + }, + { + "epoch": 0.25305332563015875, + "grad_norm": 314.7016296386719, + "learning_rate": 9.325406940139327e-06, + "loss": 20.9479, + "step": 125270 + }, + { + "epoch": 0.2530735262628426, + "grad_norm": 500.2131652832031, + "learning_rate": 9.32523182716204e-06, + "loss": 20.6874, + "step": 125280 + }, + { + "epoch": 0.2530937268955264, + "grad_norm": 303.85430908203125, + "learning_rate": 9.325056693104099e-06, + "loss": 24.6704, + "step": 125290 + }, + { + "epoch": 0.25311392752821016, + "grad_norm": 716.2509155273438, + "learning_rate": 9.324881537966355e-06, + "loss": 20.6399, + "step": 125300 + }, + { + "epoch": 0.253134128160894, + "grad_norm": 337.0372009277344, + "learning_rate": 9.324706361749663e-06, + "loss": 31.2655, + "step": 125310 + }, + { + "epoch": 0.2531543287935778, + "grad_norm": 490.2641296386719, + "learning_rate": 9.324531164454876e-06, + "loss": 41.4906, + "step": 125320 + }, + { + "epoch": 0.2531745294262616, + "grad_norm": 355.2698059082031, + "learning_rate": 9.324355946082848e-06, + "loss": 26.2124, + "step": 125330 + }, + { + "epoch": 0.25319473005894544, + "grad_norm": 927.72900390625, + "learning_rate": 9.324180706634434e-06, + "loss": 31.8564, + "step": 125340 + }, + { + "epoch": 0.25321493069162926, + "grad_norm": 335.15618896484375, + "learning_rate": 9.32400544611049e-06, + "loss": 34.1606, + "step": 125350 + }, + { + "epoch": 0.2532351313243131, + "grad_norm": 701.79248046875, + "learning_rate": 9.323830164511865e-06, + "loss": 31.1435, + "step": 125360 + }, + { + "epoch": 0.2532553319569969, + "grad_norm": 432.52545166015625, + "learning_rate": 9.323654861839418e-06, + "loss": 14.9118, + "step": 125370 + }, + { + "epoch": 0.2532755325896807, + "grad_norm": 340.4264221191406, + "learning_rate": 9.323479538094001e-06, + "loss": 23.3531, + "step": 125380 + }, + { + "epoch": 0.25329573322236454, + "grad_norm": 368.6153564453125, + "learning_rate": 9.323304193276468e-06, + "loss": 13.5629, + "step": 125390 + }, + { + "epoch": 0.25331593385504836, + "grad_norm": 346.3034362792969, + "learning_rate": 9.323128827387675e-06, + "loss": 28.6539, + "step": 125400 + }, + { + "epoch": 0.2533361344877322, + "grad_norm": 82.48759460449219, + "learning_rate": 9.322953440428478e-06, + "loss": 24.1632, + "step": 125410 + }, + { + "epoch": 0.253356335120416, + "grad_norm": 127.63971710205078, + "learning_rate": 9.322778032399728e-06, + "loss": 10.0801, + "step": 125420 + }, + { + "epoch": 0.25337653575309976, + "grad_norm": 126.68302917480469, + "learning_rate": 9.322602603302285e-06, + "loss": 15.9136, + "step": 125430 + }, + { + "epoch": 0.2533967363857836, + "grad_norm": 214.1927032470703, + "learning_rate": 9.322427153136999e-06, + "loss": 12.6137, + "step": 125440 + }, + { + "epoch": 0.2534169370184674, + "grad_norm": 501.4695739746094, + "learning_rate": 9.322251681904728e-06, + "loss": 17.3749, + "step": 125450 + }, + { + "epoch": 0.2534371376511512, + "grad_norm": 473.4718933105469, + "learning_rate": 9.322076189606326e-06, + "loss": 23.6231, + "step": 125460 + }, + { + "epoch": 0.25345733828383504, + "grad_norm": 274.3175048828125, + "learning_rate": 9.32190067624265e-06, + "loss": 24.7301, + "step": 125470 + }, + { + "epoch": 
0.25347753891651886, + "grad_norm": 624.3567504882812, + "learning_rate": 9.321725141814553e-06, + "loss": 34.4827, + "step": 125480 + }, + { + "epoch": 0.2534977395492027, + "grad_norm": 136.22207641601562, + "learning_rate": 9.321549586322894e-06, + "loss": 30.056, + "step": 125490 + }, + { + "epoch": 0.2535179401818865, + "grad_norm": 246.750732421875, + "learning_rate": 9.321374009768525e-06, + "loss": 9.8188, + "step": 125500 + }, + { + "epoch": 0.2535381408145703, + "grad_norm": 372.09375, + "learning_rate": 9.321198412152303e-06, + "loss": 15.8852, + "step": 125510 + }, + { + "epoch": 0.25355834144725414, + "grad_norm": 243.27505493164062, + "learning_rate": 9.321022793475082e-06, + "loss": 20.912, + "step": 125520 + }, + { + "epoch": 0.25357854207993796, + "grad_norm": 719.737060546875, + "learning_rate": 9.320847153737724e-06, + "loss": 21.6542, + "step": 125530 + }, + { + "epoch": 0.2535987427126218, + "grad_norm": 2670.60546875, + "learning_rate": 9.320671492941079e-06, + "loss": 86.2205, + "step": 125540 + }, + { + "epoch": 0.25361894334530555, + "grad_norm": 165.8459014892578, + "learning_rate": 9.320495811086005e-06, + "loss": 29.4207, + "step": 125550 + }, + { + "epoch": 0.25363914397798937, + "grad_norm": 102.05403900146484, + "learning_rate": 9.320320108173359e-06, + "loss": 11.2042, + "step": 125560 + }, + { + "epoch": 0.2536593446106732, + "grad_norm": 253.1824951171875, + "learning_rate": 9.320144384203997e-06, + "loss": 15.9787, + "step": 125570 + }, + { + "epoch": 0.253679545243357, + "grad_norm": 28.418079376220703, + "learning_rate": 9.319968639178775e-06, + "loss": 22.1027, + "step": 125580 + }, + { + "epoch": 0.2536997458760408, + "grad_norm": 580.892333984375, + "learning_rate": 9.31979287309855e-06, + "loss": 37.5344, + "step": 125590 + }, + { + "epoch": 0.25371994650872465, + "grad_norm": 705.298095703125, + "learning_rate": 9.319617085964177e-06, + "loss": 30.1274, + "step": 125600 + }, + { + "epoch": 0.25374014714140847, + "grad_norm": 263.25457763671875, + "learning_rate": 9.319441277776515e-06, + "loss": 18.3837, + "step": 125610 + }, + { + "epoch": 0.2537603477740923, + "grad_norm": 440.63330078125, + "learning_rate": 9.31926544853642e-06, + "loss": 21.0317, + "step": 125620 + }, + { + "epoch": 0.2537805484067761, + "grad_norm": 188.52142333984375, + "learning_rate": 9.319089598244751e-06, + "loss": 21.7909, + "step": 125630 + }, + { + "epoch": 0.2538007490394599, + "grad_norm": 318.95556640625, + "learning_rate": 9.318913726902361e-06, + "loss": 10.416, + "step": 125640 + }, + { + "epoch": 0.25382094967214375, + "grad_norm": 202.9970245361328, + "learning_rate": 9.31873783451011e-06, + "loss": 26.4527, + "step": 125650 + }, + { + "epoch": 0.25384115030482757, + "grad_norm": 312.7405090332031, + "learning_rate": 9.318561921068856e-06, + "loss": 23.1204, + "step": 125660 + }, + { + "epoch": 0.2538613509375114, + "grad_norm": 516.2415771484375, + "learning_rate": 9.318385986579453e-06, + "loss": 32.1379, + "step": 125670 + }, + { + "epoch": 0.25388155157019515, + "grad_norm": 94.18694305419922, + "learning_rate": 9.31821003104276e-06, + "loss": 13.0049, + "step": 125680 + }, + { + "epoch": 0.25390175220287897, + "grad_norm": 390.0401306152344, + "learning_rate": 9.318034054459637e-06, + "loss": 22.9145, + "step": 125690 + }, + { + "epoch": 0.2539219528355628, + "grad_norm": 178.14857482910156, + "learning_rate": 9.317858056830938e-06, + "loss": 26.2758, + "step": 125700 + }, + { + "epoch": 0.2539421534682466, + "grad_norm": 623.6525268554688, + 
"learning_rate": 9.317682038157523e-06, + "loss": 26.7489, + "step": 125710 + }, + { + "epoch": 0.25396235410093043, + "grad_norm": 785.6990966796875, + "learning_rate": 9.31750599844025e-06, + "loss": 42.065, + "step": 125720 + }, + { + "epoch": 0.25398255473361425, + "grad_norm": 441.115966796875, + "learning_rate": 9.317329937679976e-06, + "loss": 15.2299, + "step": 125730 + }, + { + "epoch": 0.25400275536629807, + "grad_norm": 411.2340087890625, + "learning_rate": 9.31715385587756e-06, + "loss": 20.9233, + "step": 125740 + }, + { + "epoch": 0.2540229559989819, + "grad_norm": 0.0, + "learning_rate": 9.316977753033858e-06, + "loss": 26.4595, + "step": 125750 + }, + { + "epoch": 0.2540431566316657, + "grad_norm": 417.7156677246094, + "learning_rate": 9.316801629149732e-06, + "loss": 21.4012, + "step": 125760 + }, + { + "epoch": 0.25406335726434953, + "grad_norm": 297.7471008300781, + "learning_rate": 9.316625484226039e-06, + "loss": 22.0466, + "step": 125770 + }, + { + "epoch": 0.25408355789703335, + "grad_norm": 514.2144165039062, + "learning_rate": 9.316449318263635e-06, + "loss": 12.2763, + "step": 125780 + }, + { + "epoch": 0.25410375852971717, + "grad_norm": 429.86260986328125, + "learning_rate": 9.316273131263382e-06, + "loss": 33.9502, + "step": 125790 + }, + { + "epoch": 0.254123959162401, + "grad_norm": 487.69451904296875, + "learning_rate": 9.316096923226135e-06, + "loss": 16.7534, + "step": 125800 + }, + { + "epoch": 0.25414415979508476, + "grad_norm": 408.4544677734375, + "learning_rate": 9.315920694152758e-06, + "loss": 37.152, + "step": 125810 + }, + { + "epoch": 0.2541643604277686, + "grad_norm": 348.5964050292969, + "learning_rate": 9.315744444044105e-06, + "loss": 27.8744, + "step": 125820 + }, + { + "epoch": 0.2541845610604524, + "grad_norm": 360.5810546875, + "learning_rate": 9.315568172901038e-06, + "loss": 33.8791, + "step": 125830 + }, + { + "epoch": 0.2542047616931362, + "grad_norm": 145.67672729492188, + "learning_rate": 9.315391880724414e-06, + "loss": 27.9125, + "step": 125840 + }, + { + "epoch": 0.25422496232582004, + "grad_norm": 315.48248291015625, + "learning_rate": 9.315215567515095e-06, + "loss": 12.6001, + "step": 125850 + }, + { + "epoch": 0.25424516295850386, + "grad_norm": 364.5586242675781, + "learning_rate": 9.315039233273937e-06, + "loss": 28.8424, + "step": 125860 + }, + { + "epoch": 0.2542653635911877, + "grad_norm": 731.7398681640625, + "learning_rate": 9.314862878001802e-06, + "loss": 19.6772, + "step": 125870 + }, + { + "epoch": 0.2542855642238715, + "grad_norm": 22.68682098388672, + "learning_rate": 9.314686501699548e-06, + "loss": 35.0526, + "step": 125880 + }, + { + "epoch": 0.2543057648565553, + "grad_norm": 1074.5439453125, + "learning_rate": 9.314510104368036e-06, + "loss": 56.1964, + "step": 125890 + }, + { + "epoch": 0.25432596548923914, + "grad_norm": 579.945556640625, + "learning_rate": 9.314333686008125e-06, + "loss": 8.7587, + "step": 125900 + }, + { + "epoch": 0.25434616612192296, + "grad_norm": 301.3296203613281, + "learning_rate": 9.314157246620677e-06, + "loss": 28.6569, + "step": 125910 + }, + { + "epoch": 0.2543663667546068, + "grad_norm": 433.71417236328125, + "learning_rate": 9.313980786206547e-06, + "loss": 14.9204, + "step": 125920 + }, + { + "epoch": 0.2543865673872906, + "grad_norm": 531.1284790039062, + "learning_rate": 9.313804304766598e-06, + "loss": 24.0472, + "step": 125930 + }, + { + "epoch": 0.25440676801997436, + "grad_norm": 549.3108520507812, + "learning_rate": 9.31362780230169e-06, + "loss": 25.5688, + "step": 
125940 + }, + { + "epoch": 0.2544269686526582, + "grad_norm": 15.446022987365723, + "learning_rate": 9.313451278812684e-06, + "loss": 21.3832, + "step": 125950 + }, + { + "epoch": 0.254447169285342, + "grad_norm": 416.72601318359375, + "learning_rate": 9.31327473430044e-06, + "loss": 14.7539, + "step": 125960 + }, + { + "epoch": 0.2544673699180258, + "grad_norm": 204.7281494140625, + "learning_rate": 9.313098168765818e-06, + "loss": 18.962, + "step": 125970 + }, + { + "epoch": 0.25448757055070964, + "grad_norm": 1.3953946828842163, + "learning_rate": 9.312921582209678e-06, + "loss": 22.8346, + "step": 125980 + }, + { + "epoch": 0.25450777118339346, + "grad_norm": 380.04534912109375, + "learning_rate": 9.31274497463288e-06, + "loss": 17.6816, + "step": 125990 + }, + { + "epoch": 0.2545279718160773, + "grad_norm": 1047.90966796875, + "learning_rate": 9.312568346036288e-06, + "loss": 39.1876, + "step": 126000 + }, + { + "epoch": 0.2545481724487611, + "grad_norm": 304.3274230957031, + "learning_rate": 9.31239169642076e-06, + "loss": 18.6807, + "step": 126010 + }, + { + "epoch": 0.2545683730814449, + "grad_norm": 605.7481689453125, + "learning_rate": 9.312215025787159e-06, + "loss": 27.6769, + "step": 126020 + }, + { + "epoch": 0.25458857371412874, + "grad_norm": 185.8279571533203, + "learning_rate": 9.312038334136345e-06, + "loss": 11.9562, + "step": 126030 + }, + { + "epoch": 0.25460877434681256, + "grad_norm": 270.0262451171875, + "learning_rate": 9.311861621469178e-06, + "loss": 15.7294, + "step": 126040 + }, + { + "epoch": 0.2546289749794964, + "grad_norm": 432.89654541015625, + "learning_rate": 9.31168488778652e-06, + "loss": 16.5122, + "step": 126050 + }, + { + "epoch": 0.2546491756121802, + "grad_norm": 430.0558776855469, + "learning_rate": 9.311508133089234e-06, + "loss": 20.3825, + "step": 126060 + }, + { + "epoch": 0.25466937624486397, + "grad_norm": 550.1329345703125, + "learning_rate": 9.311331357378181e-06, + "loss": 35.2528, + "step": 126070 + }, + { + "epoch": 0.2546895768775478, + "grad_norm": 407.1488037109375, + "learning_rate": 9.31115456065422e-06, + "loss": 13.0109, + "step": 126080 + }, + { + "epoch": 0.2547097775102316, + "grad_norm": 107.07001495361328, + "learning_rate": 9.310977742918215e-06, + "loss": 27.5119, + "step": 126090 + }, + { + "epoch": 0.2547299781429154, + "grad_norm": 417.6630554199219, + "learning_rate": 9.31080090417103e-06, + "loss": 17.8397, + "step": 126100 + }, + { + "epoch": 0.25475017877559925, + "grad_norm": 360.2039489746094, + "learning_rate": 9.310624044413521e-06, + "loss": 23.1929, + "step": 126110 + }, + { + "epoch": 0.25477037940828307, + "grad_norm": 299.057373046875, + "learning_rate": 9.310447163646554e-06, + "loss": 25.1292, + "step": 126120 + }, + { + "epoch": 0.2547905800409669, + "grad_norm": 311.0969543457031, + "learning_rate": 9.31027026187099e-06, + "loss": 17.6882, + "step": 126130 + }, + { + "epoch": 0.2548107806736507, + "grad_norm": 32.40909194946289, + "learning_rate": 9.31009333908769e-06, + "loss": 18.7168, + "step": 126140 + }, + { + "epoch": 0.2548309813063345, + "grad_norm": 319.6306457519531, + "learning_rate": 9.309916395297523e-06, + "loss": 26.3919, + "step": 126150 + }, + { + "epoch": 0.25485118193901835, + "grad_norm": 417.1777038574219, + "learning_rate": 9.309739430501341e-06, + "loss": 21.3647, + "step": 126160 + }, + { + "epoch": 0.25487138257170217, + "grad_norm": 487.8786926269531, + "learning_rate": 9.309562444700016e-06, + "loss": 17.336, + "step": 126170 + }, + { + "epoch": 0.254891583204386, + 
"grad_norm": 91.7564926147461, + "learning_rate": 9.309385437894402e-06, + "loss": 11.7092, + "step": 126180 + }, + { + "epoch": 0.25491178383706975, + "grad_norm": 182.48744201660156, + "learning_rate": 9.309208410085368e-06, + "loss": 26.1819, + "step": 126190 + }, + { + "epoch": 0.25493198446975357, + "grad_norm": 602.0535888671875, + "learning_rate": 9.309031361273775e-06, + "loss": 24.8527, + "step": 126200 + }, + { + "epoch": 0.2549521851024374, + "grad_norm": 328.72174072265625, + "learning_rate": 9.308854291460487e-06, + "loss": 16.4376, + "step": 126210 + }, + { + "epoch": 0.2549723857351212, + "grad_norm": 898.3261108398438, + "learning_rate": 9.308677200646364e-06, + "loss": 26.2678, + "step": 126220 + }, + { + "epoch": 0.25499258636780503, + "grad_norm": 438.5179443359375, + "learning_rate": 9.308500088832271e-06, + "loss": 34.204, + "step": 126230 + }, + { + "epoch": 0.25501278700048885, + "grad_norm": 160.48248291015625, + "learning_rate": 9.308322956019073e-06, + "loss": 14.5189, + "step": 126240 + }, + { + "epoch": 0.25503298763317267, + "grad_norm": 188.25396728515625, + "learning_rate": 9.30814580220763e-06, + "loss": 23.2258, + "step": 126250 + }, + { + "epoch": 0.2550531882658565, + "grad_norm": 122.05780792236328, + "learning_rate": 9.307968627398807e-06, + "loss": 15.8943, + "step": 126260 + }, + { + "epoch": 0.2550733888985403, + "grad_norm": 178.4373321533203, + "learning_rate": 9.307791431593468e-06, + "loss": 35.2254, + "step": 126270 + }, + { + "epoch": 0.25509358953122413, + "grad_norm": 380.3116455078125, + "learning_rate": 9.307614214792474e-06, + "loss": 21.2028, + "step": 126280 + }, + { + "epoch": 0.25511379016390795, + "grad_norm": 366.8847351074219, + "learning_rate": 9.307436976996692e-06, + "loss": 23.8709, + "step": 126290 + }, + { + "epoch": 0.25513399079659177, + "grad_norm": 671.3023071289062, + "learning_rate": 9.307259718206984e-06, + "loss": 17.903, + "step": 126300 + }, + { + "epoch": 0.2551541914292756, + "grad_norm": 427.435791015625, + "learning_rate": 9.307082438424216e-06, + "loss": 14.3205, + "step": 126310 + }, + { + "epoch": 0.25517439206195935, + "grad_norm": 150.7323760986328, + "learning_rate": 9.30690513764925e-06, + "loss": 13.4545, + "step": 126320 + }, + { + "epoch": 0.2551945926946432, + "grad_norm": 492.7773132324219, + "learning_rate": 9.30672781588295e-06, + "loss": 35.5066, + "step": 126330 + }, + { + "epoch": 0.255214793327327, + "grad_norm": 252.93759155273438, + "learning_rate": 9.306550473126182e-06, + "loss": 16.4858, + "step": 126340 + }, + { + "epoch": 0.2552349939600108, + "grad_norm": 233.88818359375, + "learning_rate": 9.30637310937981e-06, + "loss": 12.5319, + "step": 126350 + }, + { + "epoch": 0.25525519459269463, + "grad_norm": 158.14404296875, + "learning_rate": 9.306195724644695e-06, + "loss": 17.5493, + "step": 126360 + }, + { + "epoch": 0.25527539522537845, + "grad_norm": 16.396745681762695, + "learning_rate": 9.306018318921707e-06, + "loss": 28.2651, + "step": 126370 + }, + { + "epoch": 0.2552955958580623, + "grad_norm": 534.8278198242188, + "learning_rate": 9.305840892211705e-06, + "loss": 41.8798, + "step": 126380 + }, + { + "epoch": 0.2553157964907461, + "grad_norm": 567.7081909179688, + "learning_rate": 9.30566344451556e-06, + "loss": 32.1383, + "step": 126390 + }, + { + "epoch": 0.2553359971234299, + "grad_norm": 482.4659118652344, + "learning_rate": 9.305485975834132e-06, + "loss": 24.9557, + "step": 126400 + }, + { + "epoch": 0.25535619775611373, + "grad_norm": 480.99163818359375, + "learning_rate": 
9.305308486168288e-06, + "loss": 24.6496, + "step": 126410 + }, + { + "epoch": 0.25537639838879755, + "grad_norm": 14.627530097961426, + "learning_rate": 9.305130975518893e-06, + "loss": 12.2533, + "step": 126420 + }, + { + "epoch": 0.2553965990214814, + "grad_norm": 611.604736328125, + "learning_rate": 9.304953443886811e-06, + "loss": 41.7902, + "step": 126430 + }, + { + "epoch": 0.2554167996541652, + "grad_norm": 248.27984619140625, + "learning_rate": 9.304775891272908e-06, + "loss": 24.2118, + "step": 126440 + }, + { + "epoch": 0.25543700028684896, + "grad_norm": 324.9423828125, + "learning_rate": 9.30459831767805e-06, + "loss": 24.758, + "step": 126450 + }, + { + "epoch": 0.2554572009195328, + "grad_norm": 61.277671813964844, + "learning_rate": 9.304420723103101e-06, + "loss": 16.0402, + "step": 126460 + }, + { + "epoch": 0.2554774015522166, + "grad_norm": 127.63201141357422, + "learning_rate": 9.304243107548928e-06, + "loss": 31.894, + "step": 126470 + }, + { + "epoch": 0.2554976021849004, + "grad_norm": 521.2554321289062, + "learning_rate": 9.304065471016396e-06, + "loss": 23.6338, + "step": 126480 + }, + { + "epoch": 0.25551780281758424, + "grad_norm": 381.10595703125, + "learning_rate": 9.303887813506372e-06, + "loss": 14.5358, + "step": 126490 + }, + { + "epoch": 0.25553800345026806, + "grad_norm": 433.2705078125, + "learning_rate": 9.30371013501972e-06, + "loss": 21.2155, + "step": 126500 + }, + { + "epoch": 0.2555582040829519, + "grad_norm": 291.43621826171875, + "learning_rate": 9.303532435557305e-06, + "loss": 20.5499, + "step": 126510 + }, + { + "epoch": 0.2555784047156357, + "grad_norm": 344.5585632324219, + "learning_rate": 9.303354715119997e-06, + "loss": 28.2469, + "step": 126520 + }, + { + "epoch": 0.2555986053483195, + "grad_norm": 677.4943237304688, + "learning_rate": 9.30317697370866e-06, + "loss": 29.1504, + "step": 126530 + }, + { + "epoch": 0.25561880598100334, + "grad_norm": 710.51171875, + "learning_rate": 9.302999211324159e-06, + "loss": 22.6089, + "step": 126540 + }, + { + "epoch": 0.25563900661368716, + "grad_norm": 515.8234252929688, + "learning_rate": 9.302821427967363e-06, + "loss": 46.5509, + "step": 126550 + }, + { + "epoch": 0.255659207246371, + "grad_norm": 169.32244873046875, + "learning_rate": 9.302643623639136e-06, + "loss": 31.265, + "step": 126560 + }, + { + "epoch": 0.2556794078790548, + "grad_norm": 19.139883041381836, + "learning_rate": 9.302465798340347e-06, + "loss": 23.1725, + "step": 126570 + }, + { + "epoch": 0.25569960851173856, + "grad_norm": 740.4823608398438, + "learning_rate": 9.302287952071862e-06, + "loss": 37.6291, + "step": 126580 + }, + { + "epoch": 0.2557198091444224, + "grad_norm": 231.51718139648438, + "learning_rate": 9.302110084834545e-06, + "loss": 25.9045, + "step": 126590 + }, + { + "epoch": 0.2557400097771062, + "grad_norm": 248.5474853515625, + "learning_rate": 9.301932196629267e-06, + "loss": 29.5799, + "step": 126600 + }, + { + "epoch": 0.25576021040979, + "grad_norm": 477.0473327636719, + "learning_rate": 9.301754287456894e-06, + "loss": 20.7088, + "step": 126610 + }, + { + "epoch": 0.25578041104247384, + "grad_norm": 1489.47998046875, + "learning_rate": 9.301576357318291e-06, + "loss": 27.1703, + "step": 126620 + }, + { + "epoch": 0.25580061167515766, + "grad_norm": 218.8579559326172, + "learning_rate": 9.301398406214326e-06, + "loss": 30.6336, + "step": 126630 + }, + { + "epoch": 0.2558208123078415, + "grad_norm": 362.53033447265625, + "learning_rate": 9.301220434145868e-06, + "loss": 23.0781, + "step": 126640 + }, 
+ { + "epoch": 0.2558410129405253, + "grad_norm": 491.1239318847656, + "learning_rate": 9.301042441113784e-06, + "loss": 20.6586, + "step": 126650 + }, + { + "epoch": 0.2558612135732091, + "grad_norm": 650.5254516601562, + "learning_rate": 9.300864427118938e-06, + "loss": 21.4127, + "step": 126660 + }, + { + "epoch": 0.25588141420589294, + "grad_norm": 504.1291198730469, + "learning_rate": 9.300686392162203e-06, + "loss": 22.4969, + "step": 126670 + }, + { + "epoch": 0.25590161483857676, + "grad_norm": 773.797607421875, + "learning_rate": 9.300508336244443e-06, + "loss": 27.0239, + "step": 126680 + }, + { + "epoch": 0.2559218154712606, + "grad_norm": 350.9950866699219, + "learning_rate": 9.300330259366528e-06, + "loss": 9.2837, + "step": 126690 + }, + { + "epoch": 0.2559420161039444, + "grad_norm": 721.0562744140625, + "learning_rate": 9.300152161529325e-06, + "loss": 21.5986, + "step": 126700 + }, + { + "epoch": 0.25596221673662817, + "grad_norm": 304.818603515625, + "learning_rate": 9.299974042733701e-06, + "loss": 22.3794, + "step": 126710 + }, + { + "epoch": 0.255982417369312, + "grad_norm": 330.103759765625, + "learning_rate": 9.299795902980524e-06, + "loss": 35.504, + "step": 126720 + }, + { + "epoch": 0.2560026180019958, + "grad_norm": 726.6624145507812, + "learning_rate": 9.299617742270665e-06, + "loss": 13.4531, + "step": 126730 + }, + { + "epoch": 0.2560228186346796, + "grad_norm": 817.6095581054688, + "learning_rate": 9.29943956060499e-06, + "loss": 24.4785, + "step": 126740 + }, + { + "epoch": 0.25604301926736345, + "grad_norm": 472.0202331542969, + "learning_rate": 9.299261357984368e-06, + "loss": 31.5839, + "step": 126750 + }, + { + "epoch": 0.25606321990004727, + "grad_norm": 377.89208984375, + "learning_rate": 9.299083134409667e-06, + "loss": 19.9071, + "step": 126760 + }, + { + "epoch": 0.2560834205327311, + "grad_norm": 1075.21435546875, + "learning_rate": 9.298904889881757e-06, + "loss": 26.7717, + "step": 126770 + }, + { + "epoch": 0.2561036211654149, + "grad_norm": 332.4275817871094, + "learning_rate": 9.298726624401507e-06, + "loss": 21.1105, + "step": 126780 + }, + { + "epoch": 0.2561238217980987, + "grad_norm": 422.2454528808594, + "learning_rate": 9.298548337969784e-06, + "loss": 24.6704, + "step": 126790 + }, + { + "epoch": 0.25614402243078255, + "grad_norm": 1162.3631591796875, + "learning_rate": 9.298370030587456e-06, + "loss": 32.4518, + "step": 126800 + }, + { + "epoch": 0.25616422306346637, + "grad_norm": 221.30853271484375, + "learning_rate": 9.298191702255395e-06, + "loss": 22.9621, + "step": 126810 + }, + { + "epoch": 0.2561844236961502, + "grad_norm": 205.13914489746094, + "learning_rate": 9.298013352974469e-06, + "loss": 22.1188, + "step": 126820 + }, + { + "epoch": 0.25620462432883395, + "grad_norm": 349.7917785644531, + "learning_rate": 9.297834982745548e-06, + "loss": 29.8036, + "step": 126830 + }, + { + "epoch": 0.25622482496151777, + "grad_norm": 614.1033935546875, + "learning_rate": 9.2976565915695e-06, + "loss": 30.9366, + "step": 126840 + }, + { + "epoch": 0.2562450255942016, + "grad_norm": 349.589111328125, + "learning_rate": 9.297478179447195e-06, + "loss": 21.0276, + "step": 126850 + }, + { + "epoch": 0.2562652262268854, + "grad_norm": 590.1732788085938, + "learning_rate": 9.297299746379503e-06, + "loss": 23.7506, + "step": 126860 + }, + { + "epoch": 0.25628542685956923, + "grad_norm": 904.5769653320312, + "learning_rate": 9.297121292367293e-06, + "loss": 31.6848, + "step": 126870 + }, + { + "epoch": 0.25630562749225305, + "grad_norm": 
446.6373291015625, + "learning_rate": 9.296942817411433e-06, + "loss": 17.5534, + "step": 126880 + }, + { + "epoch": 0.25632582812493687, + "grad_norm": 3.2007405757904053, + "learning_rate": 9.296764321512797e-06, + "loss": 34.6667, + "step": 126890 + }, + { + "epoch": 0.2563460287576207, + "grad_norm": 352.6800842285156, + "learning_rate": 9.296585804672253e-06, + "loss": 14.5502, + "step": 126900 + }, + { + "epoch": 0.2563662293903045, + "grad_norm": 555.1561889648438, + "learning_rate": 9.29640726689067e-06, + "loss": 42.8015, + "step": 126910 + }, + { + "epoch": 0.25638643002298833, + "grad_norm": 1103.8218994140625, + "learning_rate": 9.29622870816892e-06, + "loss": 51.0637, + "step": 126920 + }, + { + "epoch": 0.25640663065567215, + "grad_norm": 283.2348327636719, + "learning_rate": 9.29605012850787e-06, + "loss": 31.336, + "step": 126930 + }, + { + "epoch": 0.25642683128835597, + "grad_norm": 283.6893310546875, + "learning_rate": 9.295871527908396e-06, + "loss": 27.0819, + "step": 126940 + }, + { + "epoch": 0.2564470319210398, + "grad_norm": 938.28369140625, + "learning_rate": 9.295692906371362e-06, + "loss": 51.4435, + "step": 126950 + }, + { + "epoch": 0.25646723255372356, + "grad_norm": 370.732421875, + "learning_rate": 9.295514263897644e-06, + "loss": 15.1923, + "step": 126960 + }, + { + "epoch": 0.2564874331864074, + "grad_norm": 837.4765014648438, + "learning_rate": 9.29533560048811e-06, + "loss": 30.7102, + "step": 126970 + }, + { + "epoch": 0.2565076338190912, + "grad_norm": 291.0663146972656, + "learning_rate": 9.295156916143631e-06, + "loss": 33.8184, + "step": 126980 + }, + { + "epoch": 0.256527834451775, + "grad_norm": 300.3931579589844, + "learning_rate": 9.294978210865078e-06, + "loss": 22.4324, + "step": 126990 + }, + { + "epoch": 0.25654803508445884, + "grad_norm": 324.9526062011719, + "learning_rate": 9.294799484653323e-06, + "loss": 20.5186, + "step": 127000 + }, + { + "epoch": 0.25656823571714266, + "grad_norm": 558.15576171875, + "learning_rate": 9.294620737509235e-06, + "loss": 25.6781, + "step": 127010 + }, + { + "epoch": 0.2565884363498265, + "grad_norm": 526.6491088867188, + "learning_rate": 9.294441969433688e-06, + "loss": 26.2408, + "step": 127020 + }, + { + "epoch": 0.2566086369825103, + "grad_norm": 279.3838806152344, + "learning_rate": 9.294263180427549e-06, + "loss": 22.4848, + "step": 127030 + }, + { + "epoch": 0.2566288376151941, + "grad_norm": 297.49139404296875, + "learning_rate": 9.294084370491695e-06, + "loss": 24.0509, + "step": 127040 + }, + { + "epoch": 0.25664903824787794, + "grad_norm": 768.2383422851562, + "learning_rate": 9.293905539626992e-06, + "loss": 31.2574, + "step": 127050 + }, + { + "epoch": 0.25666923888056176, + "grad_norm": 235.2180938720703, + "learning_rate": 9.293726687834318e-06, + "loss": 22.5829, + "step": 127060 + }, + { + "epoch": 0.2566894395132456, + "grad_norm": 491.4215087890625, + "learning_rate": 9.293547815114537e-06, + "loss": 16.4592, + "step": 127070 + }, + { + "epoch": 0.2567096401459294, + "grad_norm": 306.5160827636719, + "learning_rate": 9.293368921468526e-06, + "loss": 22.34, + "step": 127080 + }, + { + "epoch": 0.25672984077861316, + "grad_norm": 455.01708984375, + "learning_rate": 9.293190006897156e-06, + "loss": 23.3811, + "step": 127090 + }, + { + "epoch": 0.256750041411297, + "grad_norm": 402.4524230957031, + "learning_rate": 9.293011071401299e-06, + "loss": 17.4413, + "step": 127100 + }, + { + "epoch": 0.2567702420439808, + "grad_norm": 406.9162902832031, + "learning_rate": 9.292832114981825e-06, + 
"loss": 26.5433, + "step": 127110 + }, + { + "epoch": 0.2567904426766646, + "grad_norm": 460.7688293457031, + "learning_rate": 9.29265313763961e-06, + "loss": 25.3499, + "step": 127120 + }, + { + "epoch": 0.25681064330934844, + "grad_norm": 385.5970764160156, + "learning_rate": 9.292474139375523e-06, + "loss": 19.3667, + "step": 127130 + }, + { + "epoch": 0.25683084394203226, + "grad_norm": 479.5018615722656, + "learning_rate": 9.292295120190438e-06, + "loss": 21.7879, + "step": 127140 + }, + { + "epoch": 0.2568510445747161, + "grad_norm": 410.8296203613281, + "learning_rate": 9.292116080085226e-06, + "loss": 14.9632, + "step": 127150 + }, + { + "epoch": 0.2568712452073999, + "grad_norm": 256.1704406738281, + "learning_rate": 9.291937019060762e-06, + "loss": 30.5486, + "step": 127160 + }, + { + "epoch": 0.2568914458400837, + "grad_norm": 314.48760986328125, + "learning_rate": 9.291757937117917e-06, + "loss": 18.2682, + "step": 127170 + }, + { + "epoch": 0.25691164647276754, + "grad_norm": 363.6204528808594, + "learning_rate": 9.291578834257565e-06, + "loss": 26.4588, + "step": 127180 + }, + { + "epoch": 0.25693184710545136, + "grad_norm": 143.50018310546875, + "learning_rate": 9.291399710480578e-06, + "loss": 21.9556, + "step": 127190 + }, + { + "epoch": 0.2569520477381352, + "grad_norm": 328.93560791015625, + "learning_rate": 9.291220565787829e-06, + "loss": 25.0751, + "step": 127200 + }, + { + "epoch": 0.256972248370819, + "grad_norm": 155.34873962402344, + "learning_rate": 9.291041400180193e-06, + "loss": 19.5352, + "step": 127210 + }, + { + "epoch": 0.25699244900350277, + "grad_norm": 182.27647399902344, + "learning_rate": 9.29086221365854e-06, + "loss": 40.0581, + "step": 127220 + }, + { + "epoch": 0.2570126496361866, + "grad_norm": 35.468082427978516, + "learning_rate": 9.290683006223745e-06, + "loss": 19.2183, + "step": 127230 + }, + { + "epoch": 0.2570328502688704, + "grad_norm": 217.8312530517578, + "learning_rate": 9.290503777876683e-06, + "loss": 26.7485, + "step": 127240 + }, + { + "epoch": 0.2570530509015542, + "grad_norm": 244.3412628173828, + "learning_rate": 9.290324528618225e-06, + "loss": 20.2491, + "step": 127250 + }, + { + "epoch": 0.25707325153423805, + "grad_norm": 340.1300964355469, + "learning_rate": 9.290145258449243e-06, + "loss": 25.7546, + "step": 127260 + }, + { + "epoch": 0.25709345216692187, + "grad_norm": 624.1416625976562, + "learning_rate": 9.289965967370617e-06, + "loss": 32.9281, + "step": 127270 + }, + { + "epoch": 0.2571136527996057, + "grad_norm": 398.3768615722656, + "learning_rate": 9.289786655383215e-06, + "loss": 20.2076, + "step": 127280 + }, + { + "epoch": 0.2571338534322895, + "grad_norm": 447.0444641113281, + "learning_rate": 9.289607322487914e-06, + "loss": 20.0938, + "step": 127290 + }, + { + "epoch": 0.2571540540649733, + "grad_norm": 764.7389526367188, + "learning_rate": 9.289427968685588e-06, + "loss": 20.0609, + "step": 127300 + }, + { + "epoch": 0.25717425469765715, + "grad_norm": 594.932373046875, + "learning_rate": 9.28924859397711e-06, + "loss": 23.3758, + "step": 127310 + }, + { + "epoch": 0.25719445533034097, + "grad_norm": 153.19818115234375, + "learning_rate": 9.289069198363353e-06, + "loss": 15.733, + "step": 127320 + }, + { + "epoch": 0.2572146559630248, + "grad_norm": 650.463134765625, + "learning_rate": 9.288889781845193e-06, + "loss": 26.4759, + "step": 127330 + }, + { + "epoch": 0.2572348565957086, + "grad_norm": 384.6339111328125, + "learning_rate": 9.288710344423505e-06, + "loss": 21.3026, + "step": 127340 + }, + { + 
"epoch": 0.25725505722839237, + "grad_norm": 1784.262939453125, + "learning_rate": 9.288530886099165e-06, + "loss": 30.8165, + "step": 127350 + }, + { + "epoch": 0.2572752578610762, + "grad_norm": 328.6500549316406, + "learning_rate": 9.288351406873044e-06, + "loss": 18.8066, + "step": 127360 + }, + { + "epoch": 0.25729545849376, + "grad_norm": 528.574462890625, + "learning_rate": 9.288171906746018e-06, + "loss": 15.9671, + "step": 127370 + }, + { + "epoch": 0.25731565912644383, + "grad_norm": 514.7537231445312, + "learning_rate": 9.287992385718963e-06, + "loss": 15.6263, + "step": 127380 + }, + { + "epoch": 0.25733585975912765, + "grad_norm": 248.57421875, + "learning_rate": 9.287812843792752e-06, + "loss": 25.0234, + "step": 127390 + }, + { + "epoch": 0.25735606039181147, + "grad_norm": 666.0214233398438, + "learning_rate": 9.287633280968263e-06, + "loss": 30.8517, + "step": 127400 + }, + { + "epoch": 0.2573762610244953, + "grad_norm": 346.4957580566406, + "learning_rate": 9.287453697246367e-06, + "loss": 20.1712, + "step": 127410 + }, + { + "epoch": 0.2573964616571791, + "grad_norm": 343.5994873046875, + "learning_rate": 9.287274092627944e-06, + "loss": 31.8904, + "step": 127420 + }, + { + "epoch": 0.25741666228986293, + "grad_norm": 892.2613525390625, + "learning_rate": 9.287094467113866e-06, + "loss": 46.5281, + "step": 127430 + }, + { + "epoch": 0.25743686292254675, + "grad_norm": 586.280517578125, + "learning_rate": 9.28691482070501e-06, + "loss": 36.4094, + "step": 127440 + }, + { + "epoch": 0.25745706355523057, + "grad_norm": 852.3339233398438, + "learning_rate": 9.28673515340225e-06, + "loss": 37.6252, + "step": 127450 + }, + { + "epoch": 0.2574772641879144, + "grad_norm": 217.954833984375, + "learning_rate": 9.286555465206463e-06, + "loss": 18.7746, + "step": 127460 + }, + { + "epoch": 0.25749746482059815, + "grad_norm": 298.8985290527344, + "learning_rate": 9.286375756118526e-06, + "loss": 18.3606, + "step": 127470 + }, + { + "epoch": 0.257517665453282, + "grad_norm": 375.60205078125, + "learning_rate": 9.286196026139311e-06, + "loss": 28.7833, + "step": 127480 + }, + { + "epoch": 0.2575378660859658, + "grad_norm": 376.5921936035156, + "learning_rate": 9.2860162752697e-06, + "loss": 14.3099, + "step": 127490 + }, + { + "epoch": 0.2575580667186496, + "grad_norm": 249.449462890625, + "learning_rate": 9.285836503510562e-06, + "loss": 19.4126, + "step": 127500 + }, + { + "epoch": 0.25757826735133343, + "grad_norm": 9.907649993896484, + "learning_rate": 9.285656710862778e-06, + "loss": 33.7773, + "step": 127510 + }, + { + "epoch": 0.25759846798401725, + "grad_norm": 627.5342407226562, + "learning_rate": 9.285476897327223e-06, + "loss": 22.5205, + "step": 127520 + }, + { + "epoch": 0.2576186686167011, + "grad_norm": 223.77120971679688, + "learning_rate": 9.285297062904774e-06, + "loss": 21.9005, + "step": 127530 + }, + { + "epoch": 0.2576388692493849, + "grad_norm": 185.26669311523438, + "learning_rate": 9.285117207596307e-06, + "loss": 18.3743, + "step": 127540 + }, + { + "epoch": 0.2576590698820687, + "grad_norm": 319.34002685546875, + "learning_rate": 9.284937331402697e-06, + "loss": 32.4146, + "step": 127550 + }, + { + "epoch": 0.25767927051475253, + "grad_norm": 1139.358642578125, + "learning_rate": 9.284757434324823e-06, + "loss": 24.3872, + "step": 127560 + }, + { + "epoch": 0.25769947114743635, + "grad_norm": 46.01262283325195, + "learning_rate": 9.284577516363561e-06, + "loss": 34.8342, + "step": 127570 + }, + { + "epoch": 0.2577196717801202, + "grad_norm": 78.2344741821289, 
+ "learning_rate": 9.284397577519788e-06, + "loss": 43.596, + "step": 127580 + }, + { + "epoch": 0.257739872412804, + "grad_norm": 695.995849609375, + "learning_rate": 9.28421761779438e-06, + "loss": 30.5016, + "step": 127590 + }, + { + "epoch": 0.25776007304548776, + "grad_norm": 519.5594482421875, + "learning_rate": 9.284037637188215e-06, + "loss": 21.8365, + "step": 127600 + }, + { + "epoch": 0.2577802736781716, + "grad_norm": 239.10166931152344, + "learning_rate": 9.28385763570217e-06, + "loss": 21.9353, + "step": 127610 + }, + { + "epoch": 0.2578004743108554, + "grad_norm": 716.4600219726562, + "learning_rate": 9.283677613337124e-06, + "loss": 21.1781, + "step": 127620 + }, + { + "epoch": 0.2578206749435392, + "grad_norm": 349.0670471191406, + "learning_rate": 9.283497570093952e-06, + "loss": 12.2865, + "step": 127630 + }, + { + "epoch": 0.25784087557622304, + "grad_norm": 222.88279724121094, + "learning_rate": 9.283317505973533e-06, + "loss": 26.176, + "step": 127640 + }, + { + "epoch": 0.25786107620890686, + "grad_norm": 289.0815734863281, + "learning_rate": 9.283137420976742e-06, + "loss": 22.7614, + "step": 127650 + }, + { + "epoch": 0.2578812768415907, + "grad_norm": 1140.44091796875, + "learning_rate": 9.282957315104462e-06, + "loss": 34.3784, + "step": 127660 + }, + { + "epoch": 0.2579014774742745, + "grad_norm": 181.1317138671875, + "learning_rate": 9.282777188357563e-06, + "loss": 22.6653, + "step": 127670 + }, + { + "epoch": 0.2579216781069583, + "grad_norm": 420.8912353515625, + "learning_rate": 9.282597040736932e-06, + "loss": 22.2477, + "step": 127680 + }, + { + "epoch": 0.25794187873964214, + "grad_norm": 6.192861557006836, + "learning_rate": 9.282416872243441e-06, + "loss": 22.3886, + "step": 127690 + }, + { + "epoch": 0.25796207937232596, + "grad_norm": 312.1874694824219, + "learning_rate": 9.282236682877968e-06, + "loss": 29.201, + "step": 127700 + }, + { + "epoch": 0.2579822800050098, + "grad_norm": 299.1607666015625, + "learning_rate": 9.282056472641393e-06, + "loss": 18.8325, + "step": 127710 + }, + { + "epoch": 0.2580024806376936, + "grad_norm": 181.7383270263672, + "learning_rate": 9.281876241534595e-06, + "loss": 19.3202, + "step": 127720 + }, + { + "epoch": 0.25802268127037736, + "grad_norm": 542.5490112304688, + "learning_rate": 9.28169598955845e-06, + "loss": 31.3417, + "step": 127730 + }, + { + "epoch": 0.2580428819030612, + "grad_norm": 238.58201599121094, + "learning_rate": 9.28151571671384e-06, + "loss": 24.5436, + "step": 127740 + }, + { + "epoch": 0.258063082535745, + "grad_norm": 390.826904296875, + "learning_rate": 9.281335423001641e-06, + "loss": 21.4078, + "step": 127750 + }, + { + "epoch": 0.2580832831684288, + "grad_norm": 618.39111328125, + "learning_rate": 9.281155108422732e-06, + "loss": 21.4185, + "step": 127760 + }, + { + "epoch": 0.25810348380111264, + "grad_norm": 194.64149475097656, + "learning_rate": 9.280974772977994e-06, + "loss": 14.8643, + "step": 127770 + }, + { + "epoch": 0.25812368443379646, + "grad_norm": 726.81005859375, + "learning_rate": 9.280794416668303e-06, + "loss": 26.2176, + "step": 127780 + }, + { + "epoch": 0.2581438850664803, + "grad_norm": 223.7606964111328, + "learning_rate": 9.280614039494538e-06, + "loss": 15.3907, + "step": 127790 + }, + { + "epoch": 0.2581640856991641, + "grad_norm": 294.724609375, + "learning_rate": 9.280433641457582e-06, + "loss": 15.0769, + "step": 127800 + }, + { + "epoch": 0.2581842863318479, + "grad_norm": 269.91461181640625, + "learning_rate": 9.28025322255831e-06, + "loss": 16.6367, + 
"step": 127810 + }, + { + "epoch": 0.25820448696453174, + "grad_norm": 277.8019714355469, + "learning_rate": 9.280072782797602e-06, + "loss": 9.468, + "step": 127820 + }, + { + "epoch": 0.25822468759721556, + "grad_norm": 363.867919921875, + "learning_rate": 9.279892322176341e-06, + "loss": 11.1287, + "step": 127830 + }, + { + "epoch": 0.2582448882298994, + "grad_norm": 55.111881256103516, + "learning_rate": 9.279711840695401e-06, + "loss": 43.9801, + "step": 127840 + }, + { + "epoch": 0.2582650888625832, + "grad_norm": 228.3421630859375, + "learning_rate": 9.279531338355666e-06, + "loss": 15.5273, + "step": 127850 + }, + { + "epoch": 0.25828528949526697, + "grad_norm": 594.6398315429688, + "learning_rate": 9.279350815158014e-06, + "loss": 27.1824, + "step": 127860 + }, + { + "epoch": 0.2583054901279508, + "grad_norm": 611.1942749023438, + "learning_rate": 9.279170271103326e-06, + "loss": 16.6828, + "step": 127870 + }, + { + "epoch": 0.2583256907606346, + "grad_norm": 245.9205322265625, + "learning_rate": 9.27898970619248e-06, + "loss": 17.6414, + "step": 127880 + }, + { + "epoch": 0.2583458913933184, + "grad_norm": 314.4803771972656, + "learning_rate": 9.278809120426358e-06, + "loss": 16.0779, + "step": 127890 + }, + { + "epoch": 0.25836609202600225, + "grad_norm": 169.61856079101562, + "learning_rate": 9.278628513805838e-06, + "loss": 16.6567, + "step": 127900 + }, + { + "epoch": 0.25838629265868607, + "grad_norm": 659.0407104492188, + "learning_rate": 9.278447886331803e-06, + "loss": 27.8375, + "step": 127910 + }, + { + "epoch": 0.2584064932913699, + "grad_norm": 297.7413024902344, + "learning_rate": 9.27826723800513e-06, + "loss": 19.5705, + "step": 127920 + }, + { + "epoch": 0.2584266939240537, + "grad_norm": 366.10211181640625, + "learning_rate": 9.278086568826702e-06, + "loss": 23.52, + "step": 127930 + }, + { + "epoch": 0.2584468945567375, + "grad_norm": 121.39274597167969, + "learning_rate": 9.277905878797401e-06, + "loss": 8.4138, + "step": 127940 + }, + { + "epoch": 0.25846709518942135, + "grad_norm": 106.3432846069336, + "learning_rate": 9.277725167918103e-06, + "loss": 24.0516, + "step": 127950 + }, + { + "epoch": 0.25848729582210517, + "grad_norm": 202.67037963867188, + "learning_rate": 9.277544436189693e-06, + "loss": 24.5088, + "step": 127960 + }, + { + "epoch": 0.258507496454789, + "grad_norm": 521.38427734375, + "learning_rate": 9.27736368361305e-06, + "loss": 14.0373, + "step": 127970 + }, + { + "epoch": 0.25852769708747275, + "grad_norm": 328.37091064453125, + "learning_rate": 9.277182910189056e-06, + "loss": 19.4296, + "step": 127980 + }, + { + "epoch": 0.25854789772015657, + "grad_norm": 70.05731964111328, + "learning_rate": 9.27700211591859e-06, + "loss": 22.5398, + "step": 127990 + }, + { + "epoch": 0.2585680983528404, + "grad_norm": 293.0453186035156, + "learning_rate": 9.276821300802535e-06, + "loss": 14.6514, + "step": 128000 + }, + { + "epoch": 0.2585882989855242, + "grad_norm": 384.33099365234375, + "learning_rate": 9.27664046484177e-06, + "loss": 23.3837, + "step": 128010 + }, + { + "epoch": 0.25860849961820803, + "grad_norm": 344.4331359863281, + "learning_rate": 9.27645960803718e-06, + "loss": 24.3928, + "step": 128020 + }, + { + "epoch": 0.25862870025089185, + "grad_norm": 467.5977783203125, + "learning_rate": 9.276278730389642e-06, + "loss": 21.3627, + "step": 128030 + }, + { + "epoch": 0.25864890088357567, + "grad_norm": 633.8275146484375, + "learning_rate": 9.276097831900044e-06, + "loss": 24.7434, + "step": 128040 + }, + { + "epoch": 0.2586691015162595, 
+ "grad_norm": 797.9624633789062, + "learning_rate": 9.275916912569261e-06, + "loss": 13.6561, + "step": 128050 + }, + { + "epoch": 0.2586893021489433, + "grad_norm": 675.3600463867188, + "learning_rate": 9.27573597239818e-06, + "loss": 27.3217, + "step": 128060 + }, + { + "epoch": 0.25870950278162713, + "grad_norm": 677.53515625, + "learning_rate": 9.275555011387679e-06, + "loss": 9.3107, + "step": 128070 + }, + { + "epoch": 0.25872970341431095, + "grad_norm": 60.00661087036133, + "learning_rate": 9.275374029538639e-06, + "loss": 35.8117, + "step": 128080 + }, + { + "epoch": 0.25874990404699477, + "grad_norm": 431.9416809082031, + "learning_rate": 9.275193026851947e-06, + "loss": 16.7078, + "step": 128090 + }, + { + "epoch": 0.2587701046796786, + "grad_norm": 206.30618286132812, + "learning_rate": 9.275012003328483e-06, + "loss": 21.6261, + "step": 128100 + }, + { + "epoch": 0.25879030531236236, + "grad_norm": 149.74598693847656, + "learning_rate": 9.274830958969129e-06, + "loss": 23.0925, + "step": 128110 + }, + { + "epoch": 0.2588105059450462, + "grad_norm": 473.61517333984375, + "learning_rate": 9.274649893774768e-06, + "loss": 36.3299, + "step": 128120 + }, + { + "epoch": 0.25883070657773, + "grad_norm": 210.2192840576172, + "learning_rate": 9.27446880774628e-06, + "loss": 39.463, + "step": 128130 + }, + { + "epoch": 0.2588509072104138, + "grad_norm": 338.50628662109375, + "learning_rate": 9.27428770088455e-06, + "loss": 14.9153, + "step": 128140 + }, + { + "epoch": 0.25887110784309764, + "grad_norm": 15.398112297058105, + "learning_rate": 9.27410657319046e-06, + "loss": 27.2724, + "step": 128150 + }, + { + "epoch": 0.25889130847578146, + "grad_norm": 289.1322937011719, + "learning_rate": 9.273925424664894e-06, + "loss": 14.9957, + "step": 128160 + }, + { + "epoch": 0.2589115091084653, + "grad_norm": 551.4632568359375, + "learning_rate": 9.273744255308733e-06, + "loss": 24.0425, + "step": 128170 + }, + { + "epoch": 0.2589317097411491, + "grad_norm": 342.7764892578125, + "learning_rate": 9.273563065122862e-06, + "loss": 21.7269, + "step": 128180 + }, + { + "epoch": 0.2589519103738329, + "grad_norm": 715.4881591796875, + "learning_rate": 9.27338185410816e-06, + "loss": 23.4469, + "step": 128190 + }, + { + "epoch": 0.25897211100651674, + "grad_norm": 710.1723022460938, + "learning_rate": 9.273200622265516e-06, + "loss": 23.1727, + "step": 128200 + }, + { + "epoch": 0.25899231163920056, + "grad_norm": 335.8462219238281, + "learning_rate": 9.27301936959581e-06, + "loss": 26.6075, + "step": 128210 + }, + { + "epoch": 0.2590125122718844, + "grad_norm": 757.0194091796875, + "learning_rate": 9.272838096099926e-06, + "loss": 38.6605, + "step": 128220 + }, + { + "epoch": 0.2590327129045682, + "grad_norm": 285.6708068847656, + "learning_rate": 9.272656801778745e-06, + "loss": 21.8727, + "step": 128230 + }, + { + "epoch": 0.25905291353725196, + "grad_norm": 182.349365234375, + "learning_rate": 9.272475486633155e-06, + "loss": 21.5104, + "step": 128240 + }, + { + "epoch": 0.2590731141699358, + "grad_norm": 310.44403076171875, + "learning_rate": 9.272294150664039e-06, + "loss": 43.0297, + "step": 128250 + }, + { + "epoch": 0.2590933148026196, + "grad_norm": 414.88885498046875, + "learning_rate": 9.272112793872277e-06, + "loss": 18.6812, + "step": 128260 + }, + { + "epoch": 0.2591135154353034, + "grad_norm": 254.77940368652344, + "learning_rate": 9.271931416258756e-06, + "loss": 31.6537, + "step": 128270 + }, + { + "epoch": 0.25913371606798724, + "grad_norm": 203.3861541748047, + "learning_rate": 
9.27175001782436e-06, + "loss": 17.7203, + "step": 128280 + }, + { + "epoch": 0.25915391670067106, + "grad_norm": 674.7282104492188, + "learning_rate": 9.271568598569971e-06, + "loss": 23.375, + "step": 128290 + }, + { + "epoch": 0.2591741173333549, + "grad_norm": 314.7342529296875, + "learning_rate": 9.271387158496477e-06, + "loss": 13.6596, + "step": 128300 + }, + { + "epoch": 0.2591943179660387, + "grad_norm": 298.5931701660156, + "learning_rate": 9.271205697604759e-06, + "loss": 11.1612, + "step": 128310 + }, + { + "epoch": 0.2592145185987225, + "grad_norm": 680.8109130859375, + "learning_rate": 9.271024215895702e-06, + "loss": 25.9474, + "step": 128320 + }, + { + "epoch": 0.25923471923140634, + "grad_norm": 501.0088195800781, + "learning_rate": 9.270842713370192e-06, + "loss": 20.2804, + "step": 128330 + }, + { + "epoch": 0.25925491986409016, + "grad_norm": 278.8302917480469, + "learning_rate": 9.270661190029112e-06, + "loss": 15.063, + "step": 128340 + }, + { + "epoch": 0.259275120496774, + "grad_norm": 368.35577392578125, + "learning_rate": 9.270479645873347e-06, + "loss": 26.0217, + "step": 128350 + }, + { + "epoch": 0.2592953211294578, + "grad_norm": 692.879638671875, + "learning_rate": 9.270298080903782e-06, + "loss": 25.3173, + "step": 128360 + }, + { + "epoch": 0.25931552176214157, + "grad_norm": 437.3133239746094, + "learning_rate": 9.270116495121303e-06, + "loss": 39.2009, + "step": 128370 + }, + { + "epoch": 0.2593357223948254, + "grad_norm": 466.69622802734375, + "learning_rate": 9.269934888526793e-06, + "loss": 25.8048, + "step": 128380 + }, + { + "epoch": 0.2593559230275092, + "grad_norm": 87.55073547363281, + "learning_rate": 9.269753261121139e-06, + "loss": 21.5006, + "step": 128390 + }, + { + "epoch": 0.259376123660193, + "grad_norm": 210.5568084716797, + "learning_rate": 9.269571612905227e-06, + "loss": 15.77, + "step": 128400 + }, + { + "epoch": 0.25939632429287685, + "grad_norm": 611.5077514648438, + "learning_rate": 9.269389943879938e-06, + "loss": 18.144, + "step": 128410 + }, + { + "epoch": 0.25941652492556067, + "grad_norm": 155.47767639160156, + "learning_rate": 9.269208254046161e-06, + "loss": 22.0572, + "step": 128420 + }, + { + "epoch": 0.2594367255582445, + "grad_norm": 497.9201965332031, + "learning_rate": 9.269026543404782e-06, + "loss": 18.4371, + "step": 128430 + }, + { + "epoch": 0.2594569261909283, + "grad_norm": 448.6292724609375, + "learning_rate": 9.268844811956683e-06, + "loss": 11.5372, + "step": 128440 + }, + { + "epoch": 0.2594771268236121, + "grad_norm": 618.7467651367188, + "learning_rate": 9.268663059702753e-06, + "loss": 36.2373, + "step": 128450 + }, + { + "epoch": 0.25949732745629595, + "grad_norm": 165.6209259033203, + "learning_rate": 9.268481286643878e-06, + "loss": 25.0362, + "step": 128460 + }, + { + "epoch": 0.25951752808897977, + "grad_norm": 292.6033630371094, + "learning_rate": 9.268299492780942e-06, + "loss": 11.3669, + "step": 128470 + }, + { + "epoch": 0.2595377287216636, + "grad_norm": 1330.187744140625, + "learning_rate": 9.268117678114833e-06, + "loss": 37.4719, + "step": 128480 + }, + { + "epoch": 0.2595579293543474, + "grad_norm": 211.21011352539062, + "learning_rate": 9.267935842646437e-06, + "loss": 21.5859, + "step": 128490 + }, + { + "epoch": 0.25957812998703117, + "grad_norm": 214.05702209472656, + "learning_rate": 9.267753986376638e-06, + "loss": 30.5191, + "step": 128500 + }, + { + "epoch": 0.259598330619715, + "grad_norm": 1050.858642578125, + "learning_rate": 9.267572109306325e-06, + "loss": 30.533, + "step": 
128510 + }, + { + "epoch": 0.2596185312523988, + "grad_norm": 405.3336181640625, + "learning_rate": 9.26739021143638e-06, + "loss": 40.4927, + "step": 128520 + }, + { + "epoch": 0.25963873188508263, + "grad_norm": 607.3820190429688, + "learning_rate": 9.267208292767696e-06, + "loss": 24.9273, + "step": 128530 + }, + { + "epoch": 0.25965893251776645, + "grad_norm": 551.8255004882812, + "learning_rate": 9.267026353301155e-06, + "loss": 32.4756, + "step": 128540 + }, + { + "epoch": 0.25967913315045027, + "grad_norm": 1008.7666015625, + "learning_rate": 9.266844393037644e-06, + "loss": 35.6306, + "step": 128550 + }, + { + "epoch": 0.2596993337831341, + "grad_norm": 679.9075927734375, + "learning_rate": 9.266662411978052e-06, + "loss": 16.6644, + "step": 128560 + }, + { + "epoch": 0.2597195344158179, + "grad_norm": 0.0, + "learning_rate": 9.266480410123264e-06, + "loss": 23.0932, + "step": 128570 + }, + { + "epoch": 0.25973973504850173, + "grad_norm": 347.7939758300781, + "learning_rate": 9.266298387474169e-06, + "loss": 22.2822, + "step": 128580 + }, + { + "epoch": 0.25975993568118555, + "grad_norm": 508.5229187011719, + "learning_rate": 9.266116344031652e-06, + "loss": 13.1423, + "step": 128590 + }, + { + "epoch": 0.25978013631386937, + "grad_norm": 478.3236083984375, + "learning_rate": 9.265934279796602e-06, + "loss": 20.7805, + "step": 128600 + }, + { + "epoch": 0.2598003369465532, + "grad_norm": 407.348388671875, + "learning_rate": 9.265752194769906e-06, + "loss": 18.2017, + "step": 128610 + }, + { + "epoch": 0.25982053757923695, + "grad_norm": 0.0, + "learning_rate": 9.265570088952452e-06, + "loss": 22.6018, + "step": 128620 + }, + { + "epoch": 0.2598407382119208, + "grad_norm": 640.04296875, + "learning_rate": 9.265387962345125e-06, + "loss": 40.0134, + "step": 128630 + }, + { + "epoch": 0.2598609388446046, + "grad_norm": 108.46620178222656, + "learning_rate": 9.265205814948814e-06, + "loss": 21.3176, + "step": 128640 + }, + { + "epoch": 0.2598811394772884, + "grad_norm": 237.70730590820312, + "learning_rate": 9.265023646764409e-06, + "loss": 26.6285, + "step": 128650 + }, + { + "epoch": 0.25990134010997223, + "grad_norm": 1318.1649169921875, + "learning_rate": 9.264841457792795e-06, + "loss": 34.5219, + "step": 128660 + }, + { + "epoch": 0.25992154074265605, + "grad_norm": 185.10508728027344, + "learning_rate": 9.264659248034861e-06, + "loss": 39.616, + "step": 128670 + }, + { + "epoch": 0.2599417413753399, + "grad_norm": 173.03404235839844, + "learning_rate": 9.264477017491496e-06, + "loss": 9.3008, + "step": 128680 + }, + { + "epoch": 0.2599619420080237, + "grad_norm": 412.54022216796875, + "learning_rate": 9.264294766163587e-06, + "loss": 26.4566, + "step": 128690 + }, + { + "epoch": 0.2599821426407075, + "grad_norm": 589.6472778320312, + "learning_rate": 9.264112494052022e-06, + "loss": 18.9564, + "step": 128700 + }, + { + "epoch": 0.26000234327339133, + "grad_norm": 354.973388671875, + "learning_rate": 9.26393020115769e-06, + "loss": 36.5721, + "step": 128710 + }, + { + "epoch": 0.26002254390607515, + "grad_norm": 221.44171142578125, + "learning_rate": 9.26374788748148e-06, + "loss": 23.2721, + "step": 128720 + }, + { + "epoch": 0.260042744538759, + "grad_norm": 152.46949768066406, + "learning_rate": 9.263565553024279e-06, + "loss": 30.7172, + "step": 128730 + }, + { + "epoch": 0.2600629451714428, + "grad_norm": 396.7294921875, + "learning_rate": 9.263383197786978e-06, + "loss": 18.8694, + "step": 128740 + }, + { + "epoch": 0.26008314580412656, + "grad_norm": 647.7705078125, + 
"learning_rate": 9.263200821770462e-06, + "loss": 24.1541, + "step": 128750 + }, + { + "epoch": 0.2601033464368104, + "grad_norm": 296.6133728027344, + "learning_rate": 9.263018424975624e-06, + "loss": 23.7705, + "step": 128760 + }, + { + "epoch": 0.2601235470694942, + "grad_norm": 296.7532958984375, + "learning_rate": 9.262836007403352e-06, + "loss": 18.1715, + "step": 128770 + }, + { + "epoch": 0.260143747702178, + "grad_norm": 285.9989318847656, + "learning_rate": 9.262653569054532e-06, + "loss": 22.2177, + "step": 128780 + }, + { + "epoch": 0.26016394833486184, + "grad_norm": 214.42108154296875, + "learning_rate": 9.262471109930056e-06, + "loss": 14.9034, + "step": 128790 + }, + { + "epoch": 0.26018414896754566, + "grad_norm": 420.13201904296875, + "learning_rate": 9.262288630030814e-06, + "loss": 33.405, + "step": 128800 + }, + { + "epoch": 0.2602043496002295, + "grad_norm": 779.2055053710938, + "learning_rate": 9.262106129357693e-06, + "loss": 22.0531, + "step": 128810 + }, + { + "epoch": 0.2602245502329133, + "grad_norm": 295.20928955078125, + "learning_rate": 9.261923607911584e-06, + "loss": 19.7791, + "step": 128820 + }, + { + "epoch": 0.2602447508655971, + "grad_norm": 365.7851257324219, + "learning_rate": 9.261741065693377e-06, + "loss": 19.8517, + "step": 128830 + }, + { + "epoch": 0.26026495149828094, + "grad_norm": 494.4757080078125, + "learning_rate": 9.26155850270396e-06, + "loss": 12.8213, + "step": 128840 + }, + { + "epoch": 0.26028515213096476, + "grad_norm": 258.4847412109375, + "learning_rate": 9.261375918944224e-06, + "loss": 18.8568, + "step": 128850 + }, + { + "epoch": 0.2603053527636486, + "grad_norm": 369.2894287109375, + "learning_rate": 9.261193314415058e-06, + "loss": 16.7122, + "step": 128860 + }, + { + "epoch": 0.2603255533963324, + "grad_norm": 539.140869140625, + "learning_rate": 9.261010689117353e-06, + "loss": 40.7588, + "step": 128870 + }, + { + "epoch": 0.26034575402901616, + "grad_norm": 216.14306640625, + "learning_rate": 9.260828043051999e-06, + "loss": 24.3153, + "step": 128880 + }, + { + "epoch": 0.2603659546617, + "grad_norm": 368.29034423828125, + "learning_rate": 9.260645376219887e-06, + "loss": 14.8069, + "step": 128890 + }, + { + "epoch": 0.2603861552943838, + "grad_norm": 886.6476440429688, + "learning_rate": 9.260462688621906e-06, + "loss": 20.6665, + "step": 128900 + }, + { + "epoch": 0.2604063559270676, + "grad_norm": 649.2222290039062, + "learning_rate": 9.260279980258945e-06, + "loss": 21.8367, + "step": 128910 + }, + { + "epoch": 0.26042655655975144, + "grad_norm": 171.771728515625, + "learning_rate": 9.260097251131896e-06, + "loss": 13.6678, + "step": 128920 + }, + { + "epoch": 0.26044675719243526, + "grad_norm": 389.5770263671875, + "learning_rate": 9.259914501241651e-06, + "loss": 22.7834, + "step": 128930 + }, + { + "epoch": 0.2604669578251191, + "grad_norm": 237.25209045410156, + "learning_rate": 9.259731730589099e-06, + "loss": 21.3835, + "step": 128940 + }, + { + "epoch": 0.2604871584578029, + "grad_norm": 442.96246337890625, + "learning_rate": 9.25954893917513e-06, + "loss": 21.5349, + "step": 128950 + }, + { + "epoch": 0.2605073590904867, + "grad_norm": 955.6649780273438, + "learning_rate": 9.259366127000637e-06, + "loss": 15.3926, + "step": 128960 + }, + { + "epoch": 0.26052755972317054, + "grad_norm": 894.2137451171875, + "learning_rate": 9.259183294066512e-06, + "loss": 25.5139, + "step": 128970 + }, + { + "epoch": 0.26054776035585436, + "grad_norm": 378.1171569824219, + "learning_rate": 9.259000440373643e-06, + "loss": 
34.2166, + "step": 128980 + }, + { + "epoch": 0.2605679609885382, + "grad_norm": 427.4106140136719, + "learning_rate": 9.258817565922919e-06, + "loss": 25.1068, + "step": 128990 + }, + { + "epoch": 0.260588161621222, + "grad_norm": 396.2770690917969, + "learning_rate": 9.25863467071524e-06, + "loss": 17.0746, + "step": 129000 + }, + { + "epoch": 0.26060836225390577, + "grad_norm": 768.878662109375, + "learning_rate": 9.258451754751488e-06, + "loss": 27.6665, + "step": 129010 + }, + { + "epoch": 0.2606285628865896, + "grad_norm": 377.0856628417969, + "learning_rate": 9.25826881803256e-06, + "loss": 23.5891, + "step": 129020 + }, + { + "epoch": 0.2606487635192734, + "grad_norm": 942.701171875, + "learning_rate": 9.258085860559348e-06, + "loss": 33.9537, + "step": 129030 + }, + { + "epoch": 0.2606689641519572, + "grad_norm": 1123.97509765625, + "learning_rate": 9.257902882332739e-06, + "loss": 17.8747, + "step": 129040 + }, + { + "epoch": 0.26068916478464105, + "grad_norm": 260.6478576660156, + "learning_rate": 9.25771988335363e-06, + "loss": 28.6881, + "step": 129050 + }, + { + "epoch": 0.26070936541732487, + "grad_norm": 357.36151123046875, + "learning_rate": 9.25753686362291e-06, + "loss": 33.5757, + "step": 129060 + }, + { + "epoch": 0.2607295660500087, + "grad_norm": 423.8187255859375, + "learning_rate": 9.257353823141472e-06, + "loss": 16.5748, + "step": 129070 + }, + { + "epoch": 0.2607497666826925, + "grad_norm": 384.4149475097656, + "learning_rate": 9.257170761910208e-06, + "loss": 24.709, + "step": 129080 + }, + { + "epoch": 0.2607699673153763, + "grad_norm": 1516.5589599609375, + "learning_rate": 9.25698767993001e-06, + "loss": 45.4619, + "step": 129090 + }, + { + "epoch": 0.26079016794806015, + "grad_norm": 548.0400390625, + "learning_rate": 9.256804577201768e-06, + "loss": 39.1325, + "step": 129100 + }, + { + "epoch": 0.26081036858074397, + "grad_norm": 751.7698974609375, + "learning_rate": 9.25662145372638e-06, + "loss": 33.602, + "step": 129110 + }, + { + "epoch": 0.2608305692134278, + "grad_norm": 709.4901733398438, + "learning_rate": 9.256438309504733e-06, + "loss": 31.2311, + "step": 129120 + }, + { + "epoch": 0.2608507698461116, + "grad_norm": 785.0630493164062, + "learning_rate": 9.256255144537724e-06, + "loss": 16.7887, + "step": 129130 + }, + { + "epoch": 0.26087097047879537, + "grad_norm": 260.6936340332031, + "learning_rate": 9.256071958826243e-06, + "loss": 19.8295, + "step": 129140 + }, + { + "epoch": 0.2608911711114792, + "grad_norm": 613.6012573242188, + "learning_rate": 9.255888752371182e-06, + "loss": 25.0147, + "step": 129150 + }, + { + "epoch": 0.260911371744163, + "grad_norm": 464.50225830078125, + "learning_rate": 9.255705525173437e-06, + "loss": 21.9297, + "step": 129160 + }, + { + "epoch": 0.26093157237684683, + "grad_norm": 494.2185974121094, + "learning_rate": 9.255522277233899e-06, + "loss": 31.056, + "step": 129170 + }, + { + "epoch": 0.26095177300953065, + "grad_norm": 172.20408630371094, + "learning_rate": 9.255339008553462e-06, + "loss": 11.3265, + "step": 129180 + }, + { + "epoch": 0.26097197364221447, + "grad_norm": 808.3148193359375, + "learning_rate": 9.255155719133016e-06, + "loss": 41.8549, + "step": 129190 + }, + { + "epoch": 0.2609921742748983, + "grad_norm": 479.7608642578125, + "learning_rate": 9.25497240897346e-06, + "loss": 26.3807, + "step": 129200 + }, + { + "epoch": 0.2610123749075821, + "grad_norm": 312.90435791015625, + "learning_rate": 9.254789078075684e-06, + "loss": 20.9368, + "step": 129210 + }, + { + "epoch": 
0.26103257554026593, + "grad_norm": 249.3673095703125, + "learning_rate": 9.254605726440582e-06, + "loss": 28.2333, + "step": 129220 + }, + { + "epoch": 0.26105277617294975, + "grad_norm": 8.875776290893555, + "learning_rate": 9.254422354069048e-06, + "loss": 47.21, + "step": 129230 + }, + { + "epoch": 0.26107297680563357, + "grad_norm": 676.8912963867188, + "learning_rate": 9.254238960961975e-06, + "loss": 26.9934, + "step": 129240 + }, + { + "epoch": 0.2610931774383174, + "grad_norm": 288.67999267578125, + "learning_rate": 9.254055547120258e-06, + "loss": 19.4308, + "step": 129250 + }, + { + "epoch": 0.26111337807100116, + "grad_norm": 215.28472900390625, + "learning_rate": 9.253872112544788e-06, + "loss": 16.4805, + "step": 129260 + }, + { + "epoch": 0.261133578703685, + "grad_norm": 202.24591064453125, + "learning_rate": 9.253688657236463e-06, + "loss": 29.6428, + "step": 129270 + }, + { + "epoch": 0.2611537793363688, + "grad_norm": 246.48748779296875, + "learning_rate": 9.253505181196176e-06, + "loss": 34.1483, + "step": 129280 + }, + { + "epoch": 0.2611739799690526, + "grad_norm": 745.95166015625, + "learning_rate": 9.25332168442482e-06, + "loss": 15.1879, + "step": 129290 + }, + { + "epoch": 0.26119418060173644, + "grad_norm": 805.440185546875, + "learning_rate": 9.25313816692329e-06, + "loss": 24.1355, + "step": 129300 + }, + { + "epoch": 0.26121438123442026, + "grad_norm": 663.4802856445312, + "learning_rate": 9.252954628692479e-06, + "loss": 19.649, + "step": 129310 + }, + { + "epoch": 0.2612345818671041, + "grad_norm": 197.54844665527344, + "learning_rate": 9.252771069733285e-06, + "loss": 24.2695, + "step": 129320 + }, + { + "epoch": 0.2612547824997879, + "grad_norm": 470.1011047363281, + "learning_rate": 9.2525874900466e-06, + "loss": 17.2617, + "step": 129330 + }, + { + "epoch": 0.2612749831324717, + "grad_norm": 385.9554443359375, + "learning_rate": 9.252403889633319e-06, + "loss": 15.2263, + "step": 129340 + }, + { + "epoch": 0.26129518376515554, + "grad_norm": 262.08868408203125, + "learning_rate": 9.252220268494336e-06, + "loss": 25.9276, + "step": 129350 + }, + { + "epoch": 0.26131538439783936, + "grad_norm": 526.8399658203125, + "learning_rate": 9.25203662663055e-06, + "loss": 27.2101, + "step": 129360 + }, + { + "epoch": 0.2613355850305232, + "grad_norm": 315.19610595703125, + "learning_rate": 9.251852964042852e-06, + "loss": 14.869, + "step": 129370 + }, + { + "epoch": 0.261355785663207, + "grad_norm": 349.370361328125, + "learning_rate": 9.251669280732137e-06, + "loss": 40.9022, + "step": 129380 + }, + { + "epoch": 0.26137598629589076, + "grad_norm": 304.42108154296875, + "learning_rate": 9.251485576699302e-06, + "loss": 45.1989, + "step": 129390 + }, + { + "epoch": 0.2613961869285746, + "grad_norm": 348.2828369140625, + "learning_rate": 9.251301851945244e-06, + "loss": 16.924, + "step": 129400 + }, + { + "epoch": 0.2614163875612584, + "grad_norm": 498.5204162597656, + "learning_rate": 9.251118106470855e-06, + "loss": 28.0091, + "step": 129410 + }, + { + "epoch": 0.2614365881939422, + "grad_norm": 599.5923461914062, + "learning_rate": 9.250934340277031e-06, + "loss": 19.8481, + "step": 129420 + }, + { + "epoch": 0.26145678882662604, + "grad_norm": 218.1306610107422, + "learning_rate": 9.250750553364669e-06, + "loss": 21.7586, + "step": 129430 + }, + { + "epoch": 0.26147698945930986, + "grad_norm": 538.0509643554688, + "learning_rate": 9.250566745734666e-06, + "loss": 11.41, + "step": 129440 + }, + { + "epoch": 0.2614971900919937, + "grad_norm": 306.4842529296875, + 
"learning_rate": 9.250382917387915e-06, + "loss": 24.8353, + "step": 129450 + }, + { + "epoch": 0.2615173907246775, + "grad_norm": 590.1456298828125, + "learning_rate": 9.250199068325314e-06, + "loss": 23.3935, + "step": 129460 + }, + { + "epoch": 0.2615375913573613, + "grad_norm": 461.6360778808594, + "learning_rate": 9.250015198547757e-06, + "loss": 15.0752, + "step": 129470 + }, + { + "epoch": 0.26155779199004514, + "grad_norm": 509.09942626953125, + "learning_rate": 9.249831308056141e-06, + "loss": 30.315, + "step": 129480 + }, + { + "epoch": 0.26157799262272896, + "grad_norm": 353.9985656738281, + "learning_rate": 9.249647396851364e-06, + "loss": 24.9397, + "step": 129490 + }, + { + "epoch": 0.2615981932554128, + "grad_norm": 305.169921875, + "learning_rate": 9.24946346493432e-06, + "loss": 22.4525, + "step": 129500 + }, + { + "epoch": 0.2616183938880966, + "grad_norm": 231.9904327392578, + "learning_rate": 9.249279512305907e-06, + "loss": 10.315, + "step": 129510 + }, + { + "epoch": 0.26163859452078037, + "grad_norm": 152.6187286376953, + "learning_rate": 9.249095538967021e-06, + "loss": 20.6671, + "step": 129520 + }, + { + "epoch": 0.2616587951534642, + "grad_norm": 474.48828125, + "learning_rate": 9.248911544918559e-06, + "loss": 35.5178, + "step": 129530 + }, + { + "epoch": 0.261678995786148, + "grad_norm": 339.6426696777344, + "learning_rate": 9.248727530161417e-06, + "loss": 16.1868, + "step": 129540 + }, + { + "epoch": 0.2616991964188318, + "grad_norm": 502.6196594238281, + "learning_rate": 9.248543494696493e-06, + "loss": 32.4252, + "step": 129550 + }, + { + "epoch": 0.26171939705151565, + "grad_norm": 104.3818359375, + "learning_rate": 9.248359438524683e-06, + "loss": 17.978, + "step": 129560 + }, + { + "epoch": 0.26173959768419947, + "grad_norm": 536.156005859375, + "learning_rate": 9.248175361646884e-06, + "loss": 20.8205, + "step": 129570 + }, + { + "epoch": 0.2617597983168833, + "grad_norm": 112.09039306640625, + "learning_rate": 9.247991264063994e-06, + "loss": 12.219, + "step": 129580 + }, + { + "epoch": 0.2617799989495671, + "grad_norm": 381.011962890625, + "learning_rate": 9.247807145776909e-06, + "loss": 23.9607, + "step": 129590 + }, + { + "epoch": 0.2618001995822509, + "grad_norm": 86.920166015625, + "learning_rate": 9.247623006786529e-06, + "loss": 16.1657, + "step": 129600 + }, + { + "epoch": 0.26182040021493475, + "grad_norm": 379.3994140625, + "learning_rate": 9.247438847093747e-06, + "loss": 22.3879, + "step": 129610 + }, + { + "epoch": 0.26184060084761857, + "grad_norm": 583.8914184570312, + "learning_rate": 9.247254666699465e-06, + "loss": 21.4429, + "step": 129620 + }, + { + "epoch": 0.2618608014803024, + "grad_norm": 155.83740234375, + "learning_rate": 9.247070465604578e-06, + "loss": 22.1425, + "step": 129630 + }, + { + "epoch": 0.2618810021129862, + "grad_norm": 0.0, + "learning_rate": 9.246886243809985e-06, + "loss": 25.3997, + "step": 129640 + }, + { + "epoch": 0.26190120274566997, + "grad_norm": 722.5323486328125, + "learning_rate": 9.246702001316584e-06, + "loss": 23.2897, + "step": 129650 + }, + { + "epoch": 0.2619214033783538, + "grad_norm": 293.03741455078125, + "learning_rate": 9.246517738125271e-06, + "loss": 18.687, + "step": 129660 + }, + { + "epoch": 0.2619416040110376, + "grad_norm": 763.05859375, + "learning_rate": 9.246333454236946e-06, + "loss": 30.6125, + "step": 129670 + }, + { + "epoch": 0.26196180464372143, + "grad_norm": 316.6152038574219, + "learning_rate": 9.246149149652507e-06, + "loss": 24.5053, + "step": 129680 + }, + { + 
"epoch": 0.26198200527640525, + "grad_norm": 319.44903564453125, + "learning_rate": 9.245964824372855e-06, + "loss": 20.3691, + "step": 129690 + }, + { + "epoch": 0.26200220590908907, + "grad_norm": 428.3174133300781, + "learning_rate": 9.245780478398883e-06, + "loss": 29.8476, + "step": 129700 + }, + { + "epoch": 0.2620224065417729, + "grad_norm": 305.292236328125, + "learning_rate": 9.245596111731492e-06, + "loss": 15.4894, + "step": 129710 + }, + { + "epoch": 0.2620426071744567, + "grad_norm": 107.9541015625, + "learning_rate": 9.245411724371578e-06, + "loss": 18.3254, + "step": 129720 + }, + { + "epoch": 0.26206280780714053, + "grad_norm": 519.9327392578125, + "learning_rate": 9.245227316320046e-06, + "loss": 14.6097, + "step": 129730 + }, + { + "epoch": 0.26208300843982435, + "grad_norm": 408.0529479980469, + "learning_rate": 9.245042887577789e-06, + "loss": 22.4248, + "step": 129740 + }, + { + "epoch": 0.26210320907250817, + "grad_norm": 772.3802490234375, + "learning_rate": 9.244858438145709e-06, + "loss": 46.9441, + "step": 129750 + }, + { + "epoch": 0.262123409705192, + "grad_norm": 654.0269775390625, + "learning_rate": 9.244673968024701e-06, + "loss": 31.8628, + "step": 129760 + }, + { + "epoch": 0.2621436103378758, + "grad_norm": 159.417724609375, + "learning_rate": 9.24448947721567e-06, + "loss": 22.3518, + "step": 129770 + }, + { + "epoch": 0.2621638109705596, + "grad_norm": 395.1319274902344, + "learning_rate": 9.24430496571951e-06, + "loss": 15.4356, + "step": 129780 + }, + { + "epoch": 0.2621840116032434, + "grad_norm": 196.5967559814453, + "learning_rate": 9.244120433537126e-06, + "loss": 13.7517, + "step": 129790 + }, + { + "epoch": 0.2622042122359272, + "grad_norm": 434.8796081542969, + "learning_rate": 9.24393588066941e-06, + "loss": 33.0948, + "step": 129800 + }, + { + "epoch": 0.26222441286861103, + "grad_norm": 1161.2562255859375, + "learning_rate": 9.243751307117266e-06, + "loss": 22.9127, + "step": 129810 + }, + { + "epoch": 0.26224461350129485, + "grad_norm": 59.643428802490234, + "learning_rate": 9.243566712881593e-06, + "loss": 17.4929, + "step": 129820 + }, + { + "epoch": 0.2622648141339787, + "grad_norm": 390.9410400390625, + "learning_rate": 9.243382097963292e-06, + "loss": 18.3477, + "step": 129830 + }, + { + "epoch": 0.2622850147666625, + "grad_norm": 0.0, + "learning_rate": 9.24319746236326e-06, + "loss": 15.5247, + "step": 129840 + }, + { + "epoch": 0.2623052153993463, + "grad_norm": 256.6327209472656, + "learning_rate": 9.243012806082398e-06, + "loss": 36.769, + "step": 129850 + }, + { + "epoch": 0.26232541603203013, + "grad_norm": 471.3330383300781, + "learning_rate": 9.242828129121607e-06, + "loss": 15.0537, + "step": 129860 + }, + { + "epoch": 0.26234561666471395, + "grad_norm": 514.5746459960938, + "learning_rate": 9.242643431481783e-06, + "loss": 26.198, + "step": 129870 + }, + { + "epoch": 0.2623658172973978, + "grad_norm": 481.345458984375, + "learning_rate": 9.242458713163834e-06, + "loss": 22.4653, + "step": 129880 + }, + { + "epoch": 0.2623860179300816, + "grad_norm": 155.65939331054688, + "learning_rate": 9.242273974168655e-06, + "loss": 30.5696, + "step": 129890 + }, + { + "epoch": 0.26240621856276536, + "grad_norm": 383.5204162597656, + "learning_rate": 9.242089214497146e-06, + "loss": 36.1662, + "step": 129900 + }, + { + "epoch": 0.2624264191954492, + "grad_norm": 235.37841796875, + "learning_rate": 9.241904434150208e-06, + "loss": 29.4283, + "step": 129910 + }, + { + "epoch": 0.262446619828133, + "grad_norm": 1004.9755249023438, + 
"learning_rate": 9.241719633128743e-06, + "loss": 55.9024, + "step": 129920 + }, + { + "epoch": 0.2624668204608168, + "grad_norm": 759.134765625, + "learning_rate": 9.241534811433651e-06, + "loss": 29.0743, + "step": 129930 + }, + { + "epoch": 0.26248702109350064, + "grad_norm": 522.8464965820312, + "learning_rate": 9.241349969065834e-06, + "loss": 14.5997, + "step": 129940 + }, + { + "epoch": 0.26250722172618446, + "grad_norm": 476.12823486328125, + "learning_rate": 9.241165106026189e-06, + "loss": 32.0307, + "step": 129950 + }, + { + "epoch": 0.2625274223588683, + "grad_norm": 395.36431884765625, + "learning_rate": 9.24098022231562e-06, + "loss": 29.4751, + "step": 129960 + }, + { + "epoch": 0.2625476229915521, + "grad_norm": 204.78297424316406, + "learning_rate": 9.24079531793503e-06, + "loss": 25.3252, + "step": 129970 + }, + { + "epoch": 0.2625678236242359, + "grad_norm": 29.498233795166016, + "learning_rate": 9.24061039288532e-06, + "loss": 14.5478, + "step": 129980 + }, + { + "epoch": 0.26258802425691974, + "grad_norm": 4.016162872314453, + "learning_rate": 9.240425447167384e-06, + "loss": 22.3436, + "step": 129990 + }, + { + "epoch": 0.26260822488960356, + "grad_norm": 320.4252014160156, + "learning_rate": 9.24024048078213e-06, + "loss": 31.1223, + "step": 130000 + }, + { + "epoch": 0.2626284255222874, + "grad_norm": 149.36740112304688, + "learning_rate": 9.24005549373046e-06, + "loss": 11.1488, + "step": 130010 + }, + { + "epoch": 0.2626486261549712, + "grad_norm": 415.9408264160156, + "learning_rate": 9.239870486013272e-06, + "loss": 30.8264, + "step": 130020 + }, + { + "epoch": 0.26266882678765496, + "grad_norm": 311.9599609375, + "learning_rate": 9.23968545763147e-06, + "loss": 23.694, + "step": 130030 + }, + { + "epoch": 0.2626890274203388, + "grad_norm": 177.8672637939453, + "learning_rate": 9.239500408585956e-06, + "loss": 24.0172, + "step": 130040 + }, + { + "epoch": 0.2627092280530226, + "grad_norm": 159.47459411621094, + "learning_rate": 9.239315338877632e-06, + "loss": 16.004, + "step": 130050 + }, + { + "epoch": 0.2627294286857064, + "grad_norm": 143.70700073242188, + "learning_rate": 9.239130248507398e-06, + "loss": 10.5977, + "step": 130060 + }, + { + "epoch": 0.26274962931839024, + "grad_norm": 714.693603515625, + "learning_rate": 9.238945137476157e-06, + "loss": 23.609, + "step": 130070 + }, + { + "epoch": 0.26276982995107406, + "grad_norm": 313.3095703125, + "learning_rate": 9.23876000578481e-06, + "loss": 29.8307, + "step": 130080 + }, + { + "epoch": 0.2627900305837579, + "grad_norm": 882.6751708984375, + "learning_rate": 9.238574853434264e-06, + "loss": 17.9319, + "step": 130090 + }, + { + "epoch": 0.2628102312164417, + "grad_norm": 421.0577087402344, + "learning_rate": 9.238389680425417e-06, + "loss": 31.1081, + "step": 130100 + }, + { + "epoch": 0.2628304318491255, + "grad_norm": 305.2528991699219, + "learning_rate": 9.238204486759172e-06, + "loss": 28.0084, + "step": 130110 + }, + { + "epoch": 0.26285063248180934, + "grad_norm": 252.5142364501953, + "learning_rate": 9.238019272436434e-06, + "loss": 24.6949, + "step": 130120 + }, + { + "epoch": 0.26287083311449316, + "grad_norm": 502.39666748046875, + "learning_rate": 9.237834037458102e-06, + "loss": 16.0274, + "step": 130130 + }, + { + "epoch": 0.262891033747177, + "grad_norm": 710.8877563476562, + "learning_rate": 9.237648781825082e-06, + "loss": 31.8371, + "step": 130140 + }, + { + "epoch": 0.2629112343798608, + "grad_norm": 328.3483581542969, + "learning_rate": 9.237463505538277e-06, + "loss": 29.8537, + 
"step": 130150 + }, + { + "epoch": 0.26293143501254457, + "grad_norm": 227.37306213378906, + "learning_rate": 9.237278208598587e-06, + "loss": 22.7519, + "step": 130160 + }, + { + "epoch": 0.2629516356452284, + "grad_norm": 210.96592712402344, + "learning_rate": 9.237092891006918e-06, + "loss": 17.8267, + "step": 130170 + }, + { + "epoch": 0.2629718362779122, + "grad_norm": 525.88037109375, + "learning_rate": 9.236907552764171e-06, + "loss": 16.9909, + "step": 130180 + }, + { + "epoch": 0.262992036910596, + "grad_norm": 43.60026931762695, + "learning_rate": 9.236722193871252e-06, + "loss": 16.2976, + "step": 130190 + }, + { + "epoch": 0.26301223754327985, + "grad_norm": 512.9727172851562, + "learning_rate": 9.236536814329062e-06, + "loss": 21.3238, + "step": 130200 + }, + { + "epoch": 0.26303243817596367, + "grad_norm": 68.06608581542969, + "learning_rate": 9.236351414138505e-06, + "loss": 14.0629, + "step": 130210 + }, + { + "epoch": 0.2630526388086475, + "grad_norm": 297.6448669433594, + "learning_rate": 9.236165993300486e-06, + "loss": 15.0702, + "step": 130220 + }, + { + "epoch": 0.2630728394413313, + "grad_norm": 377.9416809082031, + "learning_rate": 9.235980551815907e-06, + "loss": 14.0996, + "step": 130230 + }, + { + "epoch": 0.2630930400740151, + "grad_norm": 254.04336547851562, + "learning_rate": 9.235795089685673e-06, + "loss": 25.687, + "step": 130240 + }, + { + "epoch": 0.26311324070669895, + "grad_norm": 416.20892333984375, + "learning_rate": 9.235609606910687e-06, + "loss": 22.9083, + "step": 130250 + }, + { + "epoch": 0.26313344133938277, + "grad_norm": 98.65184783935547, + "learning_rate": 9.235424103491853e-06, + "loss": 21.522, + "step": 130260 + }, + { + "epoch": 0.2631536419720666, + "grad_norm": 712.9963989257812, + "learning_rate": 9.235238579430077e-06, + "loss": 20.6476, + "step": 130270 + }, + { + "epoch": 0.2631738426047504, + "grad_norm": 431.2131042480469, + "learning_rate": 9.235053034726261e-06, + "loss": 16.2226, + "step": 130280 + }, + { + "epoch": 0.26319404323743417, + "grad_norm": 429.3744812011719, + "learning_rate": 9.23486746938131e-06, + "loss": 23.2903, + "step": 130290 + }, + { + "epoch": 0.263214243870118, + "grad_norm": 937.98583984375, + "learning_rate": 9.234681883396129e-06, + "loss": 16.3498, + "step": 130300 + }, + { + "epoch": 0.2632344445028018, + "grad_norm": 419.97271728515625, + "learning_rate": 9.234496276771622e-06, + "loss": 12.2704, + "step": 130310 + }, + { + "epoch": 0.26325464513548563, + "grad_norm": 261.0768127441406, + "learning_rate": 9.234310649508694e-06, + "loss": 18.5051, + "step": 130320 + }, + { + "epoch": 0.26327484576816945, + "grad_norm": 640.5191650390625, + "learning_rate": 9.23412500160825e-06, + "loss": 30.3407, + "step": 130330 + }, + { + "epoch": 0.26329504640085327, + "grad_norm": 0.0, + "learning_rate": 9.233939333071193e-06, + "loss": 20.475, + "step": 130340 + }, + { + "epoch": 0.2633152470335371, + "grad_norm": 270.3042297363281, + "learning_rate": 9.233753643898428e-06, + "loss": 20.3726, + "step": 130350 + }, + { + "epoch": 0.2633354476662209, + "grad_norm": 309.3485107421875, + "learning_rate": 9.233567934090864e-06, + "loss": 10.1988, + "step": 130360 + }, + { + "epoch": 0.26335564829890473, + "grad_norm": 881.8538208007812, + "learning_rate": 9.233382203649402e-06, + "loss": 43.7343, + "step": 130370 + }, + { + "epoch": 0.26337584893158855, + "grad_norm": 261.611572265625, + "learning_rate": 9.23319645257495e-06, + "loss": 19.0534, + "step": 130380 + }, + { + "epoch": 0.26339604956427237, + 
"grad_norm": 305.3958435058594, + "learning_rate": 9.233010680868409e-06, + "loss": 22.8678, + "step": 130390 + }, + { + "epoch": 0.2634162501969562, + "grad_norm": 885.7476806640625, + "learning_rate": 9.232824888530689e-06, + "loss": 20.3841, + "step": 130400 + }, + { + "epoch": 0.26343645082964, + "grad_norm": 751.0313110351562, + "learning_rate": 9.232639075562695e-06, + "loss": 25.4823, + "step": 130410 + }, + { + "epoch": 0.2634566514623238, + "grad_norm": 535.940673828125, + "learning_rate": 9.23245324196533e-06, + "loss": 33.4305, + "step": 130420 + }, + { + "epoch": 0.2634768520950076, + "grad_norm": 330.66009521484375, + "learning_rate": 9.232267387739502e-06, + "loss": 24.7655, + "step": 130430 + }, + { + "epoch": 0.2634970527276914, + "grad_norm": 475.0093078613281, + "learning_rate": 9.232081512886116e-06, + "loss": 17.1832, + "step": 130440 + }, + { + "epoch": 0.26351725336037524, + "grad_norm": 1036.0198974609375, + "learning_rate": 9.231895617406076e-06, + "loss": 17.8261, + "step": 130450 + }, + { + "epoch": 0.26353745399305906, + "grad_norm": 1016.4810180664062, + "learning_rate": 9.231709701300293e-06, + "loss": 38.8252, + "step": 130460 + }, + { + "epoch": 0.2635576546257429, + "grad_norm": 725.9237060546875, + "learning_rate": 9.23152376456967e-06, + "loss": 24.6006, + "step": 130470 + }, + { + "epoch": 0.2635778552584267, + "grad_norm": 305.17742919921875, + "learning_rate": 9.231337807215111e-06, + "loss": 23.3218, + "step": 130480 + }, + { + "epoch": 0.2635980558911105, + "grad_norm": 383.8266296386719, + "learning_rate": 9.231151829237527e-06, + "loss": 30.612, + "step": 130490 + }, + { + "epoch": 0.26361825652379434, + "grad_norm": 311.66461181640625, + "learning_rate": 9.230965830637821e-06, + "loss": 16.6466, + "step": 130500 + }, + { + "epoch": 0.26363845715647816, + "grad_norm": 391.4200439453125, + "learning_rate": 9.230779811416901e-06, + "loss": 13.5666, + "step": 130510 + }, + { + "epoch": 0.263658657789162, + "grad_norm": 273.90948486328125, + "learning_rate": 9.230593771575673e-06, + "loss": 19.3705, + "step": 130520 + }, + { + "epoch": 0.2636788584218458, + "grad_norm": 291.2491760253906, + "learning_rate": 9.230407711115043e-06, + "loss": 14.9211, + "step": 130530 + }, + { + "epoch": 0.26369905905452956, + "grad_norm": 964.318115234375, + "learning_rate": 9.230221630035921e-06, + "loss": 18.9726, + "step": 130540 + }, + { + "epoch": 0.2637192596872134, + "grad_norm": 253.50123596191406, + "learning_rate": 9.230035528339212e-06, + "loss": 15.7515, + "step": 130550 + }, + { + "epoch": 0.2637394603198972, + "grad_norm": 256.3843688964844, + "learning_rate": 9.229849406025821e-06, + "loss": 24.3174, + "step": 130560 + }, + { + "epoch": 0.263759660952581, + "grad_norm": 745.5055541992188, + "learning_rate": 9.22966326309666e-06, + "loss": 23.5996, + "step": 130570 + }, + { + "epoch": 0.26377986158526484, + "grad_norm": 312.48614501953125, + "learning_rate": 9.22947709955263e-06, + "loss": 9.0544, + "step": 130580 + }, + { + "epoch": 0.26380006221794866, + "grad_norm": 367.24407958984375, + "learning_rate": 9.229290915394643e-06, + "loss": 22.1031, + "step": 130590 + }, + { + "epoch": 0.2638202628506325, + "grad_norm": 169.04710388183594, + "learning_rate": 9.229104710623604e-06, + "loss": 17.1724, + "step": 130600 + }, + { + "epoch": 0.2638404634833163, + "grad_norm": 228.10475158691406, + "learning_rate": 9.228918485240423e-06, + "loss": 16.7043, + "step": 130610 + }, + { + "epoch": 0.2638606641160001, + "grad_norm": 189.06100463867188, + "learning_rate": 
9.228732239246005e-06, + "loss": 16.8849, + "step": 130620 + }, + { + "epoch": 0.26388086474868394, + "grad_norm": 416.0789489746094, + "learning_rate": 9.22854597264126e-06, + "loss": 13.3817, + "step": 130630 + }, + { + "epoch": 0.26390106538136776, + "grad_norm": 662.724609375, + "learning_rate": 9.228359685427095e-06, + "loss": 47.7258, + "step": 130640 + }, + { + "epoch": 0.2639212660140516, + "grad_norm": 282.7579650878906, + "learning_rate": 9.228173377604417e-06, + "loss": 20.3818, + "step": 130650 + }, + { + "epoch": 0.2639414666467354, + "grad_norm": 272.92059326171875, + "learning_rate": 9.227987049174133e-06, + "loss": 15.7969, + "step": 130660 + }, + { + "epoch": 0.26396166727941917, + "grad_norm": 249.759521484375, + "learning_rate": 9.227800700137156e-06, + "loss": 39.5092, + "step": 130670 + }, + { + "epoch": 0.263981867912103, + "grad_norm": 421.3340148925781, + "learning_rate": 9.22761433049439e-06, + "loss": 15.4833, + "step": 130680 + }, + { + "epoch": 0.2640020685447868, + "grad_norm": 301.9707946777344, + "learning_rate": 9.227427940246744e-06, + "loss": 17.5809, + "step": 130690 + }, + { + "epoch": 0.2640222691774706, + "grad_norm": 400.67236328125, + "learning_rate": 9.227241529395127e-06, + "loss": 26.925, + "step": 130700 + }, + { + "epoch": 0.26404246981015445, + "grad_norm": 619.1936645507812, + "learning_rate": 9.22705509794045e-06, + "loss": 19.8829, + "step": 130710 + }, + { + "epoch": 0.26406267044283827, + "grad_norm": 857.1577758789062, + "learning_rate": 9.226868645883616e-06, + "loss": 34.858, + "step": 130720 + }, + { + "epoch": 0.2640828710755221, + "grad_norm": 753.0050048828125, + "learning_rate": 9.226682173225537e-06, + "loss": 16.9288, + "step": 130730 + }, + { + "epoch": 0.2641030717082059, + "grad_norm": 195.84051513671875, + "learning_rate": 9.226495679967123e-06, + "loss": 29.0746, + "step": 130740 + }, + { + "epoch": 0.2641232723408897, + "grad_norm": 890.2536010742188, + "learning_rate": 9.226309166109281e-06, + "loss": 37.294, + "step": 130750 + }, + { + "epoch": 0.26414347297357355, + "grad_norm": 576.2210693359375, + "learning_rate": 9.226122631652921e-06, + "loss": 26.1176, + "step": 130760 + }, + { + "epoch": 0.26416367360625737, + "grad_norm": 340.2896728515625, + "learning_rate": 9.225936076598952e-06, + "loss": 13.9397, + "step": 130770 + }, + { + "epoch": 0.2641838742389412, + "grad_norm": 226.89231872558594, + "learning_rate": 9.225749500948283e-06, + "loss": 16.8131, + "step": 130780 + }, + { + "epoch": 0.264204074871625, + "grad_norm": 303.71234130859375, + "learning_rate": 9.225562904701823e-06, + "loss": 37.1931, + "step": 130790 + }, + { + "epoch": 0.26422427550430877, + "grad_norm": 440.4466552734375, + "learning_rate": 9.225376287860484e-06, + "loss": 25.3327, + "step": 130800 + }, + { + "epoch": 0.2642444761369926, + "grad_norm": 573.6793212890625, + "learning_rate": 9.22518965042517e-06, + "loss": 28.6933, + "step": 130810 + }, + { + "epoch": 0.2642646767696764, + "grad_norm": 279.36041259765625, + "learning_rate": 9.225002992396797e-06, + "loss": 22.1667, + "step": 130820 + }, + { + "epoch": 0.26428487740236023, + "grad_norm": 210.71812438964844, + "learning_rate": 9.22481631377627e-06, + "loss": 14.5621, + "step": 130830 + }, + { + "epoch": 0.26430507803504405, + "grad_norm": 578.2293701171875, + "learning_rate": 9.224629614564502e-06, + "loss": 15.9755, + "step": 130840 + }, + { + "epoch": 0.26432527866772787, + "grad_norm": 100.01342010498047, + "learning_rate": 9.224442894762401e-06, + "loss": 24.9578, + "step": 
130850 + }, + { + "epoch": 0.2643454793004117, + "grad_norm": 2.627403974533081, + "learning_rate": 9.224256154370878e-06, + "loss": 21.8294, + "step": 130860 + }, + { + "epoch": 0.2643656799330955, + "grad_norm": 225.63209533691406, + "learning_rate": 9.224069393390843e-06, + "loss": 25.4549, + "step": 130870 + }, + { + "epoch": 0.26438588056577933, + "grad_norm": 549.8444213867188, + "learning_rate": 9.223882611823205e-06, + "loss": 32.2733, + "step": 130880 + }, + { + "epoch": 0.26440608119846315, + "grad_norm": 439.44549560546875, + "learning_rate": 9.223695809668876e-06, + "loss": 19.6963, + "step": 130890 + }, + { + "epoch": 0.26442628183114697, + "grad_norm": 125.24061584472656, + "learning_rate": 9.223508986928766e-06, + "loss": 18.845, + "step": 130900 + }, + { + "epoch": 0.2644464824638308, + "grad_norm": 212.72653198242188, + "learning_rate": 9.223322143603786e-06, + "loss": 27.8434, + "step": 130910 + }, + { + "epoch": 0.2644666830965146, + "grad_norm": 1187.4769287109375, + "learning_rate": 9.223135279694845e-06, + "loss": 17.0523, + "step": 130920 + }, + { + "epoch": 0.2644868837291984, + "grad_norm": 213.18798828125, + "learning_rate": 9.222948395202855e-06, + "loss": 18.4288, + "step": 130930 + }, + { + "epoch": 0.2645070843618822, + "grad_norm": 890.8534545898438, + "learning_rate": 9.222761490128726e-06, + "loss": 28.4153, + "step": 130940 + }, + { + "epoch": 0.264527284994566, + "grad_norm": 285.16400146484375, + "learning_rate": 9.222574564473372e-06, + "loss": 23.9872, + "step": 130950 + }, + { + "epoch": 0.26454748562724983, + "grad_norm": 436.7076110839844, + "learning_rate": 9.222387618237701e-06, + "loss": 28.0928, + "step": 130960 + }, + { + "epoch": 0.26456768625993365, + "grad_norm": 523.7005004882812, + "learning_rate": 9.222200651422624e-06, + "loss": 18.3557, + "step": 130970 + }, + { + "epoch": 0.2645878868926175, + "grad_norm": 363.5169372558594, + "learning_rate": 9.222013664029053e-06, + "loss": 17.662, + "step": 130980 + }, + { + "epoch": 0.2646080875253013, + "grad_norm": 249.8449249267578, + "learning_rate": 9.2218266560579e-06, + "loss": 19.4438, + "step": 130990 + }, + { + "epoch": 0.2646282881579851, + "grad_norm": 404.82635498046875, + "learning_rate": 9.221639627510076e-06, + "loss": 14.5118, + "step": 131000 + }, + { + "epoch": 0.26464848879066893, + "grad_norm": 347.28900146484375, + "learning_rate": 9.221452578386492e-06, + "loss": 29.3885, + "step": 131010 + }, + { + "epoch": 0.26466868942335275, + "grad_norm": 310.66021728515625, + "learning_rate": 9.221265508688061e-06, + "loss": 17.9626, + "step": 131020 + }, + { + "epoch": 0.2646888900560366, + "grad_norm": 421.09246826171875, + "learning_rate": 9.221078418415692e-06, + "loss": 19.3031, + "step": 131030 + }, + { + "epoch": 0.2647090906887204, + "grad_norm": 373.6748352050781, + "learning_rate": 9.220891307570301e-06, + "loss": 11.5313, + "step": 131040 + }, + { + "epoch": 0.26472929132140416, + "grad_norm": 375.723876953125, + "learning_rate": 9.220704176152798e-06, + "loss": 16.8964, + "step": 131050 + }, + { + "epoch": 0.264749491954088, + "grad_norm": 82.238525390625, + "learning_rate": 9.220517024164092e-06, + "loss": 14.9036, + "step": 131060 + }, + { + "epoch": 0.2647696925867718, + "grad_norm": 649.5223999023438, + "learning_rate": 9.2203298516051e-06, + "loss": 16.9638, + "step": 131070 + }, + { + "epoch": 0.2647898932194556, + "grad_norm": 438.2021789550781, + "learning_rate": 9.220142658476732e-06, + "loss": 24.1164, + "step": 131080 + }, + { + "epoch": 0.26481009385213944, + 
"grad_norm": 409.599365234375, + "learning_rate": 9.2199554447799e-06, + "loss": 13.8918, + "step": 131090 + }, + { + "epoch": 0.26483029448482326, + "grad_norm": 563.10546875, + "learning_rate": 9.219768210515518e-06, + "loss": 19.8584, + "step": 131100 + }, + { + "epoch": 0.2648504951175071, + "grad_norm": 448.8885803222656, + "learning_rate": 9.219580955684495e-06, + "loss": 30.1777, + "step": 131110 + }, + { + "epoch": 0.2648706957501909, + "grad_norm": 115.77897644042969, + "learning_rate": 9.21939368028775e-06, + "loss": 25.812, + "step": 131120 + }, + { + "epoch": 0.2648908963828747, + "grad_norm": 526.2110595703125, + "learning_rate": 9.21920638432619e-06, + "loss": 17.335, + "step": 131130 + }, + { + "epoch": 0.26491109701555854, + "grad_norm": 590.3601684570312, + "learning_rate": 9.219019067800728e-06, + "loss": 31.8216, + "step": 131140 + }, + { + "epoch": 0.26493129764824236, + "grad_norm": 239.01412963867188, + "learning_rate": 9.218831730712281e-06, + "loss": 25.5872, + "step": 131150 + }, + { + "epoch": 0.2649514982809262, + "grad_norm": 281.72625732421875, + "learning_rate": 9.218644373061759e-06, + "loss": 28.0989, + "step": 131160 + }, + { + "epoch": 0.26497169891361, + "grad_norm": 679.1547241210938, + "learning_rate": 9.218456994850076e-06, + "loss": 27.0523, + "step": 131170 + }, + { + "epoch": 0.26499189954629376, + "grad_norm": 516.555908203125, + "learning_rate": 9.218269596078145e-06, + "loss": 18.7261, + "step": 131180 + }, + { + "epoch": 0.2650121001789776, + "grad_norm": 611.985595703125, + "learning_rate": 9.21808217674688e-06, + "loss": 29.438, + "step": 131190 + }, + { + "epoch": 0.2650323008116614, + "grad_norm": 415.1947326660156, + "learning_rate": 9.217894736857195e-06, + "loss": 32.3591, + "step": 131200 + }, + { + "epoch": 0.2650525014443452, + "grad_norm": 424.1458435058594, + "learning_rate": 9.217707276410002e-06, + "loss": 19.8318, + "step": 131210 + }, + { + "epoch": 0.26507270207702904, + "grad_norm": 426.78851318359375, + "learning_rate": 9.217519795406214e-06, + "loss": 18.5401, + "step": 131220 + }, + { + "epoch": 0.26509290270971286, + "grad_norm": 233.7823486328125, + "learning_rate": 9.217332293846747e-06, + "loss": 15.6772, + "step": 131230 + }, + { + "epoch": 0.2651131033423967, + "grad_norm": 418.2563781738281, + "learning_rate": 9.217144771732515e-06, + "loss": 17.4979, + "step": 131240 + }, + { + "epoch": 0.2651333039750805, + "grad_norm": 775.1187133789062, + "learning_rate": 9.21695722906443e-06, + "loss": 35.5911, + "step": 131250 + }, + { + "epoch": 0.2651535046077643, + "grad_norm": 476.4820556640625, + "learning_rate": 9.216769665843406e-06, + "loss": 20.8171, + "step": 131260 + }, + { + "epoch": 0.26517370524044814, + "grad_norm": 411.09490966796875, + "learning_rate": 9.216582082070359e-06, + "loss": 25.5806, + "step": 131270 + }, + { + "epoch": 0.26519390587313196, + "grad_norm": 180.24594116210938, + "learning_rate": 9.216394477746202e-06, + "loss": 22.324, + "step": 131280 + }, + { + "epoch": 0.2652141065058158, + "grad_norm": 604.4251098632812, + "learning_rate": 9.21620685287185e-06, + "loss": 23.6203, + "step": 131290 + }, + { + "epoch": 0.2652343071384996, + "grad_norm": 639.6996459960938, + "learning_rate": 9.216019207448216e-06, + "loss": 25.4689, + "step": 131300 + }, + { + "epoch": 0.26525450777118337, + "grad_norm": 288.1322937011719, + "learning_rate": 9.215831541476217e-06, + "loss": 27.2807, + "step": 131310 + }, + { + "epoch": 0.2652747084038672, + "grad_norm": 364.5860900878906, + "learning_rate": 
9.215643854956766e-06, + "loss": 34.1475, + "step": 131320 + }, + { + "epoch": 0.265294909036551, + "grad_norm": 18.383115768432617, + "learning_rate": 9.215456147890778e-06, + "loss": 25.5754, + "step": 131330 + }, + { + "epoch": 0.2653151096692348, + "grad_norm": 436.0059509277344, + "learning_rate": 9.215268420279168e-06, + "loss": 24.1076, + "step": 131340 + }, + { + "epoch": 0.26533531030191865, + "grad_norm": 48.3547248840332, + "learning_rate": 9.215080672122854e-06, + "loss": 16.4566, + "step": 131350 + }, + { + "epoch": 0.26535551093460247, + "grad_norm": 45.05363082885742, + "learning_rate": 9.214892903422745e-06, + "loss": 26.8547, + "step": 131360 + }, + { + "epoch": 0.2653757115672863, + "grad_norm": 28.296865463256836, + "learning_rate": 9.214705114179759e-06, + "loss": 24.4135, + "step": 131370 + }, + { + "epoch": 0.2653959121999701, + "grad_norm": 1304.309814453125, + "learning_rate": 9.214517304394813e-06, + "loss": 23.6039, + "step": 131380 + }, + { + "epoch": 0.2654161128326539, + "grad_norm": 82.30618286132812, + "learning_rate": 9.214329474068818e-06, + "loss": 15.2886, + "step": 131390 + }, + { + "epoch": 0.26543631346533775, + "grad_norm": 674.7920532226562, + "learning_rate": 9.214141623202694e-06, + "loss": 29.9275, + "step": 131400 + }, + { + "epoch": 0.26545651409802157, + "grad_norm": 362.3443603515625, + "learning_rate": 9.213953751797355e-06, + "loss": 15.6839, + "step": 131410 + }, + { + "epoch": 0.2654767147307054, + "grad_norm": 501.14617919921875, + "learning_rate": 9.213765859853717e-06, + "loss": 16.9059, + "step": 131420 + }, + { + "epoch": 0.2654969153633892, + "grad_norm": 593.780517578125, + "learning_rate": 9.213577947372694e-06, + "loss": 25.2237, + "step": 131430 + }, + { + "epoch": 0.26551711599607297, + "grad_norm": 464.27447509765625, + "learning_rate": 9.213390014355204e-06, + "loss": 31.1519, + "step": 131440 + }, + { + "epoch": 0.2655373166287568, + "grad_norm": 238.30935668945312, + "learning_rate": 9.213202060802162e-06, + "loss": 51.9815, + "step": 131450 + }, + { + "epoch": 0.2655575172614406, + "grad_norm": 99.66514587402344, + "learning_rate": 9.213014086714484e-06, + "loss": 20.4605, + "step": 131460 + }, + { + "epoch": 0.26557771789412443, + "grad_norm": 178.2960205078125, + "learning_rate": 9.212826092093085e-06, + "loss": 15.421, + "step": 131470 + }, + { + "epoch": 0.26559791852680825, + "grad_norm": 472.04132080078125, + "learning_rate": 9.212638076938885e-06, + "loss": 19.2474, + "step": 131480 + }, + { + "epoch": 0.26561811915949207, + "grad_norm": 318.9012451171875, + "learning_rate": 9.212450041252797e-06, + "loss": 26.4302, + "step": 131490 + }, + { + "epoch": 0.2656383197921759, + "grad_norm": 324.2199401855469, + "learning_rate": 9.21226198503574e-06, + "loss": 23.9791, + "step": 131500 + }, + { + "epoch": 0.2656585204248597, + "grad_norm": 645.5361938476562, + "learning_rate": 9.212073908288626e-06, + "loss": 16.8702, + "step": 131510 + }, + { + "epoch": 0.26567872105754353, + "grad_norm": 997.70751953125, + "learning_rate": 9.211885811012376e-06, + "loss": 20.7685, + "step": 131520 + }, + { + "epoch": 0.26569892169022735, + "grad_norm": 432.2757263183594, + "learning_rate": 9.211697693207905e-06, + "loss": 30.3959, + "step": 131530 + }, + { + "epoch": 0.26571912232291117, + "grad_norm": 308.8063049316406, + "learning_rate": 9.21150955487613e-06, + "loss": 19.4911, + "step": 131540 + }, + { + "epoch": 0.265739322955595, + "grad_norm": 431.20013427734375, + "learning_rate": 9.21132139601797e-06, + "loss": 40.1249, + 
"step": 131550 + }, + { + "epoch": 0.2657595235882788, + "grad_norm": 701.2637329101562, + "learning_rate": 9.211133216634339e-06, + "loss": 26.772, + "step": 131560 + }, + { + "epoch": 0.2657797242209626, + "grad_norm": 452.8524169921875, + "learning_rate": 9.210945016726155e-06, + "loss": 18.2151, + "step": 131570 + }, + { + "epoch": 0.2657999248536464, + "grad_norm": 106.77520751953125, + "learning_rate": 9.210756796294335e-06, + "loss": 27.5683, + "step": 131580 + }, + { + "epoch": 0.2658201254863302, + "grad_norm": 386.91998291015625, + "learning_rate": 9.2105685553398e-06, + "loss": 35.6824, + "step": 131590 + }, + { + "epoch": 0.26584032611901404, + "grad_norm": 327.1443176269531, + "learning_rate": 9.210380293863462e-06, + "loss": 24.1842, + "step": 131600 + }, + { + "epoch": 0.26586052675169786, + "grad_norm": 212.36473083496094, + "learning_rate": 9.210192011866242e-06, + "loss": 30.5665, + "step": 131610 + }, + { + "epoch": 0.2658807273843817, + "grad_norm": 524.0457153320312, + "learning_rate": 9.210003709349058e-06, + "loss": 28.7755, + "step": 131620 + }, + { + "epoch": 0.2659009280170655, + "grad_norm": 935.5753173828125, + "learning_rate": 9.209815386312824e-06, + "loss": 27.792, + "step": 131630 + }, + { + "epoch": 0.2659211286497493, + "grad_norm": 70.88928985595703, + "learning_rate": 9.209627042758462e-06, + "loss": 17.7293, + "step": 131640 + }, + { + "epoch": 0.26594132928243314, + "grad_norm": 119.21331024169922, + "learning_rate": 9.209438678686888e-06, + "loss": 18.8387, + "step": 131650 + }, + { + "epoch": 0.26596152991511696, + "grad_norm": 415.9164733886719, + "learning_rate": 9.209250294099021e-06, + "loss": 23.0434, + "step": 131660 + }, + { + "epoch": 0.2659817305478008, + "grad_norm": 566.3864135742188, + "learning_rate": 9.209061888995777e-06, + "loss": 14.7185, + "step": 131670 + }, + { + "epoch": 0.2660019311804846, + "grad_norm": 713.2189331054688, + "learning_rate": 9.208873463378078e-06, + "loss": 20.9568, + "step": 131680 + }, + { + "epoch": 0.26602213181316836, + "grad_norm": 374.5089416503906, + "learning_rate": 9.208685017246839e-06, + "loss": 20.8218, + "step": 131690 + }, + { + "epoch": 0.2660423324458522, + "grad_norm": 434.3398132324219, + "learning_rate": 9.208496550602979e-06, + "loss": 24.3201, + "step": 131700 + }, + { + "epoch": 0.266062533078536, + "grad_norm": 401.7296142578125, + "learning_rate": 9.208308063447418e-06, + "loss": 17.9843, + "step": 131710 + }, + { + "epoch": 0.2660827337112198, + "grad_norm": 180.731689453125, + "learning_rate": 9.208119555781074e-06, + "loss": 24.8958, + "step": 131720 + }, + { + "epoch": 0.26610293434390364, + "grad_norm": 126.87080383300781, + "learning_rate": 9.207931027604867e-06, + "loss": 15.78, + "step": 131730 + }, + { + "epoch": 0.26612313497658746, + "grad_norm": 481.7561950683594, + "learning_rate": 9.207742478919713e-06, + "loss": 17.5088, + "step": 131740 + }, + { + "epoch": 0.2661433356092713, + "grad_norm": 46.55644989013672, + "learning_rate": 9.207553909726532e-06, + "loss": 19.1678, + "step": 131750 + }, + { + "epoch": 0.2661635362419551, + "grad_norm": 709.1443481445312, + "learning_rate": 9.207365320026244e-06, + "loss": 26.2364, + "step": 131760 + }, + { + "epoch": 0.2661837368746389, + "grad_norm": 76.94552612304688, + "learning_rate": 9.207176709819768e-06, + "loss": 21.8475, + "step": 131770 + }, + { + "epoch": 0.26620393750732274, + "grad_norm": 422.1698303222656, + "learning_rate": 9.206988079108023e-06, + "loss": 22.4855, + "step": 131780 + }, + { + "epoch": 
0.26622413814000656, + "grad_norm": 344.1897277832031, + "learning_rate": 9.206799427891928e-06, + "loss": 14.6986, + "step": 131790 + }, + { + "epoch": 0.2662443387726904, + "grad_norm": 1501.330078125, + "learning_rate": 9.206610756172402e-06, + "loss": 38.14, + "step": 131800 + }, + { + "epoch": 0.2662645394053742, + "grad_norm": 646.0186767578125, + "learning_rate": 9.206422063950368e-06, + "loss": 40.8852, + "step": 131810 + }, + { + "epoch": 0.26628474003805797, + "grad_norm": 139.66990661621094, + "learning_rate": 9.206233351226742e-06, + "loss": 22.6399, + "step": 131820 + }, + { + "epoch": 0.2663049406707418, + "grad_norm": 247.84478759765625, + "learning_rate": 9.206044618002443e-06, + "loss": 22.2184, + "step": 131830 + }, + { + "epoch": 0.2663251413034256, + "grad_norm": 1052.5062255859375, + "learning_rate": 9.205855864278394e-06, + "loss": 26.3953, + "step": 131840 + }, + { + "epoch": 0.2663453419361094, + "grad_norm": 373.505615234375, + "learning_rate": 9.205667090055513e-06, + "loss": 27.9726, + "step": 131850 + }, + { + "epoch": 0.26636554256879325, + "grad_norm": 596.9949951171875, + "learning_rate": 9.205478295334722e-06, + "loss": 24.5974, + "step": 131860 + }, + { + "epoch": 0.26638574320147707, + "grad_norm": 127.57281494140625, + "learning_rate": 9.20528948011694e-06, + "loss": 15.9013, + "step": 131870 + }, + { + "epoch": 0.2664059438341609, + "grad_norm": 152.38870239257812, + "learning_rate": 9.205100644403084e-06, + "loss": 25.7809, + "step": 131880 + }, + { + "epoch": 0.2664261444668447, + "grad_norm": 296.1481018066406, + "learning_rate": 9.20491178819408e-06, + "loss": 12.8902, + "step": 131890 + }, + { + "epoch": 0.2664463450995285, + "grad_norm": 191.02944946289062, + "learning_rate": 9.204722911490847e-06, + "loss": 14.6816, + "step": 131900 + }, + { + "epoch": 0.26646654573221235, + "grad_norm": 81.29207611083984, + "learning_rate": 9.204534014294302e-06, + "loss": 13.4297, + "step": 131910 + }, + { + "epoch": 0.26648674636489617, + "grad_norm": 520.2135620117188, + "learning_rate": 9.204345096605369e-06, + "loss": 20.1724, + "step": 131920 + }, + { + "epoch": 0.26650694699758, + "grad_norm": 408.005126953125, + "learning_rate": 9.204156158424969e-06, + "loss": 29.1905, + "step": 131930 + }, + { + "epoch": 0.2665271476302638, + "grad_norm": 641.2085571289062, + "learning_rate": 9.20396719975402e-06, + "loss": 21.5415, + "step": 131940 + }, + { + "epoch": 0.26654734826294757, + "grad_norm": 627.3342895507812, + "learning_rate": 9.203778220593447e-06, + "loss": 23.7676, + "step": 131950 + }, + { + "epoch": 0.2665675488956314, + "grad_norm": 519.7887573242188, + "learning_rate": 9.203589220944166e-06, + "loss": 29.4992, + "step": 131960 + }, + { + "epoch": 0.2665877495283152, + "grad_norm": 183.91842651367188, + "learning_rate": 9.203400200807104e-06, + "loss": 22.6261, + "step": 131970 + }, + { + "epoch": 0.26660795016099903, + "grad_norm": 587.1560668945312, + "learning_rate": 9.203211160183177e-06, + "loss": 26.334, + "step": 131980 + }, + { + "epoch": 0.26662815079368285, + "grad_norm": 272.0841369628906, + "learning_rate": 9.20302209907331e-06, + "loss": 23.0761, + "step": 131990 + }, + { + "epoch": 0.26664835142636667, + "grad_norm": 351.566162109375, + "learning_rate": 9.202833017478421e-06, + "loss": 13.8574, + "step": 132000 + }, + { + "epoch": 0.2666685520590505, + "grad_norm": 220.77423095703125, + "learning_rate": 9.202643915399436e-06, + "loss": 22.7003, + "step": 132010 + }, + { + "epoch": 0.2666887526917343, + "grad_norm": 679.429443359375, + 
"learning_rate": 9.202454792837273e-06, + "loss": 17.5129, + "step": 132020 + }, + { + "epoch": 0.26670895332441813, + "grad_norm": 395.39178466796875, + "learning_rate": 9.202265649792856e-06, + "loss": 27.1673, + "step": 132030 + }, + { + "epoch": 0.26672915395710195, + "grad_norm": 610.6573486328125, + "learning_rate": 9.202076486267106e-06, + "loss": 46.3144, + "step": 132040 + }, + { + "epoch": 0.26674935458978577, + "grad_norm": 292.31298828125, + "learning_rate": 9.201887302260943e-06, + "loss": 37.9318, + "step": 132050 + }, + { + "epoch": 0.2667695552224696, + "grad_norm": 377.9103088378906, + "learning_rate": 9.201698097775291e-06, + "loss": 11.6902, + "step": 132060 + }, + { + "epoch": 0.2667897558551534, + "grad_norm": 771.6697998046875, + "learning_rate": 9.201508872811074e-06, + "loss": 24.8368, + "step": 132070 + }, + { + "epoch": 0.2668099564878372, + "grad_norm": 394.54052734375, + "learning_rate": 9.201319627369211e-06, + "loss": 23.2955, + "step": 132080 + }, + { + "epoch": 0.266830157120521, + "grad_norm": 488.073486328125, + "learning_rate": 9.201130361450627e-06, + "loss": 17.0227, + "step": 132090 + }, + { + "epoch": 0.2668503577532048, + "grad_norm": 606.3373413085938, + "learning_rate": 9.200941075056242e-06, + "loss": 15.1066, + "step": 132100 + }, + { + "epoch": 0.26687055838588863, + "grad_norm": 695.5035400390625, + "learning_rate": 9.20075176818698e-06, + "loss": 19.2674, + "step": 132110 + }, + { + "epoch": 0.26689075901857245, + "grad_norm": 316.67376708984375, + "learning_rate": 9.200562440843763e-06, + "loss": 24.817, + "step": 132120 + }, + { + "epoch": 0.2669109596512563, + "grad_norm": 676.6490478515625, + "learning_rate": 9.200373093027515e-06, + "loss": 23.4508, + "step": 132130 + }, + { + "epoch": 0.2669311602839401, + "grad_norm": 386.1720886230469, + "learning_rate": 9.200183724739158e-06, + "loss": 21.1068, + "step": 132140 + }, + { + "epoch": 0.2669513609166239, + "grad_norm": 33.5107536315918, + "learning_rate": 9.199994335979613e-06, + "loss": 16.8658, + "step": 132150 + }, + { + "epoch": 0.26697156154930773, + "grad_norm": 335.3405456542969, + "learning_rate": 9.199804926749807e-06, + "loss": 24.8775, + "step": 132160 + }, + { + "epoch": 0.26699176218199155, + "grad_norm": 572.8211059570312, + "learning_rate": 9.19961549705066e-06, + "loss": 13.9328, + "step": 132170 + }, + { + "epoch": 0.2670119628146754, + "grad_norm": 433.8130798339844, + "learning_rate": 9.199426046883097e-06, + "loss": 27.4959, + "step": 132180 + }, + { + "epoch": 0.2670321634473592, + "grad_norm": 777.7250366210938, + "learning_rate": 9.19923657624804e-06, + "loss": 23.5044, + "step": 132190 + }, + { + "epoch": 0.267052364080043, + "grad_norm": 34.473838806152344, + "learning_rate": 9.199047085146415e-06, + "loss": 19.792, + "step": 132200 + }, + { + "epoch": 0.2670725647127268, + "grad_norm": 1079.24560546875, + "learning_rate": 9.198857573579143e-06, + "loss": 17.6298, + "step": 132210 + }, + { + "epoch": 0.2670927653454106, + "grad_norm": 273.13128662109375, + "learning_rate": 9.198668041547149e-06, + "loss": 29.453, + "step": 132220 + }, + { + "epoch": 0.2671129659780944, + "grad_norm": 406.24365234375, + "learning_rate": 9.198478489051355e-06, + "loss": 20.6822, + "step": 132230 + }, + { + "epoch": 0.26713316661077824, + "grad_norm": 374.2793884277344, + "learning_rate": 9.198288916092685e-06, + "loss": 16.7359, + "step": 132240 + }, + { + "epoch": 0.26715336724346206, + "grad_norm": 336.56878662109375, + "learning_rate": 9.198099322672066e-06, + "loss": 28.721, + 
"step": 132250 + }, + { + "epoch": 0.2671735678761459, + "grad_norm": 131.42808532714844, + "learning_rate": 9.19790970879042e-06, + "loss": 23.3139, + "step": 132260 + }, + { + "epoch": 0.2671937685088297, + "grad_norm": 454.2491149902344, + "learning_rate": 9.19772007444867e-06, + "loss": 20.6341, + "step": 132270 + }, + { + "epoch": 0.2672139691415135, + "grad_norm": 4854.48876953125, + "learning_rate": 9.197530419647744e-06, + "loss": 33.9443, + "step": 132280 + }, + { + "epoch": 0.26723416977419734, + "grad_norm": 425.8404846191406, + "learning_rate": 9.197340744388562e-06, + "loss": 16.7814, + "step": 132290 + }, + { + "epoch": 0.26725437040688116, + "grad_norm": 169.8668212890625, + "learning_rate": 9.197151048672051e-06, + "loss": 19.8753, + "step": 132300 + }, + { + "epoch": 0.267274571039565, + "grad_norm": 472.1898193359375, + "learning_rate": 9.196961332499133e-06, + "loss": 19.4039, + "step": 132310 + }, + { + "epoch": 0.2672947716722488, + "grad_norm": 320.7469787597656, + "learning_rate": 9.196771595870736e-06, + "loss": 21.0947, + "step": 132320 + }, + { + "epoch": 0.26731497230493256, + "grad_norm": 298.8196105957031, + "learning_rate": 9.196581838787784e-06, + "loss": 17.3034, + "step": 132330 + }, + { + "epoch": 0.2673351729376164, + "grad_norm": 276.4595031738281, + "learning_rate": 9.196392061251199e-06, + "loss": 22.1888, + "step": 132340 + }, + { + "epoch": 0.2673553735703002, + "grad_norm": 245.24012756347656, + "learning_rate": 9.196202263261908e-06, + "loss": 16.4098, + "step": 132350 + }, + { + "epoch": 0.267375574202984, + "grad_norm": 432.42022705078125, + "learning_rate": 9.196012444820839e-06, + "loss": 20.4767, + "step": 132360 + }, + { + "epoch": 0.26739577483566784, + "grad_norm": 470.5956115722656, + "learning_rate": 9.195822605928913e-06, + "loss": 30.7396, + "step": 132370 + }, + { + "epoch": 0.26741597546835166, + "grad_norm": 514.9091796875, + "learning_rate": 9.195632746587055e-06, + "loss": 21.2021, + "step": 132380 + }, + { + "epoch": 0.2674361761010355, + "grad_norm": 293.6721496582031, + "learning_rate": 9.195442866796194e-06, + "loss": 22.6942, + "step": 132390 + }, + { + "epoch": 0.2674563767337193, + "grad_norm": 331.6898193359375, + "learning_rate": 9.195252966557252e-06, + "loss": 24.5026, + "step": 132400 + }, + { + "epoch": 0.2674765773664031, + "grad_norm": 282.0184631347656, + "learning_rate": 9.195063045871156e-06, + "loss": 15.5731, + "step": 132410 + }, + { + "epoch": 0.26749677799908694, + "grad_norm": 510.8322448730469, + "learning_rate": 9.194873104738831e-06, + "loss": 19.1523, + "step": 132420 + }, + { + "epoch": 0.26751697863177076, + "grad_norm": 401.6566162109375, + "learning_rate": 9.194683143161205e-06, + "loss": 31.8837, + "step": 132430 + }, + { + "epoch": 0.2675371792644546, + "grad_norm": 135.264404296875, + "learning_rate": 9.1944931611392e-06, + "loss": 29.447, + "step": 132440 + }, + { + "epoch": 0.2675573798971384, + "grad_norm": 187.27471923828125, + "learning_rate": 9.194303158673744e-06, + "loss": 25.0556, + "step": 132450 + }, + { + "epoch": 0.26757758052982217, + "grad_norm": 222.26097106933594, + "learning_rate": 9.194113135765766e-06, + "loss": 20.4969, + "step": 132460 + }, + { + "epoch": 0.267597781162506, + "grad_norm": 746.4765625, + "learning_rate": 9.193923092416187e-06, + "loss": 34.7272, + "step": 132470 + }, + { + "epoch": 0.2676179817951898, + "grad_norm": 783.6100463867188, + "learning_rate": 9.193733028625936e-06, + "loss": 14.6561, + "step": 132480 + }, + { + "epoch": 0.2676381824278736, + 
"grad_norm": 592.0213012695312, + "learning_rate": 9.193542944395938e-06, + "loss": 28.9808, + "step": 132490 + }, + { + "epoch": 0.26765838306055745, + "grad_norm": 630.4940185546875, + "learning_rate": 9.193352839727122e-06, + "loss": 22.5505, + "step": 132500 + }, + { + "epoch": 0.26767858369324127, + "grad_norm": 450.4183654785156, + "learning_rate": 9.193162714620411e-06, + "loss": 11.2748, + "step": 132510 + }, + { + "epoch": 0.2676987843259251, + "grad_norm": 647.8392333984375, + "learning_rate": 9.192972569076734e-06, + "loss": 19.0283, + "step": 132520 + }, + { + "epoch": 0.2677189849586089, + "grad_norm": 295.0071716308594, + "learning_rate": 9.192782403097018e-06, + "loss": 10.4461, + "step": 132530 + }, + { + "epoch": 0.2677391855912927, + "grad_norm": 437.8892822265625, + "learning_rate": 9.192592216682189e-06, + "loss": 36.5702, + "step": 132540 + }, + { + "epoch": 0.26775938622397655, + "grad_norm": 276.1405944824219, + "learning_rate": 9.192402009833174e-06, + "loss": 15.4697, + "step": 132550 + }, + { + "epoch": 0.26777958685666037, + "grad_norm": 1536.82763671875, + "learning_rate": 9.192211782550899e-06, + "loss": 32.0363, + "step": 132560 + }, + { + "epoch": 0.2677997874893442, + "grad_norm": 1108.8671875, + "learning_rate": 9.192021534836293e-06, + "loss": 30.7985, + "step": 132570 + }, + { + "epoch": 0.267819988122028, + "grad_norm": 796.626953125, + "learning_rate": 9.191831266690284e-06, + "loss": 27.7736, + "step": 132580 + }, + { + "epoch": 0.26784018875471177, + "grad_norm": 288.6631164550781, + "learning_rate": 9.191640978113796e-06, + "loss": 21.322, + "step": 132590 + }, + { + "epoch": 0.2678603893873956, + "grad_norm": 299.3996276855469, + "learning_rate": 9.191450669107758e-06, + "loss": 54.9828, + "step": 132600 + }, + { + "epoch": 0.2678805900200794, + "grad_norm": 1362.4151611328125, + "learning_rate": 9.191260339673099e-06, + "loss": 19.2313, + "step": 132610 + }, + { + "epoch": 0.26790079065276323, + "grad_norm": 383.589111328125, + "learning_rate": 9.191069989810743e-06, + "loss": 21.1399, + "step": 132620 + }, + { + "epoch": 0.26792099128544705, + "grad_norm": 230.8157196044922, + "learning_rate": 9.190879619521623e-06, + "loss": 23.5327, + "step": 132630 + }, + { + "epoch": 0.26794119191813087, + "grad_norm": 0.0, + "learning_rate": 9.190689228806664e-06, + "loss": 19.4128, + "step": 132640 + }, + { + "epoch": 0.2679613925508147, + "grad_norm": 0.0, + "learning_rate": 9.190498817666793e-06, + "loss": 17.6414, + "step": 132650 + }, + { + "epoch": 0.2679815931834985, + "grad_norm": 165.3369903564453, + "learning_rate": 9.190308386102938e-06, + "loss": 15.453, + "step": 132660 + }, + { + "epoch": 0.26800179381618233, + "grad_norm": 664.3617553710938, + "learning_rate": 9.19011793411603e-06, + "loss": 37.5341, + "step": 132670 + }, + { + "epoch": 0.26802199444886615, + "grad_norm": 253.08544921875, + "learning_rate": 9.189927461706994e-06, + "loss": 24.1027, + "step": 132680 + }, + { + "epoch": 0.26804219508154997, + "grad_norm": 647.598388671875, + "learning_rate": 9.189736968876761e-06, + "loss": 24.0179, + "step": 132690 + }, + { + "epoch": 0.2680623957142338, + "grad_norm": 519.2431640625, + "learning_rate": 9.189546455626258e-06, + "loss": 25.4059, + "step": 132700 + }, + { + "epoch": 0.2680825963469176, + "grad_norm": 7.84843111038208, + "learning_rate": 9.189355921956414e-06, + "loss": 28.0975, + "step": 132710 + }, + { + "epoch": 0.2681027969796014, + "grad_norm": 201.0693817138672, + "learning_rate": 9.189165367868157e-06, + "loss": 24.6074, + 
"step": 132720 + }, + { + "epoch": 0.2681229976122852, + "grad_norm": 301.8020935058594, + "learning_rate": 9.188974793362412e-06, + "loss": 26.2875, + "step": 132730 + }, + { + "epoch": 0.268143198244969, + "grad_norm": 321.9702453613281, + "learning_rate": 9.188784198440118e-06, + "loss": 11.4118, + "step": 132740 + }, + { + "epoch": 0.26816339887765284, + "grad_norm": 556.8260498046875, + "learning_rate": 9.188593583102195e-06, + "loss": 18.8817, + "step": 132750 + }, + { + "epoch": 0.26818359951033666, + "grad_norm": 342.0984191894531, + "learning_rate": 9.188402947349575e-06, + "loss": 17.7907, + "step": 132760 + }, + { + "epoch": 0.2682038001430205, + "grad_norm": 652.8034057617188, + "learning_rate": 9.188212291183187e-06, + "loss": 17.4182, + "step": 132770 + }, + { + "epoch": 0.2682240007757043, + "grad_norm": 213.09066772460938, + "learning_rate": 9.18802161460396e-06, + "loss": 29.8512, + "step": 132780 + }, + { + "epoch": 0.2682442014083881, + "grad_norm": 92.26563262939453, + "learning_rate": 9.187830917612826e-06, + "loss": 22.9727, + "step": 132790 + }, + { + "epoch": 0.26826440204107194, + "grad_norm": 98.51717376708984, + "learning_rate": 9.18764020021071e-06, + "loss": 57.7267, + "step": 132800 + }, + { + "epoch": 0.26828460267375576, + "grad_norm": 133.5460968017578, + "learning_rate": 9.187449462398544e-06, + "loss": 16.3387, + "step": 132810 + }, + { + "epoch": 0.2683048033064396, + "grad_norm": 669.2384643554688, + "learning_rate": 9.187258704177255e-06, + "loss": 36.3648, + "step": 132820 + }, + { + "epoch": 0.2683250039391234, + "grad_norm": 61.76363754272461, + "learning_rate": 9.187067925547779e-06, + "loss": 16.2125, + "step": 132830 + }, + { + "epoch": 0.2683452045718072, + "grad_norm": 265.6653137207031, + "learning_rate": 9.186877126511039e-06, + "loss": 17.4552, + "step": 132840 + }, + { + "epoch": 0.268365405204491, + "grad_norm": 110.96267700195312, + "learning_rate": 9.186686307067968e-06, + "loss": 25.6673, + "step": 132850 + }, + { + "epoch": 0.2683856058371748, + "grad_norm": 352.1205139160156, + "learning_rate": 9.186495467219497e-06, + "loss": 43.148, + "step": 132860 + }, + { + "epoch": 0.2684058064698586, + "grad_norm": 223.99241638183594, + "learning_rate": 9.186304606966554e-06, + "loss": 26.3819, + "step": 132870 + }, + { + "epoch": 0.26842600710254244, + "grad_norm": 573.003662109375, + "learning_rate": 9.186113726310068e-06, + "loss": 26.4017, + "step": 132880 + }, + { + "epoch": 0.26844620773522626, + "grad_norm": 751.8842163085938, + "learning_rate": 9.185922825250975e-06, + "loss": 20.1029, + "step": 132890 + }, + { + "epoch": 0.2684664083679101, + "grad_norm": 246.11575317382812, + "learning_rate": 9.1857319037902e-06, + "loss": 18.4643, + "step": 132900 + }, + { + "epoch": 0.2684866090005939, + "grad_norm": 263.97454833984375, + "learning_rate": 9.185540961928677e-06, + "loss": 27.4157, + "step": 132910 + }, + { + "epoch": 0.2685068096332777, + "grad_norm": 643.5626220703125, + "learning_rate": 9.185349999667333e-06, + "loss": 17.9134, + "step": 132920 + }, + { + "epoch": 0.26852701026596154, + "grad_norm": 255.15863037109375, + "learning_rate": 9.185159017007101e-06, + "loss": 11.7951, + "step": 132930 + }, + { + "epoch": 0.26854721089864536, + "grad_norm": 820.3862915039062, + "learning_rate": 9.184968013948912e-06, + "loss": 21.1888, + "step": 132940 + }, + { + "epoch": 0.2685674115313292, + "grad_norm": 384.4031066894531, + "learning_rate": 9.184776990493696e-06, + "loss": 18.4046, + "step": 132950 + }, + { + "epoch": 
0.268587612164013, + "grad_norm": 441.9537658691406, + "learning_rate": 9.184585946642384e-06, + "loss": 14.0234, + "step": 132960 + }, + { + "epoch": 0.26860781279669677, + "grad_norm": 234.75209045410156, + "learning_rate": 9.184394882395909e-06, + "loss": 34.9167, + "step": 132970 + }, + { + "epoch": 0.2686280134293806, + "grad_norm": 244.43898010253906, + "learning_rate": 9.1842037977552e-06, + "loss": 24.3913, + "step": 132980 + }, + { + "epoch": 0.2686482140620644, + "grad_norm": 195.8040771484375, + "learning_rate": 9.18401269272119e-06, + "loss": 52.0898, + "step": 132990 + }, + { + "epoch": 0.2686684146947482, + "grad_norm": 517.8396606445312, + "learning_rate": 9.18382156729481e-06, + "loss": 16.1742, + "step": 133000 + }, + { + "epoch": 0.26868861532743205, + "grad_norm": 535.0672607421875, + "learning_rate": 9.18363042147699e-06, + "loss": 15.3108, + "step": 133010 + }, + { + "epoch": 0.26870881596011587, + "grad_norm": 183.6185760498047, + "learning_rate": 9.183439255268662e-06, + "loss": 33.4402, + "step": 133020 + }, + { + "epoch": 0.2687290165927997, + "grad_norm": 231.61705017089844, + "learning_rate": 9.18324806867076e-06, + "loss": 30.3354, + "step": 133030 + }, + { + "epoch": 0.2687492172254835, + "grad_norm": 334.2143249511719, + "learning_rate": 9.183056861684215e-06, + "loss": 11.6259, + "step": 133040 + }, + { + "epoch": 0.2687694178581673, + "grad_norm": 646.425048828125, + "learning_rate": 9.182865634309956e-06, + "loss": 13.3922, + "step": 133050 + }, + { + "epoch": 0.26878961849085115, + "grad_norm": 570.08935546875, + "learning_rate": 9.182674386548918e-06, + "loss": 38.6776, + "step": 133060 + }, + { + "epoch": 0.26880981912353497, + "grad_norm": 369.7538757324219, + "learning_rate": 9.182483118402032e-06, + "loss": 15.7544, + "step": 133070 + }, + { + "epoch": 0.2688300197562188, + "grad_norm": 914.4676513671875, + "learning_rate": 9.18229182987023e-06, + "loss": 15.9988, + "step": 133080 + }, + { + "epoch": 0.2688502203889026, + "grad_norm": 158.03488159179688, + "learning_rate": 9.182100520954446e-06, + "loss": 29.8605, + "step": 133090 + }, + { + "epoch": 0.26887042102158637, + "grad_norm": 623.6317749023438, + "learning_rate": 9.181909191655613e-06, + "loss": 22.6347, + "step": 133100 + }, + { + "epoch": 0.2688906216542702, + "grad_norm": 444.5938415527344, + "learning_rate": 9.18171784197466e-06, + "loss": 35.1076, + "step": 133110 + }, + { + "epoch": 0.268910822286954, + "grad_norm": 306.80548095703125, + "learning_rate": 9.18152647191252e-06, + "loss": 25.4284, + "step": 133120 + }, + { + "epoch": 0.26893102291963783, + "grad_norm": 552.109130859375, + "learning_rate": 9.181335081470128e-06, + "loss": 23.2609, + "step": 133130 + }, + { + "epoch": 0.26895122355232165, + "grad_norm": 421.8555603027344, + "learning_rate": 9.181143670648418e-06, + "loss": 21.4386, + "step": 133140 + }, + { + "epoch": 0.26897142418500547, + "grad_norm": 510.4923400878906, + "learning_rate": 9.18095223944832e-06, + "loss": 22.8459, + "step": 133150 + }, + { + "epoch": 0.2689916248176893, + "grad_norm": 135.2384796142578, + "learning_rate": 9.180760787870766e-06, + "loss": 26.551, + "step": 133160 + }, + { + "epoch": 0.2690118254503731, + "grad_norm": 858.7987060546875, + "learning_rate": 9.180569315916693e-06, + "loss": 28.7734, + "step": 133170 + }, + { + "epoch": 0.26903202608305693, + "grad_norm": 1593.05810546875, + "learning_rate": 9.180377823587031e-06, + "loss": 34.3314, + "step": 133180 + }, + { + "epoch": 0.26905222671574075, + "grad_norm": 417.97918701171875, + 
"learning_rate": 9.180186310882715e-06, + "loss": 28.1385, + "step": 133190 + }, + { + "epoch": 0.26907242734842457, + "grad_norm": 1240.343994140625, + "learning_rate": 9.179994777804677e-06, + "loss": 28.292, + "step": 133200 + }, + { + "epoch": 0.2690926279811084, + "grad_norm": 388.9484558105469, + "learning_rate": 9.179803224353854e-06, + "loss": 25.5228, + "step": 133210 + }, + { + "epoch": 0.2691128286137922, + "grad_norm": 617.9100341796875, + "learning_rate": 9.179611650531174e-06, + "loss": 18.4147, + "step": 133220 + }, + { + "epoch": 0.269133029246476, + "grad_norm": 370.5556640625, + "learning_rate": 9.179420056337576e-06, + "loss": 18.3568, + "step": 133230 + }, + { + "epoch": 0.2691532298791598, + "grad_norm": 315.80926513671875, + "learning_rate": 9.179228441773991e-06, + "loss": 18.8859, + "step": 133240 + }, + { + "epoch": 0.2691734305118436, + "grad_norm": 442.8673095703125, + "learning_rate": 9.179036806841352e-06, + "loss": 10.576, + "step": 133250 + }, + { + "epoch": 0.26919363114452743, + "grad_norm": 329.0492858886719, + "learning_rate": 9.178845151540597e-06, + "loss": 16.9877, + "step": 133260 + }, + { + "epoch": 0.26921383177721125, + "grad_norm": 255.6985321044922, + "learning_rate": 9.178653475872655e-06, + "loss": 14.3121, + "step": 133270 + }, + { + "epoch": 0.2692340324098951, + "grad_norm": 881.1149291992188, + "learning_rate": 9.178461779838464e-06, + "loss": 35.6185, + "step": 133280 + }, + { + "epoch": 0.2692542330425789, + "grad_norm": 518.59814453125, + "learning_rate": 9.178270063438957e-06, + "loss": 27.3528, + "step": 133290 + }, + { + "epoch": 0.2692744336752627, + "grad_norm": 671.7938842773438, + "learning_rate": 9.178078326675069e-06, + "loss": 37.0915, + "step": 133300 + }, + { + "epoch": 0.26929463430794653, + "grad_norm": 283.6918029785156, + "learning_rate": 9.177886569547731e-06, + "loss": 24.2198, + "step": 133310 + }, + { + "epoch": 0.26931483494063035, + "grad_norm": 653.0701904296875, + "learning_rate": 9.177694792057884e-06, + "loss": 25.0911, + "step": 133320 + }, + { + "epoch": 0.2693350355733142, + "grad_norm": 474.48077392578125, + "learning_rate": 9.177502994206457e-06, + "loss": 15.433, + "step": 133330 + }, + { + "epoch": 0.269355236205998, + "grad_norm": 461.8171691894531, + "learning_rate": 9.17731117599439e-06, + "loss": 20.6739, + "step": 133340 + }, + { + "epoch": 0.2693754368386818, + "grad_norm": 582.8025512695312, + "learning_rate": 9.177119337422613e-06, + "loss": 19.432, + "step": 133350 + }, + { + "epoch": 0.2693956374713656, + "grad_norm": 454.8699645996094, + "learning_rate": 9.176927478492064e-06, + "loss": 24.2648, + "step": 133360 + }, + { + "epoch": 0.2694158381040494, + "grad_norm": 294.0679016113281, + "learning_rate": 9.176735599203676e-06, + "loss": 23.7354, + "step": 133370 + }, + { + "epoch": 0.2694360387367332, + "grad_norm": 270.6733093261719, + "learning_rate": 9.176543699558384e-06, + "loss": 17.6682, + "step": 133380 + }, + { + "epoch": 0.26945623936941704, + "grad_norm": 750.679443359375, + "learning_rate": 9.176351779557127e-06, + "loss": 26.0452, + "step": 133390 + }, + { + "epoch": 0.26947644000210086, + "grad_norm": 440.5780334472656, + "learning_rate": 9.176159839200838e-06, + "loss": 19.2952, + "step": 133400 + }, + { + "epoch": 0.2694966406347847, + "grad_norm": 197.2433624267578, + "learning_rate": 9.175967878490451e-06, + "loss": 14.8968, + "step": 133410 + }, + { + "epoch": 0.2695168412674685, + "grad_norm": 172.82565307617188, + "learning_rate": 9.175775897426904e-06, + "loss": 20.906, + 
"step": 133420 + }, + { + "epoch": 0.2695370419001523, + "grad_norm": 650.2919311523438, + "learning_rate": 9.17558389601113e-06, + "loss": 25.1461, + "step": 133430 + }, + { + "epoch": 0.26955724253283614, + "grad_norm": 189.42684936523438, + "learning_rate": 9.175391874244068e-06, + "loss": 14.5088, + "step": 133440 + }, + { + "epoch": 0.26957744316551996, + "grad_norm": 327.37689208984375, + "learning_rate": 9.175199832126654e-06, + "loss": 30.4333, + "step": 133450 + }, + { + "epoch": 0.2695976437982038, + "grad_norm": 107.6774673461914, + "learning_rate": 9.17500776965982e-06, + "loss": 20.1457, + "step": 133460 + }, + { + "epoch": 0.2696178444308876, + "grad_norm": 139.43043518066406, + "learning_rate": 9.174815686844506e-06, + "loss": 32.9828, + "step": 133470 + }, + { + "epoch": 0.2696380450635714, + "grad_norm": 438.24945068359375, + "learning_rate": 9.174623583681644e-06, + "loss": 32.7648, + "step": 133480 + }, + { + "epoch": 0.2696582456962552, + "grad_norm": 530.69580078125, + "learning_rate": 9.174431460172175e-06, + "loss": 10.383, + "step": 133490 + }, + { + "epoch": 0.269678446328939, + "grad_norm": 491.65020751953125, + "learning_rate": 9.174239316317034e-06, + "loss": 17.2912, + "step": 133500 + }, + { + "epoch": 0.2696986469616228, + "grad_norm": 390.62896728515625, + "learning_rate": 9.174047152117155e-06, + "loss": 19.479, + "step": 133510 + }, + { + "epoch": 0.26971884759430664, + "grad_norm": 521.3779907226562, + "learning_rate": 9.173854967573479e-06, + "loss": 45.7823, + "step": 133520 + }, + { + "epoch": 0.26973904822699046, + "grad_norm": 359.65325927734375, + "learning_rate": 9.173662762686937e-06, + "loss": 16.5244, + "step": 133530 + }, + { + "epoch": 0.2697592488596743, + "grad_norm": 682.971435546875, + "learning_rate": 9.17347053745847e-06, + "loss": 25.2273, + "step": 133540 + }, + { + "epoch": 0.2697794494923581, + "grad_norm": 806.0768432617188, + "learning_rate": 9.173278291889016e-06, + "loss": 17.358, + "step": 133550 + }, + { + "epoch": 0.2697996501250419, + "grad_norm": 363.98028564453125, + "learning_rate": 9.173086025979507e-06, + "loss": 12.2788, + "step": 133560 + }, + { + "epoch": 0.26981985075772574, + "grad_norm": 244.05532836914062, + "learning_rate": 9.172893739730884e-06, + "loss": 20.116, + "step": 133570 + }, + { + "epoch": 0.26984005139040956, + "grad_norm": 459.2618103027344, + "learning_rate": 9.172701433144083e-06, + "loss": 24.0229, + "step": 133580 + }, + { + "epoch": 0.2698602520230934, + "grad_norm": 186.79541015625, + "learning_rate": 9.172509106220041e-06, + "loss": 19.4491, + "step": 133590 + }, + { + "epoch": 0.2698804526557772, + "grad_norm": 157.8845977783203, + "learning_rate": 9.172316758959695e-06, + "loss": 15.3868, + "step": 133600 + }, + { + "epoch": 0.26990065328846097, + "grad_norm": 559.3065185546875, + "learning_rate": 9.172124391363986e-06, + "loss": 31.9676, + "step": 133610 + }, + { + "epoch": 0.2699208539211448, + "grad_norm": 125.10926818847656, + "learning_rate": 9.171932003433845e-06, + "loss": 19.5996, + "step": 133620 + }, + { + "epoch": 0.2699410545538286, + "grad_norm": 1056.4173583984375, + "learning_rate": 9.171739595170217e-06, + "loss": 33.6187, + "step": 133630 + }, + { + "epoch": 0.2699612551865124, + "grad_norm": 651.4199829101562, + "learning_rate": 9.171547166574034e-06, + "loss": 13.5563, + "step": 133640 + }, + { + "epoch": 0.26998145581919625, + "grad_norm": 200.57957458496094, + "learning_rate": 9.171354717646238e-06, + "loss": 32.7947, + "step": 133650 + }, + { + "epoch": 
0.27000165645188007, + "grad_norm": 61.84878921508789, + "learning_rate": 9.171162248387762e-06, + "loss": 25.86, + "step": 133660 + }, + { + "epoch": 0.2700218570845639, + "grad_norm": 167.61216735839844, + "learning_rate": 9.170969758799549e-06, + "loss": 23.9729, + "step": 133670 + }, + { + "epoch": 0.2700420577172477, + "grad_norm": 478.9454040527344, + "learning_rate": 9.170777248882535e-06, + "loss": 17.3073, + "step": 133680 + }, + { + "epoch": 0.2700622583499315, + "grad_norm": 547.905029296875, + "learning_rate": 9.170584718637658e-06, + "loss": 21.9839, + "step": 133690 + }, + { + "epoch": 0.27008245898261535, + "grad_norm": 379.87994384765625, + "learning_rate": 9.170392168065858e-06, + "loss": 10.2162, + "step": 133700 + }, + { + "epoch": 0.27010265961529917, + "grad_norm": 1176.78515625, + "learning_rate": 9.17019959716807e-06, + "loss": 18.8268, + "step": 133710 + }, + { + "epoch": 0.270122860247983, + "grad_norm": 758.3802490234375, + "learning_rate": 9.170007005945236e-06, + "loss": 33.3501, + "step": 133720 + }, + { + "epoch": 0.2701430608806668, + "grad_norm": 91.6874008178711, + "learning_rate": 9.169814394398295e-06, + "loss": 21.1327, + "step": 133730 + }, + { + "epoch": 0.27016326151335057, + "grad_norm": 210.44874572753906, + "learning_rate": 9.169621762528184e-06, + "loss": 25.161, + "step": 133740 + }, + { + "epoch": 0.2701834621460344, + "grad_norm": 211.814453125, + "learning_rate": 9.169429110335842e-06, + "loss": 23.5289, + "step": 133750 + }, + { + "epoch": 0.2702036627787182, + "grad_norm": 88.92313385009766, + "learning_rate": 9.169236437822208e-06, + "loss": 34.3629, + "step": 133760 + }, + { + "epoch": 0.27022386341140203, + "grad_norm": 279.8443298339844, + "learning_rate": 9.169043744988222e-06, + "loss": 12.3574, + "step": 133770 + }, + { + "epoch": 0.27024406404408585, + "grad_norm": 261.79803466796875, + "learning_rate": 9.16885103183482e-06, + "loss": 15.2551, + "step": 133780 + }, + { + "epoch": 0.27026426467676967, + "grad_norm": 185.6011199951172, + "learning_rate": 9.168658298362947e-06, + "loss": 23.5912, + "step": 133790 + }, + { + "epoch": 0.2702844653094535, + "grad_norm": 358.6379089355469, + "learning_rate": 9.168465544573538e-06, + "loss": 8.0604, + "step": 133800 + }, + { + "epoch": 0.2703046659421373, + "grad_norm": 409.3935546875, + "learning_rate": 9.168272770467532e-06, + "loss": 21.4261, + "step": 133810 + }, + { + "epoch": 0.27032486657482113, + "grad_norm": 677.9612426757812, + "learning_rate": 9.16807997604587e-06, + "loss": 17.9283, + "step": 133820 + }, + { + "epoch": 0.27034506720750495, + "grad_norm": 464.4529113769531, + "learning_rate": 9.167887161309495e-06, + "loss": 18.2088, + "step": 133830 + }, + { + "epoch": 0.27036526784018877, + "grad_norm": 249.7884063720703, + "learning_rate": 9.16769432625934e-06, + "loss": 21.463, + "step": 133840 + }, + { + "epoch": 0.2703854684728726, + "grad_norm": 675.5252075195312, + "learning_rate": 9.16750147089635e-06, + "loss": 16.9006, + "step": 133850 + }, + { + "epoch": 0.2704056691055564, + "grad_norm": 269.93756103515625, + "learning_rate": 9.167308595221463e-06, + "loss": 17.7645, + "step": 133860 + }, + { + "epoch": 0.2704258697382402, + "grad_norm": 845.3773193359375, + "learning_rate": 9.167115699235618e-06, + "loss": 34.0293, + "step": 133870 + }, + { + "epoch": 0.270446070370924, + "grad_norm": 373.9945373535156, + "learning_rate": 9.166922782939759e-06, + "loss": 26.0111, + "step": 133880 + }, + { + "epoch": 0.2704662710036078, + "grad_norm": 642.9016723632812, + 
"learning_rate": 9.166729846334822e-06, + "loss": 46.0158, + "step": 133890 + }, + { + "epoch": 0.27048647163629164, + "grad_norm": 731.1618041992188, + "learning_rate": 9.16653688942175e-06, + "loss": 36.8734, + "step": 133900 + }, + { + "epoch": 0.27050667226897546, + "grad_norm": 348.7421875, + "learning_rate": 9.166343912201482e-06, + "loss": 17.0303, + "step": 133910 + }, + { + "epoch": 0.2705268729016593, + "grad_norm": 570.551025390625, + "learning_rate": 9.166150914674959e-06, + "loss": 31.2026, + "step": 133920 + }, + { + "epoch": 0.2705470735343431, + "grad_norm": 746.3660888671875, + "learning_rate": 9.165957896843122e-06, + "loss": 16.837, + "step": 133930 + }, + { + "epoch": 0.2705672741670269, + "grad_norm": 195.4158935546875, + "learning_rate": 9.165764858706912e-06, + "loss": 21.4735, + "step": 133940 + }, + { + "epoch": 0.27058747479971074, + "grad_norm": 401.3896789550781, + "learning_rate": 9.165571800267267e-06, + "loss": 18.1618, + "step": 133950 + }, + { + "epoch": 0.27060767543239456, + "grad_norm": 773.892822265625, + "learning_rate": 9.165378721525133e-06, + "loss": 36.3603, + "step": 133960 + }, + { + "epoch": 0.2706278760650784, + "grad_norm": 1609.6444091796875, + "learning_rate": 9.165185622481447e-06, + "loss": 28.9816, + "step": 133970 + }, + { + "epoch": 0.2706480766977622, + "grad_norm": 354.135009765625, + "learning_rate": 9.164992503137152e-06, + "loss": 15.0174, + "step": 133980 + }, + { + "epoch": 0.270668277330446, + "grad_norm": 4.084848880767822, + "learning_rate": 9.16479936349319e-06, + "loss": 27.4202, + "step": 133990 + }, + { + "epoch": 0.2706884779631298, + "grad_norm": 920.7405395507812, + "learning_rate": 9.164606203550498e-06, + "loss": 28.7478, + "step": 134000 + }, + { + "epoch": 0.2707086785958136, + "grad_norm": 434.6065979003906, + "learning_rate": 9.164413023310022e-06, + "loss": 24.9194, + "step": 134010 + }, + { + "epoch": 0.2707288792284974, + "grad_norm": 458.9222717285156, + "learning_rate": 9.164219822772702e-06, + "loss": 26.233, + "step": 134020 + }, + { + "epoch": 0.27074907986118124, + "grad_norm": 144.0622100830078, + "learning_rate": 9.16402660193948e-06, + "loss": 25.8593, + "step": 134030 + }, + { + "epoch": 0.27076928049386506, + "grad_norm": 705.0419921875, + "learning_rate": 9.163833360811296e-06, + "loss": 16.5194, + "step": 134040 + }, + { + "epoch": 0.2707894811265489, + "grad_norm": 458.0263977050781, + "learning_rate": 9.163640099389095e-06, + "loss": 23.7073, + "step": 134050 + }, + { + "epoch": 0.2708096817592327, + "grad_norm": 214.73873901367188, + "learning_rate": 9.163446817673817e-06, + "loss": 14.6718, + "step": 134060 + }, + { + "epoch": 0.2708298823919165, + "grad_norm": 265.9853515625, + "learning_rate": 9.163253515666403e-06, + "loss": 20.0908, + "step": 134070 + }, + { + "epoch": 0.27085008302460034, + "grad_norm": 945.6927490234375, + "learning_rate": 9.163060193367797e-06, + "loss": 21.8016, + "step": 134080 + }, + { + "epoch": 0.27087028365728416, + "grad_norm": 280.96636962890625, + "learning_rate": 9.16286685077894e-06, + "loss": 17.5716, + "step": 134090 + }, + { + "epoch": 0.270890484289968, + "grad_norm": 894.2539672851562, + "learning_rate": 9.162673487900775e-06, + "loss": 14.1572, + "step": 134100 + }, + { + "epoch": 0.2709106849226518, + "grad_norm": 303.4360046386719, + "learning_rate": 9.162480104734244e-06, + "loss": 14.7613, + "step": 134110 + }, + { + "epoch": 0.27093088555533557, + "grad_norm": 802.8580932617188, + "learning_rate": 9.162286701280292e-06, + "loss": 23.2513, + "step": 
134120 + }, + { + "epoch": 0.2709510861880194, + "grad_norm": 619.9885864257812, + "learning_rate": 9.162093277539856e-06, + "loss": 16.6658, + "step": 134130 + }, + { + "epoch": 0.2709712868207032, + "grad_norm": 267.7410888671875, + "learning_rate": 9.161899833513885e-06, + "loss": 18.987, + "step": 134140 + }, + { + "epoch": 0.270991487453387, + "grad_norm": 340.4080810546875, + "learning_rate": 9.161706369203319e-06, + "loss": 14.3383, + "step": 134150 + }, + { + "epoch": 0.27101168808607085, + "grad_norm": 481.16546630859375, + "learning_rate": 9.161512884609099e-06, + "loss": 14.2786, + "step": 134160 + }, + { + "epoch": 0.27103188871875467, + "grad_norm": 882.806396484375, + "learning_rate": 9.16131937973217e-06, + "loss": 24.3084, + "step": 134170 + }, + { + "epoch": 0.2710520893514385, + "grad_norm": 558.1138916015625, + "learning_rate": 9.161125854573476e-06, + "loss": 31.4807, + "step": 134180 + }, + { + "epoch": 0.2710722899841223, + "grad_norm": 636.9767456054688, + "learning_rate": 9.160932309133959e-06, + "loss": 28.0938, + "step": 134190 + }, + { + "epoch": 0.2710924906168061, + "grad_norm": 818.5437622070312, + "learning_rate": 9.160738743414564e-06, + "loss": 30.3742, + "step": 134200 + }, + { + "epoch": 0.27111269124948995, + "grad_norm": 775.7115478515625, + "learning_rate": 9.160545157416231e-06, + "loss": 24.856, + "step": 134210 + }, + { + "epoch": 0.27113289188217377, + "grad_norm": 518.9937133789062, + "learning_rate": 9.160351551139906e-06, + "loss": 21.8429, + "step": 134220 + }, + { + "epoch": 0.2711530925148576, + "grad_norm": 245.49343872070312, + "learning_rate": 9.160157924586532e-06, + "loss": 21.4535, + "step": 134230 + }, + { + "epoch": 0.2711732931475414, + "grad_norm": 350.6053161621094, + "learning_rate": 9.159964277757054e-06, + "loss": 17.8508, + "step": 134240 + }, + { + "epoch": 0.27119349378022517, + "grad_norm": 300.557373046875, + "learning_rate": 9.159770610652413e-06, + "loss": 34.6474, + "step": 134250 + }, + { + "epoch": 0.271213694412909, + "grad_norm": 151.58203125, + "learning_rate": 9.159576923273557e-06, + "loss": 28.7178, + "step": 134260 + }, + { + "epoch": 0.2712338950455928, + "grad_norm": 730.8589477539062, + "learning_rate": 9.159383215621425e-06, + "loss": 48.4156, + "step": 134270 + }, + { + "epoch": 0.27125409567827663, + "grad_norm": 250.2581024169922, + "learning_rate": 9.159189487696965e-06, + "loss": 28.0459, + "step": 134280 + }, + { + "epoch": 0.27127429631096045, + "grad_norm": 508.3044738769531, + "learning_rate": 9.15899573950112e-06, + "loss": 25.1482, + "step": 134290 + }, + { + "epoch": 0.27129449694364427, + "grad_norm": 683.8849487304688, + "learning_rate": 9.158801971034832e-06, + "loss": 36.1386, + "step": 134300 + }, + { + "epoch": 0.2713146975763281, + "grad_norm": 217.94737243652344, + "learning_rate": 9.15860818229905e-06, + "loss": 21.7075, + "step": 134310 + }, + { + "epoch": 0.2713348982090119, + "grad_norm": 409.0342712402344, + "learning_rate": 9.158414373294715e-06, + "loss": 13.7616, + "step": 134320 + }, + { + "epoch": 0.27135509884169573, + "grad_norm": 251.45237731933594, + "learning_rate": 9.158220544022773e-06, + "loss": 21.7063, + "step": 134330 + }, + { + "epoch": 0.27137529947437955, + "grad_norm": 232.46438598632812, + "learning_rate": 9.15802669448417e-06, + "loss": 20.2642, + "step": 134340 + }, + { + "epoch": 0.27139550010706337, + "grad_norm": 563.8030395507812, + "learning_rate": 9.157832824679846e-06, + "loss": 22.5836, + "step": 134350 + }, + { + "epoch": 0.2714157007397472, + 
"grad_norm": 425.8003234863281, + "learning_rate": 9.15763893461075e-06, + "loss": 39.1836, + "step": 134360 + }, + { + "epoch": 0.271435901372431, + "grad_norm": 379.83837890625, + "learning_rate": 9.157445024277829e-06, + "loss": 16.1051, + "step": 134370 + }, + { + "epoch": 0.2714561020051148, + "grad_norm": 508.3604736328125, + "learning_rate": 9.157251093682022e-06, + "loss": 18.5233, + "step": 134380 + }, + { + "epoch": 0.2714763026377986, + "grad_norm": 325.0518798828125, + "learning_rate": 9.15705714282428e-06, + "loss": 14.0072, + "step": 134390 + }, + { + "epoch": 0.2714965032704824, + "grad_norm": 327.50885009765625, + "learning_rate": 9.156863171705543e-06, + "loss": 31.057, + "step": 134400 + }, + { + "epoch": 0.27151670390316623, + "grad_norm": 574.4484252929688, + "learning_rate": 9.15666918032676e-06, + "loss": 24.4694, + "step": 134410 + }, + { + "epoch": 0.27153690453585005, + "grad_norm": 356.52581787109375, + "learning_rate": 9.156475168688876e-06, + "loss": 21.091, + "step": 134420 + }, + { + "epoch": 0.2715571051685339, + "grad_norm": 76.50200653076172, + "learning_rate": 9.156281136792836e-06, + "loss": 24.1895, + "step": 134430 + }, + { + "epoch": 0.2715773058012177, + "grad_norm": 751.059814453125, + "learning_rate": 9.156087084639587e-06, + "loss": 25.2717, + "step": 134440 + }, + { + "epoch": 0.2715975064339015, + "grad_norm": 241.04539489746094, + "learning_rate": 9.155893012230072e-06, + "loss": 18.1675, + "step": 134450 + }, + { + "epoch": 0.27161770706658533, + "grad_norm": 484.7392883300781, + "learning_rate": 9.15569891956524e-06, + "loss": 18.552, + "step": 134460 + }, + { + "epoch": 0.27163790769926915, + "grad_norm": 369.731201171875, + "learning_rate": 9.155504806646033e-06, + "loss": 25.0763, + "step": 134470 + }, + { + "epoch": 0.271658108331953, + "grad_norm": 166.63563537597656, + "learning_rate": 9.1553106734734e-06, + "loss": 23.8329, + "step": 134480 + }, + { + "epoch": 0.2716783089646368, + "grad_norm": 72.80184173583984, + "learning_rate": 9.155116520048289e-06, + "loss": 16.3994, + "step": 134490 + }, + { + "epoch": 0.2716985095973206, + "grad_norm": 1032.9742431640625, + "learning_rate": 9.154922346371641e-06, + "loss": 33.3965, + "step": 134500 + }, + { + "epoch": 0.2717187102300044, + "grad_norm": 591.19677734375, + "learning_rate": 9.154728152444408e-06, + "loss": 14.1618, + "step": 134510 + }, + { + "epoch": 0.2717389108626882, + "grad_norm": 377.9610290527344, + "learning_rate": 9.154533938267534e-06, + "loss": 17.8524, + "step": 134520 + }, + { + "epoch": 0.271759111495372, + "grad_norm": 242.5041961669922, + "learning_rate": 9.154339703841963e-06, + "loss": 30.3299, + "step": 134530 + }, + { + "epoch": 0.27177931212805584, + "grad_norm": 560.8853759765625, + "learning_rate": 9.154145449168647e-06, + "loss": 19.1206, + "step": 134540 + }, + { + "epoch": 0.27179951276073966, + "grad_norm": 125.90225219726562, + "learning_rate": 9.153951174248528e-06, + "loss": 32.8272, + "step": 134550 + }, + { + "epoch": 0.2718197133934235, + "grad_norm": 555.1868286132812, + "learning_rate": 9.153756879082556e-06, + "loss": 25.5203, + "step": 134560 + }, + { + "epoch": 0.2718399140261073, + "grad_norm": 46.10591125488281, + "learning_rate": 9.153562563671676e-06, + "loss": 13.7072, + "step": 134570 + }, + { + "epoch": 0.2718601146587911, + "grad_norm": 78.36112976074219, + "learning_rate": 9.153368228016836e-06, + "loss": 13.7849, + "step": 134580 + }, + { + "epoch": 0.27188031529147494, + "grad_norm": 419.6697692871094, + "learning_rate": 
9.153173872118984e-06, + "loss": 32.6198, + "step": 134590 + }, + { + "epoch": 0.27190051592415876, + "grad_norm": 435.4535827636719, + "learning_rate": 9.152979495979064e-06, + "loss": 16.4727, + "step": 134600 + }, + { + "epoch": 0.2719207165568426, + "grad_norm": 292.64007568359375, + "learning_rate": 9.152785099598027e-06, + "loss": 25.1713, + "step": 134610 + }, + { + "epoch": 0.2719409171895264, + "grad_norm": 492.0068664550781, + "learning_rate": 9.152590682976818e-06, + "loss": 16.8073, + "step": 134620 + }, + { + "epoch": 0.2719611178222102, + "grad_norm": 193.37913513183594, + "learning_rate": 9.152396246116387e-06, + "loss": 39.7841, + "step": 134630 + }, + { + "epoch": 0.271981318454894, + "grad_norm": 422.52130126953125, + "learning_rate": 9.15220178901768e-06, + "loss": 17.9294, + "step": 134640 + }, + { + "epoch": 0.2720015190875778, + "grad_norm": 340.7070007324219, + "learning_rate": 9.152007311681645e-06, + "loss": 22.5974, + "step": 134650 + }, + { + "epoch": 0.2720217197202616, + "grad_norm": 427.43450927734375, + "learning_rate": 9.151812814109231e-06, + "loss": 22.4301, + "step": 134660 + }, + { + "epoch": 0.27204192035294544, + "grad_norm": 319.4281005859375, + "learning_rate": 9.151618296301383e-06, + "loss": 14.1845, + "step": 134670 + }, + { + "epoch": 0.27206212098562926, + "grad_norm": 189.7301788330078, + "learning_rate": 9.151423758259052e-06, + "loss": 29.2532, + "step": 134680 + }, + { + "epoch": 0.2720823216183131, + "grad_norm": 566.9752807617188, + "learning_rate": 9.151229199983185e-06, + "loss": 29.6061, + "step": 134690 + }, + { + "epoch": 0.2721025222509969, + "grad_norm": 69.69047546386719, + "learning_rate": 9.15103462147473e-06, + "loss": 15.4756, + "step": 134700 + }, + { + "epoch": 0.2721227228836807, + "grad_norm": 808.6002807617188, + "learning_rate": 9.150840022734637e-06, + "loss": 22.1024, + "step": 134710 + }, + { + "epoch": 0.27214292351636454, + "grad_norm": 136.4609832763672, + "learning_rate": 9.150645403763852e-06, + "loss": 17.2627, + "step": 134720 + }, + { + "epoch": 0.27216312414904836, + "grad_norm": 1079.5589599609375, + "learning_rate": 9.150450764563324e-06, + "loss": 45.1729, + "step": 134730 + }, + { + "epoch": 0.2721833247817322, + "grad_norm": 314.94708251953125, + "learning_rate": 9.150256105134003e-06, + "loss": 10.2344, + "step": 134740 + }, + { + "epoch": 0.272203525414416, + "grad_norm": 426.2752380371094, + "learning_rate": 9.150061425476839e-06, + "loss": 22.0265, + "step": 134750 + }, + { + "epoch": 0.27222372604709977, + "grad_norm": 384.5572204589844, + "learning_rate": 9.149866725592777e-06, + "loss": 28.3392, + "step": 134760 + }, + { + "epoch": 0.2722439266797836, + "grad_norm": 354.8988342285156, + "learning_rate": 9.149672005482768e-06, + "loss": 24.0602, + "step": 134770 + }, + { + "epoch": 0.2722641273124674, + "grad_norm": 716.0555419921875, + "learning_rate": 9.149477265147762e-06, + "loss": 24.6733, + "step": 134780 + }, + { + "epoch": 0.2722843279451512, + "grad_norm": 312.19732666015625, + "learning_rate": 9.149282504588706e-06, + "loss": 38.7751, + "step": 134790 + }, + { + "epoch": 0.27230452857783505, + "grad_norm": 202.42605590820312, + "learning_rate": 9.14908772380655e-06, + "loss": 17.9143, + "step": 134800 + }, + { + "epoch": 0.27232472921051887, + "grad_norm": 249.84127807617188, + "learning_rate": 9.148892922802245e-06, + "loss": 41.8903, + "step": 134810 + }, + { + "epoch": 0.2723449298432027, + "grad_norm": 289.1515808105469, + "learning_rate": 9.148698101576737e-06, + "loss": 36.8615, + 
"step": 134820 + }, + { + "epoch": 0.2723651304758865, + "grad_norm": 840.993408203125, + "learning_rate": 9.14850326013098e-06, + "loss": 24.2888, + "step": 134830 + }, + { + "epoch": 0.2723853311085703, + "grad_norm": 388.4644470214844, + "learning_rate": 9.14830839846592e-06, + "loss": 20.4274, + "step": 134840 + }, + { + "epoch": 0.27240553174125415, + "grad_norm": 184.5399627685547, + "learning_rate": 9.148113516582508e-06, + "loss": 10.6888, + "step": 134850 + }, + { + "epoch": 0.27242573237393797, + "grad_norm": 655.6101684570312, + "learning_rate": 9.147918614481693e-06, + "loss": 29.1236, + "step": 134860 + }, + { + "epoch": 0.2724459330066218, + "grad_norm": 158.5425567626953, + "learning_rate": 9.147723692164428e-06, + "loss": 23.4754, + "step": 134870 + }, + { + "epoch": 0.2724661336393056, + "grad_norm": 248.134765625, + "learning_rate": 9.14752874963166e-06, + "loss": 22.1439, + "step": 134880 + }, + { + "epoch": 0.27248633427198937, + "grad_norm": 336.59246826171875, + "learning_rate": 9.147333786884338e-06, + "loss": 21.4644, + "step": 134890 + }, + { + "epoch": 0.2725065349046732, + "grad_norm": 280.18597412109375, + "learning_rate": 9.147138803923417e-06, + "loss": 20.4713, + "step": 134900 + }, + { + "epoch": 0.272526735537357, + "grad_norm": 463.82501220703125, + "learning_rate": 9.146943800749842e-06, + "loss": 23.571, + "step": 134910 + }, + { + "epoch": 0.27254693617004083, + "grad_norm": 390.9377746582031, + "learning_rate": 9.146748777364567e-06, + "loss": 20.9144, + "step": 134920 + }, + { + "epoch": 0.27256713680272465, + "grad_norm": 326.0651550292969, + "learning_rate": 9.146553733768541e-06, + "loss": 13.6753, + "step": 134930 + }, + { + "epoch": 0.27258733743540847, + "grad_norm": 304.681396484375, + "learning_rate": 9.146358669962717e-06, + "loss": 14.8385, + "step": 134940 + }, + { + "epoch": 0.2726075380680923, + "grad_norm": 377.82598876953125, + "learning_rate": 9.146163585948041e-06, + "loss": 24.0972, + "step": 134950 + }, + { + "epoch": 0.2726277387007761, + "grad_norm": 358.47674560546875, + "learning_rate": 9.145968481725466e-06, + "loss": 21.5306, + "step": 134960 + }, + { + "epoch": 0.27264793933345993, + "grad_norm": 305.32037353515625, + "learning_rate": 9.145773357295947e-06, + "loss": 26.0249, + "step": 134970 + }, + { + "epoch": 0.27266813996614375, + "grad_norm": 463.0208740234375, + "learning_rate": 9.14557821266043e-06, + "loss": 15.3451, + "step": 134980 + }, + { + "epoch": 0.27268834059882757, + "grad_norm": 291.29364013671875, + "learning_rate": 9.145383047819867e-06, + "loss": 18.1568, + "step": 134990 + }, + { + "epoch": 0.2727085412315114, + "grad_norm": 193.42633056640625, + "learning_rate": 9.145187862775208e-06, + "loss": 17.3333, + "step": 135000 + }, + { + "epoch": 0.2727287418641952, + "grad_norm": 1213.7652587890625, + "learning_rate": 9.144992657527409e-06, + "loss": 28.9438, + "step": 135010 + }, + { + "epoch": 0.272748942496879, + "grad_norm": 187.8778076171875, + "learning_rate": 9.144797432077418e-06, + "loss": 15.6976, + "step": 135020 + }, + { + "epoch": 0.2727691431295628, + "grad_norm": 19.426258087158203, + "learning_rate": 9.144602186426186e-06, + "loss": 8.3403, + "step": 135030 + }, + { + "epoch": 0.2727893437622466, + "grad_norm": 417.7514343261719, + "learning_rate": 9.144406920574667e-06, + "loss": 20.5598, + "step": 135040 + }, + { + "epoch": 0.27280954439493044, + "grad_norm": 381.2732238769531, + "learning_rate": 9.14421163452381e-06, + "loss": 22.3431, + "step": 135050 + }, + { + "epoch": 
0.27282974502761426, + "grad_norm": 57.7036247253418, + "learning_rate": 9.144016328274569e-06, + "loss": 20.7668, + "step": 135060 + }, + { + "epoch": 0.2728499456602981, + "grad_norm": 513.7007446289062, + "learning_rate": 9.143821001827895e-06, + "loss": 17.7617, + "step": 135070 + }, + { + "epoch": 0.2728701462929819, + "grad_norm": 185.3470458984375, + "learning_rate": 9.14362565518474e-06, + "loss": 14.2232, + "step": 135080 + }, + { + "epoch": 0.2728903469256657, + "grad_norm": 332.8409729003906, + "learning_rate": 9.143430288346056e-06, + "loss": 12.8592, + "step": 135090 + }, + { + "epoch": 0.27291054755834954, + "grad_norm": 732.3867797851562, + "learning_rate": 9.143234901312794e-06, + "loss": 35.1978, + "step": 135100 + }, + { + "epoch": 0.27293074819103336, + "grad_norm": 365.0626525878906, + "learning_rate": 9.143039494085911e-06, + "loss": 11.9089, + "step": 135110 + }, + { + "epoch": 0.2729509488237172, + "grad_norm": 65.0551528930664, + "learning_rate": 9.142844066666352e-06, + "loss": 17.9208, + "step": 135120 + }, + { + "epoch": 0.272971149456401, + "grad_norm": 631.0284423828125, + "learning_rate": 9.142648619055074e-06, + "loss": 20.9481, + "step": 135130 + }, + { + "epoch": 0.2729913500890848, + "grad_norm": 689.35205078125, + "learning_rate": 9.142453151253033e-06, + "loss": 22.1548, + "step": 135140 + }, + { + "epoch": 0.2730115507217686, + "grad_norm": 425.0822448730469, + "learning_rate": 9.142257663261173e-06, + "loss": 11.7964, + "step": 135150 + }, + { + "epoch": 0.2730317513544524, + "grad_norm": 463.693359375, + "learning_rate": 9.142062155080455e-06, + "loss": 32.4459, + "step": 135160 + }, + { + "epoch": 0.2730519519871362, + "grad_norm": 76.3325424194336, + "learning_rate": 9.141866626711826e-06, + "loss": 26.7998, + "step": 135170 + }, + { + "epoch": 0.27307215261982004, + "grad_norm": 222.16542053222656, + "learning_rate": 9.141671078156242e-06, + "loss": 16.4523, + "step": 135180 + }, + { + "epoch": 0.27309235325250386, + "grad_norm": 256.1183776855469, + "learning_rate": 9.141475509414656e-06, + "loss": 12.4589, + "step": 135190 + }, + { + "epoch": 0.2731125538851877, + "grad_norm": 406.1766357421875, + "learning_rate": 9.141279920488021e-06, + "loss": 33.166, + "step": 135200 + }, + { + "epoch": 0.2731327545178715, + "grad_norm": 577.5496215820312, + "learning_rate": 9.141084311377291e-06, + "loss": 17.6375, + "step": 135210 + }, + { + "epoch": 0.2731529551505553, + "grad_norm": 258.7943115234375, + "learning_rate": 9.140888682083416e-06, + "loss": 22.6671, + "step": 135220 + }, + { + "epoch": 0.27317315578323914, + "grad_norm": 568.5740966796875, + "learning_rate": 9.140693032607353e-06, + "loss": 23.2073, + "step": 135230 + }, + { + "epoch": 0.27319335641592296, + "grad_norm": 217.25416564941406, + "learning_rate": 9.140497362950054e-06, + "loss": 20.6969, + "step": 135240 + }, + { + "epoch": 0.2732135570486068, + "grad_norm": 450.7647705078125, + "learning_rate": 9.140301673112472e-06, + "loss": 20.2831, + "step": 135250 + }, + { + "epoch": 0.2732337576812906, + "grad_norm": 381.1827392578125, + "learning_rate": 9.140105963095563e-06, + "loss": 21.1977, + "step": 135260 + }, + { + "epoch": 0.2732539583139744, + "grad_norm": 104.4590072631836, + "learning_rate": 9.13991023290028e-06, + "loss": 51.9077, + "step": 135270 + }, + { + "epoch": 0.2732741589466582, + "grad_norm": 251.69143676757812, + "learning_rate": 9.139714482527577e-06, + "loss": 17.3639, + "step": 135280 + }, + { + "epoch": 0.273294359579342, + "grad_norm": 194.8394012451172, + 
"learning_rate": 9.139518711978406e-06, + "loss": 26.8899, + "step": 135290 + }, + { + "epoch": 0.2733145602120258, + "grad_norm": 258.308349609375, + "learning_rate": 9.139322921253724e-06, + "loss": 32.5457, + "step": 135300 + }, + { + "epoch": 0.27333476084470965, + "grad_norm": 736.0946044921875, + "learning_rate": 9.139127110354484e-06, + "loss": 29.4364, + "step": 135310 + }, + { + "epoch": 0.27335496147739347, + "grad_norm": 966.8939819335938, + "learning_rate": 9.13893127928164e-06, + "loss": 32.0747, + "step": 135320 + }, + { + "epoch": 0.2733751621100773, + "grad_norm": 491.54620361328125, + "learning_rate": 9.138735428036147e-06, + "loss": 22.1527, + "step": 135330 + }, + { + "epoch": 0.2733953627427611, + "grad_norm": 543.1093139648438, + "learning_rate": 9.13853955661896e-06, + "loss": 35.6467, + "step": 135340 + }, + { + "epoch": 0.2734155633754449, + "grad_norm": 560.1389770507812, + "learning_rate": 9.138343665031033e-06, + "loss": 27.8347, + "step": 135350 + }, + { + "epoch": 0.27343576400812875, + "grad_norm": 732.5491943359375, + "learning_rate": 9.13814775327332e-06, + "loss": 23.0842, + "step": 135360 + }, + { + "epoch": 0.27345596464081257, + "grad_norm": 508.7496337890625, + "learning_rate": 9.137951821346779e-06, + "loss": 31.3813, + "step": 135370 + }, + { + "epoch": 0.2734761652734964, + "grad_norm": 120.45158386230469, + "learning_rate": 9.13775586925236e-06, + "loss": 31.4389, + "step": 135380 + }, + { + "epoch": 0.2734963659061802, + "grad_norm": 483.6618347167969, + "learning_rate": 9.137559896991023e-06, + "loss": 22.703, + "step": 135390 + }, + { + "epoch": 0.27351656653886397, + "grad_norm": 91.70558166503906, + "learning_rate": 9.13736390456372e-06, + "loss": 24.9209, + "step": 135400 + }, + { + "epoch": 0.2735367671715478, + "grad_norm": 452.6220397949219, + "learning_rate": 9.137167891971407e-06, + "loss": 19.4814, + "step": 135410 + }, + { + "epoch": 0.2735569678042316, + "grad_norm": 547.836669921875, + "learning_rate": 9.136971859215041e-06, + "loss": 36.5402, + "step": 135420 + }, + { + "epoch": 0.27357716843691543, + "grad_norm": 333.5689697265625, + "learning_rate": 9.136775806295572e-06, + "loss": 19.448, + "step": 135430 + }, + { + "epoch": 0.27359736906959925, + "grad_norm": 519.5537109375, + "learning_rate": 9.136579733213963e-06, + "loss": 30.3864, + "step": 135440 + }, + { + "epoch": 0.27361756970228307, + "grad_norm": 525.2999267578125, + "learning_rate": 9.136383639971166e-06, + "loss": 11.6618, + "step": 135450 + }, + { + "epoch": 0.2736377703349669, + "grad_norm": 274.5901794433594, + "learning_rate": 9.136187526568134e-06, + "loss": 17.5567, + "step": 135460 + }, + { + "epoch": 0.2736579709676507, + "grad_norm": 351.3760986328125, + "learning_rate": 9.135991393005827e-06, + "loss": 31.458, + "step": 135470 + }, + { + "epoch": 0.27367817160033453, + "grad_norm": 220.59791564941406, + "learning_rate": 9.135795239285201e-06, + "loss": 20.3189, + "step": 135480 + }, + { + "epoch": 0.27369837223301835, + "grad_norm": 749.9730834960938, + "learning_rate": 9.13559906540721e-06, + "loss": 38.2688, + "step": 135490 + }, + { + "epoch": 0.27371857286570217, + "grad_norm": 588.3572998046875, + "learning_rate": 9.13540287137281e-06, + "loss": 25.7997, + "step": 135500 + }, + { + "epoch": 0.273738773498386, + "grad_norm": 754.5552978515625, + "learning_rate": 9.135206657182958e-06, + "loss": 12.5387, + "step": 135510 + }, + { + "epoch": 0.2737589741310698, + "grad_norm": 377.010498046875, + "learning_rate": 9.13501042283861e-06, + "loss": 12.7068, + 
"step": 135520 + }, + { + "epoch": 0.2737791747637536, + "grad_norm": 462.2884521484375, + "learning_rate": 9.134814168340725e-06, + "loss": 19.7204, + "step": 135530 + }, + { + "epoch": 0.2737993753964374, + "grad_norm": 245.97598266601562, + "learning_rate": 9.134617893690253e-06, + "loss": 11.4352, + "step": 135540 + }, + { + "epoch": 0.2738195760291212, + "grad_norm": 366.9969482421875, + "learning_rate": 9.13442159888816e-06, + "loss": 31.2602, + "step": 135550 + }, + { + "epoch": 0.27383977666180503, + "grad_norm": 567.2673950195312, + "learning_rate": 9.134225283935395e-06, + "loss": 19.0601, + "step": 135560 + }, + { + "epoch": 0.27385997729448885, + "grad_norm": 236.04896545410156, + "learning_rate": 9.134028948832915e-06, + "loss": 19.0393, + "step": 135570 + }, + { + "epoch": 0.2738801779271727, + "grad_norm": 197.89019775390625, + "learning_rate": 9.133832593581683e-06, + "loss": 17.6415, + "step": 135580 + }, + { + "epoch": 0.2739003785598565, + "grad_norm": 616.6891479492188, + "learning_rate": 9.133636218182652e-06, + "loss": 45.4994, + "step": 135590 + }, + { + "epoch": 0.2739205791925403, + "grad_norm": 350.0677490234375, + "learning_rate": 9.133439822636779e-06, + "loss": 28.2331, + "step": 135600 + }, + { + "epoch": 0.27394077982522413, + "grad_norm": 313.6117248535156, + "learning_rate": 9.13324340694502e-06, + "loss": 18.7268, + "step": 135610 + }, + { + "epoch": 0.27396098045790795, + "grad_norm": 572.9440307617188, + "learning_rate": 9.133046971108335e-06, + "loss": 10.22, + "step": 135620 + }, + { + "epoch": 0.2739811810905918, + "grad_norm": 337.5039367675781, + "learning_rate": 9.13285051512768e-06, + "loss": 17.1278, + "step": 135630 + }, + { + "epoch": 0.2740013817232756, + "grad_norm": 347.3039855957031, + "learning_rate": 9.132654039004015e-06, + "loss": 14.5573, + "step": 135640 + }, + { + "epoch": 0.2740215823559594, + "grad_norm": 186.5449676513672, + "learning_rate": 9.132457542738292e-06, + "loss": 9.6061, + "step": 135650 + }, + { + "epoch": 0.2740417829886432, + "grad_norm": 577.7155151367188, + "learning_rate": 9.132261026331473e-06, + "loss": 20.9212, + "step": 135660 + }, + { + "epoch": 0.274061983621327, + "grad_norm": 642.8598022460938, + "learning_rate": 9.132064489784516e-06, + "loss": 19.4279, + "step": 135670 + }, + { + "epoch": 0.2740821842540108, + "grad_norm": 127.74354553222656, + "learning_rate": 9.131867933098379e-06, + "loss": 25.3777, + "step": 135680 + }, + { + "epoch": 0.27410238488669464, + "grad_norm": 364.76190185546875, + "learning_rate": 9.131671356274016e-06, + "loss": 24.1008, + "step": 135690 + }, + { + "epoch": 0.27412258551937846, + "grad_norm": 451.497802734375, + "learning_rate": 9.13147475931239e-06, + "loss": 13.1718, + "step": 135700 + }, + { + "epoch": 0.2741427861520623, + "grad_norm": 819.51318359375, + "learning_rate": 9.131278142214457e-06, + "loss": 19.6711, + "step": 135710 + }, + { + "epoch": 0.2741629867847461, + "grad_norm": 478.2668151855469, + "learning_rate": 9.131081504981175e-06, + "loss": 15.8314, + "step": 135720 + }, + { + "epoch": 0.2741831874174299, + "grad_norm": 291.201416015625, + "learning_rate": 9.1308848476135e-06, + "loss": 44.0003, + "step": 135730 + }, + { + "epoch": 0.27420338805011374, + "grad_norm": 237.51837158203125, + "learning_rate": 9.130688170112398e-06, + "loss": 30.6632, + "step": 135740 + }, + { + "epoch": 0.27422358868279756, + "grad_norm": 571.5438232421875, + "learning_rate": 9.130491472478819e-06, + "loss": 17.9365, + "step": 135750 + }, + { + "epoch": 0.2742437893154814, + 
"grad_norm": 790.66259765625, + "learning_rate": 9.130294754713728e-06, + "loss": 20.6232, + "step": 135760 + }, + { + "epoch": 0.2742639899481652, + "grad_norm": 28.99637222290039, + "learning_rate": 9.13009801681808e-06, + "loss": 30.6997, + "step": 135770 + }, + { + "epoch": 0.274284190580849, + "grad_norm": 304.7511291503906, + "learning_rate": 9.129901258792838e-06, + "loss": 53.585, + "step": 135780 + }, + { + "epoch": 0.2743043912135328, + "grad_norm": 62.51104736328125, + "learning_rate": 9.129704480638955e-06, + "loss": 16.246, + "step": 135790 + }, + { + "epoch": 0.2743245918462166, + "grad_norm": 206.17721557617188, + "learning_rate": 9.129507682357393e-06, + "loss": 11.2183, + "step": 135800 + }, + { + "epoch": 0.2743447924789004, + "grad_norm": 688.3556518554688, + "learning_rate": 9.129310863949113e-06, + "loss": 46.0043, + "step": 135810 + }, + { + "epoch": 0.27436499311158424, + "grad_norm": 758.4866333007812, + "learning_rate": 9.129114025415071e-06, + "loss": 28.1612, + "step": 135820 + }, + { + "epoch": 0.27438519374426806, + "grad_norm": 221.9129180908203, + "learning_rate": 9.12891716675623e-06, + "loss": 29.148, + "step": 135830 + }, + { + "epoch": 0.2744053943769519, + "grad_norm": 1008.09033203125, + "learning_rate": 9.128720287973547e-06, + "loss": 23.7683, + "step": 135840 + }, + { + "epoch": 0.2744255950096357, + "grad_norm": 1713.9359130859375, + "learning_rate": 9.128523389067983e-06, + "loss": 31.9497, + "step": 135850 + }, + { + "epoch": 0.2744457956423195, + "grad_norm": 713.1209106445312, + "learning_rate": 9.128326470040495e-06, + "loss": 21.8295, + "step": 135860 + }, + { + "epoch": 0.27446599627500334, + "grad_norm": 228.49380493164062, + "learning_rate": 9.128129530892044e-06, + "loss": 15.5112, + "step": 135870 + }, + { + "epoch": 0.27448619690768716, + "grad_norm": 429.3661804199219, + "learning_rate": 9.127932571623592e-06, + "loss": 18.5801, + "step": 135880 + }, + { + "epoch": 0.274506397540371, + "grad_norm": 302.1600341796875, + "learning_rate": 9.127735592236097e-06, + "loss": 14.2539, + "step": 135890 + }, + { + "epoch": 0.2745265981730548, + "grad_norm": 334.8915100097656, + "learning_rate": 9.12753859273052e-06, + "loss": 24.5498, + "step": 135900 + }, + { + "epoch": 0.2745467988057386, + "grad_norm": 91.45170593261719, + "learning_rate": 9.127341573107819e-06, + "loss": 21.5034, + "step": 135910 + }, + { + "epoch": 0.2745669994384224, + "grad_norm": 446.6305236816406, + "learning_rate": 9.127144533368956e-06, + "loss": 23.2795, + "step": 135920 + }, + { + "epoch": 0.2745872000711062, + "grad_norm": 672.91455078125, + "learning_rate": 9.126947473514891e-06, + "loss": 45.7162, + "step": 135930 + }, + { + "epoch": 0.27460740070379, + "grad_norm": 316.0632629394531, + "learning_rate": 9.126750393546587e-06, + "loss": 28.0067, + "step": 135940 + }, + { + "epoch": 0.27462760133647385, + "grad_norm": 331.94342041015625, + "learning_rate": 9.126553293465e-06, + "loss": 22.472, + "step": 135950 + }, + { + "epoch": 0.27464780196915767, + "grad_norm": 296.5798645019531, + "learning_rate": 9.126356173271092e-06, + "loss": 28.1315, + "step": 135960 + }, + { + "epoch": 0.2746680026018415, + "grad_norm": 1046.6729736328125, + "learning_rate": 9.126159032965825e-06, + "loss": 34.1038, + "step": 135970 + }, + { + "epoch": 0.2746882032345253, + "grad_norm": 691.593994140625, + "learning_rate": 9.125961872550159e-06, + "loss": 32.512, + "step": 135980 + }, + { + "epoch": 0.2747084038672091, + "grad_norm": 184.54931640625, + "learning_rate": 
9.125764692025057e-06, + "loss": 17.5683, + "step": 135990 + }, + { + "epoch": 0.27472860449989295, + "grad_norm": 220.64520263671875, + "learning_rate": 9.125567491391476e-06, + "loss": 16.7871, + "step": 136000 + }, + { + "epoch": 0.27474880513257677, + "grad_norm": 221.02293395996094, + "learning_rate": 9.125370270650381e-06, + "loss": 11.8503, + "step": 136010 + }, + { + "epoch": 0.2747690057652606, + "grad_norm": 388.2686462402344, + "learning_rate": 9.125173029802732e-06, + "loss": 23.263, + "step": 136020 + }, + { + "epoch": 0.2747892063979444, + "grad_norm": 117.8719253540039, + "learning_rate": 9.124975768849489e-06, + "loss": 14.5228, + "step": 136030 + }, + { + "epoch": 0.27480940703062817, + "grad_norm": 450.4751281738281, + "learning_rate": 9.124778487791615e-06, + "loss": 17.4647, + "step": 136040 + }, + { + "epoch": 0.274829607663312, + "grad_norm": 318.2745361328125, + "learning_rate": 9.12458118663007e-06, + "loss": 16.2264, + "step": 136050 + }, + { + "epoch": 0.2748498082959958, + "grad_norm": 423.7740173339844, + "learning_rate": 9.124383865365817e-06, + "loss": 17.5846, + "step": 136060 + }, + { + "epoch": 0.27487000892867963, + "grad_norm": 872.8343505859375, + "learning_rate": 9.124186523999818e-06, + "loss": 40.837, + "step": 136070 + }, + { + "epoch": 0.27489020956136345, + "grad_norm": 488.3759460449219, + "learning_rate": 9.123989162533032e-06, + "loss": 21.9602, + "step": 136080 + }, + { + "epoch": 0.27491041019404727, + "grad_norm": 255.27935791015625, + "learning_rate": 9.123791780966425e-06, + "loss": 24.8364, + "step": 136090 + }, + { + "epoch": 0.2749306108267311, + "grad_norm": 222.71697998046875, + "learning_rate": 9.123594379300956e-06, + "loss": 24.8809, + "step": 136100 + }, + { + "epoch": 0.2749508114594149, + "grad_norm": 311.8800964355469, + "learning_rate": 9.12339695753759e-06, + "loss": 21.3448, + "step": 136110 + }, + { + "epoch": 0.27497101209209873, + "grad_norm": 176.24217224121094, + "learning_rate": 9.123199515677285e-06, + "loss": 17.7472, + "step": 136120 + }, + { + "epoch": 0.27499121272478255, + "grad_norm": 332.91845703125, + "learning_rate": 9.123002053721006e-06, + "loss": 17.2235, + "step": 136130 + }, + { + "epoch": 0.27501141335746637, + "grad_norm": 164.54354858398438, + "learning_rate": 9.122804571669717e-06, + "loss": 24.1397, + "step": 136140 + }, + { + "epoch": 0.2750316139901502, + "grad_norm": 601.98193359375, + "learning_rate": 9.122607069524377e-06, + "loss": 17.1016, + "step": 136150 + }, + { + "epoch": 0.275051814622834, + "grad_norm": 716.9262084960938, + "learning_rate": 9.122409547285948e-06, + "loss": 17.5064, + "step": 136160 + }, + { + "epoch": 0.2750720152555178, + "grad_norm": 343.188720703125, + "learning_rate": 9.122212004955399e-06, + "loss": 14.1656, + "step": 136170 + }, + { + "epoch": 0.2750922158882016, + "grad_norm": 482.0539245605469, + "learning_rate": 9.122014442533686e-06, + "loss": 22.3766, + "step": 136180 + }, + { + "epoch": 0.2751124165208854, + "grad_norm": 420.0506591796875, + "learning_rate": 9.121816860021776e-06, + "loss": 21.8825, + "step": 136190 + }, + { + "epoch": 0.27513261715356924, + "grad_norm": 434.53692626953125, + "learning_rate": 9.12161925742063e-06, + "loss": 23.3705, + "step": 136200 + }, + { + "epoch": 0.27515281778625306, + "grad_norm": 308.1763916015625, + "learning_rate": 9.121421634731211e-06, + "loss": 20.2883, + "step": 136210 + }, + { + "epoch": 0.2751730184189369, + "grad_norm": 109.4878158569336, + "learning_rate": 9.121223991954484e-06, + "loss": 26.7603, + "step": 
136220 + }, + { + "epoch": 0.2751932190516207, + "grad_norm": 296.6226806640625, + "learning_rate": 9.12102632909141e-06, + "loss": 14.9261, + "step": 136230 + }, + { + "epoch": 0.2752134196843045, + "grad_norm": 22.82421875, + "learning_rate": 9.120828646142954e-06, + "loss": 12.6671, + "step": 136240 + }, + { + "epoch": 0.27523362031698834, + "grad_norm": 100.18248748779297, + "learning_rate": 9.120630943110078e-06, + "loss": 23.0864, + "step": 136250 + }, + { + "epoch": 0.27525382094967216, + "grad_norm": 198.57740783691406, + "learning_rate": 9.120433219993749e-06, + "loss": 22.5658, + "step": 136260 + }, + { + "epoch": 0.275274021582356, + "grad_norm": 517.2141723632812, + "learning_rate": 9.120235476794926e-06, + "loss": 18.6785, + "step": 136270 + }, + { + "epoch": 0.2752942222150398, + "grad_norm": 132.4513702392578, + "learning_rate": 9.120037713514575e-06, + "loss": 22.0669, + "step": 136280 + }, + { + "epoch": 0.2753144228477236, + "grad_norm": 476.9325256347656, + "learning_rate": 9.119839930153663e-06, + "loss": 26.4279, + "step": 136290 + }, + { + "epoch": 0.2753346234804074, + "grad_norm": 94.77568817138672, + "learning_rate": 9.119642126713147e-06, + "loss": 36.7561, + "step": 136300 + }, + { + "epoch": 0.2753548241130912, + "grad_norm": 828.9990234375, + "learning_rate": 9.119444303193997e-06, + "loss": 17.9698, + "step": 136310 + }, + { + "epoch": 0.275375024745775, + "grad_norm": 971.7246704101562, + "learning_rate": 9.119246459597173e-06, + "loss": 28.4067, + "step": 136320 + }, + { + "epoch": 0.27539522537845884, + "grad_norm": 543.5722045898438, + "learning_rate": 9.119048595923643e-06, + "loss": 19.151, + "step": 136330 + }, + { + "epoch": 0.27541542601114266, + "grad_norm": 295.9839172363281, + "learning_rate": 9.11885071217437e-06, + "loss": 34.4547, + "step": 136340 + }, + { + "epoch": 0.2754356266438265, + "grad_norm": 841.3892211914062, + "learning_rate": 9.11865280835032e-06, + "loss": 24.7327, + "step": 136350 + }, + { + "epoch": 0.2754558272765103, + "grad_norm": 487.9488220214844, + "learning_rate": 9.118454884452452e-06, + "loss": 35.7545, + "step": 136360 + }, + { + "epoch": 0.2754760279091941, + "grad_norm": 439.5728454589844, + "learning_rate": 9.118256940481735e-06, + "loss": 14.4582, + "step": 136370 + }, + { + "epoch": 0.27549622854187794, + "grad_norm": 16.771465301513672, + "learning_rate": 9.118058976439134e-06, + "loss": 16.6588, + "step": 136380 + }, + { + "epoch": 0.27551642917456176, + "grad_norm": 1496.6934814453125, + "learning_rate": 9.117860992325613e-06, + "loss": 37.2434, + "step": 136390 + }, + { + "epoch": 0.2755366298072456, + "grad_norm": 310.22509765625, + "learning_rate": 9.117662988142138e-06, + "loss": 22.0505, + "step": 136400 + }, + { + "epoch": 0.2755568304399294, + "grad_norm": 385.6380615234375, + "learning_rate": 9.117464963889672e-06, + "loss": 25.1847, + "step": 136410 + }, + { + "epoch": 0.2755770310726132, + "grad_norm": 446.22418212890625, + "learning_rate": 9.117266919569181e-06, + "loss": 24.8115, + "step": 136420 + }, + { + "epoch": 0.275597231705297, + "grad_norm": 458.6966857910156, + "learning_rate": 9.11706885518163e-06, + "loss": 18.2814, + "step": 136430 + }, + { + "epoch": 0.2756174323379808, + "grad_norm": 117.55816650390625, + "learning_rate": 9.116870770727986e-06, + "loss": 17.5085, + "step": 136440 + }, + { + "epoch": 0.2756376329706646, + "grad_norm": 279.3009338378906, + "learning_rate": 9.116672666209211e-06, + "loss": 19.4594, + "step": 136450 + }, + { + "epoch": 0.27565783360334845, + "grad_norm": 
293.4078674316406, + "learning_rate": 9.116474541626277e-06, + "loss": 32.4162, + "step": 136460 + }, + { + "epoch": 0.27567803423603227, + "grad_norm": 301.1860656738281, + "learning_rate": 9.116276396980141e-06, + "loss": 14.9995, + "step": 136470 + }, + { + "epoch": 0.2756982348687161, + "grad_norm": 8.642691612243652, + "learning_rate": 9.116078232271774e-06, + "loss": 24.8244, + "step": 136480 + }, + { + "epoch": 0.2757184355013999, + "grad_norm": 378.1164245605469, + "learning_rate": 9.115880047502142e-06, + "loss": 11.2514, + "step": 136490 + }, + { + "epoch": 0.2757386361340837, + "grad_norm": 299.1815185546875, + "learning_rate": 9.115681842672211e-06, + "loss": 18.7732, + "step": 136500 + }, + { + "epoch": 0.27575883676676755, + "grad_norm": 486.540283203125, + "learning_rate": 9.115483617782943e-06, + "loss": 32.1482, + "step": 136510 + }, + { + "epoch": 0.27577903739945137, + "grad_norm": 346.2464904785156, + "learning_rate": 9.115285372835309e-06, + "loss": 27.7063, + "step": 136520 + }, + { + "epoch": 0.2757992380321352, + "grad_norm": 408.13946533203125, + "learning_rate": 9.115087107830272e-06, + "loss": 35.1097, + "step": 136530 + }, + { + "epoch": 0.275819438664819, + "grad_norm": 242.6774444580078, + "learning_rate": 9.114888822768801e-06, + "loss": 13.6486, + "step": 136540 + }, + { + "epoch": 0.2758396392975028, + "grad_norm": 289.8144226074219, + "learning_rate": 9.11469051765186e-06, + "loss": 19.7283, + "step": 136550 + }, + { + "epoch": 0.2758598399301866, + "grad_norm": 250.8406219482422, + "learning_rate": 9.114492192480416e-06, + "loss": 17.4547, + "step": 136560 + }, + { + "epoch": 0.2758800405628704, + "grad_norm": 597.64599609375, + "learning_rate": 9.114293847255437e-06, + "loss": 23.1336, + "step": 136570 + }, + { + "epoch": 0.27590024119555423, + "grad_norm": 477.0276184082031, + "learning_rate": 9.114095481977887e-06, + "loss": 30.7469, + "step": 136580 + }, + { + "epoch": 0.27592044182823805, + "grad_norm": 411.8534240722656, + "learning_rate": 9.113897096648735e-06, + "loss": 17.9106, + "step": 136590 + }, + { + "epoch": 0.27594064246092187, + "grad_norm": 419.3416442871094, + "learning_rate": 9.11369869126895e-06, + "loss": 23.661, + "step": 136600 + }, + { + "epoch": 0.2759608430936057, + "grad_norm": 491.56884765625, + "learning_rate": 9.113500265839495e-06, + "loss": 28.9299, + "step": 136610 + }, + { + "epoch": 0.2759810437262895, + "grad_norm": 1747.8265380859375, + "learning_rate": 9.113301820361339e-06, + "loss": 41.6999, + "step": 136620 + }, + { + "epoch": 0.27600124435897333, + "grad_norm": 372.26727294921875, + "learning_rate": 9.113103354835448e-06, + "loss": 30.6698, + "step": 136630 + }, + { + "epoch": 0.27602144499165715, + "grad_norm": 421.3455810546875, + "learning_rate": 9.112904869262791e-06, + "loss": 28.1034, + "step": 136640 + }, + { + "epoch": 0.27604164562434097, + "grad_norm": 437.5903625488281, + "learning_rate": 9.112706363644334e-06, + "loss": 42.669, + "step": 136650 + }, + { + "epoch": 0.2760618462570248, + "grad_norm": 630.2725830078125, + "learning_rate": 9.112507837981045e-06, + "loss": 28.7041, + "step": 136660 + }, + { + "epoch": 0.2760820468897086, + "grad_norm": 268.4493408203125, + "learning_rate": 9.112309292273891e-06, + "loss": 31.938, + "step": 136670 + }, + { + "epoch": 0.2761022475223924, + "grad_norm": 440.1805114746094, + "learning_rate": 9.112110726523841e-06, + "loss": 28.1518, + "step": 136680 + }, + { + "epoch": 0.2761224481550762, + "grad_norm": 338.7571716308594, + "learning_rate": 
9.111912140731862e-06, + "loss": 26.5386, + "step": 136690 + }, + { + "epoch": 0.27614264878776, + "grad_norm": 194.66127014160156, + "learning_rate": 9.111713534898923e-06, + "loss": 12.6745, + "step": 136700 + }, + { + "epoch": 0.27616284942044383, + "grad_norm": 289.3456726074219, + "learning_rate": 9.111514909025988e-06, + "loss": 29.55, + "step": 136710 + }, + { + "epoch": 0.27618305005312765, + "grad_norm": 634.5272216796875, + "learning_rate": 9.11131626311403e-06, + "loss": 16.9974, + "step": 136720 + }, + { + "epoch": 0.2762032506858115, + "grad_norm": 572.50439453125, + "learning_rate": 9.111117597164016e-06, + "loss": 25.0887, + "step": 136730 + }, + { + "epoch": 0.2762234513184953, + "grad_norm": 303.0616760253906, + "learning_rate": 9.110918911176911e-06, + "loss": 12.9918, + "step": 136740 + }, + { + "epoch": 0.2762436519511791, + "grad_norm": 376.22662353515625, + "learning_rate": 9.110720205153688e-06, + "loss": 17.0476, + "step": 136750 + }, + { + "epoch": 0.27626385258386293, + "grad_norm": 270.92962646484375, + "learning_rate": 9.110521479095314e-06, + "loss": 27.9115, + "step": 136760 + }, + { + "epoch": 0.27628405321654675, + "grad_norm": 160.26510620117188, + "learning_rate": 9.110322733002755e-06, + "loss": 30.4222, + "step": 136770 + }, + { + "epoch": 0.2763042538492306, + "grad_norm": 377.6893005371094, + "learning_rate": 9.11012396687698e-06, + "loss": 22.0914, + "step": 136780 + }, + { + "epoch": 0.2763244544819144, + "grad_norm": 424.3580017089844, + "learning_rate": 9.10992518071896e-06, + "loss": 38.3696, + "step": 136790 + }, + { + "epoch": 0.2763446551145982, + "grad_norm": 164.9799041748047, + "learning_rate": 9.109726374529666e-06, + "loss": 15.7721, + "step": 136800 + }, + { + "epoch": 0.276364855747282, + "grad_norm": 457.60418701171875, + "learning_rate": 9.109527548310062e-06, + "loss": 11.6539, + "step": 136810 + }, + { + "epoch": 0.2763850563799658, + "grad_norm": 350.32342529296875, + "learning_rate": 9.10932870206112e-06, + "loss": 25.4811, + "step": 136820 + }, + { + "epoch": 0.2764052570126496, + "grad_norm": 198.0208740234375, + "learning_rate": 9.109129835783808e-06, + "loss": 15.2665, + "step": 136830 + }, + { + "epoch": 0.27642545764533344, + "grad_norm": 669.3509521484375, + "learning_rate": 9.108930949479096e-06, + "loss": 18.8688, + "step": 136840 + }, + { + "epoch": 0.27644565827801726, + "grad_norm": 413.8048400878906, + "learning_rate": 9.108732043147952e-06, + "loss": 13.1434, + "step": 136850 + }, + { + "epoch": 0.2764658589107011, + "grad_norm": 299.4035949707031, + "learning_rate": 9.108533116791347e-06, + "loss": 10.0517, + "step": 136860 + }, + { + "epoch": 0.2764860595433849, + "grad_norm": 273.9521179199219, + "learning_rate": 9.108334170410249e-06, + "loss": 18.742, + "step": 136870 + }, + { + "epoch": 0.2765062601760687, + "grad_norm": 104.94783020019531, + "learning_rate": 9.10813520400563e-06, + "loss": 22.7361, + "step": 136880 + }, + { + "epoch": 0.27652646080875254, + "grad_norm": 210.21517944335938, + "learning_rate": 9.107936217578457e-06, + "loss": 18.2072, + "step": 136890 + }, + { + "epoch": 0.27654666144143636, + "grad_norm": 998.265380859375, + "learning_rate": 9.107737211129702e-06, + "loss": 39.4067, + "step": 136900 + }, + { + "epoch": 0.2765668620741202, + "grad_norm": 459.36737060546875, + "learning_rate": 9.107538184660333e-06, + "loss": 14.1219, + "step": 136910 + }, + { + "epoch": 0.276587062706804, + "grad_norm": 110.8598861694336, + "learning_rate": 9.107339138171321e-06, + "loss": 16.6522, + "step": 
136920 + }, + { + "epoch": 0.2766072633394878, + "grad_norm": 385.3807373046875, + "learning_rate": 9.107140071663637e-06, + "loss": 20.694, + "step": 136930 + }, + { + "epoch": 0.2766274639721716, + "grad_norm": 422.10626220703125, + "learning_rate": 9.106940985138251e-06, + "loss": 18.0311, + "step": 136940 + }, + { + "epoch": 0.2766476646048554, + "grad_norm": 811.509033203125, + "learning_rate": 9.106741878596132e-06, + "loss": 29.9824, + "step": 136950 + }, + { + "epoch": 0.2766678652375392, + "grad_norm": 368.9154968261719, + "learning_rate": 9.10654275203825e-06, + "loss": 12.0087, + "step": 136960 + }, + { + "epoch": 0.27668806587022304, + "grad_norm": 143.35433959960938, + "learning_rate": 9.106343605465578e-06, + "loss": 25.1284, + "step": 136970 + }, + { + "epoch": 0.27670826650290686, + "grad_norm": 767.0995483398438, + "learning_rate": 9.106144438879086e-06, + "loss": 34.238, + "step": 136980 + }, + { + "epoch": 0.2767284671355907, + "grad_norm": 163.25469970703125, + "learning_rate": 9.105945252279743e-06, + "loss": 24.4687, + "step": 136990 + }, + { + "epoch": 0.2767486677682745, + "grad_norm": 538.8115234375, + "learning_rate": 9.10574604566852e-06, + "loss": 27.7695, + "step": 137000 + }, + { + "epoch": 0.2767688684009583, + "grad_norm": 482.6521301269531, + "learning_rate": 9.105546819046391e-06, + "loss": 14.6953, + "step": 137010 + }, + { + "epoch": 0.27678906903364214, + "grad_norm": 463.677734375, + "learning_rate": 9.105347572414323e-06, + "loss": 15.1554, + "step": 137020 + }, + { + "epoch": 0.27680926966632596, + "grad_norm": 182.11375427246094, + "learning_rate": 9.10514830577329e-06, + "loss": 17.1029, + "step": 137030 + }, + { + "epoch": 0.2768294702990098, + "grad_norm": 751.0150756835938, + "learning_rate": 9.104949019124261e-06, + "loss": 30.9878, + "step": 137040 + }, + { + "epoch": 0.2768496709316936, + "grad_norm": 570.7824096679688, + "learning_rate": 9.104749712468208e-06, + "loss": 24.0046, + "step": 137050 + }, + { + "epoch": 0.2768698715643774, + "grad_norm": 13508.3525390625, + "learning_rate": 9.104550385806103e-06, + "loss": 39.5268, + "step": 137060 + }, + { + "epoch": 0.2768900721970612, + "grad_norm": 792.635009765625, + "learning_rate": 9.104351039138919e-06, + "loss": 27.5066, + "step": 137070 + }, + { + "epoch": 0.276910272829745, + "grad_norm": 724.1927490234375, + "learning_rate": 9.104151672467625e-06, + "loss": 27.1776, + "step": 137080 + }, + { + "epoch": 0.27693047346242883, + "grad_norm": 339.5395812988281, + "learning_rate": 9.103952285793192e-06, + "loss": 26.116, + "step": 137090 + }, + { + "epoch": 0.27695067409511265, + "grad_norm": 293.49639892578125, + "learning_rate": 9.103752879116595e-06, + "loss": 27.5327, + "step": 137100 + }, + { + "epoch": 0.27697087472779647, + "grad_norm": 701.0228881835938, + "learning_rate": 9.103553452438803e-06, + "loss": 21.5859, + "step": 137110 + }, + { + "epoch": 0.2769910753604803, + "grad_norm": 801.4207763671875, + "learning_rate": 9.10335400576079e-06, + "loss": 15.4929, + "step": 137120 + }, + { + "epoch": 0.2770112759931641, + "grad_norm": 552.5881958007812, + "learning_rate": 9.103154539083527e-06, + "loss": 29.8185, + "step": 137130 + }, + { + "epoch": 0.27703147662584793, + "grad_norm": 1147.41943359375, + "learning_rate": 9.102955052407986e-06, + "loss": 24.181, + "step": 137140 + }, + { + "epoch": 0.27705167725853175, + "grad_norm": 297.5654296875, + "learning_rate": 9.102755545735141e-06, + "loss": 21.4677, + "step": 137150 + }, + { + "epoch": 0.27707187789121557, + "grad_norm": 
833.3944702148438, + "learning_rate": 9.102556019065962e-06, + "loss": 27.3052, + "step": 137160 + }, + { + "epoch": 0.2770920785238994, + "grad_norm": 398.9085388183594, + "learning_rate": 9.102356472401424e-06, + "loss": 10.619, + "step": 137170 + }, + { + "epoch": 0.2771122791565832, + "grad_norm": 830.4307861328125, + "learning_rate": 9.102156905742497e-06, + "loss": 29.5127, + "step": 137180 + }, + { + "epoch": 0.27713247978926697, + "grad_norm": 403.244873046875, + "learning_rate": 9.101957319090153e-06, + "loss": 26.0385, + "step": 137190 + }, + { + "epoch": 0.2771526804219508, + "grad_norm": 763.9085083007812, + "learning_rate": 9.101757712445369e-06, + "loss": 28.5399, + "step": 137200 + }, + { + "epoch": 0.2771728810546346, + "grad_norm": 391.4627380371094, + "learning_rate": 9.101558085809114e-06, + "loss": 21.6998, + "step": 137210 + }, + { + "epoch": 0.27719308168731843, + "grad_norm": 383.41033935546875, + "learning_rate": 9.101358439182364e-06, + "loss": 11.3443, + "step": 137220 + }, + { + "epoch": 0.27721328232000225, + "grad_norm": 249.82388305664062, + "learning_rate": 9.101158772566088e-06, + "loss": 14.4067, + "step": 137230 + }, + { + "epoch": 0.27723348295268607, + "grad_norm": 342.18780517578125, + "learning_rate": 9.100959085961263e-06, + "loss": 38.9308, + "step": 137240 + }, + { + "epoch": 0.2772536835853699, + "grad_norm": 528.7169189453125, + "learning_rate": 9.100759379368863e-06, + "loss": 25.9011, + "step": 137250 + }, + { + "epoch": 0.2772738842180537, + "grad_norm": 38.9533805847168, + "learning_rate": 9.100559652789856e-06, + "loss": 19.0543, + "step": 137260 + }, + { + "epoch": 0.27729408485073753, + "grad_norm": 216.45538330078125, + "learning_rate": 9.100359906225219e-06, + "loss": 24.176, + "step": 137270 + }, + { + "epoch": 0.27731428548342135, + "grad_norm": 231.65589904785156, + "learning_rate": 9.100160139675925e-06, + "loss": 15.8339, + "step": 137280 + }, + { + "epoch": 0.27733448611610517, + "grad_norm": 677.2991333007812, + "learning_rate": 9.099960353142948e-06, + "loss": 14.7094, + "step": 137290 + }, + { + "epoch": 0.277354686748789, + "grad_norm": 459.41119384765625, + "learning_rate": 9.099760546627262e-06, + "loss": 27.8189, + "step": 137300 + }, + { + "epoch": 0.2773748873814728, + "grad_norm": 320.6719970703125, + "learning_rate": 9.099560720129842e-06, + "loss": 19.2016, + "step": 137310 + }, + { + "epoch": 0.2773950880141566, + "grad_norm": 900.2733154296875, + "learning_rate": 9.099360873651658e-06, + "loss": 24.9501, + "step": 137320 + }, + { + "epoch": 0.2774152886468404, + "grad_norm": 383.8463134765625, + "learning_rate": 9.099161007193686e-06, + "loss": 16.2387, + "step": 137330 + }, + { + "epoch": 0.2774354892795242, + "grad_norm": 370.25323486328125, + "learning_rate": 9.098961120756902e-06, + "loss": 19.4532, + "step": 137340 + }, + { + "epoch": 0.27745568991220804, + "grad_norm": 474.8215637207031, + "learning_rate": 9.098761214342277e-06, + "loss": 18.6614, + "step": 137350 + }, + { + "epoch": 0.27747589054489186, + "grad_norm": 214.69796752929688, + "learning_rate": 9.098561287950788e-06, + "loss": 22.1373, + "step": 137360 + }, + { + "epoch": 0.2774960911775757, + "grad_norm": 696.3637084960938, + "learning_rate": 9.098361341583408e-06, + "loss": 30.7538, + "step": 137370 + }, + { + "epoch": 0.2775162918102595, + "grad_norm": 328.6319885253906, + "learning_rate": 9.098161375241112e-06, + "loss": 26.2747, + "step": 137380 + }, + { + "epoch": 0.2775364924429433, + "grad_norm": 0.0, + "learning_rate": 9.097961388924875e-06, 
+ "loss": 20.9066, + "step": 137390 + }, + { + "epoch": 0.27755669307562714, + "grad_norm": 348.576904296875, + "learning_rate": 9.09776138263567e-06, + "loss": 30.0631, + "step": 137400 + }, + { + "epoch": 0.27757689370831096, + "grad_norm": 211.87579345703125, + "learning_rate": 9.097561356374473e-06, + "loss": 13.9718, + "step": 137410 + }, + { + "epoch": 0.2775970943409948, + "grad_norm": 936.2708740234375, + "learning_rate": 9.097361310142261e-06, + "loss": 32.1798, + "step": 137420 + }, + { + "epoch": 0.2776172949736786, + "grad_norm": 265.9165344238281, + "learning_rate": 9.097161243940005e-06, + "loss": 16.4136, + "step": 137430 + }, + { + "epoch": 0.2776374956063624, + "grad_norm": 676.0292358398438, + "learning_rate": 9.096961157768681e-06, + "loss": 26.7054, + "step": 137440 + }, + { + "epoch": 0.2776576962390462, + "grad_norm": 174.90513610839844, + "learning_rate": 9.096761051629268e-06, + "loss": 19.1602, + "step": 137450 + }, + { + "epoch": 0.27767789687173, + "grad_norm": 109.43218994140625, + "learning_rate": 9.096560925522738e-06, + "loss": 21.11, + "step": 137460 + }, + { + "epoch": 0.2776980975044138, + "grad_norm": 197.34710693359375, + "learning_rate": 9.096360779450066e-06, + "loss": 33.6084, + "step": 137470 + }, + { + "epoch": 0.27771829813709764, + "grad_norm": 387.3228759765625, + "learning_rate": 9.096160613412228e-06, + "loss": 16.9318, + "step": 137480 + }, + { + "epoch": 0.27773849876978146, + "grad_norm": 310.5277099609375, + "learning_rate": 9.095960427410202e-06, + "loss": 13.8329, + "step": 137490 + }, + { + "epoch": 0.2777586994024653, + "grad_norm": 1074.7540283203125, + "learning_rate": 9.09576022144496e-06, + "loss": 28.8925, + "step": 137500 + }, + { + "epoch": 0.2777789000351491, + "grad_norm": 7.782865047454834, + "learning_rate": 9.09555999551748e-06, + "loss": 15.7964, + "step": 137510 + }, + { + "epoch": 0.2777991006678329, + "grad_norm": 603.712646484375, + "learning_rate": 9.095359749628736e-06, + "loss": 19.4232, + "step": 137520 + }, + { + "epoch": 0.27781930130051674, + "grad_norm": 296.1772155761719, + "learning_rate": 9.095159483779707e-06, + "loss": 32.3323, + "step": 137530 + }, + { + "epoch": 0.27783950193320056, + "grad_norm": 277.2585144042969, + "learning_rate": 9.094959197971367e-06, + "loss": 27.0107, + "step": 137540 + }, + { + "epoch": 0.2778597025658844, + "grad_norm": 67.9524154663086, + "learning_rate": 9.09475889220469e-06, + "loss": 17.7709, + "step": 137550 + }, + { + "epoch": 0.2778799031985682, + "grad_norm": 344.07452392578125, + "learning_rate": 9.094558566480659e-06, + "loss": 16.1814, + "step": 137560 + }, + { + "epoch": 0.277900103831252, + "grad_norm": 481.0843505859375, + "learning_rate": 9.094358220800243e-06, + "loss": 24.1806, + "step": 137570 + }, + { + "epoch": 0.2779203044639358, + "grad_norm": 259.8028259277344, + "learning_rate": 9.094157855164424e-06, + "loss": 28.0218, + "step": 137580 + }, + { + "epoch": 0.2779405050966196, + "grad_norm": 381.5344543457031, + "learning_rate": 9.093957469574175e-06, + "loss": 18.8146, + "step": 137590 + }, + { + "epoch": 0.2779607057293034, + "grad_norm": 344.8265380859375, + "learning_rate": 9.093757064030473e-06, + "loss": 28.887, + "step": 137600 + }, + { + "epoch": 0.27798090636198725, + "grad_norm": 1606.595458984375, + "learning_rate": 9.093556638534298e-06, + "loss": 23.4497, + "step": 137610 + }, + { + "epoch": 0.27800110699467107, + "grad_norm": 228.54640197753906, + "learning_rate": 9.093356193086622e-06, + "loss": 14.7104, + "step": 137620 + }, + { + "epoch": 
0.2780213076273549, + "grad_norm": 235.8044891357422, + "learning_rate": 9.093155727688426e-06, + "loss": 28.2329, + "step": 137630 + }, + { + "epoch": 0.2780415082600387, + "grad_norm": 619.3360595703125, + "learning_rate": 9.092955242340684e-06, + "loss": 29.1266, + "step": 137640 + }, + { + "epoch": 0.2780617088927225, + "grad_norm": 522.7439575195312, + "learning_rate": 9.092754737044375e-06, + "loss": 13.6259, + "step": 137650 + }, + { + "epoch": 0.27808190952540635, + "grad_norm": 677.4970703125, + "learning_rate": 9.092554211800476e-06, + "loss": 32.3102, + "step": 137660 + }, + { + "epoch": 0.27810211015809017, + "grad_norm": 244.76707458496094, + "learning_rate": 9.092353666609963e-06, + "loss": 22.9451, + "step": 137670 + }, + { + "epoch": 0.278122310790774, + "grad_norm": 917.2261962890625, + "learning_rate": 9.092153101473818e-06, + "loss": 19.912, + "step": 137680 + }, + { + "epoch": 0.2781425114234578, + "grad_norm": 399.02978515625, + "learning_rate": 9.091952516393012e-06, + "loss": 20.9459, + "step": 137690 + }, + { + "epoch": 0.2781627120561416, + "grad_norm": 675.6434326171875, + "learning_rate": 9.091751911368524e-06, + "loss": 28.4803, + "step": 137700 + }, + { + "epoch": 0.2781829126888254, + "grad_norm": 127.59564971923828, + "learning_rate": 9.091551286401337e-06, + "loss": 24.397, + "step": 137710 + }, + { + "epoch": 0.2782031133215092, + "grad_norm": 432.05291748046875, + "learning_rate": 9.091350641492424e-06, + "loss": 31.1793, + "step": 137720 + }, + { + "epoch": 0.27822331395419303, + "grad_norm": 359.1927795410156, + "learning_rate": 9.091149976642765e-06, + "loss": 19.6272, + "step": 137730 + }, + { + "epoch": 0.27824351458687685, + "grad_norm": 586.7080688476562, + "learning_rate": 9.090949291853337e-06, + "loss": 19.5357, + "step": 137740 + }, + { + "epoch": 0.27826371521956067, + "grad_norm": 376.0263977050781, + "learning_rate": 9.090748587125118e-06, + "loss": 15.923, + "step": 137750 + }, + { + "epoch": 0.2782839158522445, + "grad_norm": 484.933837890625, + "learning_rate": 9.090547862459087e-06, + "loss": 27.1255, + "step": 137760 + }, + { + "epoch": 0.2783041164849283, + "grad_norm": 451.0625, + "learning_rate": 9.09034711785622e-06, + "loss": 19.3612, + "step": 137770 + }, + { + "epoch": 0.27832431711761213, + "grad_norm": 354.6343078613281, + "learning_rate": 9.090146353317499e-06, + "loss": 17.4342, + "step": 137780 + }, + { + "epoch": 0.27834451775029595, + "grad_norm": 375.70721435546875, + "learning_rate": 9.0899455688439e-06, + "loss": 16.687, + "step": 137790 + }, + { + "epoch": 0.27836471838297977, + "grad_norm": 717.6085205078125, + "learning_rate": 9.089744764436404e-06, + "loss": 21.4564, + "step": 137800 + }, + { + "epoch": 0.2783849190156636, + "grad_norm": 658.6801147460938, + "learning_rate": 9.089543940095985e-06, + "loss": 24.0651, + "step": 137810 + }, + { + "epoch": 0.2784051196483474, + "grad_norm": 304.2969665527344, + "learning_rate": 9.089343095823628e-06, + "loss": 23.4212, + "step": 137820 + }, + { + "epoch": 0.2784253202810312, + "grad_norm": 1553.240234375, + "learning_rate": 9.089142231620306e-06, + "loss": 32.3794, + "step": 137830 + }, + { + "epoch": 0.278445520913715, + "grad_norm": 465.6623840332031, + "learning_rate": 9.088941347487004e-06, + "loss": 19.0398, + "step": 137840 + }, + { + "epoch": 0.2784657215463988, + "grad_norm": 793.8300170898438, + "learning_rate": 9.088740443424695e-06, + "loss": 30.4577, + "step": 137850 + }, + { + "epoch": 0.27848592217908263, + "grad_norm": 864.763671875, + "learning_rate": 
9.088539519434362e-06, + "loss": 25.3503, + "step": 137860 + }, + { + "epoch": 0.27850612281176645, + "grad_norm": 302.22894287109375, + "learning_rate": 9.088338575516983e-06, + "loss": 12.4432, + "step": 137870 + }, + { + "epoch": 0.2785263234444503, + "grad_norm": 372.04144287109375, + "learning_rate": 9.088137611673538e-06, + "loss": 34.8318, + "step": 137880 + }, + { + "epoch": 0.2785465240771341, + "grad_norm": 272.3525390625, + "learning_rate": 9.087936627905005e-06, + "loss": 17.5451, + "step": 137890 + }, + { + "epoch": 0.2785667247098179, + "grad_norm": 1021.4248046875, + "learning_rate": 9.087735624212365e-06, + "loss": 19.9586, + "step": 137900 + }, + { + "epoch": 0.27858692534250173, + "grad_norm": 513.5504150390625, + "learning_rate": 9.087534600596599e-06, + "loss": 22.0206, + "step": 137910 + }, + { + "epoch": 0.27860712597518555, + "grad_norm": 225.71331787109375, + "learning_rate": 9.087333557058684e-06, + "loss": 18.2251, + "step": 137920 + }, + { + "epoch": 0.2786273266078694, + "grad_norm": 397.6352233886719, + "learning_rate": 9.087132493599601e-06, + "loss": 14.128, + "step": 137930 + }, + { + "epoch": 0.2786475272405532, + "grad_norm": 425.21856689453125, + "learning_rate": 9.08693141022033e-06, + "loss": 18.2089, + "step": 137940 + }, + { + "epoch": 0.278667727873237, + "grad_norm": 590.8599853515625, + "learning_rate": 9.08673030692185e-06, + "loss": 20.5031, + "step": 137950 + }, + { + "epoch": 0.2786879285059208, + "grad_norm": 184.47247314453125, + "learning_rate": 9.086529183705144e-06, + "loss": 14.7618, + "step": 137960 + }, + { + "epoch": 0.2787081291386046, + "grad_norm": 136.02565002441406, + "learning_rate": 9.086328040571189e-06, + "loss": 18.2466, + "step": 137970 + }, + { + "epoch": 0.2787283297712884, + "grad_norm": 681.7166748046875, + "learning_rate": 9.086126877520967e-06, + "loss": 15.8043, + "step": 137980 + }, + { + "epoch": 0.27874853040397224, + "grad_norm": 695.03955078125, + "learning_rate": 9.085925694555457e-06, + "loss": 18.401, + "step": 137990 + }, + { + "epoch": 0.27876873103665606, + "grad_norm": 77.13236236572266, + "learning_rate": 9.085724491675642e-06, + "loss": 23.4363, + "step": 138000 + }, + { + "epoch": 0.2787889316693399, + "grad_norm": 1009.24853515625, + "learning_rate": 9.085523268882504e-06, + "loss": 22.7571, + "step": 138010 + }, + { + "epoch": 0.2788091323020237, + "grad_norm": 735.2975463867188, + "learning_rate": 9.085322026177017e-06, + "loss": 33.2772, + "step": 138020 + }, + { + "epoch": 0.2788293329347075, + "grad_norm": 371.4656982421875, + "learning_rate": 9.085120763560168e-06, + "loss": 12.3176, + "step": 138030 + }, + { + "epoch": 0.27884953356739134, + "grad_norm": 639.376953125, + "learning_rate": 9.084919481032935e-06, + "loss": 32.2983, + "step": 138040 + }, + { + "epoch": 0.27886973420007516, + "grad_norm": 554.71484375, + "learning_rate": 9.084718178596301e-06, + "loss": 40.4096, + "step": 138050 + }, + { + "epoch": 0.278889934832759, + "grad_norm": 355.3971252441406, + "learning_rate": 9.084516856251244e-06, + "loss": 28.8127, + "step": 138060 + }, + { + "epoch": 0.2789101354654428, + "grad_norm": 373.5417785644531, + "learning_rate": 9.084315513998749e-06, + "loss": 22.5024, + "step": 138070 + }, + { + "epoch": 0.2789303360981266, + "grad_norm": 237.6198272705078, + "learning_rate": 9.084114151839795e-06, + "loss": 37.6147, + "step": 138080 + }, + { + "epoch": 0.2789505367308104, + "grad_norm": 759.7806396484375, + "learning_rate": 9.083912769775364e-06, + "loss": 18.7683, + "step": 138090 + }, + { 
+ "epoch": 0.2789707373634942, + "grad_norm": 548.9072265625, + "learning_rate": 9.083711367806438e-06, + "loss": 17.0632, + "step": 138100 + }, + { + "epoch": 0.278990937996178, + "grad_norm": 146.3411865234375, + "learning_rate": 9.083509945933996e-06, + "loss": 17.3576, + "step": 138110 + }, + { + "epoch": 0.27901113862886184, + "grad_norm": 173.25881958007812, + "learning_rate": 9.083308504159025e-06, + "loss": 18.1152, + "step": 138120 + }, + { + "epoch": 0.27903133926154566, + "grad_norm": 540.9036865234375, + "learning_rate": 9.083107042482502e-06, + "loss": 11.4908, + "step": 138130 + }, + { + "epoch": 0.2790515398942295, + "grad_norm": 356.0698547363281, + "learning_rate": 9.08290556090541e-06, + "loss": 15.1667, + "step": 138140 + }, + { + "epoch": 0.2790717405269133, + "grad_norm": 373.5164489746094, + "learning_rate": 9.082704059428732e-06, + "loss": 21.7914, + "step": 138150 + }, + { + "epoch": 0.2790919411595971, + "grad_norm": 646.2813720703125, + "learning_rate": 9.08250253805345e-06, + "loss": 41.9151, + "step": 138160 + }, + { + "epoch": 0.27911214179228094, + "grad_norm": 8.045919418334961, + "learning_rate": 9.082300996780543e-06, + "loss": 15.8566, + "step": 138170 + }, + { + "epoch": 0.27913234242496476, + "grad_norm": 744.6547241210938, + "learning_rate": 9.082099435611001e-06, + "loss": 21.6561, + "step": 138180 + }, + { + "epoch": 0.2791525430576486, + "grad_norm": 943.8947143554688, + "learning_rate": 9.081897854545798e-06, + "loss": 19.71, + "step": 138190 + }, + { + "epoch": 0.2791727436903324, + "grad_norm": 116.42237854003906, + "learning_rate": 9.08169625358592e-06, + "loss": 23.7724, + "step": 138200 + }, + { + "epoch": 0.2791929443230162, + "grad_norm": 559.2406005859375, + "learning_rate": 9.08149463273235e-06, + "loss": 18.6301, + "step": 138210 + }, + { + "epoch": 0.2792131449557, + "grad_norm": 552.3639526367188, + "learning_rate": 9.081292991986072e-06, + "loss": 42.6683, + "step": 138220 + }, + { + "epoch": 0.2792333455883838, + "grad_norm": 318.72149658203125, + "learning_rate": 9.081091331348065e-06, + "loss": 15.5614, + "step": 138230 + }, + { + "epoch": 0.27925354622106763, + "grad_norm": 508.3001403808594, + "learning_rate": 9.080889650819313e-06, + "loss": 21.2383, + "step": 138240 + }, + { + "epoch": 0.27927374685375145, + "grad_norm": 664.1058349609375, + "learning_rate": 9.080687950400801e-06, + "loss": 22.6868, + "step": 138250 + }, + { + "epoch": 0.27929394748643527, + "grad_norm": 145.73680114746094, + "learning_rate": 9.08048623009351e-06, + "loss": 17.7912, + "step": 138260 + }, + { + "epoch": 0.2793141481191191, + "grad_norm": 633.4774169921875, + "learning_rate": 9.080284489898428e-06, + "loss": 16.7837, + "step": 138270 + }, + { + "epoch": 0.2793343487518029, + "grad_norm": 212.75233459472656, + "learning_rate": 9.08008272981653e-06, + "loss": 26.669, + "step": 138280 + }, + { + "epoch": 0.27935454938448673, + "grad_norm": 201.02984619140625, + "learning_rate": 9.079880949848804e-06, + "loss": 26.8011, + "step": 138290 + }, + { + "epoch": 0.27937475001717055, + "grad_norm": 254.2630615234375, + "learning_rate": 9.079679149996235e-06, + "loss": 26.1649, + "step": 138300 + }, + { + "epoch": 0.27939495064985437, + "grad_norm": 217.7797088623047, + "learning_rate": 9.079477330259803e-06, + "loss": 12.8471, + "step": 138310 + }, + { + "epoch": 0.2794151512825382, + "grad_norm": 1549.8648681640625, + "learning_rate": 9.079275490640494e-06, + "loss": 29.4622, + "step": 138320 + }, + { + "epoch": 0.279435351915222, + "grad_norm": 
4345.27587890625, + "learning_rate": 9.079073631139291e-06, + "loss": 31.5164, + "step": 138330 + }, + { + "epoch": 0.27945555254790583, + "grad_norm": 664.3687133789062, + "learning_rate": 9.078871751757176e-06, + "loss": 25.7238, + "step": 138340 + }, + { + "epoch": 0.2794757531805896, + "grad_norm": 471.45782470703125, + "learning_rate": 9.078669852495138e-06, + "loss": 29.2242, + "step": 138350 + }, + { + "epoch": 0.2794959538132734, + "grad_norm": 520.059814453125, + "learning_rate": 9.078467933354156e-06, + "loss": 34.4871, + "step": 138360 + }, + { + "epoch": 0.27951615444595723, + "grad_norm": 167.5902557373047, + "learning_rate": 9.078265994335216e-06, + "loss": 23.6625, + "step": 138370 + }, + { + "epoch": 0.27953635507864105, + "grad_norm": 268.4201965332031, + "learning_rate": 9.078064035439303e-06, + "loss": 21.1321, + "step": 138380 + }, + { + "epoch": 0.2795565557113249, + "grad_norm": 643.2801513671875, + "learning_rate": 9.0778620566674e-06, + "loss": 24.712, + "step": 138390 + }, + { + "epoch": 0.2795767563440087, + "grad_norm": 452.7569274902344, + "learning_rate": 9.077660058020492e-06, + "loss": 43.9378, + "step": 138400 + }, + { + "epoch": 0.2795969569766925, + "grad_norm": 463.3872985839844, + "learning_rate": 9.077458039499563e-06, + "loss": 12.4162, + "step": 138410 + }, + { + "epoch": 0.27961715760937633, + "grad_norm": 669.3692626953125, + "learning_rate": 9.077256001105598e-06, + "loss": 32.6533, + "step": 138420 + }, + { + "epoch": 0.27963735824206015, + "grad_norm": 565.0306396484375, + "learning_rate": 9.077053942839581e-06, + "loss": 26.3031, + "step": 138430 + }, + { + "epoch": 0.27965755887474397, + "grad_norm": 234.78082275390625, + "learning_rate": 9.0768518647025e-06, + "loss": 29.5895, + "step": 138440 + }, + { + "epoch": 0.2796777595074278, + "grad_norm": 0.1866646558046341, + "learning_rate": 9.076649766695336e-06, + "loss": 21.0687, + "step": 138450 + }, + { + "epoch": 0.2796979601401116, + "grad_norm": 282.2435607910156, + "learning_rate": 9.076447648819076e-06, + "loss": 20.6133, + "step": 138460 + }, + { + "epoch": 0.2797181607727954, + "grad_norm": 282.5113525390625, + "learning_rate": 9.076245511074704e-06, + "loss": 20.6862, + "step": 138470 + }, + { + "epoch": 0.2797383614054792, + "grad_norm": 1906.7191162109375, + "learning_rate": 9.076043353463205e-06, + "loss": 14.3788, + "step": 138480 + }, + { + "epoch": 0.279758562038163, + "grad_norm": 716.705810546875, + "learning_rate": 9.075841175985566e-06, + "loss": 37.1254, + "step": 138490 + }, + { + "epoch": 0.27977876267084684, + "grad_norm": 408.14013671875, + "learning_rate": 9.07563897864277e-06, + "loss": 27.3324, + "step": 138500 + }, + { + "epoch": 0.27979896330353066, + "grad_norm": 444.34307861328125, + "learning_rate": 9.075436761435807e-06, + "loss": 19.5062, + "step": 138510 + }, + { + "epoch": 0.2798191639362145, + "grad_norm": 1251.0943603515625, + "learning_rate": 9.075234524365658e-06, + "loss": 23.5936, + "step": 138520 + }, + { + "epoch": 0.2798393645688983, + "grad_norm": 965.272705078125, + "learning_rate": 9.07503226743331e-06, + "loss": 36.7323, + "step": 138530 + }, + { + "epoch": 0.2798595652015821, + "grad_norm": 1015.2049560546875, + "learning_rate": 9.07482999063975e-06, + "loss": 30.3238, + "step": 138540 + }, + { + "epoch": 0.27987976583426594, + "grad_norm": 231.09129333496094, + "learning_rate": 9.07462769398596e-06, + "loss": 30.4964, + "step": 138550 + }, + { + "epoch": 0.27989996646694976, + "grad_norm": 487.2249755859375, + "learning_rate": 
9.074425377472932e-06, + "loss": 25.6819, + "step": 138560 + }, + { + "epoch": 0.2799201670996336, + "grad_norm": 166.27435302734375, + "learning_rate": 9.074223041101647e-06, + "loss": 23.7328, + "step": 138570 + }, + { + "epoch": 0.2799403677323174, + "grad_norm": 586.8052368164062, + "learning_rate": 9.074020684873095e-06, + "loss": 39.2396, + "step": 138580 + }, + { + "epoch": 0.2799605683650012, + "grad_norm": 268.6707458496094, + "learning_rate": 9.073818308788258e-06, + "loss": 15.8867, + "step": 138590 + }, + { + "epoch": 0.279980768997685, + "grad_norm": 324.8249816894531, + "learning_rate": 9.073615912848126e-06, + "loss": 19.836, + "step": 138600 + }, + { + "epoch": 0.2800009696303688, + "grad_norm": 879.8435668945312, + "learning_rate": 9.073413497053683e-06, + "loss": 31.2466, + "step": 138610 + }, + { + "epoch": 0.2800211702630526, + "grad_norm": 170.1151123046875, + "learning_rate": 9.073211061405918e-06, + "loss": 19.4436, + "step": 138620 + }, + { + "epoch": 0.28004137089573644, + "grad_norm": 181.6733856201172, + "learning_rate": 9.073008605905816e-06, + "loss": 23.0449, + "step": 138630 + }, + { + "epoch": 0.28006157152842026, + "grad_norm": 318.5815124511719, + "learning_rate": 9.072806130554364e-06, + "loss": 13.7624, + "step": 138640 + }, + { + "epoch": 0.2800817721611041, + "grad_norm": 287.2176208496094, + "learning_rate": 9.072603635352548e-06, + "loss": 34.9895, + "step": 138650 + }, + { + "epoch": 0.2801019727937879, + "grad_norm": 193.97384643554688, + "learning_rate": 9.072401120301356e-06, + "loss": 16.6459, + "step": 138660 + }, + { + "epoch": 0.2801221734264717, + "grad_norm": 148.6316680908203, + "learning_rate": 9.072198585401775e-06, + "loss": 11.2359, + "step": 138670 + }, + { + "epoch": 0.28014237405915554, + "grad_norm": 617.3121337890625, + "learning_rate": 9.071996030654793e-06, + "loss": 31.7572, + "step": 138680 + }, + { + "epoch": 0.28016257469183936, + "grad_norm": 847.959716796875, + "learning_rate": 9.071793456061395e-06, + "loss": 39.0562, + "step": 138690 + }, + { + "epoch": 0.2801827753245232, + "grad_norm": 59.63226318359375, + "learning_rate": 9.07159086162257e-06, + "loss": 31.0103, + "step": 138700 + }, + { + "epoch": 0.280202975957207, + "grad_norm": 471.9793701171875, + "learning_rate": 9.071388247339306e-06, + "loss": 13.428, + "step": 138710 + }, + { + "epoch": 0.2802231765898908, + "grad_norm": 292.8183898925781, + "learning_rate": 9.071185613212588e-06, + "loss": 29.688, + "step": 138720 + }, + { + "epoch": 0.2802433772225746, + "grad_norm": 637.3397827148438, + "learning_rate": 9.070982959243406e-06, + "loss": 16.313, + "step": 138730 + }, + { + "epoch": 0.2802635778552584, + "grad_norm": 118.97696685791016, + "learning_rate": 9.070780285432746e-06, + "loss": 23.1142, + "step": 138740 + }, + { + "epoch": 0.2802837784879422, + "grad_norm": 723.663818359375, + "learning_rate": 9.070577591781598e-06, + "loss": 19.9465, + "step": 138750 + }, + { + "epoch": 0.28030397912062605, + "grad_norm": 360.73968505859375, + "learning_rate": 9.070374878290946e-06, + "loss": 15.4039, + "step": 138760 + }, + { + "epoch": 0.28032417975330987, + "grad_norm": 250.6209716796875, + "learning_rate": 9.070172144961781e-06, + "loss": 19.9679, + "step": 138770 + }, + { + "epoch": 0.2803443803859937, + "grad_norm": 270.03662109375, + "learning_rate": 9.069969391795093e-06, + "loss": 28.7409, + "step": 138780 + }, + { + "epoch": 0.2803645810186775, + "grad_norm": 890.7096557617188, + "learning_rate": 9.069766618791867e-06, + "loss": 20.5492, + "step": 138790 
+ }, + { + "epoch": 0.2803847816513613, + "grad_norm": 922.1776123046875, + "learning_rate": 9.069563825953092e-06, + "loss": 30.5126, + "step": 138800 + }, + { + "epoch": 0.28040498228404515, + "grad_norm": 695.0859985351562, + "learning_rate": 9.069361013279755e-06, + "loss": 41.9807, + "step": 138810 + }, + { + "epoch": 0.28042518291672897, + "grad_norm": 163.199951171875, + "learning_rate": 9.069158180772848e-06, + "loss": 7.1529, + "step": 138820 + }, + { + "epoch": 0.2804453835494128, + "grad_norm": 417.4488220214844, + "learning_rate": 9.068955328433355e-06, + "loss": 16.0531, + "step": 138830 + }, + { + "epoch": 0.2804655841820966, + "grad_norm": 230.33570861816406, + "learning_rate": 9.06875245626227e-06, + "loss": 32.4096, + "step": 138840 + }, + { + "epoch": 0.2804857848147804, + "grad_norm": 559.3120727539062, + "learning_rate": 9.068549564260578e-06, + "loss": 22.6386, + "step": 138850 + }, + { + "epoch": 0.2805059854474642, + "grad_norm": 552.2367553710938, + "learning_rate": 9.068346652429269e-06, + "loss": 34.1713, + "step": 138860 + }, + { + "epoch": 0.280526186080148, + "grad_norm": 238.18087768554688, + "learning_rate": 9.068143720769332e-06, + "loss": 12.5275, + "step": 138870 + }, + { + "epoch": 0.28054638671283183, + "grad_norm": 458.226318359375, + "learning_rate": 9.067940769281755e-06, + "loss": 22.525, + "step": 138880 + }, + { + "epoch": 0.28056658734551565, + "grad_norm": 296.74176025390625, + "learning_rate": 9.067737797967528e-06, + "loss": 25.6449, + "step": 138890 + }, + { + "epoch": 0.28058678797819947, + "grad_norm": 254.4502716064453, + "learning_rate": 9.06753480682764e-06, + "loss": 14.2461, + "step": 138900 + }, + { + "epoch": 0.2806069886108833, + "grad_norm": 358.54150390625, + "learning_rate": 9.067331795863084e-06, + "loss": 17.6906, + "step": 138910 + }, + { + "epoch": 0.2806271892435671, + "grad_norm": 799.5117797851562, + "learning_rate": 9.067128765074842e-06, + "loss": 20.4168, + "step": 138920 + }, + { + "epoch": 0.28064738987625093, + "grad_norm": 278.828369140625, + "learning_rate": 9.06692571446391e-06, + "loss": 14.3315, + "step": 138930 + }, + { + "epoch": 0.28066759050893475, + "grad_norm": 706.9019775390625, + "learning_rate": 9.066722644031274e-06, + "loss": 27.403, + "step": 138940 + }, + { + "epoch": 0.28068779114161857, + "grad_norm": 457.7975769042969, + "learning_rate": 9.066519553777926e-06, + "loss": 22.5423, + "step": 138950 + }, + { + "epoch": 0.2807079917743024, + "grad_norm": 620.1854858398438, + "learning_rate": 9.066316443704854e-06, + "loss": 26.5021, + "step": 138960 + }, + { + "epoch": 0.2807281924069862, + "grad_norm": 176.0418243408203, + "learning_rate": 9.06611331381305e-06, + "loss": 12.7354, + "step": 138970 + }, + { + "epoch": 0.28074839303967003, + "grad_norm": 388.2792663574219, + "learning_rate": 9.065910164103502e-06, + "loss": 16.8222, + "step": 138980 + }, + { + "epoch": 0.2807685936723538, + "grad_norm": 433.3446350097656, + "learning_rate": 9.065706994577203e-06, + "loss": 66.8012, + "step": 138990 + }, + { + "epoch": 0.2807887943050376, + "grad_norm": 432.26007080078125, + "learning_rate": 9.065503805235139e-06, + "loss": 21.6146, + "step": 139000 + }, + { + "epoch": 0.28080899493772143, + "grad_norm": 430.3299560546875, + "learning_rate": 9.065300596078304e-06, + "loss": 23.9434, + "step": 139010 + }, + { + "epoch": 0.28082919557040525, + "grad_norm": 265.5904846191406, + "learning_rate": 9.065097367107685e-06, + "loss": 19.374, + "step": 139020 + }, + { + "epoch": 0.2808493962030891, + "grad_norm": 
606.0962524414062, + "learning_rate": 9.064894118324276e-06, + "loss": 31.1727, + "step": 139030 + }, + { + "epoch": 0.2808695968357729, + "grad_norm": 290.9197082519531, + "learning_rate": 9.064690849729066e-06, + "loss": 22.6817, + "step": 139040 + }, + { + "epoch": 0.2808897974684567, + "grad_norm": 345.8620300292969, + "learning_rate": 9.064487561323046e-06, + "loss": 20.0112, + "step": 139050 + }, + { + "epoch": 0.28090999810114053, + "grad_norm": 339.62493896484375, + "learning_rate": 9.064284253107206e-06, + "loss": 20.9928, + "step": 139060 + }, + { + "epoch": 0.28093019873382435, + "grad_norm": 123.88328552246094, + "learning_rate": 9.064080925082536e-06, + "loss": 18.6651, + "step": 139070 + }, + { + "epoch": 0.2809503993665082, + "grad_norm": 475.6920471191406, + "learning_rate": 9.063877577250031e-06, + "loss": 22.357, + "step": 139080 + }, + { + "epoch": 0.280970599999192, + "grad_norm": 600.6383666992188, + "learning_rate": 9.063674209610678e-06, + "loss": 30.9068, + "step": 139090 + }, + { + "epoch": 0.2809908006318758, + "grad_norm": 262.7354431152344, + "learning_rate": 9.06347082216547e-06, + "loss": 17.4674, + "step": 139100 + }, + { + "epoch": 0.2810110012645596, + "grad_norm": 695.3836669921875, + "learning_rate": 9.0632674149154e-06, + "loss": 46.7981, + "step": 139110 + }, + { + "epoch": 0.2810312018972434, + "grad_norm": 5.711061000823975, + "learning_rate": 9.063063987861455e-06, + "loss": 12.0205, + "step": 139120 + }, + { + "epoch": 0.2810514025299272, + "grad_norm": 6.8529133796691895, + "learning_rate": 9.06286054100463e-06, + "loss": 25.1362, + "step": 139130 + }, + { + "epoch": 0.28107160316261104, + "grad_norm": 20.48434066772461, + "learning_rate": 9.062657074345916e-06, + "loss": 18.4941, + "step": 139140 + }, + { + "epoch": 0.28109180379529486, + "grad_norm": 228.85006713867188, + "learning_rate": 9.062453587886302e-06, + "loss": 16.1669, + "step": 139150 + }, + { + "epoch": 0.2811120044279787, + "grad_norm": 438.3404846191406, + "learning_rate": 9.062250081626784e-06, + "loss": 29.6202, + "step": 139160 + }, + { + "epoch": 0.2811322050606625, + "grad_norm": 402.7679443359375, + "learning_rate": 9.062046555568351e-06, + "loss": 15.0169, + "step": 139170 + }, + { + "epoch": 0.2811524056933463, + "grad_norm": 184.88658142089844, + "learning_rate": 9.061843009711995e-06, + "loss": 13.5044, + "step": 139180 + }, + { + "epoch": 0.28117260632603014, + "grad_norm": 285.018310546875, + "learning_rate": 9.06163944405871e-06, + "loss": 23.2025, + "step": 139190 + }, + { + "epoch": 0.28119280695871396, + "grad_norm": 1371.1322021484375, + "learning_rate": 9.061435858609486e-06, + "loss": 44.7656, + "step": 139200 + }, + { + "epoch": 0.2812130075913978, + "grad_norm": 818.6122436523438, + "learning_rate": 9.061232253365317e-06, + "loss": 26.9839, + "step": 139210 + }, + { + "epoch": 0.2812332082240816, + "grad_norm": 695.9334106445312, + "learning_rate": 9.061028628327196e-06, + "loss": 24.9003, + "step": 139220 + }, + { + "epoch": 0.2812534088567654, + "grad_norm": 635.3130493164062, + "learning_rate": 9.060824983496113e-06, + "loss": 16.1886, + "step": 139230 + }, + { + "epoch": 0.2812736094894492, + "grad_norm": 412.47625732421875, + "learning_rate": 9.06062131887306e-06, + "loss": 18.6965, + "step": 139240 + }, + { + "epoch": 0.281293810122133, + "grad_norm": 949.825439453125, + "learning_rate": 9.060417634459032e-06, + "loss": 19.2848, + "step": 139250 + }, + { + "epoch": 0.2813140107548168, + "grad_norm": 741.3657836914062, + "learning_rate": 
9.060213930255023e-06, + "loss": 26.5962, + "step": 139260 + }, + { + "epoch": 0.28133421138750064, + "grad_norm": 598.5442504882812, + "learning_rate": 9.06001020626202e-06, + "loss": 22.5946, + "step": 139270 + }, + { + "epoch": 0.28135441202018446, + "grad_norm": 534.9511108398438, + "learning_rate": 9.059806462481022e-06, + "loss": 26.616, + "step": 139280 + }, + { + "epoch": 0.2813746126528683, + "grad_norm": 536.8895263671875, + "learning_rate": 9.05960269891302e-06, + "loss": 13.6408, + "step": 139290 + }, + { + "epoch": 0.2813948132855521, + "grad_norm": 338.2876892089844, + "learning_rate": 9.059398915559005e-06, + "loss": 17.3098, + "step": 139300 + }, + { + "epoch": 0.2814150139182359, + "grad_norm": 281.8960266113281, + "learning_rate": 9.059195112419972e-06, + "loss": 27.7513, + "step": 139310 + }, + { + "epoch": 0.28143521455091974, + "grad_norm": 353.2132873535156, + "learning_rate": 9.058991289496916e-06, + "loss": 18.9002, + "step": 139320 + }, + { + "epoch": 0.28145541518360356, + "grad_norm": 0.0, + "learning_rate": 9.058787446790828e-06, + "loss": 15.0055, + "step": 139330 + }, + { + "epoch": 0.2814756158162874, + "grad_norm": 259.71734619140625, + "learning_rate": 9.058583584302702e-06, + "loss": 33.1441, + "step": 139340 + }, + { + "epoch": 0.2814958164489712, + "grad_norm": 348.2009582519531, + "learning_rate": 9.058379702033533e-06, + "loss": 15.4112, + "step": 139350 + }, + { + "epoch": 0.281516017081655, + "grad_norm": 293.4024963378906, + "learning_rate": 9.058175799984312e-06, + "loss": 11.7142, + "step": 139360 + }, + { + "epoch": 0.2815362177143388, + "grad_norm": 1326.43310546875, + "learning_rate": 9.057971878156036e-06, + "loss": 49.4331, + "step": 139370 + }, + { + "epoch": 0.2815564183470226, + "grad_norm": 396.4537353515625, + "learning_rate": 9.057767936549696e-06, + "loss": 19.5482, + "step": 139380 + }, + { + "epoch": 0.28157661897970643, + "grad_norm": 334.4983825683594, + "learning_rate": 9.057563975166288e-06, + "loss": 20.0406, + "step": 139390 + }, + { + "epoch": 0.28159681961239025, + "grad_norm": 573.329833984375, + "learning_rate": 9.057359994006806e-06, + "loss": 20.9532, + "step": 139400 + }, + { + "epoch": 0.28161702024507407, + "grad_norm": 308.3233947753906, + "learning_rate": 9.057155993072241e-06, + "loss": 32.7891, + "step": 139410 + }, + { + "epoch": 0.2816372208777579, + "grad_norm": 390.9920959472656, + "learning_rate": 9.056951972363592e-06, + "loss": 13.0427, + "step": 139420 + }, + { + "epoch": 0.2816574215104417, + "grad_norm": 325.1952209472656, + "learning_rate": 9.056747931881851e-06, + "loss": 38.5052, + "step": 139430 + }, + { + "epoch": 0.28167762214312553, + "grad_norm": 296.2378845214844, + "learning_rate": 9.056543871628012e-06, + "loss": 15.9121, + "step": 139440 + }, + { + "epoch": 0.28169782277580935, + "grad_norm": 494.32855224609375, + "learning_rate": 9.056339791603069e-06, + "loss": 16.8407, + "step": 139450 + }, + { + "epoch": 0.28171802340849317, + "grad_norm": 99.52635955810547, + "learning_rate": 9.056135691808019e-06, + "loss": 24.2374, + "step": 139460 + }, + { + "epoch": 0.281738224041177, + "grad_norm": 389.7237854003906, + "learning_rate": 9.055931572243857e-06, + "loss": 27.4766, + "step": 139470 + }, + { + "epoch": 0.2817584246738608, + "grad_norm": 279.0994873046875, + "learning_rate": 9.055727432911574e-06, + "loss": 56.0306, + "step": 139480 + }, + { + "epoch": 0.28177862530654463, + "grad_norm": 112.3465347290039, + "learning_rate": 9.055523273812168e-06, + "loss": 30.8866, + "step": 139490 + }, + 
{ + "epoch": 0.2817988259392284, + "grad_norm": 353.2060241699219, + "learning_rate": 9.055319094946633e-06, + "loss": 21.7786, + "step": 139500 + }, + { + "epoch": 0.2818190265719122, + "grad_norm": 356.8995361328125, + "learning_rate": 9.055114896315966e-06, + "loss": 31.1258, + "step": 139510 + }, + { + "epoch": 0.28183922720459603, + "grad_norm": 319.8045654296875, + "learning_rate": 9.05491067792116e-06, + "loss": 21.9368, + "step": 139520 + }, + { + "epoch": 0.28185942783727985, + "grad_norm": 171.35629272460938, + "learning_rate": 9.054706439763212e-06, + "loss": 21.618, + "step": 139530 + }, + { + "epoch": 0.2818796284699637, + "grad_norm": 315.6375427246094, + "learning_rate": 9.054502181843117e-06, + "loss": 24.4054, + "step": 139540 + }, + { + "epoch": 0.2818998291026475, + "grad_norm": 468.3456726074219, + "learning_rate": 9.054297904161868e-06, + "loss": 14.5097, + "step": 139550 + }, + { + "epoch": 0.2819200297353313, + "grad_norm": 512.71728515625, + "learning_rate": 9.054093606720464e-06, + "loss": 27.3539, + "step": 139560 + }, + { + "epoch": 0.28194023036801513, + "grad_norm": 235.35549926757812, + "learning_rate": 9.0538892895199e-06, + "loss": 23.005, + "step": 139570 + }, + { + "epoch": 0.28196043100069895, + "grad_norm": 728.3839721679688, + "learning_rate": 9.053684952561171e-06, + "loss": 27.9579, + "step": 139580 + }, + { + "epoch": 0.2819806316333828, + "grad_norm": 335.0770568847656, + "learning_rate": 9.053480595845272e-06, + "loss": 11.292, + "step": 139590 + }, + { + "epoch": 0.2820008322660666, + "grad_norm": 27.807125091552734, + "learning_rate": 9.0532762193732e-06, + "loss": 24.8045, + "step": 139600 + }, + { + "epoch": 0.2820210328987504, + "grad_norm": 260.2357177734375, + "learning_rate": 9.053071823145953e-06, + "loss": 34.6502, + "step": 139610 + }, + { + "epoch": 0.28204123353143423, + "grad_norm": 377.25225830078125, + "learning_rate": 9.052867407164525e-06, + "loss": 43.1483, + "step": 139620 + }, + { + "epoch": 0.282061434164118, + "grad_norm": 743.7612915039062, + "learning_rate": 9.052662971429912e-06, + "loss": 17.927, + "step": 139630 + }, + { + "epoch": 0.2820816347968018, + "grad_norm": 158.10980224609375, + "learning_rate": 9.052458515943112e-06, + "loss": 7.0445, + "step": 139640 + }, + { + "epoch": 0.28210183542948564, + "grad_norm": 349.80194091796875, + "learning_rate": 9.052254040705121e-06, + "loss": 24.8305, + "step": 139650 + }, + { + "epoch": 0.28212203606216946, + "grad_norm": 393.1520080566406, + "learning_rate": 9.052049545716934e-06, + "loss": 19.4659, + "step": 139660 + }, + { + "epoch": 0.2821422366948533, + "grad_norm": 244.4481201171875, + "learning_rate": 9.05184503097955e-06, + "loss": 40.7217, + "step": 139670 + }, + { + "epoch": 0.2821624373275371, + "grad_norm": 627.539794921875, + "learning_rate": 9.051640496493965e-06, + "loss": 34.6599, + "step": 139680 + }, + { + "epoch": 0.2821826379602209, + "grad_norm": 333.36944580078125, + "learning_rate": 9.051435942261175e-06, + "loss": 14.3166, + "step": 139690 + }, + { + "epoch": 0.28220283859290474, + "grad_norm": 468.92840576171875, + "learning_rate": 9.051231368282177e-06, + "loss": 23.9827, + "step": 139700 + }, + { + "epoch": 0.28222303922558856, + "grad_norm": 821.8056640625, + "learning_rate": 9.051026774557969e-06, + "loss": 23.295, + "step": 139710 + }, + { + "epoch": 0.2822432398582724, + "grad_norm": 744.9319458007812, + "learning_rate": 9.05082216108955e-06, + "loss": 20.0495, + "step": 139720 + }, + { + "epoch": 0.2822634404909562, + "grad_norm": 
281.4646911621094, + "learning_rate": 9.050617527877911e-06, + "loss": 18.5791, + "step": 139730 + }, + { + "epoch": 0.28228364112364, + "grad_norm": 574.1758422851562, + "learning_rate": 9.050412874924057e-06, + "loss": 30.0051, + "step": 139740 + }, + { + "epoch": 0.2823038417563238, + "grad_norm": 341.85565185546875, + "learning_rate": 9.050208202228981e-06, + "loss": 37.8916, + "step": 139750 + }, + { + "epoch": 0.2823240423890076, + "grad_norm": 107.9791488647461, + "learning_rate": 9.05000350979368e-06, + "loss": 26.4139, + "step": 139760 + }, + { + "epoch": 0.2823442430216914, + "grad_norm": 432.2236328125, + "learning_rate": 9.049798797619156e-06, + "loss": 27.9242, + "step": 139770 + }, + { + "epoch": 0.28236444365437524, + "grad_norm": 397.84930419921875, + "learning_rate": 9.049594065706401e-06, + "loss": 30.808, + "step": 139780 + }, + { + "epoch": 0.28238464428705906, + "grad_norm": 633.5519409179688, + "learning_rate": 9.049389314056417e-06, + "loss": 28.3848, + "step": 139790 + }, + { + "epoch": 0.2824048449197429, + "grad_norm": 509.9869384765625, + "learning_rate": 9.0491845426702e-06, + "loss": 15.8071, + "step": 139800 + }, + { + "epoch": 0.2824250455524267, + "grad_norm": 164.17051696777344, + "learning_rate": 9.04897975154875e-06, + "loss": 31.5073, + "step": 139810 + }, + { + "epoch": 0.2824452461851105, + "grad_norm": 286.58984375, + "learning_rate": 9.048774940693062e-06, + "loss": 16.4875, + "step": 139820 + }, + { + "epoch": 0.28246544681779434, + "grad_norm": 671.1658935546875, + "learning_rate": 9.048570110104137e-06, + "loss": 17.9476, + "step": 139830 + }, + { + "epoch": 0.28248564745047816, + "grad_norm": 312.6763610839844, + "learning_rate": 9.048365259782973e-06, + "loss": 14.704, + "step": 139840 + }, + { + "epoch": 0.282505848083162, + "grad_norm": 166.3192596435547, + "learning_rate": 9.048160389730565e-06, + "loss": 19.6797, + "step": 139850 + }, + { + "epoch": 0.2825260487158458, + "grad_norm": 304.2613525390625, + "learning_rate": 9.047955499947916e-06, + "loss": 14.4554, + "step": 139860 + }, + { + "epoch": 0.2825462493485296, + "grad_norm": 648.1076049804688, + "learning_rate": 9.047750590436023e-06, + "loss": 19.2086, + "step": 139870 + }, + { + "epoch": 0.2825664499812134, + "grad_norm": 841.6949462890625, + "learning_rate": 9.047545661195885e-06, + "loss": 22.0263, + "step": 139880 + }, + { + "epoch": 0.2825866506138972, + "grad_norm": 1014.7114868164062, + "learning_rate": 9.0473407122285e-06, + "loss": 35.6883, + "step": 139890 + }, + { + "epoch": 0.282606851246581, + "grad_norm": 728.3817138671875, + "learning_rate": 9.047135743534866e-06, + "loss": 14.7379, + "step": 139900 + }, + { + "epoch": 0.28262705187926485, + "grad_norm": 318.85577392578125, + "learning_rate": 9.046930755115986e-06, + "loss": 19.7937, + "step": 139910 + }, + { + "epoch": 0.28264725251194867, + "grad_norm": 655.446533203125, + "learning_rate": 9.046725746972855e-06, + "loss": 19.0856, + "step": 139920 + }, + { + "epoch": 0.2826674531446325, + "grad_norm": 319.9189758300781, + "learning_rate": 9.046520719106473e-06, + "loss": 25.6961, + "step": 139930 + }, + { + "epoch": 0.2826876537773163, + "grad_norm": 443.02099609375, + "learning_rate": 9.04631567151784e-06, + "loss": 40.0771, + "step": 139940 + }, + { + "epoch": 0.2827078544100001, + "grad_norm": 827.03564453125, + "learning_rate": 9.046110604207955e-06, + "loss": 19.5551, + "step": 139950 + }, + { + "epoch": 0.28272805504268395, + "grad_norm": 507.2126159667969, + "learning_rate": 9.045905517177817e-06, + "loss": 
25.4966, + "step": 139960 + }, + { + "epoch": 0.28274825567536777, + "grad_norm": 327.4715270996094, + "learning_rate": 9.045700410428428e-06, + "loss": 13.62, + "step": 139970 + }, + { + "epoch": 0.2827684563080516, + "grad_norm": 394.3174743652344, + "learning_rate": 9.045495283960784e-06, + "loss": 13.4917, + "step": 139980 + }, + { + "epoch": 0.2827886569407354, + "grad_norm": 676.4134521484375, + "learning_rate": 9.045290137775888e-06, + "loss": 36.4538, + "step": 139990 + }, + { + "epoch": 0.2828088575734192, + "grad_norm": 719.8438720703125, + "learning_rate": 9.045084971874738e-06, + "loss": 15.936, + "step": 140000 + }, + { + "epoch": 0.282829058206103, + "grad_norm": 334.0062255859375, + "learning_rate": 9.044879786258335e-06, + "loss": 23.4169, + "step": 140010 + }, + { + "epoch": 0.2828492588387868, + "grad_norm": 398.6730651855469, + "learning_rate": 9.044674580927678e-06, + "loss": 36.6501, + "step": 140020 + }, + { + "epoch": 0.28286945947147063, + "grad_norm": 305.77349853515625, + "learning_rate": 9.044469355883767e-06, + "loss": 13.6044, + "step": 140030 + }, + { + "epoch": 0.28288966010415445, + "grad_norm": 385.3360900878906, + "learning_rate": 9.044264111127603e-06, + "loss": 23.8349, + "step": 140040 + }, + { + "epoch": 0.28290986073683827, + "grad_norm": 458.33624267578125, + "learning_rate": 9.044058846660187e-06, + "loss": 18.4828, + "step": 140050 + }, + { + "epoch": 0.2829300613695221, + "grad_norm": 472.4216003417969, + "learning_rate": 9.043853562482518e-06, + "loss": 21.9815, + "step": 140060 + }, + { + "epoch": 0.2829502620022059, + "grad_norm": 281.8251037597656, + "learning_rate": 9.043648258595598e-06, + "loss": 23.9108, + "step": 140070 + }, + { + "epoch": 0.28297046263488973, + "grad_norm": 350.5022277832031, + "learning_rate": 9.043442935000428e-06, + "loss": 19.8607, + "step": 140080 + }, + { + "epoch": 0.28299066326757355, + "grad_norm": 1277.1827392578125, + "learning_rate": 9.043237591698006e-06, + "loss": 38.2556, + "step": 140090 + }, + { + "epoch": 0.28301086390025737, + "grad_norm": 534.0780639648438, + "learning_rate": 9.043032228689333e-06, + "loss": 24.3573, + "step": 140100 + }, + { + "epoch": 0.2830310645329412, + "grad_norm": 429.0807189941406, + "learning_rate": 9.042826845975413e-06, + "loss": 30.0201, + "step": 140110 + }, + { + "epoch": 0.283051265165625, + "grad_norm": 502.4304504394531, + "learning_rate": 9.042621443557244e-06, + "loss": 13.0569, + "step": 140120 + }, + { + "epoch": 0.28307146579830883, + "grad_norm": 355.1749572753906, + "learning_rate": 9.042416021435831e-06, + "loss": 32.5609, + "step": 140130 + }, + { + "epoch": 0.2830916664309926, + "grad_norm": 948.6965942382812, + "learning_rate": 9.042210579612171e-06, + "loss": 43.2929, + "step": 140140 + }, + { + "epoch": 0.2831118670636764, + "grad_norm": 305.7770690917969, + "learning_rate": 9.042005118087267e-06, + "loss": 14.1311, + "step": 140150 + }, + { + "epoch": 0.28313206769636023, + "grad_norm": 245.43408203125, + "learning_rate": 9.041799636862119e-06, + "loss": 25.9679, + "step": 140160 + }, + { + "epoch": 0.28315226832904405, + "grad_norm": 552.7593383789062, + "learning_rate": 9.041594135937731e-06, + "loss": 19.4616, + "step": 140170 + }, + { + "epoch": 0.2831724689617279, + "grad_norm": 943.1445922851562, + "learning_rate": 9.041388615315102e-06, + "loss": 23.4781, + "step": 140180 + }, + { + "epoch": 0.2831926695944117, + "grad_norm": 281.3905334472656, + "learning_rate": 9.041183074995238e-06, + "loss": 22.9952, + "step": 140190 + }, + { + "epoch": 
0.2832128702270955, + "grad_norm": 256.47320556640625, + "learning_rate": 9.040977514979136e-06, + "loss": 16.3178, + "step": 140200 + }, + { + "epoch": 0.28323307085977933, + "grad_norm": 19.432308197021484, + "learning_rate": 9.0407719352678e-06, + "loss": 16.9027, + "step": 140210 + }, + { + "epoch": 0.28325327149246315, + "grad_norm": 283.8543395996094, + "learning_rate": 9.040566335862231e-06, + "loss": 25.2738, + "step": 140220 + }, + { + "epoch": 0.283273472125147, + "grad_norm": 888.1716918945312, + "learning_rate": 9.040360716763432e-06, + "loss": 20.5632, + "step": 140230 + }, + { + "epoch": 0.2832936727578308, + "grad_norm": 191.85345458984375, + "learning_rate": 9.040155077972406e-06, + "loss": 15.5759, + "step": 140240 + }, + { + "epoch": 0.2833138733905146, + "grad_norm": 1257.1053466796875, + "learning_rate": 9.039949419490152e-06, + "loss": 23.4082, + "step": 140250 + }, + { + "epoch": 0.2833340740231984, + "grad_norm": 715.1559448242188, + "learning_rate": 9.039743741317677e-06, + "loss": 16.0674, + "step": 140260 + }, + { + "epoch": 0.2833542746558822, + "grad_norm": 528.466064453125, + "learning_rate": 9.03953804345598e-06, + "loss": 22.3576, + "step": 140270 + }, + { + "epoch": 0.283374475288566, + "grad_norm": 364.2476501464844, + "learning_rate": 9.039332325906065e-06, + "loss": 19.1027, + "step": 140280 + }, + { + "epoch": 0.28339467592124984, + "grad_norm": 474.8454895019531, + "learning_rate": 9.039126588668934e-06, + "loss": 22.6441, + "step": 140290 + }, + { + "epoch": 0.28341487655393366, + "grad_norm": 594.119140625, + "learning_rate": 9.038920831745587e-06, + "loss": 16.2823, + "step": 140300 + }, + { + "epoch": 0.2834350771866175, + "grad_norm": 572.68798828125, + "learning_rate": 9.038715055137033e-06, + "loss": 18.5837, + "step": 140310 + }, + { + "epoch": 0.2834552778193013, + "grad_norm": 257.25201416015625, + "learning_rate": 9.038509258844271e-06, + "loss": 28.9023, + "step": 140320 + }, + { + "epoch": 0.2834754784519851, + "grad_norm": 457.9421081542969, + "learning_rate": 9.038303442868304e-06, + "loss": 27.8972, + "step": 140330 + }, + { + "epoch": 0.28349567908466894, + "grad_norm": 736.3229370117188, + "learning_rate": 9.038097607210136e-06, + "loss": 19.0042, + "step": 140340 + }, + { + "epoch": 0.28351587971735276, + "grad_norm": 359.7777404785156, + "learning_rate": 9.037891751870772e-06, + "loss": 21.475, + "step": 140350 + }, + { + "epoch": 0.2835360803500366, + "grad_norm": 800.7240600585938, + "learning_rate": 9.037685876851211e-06, + "loss": 26.0903, + "step": 140360 + }, + { + "epoch": 0.2835562809827204, + "grad_norm": 1088.6314697265625, + "learning_rate": 9.03747998215246e-06, + "loss": 21.6544, + "step": 140370 + }, + { + "epoch": 0.2835764816154042, + "grad_norm": 114.48328399658203, + "learning_rate": 9.03727406777552e-06, + "loss": 11.678, + "step": 140380 + }, + { + "epoch": 0.283596682248088, + "grad_norm": 609.7506713867188, + "learning_rate": 9.037068133721396e-06, + "loss": 19.2222, + "step": 140390 + }, + { + "epoch": 0.2836168828807718, + "grad_norm": 35.8396110534668, + "learning_rate": 9.036862179991092e-06, + "loss": 6.4215, + "step": 140400 + }, + { + "epoch": 0.2836370835134556, + "grad_norm": 424.3556823730469, + "learning_rate": 9.036656206585612e-06, + "loss": 13.0586, + "step": 140410 + }, + { + "epoch": 0.28365728414613944, + "grad_norm": 688.9608154296875, + "learning_rate": 9.036450213505958e-06, + "loss": 32.8414, + "step": 140420 + }, + { + "epoch": 0.28367748477882326, + "grad_norm": 552.4269409179688, + 
"learning_rate": 9.036244200753136e-06, + "loss": 17.1787, + "step": 140430 + }, + { + "epoch": 0.2836976854115071, + "grad_norm": 90.48094940185547, + "learning_rate": 9.036038168328149e-06, + "loss": 19.5705, + "step": 140440 + }, + { + "epoch": 0.2837178860441909, + "grad_norm": 329.4688415527344, + "learning_rate": 9.035832116232002e-06, + "loss": 35.406, + "step": 140450 + }, + { + "epoch": 0.2837380866768747, + "grad_norm": 0.4015864133834839, + "learning_rate": 9.035626044465699e-06, + "loss": 21.8288, + "step": 140460 + }, + { + "epoch": 0.28375828730955854, + "grad_norm": 221.49996948242188, + "learning_rate": 9.035419953030244e-06, + "loss": 16.4197, + "step": 140470 + }, + { + "epoch": 0.28377848794224236, + "grad_norm": 128.67318725585938, + "learning_rate": 9.03521384192664e-06, + "loss": 15.8318, + "step": 140480 + }, + { + "epoch": 0.2837986885749262, + "grad_norm": 575.8938598632812, + "learning_rate": 9.035007711155894e-06, + "loss": 19.0869, + "step": 140490 + }, + { + "epoch": 0.28381888920761, + "grad_norm": 581.9172973632812, + "learning_rate": 9.03480156071901e-06, + "loss": 25.2945, + "step": 140500 + }, + { + "epoch": 0.2838390898402938, + "grad_norm": 393.1622314453125, + "learning_rate": 9.034595390616993e-06, + "loss": 16.7882, + "step": 140510 + }, + { + "epoch": 0.2838592904729776, + "grad_norm": 96.71426391601562, + "learning_rate": 9.034389200850847e-06, + "loss": 33.8566, + "step": 140520 + }, + { + "epoch": 0.2838794911056614, + "grad_norm": 195.7721710205078, + "learning_rate": 9.034182991421578e-06, + "loss": 16.3035, + "step": 140530 + }, + { + "epoch": 0.28389969173834523, + "grad_norm": 679.16748046875, + "learning_rate": 9.033976762330189e-06, + "loss": 18.6678, + "step": 140540 + }, + { + "epoch": 0.28391989237102905, + "grad_norm": 224.84298706054688, + "learning_rate": 9.033770513577688e-06, + "loss": 17.4794, + "step": 140550 + }, + { + "epoch": 0.28394009300371287, + "grad_norm": 60.07780456542969, + "learning_rate": 9.033564245165077e-06, + "loss": 15.1955, + "step": 140560 + }, + { + "epoch": 0.2839602936363967, + "grad_norm": 541.4494018554688, + "learning_rate": 9.033357957093366e-06, + "loss": 25.8799, + "step": 140570 + }, + { + "epoch": 0.2839804942690805, + "grad_norm": 325.6931457519531, + "learning_rate": 9.033151649363555e-06, + "loss": 31.2157, + "step": 140580 + }, + { + "epoch": 0.28400069490176433, + "grad_norm": 218.31802368164062, + "learning_rate": 9.032945321976652e-06, + "loss": 15.9393, + "step": 140590 + }, + { + "epoch": 0.28402089553444815, + "grad_norm": 689.8706665039062, + "learning_rate": 9.032738974933663e-06, + "loss": 33.0374, + "step": 140600 + }, + { + "epoch": 0.28404109616713197, + "grad_norm": 117.66937255859375, + "learning_rate": 9.032532608235594e-06, + "loss": 34.8357, + "step": 140610 + }, + { + "epoch": 0.2840612967998158, + "grad_norm": 549.00048828125, + "learning_rate": 9.03232622188345e-06, + "loss": 20.1212, + "step": 140620 + }, + { + "epoch": 0.2840814974324996, + "grad_norm": 464.6247863769531, + "learning_rate": 9.032119815878237e-06, + "loss": 27.099, + "step": 140630 + }, + { + "epoch": 0.28410169806518343, + "grad_norm": 440.7333984375, + "learning_rate": 9.03191339022096e-06, + "loss": 21.3173, + "step": 140640 + }, + { + "epoch": 0.2841218986978672, + "grad_norm": 401.0006408691406, + "learning_rate": 9.031706944912627e-06, + "loss": 19.9862, + "step": 140650 + }, + { + "epoch": 0.284142099330551, + "grad_norm": 198.79434204101562, + "learning_rate": 9.031500479954243e-06, + "loss": 16.9328, 
+ "step": 140660 + }, + { + "epoch": 0.28416229996323483, + "grad_norm": 517.9761962890625, + "learning_rate": 9.031293995346814e-06, + "loss": 28.2514, + "step": 140670 + }, + { + "epoch": 0.28418250059591865, + "grad_norm": 277.5168762207031, + "learning_rate": 9.03108749109135e-06, + "loss": 20.5854, + "step": 140680 + }, + { + "epoch": 0.2842027012286025, + "grad_norm": 648.0955810546875, + "learning_rate": 9.030880967188852e-06, + "loss": 15.7541, + "step": 140690 + }, + { + "epoch": 0.2842229018612863, + "grad_norm": 616.1392822265625, + "learning_rate": 9.03067442364033e-06, + "loss": 37.7645, + "step": 140700 + }, + { + "epoch": 0.2842431024939701, + "grad_norm": 267.5377502441406, + "learning_rate": 9.030467860446789e-06, + "loss": 15.694, + "step": 140710 + }, + { + "epoch": 0.28426330312665393, + "grad_norm": 653.78662109375, + "learning_rate": 9.030261277609235e-06, + "loss": 23.0132, + "step": 140720 + }, + { + "epoch": 0.28428350375933775, + "grad_norm": 454.54779052734375, + "learning_rate": 9.030054675128679e-06, + "loss": 49.4573, + "step": 140730 + }, + { + "epoch": 0.2843037043920216, + "grad_norm": 576.9923706054688, + "learning_rate": 9.029848053006125e-06, + "loss": 31.1583, + "step": 140740 + }, + { + "epoch": 0.2843239050247054, + "grad_norm": 234.82867431640625, + "learning_rate": 9.02964141124258e-06, + "loss": 20.9029, + "step": 140750 + }, + { + "epoch": 0.2843441056573892, + "grad_norm": 392.6349792480469, + "learning_rate": 9.02943474983905e-06, + "loss": 17.4374, + "step": 140760 + }, + { + "epoch": 0.28436430629007303, + "grad_norm": 635.7166748046875, + "learning_rate": 9.029228068796546e-06, + "loss": 41.5775, + "step": 140770 + }, + { + "epoch": 0.2843845069227568, + "grad_norm": 418.59637451171875, + "learning_rate": 9.029021368116072e-06, + "loss": 19.3102, + "step": 140780 + }, + { + "epoch": 0.2844047075554406, + "grad_norm": 481.26287841796875, + "learning_rate": 9.028814647798635e-06, + "loss": 18.2928, + "step": 140790 + }, + { + "epoch": 0.28442490818812444, + "grad_norm": 540.19091796875, + "learning_rate": 9.028607907845247e-06, + "loss": 26.6712, + "step": 140800 + }, + { + "epoch": 0.28444510882080826, + "grad_norm": 444.8948669433594, + "learning_rate": 9.028401148256911e-06, + "loss": 23.629, + "step": 140810 + }, + { + "epoch": 0.2844653094534921, + "grad_norm": 227.0774383544922, + "learning_rate": 9.028194369034638e-06, + "loss": 16.6439, + "step": 140820 + }, + { + "epoch": 0.2844855100861759, + "grad_norm": 362.9493713378906, + "learning_rate": 9.027987570179432e-06, + "loss": 12.54, + "step": 140830 + }, + { + "epoch": 0.2845057107188597, + "grad_norm": 231.91323852539062, + "learning_rate": 9.027780751692303e-06, + "loss": 12.8618, + "step": 140840 + }, + { + "epoch": 0.28452591135154354, + "grad_norm": 499.4613342285156, + "learning_rate": 9.02757391357426e-06, + "loss": 16.7643, + "step": 140850 + }, + { + "epoch": 0.28454611198422736, + "grad_norm": 215.88856506347656, + "learning_rate": 9.027367055826311e-06, + "loss": 23.9327, + "step": 140860 + }, + { + "epoch": 0.2845663126169112, + "grad_norm": 279.5829162597656, + "learning_rate": 9.027160178449464e-06, + "loss": 14.2983, + "step": 140870 + }, + { + "epoch": 0.284586513249595, + "grad_norm": 83.72539520263672, + "learning_rate": 9.026953281444725e-06, + "loss": 26.6484, + "step": 140880 + }, + { + "epoch": 0.2846067138822788, + "grad_norm": 370.7086486816406, + "learning_rate": 9.026746364813105e-06, + "loss": 14.8684, + "step": 140890 + }, + { + "epoch": 0.2846269145149626, 
+ "grad_norm": 390.4755859375, + "learning_rate": 9.026539428555609e-06, + "loss": 21.4055, + "step": 140900 + }, + { + "epoch": 0.2846471151476464, + "grad_norm": 791.41162109375, + "learning_rate": 9.026332472673251e-06, + "loss": 27.1457, + "step": 140910 + }, + { + "epoch": 0.2846673157803302, + "grad_norm": 373.7838439941406, + "learning_rate": 9.026125497167037e-06, + "loss": 15.93, + "step": 140920 + }, + { + "epoch": 0.28468751641301404, + "grad_norm": 488.69012451171875, + "learning_rate": 9.025918502037975e-06, + "loss": 9.9848, + "step": 140930 + }, + { + "epoch": 0.28470771704569786, + "grad_norm": 950.9098510742188, + "learning_rate": 9.025711487287074e-06, + "loss": 44.7586, + "step": 140940 + }, + { + "epoch": 0.2847279176783817, + "grad_norm": 307.61773681640625, + "learning_rate": 9.025504452915345e-06, + "loss": 16.8016, + "step": 140950 + }, + { + "epoch": 0.2847481183110655, + "grad_norm": 256.02911376953125, + "learning_rate": 9.025297398923794e-06, + "loss": 16.7141, + "step": 140960 + }, + { + "epoch": 0.2847683189437493, + "grad_norm": 547.9686279296875, + "learning_rate": 9.025090325313432e-06, + "loss": 25.2908, + "step": 140970 + }, + { + "epoch": 0.28478851957643314, + "grad_norm": 124.6249771118164, + "learning_rate": 9.024883232085268e-06, + "loss": 34.0416, + "step": 140980 + }, + { + "epoch": 0.28480872020911696, + "grad_norm": 574.1445922851562, + "learning_rate": 9.024676119240312e-06, + "loss": 22.9915, + "step": 140990 + }, + { + "epoch": 0.2848289208418008, + "grad_norm": 161.6092529296875, + "learning_rate": 9.02446898677957e-06, + "loss": 22.989, + "step": 141000 + }, + { + "epoch": 0.2848491214744846, + "grad_norm": 197.62750244140625, + "learning_rate": 9.024261834704058e-06, + "loss": 21.8625, + "step": 141010 + }, + { + "epoch": 0.2848693221071684, + "grad_norm": 256.64434814453125, + "learning_rate": 9.02405466301478e-06, + "loss": 28.2441, + "step": 141020 + }, + { + "epoch": 0.2848895227398522, + "grad_norm": 761.4275512695312, + "learning_rate": 9.023847471712748e-06, + "loss": 29.7179, + "step": 141030 + }, + { + "epoch": 0.284909723372536, + "grad_norm": 660.5105590820312, + "learning_rate": 9.023640260798972e-06, + "loss": 29.1144, + "step": 141040 + }, + { + "epoch": 0.2849299240052198, + "grad_norm": 680.9800415039062, + "learning_rate": 9.02343303027446e-06, + "loss": 31.2829, + "step": 141050 + }, + { + "epoch": 0.28495012463790365, + "grad_norm": 192.0106201171875, + "learning_rate": 9.023225780140223e-06, + "loss": 21.9095, + "step": 141060 + }, + { + "epoch": 0.28497032527058747, + "grad_norm": 1290.7042236328125, + "learning_rate": 9.023018510397274e-06, + "loss": 41.4337, + "step": 141070 + }, + { + "epoch": 0.2849905259032713, + "grad_norm": 739.5303955078125, + "learning_rate": 9.022811221046618e-06, + "loss": 15.2573, + "step": 141080 + }, + { + "epoch": 0.2850107265359551, + "grad_norm": 291.6741638183594, + "learning_rate": 9.02260391208927e-06, + "loss": 39.7825, + "step": 141090 + }, + { + "epoch": 0.2850309271686389, + "grad_norm": 676.3077392578125, + "learning_rate": 9.022396583526238e-06, + "loss": 28.4242, + "step": 141100 + }, + { + "epoch": 0.28505112780132275, + "grad_norm": 11.59755802154541, + "learning_rate": 9.022189235358533e-06, + "loss": 16.0288, + "step": 141110 + }, + { + "epoch": 0.28507132843400657, + "grad_norm": 877.0466918945312, + "learning_rate": 9.021981867587165e-06, + "loss": 23.9562, + "step": 141120 + }, + { + "epoch": 0.2850915290666904, + "grad_norm": 525.8517456054688, + "learning_rate": 
9.021774480213145e-06, + "loss": 16.4442, + "step": 141130 + }, + { + "epoch": 0.2851117296993742, + "grad_norm": 696.8245239257812, + "learning_rate": 9.021567073237486e-06, + "loss": 12.7827, + "step": 141140 + }, + { + "epoch": 0.285131930332058, + "grad_norm": 606.3016967773438, + "learning_rate": 9.021359646661194e-06, + "loss": 12.2285, + "step": 141150 + }, + { + "epoch": 0.2851521309647418, + "grad_norm": 445.4494323730469, + "learning_rate": 9.021152200485283e-06, + "loss": 17.6888, + "step": 141160 + }, + { + "epoch": 0.2851723315974256, + "grad_norm": 507.1750183105469, + "learning_rate": 9.020944734710767e-06, + "loss": 21.5278, + "step": 141170 + }, + { + "epoch": 0.28519253223010943, + "grad_norm": 14.19613265991211, + "learning_rate": 9.02073724933865e-06, + "loss": 15.0407, + "step": 141180 + }, + { + "epoch": 0.28521273286279325, + "grad_norm": 309.0068664550781, + "learning_rate": 9.02052974436995e-06, + "loss": 20.7985, + "step": 141190 + }, + { + "epoch": 0.28523293349547707, + "grad_norm": 594.0772094726562, + "learning_rate": 9.020322219805674e-06, + "loss": 24.369, + "step": 141200 + }, + { + "epoch": 0.2852531341281609, + "grad_norm": 814.9288940429688, + "learning_rate": 9.020114675646835e-06, + "loss": 30.1738, + "step": 141210 + }, + { + "epoch": 0.2852733347608447, + "grad_norm": 711.9208984375, + "learning_rate": 9.019907111894447e-06, + "loss": 29.0659, + "step": 141220 + }, + { + "epoch": 0.28529353539352853, + "grad_norm": 406.48309326171875, + "learning_rate": 9.019699528549518e-06, + "loss": 32.3264, + "step": 141230 + }, + { + "epoch": 0.28531373602621235, + "grad_norm": 513.768310546875, + "learning_rate": 9.01949192561306e-06, + "loss": 24.5212, + "step": 141240 + }, + { + "epoch": 0.28533393665889617, + "grad_norm": 657.7827758789062, + "learning_rate": 9.019284303086086e-06, + "loss": 13.5329, + "step": 141250 + }, + { + "epoch": 0.28535413729158, + "grad_norm": 1400.362548828125, + "learning_rate": 9.01907666096961e-06, + "loss": 36.6206, + "step": 141260 + }, + { + "epoch": 0.2853743379242638, + "grad_norm": 339.25982666015625, + "learning_rate": 9.018868999264641e-06, + "loss": 25.5105, + "step": 141270 + }, + { + "epoch": 0.28539453855694763, + "grad_norm": 371.290771484375, + "learning_rate": 9.018661317972191e-06, + "loss": 13.5509, + "step": 141280 + }, + { + "epoch": 0.2854147391896314, + "grad_norm": 496.48199462890625, + "learning_rate": 9.018453617093273e-06, + "loss": 50.5274, + "step": 141290 + }, + { + "epoch": 0.2854349398223152, + "grad_norm": 525.364501953125, + "learning_rate": 9.0182458966289e-06, + "loss": 17.7513, + "step": 141300 + }, + { + "epoch": 0.28545514045499903, + "grad_norm": 375.22796630859375, + "learning_rate": 9.018038156580084e-06, + "loss": 21.2563, + "step": 141310 + }, + { + "epoch": 0.28547534108768285, + "grad_norm": 336.7750244140625, + "learning_rate": 9.017830396947838e-06, + "loss": 16.0464, + "step": 141320 + }, + { + "epoch": 0.2854955417203667, + "grad_norm": 600.1476440429688, + "learning_rate": 9.017622617733173e-06, + "loss": 13.1931, + "step": 141330 + }, + { + "epoch": 0.2855157423530505, + "grad_norm": 163.05801391601562, + "learning_rate": 9.017414818937101e-06, + "loss": 45.0232, + "step": 141340 + }, + { + "epoch": 0.2855359429857343, + "grad_norm": 403.7227783203125, + "learning_rate": 9.017207000560639e-06, + "loss": 15.3571, + "step": 141350 + }, + { + "epoch": 0.28555614361841813, + "grad_norm": 1307.354736328125, + "learning_rate": 9.016999162604795e-06, + "loss": 23.5802, + "step": 141360 
+ }, + { + "epoch": 0.28557634425110195, + "grad_norm": 614.9104614257812, + "learning_rate": 9.016791305070587e-06, + "loss": 23.8888, + "step": 141370 + }, + { + "epoch": 0.2855965448837858, + "grad_norm": 285.7597351074219, + "learning_rate": 9.016583427959025e-06, + "loss": 15.5126, + "step": 141380 + }, + { + "epoch": 0.2856167455164696, + "grad_norm": 300.525146484375, + "learning_rate": 9.01637553127112e-06, + "loss": 17.0144, + "step": 141390 + }, + { + "epoch": 0.2856369461491534, + "grad_norm": 405.881591796875, + "learning_rate": 9.01616761500789e-06, + "loss": 19.6971, + "step": 141400 + }, + { + "epoch": 0.28565714678183723, + "grad_norm": 221.88356018066406, + "learning_rate": 9.015959679170346e-06, + "loss": 18.885, + "step": 141410 + }, + { + "epoch": 0.285677347414521, + "grad_norm": 1110.6405029296875, + "learning_rate": 9.015751723759501e-06, + "loss": 27.1684, + "step": 141420 + }, + { + "epoch": 0.2856975480472048, + "grad_norm": 419.91925048828125, + "learning_rate": 9.01554374877637e-06, + "loss": 19.0117, + "step": 141430 + }, + { + "epoch": 0.28571774867988864, + "grad_norm": 137.30470275878906, + "learning_rate": 9.015335754221964e-06, + "loss": 17.3682, + "step": 141440 + }, + { + "epoch": 0.28573794931257246, + "grad_norm": 208.8695831298828, + "learning_rate": 9.015127740097301e-06, + "loss": 16.6924, + "step": 141450 + }, + { + "epoch": 0.2857581499452563, + "grad_norm": 245.57373046875, + "learning_rate": 9.01491970640339e-06, + "loss": 19.0873, + "step": 141460 + }, + { + "epoch": 0.2857783505779401, + "grad_norm": 533.674560546875, + "learning_rate": 9.014711653141248e-06, + "loss": 24.1613, + "step": 141470 + }, + { + "epoch": 0.2857985512106239, + "grad_norm": 384.47943115234375, + "learning_rate": 9.014503580311889e-06, + "loss": 16.388, + "step": 141480 + }, + { + "epoch": 0.28581875184330774, + "grad_norm": 268.9631042480469, + "learning_rate": 9.014295487916325e-06, + "loss": 25.5681, + "step": 141490 + }, + { + "epoch": 0.28583895247599156, + "grad_norm": 0.9439303278923035, + "learning_rate": 9.014087375955574e-06, + "loss": 24.6955, + "step": 141500 + }, + { + "epoch": 0.2858591531086754, + "grad_norm": 642.2147827148438, + "learning_rate": 9.013879244430645e-06, + "loss": 34.832, + "step": 141510 + }, + { + "epoch": 0.2858793537413592, + "grad_norm": 511.16973876953125, + "learning_rate": 9.013671093342557e-06, + "loss": 19.2557, + "step": 141520 + }, + { + "epoch": 0.285899554374043, + "grad_norm": 667.150634765625, + "learning_rate": 9.013462922692324e-06, + "loss": 30.6257, + "step": 141530 + }, + { + "epoch": 0.2859197550067268, + "grad_norm": 486.5180358886719, + "learning_rate": 9.013254732480958e-06, + "loss": 21.1171, + "step": 141540 + }, + { + "epoch": 0.2859399556394106, + "grad_norm": 584.9976196289062, + "learning_rate": 9.013046522709477e-06, + "loss": 30.7921, + "step": 141550 + }, + { + "epoch": 0.2859601562720944, + "grad_norm": 173.3647003173828, + "learning_rate": 9.01283829337889e-06, + "loss": 53.3171, + "step": 141560 + }, + { + "epoch": 0.28598035690477824, + "grad_norm": 139.5582275390625, + "learning_rate": 9.01263004449022e-06, + "loss": 26.5207, + "step": 141570 + }, + { + "epoch": 0.28600055753746206, + "grad_norm": 278.63323974609375, + "learning_rate": 9.012421776044477e-06, + "loss": 36.4622, + "step": 141580 + }, + { + "epoch": 0.2860207581701459, + "grad_norm": 0.0, + "learning_rate": 9.012213488042677e-06, + "loss": 20.3394, + "step": 141590 + }, + { + "epoch": 0.2860409588028297, + "grad_norm": 
104.30828094482422, + "learning_rate": 9.012005180485834e-06, + "loss": 18.8843, + "step": 141600 + }, + { + "epoch": 0.2860611594355135, + "grad_norm": 943.4236450195312, + "learning_rate": 9.011796853374964e-06, + "loss": 32.3506, + "step": 141610 + }, + { + "epoch": 0.28608136006819734, + "grad_norm": 237.1873779296875, + "learning_rate": 9.011588506711085e-06, + "loss": 30.9954, + "step": 141620 + }, + { + "epoch": 0.28610156070088116, + "grad_norm": 619.3309936523438, + "learning_rate": 9.011380140495207e-06, + "loss": 28.4549, + "step": 141630 + }, + { + "epoch": 0.286121761333565, + "grad_norm": 1272.03759765625, + "learning_rate": 9.01117175472835e-06, + "loss": 26.1937, + "step": 141640 + }, + { + "epoch": 0.2861419619662488, + "grad_norm": 331.3840637207031, + "learning_rate": 9.010963349411529e-06, + "loss": 28.1638, + "step": 141650 + }, + { + "epoch": 0.2861621625989326, + "grad_norm": 72.40825653076172, + "learning_rate": 9.01075492454576e-06, + "loss": 15.7668, + "step": 141660 + }, + { + "epoch": 0.2861823632316164, + "grad_norm": 522.8346557617188, + "learning_rate": 9.010546480132055e-06, + "loss": 21.3102, + "step": 141670 + }, + { + "epoch": 0.2862025638643002, + "grad_norm": 296.7470397949219, + "learning_rate": 9.010338016171434e-06, + "loss": 11.4901, + "step": 141680 + }, + { + "epoch": 0.28622276449698403, + "grad_norm": 593.6846923828125, + "learning_rate": 9.010129532664914e-06, + "loss": 28.7207, + "step": 141690 + }, + { + "epoch": 0.28624296512966785, + "grad_norm": 339.2178649902344, + "learning_rate": 9.009921029613506e-06, + "loss": 15.8317, + "step": 141700 + }, + { + "epoch": 0.28626316576235167, + "grad_norm": 195.95559692382812, + "learning_rate": 9.00971250701823e-06, + "loss": 36.6844, + "step": 141710 + }, + { + "epoch": 0.2862833663950355, + "grad_norm": 855.398681640625, + "learning_rate": 9.009503964880105e-06, + "loss": 18.0744, + "step": 141720 + }, + { + "epoch": 0.2863035670277193, + "grad_norm": 88.88728332519531, + "learning_rate": 9.00929540320014e-06, + "loss": 18.5225, + "step": 141730 + }, + { + "epoch": 0.28632376766040313, + "grad_norm": 361.8889465332031, + "learning_rate": 9.009086821979358e-06, + "loss": 28.5676, + "step": 141740 + }, + { + "epoch": 0.28634396829308695, + "grad_norm": 212.55238342285156, + "learning_rate": 9.00887822121877e-06, + "loss": 11.9381, + "step": 141750 + }, + { + "epoch": 0.28636416892577077, + "grad_norm": 184.14773559570312, + "learning_rate": 9.008669600919399e-06, + "loss": 14.0141, + "step": 141760 + }, + { + "epoch": 0.2863843695584546, + "grad_norm": 583.1585693359375, + "learning_rate": 9.008460961082257e-06, + "loss": 26.22, + "step": 141770 + }, + { + "epoch": 0.2864045701911384, + "grad_norm": 423.38079833984375, + "learning_rate": 9.008252301708362e-06, + "loss": 13.1701, + "step": 141780 + }, + { + "epoch": 0.28642477082382223, + "grad_norm": 623.1273193359375, + "learning_rate": 9.008043622798732e-06, + "loss": 32.882, + "step": 141790 + }, + { + "epoch": 0.286444971456506, + "grad_norm": 553.2904663085938, + "learning_rate": 9.007834924354384e-06, + "loss": 28.11, + "step": 141800 + }, + { + "epoch": 0.2864651720891898, + "grad_norm": 922.823974609375, + "learning_rate": 9.007626206376335e-06, + "loss": 22.7093, + "step": 141810 + }, + { + "epoch": 0.28648537272187363, + "grad_norm": 689.900390625, + "learning_rate": 9.0074174688656e-06, + "loss": 18.8748, + "step": 141820 + }, + { + "epoch": 0.28650557335455745, + "grad_norm": 110.99595642089844, + "learning_rate": 9.007208711823198e-06, + 
"loss": 23.6381, + "step": 141830 + }, + { + "epoch": 0.2865257739872413, + "grad_norm": 679.4481201171875, + "learning_rate": 9.006999935250149e-06, + "loss": 15.933, + "step": 141840 + }, + { + "epoch": 0.2865459746199251, + "grad_norm": 518.9588012695312, + "learning_rate": 9.006791139147468e-06, + "loss": 24.9338, + "step": 141850 + }, + { + "epoch": 0.2865661752526089, + "grad_norm": 557.93701171875, + "learning_rate": 9.006582323516172e-06, + "loss": 25.8751, + "step": 141860 + }, + { + "epoch": 0.28658637588529273, + "grad_norm": 420.7420349121094, + "learning_rate": 9.006373488357281e-06, + "loss": 32.2564, + "step": 141870 + }, + { + "epoch": 0.28660657651797655, + "grad_norm": 334.7980041503906, + "learning_rate": 9.00616463367181e-06, + "loss": 21.5728, + "step": 141880 + }, + { + "epoch": 0.2866267771506604, + "grad_norm": 0.0, + "learning_rate": 9.005955759460779e-06, + "loss": 22.1354, + "step": 141890 + }, + { + "epoch": 0.2866469777833442, + "grad_norm": 324.70465087890625, + "learning_rate": 9.005746865725206e-06, + "loss": 22.3069, + "step": 141900 + }, + { + "epoch": 0.286667178416028, + "grad_norm": 480.416748046875, + "learning_rate": 9.005537952466108e-06, + "loss": 36.1294, + "step": 141910 + }, + { + "epoch": 0.28668737904871183, + "grad_norm": 208.1507568359375, + "learning_rate": 9.005329019684503e-06, + "loss": 36.0992, + "step": 141920 + }, + { + "epoch": 0.2867075796813956, + "grad_norm": 257.6912841796875, + "learning_rate": 9.005120067381413e-06, + "loss": 25.5808, + "step": 141930 + }, + { + "epoch": 0.2867277803140794, + "grad_norm": 467.33734130859375, + "learning_rate": 9.004911095557852e-06, + "loss": 27.5341, + "step": 141940 + }, + { + "epoch": 0.28674798094676324, + "grad_norm": 251.2129364013672, + "learning_rate": 9.00470210421484e-06, + "loss": 38.4227, + "step": 141950 + }, + { + "epoch": 0.28676818157944706, + "grad_norm": 0.7161155343055725, + "learning_rate": 9.004493093353394e-06, + "loss": 33.6804, + "step": 141960 + }, + { + "epoch": 0.2867883822121309, + "grad_norm": 168.31422424316406, + "learning_rate": 9.004284062974537e-06, + "loss": 19.0771, + "step": 141970 + }, + { + "epoch": 0.2868085828448147, + "grad_norm": 114.95464324951172, + "learning_rate": 9.004075013079284e-06, + "loss": 22.4422, + "step": 141980 + }, + { + "epoch": 0.2868287834774985, + "grad_norm": 479.05218505859375, + "learning_rate": 9.003865943668656e-06, + "loss": 16.7318, + "step": 141990 + }, + { + "epoch": 0.28684898411018234, + "grad_norm": 178.01829528808594, + "learning_rate": 9.003656854743667e-06, + "loss": 22.3476, + "step": 142000 + }, + { + "epoch": 0.28686918474286616, + "grad_norm": 576.8214111328125, + "learning_rate": 9.003447746305345e-06, + "loss": 30.2682, + "step": 142010 + }, + { + "epoch": 0.28688938537555, + "grad_norm": 400.33575439453125, + "learning_rate": 9.003238618354702e-06, + "loss": 15.8093, + "step": 142020 + }, + { + "epoch": 0.2869095860082338, + "grad_norm": 393.9613952636719, + "learning_rate": 9.003029470892759e-06, + "loss": 25.6593, + "step": 142030 + }, + { + "epoch": 0.2869297866409176, + "grad_norm": 152.47128295898438, + "learning_rate": 9.002820303920537e-06, + "loss": 19.8718, + "step": 142040 + }, + { + "epoch": 0.28694998727360144, + "grad_norm": 577.7484741210938, + "learning_rate": 9.002611117439054e-06, + "loss": 17.9984, + "step": 142050 + }, + { + "epoch": 0.2869701879062852, + "grad_norm": 184.30430603027344, + "learning_rate": 9.00240191144933e-06, + "loss": 20.9967, + "step": 142060 + }, + { + "epoch": 
0.286990388538969, + "grad_norm": 239.4911651611328, + "learning_rate": 9.002192685952385e-06, + "loss": 20.1448, + "step": 142070 + }, + { + "epoch": 0.28701058917165284, + "grad_norm": 244.6146697998047, + "learning_rate": 9.001983440949236e-06, + "loss": 15.4028, + "step": 142080 + }, + { + "epoch": 0.28703078980433666, + "grad_norm": 150.16650390625, + "learning_rate": 9.001774176440908e-06, + "loss": 16.8485, + "step": 142090 + }, + { + "epoch": 0.2870509904370205, + "grad_norm": 309.9389953613281, + "learning_rate": 9.001564892428416e-06, + "loss": 31.5917, + "step": 142100 + }, + { + "epoch": 0.2870711910697043, + "grad_norm": 561.6334228515625, + "learning_rate": 9.001355588912784e-06, + "loss": 21.6469, + "step": 142110 + }, + { + "epoch": 0.2870913917023881, + "grad_norm": 728.5511474609375, + "learning_rate": 9.001146265895028e-06, + "loss": 18.9681, + "step": 142120 + }, + { + "epoch": 0.28711159233507194, + "grad_norm": 467.7611083984375, + "learning_rate": 9.000936923376171e-06, + "loss": 11.793, + "step": 142130 + }, + { + "epoch": 0.28713179296775576, + "grad_norm": 562.4061889648438, + "learning_rate": 9.000727561357234e-06, + "loss": 17.7584, + "step": 142140 + }, + { + "epoch": 0.2871519936004396, + "grad_norm": 128.87075805664062, + "learning_rate": 9.000518179839236e-06, + "loss": 13.9033, + "step": 142150 + }, + { + "epoch": 0.2871721942331234, + "grad_norm": 0.0, + "learning_rate": 9.000308778823196e-06, + "loss": 14.2383, + "step": 142160 + }, + { + "epoch": 0.2871923948658072, + "grad_norm": 4.180148601531982, + "learning_rate": 9.000099358310137e-06, + "loss": 21.0602, + "step": 142170 + }, + { + "epoch": 0.287212595498491, + "grad_norm": 281.80340576171875, + "learning_rate": 8.99988991830108e-06, + "loss": 18.984, + "step": 142180 + }, + { + "epoch": 0.2872327961311748, + "grad_norm": 313.60968017578125, + "learning_rate": 8.999680458797042e-06, + "loss": 21.767, + "step": 142190 + }, + { + "epoch": 0.2872529967638586, + "grad_norm": 511.8515319824219, + "learning_rate": 8.999470979799048e-06, + "loss": 25.0589, + "step": 142200 + }, + { + "epoch": 0.28727319739654245, + "grad_norm": 44.609806060791016, + "learning_rate": 8.999261481308117e-06, + "loss": 25.8067, + "step": 142210 + }, + { + "epoch": 0.28729339802922627, + "grad_norm": 176.3469696044922, + "learning_rate": 8.999051963325271e-06, + "loss": 13.9836, + "step": 142220 + }, + { + "epoch": 0.2873135986619101, + "grad_norm": 246.97377014160156, + "learning_rate": 8.998842425851531e-06, + "loss": 10.2865, + "step": 142230 + }, + { + "epoch": 0.2873337992945939, + "grad_norm": 147.90199279785156, + "learning_rate": 8.998632868887918e-06, + "loss": 11.0603, + "step": 142240 + }, + { + "epoch": 0.2873539999272777, + "grad_norm": 778.6231689453125, + "learning_rate": 8.998423292435455e-06, + "loss": 36.8754, + "step": 142250 + }, + { + "epoch": 0.28737420055996155, + "grad_norm": 309.0158386230469, + "learning_rate": 8.998213696495159e-06, + "loss": 30.2746, + "step": 142260 + }, + { + "epoch": 0.28739440119264537, + "grad_norm": 857.2860717773438, + "learning_rate": 8.998004081068055e-06, + "loss": 34.1955, + "step": 142270 + }, + { + "epoch": 0.2874146018253292, + "grad_norm": 651.5060424804688, + "learning_rate": 8.997794446155165e-06, + "loss": 17.9878, + "step": 142280 + }, + { + "epoch": 0.287434802458013, + "grad_norm": 777.4243774414062, + "learning_rate": 8.997584791757508e-06, + "loss": 32.8457, + "step": 142290 + }, + { + "epoch": 0.2874550030906968, + "grad_norm": 1243.1383056640625, + 
"learning_rate": 8.99737511787611e-06, + "loss": 41.3604, + "step": 142300 + }, + { + "epoch": 0.2874752037233806, + "grad_norm": 152.16807556152344, + "learning_rate": 8.997165424511988e-06, + "loss": 33.1704, + "step": 142310 + }, + { + "epoch": 0.2874954043560644, + "grad_norm": 183.40115356445312, + "learning_rate": 8.996955711666168e-06, + "loss": 21.6837, + "step": 142320 + }, + { + "epoch": 0.28751560498874823, + "grad_norm": 527.2123413085938, + "learning_rate": 8.996745979339671e-06, + "loss": 13.4808, + "step": 142330 + }, + { + "epoch": 0.28753580562143205, + "grad_norm": 312.4104919433594, + "learning_rate": 8.996536227533519e-06, + "loss": 17.1448, + "step": 142340 + }, + { + "epoch": 0.28755600625411587, + "grad_norm": 532.3212280273438, + "learning_rate": 8.996326456248732e-06, + "loss": 22.7429, + "step": 142350 + }, + { + "epoch": 0.2875762068867997, + "grad_norm": 267.4195251464844, + "learning_rate": 8.996116665486337e-06, + "loss": 38.7647, + "step": 142360 + }, + { + "epoch": 0.2875964075194835, + "grad_norm": 457.72979736328125, + "learning_rate": 8.995906855247354e-06, + "loss": 25.3404, + "step": 142370 + }, + { + "epoch": 0.28761660815216733, + "grad_norm": 1294.712158203125, + "learning_rate": 8.995697025532803e-06, + "loss": 52.108, + "step": 142380 + }, + { + "epoch": 0.28763680878485115, + "grad_norm": 603.0121459960938, + "learning_rate": 8.995487176343711e-06, + "loss": 27.2448, + "step": 142390 + }, + { + "epoch": 0.28765700941753497, + "grad_norm": 536.1098022460938, + "learning_rate": 8.9952773076811e-06, + "loss": 16.5084, + "step": 142400 + }, + { + "epoch": 0.2876772100502188, + "grad_norm": 55.689697265625, + "learning_rate": 8.99506741954599e-06, + "loss": 21.7152, + "step": 142410 + }, + { + "epoch": 0.2876974106829026, + "grad_norm": 58.7491455078125, + "learning_rate": 8.994857511939408e-06, + "loss": 23.1094, + "step": 142420 + }, + { + "epoch": 0.28771761131558643, + "grad_norm": 644.9966430664062, + "learning_rate": 8.994647584862374e-06, + "loss": 12.4131, + "step": 142430 + }, + { + "epoch": 0.2877378119482702, + "grad_norm": 346.7236633300781, + "learning_rate": 8.994437638315912e-06, + "loss": 28.0715, + "step": 142440 + }, + { + "epoch": 0.287758012580954, + "grad_norm": 579.5534057617188, + "learning_rate": 8.994227672301046e-06, + "loss": 11.6123, + "step": 142450 + }, + { + "epoch": 0.28777821321363783, + "grad_norm": 572.34326171875, + "learning_rate": 8.994017686818799e-06, + "loss": 21.997, + "step": 142460 + }, + { + "epoch": 0.28779841384632165, + "grad_norm": 464.645751953125, + "learning_rate": 8.993807681870192e-06, + "loss": 23.9822, + "step": 142470 + }, + { + "epoch": 0.2878186144790055, + "grad_norm": 271.6661376953125, + "learning_rate": 8.993597657456252e-06, + "loss": 20.6079, + "step": 142480 + }, + { + "epoch": 0.2878388151116893, + "grad_norm": 319.6411437988281, + "learning_rate": 8.993387613578003e-06, + "loss": 36.7985, + "step": 142490 + }, + { + "epoch": 0.2878590157443731, + "grad_norm": 785.7559814453125, + "learning_rate": 8.993177550236464e-06, + "loss": 21.1795, + "step": 142500 + }, + { + "epoch": 0.28787921637705693, + "grad_norm": 372.37860107421875, + "learning_rate": 8.992967467432665e-06, + "loss": 27.2943, + "step": 142510 + }, + { + "epoch": 0.28789941700974075, + "grad_norm": 1.3575628995895386, + "learning_rate": 8.992757365167625e-06, + "loss": 20.1759, + "step": 142520 + }, + { + "epoch": 0.2879196176424246, + "grad_norm": 253.48573303222656, + "learning_rate": 8.99254724344237e-06, + "loss": 
25.6765, + "step": 142530 + }, + { + "epoch": 0.2879398182751084, + "grad_norm": 319.9288330078125, + "learning_rate": 8.992337102257925e-06, + "loss": 26.6335, + "step": 142540 + }, + { + "epoch": 0.2879600189077922, + "grad_norm": 591.0591430664062, + "learning_rate": 8.992126941615314e-06, + "loss": 45.3629, + "step": 142550 + }, + { + "epoch": 0.28798021954047603, + "grad_norm": 356.8609924316406, + "learning_rate": 8.991916761515557e-06, + "loss": 29.198, + "step": 142560 + }, + { + "epoch": 0.2880004201731598, + "grad_norm": 448.464111328125, + "learning_rate": 8.991706561959684e-06, + "loss": 16.613, + "step": 142570 + }, + { + "epoch": 0.2880206208058436, + "grad_norm": 941.5521240234375, + "learning_rate": 8.991496342948718e-06, + "loss": 20.0772, + "step": 142580 + }, + { + "epoch": 0.28804082143852744, + "grad_norm": 897.8544311523438, + "learning_rate": 8.991286104483682e-06, + "loss": 26.1854, + "step": 142590 + }, + { + "epoch": 0.28806102207121126, + "grad_norm": 336.386474609375, + "learning_rate": 8.991075846565603e-06, + "loss": 14.872, + "step": 142600 + }, + { + "epoch": 0.2880812227038951, + "grad_norm": 149.27305603027344, + "learning_rate": 8.990865569195502e-06, + "loss": 26.0966, + "step": 142610 + }, + { + "epoch": 0.2881014233365789, + "grad_norm": 629.7431640625, + "learning_rate": 8.990655272374409e-06, + "loss": 18.5056, + "step": 142620 + }, + { + "epoch": 0.2881216239692627, + "grad_norm": 362.06134033203125, + "learning_rate": 8.990444956103343e-06, + "loss": 16.4395, + "step": 142630 + }, + { + "epoch": 0.28814182460194654, + "grad_norm": 153.1188201904297, + "learning_rate": 8.990234620383335e-06, + "loss": 22.6376, + "step": 142640 + }, + { + "epoch": 0.28816202523463036, + "grad_norm": 414.06201171875, + "learning_rate": 8.990024265215405e-06, + "loss": 20.5046, + "step": 142650 + }, + { + "epoch": 0.2881822258673142, + "grad_norm": 434.1939697265625, + "learning_rate": 8.989813890600582e-06, + "loss": 26.4005, + "step": 142660 + }, + { + "epoch": 0.288202426499998, + "grad_norm": 587.8204956054688, + "learning_rate": 8.989603496539891e-06, + "loss": 22.4086, + "step": 142670 + }, + { + "epoch": 0.2882226271326818, + "grad_norm": 277.9704895019531, + "learning_rate": 8.989393083034355e-06, + "loss": 13.3265, + "step": 142680 + }, + { + "epoch": 0.28824282776536564, + "grad_norm": 635.5704345703125, + "learning_rate": 8.989182650085003e-06, + "loss": 23.5692, + "step": 142690 + }, + { + "epoch": 0.2882630283980494, + "grad_norm": 290.620361328125, + "learning_rate": 8.988972197692857e-06, + "loss": 22.3352, + "step": 142700 + }, + { + "epoch": 0.2882832290307332, + "grad_norm": 462.3267517089844, + "learning_rate": 8.988761725858942e-06, + "loss": 25.7847, + "step": 142710 + }, + { + "epoch": 0.28830342966341704, + "grad_norm": 304.3821716308594, + "learning_rate": 8.988551234584289e-06, + "loss": 22.0378, + "step": 142720 + }, + { + "epoch": 0.28832363029610086, + "grad_norm": 393.9117431640625, + "learning_rate": 8.988340723869921e-06, + "loss": 26.1371, + "step": 142730 + }, + { + "epoch": 0.2883438309287847, + "grad_norm": 212.9825439453125, + "learning_rate": 8.988130193716864e-06, + "loss": 22.2655, + "step": 142740 + }, + { + "epoch": 0.2883640315614685, + "grad_norm": 495.86407470703125, + "learning_rate": 8.987919644126145e-06, + "loss": 19.0947, + "step": 142750 + }, + { + "epoch": 0.2883842321941523, + "grad_norm": 599.6559448242188, + "learning_rate": 8.987709075098786e-06, + "loss": 16.4004, + "step": 142760 + }, + { + "epoch": 
0.28840443282683614, + "grad_norm": 1017.9681396484375, + "learning_rate": 8.98749848663582e-06, + "loss": 23.2595, + "step": 142770 + }, + { + "epoch": 0.28842463345951996, + "grad_norm": 381.4158935546875, + "learning_rate": 8.987287878738269e-06, + "loss": 20.6209, + "step": 142780 + }, + { + "epoch": 0.2884448340922038, + "grad_norm": 277.0904846191406, + "learning_rate": 8.987077251407159e-06, + "loss": 29.096, + "step": 142790 + }, + { + "epoch": 0.2884650347248876, + "grad_norm": 234.6095733642578, + "learning_rate": 8.986866604643518e-06, + "loss": 14.6361, + "step": 142800 + }, + { + "epoch": 0.2884852353575714, + "grad_norm": 325.8774108886719, + "learning_rate": 8.986655938448373e-06, + "loss": 34.7078, + "step": 142810 + }, + { + "epoch": 0.2885054359902552, + "grad_norm": 349.6905822753906, + "learning_rate": 8.986445252822752e-06, + "loss": 23.9932, + "step": 142820 + }, + { + "epoch": 0.288525636622939, + "grad_norm": 509.04443359375, + "learning_rate": 8.986234547767681e-06, + "loss": 26.4596, + "step": 142830 + }, + { + "epoch": 0.28854583725562283, + "grad_norm": 397.302734375, + "learning_rate": 8.986023823284184e-06, + "loss": 13.2624, + "step": 142840 + }, + { + "epoch": 0.28856603788830665, + "grad_norm": 255.59266662597656, + "learning_rate": 8.985813079373293e-06, + "loss": 30.9302, + "step": 142850 + }, + { + "epoch": 0.28858623852099047, + "grad_norm": 546.9246826171875, + "learning_rate": 8.98560231603603e-06, + "loss": 13.2987, + "step": 142860 + }, + { + "epoch": 0.2886064391536743, + "grad_norm": 270.22564697265625, + "learning_rate": 8.985391533273425e-06, + "loss": 17.018, + "step": 142870 + }, + { + "epoch": 0.2886266397863581, + "grad_norm": 462.64117431640625, + "learning_rate": 8.985180731086505e-06, + "loss": 22.4081, + "step": 142880 + }, + { + "epoch": 0.28864684041904193, + "grad_norm": 352.8821105957031, + "learning_rate": 8.984969909476299e-06, + "loss": 26.6187, + "step": 142890 + }, + { + "epoch": 0.28866704105172575, + "grad_norm": 926.951416015625, + "learning_rate": 8.984759068443832e-06, + "loss": 34.7425, + "step": 142900 + }, + { + "epoch": 0.28868724168440957, + "grad_norm": 411.7799072265625, + "learning_rate": 8.984548207990133e-06, + "loss": 20.4422, + "step": 142910 + }, + { + "epoch": 0.2887074423170934, + "grad_norm": 561.7974243164062, + "learning_rate": 8.984337328116228e-06, + "loss": 10.887, + "step": 142920 + }, + { + "epoch": 0.2887276429497772, + "grad_norm": 259.2440490722656, + "learning_rate": 8.984126428823147e-06, + "loss": 16.6807, + "step": 142930 + }, + { + "epoch": 0.28874784358246103, + "grad_norm": 502.75067138671875, + "learning_rate": 8.983915510111918e-06, + "loss": 20.0843, + "step": 142940 + }, + { + "epoch": 0.2887680442151448, + "grad_norm": 260.4075927734375, + "learning_rate": 8.983704571983568e-06, + "loss": 21.9718, + "step": 142950 + }, + { + "epoch": 0.2887882448478286, + "grad_norm": 736.0562744140625, + "learning_rate": 8.983493614439123e-06, + "loss": 23.5602, + "step": 142960 + }, + { + "epoch": 0.28880844548051243, + "grad_norm": 48.65081024169922, + "learning_rate": 8.983282637479613e-06, + "loss": 24.7249, + "step": 142970 + }, + { + "epoch": 0.28882864611319625, + "grad_norm": 494.50213623046875, + "learning_rate": 8.983071641106068e-06, + "loss": 24.5031, + "step": 142980 + }, + { + "epoch": 0.2888488467458801, + "grad_norm": 534.8903198242188, + "learning_rate": 8.982860625319514e-06, + "loss": 10.7974, + "step": 142990 + }, + { + "epoch": 0.2888690473785639, + "grad_norm": 884.2274169921875, 
+ "learning_rate": 8.982649590120982e-06, + "loss": 32.7576, + "step": 143000 + }, + { + "epoch": 0.2888892480112477, + "grad_norm": 830.9031372070312, + "learning_rate": 8.982438535511498e-06, + "loss": 22.6672, + "step": 143010 + }, + { + "epoch": 0.28890944864393153, + "grad_norm": 499.8160400390625, + "learning_rate": 8.982227461492092e-06, + "loss": 26.981, + "step": 143020 + }, + { + "epoch": 0.28892964927661535, + "grad_norm": 910.0860595703125, + "learning_rate": 8.982016368063793e-06, + "loss": 31.4334, + "step": 143030 + }, + { + "epoch": 0.2889498499092992, + "grad_norm": 146.77008056640625, + "learning_rate": 8.981805255227627e-06, + "loss": 12.6157, + "step": 143040 + }, + { + "epoch": 0.288970050541983, + "grad_norm": 354.0091247558594, + "learning_rate": 8.981594122984628e-06, + "loss": 35.6246, + "step": 143050 + }, + { + "epoch": 0.2889902511746668, + "grad_norm": 370.31414794921875, + "learning_rate": 8.98138297133582e-06, + "loss": 21.4761, + "step": 143060 + }, + { + "epoch": 0.28901045180735063, + "grad_norm": 293.3982849121094, + "learning_rate": 8.981171800282233e-06, + "loss": 14.7823, + "step": 143070 + }, + { + "epoch": 0.2890306524400344, + "grad_norm": 309.86907958984375, + "learning_rate": 8.9809606098249e-06, + "loss": 13.7215, + "step": 143080 + }, + { + "epoch": 0.2890508530727182, + "grad_norm": 441.5501403808594, + "learning_rate": 8.980749399964847e-06, + "loss": 14.571, + "step": 143090 + }, + { + "epoch": 0.28907105370540204, + "grad_norm": 792.845458984375, + "learning_rate": 8.980538170703104e-06, + "loss": 20.5297, + "step": 143100 + }, + { + "epoch": 0.28909125433808586, + "grad_norm": 323.4634094238281, + "learning_rate": 8.9803269220407e-06, + "loss": 18.91, + "step": 143110 + }, + { + "epoch": 0.2891114549707697, + "grad_norm": 197.86825561523438, + "learning_rate": 8.980115653978667e-06, + "loss": 27.4354, + "step": 143120 + }, + { + "epoch": 0.2891316556034535, + "grad_norm": 439.68585205078125, + "learning_rate": 8.979904366518034e-06, + "loss": 17.8669, + "step": 143130 + }, + { + "epoch": 0.2891518562361373, + "grad_norm": 39.589786529541016, + "learning_rate": 8.979693059659826e-06, + "loss": 44.6112, + "step": 143140 + }, + { + "epoch": 0.28917205686882114, + "grad_norm": 235.7417755126953, + "learning_rate": 8.97948173340508e-06, + "loss": 11.0216, + "step": 143150 + }, + { + "epoch": 0.28919225750150496, + "grad_norm": 420.8698425292969, + "learning_rate": 8.97927038775482e-06, + "loss": 28.2996, + "step": 143160 + }, + { + "epoch": 0.2892124581341888, + "grad_norm": 192.67665100097656, + "learning_rate": 8.979059022710081e-06, + "loss": 20.0547, + "step": 143170 + }, + { + "epoch": 0.2892326587668726, + "grad_norm": 587.1990356445312, + "learning_rate": 8.97884763827189e-06, + "loss": 27.0522, + "step": 143180 + }, + { + "epoch": 0.2892528593995564, + "grad_norm": 497.73663330078125, + "learning_rate": 8.97863623444128e-06, + "loss": 32.4231, + "step": 143190 + }, + { + "epoch": 0.28927306003224024, + "grad_norm": 328.3630065917969, + "learning_rate": 8.978424811219277e-06, + "loss": 18.4167, + "step": 143200 + }, + { + "epoch": 0.289293260664924, + "grad_norm": 114.45890808105469, + "learning_rate": 8.978213368606916e-06, + "loss": 9.9748, + "step": 143210 + }, + { + "epoch": 0.2893134612976078, + "grad_norm": 535.30615234375, + "learning_rate": 8.978001906605226e-06, + "loss": 13.5249, + "step": 143220 + }, + { + "epoch": 0.28933366193029164, + "grad_norm": 593.1908569335938, + "learning_rate": 8.977790425215234e-06, + "loss": 
31.4074, + "step": 143230 + }, + { + "epoch": 0.28935386256297546, + "grad_norm": 209.8313446044922, + "learning_rate": 8.977578924437976e-06, + "loss": 17.6172, + "step": 143240 + }, + { + "epoch": 0.2893740631956593, + "grad_norm": 634.3653564453125, + "learning_rate": 8.97736740427448e-06, + "loss": 29.8174, + "step": 143250 + }, + { + "epoch": 0.2893942638283431, + "grad_norm": 753.666259765625, + "learning_rate": 8.977155864725778e-06, + "loss": 22.5378, + "step": 143260 + }, + { + "epoch": 0.2894144644610269, + "grad_norm": 473.6728210449219, + "learning_rate": 8.976944305792901e-06, + "loss": 18.1003, + "step": 143270 + }, + { + "epoch": 0.28943466509371074, + "grad_norm": 1239.0589599609375, + "learning_rate": 8.97673272747688e-06, + "loss": 55.9816, + "step": 143280 + }, + { + "epoch": 0.28945486572639456, + "grad_norm": 180.95069885253906, + "learning_rate": 8.976521129778746e-06, + "loss": 14.7166, + "step": 143290 + }, + { + "epoch": 0.2894750663590784, + "grad_norm": 52.53523635864258, + "learning_rate": 8.97630951269953e-06, + "loss": 15.5059, + "step": 143300 + }, + { + "epoch": 0.2894952669917622, + "grad_norm": 511.3759765625, + "learning_rate": 8.976097876240263e-06, + "loss": 35.4166, + "step": 143310 + }, + { + "epoch": 0.289515467624446, + "grad_norm": 558.9628295898438, + "learning_rate": 8.975886220401978e-06, + "loss": 14.7336, + "step": 143320 + }, + { + "epoch": 0.2895356682571298, + "grad_norm": 391.6131286621094, + "learning_rate": 8.975674545185704e-06, + "loss": 25.8173, + "step": 143330 + }, + { + "epoch": 0.2895558688898136, + "grad_norm": 269.6672058105469, + "learning_rate": 8.975462850592476e-06, + "loss": 16.7628, + "step": 143340 + }, + { + "epoch": 0.2895760695224974, + "grad_norm": 11.58529281616211, + "learning_rate": 8.975251136623326e-06, + "loss": 23.6183, + "step": 143350 + }, + { + "epoch": 0.28959627015518125, + "grad_norm": 594.9649658203125, + "learning_rate": 8.975039403279282e-06, + "loss": 21.1069, + "step": 143360 + }, + { + "epoch": 0.28961647078786507, + "grad_norm": 178.17930603027344, + "learning_rate": 8.974827650561378e-06, + "loss": 23.1182, + "step": 143370 + }, + { + "epoch": 0.2896366714205489, + "grad_norm": 606.8555297851562, + "learning_rate": 8.974615878470646e-06, + "loss": 49.7317, + "step": 143380 + }, + { + "epoch": 0.2896568720532327, + "grad_norm": 132.47006225585938, + "learning_rate": 8.97440408700812e-06, + "loss": 16.5019, + "step": 143390 + }, + { + "epoch": 0.2896770726859165, + "grad_norm": 65.81047821044922, + "learning_rate": 8.97419227617483e-06, + "loss": 20.2708, + "step": 143400 + }, + { + "epoch": 0.28969727331860035, + "grad_norm": 431.75469970703125, + "learning_rate": 8.973980445971806e-06, + "loss": 34.7638, + "step": 143410 + }, + { + "epoch": 0.28971747395128417, + "grad_norm": 456.8233947753906, + "learning_rate": 8.973768596400085e-06, + "loss": 24.3499, + "step": 143420 + }, + { + "epoch": 0.289737674583968, + "grad_norm": 57.23274612426758, + "learning_rate": 8.973556727460699e-06, + "loss": 33.2259, + "step": 143430 + }, + { + "epoch": 0.2897578752166518, + "grad_norm": 810.389892578125, + "learning_rate": 8.973344839154678e-06, + "loss": 26.781, + "step": 143440 + }, + { + "epoch": 0.2897780758493356, + "grad_norm": 571.724365234375, + "learning_rate": 8.973132931483057e-06, + "loss": 27.9718, + "step": 143450 + }, + { + "epoch": 0.2897982764820194, + "grad_norm": 459.9219665527344, + "learning_rate": 8.972921004446868e-06, + "loss": 18.6552, + "step": 143460 + }, + { + "epoch": 
0.2898184771147032, + "grad_norm": 1089.4761962890625, + "learning_rate": 8.972709058047145e-06, + "loss": 32.9153, + "step": 143470 + }, + { + "epoch": 0.28983867774738703, + "grad_norm": 186.7657470703125, + "learning_rate": 8.972497092284918e-06, + "loss": 25.3111, + "step": 143480 + }, + { + "epoch": 0.28985887838007085, + "grad_norm": 149.28268432617188, + "learning_rate": 8.972285107161222e-06, + "loss": 18.3322, + "step": 143490 + }, + { + "epoch": 0.28987907901275467, + "grad_norm": 242.9874267578125, + "learning_rate": 8.972073102677091e-06, + "loss": 21.4952, + "step": 143500 + }, + { + "epoch": 0.2898992796454385, + "grad_norm": 21.775375366210938, + "learning_rate": 8.971861078833558e-06, + "loss": 18.7088, + "step": 143510 + }, + { + "epoch": 0.2899194802781223, + "grad_norm": 34.676124572753906, + "learning_rate": 8.971649035631655e-06, + "loss": 10.497, + "step": 143520 + }, + { + "epoch": 0.28993968091080613, + "grad_norm": 41.31591796875, + "learning_rate": 8.971436973072416e-06, + "loss": 15.0646, + "step": 143530 + }, + { + "epoch": 0.28995988154348995, + "grad_norm": 91.02080535888672, + "learning_rate": 8.971224891156876e-06, + "loss": 28.2071, + "step": 143540 + }, + { + "epoch": 0.28998008217617377, + "grad_norm": 1039.9039306640625, + "learning_rate": 8.971012789886066e-06, + "loss": 23.3713, + "step": 143550 + }, + { + "epoch": 0.2900002828088576, + "grad_norm": 561.6629638671875, + "learning_rate": 8.970800669261022e-06, + "loss": 26.126, + "step": 143560 + }, + { + "epoch": 0.2900204834415414, + "grad_norm": 701.478271484375, + "learning_rate": 8.970588529282778e-06, + "loss": 22.7546, + "step": 143570 + }, + { + "epoch": 0.29004068407422523, + "grad_norm": 411.79571533203125, + "learning_rate": 8.970376369952366e-06, + "loss": 17.5618, + "step": 143580 + }, + { + "epoch": 0.290060884706909, + "grad_norm": 343.6632385253906, + "learning_rate": 8.97016419127082e-06, + "loss": 21.3977, + "step": 143590 + }, + { + "epoch": 0.2900810853395928, + "grad_norm": 387.7538146972656, + "learning_rate": 8.969951993239177e-06, + "loss": 28.7602, + "step": 143600 + }, + { + "epoch": 0.29010128597227663, + "grad_norm": 326.79339599609375, + "learning_rate": 8.96973977585847e-06, + "loss": 22.5106, + "step": 143610 + }, + { + "epoch": 0.29012148660496045, + "grad_norm": 814.3696899414062, + "learning_rate": 8.969527539129732e-06, + "loss": 23.4005, + "step": 143620 + }, + { + "epoch": 0.2901416872376443, + "grad_norm": 354.3022155761719, + "learning_rate": 8.969315283053998e-06, + "loss": 25.594, + "step": 143630 + }, + { + "epoch": 0.2901618878703281, + "grad_norm": 608.0245361328125, + "learning_rate": 8.969103007632302e-06, + "loss": 14.8101, + "step": 143640 + }, + { + "epoch": 0.2901820885030119, + "grad_norm": 335.25665283203125, + "learning_rate": 8.96889071286568e-06, + "loss": 12.4076, + "step": 143650 + }, + { + "epoch": 0.29020228913569573, + "grad_norm": 464.4058837890625, + "learning_rate": 8.968678398755165e-06, + "loss": 24.3785, + "step": 143660 + }, + { + "epoch": 0.29022248976837955, + "grad_norm": 201.16714477539062, + "learning_rate": 8.968466065301796e-06, + "loss": 12.0195, + "step": 143670 + }, + { + "epoch": 0.2902426904010634, + "grad_norm": 879.4492797851562, + "learning_rate": 8.968253712506602e-06, + "loss": 31.0383, + "step": 143680 + }, + { + "epoch": 0.2902628910337472, + "grad_norm": 343.1659240722656, + "learning_rate": 8.968041340370622e-06, + "loss": 22.9296, + "step": 143690 + }, + { + "epoch": 0.290283091666431, + "grad_norm": 
1271.926025390625, + "learning_rate": 8.96782894889489e-06, + "loss": 32.2323, + "step": 143700 + }, + { + "epoch": 0.29030329229911483, + "grad_norm": 215.3273468017578, + "learning_rate": 8.967616538080438e-06, + "loss": 18.3563, + "step": 143710 + }, + { + "epoch": 0.2903234929317986, + "grad_norm": 413.6395263671875, + "learning_rate": 8.967404107928309e-06, + "loss": 20.5151, + "step": 143720 + }, + { + "epoch": 0.2903436935644824, + "grad_norm": 713.0821533203125, + "learning_rate": 8.96719165843953e-06, + "loss": 44.3929, + "step": 143730 + }, + { + "epoch": 0.29036389419716624, + "grad_norm": 465.6328125, + "learning_rate": 8.966979189615142e-06, + "loss": 15.2196, + "step": 143740 + }, + { + "epoch": 0.29038409482985006, + "grad_norm": 299.3564758300781, + "learning_rate": 8.966766701456177e-06, + "loss": 12.7935, + "step": 143750 + }, + { + "epoch": 0.2904042954625339, + "grad_norm": 209.91482543945312, + "learning_rate": 8.966554193963673e-06, + "loss": 33.006, + "step": 143760 + }, + { + "epoch": 0.2904244960952177, + "grad_norm": 354.65521240234375, + "learning_rate": 8.966341667138663e-06, + "loss": 19.442, + "step": 143770 + }, + { + "epoch": 0.2904446967279015, + "grad_norm": 226.30235290527344, + "learning_rate": 8.966129120982188e-06, + "loss": 22.6549, + "step": 143780 + }, + { + "epoch": 0.29046489736058534, + "grad_norm": 105.86489868164062, + "learning_rate": 8.965916555495278e-06, + "loss": 8.8837, + "step": 143790 + }, + { + "epoch": 0.29048509799326916, + "grad_norm": 402.1645202636719, + "learning_rate": 8.965703970678974e-06, + "loss": 28.6892, + "step": 143800 + }, + { + "epoch": 0.290505298625953, + "grad_norm": 830.6857299804688, + "learning_rate": 8.965491366534309e-06, + "loss": 21.4636, + "step": 143810 + }, + { + "epoch": 0.2905254992586368, + "grad_norm": 241.9054718017578, + "learning_rate": 8.96527874306232e-06, + "loss": 12.2601, + "step": 143820 + }, + { + "epoch": 0.2905456998913206, + "grad_norm": 332.3222961425781, + "learning_rate": 8.965066100264042e-06, + "loss": 44.2617, + "step": 143830 + }, + { + "epoch": 0.29056590052400444, + "grad_norm": 409.7407531738281, + "learning_rate": 8.964853438140515e-06, + "loss": 15.4268, + "step": 143840 + }, + { + "epoch": 0.2905861011566882, + "grad_norm": 420.72845458984375, + "learning_rate": 8.96464075669277e-06, + "loss": 16.8882, + "step": 143850 + }, + { + "epoch": 0.290606301789372, + "grad_norm": 0.21845011413097382, + "learning_rate": 8.96442805592185e-06, + "loss": 20.7173, + "step": 143860 + }, + { + "epoch": 0.29062650242205584, + "grad_norm": 325.0856018066406, + "learning_rate": 8.964215335828788e-06, + "loss": 24.7074, + "step": 143870 + }, + { + "epoch": 0.29064670305473966, + "grad_norm": 572.4677124023438, + "learning_rate": 8.96400259641462e-06, + "loss": 19.0564, + "step": 143880 + }, + { + "epoch": 0.2906669036874235, + "grad_norm": 535.1879272460938, + "learning_rate": 8.963789837680386e-06, + "loss": 40.4394, + "step": 143890 + }, + { + "epoch": 0.2906871043201073, + "grad_norm": 136.49729919433594, + "learning_rate": 8.963577059627117e-06, + "loss": 20.6042, + "step": 143900 + }, + { + "epoch": 0.2907073049527911, + "grad_norm": 331.70404052734375, + "learning_rate": 8.963364262255859e-06, + "loss": 18.4239, + "step": 143910 + }, + { + "epoch": 0.29072750558547494, + "grad_norm": 315.2754821777344, + "learning_rate": 8.963151445567642e-06, + "loss": 23.5942, + "step": 143920 + }, + { + "epoch": 0.29074770621815876, + "grad_norm": 149.0545654296875, + "learning_rate": 
8.962938609563506e-06, + "loss": 31.3397, + "step": 143930 + }, + { + "epoch": 0.2907679068508426, + "grad_norm": 820.7781372070312, + "learning_rate": 8.962725754244487e-06, + "loss": 31.2748, + "step": 143940 + }, + { + "epoch": 0.2907881074835264, + "grad_norm": 659.9220581054688, + "learning_rate": 8.962512879611624e-06, + "loss": 23.4115, + "step": 143950 + }, + { + "epoch": 0.2908083081162102, + "grad_norm": 373.0547180175781, + "learning_rate": 8.962299985665955e-06, + "loss": 9.2079, + "step": 143960 + }, + { + "epoch": 0.290828508748894, + "grad_norm": 562.0675659179688, + "learning_rate": 8.962087072408514e-06, + "loss": 21.1904, + "step": 143970 + }, + { + "epoch": 0.2908487093815778, + "grad_norm": 1321.6712646484375, + "learning_rate": 8.961874139840342e-06, + "loss": 56.7481, + "step": 143980 + }, + { + "epoch": 0.29086891001426163, + "grad_norm": 0.0, + "learning_rate": 8.961661187962477e-06, + "loss": 21.7907, + "step": 143990 + }, + { + "epoch": 0.29088911064694545, + "grad_norm": 125.6926498413086, + "learning_rate": 8.961448216775955e-06, + "loss": 41.779, + "step": 144000 + }, + { + "epoch": 0.29090931127962927, + "grad_norm": 380.9176025390625, + "learning_rate": 8.961235226281815e-06, + "loss": 25.6484, + "step": 144010 + }, + { + "epoch": 0.2909295119123131, + "grad_norm": 240.7569122314453, + "learning_rate": 8.961022216481094e-06, + "loss": 17.7789, + "step": 144020 + }, + { + "epoch": 0.2909497125449969, + "grad_norm": 227.57423400878906, + "learning_rate": 8.960809187374833e-06, + "loss": 21.8197, + "step": 144030 + }, + { + "epoch": 0.29096991317768073, + "grad_norm": 223.62075805664062, + "learning_rate": 8.960596138964065e-06, + "loss": 18.3656, + "step": 144040 + }, + { + "epoch": 0.29099011381036455, + "grad_norm": 381.46173095703125, + "learning_rate": 8.960383071249837e-06, + "loss": 10.363, + "step": 144050 + }, + { + "epoch": 0.29101031444304837, + "grad_norm": 324.1127014160156, + "learning_rate": 8.960169984233179e-06, + "loss": 19.3931, + "step": 144060 + }, + { + "epoch": 0.2910305150757322, + "grad_norm": 562.385498046875, + "learning_rate": 8.959956877915132e-06, + "loss": 13.9735, + "step": 144070 + }, + { + "epoch": 0.291050715708416, + "grad_norm": 302.4697265625, + "learning_rate": 8.959743752296736e-06, + "loss": 18.8322, + "step": 144080 + }, + { + "epoch": 0.29107091634109983, + "grad_norm": 515.53515625, + "learning_rate": 8.959530607379032e-06, + "loss": 35.0527, + "step": 144090 + }, + { + "epoch": 0.2910911169737836, + "grad_norm": 760.4889526367188, + "learning_rate": 8.959317443163054e-06, + "loss": 21.2706, + "step": 144100 + }, + { + "epoch": 0.2911113176064674, + "grad_norm": 123.36358642578125, + "learning_rate": 8.959104259649842e-06, + "loss": 24.0755, + "step": 144110 + }, + { + "epoch": 0.29113151823915123, + "grad_norm": 1043.007080078125, + "learning_rate": 8.958891056840438e-06, + "loss": 56.0429, + "step": 144120 + }, + { + "epoch": 0.29115171887183505, + "grad_norm": 55.80719757080078, + "learning_rate": 8.958677834735879e-06, + "loss": 13.53, + "step": 144130 + }, + { + "epoch": 0.2911719195045189, + "grad_norm": 256.8143310546875, + "learning_rate": 8.958464593337202e-06, + "loss": 12.0759, + "step": 144140 + }, + { + "epoch": 0.2911921201372027, + "grad_norm": 180.5006561279297, + "learning_rate": 8.95825133264545e-06, + "loss": 27.4772, + "step": 144150 + }, + { + "epoch": 0.2912123207698865, + "grad_norm": 268.6878662109375, + "learning_rate": 8.958038052661661e-06, + "loss": 7.3291, + "step": 144160 + }, + { + 
"epoch": 0.29123252140257033, + "grad_norm": 890.093017578125, + "learning_rate": 8.957824753386877e-06, + "loss": 27.2381, + "step": 144170 + }, + { + "epoch": 0.29125272203525415, + "grad_norm": 610.2390747070312, + "learning_rate": 8.957611434822133e-06, + "loss": 24.5062, + "step": 144180 + }, + { + "epoch": 0.291272922667938, + "grad_norm": 383.61016845703125, + "learning_rate": 8.95739809696847e-06, + "loss": 13.5431, + "step": 144190 + }, + { + "epoch": 0.2912931233006218, + "grad_norm": 855.97705078125, + "learning_rate": 8.957184739826929e-06, + "loss": 24.0863, + "step": 144200 + }, + { + "epoch": 0.2913133239333056, + "grad_norm": 446.4833068847656, + "learning_rate": 8.95697136339855e-06, + "loss": 22.1917, + "step": 144210 + }, + { + "epoch": 0.29133352456598943, + "grad_norm": 461.7430419921875, + "learning_rate": 8.956757967684372e-06, + "loss": 16.1331, + "step": 144220 + }, + { + "epoch": 0.2913537251986732, + "grad_norm": 43.587120056152344, + "learning_rate": 8.956544552685437e-06, + "loss": 31.9169, + "step": 144230 + }, + { + "epoch": 0.291373925831357, + "grad_norm": 200.32955932617188, + "learning_rate": 8.956331118402784e-06, + "loss": 17.4206, + "step": 144240 + }, + { + "epoch": 0.29139412646404084, + "grad_norm": 9.927374839782715, + "learning_rate": 8.956117664837452e-06, + "loss": 16.6463, + "step": 144250 + }, + { + "epoch": 0.29141432709672466, + "grad_norm": 229.5208282470703, + "learning_rate": 8.955904191990481e-06, + "loss": 21.387, + "step": 144260 + }, + { + "epoch": 0.2914345277294085, + "grad_norm": 1290.9556884765625, + "learning_rate": 8.955690699862913e-06, + "loss": 22.9719, + "step": 144270 + }, + { + "epoch": 0.2914547283620923, + "grad_norm": 347.4378662109375, + "learning_rate": 8.955477188455791e-06, + "loss": 12.1614, + "step": 144280 + }, + { + "epoch": 0.2914749289947761, + "grad_norm": 386.7215576171875, + "learning_rate": 8.95526365777015e-06, + "loss": 30.4455, + "step": 144290 + }, + { + "epoch": 0.29149512962745994, + "grad_norm": 568.8643188476562, + "learning_rate": 8.955050107807035e-06, + "loss": 42.8156, + "step": 144300 + }, + { + "epoch": 0.29151533026014376, + "grad_norm": 462.5457458496094, + "learning_rate": 8.954836538567486e-06, + "loss": 18.1224, + "step": 144310 + }, + { + "epoch": 0.2915355308928276, + "grad_norm": 862.5878295898438, + "learning_rate": 8.954622950052543e-06, + "loss": 33.7564, + "step": 144320 + }, + { + "epoch": 0.2915557315255114, + "grad_norm": 1225.79931640625, + "learning_rate": 8.954409342263246e-06, + "loss": 34.9419, + "step": 144330 + }, + { + "epoch": 0.2915759321581952, + "grad_norm": 536.2356567382812, + "learning_rate": 8.95419571520064e-06, + "loss": 23.5325, + "step": 144340 + }, + { + "epoch": 0.29159613279087904, + "grad_norm": 125.20826721191406, + "learning_rate": 8.95398206886576e-06, + "loss": 17.5736, + "step": 144350 + }, + { + "epoch": 0.2916163334235628, + "grad_norm": 201.34152221679688, + "learning_rate": 8.953768403259655e-06, + "loss": 13.0423, + "step": 144360 + }, + { + "epoch": 0.2916365340562466, + "grad_norm": 343.0006408691406, + "learning_rate": 8.95355471838336e-06, + "loss": 28.9384, + "step": 144370 + }, + { + "epoch": 0.29165673468893044, + "grad_norm": 450.1666564941406, + "learning_rate": 8.953341014237919e-06, + "loss": 39.7002, + "step": 144380 + }, + { + "epoch": 0.29167693532161426, + "grad_norm": 436.2256164550781, + "learning_rate": 8.953127290824374e-06, + "loss": 23.7114, + "step": 144390 + }, + { + "epoch": 0.2916971359542981, + "grad_norm": 
500.2027587890625, + "learning_rate": 8.952913548143766e-06, + "loss": 14.5103, + "step": 144400 + }, + { + "epoch": 0.2917173365869819, + "grad_norm": 147.0856475830078, + "learning_rate": 8.952699786197137e-06, + "loss": 23.2955, + "step": 144410 + }, + { + "epoch": 0.2917375372196657, + "grad_norm": 286.50390625, + "learning_rate": 8.952486004985527e-06, + "loss": 35.0774, + "step": 144420 + }, + { + "epoch": 0.29175773785234954, + "grad_norm": 254.57830810546875, + "learning_rate": 8.95227220450998e-06, + "loss": 20.6649, + "step": 144430 + }, + { + "epoch": 0.29177793848503336, + "grad_norm": 275.4768371582031, + "learning_rate": 8.952058384771539e-06, + "loss": 12.5758, + "step": 144440 + }, + { + "epoch": 0.2917981391177172, + "grad_norm": 416.6864013671875, + "learning_rate": 8.951844545771244e-06, + "loss": 16.3182, + "step": 144450 + }, + { + "epoch": 0.291818339750401, + "grad_norm": 607.0198364257812, + "learning_rate": 8.951630687510137e-06, + "loss": 21.5984, + "step": 144460 + }, + { + "epoch": 0.2918385403830848, + "grad_norm": 393.8330383300781, + "learning_rate": 8.951416809989263e-06, + "loss": 32.2409, + "step": 144470 + }, + { + "epoch": 0.29185874101576864, + "grad_norm": 738.3124389648438, + "learning_rate": 8.951202913209662e-06, + "loss": 34.3631, + "step": 144480 + }, + { + "epoch": 0.2918789416484524, + "grad_norm": 426.9834899902344, + "learning_rate": 8.950988997172378e-06, + "loss": 23.0125, + "step": 144490 + }, + { + "epoch": 0.2918991422811362, + "grad_norm": 616.0340576171875, + "learning_rate": 8.950775061878453e-06, + "loss": 27.9565, + "step": 144500 + }, + { + "epoch": 0.29191934291382005, + "grad_norm": 239.58522033691406, + "learning_rate": 8.950561107328927e-06, + "loss": 47.7109, + "step": 144510 + }, + { + "epoch": 0.29193954354650387, + "grad_norm": 119.57493591308594, + "learning_rate": 8.950347133524849e-06, + "loss": 29.4281, + "step": 144520 + }, + { + "epoch": 0.2919597441791877, + "grad_norm": 58.043556213378906, + "learning_rate": 8.950133140467256e-06, + "loss": 32.7095, + "step": 144530 + }, + { + "epoch": 0.2919799448118715, + "grad_norm": 207.09091186523438, + "learning_rate": 8.949919128157194e-06, + "loss": 24.4547, + "step": 144540 + }, + { + "epoch": 0.2920001454445553, + "grad_norm": 609.0300903320312, + "learning_rate": 8.949705096595704e-06, + "loss": 25.7507, + "step": 144550 + }, + { + "epoch": 0.29202034607723915, + "grad_norm": 578.8982543945312, + "learning_rate": 8.94949104578383e-06, + "loss": 13.9327, + "step": 144560 + }, + { + "epoch": 0.29204054670992297, + "grad_norm": 423.1669616699219, + "learning_rate": 8.949276975722617e-06, + "loss": 20.6589, + "step": 144570 + }, + { + "epoch": 0.2920607473426068, + "grad_norm": 114.99697875976562, + "learning_rate": 8.949062886413106e-06, + "loss": 31.3314, + "step": 144580 + }, + { + "epoch": 0.2920809479752906, + "grad_norm": 261.10162353515625, + "learning_rate": 8.948848777856342e-06, + "loss": 15.5353, + "step": 144590 + }, + { + "epoch": 0.2921011486079744, + "grad_norm": 252.13076782226562, + "learning_rate": 8.94863465005337e-06, + "loss": 20.2258, + "step": 144600 + }, + { + "epoch": 0.2921213492406582, + "grad_norm": 600.653076171875, + "learning_rate": 8.948420503005229e-06, + "loss": 35.3409, + "step": 144610 + }, + { + "epoch": 0.292141549873342, + "grad_norm": 275.3252258300781, + "learning_rate": 8.948206336712966e-06, + "loss": 31.1141, + "step": 144620 + }, + { + "epoch": 0.29216175050602583, + "grad_norm": 526.3372802734375, + "learning_rate": 
8.947992151177625e-06, + "loss": 22.6549, + "step": 144630 + }, + { + "epoch": 0.29218195113870965, + "grad_norm": 313.9520263671875, + "learning_rate": 8.947777946400247e-06, + "loss": 10.2737, + "step": 144640 + }, + { + "epoch": 0.29220215177139347, + "grad_norm": 196.8408660888672, + "learning_rate": 8.94756372238188e-06, + "loss": 30.4515, + "step": 144650 + }, + { + "epoch": 0.2922223524040773, + "grad_norm": 272.763427734375, + "learning_rate": 8.947349479123565e-06, + "loss": 36.4658, + "step": 144660 + }, + { + "epoch": 0.2922425530367611, + "grad_norm": 284.01666259765625, + "learning_rate": 8.947135216626349e-06, + "loss": 20.4259, + "step": 144670 + }, + { + "epoch": 0.29226275366944493, + "grad_norm": 351.39532470703125, + "learning_rate": 8.946920934891274e-06, + "loss": 15.2977, + "step": 144680 + }, + { + "epoch": 0.29228295430212875, + "grad_norm": 790.0850830078125, + "learning_rate": 8.946706633919385e-06, + "loss": 32.3869, + "step": 144690 + }, + { + "epoch": 0.29230315493481257, + "grad_norm": 377.062255859375, + "learning_rate": 8.946492313711725e-06, + "loss": 41.3869, + "step": 144700 + }, + { + "epoch": 0.2923233555674964, + "grad_norm": 509.7406311035156, + "learning_rate": 8.946277974269342e-06, + "loss": 28.8467, + "step": 144710 + }, + { + "epoch": 0.2923435562001802, + "grad_norm": 600.3405151367188, + "learning_rate": 8.94606361559328e-06, + "loss": 23.0266, + "step": 144720 + }, + { + "epoch": 0.29236375683286403, + "grad_norm": 650.2919921875, + "learning_rate": 8.945849237684578e-06, + "loss": 21.252, + "step": 144730 + }, + { + "epoch": 0.2923839574655478, + "grad_norm": 862.9744873046875, + "learning_rate": 8.94563484054429e-06, + "loss": 31.7525, + "step": 144740 + }, + { + "epoch": 0.2924041580982316, + "grad_norm": 197.72608947753906, + "learning_rate": 8.945420424173455e-06, + "loss": 16.2845, + "step": 144750 + }, + { + "epoch": 0.29242435873091543, + "grad_norm": 309.2283630371094, + "learning_rate": 8.945205988573117e-06, + "loss": 24.8, + "step": 144760 + }, + { + "epoch": 0.29244455936359925, + "grad_norm": 273.2706604003906, + "learning_rate": 8.944991533744327e-06, + "loss": 11.5869, + "step": 144770 + }, + { + "epoch": 0.2924647599962831, + "grad_norm": 470.6202087402344, + "learning_rate": 8.944777059688125e-06, + "loss": 18.9063, + "step": 144780 + }, + { + "epoch": 0.2924849606289669, + "grad_norm": 32.0260124206543, + "learning_rate": 8.944562566405558e-06, + "loss": 18.1894, + "step": 144790 + }, + { + "epoch": 0.2925051612616507, + "grad_norm": 569.5053100585938, + "learning_rate": 8.944348053897672e-06, + "loss": 22.1403, + "step": 144800 + }, + { + "epoch": 0.29252536189433453, + "grad_norm": 104.93385314941406, + "learning_rate": 8.94413352216551e-06, + "loss": 26.3922, + "step": 144810 + }, + { + "epoch": 0.29254556252701835, + "grad_norm": 833.8599243164062, + "learning_rate": 8.943918971210122e-06, + "loss": 21.0204, + "step": 144820 + }, + { + "epoch": 0.2925657631597022, + "grad_norm": 487.80242919921875, + "learning_rate": 8.943704401032551e-06, + "loss": 20.9569, + "step": 144830 + }, + { + "epoch": 0.292585963792386, + "grad_norm": 386.6275939941406, + "learning_rate": 8.943489811633843e-06, + "loss": 12.6614, + "step": 144840 + }, + { + "epoch": 0.2926061644250698, + "grad_norm": 1075.7357177734375, + "learning_rate": 8.943275203015042e-06, + "loss": 35.2753, + "step": 144850 + }, + { + "epoch": 0.29262636505775363, + "grad_norm": 349.0261535644531, + "learning_rate": 8.943060575177197e-06, + "loss": 16.3996, + "step": 
144860 + }, + { + "epoch": 0.2926465656904374, + "grad_norm": 108.65377044677734, + "learning_rate": 8.942845928121356e-06, + "loss": 12.0128, + "step": 144870 + }, + { + "epoch": 0.2926667663231212, + "grad_norm": 484.78271484375, + "learning_rate": 8.942631261848558e-06, + "loss": 37.5167, + "step": 144880 + }, + { + "epoch": 0.29268696695580504, + "grad_norm": 464.53704833984375, + "learning_rate": 8.942416576359855e-06, + "loss": 20.0881, + "step": 144890 + }, + { + "epoch": 0.29270716758848886, + "grad_norm": 372.568359375, + "learning_rate": 8.942201871656292e-06, + "loss": 28.2813, + "step": 144900 + }, + { + "epoch": 0.2927273682211727, + "grad_norm": 501.3120422363281, + "learning_rate": 8.941987147738915e-06, + "loss": 21.7532, + "step": 144910 + }, + { + "epoch": 0.2927475688538565, + "grad_norm": 224.8948211669922, + "learning_rate": 8.94177240460877e-06, + "loss": 14.4497, + "step": 144920 + }, + { + "epoch": 0.2927677694865403, + "grad_norm": 110.44869995117188, + "learning_rate": 8.941557642266906e-06, + "loss": 18.5177, + "step": 144930 + }, + { + "epoch": 0.29278797011922414, + "grad_norm": 367.12896728515625, + "learning_rate": 8.941342860714368e-06, + "loss": 16.2311, + "step": 144940 + }, + { + "epoch": 0.29280817075190796, + "grad_norm": 429.4028625488281, + "learning_rate": 8.9411280599522e-06, + "loss": 23.6019, + "step": 144950 + }, + { + "epoch": 0.2928283713845918, + "grad_norm": 17.152137756347656, + "learning_rate": 8.940913239981454e-06, + "loss": 23.7691, + "step": 144960 + }, + { + "epoch": 0.2928485720172756, + "grad_norm": 934.8453979492188, + "learning_rate": 8.940698400803177e-06, + "loss": 26.2601, + "step": 144970 + }, + { + "epoch": 0.2928687726499594, + "grad_norm": 434.5068359375, + "learning_rate": 8.94048354241841e-06, + "loss": 22.1481, + "step": 144980 + }, + { + "epoch": 0.29288897328264324, + "grad_norm": 381.8167419433594, + "learning_rate": 8.940268664828207e-06, + "loss": 19.2977, + "step": 144990 + }, + { + "epoch": 0.292909173915327, + "grad_norm": 287.8057556152344, + "learning_rate": 8.94005376803361e-06, + "loss": 18.1322, + "step": 145000 + }, + { + "epoch": 0.2929293745480108, + "grad_norm": 185.08685302734375, + "learning_rate": 8.939838852035672e-06, + "loss": 16.3741, + "step": 145010 + }, + { + "epoch": 0.29294957518069464, + "grad_norm": 1287.8944091796875, + "learning_rate": 8.939623916835434e-06, + "loss": 20.9493, + "step": 145020 + }, + { + "epoch": 0.29296977581337846, + "grad_norm": 635.646484375, + "learning_rate": 8.939408962433949e-06, + "loss": 25.3974, + "step": 145030 + }, + { + "epoch": 0.2929899764460623, + "grad_norm": 374.07794189453125, + "learning_rate": 8.939193988832261e-06, + "loss": 10.0691, + "step": 145040 + }, + { + "epoch": 0.2930101770787461, + "grad_norm": 107.63578033447266, + "learning_rate": 8.93897899603142e-06, + "loss": 16.6006, + "step": 145050 + }, + { + "epoch": 0.2930303777114299, + "grad_norm": 519.5859985351562, + "learning_rate": 8.938763984032473e-06, + "loss": 13.1343, + "step": 145060 + }, + { + "epoch": 0.29305057834411374, + "grad_norm": 450.7736511230469, + "learning_rate": 8.938548952836469e-06, + "loss": 25.0087, + "step": 145070 + }, + { + "epoch": 0.29307077897679756, + "grad_norm": 198.88833618164062, + "learning_rate": 8.938333902444454e-06, + "loss": 14.4416, + "step": 145080 + }, + { + "epoch": 0.2930909796094814, + "grad_norm": 757.00146484375, + "learning_rate": 8.938118832857476e-06, + "loss": 25.4719, + "step": 145090 + }, + { + "epoch": 0.2931111802421652, + "grad_norm": 
715.574951171875, + "learning_rate": 8.937903744076587e-06, + "loss": 16.1636, + "step": 145100 + }, + { + "epoch": 0.293131380874849, + "grad_norm": 634.7432250976562, + "learning_rate": 8.937688636102832e-06, + "loss": 51.1023, + "step": 145110 + }, + { + "epoch": 0.29315158150753284, + "grad_norm": 302.01226806640625, + "learning_rate": 8.93747350893726e-06, + "loss": 16.4928, + "step": 145120 + }, + { + "epoch": 0.2931717821402166, + "grad_norm": 257.2762145996094, + "learning_rate": 8.937258362580918e-06, + "loss": 21.9717, + "step": 145130 + }, + { + "epoch": 0.29319198277290043, + "grad_norm": 430.455078125, + "learning_rate": 8.937043197034858e-06, + "loss": 21.8851, + "step": 145140 + }, + { + "epoch": 0.29321218340558425, + "grad_norm": 2753.68017578125, + "learning_rate": 8.936828012300127e-06, + "loss": 30.6224, + "step": 145150 + }, + { + "epoch": 0.29323238403826807, + "grad_norm": 273.1519775390625, + "learning_rate": 8.936612808377773e-06, + "loss": 9.0698, + "step": 145160 + }, + { + "epoch": 0.2932525846709519, + "grad_norm": 439.8018798828125, + "learning_rate": 8.936397585268848e-06, + "loss": 21.0634, + "step": 145170 + }, + { + "epoch": 0.2932727853036357, + "grad_norm": 994.3154907226562, + "learning_rate": 8.936182342974396e-06, + "loss": 31.0293, + "step": 145180 + }, + { + "epoch": 0.29329298593631953, + "grad_norm": 262.0361328125, + "learning_rate": 8.93596708149547e-06, + "loss": 15.7493, + "step": 145190 + }, + { + "epoch": 0.29331318656900335, + "grad_norm": 222.2875213623047, + "learning_rate": 8.935751800833117e-06, + "loss": 21.3349, + "step": 145200 + }, + { + "epoch": 0.29333338720168717, + "grad_norm": 220.3922119140625, + "learning_rate": 8.935536500988387e-06, + "loss": 12.4372, + "step": 145210 + }, + { + "epoch": 0.293353587834371, + "grad_norm": 252.5332794189453, + "learning_rate": 8.93532118196233e-06, + "loss": 19.8117, + "step": 145220 + }, + { + "epoch": 0.2933737884670548, + "grad_norm": 239.65103149414062, + "learning_rate": 8.935105843755994e-06, + "loss": 19.7695, + "step": 145230 + }, + { + "epoch": 0.29339398909973863, + "grad_norm": 196.02635192871094, + "learning_rate": 8.93489048637043e-06, + "loss": 10.8984, + "step": 145240 + }, + { + "epoch": 0.2934141897324224, + "grad_norm": 86.53240203857422, + "learning_rate": 8.934675109806688e-06, + "loss": 37.7658, + "step": 145250 + }, + { + "epoch": 0.2934343903651062, + "grad_norm": 453.9691467285156, + "learning_rate": 8.934459714065815e-06, + "loss": 14.615, + "step": 145260 + }, + { + "epoch": 0.29345459099779003, + "grad_norm": 163.30526733398438, + "learning_rate": 8.934244299148864e-06, + "loss": 24.815, + "step": 145270 + }, + { + "epoch": 0.29347479163047385, + "grad_norm": 213.64715576171875, + "learning_rate": 8.934028865056883e-06, + "loss": 39.1907, + "step": 145280 + }, + { + "epoch": 0.2934949922631577, + "grad_norm": 767.2720947265625, + "learning_rate": 8.933813411790922e-06, + "loss": 28.3336, + "step": 145290 + }, + { + "epoch": 0.2935151928958415, + "grad_norm": 313.1923828125, + "learning_rate": 8.933597939352031e-06, + "loss": 41.4924, + "step": 145300 + }, + { + "epoch": 0.2935353935285253, + "grad_norm": 12.70064926147461, + "learning_rate": 8.93338244774126e-06, + "loss": 12.5341, + "step": 145310 + }, + { + "epoch": 0.29355559416120913, + "grad_norm": 296.32940673828125, + "learning_rate": 8.933166936959664e-06, + "loss": 32.5023, + "step": 145320 + }, + { + "epoch": 0.29357579479389295, + "grad_norm": 381.9779968261719, + "learning_rate": 8.932951407008286e-06, + 
"loss": 14.9476, + "step": 145330 + }, + { + "epoch": 0.2935959954265768, + "grad_norm": 471.7348327636719, + "learning_rate": 8.93273585788818e-06, + "loss": 32.7502, + "step": 145340 + }, + { + "epoch": 0.2936161960592606, + "grad_norm": 45.068382263183594, + "learning_rate": 8.932520289600396e-06, + "loss": 24.3668, + "step": 145350 + }, + { + "epoch": 0.2936363966919444, + "grad_norm": 211.95819091796875, + "learning_rate": 8.932304702145988e-06, + "loss": 14.7878, + "step": 145360 + }, + { + "epoch": 0.29365659732462823, + "grad_norm": 274.7302551269531, + "learning_rate": 8.932089095526003e-06, + "loss": 15.4987, + "step": 145370 + }, + { + "epoch": 0.293676797957312, + "grad_norm": 265.11651611328125, + "learning_rate": 8.93187346974149e-06, + "loss": 23.715, + "step": 145380 + }, + { + "epoch": 0.2936969985899958, + "grad_norm": 323.3000793457031, + "learning_rate": 8.931657824793505e-06, + "loss": 26.2044, + "step": 145390 + }, + { + "epoch": 0.29371719922267964, + "grad_norm": 263.9230651855469, + "learning_rate": 8.931442160683094e-06, + "loss": 8.327, + "step": 145400 + }, + { + "epoch": 0.29373739985536346, + "grad_norm": 499.3628234863281, + "learning_rate": 8.931226477411314e-06, + "loss": 32.8783, + "step": 145410 + }, + { + "epoch": 0.2937576004880473, + "grad_norm": 671.4661865234375, + "learning_rate": 8.931010774979212e-06, + "loss": 22.3906, + "step": 145420 + }, + { + "epoch": 0.2937778011207311, + "grad_norm": 299.31536865234375, + "learning_rate": 8.93079505338784e-06, + "loss": 16.962, + "step": 145430 + }, + { + "epoch": 0.2937980017534149, + "grad_norm": 276.3232116699219, + "learning_rate": 8.93057931263825e-06, + "loss": 54.696, + "step": 145440 + }, + { + "epoch": 0.29381820238609874, + "grad_norm": 556.1576538085938, + "learning_rate": 8.930363552731491e-06, + "loss": 23.0526, + "step": 145450 + }, + { + "epoch": 0.29383840301878256, + "grad_norm": 420.9582214355469, + "learning_rate": 8.930147773668618e-06, + "loss": 27.9474, + "step": 145460 + }, + { + "epoch": 0.2938586036514664, + "grad_norm": 86.83731079101562, + "learning_rate": 8.929931975450683e-06, + "loss": 16.0631, + "step": 145470 + }, + { + "epoch": 0.2938788042841502, + "grad_norm": 621.8159790039062, + "learning_rate": 8.929716158078734e-06, + "loss": 27.9742, + "step": 145480 + }, + { + "epoch": 0.293899004916834, + "grad_norm": 1001.0625610351562, + "learning_rate": 8.929500321553825e-06, + "loss": 44.1198, + "step": 145490 + }, + { + "epoch": 0.29391920554951784, + "grad_norm": 743.4075317382812, + "learning_rate": 8.92928446587701e-06, + "loss": 18.8021, + "step": 145500 + }, + { + "epoch": 0.2939394061822016, + "grad_norm": 1123.14453125, + "learning_rate": 8.929068591049338e-06, + "loss": 22.9869, + "step": 145510 + }, + { + "epoch": 0.2939596068148854, + "grad_norm": 406.63653564453125, + "learning_rate": 8.928852697071863e-06, + "loss": 25.7997, + "step": 145520 + }, + { + "epoch": 0.29397980744756924, + "grad_norm": 313.7357177734375, + "learning_rate": 8.928636783945635e-06, + "loss": 25.3153, + "step": 145530 + }, + { + "epoch": 0.29400000808025306, + "grad_norm": 552.120849609375, + "learning_rate": 8.928420851671708e-06, + "loss": 29.8396, + "step": 145540 + }, + { + "epoch": 0.2940202087129369, + "grad_norm": 415.21307373046875, + "learning_rate": 8.928204900251136e-06, + "loss": 20.4346, + "step": 145550 + }, + { + "epoch": 0.2940404093456207, + "grad_norm": 126.5959243774414, + "learning_rate": 8.92798892968497e-06, + "loss": 19.7, + "step": 145560 + }, + { + "epoch": 
0.2940606099783045, + "grad_norm": 903.931396484375, + "learning_rate": 8.92777293997426e-06, + "loss": 22.6722, + "step": 145570 + }, + { + "epoch": 0.29408081061098834, + "grad_norm": 274.7602233886719, + "learning_rate": 8.92755693112006e-06, + "loss": 22.2178, + "step": 145580 + }, + { + "epoch": 0.29410101124367216, + "grad_norm": 242.60911560058594, + "learning_rate": 8.927340903123428e-06, + "loss": 19.2611, + "step": 145590 + }, + { + "epoch": 0.294121211876356, + "grad_norm": 233.6172637939453, + "learning_rate": 8.92712485598541e-06, + "loss": 39.959, + "step": 145600 + }, + { + "epoch": 0.2941414125090398, + "grad_norm": 424.9320983886719, + "learning_rate": 8.926908789707063e-06, + "loss": 16.9217, + "step": 145610 + }, + { + "epoch": 0.2941616131417236, + "grad_norm": 238.45736694335938, + "learning_rate": 8.926692704289437e-06, + "loss": 25.0203, + "step": 145620 + }, + { + "epoch": 0.29418181377440744, + "grad_norm": 551.928955078125, + "learning_rate": 8.926476599733588e-06, + "loss": 18.3843, + "step": 145630 + }, + { + "epoch": 0.2942020144070912, + "grad_norm": 366.42462158203125, + "learning_rate": 8.926260476040568e-06, + "loss": 24.9385, + "step": 145640 + }, + { + "epoch": 0.294222215039775, + "grad_norm": 356.678955078125, + "learning_rate": 8.926044333211433e-06, + "loss": 30.3019, + "step": 145650 + }, + { + "epoch": 0.29424241567245885, + "grad_norm": 158.8470458984375, + "learning_rate": 8.925828171247231e-06, + "loss": 28.3491, + "step": 145660 + }, + { + "epoch": 0.29426261630514267, + "grad_norm": 369.3500671386719, + "learning_rate": 8.925611990149021e-06, + "loss": 30.4343, + "step": 145670 + }, + { + "epoch": 0.2942828169378265, + "grad_norm": 333.98699951171875, + "learning_rate": 8.925395789917852e-06, + "loss": 18.3424, + "step": 145680 + }, + { + "epoch": 0.2943030175705103, + "grad_norm": 172.48178100585938, + "learning_rate": 8.925179570554783e-06, + "loss": 11.635, + "step": 145690 + }, + { + "epoch": 0.2943232182031941, + "grad_norm": 372.4821472167969, + "learning_rate": 8.924963332060863e-06, + "loss": 15.4591, + "step": 145700 + }, + { + "epoch": 0.29434341883587795, + "grad_norm": 472.9522399902344, + "learning_rate": 8.924747074437147e-06, + "loss": 28.8564, + "step": 145710 + }, + { + "epoch": 0.29436361946856177, + "grad_norm": 702.295654296875, + "learning_rate": 8.92453079768469e-06, + "loss": 48.8211, + "step": 145720 + }, + { + "epoch": 0.2943838201012456, + "grad_norm": 602.8131713867188, + "learning_rate": 8.924314501804548e-06, + "loss": 36.8326, + "step": 145730 + }, + { + "epoch": 0.2944040207339294, + "grad_norm": 306.16607666015625, + "learning_rate": 8.924098186797771e-06, + "loss": 21.1611, + "step": 145740 + }, + { + "epoch": 0.2944242213666132, + "grad_norm": 175.8815460205078, + "learning_rate": 8.923881852665416e-06, + "loss": 15.8559, + "step": 145750 + }, + { + "epoch": 0.29444442199929705, + "grad_norm": 296.03253173828125, + "learning_rate": 8.923665499408535e-06, + "loss": 18.4978, + "step": 145760 + }, + { + "epoch": 0.2944646226319808, + "grad_norm": 342.70672607421875, + "learning_rate": 8.923449127028187e-06, + "loss": 9.7858, + "step": 145770 + }, + { + "epoch": 0.29448482326466463, + "grad_norm": 288.128662109375, + "learning_rate": 8.923232735525422e-06, + "loss": 21.0356, + "step": 145780 + }, + { + "epoch": 0.29450502389734845, + "grad_norm": 79.99772644042969, + "learning_rate": 8.923016324901298e-06, + "loss": 18.7125, + "step": 145790 + }, + { + "epoch": 0.29452522453003227, + "grad_norm": 
0.5629855394363403, + "learning_rate": 8.922799895156868e-06, + "loss": 16.9756, + "step": 145800 + }, + { + "epoch": 0.2945454251627161, + "grad_norm": 999.4471435546875, + "learning_rate": 8.922583446293186e-06, + "loss": 31.4349, + "step": 145810 + }, + { + "epoch": 0.2945656257953999, + "grad_norm": 725.2561645507812, + "learning_rate": 8.922366978311307e-06, + "loss": 28.9004, + "step": 145820 + }, + { + "epoch": 0.29458582642808373, + "grad_norm": 516.2388916015625, + "learning_rate": 8.92215049121229e-06, + "loss": 40.1277, + "step": 145830 + }, + { + "epoch": 0.29460602706076755, + "grad_norm": 145.9407196044922, + "learning_rate": 8.921933984997186e-06, + "loss": 21.9316, + "step": 145840 + }, + { + "epoch": 0.29462622769345137, + "grad_norm": 517.9457397460938, + "learning_rate": 8.921717459667052e-06, + "loss": 22.3559, + "step": 145850 + }, + { + "epoch": 0.2946464283261352, + "grad_norm": 548.7401123046875, + "learning_rate": 8.921500915222941e-06, + "loss": 21.2759, + "step": 145860 + }, + { + "epoch": 0.294666628958819, + "grad_norm": 221.891845703125, + "learning_rate": 8.921284351665911e-06, + "loss": 9.9675, + "step": 145870 + }, + { + "epoch": 0.29468682959150283, + "grad_norm": 143.00672912597656, + "learning_rate": 8.921067768997018e-06, + "loss": 16.967, + "step": 145880 + }, + { + "epoch": 0.2947070302241866, + "grad_norm": 348.74945068359375, + "learning_rate": 8.920851167217315e-06, + "loss": 16.3741, + "step": 145890 + }, + { + "epoch": 0.2947272308568704, + "grad_norm": 277.1033630371094, + "learning_rate": 8.920634546327857e-06, + "loss": 25.7971, + "step": 145900 + }, + { + "epoch": 0.29474743148955423, + "grad_norm": 335.9961242675781, + "learning_rate": 8.920417906329704e-06, + "loss": 26.0592, + "step": 145910 + }, + { + "epoch": 0.29476763212223805, + "grad_norm": 605.5419921875, + "learning_rate": 8.92020124722391e-06, + "loss": 21.711, + "step": 145920 + }, + { + "epoch": 0.2947878327549219, + "grad_norm": 459.62896728515625, + "learning_rate": 8.91998456901153e-06, + "loss": 26.0456, + "step": 145930 + }, + { + "epoch": 0.2948080333876057, + "grad_norm": 669.4099731445312, + "learning_rate": 8.91976787169362e-06, + "loss": 28.3342, + "step": 145940 + }, + { + "epoch": 0.2948282340202895, + "grad_norm": 224.82894897460938, + "learning_rate": 8.919551155271239e-06, + "loss": 26.5072, + "step": 145950 + }, + { + "epoch": 0.29484843465297333, + "grad_norm": 590.4454345703125, + "learning_rate": 8.91933441974544e-06, + "loss": 23.0178, + "step": 145960 + }, + { + "epoch": 0.29486863528565715, + "grad_norm": 161.39234924316406, + "learning_rate": 8.91911766511728e-06, + "loss": 19.901, + "step": 145970 + }, + { + "epoch": 0.294888835918341, + "grad_norm": 406.33294677734375, + "learning_rate": 8.918900891387814e-06, + "loss": 13.1816, + "step": 145980 + }, + { + "epoch": 0.2949090365510248, + "grad_norm": 575.1746826171875, + "learning_rate": 8.918684098558102e-06, + "loss": 24.3348, + "step": 145990 + }, + { + "epoch": 0.2949292371837086, + "grad_norm": 355.184326171875, + "learning_rate": 8.9184672866292e-06, + "loss": 16.0829, + "step": 146000 + }, + { + "epoch": 0.29494943781639243, + "grad_norm": 437.41790771484375, + "learning_rate": 8.918250455602162e-06, + "loss": 22.8996, + "step": 146010 + }, + { + "epoch": 0.2949696384490762, + "grad_norm": 291.26556396484375, + "learning_rate": 8.918033605478047e-06, + "loss": 35.5031, + "step": 146020 + }, + { + "epoch": 0.29498983908176, + "grad_norm": 368.38018798828125, + "learning_rate": 8.917816736257912e-06, 
+ "loss": 23.9374, + "step": 146030 + }, + { + "epoch": 0.29501003971444384, + "grad_norm": 412.8968200683594, + "learning_rate": 8.917599847942813e-06, + "loss": 28.5956, + "step": 146040 + }, + { + "epoch": 0.29503024034712766, + "grad_norm": 43.92367172241211, + "learning_rate": 8.917382940533809e-06, + "loss": 17.0223, + "step": 146050 + }, + { + "epoch": 0.2950504409798115, + "grad_norm": 411.7793273925781, + "learning_rate": 8.917166014031953e-06, + "loss": 20.9394, + "step": 146060 + }, + { + "epoch": 0.2950706416124953, + "grad_norm": 287.5653991699219, + "learning_rate": 8.916949068438307e-06, + "loss": 17.0937, + "step": 146070 + }, + { + "epoch": 0.2950908422451791, + "grad_norm": 612.794189453125, + "learning_rate": 8.916732103753924e-06, + "loss": 25.5027, + "step": 146080 + }, + { + "epoch": 0.29511104287786294, + "grad_norm": 0.0, + "learning_rate": 8.916515119979867e-06, + "loss": 22.0467, + "step": 146090 + }, + { + "epoch": 0.29513124351054676, + "grad_norm": 159.05609130859375, + "learning_rate": 8.916298117117188e-06, + "loss": 24.5293, + "step": 146100 + }, + { + "epoch": 0.2951514441432306, + "grad_norm": 143.80015563964844, + "learning_rate": 8.916081095166947e-06, + "loss": 17.7001, + "step": 146110 + }, + { + "epoch": 0.2951716447759144, + "grad_norm": 102.78569793701172, + "learning_rate": 8.915864054130203e-06, + "loss": 11.5246, + "step": 146120 + }, + { + "epoch": 0.2951918454085982, + "grad_norm": 456.70733642578125, + "learning_rate": 8.915646994008011e-06, + "loss": 35.5333, + "step": 146130 + }, + { + "epoch": 0.29521204604128204, + "grad_norm": 394.3199768066406, + "learning_rate": 8.915429914801433e-06, + "loss": 23.1958, + "step": 146140 + }, + { + "epoch": 0.2952322466739658, + "grad_norm": 768.4058837890625, + "learning_rate": 8.915212816511521e-06, + "loss": 25.0435, + "step": 146150 + }, + { + "epoch": 0.2952524473066496, + "grad_norm": 392.62481689453125, + "learning_rate": 8.91499569913934e-06, + "loss": 30.6829, + "step": 146160 + }, + { + "epoch": 0.29527264793933344, + "grad_norm": 235.93899536132812, + "learning_rate": 8.914778562685941e-06, + "loss": 10.8239, + "step": 146170 + }, + { + "epoch": 0.29529284857201726, + "grad_norm": 323.7956848144531, + "learning_rate": 8.91456140715239e-06, + "loss": 32.8595, + "step": 146180 + }, + { + "epoch": 0.2953130492047011, + "grad_norm": 651.843994140625, + "learning_rate": 8.914344232539739e-06, + "loss": 31.1947, + "step": 146190 + }, + { + "epoch": 0.2953332498373849, + "grad_norm": 262.5066223144531, + "learning_rate": 8.91412703884905e-06, + "loss": 17.1492, + "step": 146200 + }, + { + "epoch": 0.2953534504700687, + "grad_norm": 1024.361083984375, + "learning_rate": 8.91390982608138e-06, + "loss": 23.8093, + "step": 146210 + }, + { + "epoch": 0.29537365110275254, + "grad_norm": 264.75152587890625, + "learning_rate": 8.91369259423779e-06, + "loss": 17.0897, + "step": 146220 + }, + { + "epoch": 0.29539385173543636, + "grad_norm": 138.51217651367188, + "learning_rate": 8.913475343319333e-06, + "loss": 21.5543, + "step": 146230 + }, + { + "epoch": 0.2954140523681202, + "grad_norm": 593.5452880859375, + "learning_rate": 8.913258073327075e-06, + "loss": 20.6775, + "step": 146240 + }, + { + "epoch": 0.295434253000804, + "grad_norm": 193.46112060546875, + "learning_rate": 8.91304078426207e-06, + "loss": 20.7935, + "step": 146250 + }, + { + "epoch": 0.2954544536334878, + "grad_norm": 550.3673095703125, + "learning_rate": 8.91282347612538e-06, + "loss": 19.0618, + "step": 146260 + }, + { + "epoch": 
0.29547465426617164, + "grad_norm": 324.4193115234375, + "learning_rate": 8.912606148918063e-06, + "loss": 20.8737, + "step": 146270 + }, + { + "epoch": 0.2954948548988554, + "grad_norm": 659.3085327148438, + "learning_rate": 8.912388802641177e-06, + "loss": 24.7531, + "step": 146280 + }, + { + "epoch": 0.29551505553153923, + "grad_norm": 236.26272583007812, + "learning_rate": 8.912171437295785e-06, + "loss": 24.2559, + "step": 146290 + }, + { + "epoch": 0.29553525616422305, + "grad_norm": 218.22898864746094, + "learning_rate": 8.911954052882941e-06, + "loss": 11.1286, + "step": 146300 + }, + { + "epoch": 0.29555545679690687, + "grad_norm": 396.6911315917969, + "learning_rate": 8.91173664940371e-06, + "loss": 28.2047, + "step": 146310 + }, + { + "epoch": 0.2955756574295907, + "grad_norm": 277.3451843261719, + "learning_rate": 8.911519226859147e-06, + "loss": 21.9935, + "step": 146320 + }, + { + "epoch": 0.2955958580622745, + "grad_norm": 625.2225952148438, + "learning_rate": 8.911301785250315e-06, + "loss": 28.9494, + "step": 146330 + }, + { + "epoch": 0.29561605869495833, + "grad_norm": 394.705078125, + "learning_rate": 8.911084324578272e-06, + "loss": 31.6344, + "step": 146340 + }, + { + "epoch": 0.29563625932764215, + "grad_norm": 129.0830841064453, + "learning_rate": 8.910866844844077e-06, + "loss": 59.7862, + "step": 146350 + }, + { + "epoch": 0.29565645996032597, + "grad_norm": 264.7086181640625, + "learning_rate": 8.910649346048792e-06, + "loss": 25.4121, + "step": 146360 + }, + { + "epoch": 0.2956766605930098, + "grad_norm": 287.8779296875, + "learning_rate": 8.910431828193478e-06, + "loss": 24.0402, + "step": 146370 + }, + { + "epoch": 0.2956968612256936, + "grad_norm": 304.1589660644531, + "learning_rate": 8.910214291279192e-06, + "loss": 12.6678, + "step": 146380 + }, + { + "epoch": 0.29571706185837743, + "grad_norm": 302.09661865234375, + "learning_rate": 8.909996735306996e-06, + "loss": 43.1045, + "step": 146390 + }, + { + "epoch": 0.2957372624910612, + "grad_norm": 724.1412353515625, + "learning_rate": 8.909779160277951e-06, + "loss": 23.2681, + "step": 146400 + }, + { + "epoch": 0.295757463123745, + "grad_norm": 452.8772277832031, + "learning_rate": 8.909561566193118e-06, + "loss": 20.4508, + "step": 146410 + }, + { + "epoch": 0.29577766375642883, + "grad_norm": 228.56248474121094, + "learning_rate": 8.909343953053553e-06, + "loss": 27.4691, + "step": 146420 + }, + { + "epoch": 0.29579786438911265, + "grad_norm": 1085.392578125, + "learning_rate": 8.90912632086032e-06, + "loss": 25.6713, + "step": 146430 + }, + { + "epoch": 0.2958180650217965, + "grad_norm": 541.2378540039062, + "learning_rate": 8.90890866961448e-06, + "loss": 23.7755, + "step": 146440 + }, + { + "epoch": 0.2958382656544803, + "grad_norm": 225.4510040283203, + "learning_rate": 8.908690999317094e-06, + "loss": 42.209, + "step": 146450 + }, + { + "epoch": 0.2958584662871641, + "grad_norm": 419.2010192871094, + "learning_rate": 8.90847330996922e-06, + "loss": 22.9083, + "step": 146460 + }, + { + "epoch": 0.29587866691984793, + "grad_norm": 149.35641479492188, + "learning_rate": 8.908255601571924e-06, + "loss": 20.6513, + "step": 146470 + }, + { + "epoch": 0.29589886755253175, + "grad_norm": 933.8770141601562, + "learning_rate": 8.908037874126263e-06, + "loss": 46.9795, + "step": 146480 + }, + { + "epoch": 0.2959190681852156, + "grad_norm": 146.07037353515625, + "learning_rate": 8.9078201276333e-06, + "loss": 43.3425, + "step": 146490 + }, + { + "epoch": 0.2959392688178994, + "grad_norm": 274.5592041015625, + 
"learning_rate": 8.907602362094094e-06, + "loss": 14.9558, + "step": 146500 + }, + { + "epoch": 0.2959594694505832, + "grad_norm": 332.6141357421875, + "learning_rate": 8.90738457750971e-06, + "loss": 10.5094, + "step": 146510 + }, + { + "epoch": 0.29597967008326703, + "grad_norm": 655.5591430664062, + "learning_rate": 8.907166773881207e-06, + "loss": 35.6411, + "step": 146520 + }, + { + "epoch": 0.2959998707159508, + "grad_norm": 507.4541931152344, + "learning_rate": 8.906948951209647e-06, + "loss": 10.5311, + "step": 146530 + }, + { + "epoch": 0.2960200713486346, + "grad_norm": 349.426513671875, + "learning_rate": 8.90673110949609e-06, + "loss": 27.6748, + "step": 146540 + }, + { + "epoch": 0.29604027198131844, + "grad_norm": 283.3293762207031, + "learning_rate": 8.9065132487416e-06, + "loss": 40.9241, + "step": 146550 + }, + { + "epoch": 0.29606047261400226, + "grad_norm": 685.6849975585938, + "learning_rate": 8.90629536894724e-06, + "loss": 20.4005, + "step": 146560 + }, + { + "epoch": 0.2960806732466861, + "grad_norm": 315.76190185546875, + "learning_rate": 8.906077470114068e-06, + "loss": 23.1389, + "step": 146570 + }, + { + "epoch": 0.2961008738793699, + "grad_norm": 399.6425476074219, + "learning_rate": 8.90585955224315e-06, + "loss": 25.1947, + "step": 146580 + }, + { + "epoch": 0.2961210745120537, + "grad_norm": 505.26568603515625, + "learning_rate": 8.905641615335545e-06, + "loss": 17.7144, + "step": 146590 + }, + { + "epoch": 0.29614127514473754, + "grad_norm": 1093.8341064453125, + "learning_rate": 8.905423659392316e-06, + "loss": 33.1093, + "step": 146600 + }, + { + "epoch": 0.29616147577742136, + "grad_norm": 436.3739929199219, + "learning_rate": 8.905205684414527e-06, + "loss": 21.3882, + "step": 146610 + }, + { + "epoch": 0.2961816764101052, + "grad_norm": 295.1983947753906, + "learning_rate": 8.90498769040324e-06, + "loss": 22.0839, + "step": 146620 + }, + { + "epoch": 0.296201877042789, + "grad_norm": 543.0384521484375, + "learning_rate": 8.904769677359515e-06, + "loss": 20.9607, + "step": 146630 + }, + { + "epoch": 0.2962220776754728, + "grad_norm": 438.29888916015625, + "learning_rate": 8.904551645284416e-06, + "loss": 11.9037, + "step": 146640 + }, + { + "epoch": 0.29624227830815664, + "grad_norm": 358.1011657714844, + "learning_rate": 8.904333594179007e-06, + "loss": 22.6588, + "step": 146650 + }, + { + "epoch": 0.2962624789408404, + "grad_norm": 712.97119140625, + "learning_rate": 8.904115524044349e-06, + "loss": 14.9611, + "step": 146660 + }, + { + "epoch": 0.2962826795735242, + "grad_norm": 395.3158264160156, + "learning_rate": 8.903897434881506e-06, + "loss": 43.3063, + "step": 146670 + }, + { + "epoch": 0.29630288020620804, + "grad_norm": 605.7266235351562, + "learning_rate": 8.90367932669154e-06, + "loss": 25.3787, + "step": 146680 + }, + { + "epoch": 0.29632308083889186, + "grad_norm": 711.9656982421875, + "learning_rate": 8.903461199475514e-06, + "loss": 17.2008, + "step": 146690 + }, + { + "epoch": 0.2963432814715757, + "grad_norm": 259.397216796875, + "learning_rate": 8.903243053234492e-06, + "loss": 12.5827, + "step": 146700 + }, + { + "epoch": 0.2963634821042595, + "grad_norm": 398.7919616699219, + "learning_rate": 8.903024887969536e-06, + "loss": 15.5399, + "step": 146710 + }, + { + "epoch": 0.2963836827369433, + "grad_norm": 586.203125, + "learning_rate": 8.90280670368171e-06, + "loss": 27.2687, + "step": 146720 + }, + { + "epoch": 0.29640388336962714, + "grad_norm": 206.72064208984375, + "learning_rate": 8.902588500372078e-06, + "loss": 25.3577, + 
"step": 146730 + }, + { + "epoch": 0.29642408400231096, + "grad_norm": 519.1009521484375, + "learning_rate": 8.902370278041705e-06, + "loss": 16.6351, + "step": 146740 + }, + { + "epoch": 0.2964442846349948, + "grad_norm": 291.5577392578125, + "learning_rate": 8.902152036691649e-06, + "loss": 43.2595, + "step": 146750 + }, + { + "epoch": 0.2964644852676786, + "grad_norm": 444.7674255371094, + "learning_rate": 8.90193377632298e-06, + "loss": 35.0919, + "step": 146760 + }, + { + "epoch": 0.2964846859003624, + "grad_norm": 510.2618103027344, + "learning_rate": 8.901715496936758e-06, + "loss": 26.654, + "step": 146770 + }, + { + "epoch": 0.29650488653304624, + "grad_norm": 448.94219970703125, + "learning_rate": 8.901497198534048e-06, + "loss": 27.6202, + "step": 146780 + }, + { + "epoch": 0.29652508716573, + "grad_norm": 230.44442749023438, + "learning_rate": 8.901278881115914e-06, + "loss": 12.4462, + "step": 146790 + }, + { + "epoch": 0.2965452877984138, + "grad_norm": 39.407379150390625, + "learning_rate": 8.90106054468342e-06, + "loss": 17.1074, + "step": 146800 + }, + { + "epoch": 0.29656548843109765, + "grad_norm": 57.10234832763672, + "learning_rate": 8.90084218923763e-06, + "loss": 29.6358, + "step": 146810 + }, + { + "epoch": 0.29658568906378147, + "grad_norm": 647.422119140625, + "learning_rate": 8.900623814779605e-06, + "loss": 12.2245, + "step": 146820 + }, + { + "epoch": 0.2966058896964653, + "grad_norm": 0.0, + "learning_rate": 8.900405421310416e-06, + "loss": 16.0647, + "step": 146830 + }, + { + "epoch": 0.2966260903291491, + "grad_norm": 728.9190673828125, + "learning_rate": 8.900187008831124e-06, + "loss": 19.3653, + "step": 146840 + }, + { + "epoch": 0.2966462909618329, + "grad_norm": 444.3294372558594, + "learning_rate": 8.899968577342794e-06, + "loss": 12.0394, + "step": 146850 + }, + { + "epoch": 0.29666649159451675, + "grad_norm": 257.6813659667969, + "learning_rate": 8.89975012684649e-06, + "loss": 28.4348, + "step": 146860 + }, + { + "epoch": 0.29668669222720057, + "grad_norm": 249.75827026367188, + "learning_rate": 8.899531657343275e-06, + "loss": 19.7214, + "step": 146870 + }, + { + "epoch": 0.2967068928598844, + "grad_norm": 151.40345764160156, + "learning_rate": 8.899313168834216e-06, + "loss": 23.5436, + "step": 146880 + }, + { + "epoch": 0.2967270934925682, + "grad_norm": 457.6058044433594, + "learning_rate": 8.899094661320378e-06, + "loss": 20.3676, + "step": 146890 + }, + { + "epoch": 0.296747294125252, + "grad_norm": 564.66748046875, + "learning_rate": 8.898876134802827e-06, + "loss": 25.6532, + "step": 146900 + }, + { + "epoch": 0.29676749475793585, + "grad_norm": 54.622901916503906, + "learning_rate": 8.898657589282625e-06, + "loss": 26.233, + "step": 146910 + }, + { + "epoch": 0.2967876953906196, + "grad_norm": 814.7896118164062, + "learning_rate": 8.89843902476084e-06, + "loss": 24.8433, + "step": 146920 + }, + { + "epoch": 0.29680789602330343, + "grad_norm": 574.2201538085938, + "learning_rate": 8.898220441238534e-06, + "loss": 13.9479, + "step": 146930 + }, + { + "epoch": 0.29682809665598725, + "grad_norm": 289.9844665527344, + "learning_rate": 8.898001838716777e-06, + "loss": 18.3098, + "step": 146940 + }, + { + "epoch": 0.29684829728867107, + "grad_norm": 357.9109802246094, + "learning_rate": 8.897783217196629e-06, + "loss": 24.6636, + "step": 146950 + }, + { + "epoch": 0.2968684979213549, + "grad_norm": 343.20648193359375, + "learning_rate": 8.89756457667916e-06, + "loss": 23.2273, + "step": 146960 + }, + { + "epoch": 0.2968886985540387, + "grad_norm": 
252.51998901367188, + "learning_rate": 8.897345917165434e-06, + "loss": 31.9909, + "step": 146970 + }, + { + "epoch": 0.29690889918672253, + "grad_norm": 548.3547973632812, + "learning_rate": 8.897127238656517e-06, + "loss": 13.3891, + "step": 146980 + }, + { + "epoch": 0.29692909981940635, + "grad_norm": 433.0122375488281, + "learning_rate": 8.896908541153475e-06, + "loss": 12.8016, + "step": 146990 + }, + { + "epoch": 0.29694930045209017, + "grad_norm": 140.67532348632812, + "learning_rate": 8.896689824657371e-06, + "loss": 14.628, + "step": 147000 + }, + { + "epoch": 0.296969501084774, + "grad_norm": 253.27105712890625, + "learning_rate": 8.896471089169275e-06, + "loss": 19.5172, + "step": 147010 + }, + { + "epoch": 0.2969897017174578, + "grad_norm": 1135.7210693359375, + "learning_rate": 8.896252334690251e-06, + "loss": 26.0944, + "step": 147020 + }, + { + "epoch": 0.29700990235014163, + "grad_norm": 180.94195556640625, + "learning_rate": 8.896033561221367e-06, + "loss": 21.1015, + "step": 147030 + }, + { + "epoch": 0.2970301029828254, + "grad_norm": 133.50790405273438, + "learning_rate": 8.895814768763686e-06, + "loss": 14.5759, + "step": 147040 + }, + { + "epoch": 0.2970503036155092, + "grad_norm": 666.6173706054688, + "learning_rate": 8.895595957318277e-06, + "loss": 30.8302, + "step": 147050 + }, + { + "epoch": 0.29707050424819303, + "grad_norm": 83.84063720703125, + "learning_rate": 8.895377126886206e-06, + "loss": 26.6543, + "step": 147060 + }, + { + "epoch": 0.29709070488087685, + "grad_norm": 358.62750244140625, + "learning_rate": 8.89515827746854e-06, + "loss": 23.4706, + "step": 147070 + }, + { + "epoch": 0.2971109055135607, + "grad_norm": 51.10934829711914, + "learning_rate": 8.894939409066344e-06, + "loss": 14.4996, + "step": 147080 + }, + { + "epoch": 0.2971311061462445, + "grad_norm": 561.810791015625, + "learning_rate": 8.894720521680687e-06, + "loss": 34.3612, + "step": 147090 + }, + { + "epoch": 0.2971513067789283, + "grad_norm": 375.919677734375, + "learning_rate": 8.894501615312633e-06, + "loss": 14.518, + "step": 147100 + }, + { + "epoch": 0.29717150741161213, + "grad_norm": 137.87319946289062, + "learning_rate": 8.894282689963252e-06, + "loss": 27.5433, + "step": 147110 + }, + { + "epoch": 0.29719170804429595, + "grad_norm": 270.0392761230469, + "learning_rate": 8.894063745633607e-06, + "loss": 15.0138, + "step": 147120 + }, + { + "epoch": 0.2972119086769798, + "grad_norm": 785.6463623046875, + "learning_rate": 8.89384478232477e-06, + "loss": 31.8076, + "step": 147130 + }, + { + "epoch": 0.2972321093096636, + "grad_norm": 504.37017822265625, + "learning_rate": 8.893625800037803e-06, + "loss": 34.1459, + "step": 147140 + }, + { + "epoch": 0.2972523099423474, + "grad_norm": 259.37835693359375, + "learning_rate": 8.89340679877378e-06, + "loss": 26.7039, + "step": 147150 + }, + { + "epoch": 0.29727251057503123, + "grad_norm": 425.8026123046875, + "learning_rate": 8.893187778533763e-06, + "loss": 31.1731, + "step": 147160 + }, + { + "epoch": 0.297292711207715, + "grad_norm": 129.9631805419922, + "learning_rate": 8.892968739318819e-06, + "loss": 41.2735, + "step": 147170 + }, + { + "epoch": 0.2973129118403988, + "grad_norm": 100.39238739013672, + "learning_rate": 8.89274968113002e-06, + "loss": 13.9167, + "step": 147180 + }, + { + "epoch": 0.29733311247308264, + "grad_norm": 529.4312744140625, + "learning_rate": 8.89253060396843e-06, + "loss": 37.019, + "step": 147190 + }, + { + "epoch": 0.29735331310576646, + "grad_norm": 103.83464050292969, + "learning_rate": 
8.892311507835118e-06, + "loss": 18.8964, + "step": 147200 + }, + { + "epoch": 0.2973735137384503, + "grad_norm": 496.5546875, + "learning_rate": 8.892092392731152e-06, + "loss": 26.1287, + "step": 147210 + }, + { + "epoch": 0.2973937143711341, + "grad_norm": 546.2858276367188, + "learning_rate": 8.891873258657599e-06, + "loss": 28.8668, + "step": 147220 + }, + { + "epoch": 0.2974139150038179, + "grad_norm": 275.6580810546875, + "learning_rate": 8.891654105615528e-06, + "loss": 22.4762, + "step": 147230 + }, + { + "epoch": 0.29743411563650174, + "grad_norm": 370.995849609375, + "learning_rate": 8.891434933606009e-06, + "loss": 20.5459, + "step": 147240 + }, + { + "epoch": 0.29745431626918556, + "grad_norm": 343.0150451660156, + "learning_rate": 8.891215742630106e-06, + "loss": 13.0418, + "step": 147250 + }, + { + "epoch": 0.2974745169018694, + "grad_norm": 122.11812591552734, + "learning_rate": 8.890996532688889e-06, + "loss": 19.9959, + "step": 147260 + }, + { + "epoch": 0.2974947175345532, + "grad_norm": 338.8207702636719, + "learning_rate": 8.890777303783428e-06, + "loss": 20.0461, + "step": 147270 + }, + { + "epoch": 0.297514918167237, + "grad_norm": 336.1431884765625, + "learning_rate": 8.89055805591479e-06, + "loss": 18.2996, + "step": 147280 + }, + { + "epoch": 0.29753511879992084, + "grad_norm": 902.4846801757812, + "learning_rate": 8.890338789084043e-06, + "loss": 28.9435, + "step": 147290 + }, + { + "epoch": 0.2975553194326046, + "grad_norm": 1083.5321044921875, + "learning_rate": 8.890119503292258e-06, + "loss": 25.1867, + "step": 147300 + }, + { + "epoch": 0.2975755200652884, + "grad_norm": 0.0, + "learning_rate": 8.889900198540502e-06, + "loss": 19.9641, + "step": 147310 + }, + { + "epoch": 0.29759572069797224, + "grad_norm": 239.86764526367188, + "learning_rate": 8.889680874829845e-06, + "loss": 26.0744, + "step": 147320 + }, + { + "epoch": 0.29761592133065606, + "grad_norm": 229.78494262695312, + "learning_rate": 8.889461532161353e-06, + "loss": 23.5497, + "step": 147330 + }, + { + "epoch": 0.2976361219633399, + "grad_norm": 688.3148803710938, + "learning_rate": 8.889242170536099e-06, + "loss": 18.7986, + "step": 147340 + }, + { + "epoch": 0.2976563225960237, + "grad_norm": 238.5793914794922, + "learning_rate": 8.88902278995515e-06, + "loss": 22.7555, + "step": 147350 + }, + { + "epoch": 0.2976765232287075, + "grad_norm": 391.3150634765625, + "learning_rate": 8.888803390419576e-06, + "loss": 15.5628, + "step": 147360 + }, + { + "epoch": 0.29769672386139134, + "grad_norm": 465.0777282714844, + "learning_rate": 8.888583971930444e-06, + "loss": 34.3493, + "step": 147370 + }, + { + "epoch": 0.29771692449407516, + "grad_norm": 865.205078125, + "learning_rate": 8.888364534488828e-06, + "loss": 25.9289, + "step": 147380 + }, + { + "epoch": 0.297737125126759, + "grad_norm": 502.6608581542969, + "learning_rate": 8.888145078095794e-06, + "loss": 15.0483, + "step": 147390 + }, + { + "epoch": 0.2977573257594428, + "grad_norm": 227.12208557128906, + "learning_rate": 8.887925602752411e-06, + "loss": 13.7662, + "step": 147400 + }, + { + "epoch": 0.2977775263921266, + "grad_norm": 412.6221923828125, + "learning_rate": 8.887706108459751e-06, + "loss": 21.061, + "step": 147410 + }, + { + "epoch": 0.29779772702481044, + "grad_norm": 470.05987548828125, + "learning_rate": 8.887486595218884e-06, + "loss": 26.8712, + "step": 147420 + }, + { + "epoch": 0.2978179276574942, + "grad_norm": 443.9976501464844, + "learning_rate": 8.887267063030876e-06, + "loss": 18.5763, + "step": 147430 + }, + { + 
"epoch": 0.29783812829017803, + "grad_norm": 462.3858642578125, + "learning_rate": 8.887047511896803e-06, + "loss": 15.802, + "step": 147440 + }, + { + "epoch": 0.29785832892286185, + "grad_norm": 401.3211364746094, + "learning_rate": 8.886827941817731e-06, + "loss": 27.9149, + "step": 147450 + }, + { + "epoch": 0.29787852955554567, + "grad_norm": 356.8927917480469, + "learning_rate": 8.88660835279473e-06, + "loss": 24.4112, + "step": 147460 + }, + { + "epoch": 0.2978987301882295, + "grad_norm": 128.1566619873047, + "learning_rate": 8.886388744828872e-06, + "loss": 11.9876, + "step": 147470 + }, + { + "epoch": 0.2979189308209133, + "grad_norm": 1048.524169921875, + "learning_rate": 8.886169117921227e-06, + "loss": 57.3669, + "step": 147480 + }, + { + "epoch": 0.29793913145359713, + "grad_norm": 193.65866088867188, + "learning_rate": 8.885949472072864e-06, + "loss": 26.0427, + "step": 147490 + }, + { + "epoch": 0.29795933208628095, + "grad_norm": 240.7947998046875, + "learning_rate": 8.885729807284855e-06, + "loss": 18.3831, + "step": 147500 + }, + { + "epoch": 0.29797953271896477, + "grad_norm": 370.0865173339844, + "learning_rate": 8.88551012355827e-06, + "loss": 17.7053, + "step": 147510 + }, + { + "epoch": 0.2979997333516486, + "grad_norm": 282.04217529296875, + "learning_rate": 8.885290420894182e-06, + "loss": 15.4468, + "step": 147520 + }, + { + "epoch": 0.2980199339843324, + "grad_norm": 364.1242370605469, + "learning_rate": 8.885070699293656e-06, + "loss": 19.4559, + "step": 147530 + }, + { + "epoch": 0.29804013461701623, + "grad_norm": 586.850830078125, + "learning_rate": 8.884850958757769e-06, + "loss": 13.9707, + "step": 147540 + }, + { + "epoch": 0.29806033524970005, + "grad_norm": 236.7564697265625, + "learning_rate": 8.88463119928759e-06, + "loss": 28.6807, + "step": 147550 + }, + { + "epoch": 0.2980805358823838, + "grad_norm": 388.3937072753906, + "learning_rate": 8.88441142088419e-06, + "loss": 29.3031, + "step": 147560 + }, + { + "epoch": 0.29810073651506763, + "grad_norm": 646.3626098632812, + "learning_rate": 8.884191623548636e-06, + "loss": 30.6844, + "step": 147570 + }, + { + "epoch": 0.29812093714775145, + "grad_norm": 497.60528564453125, + "learning_rate": 8.883971807282007e-06, + "loss": 19.792, + "step": 147580 + }, + { + "epoch": 0.2981411377804353, + "grad_norm": 762.6091918945312, + "learning_rate": 8.88375197208537e-06, + "loss": 20.2638, + "step": 147590 + }, + { + "epoch": 0.2981613384131191, + "grad_norm": 59.60248947143555, + "learning_rate": 8.883532117959797e-06, + "loss": 47.3193, + "step": 147600 + }, + { + "epoch": 0.2981815390458029, + "grad_norm": 185.5955352783203, + "learning_rate": 8.883312244906358e-06, + "loss": 10.3251, + "step": 147610 + }, + { + "epoch": 0.29820173967848673, + "grad_norm": 255.9095458984375, + "learning_rate": 8.883092352926126e-06, + "loss": 28.2199, + "step": 147620 + }, + { + "epoch": 0.29822194031117055, + "grad_norm": 423.7047424316406, + "learning_rate": 8.882872442020174e-06, + "loss": 32.0141, + "step": 147630 + }, + { + "epoch": 0.2982421409438544, + "grad_norm": 435.6179504394531, + "learning_rate": 8.882652512189574e-06, + "loss": 17.5903, + "step": 147640 + }, + { + "epoch": 0.2982623415765382, + "grad_norm": 102.59506225585938, + "learning_rate": 8.882432563435394e-06, + "loss": 12.5239, + "step": 147650 + }, + { + "epoch": 0.298282542209222, + "grad_norm": 311.9666442871094, + "learning_rate": 8.88221259575871e-06, + "loss": 28.5527, + "step": 147660 + }, + { + "epoch": 0.29830274284190583, + "grad_norm": 
383.766845703125, + "learning_rate": 8.881992609160592e-06, + "loss": 11.1076, + "step": 147670 + }, + { + "epoch": 0.2983229434745896, + "grad_norm": 399.8511657714844, + "learning_rate": 8.881772603642113e-06, + "loss": 23.4477, + "step": 147680 + }, + { + "epoch": 0.2983431441072734, + "grad_norm": 371.61968994140625, + "learning_rate": 8.881552579204345e-06, + "loss": 27.2733, + "step": 147690 + }, + { + "epoch": 0.29836334473995724, + "grad_norm": 425.9482116699219, + "learning_rate": 8.88133253584836e-06, + "loss": 28.8364, + "step": 147700 + }, + { + "epoch": 0.29838354537264106, + "grad_norm": 488.075927734375, + "learning_rate": 8.881112473575231e-06, + "loss": 33.4234, + "step": 147710 + }, + { + "epoch": 0.2984037460053249, + "grad_norm": 410.5538330078125, + "learning_rate": 8.880892392386032e-06, + "loss": 57.9526, + "step": 147720 + }, + { + "epoch": 0.2984239466380087, + "grad_norm": 73.84367370605469, + "learning_rate": 8.880672292281834e-06, + "loss": 17.895, + "step": 147730 + }, + { + "epoch": 0.2984441472706925, + "grad_norm": 290.07086181640625, + "learning_rate": 8.880452173263708e-06, + "loss": 17.2092, + "step": 147740 + }, + { + "epoch": 0.29846434790337634, + "grad_norm": 390.3843994140625, + "learning_rate": 8.88023203533273e-06, + "loss": 19.0085, + "step": 147750 + }, + { + "epoch": 0.29848454853606016, + "grad_norm": 766.4041748046875, + "learning_rate": 8.880011878489972e-06, + "loss": 20.1722, + "step": 147760 + }, + { + "epoch": 0.298504749168744, + "grad_norm": 318.4327392578125, + "learning_rate": 8.879791702736507e-06, + "loss": 22.6863, + "step": 147770 + }, + { + "epoch": 0.2985249498014278, + "grad_norm": 563.6470336914062, + "learning_rate": 8.879571508073407e-06, + "loss": 24.1672, + "step": 147780 + }, + { + "epoch": 0.2985451504341116, + "grad_norm": 455.07293701171875, + "learning_rate": 8.879351294501746e-06, + "loss": 18.4838, + "step": 147790 + }, + { + "epoch": 0.29856535106679544, + "grad_norm": 524.3452758789062, + "learning_rate": 8.879131062022598e-06, + "loss": 23.3109, + "step": 147800 + }, + { + "epoch": 0.2985855516994792, + "grad_norm": 765.2113037109375, + "learning_rate": 8.878910810637036e-06, + "loss": 22.2379, + "step": 147810 + }, + { + "epoch": 0.298605752332163, + "grad_norm": 489.6396484375, + "learning_rate": 8.878690540346132e-06, + "loss": 10.7837, + "step": 147820 + }, + { + "epoch": 0.29862595296484684, + "grad_norm": 752.63232421875, + "learning_rate": 8.878470251150959e-06, + "loss": 21.7687, + "step": 147830 + }, + { + "epoch": 0.29864615359753066, + "grad_norm": 952.3746337890625, + "learning_rate": 8.878249943052595e-06, + "loss": 39.7389, + "step": 147840 + }, + { + "epoch": 0.2986663542302145, + "grad_norm": 238.0981903076172, + "learning_rate": 8.87802961605211e-06, + "loss": 16.2032, + "step": 147850 + }, + { + "epoch": 0.2986865548628983, + "grad_norm": 119.70855712890625, + "learning_rate": 8.877809270150582e-06, + "loss": 17.705, + "step": 147860 + }, + { + "epoch": 0.2987067554955821, + "grad_norm": 541.710205078125, + "learning_rate": 8.877588905349079e-06, + "loss": 39.0836, + "step": 147870 + }, + { + "epoch": 0.29872695612826594, + "grad_norm": 1121.7535400390625, + "learning_rate": 8.877368521648678e-06, + "loss": 34.2635, + "step": 147880 + }, + { + "epoch": 0.29874715676094976, + "grad_norm": 455.7093505859375, + "learning_rate": 8.877148119050453e-06, + "loss": 19.5688, + "step": 147890 + }, + { + "epoch": 0.2987673573936336, + "grad_norm": 16.635032653808594, + "learning_rate": 
8.87692769755548e-06, + "loss": 15.6266, + "step": 147900 + }, + { + "epoch": 0.2987875580263174, + "grad_norm": 498.6807556152344, + "learning_rate": 8.876707257164829e-06, + "loss": 17.2583, + "step": 147910 + }, + { + "epoch": 0.2988077586590012, + "grad_norm": 802.8692016601562, + "learning_rate": 8.87648679787958e-06, + "loss": 16.1728, + "step": 147920 + }, + { + "epoch": 0.29882795929168504, + "grad_norm": 298.41729736328125, + "learning_rate": 8.876266319700802e-06, + "loss": 13.2344, + "step": 147930 + }, + { + "epoch": 0.2988481599243688, + "grad_norm": 512.8638305664062, + "learning_rate": 8.876045822629573e-06, + "loss": 29.2116, + "step": 147940 + }, + { + "epoch": 0.2988683605570526, + "grad_norm": 811.17822265625, + "learning_rate": 8.875825306666968e-06, + "loss": 22.1756, + "step": 147950 + }, + { + "epoch": 0.29888856118973645, + "grad_norm": 91.45901489257812, + "learning_rate": 8.87560477181406e-06, + "loss": 24.5615, + "step": 147960 + }, + { + "epoch": 0.29890876182242027, + "grad_norm": 496.5906982421875, + "learning_rate": 8.875384218071923e-06, + "loss": 10.5712, + "step": 147970 + }, + { + "epoch": 0.2989289624551041, + "grad_norm": 80.80033874511719, + "learning_rate": 8.875163645441633e-06, + "loss": 21.25, + "step": 147980 + }, + { + "epoch": 0.2989491630877879, + "grad_norm": 359.9295654296875, + "learning_rate": 8.874943053924267e-06, + "loss": 13.4884, + "step": 147990 + }, + { + "epoch": 0.2989693637204717, + "grad_norm": 264.8037109375, + "learning_rate": 8.874722443520898e-06, + "loss": 30.6023, + "step": 148000 + }, + { + "epoch": 0.29898956435315555, + "grad_norm": 248.23143005371094, + "learning_rate": 8.874501814232603e-06, + "loss": 31.3907, + "step": 148010 + }, + { + "epoch": 0.29900976498583937, + "grad_norm": 447.3085021972656, + "learning_rate": 8.874281166060454e-06, + "loss": 18.9093, + "step": 148020 + }, + { + "epoch": 0.2990299656185232, + "grad_norm": 581.6533203125, + "learning_rate": 8.87406049900553e-06, + "loss": 18.1009, + "step": 148030 + }, + { + "epoch": 0.299050166251207, + "grad_norm": 369.7279357910156, + "learning_rate": 8.873839813068904e-06, + "loss": 12.6975, + "step": 148040 + }, + { + "epoch": 0.2990703668838908, + "grad_norm": 758.1390380859375, + "learning_rate": 8.873619108251654e-06, + "loss": 21.9412, + "step": 148050 + }, + { + "epoch": 0.29909056751657465, + "grad_norm": 0.9877247214317322, + "learning_rate": 8.873398384554852e-06, + "loss": 14.0648, + "step": 148060 + }, + { + "epoch": 0.2991107681492584, + "grad_norm": 266.4160461425781, + "learning_rate": 8.873177641979578e-06, + "loss": 20.1138, + "step": 148070 + }, + { + "epoch": 0.29913096878194223, + "grad_norm": 93.53788757324219, + "learning_rate": 8.872956880526906e-06, + "loss": 21.1116, + "step": 148080 + }, + { + "epoch": 0.29915116941462605, + "grad_norm": 732.2179565429688, + "learning_rate": 8.87273610019791e-06, + "loss": 29.8653, + "step": 148090 + }, + { + "epoch": 0.29917137004730987, + "grad_norm": 551.4814453125, + "learning_rate": 8.872515300993669e-06, + "loss": 17.968, + "step": 148100 + }, + { + "epoch": 0.2991915706799937, + "grad_norm": 377.6685791015625, + "learning_rate": 8.872294482915259e-06, + "loss": 18.0758, + "step": 148110 + }, + { + "epoch": 0.2992117713126775, + "grad_norm": 522.7916259765625, + "learning_rate": 8.872073645963755e-06, + "loss": 28.6316, + "step": 148120 + }, + { + "epoch": 0.29923197194536133, + "grad_norm": 286.47064208984375, + "learning_rate": 8.871852790140234e-06, + "loss": 16.396, + "step": 148130 + }, + 
{ + "epoch": 0.29925217257804515, + "grad_norm": 335.798095703125, + "learning_rate": 8.87163191544577e-06, + "loss": 17.9535, + "step": 148140 + }, + { + "epoch": 0.29927237321072897, + "grad_norm": 338.4599304199219, + "learning_rate": 8.871411021881444e-06, + "loss": 23.8329, + "step": 148150 + }, + { + "epoch": 0.2992925738434128, + "grad_norm": 804.56787109375, + "learning_rate": 8.871190109448329e-06, + "loss": 31.3936, + "step": 148160 + }, + { + "epoch": 0.2993127744760966, + "grad_norm": 795.2534790039062, + "learning_rate": 8.870969178147502e-06, + "loss": 15.8334, + "step": 148170 + }, + { + "epoch": 0.29933297510878043, + "grad_norm": 404.1622009277344, + "learning_rate": 8.870748227980044e-06, + "loss": 21.6559, + "step": 148180 + }, + { + "epoch": 0.29935317574146425, + "grad_norm": 2237.97802734375, + "learning_rate": 8.870527258947025e-06, + "loss": 46.9274, + "step": 148190 + }, + { + "epoch": 0.299373376374148, + "grad_norm": 591.6390380859375, + "learning_rate": 8.870306271049527e-06, + "loss": 23.5487, + "step": 148200 + }, + { + "epoch": 0.29939357700683183, + "grad_norm": 681.0844116210938, + "learning_rate": 8.870085264288626e-06, + "loss": 28.6523, + "step": 148210 + }, + { + "epoch": 0.29941377763951565, + "grad_norm": 174.54888916015625, + "learning_rate": 8.869864238665398e-06, + "loss": 16.711, + "step": 148220 + }, + { + "epoch": 0.2994339782721995, + "grad_norm": 1209.55517578125, + "learning_rate": 8.86964319418092e-06, + "loss": 32.4493, + "step": 148230 + }, + { + "epoch": 0.2994541789048833, + "grad_norm": 856.8101806640625, + "learning_rate": 8.869422130836274e-06, + "loss": 44.8244, + "step": 148240 + }, + { + "epoch": 0.2994743795375671, + "grad_norm": 1115.9072265625, + "learning_rate": 8.869201048632531e-06, + "loss": 43.5113, + "step": 148250 + }, + { + "epoch": 0.29949458017025093, + "grad_norm": 380.28070068359375, + "learning_rate": 8.868979947570773e-06, + "loss": 21.9477, + "step": 148260 + }, + { + "epoch": 0.29951478080293475, + "grad_norm": 366.782958984375, + "learning_rate": 8.868758827652075e-06, + "loss": 17.2486, + "step": 148270 + }, + { + "epoch": 0.2995349814356186, + "grad_norm": 350.6100158691406, + "learning_rate": 8.868537688877516e-06, + "loss": 21.9206, + "step": 148280 + }, + { + "epoch": 0.2995551820683024, + "grad_norm": 352.701171875, + "learning_rate": 8.868316531248173e-06, + "loss": 24.8186, + "step": 148290 + }, + { + "epoch": 0.2995753827009862, + "grad_norm": 913.413818359375, + "learning_rate": 8.868095354765125e-06, + "loss": 17.7333, + "step": 148300 + }, + { + "epoch": 0.29959558333367003, + "grad_norm": 442.60107421875, + "learning_rate": 8.867874159429448e-06, + "loss": 17.0529, + "step": 148310 + }, + { + "epoch": 0.2996157839663538, + "grad_norm": 433.0865173339844, + "learning_rate": 8.867652945242225e-06, + "loss": 25.0527, + "step": 148320 + }, + { + "epoch": 0.2996359845990376, + "grad_norm": 532.3131713867188, + "learning_rate": 8.867431712204527e-06, + "loss": 24.9076, + "step": 148330 + }, + { + "epoch": 0.29965618523172144, + "grad_norm": 742.48876953125, + "learning_rate": 8.867210460317437e-06, + "loss": 31.1177, + "step": 148340 + }, + { + "epoch": 0.29967638586440526, + "grad_norm": 769.7986450195312, + "learning_rate": 8.866989189582033e-06, + "loss": 33.9652, + "step": 148350 + }, + { + "epoch": 0.2996965864970891, + "grad_norm": 303.25518798828125, + "learning_rate": 8.86676789999939e-06, + "loss": 16.3814, + "step": 148360 + }, + { + "epoch": 0.2997167871297729, + "grad_norm": 
131.83396911621094, + "learning_rate": 8.866546591570593e-06, + "loss": 9.7804, + "step": 148370 + }, + { + "epoch": 0.2997369877624567, + "grad_norm": 61.998233795166016, + "learning_rate": 8.866325264296716e-06, + "loss": 15.5828, + "step": 148380 + }, + { + "epoch": 0.29975718839514054, + "grad_norm": 341.0106201171875, + "learning_rate": 8.866103918178837e-06, + "loss": 16.5429, + "step": 148390 + }, + { + "epoch": 0.29977738902782436, + "grad_norm": 355.2975769042969, + "learning_rate": 8.865882553218036e-06, + "loss": 20.0055, + "step": 148400 + }, + { + "epoch": 0.2997975896605082, + "grad_norm": 152.00953674316406, + "learning_rate": 8.865661169415396e-06, + "loss": 18.1553, + "step": 148410 + }, + { + "epoch": 0.299817790293192, + "grad_norm": 523.288330078125, + "learning_rate": 8.865439766771988e-06, + "loss": 13.4392, + "step": 148420 + }, + { + "epoch": 0.2998379909258758, + "grad_norm": 258.62744140625, + "learning_rate": 8.865218345288897e-06, + "loss": 16.0073, + "step": 148430 + }, + { + "epoch": 0.29985819155855964, + "grad_norm": 260.50823974609375, + "learning_rate": 8.864996904967202e-06, + "loss": 24.4141, + "step": 148440 + }, + { + "epoch": 0.2998783921912434, + "grad_norm": 297.6823425292969, + "learning_rate": 8.864775445807979e-06, + "loss": 24.0066, + "step": 148450 + }, + { + "epoch": 0.2998985928239272, + "grad_norm": 170.50486755371094, + "learning_rate": 8.86455396781231e-06, + "loss": 32.7914, + "step": 148460 + }, + { + "epoch": 0.29991879345661104, + "grad_norm": 201.24354553222656, + "learning_rate": 8.864332470981274e-06, + "loss": 18.1797, + "step": 148470 + }, + { + "epoch": 0.29993899408929486, + "grad_norm": 599.9772338867188, + "learning_rate": 8.86411095531595e-06, + "loss": 24.4635, + "step": 148480 + }, + { + "epoch": 0.2999591947219787, + "grad_norm": 25.860544204711914, + "learning_rate": 8.863889420817418e-06, + "loss": 20.8214, + "step": 148490 + }, + { + "epoch": 0.2999793953546625, + "grad_norm": 277.68365478515625, + "learning_rate": 8.863667867486756e-06, + "loss": 14.4374, + "step": 148500 + }, + { + "epoch": 0.2999995959873463, + "grad_norm": 1149.8858642578125, + "learning_rate": 8.863446295325047e-06, + "loss": 31.039, + "step": 148510 + }, + { + "epoch": 0.30001979662003014, + "grad_norm": 527.5618286132812, + "learning_rate": 8.863224704333368e-06, + "loss": 17.3079, + "step": 148520 + }, + { + "epoch": 0.30003999725271396, + "grad_norm": 315.8086242675781, + "learning_rate": 8.863003094512801e-06, + "loss": 18.0132, + "step": 148530 + }, + { + "epoch": 0.3000601978853978, + "grad_norm": 202.37364196777344, + "learning_rate": 8.862781465864427e-06, + "loss": 17.174, + "step": 148540 + }, + { + "epoch": 0.3000803985180816, + "grad_norm": 254.23915100097656, + "learning_rate": 8.862559818389322e-06, + "loss": 24.6349, + "step": 148550 + }, + { + "epoch": 0.3001005991507654, + "grad_norm": 620.2166748046875, + "learning_rate": 8.862338152088573e-06, + "loss": 31.1625, + "step": 148560 + }, + { + "epoch": 0.30012079978344924, + "grad_norm": 253.61141967773438, + "learning_rate": 8.862116466963251e-06, + "loss": 20.4655, + "step": 148570 + }, + { + "epoch": 0.300141000416133, + "grad_norm": 197.57196044921875, + "learning_rate": 8.861894763014444e-06, + "loss": 16.0904, + "step": 148580 + }, + { + "epoch": 0.30016120104881683, + "grad_norm": 282.3333435058594, + "learning_rate": 8.861673040243231e-06, + "loss": 26.0632, + "step": 148590 + }, + { + "epoch": 0.30018140168150065, + "grad_norm": 331.51220703125, + "learning_rate": 
8.861451298650692e-06, + "loss": 22.1169, + "step": 148600 + }, + { + "epoch": 0.30020160231418447, + "grad_norm": 673.6158447265625, + "learning_rate": 8.861229538237908e-06, + "loss": 22.2989, + "step": 148610 + }, + { + "epoch": 0.3002218029468683, + "grad_norm": 127.45361328125, + "learning_rate": 8.861007759005959e-06, + "loss": 38.9359, + "step": 148620 + }, + { + "epoch": 0.3002420035795521, + "grad_norm": 285.5706787109375, + "learning_rate": 8.860785960955926e-06, + "loss": 36.556, + "step": 148630 + }, + { + "epoch": 0.30026220421223593, + "grad_norm": 534.787353515625, + "learning_rate": 8.860564144088891e-06, + "loss": 23.1304, + "step": 148640 + }, + { + "epoch": 0.30028240484491975, + "grad_norm": 253.01190185546875, + "learning_rate": 8.860342308405933e-06, + "loss": 46.5173, + "step": 148650 + }, + { + "epoch": 0.30030260547760357, + "grad_norm": 52.00938415527344, + "learning_rate": 8.860120453908138e-06, + "loss": 39.1426, + "step": 148660 + }, + { + "epoch": 0.3003228061102874, + "grad_norm": 403.021240234375, + "learning_rate": 8.859898580596581e-06, + "loss": 24.2871, + "step": 148670 + }, + { + "epoch": 0.3003430067429712, + "grad_norm": 446.07373046875, + "learning_rate": 8.859676688472349e-06, + "loss": 29.1645, + "step": 148680 + }, + { + "epoch": 0.30036320737565503, + "grad_norm": 36.96280288696289, + "learning_rate": 8.85945477753652e-06, + "loss": 22.4767, + "step": 148690 + }, + { + "epoch": 0.30038340800833885, + "grad_norm": 534.2410278320312, + "learning_rate": 8.859232847790175e-06, + "loss": 27.2594, + "step": 148700 + }, + { + "epoch": 0.3004036086410226, + "grad_norm": 391.3323059082031, + "learning_rate": 8.859010899234399e-06, + "loss": 14.8472, + "step": 148710 + }, + { + "epoch": 0.30042380927370643, + "grad_norm": 955.7097778320312, + "learning_rate": 8.85878893187027e-06, + "loss": 21.2886, + "step": 148720 + }, + { + "epoch": 0.30044400990639025, + "grad_norm": 309.7611389160156, + "learning_rate": 8.858566945698874e-06, + "loss": 25.0879, + "step": 148730 + }, + { + "epoch": 0.3004642105390741, + "grad_norm": 423.8357849121094, + "learning_rate": 8.858344940721291e-06, + "loss": 32.2133, + "step": 148740 + }, + { + "epoch": 0.3004844111717579, + "grad_norm": 368.7214660644531, + "learning_rate": 8.858122916938601e-06, + "loss": 27.5963, + "step": 148750 + }, + { + "epoch": 0.3005046118044417, + "grad_norm": 1165.051025390625, + "learning_rate": 8.857900874351888e-06, + "loss": 13.9224, + "step": 148760 + }, + { + "epoch": 0.30052481243712553, + "grad_norm": 721.8985595703125, + "learning_rate": 8.857678812962234e-06, + "loss": 40.7466, + "step": 148770 + }, + { + "epoch": 0.30054501306980935, + "grad_norm": 1.9701683521270752, + "learning_rate": 8.857456732770723e-06, + "loss": 12.1633, + "step": 148780 + }, + { + "epoch": 0.3005652137024932, + "grad_norm": 347.9275207519531, + "learning_rate": 8.857234633778434e-06, + "loss": 26.421, + "step": 148790 + }, + { + "epoch": 0.300585414335177, + "grad_norm": 676.7006225585938, + "learning_rate": 8.857012515986452e-06, + "loss": 20.2109, + "step": 148800 + }, + { + "epoch": 0.3006056149678608, + "grad_norm": 375.9146728515625, + "learning_rate": 8.856790379395858e-06, + "loss": 18.1709, + "step": 148810 + }, + { + "epoch": 0.30062581560054463, + "grad_norm": 849.3779907226562, + "learning_rate": 8.856568224007736e-06, + "loss": 29.7461, + "step": 148820 + }, + { + "epoch": 0.3006460162332284, + "grad_norm": 428.88885498046875, + "learning_rate": 8.856346049823169e-06, + "loss": 20.8408, + "step": 
148830 + }, + { + "epoch": 0.3006662168659122, + "grad_norm": 131.7697296142578, + "learning_rate": 8.856123856843236e-06, + "loss": 25.3181, + "step": 148840 + }, + { + "epoch": 0.30068641749859604, + "grad_norm": 74.23992156982422, + "learning_rate": 8.855901645069026e-06, + "loss": 33.1354, + "step": 148850 + }, + { + "epoch": 0.30070661813127986, + "grad_norm": 176.134521484375, + "learning_rate": 8.855679414501619e-06, + "loss": 16.2658, + "step": 148860 + }, + { + "epoch": 0.3007268187639637, + "grad_norm": 615.5466918945312, + "learning_rate": 8.855457165142096e-06, + "loss": 13.6649, + "step": 148870 + }, + { + "epoch": 0.3007470193966475, + "grad_norm": 1026.47607421875, + "learning_rate": 8.855234896991544e-06, + "loss": 36.0267, + "step": 148880 + }, + { + "epoch": 0.3007672200293313, + "grad_norm": 62.51200866699219, + "learning_rate": 8.855012610051045e-06, + "loss": 20.6455, + "step": 148890 + }, + { + "epoch": 0.30078742066201514, + "grad_norm": 173.38348388671875, + "learning_rate": 8.854790304321682e-06, + "loss": 12.8308, + "step": 148900 + }, + { + "epoch": 0.30080762129469896, + "grad_norm": 337.7551574707031, + "learning_rate": 8.854567979804538e-06, + "loss": 14.6456, + "step": 148910 + }, + { + "epoch": 0.3008278219273828, + "grad_norm": 418.7915954589844, + "learning_rate": 8.854345636500698e-06, + "loss": 27.0897, + "step": 148920 + }, + { + "epoch": 0.3008480225600666, + "grad_norm": 219.07815551757812, + "learning_rate": 8.854123274411243e-06, + "loss": 17.9353, + "step": 148930 + }, + { + "epoch": 0.3008682231927504, + "grad_norm": 667.05419921875, + "learning_rate": 8.85390089353726e-06, + "loss": 20.7854, + "step": 148940 + }, + { + "epoch": 0.30088842382543424, + "grad_norm": 573.677978515625, + "learning_rate": 8.853678493879832e-06, + "loss": 39.0861, + "step": 148950 + }, + { + "epoch": 0.300908624458118, + "grad_norm": 249.09042358398438, + "learning_rate": 8.853456075440041e-06, + "loss": 19.4665, + "step": 148960 + }, + { + "epoch": 0.3009288250908018, + "grad_norm": 94.33660888671875, + "learning_rate": 8.853233638218974e-06, + "loss": 26.8469, + "step": 148970 + }, + { + "epoch": 0.30094902572348564, + "grad_norm": 193.69461059570312, + "learning_rate": 8.853011182217712e-06, + "loss": 23.3123, + "step": 148980 + }, + { + "epoch": 0.30096922635616946, + "grad_norm": 241.58082580566406, + "learning_rate": 8.852788707437343e-06, + "loss": 17.7294, + "step": 148990 + }, + { + "epoch": 0.3009894269888533, + "grad_norm": 17.932788848876953, + "learning_rate": 8.852566213878947e-06, + "loss": 18.8882, + "step": 149000 + }, + { + "epoch": 0.3010096276215371, + "grad_norm": 572.9925537109375, + "learning_rate": 8.852343701543611e-06, + "loss": 16.8183, + "step": 149010 + }, + { + "epoch": 0.3010298282542209, + "grad_norm": 693.1661987304688, + "learning_rate": 8.85212117043242e-06, + "loss": 34.758, + "step": 149020 + }, + { + "epoch": 0.30105002888690474, + "grad_norm": 305.8961181640625, + "learning_rate": 8.851898620546456e-06, + "loss": 16.6461, + "step": 149030 + }, + { + "epoch": 0.30107022951958856, + "grad_norm": 335.46087646484375, + "learning_rate": 8.851676051886805e-06, + "loss": 28.4095, + "step": 149040 + }, + { + "epoch": 0.3010904301522724, + "grad_norm": 290.3029479980469, + "learning_rate": 8.851453464454555e-06, + "loss": 57.3568, + "step": 149050 + }, + { + "epoch": 0.3011106307849562, + "grad_norm": 184.10365295410156, + "learning_rate": 8.851230858250785e-06, + "loss": 14.5301, + "step": 149060 + }, + { + "epoch": 0.30113083141764, + 
"grad_norm": 259.8624572753906, + "learning_rate": 8.851008233276586e-06, + "loss": 24.3177, + "step": 149070 + }, + { + "epoch": 0.30115103205032384, + "grad_norm": 420.3094787597656, + "learning_rate": 8.850785589533038e-06, + "loss": 21.9963, + "step": 149080 + }, + { + "epoch": 0.3011712326830076, + "grad_norm": 785.2965698242188, + "learning_rate": 8.850562927021227e-06, + "loss": 22.0542, + "step": 149090 + }, + { + "epoch": 0.3011914333156914, + "grad_norm": 389.8663635253906, + "learning_rate": 8.85034024574224e-06, + "loss": 26.2576, + "step": 149100 + }, + { + "epoch": 0.30121163394837525, + "grad_norm": 51.002445220947266, + "learning_rate": 8.850117545697163e-06, + "loss": 24.099, + "step": 149110 + }, + { + "epoch": 0.30123183458105907, + "grad_norm": 1020.509765625, + "learning_rate": 8.849894826887078e-06, + "loss": 27.3531, + "step": 149120 + }, + { + "epoch": 0.3012520352137429, + "grad_norm": 355.1165466308594, + "learning_rate": 8.849672089313074e-06, + "loss": 17.6457, + "step": 149130 + }, + { + "epoch": 0.3012722358464267, + "grad_norm": 596.9937744140625, + "learning_rate": 8.849449332976235e-06, + "loss": 28.6967, + "step": 149140 + }, + { + "epoch": 0.3012924364791105, + "grad_norm": 451.7833251953125, + "learning_rate": 8.849226557877647e-06, + "loss": 22.5846, + "step": 149150 + }, + { + "epoch": 0.30131263711179435, + "grad_norm": 796.8441772460938, + "learning_rate": 8.849003764018395e-06, + "loss": 20.1726, + "step": 149160 + }, + { + "epoch": 0.30133283774447817, + "grad_norm": 398.1156311035156, + "learning_rate": 8.848780951399566e-06, + "loss": 22.2843, + "step": 149170 + }, + { + "epoch": 0.301353038377162, + "grad_norm": 278.8240661621094, + "learning_rate": 8.848558120022246e-06, + "loss": 17.262, + "step": 149180 + }, + { + "epoch": 0.3013732390098458, + "grad_norm": 442.8303527832031, + "learning_rate": 8.84833526988752e-06, + "loss": 22.3545, + "step": 149190 + }, + { + "epoch": 0.3013934396425296, + "grad_norm": 583.061767578125, + "learning_rate": 8.848112400996473e-06, + "loss": 29.7904, + "step": 149200 + }, + { + "epoch": 0.30141364027521345, + "grad_norm": 318.1643371582031, + "learning_rate": 8.847889513350195e-06, + "loss": 19.487, + "step": 149210 + }, + { + "epoch": 0.3014338409078972, + "grad_norm": 699.6163940429688, + "learning_rate": 8.847666606949768e-06, + "loss": 34.3073, + "step": 149220 + }, + { + "epoch": 0.30145404154058103, + "grad_norm": 454.4139709472656, + "learning_rate": 8.847443681796283e-06, + "loss": 25.9923, + "step": 149230 + }, + { + "epoch": 0.30147424217326485, + "grad_norm": 275.9908752441406, + "learning_rate": 8.847220737890823e-06, + "loss": 16.7415, + "step": 149240 + }, + { + "epoch": 0.30149444280594867, + "grad_norm": 469.0674133300781, + "learning_rate": 8.846997775234476e-06, + "loss": 16.5077, + "step": 149250 + }, + { + "epoch": 0.3015146434386325, + "grad_norm": 214.13780212402344, + "learning_rate": 8.846774793828328e-06, + "loss": 23.2627, + "step": 149260 + }, + { + "epoch": 0.3015348440713163, + "grad_norm": 91.90431213378906, + "learning_rate": 8.846551793673467e-06, + "loss": 14.4711, + "step": 149270 + }, + { + "epoch": 0.30155504470400013, + "grad_norm": 679.1061401367188, + "learning_rate": 8.846328774770978e-06, + "loss": 26.7924, + "step": 149280 + }, + { + "epoch": 0.30157524533668395, + "grad_norm": 333.43511962890625, + "learning_rate": 8.84610573712195e-06, + "loss": 30.4296, + "step": 149290 + }, + { + "epoch": 0.30159544596936777, + "grad_norm": 172.83203125, + "learning_rate": 
8.84588268072747e-06, + "loss": 21.8123, + "step": 149300 + }, + { + "epoch": 0.3016156466020516, + "grad_norm": 50.759910583496094, + "learning_rate": 8.845659605588622e-06, + "loss": 10.55, + "step": 149310 + }, + { + "epoch": 0.3016358472347354, + "grad_norm": 423.1531677246094, + "learning_rate": 8.845436511706497e-06, + "loss": 27.9608, + "step": 149320 + }, + { + "epoch": 0.30165604786741923, + "grad_norm": 1031.787841796875, + "learning_rate": 8.84521339908218e-06, + "loss": 21.9223, + "step": 149330 + }, + { + "epoch": 0.30167624850010305, + "grad_norm": 473.7039489746094, + "learning_rate": 8.84499026771676e-06, + "loss": 37.165, + "step": 149340 + }, + { + "epoch": 0.3016964491327868, + "grad_norm": 576.4017944335938, + "learning_rate": 8.844767117611324e-06, + "loss": 26.5359, + "step": 149350 + }, + { + "epoch": 0.30171664976547063, + "grad_norm": 297.1952209472656, + "learning_rate": 8.844543948766958e-06, + "loss": 15.7599, + "step": 149360 + }, + { + "epoch": 0.30173685039815445, + "grad_norm": 466.27398681640625, + "learning_rate": 8.844320761184753e-06, + "loss": 15.319, + "step": 149370 + }, + { + "epoch": 0.3017570510308383, + "grad_norm": 417.2751770019531, + "learning_rate": 8.844097554865792e-06, + "loss": 27.3471, + "step": 149380 + }, + { + "epoch": 0.3017772516635221, + "grad_norm": 739.0093994140625, + "learning_rate": 8.84387432981117e-06, + "loss": 21.3136, + "step": 149390 + }, + { + "epoch": 0.3017974522962059, + "grad_norm": 340.4967956542969, + "learning_rate": 8.843651086021966e-06, + "loss": 17.8877, + "step": 149400 + }, + { + "epoch": 0.30181765292888973, + "grad_norm": 435.0361633300781, + "learning_rate": 8.843427823499275e-06, + "loss": 13.4663, + "step": 149410 + }, + { + "epoch": 0.30183785356157355, + "grad_norm": 152.16822814941406, + "learning_rate": 8.843204542244184e-06, + "loss": 16.7841, + "step": 149420 + }, + { + "epoch": 0.3018580541942574, + "grad_norm": 545.1543579101562, + "learning_rate": 8.842981242257779e-06, + "loss": 12.907, + "step": 149430 + }, + { + "epoch": 0.3018782548269412, + "grad_norm": 474.6297912597656, + "learning_rate": 8.84275792354115e-06, + "loss": 26.8501, + "step": 149440 + }, + { + "epoch": 0.301898455459625, + "grad_norm": 616.8176879882812, + "learning_rate": 8.842534586095383e-06, + "loss": 25.1576, + "step": 149450 + }, + { + "epoch": 0.30191865609230883, + "grad_norm": 318.8073425292969, + "learning_rate": 8.842311229921571e-06, + "loss": 17.5954, + "step": 149460 + }, + { + "epoch": 0.3019388567249926, + "grad_norm": 375.3653869628906, + "learning_rate": 8.8420878550208e-06, + "loss": 21.3203, + "step": 149470 + }, + { + "epoch": 0.3019590573576764, + "grad_norm": 268.08160400390625, + "learning_rate": 8.841864461394158e-06, + "loss": 24.1415, + "step": 149480 + }, + { + "epoch": 0.30197925799036024, + "grad_norm": 27.583419799804688, + "learning_rate": 8.841641049042732e-06, + "loss": 34.7693, + "step": 149490 + }, + { + "epoch": 0.30199945862304406, + "grad_norm": 709.3275146484375, + "learning_rate": 8.841417617967618e-06, + "loss": 42.0899, + "step": 149500 + }, + { + "epoch": 0.3020196592557279, + "grad_norm": 278.5901794433594, + "learning_rate": 8.841194168169897e-06, + "loss": 16.8193, + "step": 149510 + }, + { + "epoch": 0.3020398598884117, + "grad_norm": 536.4020385742188, + "learning_rate": 8.840970699650665e-06, + "loss": 22.6882, + "step": 149520 + }, + { + "epoch": 0.3020600605210955, + "grad_norm": 147.03111267089844, + "learning_rate": 8.840747212411005e-06, + "loss": 22.2375, + "step": 
149530 + }, + { + "epoch": 0.30208026115377934, + "grad_norm": 389.63531494140625, + "learning_rate": 8.84052370645201e-06, + "loss": 41.2673, + "step": 149540 + }, + { + "epoch": 0.30210046178646316, + "grad_norm": 603.1446533203125, + "learning_rate": 8.840300181774767e-06, + "loss": 20.7329, + "step": 149550 + }, + { + "epoch": 0.302120662419147, + "grad_norm": 198.4102020263672, + "learning_rate": 8.840076638380368e-06, + "loss": 23.4251, + "step": 149560 + }, + { + "epoch": 0.3021408630518308, + "grad_norm": 312.51983642578125, + "learning_rate": 8.8398530762699e-06, + "loss": 11.6308, + "step": 149570 + }, + { + "epoch": 0.3021610636845146, + "grad_norm": 326.4072265625, + "learning_rate": 8.839629495444455e-06, + "loss": 25.1864, + "step": 149580 + }, + { + "epoch": 0.30218126431719844, + "grad_norm": 297.5825500488281, + "learning_rate": 8.83940589590512e-06, + "loss": 24.1519, + "step": 149590 + }, + { + "epoch": 0.3022014649498822, + "grad_norm": 606.0067749023438, + "learning_rate": 8.83918227765299e-06, + "loss": 14.6197, + "step": 149600 + }, + { + "epoch": 0.302221665582566, + "grad_norm": 760.7696533203125, + "learning_rate": 8.838958640689146e-06, + "loss": 20.4481, + "step": 149610 + }, + { + "epoch": 0.30224186621524984, + "grad_norm": 314.8655700683594, + "learning_rate": 8.838734985014686e-06, + "loss": 14.067, + "step": 149620 + }, + { + "epoch": 0.30226206684793366, + "grad_norm": 378.96044921875, + "learning_rate": 8.838511310630697e-06, + "loss": 15.6507, + "step": 149630 + }, + { + "epoch": 0.3022822674806175, + "grad_norm": 649.549560546875, + "learning_rate": 8.83828761753827e-06, + "loss": 13.1199, + "step": 149640 + }, + { + "epoch": 0.3023024681133013, + "grad_norm": 782.0072631835938, + "learning_rate": 8.838063905738495e-06, + "loss": 34.4812, + "step": 149650 + }, + { + "epoch": 0.3023226687459851, + "grad_norm": 651.4970092773438, + "learning_rate": 8.83784017523246e-06, + "loss": 16.3783, + "step": 149660 + }, + { + "epoch": 0.30234286937866894, + "grad_norm": 219.46450805664062, + "learning_rate": 8.837616426021259e-06, + "loss": 30.7405, + "step": 149670 + }, + { + "epoch": 0.30236307001135276, + "grad_norm": 335.22442626953125, + "learning_rate": 8.837392658105981e-06, + "loss": 15.6363, + "step": 149680 + }, + { + "epoch": 0.3023832706440366, + "grad_norm": 191.90707397460938, + "learning_rate": 8.837168871487715e-06, + "loss": 21.2069, + "step": 149690 + }, + { + "epoch": 0.3024034712767204, + "grad_norm": 288.7420959472656, + "learning_rate": 8.836945066167556e-06, + "loss": 21.0821, + "step": 149700 + }, + { + "epoch": 0.3024236719094042, + "grad_norm": 437.4322509765625, + "learning_rate": 8.83672124214659e-06, + "loss": 48.1713, + "step": 149710 + }, + { + "epoch": 0.30244387254208804, + "grad_norm": 1047.5965576171875, + "learning_rate": 8.83649739942591e-06, + "loss": 14.8326, + "step": 149720 + }, + { + "epoch": 0.3024640731747718, + "grad_norm": 359.870849609375, + "learning_rate": 8.836273538006608e-06, + "loss": 19.7465, + "step": 149730 + }, + { + "epoch": 0.30248427380745563, + "grad_norm": 127.4852523803711, + "learning_rate": 8.836049657889774e-06, + "loss": 24.2413, + "step": 149740 + }, + { + "epoch": 0.30250447444013945, + "grad_norm": 557.8663330078125, + "learning_rate": 8.8358257590765e-06, + "loss": 16.4603, + "step": 149750 + }, + { + "epoch": 0.30252467507282327, + "grad_norm": 460.479248046875, + "learning_rate": 8.835601841567874e-06, + "loss": 18.994, + "step": 149760 + }, + { + "epoch": 0.3025448757055071, + "grad_norm": 
961.6781616210938, + "learning_rate": 8.835377905364992e-06, + "loss": 11.2767, + "step": 149770 + }, + { + "epoch": 0.3025650763381909, + "grad_norm": 1103.5975341796875, + "learning_rate": 8.835153950468943e-06, + "loss": 45.3997, + "step": 149780 + }, + { + "epoch": 0.30258527697087473, + "grad_norm": 306.8983459472656, + "learning_rate": 8.834929976880818e-06, + "loss": 22.383, + "step": 149790 + }, + { + "epoch": 0.30260547760355855, + "grad_norm": 438.7554016113281, + "learning_rate": 8.834705984601708e-06, + "loss": 15.0056, + "step": 149800 + }, + { + "epoch": 0.30262567823624237, + "grad_norm": 559.1272583007812, + "learning_rate": 8.834481973632709e-06, + "loss": 27.3405, + "step": 149810 + }, + { + "epoch": 0.3026458788689262, + "grad_norm": 867.9369506835938, + "learning_rate": 8.83425794397491e-06, + "loss": 19.3661, + "step": 149820 + }, + { + "epoch": 0.30266607950161, + "grad_norm": 116.16118621826172, + "learning_rate": 8.8340338956294e-06, + "loss": 21.6849, + "step": 149830 + }, + { + "epoch": 0.30268628013429383, + "grad_norm": 685.396240234375, + "learning_rate": 8.833809828597275e-06, + "loss": 10.8325, + "step": 149840 + }, + { + "epoch": 0.30270648076697765, + "grad_norm": 608.677978515625, + "learning_rate": 8.833585742879627e-06, + "loss": 22.01, + "step": 149850 + }, + { + "epoch": 0.3027266813996614, + "grad_norm": 234.95603942871094, + "learning_rate": 8.833361638477546e-06, + "loss": 25.5733, + "step": 149860 + }, + { + "epoch": 0.30274688203234523, + "grad_norm": 517.9550170898438, + "learning_rate": 8.833137515392125e-06, + "loss": 52.3825, + "step": 149870 + }, + { + "epoch": 0.30276708266502905, + "grad_norm": 1143.35498046875, + "learning_rate": 8.832913373624458e-06, + "loss": 25.8712, + "step": 149880 + }, + { + "epoch": 0.3027872832977129, + "grad_norm": 573.56396484375, + "learning_rate": 8.832689213175636e-06, + "loss": 25.1794, + "step": 149890 + }, + { + "epoch": 0.3028074839303967, + "grad_norm": 238.27435302734375, + "learning_rate": 8.83246503404675e-06, + "loss": 17.7551, + "step": 149900 + }, + { + "epoch": 0.3028276845630805, + "grad_norm": 483.5244445800781, + "learning_rate": 8.832240836238894e-06, + "loss": 24.2964, + "step": 149910 + }, + { + "epoch": 0.30284788519576433, + "grad_norm": 274.0011291503906, + "learning_rate": 8.832016619753164e-06, + "loss": 30.2346, + "step": 149920 + }, + { + "epoch": 0.30286808582844815, + "grad_norm": 139.33047485351562, + "learning_rate": 8.831792384590646e-06, + "loss": 18.8885, + "step": 149930 + }, + { + "epoch": 0.302888286461132, + "grad_norm": 232.16989135742188, + "learning_rate": 8.831568130752439e-06, + "loss": 21.3329, + "step": 149940 + }, + { + "epoch": 0.3029084870938158, + "grad_norm": 214.23403930664062, + "learning_rate": 8.831343858239634e-06, + "loss": 10.0199, + "step": 149950 + }, + { + "epoch": 0.3029286877264996, + "grad_norm": 383.21649169921875, + "learning_rate": 8.831119567053323e-06, + "loss": 21.5086, + "step": 149960 + }, + { + "epoch": 0.30294888835918343, + "grad_norm": 685.4547119140625, + "learning_rate": 8.8308952571946e-06, + "loss": 30.082, + "step": 149970 + }, + { + "epoch": 0.30296908899186725, + "grad_norm": 372.8331298828125, + "learning_rate": 8.830670928664558e-06, + "loss": 19.3065, + "step": 149980 + }, + { + "epoch": 0.302989289624551, + "grad_norm": 211.326904296875, + "learning_rate": 8.83044658146429e-06, + "loss": 20.4998, + "step": 149990 + }, + { + "epoch": 0.30300949025723484, + "grad_norm": 581.1596069335938, + "learning_rate": 8.83022221559489e-06, 
+ "loss": 16.4117, + "step": 150000 + }, + { + "epoch": 0.30302969088991866, + "grad_norm": 486.1495056152344, + "learning_rate": 8.829997831057454e-06, + "loss": 33.2905, + "step": 150010 + }, + { + "epoch": 0.3030498915226025, + "grad_norm": 880.4935302734375, + "learning_rate": 8.82977342785307e-06, + "loss": 48.0187, + "step": 150020 + }, + { + "epoch": 0.3030700921552863, + "grad_norm": 184.04718017578125, + "learning_rate": 8.829549005982836e-06, + "loss": 24.734, + "step": 150030 + }, + { + "epoch": 0.3030902927879701, + "grad_norm": 627.1402587890625, + "learning_rate": 8.829324565447844e-06, + "loss": 14.1204, + "step": 150040 + }, + { + "epoch": 0.30311049342065394, + "grad_norm": 205.56370544433594, + "learning_rate": 8.829100106249189e-06, + "loss": 21.3699, + "step": 150050 + }, + { + "epoch": 0.30313069405333776, + "grad_norm": 785.6224975585938, + "learning_rate": 8.828875628387964e-06, + "loss": 19.8302, + "step": 150060 + }, + { + "epoch": 0.3031508946860216, + "grad_norm": 745.4777221679688, + "learning_rate": 8.828651131865264e-06, + "loss": 11.0934, + "step": 150070 + }, + { + "epoch": 0.3031710953187054, + "grad_norm": 432.5574035644531, + "learning_rate": 8.828426616682184e-06, + "loss": 25.6614, + "step": 150080 + }, + { + "epoch": 0.3031912959513892, + "grad_norm": 634.8599243164062, + "learning_rate": 8.828202082839816e-06, + "loss": 30.2482, + "step": 150090 + }, + { + "epoch": 0.30321149658407304, + "grad_norm": 481.1977233886719, + "learning_rate": 8.827977530339254e-06, + "loss": 15.1406, + "step": 150100 + }, + { + "epoch": 0.3032316972167568, + "grad_norm": 526.1364135742188, + "learning_rate": 8.827752959181594e-06, + "loss": 31.77, + "step": 150110 + }, + { + "epoch": 0.3032518978494406, + "grad_norm": 54.602298736572266, + "learning_rate": 8.827528369367932e-06, + "loss": 20.0991, + "step": 150120 + }, + { + "epoch": 0.30327209848212444, + "grad_norm": 231.0094757080078, + "learning_rate": 8.82730376089936e-06, + "loss": 34.4267, + "step": 150130 + }, + { + "epoch": 0.30329229911480826, + "grad_norm": 710.7511596679688, + "learning_rate": 8.827079133776975e-06, + "loss": 43.0145, + "step": 150140 + }, + { + "epoch": 0.3033124997474921, + "grad_norm": 449.840576171875, + "learning_rate": 8.826854488001869e-06, + "loss": 21.4409, + "step": 150150 + }, + { + "epoch": 0.3033327003801759, + "grad_norm": 290.668212890625, + "learning_rate": 8.826629823575138e-06, + "loss": 19.9206, + "step": 150160 + }, + { + "epoch": 0.3033529010128597, + "grad_norm": 330.00701904296875, + "learning_rate": 8.826405140497878e-06, + "loss": 19.1429, + "step": 150170 + }, + { + "epoch": 0.30337310164554354, + "grad_norm": 497.4096984863281, + "learning_rate": 8.826180438771184e-06, + "loss": 12.3778, + "step": 150180 + }, + { + "epoch": 0.30339330227822736, + "grad_norm": 344.986572265625, + "learning_rate": 8.82595571839615e-06, + "loss": 19.5334, + "step": 150190 + }, + { + "epoch": 0.3034135029109112, + "grad_norm": 323.72918701171875, + "learning_rate": 8.825730979373873e-06, + "loss": 21.6462, + "step": 150200 + }, + { + "epoch": 0.303433703543595, + "grad_norm": 276.95745849609375, + "learning_rate": 8.825506221705445e-06, + "loss": 30.4611, + "step": 150210 + }, + { + "epoch": 0.3034539041762788, + "grad_norm": 318.9707336425781, + "learning_rate": 8.825281445391965e-06, + "loss": 29.1202, + "step": 150220 + }, + { + "epoch": 0.30347410480896264, + "grad_norm": 257.0148010253906, + "learning_rate": 8.825056650434528e-06, + "loss": 31.8865, + "step": 150230 + }, + { + 
"epoch": 0.3034943054416464, + "grad_norm": 279.8083801269531, + "learning_rate": 8.824831836834227e-06, + "loss": 41.3482, + "step": 150240 + }, + { + "epoch": 0.3035145060743302, + "grad_norm": 881.94580078125, + "learning_rate": 8.824607004592161e-06, + "loss": 40.3344, + "step": 150250 + }, + { + "epoch": 0.30353470670701405, + "grad_norm": 513.8170776367188, + "learning_rate": 8.824382153709423e-06, + "loss": 10.2271, + "step": 150260 + }, + { + "epoch": 0.30355490733969787, + "grad_norm": 231.94570922851562, + "learning_rate": 8.824157284187111e-06, + "loss": 12.5738, + "step": 150270 + }, + { + "epoch": 0.3035751079723817, + "grad_norm": 339.27716064453125, + "learning_rate": 8.82393239602632e-06, + "loss": 15.0772, + "step": 150280 + }, + { + "epoch": 0.3035953086050655, + "grad_norm": 293.11962890625, + "learning_rate": 8.823707489228145e-06, + "loss": 27.0696, + "step": 150290 + }, + { + "epoch": 0.3036155092377493, + "grad_norm": 374.0193176269531, + "learning_rate": 8.823482563793687e-06, + "loss": 22.8484, + "step": 150300 + }, + { + "epoch": 0.30363570987043315, + "grad_norm": 483.96160888671875, + "learning_rate": 8.823257619724036e-06, + "loss": 15.1993, + "step": 150310 + }, + { + "epoch": 0.30365591050311697, + "grad_norm": 179.94659423828125, + "learning_rate": 8.82303265702029e-06, + "loss": 11.1126, + "step": 150320 + }, + { + "epoch": 0.3036761111358008, + "grad_norm": 181.8113555908203, + "learning_rate": 8.82280767568355e-06, + "loss": 13.8506, + "step": 150330 + }, + { + "epoch": 0.3036963117684846, + "grad_norm": 278.054931640625, + "learning_rate": 8.822582675714906e-06, + "loss": 29.6194, + "step": 150340 + }, + { + "epoch": 0.3037165124011684, + "grad_norm": 674.2128295898438, + "learning_rate": 8.82235765711546e-06, + "loss": 32.5813, + "step": 150350 + }, + { + "epoch": 0.30373671303385225, + "grad_norm": 388.39276123046875, + "learning_rate": 8.822132619886303e-06, + "loss": 14.978, + "step": 150360 + }, + { + "epoch": 0.303756913666536, + "grad_norm": 140.5186309814453, + "learning_rate": 8.821907564028538e-06, + "loss": 16.2977, + "step": 150370 + }, + { + "epoch": 0.30377711429921983, + "grad_norm": 503.03759765625, + "learning_rate": 8.821682489543259e-06, + "loss": 42.961, + "step": 150380 + }, + { + "epoch": 0.30379731493190365, + "grad_norm": 1607.3330078125, + "learning_rate": 8.821457396431563e-06, + "loss": 36.7542, + "step": 150390 + }, + { + "epoch": 0.30381751556458747, + "grad_norm": 450.153564453125, + "learning_rate": 8.821232284694545e-06, + "loss": 13.293, + "step": 150400 + }, + { + "epoch": 0.3038377161972713, + "grad_norm": 173.54832458496094, + "learning_rate": 8.821007154333308e-06, + "loss": 11.8466, + "step": 150410 + }, + { + "epoch": 0.3038579168299551, + "grad_norm": 55.469947814941406, + "learning_rate": 8.820782005348943e-06, + "loss": 36.09, + "step": 150420 + }, + { + "epoch": 0.30387811746263893, + "grad_norm": 509.9010009765625, + "learning_rate": 8.82055683774255e-06, + "loss": 21.5323, + "step": 150430 + }, + { + "epoch": 0.30389831809532275, + "grad_norm": 361.66937255859375, + "learning_rate": 8.820331651515226e-06, + "loss": 24.5503, + "step": 150440 + }, + { + "epoch": 0.30391851872800657, + "grad_norm": 479.13800048828125, + "learning_rate": 8.820106446668071e-06, + "loss": 10.7529, + "step": 150450 + }, + { + "epoch": 0.3039387193606904, + "grad_norm": 371.19537353515625, + "learning_rate": 8.819881223202179e-06, + "loss": 24.8704, + "step": 150460 + }, + { + "epoch": 0.3039589199933742, + "grad_norm": 
139.8623046875, + "learning_rate": 8.819655981118649e-06, + "loss": 24.5305, + "step": 150470 + }, + { + "epoch": 0.30397912062605803, + "grad_norm": 502.39508056640625, + "learning_rate": 8.819430720418579e-06, + "loss": 27.0709, + "step": 150480 + }, + { + "epoch": 0.30399932125874185, + "grad_norm": 18.935550689697266, + "learning_rate": 8.819205441103067e-06, + "loss": 8.4231, + "step": 150490 + }, + { + "epoch": 0.3040195218914256, + "grad_norm": 813.5499877929688, + "learning_rate": 8.818980143173212e-06, + "loss": 19.9964, + "step": 150500 + }, + { + "epoch": 0.30403972252410943, + "grad_norm": 372.073486328125, + "learning_rate": 8.818754826630109e-06, + "loss": 16.9529, + "step": 150510 + }, + { + "epoch": 0.30405992315679325, + "grad_norm": 553.2196044921875, + "learning_rate": 8.81852949147486e-06, + "loss": 23.6812, + "step": 150520 + }, + { + "epoch": 0.3040801237894771, + "grad_norm": 510.5747375488281, + "learning_rate": 8.81830413770856e-06, + "loss": 16.7492, + "step": 150530 + }, + { + "epoch": 0.3041003244221609, + "grad_norm": 426.5841369628906, + "learning_rate": 8.818078765332309e-06, + "loss": 19.8778, + "step": 150540 + }, + { + "epoch": 0.3041205250548447, + "grad_norm": 267.7163391113281, + "learning_rate": 8.817853374347208e-06, + "loss": 14.2025, + "step": 150550 + }, + { + "epoch": 0.30414072568752853, + "grad_norm": 299.236328125, + "learning_rate": 8.81762796475435e-06, + "loss": 21.0008, + "step": 150560 + }, + { + "epoch": 0.30416092632021235, + "grad_norm": 1006.8319702148438, + "learning_rate": 8.817402536554835e-06, + "loss": 25.6188, + "step": 150570 + }, + { + "epoch": 0.3041811269528962, + "grad_norm": 292.3677062988281, + "learning_rate": 8.817177089749766e-06, + "loss": 17.9441, + "step": 150580 + }, + { + "epoch": 0.30420132758558, + "grad_norm": 192.40882873535156, + "learning_rate": 8.816951624340238e-06, + "loss": 20.5989, + "step": 150590 + }, + { + "epoch": 0.3042215282182638, + "grad_norm": 404.0854797363281, + "learning_rate": 8.81672614032735e-06, + "loss": 7.8247, + "step": 150600 + }, + { + "epoch": 0.30424172885094763, + "grad_norm": 563.6555786132812, + "learning_rate": 8.816500637712201e-06, + "loss": 22.9061, + "step": 150610 + }, + { + "epoch": 0.30426192948363145, + "grad_norm": 723.2141723632812, + "learning_rate": 8.816275116495891e-06, + "loss": 22.3131, + "step": 150620 + }, + { + "epoch": 0.3042821301163152, + "grad_norm": 294.1592712402344, + "learning_rate": 8.816049576679521e-06, + "loss": 15.9305, + "step": 150630 + }, + { + "epoch": 0.30430233074899904, + "grad_norm": 740.9974975585938, + "learning_rate": 8.815824018264187e-06, + "loss": 20.614, + "step": 150640 + }, + { + "epoch": 0.30432253138168286, + "grad_norm": 455.1861572265625, + "learning_rate": 8.815598441250987e-06, + "loss": 15.316, + "step": 150650 + }, + { + "epoch": 0.3043427320143667, + "grad_norm": 623.273193359375, + "learning_rate": 8.815372845641027e-06, + "loss": 24.1247, + "step": 150660 + }, + { + "epoch": 0.3043629326470505, + "grad_norm": 423.2725830078125, + "learning_rate": 8.815147231435402e-06, + "loss": 19.7505, + "step": 150670 + }, + { + "epoch": 0.3043831332797343, + "grad_norm": 416.5577392578125, + "learning_rate": 8.81492159863521e-06, + "loss": 26.7691, + "step": 150680 + }, + { + "epoch": 0.30440333391241814, + "grad_norm": 410.5572509765625, + "learning_rate": 8.814695947241555e-06, + "loss": 20.1439, + "step": 150690 + }, + { + "epoch": 0.30442353454510196, + "grad_norm": 1430.1671142578125, + "learning_rate": 8.814470277255532e-06, 
+ "loss": 29.6562, + "step": 150700 + }, + { + "epoch": 0.3044437351777858, + "grad_norm": 297.33447265625, + "learning_rate": 8.814244588678245e-06, + "loss": 24.6234, + "step": 150710 + }, + { + "epoch": 0.3044639358104696, + "grad_norm": 442.55377197265625, + "learning_rate": 8.814018881510793e-06, + "loss": 25.6873, + "step": 150720 + }, + { + "epoch": 0.3044841364431534, + "grad_norm": 284.0892639160156, + "learning_rate": 8.813793155754273e-06, + "loss": 21.4096, + "step": 150730 + }, + { + "epoch": 0.30450433707583724, + "grad_norm": 378.8963317871094, + "learning_rate": 8.81356741140979e-06, + "loss": 31.793, + "step": 150740 + }, + { + "epoch": 0.304524537708521, + "grad_norm": 391.635498046875, + "learning_rate": 8.813341648478443e-06, + "loss": 17.9829, + "step": 150750 + }, + { + "epoch": 0.3045447383412048, + "grad_norm": 624.3565673828125, + "learning_rate": 8.81311586696133e-06, + "loss": 33.0778, + "step": 150760 + }, + { + "epoch": 0.30456493897388864, + "grad_norm": 898.7164306640625, + "learning_rate": 8.812890066859552e-06, + "loss": 39.0056, + "step": 150770 + }, + { + "epoch": 0.30458513960657246, + "grad_norm": 335.1961669921875, + "learning_rate": 8.81266424817421e-06, + "loss": 17.4489, + "step": 150780 + }, + { + "epoch": 0.3046053402392563, + "grad_norm": 150.5221710205078, + "learning_rate": 8.812438410906407e-06, + "loss": 17.4007, + "step": 150790 + }, + { + "epoch": 0.3046255408719401, + "grad_norm": 478.2091064453125, + "learning_rate": 8.81221255505724e-06, + "loss": 30.3314, + "step": 150800 + }, + { + "epoch": 0.3046457415046239, + "grad_norm": 200.3534393310547, + "learning_rate": 8.811986680627812e-06, + "loss": 17.8803, + "step": 150810 + }, + { + "epoch": 0.30466594213730774, + "grad_norm": 235.67787170410156, + "learning_rate": 8.811760787619224e-06, + "loss": 29.5374, + "step": 150820 + }, + { + "epoch": 0.30468614276999156, + "grad_norm": 124.64483642578125, + "learning_rate": 8.811534876032575e-06, + "loss": 23.3495, + "step": 150830 + }, + { + "epoch": 0.3047063434026754, + "grad_norm": 801.1688232421875, + "learning_rate": 8.811308945868966e-06, + "loss": 15.842, + "step": 150840 + }, + { + "epoch": 0.3047265440353592, + "grad_norm": 195.32472229003906, + "learning_rate": 8.811082997129501e-06, + "loss": 17.8216, + "step": 150850 + }, + { + "epoch": 0.304746744668043, + "grad_norm": 332.58074951171875, + "learning_rate": 8.81085702981528e-06, + "loss": 17.0482, + "step": 150860 + }, + { + "epoch": 0.30476694530072684, + "grad_norm": 864.2814331054688, + "learning_rate": 8.810631043927405e-06, + "loss": 21.3788, + "step": 150870 + }, + { + "epoch": 0.3047871459334106, + "grad_norm": 423.365478515625, + "learning_rate": 8.810405039466973e-06, + "loss": 30.1768, + "step": 150880 + }, + { + "epoch": 0.30480734656609443, + "grad_norm": 341.9500732421875, + "learning_rate": 8.810179016435092e-06, + "loss": 20.8337, + "step": 150890 + }, + { + "epoch": 0.30482754719877825, + "grad_norm": 575.5938110351562, + "learning_rate": 8.80995297483286e-06, + "loss": 31.6785, + "step": 150900 + }, + { + "epoch": 0.30484774783146207, + "grad_norm": 519.7661743164062, + "learning_rate": 8.80972691466138e-06, + "loss": 21.3529, + "step": 150910 + }, + { + "epoch": 0.3048679484641459, + "grad_norm": 451.3832092285156, + "learning_rate": 8.809500835921751e-06, + "loss": 41.5301, + "step": 150920 + }, + { + "epoch": 0.3048881490968297, + "grad_norm": 420.1068420410156, + "learning_rate": 8.809274738615079e-06, + "loss": 11.2938, + "step": 150930 + }, + { + "epoch": 
0.30490834972951353, + "grad_norm": 539.9143676757812, + "learning_rate": 8.809048622742463e-06, + "loss": 9.6369, + "step": 150940 + }, + { + "epoch": 0.30492855036219735, + "grad_norm": 185.07460021972656, + "learning_rate": 8.808822488305005e-06, + "loss": 16.2636, + "step": 150950 + }, + { + "epoch": 0.30494875099488117, + "grad_norm": 300.67254638671875, + "learning_rate": 8.80859633530381e-06, + "loss": 22.1122, + "step": 150960 + }, + { + "epoch": 0.304968951627565, + "grad_norm": 820.0911254882812, + "learning_rate": 8.808370163739978e-06, + "loss": 26.7831, + "step": 150970 + }, + { + "epoch": 0.3049891522602488, + "grad_norm": 120.84183502197266, + "learning_rate": 8.808143973614612e-06, + "loss": 10.9375, + "step": 150980 + }, + { + "epoch": 0.30500935289293263, + "grad_norm": 223.33592224121094, + "learning_rate": 8.807917764928813e-06, + "loss": 17.7902, + "step": 150990 + }, + { + "epoch": 0.30502955352561645, + "grad_norm": 956.6483764648438, + "learning_rate": 8.807691537683685e-06, + "loss": 48.7926, + "step": 151000 + }, + { + "epoch": 0.3050497541583002, + "grad_norm": 292.8885498046875, + "learning_rate": 8.807465291880331e-06, + "loss": 47.7859, + "step": 151010 + }, + { + "epoch": 0.30506995479098403, + "grad_norm": 285.24810791015625, + "learning_rate": 8.807239027519852e-06, + "loss": 11.9052, + "step": 151020 + }, + { + "epoch": 0.30509015542366785, + "grad_norm": 325.638427734375, + "learning_rate": 8.807012744603352e-06, + "loss": 11.6426, + "step": 151030 + }, + { + "epoch": 0.3051103560563517, + "grad_norm": 344.9587707519531, + "learning_rate": 8.806786443131932e-06, + "loss": 25.0543, + "step": 151040 + }, + { + "epoch": 0.3051305566890355, + "grad_norm": 575.2521362304688, + "learning_rate": 8.8065601231067e-06, + "loss": 14.5129, + "step": 151050 + }, + { + "epoch": 0.3051507573217193, + "grad_norm": 277.29754638671875, + "learning_rate": 8.806333784528754e-06, + "loss": 27.8048, + "step": 151060 + }, + { + "epoch": 0.30517095795440313, + "grad_norm": 1071.9381103515625, + "learning_rate": 8.806107427399198e-06, + "loss": 21.264, + "step": 151070 + }, + { + "epoch": 0.30519115858708695, + "grad_norm": 700.8414916992188, + "learning_rate": 8.805881051719137e-06, + "loss": 27.9538, + "step": 151080 + }, + { + "epoch": 0.3052113592197708, + "grad_norm": 1784.7879638671875, + "learning_rate": 8.805654657489673e-06, + "loss": 21.2077, + "step": 151090 + }, + { + "epoch": 0.3052315598524546, + "grad_norm": 364.26922607421875, + "learning_rate": 8.80542824471191e-06, + "loss": 33.0699, + "step": 151100 + }, + { + "epoch": 0.3052517604851384, + "grad_norm": 417.54669189453125, + "learning_rate": 8.80520181338695e-06, + "loss": 15.0805, + "step": 151110 + }, + { + "epoch": 0.30527196111782223, + "grad_norm": 330.0821533203125, + "learning_rate": 8.8049753635159e-06, + "loss": 37.3532, + "step": 151120 + }, + { + "epoch": 0.30529216175050605, + "grad_norm": 614.7396850585938, + "learning_rate": 8.80474889509986e-06, + "loss": 13.9208, + "step": 151130 + }, + { + "epoch": 0.3053123623831898, + "grad_norm": 488.34124755859375, + "learning_rate": 8.804522408139936e-06, + "loss": 24.2354, + "step": 151140 + }, + { + "epoch": 0.30533256301587364, + "grad_norm": 731.0759887695312, + "learning_rate": 8.804295902637233e-06, + "loss": 13.7202, + "step": 151150 + }, + { + "epoch": 0.30535276364855746, + "grad_norm": 26.747020721435547, + "learning_rate": 8.80406937859285e-06, + "loss": 10.319, + "step": 151160 + }, + { + "epoch": 0.3053729642812413, + "grad_norm": 
245.74026489257812, + "learning_rate": 8.803842836007895e-06, + "loss": 13.8397, + "step": 151170 + }, + { + "epoch": 0.3053931649139251, + "grad_norm": 172.47738647460938, + "learning_rate": 8.803616274883473e-06, + "loss": 26.6136, + "step": 151180 + }, + { + "epoch": 0.3054133655466089, + "grad_norm": 51.60191345214844, + "learning_rate": 8.803389695220685e-06, + "loss": 18.4662, + "step": 151190 + }, + { + "epoch": 0.30543356617929274, + "grad_norm": 185.0488739013672, + "learning_rate": 8.803163097020637e-06, + "loss": 17.8821, + "step": 151200 + }, + { + "epoch": 0.30545376681197656, + "grad_norm": 311.64788818359375, + "learning_rate": 8.802936480284434e-06, + "loss": 16.2108, + "step": 151210 + }, + { + "epoch": 0.3054739674446604, + "grad_norm": 318.5120544433594, + "learning_rate": 8.80270984501318e-06, + "loss": 30.7696, + "step": 151220 + }, + { + "epoch": 0.3054941680773442, + "grad_norm": 144.05270385742188, + "learning_rate": 8.802483191207978e-06, + "loss": 19.6995, + "step": 151230 + }, + { + "epoch": 0.305514368710028, + "grad_norm": 172.28659057617188, + "learning_rate": 8.802256518869935e-06, + "loss": 23.231, + "step": 151240 + }, + { + "epoch": 0.30553456934271184, + "grad_norm": 370.2278747558594, + "learning_rate": 8.802029828000157e-06, + "loss": 14.2334, + "step": 151250 + }, + { + "epoch": 0.30555476997539566, + "grad_norm": 341.0437927246094, + "learning_rate": 8.801803118599743e-06, + "loss": 12.3321, + "step": 151260 + }, + { + "epoch": 0.3055749706080794, + "grad_norm": 330.0649108886719, + "learning_rate": 8.801576390669803e-06, + "loss": 27.4561, + "step": 151270 + }, + { + "epoch": 0.30559517124076324, + "grad_norm": 393.53973388671875, + "learning_rate": 8.801349644211442e-06, + "loss": 33.3796, + "step": 151280 + }, + { + "epoch": 0.30561537187344706, + "grad_norm": 719.0829467773438, + "learning_rate": 8.801122879225762e-06, + "loss": 30.0677, + "step": 151290 + }, + { + "epoch": 0.3056355725061309, + "grad_norm": 5.776564121246338, + "learning_rate": 8.80089609571387e-06, + "loss": 23.2832, + "step": 151300 + }, + { + "epoch": 0.3056557731388147, + "grad_norm": 748.0703735351562, + "learning_rate": 8.800669293676876e-06, + "loss": 17.6918, + "step": 151310 + }, + { + "epoch": 0.3056759737714985, + "grad_norm": 338.00482177734375, + "learning_rate": 8.800442473115877e-06, + "loss": 24.5486, + "step": 151320 + }, + { + "epoch": 0.30569617440418234, + "grad_norm": 414.44073486328125, + "learning_rate": 8.800215634031983e-06, + "loss": 23.3791, + "step": 151330 + }, + { + "epoch": 0.30571637503686616, + "grad_norm": 427.2399597167969, + "learning_rate": 8.799988776426298e-06, + "loss": 17.1218, + "step": 151340 + }, + { + "epoch": 0.30573657566955, + "grad_norm": 15.36292839050293, + "learning_rate": 8.799761900299929e-06, + "loss": 24.8374, + "step": 151350 + }, + { + "epoch": 0.3057567763022338, + "grad_norm": 346.0489196777344, + "learning_rate": 8.799535005653982e-06, + "loss": 17.9352, + "step": 151360 + }, + { + "epoch": 0.3057769769349176, + "grad_norm": 172.96986389160156, + "learning_rate": 8.799308092489561e-06, + "loss": 28.0686, + "step": 151370 + }, + { + "epoch": 0.30579717756760144, + "grad_norm": 473.66351318359375, + "learning_rate": 8.799081160807773e-06, + "loss": 19.6241, + "step": 151380 + }, + { + "epoch": 0.3058173782002852, + "grad_norm": 510.2415771484375, + "learning_rate": 8.798854210609727e-06, + "loss": 19.7143, + "step": 151390 + }, + { + "epoch": 0.305837578832969, + "grad_norm": 299.6713562011719, + "learning_rate": 
8.798627241896524e-06, + "loss": 14.5041, + "step": 151400 + }, + { + "epoch": 0.30585777946565285, + "grad_norm": 21.89015769958496, + "learning_rate": 8.798400254669272e-06, + "loss": 16.8085, + "step": 151410 + }, + { + "epoch": 0.30587798009833667, + "grad_norm": 638.985107421875, + "learning_rate": 8.798173248929081e-06, + "loss": 14.6222, + "step": 151420 + }, + { + "epoch": 0.3058981807310205, + "grad_norm": 0.38278064131736755, + "learning_rate": 8.797946224677051e-06, + "loss": 27.8758, + "step": 151430 + }, + { + "epoch": 0.3059183813637043, + "grad_norm": 278.8282775878906, + "learning_rate": 8.797719181914292e-06, + "loss": 13.1075, + "step": 151440 + }, + { + "epoch": 0.3059385819963881, + "grad_norm": 368.1073913574219, + "learning_rate": 8.797492120641913e-06, + "loss": 25.1111, + "step": 151450 + }, + { + "epoch": 0.30595878262907195, + "grad_norm": 422.73663330078125, + "learning_rate": 8.797265040861016e-06, + "loss": 28.2203, + "step": 151460 + }, + { + "epoch": 0.30597898326175577, + "grad_norm": 501.1971130371094, + "learning_rate": 8.79703794257271e-06, + "loss": 21.7, + "step": 151470 + }, + { + "epoch": 0.3059991838944396, + "grad_norm": 15.982429504394531, + "learning_rate": 8.796810825778101e-06, + "loss": 17.1545, + "step": 151480 + }, + { + "epoch": 0.3060193845271234, + "grad_norm": 247.7742462158203, + "learning_rate": 8.796583690478297e-06, + "loss": 25.6396, + "step": 151490 + }, + { + "epoch": 0.3060395851598072, + "grad_norm": 537.6829223632812, + "learning_rate": 8.796356536674404e-06, + "loss": 14.4362, + "step": 151500 + }, + { + "epoch": 0.30605978579249105, + "grad_norm": 227.258056640625, + "learning_rate": 8.796129364367532e-06, + "loss": 29.386, + "step": 151510 + }, + { + "epoch": 0.3060799864251748, + "grad_norm": 475.4372253417969, + "learning_rate": 8.795902173558784e-06, + "loss": 24.9269, + "step": 151520 + }, + { + "epoch": 0.30610018705785863, + "grad_norm": 557.894775390625, + "learning_rate": 8.79567496424927e-06, + "loss": 27.5515, + "step": 151530 + }, + { + "epoch": 0.30612038769054245, + "grad_norm": 847.9214477539062, + "learning_rate": 8.795447736440095e-06, + "loss": 24.6747, + "step": 151540 + }, + { + "epoch": 0.30614058832322627, + "grad_norm": 187.60552978515625, + "learning_rate": 8.795220490132369e-06, + "loss": 14.0796, + "step": 151550 + }, + { + "epoch": 0.3061607889559101, + "grad_norm": 518.7286987304688, + "learning_rate": 8.794993225327199e-06, + "loss": 27.9041, + "step": 151560 + }, + { + "epoch": 0.3061809895885939, + "grad_norm": 2.9284021854400635, + "learning_rate": 8.794765942025692e-06, + "loss": 11.9497, + "step": 151570 + }, + { + "epoch": 0.30620119022127773, + "grad_norm": 610.6417846679688, + "learning_rate": 8.794538640228956e-06, + "loss": 49.2123, + "step": 151580 + }, + { + "epoch": 0.30622139085396155, + "grad_norm": 422.1538391113281, + "learning_rate": 8.794311319938098e-06, + "loss": 31.4757, + "step": 151590 + }, + { + "epoch": 0.30624159148664537, + "grad_norm": 586.0907592773438, + "learning_rate": 8.794083981154229e-06, + "loss": 45.8676, + "step": 151600 + }, + { + "epoch": 0.3062617921193292, + "grad_norm": 522.1445922851562, + "learning_rate": 8.793856623878453e-06, + "loss": 28.3242, + "step": 151610 + }, + { + "epoch": 0.306281992752013, + "grad_norm": 53.978363037109375, + "learning_rate": 8.79362924811188e-06, + "loss": 41.9327, + "step": 151620 + }, + { + "epoch": 0.30630219338469683, + "grad_norm": 277.22320556640625, + "learning_rate": 8.793401853855619e-06, + "loss": 23.9439, + 
"step": 151630 + }, + { + "epoch": 0.30632239401738065, + "grad_norm": 217.17184448242188, + "learning_rate": 8.793174441110777e-06, + "loss": 25.4111, + "step": 151640 + }, + { + "epoch": 0.3063425946500644, + "grad_norm": 107.39692687988281, + "learning_rate": 8.792947009878463e-06, + "loss": 18.2247, + "step": 151650 + }, + { + "epoch": 0.30636279528274823, + "grad_norm": 936.0779418945312, + "learning_rate": 8.792719560159786e-06, + "loss": 41.7242, + "step": 151660 + }, + { + "epoch": 0.30638299591543205, + "grad_norm": 123.1182861328125, + "learning_rate": 8.792492091955852e-06, + "loss": 16.1218, + "step": 151670 + }, + { + "epoch": 0.3064031965481159, + "grad_norm": 173.78582763671875, + "learning_rate": 8.792264605267772e-06, + "loss": 15.1619, + "step": 151680 + }, + { + "epoch": 0.3064233971807997, + "grad_norm": 304.4840393066406, + "learning_rate": 8.792037100096656e-06, + "loss": 24.324, + "step": 151690 + }, + { + "epoch": 0.3064435978134835, + "grad_norm": 816.9924926757812, + "learning_rate": 8.791809576443611e-06, + "loss": 19.6709, + "step": 151700 + }, + { + "epoch": 0.30646379844616733, + "grad_norm": 411.3579406738281, + "learning_rate": 8.791582034309745e-06, + "loss": 35.6563, + "step": 151710 + }, + { + "epoch": 0.30648399907885115, + "grad_norm": 760.7244262695312, + "learning_rate": 8.791354473696167e-06, + "loss": 27.6784, + "step": 151720 + }, + { + "epoch": 0.306504199711535, + "grad_norm": 303.0555114746094, + "learning_rate": 8.791126894603987e-06, + "loss": 16.0399, + "step": 151730 + }, + { + "epoch": 0.3065244003442188, + "grad_norm": 1149.0203857421875, + "learning_rate": 8.790899297034317e-06, + "loss": 18.7669, + "step": 151740 + }, + { + "epoch": 0.3065446009769026, + "grad_norm": 374.638671875, + "learning_rate": 8.790671680988261e-06, + "loss": 10.4598, + "step": 151750 + }, + { + "epoch": 0.30656480160958643, + "grad_norm": 321.0159606933594, + "learning_rate": 8.790444046466933e-06, + "loss": 25.1233, + "step": 151760 + }, + { + "epoch": 0.30658500224227025, + "grad_norm": 115.95552062988281, + "learning_rate": 8.79021639347144e-06, + "loss": 31.6759, + "step": 151770 + }, + { + "epoch": 0.306605202874954, + "grad_norm": 172.00579833984375, + "learning_rate": 8.789988722002891e-06, + "loss": 11.6271, + "step": 151780 + }, + { + "epoch": 0.30662540350763784, + "grad_norm": 172.10379028320312, + "learning_rate": 8.789761032062398e-06, + "loss": 14.2386, + "step": 151790 + }, + { + "epoch": 0.30664560414032166, + "grad_norm": 762.5750122070312, + "learning_rate": 8.789533323651067e-06, + "loss": 19.338, + "step": 151800 + }, + { + "epoch": 0.3066658047730055, + "grad_norm": 392.90447998046875, + "learning_rate": 8.789305596770013e-06, + "loss": 10.5755, + "step": 151810 + }, + { + "epoch": 0.3066860054056893, + "grad_norm": 707.3402099609375, + "learning_rate": 8.789077851420341e-06, + "loss": 13.1562, + "step": 151820 + }, + { + "epoch": 0.3067062060383731, + "grad_norm": 517.05419921875, + "learning_rate": 8.788850087603164e-06, + "loss": 24.0082, + "step": 151830 + }, + { + "epoch": 0.30672640667105694, + "grad_norm": 438.8190612792969, + "learning_rate": 8.788622305319591e-06, + "loss": 26.8024, + "step": 151840 + }, + { + "epoch": 0.30674660730374076, + "grad_norm": 289.7139892578125, + "learning_rate": 8.788394504570732e-06, + "loss": 20.7359, + "step": 151850 + }, + { + "epoch": 0.3067668079364246, + "grad_norm": 459.5687561035156, + "learning_rate": 8.7881666853577e-06, + "loss": 18.1115, + "step": 151860 + }, + { + "epoch": 
0.3067870085691084, + "grad_norm": 904.3475341796875, + "learning_rate": 8.7879388476816e-06, + "loss": 19.3853, + "step": 151870 + }, + { + "epoch": 0.3068072092017922, + "grad_norm": 365.3310241699219, + "learning_rate": 8.787710991543547e-06, + "loss": 28.7085, + "step": 151880 + }, + { + "epoch": 0.30682740983447604, + "grad_norm": 716.1660766601562, + "learning_rate": 8.78748311694465e-06, + "loss": 19.3352, + "step": 151890 + }, + { + "epoch": 0.3068476104671598, + "grad_norm": 482.2041931152344, + "learning_rate": 8.78725522388602e-06, + "loss": 37.9838, + "step": 151900 + }, + { + "epoch": 0.3068678110998436, + "grad_norm": 664.7199096679688, + "learning_rate": 8.787027312368766e-06, + "loss": 30.4015, + "step": 151910 + }, + { + "epoch": 0.30688801173252744, + "grad_norm": 329.94268798828125, + "learning_rate": 8.786799382394e-06, + "loss": 17.9445, + "step": 151920 + }, + { + "epoch": 0.30690821236521126, + "grad_norm": 534.4207153320312, + "learning_rate": 8.786571433962837e-06, + "loss": 19.7566, + "step": 151930 + }, + { + "epoch": 0.3069284129978951, + "grad_norm": 233.0865936279297, + "learning_rate": 8.78634346707638e-06, + "loss": 13.216, + "step": 151940 + }, + { + "epoch": 0.3069486136305789, + "grad_norm": 622.2435913085938, + "learning_rate": 8.786115481735745e-06, + "loss": 15.6773, + "step": 151950 + }, + { + "epoch": 0.3069688142632627, + "grad_norm": 414.759521484375, + "learning_rate": 8.785887477942041e-06, + "loss": 31.4934, + "step": 151960 + }, + { + "epoch": 0.30698901489594654, + "grad_norm": 375.6351013183594, + "learning_rate": 8.785659455696384e-06, + "loss": 32.9653, + "step": 151970 + }, + { + "epoch": 0.30700921552863036, + "grad_norm": 329.70428466796875, + "learning_rate": 8.78543141499988e-06, + "loss": 19.4679, + "step": 151980 + }, + { + "epoch": 0.3070294161613142, + "grad_norm": 582.3593139648438, + "learning_rate": 8.785203355853642e-06, + "loss": 27.1528, + "step": 151990 + }, + { + "epoch": 0.307049616793998, + "grad_norm": 520.7802734375, + "learning_rate": 8.784975278258783e-06, + "loss": 28.7186, + "step": 152000 + }, + { + "epoch": 0.3070698174266818, + "grad_norm": 485.47332763671875, + "learning_rate": 8.784747182216414e-06, + "loss": 23.6981, + "step": 152010 + }, + { + "epoch": 0.30709001805936564, + "grad_norm": 574.9630126953125, + "learning_rate": 8.784519067727644e-06, + "loss": 24.5485, + "step": 152020 + }, + { + "epoch": 0.3071102186920494, + "grad_norm": 237.2230224609375, + "learning_rate": 8.78429093479359e-06, + "loss": 17.3559, + "step": 152030 + }, + { + "epoch": 0.30713041932473323, + "grad_norm": 459.49072265625, + "learning_rate": 8.78406278341536e-06, + "loss": 14.4572, + "step": 152040 + }, + { + "epoch": 0.30715061995741705, + "grad_norm": 412.63922119140625, + "learning_rate": 8.783834613594064e-06, + "loss": 16.5848, + "step": 152050 + }, + { + "epoch": 0.30717082059010087, + "grad_norm": 359.7923278808594, + "learning_rate": 8.78360642533082e-06, + "loss": 18.5688, + "step": 152060 + }, + { + "epoch": 0.3071910212227847, + "grad_norm": 273.905029296875, + "learning_rate": 8.783378218626737e-06, + "loss": 21.732, + "step": 152070 + }, + { + "epoch": 0.3072112218554685, + "grad_norm": 462.4580383300781, + "learning_rate": 8.783149993482928e-06, + "loss": 19.3913, + "step": 152080 + }, + { + "epoch": 0.30723142248815233, + "grad_norm": 555.3651733398438, + "learning_rate": 8.782921749900502e-06, + "loss": 16.1707, + "step": 152090 + }, + { + "epoch": 0.30725162312083615, + "grad_norm": 487.9097900390625, + 
"learning_rate": 8.782693487880575e-06, + "loss": 33.2753, + "step": 152100 + }, + { + "epoch": 0.30727182375351997, + "grad_norm": 1059.037353515625, + "learning_rate": 8.782465207424261e-06, + "loss": 27.1952, + "step": 152110 + }, + { + "epoch": 0.3072920243862038, + "grad_norm": 1091.01513671875, + "learning_rate": 8.78223690853267e-06, + "loss": 21.2001, + "step": 152120 + }, + { + "epoch": 0.3073122250188876, + "grad_norm": 364.620361328125, + "learning_rate": 8.782008591206914e-06, + "loss": 20.7926, + "step": 152130 + }, + { + "epoch": 0.30733242565157143, + "grad_norm": 443.7419738769531, + "learning_rate": 8.781780255448106e-06, + "loss": 27.453, + "step": 152140 + }, + { + "epoch": 0.30735262628425525, + "grad_norm": 462.845458984375, + "learning_rate": 8.78155190125736e-06, + "loss": 21.5817, + "step": 152150 + }, + { + "epoch": 0.307372826916939, + "grad_norm": 250.06491088867188, + "learning_rate": 8.78132352863579e-06, + "loss": 21.9177, + "step": 152160 + }, + { + "epoch": 0.30739302754962283, + "grad_norm": 308.0704040527344, + "learning_rate": 8.781095137584506e-06, + "loss": 12.0368, + "step": 152170 + }, + { + "epoch": 0.30741322818230665, + "grad_norm": 266.1795959472656, + "learning_rate": 8.780866728104625e-06, + "loss": 14.4003, + "step": 152180 + }, + { + "epoch": 0.3074334288149905, + "grad_norm": 126.96919250488281, + "learning_rate": 8.780638300197258e-06, + "loss": 13.0788, + "step": 152190 + }, + { + "epoch": 0.3074536294476743, + "grad_norm": 1097.9608154296875, + "learning_rate": 8.780409853863517e-06, + "loss": 35.0315, + "step": 152200 + }, + { + "epoch": 0.3074738300803581, + "grad_norm": 452.70880126953125, + "learning_rate": 8.780181389104516e-06, + "loss": 20.7623, + "step": 152210 + }, + { + "epoch": 0.30749403071304193, + "grad_norm": 370.7041320800781, + "learning_rate": 8.779952905921372e-06, + "loss": 25.4909, + "step": 152220 + }, + { + "epoch": 0.30751423134572575, + "grad_norm": 190.17071533203125, + "learning_rate": 8.779724404315195e-06, + "loss": 18.3436, + "step": 152230 + }, + { + "epoch": 0.3075344319784096, + "grad_norm": 550.6524047851562, + "learning_rate": 8.779495884287099e-06, + "loss": 32.8239, + "step": 152240 + }, + { + "epoch": 0.3075546326110934, + "grad_norm": 292.2104187011719, + "learning_rate": 8.779267345838198e-06, + "loss": 38.8242, + "step": 152250 + }, + { + "epoch": 0.3075748332437772, + "grad_norm": 514.6538696289062, + "learning_rate": 8.779038788969607e-06, + "loss": 29.647, + "step": 152260 + }, + { + "epoch": 0.30759503387646103, + "grad_norm": 358.833740234375, + "learning_rate": 8.77881021368244e-06, + "loss": 16.6317, + "step": 152270 + }, + { + "epoch": 0.30761523450914485, + "grad_norm": 269.1551818847656, + "learning_rate": 8.778581619977811e-06, + "loss": 23.7684, + "step": 152280 + }, + { + "epoch": 0.3076354351418286, + "grad_norm": 448.9606018066406, + "learning_rate": 8.778353007856832e-06, + "loss": 18.1493, + "step": 152290 + }, + { + "epoch": 0.30765563577451244, + "grad_norm": 321.6992492675781, + "learning_rate": 8.778124377320619e-06, + "loss": 21.9051, + "step": 152300 + }, + { + "epoch": 0.30767583640719626, + "grad_norm": 842.9527587890625, + "learning_rate": 8.777895728370285e-06, + "loss": 39.2258, + "step": 152310 + }, + { + "epoch": 0.3076960370398801, + "grad_norm": 870.618896484375, + "learning_rate": 8.777667061006947e-06, + "loss": 22.0667, + "step": 152320 + }, + { + "epoch": 0.3077162376725639, + "grad_norm": 382.679931640625, + "learning_rate": 8.777438375231717e-06, + "loss": 
22.2914, + "step": 152330 + }, + { + "epoch": 0.3077364383052477, + "grad_norm": 350.8429260253906, + "learning_rate": 8.77720967104571e-06, + "loss": 24.1487, + "step": 152340 + }, + { + "epoch": 0.30775663893793154, + "grad_norm": 450.3186950683594, + "learning_rate": 8.776980948450043e-06, + "loss": 11.0823, + "step": 152350 + }, + { + "epoch": 0.30777683957061536, + "grad_norm": 218.6016845703125, + "learning_rate": 8.776752207445829e-06, + "loss": 32.1349, + "step": 152360 + }, + { + "epoch": 0.3077970402032992, + "grad_norm": 306.7082824707031, + "learning_rate": 8.776523448034182e-06, + "loss": 18.5718, + "step": 152370 + }, + { + "epoch": 0.307817240835983, + "grad_norm": 297.98638916015625, + "learning_rate": 8.776294670216217e-06, + "loss": 15.9312, + "step": 152380 + }, + { + "epoch": 0.3078374414686668, + "grad_norm": 477.687255859375, + "learning_rate": 8.776065873993049e-06, + "loss": 14.8394, + "step": 152390 + }, + { + "epoch": 0.30785764210135064, + "grad_norm": 415.9744567871094, + "learning_rate": 8.775837059365796e-06, + "loss": 27.5502, + "step": 152400 + }, + { + "epoch": 0.30787784273403446, + "grad_norm": 554.4899291992188, + "learning_rate": 8.77560822633557e-06, + "loss": 36.1364, + "step": 152410 + }, + { + "epoch": 0.3078980433667182, + "grad_norm": 229.49571228027344, + "learning_rate": 8.775379374903487e-06, + "loss": 30.9903, + "step": 152420 + }, + { + "epoch": 0.30791824399940204, + "grad_norm": 51.77035140991211, + "learning_rate": 8.775150505070664e-06, + "loss": 16.698, + "step": 152430 + }, + { + "epoch": 0.30793844463208586, + "grad_norm": 301.8908386230469, + "learning_rate": 8.774921616838217e-06, + "loss": 21.288, + "step": 152440 + }, + { + "epoch": 0.3079586452647697, + "grad_norm": 648.9600830078125, + "learning_rate": 8.774692710207257e-06, + "loss": 27.5579, + "step": 152450 + }, + { + "epoch": 0.3079788458974535, + "grad_norm": 731.9532470703125, + "learning_rate": 8.774463785178904e-06, + "loss": 29.7746, + "step": 152460 + }, + { + "epoch": 0.3079990465301373, + "grad_norm": 714.5652465820312, + "learning_rate": 8.774234841754271e-06, + "loss": 15.7488, + "step": 152470 + }, + { + "epoch": 0.30801924716282114, + "grad_norm": 278.4861755371094, + "learning_rate": 8.774005879934475e-06, + "loss": 30.5312, + "step": 152480 + }, + { + "epoch": 0.30803944779550496, + "grad_norm": 458.5387878417969, + "learning_rate": 8.773776899720634e-06, + "loss": 15.5067, + "step": 152490 + }, + { + "epoch": 0.3080596484281888, + "grad_norm": 543.5549926757812, + "learning_rate": 8.773547901113862e-06, + "loss": 19.926, + "step": 152500 + }, + { + "epoch": 0.3080798490608726, + "grad_norm": 504.25628662109375, + "learning_rate": 8.773318884115273e-06, + "loss": 39.7353, + "step": 152510 + }, + { + "epoch": 0.3081000496935564, + "grad_norm": 482.6075439453125, + "learning_rate": 8.773089848725986e-06, + "loss": 26.9789, + "step": 152520 + }, + { + "epoch": 0.30812025032624024, + "grad_norm": 178.32061767578125, + "learning_rate": 8.772860794947119e-06, + "loss": 44.1952, + "step": 152530 + }, + { + "epoch": 0.308140450958924, + "grad_norm": 263.0315246582031, + "learning_rate": 8.772631722779783e-06, + "loss": 16.951, + "step": 152540 + }, + { + "epoch": 0.3081606515916078, + "grad_norm": 445.8074035644531, + "learning_rate": 8.772402632225098e-06, + "loss": 18.989, + "step": 152550 + }, + { + "epoch": 0.30818085222429165, + "grad_norm": 383.47491455078125, + "learning_rate": 8.772173523284182e-06, + "loss": 21.363, + "step": 152560 + }, + { + "epoch": 
0.30820105285697547, + "grad_norm": 259.873046875, + "learning_rate": 8.77194439595815e-06, + "loss": 15.2514, + "step": 152570 + }, + { + "epoch": 0.3082212534896593, + "grad_norm": 301.7531433105469, + "learning_rate": 8.771715250248116e-06, + "loss": 42.1112, + "step": 152580 + }, + { + "epoch": 0.3082414541223431, + "grad_norm": 15.518353462219238, + "learning_rate": 8.771486086155201e-06, + "loss": 23.7325, + "step": 152590 + }, + { + "epoch": 0.3082616547550269, + "grad_norm": 714.9344482421875, + "learning_rate": 8.77125690368052e-06, + "loss": 29.0086, + "step": 152600 + }, + { + "epoch": 0.30828185538771075, + "grad_norm": 539.0986328125, + "learning_rate": 8.77102770282519e-06, + "loss": 33.3592, + "step": 152610 + }, + { + "epoch": 0.30830205602039457, + "grad_norm": 708.436279296875, + "learning_rate": 8.770798483590327e-06, + "loss": 23.5398, + "step": 152620 + }, + { + "epoch": 0.3083222566530784, + "grad_norm": 287.7116394042969, + "learning_rate": 8.770569245977052e-06, + "loss": 29.4807, + "step": 152630 + }, + { + "epoch": 0.3083424572857622, + "grad_norm": 782.888916015625, + "learning_rate": 8.770339989986479e-06, + "loss": 20.0547, + "step": 152640 + }, + { + "epoch": 0.308362657918446, + "grad_norm": 497.072998046875, + "learning_rate": 8.770110715619726e-06, + "loss": 9.7068, + "step": 152650 + }, + { + "epoch": 0.30838285855112985, + "grad_norm": 568.1920166015625, + "learning_rate": 8.769881422877911e-06, + "loss": 18.5282, + "step": 152660 + }, + { + "epoch": 0.3084030591838136, + "grad_norm": 234.79539489746094, + "learning_rate": 8.76965211176215e-06, + "loss": 17.9275, + "step": 152670 + }, + { + "epoch": 0.30842325981649743, + "grad_norm": 579.5450439453125, + "learning_rate": 8.769422782273563e-06, + "loss": 30.9816, + "step": 152680 + }, + { + "epoch": 0.30844346044918125, + "grad_norm": 390.92034912109375, + "learning_rate": 8.769193434413266e-06, + "loss": 18.3398, + "step": 152690 + }, + { + "epoch": 0.30846366108186507, + "grad_norm": 295.37261962890625, + "learning_rate": 8.768964068182378e-06, + "loss": 12.4739, + "step": 152700 + }, + { + "epoch": 0.3084838617145489, + "grad_norm": 162.08419799804688, + "learning_rate": 8.768734683582017e-06, + "loss": 19.1307, + "step": 152710 + }, + { + "epoch": 0.3085040623472327, + "grad_norm": 79.44987487792969, + "learning_rate": 8.768505280613297e-06, + "loss": 16.8558, + "step": 152720 + }, + { + "epoch": 0.30852426297991653, + "grad_norm": 234.40797424316406, + "learning_rate": 8.768275859277342e-06, + "loss": 43.9868, + "step": 152730 + }, + { + "epoch": 0.30854446361260035, + "grad_norm": 627.1044311523438, + "learning_rate": 8.768046419575267e-06, + "loss": 22.7319, + "step": 152740 + }, + { + "epoch": 0.30856466424528417, + "grad_norm": 148.47650146484375, + "learning_rate": 8.767816961508191e-06, + "loss": 14.009, + "step": 152750 + }, + { + "epoch": 0.308584864877968, + "grad_norm": 272.833251953125, + "learning_rate": 8.76758748507723e-06, + "loss": 30.4871, + "step": 152760 + }, + { + "epoch": 0.3086050655106518, + "grad_norm": 523.990478515625, + "learning_rate": 8.767357990283507e-06, + "loss": 19.5321, + "step": 152770 + }, + { + "epoch": 0.30862526614333563, + "grad_norm": 558.1668090820312, + "learning_rate": 8.767128477128138e-06, + "loss": 22.5566, + "step": 152780 + }, + { + "epoch": 0.30864546677601945, + "grad_norm": 291.9372253417969, + "learning_rate": 8.766898945612241e-06, + "loss": 36.3042, + "step": 152790 + }, + { + "epoch": 0.3086656674087032, + "grad_norm": 474.65264892578125, + 
"learning_rate": 8.766669395736936e-06, + "loss": 23.7728, + "step": 152800 + }, + { + "epoch": 0.30868586804138703, + "grad_norm": 310.5718994140625, + "learning_rate": 8.766439827503339e-06, + "loss": 18.2838, + "step": 152810 + }, + { + "epoch": 0.30870606867407085, + "grad_norm": 158.04115295410156, + "learning_rate": 8.766210240912574e-06, + "loss": 18.8744, + "step": 152820 + }, + { + "epoch": 0.3087262693067547, + "grad_norm": 400.5627746582031, + "learning_rate": 8.765980635965755e-06, + "loss": 27.1191, + "step": 152830 + }, + { + "epoch": 0.3087464699394385, + "grad_norm": 519.1942749023438, + "learning_rate": 8.765751012664004e-06, + "loss": 29.7915, + "step": 152840 + }, + { + "epoch": 0.3087666705721223, + "grad_norm": 200.17587280273438, + "learning_rate": 8.765521371008439e-06, + "loss": 22.471, + "step": 152850 + }, + { + "epoch": 0.30878687120480613, + "grad_norm": 521.7061767578125, + "learning_rate": 8.76529171100018e-06, + "loss": 17.9913, + "step": 152860 + }, + { + "epoch": 0.30880707183748995, + "grad_norm": 756.877685546875, + "learning_rate": 8.765062032640346e-06, + "loss": 23.4483, + "step": 152870 + }, + { + "epoch": 0.3088272724701738, + "grad_norm": 189.5233612060547, + "learning_rate": 8.764832335930055e-06, + "loss": 19.8989, + "step": 152880 + }, + { + "epoch": 0.3088474731028576, + "grad_norm": 506.61273193359375, + "learning_rate": 8.764602620870429e-06, + "loss": 15.1679, + "step": 152890 + }, + { + "epoch": 0.3088676737355414, + "grad_norm": 669.17626953125, + "learning_rate": 8.764372887462587e-06, + "loss": 17.2644, + "step": 152900 + }, + { + "epoch": 0.30888787436822523, + "grad_norm": 308.8310241699219, + "learning_rate": 8.764143135707647e-06, + "loss": 20.6132, + "step": 152910 + }, + { + "epoch": 0.30890807500090905, + "grad_norm": 683.5242919921875, + "learning_rate": 8.76391336560673e-06, + "loss": 31.0237, + "step": 152920 + }, + { + "epoch": 0.3089282756335928, + "grad_norm": 545.5559692382812, + "learning_rate": 8.763683577160955e-06, + "loss": 25.7465, + "step": 152930 + }, + { + "epoch": 0.30894847626627664, + "grad_norm": 395.28021240234375, + "learning_rate": 8.763453770371444e-06, + "loss": 26.0159, + "step": 152940 + }, + { + "epoch": 0.30896867689896046, + "grad_norm": 20.908044815063477, + "learning_rate": 8.763223945239317e-06, + "loss": 25.7089, + "step": 152950 + }, + { + "epoch": 0.3089888775316443, + "grad_norm": 725.0459594726562, + "learning_rate": 8.76299410176569e-06, + "loss": 14.0466, + "step": 152960 + }, + { + "epoch": 0.3090090781643281, + "grad_norm": 91.55359649658203, + "learning_rate": 8.762764239951688e-06, + "loss": 15.5929, + "step": 152970 + }, + { + "epoch": 0.3090292787970119, + "grad_norm": 144.00570678710938, + "learning_rate": 8.76253435979843e-06, + "loss": 18.0419, + "step": 152980 + }, + { + "epoch": 0.30904947942969574, + "grad_norm": 362.8430480957031, + "learning_rate": 8.762304461307033e-06, + "loss": 27.8007, + "step": 152990 + }, + { + "epoch": 0.30906968006237956, + "grad_norm": 353.3616027832031, + "learning_rate": 8.762074544478622e-06, + "loss": 24.0006, + "step": 153000 + }, + { + "epoch": 0.3090898806950634, + "grad_norm": 366.91448974609375, + "learning_rate": 8.761844609314316e-06, + "loss": 12.364, + "step": 153010 + }, + { + "epoch": 0.3091100813277472, + "grad_norm": 259.1235046386719, + "learning_rate": 8.761614655815237e-06, + "loss": 25.123, + "step": 153020 + }, + { + "epoch": 0.309130281960431, + "grad_norm": 555.457763671875, + "learning_rate": 8.761384683982503e-06, + "loss": 
29.7559, + "step": 153030 + }, + { + "epoch": 0.30915048259311484, + "grad_norm": 155.01808166503906, + "learning_rate": 8.761154693817236e-06, + "loss": 23.0903, + "step": 153040 + }, + { + "epoch": 0.30917068322579866, + "grad_norm": 171.6526641845703, + "learning_rate": 8.760924685320558e-06, + "loss": 18.9855, + "step": 153050 + }, + { + "epoch": 0.3091908838584824, + "grad_norm": 371.7682189941406, + "learning_rate": 8.760694658493589e-06, + "loss": 30.759, + "step": 153060 + }, + { + "epoch": 0.30921108449116624, + "grad_norm": 199.42379760742188, + "learning_rate": 8.76046461333745e-06, + "loss": 21.7867, + "step": 153070 + }, + { + "epoch": 0.30923128512385006, + "grad_norm": 392.96441650390625, + "learning_rate": 8.760234549853263e-06, + "loss": 28.6661, + "step": 153080 + }, + { + "epoch": 0.3092514857565339, + "grad_norm": 567.9005737304688, + "learning_rate": 8.760004468042148e-06, + "loss": 37.3038, + "step": 153090 + }, + { + "epoch": 0.3092716863892177, + "grad_norm": 542.5743408203125, + "learning_rate": 8.759774367905228e-06, + "loss": 15.6898, + "step": 153100 + }, + { + "epoch": 0.3092918870219015, + "grad_norm": 357.5645446777344, + "learning_rate": 8.759544249443624e-06, + "loss": 19.9222, + "step": 153110 + }, + { + "epoch": 0.30931208765458534, + "grad_norm": 382.986572265625, + "learning_rate": 8.759314112658458e-06, + "loss": 28.247, + "step": 153120 + }, + { + "epoch": 0.30933228828726916, + "grad_norm": 362.6043395996094, + "learning_rate": 8.759083957550849e-06, + "loss": 20.1051, + "step": 153130 + }, + { + "epoch": 0.309352488919953, + "grad_norm": 335.9272155761719, + "learning_rate": 8.758853784121921e-06, + "loss": 21.8557, + "step": 153140 + }, + { + "epoch": 0.3093726895526368, + "grad_norm": 680.1184692382812, + "learning_rate": 8.758623592372797e-06, + "loss": 24.2451, + "step": 153150 + }, + { + "epoch": 0.3093928901853206, + "grad_norm": 490.99578857421875, + "learning_rate": 8.758393382304597e-06, + "loss": 19.7592, + "step": 153160 + }, + { + "epoch": 0.30941309081800444, + "grad_norm": 358.3629455566406, + "learning_rate": 8.758163153918442e-06, + "loss": 16.8443, + "step": 153170 + }, + { + "epoch": 0.3094332914506882, + "grad_norm": 110.01791381835938, + "learning_rate": 8.757932907215457e-06, + "loss": 19.1974, + "step": 153180 + }, + { + "epoch": 0.30945349208337203, + "grad_norm": 797.3854370117188, + "learning_rate": 8.757702642196763e-06, + "loss": 28.4614, + "step": 153190 + }, + { + "epoch": 0.30947369271605585, + "grad_norm": 447.52392578125, + "learning_rate": 8.757472358863481e-06, + "loss": 29.6217, + "step": 153200 + }, + { + "epoch": 0.30949389334873967, + "grad_norm": 749.3699340820312, + "learning_rate": 8.757242057216735e-06, + "loss": 25.4117, + "step": 153210 + }, + { + "epoch": 0.3095140939814235, + "grad_norm": 346.72735595703125, + "learning_rate": 8.757011737257646e-06, + "loss": 23.0122, + "step": 153220 + }, + { + "epoch": 0.3095342946141073, + "grad_norm": 368.5466003417969, + "learning_rate": 8.75678139898734e-06, + "loss": 15.7692, + "step": 153230 + }, + { + "epoch": 0.30955449524679113, + "grad_norm": 720.599609375, + "learning_rate": 8.756551042406936e-06, + "loss": 41.3858, + "step": 153240 + }, + { + "epoch": 0.30957469587947495, + "grad_norm": 1139.2069091796875, + "learning_rate": 8.756320667517557e-06, + "loss": 41.2011, + "step": 153250 + }, + { + "epoch": 0.30959489651215877, + "grad_norm": 836.5335083007812, + "learning_rate": 8.756090274320326e-06, + "loss": 22.5103, + "step": 153260 + }, + { + "epoch": 
0.3096150971448426, + "grad_norm": 724.9573974609375, + "learning_rate": 8.755859862816368e-06, + "loss": 38.7618, + "step": 153270 + }, + { + "epoch": 0.3096352977775264, + "grad_norm": 269.16058349609375, + "learning_rate": 8.755629433006804e-06, + "loss": 19.8056, + "step": 153280 + }, + { + "epoch": 0.30965549841021023, + "grad_norm": 79.0902099609375, + "learning_rate": 8.755398984892757e-06, + "loss": 32.1713, + "step": 153290 + }, + { + "epoch": 0.30967569904289405, + "grad_norm": 783.4654541015625, + "learning_rate": 8.755168518475351e-06, + "loss": 25.255, + "step": 153300 + }, + { + "epoch": 0.3096958996755778, + "grad_norm": 35.45897674560547, + "learning_rate": 8.754938033755712e-06, + "loss": 15.7536, + "step": 153310 + }, + { + "epoch": 0.30971610030826163, + "grad_norm": 158.96868896484375, + "learning_rate": 8.754707530734958e-06, + "loss": 40.3681, + "step": 153320 + }, + { + "epoch": 0.30973630094094545, + "grad_norm": 1015.9963989257812, + "learning_rate": 8.754477009414215e-06, + "loss": 13.0651, + "step": 153330 + }, + { + "epoch": 0.3097565015736293, + "grad_norm": 575.8265380859375, + "learning_rate": 8.754246469794606e-06, + "loss": 18.4281, + "step": 153340 + }, + { + "epoch": 0.3097767022063131, + "grad_norm": 611.1806030273438, + "learning_rate": 8.754015911877255e-06, + "loss": 30.3986, + "step": 153350 + }, + { + "epoch": 0.3097969028389969, + "grad_norm": 170.62525939941406, + "learning_rate": 8.753785335663287e-06, + "loss": 11.3904, + "step": 153360 + }, + { + "epoch": 0.30981710347168073, + "grad_norm": 284.2012023925781, + "learning_rate": 8.753554741153822e-06, + "loss": 14.7108, + "step": 153370 + }, + { + "epoch": 0.30983730410436455, + "grad_norm": 803.2312622070312, + "learning_rate": 8.75332412834999e-06, + "loss": 19.1856, + "step": 153380 + }, + { + "epoch": 0.3098575047370484, + "grad_norm": 967.8285522460938, + "learning_rate": 8.75309349725291e-06, + "loss": 28.5167, + "step": 153390 + }, + { + "epoch": 0.3098777053697322, + "grad_norm": 110.3150634765625, + "learning_rate": 8.752862847863707e-06, + "loss": 27.7898, + "step": 153400 + }, + { + "epoch": 0.309897906002416, + "grad_norm": 478.0721740722656, + "learning_rate": 8.752632180183504e-06, + "loss": 28.3004, + "step": 153410 + }, + { + "epoch": 0.30991810663509983, + "grad_norm": 77.1426010131836, + "learning_rate": 8.75240149421343e-06, + "loss": 19.5355, + "step": 153420 + }, + { + "epoch": 0.30993830726778365, + "grad_norm": 322.9454650878906, + "learning_rate": 8.752170789954604e-06, + "loss": 20.2358, + "step": 153430 + }, + { + "epoch": 0.3099585079004674, + "grad_norm": 0.0, + "learning_rate": 8.751940067408155e-06, + "loss": 25.7665, + "step": 153440 + }, + { + "epoch": 0.30997870853315124, + "grad_norm": 872.8051147460938, + "learning_rate": 8.751709326575204e-06, + "loss": 14.2968, + "step": 153450 + }, + { + "epoch": 0.30999890916583506, + "grad_norm": 253.02938842773438, + "learning_rate": 8.751478567456874e-06, + "loss": 12.1771, + "step": 153460 + }, + { + "epoch": 0.3100191097985189, + "grad_norm": 740.6807861328125, + "learning_rate": 8.751247790054297e-06, + "loss": 11.7632, + "step": 153470 + }, + { + "epoch": 0.3100393104312027, + "grad_norm": 388.167724609375, + "learning_rate": 8.75101699436859e-06, + "loss": 27.8447, + "step": 153480 + }, + { + "epoch": 0.3100595110638865, + "grad_norm": 353.12030029296875, + "learning_rate": 8.750786180400883e-06, + "loss": 17.9569, + "step": 153490 + }, + { + "epoch": 0.31007971169657034, + "grad_norm": 411.9723815917969, + 
"learning_rate": 8.750555348152299e-06, + "loss": 20.2977, + "step": 153500 + }, + { + "epoch": 0.31009991232925416, + "grad_norm": 465.92498779296875, + "learning_rate": 8.750324497623963e-06, + "loss": 26.1791, + "step": 153510 + }, + { + "epoch": 0.310120112961938, + "grad_norm": 1112.1820068359375, + "learning_rate": 8.750093628817e-06, + "loss": 38.8637, + "step": 153520 + }, + { + "epoch": 0.3101403135946218, + "grad_norm": 467.22735595703125, + "learning_rate": 8.749862741732534e-06, + "loss": 27.2548, + "step": 153530 + }, + { + "epoch": 0.3101605142273056, + "grad_norm": 391.3013000488281, + "learning_rate": 8.749631836371692e-06, + "loss": 9.4652, + "step": 153540 + }, + { + "epoch": 0.31018071485998944, + "grad_norm": 515.9251098632812, + "learning_rate": 8.749400912735602e-06, + "loss": 20.3771, + "step": 153550 + }, + { + "epoch": 0.31020091549267326, + "grad_norm": 1368.5169677734375, + "learning_rate": 8.749169970825384e-06, + "loss": 29.3309, + "step": 153560 + }, + { + "epoch": 0.310221116125357, + "grad_norm": 539.8248901367188, + "learning_rate": 8.748939010642168e-06, + "loss": 26.0765, + "step": 153570 + }, + { + "epoch": 0.31024131675804084, + "grad_norm": 494.73089599609375, + "learning_rate": 8.748708032187076e-06, + "loss": 29.5157, + "step": 153580 + }, + { + "epoch": 0.31026151739072466, + "grad_norm": 515.84228515625, + "learning_rate": 8.748477035461237e-06, + "loss": 12.7383, + "step": 153590 + }, + { + "epoch": 0.3102817180234085, + "grad_norm": 231.21498107910156, + "learning_rate": 8.748246020465776e-06, + "loss": 34.8677, + "step": 153600 + }, + { + "epoch": 0.3103019186560923, + "grad_norm": 294.052734375, + "learning_rate": 8.748014987201818e-06, + "loss": 15.8449, + "step": 153610 + }, + { + "epoch": 0.3103221192887761, + "grad_norm": 204.9749298095703, + "learning_rate": 8.74778393567049e-06, + "loss": 17.567, + "step": 153620 + }, + { + "epoch": 0.31034231992145994, + "grad_norm": 276.3417053222656, + "learning_rate": 8.747552865872918e-06, + "loss": 21.6977, + "step": 153630 + }, + { + "epoch": 0.31036252055414376, + "grad_norm": 545.5340576171875, + "learning_rate": 8.747321777810226e-06, + "loss": 28.0072, + "step": 153640 + }, + { + "epoch": 0.3103827211868276, + "grad_norm": 379.01806640625, + "learning_rate": 8.747090671483542e-06, + "loss": 12.8451, + "step": 153650 + }, + { + "epoch": 0.3104029218195114, + "grad_norm": 162.28639221191406, + "learning_rate": 8.746859546893995e-06, + "loss": 23.2111, + "step": 153660 + }, + { + "epoch": 0.3104231224521952, + "grad_norm": 328.7956237792969, + "learning_rate": 8.746628404042707e-06, + "loss": 17.2274, + "step": 153670 + }, + { + "epoch": 0.31044332308487904, + "grad_norm": 695.1113891601562, + "learning_rate": 8.74639724293081e-06, + "loss": 31.8599, + "step": 153680 + }, + { + "epoch": 0.31046352371756286, + "grad_norm": 310.2861022949219, + "learning_rate": 8.746166063559423e-06, + "loss": 20.8219, + "step": 153690 + }, + { + "epoch": 0.3104837243502466, + "grad_norm": 123.86649322509766, + "learning_rate": 8.745934865929676e-06, + "loss": 16.3994, + "step": 153700 + }, + { + "epoch": 0.31050392498293045, + "grad_norm": 211.2790069580078, + "learning_rate": 8.745703650042701e-06, + "loss": 33.4938, + "step": 153710 + }, + { + "epoch": 0.31052412561561427, + "grad_norm": 80.10101318359375, + "learning_rate": 8.74547241589962e-06, + "loss": 33.1368, + "step": 153720 + }, + { + "epoch": 0.3105443262482981, + "grad_norm": 355.1980285644531, + "learning_rate": 8.74524116350156e-06, + "loss": 19.4098, 
+ "step": 153730 + }, + { + "epoch": 0.3105645268809819, + "grad_norm": 374.2474670410156, + "learning_rate": 8.745009892849647e-06, + "loss": 13.9502, + "step": 153740 + }, + { + "epoch": 0.3105847275136657, + "grad_norm": 528.13623046875, + "learning_rate": 8.744778603945013e-06, + "loss": 38.6225, + "step": 153750 + }, + { + "epoch": 0.31060492814634955, + "grad_norm": 3.575230598449707, + "learning_rate": 8.744547296788779e-06, + "loss": 15.232, + "step": 153760 + }, + { + "epoch": 0.31062512877903337, + "grad_norm": 270.492919921875, + "learning_rate": 8.744315971382078e-06, + "loss": 19.9082, + "step": 153770 + }, + { + "epoch": 0.3106453294117172, + "grad_norm": 451.46881103515625, + "learning_rate": 8.744084627726034e-06, + "loss": 14.2561, + "step": 153780 + }, + { + "epoch": 0.310665530044401, + "grad_norm": 476.95574951171875, + "learning_rate": 8.743853265821776e-06, + "loss": 15.3103, + "step": 153790 + }, + { + "epoch": 0.3106857306770848, + "grad_norm": 447.3099670410156, + "learning_rate": 8.743621885670431e-06, + "loss": 17.8824, + "step": 153800 + }, + { + "epoch": 0.31070593130976865, + "grad_norm": 476.6871032714844, + "learning_rate": 8.743390487273127e-06, + "loss": 16.4435, + "step": 153810 + }, + { + "epoch": 0.3107261319424524, + "grad_norm": 405.0229187011719, + "learning_rate": 8.743159070630993e-06, + "loss": 34.1352, + "step": 153820 + }, + { + "epoch": 0.31074633257513623, + "grad_norm": 79.20420837402344, + "learning_rate": 8.742927635745155e-06, + "loss": 20.4067, + "step": 153830 + }, + { + "epoch": 0.31076653320782005, + "grad_norm": 275.6741027832031, + "learning_rate": 8.742696182616742e-06, + "loss": 19.7314, + "step": 153840 + }, + { + "epoch": 0.31078673384050387, + "grad_norm": 513.9413452148438, + "learning_rate": 8.74246471124688e-06, + "loss": 21.0318, + "step": 153850 + }, + { + "epoch": 0.3108069344731877, + "grad_norm": 304.9493408203125, + "learning_rate": 8.7422332216367e-06, + "loss": 13.3125, + "step": 153860 + }, + { + "epoch": 0.3108271351058715, + "grad_norm": 617.163818359375, + "learning_rate": 8.742001713787329e-06, + "loss": 10.1792, + "step": 153870 + }, + { + "epoch": 0.31084733573855533, + "grad_norm": 423.6902160644531, + "learning_rate": 8.741770187699897e-06, + "loss": 13.1258, + "step": 153880 + }, + { + "epoch": 0.31086753637123915, + "grad_norm": 356.60888671875, + "learning_rate": 8.741538643375528e-06, + "loss": 37.1837, + "step": 153890 + }, + { + "epoch": 0.31088773700392297, + "grad_norm": 519.690673828125, + "learning_rate": 8.741307080815357e-06, + "loss": 40.6583, + "step": 153900 + }, + { + "epoch": 0.3109079376366068, + "grad_norm": 1111.084228515625, + "learning_rate": 8.741075500020506e-06, + "loss": 18.5098, + "step": 153910 + }, + { + "epoch": 0.3109281382692906, + "grad_norm": 591.6962280273438, + "learning_rate": 8.74084390099211e-06, + "loss": 18.7493, + "step": 153920 + }, + { + "epoch": 0.31094833890197443, + "grad_norm": 285.12152099609375, + "learning_rate": 8.74061228373129e-06, + "loss": 17.6761, + "step": 153930 + }, + { + "epoch": 0.31096853953465825, + "grad_norm": 294.6405944824219, + "learning_rate": 8.740380648239182e-06, + "loss": 20.9134, + "step": 153940 + }, + { + "epoch": 0.310988740167342, + "grad_norm": 380.5940246582031, + "learning_rate": 8.740148994516912e-06, + "loss": 15.1802, + "step": 153950 + }, + { + "epoch": 0.31100894080002583, + "grad_norm": 644.829345703125, + "learning_rate": 8.73991732256561e-06, + "loss": 21.9968, + "step": 153960 + }, + { + "epoch": 0.31102914143270965, + 
"grad_norm": 271.3200378417969, + "learning_rate": 8.739685632386405e-06, + "loss": 13.6429, + "step": 153970 + }, + { + "epoch": 0.3110493420653935, + "grad_norm": 644.6058959960938, + "learning_rate": 8.739453923980425e-06, + "loss": 21.9192, + "step": 153980 + }, + { + "epoch": 0.3110695426980773, + "grad_norm": 371.74688720703125, + "learning_rate": 8.7392221973488e-06, + "loss": 24.7475, + "step": 153990 + }, + { + "epoch": 0.3110897433307611, + "grad_norm": 1066.115234375, + "learning_rate": 8.73899045249266e-06, + "loss": 44.2597, + "step": 154000 + }, + { + "epoch": 0.31110994396344493, + "grad_norm": 613.54833984375, + "learning_rate": 8.738758689413133e-06, + "loss": 22.0096, + "step": 154010 + }, + { + "epoch": 0.31113014459612875, + "grad_norm": 460.6983337402344, + "learning_rate": 8.738526908111352e-06, + "loss": 34.8959, + "step": 154020 + }, + { + "epoch": 0.3111503452288126, + "grad_norm": 283.304931640625, + "learning_rate": 8.738295108588442e-06, + "loss": 23.2965, + "step": 154030 + }, + { + "epoch": 0.3111705458614964, + "grad_norm": 393.63427734375, + "learning_rate": 8.738063290845536e-06, + "loss": 19.5414, + "step": 154040 + }, + { + "epoch": 0.3111907464941802, + "grad_norm": 595.6905517578125, + "learning_rate": 8.737831454883762e-06, + "loss": 32.4807, + "step": 154050 + }, + { + "epoch": 0.31121094712686403, + "grad_norm": 630.62060546875, + "learning_rate": 8.737599600704251e-06, + "loss": 37.6232, + "step": 154060 + }, + { + "epoch": 0.31123114775954785, + "grad_norm": 290.7593688964844, + "learning_rate": 8.737367728308134e-06, + "loss": 16.2704, + "step": 154070 + }, + { + "epoch": 0.3112513483922316, + "grad_norm": 105.99109649658203, + "learning_rate": 8.737135837696539e-06, + "loss": 11.9972, + "step": 154080 + }, + { + "epoch": 0.31127154902491544, + "grad_norm": 205.48757934570312, + "learning_rate": 8.736903928870597e-06, + "loss": 31.4111, + "step": 154090 + }, + { + "epoch": 0.31129174965759926, + "grad_norm": 18.020917892456055, + "learning_rate": 8.736672001831438e-06, + "loss": 18.1271, + "step": 154100 + }, + { + "epoch": 0.3113119502902831, + "grad_norm": 534.1895751953125, + "learning_rate": 8.736440056580196e-06, + "loss": 25.1162, + "step": 154110 + }, + { + "epoch": 0.3113321509229669, + "grad_norm": 1209.593994140625, + "learning_rate": 8.736208093117994e-06, + "loss": 28.3276, + "step": 154120 + }, + { + "epoch": 0.3113523515556507, + "grad_norm": 374.8323669433594, + "learning_rate": 8.73597611144597e-06, + "loss": 18.5329, + "step": 154130 + }, + { + "epoch": 0.31137255218833454, + "grad_norm": 579.5465698242188, + "learning_rate": 8.73574411156525e-06, + "loss": 29.5733, + "step": 154140 + }, + { + "epoch": 0.31139275282101836, + "grad_norm": 65.66193389892578, + "learning_rate": 8.735512093476968e-06, + "loss": 33.4175, + "step": 154150 + }, + { + "epoch": 0.3114129534537022, + "grad_norm": 162.11863708496094, + "learning_rate": 8.735280057182252e-06, + "loss": 13.7979, + "step": 154160 + }, + { + "epoch": 0.311433154086386, + "grad_norm": 179.59805297851562, + "learning_rate": 8.735048002682233e-06, + "loss": 28.4929, + "step": 154170 + }, + { + "epoch": 0.3114533547190698, + "grad_norm": 291.77435302734375, + "learning_rate": 8.734815929978045e-06, + "loss": 22.1306, + "step": 154180 + }, + { + "epoch": 0.31147355535175364, + "grad_norm": 370.16790771484375, + "learning_rate": 8.734583839070817e-06, + "loss": 20.3002, + "step": 154190 + }, + { + "epoch": 0.31149375598443746, + "grad_norm": 365.54107666015625, + "learning_rate": 
8.73435172996168e-06, + "loss": 30.2541, + "step": 154200 + }, + { + "epoch": 0.3115139566171212, + "grad_norm": 600.3662109375, + "learning_rate": 8.734119602651762e-06, + "loss": 19.5633, + "step": 154210 + }, + { + "epoch": 0.31153415724980504, + "grad_norm": 300.4071960449219, + "learning_rate": 8.733887457142202e-06, + "loss": 20.3158, + "step": 154220 + }, + { + "epoch": 0.31155435788248886, + "grad_norm": 463.6324462890625, + "learning_rate": 8.733655293434127e-06, + "loss": 28.0891, + "step": 154230 + }, + { + "epoch": 0.3115745585151727, + "grad_norm": 286.9560852050781, + "learning_rate": 8.733423111528667e-06, + "loss": 16.2501, + "step": 154240 + }, + { + "epoch": 0.3115947591478565, + "grad_norm": 395.87042236328125, + "learning_rate": 8.733190911426957e-06, + "loss": 16.5527, + "step": 154250 + }, + { + "epoch": 0.3116149597805403, + "grad_norm": 307.3493957519531, + "learning_rate": 8.732958693130128e-06, + "loss": 25.7269, + "step": 154260 + }, + { + "epoch": 0.31163516041322414, + "grad_norm": 596.006591796875, + "learning_rate": 8.73272645663931e-06, + "loss": 18.0265, + "step": 154270 + }, + { + "epoch": 0.31165536104590796, + "grad_norm": 510.0229797363281, + "learning_rate": 8.732494201955636e-06, + "loss": 18.1334, + "step": 154280 + }, + { + "epoch": 0.3116755616785918, + "grad_norm": 475.0591125488281, + "learning_rate": 8.732261929080239e-06, + "loss": 8.0178, + "step": 154290 + }, + { + "epoch": 0.3116957623112756, + "grad_norm": 210.19642639160156, + "learning_rate": 8.732029638014249e-06, + "loss": 15.1319, + "step": 154300 + }, + { + "epoch": 0.3117159629439594, + "grad_norm": 445.4887390136719, + "learning_rate": 8.7317973287588e-06, + "loss": 29.9532, + "step": 154310 + }, + { + "epoch": 0.31173616357664324, + "grad_norm": 353.43048095703125, + "learning_rate": 8.73156500131502e-06, + "loss": 33.7921, + "step": 154320 + }, + { + "epoch": 0.31175636420932706, + "grad_norm": 545.59716796875, + "learning_rate": 8.73133265568405e-06, + "loss": 18.2144, + "step": 154330 + }, + { + "epoch": 0.31177656484201083, + "grad_norm": 366.07177734375, + "learning_rate": 8.731100291867013e-06, + "loss": 16.1114, + "step": 154340 + }, + { + "epoch": 0.31179676547469465, + "grad_norm": 157.15353393554688, + "learning_rate": 8.730867909865048e-06, + "loss": 31.1077, + "step": 154350 + }, + { + "epoch": 0.31181696610737847, + "grad_norm": 177.85256958007812, + "learning_rate": 8.730635509679286e-06, + "loss": 34.1657, + "step": 154360 + }, + { + "epoch": 0.3118371667400623, + "grad_norm": 144.4081573486328, + "learning_rate": 8.730403091310857e-06, + "loss": 16.4859, + "step": 154370 + }, + { + "epoch": 0.3118573673727461, + "grad_norm": 847.1827392578125, + "learning_rate": 8.730170654760896e-06, + "loss": 25.0965, + "step": 154380 + }, + { + "epoch": 0.31187756800542993, + "grad_norm": 388.1310119628906, + "learning_rate": 8.729938200030537e-06, + "loss": 19.9993, + "step": 154390 + }, + { + "epoch": 0.31189776863811375, + "grad_norm": 419.26690673828125, + "learning_rate": 8.729705727120911e-06, + "loss": 19.7634, + "step": 154400 + }, + { + "epoch": 0.31191796927079757, + "grad_norm": 433.9167785644531, + "learning_rate": 8.729473236033152e-06, + "loss": 20.6485, + "step": 154410 + }, + { + "epoch": 0.3119381699034814, + "grad_norm": 179.41015625, + "learning_rate": 8.729240726768393e-06, + "loss": 21.4454, + "step": 154420 + }, + { + "epoch": 0.3119583705361652, + "grad_norm": 657.5485229492188, + "learning_rate": 8.729008199327767e-06, + "loss": 29.4549, + "step": 154430 + 
}, + { + "epoch": 0.31197857116884903, + "grad_norm": 137.22616577148438, + "learning_rate": 8.728775653712405e-06, + "loss": 20.9253, + "step": 154440 + }, + { + "epoch": 0.31199877180153285, + "grad_norm": 252.69107055664062, + "learning_rate": 8.728543089923444e-06, + "loss": 24.214, + "step": 154450 + }, + { + "epoch": 0.3120189724342166, + "grad_norm": 839.6799926757812, + "learning_rate": 8.728310507962016e-06, + "loss": 19.3649, + "step": 154460 + }, + { + "epoch": 0.31203917306690043, + "grad_norm": 434.42059326171875, + "learning_rate": 8.728077907829256e-06, + "loss": 42.9605, + "step": 154470 + }, + { + "epoch": 0.31205937369958425, + "grad_norm": 134.92294311523438, + "learning_rate": 8.727845289526296e-06, + "loss": 8.4402, + "step": 154480 + }, + { + "epoch": 0.3120795743322681, + "grad_norm": 30.486722946166992, + "learning_rate": 8.72761265305427e-06, + "loss": 20.2543, + "step": 154490 + }, + { + "epoch": 0.3120997749649519, + "grad_norm": 487.7585754394531, + "learning_rate": 8.727379998414311e-06, + "loss": 26.7082, + "step": 154500 + }, + { + "epoch": 0.3121199755976357, + "grad_norm": 501.4851379394531, + "learning_rate": 8.727147325607556e-06, + "loss": 25.2665, + "step": 154510 + }, + { + "epoch": 0.31214017623031953, + "grad_norm": 593.8209838867188, + "learning_rate": 8.726914634635136e-06, + "loss": 59.8455, + "step": 154520 + }, + { + "epoch": 0.31216037686300335, + "grad_norm": 109.87444305419922, + "learning_rate": 8.726681925498187e-06, + "loss": 19.6351, + "step": 154530 + }, + { + "epoch": 0.3121805774956872, + "grad_norm": 569.3522338867188, + "learning_rate": 8.72644919819784e-06, + "loss": 18.8903, + "step": 154540 + }, + { + "epoch": 0.312200778128371, + "grad_norm": 112.68116760253906, + "learning_rate": 8.726216452735233e-06, + "loss": 15.7711, + "step": 154550 + }, + { + "epoch": 0.3122209787610548, + "grad_norm": 271.089599609375, + "learning_rate": 8.725983689111499e-06, + "loss": 14.8479, + "step": 154560 + }, + { + "epoch": 0.31224117939373863, + "grad_norm": 431.1347961425781, + "learning_rate": 8.725750907327772e-06, + "loss": 26.109, + "step": 154570 + }, + { + "epoch": 0.31226138002642245, + "grad_norm": 448.3652648925781, + "learning_rate": 8.725518107385188e-06, + "loss": 26.0147, + "step": 154580 + }, + { + "epoch": 0.3122815806591062, + "grad_norm": 316.93524169921875, + "learning_rate": 8.725285289284879e-06, + "loss": 33.8821, + "step": 154590 + }, + { + "epoch": 0.31230178129179004, + "grad_norm": 330.87933349609375, + "learning_rate": 8.725052453027982e-06, + "loss": 15.7643, + "step": 154600 + }, + { + "epoch": 0.31232198192447386, + "grad_norm": 209.00180053710938, + "learning_rate": 8.72481959861563e-06, + "loss": 23.5942, + "step": 154610 + }, + { + "epoch": 0.3123421825571577, + "grad_norm": 428.0066833496094, + "learning_rate": 8.72458672604896e-06, + "loss": 24.2969, + "step": 154620 + }, + { + "epoch": 0.3123623831898415, + "grad_norm": 778.66552734375, + "learning_rate": 8.724353835329107e-06, + "loss": 40.4456, + "step": 154630 + }, + { + "epoch": 0.3123825838225253, + "grad_norm": 392.69287109375, + "learning_rate": 8.724120926457205e-06, + "loss": 22.3423, + "step": 154640 + }, + { + "epoch": 0.31240278445520914, + "grad_norm": 720.3321533203125, + "learning_rate": 8.723887999434389e-06, + "loss": 29.4737, + "step": 154650 + }, + { + "epoch": 0.31242298508789296, + "grad_norm": 296.5901794433594, + "learning_rate": 8.723655054261792e-06, + "loss": 22.3836, + "step": 154660 + }, + { + "epoch": 0.3124431857205768, + 
"grad_norm": 352.9546203613281, + "learning_rate": 8.723422090940556e-06, + "loss": 24.9276, + "step": 154670 + }, + { + "epoch": 0.3124633863532606, + "grad_norm": 666.4312133789062, + "learning_rate": 8.72318910947181e-06, + "loss": 23.2714, + "step": 154680 + }, + { + "epoch": 0.3124835869859444, + "grad_norm": 682.2530517578125, + "learning_rate": 8.722956109856693e-06, + "loss": 25.6024, + "step": 154690 + }, + { + "epoch": 0.31250378761862824, + "grad_norm": 338.8300476074219, + "learning_rate": 8.722723092096337e-06, + "loss": 29.3265, + "step": 154700 + }, + { + "epoch": 0.31252398825131206, + "grad_norm": 135.53916931152344, + "learning_rate": 8.722490056191884e-06, + "loss": 17.6037, + "step": 154710 + }, + { + "epoch": 0.3125441888839958, + "grad_norm": 253.51405334472656, + "learning_rate": 8.722257002144462e-06, + "loss": 27.952, + "step": 154720 + }, + { + "epoch": 0.31256438951667964, + "grad_norm": 356.64166259765625, + "learning_rate": 8.722023929955213e-06, + "loss": 22.1669, + "step": 154730 + }, + { + "epoch": 0.31258459014936346, + "grad_norm": 321.4786376953125, + "learning_rate": 8.72179083962527e-06, + "loss": 19.3403, + "step": 154740 + }, + { + "epoch": 0.3126047907820473, + "grad_norm": 240.9815216064453, + "learning_rate": 8.72155773115577e-06, + "loss": 16.3145, + "step": 154750 + }, + { + "epoch": 0.3126249914147311, + "grad_norm": 441.646728515625, + "learning_rate": 8.721324604547851e-06, + "loss": 31.5702, + "step": 154760 + }, + { + "epoch": 0.3126451920474149, + "grad_norm": 379.2781066894531, + "learning_rate": 8.721091459802646e-06, + "loss": 27.9705, + "step": 154770 + }, + { + "epoch": 0.31266539268009874, + "grad_norm": 1475.4915771484375, + "learning_rate": 8.72085829692129e-06, + "loss": 23.9081, + "step": 154780 + }, + { + "epoch": 0.31268559331278256, + "grad_norm": 169.7453155517578, + "learning_rate": 8.720625115904927e-06, + "loss": 22.1654, + "step": 154790 + }, + { + "epoch": 0.3127057939454664, + "grad_norm": 100.61199188232422, + "learning_rate": 8.720391916754683e-06, + "loss": 40.4935, + "step": 154800 + }, + { + "epoch": 0.3127259945781502, + "grad_norm": 683.3108520507812, + "learning_rate": 8.720158699471704e-06, + "loss": 53.2084, + "step": 154810 + }, + { + "epoch": 0.312746195210834, + "grad_norm": 250.6405792236328, + "learning_rate": 8.71992546405712e-06, + "loss": 23.5227, + "step": 154820 + }, + { + "epoch": 0.31276639584351784, + "grad_norm": 421.2986145019531, + "learning_rate": 8.719692210512072e-06, + "loss": 16.4337, + "step": 154830 + }, + { + "epoch": 0.31278659647620166, + "grad_norm": 145.6516876220703, + "learning_rate": 8.719458938837695e-06, + "loss": 20.4812, + "step": 154840 + }, + { + "epoch": 0.3128067971088854, + "grad_norm": 173.38214111328125, + "learning_rate": 8.719225649035126e-06, + "loss": 16.6189, + "step": 154850 + }, + { + "epoch": 0.31282699774156925, + "grad_norm": 566.532958984375, + "learning_rate": 8.718992341105503e-06, + "loss": 24.0527, + "step": 154860 + }, + { + "epoch": 0.31284719837425307, + "grad_norm": 234.0479278564453, + "learning_rate": 8.718759015049963e-06, + "loss": 18.3841, + "step": 154870 + }, + { + "epoch": 0.3128673990069369, + "grad_norm": 297.26959228515625, + "learning_rate": 8.71852567086964e-06, + "loss": 23.2053, + "step": 154880 + }, + { + "epoch": 0.3128875996396207, + "grad_norm": 723.6534423828125, + "learning_rate": 8.718292308565675e-06, + "loss": 29.5211, + "step": 154890 + }, + { + "epoch": 0.3129078002723045, + "grad_norm": 116.78231048583984, + "learning_rate": 
8.718058928139205e-06, + "loss": 29.8787, + "step": 154900 + }, + { + "epoch": 0.31292800090498835, + "grad_norm": 1550.7498779296875, + "learning_rate": 8.717825529591367e-06, + "loss": 20.1995, + "step": 154910 + }, + { + "epoch": 0.31294820153767217, + "grad_norm": 245.3291015625, + "learning_rate": 8.717592112923296e-06, + "loss": 22.0477, + "step": 154920 + }, + { + "epoch": 0.312968402170356, + "grad_norm": 822.2991333007812, + "learning_rate": 8.717358678136133e-06, + "loss": 33.4508, + "step": 154930 + }, + { + "epoch": 0.3129886028030398, + "grad_norm": 551.5245361328125, + "learning_rate": 8.717125225231018e-06, + "loss": 18.003, + "step": 154940 + }, + { + "epoch": 0.3130088034357236, + "grad_norm": 425.37152099609375, + "learning_rate": 8.716891754209081e-06, + "loss": 32.9085, + "step": 154950 + }, + { + "epoch": 0.31302900406840745, + "grad_norm": 338.026611328125, + "learning_rate": 8.716658265071467e-06, + "loss": 50.6049, + "step": 154960 + }, + { + "epoch": 0.3130492047010912, + "grad_norm": 283.1708984375, + "learning_rate": 8.71642475781931e-06, + "loss": 12.3127, + "step": 154970 + }, + { + "epoch": 0.31306940533377503, + "grad_norm": 272.32489013671875, + "learning_rate": 8.71619123245375e-06, + "loss": 13.3999, + "step": 154980 + }, + { + "epoch": 0.31308960596645885, + "grad_norm": 2465.364990234375, + "learning_rate": 8.715957688975925e-06, + "loss": 29.2973, + "step": 154990 + }, + { + "epoch": 0.31310980659914267, + "grad_norm": 1328.699462890625, + "learning_rate": 8.715724127386971e-06, + "loss": 43.5636, + "step": 155000 + }, + { + "epoch": 0.3131300072318265, + "grad_norm": 480.14715576171875, + "learning_rate": 8.71549054768803e-06, + "loss": 38.8249, + "step": 155010 + }, + { + "epoch": 0.3131502078645103, + "grad_norm": 385.1468505859375, + "learning_rate": 8.715256949880239e-06, + "loss": 33.3817, + "step": 155020 + }, + { + "epoch": 0.31317040849719413, + "grad_norm": 245.51141357421875, + "learning_rate": 8.715023333964737e-06, + "loss": 21.2037, + "step": 155030 + }, + { + "epoch": 0.31319060912987795, + "grad_norm": 384.6320495605469, + "learning_rate": 8.714789699942659e-06, + "loss": 17.2748, + "step": 155040 + }, + { + "epoch": 0.31321080976256177, + "grad_norm": 239.67886352539062, + "learning_rate": 8.714556047815148e-06, + "loss": 19.1228, + "step": 155050 + }, + { + "epoch": 0.3132310103952456, + "grad_norm": 64.51019287109375, + "learning_rate": 8.714322377583341e-06, + "loss": 10.7204, + "step": 155060 + }, + { + "epoch": 0.3132512110279294, + "grad_norm": 570.5287475585938, + "learning_rate": 8.714088689248379e-06, + "loss": 23.9701, + "step": 155070 + }, + { + "epoch": 0.31327141166061323, + "grad_norm": 487.402099609375, + "learning_rate": 8.713854982811398e-06, + "loss": 24.8807, + "step": 155080 + }, + { + "epoch": 0.31329161229329705, + "grad_norm": 304.18206787109375, + "learning_rate": 8.713621258273539e-06, + "loss": 37.0923, + "step": 155090 + }, + { + "epoch": 0.3133118129259808, + "grad_norm": 369.0475158691406, + "learning_rate": 8.713387515635938e-06, + "loss": 14.3489, + "step": 155100 + }, + { + "epoch": 0.31333201355866463, + "grad_norm": 69.50149536132812, + "learning_rate": 8.713153754899738e-06, + "loss": 16.3796, + "step": 155110 + }, + { + "epoch": 0.31335221419134845, + "grad_norm": 404.92431640625, + "learning_rate": 8.712919976066078e-06, + "loss": 24.8467, + "step": 155120 + }, + { + "epoch": 0.3133724148240323, + "grad_norm": 206.7754669189453, + "learning_rate": 8.712686179136097e-06, + "loss": 16.1075, + "step": 
155130 + }, + { + "epoch": 0.3133926154567161, + "grad_norm": 358.1948547363281, + "learning_rate": 8.712452364110931e-06, + "loss": 16.4383, + "step": 155140 + }, + { + "epoch": 0.3134128160893999, + "grad_norm": 621.1553955078125, + "learning_rate": 8.712218530991723e-06, + "loss": 22.5428, + "step": 155150 + }, + { + "epoch": 0.31343301672208373, + "grad_norm": 247.3667755126953, + "learning_rate": 8.711984679779612e-06, + "loss": 37.8268, + "step": 155160 + }, + { + "epoch": 0.31345321735476755, + "grad_norm": 483.7298889160156, + "learning_rate": 8.71175081047574e-06, + "loss": 15.6308, + "step": 155170 + }, + { + "epoch": 0.3134734179874514, + "grad_norm": 719.111572265625, + "learning_rate": 8.711516923081244e-06, + "loss": 23.1817, + "step": 155180 + }, + { + "epoch": 0.3134936186201352, + "grad_norm": 56.7630500793457, + "learning_rate": 8.711283017597265e-06, + "loss": 15.7026, + "step": 155190 + }, + { + "epoch": 0.313513819252819, + "grad_norm": 712.2754516601562, + "learning_rate": 8.711049094024942e-06, + "loss": 41.0088, + "step": 155200 + }, + { + "epoch": 0.31353401988550283, + "grad_norm": 283.11627197265625, + "learning_rate": 8.710815152365416e-06, + "loss": 23.9812, + "step": 155210 + }, + { + "epoch": 0.31355422051818665, + "grad_norm": 137.64024353027344, + "learning_rate": 8.710581192619824e-06, + "loss": 15.8262, + "step": 155220 + }, + { + "epoch": 0.3135744211508704, + "grad_norm": 225.41720581054688, + "learning_rate": 8.710347214789313e-06, + "loss": 20.5877, + "step": 155230 + }, + { + "epoch": 0.31359462178355424, + "grad_norm": 300.43157958984375, + "learning_rate": 8.710113218875018e-06, + "loss": 16.573, + "step": 155240 + }, + { + "epoch": 0.31361482241623806, + "grad_norm": 427.4569091796875, + "learning_rate": 8.709879204878082e-06, + "loss": 25.7318, + "step": 155250 + }, + { + "epoch": 0.3136350230489219, + "grad_norm": 332.05511474609375, + "learning_rate": 8.709645172799646e-06, + "loss": 22.4278, + "step": 155260 + }, + { + "epoch": 0.3136552236816057, + "grad_norm": 748.4037475585938, + "learning_rate": 8.709411122640847e-06, + "loss": 32.8311, + "step": 155270 + }, + { + "epoch": 0.3136754243142895, + "grad_norm": 590.763671875, + "learning_rate": 8.709177054402829e-06, + "loss": 26.0616, + "step": 155280 + }, + { + "epoch": 0.31369562494697334, + "grad_norm": 405.1456604003906, + "learning_rate": 8.708942968086733e-06, + "loss": 18.7855, + "step": 155290 + }, + { + "epoch": 0.31371582557965716, + "grad_norm": 733.8826293945312, + "learning_rate": 8.708708863693696e-06, + "loss": 24.3785, + "step": 155300 + }, + { + "epoch": 0.313736026212341, + "grad_norm": 412.6300964355469, + "learning_rate": 8.708474741224863e-06, + "loss": 27.3805, + "step": 155310 + }, + { + "epoch": 0.3137562268450248, + "grad_norm": 884.5291748046875, + "learning_rate": 8.708240600681375e-06, + "loss": 23.0809, + "step": 155320 + }, + { + "epoch": 0.3137764274777086, + "grad_norm": 214.19802856445312, + "learning_rate": 8.708006442064373e-06, + "loss": 11.6219, + "step": 155330 + }, + { + "epoch": 0.31379662811039244, + "grad_norm": 296.8635559082031, + "learning_rate": 8.707772265374994e-06, + "loss": 20.7415, + "step": 155340 + }, + { + "epoch": 0.31381682874307626, + "grad_norm": 322.7183837890625, + "learning_rate": 8.707538070614385e-06, + "loss": 33.076, + "step": 155350 + }, + { + "epoch": 0.31383702937576, + "grad_norm": 132.36392211914062, + "learning_rate": 8.707303857783685e-06, + "loss": 29.2217, + "step": 155360 + }, + { + "epoch": 0.31385723000844384, + 
"grad_norm": 518.3986206054688, + "learning_rate": 8.707069626884034e-06, + "loss": 24.5106, + "step": 155370 + }, + { + "epoch": 0.31387743064112766, + "grad_norm": 303.0432434082031, + "learning_rate": 8.706835377916579e-06, + "loss": 18.2479, + "step": 155380 + }, + { + "epoch": 0.3138976312738115, + "grad_norm": 62.68539810180664, + "learning_rate": 8.706601110882456e-06, + "loss": 13.2302, + "step": 155390 + }, + { + "epoch": 0.3139178319064953, + "grad_norm": 627.250244140625, + "learning_rate": 8.706366825782805e-06, + "loss": 20.2127, + "step": 155400 + }, + { + "epoch": 0.3139380325391791, + "grad_norm": 548.8335571289062, + "learning_rate": 8.706132522618777e-06, + "loss": 29.7295, + "step": 155410 + }, + { + "epoch": 0.31395823317186294, + "grad_norm": 392.1265869140625, + "learning_rate": 8.705898201391504e-06, + "loss": 23.7052, + "step": 155420 + }, + { + "epoch": 0.31397843380454676, + "grad_norm": 507.8823547363281, + "learning_rate": 8.705663862102137e-06, + "loss": 28.8804, + "step": 155430 + }, + { + "epoch": 0.3139986344372306, + "grad_norm": 397.531982421875, + "learning_rate": 8.705429504751813e-06, + "loss": 33.2689, + "step": 155440 + }, + { + "epoch": 0.3140188350699144, + "grad_norm": 248.19107055664062, + "learning_rate": 8.705195129341672e-06, + "loss": 30.2313, + "step": 155450 + }, + { + "epoch": 0.3140390357025982, + "grad_norm": 411.1767883300781, + "learning_rate": 8.704960735872862e-06, + "loss": 29.2826, + "step": 155460 + }, + { + "epoch": 0.31405923633528204, + "grad_norm": 527.8690185546875, + "learning_rate": 8.704726324346521e-06, + "loss": 28.9641, + "step": 155470 + }, + { + "epoch": 0.31407943696796586, + "grad_norm": 317.4735107421875, + "learning_rate": 8.704491894763794e-06, + "loss": 17.0169, + "step": 155480 + }, + { + "epoch": 0.31409963760064963, + "grad_norm": 293.9752197265625, + "learning_rate": 8.704257447125823e-06, + "loss": 24.3327, + "step": 155490 + }, + { + "epoch": 0.31411983823333345, + "grad_norm": 558.1671142578125, + "learning_rate": 8.70402298143375e-06, + "loss": 21.332, + "step": 155500 + }, + { + "epoch": 0.31414003886601727, + "grad_norm": 548.06103515625, + "learning_rate": 8.70378849768872e-06, + "loss": 17.1295, + "step": 155510 + }, + { + "epoch": 0.3141602394987011, + "grad_norm": 432.9718933105469, + "learning_rate": 8.703553995891873e-06, + "loss": 19.9746, + "step": 155520 + }, + { + "epoch": 0.3141804401313849, + "grad_norm": 476.7720031738281, + "learning_rate": 8.703319476044352e-06, + "loss": 21.3201, + "step": 155530 + }, + { + "epoch": 0.31420064076406873, + "grad_norm": 536.4819946289062, + "learning_rate": 8.703084938147302e-06, + "loss": 25.0215, + "step": 155540 + }, + { + "epoch": 0.31422084139675255, + "grad_norm": 423.54718017578125, + "learning_rate": 8.702850382201863e-06, + "loss": 19.4235, + "step": 155550 + }, + { + "epoch": 0.31424104202943637, + "grad_norm": 244.44570922851562, + "learning_rate": 8.702615808209185e-06, + "loss": 12.5618, + "step": 155560 + }, + { + "epoch": 0.3142612426621202, + "grad_norm": 630.3048095703125, + "learning_rate": 8.702381216170404e-06, + "loss": 24.4882, + "step": 155570 + }, + { + "epoch": 0.314281443294804, + "grad_norm": 764.992919921875, + "learning_rate": 8.702146606086665e-06, + "loss": 32.9412, + "step": 155580 + }, + { + "epoch": 0.31430164392748783, + "grad_norm": 280.8818359375, + "learning_rate": 8.701911977959113e-06, + "loss": 16.6502, + "step": 155590 + }, + { + "epoch": 0.31432184456017165, + "grad_norm": 137.41639709472656, + "learning_rate": 
8.701677331788891e-06, + "loss": 24.229, + "step": 155600 + }, + { + "epoch": 0.3143420451928554, + "grad_norm": 241.95132446289062, + "learning_rate": 8.701442667577143e-06, + "loss": 9.6414, + "step": 155610 + }, + { + "epoch": 0.31436224582553923, + "grad_norm": 600.4203491210938, + "learning_rate": 8.701207985325013e-06, + "loss": 19.5094, + "step": 155620 + }, + { + "epoch": 0.31438244645822305, + "grad_norm": 502.0248107910156, + "learning_rate": 8.700973285033642e-06, + "loss": 25.2597, + "step": 155630 + }, + { + "epoch": 0.3144026470909069, + "grad_norm": 254.17803955078125, + "learning_rate": 8.700738566704178e-06, + "loss": 13.8078, + "step": 155640 + }, + { + "epoch": 0.3144228477235907, + "grad_norm": 402.1114501953125, + "learning_rate": 8.700503830337763e-06, + "loss": 19.0496, + "step": 155650 + }, + { + "epoch": 0.3144430483562745, + "grad_norm": 553.7101440429688, + "learning_rate": 8.700269075935542e-06, + "loss": 20.5768, + "step": 155660 + }, + { + "epoch": 0.31446324898895833, + "grad_norm": 256.2198486328125, + "learning_rate": 8.700034303498657e-06, + "loss": 12.8878, + "step": 155670 + }, + { + "epoch": 0.31448344962164215, + "grad_norm": 497.68817138671875, + "learning_rate": 8.699799513028252e-06, + "loss": 11.7977, + "step": 155680 + }, + { + "epoch": 0.314503650254326, + "grad_norm": 261.4666442871094, + "learning_rate": 8.699564704525477e-06, + "loss": 17.6982, + "step": 155690 + }, + { + "epoch": 0.3145238508870098, + "grad_norm": 243.61245727539062, + "learning_rate": 8.699329877991469e-06, + "loss": 30.5446, + "step": 155700 + }, + { + "epoch": 0.3145440515196936, + "grad_norm": 161.9713592529297, + "learning_rate": 8.699095033427377e-06, + "loss": 20.471, + "step": 155710 + }, + { + "epoch": 0.31456425215237743, + "grad_norm": 230.16290283203125, + "learning_rate": 8.698860170834343e-06, + "loss": 45.0552, + "step": 155720 + }, + { + "epoch": 0.31458445278506125, + "grad_norm": 517.7139282226562, + "learning_rate": 8.698625290213515e-06, + "loss": 15.9301, + "step": 155730 + }, + { + "epoch": 0.314604653417745, + "grad_norm": 758.0924072265625, + "learning_rate": 8.698390391566036e-06, + "loss": 19.9534, + "step": 155740 + }, + { + "epoch": 0.31462485405042884, + "grad_norm": 371.744384765625, + "learning_rate": 8.69815547489305e-06, + "loss": 17.0234, + "step": 155750 + }, + { + "epoch": 0.31464505468311266, + "grad_norm": 658.6950073242188, + "learning_rate": 8.697920540195702e-06, + "loss": 24.9621, + "step": 155760 + }, + { + "epoch": 0.3146652553157965, + "grad_norm": 310.3829040527344, + "learning_rate": 8.697685587475139e-06, + "loss": 30.1228, + "step": 155770 + }, + { + "epoch": 0.3146854559484803, + "grad_norm": 27.588375091552734, + "learning_rate": 8.697450616732503e-06, + "loss": 27.7926, + "step": 155780 + }, + { + "epoch": 0.3147056565811641, + "grad_norm": 847.927490234375, + "learning_rate": 8.697215627968944e-06, + "loss": 25.3589, + "step": 155790 + }, + { + "epoch": 0.31472585721384794, + "grad_norm": 400.6221008300781, + "learning_rate": 8.696980621185602e-06, + "loss": 19.515, + "step": 155800 + }, + { + "epoch": 0.31474605784653176, + "grad_norm": 246.7696075439453, + "learning_rate": 8.696745596383627e-06, + "loss": 15.7324, + "step": 155810 + }, + { + "epoch": 0.3147662584792156, + "grad_norm": 309.1961364746094, + "learning_rate": 8.696510553564162e-06, + "loss": 22.45, + "step": 155820 + }, + { + "epoch": 0.3147864591118994, + "grad_norm": 183.41746520996094, + "learning_rate": 8.696275492728352e-06, + "loss": 28.1009, + "step": 
155830 + }, + { + "epoch": 0.3148066597445832, + "grad_norm": 433.7793273925781, + "learning_rate": 8.696040413877344e-06, + "loss": 29.0775, + "step": 155840 + }, + { + "epoch": 0.31482686037726704, + "grad_norm": 280.98638916015625, + "learning_rate": 8.695805317012283e-06, + "loss": 24.2702, + "step": 155850 + }, + { + "epoch": 0.31484706100995086, + "grad_norm": 333.2210693359375, + "learning_rate": 8.695570202134314e-06, + "loss": 20.1262, + "step": 155860 + }, + { + "epoch": 0.3148672616426346, + "grad_norm": 396.2452697753906, + "learning_rate": 8.695335069244586e-06, + "loss": 16.4875, + "step": 155870 + }, + { + "epoch": 0.31488746227531844, + "grad_norm": 399.8049011230469, + "learning_rate": 8.695099918344243e-06, + "loss": 31.2423, + "step": 155880 + }, + { + "epoch": 0.31490766290800226, + "grad_norm": 294.70062255859375, + "learning_rate": 8.69486474943443e-06, + "loss": 17.1798, + "step": 155890 + }, + { + "epoch": 0.3149278635406861, + "grad_norm": 525.5172729492188, + "learning_rate": 8.694629562516295e-06, + "loss": 25.3958, + "step": 155900 + }, + { + "epoch": 0.3149480641733699, + "grad_norm": 1121.14599609375, + "learning_rate": 8.694394357590982e-06, + "loss": 18.7285, + "step": 155910 + }, + { + "epoch": 0.3149682648060537, + "grad_norm": 525.0504760742188, + "learning_rate": 8.694159134659641e-06, + "loss": 23.9852, + "step": 155920 + }, + { + "epoch": 0.31498846543873754, + "grad_norm": 364.8795166015625, + "learning_rate": 8.693923893723415e-06, + "loss": 18.5837, + "step": 155930 + }, + { + "epoch": 0.31500866607142136, + "grad_norm": 410.02691650390625, + "learning_rate": 8.693688634783453e-06, + "loss": 12.7264, + "step": 155940 + }, + { + "epoch": 0.3150288667041052, + "grad_norm": 133.11660766601562, + "learning_rate": 8.6934533578409e-06, + "loss": 9.3515, + "step": 155950 + }, + { + "epoch": 0.315049067336789, + "grad_norm": 391.3555908203125, + "learning_rate": 8.693218062896905e-06, + "loss": 10.1853, + "step": 155960 + }, + { + "epoch": 0.3150692679694728, + "grad_norm": 590.826416015625, + "learning_rate": 8.692982749952613e-06, + "loss": 22.8992, + "step": 155970 + }, + { + "epoch": 0.31508946860215664, + "grad_norm": 335.4452819824219, + "learning_rate": 8.692747419009168e-06, + "loss": 20.7711, + "step": 155980 + }, + { + "epoch": 0.31510966923484046, + "grad_norm": 464.6659851074219, + "learning_rate": 8.692512070067722e-06, + "loss": 35.9936, + "step": 155990 + }, + { + "epoch": 0.3151298698675242, + "grad_norm": 859.1676635742188, + "learning_rate": 8.692276703129421e-06, + "loss": 18.3248, + "step": 156000 + }, + { + "epoch": 0.31515007050020805, + "grad_norm": 563.6682739257812, + "learning_rate": 8.692041318195409e-06, + "loss": 14.7813, + "step": 156010 + }, + { + "epoch": 0.31517027113289187, + "grad_norm": 423.956298828125, + "learning_rate": 8.691805915266836e-06, + "loss": 18.973, + "step": 156020 + }, + { + "epoch": 0.3151904717655757, + "grad_norm": 572.4943237304688, + "learning_rate": 8.691570494344848e-06, + "loss": 20.4434, + "step": 156030 + }, + { + "epoch": 0.3152106723982595, + "grad_norm": 460.4983825683594, + "learning_rate": 8.691335055430595e-06, + "loss": 19.7382, + "step": 156040 + }, + { + "epoch": 0.3152308730309433, + "grad_norm": 364.9707336425781, + "learning_rate": 8.691099598525222e-06, + "loss": 17.7182, + "step": 156050 + }, + { + "epoch": 0.31525107366362715, + "grad_norm": 305.00274658203125, + "learning_rate": 8.690864123629876e-06, + "loss": 32.178, + "step": 156060 + }, + { + "epoch": 0.31527127429631097, + 
"grad_norm": 536.468994140625, + "learning_rate": 8.690628630745708e-06, + "loss": 22.6808, + "step": 156070 + }, + { + "epoch": 0.3152914749289948, + "grad_norm": 636.2994384765625, + "learning_rate": 8.690393119873863e-06, + "loss": 19.5498, + "step": 156080 + }, + { + "epoch": 0.3153116755616786, + "grad_norm": 181.8765106201172, + "learning_rate": 8.690157591015488e-06, + "loss": 30.41, + "step": 156090 + }, + { + "epoch": 0.3153318761943624, + "grad_norm": 500.489990234375, + "learning_rate": 8.689922044171735e-06, + "loss": 15.6294, + "step": 156100 + }, + { + "epoch": 0.31535207682704625, + "grad_norm": 278.20111083984375, + "learning_rate": 8.689686479343747e-06, + "loss": 18.7786, + "step": 156110 + }, + { + "epoch": 0.31537227745973007, + "grad_norm": 468.1935729980469, + "learning_rate": 8.689450896532675e-06, + "loss": 11.8421, + "step": 156120 + }, + { + "epoch": 0.31539247809241383, + "grad_norm": 126.52347564697266, + "learning_rate": 8.689215295739669e-06, + "loss": 20.7499, + "step": 156130 + }, + { + "epoch": 0.31541267872509765, + "grad_norm": 160.05636596679688, + "learning_rate": 8.688979676965872e-06, + "loss": 13.2907, + "step": 156140 + }, + { + "epoch": 0.31543287935778147, + "grad_norm": 570.4967041015625, + "learning_rate": 8.688744040212438e-06, + "loss": 12.3124, + "step": 156150 + }, + { + "epoch": 0.3154530799904653, + "grad_norm": 454.1628112792969, + "learning_rate": 8.688508385480513e-06, + "loss": 30.6661, + "step": 156160 + }, + { + "epoch": 0.3154732806231491, + "grad_norm": 20.657920837402344, + "learning_rate": 8.688272712771243e-06, + "loss": 11.9086, + "step": 156170 + }, + { + "epoch": 0.31549348125583293, + "grad_norm": 935.6939086914062, + "learning_rate": 8.688037022085783e-06, + "loss": 18.0572, + "step": 156180 + }, + { + "epoch": 0.31551368188851675, + "grad_norm": 279.9620666503906, + "learning_rate": 8.687801313425275e-06, + "loss": 34.7209, + "step": 156190 + }, + { + "epoch": 0.31553388252120057, + "grad_norm": 215.29830932617188, + "learning_rate": 8.68756558679087e-06, + "loss": 14.8009, + "step": 156200 + }, + { + "epoch": 0.3155540831538844, + "grad_norm": 438.32879638671875, + "learning_rate": 8.68732984218372e-06, + "loss": 19.9984, + "step": 156210 + }, + { + "epoch": 0.3155742837865682, + "grad_norm": 1304.1524658203125, + "learning_rate": 8.68709407960497e-06, + "loss": 34.2655, + "step": 156220 + }, + { + "epoch": 0.31559448441925203, + "grad_norm": 35.739952087402344, + "learning_rate": 8.68685829905577e-06, + "loss": 20.0038, + "step": 156230 + }, + { + "epoch": 0.31561468505193585, + "grad_norm": 152.79037475585938, + "learning_rate": 8.686622500537272e-06, + "loss": 15.1975, + "step": 156240 + }, + { + "epoch": 0.3156348856846196, + "grad_norm": 212.28329467773438, + "learning_rate": 8.68638668405062e-06, + "loss": 14.6955, + "step": 156250 + }, + { + "epoch": 0.31565508631730343, + "grad_norm": 85.4367904663086, + "learning_rate": 8.68615084959697e-06, + "loss": 19.7354, + "step": 156260 + }, + { + "epoch": 0.31567528694998725, + "grad_norm": 764.028564453125, + "learning_rate": 8.685914997177465e-06, + "loss": 31.5282, + "step": 156270 + }, + { + "epoch": 0.3156954875826711, + "grad_norm": 414.4305114746094, + "learning_rate": 8.685679126793258e-06, + "loss": 23.7288, + "step": 156280 + }, + { + "epoch": 0.3157156882153549, + "grad_norm": 265.472412109375, + "learning_rate": 8.6854432384455e-06, + "loss": 18.5333, + "step": 156290 + }, + { + "epoch": 0.3157358888480387, + "grad_norm": 460.2200012207031, + "learning_rate": 
8.685207332135337e-06, + "loss": 23.1256, + "step": 156300 + }, + { + "epoch": 0.31575608948072253, + "grad_norm": 353.0555114746094, + "learning_rate": 8.68497140786392e-06, + "loss": 23.9941, + "step": 156310 + }, + { + "epoch": 0.31577629011340635, + "grad_norm": 492.4260559082031, + "learning_rate": 8.6847354656324e-06, + "loss": 21.3439, + "step": 156320 + }, + { + "epoch": 0.3157964907460902, + "grad_norm": 295.3174743652344, + "learning_rate": 8.684499505441926e-06, + "loss": 21.2145, + "step": 156330 + }, + { + "epoch": 0.315816691378774, + "grad_norm": 168.670654296875, + "learning_rate": 8.684263527293649e-06, + "loss": 29.3661, + "step": 156340 + }, + { + "epoch": 0.3158368920114578, + "grad_norm": 263.1680603027344, + "learning_rate": 8.684027531188717e-06, + "loss": 32.4153, + "step": 156350 + }, + { + "epoch": 0.31585709264414163, + "grad_norm": 364.7283935546875, + "learning_rate": 8.683791517128282e-06, + "loss": 16.3072, + "step": 156360 + }, + { + "epoch": 0.31587729327682545, + "grad_norm": 477.0263977050781, + "learning_rate": 8.683555485113493e-06, + "loss": 29.4266, + "step": 156370 + }, + { + "epoch": 0.3158974939095092, + "grad_norm": 545.2871704101562, + "learning_rate": 8.683319435145503e-06, + "loss": 29.2442, + "step": 156380 + }, + { + "epoch": 0.31591769454219304, + "grad_norm": 244.9238739013672, + "learning_rate": 8.683083367225461e-06, + "loss": 25.3003, + "step": 156390 + }, + { + "epoch": 0.31593789517487686, + "grad_norm": 204.55955505371094, + "learning_rate": 8.682847281354517e-06, + "loss": 43.7771, + "step": 156400 + }, + { + "epoch": 0.3159580958075607, + "grad_norm": 327.5174865722656, + "learning_rate": 8.682611177533822e-06, + "loss": 11.0926, + "step": 156410 + }, + { + "epoch": 0.3159782964402445, + "grad_norm": 512.2208862304688, + "learning_rate": 8.682375055764528e-06, + "loss": 25.5094, + "step": 156420 + }, + { + "epoch": 0.3159984970729283, + "grad_norm": 506.9759826660156, + "learning_rate": 8.682138916047782e-06, + "loss": 53.1913, + "step": 156430 + }, + { + "epoch": 0.31601869770561214, + "grad_norm": 133.79246520996094, + "learning_rate": 8.681902758384738e-06, + "loss": 22.5905, + "step": 156440 + }, + { + "epoch": 0.31603889833829596, + "grad_norm": 601.0071411132812, + "learning_rate": 8.681666582776547e-06, + "loss": 26.0011, + "step": 156450 + }, + { + "epoch": 0.3160590989709798, + "grad_norm": 455.7332458496094, + "learning_rate": 8.68143038922436e-06, + "loss": 12.9082, + "step": 156460 + }, + { + "epoch": 0.3160792996036636, + "grad_norm": 870.0426025390625, + "learning_rate": 8.681194177729328e-06, + "loss": 23.8988, + "step": 156470 + }, + { + "epoch": 0.3160995002363474, + "grad_norm": 982.4699096679688, + "learning_rate": 8.680957948292602e-06, + "loss": 19.0372, + "step": 156480 + }, + { + "epoch": 0.31611970086903124, + "grad_norm": 412.22589111328125, + "learning_rate": 8.680721700915333e-06, + "loss": 18.3669, + "step": 156490 + }, + { + "epoch": 0.31613990150171506, + "grad_norm": 364.1700439453125, + "learning_rate": 8.680485435598674e-06, + "loss": 15.7197, + "step": 156500 + }, + { + "epoch": 0.3161601021343988, + "grad_norm": 339.00054931640625, + "learning_rate": 8.680249152343772e-06, + "loss": 12.7125, + "step": 156510 + }, + { + "epoch": 0.31618030276708264, + "grad_norm": 782.67236328125, + "learning_rate": 8.680012851151785e-06, + "loss": 19.3984, + "step": 156520 + }, + { + "epoch": 0.31620050339976646, + "grad_norm": 572.0099487304688, + "learning_rate": 8.679776532023861e-06, + "loss": 23.4448, + 
"step": 156530 + }, + { + "epoch": 0.3162207040324503, + "grad_norm": 429.39044189453125, + "learning_rate": 8.679540194961153e-06, + "loss": 20.0682, + "step": 156540 + }, + { + "epoch": 0.3162409046651341, + "grad_norm": 282.64654541015625, + "learning_rate": 8.679303839964811e-06, + "loss": 31.609, + "step": 156550 + }, + { + "epoch": 0.3162611052978179, + "grad_norm": 724.362060546875, + "learning_rate": 8.679067467035989e-06, + "loss": 29.2453, + "step": 156560 + }, + { + "epoch": 0.31628130593050174, + "grad_norm": 787.4326171875, + "learning_rate": 8.678831076175838e-06, + "loss": 20.3108, + "step": 156570 + }, + { + "epoch": 0.31630150656318556, + "grad_norm": 312.4324645996094, + "learning_rate": 8.678594667385511e-06, + "loss": 18.152, + "step": 156580 + }, + { + "epoch": 0.3163217071958694, + "grad_norm": 498.779541015625, + "learning_rate": 8.67835824066616e-06, + "loss": 19.3695, + "step": 156590 + }, + { + "epoch": 0.3163419078285532, + "grad_norm": 335.0408020019531, + "learning_rate": 8.678121796018938e-06, + "loss": 36.8111, + "step": 156600 + }, + { + "epoch": 0.316362108461237, + "grad_norm": 416.4246520996094, + "learning_rate": 8.677885333444995e-06, + "loss": 20.3388, + "step": 156610 + }, + { + "epoch": 0.31638230909392084, + "grad_norm": 197.67642211914062, + "learning_rate": 8.677648852945486e-06, + "loss": 11.6373, + "step": 156620 + }, + { + "epoch": 0.31640250972660466, + "grad_norm": 434.5162048339844, + "learning_rate": 8.677412354521561e-06, + "loss": 21.7159, + "step": 156630 + }, + { + "epoch": 0.31642271035928843, + "grad_norm": 223.23451232910156, + "learning_rate": 8.677175838174374e-06, + "loss": 13.2333, + "step": 156640 + }, + { + "epoch": 0.31644291099197225, + "grad_norm": 374.6792907714844, + "learning_rate": 8.67693930390508e-06, + "loss": 26.7893, + "step": 156650 + }, + { + "epoch": 0.31646311162465607, + "grad_norm": 740.7330932617188, + "learning_rate": 8.676702751714829e-06, + "loss": 17.3265, + "step": 156660 + }, + { + "epoch": 0.3164833122573399, + "grad_norm": 722.9887084960938, + "learning_rate": 8.676466181604775e-06, + "loss": 34.9863, + "step": 156670 + }, + { + "epoch": 0.3165035128900237, + "grad_norm": 618.39697265625, + "learning_rate": 8.67622959357607e-06, + "loss": 31.7607, + "step": 156680 + }, + { + "epoch": 0.31652371352270753, + "grad_norm": 210.58859252929688, + "learning_rate": 8.675992987629869e-06, + "loss": 12.8285, + "step": 156690 + }, + { + "epoch": 0.31654391415539135, + "grad_norm": 192.90066528320312, + "learning_rate": 8.675756363767322e-06, + "loss": 19.3255, + "step": 156700 + }, + { + "epoch": 0.31656411478807517, + "grad_norm": 290.7137451171875, + "learning_rate": 8.675519721989585e-06, + "loss": 30.7223, + "step": 156710 + }, + { + "epoch": 0.316584315420759, + "grad_norm": 17.167661666870117, + "learning_rate": 8.675283062297811e-06, + "loss": 18.0766, + "step": 156720 + }, + { + "epoch": 0.3166045160534428, + "grad_norm": 453.1914978027344, + "learning_rate": 8.675046384693154e-06, + "loss": 28.4383, + "step": 156730 + }, + { + "epoch": 0.31662471668612663, + "grad_norm": 374.5320129394531, + "learning_rate": 8.674809689176765e-06, + "loss": 36.8366, + "step": 156740 + }, + { + "epoch": 0.31664491731881045, + "grad_norm": 351.17926025390625, + "learning_rate": 8.6745729757498e-06, + "loss": 23.2534, + "step": 156750 + }, + { + "epoch": 0.31666511795149427, + "grad_norm": 454.0247497558594, + "learning_rate": 8.674336244413413e-06, + "loss": 21.8932, + "step": 156760 + }, + { + "epoch": 
0.31668531858417803, + "grad_norm": 240.80499267578125, + "learning_rate": 8.674099495168755e-06, + "loss": 24.6349, + "step": 156770 + }, + { + "epoch": 0.31670551921686185, + "grad_norm": 392.14068603515625, + "learning_rate": 8.673862728016983e-06, + "loss": 14.8336, + "step": 156780 + }, + { + "epoch": 0.3167257198495457, + "grad_norm": 701.1990356445312, + "learning_rate": 8.67362594295925e-06, + "loss": 24.7302, + "step": 156790 + }, + { + "epoch": 0.3167459204822295, + "grad_norm": 513.910400390625, + "learning_rate": 8.673389139996708e-06, + "loss": 25.9293, + "step": 156800 + }, + { + "epoch": 0.3167661211149133, + "grad_norm": 515.1926879882812, + "learning_rate": 8.673152319130514e-06, + "loss": 27.1508, + "step": 156810 + }, + { + "epoch": 0.31678632174759713, + "grad_norm": 540.6401977539062, + "learning_rate": 8.672915480361821e-06, + "loss": 20.6046, + "step": 156820 + }, + { + "epoch": 0.31680652238028095, + "grad_norm": 397.8475341796875, + "learning_rate": 8.672678623691783e-06, + "loss": 10.7563, + "step": 156830 + }, + { + "epoch": 0.3168267230129648, + "grad_norm": 631.1962890625, + "learning_rate": 8.672441749121555e-06, + "loss": 43.2764, + "step": 156840 + }, + { + "epoch": 0.3168469236456486, + "grad_norm": 31.80294418334961, + "learning_rate": 8.672204856652291e-06, + "loss": 15.2622, + "step": 156850 + }, + { + "epoch": 0.3168671242783324, + "grad_norm": 241.8560791015625, + "learning_rate": 8.671967946285147e-06, + "loss": 18.81, + "step": 156860 + }, + { + "epoch": 0.31688732491101623, + "grad_norm": 450.6624450683594, + "learning_rate": 8.671731018021275e-06, + "loss": 23.7015, + "step": 156870 + }, + { + "epoch": 0.31690752554370005, + "grad_norm": 252.8155975341797, + "learning_rate": 8.671494071861832e-06, + "loss": 19.1985, + "step": 156880 + }, + { + "epoch": 0.3169277261763838, + "grad_norm": 589.3222045898438, + "learning_rate": 8.671257107807974e-06, + "loss": 21.2078, + "step": 156890 + }, + { + "epoch": 0.31694792680906764, + "grad_norm": 201.46461486816406, + "learning_rate": 8.671020125860851e-06, + "loss": 36.1632, + "step": 156900 + }, + { + "epoch": 0.31696812744175146, + "grad_norm": 574.6801147460938, + "learning_rate": 8.670783126021623e-06, + "loss": 25.3402, + "step": 156910 + }, + { + "epoch": 0.3169883280744353, + "grad_norm": 560.37060546875, + "learning_rate": 8.670546108291443e-06, + "loss": 29.2263, + "step": 156920 + }, + { + "epoch": 0.3170085287071191, + "grad_norm": 198.41749572753906, + "learning_rate": 8.670309072671468e-06, + "loss": 33.0624, + "step": 156930 + }, + { + "epoch": 0.3170287293398029, + "grad_norm": 349.1961975097656, + "learning_rate": 8.67007201916285e-06, + "loss": 19.6403, + "step": 156940 + }, + { + "epoch": 0.31704892997248674, + "grad_norm": 511.9179382324219, + "learning_rate": 8.669834947766746e-06, + "loss": 20.1144, + "step": 156950 + }, + { + "epoch": 0.31706913060517056, + "grad_norm": 577.7297973632812, + "learning_rate": 8.66959785848431e-06, + "loss": 23.7455, + "step": 156960 + }, + { + "epoch": 0.3170893312378544, + "grad_norm": 777.7017211914062, + "learning_rate": 8.669360751316702e-06, + "loss": 26.5656, + "step": 156970 + }, + { + "epoch": 0.3171095318705382, + "grad_norm": 167.69903564453125, + "learning_rate": 8.669123626265074e-06, + "loss": 21.3804, + "step": 156980 + }, + { + "epoch": 0.317129732503222, + "grad_norm": 360.7198486328125, + "learning_rate": 8.668886483330584e-06, + "loss": 12.1088, + "step": 156990 + }, + { + "epoch": 0.31714993313590584, + "grad_norm": 371.81353759765625, 
+ "learning_rate": 8.668649322514382e-06, + "loss": 26.2547, + "step": 157000 + }, + { + "epoch": 0.31717013376858966, + "grad_norm": 163.5631561279297, + "learning_rate": 8.66841214381763e-06, + "loss": 36.1617, + "step": 157010 + }, + { + "epoch": 0.3171903344012734, + "grad_norm": 279.35504150390625, + "learning_rate": 8.668174947241485e-06, + "loss": 35.6495, + "step": 157020 + }, + { + "epoch": 0.31721053503395724, + "grad_norm": 420.391845703125, + "learning_rate": 8.667937732787097e-06, + "loss": 20.8908, + "step": 157030 + }, + { + "epoch": 0.31723073566664106, + "grad_norm": 355.45245361328125, + "learning_rate": 8.667700500455627e-06, + "loss": 16.8542, + "step": 157040 + }, + { + "epoch": 0.3172509362993249, + "grad_norm": 314.1698303222656, + "learning_rate": 8.667463250248229e-06, + "loss": 17.1736, + "step": 157050 + }, + { + "epoch": 0.3172711369320087, + "grad_norm": 202.76202392578125, + "learning_rate": 8.667225982166058e-06, + "loss": 30.3521, + "step": 157060 + }, + { + "epoch": 0.3172913375646925, + "grad_norm": 28.479190826416016, + "learning_rate": 8.666988696210275e-06, + "loss": 14.3018, + "step": 157070 + }, + { + "epoch": 0.31731153819737634, + "grad_norm": 407.8205871582031, + "learning_rate": 8.666751392382033e-06, + "loss": 16.5902, + "step": 157080 + }, + { + "epoch": 0.31733173883006016, + "grad_norm": 123.9936294555664, + "learning_rate": 8.66651407068249e-06, + "loss": 22.9009, + "step": 157090 + }, + { + "epoch": 0.317351939462744, + "grad_norm": 1239.0494384765625, + "learning_rate": 8.666276731112802e-06, + "loss": 18.3345, + "step": 157100 + }, + { + "epoch": 0.3173721400954278, + "grad_norm": 490.563720703125, + "learning_rate": 8.666039373674124e-06, + "loss": 11.9196, + "step": 157110 + }, + { + "epoch": 0.3173923407281116, + "grad_norm": 1140.191650390625, + "learning_rate": 8.665801998367616e-06, + "loss": 22.4172, + "step": 157120 + }, + { + "epoch": 0.31741254136079544, + "grad_norm": 311.021240234375, + "learning_rate": 8.665564605194435e-06, + "loss": 19.7928, + "step": 157130 + }, + { + "epoch": 0.31743274199347926, + "grad_norm": 276.641845703125, + "learning_rate": 8.665327194155736e-06, + "loss": 15.2686, + "step": 157140 + }, + { + "epoch": 0.317452942626163, + "grad_norm": 509.0832824707031, + "learning_rate": 8.665089765252674e-06, + "loss": 15.1643, + "step": 157150 + }, + { + "epoch": 0.31747314325884685, + "grad_norm": 20.60483169555664, + "learning_rate": 8.664852318486412e-06, + "loss": 30.92, + "step": 157160 + }, + { + "epoch": 0.31749334389153067, + "grad_norm": 438.5542907714844, + "learning_rate": 8.664614853858105e-06, + "loss": 19.4307, + "step": 157170 + }, + { + "epoch": 0.3175135445242145, + "grad_norm": 628.7283935546875, + "learning_rate": 8.664377371368907e-06, + "loss": 21.6214, + "step": 157180 + }, + { + "epoch": 0.3175337451568983, + "grad_norm": 182.05941772460938, + "learning_rate": 8.664139871019979e-06, + "loss": 15.2398, + "step": 157190 + }, + { + "epoch": 0.3175539457895821, + "grad_norm": 967.3475341796875, + "learning_rate": 8.66390235281248e-06, + "loss": 17.6178, + "step": 157200 + }, + { + "epoch": 0.31757414642226595, + "grad_norm": 373.94219970703125, + "learning_rate": 8.663664816747562e-06, + "loss": 16.7508, + "step": 157210 + }, + { + "epoch": 0.31759434705494977, + "grad_norm": 228.75059509277344, + "learning_rate": 8.663427262826386e-06, + "loss": 11.1991, + "step": 157220 + }, + { + "epoch": 0.3176145476876336, + "grad_norm": 676.013671875, + "learning_rate": 8.663189691050114e-06, + "loss": 
24.8321, + "step": 157230 + }, + { + "epoch": 0.3176347483203174, + "grad_norm": 606.6685791015625, + "learning_rate": 8.662952101419895e-06, + "loss": 20.2691, + "step": 157240 + }, + { + "epoch": 0.3176549489530012, + "grad_norm": 191.4118194580078, + "learning_rate": 8.662714493936895e-06, + "loss": 25.0529, + "step": 157250 + }, + { + "epoch": 0.31767514958568505, + "grad_norm": 393.2243957519531, + "learning_rate": 8.662476868602268e-06, + "loss": 19.0307, + "step": 157260 + }, + { + "epoch": 0.31769535021836887, + "grad_norm": 40.03284454345703, + "learning_rate": 8.662239225417171e-06, + "loss": 20.3379, + "step": 157270 + }, + { + "epoch": 0.31771555085105263, + "grad_norm": 516.3447875976562, + "learning_rate": 8.662001564382768e-06, + "loss": 15.767, + "step": 157280 + }, + { + "epoch": 0.31773575148373645, + "grad_norm": 2002.5963134765625, + "learning_rate": 8.66176388550021e-06, + "loss": 31.3092, + "step": 157290 + }, + { + "epoch": 0.31775595211642027, + "grad_norm": 204.0232696533203, + "learning_rate": 8.66152618877066e-06, + "loss": 27.5483, + "step": 157300 + }, + { + "epoch": 0.3177761527491041, + "grad_norm": 268.2770080566406, + "learning_rate": 8.661288474195275e-06, + "loss": 18.3599, + "step": 157310 + }, + { + "epoch": 0.3177963533817879, + "grad_norm": 439.6703186035156, + "learning_rate": 8.661050741775215e-06, + "loss": 11.543, + "step": 157320 + }, + { + "epoch": 0.31781655401447173, + "grad_norm": 579.0086059570312, + "learning_rate": 8.660812991511636e-06, + "loss": 24.7194, + "step": 157330 + }, + { + "epoch": 0.31783675464715555, + "grad_norm": 387.04150390625, + "learning_rate": 8.6605752234057e-06, + "loss": 26.5599, + "step": 157340 + }, + { + "epoch": 0.31785695527983937, + "grad_norm": 770.0346069335938, + "learning_rate": 8.660337437458565e-06, + "loss": 45.8609, + "step": 157350 + }, + { + "epoch": 0.3178771559125232, + "grad_norm": 734.6922607421875, + "learning_rate": 8.660099633671388e-06, + "loss": 15.791, + "step": 157360 + }, + { + "epoch": 0.317897356545207, + "grad_norm": 1305.1317138671875, + "learning_rate": 8.65986181204533e-06, + "loss": 37.2538, + "step": 157370 + }, + { + "epoch": 0.31791755717789083, + "grad_norm": 1059.372802734375, + "learning_rate": 8.659623972581548e-06, + "loss": 42.6554, + "step": 157380 + }, + { + "epoch": 0.31793775781057465, + "grad_norm": 298.19818115234375, + "learning_rate": 8.659386115281205e-06, + "loss": 11.3735, + "step": 157390 + }, + { + "epoch": 0.31795795844325847, + "grad_norm": 630.5972900390625, + "learning_rate": 8.659148240145456e-06, + "loss": 23.9854, + "step": 157400 + }, + { + "epoch": 0.31797815907594223, + "grad_norm": 400.2347412109375, + "learning_rate": 8.658910347175463e-06, + "loss": 15.1035, + "step": 157410 + }, + { + "epoch": 0.31799835970862605, + "grad_norm": 568.3389892578125, + "learning_rate": 8.658672436372385e-06, + "loss": 23.9981, + "step": 157420 + }, + { + "epoch": 0.3180185603413099, + "grad_norm": 858.0379638671875, + "learning_rate": 8.658434507737381e-06, + "loss": 31.0256, + "step": 157430 + }, + { + "epoch": 0.3180387609739937, + "grad_norm": 242.70700073242188, + "learning_rate": 8.65819656127161e-06, + "loss": 19.9308, + "step": 157440 + }, + { + "epoch": 0.3180589616066775, + "grad_norm": 104.55670166015625, + "learning_rate": 8.657958596976235e-06, + "loss": 39.1577, + "step": 157450 + }, + { + "epoch": 0.31807916223936133, + "grad_norm": 381.4013977050781, + "learning_rate": 8.657720614852412e-06, + "loss": 27.9169, + "step": 157460 + }, + { + "epoch": 
0.31809936287204515, + "grad_norm": 851.9417724609375, + "learning_rate": 8.657482614901302e-06, + "loss": 17.0864, + "step": 157470 + }, + { + "epoch": 0.318119563504729, + "grad_norm": 253.64715576171875, + "learning_rate": 8.657244597124066e-06, + "loss": 19.8157, + "step": 157480 + }, + { + "epoch": 0.3181397641374128, + "grad_norm": 230.14015197753906, + "learning_rate": 8.657006561521863e-06, + "loss": 26.3671, + "step": 157490 + }, + { + "epoch": 0.3181599647700966, + "grad_norm": 392.8260498046875, + "learning_rate": 8.656768508095853e-06, + "loss": 32.9371, + "step": 157500 + }, + { + "epoch": 0.31818016540278043, + "grad_norm": 222.6374969482422, + "learning_rate": 8.656530436847196e-06, + "loss": 15.5303, + "step": 157510 + }, + { + "epoch": 0.31820036603546425, + "grad_norm": 0.0, + "learning_rate": 8.656292347777056e-06, + "loss": 27.2507, + "step": 157520 + }, + { + "epoch": 0.318220566668148, + "grad_norm": 327.7041015625, + "learning_rate": 8.65605424088659e-06, + "loss": 17.6667, + "step": 157530 + }, + { + "epoch": 0.31824076730083184, + "grad_norm": 799.1824951171875, + "learning_rate": 8.655816116176959e-06, + "loss": 26.6767, + "step": 157540 + }, + { + "epoch": 0.31826096793351566, + "grad_norm": 238.49359130859375, + "learning_rate": 8.655577973649322e-06, + "loss": 25.6931, + "step": 157550 + }, + { + "epoch": 0.3182811685661995, + "grad_norm": 530.2203979492188, + "learning_rate": 8.655339813304842e-06, + "loss": 31.7046, + "step": 157560 + }, + { + "epoch": 0.3183013691988833, + "grad_norm": 297.263427734375, + "learning_rate": 8.655101635144678e-06, + "loss": 34.6932, + "step": 157570 + }, + { + "epoch": 0.3183215698315671, + "grad_norm": 230.2835693359375, + "learning_rate": 8.654863439169994e-06, + "loss": 25.6645, + "step": 157580 + }, + { + "epoch": 0.31834177046425094, + "grad_norm": 410.8885498046875, + "learning_rate": 8.654625225381947e-06, + "loss": 21.8335, + "step": 157590 + }, + { + "epoch": 0.31836197109693476, + "grad_norm": 201.29010009765625, + "learning_rate": 8.654386993781703e-06, + "loss": 26.1597, + "step": 157600 + }, + { + "epoch": 0.3183821717296186, + "grad_norm": 324.5323486328125, + "learning_rate": 8.654148744370416e-06, + "loss": 6.3611, + "step": 157610 + }, + { + "epoch": 0.3184023723623024, + "grad_norm": 611.1500244140625, + "learning_rate": 8.653910477149254e-06, + "loss": 27.3416, + "step": 157620 + }, + { + "epoch": 0.3184225729949862, + "grad_norm": 464.58636474609375, + "learning_rate": 8.653672192119376e-06, + "loss": 25.9692, + "step": 157630 + }, + { + "epoch": 0.31844277362767004, + "grad_norm": 382.68646240234375, + "learning_rate": 8.65343388928194e-06, + "loss": 16.8495, + "step": 157640 + }, + { + "epoch": 0.31846297426035386, + "grad_norm": 567.3402099609375, + "learning_rate": 8.653195568638114e-06, + "loss": 13.9806, + "step": 157650 + }, + { + "epoch": 0.3184831748930376, + "grad_norm": 670.782470703125, + "learning_rate": 8.652957230189051e-06, + "loss": 17.2101, + "step": 157660 + }, + { + "epoch": 0.31850337552572144, + "grad_norm": 301.2229919433594, + "learning_rate": 8.652718873935922e-06, + "loss": 16.2374, + "step": 157670 + }, + { + "epoch": 0.31852357615840526, + "grad_norm": 1011.8859252929688, + "learning_rate": 8.652480499879881e-06, + "loss": 49.0083, + "step": 157680 + }, + { + "epoch": 0.3185437767910891, + "grad_norm": 254.2294921875, + "learning_rate": 8.652242108022095e-06, + "loss": 15.571, + "step": 157690 + }, + { + "epoch": 0.3185639774237729, + "grad_norm": 681.1078491210938, + 
"learning_rate": 8.652003698363724e-06, + "loss": 22.2986, + "step": 157700 + }, + { + "epoch": 0.3185841780564567, + "grad_norm": 573.0494384765625, + "learning_rate": 8.65176527090593e-06, + "loss": 42.1385, + "step": 157710 + }, + { + "epoch": 0.31860437868914054, + "grad_norm": 75.19812774658203, + "learning_rate": 8.651526825649874e-06, + "loss": 16.02, + "step": 157720 + }, + { + "epoch": 0.31862457932182436, + "grad_norm": 360.0973815917969, + "learning_rate": 8.651288362596719e-06, + "loss": 35.8746, + "step": 157730 + }, + { + "epoch": 0.3186447799545082, + "grad_norm": 344.9049377441406, + "learning_rate": 8.651049881747628e-06, + "loss": 15.7555, + "step": 157740 + }, + { + "epoch": 0.318664980587192, + "grad_norm": 369.2949523925781, + "learning_rate": 8.650811383103763e-06, + "loss": 18.4003, + "step": 157750 + }, + { + "epoch": 0.3186851812198758, + "grad_norm": 45.9865608215332, + "learning_rate": 8.650572866666285e-06, + "loss": 29.2727, + "step": 157760 + }, + { + "epoch": 0.31870538185255964, + "grad_norm": 64.4155044555664, + "learning_rate": 8.650334332436358e-06, + "loss": 25.9618, + "step": 157770 + }, + { + "epoch": 0.31872558248524346, + "grad_norm": 563.4820556640625, + "learning_rate": 8.650095780415144e-06, + "loss": 15.1927, + "step": 157780 + }, + { + "epoch": 0.31874578311792723, + "grad_norm": 1031.565673828125, + "learning_rate": 8.649857210603808e-06, + "loss": 26.4215, + "step": 157790 + }, + { + "epoch": 0.31876598375061105, + "grad_norm": 201.8151092529297, + "learning_rate": 8.649618623003509e-06, + "loss": 14.7141, + "step": 157800 + }, + { + "epoch": 0.31878618438329487, + "grad_norm": 543.1805419921875, + "learning_rate": 8.649380017615411e-06, + "loss": 19.4622, + "step": 157810 + }, + { + "epoch": 0.3188063850159787, + "grad_norm": 731.8388061523438, + "learning_rate": 8.649141394440677e-06, + "loss": 24.1656, + "step": 157820 + }, + { + "epoch": 0.3188265856486625, + "grad_norm": 284.24798583984375, + "learning_rate": 8.648902753480472e-06, + "loss": 23.817, + "step": 157830 + }, + { + "epoch": 0.31884678628134633, + "grad_norm": 494.2884216308594, + "learning_rate": 8.648664094735957e-06, + "loss": 15.5966, + "step": 157840 + }, + { + "epoch": 0.31886698691403015, + "grad_norm": 291.4805908203125, + "learning_rate": 8.648425418208294e-06, + "loss": 20.8839, + "step": 157850 + }, + { + "epoch": 0.31888718754671397, + "grad_norm": 646.5670166015625, + "learning_rate": 8.648186723898651e-06, + "loss": 12.6177, + "step": 157860 + }, + { + "epoch": 0.3189073881793978, + "grad_norm": 535.337646484375, + "learning_rate": 8.647948011808187e-06, + "loss": 25.9195, + "step": 157870 + }, + { + "epoch": 0.3189275888120816, + "grad_norm": 489.74029541015625, + "learning_rate": 8.647709281938066e-06, + "loss": 18.5283, + "step": 157880 + }, + { + "epoch": 0.31894778944476543, + "grad_norm": 525.4874877929688, + "learning_rate": 8.647470534289453e-06, + "loss": 19.2942, + "step": 157890 + }, + { + "epoch": 0.31896799007744925, + "grad_norm": 287.4463195800781, + "learning_rate": 8.647231768863513e-06, + "loss": 21.5834, + "step": 157900 + }, + { + "epoch": 0.31898819071013307, + "grad_norm": 684.4493408203125, + "learning_rate": 8.646992985661404e-06, + "loss": 18.918, + "step": 157910 + }, + { + "epoch": 0.31900839134281683, + "grad_norm": 495.8959045410156, + "learning_rate": 8.646754184684297e-06, + "loss": 20.9811, + "step": 157920 + }, + { + "epoch": 0.31902859197550065, + "grad_norm": 252.15960693359375, + "learning_rate": 8.64651536593335e-06, + "loss": 
22.9397, + "step": 157930 + }, + { + "epoch": 0.3190487926081845, + "grad_norm": 199.3404541015625, + "learning_rate": 8.646276529409729e-06, + "loss": 17.9345, + "step": 157940 + }, + { + "epoch": 0.3190689932408683, + "grad_norm": 423.1203308105469, + "learning_rate": 8.6460376751146e-06, + "loss": 32.543, + "step": 157950 + }, + { + "epoch": 0.3190891938735521, + "grad_norm": 885.3731079101562, + "learning_rate": 8.645798803049126e-06, + "loss": 27.9252, + "step": 157960 + }, + { + "epoch": 0.31910939450623593, + "grad_norm": 846.163818359375, + "learning_rate": 8.64555991321447e-06, + "loss": 19.8226, + "step": 157970 + }, + { + "epoch": 0.31912959513891975, + "grad_norm": 1335.4134521484375, + "learning_rate": 8.645321005611797e-06, + "loss": 34.8784, + "step": 157980 + }, + { + "epoch": 0.3191497957716036, + "grad_norm": 115.35169982910156, + "learning_rate": 8.64508208024227e-06, + "loss": 23.3887, + "step": 157990 + }, + { + "epoch": 0.3191699964042874, + "grad_norm": 168.93600463867188, + "learning_rate": 8.644843137107058e-06, + "loss": 15.7694, + "step": 158000 + }, + { + "epoch": 0.3191901970369712, + "grad_norm": 792.327880859375, + "learning_rate": 8.644604176207322e-06, + "loss": 13.7187, + "step": 158010 + }, + { + "epoch": 0.31921039766965503, + "grad_norm": 186.10223388671875, + "learning_rate": 8.644365197544227e-06, + "loss": 14.0684, + "step": 158020 + }, + { + "epoch": 0.31923059830233885, + "grad_norm": 443.1025085449219, + "learning_rate": 8.644126201118936e-06, + "loss": 21.2446, + "step": 158030 + }, + { + "epoch": 0.3192507989350226, + "grad_norm": 260.9898986816406, + "learning_rate": 8.643887186932617e-06, + "loss": 27.4131, + "step": 158040 + }, + { + "epoch": 0.31927099956770644, + "grad_norm": 589.14111328125, + "learning_rate": 8.643648154986436e-06, + "loss": 35.1901, + "step": 158050 + }, + { + "epoch": 0.31929120020039026, + "grad_norm": 364.7909240722656, + "learning_rate": 8.643409105281554e-06, + "loss": 11.9995, + "step": 158060 + }, + { + "epoch": 0.3193114008330741, + "grad_norm": 635.873291015625, + "learning_rate": 8.643170037819137e-06, + "loss": 37.2956, + "step": 158070 + }, + { + "epoch": 0.3193316014657579, + "grad_norm": 604.2241821289062, + "learning_rate": 8.642930952600353e-06, + "loss": 26.6291, + "step": 158080 + }, + { + "epoch": 0.3193518020984417, + "grad_norm": 116.81539154052734, + "learning_rate": 8.642691849626364e-06, + "loss": 15.4781, + "step": 158090 + }, + { + "epoch": 0.31937200273112554, + "grad_norm": 331.23272705078125, + "learning_rate": 8.642452728898339e-06, + "loss": 30.6745, + "step": 158100 + }, + { + "epoch": 0.31939220336380936, + "grad_norm": 136.81800842285156, + "learning_rate": 8.642213590417439e-06, + "loss": 15.4753, + "step": 158110 + }, + { + "epoch": 0.3194124039964932, + "grad_norm": 306.7845764160156, + "learning_rate": 8.641974434184832e-06, + "loss": 12.2529, + "step": 158120 + }, + { + "epoch": 0.319432604629177, + "grad_norm": 985.0399169921875, + "learning_rate": 8.641735260201682e-06, + "loss": 21.8359, + "step": 158130 + }, + { + "epoch": 0.3194528052618608, + "grad_norm": 222.74391174316406, + "learning_rate": 8.641496068469159e-06, + "loss": 18.9181, + "step": 158140 + }, + { + "epoch": 0.31947300589454464, + "grad_norm": 359.5334167480469, + "learning_rate": 8.641256858988424e-06, + "loss": 24.1814, + "step": 158150 + }, + { + "epoch": 0.31949320652722846, + "grad_norm": 396.8135986328125, + "learning_rate": 8.641017631760646e-06, + "loss": 25.7287, + "step": 158160 + }, + { + "epoch": 
0.3195134071599122, + "grad_norm": 826.8873291015625, + "learning_rate": 8.640778386786987e-06, + "loss": 17.03, + "step": 158170 + }, + { + "epoch": 0.31953360779259604, + "grad_norm": 0.36692190170288086, + "learning_rate": 8.640539124068617e-06, + "loss": 14.8261, + "step": 158180 + }, + { + "epoch": 0.31955380842527986, + "grad_norm": 366.53369140625, + "learning_rate": 8.640299843606702e-06, + "loss": 19.4884, + "step": 158190 + }, + { + "epoch": 0.3195740090579637, + "grad_norm": 245.4751434326172, + "learning_rate": 8.640060545402407e-06, + "loss": 16.6446, + "step": 158200 + }, + { + "epoch": 0.3195942096906475, + "grad_norm": 424.68695068359375, + "learning_rate": 8.639821229456898e-06, + "loss": 29.3442, + "step": 158210 + }, + { + "epoch": 0.3196144103233313, + "grad_norm": 332.3997497558594, + "learning_rate": 8.63958189577134e-06, + "loss": 20.0662, + "step": 158220 + }, + { + "epoch": 0.31963461095601514, + "grad_norm": 370.6135559082031, + "learning_rate": 8.639342544346903e-06, + "loss": 11.5215, + "step": 158230 + }, + { + "epoch": 0.31965481158869896, + "grad_norm": 104.91935729980469, + "learning_rate": 8.639103175184753e-06, + "loss": 8.5922, + "step": 158240 + }, + { + "epoch": 0.3196750122213828, + "grad_norm": 222.44091796875, + "learning_rate": 8.638863788286054e-06, + "loss": 15.4662, + "step": 158250 + }, + { + "epoch": 0.3196952128540666, + "grad_norm": 372.41680908203125, + "learning_rate": 8.638624383651974e-06, + "loss": 41.0226, + "step": 158260 + }, + { + "epoch": 0.3197154134867504, + "grad_norm": 319.10723876953125, + "learning_rate": 8.638384961283678e-06, + "loss": 16.1681, + "step": 158270 + }, + { + "epoch": 0.31973561411943424, + "grad_norm": 223.899169921875, + "learning_rate": 8.63814552118234e-06, + "loss": 26.3587, + "step": 158280 + }, + { + "epoch": 0.31975581475211806, + "grad_norm": 366.6710510253906, + "learning_rate": 8.637906063349119e-06, + "loss": 18.7715, + "step": 158290 + }, + { + "epoch": 0.3197760153848018, + "grad_norm": 940.7827758789062, + "learning_rate": 8.637666587785185e-06, + "loss": 26.3993, + "step": 158300 + }, + { + "epoch": 0.31979621601748565, + "grad_norm": 1038.5609130859375, + "learning_rate": 8.637427094491706e-06, + "loss": 26.0275, + "step": 158310 + }, + { + "epoch": 0.31981641665016947, + "grad_norm": 637.1026000976562, + "learning_rate": 8.637187583469849e-06, + "loss": 27.5549, + "step": 158320 + }, + { + "epoch": 0.3198366172828533, + "grad_norm": 1663.321044921875, + "learning_rate": 8.63694805472078e-06, + "loss": 39.1991, + "step": 158330 + }, + { + "epoch": 0.3198568179155371, + "grad_norm": 792.3937377929688, + "learning_rate": 8.636708508245666e-06, + "loss": 36.0736, + "step": 158340 + }, + { + "epoch": 0.3198770185482209, + "grad_norm": 220.81375122070312, + "learning_rate": 8.636468944045677e-06, + "loss": 11.5905, + "step": 158350 + }, + { + "epoch": 0.31989721918090475, + "grad_norm": 360.908447265625, + "learning_rate": 8.636229362121979e-06, + "loss": 15.2297, + "step": 158360 + }, + { + "epoch": 0.31991741981358857, + "grad_norm": 176.1470947265625, + "learning_rate": 8.635989762475742e-06, + "loss": 22.045, + "step": 158370 + }, + { + "epoch": 0.3199376204462724, + "grad_norm": 344.9416198730469, + "learning_rate": 8.63575014510813e-06, + "loss": 17.1547, + "step": 158380 + }, + { + "epoch": 0.3199578210789562, + "grad_norm": 461.4400634765625, + "learning_rate": 8.635510510020313e-06, + "loss": 27.6871, + "step": 158390 + }, + { + "epoch": 0.31997802171164, + "grad_norm": 231.4617156982422, + 
"learning_rate": 8.63527085721346e-06, + "loss": 29.2742, + "step": 158400 + }, + { + "epoch": 0.31999822234432385, + "grad_norm": 822.868408203125, + "learning_rate": 8.635031186688736e-06, + "loss": 19.7909, + "step": 158410 + }, + { + "epoch": 0.32001842297700767, + "grad_norm": 429.7058410644531, + "learning_rate": 8.634791498447313e-06, + "loss": 20.7485, + "step": 158420 + }, + { + "epoch": 0.32003862360969143, + "grad_norm": 483.2109375, + "learning_rate": 8.634551792490356e-06, + "loss": 44.433, + "step": 158430 + }, + { + "epoch": 0.32005882424237525, + "grad_norm": 1219.4669189453125, + "learning_rate": 8.634312068819032e-06, + "loss": 15.1578, + "step": 158440 + }, + { + "epoch": 0.32007902487505907, + "grad_norm": 244.9398651123047, + "learning_rate": 8.634072327434515e-06, + "loss": 23.0463, + "step": 158450 + }, + { + "epoch": 0.3200992255077429, + "grad_norm": 402.80908203125, + "learning_rate": 8.63383256833797e-06, + "loss": 29.757, + "step": 158460 + }, + { + "epoch": 0.3201194261404267, + "grad_norm": 767.7590942382812, + "learning_rate": 8.633592791530564e-06, + "loss": 17.3504, + "step": 158470 + }, + { + "epoch": 0.32013962677311053, + "grad_norm": 102.67618560791016, + "learning_rate": 8.63335299701347e-06, + "loss": 27.6856, + "step": 158480 + }, + { + "epoch": 0.32015982740579435, + "grad_norm": 11.8885498046875, + "learning_rate": 8.633113184787852e-06, + "loss": 24.0924, + "step": 158490 + }, + { + "epoch": 0.32018002803847817, + "grad_norm": 200.66864013671875, + "learning_rate": 8.632873354854881e-06, + "loss": 20.7576, + "step": 158500 + }, + { + "epoch": 0.320200228671162, + "grad_norm": 431.1730651855469, + "learning_rate": 8.632633507215726e-06, + "loss": 17.4027, + "step": 158510 + }, + { + "epoch": 0.3202204293038458, + "grad_norm": 377.1340637207031, + "learning_rate": 8.632393641871555e-06, + "loss": 30.595, + "step": 158520 + }, + { + "epoch": 0.32024062993652963, + "grad_norm": 270.2852478027344, + "learning_rate": 8.63215375882354e-06, + "loss": 22.4549, + "step": 158530 + }, + { + "epoch": 0.32026083056921345, + "grad_norm": 98.90607452392578, + "learning_rate": 8.631913858072846e-06, + "loss": 32.6666, + "step": 158540 + }, + { + "epoch": 0.32028103120189727, + "grad_norm": 1097.68896484375, + "learning_rate": 8.631673939620647e-06, + "loss": 26.0116, + "step": 158550 + }, + { + "epoch": 0.32030123183458103, + "grad_norm": 483.80401611328125, + "learning_rate": 8.631434003468108e-06, + "loss": 12.891, + "step": 158560 + }, + { + "epoch": 0.32032143246726485, + "grad_norm": 25.915245056152344, + "learning_rate": 8.6311940496164e-06, + "loss": 19.0802, + "step": 158570 + }, + { + "epoch": 0.3203416330999487, + "grad_norm": 348.0601806640625, + "learning_rate": 8.630954078066693e-06, + "loss": 18.2453, + "step": 158580 + }, + { + "epoch": 0.3203618337326325, + "grad_norm": 373.0682678222656, + "learning_rate": 8.630714088820158e-06, + "loss": 25.1146, + "step": 158590 + }, + { + "epoch": 0.3203820343653163, + "grad_norm": 5706.0205078125, + "learning_rate": 8.630474081877959e-06, + "loss": 48.6244, + "step": 158600 + }, + { + "epoch": 0.32040223499800013, + "grad_norm": 265.8067932128906, + "learning_rate": 8.630234057241274e-06, + "loss": 42.9384, + "step": 158610 + }, + { + "epoch": 0.32042243563068395, + "grad_norm": 269.0303039550781, + "learning_rate": 8.629994014911265e-06, + "loss": 29.8681, + "step": 158620 + }, + { + "epoch": 0.3204426362633678, + "grad_norm": 253.42100524902344, + "learning_rate": 8.629753954889108e-06, + "loss": 10.7813, + 
"step": 158630 + }, + { + "epoch": 0.3204628368960516, + "grad_norm": 863.8766479492188, + "learning_rate": 8.629513877175968e-06, + "loss": 17.4965, + "step": 158640 + }, + { + "epoch": 0.3204830375287354, + "grad_norm": 141.02073669433594, + "learning_rate": 8.62927378177302e-06, + "loss": 16.2872, + "step": 158650 + }, + { + "epoch": 0.32050323816141923, + "grad_norm": 413.55548095703125, + "learning_rate": 8.629033668681431e-06, + "loss": 23.4488, + "step": 158660 + }, + { + "epoch": 0.32052343879410305, + "grad_norm": 362.0960388183594, + "learning_rate": 8.62879353790237e-06, + "loss": 28.441, + "step": 158670 + }, + { + "epoch": 0.3205436394267868, + "grad_norm": 296.40264892578125, + "learning_rate": 8.628553389437011e-06, + "loss": 19.8648, + "step": 158680 + }, + { + "epoch": 0.32056384005947064, + "grad_norm": 643.4523315429688, + "learning_rate": 8.628313223286524e-06, + "loss": 34.9976, + "step": 158690 + }, + { + "epoch": 0.32058404069215446, + "grad_norm": 509.4014892578125, + "learning_rate": 8.628073039452076e-06, + "loss": 19.9797, + "step": 158700 + }, + { + "epoch": 0.3206042413248383, + "grad_norm": 369.1424865722656, + "learning_rate": 8.627832837934843e-06, + "loss": 17.0493, + "step": 158710 + }, + { + "epoch": 0.3206244419575221, + "grad_norm": 500.41961669921875, + "learning_rate": 8.627592618735989e-06, + "loss": 22.8432, + "step": 158720 + }, + { + "epoch": 0.3206446425902059, + "grad_norm": 438.64874267578125, + "learning_rate": 8.627352381856691e-06, + "loss": 19.6252, + "step": 158730 + }, + { + "epoch": 0.32066484322288974, + "grad_norm": 272.56451416015625, + "learning_rate": 8.627112127298117e-06, + "loss": 20.2484, + "step": 158740 + }, + { + "epoch": 0.32068504385557356, + "grad_norm": 302.90557861328125, + "learning_rate": 8.626871855061438e-06, + "loss": 21.2945, + "step": 158750 + }, + { + "epoch": 0.3207052444882574, + "grad_norm": 643.6118774414062, + "learning_rate": 8.626631565147827e-06, + "loss": 18.7646, + "step": 158760 + }, + { + "epoch": 0.3207254451209412, + "grad_norm": 448.28302001953125, + "learning_rate": 8.626391257558453e-06, + "loss": 21.5459, + "step": 158770 + }, + { + "epoch": 0.320745645753625, + "grad_norm": 689.1029663085938, + "learning_rate": 8.626150932294486e-06, + "loss": 28.1602, + "step": 158780 + }, + { + "epoch": 0.32076584638630884, + "grad_norm": 184.48121643066406, + "learning_rate": 8.625910589357102e-06, + "loss": 25.9541, + "step": 158790 + }, + { + "epoch": 0.32078604701899266, + "grad_norm": 216.4418487548828, + "learning_rate": 8.625670228747467e-06, + "loss": 53.694, + "step": 158800 + }, + { + "epoch": 0.3208062476516764, + "grad_norm": 524.427001953125, + "learning_rate": 8.625429850466756e-06, + "loss": 25.2995, + "step": 158810 + }, + { + "epoch": 0.32082644828436024, + "grad_norm": 788.6796875, + "learning_rate": 8.625189454516141e-06, + "loss": 16.7995, + "step": 158820 + }, + { + "epoch": 0.32084664891704406, + "grad_norm": 331.1761169433594, + "learning_rate": 8.62494904089679e-06, + "loss": 25.5924, + "step": 158830 + }, + { + "epoch": 0.3208668495497279, + "grad_norm": 555.55615234375, + "learning_rate": 8.624708609609879e-06, + "loss": 37.2603, + "step": 158840 + }, + { + "epoch": 0.3208870501824117, + "grad_norm": 193.11349487304688, + "learning_rate": 8.624468160656576e-06, + "loss": 25.2725, + "step": 158850 + }, + { + "epoch": 0.3209072508150955, + "grad_norm": 238.9698028564453, + "learning_rate": 8.624227694038057e-06, + "loss": 18.2588, + "step": 158860 + }, + { + "epoch": 
0.32092745144777934, + "grad_norm": 507.4835205078125, + "learning_rate": 8.623987209755489e-06, + "loss": 21.4299, + "step": 158870 + }, + { + "epoch": 0.32094765208046316, + "grad_norm": 296.4114990234375, + "learning_rate": 8.62374670781005e-06, + "loss": 27.3128, + "step": 158880 + }, + { + "epoch": 0.320967852713147, + "grad_norm": 814.8472900390625, + "learning_rate": 8.623506188202906e-06, + "loss": 34.9156, + "step": 158890 + }, + { + "epoch": 0.3209880533458308, + "grad_norm": 561.2838134765625, + "learning_rate": 8.623265650935233e-06, + "loss": 24.7943, + "step": 158900 + }, + { + "epoch": 0.3210082539785146, + "grad_norm": 629.7000122070312, + "learning_rate": 8.623025096008203e-06, + "loss": 29.4117, + "step": 158910 + }, + { + "epoch": 0.32102845461119844, + "grad_norm": 72.86721801757812, + "learning_rate": 8.62278452342299e-06, + "loss": 20.8836, + "step": 158920 + }, + { + "epoch": 0.32104865524388226, + "grad_norm": 66.56548309326172, + "learning_rate": 8.622543933180762e-06, + "loss": 15.8122, + "step": 158930 + }, + { + "epoch": 0.32106885587656603, + "grad_norm": 236.08253479003906, + "learning_rate": 8.622303325282697e-06, + "loss": 12.6933, + "step": 158940 + }, + { + "epoch": 0.32108905650924985, + "grad_norm": 456.3116455078125, + "learning_rate": 8.622062699729963e-06, + "loss": 36.0511, + "step": 158950 + }, + { + "epoch": 0.32110925714193367, + "grad_norm": 351.74493408203125, + "learning_rate": 8.621822056523735e-06, + "loss": 12.0406, + "step": 158960 + }, + { + "epoch": 0.3211294577746175, + "grad_norm": 116.14690399169922, + "learning_rate": 8.621581395665185e-06, + "loss": 21.5926, + "step": 158970 + }, + { + "epoch": 0.3211496584073013, + "grad_norm": 470.22625732421875, + "learning_rate": 8.621340717155487e-06, + "loss": 20.1175, + "step": 158980 + }, + { + "epoch": 0.32116985903998513, + "grad_norm": 192.45420837402344, + "learning_rate": 8.621100020995813e-06, + "loss": 15.4986, + "step": 158990 + }, + { + "epoch": 0.32119005967266895, + "grad_norm": 224.54586791992188, + "learning_rate": 8.620859307187339e-06, + "loss": 25.7586, + "step": 159000 + }, + { + "epoch": 0.32121026030535277, + "grad_norm": 173.90090942382812, + "learning_rate": 8.620618575731233e-06, + "loss": 19.6338, + "step": 159010 + }, + { + "epoch": 0.3212304609380366, + "grad_norm": 409.9088439941406, + "learning_rate": 8.620377826628672e-06, + "loss": 24.1875, + "step": 159020 + }, + { + "epoch": 0.3212506615707204, + "grad_norm": 281.1211242675781, + "learning_rate": 8.62013705988083e-06, + "loss": 17.2278, + "step": 159030 + }, + { + "epoch": 0.32127086220340423, + "grad_norm": 227.73297119140625, + "learning_rate": 8.619896275488876e-06, + "loss": 28.7881, + "step": 159040 + }, + { + "epoch": 0.32129106283608805, + "grad_norm": 463.6237487792969, + "learning_rate": 8.61965547345399e-06, + "loss": 30.688, + "step": 159050 + }, + { + "epoch": 0.32131126346877187, + "grad_norm": 467.28204345703125, + "learning_rate": 8.619414653777341e-06, + "loss": 17.0095, + "step": 159060 + }, + { + "epoch": 0.32133146410145563, + "grad_norm": 106.27899169921875, + "learning_rate": 8.619173816460104e-06, + "loss": 23.959, + "step": 159070 + }, + { + "epoch": 0.32135166473413945, + "grad_norm": 203.37060546875, + "learning_rate": 8.618932961503452e-06, + "loss": 10.4024, + "step": 159080 + }, + { + "epoch": 0.3213718653668233, + "grad_norm": 109.97218322753906, + "learning_rate": 8.618692088908562e-06, + "loss": 17.3537, + "step": 159090 + }, + { + "epoch": 0.3213920659995071, + "grad_norm": 
427.2147521972656, + "learning_rate": 8.618451198676602e-06, + "loss": 27.7377, + "step": 159100 + }, + { + "epoch": 0.3214122666321909, + "grad_norm": 371.28009033203125, + "learning_rate": 8.618210290808753e-06, + "loss": 10.7697, + "step": 159110 + }, + { + "epoch": 0.32143246726487473, + "grad_norm": 709.9109497070312, + "learning_rate": 8.617969365306184e-06, + "loss": 17.8241, + "step": 159120 + }, + { + "epoch": 0.32145266789755855, + "grad_norm": 611.5872802734375, + "learning_rate": 8.617728422170071e-06, + "loss": 18.3207, + "step": 159130 + }, + { + "epoch": 0.3214728685302424, + "grad_norm": 256.28411865234375, + "learning_rate": 8.61748746140159e-06, + "loss": 18.4094, + "step": 159140 + }, + { + "epoch": 0.3214930691629262, + "grad_norm": 697.2211303710938, + "learning_rate": 8.617246483001914e-06, + "loss": 20.5986, + "step": 159150 + }, + { + "epoch": 0.32151326979561, + "grad_norm": 935.1668701171875, + "learning_rate": 8.617005486972214e-06, + "loss": 24.9866, + "step": 159160 + }, + { + "epoch": 0.32153347042829383, + "grad_norm": 103.9272232055664, + "learning_rate": 8.616764473313671e-06, + "loss": 23.4761, + "step": 159170 + }, + { + "epoch": 0.32155367106097765, + "grad_norm": 273.02557373046875, + "learning_rate": 8.616523442027456e-06, + "loss": 16.7901, + "step": 159180 + }, + { + "epoch": 0.3215738716936615, + "grad_norm": 314.946533203125, + "learning_rate": 8.616282393114745e-06, + "loss": 27.1486, + "step": 159190 + }, + { + "epoch": 0.32159407232634524, + "grad_norm": 593.523193359375, + "learning_rate": 8.616041326576711e-06, + "loss": 28.6153, + "step": 159200 + }, + { + "epoch": 0.32161427295902906, + "grad_norm": 619.8696899414062, + "learning_rate": 8.61580024241453e-06, + "loss": 13.1667, + "step": 159210 + }, + { + "epoch": 0.3216344735917129, + "grad_norm": 529.56298828125, + "learning_rate": 8.615559140629377e-06, + "loss": 23.2421, + "step": 159220 + }, + { + "epoch": 0.3216546742243967, + "grad_norm": 20.517074584960938, + "learning_rate": 8.61531802122243e-06, + "loss": 8.4084, + "step": 159230 + }, + { + "epoch": 0.3216748748570805, + "grad_norm": 288.3408508300781, + "learning_rate": 8.615076884194859e-06, + "loss": 26.7926, + "step": 159240 + }, + { + "epoch": 0.32169507548976434, + "grad_norm": 660.5656127929688, + "learning_rate": 8.614835729547841e-06, + "loss": 21.9357, + "step": 159250 + }, + { + "epoch": 0.32171527612244816, + "grad_norm": 425.1676330566406, + "learning_rate": 8.614594557282553e-06, + "loss": 17.5466, + "step": 159260 + }, + { + "epoch": 0.321735476755132, + "grad_norm": 572.3814086914062, + "learning_rate": 8.614353367400171e-06, + "loss": 25.9064, + "step": 159270 + }, + { + "epoch": 0.3217556773878158, + "grad_norm": 125.5263900756836, + "learning_rate": 8.614112159901869e-06, + "loss": 13.9782, + "step": 159280 + }, + { + "epoch": 0.3217758780204996, + "grad_norm": 48.52302551269531, + "learning_rate": 8.61387093478882e-06, + "loss": 17.9707, + "step": 159290 + }, + { + "epoch": 0.32179607865318344, + "grad_norm": 624.1820678710938, + "learning_rate": 8.613629692062204e-06, + "loss": 27.8861, + "step": 159300 + }, + { + "epoch": 0.32181627928586726, + "grad_norm": 714.5958862304688, + "learning_rate": 8.613388431723195e-06, + "loss": 35.3888, + "step": 159310 + }, + { + "epoch": 0.321836479918551, + "grad_norm": 3063.965576171875, + "learning_rate": 8.61314715377297e-06, + "loss": 19.4244, + "step": 159320 + }, + { + "epoch": 0.32185668055123484, + "grad_norm": 191.87030029296875, + "learning_rate": 
8.612905858212702e-06, + "loss": 19.0154, + "step": 159330 + }, + { + "epoch": 0.32187688118391866, + "grad_norm": 271.310546875, + "learning_rate": 8.612664545043572e-06, + "loss": 10.0511, + "step": 159340 + }, + { + "epoch": 0.3218970818166025, + "grad_norm": 486.7148132324219, + "learning_rate": 8.612423214266749e-06, + "loss": 15.3081, + "step": 159350 + }, + { + "epoch": 0.3219172824492863, + "grad_norm": 153.84129333496094, + "learning_rate": 8.612181865883416e-06, + "loss": 18.8152, + "step": 159360 + }, + { + "epoch": 0.3219374830819701, + "grad_norm": 518.131103515625, + "learning_rate": 8.611940499894746e-06, + "loss": 25.9769, + "step": 159370 + }, + { + "epoch": 0.32195768371465394, + "grad_norm": 402.9221496582031, + "learning_rate": 8.611699116301916e-06, + "loss": 26.4168, + "step": 159380 + }, + { + "epoch": 0.32197788434733776, + "grad_norm": 579.6004638671875, + "learning_rate": 8.611457715106103e-06, + "loss": 18.7459, + "step": 159390 + }, + { + "epoch": 0.3219980849800216, + "grad_norm": 272.2318115234375, + "learning_rate": 8.611216296308485e-06, + "loss": 19.9651, + "step": 159400 + }, + { + "epoch": 0.3220182856127054, + "grad_norm": 543.0274047851562, + "learning_rate": 8.610974859910235e-06, + "loss": 29.9832, + "step": 159410 + }, + { + "epoch": 0.3220384862453892, + "grad_norm": 544.9154663085938, + "learning_rate": 8.610733405912531e-06, + "loss": 17.4575, + "step": 159420 + }, + { + "epoch": 0.32205868687807304, + "grad_norm": 183.30484008789062, + "learning_rate": 8.61049193431655e-06, + "loss": 10.8891, + "step": 159430 + }, + { + "epoch": 0.32207888751075686, + "grad_norm": 614.1286010742188, + "learning_rate": 8.610250445123472e-06, + "loss": 30.1711, + "step": 159440 + }, + { + "epoch": 0.3220990881434406, + "grad_norm": 405.5843505859375, + "learning_rate": 8.610008938334467e-06, + "loss": 18.1423, + "step": 159450 + }, + { + "epoch": 0.32211928877612445, + "grad_norm": 353.0569152832031, + "learning_rate": 8.609767413950719e-06, + "loss": 13.327, + "step": 159460 + }, + { + "epoch": 0.32213948940880827, + "grad_norm": 1106.0927734375, + "learning_rate": 8.609525871973402e-06, + "loss": 26.8672, + "step": 159470 + }, + { + "epoch": 0.3221596900414921, + "grad_norm": 678.6915283203125, + "learning_rate": 8.609284312403695e-06, + "loss": 17.5967, + "step": 159480 + }, + { + "epoch": 0.3221798906741759, + "grad_norm": 203.57518005371094, + "learning_rate": 8.60904273524277e-06, + "loss": 8.7134, + "step": 159490 + }, + { + "epoch": 0.3222000913068597, + "grad_norm": 877.3408813476562, + "learning_rate": 8.608801140491811e-06, + "loss": 22.3646, + "step": 159500 + }, + { + "epoch": 0.32222029193954355, + "grad_norm": 440.02825927734375, + "learning_rate": 8.608559528151994e-06, + "loss": 32.0175, + "step": 159510 + }, + { + "epoch": 0.32224049257222737, + "grad_norm": 284.1017150878906, + "learning_rate": 8.608317898224495e-06, + "loss": 22.2352, + "step": 159520 + }, + { + "epoch": 0.3222606932049112, + "grad_norm": 996.2796630859375, + "learning_rate": 8.608076250710491e-06, + "loss": 24.237, + "step": 159530 + }, + { + "epoch": 0.322280893837595, + "grad_norm": 1123.6976318359375, + "learning_rate": 8.607834585611162e-06, + "loss": 20.9938, + "step": 159540 + }, + { + "epoch": 0.3223010944702788, + "grad_norm": 386.87310791015625, + "learning_rate": 8.607592902927684e-06, + "loss": 18.9438, + "step": 159550 + }, + { + "epoch": 0.32232129510296265, + "grad_norm": 24.58682632446289, + "learning_rate": 8.607351202661236e-06, + "loss": 14.1573, + "step": 
159560 + }, + { + "epoch": 0.32234149573564647, + "grad_norm": 492.16845703125, + "learning_rate": 8.607109484812996e-06, + "loss": 15.2691, + "step": 159570 + }, + { + "epoch": 0.32236169636833023, + "grad_norm": 160.42462158203125, + "learning_rate": 8.606867749384142e-06, + "loss": 20.1392, + "step": 159580 + }, + { + "epoch": 0.32238189700101405, + "grad_norm": 693.1361694335938, + "learning_rate": 8.60662599637585e-06, + "loss": 38.6382, + "step": 159590 + }, + { + "epoch": 0.32240209763369787, + "grad_norm": 316.7165222167969, + "learning_rate": 8.606384225789304e-06, + "loss": 13.4398, + "step": 159600 + }, + { + "epoch": 0.3224222982663817, + "grad_norm": 468.33197021484375, + "learning_rate": 8.606142437625676e-06, + "loss": 17.6492, + "step": 159610 + }, + { + "epoch": 0.3224424988990655, + "grad_norm": 207.65821838378906, + "learning_rate": 8.605900631886148e-06, + "loss": 18.3911, + "step": 159620 + }, + { + "epoch": 0.32246269953174933, + "grad_norm": 844.1154174804688, + "learning_rate": 8.605658808571898e-06, + "loss": 13.3952, + "step": 159630 + }, + { + "epoch": 0.32248290016443315, + "grad_norm": 258.1779479980469, + "learning_rate": 8.605416967684105e-06, + "loss": 12.232, + "step": 159640 + }, + { + "epoch": 0.32250310079711697, + "grad_norm": 377.6234436035156, + "learning_rate": 8.605175109223945e-06, + "loss": 18.0037, + "step": 159650 + }, + { + "epoch": 0.3225233014298008, + "grad_norm": 42.619693756103516, + "learning_rate": 8.604933233192598e-06, + "loss": 11.9227, + "step": 159660 + }, + { + "epoch": 0.3225435020624846, + "grad_norm": 325.4930114746094, + "learning_rate": 8.604691339591248e-06, + "loss": 11.5, + "step": 159670 + }, + { + "epoch": 0.32256370269516843, + "grad_norm": 397.5663757324219, + "learning_rate": 8.604449428421065e-06, + "loss": 27.9093, + "step": 159680 + }, + { + "epoch": 0.32258390332785225, + "grad_norm": 103.412353515625, + "learning_rate": 8.604207499683235e-06, + "loss": 13.7212, + "step": 159690 + }, + { + "epoch": 0.32260410396053607, + "grad_norm": 265.3908996582031, + "learning_rate": 8.603965553378934e-06, + "loss": 14.6327, + "step": 159700 + }, + { + "epoch": 0.32262430459321984, + "grad_norm": 446.9028625488281, + "learning_rate": 8.603723589509342e-06, + "loss": 24.4498, + "step": 159710 + }, + { + "epoch": 0.32264450522590365, + "grad_norm": 473.041015625, + "learning_rate": 8.603481608075638e-06, + "loss": 26.1825, + "step": 159720 + }, + { + "epoch": 0.3226647058585875, + "grad_norm": 673.2293090820312, + "learning_rate": 8.603239609079005e-06, + "loss": 22.2739, + "step": 159730 + }, + { + "epoch": 0.3226849064912713, + "grad_norm": 757.7315063476562, + "learning_rate": 8.602997592520615e-06, + "loss": 14.6222, + "step": 159740 + }, + { + "epoch": 0.3227051071239551, + "grad_norm": 1660.0133056640625, + "learning_rate": 8.602755558401653e-06, + "loss": 33.4549, + "step": 159750 + }, + { + "epoch": 0.32272530775663893, + "grad_norm": 1194.78125, + "learning_rate": 8.602513506723298e-06, + "loss": 40.946, + "step": 159760 + }, + { + "epoch": 0.32274550838932275, + "grad_norm": 737.1159057617188, + "learning_rate": 8.602271437486728e-06, + "loss": 32.7479, + "step": 159770 + }, + { + "epoch": 0.3227657090220066, + "grad_norm": 257.111328125, + "learning_rate": 8.602029350693124e-06, + "loss": 39.9, + "step": 159780 + }, + { + "epoch": 0.3227859096546904, + "grad_norm": 249.00831604003906, + "learning_rate": 8.601787246343667e-06, + "loss": 9.8765, + "step": 159790 + }, + { + "epoch": 0.3228061102873742, + "grad_norm": 
406.1790771484375, + "learning_rate": 8.601545124439535e-06, + "loss": 17.3077, + "step": 159800 + }, + { + "epoch": 0.32282631092005803, + "grad_norm": 0.0, + "learning_rate": 8.60130298498191e-06, + "loss": 16.5663, + "step": 159810 + }, + { + "epoch": 0.32284651155274185, + "grad_norm": 348.7825012207031, + "learning_rate": 8.60106082797197e-06, + "loss": 15.3467, + "step": 159820 + }, + { + "epoch": 0.3228667121854257, + "grad_norm": 175.0106201171875, + "learning_rate": 8.600818653410895e-06, + "loss": 25.3356, + "step": 159830 + }, + { + "epoch": 0.32288691281810944, + "grad_norm": 206.51332092285156, + "learning_rate": 8.600576461299869e-06, + "loss": 37.4376, + "step": 159840 + }, + { + "epoch": 0.32290711345079326, + "grad_norm": 469.91583251953125, + "learning_rate": 8.60033425164007e-06, + "loss": 19.0019, + "step": 159850 + }, + { + "epoch": 0.3229273140834771, + "grad_norm": 347.607177734375, + "learning_rate": 8.600092024432676e-06, + "loss": 17.3698, + "step": 159860 + }, + { + "epoch": 0.3229475147161609, + "grad_norm": 553.8758544921875, + "learning_rate": 8.599849779678872e-06, + "loss": 31.0398, + "step": 159870 + }, + { + "epoch": 0.3229677153488447, + "grad_norm": 466.9158935546875, + "learning_rate": 8.599607517379837e-06, + "loss": 23.9702, + "step": 159880 + }, + { + "epoch": 0.32298791598152854, + "grad_norm": 224.60279846191406, + "learning_rate": 8.599365237536751e-06, + "loss": 21.599, + "step": 159890 + }, + { + "epoch": 0.32300811661421236, + "grad_norm": 340.7933044433594, + "learning_rate": 8.599122940150795e-06, + "loss": 28.8004, + "step": 159900 + }, + { + "epoch": 0.3230283172468962, + "grad_norm": 138.8593292236328, + "learning_rate": 8.598880625223152e-06, + "loss": 15.2423, + "step": 159910 + }, + { + "epoch": 0.32304851787958, + "grad_norm": 374.0597229003906, + "learning_rate": 8.598638292755e-06, + "loss": 23.8807, + "step": 159920 + }, + { + "epoch": 0.3230687185122638, + "grad_norm": 364.0459289550781, + "learning_rate": 8.59839594274752e-06, + "loss": 30.2405, + "step": 159930 + }, + { + "epoch": 0.32308891914494764, + "grad_norm": 546.2945556640625, + "learning_rate": 8.598153575201897e-06, + "loss": 24.3799, + "step": 159940 + }, + { + "epoch": 0.32310911977763146, + "grad_norm": 342.47540283203125, + "learning_rate": 8.597911190119308e-06, + "loss": 30.9596, + "step": 159950 + }, + { + "epoch": 0.3231293204103152, + "grad_norm": 0.0, + "learning_rate": 8.597668787500937e-06, + "loss": 6.6189, + "step": 159960 + }, + { + "epoch": 0.32314952104299904, + "grad_norm": 494.8405456542969, + "learning_rate": 8.597426367347965e-06, + "loss": 34.9544, + "step": 159970 + }, + { + "epoch": 0.32316972167568286, + "grad_norm": 348.3296813964844, + "learning_rate": 8.597183929661573e-06, + "loss": 23.1549, + "step": 159980 + }, + { + "epoch": 0.3231899223083667, + "grad_norm": 335.0788879394531, + "learning_rate": 8.596941474442943e-06, + "loss": 12.827, + "step": 159990 + }, + { + "epoch": 0.3232101229410505, + "grad_norm": 357.87774658203125, + "learning_rate": 8.596699001693257e-06, + "loss": 13.433, + "step": 160000 + }, + { + "epoch": 0.3232303235737343, + "grad_norm": 868.0357666015625, + "learning_rate": 8.596456511413695e-06, + "loss": 35.5549, + "step": 160010 + }, + { + "epoch": 0.32325052420641814, + "grad_norm": 466.7880859375, + "learning_rate": 8.59621400360544e-06, + "loss": 22.8039, + "step": 160020 + }, + { + "epoch": 0.32327072483910196, + "grad_norm": 747.6487426757812, + "learning_rate": 8.595971478269675e-06, + "loss": 15.5572, + 
"step": 160030 + }, + { + "epoch": 0.3232909254717858, + "grad_norm": 622.7278442382812, + "learning_rate": 8.59572893540758e-06, + "loss": 14.0454, + "step": 160040 + }, + { + "epoch": 0.3233111261044696, + "grad_norm": 572.1239624023438, + "learning_rate": 8.59548637502034e-06, + "loss": 16.4945, + "step": 160050 + }, + { + "epoch": 0.3233313267371534, + "grad_norm": 303.442138671875, + "learning_rate": 8.595243797109137e-06, + "loss": 27.6703, + "step": 160060 + }, + { + "epoch": 0.32335152736983724, + "grad_norm": 162.61599731445312, + "learning_rate": 8.595001201675149e-06, + "loss": 24.7356, + "step": 160070 + }, + { + "epoch": 0.32337172800252106, + "grad_norm": 386.8169860839844, + "learning_rate": 8.594758588719562e-06, + "loss": 19.3219, + "step": 160080 + }, + { + "epoch": 0.32339192863520483, + "grad_norm": 449.1678771972656, + "learning_rate": 8.594515958243557e-06, + "loss": 29.9948, + "step": 160090 + }, + { + "epoch": 0.32341212926788865, + "grad_norm": 598.8031005859375, + "learning_rate": 8.594273310248317e-06, + "loss": 18.6745, + "step": 160100 + }, + { + "epoch": 0.32343232990057247, + "grad_norm": 197.71539306640625, + "learning_rate": 8.594030644735025e-06, + "loss": 17.4596, + "step": 160110 + }, + { + "epoch": 0.3234525305332563, + "grad_norm": 338.89697265625, + "learning_rate": 8.593787961704864e-06, + "loss": 17.5787, + "step": 160120 + }, + { + "epoch": 0.3234727311659401, + "grad_norm": 657.6072387695312, + "learning_rate": 8.593545261159017e-06, + "loss": 30.781, + "step": 160130 + }, + { + "epoch": 0.32349293179862393, + "grad_norm": 408.7171325683594, + "learning_rate": 8.593302543098666e-06, + "loss": 18.7955, + "step": 160140 + }, + { + "epoch": 0.32351313243130775, + "grad_norm": 999.4275512695312, + "learning_rate": 8.593059807524993e-06, + "loss": 30.9812, + "step": 160150 + }, + { + "epoch": 0.32353333306399157, + "grad_norm": 232.17300415039062, + "learning_rate": 8.592817054439184e-06, + "loss": 14.6789, + "step": 160160 + }, + { + "epoch": 0.3235535336966754, + "grad_norm": 59.33561706542969, + "learning_rate": 8.592574283842418e-06, + "loss": 8.6261, + "step": 160170 + }, + { + "epoch": 0.3235737343293592, + "grad_norm": 464.56915283203125, + "learning_rate": 8.592331495735884e-06, + "loss": 13.9509, + "step": 160180 + }, + { + "epoch": 0.32359393496204303, + "grad_norm": 299.0964660644531, + "learning_rate": 8.592088690120759e-06, + "loss": 17.5647, + "step": 160190 + }, + { + "epoch": 0.32361413559472685, + "grad_norm": 600.798583984375, + "learning_rate": 8.591845866998231e-06, + "loss": 26.1429, + "step": 160200 + }, + { + "epoch": 0.32363433622741067, + "grad_norm": 1665.239501953125, + "learning_rate": 8.591603026369481e-06, + "loss": 30.2836, + "step": 160210 + }, + { + "epoch": 0.32365453686009443, + "grad_norm": 258.0927734375, + "learning_rate": 8.591360168235694e-06, + "loss": 8.8683, + "step": 160220 + }, + { + "epoch": 0.32367473749277825, + "grad_norm": 188.40628051757812, + "learning_rate": 8.591117292598053e-06, + "loss": 26.9318, + "step": 160230 + }, + { + "epoch": 0.3236949381254621, + "grad_norm": 330.8376159667969, + "learning_rate": 8.590874399457743e-06, + "loss": 33.8913, + "step": 160240 + }, + { + "epoch": 0.3237151387581459, + "grad_norm": 123.2463607788086, + "learning_rate": 8.590631488815945e-06, + "loss": 24.2562, + "step": 160250 + }, + { + "epoch": 0.3237353393908297, + "grad_norm": 210.2475128173828, + "learning_rate": 8.590388560673846e-06, + "loss": 23.3481, + "step": 160260 + }, + { + "epoch": 
0.32375554002351353, + "grad_norm": 1193.0272216796875, + "learning_rate": 8.590145615032626e-06, + "loss": 18.3568, + "step": 160270 + }, + { + "epoch": 0.32377574065619735, + "grad_norm": 1069.4720458984375, + "learning_rate": 8.589902651893474e-06, + "loss": 41.682, + "step": 160280 + }, + { + "epoch": 0.3237959412888812, + "grad_norm": 1540.3475341796875, + "learning_rate": 8.589659671257573e-06, + "loss": 21.1678, + "step": 160290 + }, + { + "epoch": 0.323816141921565, + "grad_norm": 185.64068603515625, + "learning_rate": 8.589416673126104e-06, + "loss": 17.0713, + "step": 160300 + }, + { + "epoch": 0.3238363425542488, + "grad_norm": 284.4470520019531, + "learning_rate": 8.589173657500254e-06, + "loss": 19.351, + "step": 160310 + }, + { + "epoch": 0.32385654318693263, + "grad_norm": 267.5775146484375, + "learning_rate": 8.588930624381207e-06, + "loss": 10.3574, + "step": 160320 + }, + { + "epoch": 0.32387674381961645, + "grad_norm": 884.9189453125, + "learning_rate": 8.588687573770146e-06, + "loss": 22.3544, + "step": 160330 + }, + { + "epoch": 0.3238969444523003, + "grad_norm": 6.199318885803223, + "learning_rate": 8.588444505668259e-06, + "loss": 26.2884, + "step": 160340 + }, + { + "epoch": 0.32391714508498404, + "grad_norm": 197.96551513671875, + "learning_rate": 8.588201420076727e-06, + "loss": 35.7713, + "step": 160350 + }, + { + "epoch": 0.32393734571766786, + "grad_norm": 1003.1591796875, + "learning_rate": 8.587958316996739e-06, + "loss": 19.3143, + "step": 160360 + }, + { + "epoch": 0.3239575463503517, + "grad_norm": 284.5838928222656, + "learning_rate": 8.587715196429477e-06, + "loss": 23.5706, + "step": 160370 + }, + { + "epoch": 0.3239777469830355, + "grad_norm": 20.341636657714844, + "learning_rate": 8.587472058376122e-06, + "loss": 30.0062, + "step": 160380 + }, + { + "epoch": 0.3239979476157193, + "grad_norm": 960.5979614257812, + "learning_rate": 8.587228902837868e-06, + "loss": 29.1643, + "step": 160390 + }, + { + "epoch": 0.32401814824840314, + "grad_norm": 328.1547546386719, + "learning_rate": 8.586985729815895e-06, + "loss": 25.4238, + "step": 160400 + }, + { + "epoch": 0.32403834888108696, + "grad_norm": 336.5776062011719, + "learning_rate": 8.586742539311385e-06, + "loss": 15.6985, + "step": 160410 + }, + { + "epoch": 0.3240585495137708, + "grad_norm": 253.26870727539062, + "learning_rate": 8.58649933132553e-06, + "loss": 21.2331, + "step": 160420 + }, + { + "epoch": 0.3240787501464546, + "grad_norm": 1049.53076171875, + "learning_rate": 8.586256105859512e-06, + "loss": 32.0964, + "step": 160430 + }, + { + "epoch": 0.3240989507791384, + "grad_norm": 474.3785095214844, + "learning_rate": 8.586012862914517e-06, + "loss": 28.0231, + "step": 160440 + }, + { + "epoch": 0.32411915141182224, + "grad_norm": 570.814208984375, + "learning_rate": 8.585769602491729e-06, + "loss": 29.7923, + "step": 160450 + }, + { + "epoch": 0.32413935204450606, + "grad_norm": 317.1396789550781, + "learning_rate": 8.585526324592335e-06, + "loss": 22.6866, + "step": 160460 + }, + { + "epoch": 0.3241595526771899, + "grad_norm": 509.0378723144531, + "learning_rate": 8.585283029217521e-06, + "loss": 24.7845, + "step": 160470 + }, + { + "epoch": 0.32417975330987364, + "grad_norm": 207.631591796875, + "learning_rate": 8.585039716368473e-06, + "loss": 14.4742, + "step": 160480 + }, + { + "epoch": 0.32419995394255746, + "grad_norm": 179.84829711914062, + "learning_rate": 8.584796386046374e-06, + "loss": 18.566, + "step": 160490 + }, + { + "epoch": 0.3242201545752413, + "grad_norm": 
511.96466064453125, + "learning_rate": 8.584553038252415e-06, + "loss": 20.0971, + "step": 160500 + }, + { + "epoch": 0.3242403552079251, + "grad_norm": 366.3064270019531, + "learning_rate": 8.584309672987778e-06, + "loss": 18.8068, + "step": 160510 + }, + { + "epoch": 0.3242605558406089, + "grad_norm": 839.582763671875, + "learning_rate": 8.584066290253649e-06, + "loss": 25.166, + "step": 160520 + }, + { + "epoch": 0.32428075647329274, + "grad_norm": 516.114013671875, + "learning_rate": 8.583822890051217e-06, + "loss": 26.7551, + "step": 160530 + }, + { + "epoch": 0.32430095710597656, + "grad_norm": 115.7437744140625, + "learning_rate": 8.583579472381668e-06, + "loss": 12.1438, + "step": 160540 + }, + { + "epoch": 0.3243211577386604, + "grad_norm": 849.4694213867188, + "learning_rate": 8.583336037246187e-06, + "loss": 40.4061, + "step": 160550 + }, + { + "epoch": 0.3243413583713442, + "grad_norm": 356.7700500488281, + "learning_rate": 8.58309258464596e-06, + "loss": 33.7424, + "step": 160560 + }, + { + "epoch": 0.324361559004028, + "grad_norm": 592.692138671875, + "learning_rate": 8.582849114582173e-06, + "loss": 12.2936, + "step": 160570 + }, + { + "epoch": 0.32438175963671184, + "grad_norm": 95.89431762695312, + "learning_rate": 8.582605627056016e-06, + "loss": 17.5404, + "step": 160580 + }, + { + "epoch": 0.32440196026939566, + "grad_norm": 265.5685729980469, + "learning_rate": 8.582362122068673e-06, + "loss": 13.4172, + "step": 160590 + }, + { + "epoch": 0.3244221609020794, + "grad_norm": 416.9441223144531, + "learning_rate": 8.58211859962133e-06, + "loss": 25.3335, + "step": 160600 + }, + { + "epoch": 0.32444236153476325, + "grad_norm": 138.38201904296875, + "learning_rate": 8.581875059715177e-06, + "loss": 30.2629, + "step": 160610 + }, + { + "epoch": 0.32446256216744707, + "grad_norm": 112.8659896850586, + "learning_rate": 8.5816315023514e-06, + "loss": 19.0405, + "step": 160620 + }, + { + "epoch": 0.3244827628001309, + "grad_norm": 385.9313659667969, + "learning_rate": 8.581387927531184e-06, + "loss": 12.0313, + "step": 160630 + }, + { + "epoch": 0.3245029634328147, + "grad_norm": 121.29768371582031, + "learning_rate": 8.581144335255717e-06, + "loss": 12.3851, + "step": 160640 + }, + { + "epoch": 0.3245231640654985, + "grad_norm": 224.8111114501953, + "learning_rate": 8.580900725526189e-06, + "loss": 23.353, + "step": 160650 + }, + { + "epoch": 0.32454336469818235, + "grad_norm": 308.1531982421875, + "learning_rate": 8.580657098343786e-06, + "loss": 17.705, + "step": 160660 + }, + { + "epoch": 0.32456356533086617, + "grad_norm": 256.0937194824219, + "learning_rate": 8.58041345370969e-06, + "loss": 18.9161, + "step": 160670 + }, + { + "epoch": 0.32458376596355, + "grad_norm": 442.410400390625, + "learning_rate": 8.580169791625097e-06, + "loss": 16.9792, + "step": 160680 + }, + { + "epoch": 0.3246039665962338, + "grad_norm": 1028.3814697265625, + "learning_rate": 8.57992611209119e-06, + "loss": 34.2166, + "step": 160690 + }, + { + "epoch": 0.3246241672289176, + "grad_norm": 268.5386962890625, + "learning_rate": 8.579682415109156e-06, + "loss": 34.551, + "step": 160700 + }, + { + "epoch": 0.32464436786160145, + "grad_norm": 503.40478515625, + "learning_rate": 8.579438700680184e-06, + "loss": 21.9579, + "step": 160710 + }, + { + "epoch": 0.32466456849428527, + "grad_norm": 468.3645935058594, + "learning_rate": 8.579194968805464e-06, + "loss": 22.4314, + "step": 160720 + }, + { + "epoch": 0.32468476912696903, + "grad_norm": 217.84596252441406, + "learning_rate": 8.57895121948618e-06, + 
"loss": 15.8868, + "step": 160730 + }, + { + "epoch": 0.32470496975965285, + "grad_norm": 164.38002014160156, + "learning_rate": 8.578707452723524e-06, + "loss": 23.0477, + "step": 160740 + }, + { + "epoch": 0.32472517039233667, + "grad_norm": 813.5709838867188, + "learning_rate": 8.57846366851868e-06, + "loss": 22.8599, + "step": 160750 + }, + { + "epoch": 0.3247453710250205, + "grad_norm": 672.2219848632812, + "learning_rate": 8.57821986687284e-06, + "loss": 12.0255, + "step": 160760 + }, + { + "epoch": 0.3247655716577043, + "grad_norm": 2.353846549987793, + "learning_rate": 8.577976047787187e-06, + "loss": 14.1876, + "step": 160770 + }, + { + "epoch": 0.32478577229038813, + "grad_norm": 265.181640625, + "learning_rate": 8.577732211262914e-06, + "loss": 20.4124, + "step": 160780 + }, + { + "epoch": 0.32480597292307195, + "grad_norm": 476.9296569824219, + "learning_rate": 8.577488357301209e-06, + "loss": 20.8482, + "step": 160790 + }, + { + "epoch": 0.32482617355575577, + "grad_norm": 391.3948669433594, + "learning_rate": 8.57724448590326e-06, + "loss": 20.8304, + "step": 160800 + }, + { + "epoch": 0.3248463741884396, + "grad_norm": 629.4627685546875, + "learning_rate": 8.577000597070256e-06, + "loss": 15.2627, + "step": 160810 + }, + { + "epoch": 0.3248665748211234, + "grad_norm": 297.3624267578125, + "learning_rate": 8.576756690803382e-06, + "loss": 13.3768, + "step": 160820 + }, + { + "epoch": 0.32488677545380723, + "grad_norm": 481.56170654296875, + "learning_rate": 8.576512767103831e-06, + "loss": 12.0114, + "step": 160830 + }, + { + "epoch": 0.32490697608649105, + "grad_norm": 341.4563903808594, + "learning_rate": 8.576268825972791e-06, + "loss": 39.4003, + "step": 160840 + }, + { + "epoch": 0.32492717671917487, + "grad_norm": 528.8344116210938, + "learning_rate": 8.576024867411452e-06, + "loss": 32.0481, + "step": 160850 + }, + { + "epoch": 0.32494737735185864, + "grad_norm": 534.1671142578125, + "learning_rate": 8.575780891420998e-06, + "loss": 29.5159, + "step": 160860 + }, + { + "epoch": 0.32496757798454246, + "grad_norm": 75.37045288085938, + "learning_rate": 8.575536898002623e-06, + "loss": 7.5759, + "step": 160870 + }, + { + "epoch": 0.3249877786172263, + "grad_norm": 289.7796630859375, + "learning_rate": 8.575292887157515e-06, + "loss": 17.507, + "step": 160880 + }, + { + "epoch": 0.3250079792499101, + "grad_norm": 520.741455078125, + "learning_rate": 8.575048858886865e-06, + "loss": 17.889, + "step": 160890 + }, + { + "epoch": 0.3250281798825939, + "grad_norm": 189.44778442382812, + "learning_rate": 8.574804813191859e-06, + "loss": 14.8586, + "step": 160900 + }, + { + "epoch": 0.32504838051527774, + "grad_norm": 376.878662109375, + "learning_rate": 8.574560750073687e-06, + "loss": 14.024, + "step": 160910 + }, + { + "epoch": 0.32506858114796156, + "grad_norm": 679.778564453125, + "learning_rate": 8.57431666953354e-06, + "loss": 24.7067, + "step": 160920 + }, + { + "epoch": 0.3250887817806454, + "grad_norm": 486.84967041015625, + "learning_rate": 8.574072571572606e-06, + "loss": 14.1587, + "step": 160930 + }, + { + "epoch": 0.3251089824133292, + "grad_norm": 430.3070373535156, + "learning_rate": 8.57382845619208e-06, + "loss": 16.6094, + "step": 160940 + }, + { + "epoch": 0.325129183046013, + "grad_norm": 548.0245971679688, + "learning_rate": 8.573584323393142e-06, + "loss": 23.7725, + "step": 160950 + }, + { + "epoch": 0.32514938367869683, + "grad_norm": 334.4255676269531, + "learning_rate": 8.57334017317699e-06, + "loss": 21.4106, + "step": 160960 + }, + { + "epoch": 
0.32516958431138065, + "grad_norm": 515.2789306640625, + "learning_rate": 8.573096005544812e-06, + "loss": 30.5712, + "step": 160970 + }, + { + "epoch": 0.3251897849440645, + "grad_norm": 460.02093505859375, + "learning_rate": 8.572851820497797e-06, + "loss": 15.8288, + "step": 160980 + }, + { + "epoch": 0.32520998557674824, + "grad_norm": 614.912109375, + "learning_rate": 8.572607618037137e-06, + "loss": 19.0982, + "step": 160990 + }, + { + "epoch": 0.32523018620943206, + "grad_norm": 266.36419677734375, + "learning_rate": 8.572363398164017e-06, + "loss": 25.8255, + "step": 161000 + }, + { + "epoch": 0.3252503868421159, + "grad_norm": 472.96392822265625, + "learning_rate": 8.572119160879633e-06, + "loss": 21.8668, + "step": 161010 + }, + { + "epoch": 0.3252705874747997, + "grad_norm": 497.37042236328125, + "learning_rate": 8.571874906185175e-06, + "loss": 28.2013, + "step": 161020 + }, + { + "epoch": 0.3252907881074835, + "grad_norm": 74.47261810302734, + "learning_rate": 8.57163063408183e-06, + "loss": 18.9175, + "step": 161030 + }, + { + "epoch": 0.32531098874016734, + "grad_norm": 302.9626159667969, + "learning_rate": 8.571386344570791e-06, + "loss": 41.7581, + "step": 161040 + }, + { + "epoch": 0.32533118937285116, + "grad_norm": 477.74627685546875, + "learning_rate": 8.571142037653249e-06, + "loss": 26.5132, + "step": 161050 + }, + { + "epoch": 0.325351390005535, + "grad_norm": 445.6697998046875, + "learning_rate": 8.570897713330392e-06, + "loss": 24.8526, + "step": 161060 + }, + { + "epoch": 0.3253715906382188, + "grad_norm": 445.0180358886719, + "learning_rate": 8.570653371603414e-06, + "loss": 20.6618, + "step": 161070 + }, + { + "epoch": 0.3253917912709026, + "grad_norm": 304.4143981933594, + "learning_rate": 8.570409012473503e-06, + "loss": 20.7598, + "step": 161080 + }, + { + "epoch": 0.32541199190358644, + "grad_norm": 9.344650268554688, + "learning_rate": 8.570164635941853e-06, + "loss": 25.8202, + "step": 161090 + }, + { + "epoch": 0.32543219253627026, + "grad_norm": 507.6846008300781, + "learning_rate": 8.569920242009655e-06, + "loss": 25.5562, + "step": 161100 + }, + { + "epoch": 0.325452393168954, + "grad_norm": 291.4764404296875, + "learning_rate": 8.569675830678097e-06, + "loss": 13.4058, + "step": 161110 + }, + { + "epoch": 0.32547259380163784, + "grad_norm": 669.7941284179688, + "learning_rate": 8.569431401948371e-06, + "loss": 28.1941, + "step": 161120 + }, + { + "epoch": 0.32549279443432166, + "grad_norm": 108.53662872314453, + "learning_rate": 8.56918695582167e-06, + "loss": 21.114, + "step": 161130 + }, + { + "epoch": 0.3255129950670055, + "grad_norm": 185.5521697998047, + "learning_rate": 8.568942492299186e-06, + "loss": 19.8685, + "step": 161140 + }, + { + "epoch": 0.3255331956996893, + "grad_norm": 737.6587524414062, + "learning_rate": 8.568698011382108e-06, + "loss": 19.6524, + "step": 161150 + }, + { + "epoch": 0.3255533963323731, + "grad_norm": 523.246337890625, + "learning_rate": 8.568453513071628e-06, + "loss": 30.4347, + "step": 161160 + }, + { + "epoch": 0.32557359696505694, + "grad_norm": 302.4512939453125, + "learning_rate": 8.568208997368938e-06, + "loss": 15.7047, + "step": 161170 + }, + { + "epoch": 0.32559379759774076, + "grad_norm": 406.25714111328125, + "learning_rate": 8.567964464275233e-06, + "loss": 13.2557, + "step": 161180 + }, + { + "epoch": 0.3256139982304246, + "grad_norm": 238.0476837158203, + "learning_rate": 8.5677199137917e-06, + "loss": 29.0175, + "step": 161190 + }, + { + "epoch": 0.3256341988631084, + "grad_norm": 
375.2989501953125, + "learning_rate": 8.567475345919532e-06, + "loss": 29.475, + "step": 161200 + }, + { + "epoch": 0.3256543994957922, + "grad_norm": 277.81304931640625, + "learning_rate": 8.567230760659924e-06, + "loss": 15.3525, + "step": 161210 + }, + { + "epoch": 0.32567460012847604, + "grad_norm": 177.0699462890625, + "learning_rate": 8.566986158014065e-06, + "loss": 17.9513, + "step": 161220 + }, + { + "epoch": 0.32569480076115986, + "grad_norm": 441.58746337890625, + "learning_rate": 8.566741537983147e-06, + "loss": 18.4316, + "step": 161230 + }, + { + "epoch": 0.32571500139384363, + "grad_norm": 800.390380859375, + "learning_rate": 8.566496900568364e-06, + "loss": 18.4686, + "step": 161240 + }, + { + "epoch": 0.32573520202652745, + "grad_norm": 692.088623046875, + "learning_rate": 8.56625224577091e-06, + "loss": 18.8764, + "step": 161250 + }, + { + "epoch": 0.32575540265921127, + "grad_norm": 679.6253662109375, + "learning_rate": 8.566007573591972e-06, + "loss": 13.6277, + "step": 161260 + }, + { + "epoch": 0.3257756032918951, + "grad_norm": 403.29766845703125, + "learning_rate": 8.565762884032747e-06, + "loss": 20.9267, + "step": 161270 + }, + { + "epoch": 0.3257958039245789, + "grad_norm": 545.2127075195312, + "learning_rate": 8.565518177094425e-06, + "loss": 25.598, + "step": 161280 + }, + { + "epoch": 0.32581600455726273, + "grad_norm": 545.4293212890625, + "learning_rate": 8.5652734527782e-06, + "loss": 16.6594, + "step": 161290 + }, + { + "epoch": 0.32583620518994655, + "grad_norm": 578.7387084960938, + "learning_rate": 8.565028711085266e-06, + "loss": 29.5076, + "step": 161300 + }, + { + "epoch": 0.32585640582263037, + "grad_norm": 924.8513793945312, + "learning_rate": 8.564783952016813e-06, + "loss": 19.9818, + "step": 161310 + }, + { + "epoch": 0.3258766064553142, + "grad_norm": 59.36968231201172, + "learning_rate": 8.564539175574035e-06, + "loss": 16.2665, + "step": 161320 + }, + { + "epoch": 0.325896807087998, + "grad_norm": 536.894775390625, + "learning_rate": 8.564294381758128e-06, + "loss": 12.6896, + "step": 161330 + }, + { + "epoch": 0.32591700772068183, + "grad_norm": 159.65232849121094, + "learning_rate": 8.56404957057028e-06, + "loss": 24.8131, + "step": 161340 + }, + { + "epoch": 0.32593720835336565, + "grad_norm": 129.59710693359375, + "learning_rate": 8.563804742011689e-06, + "loss": 21.0008, + "step": 161350 + }, + { + "epoch": 0.32595740898604947, + "grad_norm": 490.70501708984375, + "learning_rate": 8.563559896083544e-06, + "loss": 24.1454, + "step": 161360 + }, + { + "epoch": 0.32597760961873323, + "grad_norm": 496.4228210449219, + "learning_rate": 8.56331503278704e-06, + "loss": 34.2492, + "step": 161370 + }, + { + "epoch": 0.32599781025141705, + "grad_norm": 339.2381896972656, + "learning_rate": 8.563070152123372e-06, + "loss": 11.0371, + "step": 161380 + }, + { + "epoch": 0.3260180108841009, + "grad_norm": 487.29449462890625, + "learning_rate": 8.562825254093732e-06, + "loss": 13.583, + "step": 161390 + }, + { + "epoch": 0.3260382115167847, + "grad_norm": 442.0322570800781, + "learning_rate": 8.562580338699313e-06, + "loss": 28.2066, + "step": 161400 + }, + { + "epoch": 0.3260584121494685, + "grad_norm": 523.257568359375, + "learning_rate": 8.56233540594131e-06, + "loss": 17.8132, + "step": 161410 + }, + { + "epoch": 0.32607861278215233, + "grad_norm": 177.55528259277344, + "learning_rate": 8.562090455820918e-06, + "loss": 35.7883, + "step": 161420 + }, + { + "epoch": 0.32609881341483615, + "grad_norm": 1586.7711181640625, + "learning_rate": 
8.561845488339327e-06, + "loss": 30.0223, + "step": 161430 + }, + { + "epoch": 0.32611901404752, + "grad_norm": 35.832515716552734, + "learning_rate": 8.561600503497734e-06, + "loss": 34.6228, + "step": 161440 + }, + { + "epoch": 0.3261392146802038, + "grad_norm": 563.1609497070312, + "learning_rate": 8.56135550129733e-06, + "loss": 29.8364, + "step": 161450 + }, + { + "epoch": 0.3261594153128876, + "grad_norm": 74.82799530029297, + "learning_rate": 8.561110481739314e-06, + "loss": 16.1139, + "step": 161460 + }, + { + "epoch": 0.32617961594557143, + "grad_norm": 4.64946174621582, + "learning_rate": 8.560865444824875e-06, + "loss": 28.9766, + "step": 161470 + }, + { + "epoch": 0.32619981657825525, + "grad_norm": 492.2882385253906, + "learning_rate": 8.560620390555212e-06, + "loss": 19.2768, + "step": 161480 + }, + { + "epoch": 0.3262200172109391, + "grad_norm": 549.025146484375, + "learning_rate": 8.560375318931517e-06, + "loss": 13.379, + "step": 161490 + }, + { + "epoch": 0.32624021784362284, + "grad_norm": 261.4835510253906, + "learning_rate": 8.560130229954985e-06, + "loss": 25.0308, + "step": 161500 + }, + { + "epoch": 0.32626041847630666, + "grad_norm": 844.3115234375, + "learning_rate": 8.559885123626806e-06, + "loss": 22.5069, + "step": 161510 + }, + { + "epoch": 0.3262806191089905, + "grad_norm": 349.45947265625, + "learning_rate": 8.559639999948181e-06, + "loss": 16.8482, + "step": 161520 + }, + { + "epoch": 0.3263008197416743, + "grad_norm": 401.1449890136719, + "learning_rate": 8.559394858920304e-06, + "loss": 21.0563, + "step": 161530 + }, + { + "epoch": 0.3263210203743581, + "grad_norm": 277.7603759765625, + "learning_rate": 8.559149700544367e-06, + "loss": 26.0529, + "step": 161540 + }, + { + "epoch": 0.32634122100704194, + "grad_norm": 375.0739440917969, + "learning_rate": 8.558904524821565e-06, + "loss": 7.5684, + "step": 161550 + }, + { + "epoch": 0.32636142163972576, + "grad_norm": 423.8526306152344, + "learning_rate": 8.558659331753096e-06, + "loss": 23.3725, + "step": 161560 + }, + { + "epoch": 0.3263816222724096, + "grad_norm": 132.01666259765625, + "learning_rate": 8.558414121340152e-06, + "loss": 23.1224, + "step": 161570 + }, + { + "epoch": 0.3264018229050934, + "grad_norm": 469.2595520019531, + "learning_rate": 8.55816889358393e-06, + "loss": 11.6602, + "step": 161580 + }, + { + "epoch": 0.3264220235377772, + "grad_norm": 1435.6239013671875, + "learning_rate": 8.557923648485622e-06, + "loss": 32.211, + "step": 161590 + }, + { + "epoch": 0.32644222417046104, + "grad_norm": 1065.9990234375, + "learning_rate": 8.557678386046429e-06, + "loss": 30.3401, + "step": 161600 + }, + { + "epoch": 0.32646242480314486, + "grad_norm": 171.49520874023438, + "learning_rate": 8.55743310626754e-06, + "loss": 9.6927, + "step": 161610 + }, + { + "epoch": 0.3264826254358287, + "grad_norm": 396.7803649902344, + "learning_rate": 8.557187809150154e-06, + "loss": 15.5282, + "step": 161620 + }, + { + "epoch": 0.32650282606851244, + "grad_norm": 829.1217041015625, + "learning_rate": 8.556942494695467e-06, + "loss": 16.5586, + "step": 161630 + }, + { + "epoch": 0.32652302670119626, + "grad_norm": 231.0962677001953, + "learning_rate": 8.556697162904674e-06, + "loss": 34.7648, + "step": 161640 + }, + { + "epoch": 0.3265432273338801, + "grad_norm": 620.1940307617188, + "learning_rate": 8.55645181377897e-06, + "loss": 18.0748, + "step": 161650 + }, + { + "epoch": 0.3265634279665639, + "grad_norm": 358.02386474609375, + "learning_rate": 8.55620644731955e-06, + "loss": 26.7937, + "step": 161660 + }, 
+ { + "epoch": 0.3265836285992477, + "grad_norm": 451.89056396484375, + "learning_rate": 8.555961063527612e-06, + "loss": 24.4252, + "step": 161670 + }, + { + "epoch": 0.32660382923193154, + "grad_norm": 157.92124938964844, + "learning_rate": 8.555715662404352e-06, + "loss": 9.6293, + "step": 161680 + }, + { + "epoch": 0.32662402986461536, + "grad_norm": 262.64215087890625, + "learning_rate": 8.555470243950963e-06, + "loss": 14.5127, + "step": 161690 + }, + { + "epoch": 0.3266442304972992, + "grad_norm": 518.3018798828125, + "learning_rate": 8.555224808168644e-06, + "loss": 22.2709, + "step": 161700 + }, + { + "epoch": 0.326664431129983, + "grad_norm": 341.222900390625, + "learning_rate": 8.554979355058593e-06, + "loss": 19.9243, + "step": 161710 + }, + { + "epoch": 0.3266846317626668, + "grad_norm": 255.33712768554688, + "learning_rate": 8.554733884622003e-06, + "loss": 22.7051, + "step": 161720 + }, + { + "epoch": 0.32670483239535064, + "grad_norm": 495.9708557128906, + "learning_rate": 8.554488396860069e-06, + "loss": 28.9186, + "step": 161730 + }, + { + "epoch": 0.32672503302803446, + "grad_norm": 247.82550048828125, + "learning_rate": 8.55424289177399e-06, + "loss": 19.3723, + "step": 161740 + }, + { + "epoch": 0.3267452336607182, + "grad_norm": 276.525634765625, + "learning_rate": 8.553997369364964e-06, + "loss": 18.7027, + "step": 161750 + }, + { + "epoch": 0.32676543429340205, + "grad_norm": 4.652155876159668, + "learning_rate": 8.553751829634184e-06, + "loss": 13.5415, + "step": 161760 + }, + { + "epoch": 0.32678563492608587, + "grad_norm": 67.46437072753906, + "learning_rate": 8.55350627258285e-06, + "loss": 20.1329, + "step": 161770 + }, + { + "epoch": 0.3268058355587697, + "grad_norm": 367.9119873046875, + "learning_rate": 8.553260698212156e-06, + "loss": 13.7456, + "step": 161780 + }, + { + "epoch": 0.3268260361914535, + "grad_norm": 565.5093383789062, + "learning_rate": 8.5530151065233e-06, + "loss": 29.1452, + "step": 161790 + }, + { + "epoch": 0.3268462368241373, + "grad_norm": 883.661376953125, + "learning_rate": 8.55276949751748e-06, + "loss": 20.8828, + "step": 161800 + }, + { + "epoch": 0.32686643745682115, + "grad_norm": 380.5515441894531, + "learning_rate": 8.552523871195895e-06, + "loss": 27.1904, + "step": 161810 + }, + { + "epoch": 0.32688663808950497, + "grad_norm": 349.1186828613281, + "learning_rate": 8.552278227559736e-06, + "loss": 34.3681, + "step": 161820 + }, + { + "epoch": 0.3269068387221888, + "grad_norm": 65.8606948852539, + "learning_rate": 8.552032566610206e-06, + "loss": 13.1881, + "step": 161830 + }, + { + "epoch": 0.3269270393548726, + "grad_norm": 518.0609741210938, + "learning_rate": 8.551786888348499e-06, + "loss": 22.1906, + "step": 161840 + }, + { + "epoch": 0.3269472399875564, + "grad_norm": 556.7715454101562, + "learning_rate": 8.551541192775813e-06, + "loss": 40.4598, + "step": 161850 + }, + { + "epoch": 0.32696744062024025, + "grad_norm": 127.9832534790039, + "learning_rate": 8.551295479893347e-06, + "loss": 22.3926, + "step": 161860 + }, + { + "epoch": 0.32698764125292407, + "grad_norm": 329.7506103515625, + "learning_rate": 8.551049749702298e-06, + "loss": 25.5638, + "step": 161870 + }, + { + "epoch": 0.32700784188560783, + "grad_norm": 387.6360168457031, + "learning_rate": 8.550804002203862e-06, + "loss": 16.2738, + "step": 161880 + }, + { + "epoch": 0.32702804251829165, + "grad_norm": 289.0902404785156, + "learning_rate": 8.550558237399238e-06, + "loss": 15.9065, + "step": 161890 + }, + { + "epoch": 0.32704824315097547, + "grad_norm": 
365.9407958984375, + "learning_rate": 8.550312455289624e-06, + "loss": 19.6126, + "step": 161900 + }, + { + "epoch": 0.3270684437836593, + "grad_norm": 0.0, + "learning_rate": 8.550066655876219e-06, + "loss": 21.1061, + "step": 161910 + }, + { + "epoch": 0.3270886444163431, + "grad_norm": 775.4854125976562, + "learning_rate": 8.549820839160217e-06, + "loss": 20.7549, + "step": 161920 + }, + { + "epoch": 0.32710884504902693, + "grad_norm": 369.57037353515625, + "learning_rate": 8.54957500514282e-06, + "loss": 25.828, + "step": 161930 + }, + { + "epoch": 0.32712904568171075, + "grad_norm": 518.6697387695312, + "learning_rate": 8.549329153825226e-06, + "loss": 15.5915, + "step": 161940 + }, + { + "epoch": 0.32714924631439457, + "grad_norm": 683.3680419921875, + "learning_rate": 8.549083285208632e-06, + "loss": 18.4541, + "step": 161950 + }, + { + "epoch": 0.3271694469470784, + "grad_norm": 341.146240234375, + "learning_rate": 8.548837399294235e-06, + "loss": 45.243, + "step": 161960 + }, + { + "epoch": 0.3271896475797622, + "grad_norm": 262.21697998046875, + "learning_rate": 8.548591496083236e-06, + "loss": 27.1111, + "step": 161970 + }, + { + "epoch": 0.32720984821244603, + "grad_norm": 400.15399169921875, + "learning_rate": 8.548345575576832e-06, + "loss": 33.4267, + "step": 161980 + }, + { + "epoch": 0.32723004884512985, + "grad_norm": 596.1068725585938, + "learning_rate": 8.548099637776222e-06, + "loss": 16.6739, + "step": 161990 + }, + { + "epoch": 0.32725024947781367, + "grad_norm": 304.2925720214844, + "learning_rate": 8.547853682682605e-06, + "loss": 11.4405, + "step": 162000 + }, + { + "epoch": 0.32727045011049744, + "grad_norm": 210.50241088867188, + "learning_rate": 8.54760771029718e-06, + "loss": 15.9007, + "step": 162010 + }, + { + "epoch": 0.32729065074318126, + "grad_norm": 563.9810791015625, + "learning_rate": 8.547361720621144e-06, + "loss": 25.1085, + "step": 162020 + }, + { + "epoch": 0.3273108513758651, + "grad_norm": 838.3320922851562, + "learning_rate": 8.547115713655698e-06, + "loss": 21.7204, + "step": 162030 + }, + { + "epoch": 0.3273310520085489, + "grad_norm": 238.96920776367188, + "learning_rate": 8.546869689402042e-06, + "loss": 16.9194, + "step": 162040 + }, + { + "epoch": 0.3273512526412327, + "grad_norm": 187.7772979736328, + "learning_rate": 8.54662364786137e-06, + "loss": 17.0501, + "step": 162050 + }, + { + "epoch": 0.32737145327391654, + "grad_norm": 10.696183204650879, + "learning_rate": 8.546377589034886e-06, + "loss": 28.5379, + "step": 162060 + }, + { + "epoch": 0.32739165390660036, + "grad_norm": 600.3301391601562, + "learning_rate": 8.546131512923787e-06, + "loss": 33.8055, + "step": 162070 + }, + { + "epoch": 0.3274118545392842, + "grad_norm": 326.2858581542969, + "learning_rate": 8.545885419529276e-06, + "loss": 21.1821, + "step": 162080 + }, + { + "epoch": 0.327432055171968, + "grad_norm": 270.0640869140625, + "learning_rate": 8.545639308852546e-06, + "loss": 14.3184, + "step": 162090 + }, + { + "epoch": 0.3274522558046518, + "grad_norm": 376.1778259277344, + "learning_rate": 8.545393180894801e-06, + "loss": 24.4655, + "step": 162100 + }, + { + "epoch": 0.32747245643733564, + "grad_norm": 485.3133239746094, + "learning_rate": 8.54514703565724e-06, + "loss": 19.5701, + "step": 162110 + }, + { + "epoch": 0.32749265707001946, + "grad_norm": 410.0458068847656, + "learning_rate": 8.544900873141063e-06, + "loss": 10.9461, + "step": 162120 + }, + { + "epoch": 0.3275128577027033, + "grad_norm": 121.17286682128906, + "learning_rate": 8.54465469334747e-06, + 
"loss": 24.1511, + "step": 162130 + }, + { + "epoch": 0.32753305833538704, + "grad_norm": 688.821044921875, + "learning_rate": 8.544408496277657e-06, + "loss": 16.0761, + "step": 162140 + }, + { + "epoch": 0.32755325896807086, + "grad_norm": 969.4783325195312, + "learning_rate": 8.544162281932829e-06, + "loss": 37.9854, + "step": 162150 + }, + { + "epoch": 0.3275734596007547, + "grad_norm": 184.92868041992188, + "learning_rate": 8.543916050314182e-06, + "loss": 21.9242, + "step": 162160 + }, + { + "epoch": 0.3275936602334385, + "grad_norm": 447.2829895019531, + "learning_rate": 8.54366980142292e-06, + "loss": 11.3009, + "step": 162170 + }, + { + "epoch": 0.3276138608661223, + "grad_norm": 169.4679412841797, + "learning_rate": 8.54342353526024e-06, + "loss": 17.2119, + "step": 162180 + }, + { + "epoch": 0.32763406149880614, + "grad_norm": 312.2886047363281, + "learning_rate": 8.543177251827344e-06, + "loss": 21.3861, + "step": 162190 + }, + { + "epoch": 0.32765426213148996, + "grad_norm": 499.69000244140625, + "learning_rate": 8.542930951125432e-06, + "loss": 18.3689, + "step": 162200 + }, + { + "epoch": 0.3276744627641738, + "grad_norm": 365.1938171386719, + "learning_rate": 8.542684633155703e-06, + "loss": 15.5692, + "step": 162210 + }, + { + "epoch": 0.3276946633968576, + "grad_norm": 348.21636962890625, + "learning_rate": 8.54243829791936e-06, + "loss": 26.9492, + "step": 162220 + }, + { + "epoch": 0.3277148640295414, + "grad_norm": 347.29559326171875, + "learning_rate": 8.5421919454176e-06, + "loss": 17.5994, + "step": 162230 + }, + { + "epoch": 0.32773506466222524, + "grad_norm": 616.3199462890625, + "learning_rate": 8.54194557565163e-06, + "loss": 11.9137, + "step": 162240 + }, + { + "epoch": 0.32775526529490906, + "grad_norm": 262.7330627441406, + "learning_rate": 8.541699188622645e-06, + "loss": 15.68, + "step": 162250 + }, + { + "epoch": 0.3277754659275929, + "grad_norm": 494.26934814453125, + "learning_rate": 8.541452784331848e-06, + "loss": 28.6482, + "step": 162260 + }, + { + "epoch": 0.32779566656027664, + "grad_norm": 365.5330505371094, + "learning_rate": 8.541206362780439e-06, + "loss": 39.957, + "step": 162270 + }, + { + "epoch": 0.32781586719296046, + "grad_norm": 423.7875671386719, + "learning_rate": 8.54095992396962e-06, + "loss": 16.2074, + "step": 162280 + }, + { + "epoch": 0.3278360678256443, + "grad_norm": 551.8334350585938, + "learning_rate": 8.540713467900592e-06, + "loss": 21.7853, + "step": 162290 + }, + { + "epoch": 0.3278562684583281, + "grad_norm": 70.32467651367188, + "learning_rate": 8.540466994574556e-06, + "loss": 27.0091, + "step": 162300 + }, + { + "epoch": 0.3278764690910119, + "grad_norm": 479.802978515625, + "learning_rate": 8.540220503992713e-06, + "loss": 28.0744, + "step": 162310 + }, + { + "epoch": 0.32789666972369574, + "grad_norm": 258.39971923828125, + "learning_rate": 8.539973996156265e-06, + "loss": 15.1333, + "step": 162320 + }, + { + "epoch": 0.32791687035637956, + "grad_norm": 546.7047729492188, + "learning_rate": 8.539727471066412e-06, + "loss": 24.53, + "step": 162330 + }, + { + "epoch": 0.3279370709890634, + "grad_norm": 489.63507080078125, + "learning_rate": 8.539480928724358e-06, + "loss": 19.637, + "step": 162340 + }, + { + "epoch": 0.3279572716217472, + "grad_norm": 347.3206481933594, + "learning_rate": 8.539234369131301e-06, + "loss": 22.4533, + "step": 162350 + }, + { + "epoch": 0.327977472254431, + "grad_norm": 297.5228271484375, + "learning_rate": 8.538987792288447e-06, + "loss": 16.6198, + "step": 162360 + }, + { + "epoch": 
0.32799767288711484, + "grad_norm": 430.5821228027344, + "learning_rate": 8.538741198196996e-06, + "loss": 17.2786, + "step": 162370 + }, + { + "epoch": 0.32801787351979866, + "grad_norm": 663.9387817382812, + "learning_rate": 8.53849458685815e-06, + "loss": 19.893, + "step": 162380 + }, + { + "epoch": 0.32803807415248243, + "grad_norm": 546.9205932617188, + "learning_rate": 8.53824795827311e-06, + "loss": 23.6222, + "step": 162390 + }, + { + "epoch": 0.32805827478516625, + "grad_norm": 306.525146484375, + "learning_rate": 8.538001312443078e-06, + "loss": 20.6085, + "step": 162400 + }, + { + "epoch": 0.32807847541785007, + "grad_norm": 255.8943328857422, + "learning_rate": 8.537754649369256e-06, + "loss": 18.243, + "step": 162410 + }, + { + "epoch": 0.3280986760505339, + "grad_norm": 337.78399658203125, + "learning_rate": 8.537507969052848e-06, + "loss": 18.7949, + "step": 162420 + }, + { + "epoch": 0.3281188766832177, + "grad_norm": 106.52259063720703, + "learning_rate": 8.537261271495055e-06, + "loss": 12.198, + "step": 162430 + }, + { + "epoch": 0.32813907731590153, + "grad_norm": 191.0532684326172, + "learning_rate": 8.537014556697078e-06, + "loss": 17.4216, + "step": 162440 + }, + { + "epoch": 0.32815927794858535, + "grad_norm": 462.94671630859375, + "learning_rate": 8.536767824660124e-06, + "loss": 22.73, + "step": 162450 + }, + { + "epoch": 0.32817947858126917, + "grad_norm": 279.56524658203125, + "learning_rate": 8.536521075385391e-06, + "loss": 13.6735, + "step": 162460 + }, + { + "epoch": 0.328199679213953, + "grad_norm": 579.7990112304688, + "learning_rate": 8.536274308874083e-06, + "loss": 19.308, + "step": 162470 + }, + { + "epoch": 0.3282198798466368, + "grad_norm": 779.7994995117188, + "learning_rate": 8.536027525127405e-06, + "loss": 27.6808, + "step": 162480 + }, + { + "epoch": 0.32824008047932063, + "grad_norm": 558.7512817382812, + "learning_rate": 8.535780724146553e-06, + "loss": 20.9931, + "step": 162490 + }, + { + "epoch": 0.32826028111200445, + "grad_norm": 1386.6976318359375, + "learning_rate": 8.535533905932739e-06, + "loss": 45.5163, + "step": 162500 + }, + { + "epoch": 0.32828048174468827, + "grad_norm": 331.6851501464844, + "learning_rate": 8.53528707048716e-06, + "loss": 13.4701, + "step": 162510 + }, + { + "epoch": 0.32830068237737203, + "grad_norm": 233.7537078857422, + "learning_rate": 8.535040217811019e-06, + "loss": 15.4268, + "step": 162520 + }, + { + "epoch": 0.32832088301005585, + "grad_norm": 261.6727294921875, + "learning_rate": 8.534793347905523e-06, + "loss": 17.069, + "step": 162530 + }, + { + "epoch": 0.3283410836427397, + "grad_norm": 497.7231140136719, + "learning_rate": 8.534546460771873e-06, + "loss": 30.1122, + "step": 162540 + }, + { + "epoch": 0.3283612842754235, + "grad_norm": 270.703125, + "learning_rate": 8.534299556411272e-06, + "loss": 17.4311, + "step": 162550 + }, + { + "epoch": 0.3283814849081073, + "grad_norm": 343.0119323730469, + "learning_rate": 8.534052634824923e-06, + "loss": 20.4247, + "step": 162560 + }, + { + "epoch": 0.32840168554079113, + "grad_norm": 327.3485107421875, + "learning_rate": 8.53380569601403e-06, + "loss": 12.5775, + "step": 162570 + }, + { + "epoch": 0.32842188617347495, + "grad_norm": 22.53675079345703, + "learning_rate": 8.533558739979796e-06, + "loss": 17.3434, + "step": 162580 + }, + { + "epoch": 0.3284420868061588, + "grad_norm": 609.8348999023438, + "learning_rate": 8.533311766723428e-06, + "loss": 38.1542, + "step": 162590 + }, + { + "epoch": 0.3284622874388426, + "grad_norm": 569.4354858398438, + 
"learning_rate": 8.533064776246126e-06, + "loss": 14.9503, + "step": 162600 + }, + { + "epoch": 0.3284824880715264, + "grad_norm": 344.6394958496094, + "learning_rate": 8.532817768549092e-06, + "loss": 17.9084, + "step": 162610 + }, + { + "epoch": 0.32850268870421023, + "grad_norm": 312.6901550292969, + "learning_rate": 8.532570743633535e-06, + "loss": 44.5818, + "step": 162620 + }, + { + "epoch": 0.32852288933689405, + "grad_norm": 134.35845947265625, + "learning_rate": 8.532323701500657e-06, + "loss": 13.8301, + "step": 162630 + }, + { + "epoch": 0.3285430899695779, + "grad_norm": 392.0881042480469, + "learning_rate": 8.532076642151661e-06, + "loss": 25.9964, + "step": 162640 + }, + { + "epoch": 0.32856329060226164, + "grad_norm": 316.1405944824219, + "learning_rate": 8.531829565587751e-06, + "loss": 20.5279, + "step": 162650 + }, + { + "epoch": 0.32858349123494546, + "grad_norm": 292.3106384277344, + "learning_rate": 8.531582471810134e-06, + "loss": 22.3062, + "step": 162660 + }, + { + "epoch": 0.3286036918676293, + "grad_norm": 419.2328186035156, + "learning_rate": 8.53133536082001e-06, + "loss": 22.9919, + "step": 162670 + }, + { + "epoch": 0.3286238925003131, + "grad_norm": 467.255615234375, + "learning_rate": 8.531088232618587e-06, + "loss": 31.9226, + "step": 162680 + }, + { + "epoch": 0.3286440931329969, + "grad_norm": 180.1269989013672, + "learning_rate": 8.530841087207068e-06, + "loss": 9.9781, + "step": 162690 + }, + { + "epoch": 0.32866429376568074, + "grad_norm": 123.73636627197266, + "learning_rate": 8.530593924586659e-06, + "loss": 25.4351, + "step": 162700 + }, + { + "epoch": 0.32868449439836456, + "grad_norm": 342.6778259277344, + "learning_rate": 8.530346744758562e-06, + "loss": 26.805, + "step": 162710 + }, + { + "epoch": 0.3287046950310484, + "grad_norm": 573.797607421875, + "learning_rate": 8.530099547723983e-06, + "loss": 30.3572, + "step": 162720 + }, + { + "epoch": 0.3287248956637322, + "grad_norm": 829.430908203125, + "learning_rate": 8.529852333484129e-06, + "loss": 20.8336, + "step": 162730 + }, + { + "epoch": 0.328745096296416, + "grad_norm": 129.11514282226562, + "learning_rate": 8.5296051020402e-06, + "loss": 20.287, + "step": 162740 + }, + { + "epoch": 0.32876529692909984, + "grad_norm": 403.94183349609375, + "learning_rate": 8.529357853393406e-06, + "loss": 22.2563, + "step": 162750 + }, + { + "epoch": 0.32878549756178366, + "grad_norm": 205.53599548339844, + "learning_rate": 8.52911058754495e-06, + "loss": 18.6037, + "step": 162760 + }, + { + "epoch": 0.3288056981944675, + "grad_norm": 288.7787170410156, + "learning_rate": 8.528863304496035e-06, + "loss": 22.3115, + "step": 162770 + }, + { + "epoch": 0.32882589882715124, + "grad_norm": 734.8102416992188, + "learning_rate": 8.528616004247869e-06, + "loss": 18.1356, + "step": 162780 + }, + { + "epoch": 0.32884609945983506, + "grad_norm": 598.1242065429688, + "learning_rate": 8.528368686801656e-06, + "loss": 25.302, + "step": 162790 + }, + { + "epoch": 0.3288663000925189, + "grad_norm": 713.0995483398438, + "learning_rate": 8.528121352158604e-06, + "loss": 10.998, + "step": 162800 + }, + { + "epoch": 0.3288865007252027, + "grad_norm": 232.38246154785156, + "learning_rate": 8.527874000319915e-06, + "loss": 13.865, + "step": 162810 + }, + { + "epoch": 0.3289067013578865, + "grad_norm": 468.7928771972656, + "learning_rate": 8.527626631286797e-06, + "loss": 30.414, + "step": 162820 + }, + { + "epoch": 0.32892690199057034, + "grad_norm": 212.81771850585938, + "learning_rate": 8.527379245060453e-06, + "loss": 
15.9356, + "step": 162830 + }, + { + "epoch": 0.32894710262325416, + "grad_norm": 252.7122039794922, + "learning_rate": 8.527131841642092e-06, + "loss": 20.9118, + "step": 162840 + }, + { + "epoch": 0.328967303255938, + "grad_norm": 127.44690704345703, + "learning_rate": 8.526884421032916e-06, + "loss": 34.5894, + "step": 162850 + }, + { + "epoch": 0.3289875038886218, + "grad_norm": 426.28131103515625, + "learning_rate": 8.526636983234135e-06, + "loss": 14.298, + "step": 162860 + }, + { + "epoch": 0.3290077045213056, + "grad_norm": 450.6679992675781, + "learning_rate": 8.526389528246955e-06, + "loss": 17.171, + "step": 162870 + }, + { + "epoch": 0.32902790515398944, + "grad_norm": 486.2570495605469, + "learning_rate": 8.526142056072578e-06, + "loss": 26.1525, + "step": 162880 + }, + { + "epoch": 0.32904810578667326, + "grad_norm": 314.75201416015625, + "learning_rate": 8.525894566712212e-06, + "loss": 24.6402, + "step": 162890 + }, + { + "epoch": 0.3290683064193571, + "grad_norm": 250.26100158691406, + "learning_rate": 8.525647060167063e-06, + "loss": 17.4215, + "step": 162900 + }, + { + "epoch": 0.32908850705204085, + "grad_norm": 205.49232482910156, + "learning_rate": 8.52539953643834e-06, + "loss": 33.0014, + "step": 162910 + }, + { + "epoch": 0.32910870768472467, + "grad_norm": 744.604248046875, + "learning_rate": 8.525151995527244e-06, + "loss": 19.6141, + "step": 162920 + }, + { + "epoch": 0.3291289083174085, + "grad_norm": 805.7160034179688, + "learning_rate": 8.524904437434986e-06, + "loss": 34.4444, + "step": 162930 + }, + { + "epoch": 0.3291491089500923, + "grad_norm": 334.0453186035156, + "learning_rate": 8.524656862162773e-06, + "loss": 22.3157, + "step": 162940 + }, + { + "epoch": 0.3291693095827761, + "grad_norm": 1248.5230712890625, + "learning_rate": 8.524409269711808e-06, + "loss": 12.5765, + "step": 162950 + }, + { + "epoch": 0.32918951021545995, + "grad_norm": 172.95550537109375, + "learning_rate": 8.524161660083301e-06, + "loss": 16.6586, + "step": 162960 + }, + { + "epoch": 0.32920971084814377, + "grad_norm": 515.2111206054688, + "learning_rate": 8.523914033278456e-06, + "loss": 22.1315, + "step": 162970 + }, + { + "epoch": 0.3292299114808276, + "grad_norm": 115.48587799072266, + "learning_rate": 8.523666389298484e-06, + "loss": 26.5629, + "step": 162980 + }, + { + "epoch": 0.3292501121135114, + "grad_norm": 632.9249267578125, + "learning_rate": 8.523418728144585e-06, + "loss": 25.6933, + "step": 162990 + }, + { + "epoch": 0.3292703127461952, + "grad_norm": 751.2723388671875, + "learning_rate": 8.523171049817974e-06, + "loss": 20.6223, + "step": 163000 + }, + { + "epoch": 0.32929051337887905, + "grad_norm": 295.5430603027344, + "learning_rate": 8.522923354319854e-06, + "loss": 25.3509, + "step": 163010 + }, + { + "epoch": 0.32931071401156287, + "grad_norm": 273.0199279785156, + "learning_rate": 8.522675641651432e-06, + "loss": 16.5398, + "step": 163020 + }, + { + "epoch": 0.32933091464424663, + "grad_norm": 607.393310546875, + "learning_rate": 8.522427911813917e-06, + "loss": 20.1377, + "step": 163030 + }, + { + "epoch": 0.32935111527693045, + "grad_norm": 541.272705078125, + "learning_rate": 8.522180164808515e-06, + "loss": 33.9776, + "step": 163040 + }, + { + "epoch": 0.32937131590961427, + "grad_norm": 213.0469970703125, + "learning_rate": 8.521932400636435e-06, + "loss": 23.3719, + "step": 163050 + }, + { + "epoch": 0.3293915165422981, + "grad_norm": 216.6061248779297, + "learning_rate": 8.521684619298883e-06, + "loss": 17.6476, + "step": 163060 + }, + { + "epoch": 
0.3294117171749819, + "grad_norm": 109.58318328857422, + "learning_rate": 8.521436820797067e-06, + "loss": 14.3388, + "step": 163070 + }, + { + "epoch": 0.32943191780766573, + "grad_norm": 695.0380249023438, + "learning_rate": 8.521189005132195e-06, + "loss": 13.6592, + "step": 163080 + }, + { + "epoch": 0.32945211844034955, + "grad_norm": 840.1543579101562, + "learning_rate": 8.520941172305477e-06, + "loss": 18.1814, + "step": 163090 + }, + { + "epoch": 0.32947231907303337, + "grad_norm": 489.0300598144531, + "learning_rate": 8.520693322318116e-06, + "loss": 30.5703, + "step": 163100 + }, + { + "epoch": 0.3294925197057172, + "grad_norm": 275.0984191894531, + "learning_rate": 8.520445455171325e-06, + "loss": 23.3886, + "step": 163110 + }, + { + "epoch": 0.329512720338401, + "grad_norm": 702.0556640625, + "learning_rate": 8.520197570866307e-06, + "loss": 18.539, + "step": 163120 + }, + { + "epoch": 0.32953292097108483, + "grad_norm": 23.86518096923828, + "learning_rate": 8.519949669404275e-06, + "loss": 24.436, + "step": 163130 + }, + { + "epoch": 0.32955312160376865, + "grad_norm": 860.4483642578125, + "learning_rate": 8.519701750786435e-06, + "loss": 24.5196, + "step": 163140 + }, + { + "epoch": 0.32957332223645247, + "grad_norm": 294.7730407714844, + "learning_rate": 8.519453815013996e-06, + "loss": 20.5409, + "step": 163150 + }, + { + "epoch": 0.32959352286913624, + "grad_norm": 623.9278564453125, + "learning_rate": 8.519205862088165e-06, + "loss": 22.677, + "step": 163160 + }, + { + "epoch": 0.32961372350182006, + "grad_norm": 552.2848510742188, + "learning_rate": 8.518957892010151e-06, + "loss": 18.9434, + "step": 163170 + }, + { + "epoch": 0.3296339241345039, + "grad_norm": 199.6901092529297, + "learning_rate": 8.518709904781163e-06, + "loss": 24.925, + "step": 163180 + }, + { + "epoch": 0.3296541247671877, + "grad_norm": 517.12060546875, + "learning_rate": 8.518461900402411e-06, + "loss": 20.4278, + "step": 163190 + }, + { + "epoch": 0.3296743253998715, + "grad_norm": 1314.028076171875, + "learning_rate": 8.518213878875103e-06, + "loss": 27.2679, + "step": 163200 + }, + { + "epoch": 0.32969452603255534, + "grad_norm": 183.17808532714844, + "learning_rate": 8.517965840200445e-06, + "loss": 36.9442, + "step": 163210 + }, + { + "epoch": 0.32971472666523916, + "grad_norm": 2.3865418434143066, + "learning_rate": 8.51771778437965e-06, + "loss": 18.969, + "step": 163220 + }, + { + "epoch": 0.329734927297923, + "grad_norm": 154.100341796875, + "learning_rate": 8.517469711413924e-06, + "loss": 17.4932, + "step": 163230 + }, + { + "epoch": 0.3297551279306068, + "grad_norm": 197.871826171875, + "learning_rate": 8.517221621304479e-06, + "loss": 13.5748, + "step": 163240 + }, + { + "epoch": 0.3297753285632906, + "grad_norm": 14.94884967803955, + "learning_rate": 8.51697351405252e-06, + "loss": 19.712, + "step": 163250 + }, + { + "epoch": 0.32979552919597444, + "grad_norm": 412.6863098144531, + "learning_rate": 8.51672538965926e-06, + "loss": 22.0908, + "step": 163260 + }, + { + "epoch": 0.32981572982865826, + "grad_norm": 441.4495849609375, + "learning_rate": 8.516477248125907e-06, + "loss": 23.1763, + "step": 163270 + }, + { + "epoch": 0.3298359304613421, + "grad_norm": 216.23782348632812, + "learning_rate": 8.51622908945367e-06, + "loss": 21.2779, + "step": 163280 + }, + { + "epoch": 0.32985613109402584, + "grad_norm": 357.8020935058594, + "learning_rate": 8.515980913643759e-06, + "loss": 19.6541, + "step": 163290 + }, + { + "epoch": 0.32987633172670966, + "grad_norm": 312.1048889160156, + 
"learning_rate": 8.515732720697383e-06, + "loss": 23.176, + "step": 163300 + }, + { + "epoch": 0.3298965323593935, + "grad_norm": 408.1529541015625, + "learning_rate": 8.515484510615753e-06, + "loss": 20.2293, + "step": 163310 + }, + { + "epoch": 0.3299167329920773, + "grad_norm": 10.482206344604492, + "learning_rate": 8.515236283400078e-06, + "loss": 19.5709, + "step": 163320 + }, + { + "epoch": 0.3299369336247611, + "grad_norm": 490.29095458984375, + "learning_rate": 8.514988039051567e-06, + "loss": 17.5755, + "step": 163330 + }, + { + "epoch": 0.32995713425744494, + "grad_norm": 792.8616943359375, + "learning_rate": 8.514739777571431e-06, + "loss": 25.364, + "step": 163340 + }, + { + "epoch": 0.32997733489012876, + "grad_norm": 519.2301635742188, + "learning_rate": 8.51449149896088e-06, + "loss": 30.0556, + "step": 163350 + }, + { + "epoch": 0.3299975355228126, + "grad_norm": 14.800172805786133, + "learning_rate": 8.514243203221124e-06, + "loss": 32.3125, + "step": 163360 + }, + { + "epoch": 0.3300177361554964, + "grad_norm": 486.00164794921875, + "learning_rate": 8.51399489035337e-06, + "loss": 29.4682, + "step": 163370 + }, + { + "epoch": 0.3300379367881802, + "grad_norm": 387.1175537109375, + "learning_rate": 8.513746560358833e-06, + "loss": 24.6668, + "step": 163380 + }, + { + "epoch": 0.33005813742086404, + "grad_norm": 583.5296630859375, + "learning_rate": 8.513498213238722e-06, + "loss": 24.8786, + "step": 163390 + }, + { + "epoch": 0.33007833805354786, + "grad_norm": 1140.5289306640625, + "learning_rate": 8.513249848994248e-06, + "loss": 33.8562, + "step": 163400 + }, + { + "epoch": 0.3300985386862317, + "grad_norm": 535.426513671875, + "learning_rate": 8.513001467626618e-06, + "loss": 33.8145, + "step": 163410 + }, + { + "epoch": 0.33011873931891544, + "grad_norm": 613.3733520507812, + "learning_rate": 8.512753069137046e-06, + "loss": 33.113, + "step": 163420 + }, + { + "epoch": 0.33013893995159926, + "grad_norm": 167.0186309814453, + "learning_rate": 8.51250465352674e-06, + "loss": 12.3463, + "step": 163430 + }, + { + "epoch": 0.3301591405842831, + "grad_norm": 349.2598876953125, + "learning_rate": 8.512256220796915e-06, + "loss": 31.2951, + "step": 163440 + }, + { + "epoch": 0.3301793412169669, + "grad_norm": 495.67828369140625, + "learning_rate": 8.512007770948775e-06, + "loss": 24.4958, + "step": 163450 + }, + { + "epoch": 0.3301995418496507, + "grad_norm": 21.19040298461914, + "learning_rate": 8.51175930398354e-06, + "loss": 24.4859, + "step": 163460 + }, + { + "epoch": 0.33021974248233454, + "grad_norm": 748.869140625, + "learning_rate": 8.511510819902413e-06, + "loss": 38.5311, + "step": 163470 + }, + { + "epoch": 0.33023994311501836, + "grad_norm": 410.0790710449219, + "learning_rate": 8.51126231870661e-06, + "loss": 15.1447, + "step": 163480 + }, + { + "epoch": 0.3302601437477022, + "grad_norm": 975.2960815429688, + "learning_rate": 8.511013800397338e-06, + "loss": 18.7852, + "step": 163490 + }, + { + "epoch": 0.330280344380386, + "grad_norm": 518.7819213867188, + "learning_rate": 8.510765264975813e-06, + "loss": 28.029, + "step": 163500 + }, + { + "epoch": 0.3303005450130698, + "grad_norm": 546.6959228515625, + "learning_rate": 8.510516712443244e-06, + "loss": 13.1513, + "step": 163510 + }, + { + "epoch": 0.33032074564575364, + "grad_norm": 664.11376953125, + "learning_rate": 8.51026814280084e-06, + "loss": 31.3736, + "step": 163520 + }, + { + "epoch": 0.33034094627843746, + "grad_norm": 534.3417358398438, + "learning_rate": 8.510019556049815e-06, + "loss": 24.8484, + 
"step": 163530 + }, + { + "epoch": 0.3303611469111213, + "grad_norm": 530.6990356445312, + "learning_rate": 8.509770952191384e-06, + "loss": 23.2029, + "step": 163540 + }, + { + "epoch": 0.33038134754380505, + "grad_norm": 471.0072937011719, + "learning_rate": 8.509522331226751e-06, + "loss": 49.6587, + "step": 163550 + }, + { + "epoch": 0.33040154817648887, + "grad_norm": 452.3913269042969, + "learning_rate": 8.509273693157133e-06, + "loss": 22.6263, + "step": 163560 + }, + { + "epoch": 0.3304217488091727, + "grad_norm": 331.334716796875, + "learning_rate": 8.509025037983742e-06, + "loss": 25.4802, + "step": 163570 + }, + { + "epoch": 0.3304419494418565, + "grad_norm": 250.43438720703125, + "learning_rate": 8.508776365707788e-06, + "loss": 23.6728, + "step": 163580 + }, + { + "epoch": 0.33046215007454033, + "grad_norm": 1079.7852783203125, + "learning_rate": 8.508527676330483e-06, + "loss": 32.216, + "step": 163590 + }, + { + "epoch": 0.33048235070722415, + "grad_norm": 1053.2109375, + "learning_rate": 8.508278969853037e-06, + "loss": 27.0024, + "step": 163600 + }, + { + "epoch": 0.33050255133990797, + "grad_norm": 162.0265655517578, + "learning_rate": 8.508030246276668e-06, + "loss": 22.319, + "step": 163610 + }, + { + "epoch": 0.3305227519725918, + "grad_norm": 8.137332916259766, + "learning_rate": 8.507781505602585e-06, + "loss": 24.311, + "step": 163620 + }, + { + "epoch": 0.3305429526052756, + "grad_norm": 190.8646697998047, + "learning_rate": 8.507532747832e-06, + "loss": 27.7262, + "step": 163630 + }, + { + "epoch": 0.33056315323795943, + "grad_norm": 118.73406219482422, + "learning_rate": 8.507283972966126e-06, + "loss": 17.7927, + "step": 163640 + }, + { + "epoch": 0.33058335387064325, + "grad_norm": 303.9774169921875, + "learning_rate": 8.507035181006175e-06, + "loss": 21.1845, + "step": 163650 + }, + { + "epoch": 0.33060355450332707, + "grad_norm": 280.1281433105469, + "learning_rate": 8.50678637195336e-06, + "loss": 30.4494, + "step": 163660 + }, + { + "epoch": 0.33062375513601083, + "grad_norm": 925.5298461914062, + "learning_rate": 8.506537545808894e-06, + "loss": 30.8261, + "step": 163670 + }, + { + "epoch": 0.33064395576869465, + "grad_norm": 0.0, + "learning_rate": 8.506288702573988e-06, + "loss": 22.4596, + "step": 163680 + }, + { + "epoch": 0.3306641564013785, + "grad_norm": 592.17578125, + "learning_rate": 8.506039842249855e-06, + "loss": 19.7413, + "step": 163690 + }, + { + "epoch": 0.3306843570340623, + "grad_norm": 941.7138671875, + "learning_rate": 8.505790964837712e-06, + "loss": 23.3322, + "step": 163700 + }, + { + "epoch": 0.3307045576667461, + "grad_norm": 495.9679260253906, + "learning_rate": 8.505542070338768e-06, + "loss": 27.4565, + "step": 163710 + }, + { + "epoch": 0.33072475829942993, + "grad_norm": 507.7092590332031, + "learning_rate": 8.505293158754238e-06, + "loss": 22.9482, + "step": 163720 + }, + { + "epoch": 0.33074495893211375, + "grad_norm": 1086.2271728515625, + "learning_rate": 8.505044230085332e-06, + "loss": 23.6799, + "step": 163730 + }, + { + "epoch": 0.3307651595647976, + "grad_norm": 212.30029296875, + "learning_rate": 8.504795284333267e-06, + "loss": 25.285, + "step": 163740 + }, + { + "epoch": 0.3307853601974814, + "grad_norm": 239.4369659423828, + "learning_rate": 8.504546321499255e-06, + "loss": 35.6713, + "step": 163750 + }, + { + "epoch": 0.3308055608301652, + "grad_norm": 417.925537109375, + "learning_rate": 8.504297341584509e-06, + "loss": 22.9194, + "step": 163760 + }, + { + "epoch": 0.33082576146284903, + "grad_norm": 
309.2572021484375, + "learning_rate": 8.504048344590243e-06, + "loss": 12.4214, + "step": 163770 + }, + { + "epoch": 0.33084596209553285, + "grad_norm": 400.84893798828125, + "learning_rate": 8.50379933051767e-06, + "loss": 7.8366, + "step": 163780 + }, + { + "epoch": 0.3308661627282167, + "grad_norm": 82.74517059326172, + "learning_rate": 8.503550299368004e-06, + "loss": 24.8063, + "step": 163790 + }, + { + "epoch": 0.33088636336090044, + "grad_norm": 271.88836669921875, + "learning_rate": 8.50330125114246e-06, + "loss": 23.0029, + "step": 163800 + }, + { + "epoch": 0.33090656399358426, + "grad_norm": 677.6578979492188, + "learning_rate": 8.50305218584225e-06, + "loss": 39.7992, + "step": 163810 + }, + { + "epoch": 0.3309267646262681, + "grad_norm": 176.89434814453125, + "learning_rate": 8.502803103468587e-06, + "loss": 14.4343, + "step": 163820 + }, + { + "epoch": 0.3309469652589519, + "grad_norm": 1424.477783203125, + "learning_rate": 8.502554004022688e-06, + "loss": 27.7567, + "step": 163830 + }, + { + "epoch": 0.3309671658916357, + "grad_norm": 343.5373229980469, + "learning_rate": 8.502304887505765e-06, + "loss": 14.9989, + "step": 163840 + }, + { + "epoch": 0.33098736652431954, + "grad_norm": 503.44097900390625, + "learning_rate": 8.502055753919033e-06, + "loss": 22.4749, + "step": 163850 + }, + { + "epoch": 0.33100756715700336, + "grad_norm": 376.5154113769531, + "learning_rate": 8.501806603263706e-06, + "loss": 21.681, + "step": 163860 + }, + { + "epoch": 0.3310277677896872, + "grad_norm": 532.5528564453125, + "learning_rate": 8.501557435540996e-06, + "loss": 23.0162, + "step": 163870 + }, + { + "epoch": 0.331047968422371, + "grad_norm": 733.7446899414062, + "learning_rate": 8.501308250752123e-06, + "loss": 34.5138, + "step": 163880 + }, + { + "epoch": 0.3310681690550548, + "grad_norm": 153.69967651367188, + "learning_rate": 8.501059048898297e-06, + "loss": 16.3836, + "step": 163890 + }, + { + "epoch": 0.33108836968773864, + "grad_norm": 593.3562622070312, + "learning_rate": 8.500809829980734e-06, + "loss": 16.1941, + "step": 163900 + }, + { + "epoch": 0.33110857032042246, + "grad_norm": 42.10019302368164, + "learning_rate": 8.50056059400065e-06, + "loss": 23.1775, + "step": 163910 + }, + { + "epoch": 0.3311287709531063, + "grad_norm": 150.84754943847656, + "learning_rate": 8.500311340959256e-06, + "loss": 23.459, + "step": 163920 + }, + { + "epoch": 0.33114897158579004, + "grad_norm": 130.12181091308594, + "learning_rate": 8.500062070857772e-06, + "loss": 19.3963, + "step": 163930 + }, + { + "epoch": 0.33116917221847386, + "grad_norm": 248.83938598632812, + "learning_rate": 8.499812783697406e-06, + "loss": 20.3013, + "step": 163940 + }, + { + "epoch": 0.3311893728511577, + "grad_norm": 205.4557647705078, + "learning_rate": 8.499563479479378e-06, + "loss": 30.7546, + "step": 163950 + }, + { + "epoch": 0.3312095734838415, + "grad_norm": 629.36083984375, + "learning_rate": 8.499314158204904e-06, + "loss": 28.0496, + "step": 163960 + }, + { + "epoch": 0.3312297741165253, + "grad_norm": 510.4541320800781, + "learning_rate": 8.499064819875195e-06, + "loss": 18.2752, + "step": 163970 + }, + { + "epoch": 0.33124997474920914, + "grad_norm": 407.316650390625, + "learning_rate": 8.49881546449147e-06, + "loss": 15.6284, + "step": 163980 + }, + { + "epoch": 0.33127017538189296, + "grad_norm": 278.81414794921875, + "learning_rate": 8.498566092054943e-06, + "loss": 16.8428, + "step": 163990 + }, + { + "epoch": 0.3312903760145768, + "grad_norm": 581.9839477539062, + "learning_rate": 
8.498316702566828e-06, + "loss": 30.8934, + "step": 164000 + }, + { + "epoch": 0.3313105766472606, + "grad_norm": 669.310791015625, + "learning_rate": 8.498067296028343e-06, + "loss": 28.5954, + "step": 164010 + }, + { + "epoch": 0.3313307772799444, + "grad_norm": 514.7133178710938, + "learning_rate": 8.497817872440702e-06, + "loss": 19.1162, + "step": 164020 + }, + { + "epoch": 0.33135097791262824, + "grad_norm": 233.59666442871094, + "learning_rate": 8.497568431805118e-06, + "loss": 7.1543, + "step": 164030 + }, + { + "epoch": 0.33137117854531206, + "grad_norm": 208.81288146972656, + "learning_rate": 8.497318974122813e-06, + "loss": 13.3095, + "step": 164040 + }, + { + "epoch": 0.3313913791779959, + "grad_norm": 480.57177734375, + "learning_rate": 8.497069499394998e-06, + "loss": 19.3028, + "step": 164050 + }, + { + "epoch": 0.33141157981067965, + "grad_norm": 521.1884155273438, + "learning_rate": 8.496820007622891e-06, + "loss": 16.78, + "step": 164060 + }, + { + "epoch": 0.33143178044336347, + "grad_norm": 520.5380859375, + "learning_rate": 8.496570498807708e-06, + "loss": 24.3755, + "step": 164070 + }, + { + "epoch": 0.3314519810760473, + "grad_norm": 447.192138671875, + "learning_rate": 8.496320972950663e-06, + "loss": 19.0259, + "step": 164080 + }, + { + "epoch": 0.3314721817087311, + "grad_norm": 283.7176818847656, + "learning_rate": 8.496071430052975e-06, + "loss": 21.2698, + "step": 164090 + }, + { + "epoch": 0.3314923823414149, + "grad_norm": 1076.1070556640625, + "learning_rate": 8.495821870115857e-06, + "loss": 31.0239, + "step": 164100 + }, + { + "epoch": 0.33151258297409875, + "grad_norm": 496.1323547363281, + "learning_rate": 8.49557229314053e-06, + "loss": 20.0498, + "step": 164110 + }, + { + "epoch": 0.33153278360678257, + "grad_norm": 455.780517578125, + "learning_rate": 8.495322699128206e-06, + "loss": 19.5465, + "step": 164120 + }, + { + "epoch": 0.3315529842394664, + "grad_norm": 568.5023803710938, + "learning_rate": 8.495073088080102e-06, + "loss": 20.3758, + "step": 164130 + }, + { + "epoch": 0.3315731848721502, + "grad_norm": 200.53025817871094, + "learning_rate": 8.494823459997437e-06, + "loss": 25.7012, + "step": 164140 + }, + { + "epoch": 0.331593385504834, + "grad_norm": 264.4395446777344, + "learning_rate": 8.494573814881426e-06, + "loss": 18.4623, + "step": 164150 + }, + { + "epoch": 0.33161358613751785, + "grad_norm": 324.6714782714844, + "learning_rate": 8.494324152733286e-06, + "loss": 22.9645, + "step": 164160 + }, + { + "epoch": 0.33163378677020167, + "grad_norm": 193.10011291503906, + "learning_rate": 8.494074473554235e-06, + "loss": 18.749, + "step": 164170 + }, + { + "epoch": 0.33165398740288543, + "grad_norm": 260.4193420410156, + "learning_rate": 8.493824777345487e-06, + "loss": 20.2552, + "step": 164180 + }, + { + "epoch": 0.33167418803556925, + "grad_norm": 339.6500244140625, + "learning_rate": 8.493575064108262e-06, + "loss": 24.2752, + "step": 164190 + }, + { + "epoch": 0.33169438866825307, + "grad_norm": 147.78736877441406, + "learning_rate": 8.493325333843776e-06, + "loss": 15.3503, + "step": 164200 + }, + { + "epoch": 0.3317145893009369, + "grad_norm": 47.65892791748047, + "learning_rate": 8.493075586553245e-06, + "loss": 20.568, + "step": 164210 + }, + { + "epoch": 0.3317347899336207, + "grad_norm": 295.9309997558594, + "learning_rate": 8.492825822237888e-06, + "loss": 52.0664, + "step": 164220 + }, + { + "epoch": 0.33175499056630453, + "grad_norm": 492.0908508300781, + "learning_rate": 8.492576040898921e-06, + "loss": 16.3666, + "step": 
164230 + }, + { + "epoch": 0.33177519119898835, + "grad_norm": 1002.5883178710938, + "learning_rate": 8.492326242537564e-06, + "loss": 17.0254, + "step": 164240 + }, + { + "epoch": 0.33179539183167217, + "grad_norm": 499.7936706542969, + "learning_rate": 8.492076427155031e-06, + "loss": 34.3492, + "step": 164250 + }, + { + "epoch": 0.331815592464356, + "grad_norm": 398.0194396972656, + "learning_rate": 8.49182659475254e-06, + "loss": 12.2671, + "step": 164260 + }, + { + "epoch": 0.3318357930970398, + "grad_norm": 602.7805786132812, + "learning_rate": 8.491576745331312e-06, + "loss": 22.4832, + "step": 164270 + }, + { + "epoch": 0.33185599372972363, + "grad_norm": 385.6957702636719, + "learning_rate": 8.49132687889256e-06, + "loss": 15.002, + "step": 164280 + }, + { + "epoch": 0.33187619436240745, + "grad_norm": 53.15830993652344, + "learning_rate": 8.491076995437504e-06, + "loss": 28.6947, + "step": 164290 + }, + { + "epoch": 0.33189639499509127, + "grad_norm": 110.26107788085938, + "learning_rate": 8.490827094967364e-06, + "loss": 20.2564, + "step": 164300 + }, + { + "epoch": 0.33191659562777504, + "grad_norm": 401.9669494628906, + "learning_rate": 8.490577177483357e-06, + "loss": 22.4905, + "step": 164310 + }, + { + "epoch": 0.33193679626045886, + "grad_norm": 373.6098327636719, + "learning_rate": 8.490327242986698e-06, + "loss": 19.2917, + "step": 164320 + }, + { + "epoch": 0.3319569968931427, + "grad_norm": 893.7239379882812, + "learning_rate": 8.490077291478607e-06, + "loss": 20.567, + "step": 164330 + }, + { + "epoch": 0.3319771975258265, + "grad_norm": 160.93667602539062, + "learning_rate": 8.489827322960305e-06, + "loss": 11.6975, + "step": 164340 + }, + { + "epoch": 0.3319973981585103, + "grad_norm": 391.2195739746094, + "learning_rate": 8.489577337433006e-06, + "loss": 18.3815, + "step": 164350 + }, + { + "epoch": 0.33201759879119414, + "grad_norm": 228.7294921875, + "learning_rate": 8.48932733489793e-06, + "loss": 26.4221, + "step": 164360 + }, + { + "epoch": 0.33203779942387796, + "grad_norm": 462.6561584472656, + "learning_rate": 8.489077315356297e-06, + "loss": 17.3625, + "step": 164370 + }, + { + "epoch": 0.3320580000565618, + "grad_norm": 372.6870422363281, + "learning_rate": 8.488827278809324e-06, + "loss": 28.4132, + "step": 164380 + }, + { + "epoch": 0.3320782006892456, + "grad_norm": 736.1632690429688, + "learning_rate": 8.48857722525823e-06, + "loss": 27.4231, + "step": 164390 + }, + { + "epoch": 0.3320984013219294, + "grad_norm": 276.06182861328125, + "learning_rate": 8.488327154704232e-06, + "loss": 22.3278, + "step": 164400 + }, + { + "epoch": 0.33211860195461324, + "grad_norm": 311.4897766113281, + "learning_rate": 8.488077067148554e-06, + "loss": 11.3768, + "step": 164410 + }, + { + "epoch": 0.33213880258729706, + "grad_norm": 979.6168212890625, + "learning_rate": 8.487826962592409e-06, + "loss": 14.5317, + "step": 164420 + }, + { + "epoch": 0.3321590032199809, + "grad_norm": 756.59423828125, + "learning_rate": 8.487576841037019e-06, + "loss": 16.5986, + "step": 164430 + }, + { + "epoch": 0.33217920385266464, + "grad_norm": 124.56269073486328, + "learning_rate": 8.487326702483602e-06, + "loss": 53.1876, + "step": 164440 + }, + { + "epoch": 0.33219940448534846, + "grad_norm": 367.2579650878906, + "learning_rate": 8.487076546933378e-06, + "loss": 18.0344, + "step": 164450 + }, + { + "epoch": 0.3322196051180323, + "grad_norm": 414.42083740234375, + "learning_rate": 8.486826374387568e-06, + "loss": 22.6892, + "step": 164460 + }, + { + "epoch": 0.3322398057507161, + 
"grad_norm": 905.2984008789062, + "learning_rate": 8.486576184847386e-06, + "loss": 29.1753, + "step": 164470 + }, + { + "epoch": 0.3322600063833999, + "grad_norm": 559.3729248046875, + "learning_rate": 8.486325978314054e-06, + "loss": 20.0122, + "step": 164480 + }, + { + "epoch": 0.33228020701608374, + "grad_norm": 675.530029296875, + "learning_rate": 8.486075754788794e-06, + "loss": 17.5078, + "step": 164490 + }, + { + "epoch": 0.33230040764876756, + "grad_norm": 492.83636474609375, + "learning_rate": 8.485825514272824e-06, + "loss": 34.6663, + "step": 164500 + }, + { + "epoch": 0.3323206082814514, + "grad_norm": 384.5464782714844, + "learning_rate": 8.485575256767362e-06, + "loss": 29.0318, + "step": 164510 + }, + { + "epoch": 0.3323408089141352, + "grad_norm": 579.1456909179688, + "learning_rate": 8.48532498227363e-06, + "loss": 12.1218, + "step": 164520 + }, + { + "epoch": 0.332361009546819, + "grad_norm": 186.2333526611328, + "learning_rate": 8.485074690792845e-06, + "loss": 14.7983, + "step": 164530 + }, + { + "epoch": 0.33238121017950284, + "grad_norm": 2662.985595703125, + "learning_rate": 8.484824382326232e-06, + "loss": 29.8661, + "step": 164540 + }, + { + "epoch": 0.33240141081218666, + "grad_norm": 384.2058410644531, + "learning_rate": 8.484574056875004e-06, + "loss": 27.3167, + "step": 164550 + }, + { + "epoch": 0.3324216114448705, + "grad_norm": 386.95574951171875, + "learning_rate": 8.484323714440386e-06, + "loss": 42.5152, + "step": 164560 + }, + { + "epoch": 0.33244181207755424, + "grad_norm": 973.225341796875, + "learning_rate": 8.484073355023597e-06, + "loss": 25.1114, + "step": 164570 + }, + { + "epoch": 0.33246201271023806, + "grad_norm": 542.1936645507812, + "learning_rate": 8.483822978625855e-06, + "loss": 19.0829, + "step": 164580 + }, + { + "epoch": 0.3324822133429219, + "grad_norm": 357.5818786621094, + "learning_rate": 8.483572585248385e-06, + "loss": 12.3465, + "step": 164590 + }, + { + "epoch": 0.3325024139756057, + "grad_norm": 277.9449157714844, + "learning_rate": 8.483322174892404e-06, + "loss": 16.4891, + "step": 164600 + }, + { + "epoch": 0.3325226146082895, + "grad_norm": 291.3374938964844, + "learning_rate": 8.483071747559133e-06, + "loss": 14.9676, + "step": 164610 + }, + { + "epoch": 0.33254281524097334, + "grad_norm": 166.30682373046875, + "learning_rate": 8.482821303249793e-06, + "loss": 25.2423, + "step": 164620 + }, + { + "epoch": 0.33256301587365716, + "grad_norm": 341.4237060546875, + "learning_rate": 8.482570841965605e-06, + "loss": 19.0733, + "step": 164630 + }, + { + "epoch": 0.332583216506341, + "grad_norm": 285.921875, + "learning_rate": 8.482320363707787e-06, + "loss": 18.954, + "step": 164640 + }, + { + "epoch": 0.3326034171390248, + "grad_norm": 873.6636352539062, + "learning_rate": 8.482069868477565e-06, + "loss": 14.2361, + "step": 164650 + }, + { + "epoch": 0.3326236177717086, + "grad_norm": 671.779296875, + "learning_rate": 8.481819356276155e-06, + "loss": 31.5424, + "step": 164660 + }, + { + "epoch": 0.33264381840439244, + "grad_norm": 615.38623046875, + "learning_rate": 8.481568827104779e-06, + "loss": 18.1736, + "step": 164670 + }, + { + "epoch": 0.33266401903707626, + "grad_norm": 446.8658142089844, + "learning_rate": 8.481318280964661e-06, + "loss": 21.8833, + "step": 164680 + }, + { + "epoch": 0.3326842196697601, + "grad_norm": 521.0661010742188, + "learning_rate": 8.481067717857017e-06, + "loss": 20.2418, + "step": 164690 + }, + { + "epoch": 0.33270442030244385, + "grad_norm": 426.0536804199219, + "learning_rate": 
8.480817137783073e-06, + "loss": 16.9004, + "step": 164700 + }, + { + "epoch": 0.33272462093512767, + "grad_norm": 625.370849609375, + "learning_rate": 8.480566540744048e-06, + "loss": 17.6258, + "step": 164710 + }, + { + "epoch": 0.3327448215678115, + "grad_norm": 0.0, + "learning_rate": 8.480315926741165e-06, + "loss": 34.8818, + "step": 164720 + }, + { + "epoch": 0.3327650222004953, + "grad_norm": 641.9622802734375, + "learning_rate": 8.480065295775643e-06, + "loss": 26.7894, + "step": 164730 + }, + { + "epoch": 0.33278522283317913, + "grad_norm": 624.4006958007812, + "learning_rate": 8.479814647848706e-06, + "loss": 16.9639, + "step": 164740 + }, + { + "epoch": 0.33280542346586295, + "grad_norm": 365.9395446777344, + "learning_rate": 8.479563982961572e-06, + "loss": 36.2082, + "step": 164750 + }, + { + "epoch": 0.33282562409854677, + "grad_norm": 320.30950927734375, + "learning_rate": 8.479313301115467e-06, + "loss": 13.5921, + "step": 164760 + }, + { + "epoch": 0.3328458247312306, + "grad_norm": 555.3203735351562, + "learning_rate": 8.479062602311611e-06, + "loss": 16.8221, + "step": 164770 + }, + { + "epoch": 0.3328660253639144, + "grad_norm": 91.21572875976562, + "learning_rate": 8.478811886551226e-06, + "loss": 24.735, + "step": 164780 + }, + { + "epoch": 0.33288622599659823, + "grad_norm": 638.2918090820312, + "learning_rate": 8.478561153835532e-06, + "loss": 19.5226, + "step": 164790 + }, + { + "epoch": 0.33290642662928205, + "grad_norm": 511.9052734375, + "learning_rate": 8.478310404165756e-06, + "loss": 29.554, + "step": 164800 + }, + { + "epoch": 0.33292662726196587, + "grad_norm": 398.52520751953125, + "learning_rate": 8.478059637543114e-06, + "loss": 43.5895, + "step": 164810 + }, + { + "epoch": 0.33294682789464963, + "grad_norm": 464.9781494140625, + "learning_rate": 8.477808853968831e-06, + "loss": 20.1277, + "step": 164820 + }, + { + "epoch": 0.33296702852733345, + "grad_norm": 225.42550659179688, + "learning_rate": 8.477558053444133e-06, + "loss": 17.408, + "step": 164830 + }, + { + "epoch": 0.3329872291600173, + "grad_norm": 319.70770263671875, + "learning_rate": 8.477307235970235e-06, + "loss": 23.274, + "step": 164840 + }, + { + "epoch": 0.3330074297927011, + "grad_norm": 159.54104614257812, + "learning_rate": 8.477056401548364e-06, + "loss": 37.0467, + "step": 164850 + }, + { + "epoch": 0.3330276304253849, + "grad_norm": 602.1248168945312, + "learning_rate": 8.476805550179743e-06, + "loss": 15.3591, + "step": 164860 + }, + { + "epoch": 0.33304783105806873, + "grad_norm": 504.2489013671875, + "learning_rate": 8.476554681865594e-06, + "loss": 26.6329, + "step": 164870 + }, + { + "epoch": 0.33306803169075255, + "grad_norm": 635.47265625, + "learning_rate": 8.476303796607138e-06, + "loss": 16.9301, + "step": 164880 + }, + { + "epoch": 0.3330882323234364, + "grad_norm": 525.6773071289062, + "learning_rate": 8.4760528944056e-06, + "loss": 19.0034, + "step": 164890 + }, + { + "epoch": 0.3331084329561202, + "grad_norm": 438.91815185546875, + "learning_rate": 8.4758019752622e-06, + "loss": 20.7083, + "step": 164900 + }, + { + "epoch": 0.333128633588804, + "grad_norm": 592.015380859375, + "learning_rate": 8.475551039178164e-06, + "loss": 22.1569, + "step": 164910 + }, + { + "epoch": 0.33314883422148783, + "grad_norm": 538.2504272460938, + "learning_rate": 8.475300086154714e-06, + "loss": 41.7083, + "step": 164920 + }, + { + "epoch": 0.33316903485417165, + "grad_norm": 916.0399780273438, + "learning_rate": 8.475049116193071e-06, + "loss": 41.4224, + "step": 164930 + }, + { + 
"epoch": 0.3331892354868555, + "grad_norm": 221.72230529785156, + "learning_rate": 8.474798129294462e-06, + "loss": 29.0907, + "step": 164940 + }, + { + "epoch": 0.33320943611953924, + "grad_norm": 10.00229549407959, + "learning_rate": 8.474547125460108e-06, + "loss": 19.1066, + "step": 164950 + }, + { + "epoch": 0.33322963675222306, + "grad_norm": 425.7928466796875, + "learning_rate": 8.474296104691231e-06, + "loss": 44.4712, + "step": 164960 + }, + { + "epoch": 0.3332498373849069, + "grad_norm": 19.716455459594727, + "learning_rate": 8.474045066989058e-06, + "loss": 20.1471, + "step": 164970 + }, + { + "epoch": 0.3332700380175907, + "grad_norm": 365.4695129394531, + "learning_rate": 8.47379401235481e-06, + "loss": 35.0631, + "step": 164980 + }, + { + "epoch": 0.3332902386502745, + "grad_norm": 397.28033447265625, + "learning_rate": 8.473542940789712e-06, + "loss": 19.0947, + "step": 164990 + }, + { + "epoch": 0.33331043928295834, + "grad_norm": 615.3440551757812, + "learning_rate": 8.473291852294986e-06, + "loss": 19.4807, + "step": 165000 + }, + { + "epoch": 0.33333063991564216, + "grad_norm": 148.29989624023438, + "learning_rate": 8.47304074687186e-06, + "loss": 15.1729, + "step": 165010 + }, + { + "epoch": 0.333350840548326, + "grad_norm": 535.3994140625, + "learning_rate": 8.472789624521552e-06, + "loss": 34.4777, + "step": 165020 + }, + { + "epoch": 0.3333710411810098, + "grad_norm": 182.21981811523438, + "learning_rate": 8.472538485245287e-06, + "loss": 39.2378, + "step": 165030 + }, + { + "epoch": 0.3333912418136936, + "grad_norm": 554.3379516601562, + "learning_rate": 8.472287329044292e-06, + "loss": 14.5789, + "step": 165040 + }, + { + "epoch": 0.33341144244637744, + "grad_norm": 155.79673767089844, + "learning_rate": 8.47203615591979e-06, + "loss": 19.0935, + "step": 165050 + }, + { + "epoch": 0.33343164307906126, + "grad_norm": 305.8067321777344, + "learning_rate": 8.471784965873005e-06, + "loss": 17.3382, + "step": 165060 + }, + { + "epoch": 0.3334518437117451, + "grad_norm": 404.5699157714844, + "learning_rate": 8.471533758905161e-06, + "loss": 11.7331, + "step": 165070 + }, + { + "epoch": 0.33347204434442884, + "grad_norm": 682.6023559570312, + "learning_rate": 8.471282535017482e-06, + "loss": 18.0622, + "step": 165080 + }, + { + "epoch": 0.33349224497711266, + "grad_norm": 213.82958984375, + "learning_rate": 8.471031294211194e-06, + "loss": 32.7308, + "step": 165090 + }, + { + "epoch": 0.3335124456097965, + "grad_norm": 663.1001586914062, + "learning_rate": 8.47078003648752e-06, + "loss": 28.8932, + "step": 165100 + }, + { + "epoch": 0.3335326462424803, + "grad_norm": 243.75440979003906, + "learning_rate": 8.470528761847684e-06, + "loss": 15.589, + "step": 165110 + }, + { + "epoch": 0.3335528468751641, + "grad_norm": 342.10357666015625, + "learning_rate": 8.470277470292914e-06, + "loss": 13.145, + "step": 165120 + }, + { + "epoch": 0.33357304750784794, + "grad_norm": 416.9073181152344, + "learning_rate": 8.47002616182443e-06, + "loss": 18.5308, + "step": 165130 + }, + { + "epoch": 0.33359324814053176, + "grad_norm": 254.2198486328125, + "learning_rate": 8.46977483644346e-06, + "loss": 20.4267, + "step": 165140 + }, + { + "epoch": 0.3336134487732156, + "grad_norm": 317.7593688964844, + "learning_rate": 8.469523494151229e-06, + "loss": 18.6798, + "step": 165150 + }, + { + "epoch": 0.3336336494058994, + "grad_norm": 385.6800842285156, + "learning_rate": 8.469272134948963e-06, + "loss": 13.8181, + "step": 165160 + }, + { + "epoch": 0.3336538500385832, + "grad_norm": 
325.71832275390625, + "learning_rate": 8.469020758837882e-06, + "loss": 26.1625, + "step": 165170 + }, + { + "epoch": 0.33367405067126704, + "grad_norm": 508.8575439453125, + "learning_rate": 8.468769365819216e-06, + "loss": 37.5624, + "step": 165180 + }, + { + "epoch": 0.33369425130395086, + "grad_norm": 637.180908203125, + "learning_rate": 8.46851795589419e-06, + "loss": 16.4045, + "step": 165190 + }, + { + "epoch": 0.3337144519366347, + "grad_norm": 468.8171691894531, + "learning_rate": 8.468266529064025e-06, + "loss": 28.6397, + "step": 165200 + }, + { + "epoch": 0.33373465256931845, + "grad_norm": 289.24896240234375, + "learning_rate": 8.468015085329952e-06, + "loss": 12.0796, + "step": 165210 + }, + { + "epoch": 0.33375485320200227, + "grad_norm": 265.281005859375, + "learning_rate": 8.467763624693195e-06, + "loss": 13.2185, + "step": 165220 + }, + { + "epoch": 0.3337750538346861, + "grad_norm": 781.6720581054688, + "learning_rate": 8.467512147154977e-06, + "loss": 30.3655, + "step": 165230 + }, + { + "epoch": 0.3337952544673699, + "grad_norm": 192.22543334960938, + "learning_rate": 8.467260652716525e-06, + "loss": 13.2761, + "step": 165240 + }, + { + "epoch": 0.3338154551000537, + "grad_norm": 163.2029571533203, + "learning_rate": 8.467009141379065e-06, + "loss": 22.6383, + "step": 165250 + }, + { + "epoch": 0.33383565573273755, + "grad_norm": 218.8509979248047, + "learning_rate": 8.466757613143824e-06, + "loss": 27.0643, + "step": 165260 + }, + { + "epoch": 0.33385585636542137, + "grad_norm": 518.0239868164062, + "learning_rate": 8.466506068012025e-06, + "loss": 17.2885, + "step": 165270 + }, + { + "epoch": 0.3338760569981052, + "grad_norm": 392.69757080078125, + "learning_rate": 8.466254505984899e-06, + "loss": 24.6875, + "step": 165280 + }, + { + "epoch": 0.333896257630789, + "grad_norm": 402.17510986328125, + "learning_rate": 8.466002927063668e-06, + "loss": 32.3207, + "step": 165290 + }, + { + "epoch": 0.3339164582634728, + "grad_norm": 274.7178039550781, + "learning_rate": 8.465751331249558e-06, + "loss": 25.4942, + "step": 165300 + }, + { + "epoch": 0.33393665889615665, + "grad_norm": 928.2224731445312, + "learning_rate": 8.465499718543797e-06, + "loss": 37.6747, + "step": 165310 + }, + { + "epoch": 0.33395685952884047, + "grad_norm": 134.0745849609375, + "learning_rate": 8.46524808894761e-06, + "loss": 13.0874, + "step": 165320 + }, + { + "epoch": 0.3339770601615243, + "grad_norm": 144.41650390625, + "learning_rate": 8.464996442462226e-06, + "loss": 15.1885, + "step": 165330 + }, + { + "epoch": 0.33399726079420805, + "grad_norm": 517.2526245117188, + "learning_rate": 8.464744779088868e-06, + "loss": 36.2311, + "step": 165340 + }, + { + "epoch": 0.33401746142689187, + "grad_norm": 398.09765625, + "learning_rate": 8.464493098828763e-06, + "loss": 31.7159, + "step": 165350 + }, + { + "epoch": 0.3340376620595757, + "grad_norm": 112.75455474853516, + "learning_rate": 8.464241401683142e-06, + "loss": 15.3615, + "step": 165360 + }, + { + "epoch": 0.3340578626922595, + "grad_norm": 289.33154296875, + "learning_rate": 8.463989687653226e-06, + "loss": 17.8703, + "step": 165370 + }, + { + "epoch": 0.33407806332494333, + "grad_norm": 558.5931396484375, + "learning_rate": 8.463737956740246e-06, + "loss": 19.8129, + "step": 165380 + }, + { + "epoch": 0.33409826395762715, + "grad_norm": 25.02311134338379, + "learning_rate": 8.463486208945426e-06, + "loss": 16.373, + "step": 165390 + }, + { + "epoch": 0.33411846459031097, + "grad_norm": 281.6791076660156, + "learning_rate": 
8.463234444269994e-06, + "loss": 9.0688, + "step": 165400 + }, + { + "epoch": 0.3341386652229948, + "grad_norm": 444.8165588378906, + "learning_rate": 8.462982662715179e-06, + "loss": 20.3018, + "step": 165410 + }, + { + "epoch": 0.3341588658556786, + "grad_norm": 287.7277526855469, + "learning_rate": 8.462730864282206e-06, + "loss": 13.8106, + "step": 165420 + }, + { + "epoch": 0.33417906648836243, + "grad_norm": 281.07452392578125, + "learning_rate": 8.462479048972302e-06, + "loss": 26.3089, + "step": 165430 + }, + { + "epoch": 0.33419926712104625, + "grad_norm": 747.5748901367188, + "learning_rate": 8.462227216786696e-06, + "loss": 19.8105, + "step": 165440 + }, + { + "epoch": 0.33421946775373007, + "grad_norm": 345.7933044433594, + "learning_rate": 8.461975367726614e-06, + "loss": 17.2945, + "step": 165450 + }, + { + "epoch": 0.33423966838641384, + "grad_norm": 225.85513305664062, + "learning_rate": 8.461723501793284e-06, + "loss": 39.0874, + "step": 165460 + }, + { + "epoch": 0.33425986901909766, + "grad_norm": 278.6300964355469, + "learning_rate": 8.461471618987933e-06, + "loss": 18.8434, + "step": 165470 + }, + { + "epoch": 0.3342800696517815, + "grad_norm": 987.3038940429688, + "learning_rate": 8.46121971931179e-06, + "loss": 26.1987, + "step": 165480 + }, + { + "epoch": 0.3343002702844653, + "grad_norm": 510.0689392089844, + "learning_rate": 8.460967802766081e-06, + "loss": 13.8596, + "step": 165490 + }, + { + "epoch": 0.3343204709171491, + "grad_norm": 346.2994689941406, + "learning_rate": 8.460715869352035e-06, + "loss": 22.2588, + "step": 165500 + }, + { + "epoch": 0.33434067154983294, + "grad_norm": 4.863061428070068, + "learning_rate": 8.460463919070879e-06, + "loss": 19.7094, + "step": 165510 + }, + { + "epoch": 0.33436087218251676, + "grad_norm": 329.3653564453125, + "learning_rate": 8.460211951923842e-06, + "loss": 30.0413, + "step": 165520 + }, + { + "epoch": 0.3343810728152006, + "grad_norm": 576.1764526367188, + "learning_rate": 8.459959967912152e-06, + "loss": 27.0201, + "step": 165530 + }, + { + "epoch": 0.3344012734478844, + "grad_norm": 403.1051940917969, + "learning_rate": 8.459707967037037e-06, + "loss": 16.6992, + "step": 165540 + }, + { + "epoch": 0.3344214740805682, + "grad_norm": 102.03054809570312, + "learning_rate": 8.459455949299725e-06, + "loss": 16.7472, + "step": 165550 + }, + { + "epoch": 0.33444167471325204, + "grad_norm": 573.9119873046875, + "learning_rate": 8.459203914701444e-06, + "loss": 18.7682, + "step": 165560 + }, + { + "epoch": 0.33446187534593586, + "grad_norm": 731.407470703125, + "learning_rate": 8.458951863243424e-06, + "loss": 26.0697, + "step": 165570 + }, + { + "epoch": 0.3344820759786197, + "grad_norm": 1461.892822265625, + "learning_rate": 8.45869979492689e-06, + "loss": 22.1549, + "step": 165580 + }, + { + "epoch": 0.33450227661130344, + "grad_norm": 853.9044799804688, + "learning_rate": 8.458447709753073e-06, + "loss": 26.7949, + "step": 165590 + }, + { + "epoch": 0.33452247724398726, + "grad_norm": 198.04319763183594, + "learning_rate": 8.458195607723201e-06, + "loss": 21.1348, + "step": 165600 + }, + { + "epoch": 0.3345426778766711, + "grad_norm": 159.65589904785156, + "learning_rate": 8.457943488838504e-06, + "loss": 21.2028, + "step": 165610 + }, + { + "epoch": 0.3345628785093549, + "grad_norm": 368.6065673828125, + "learning_rate": 8.45769135310021e-06, + "loss": 22.417, + "step": 165620 + }, + { + "epoch": 0.3345830791420387, + "grad_norm": 973.2472534179688, + "learning_rate": 8.457439200509548e-06, + "loss": 29.6429, + 
"step": 165630 + }, + { + "epoch": 0.33460327977472254, + "grad_norm": 141.88278198242188, + "learning_rate": 8.457187031067746e-06, + "loss": 15.8237, + "step": 165640 + }, + { + "epoch": 0.33462348040740636, + "grad_norm": 311.3077087402344, + "learning_rate": 8.456934844776033e-06, + "loss": 15.497, + "step": 165650 + }, + { + "epoch": 0.3346436810400902, + "grad_norm": 597.1159057617188, + "learning_rate": 8.456682641635639e-06, + "loss": 28.0578, + "step": 165660 + }, + { + "epoch": 0.334663881672774, + "grad_norm": 443.5572814941406, + "learning_rate": 8.456430421647795e-06, + "loss": 37.2504, + "step": 165670 + }, + { + "epoch": 0.3346840823054578, + "grad_norm": 71.40750885009766, + "learning_rate": 8.456178184813726e-06, + "loss": 13.1042, + "step": 165680 + }, + { + "epoch": 0.33470428293814164, + "grad_norm": 493.4185485839844, + "learning_rate": 8.455925931134665e-06, + "loss": 13.4274, + "step": 165690 + }, + { + "epoch": 0.33472448357082546, + "grad_norm": 852.5182495117188, + "learning_rate": 8.45567366061184e-06, + "loss": 28.5645, + "step": 165700 + }, + { + "epoch": 0.3347446842035093, + "grad_norm": 433.5321350097656, + "learning_rate": 8.455421373246479e-06, + "loss": 18.3268, + "step": 165710 + }, + { + "epoch": 0.33476488483619304, + "grad_norm": 472.730712890625, + "learning_rate": 8.455169069039814e-06, + "loss": 25.194, + "step": 165720 + }, + { + "epoch": 0.33478508546887686, + "grad_norm": 543.5000610351562, + "learning_rate": 8.454916747993076e-06, + "loss": 20.7661, + "step": 165730 + }, + { + "epoch": 0.3348052861015607, + "grad_norm": 203.8567657470703, + "learning_rate": 8.454664410107492e-06, + "loss": 28.9626, + "step": 165740 + }, + { + "epoch": 0.3348254867342445, + "grad_norm": 255.99026489257812, + "learning_rate": 8.454412055384292e-06, + "loss": 17.9984, + "step": 165750 + }, + { + "epoch": 0.3348456873669283, + "grad_norm": 946.3783569335938, + "learning_rate": 8.454159683824707e-06, + "loss": 23.2077, + "step": 165760 + }, + { + "epoch": 0.33486588799961214, + "grad_norm": 364.1036376953125, + "learning_rate": 8.453907295429969e-06, + "loss": 18.382, + "step": 165770 + }, + { + "epoch": 0.33488608863229596, + "grad_norm": 133.6348876953125, + "learning_rate": 8.453654890201301e-06, + "loss": 45.5667, + "step": 165780 + }, + { + "epoch": 0.3349062892649798, + "grad_norm": 1.842629075050354, + "learning_rate": 8.453402468139941e-06, + "loss": 10.9108, + "step": 165790 + }, + { + "epoch": 0.3349264898976636, + "grad_norm": 557.810302734375, + "learning_rate": 8.453150029247115e-06, + "loss": 23.3179, + "step": 165800 + }, + { + "epoch": 0.3349466905303474, + "grad_norm": 602.2593383789062, + "learning_rate": 8.452897573524055e-06, + "loss": 19.134, + "step": 165810 + }, + { + "epoch": 0.33496689116303124, + "grad_norm": 367.6490478515625, + "learning_rate": 8.452645100971991e-06, + "loss": 26.2369, + "step": 165820 + }, + { + "epoch": 0.33498709179571506, + "grad_norm": 385.2917175292969, + "learning_rate": 8.452392611592154e-06, + "loss": 14.507, + "step": 165830 + }, + { + "epoch": 0.3350072924283989, + "grad_norm": 211.99009704589844, + "learning_rate": 8.452140105385774e-06, + "loss": 17.8554, + "step": 165840 + }, + { + "epoch": 0.33502749306108265, + "grad_norm": 287.37188720703125, + "learning_rate": 8.451887582354081e-06, + "loss": 24.3312, + "step": 165850 + }, + { + "epoch": 0.33504769369376647, + "grad_norm": 128.64540100097656, + "learning_rate": 8.451635042498307e-06, + "loss": 15.5851, + "step": 165860 + }, + { + "epoch": 
0.3350678943264503, + "grad_norm": 488.65380859375, + "learning_rate": 8.451382485819683e-06, + "loss": 23.7012, + "step": 165870 + }, + { + "epoch": 0.3350880949591341, + "grad_norm": 91.1632080078125, + "learning_rate": 8.451129912319439e-06, + "loss": 22.6898, + "step": 165880 + }, + { + "epoch": 0.33510829559181793, + "grad_norm": 630.19287109375, + "learning_rate": 8.450877321998805e-06, + "loss": 22.226, + "step": 165890 + }, + { + "epoch": 0.33512849622450175, + "grad_norm": 214.9325408935547, + "learning_rate": 8.450624714859016e-06, + "loss": 25.6399, + "step": 165900 + }, + { + "epoch": 0.33514869685718557, + "grad_norm": 567.51611328125, + "learning_rate": 8.4503720909013e-06, + "loss": 25.022, + "step": 165910 + }, + { + "epoch": 0.3351688974898694, + "grad_norm": 538.2396240234375, + "learning_rate": 8.450119450126889e-06, + "loss": 30.0168, + "step": 165920 + }, + { + "epoch": 0.3351890981225532, + "grad_norm": 426.6291809082031, + "learning_rate": 8.449866792537013e-06, + "loss": 8.4766, + "step": 165930 + }, + { + "epoch": 0.33520929875523703, + "grad_norm": 570.8070068359375, + "learning_rate": 8.449614118132905e-06, + "loss": 30.7451, + "step": 165940 + }, + { + "epoch": 0.33522949938792085, + "grad_norm": 285.88262939453125, + "learning_rate": 8.449361426915797e-06, + "loss": 18.7535, + "step": 165950 + }, + { + "epoch": 0.33524970002060467, + "grad_norm": 342.9992370605469, + "learning_rate": 8.449108718886919e-06, + "loss": 17.1622, + "step": 165960 + }, + { + "epoch": 0.3352699006532885, + "grad_norm": 501.7101745605469, + "learning_rate": 8.448855994047502e-06, + "loss": 41.3075, + "step": 165970 + }, + { + "epoch": 0.33529010128597225, + "grad_norm": 648.7622680664062, + "learning_rate": 8.448603252398782e-06, + "loss": 27.9492, + "step": 165980 + }, + { + "epoch": 0.3353103019186561, + "grad_norm": 765.6998291015625, + "learning_rate": 8.448350493941986e-06, + "loss": 19.664, + "step": 165990 + }, + { + "epoch": 0.3353305025513399, + "grad_norm": 592.9990234375, + "learning_rate": 8.44809771867835e-06, + "loss": 27.3351, + "step": 166000 + }, + { + "epoch": 0.3353507031840237, + "grad_norm": 715.3545532226562, + "learning_rate": 8.447844926609103e-06, + "loss": 27.305, + "step": 166010 + }, + { + "epoch": 0.33537090381670753, + "grad_norm": 337.9970703125, + "learning_rate": 8.447592117735477e-06, + "loss": 17.8994, + "step": 166020 + }, + { + "epoch": 0.33539110444939135, + "grad_norm": 338.2167663574219, + "learning_rate": 8.447339292058706e-06, + "loss": 37.4576, + "step": 166030 + }, + { + "epoch": 0.3354113050820752, + "grad_norm": 383.3039245605469, + "learning_rate": 8.44708644958002e-06, + "loss": 17.3921, + "step": 166040 + }, + { + "epoch": 0.335431505714759, + "grad_norm": 361.1171875, + "learning_rate": 8.446833590300656e-06, + "loss": 23.4858, + "step": 166050 + }, + { + "epoch": 0.3354517063474428, + "grad_norm": 499.3106689453125, + "learning_rate": 8.44658071422184e-06, + "loss": 24.2805, + "step": 166060 + }, + { + "epoch": 0.33547190698012663, + "grad_norm": 851.7272338867188, + "learning_rate": 8.44632782134481e-06, + "loss": 16.007, + "step": 166070 + }, + { + "epoch": 0.33549210761281045, + "grad_norm": 541.3045654296875, + "learning_rate": 8.446074911670795e-06, + "loss": 22.5634, + "step": 166080 + }, + { + "epoch": 0.3355123082454943, + "grad_norm": 232.80931091308594, + "learning_rate": 8.445821985201028e-06, + "loss": 12.8816, + "step": 166090 + }, + { + "epoch": 0.33553250887817804, + "grad_norm": 741.358642578125, + "learning_rate": 
8.445569041936743e-06, + "loss": 22.6484, + "step": 166100 + }, + { + "epoch": 0.33555270951086186, + "grad_norm": 848.94189453125, + "learning_rate": 8.445316081879174e-06, + "loss": 26.0934, + "step": 166110 + }, + { + "epoch": 0.3355729101435457, + "grad_norm": 859.1322021484375, + "learning_rate": 8.44506310502955e-06, + "loss": 19.3114, + "step": 166120 + }, + { + "epoch": 0.3355931107762295, + "grad_norm": 418.796142578125, + "learning_rate": 8.444810111389108e-06, + "loss": 42.8017, + "step": 166130 + }, + { + "epoch": 0.3356133114089133, + "grad_norm": 124.33617401123047, + "learning_rate": 8.44455710095908e-06, + "loss": 20.8427, + "step": 166140 + }, + { + "epoch": 0.33563351204159714, + "grad_norm": 463.8589782714844, + "learning_rate": 8.444304073740695e-06, + "loss": 14.3268, + "step": 166150 + }, + { + "epoch": 0.33565371267428096, + "grad_norm": 271.988037109375, + "learning_rate": 8.444051029735192e-06, + "loss": 20.3641, + "step": 166160 + }, + { + "epoch": 0.3356739133069648, + "grad_norm": 392.1072082519531, + "learning_rate": 8.443797968943801e-06, + "loss": 31.2143, + "step": 166170 + }, + { + "epoch": 0.3356941139396486, + "grad_norm": 455.8572998046875, + "learning_rate": 8.443544891367758e-06, + "loss": 28.3247, + "step": 166180 + }, + { + "epoch": 0.3357143145723324, + "grad_norm": 379.1649169921875, + "learning_rate": 8.443291797008294e-06, + "loss": 19.1225, + "step": 166190 + }, + { + "epoch": 0.33573451520501624, + "grad_norm": 437.4548645019531, + "learning_rate": 8.443038685866643e-06, + "loss": 19.6158, + "step": 166200 + }, + { + "epoch": 0.33575471583770006, + "grad_norm": 73.28082275390625, + "learning_rate": 8.44278555794404e-06, + "loss": 19.3912, + "step": 166210 + }, + { + "epoch": 0.3357749164703839, + "grad_norm": 350.60595703125, + "learning_rate": 8.442532413241717e-06, + "loss": 24.2174, + "step": 166220 + }, + { + "epoch": 0.33579511710306764, + "grad_norm": 38.57235336303711, + "learning_rate": 8.442279251760907e-06, + "loss": 18.4508, + "step": 166230 + }, + { + "epoch": 0.33581531773575146, + "grad_norm": 103.85962677001953, + "learning_rate": 8.442026073502849e-06, + "loss": 29.9303, + "step": 166240 + }, + { + "epoch": 0.3358355183684353, + "grad_norm": 294.2778015136719, + "learning_rate": 8.44177287846877e-06, + "loss": 15.0246, + "step": 166250 + }, + { + "epoch": 0.3358557190011191, + "grad_norm": 264.279296875, + "learning_rate": 8.44151966665991e-06, + "loss": 25.0396, + "step": 166260 + }, + { + "epoch": 0.3358759196338029, + "grad_norm": 222.1487274169922, + "learning_rate": 8.4412664380775e-06, + "loss": 33.7391, + "step": 166270 + }, + { + "epoch": 0.33589612026648674, + "grad_norm": 668.9306030273438, + "learning_rate": 8.441013192722774e-06, + "loss": 21.2129, + "step": 166280 + }, + { + "epoch": 0.33591632089917056, + "grad_norm": 17.128477096557617, + "learning_rate": 8.440759930596967e-06, + "loss": 24.8987, + "step": 166290 + }, + { + "epoch": 0.3359365215318544, + "grad_norm": 411.150390625, + "learning_rate": 8.440506651701315e-06, + "loss": 23.1951, + "step": 166300 + }, + { + "epoch": 0.3359567221645382, + "grad_norm": 302.4668884277344, + "learning_rate": 8.440253356037048e-06, + "loss": 19.6862, + "step": 166310 + }, + { + "epoch": 0.335976922797222, + "grad_norm": 47.11547088623047, + "learning_rate": 8.440000043605406e-06, + "loss": 24.6051, + "step": 166320 + }, + { + "epoch": 0.33599712342990584, + "grad_norm": 588.094970703125, + "learning_rate": 8.439746714407619e-06, + "loss": 21.6316, + "step": 166330 + }, + { 
+ "epoch": 0.33601732406258966, + "grad_norm": 353.5929870605469, + "learning_rate": 8.439493368444924e-06, + "loss": 11.4648, + "step": 166340 + }, + { + "epoch": 0.3360375246952735, + "grad_norm": 312.5060729980469, + "learning_rate": 8.439240005718556e-06, + "loss": 28.425, + "step": 166350 + }, + { + "epoch": 0.33605772532795725, + "grad_norm": 756.814697265625, + "learning_rate": 8.43898662622975e-06, + "loss": 51.6401, + "step": 166360 + }, + { + "epoch": 0.33607792596064107, + "grad_norm": 406.2431640625, + "learning_rate": 8.438733229979741e-06, + "loss": 17.1661, + "step": 166370 + }, + { + "epoch": 0.3360981265933249, + "grad_norm": 96.3930892944336, + "learning_rate": 8.438479816969762e-06, + "loss": 24.6942, + "step": 166380 + }, + { + "epoch": 0.3361183272260087, + "grad_norm": 387.2288513183594, + "learning_rate": 8.438226387201048e-06, + "loss": 14.9424, + "step": 166390 + }, + { + "epoch": 0.3361385278586925, + "grad_norm": 404.4085693359375, + "learning_rate": 8.437972940674838e-06, + "loss": 30.1257, + "step": 166400 + }, + { + "epoch": 0.33615872849137635, + "grad_norm": 355.3511962890625, + "learning_rate": 8.437719477392363e-06, + "loss": 19.9233, + "step": 166410 + }, + { + "epoch": 0.33617892912406017, + "grad_norm": 178.55581665039062, + "learning_rate": 8.43746599735486e-06, + "loss": 20.9385, + "step": 166420 + }, + { + "epoch": 0.336199129756744, + "grad_norm": 282.252685546875, + "learning_rate": 8.437212500563567e-06, + "loss": 13.9381, + "step": 166430 + }, + { + "epoch": 0.3362193303894278, + "grad_norm": 63.36928939819336, + "learning_rate": 8.436958987019717e-06, + "loss": 17.171, + "step": 166440 + }, + { + "epoch": 0.3362395310221116, + "grad_norm": 125.04975891113281, + "learning_rate": 8.436705456724545e-06, + "loss": 17.6303, + "step": 166450 + }, + { + "epoch": 0.33625973165479545, + "grad_norm": 247.55935668945312, + "learning_rate": 8.436451909679286e-06, + "loss": 21.9529, + "step": 166460 + }, + { + "epoch": 0.33627993228747927, + "grad_norm": 481.1356201171875, + "learning_rate": 8.436198345885177e-06, + "loss": 14.1245, + "step": 166470 + }, + { + "epoch": 0.3363001329201631, + "grad_norm": 441.5704345703125, + "learning_rate": 8.435944765343457e-06, + "loss": 22.2057, + "step": 166480 + }, + { + "epoch": 0.33632033355284685, + "grad_norm": 575.5211181640625, + "learning_rate": 8.435691168055358e-06, + "loss": 22.0704, + "step": 166490 + }, + { + "epoch": 0.33634053418553067, + "grad_norm": 615.0734252929688, + "learning_rate": 8.435437554022116e-06, + "loss": 27.4929, + "step": 166500 + }, + { + "epoch": 0.3363607348182145, + "grad_norm": 374.204833984375, + "learning_rate": 8.435183923244969e-06, + "loss": 39.71, + "step": 166510 + }, + { + "epoch": 0.3363809354508983, + "grad_norm": 292.698974609375, + "learning_rate": 8.43493027572515e-06, + "loss": 15.8342, + "step": 166520 + }, + { + "epoch": 0.33640113608358213, + "grad_norm": 11.15060043334961, + "learning_rate": 8.4346766114639e-06, + "loss": 26.0426, + "step": 166530 + }, + { + "epoch": 0.33642133671626595, + "grad_norm": 445.99713134765625, + "learning_rate": 8.434422930462452e-06, + "loss": 20.9144, + "step": 166540 + }, + { + "epoch": 0.33644153734894977, + "grad_norm": 385.7422180175781, + "learning_rate": 8.434169232722043e-06, + "loss": 23.432, + "step": 166550 + }, + { + "epoch": 0.3364617379816336, + "grad_norm": 505.59503173828125, + "learning_rate": 8.433915518243909e-06, + "loss": 18.3347, + "step": 166560 + }, + { + "epoch": 0.3364819386143174, + "grad_norm": 
643.0967407226562, + "learning_rate": 8.433661787029288e-06, + "loss": 20.145, + "step": 166570 + }, + { + "epoch": 0.33650213924700123, + "grad_norm": 425.1985168457031, + "learning_rate": 8.433408039079415e-06, + "loss": 14.0901, + "step": 166580 + }, + { + "epoch": 0.33652233987968505, + "grad_norm": 388.61944580078125, + "learning_rate": 8.433154274395529e-06, + "loss": 28.345, + "step": 166590 + }, + { + "epoch": 0.33654254051236887, + "grad_norm": 984.209228515625, + "learning_rate": 8.432900492978864e-06, + "loss": 21.7307, + "step": 166600 + }, + { + "epoch": 0.3365627411450527, + "grad_norm": 178.183837890625, + "learning_rate": 8.43264669483066e-06, + "loss": 24.6388, + "step": 166610 + }, + { + "epoch": 0.33658294177773646, + "grad_norm": 231.317138671875, + "learning_rate": 8.432392879952151e-06, + "loss": 13.9039, + "step": 166620 + }, + { + "epoch": 0.3366031424104203, + "grad_norm": 393.9949951171875, + "learning_rate": 8.432139048344577e-06, + "loss": 18.0523, + "step": 166630 + }, + { + "epoch": 0.3366233430431041, + "grad_norm": 24.599918365478516, + "learning_rate": 8.431885200009172e-06, + "loss": 20.6987, + "step": 166640 + }, + { + "epoch": 0.3366435436757879, + "grad_norm": 223.66964721679688, + "learning_rate": 8.431631334947175e-06, + "loss": 22.4234, + "step": 166650 + }, + { + "epoch": 0.33666374430847174, + "grad_norm": 474.1702880859375, + "learning_rate": 8.431377453159822e-06, + "loss": 17.2442, + "step": 166660 + }, + { + "epoch": 0.33668394494115556, + "grad_norm": 516.7149047851562, + "learning_rate": 8.431123554648354e-06, + "loss": 24.7318, + "step": 166670 + }, + { + "epoch": 0.3367041455738394, + "grad_norm": 380.5775146484375, + "learning_rate": 8.430869639414004e-06, + "loss": 24.9795, + "step": 166680 + }, + { + "epoch": 0.3367243462065232, + "grad_norm": 313.3843078613281, + "learning_rate": 8.430615707458012e-06, + "loss": 23.0944, + "step": 166690 + }, + { + "epoch": 0.336744546839207, + "grad_norm": 229.4980926513672, + "learning_rate": 8.430361758781616e-06, + "loss": 26.4191, + "step": 166700 + }, + { + "epoch": 0.33676474747189084, + "grad_norm": 192.1104278564453, + "learning_rate": 8.430107793386053e-06, + "loss": 20.8212, + "step": 166710 + }, + { + "epoch": 0.33678494810457466, + "grad_norm": 387.45477294921875, + "learning_rate": 8.42985381127256e-06, + "loss": 22.7019, + "step": 166720 + }, + { + "epoch": 0.3368051487372585, + "grad_norm": 519.1399536132812, + "learning_rate": 8.429599812442373e-06, + "loss": 26.394, + "step": 166730 + }, + { + "epoch": 0.33682534936994224, + "grad_norm": 1378.7998046875, + "learning_rate": 8.429345796896736e-06, + "loss": 36.4727, + "step": 166740 + }, + { + "epoch": 0.33684555000262606, + "grad_norm": 334.69732666015625, + "learning_rate": 8.429091764636883e-06, + "loss": 22.7127, + "step": 166750 + }, + { + "epoch": 0.3368657506353099, + "grad_norm": 644.4428100585938, + "learning_rate": 8.42883771566405e-06, + "loss": 27.6707, + "step": 166760 + }, + { + "epoch": 0.3368859512679937, + "grad_norm": 1111.902587890625, + "learning_rate": 8.42858364997948e-06, + "loss": 22.6756, + "step": 166770 + }, + { + "epoch": 0.3369061519006775, + "grad_norm": 492.6197814941406, + "learning_rate": 8.428329567584411e-06, + "loss": 24.5891, + "step": 166780 + }, + { + "epoch": 0.33692635253336134, + "grad_norm": 77.93360900878906, + "learning_rate": 8.428075468480076e-06, + "loss": 16.2545, + "step": 166790 + }, + { + "epoch": 0.33694655316604516, + "grad_norm": 496.0776672363281, + "learning_rate": 
8.427821352667719e-06, + "loss": 16.1612, + "step": 166800 + }, + { + "epoch": 0.336966753798729, + "grad_norm": 1901.234619140625, + "learning_rate": 8.427567220148574e-06, + "loss": 27.2727, + "step": 166810 + }, + { + "epoch": 0.3369869544314128, + "grad_norm": 264.42022705078125, + "learning_rate": 8.427313070923885e-06, + "loss": 27.5479, + "step": 166820 + }, + { + "epoch": 0.3370071550640966, + "grad_norm": 185.50962829589844, + "learning_rate": 8.427058904994888e-06, + "loss": 11.7651, + "step": 166830 + }, + { + "epoch": 0.33702735569678044, + "grad_norm": 140.66539001464844, + "learning_rate": 8.426804722362818e-06, + "loss": 19.9623, + "step": 166840 + }, + { + "epoch": 0.33704755632946426, + "grad_norm": 75.92221069335938, + "learning_rate": 8.42655052302892e-06, + "loss": 8.8664, + "step": 166850 + }, + { + "epoch": 0.3370677569621481, + "grad_norm": 195.6241912841797, + "learning_rate": 8.42629630699443e-06, + "loss": 18.7624, + "step": 166860 + }, + { + "epoch": 0.33708795759483184, + "grad_norm": 388.46453857421875, + "learning_rate": 8.426042074260588e-06, + "loss": 21.4288, + "step": 166870 + }, + { + "epoch": 0.33710815822751566, + "grad_norm": 273.54736328125, + "learning_rate": 8.425787824828632e-06, + "loss": 18.4725, + "step": 166880 + }, + { + "epoch": 0.3371283588601995, + "grad_norm": 492.0992736816406, + "learning_rate": 8.425533558699801e-06, + "loss": 17.9134, + "step": 166890 + }, + { + "epoch": 0.3371485594928833, + "grad_norm": 233.55056762695312, + "learning_rate": 8.425279275875336e-06, + "loss": 14.4852, + "step": 166900 + }, + { + "epoch": 0.3371687601255671, + "grad_norm": 429.4356689453125, + "learning_rate": 8.425024976356474e-06, + "loss": 26.6053, + "step": 166910 + }, + { + "epoch": 0.33718896075825094, + "grad_norm": 406.38671875, + "learning_rate": 8.424770660144457e-06, + "loss": 30.5793, + "step": 166920 + }, + { + "epoch": 0.33720916139093476, + "grad_norm": 380.27838134765625, + "learning_rate": 8.424516327240521e-06, + "loss": 12.3561, + "step": 166930 + }, + { + "epoch": 0.3372293620236186, + "grad_norm": 353.7559509277344, + "learning_rate": 8.424261977645909e-06, + "loss": 29.661, + "step": 166940 + }, + { + "epoch": 0.3372495626563024, + "grad_norm": 482.1020812988281, + "learning_rate": 8.424007611361861e-06, + "loss": 16.1973, + "step": 166950 + }, + { + "epoch": 0.3372697632889862, + "grad_norm": 462.35589599609375, + "learning_rate": 8.423753228389612e-06, + "loss": 19.1275, + "step": 166960 + }, + { + "epoch": 0.33728996392167004, + "grad_norm": 440.8437194824219, + "learning_rate": 8.423498828730408e-06, + "loss": 22.6218, + "step": 166970 + }, + { + "epoch": 0.33731016455435386, + "grad_norm": 386.29315185546875, + "learning_rate": 8.423244412385485e-06, + "loss": 20.1392, + "step": 166980 + }, + { + "epoch": 0.3373303651870377, + "grad_norm": 923.9805908203125, + "learning_rate": 8.422989979356084e-06, + "loss": 15.9717, + "step": 166990 + }, + { + "epoch": 0.33735056581972145, + "grad_norm": 0.0, + "learning_rate": 8.422735529643445e-06, + "loss": 9.4694, + "step": 167000 + }, + { + "epoch": 0.33737076645240527, + "grad_norm": 250.28387451171875, + "learning_rate": 8.422481063248806e-06, + "loss": 18.5734, + "step": 167010 + }, + { + "epoch": 0.3373909670850891, + "grad_norm": 399.7904968261719, + "learning_rate": 8.422226580173411e-06, + "loss": 12.4292, + "step": 167020 + }, + { + "epoch": 0.3374111677177729, + "grad_norm": 453.8663024902344, + "learning_rate": 8.4219720804185e-06, + "loss": 26.8315, + "step": 167030 + }, + { 
+ "epoch": 0.33743136835045673, + "grad_norm": 642.8257446289062, + "learning_rate": 8.421717563985312e-06, + "loss": 21.8601, + "step": 167040 + }, + { + "epoch": 0.33745156898314055, + "grad_norm": 743.86669921875, + "learning_rate": 8.421463030875086e-06, + "loss": 37.0026, + "step": 167050 + }, + { + "epoch": 0.33747176961582437, + "grad_norm": 146.4901580810547, + "learning_rate": 8.421208481089064e-06, + "loss": 18.3244, + "step": 167060 + }, + { + "epoch": 0.3374919702485082, + "grad_norm": 168.8124237060547, + "learning_rate": 8.42095391462849e-06, + "loss": 16.8123, + "step": 167070 + }, + { + "epoch": 0.337512170881192, + "grad_norm": 547.0553588867188, + "learning_rate": 8.420699331494597e-06, + "loss": 27.1035, + "step": 167080 + }, + { + "epoch": 0.33753237151387583, + "grad_norm": 545.5162963867188, + "learning_rate": 8.420444731688632e-06, + "loss": 26.4211, + "step": 167090 + }, + { + "epoch": 0.33755257214655965, + "grad_norm": 436.70703125, + "learning_rate": 8.420190115211835e-06, + "loss": 23.3277, + "step": 167100 + }, + { + "epoch": 0.33757277277924347, + "grad_norm": 341.232421875, + "learning_rate": 8.419935482065447e-06, + "loss": 22.7691, + "step": 167110 + }, + { + "epoch": 0.3375929734119273, + "grad_norm": 736.3862915039062, + "learning_rate": 8.419680832250706e-06, + "loss": 26.1081, + "step": 167120 + }, + { + "epoch": 0.33761317404461105, + "grad_norm": 389.464111328125, + "learning_rate": 8.419426165768856e-06, + "loss": 27.3949, + "step": 167130 + }, + { + "epoch": 0.3376333746772949, + "grad_norm": 816.3432006835938, + "learning_rate": 8.41917148262114e-06, + "loss": 25.1434, + "step": 167140 + }, + { + "epoch": 0.3376535753099787, + "grad_norm": 847.6219482421875, + "learning_rate": 8.418916782808795e-06, + "loss": 23.2586, + "step": 167150 + }, + { + "epoch": 0.3376737759426625, + "grad_norm": 1193.47216796875, + "learning_rate": 8.418662066333063e-06, + "loss": 44.3514, + "step": 167160 + }, + { + "epoch": 0.33769397657534633, + "grad_norm": 645.29052734375, + "learning_rate": 8.418407333195189e-06, + "loss": 22.0493, + "step": 167170 + }, + { + "epoch": 0.33771417720803015, + "grad_norm": 561.77783203125, + "learning_rate": 8.418152583396411e-06, + "loss": 22.6123, + "step": 167180 + }, + { + "epoch": 0.337734377840714, + "grad_norm": 269.1815490722656, + "learning_rate": 8.417897816937973e-06, + "loss": 24.2781, + "step": 167190 + }, + { + "epoch": 0.3377545784733978, + "grad_norm": 378.8291320800781, + "learning_rate": 8.417643033821114e-06, + "loss": 14.2875, + "step": 167200 + }, + { + "epoch": 0.3377747791060816, + "grad_norm": 357.581298828125, + "learning_rate": 8.417388234047078e-06, + "loss": 12.0459, + "step": 167210 + }, + { + "epoch": 0.33779497973876543, + "grad_norm": 520.4596557617188, + "learning_rate": 8.417133417617107e-06, + "loss": 26.0304, + "step": 167220 + }, + { + "epoch": 0.33781518037144925, + "grad_norm": 137.63626098632812, + "learning_rate": 8.416878584532442e-06, + "loss": 26.1745, + "step": 167230 + }, + { + "epoch": 0.3378353810041331, + "grad_norm": 124.04597473144531, + "learning_rate": 8.416623734794324e-06, + "loss": 26.5604, + "step": 167240 + }, + { + "epoch": 0.33785558163681684, + "grad_norm": 352.2334899902344, + "learning_rate": 8.416368868403997e-06, + "loss": 21.9152, + "step": 167250 + }, + { + "epoch": 0.33787578226950066, + "grad_norm": 170.1546173095703, + "learning_rate": 8.416113985362702e-06, + "loss": 16.849, + "step": 167260 + }, + { + "epoch": 0.3378959829021845, + "grad_norm": 1349.9058837890625, 
+ "learning_rate": 8.415859085671683e-06, + "loss": 42.5881, + "step": 167270 + }, + { + "epoch": 0.3379161835348683, + "grad_norm": 1348.8409423828125, + "learning_rate": 8.41560416933218e-06, + "loss": 24.9556, + "step": 167280 + }, + { + "epoch": 0.3379363841675521, + "grad_norm": 321.2070007324219, + "learning_rate": 8.415349236345436e-06, + "loss": 19.3961, + "step": 167290 + }, + { + "epoch": 0.33795658480023594, + "grad_norm": 342.5564270019531, + "learning_rate": 8.415094286712694e-06, + "loss": 12.1926, + "step": 167300 + }, + { + "epoch": 0.33797678543291976, + "grad_norm": 356.09503173828125, + "learning_rate": 8.4148393204352e-06, + "loss": 14.2662, + "step": 167310 + }, + { + "epoch": 0.3379969860656036, + "grad_norm": 257.2403869628906, + "learning_rate": 8.41458433751419e-06, + "loss": 14.5469, + "step": 167320 + }, + { + "epoch": 0.3380171866982874, + "grad_norm": 479.4848327636719, + "learning_rate": 8.41432933795091e-06, + "loss": 25.0478, + "step": 167330 + }, + { + "epoch": 0.3380373873309712, + "grad_norm": 347.92242431640625, + "learning_rate": 8.414074321746605e-06, + "loss": 13.6998, + "step": 167340 + }, + { + "epoch": 0.33805758796365504, + "grad_norm": 1818.5078125, + "learning_rate": 8.413819288902514e-06, + "loss": 38.0327, + "step": 167350 + }, + { + "epoch": 0.33807778859633886, + "grad_norm": 291.0877380371094, + "learning_rate": 8.413564239419883e-06, + "loss": 21.6719, + "step": 167360 + }, + { + "epoch": 0.3380979892290227, + "grad_norm": 540.657470703125, + "learning_rate": 8.413309173299954e-06, + "loss": 19.6665, + "step": 167370 + }, + { + "epoch": 0.33811818986170644, + "grad_norm": 268.7587585449219, + "learning_rate": 8.41305409054397e-06, + "loss": 20.5052, + "step": 167380 + }, + { + "epoch": 0.33813839049439026, + "grad_norm": 241.68829345703125, + "learning_rate": 8.412798991153172e-06, + "loss": 20.1494, + "step": 167390 + }, + { + "epoch": 0.3381585911270741, + "grad_norm": 0.0, + "learning_rate": 8.412543875128809e-06, + "loss": 13.7161, + "step": 167400 + }, + { + "epoch": 0.3381787917597579, + "grad_norm": 56.017757415771484, + "learning_rate": 8.412288742472118e-06, + "loss": 23.5154, + "step": 167410 + }, + { + "epoch": 0.3381989923924417, + "grad_norm": 938.0296020507812, + "learning_rate": 8.412033593184348e-06, + "loss": 22.4197, + "step": 167420 + }, + { + "epoch": 0.33821919302512554, + "grad_norm": 551.4971313476562, + "learning_rate": 8.411778427266739e-06, + "loss": 22.8464, + "step": 167430 + }, + { + "epoch": 0.33823939365780936, + "grad_norm": 286.5843505859375, + "learning_rate": 8.411523244720536e-06, + "loss": 21.0896, + "step": 167440 + }, + { + "epoch": 0.3382595942904932, + "grad_norm": 519.6744384765625, + "learning_rate": 8.411268045546984e-06, + "loss": 19.7263, + "step": 167450 + }, + { + "epoch": 0.338279794923177, + "grad_norm": 20.29805564880371, + "learning_rate": 8.411012829747323e-06, + "loss": 11.9788, + "step": 167460 + }, + { + "epoch": 0.3382999955558608, + "grad_norm": 661.3635864257812, + "learning_rate": 8.4107575973228e-06, + "loss": 21.3432, + "step": 167470 + }, + { + "epoch": 0.33832019618854464, + "grad_norm": 490.74407958984375, + "learning_rate": 8.410502348274658e-06, + "loss": 23.1471, + "step": 167480 + }, + { + "epoch": 0.33834039682122846, + "grad_norm": 433.8485412597656, + "learning_rate": 8.410247082604142e-06, + "loss": 28.1603, + "step": 167490 + }, + { + "epoch": 0.3383605974539123, + "grad_norm": 259.97308349609375, + "learning_rate": 8.409991800312493e-06, + "loss": 45.3952, + "step": 
167500 + }, + { + "epoch": 0.33838079808659605, + "grad_norm": 306.1461181640625, + "learning_rate": 8.40973650140096e-06, + "loss": 47.7216, + "step": 167510 + }, + { + "epoch": 0.33840099871927987, + "grad_norm": 276.6924133300781, + "learning_rate": 8.409481185870783e-06, + "loss": 16.2209, + "step": 167520 + }, + { + "epoch": 0.3384211993519637, + "grad_norm": 317.9801025390625, + "learning_rate": 8.409225853723209e-06, + "loss": 28.3326, + "step": 167530 + }, + { + "epoch": 0.3384413999846475, + "grad_norm": 219.22189331054688, + "learning_rate": 8.40897050495948e-06, + "loss": 23.7641, + "step": 167540 + }, + { + "epoch": 0.3384616006173313, + "grad_norm": 1.6560962200164795, + "learning_rate": 8.408715139580846e-06, + "loss": 14.9415, + "step": 167550 + }, + { + "epoch": 0.33848180125001515, + "grad_norm": 383.08514404296875, + "learning_rate": 8.408459757588547e-06, + "loss": 13.7631, + "step": 167560 + }, + { + "epoch": 0.33850200188269897, + "grad_norm": 307.9669494628906, + "learning_rate": 8.408204358983826e-06, + "loss": 18.9212, + "step": 167570 + }, + { + "epoch": 0.3385222025153828, + "grad_norm": 147.46270751953125, + "learning_rate": 8.407948943767933e-06, + "loss": 13.1037, + "step": 167580 + }, + { + "epoch": 0.3385424031480666, + "grad_norm": 751.38037109375, + "learning_rate": 8.407693511942107e-06, + "loss": 17.0046, + "step": 167590 + }, + { + "epoch": 0.3385626037807504, + "grad_norm": 276.7906799316406, + "learning_rate": 8.4074380635076e-06, + "loss": 24.6428, + "step": 167600 + }, + { + "epoch": 0.33858280441343425, + "grad_norm": 370.64874267578125, + "learning_rate": 8.40718259846565e-06, + "loss": 26.6888, + "step": 167610 + }, + { + "epoch": 0.33860300504611807, + "grad_norm": 313.6131591796875, + "learning_rate": 8.406927116817507e-06, + "loss": 29.4524, + "step": 167620 + }, + { + "epoch": 0.3386232056788019, + "grad_norm": 305.2022705078125, + "learning_rate": 8.406671618564413e-06, + "loss": 28.7683, + "step": 167630 + }, + { + "epoch": 0.33864340631148565, + "grad_norm": 143.18399047851562, + "learning_rate": 8.406416103707616e-06, + "loss": 15.4658, + "step": 167640 + }, + { + "epoch": 0.33866360694416947, + "grad_norm": 73.9214096069336, + "learning_rate": 8.406160572248361e-06, + "loss": 18.774, + "step": 167650 + }, + { + "epoch": 0.3386838075768533, + "grad_norm": 483.00140380859375, + "learning_rate": 8.40590502418789e-06, + "loss": 25.6484, + "step": 167660 + }, + { + "epoch": 0.3387040082095371, + "grad_norm": 1040.4700927734375, + "learning_rate": 8.405649459527453e-06, + "loss": 26.0978, + "step": 167670 + }, + { + "epoch": 0.33872420884222093, + "grad_norm": 157.07838439941406, + "learning_rate": 8.405393878268292e-06, + "loss": 21.5523, + "step": 167680 + }, + { + "epoch": 0.33874440947490475, + "grad_norm": 10.15843391418457, + "learning_rate": 8.405138280411656e-06, + "loss": 18.6426, + "step": 167690 + }, + { + "epoch": 0.33876461010758857, + "grad_norm": 652.705810546875, + "learning_rate": 8.404882665958788e-06, + "loss": 18.274, + "step": 167700 + }, + { + "epoch": 0.3387848107402724, + "grad_norm": 440.4273376464844, + "learning_rate": 8.404627034910934e-06, + "loss": 18.0438, + "step": 167710 + }, + { + "epoch": 0.3388050113729562, + "grad_norm": 579.0615234375, + "learning_rate": 8.404371387269341e-06, + "loss": 22.8465, + "step": 167720 + }, + { + "epoch": 0.33882521200564003, + "grad_norm": 311.01251220703125, + "learning_rate": 8.404115723035256e-06, + "loss": 19.5474, + "step": 167730 + }, + { + "epoch": 0.33884541263832385, + 
"grad_norm": 482.511474609375, + "learning_rate": 8.403860042209923e-06, + "loss": 23.372, + "step": 167740 + }, + { + "epoch": 0.33886561327100767, + "grad_norm": 487.533935546875, + "learning_rate": 8.40360434479459e-06, + "loss": 38.8385, + "step": 167750 + }, + { + "epoch": 0.3388858139036915, + "grad_norm": 943.122314453125, + "learning_rate": 8.4033486307905e-06, + "loss": 23.2046, + "step": 167760 + }, + { + "epoch": 0.33890601453637526, + "grad_norm": 27.917802810668945, + "learning_rate": 8.403092900198904e-06, + "loss": 14.9173, + "step": 167770 + }, + { + "epoch": 0.3389262151690591, + "grad_norm": 1000.3346557617188, + "learning_rate": 8.402837153021047e-06, + "loss": 18.9368, + "step": 167780 + }, + { + "epoch": 0.3389464158017429, + "grad_norm": 476.365234375, + "learning_rate": 8.402581389258171e-06, + "loss": 19.8223, + "step": 167790 + }, + { + "epoch": 0.3389666164344267, + "grad_norm": 482.34063720703125, + "learning_rate": 8.402325608911527e-06, + "loss": 21.4722, + "step": 167800 + }, + { + "epoch": 0.33898681706711054, + "grad_norm": 1219.3975830078125, + "learning_rate": 8.40206981198236e-06, + "loss": 30.1687, + "step": 167810 + }, + { + "epoch": 0.33900701769979436, + "grad_norm": 369.8954772949219, + "learning_rate": 8.40181399847192e-06, + "loss": 38.7512, + "step": 167820 + }, + { + "epoch": 0.3390272183324782, + "grad_norm": 641.1861572265625, + "learning_rate": 8.40155816838145e-06, + "loss": 26.7039, + "step": 167830 + }, + { + "epoch": 0.339047418965162, + "grad_norm": 447.89642333984375, + "learning_rate": 8.401302321712198e-06, + "loss": 20.2609, + "step": 167840 + }, + { + "epoch": 0.3390676195978458, + "grad_norm": 528.932373046875, + "learning_rate": 8.40104645846541e-06, + "loss": 8.231, + "step": 167850 + }, + { + "epoch": 0.33908782023052964, + "grad_norm": 164.8485870361328, + "learning_rate": 8.400790578642333e-06, + "loss": 11.0415, + "step": 167860 + }, + { + "epoch": 0.33910802086321346, + "grad_norm": 832.0770263671875, + "learning_rate": 8.400534682244217e-06, + "loss": 31.5946, + "step": 167870 + }, + { + "epoch": 0.3391282214958973, + "grad_norm": 646.8993530273438, + "learning_rate": 8.400278769272307e-06, + "loss": 36.4199, + "step": 167880 + }, + { + "epoch": 0.33914842212858104, + "grad_norm": 182.03268432617188, + "learning_rate": 8.400022839727853e-06, + "loss": 13.3391, + "step": 167890 + }, + { + "epoch": 0.33916862276126486, + "grad_norm": 550.815673828125, + "learning_rate": 8.399766893612096e-06, + "loss": 12.5295, + "step": 167900 + }, + { + "epoch": 0.3391888233939487, + "grad_norm": 277.0923156738281, + "learning_rate": 8.399510930926291e-06, + "loss": 25.4, + "step": 167910 + }, + { + "epoch": 0.3392090240266325, + "grad_norm": 38.229129791259766, + "learning_rate": 8.399254951671681e-06, + "loss": 23.5104, + "step": 167920 + }, + { + "epoch": 0.3392292246593163, + "grad_norm": 428.00860595703125, + "learning_rate": 8.398998955849513e-06, + "loss": 50.9553, + "step": 167930 + }, + { + "epoch": 0.33924942529200014, + "grad_norm": 448.3719787597656, + "learning_rate": 8.398742943461038e-06, + "loss": 26.2329, + "step": 167940 + }, + { + "epoch": 0.33926962592468396, + "grad_norm": 747.2113037109375, + "learning_rate": 8.398486914507501e-06, + "loss": 23.6032, + "step": 167950 + }, + { + "epoch": 0.3392898265573678, + "grad_norm": 437.9092712402344, + "learning_rate": 8.398230868990151e-06, + "loss": 26.8879, + "step": 167960 + }, + { + "epoch": 0.3393100271900516, + "grad_norm": 383.5091247558594, + "learning_rate": 
8.397974806910237e-06, + "loss": 26.357, + "step": 167970 + }, + { + "epoch": 0.3393302278227354, + "grad_norm": 636.2250366210938, + "learning_rate": 8.397718728269006e-06, + "loss": 17.1569, + "step": 167980 + }, + { + "epoch": 0.33935042845541924, + "grad_norm": 41.08114242553711, + "learning_rate": 8.397462633067705e-06, + "loss": 24.3474, + "step": 167990 + }, + { + "epoch": 0.33937062908810306, + "grad_norm": 103.65257263183594, + "learning_rate": 8.397206521307584e-06, + "loss": 21.3358, + "step": 168000 + }, + { + "epoch": 0.3393908297207869, + "grad_norm": 286.0574951171875, + "learning_rate": 8.396950392989888e-06, + "loss": 15.17, + "step": 168010 + }, + { + "epoch": 0.33941103035347064, + "grad_norm": 19.315410614013672, + "learning_rate": 8.396694248115871e-06, + "loss": 20.8692, + "step": 168020 + }, + { + "epoch": 0.33943123098615446, + "grad_norm": 479.29913330078125, + "learning_rate": 8.396438086686779e-06, + "loss": 43.0148, + "step": 168030 + }, + { + "epoch": 0.3394514316188383, + "grad_norm": 248.98126220703125, + "learning_rate": 8.396181908703855e-06, + "loss": 13.8845, + "step": 168040 + }, + { + "epoch": 0.3394716322515221, + "grad_norm": 78.33552551269531, + "learning_rate": 8.395925714168356e-06, + "loss": 50.4541, + "step": 168050 + }, + { + "epoch": 0.3394918328842059, + "grad_norm": 525.9219970703125, + "learning_rate": 8.395669503081524e-06, + "loss": 14.7674, + "step": 168060 + }, + { + "epoch": 0.33951203351688974, + "grad_norm": 302.1783447265625, + "learning_rate": 8.395413275444614e-06, + "loss": 16.9842, + "step": 168070 + }, + { + "epoch": 0.33953223414957356, + "grad_norm": 434.3306579589844, + "learning_rate": 8.39515703125887e-06, + "loss": 26.1344, + "step": 168080 + }, + { + "epoch": 0.3395524347822574, + "grad_norm": 178.66641235351562, + "learning_rate": 8.394900770525543e-06, + "loss": 17.0307, + "step": 168090 + }, + { + "epoch": 0.3395726354149412, + "grad_norm": 508.07611083984375, + "learning_rate": 8.394644493245882e-06, + "loss": 26.2574, + "step": 168100 + }, + { + "epoch": 0.339592836047625, + "grad_norm": 185.32598876953125, + "learning_rate": 8.394388199421133e-06, + "loss": 9.7913, + "step": 168110 + }, + { + "epoch": 0.33961303668030884, + "grad_norm": 689.20068359375, + "learning_rate": 8.39413188905255e-06, + "loss": 29.8605, + "step": 168120 + }, + { + "epoch": 0.33963323731299266, + "grad_norm": 135.01458740234375, + "learning_rate": 8.393875562141379e-06, + "loss": 16.6367, + "step": 168130 + }, + { + "epoch": 0.3396534379456765, + "grad_norm": 391.94451904296875, + "learning_rate": 8.39361921868887e-06, + "loss": 20.4665, + "step": 168140 + }, + { + "epoch": 0.33967363857836025, + "grad_norm": 283.5951843261719, + "learning_rate": 8.393362858696272e-06, + "loss": 11.1001, + "step": 168150 + }, + { + "epoch": 0.33969383921104407, + "grad_norm": 760.8709106445312, + "learning_rate": 8.393106482164836e-06, + "loss": 30.6613, + "step": 168160 + }, + { + "epoch": 0.3397140398437279, + "grad_norm": 733.862060546875, + "learning_rate": 8.39285008909581e-06, + "loss": 26.6271, + "step": 168170 + }, + { + "epoch": 0.3397342404764117, + "grad_norm": 348.95367431640625, + "learning_rate": 8.392593679490444e-06, + "loss": 16.9305, + "step": 168180 + }, + { + "epoch": 0.33975444110909553, + "grad_norm": 204.7607421875, + "learning_rate": 8.392337253349988e-06, + "loss": 9.565, + "step": 168190 + }, + { + "epoch": 0.33977464174177935, + "grad_norm": 272.72021484375, + "learning_rate": 8.392080810675692e-06, + "loss": 28.3992, + "step": 
168200 + }, + { + "epoch": 0.33979484237446317, + "grad_norm": 517.2645874023438, + "learning_rate": 8.391824351468805e-06, + "loss": 33.936, + "step": 168210 + }, + { + "epoch": 0.339815043007147, + "grad_norm": 421.4093322753906, + "learning_rate": 8.391567875730577e-06, + "loss": 21.1351, + "step": 168220 + }, + { + "epoch": 0.3398352436398308, + "grad_norm": 401.4591064453125, + "learning_rate": 8.39131138346226e-06, + "loss": 23.4746, + "step": 168230 + }, + { + "epoch": 0.33985544427251463, + "grad_norm": 363.4436340332031, + "learning_rate": 8.391054874665103e-06, + "loss": 21.9265, + "step": 168240 + }, + { + "epoch": 0.33987564490519845, + "grad_norm": 158.54002380371094, + "learning_rate": 8.390798349340354e-06, + "loss": 15.752, + "step": 168250 + }, + { + "epoch": 0.33989584553788227, + "grad_norm": 442.8247375488281, + "learning_rate": 8.390541807489266e-06, + "loss": 27.6127, + "step": 168260 + }, + { + "epoch": 0.3399160461705661, + "grad_norm": 410.08660888671875, + "learning_rate": 8.390285249113088e-06, + "loss": 17.5862, + "step": 168270 + }, + { + "epoch": 0.33993624680324985, + "grad_norm": 485.7059631347656, + "learning_rate": 8.390028674213072e-06, + "loss": 17.1023, + "step": 168280 + }, + { + "epoch": 0.3399564474359337, + "grad_norm": 242.07127380371094, + "learning_rate": 8.389772082790466e-06, + "loss": 15.6061, + "step": 168290 + }, + { + "epoch": 0.3399766480686175, + "grad_norm": 182.19862365722656, + "learning_rate": 8.389515474846522e-06, + "loss": 23.0791, + "step": 168300 + }, + { + "epoch": 0.3399968487013013, + "grad_norm": 96.41129302978516, + "learning_rate": 8.389258850382491e-06, + "loss": 16.7356, + "step": 168310 + }, + { + "epoch": 0.34001704933398513, + "grad_norm": 402.6282958984375, + "learning_rate": 8.389002209399625e-06, + "loss": 17.2682, + "step": 168320 + }, + { + "epoch": 0.34003724996666895, + "grad_norm": 400.3856506347656, + "learning_rate": 8.388745551899172e-06, + "loss": 14.5012, + "step": 168330 + }, + { + "epoch": 0.3400574505993528, + "grad_norm": 468.0992126464844, + "learning_rate": 8.388488877882383e-06, + "loss": 20.7429, + "step": 168340 + }, + { + "epoch": 0.3400776512320366, + "grad_norm": 342.0508117675781, + "learning_rate": 8.388232187350513e-06, + "loss": 21.1195, + "step": 168350 + }, + { + "epoch": 0.3400978518647204, + "grad_norm": 225.8852996826172, + "learning_rate": 8.387975480304808e-06, + "loss": 20.6178, + "step": 168360 + }, + { + "epoch": 0.34011805249740423, + "grad_norm": 150.79534912109375, + "learning_rate": 8.387718756746522e-06, + "loss": 25.234, + "step": 168370 + }, + { + "epoch": 0.34013825313008805, + "grad_norm": 692.2120361328125, + "learning_rate": 8.387462016676906e-06, + "loss": 25.3535, + "step": 168380 + }, + { + "epoch": 0.3401584537627719, + "grad_norm": 92.25536346435547, + "learning_rate": 8.387205260097211e-06, + "loss": 27.1206, + "step": 168390 + }, + { + "epoch": 0.3401786543954557, + "grad_norm": 384.4954833984375, + "learning_rate": 8.386948487008687e-06, + "loss": 22.4686, + "step": 168400 + }, + { + "epoch": 0.34019885502813946, + "grad_norm": 314.7448425292969, + "learning_rate": 8.386691697412588e-06, + "loss": 20.4522, + "step": 168410 + }, + { + "epoch": 0.3402190556608233, + "grad_norm": 144.3899688720703, + "learning_rate": 8.386434891310164e-06, + "loss": 24.6264, + "step": 168420 + }, + { + "epoch": 0.3402392562935071, + "grad_norm": 385.3217468261719, + "learning_rate": 8.386178068702669e-06, + "loss": 24.4465, + "step": 168430 + }, + { + "epoch": 0.3402594569261909, + 
"grad_norm": 705.103759765625, + "learning_rate": 8.385921229591351e-06, + "loss": 17.2236, + "step": 168440 + }, + { + "epoch": 0.34027965755887474, + "grad_norm": 669.4501953125, + "learning_rate": 8.385664373977462e-06, + "loss": 20.0496, + "step": 168450 + }, + { + "epoch": 0.34029985819155856, + "grad_norm": 634.993408203125, + "learning_rate": 8.385407501862258e-06, + "loss": 24.514, + "step": 168460 + }, + { + "epoch": 0.3403200588242424, + "grad_norm": 399.2812194824219, + "learning_rate": 8.385150613246989e-06, + "loss": 10.7176, + "step": 168470 + }, + { + "epoch": 0.3403402594569262, + "grad_norm": 661.9140625, + "learning_rate": 8.384893708132904e-06, + "loss": 19.4782, + "step": 168480 + }, + { + "epoch": 0.34036046008961, + "grad_norm": 349.7503967285156, + "learning_rate": 8.384636786521259e-06, + "loss": 18.4263, + "step": 168490 + }, + { + "epoch": 0.34038066072229384, + "grad_norm": 115.83380889892578, + "learning_rate": 8.384379848413304e-06, + "loss": 17.163, + "step": 168500 + }, + { + "epoch": 0.34040086135497766, + "grad_norm": 324.6518859863281, + "learning_rate": 8.384122893810294e-06, + "loss": 15.6572, + "step": 168510 + }, + { + "epoch": 0.3404210619876615, + "grad_norm": 328.8683776855469, + "learning_rate": 8.383865922713478e-06, + "loss": 49.4089, + "step": 168520 + }, + { + "epoch": 0.34044126262034524, + "grad_norm": 631.0416870117188, + "learning_rate": 8.383608935124109e-06, + "loss": 18.9201, + "step": 168530 + }, + { + "epoch": 0.34046146325302906, + "grad_norm": 533.4650268554688, + "learning_rate": 8.383351931043441e-06, + "loss": 16.3636, + "step": 168540 + }, + { + "epoch": 0.3404816638857129, + "grad_norm": 6.021905899047852, + "learning_rate": 8.383094910472728e-06, + "loss": 12.5463, + "step": 168550 + }, + { + "epoch": 0.3405018645183967, + "grad_norm": 1174.9256591796875, + "learning_rate": 8.38283787341322e-06, + "loss": 25.0581, + "step": 168560 + }, + { + "epoch": 0.3405220651510805, + "grad_norm": 306.4240417480469, + "learning_rate": 8.382580819866168e-06, + "loss": 12.3355, + "step": 168570 + }, + { + "epoch": 0.34054226578376434, + "grad_norm": 356.0873107910156, + "learning_rate": 8.38232374983283e-06, + "loss": 35.7938, + "step": 168580 + }, + { + "epoch": 0.34056246641644816, + "grad_norm": 685.3045654296875, + "learning_rate": 8.382066663314455e-06, + "loss": 43.9767, + "step": 168590 + }, + { + "epoch": 0.340582667049132, + "grad_norm": 644.2220458984375, + "learning_rate": 8.381809560312298e-06, + "loss": 29.4125, + "step": 168600 + }, + { + "epoch": 0.3406028676818158, + "grad_norm": 187.02745056152344, + "learning_rate": 8.381552440827611e-06, + "loss": 31.9015, + "step": 168610 + }, + { + "epoch": 0.3406230683144996, + "grad_norm": 68.95345306396484, + "learning_rate": 8.381295304861647e-06, + "loss": 33.2846, + "step": 168620 + }, + { + "epoch": 0.34064326894718344, + "grad_norm": 502.47271728515625, + "learning_rate": 8.381038152415661e-06, + "loss": 29.9597, + "step": 168630 + }, + { + "epoch": 0.34066346957986726, + "grad_norm": 481.96142578125, + "learning_rate": 8.380780983490903e-06, + "loss": 24.2839, + "step": 168640 + }, + { + "epoch": 0.3406836702125511, + "grad_norm": 258.9666748046875, + "learning_rate": 8.38052379808863e-06, + "loss": 32.6194, + "step": 168650 + }, + { + "epoch": 0.34070387084523485, + "grad_norm": 320.35662841796875, + "learning_rate": 8.380266596210095e-06, + "loss": 32.8204, + "step": 168660 + }, + { + "epoch": 0.34072407147791867, + "grad_norm": 619.9447631835938, + "learning_rate": 
8.380009377856548e-06, + "loss": 21.3143, + "step": 168670 + }, + { + "epoch": 0.3407442721106025, + "grad_norm": 626.5851440429688, + "learning_rate": 8.379752143029248e-06, + "loss": 21.0067, + "step": 168680 + }, + { + "epoch": 0.3407644727432863, + "grad_norm": 4.577370643615723, + "learning_rate": 8.379494891729445e-06, + "loss": 26.3092, + "step": 168690 + }, + { + "epoch": 0.3407846733759701, + "grad_norm": 627.0779418945312, + "learning_rate": 8.379237623958393e-06, + "loss": 19.199, + "step": 168700 + }, + { + "epoch": 0.34080487400865395, + "grad_norm": 334.8100891113281, + "learning_rate": 8.378980339717348e-06, + "loss": 19.6408, + "step": 168710 + }, + { + "epoch": 0.34082507464133777, + "grad_norm": 706.650634765625, + "learning_rate": 8.37872303900756e-06, + "loss": 31.1062, + "step": 168720 + }, + { + "epoch": 0.3408452752740216, + "grad_norm": 265.525390625, + "learning_rate": 8.378465721830289e-06, + "loss": 16.0674, + "step": 168730 + }, + { + "epoch": 0.3408654759067054, + "grad_norm": 197.72540283203125, + "learning_rate": 8.378208388186784e-06, + "loss": 50.1339, + "step": 168740 + }, + { + "epoch": 0.3408856765393892, + "grad_norm": 260.6598815917969, + "learning_rate": 8.377951038078303e-06, + "loss": 34.7109, + "step": 168750 + }, + { + "epoch": 0.34090587717207305, + "grad_norm": 24.389583587646484, + "learning_rate": 8.377693671506094e-06, + "loss": 14.9343, + "step": 168760 + }, + { + "epoch": 0.34092607780475687, + "grad_norm": 325.55029296875, + "learning_rate": 8.37743628847142e-06, + "loss": 22.3212, + "step": 168770 + }, + { + "epoch": 0.3409462784374407, + "grad_norm": 335.56439208984375, + "learning_rate": 8.37717888897553e-06, + "loss": 16.7277, + "step": 168780 + }, + { + "epoch": 0.34096647907012445, + "grad_norm": 328.0765075683594, + "learning_rate": 8.37692147301968e-06, + "loss": 15.7982, + "step": 168790 + }, + { + "epoch": 0.34098667970280827, + "grad_norm": 614.3564453125, + "learning_rate": 8.376664040605122e-06, + "loss": 21.111, + "step": 168800 + }, + { + "epoch": 0.3410068803354921, + "grad_norm": 224.3957061767578, + "learning_rate": 8.376406591733115e-06, + "loss": 18.3861, + "step": 168810 + }, + { + "epoch": 0.3410270809681759, + "grad_norm": 324.8829650878906, + "learning_rate": 8.37614912640491e-06, + "loss": 32.3365, + "step": 168820 + }, + { + "epoch": 0.34104728160085973, + "grad_norm": 411.1203918457031, + "learning_rate": 8.375891644621767e-06, + "loss": 11.3225, + "step": 168830 + }, + { + "epoch": 0.34106748223354355, + "grad_norm": 236.78245544433594, + "learning_rate": 8.375634146384937e-06, + "loss": 15.5481, + "step": 168840 + }, + { + "epoch": 0.34108768286622737, + "grad_norm": 6.189517498016357, + "learning_rate": 8.375376631695673e-06, + "loss": 17.8311, + "step": 168850 + }, + { + "epoch": 0.3411078834989112, + "grad_norm": 467.2071228027344, + "learning_rate": 8.375119100555234e-06, + "loss": 17.9199, + "step": 168860 + }, + { + "epoch": 0.341128084131595, + "grad_norm": 391.2217712402344, + "learning_rate": 8.374861552964875e-06, + "loss": 16.9528, + "step": 168870 + }, + { + "epoch": 0.34114828476427883, + "grad_norm": 188.17681884765625, + "learning_rate": 8.374603988925848e-06, + "loss": 15.361, + "step": 168880 + }, + { + "epoch": 0.34116848539696265, + "grad_norm": 599.74365234375, + "learning_rate": 8.37434640843941e-06, + "loss": 15.0419, + "step": 168890 + }, + { + "epoch": 0.34118868602964647, + "grad_norm": 324.143798828125, + "learning_rate": 8.374088811506819e-06, + "loss": 23.422, + "step": 168900 + }, + 
{ + "epoch": 0.3412088866623303, + "grad_norm": 772.922607421875, + "learning_rate": 8.373831198129327e-06, + "loss": 15.1421, + "step": 168910 + }, + { + "epoch": 0.34122908729501406, + "grad_norm": 435.1819152832031, + "learning_rate": 8.373573568308193e-06, + "loss": 24.3665, + "step": 168920 + }, + { + "epoch": 0.3412492879276979, + "grad_norm": 1819.421630859375, + "learning_rate": 8.37331592204467e-06, + "loss": 31.2344, + "step": 168930 + }, + { + "epoch": 0.3412694885603817, + "grad_norm": 21.15869140625, + "learning_rate": 8.373058259340012e-06, + "loss": 27.9389, + "step": 168940 + }, + { + "epoch": 0.3412896891930655, + "grad_norm": 523.2466430664062, + "learning_rate": 8.372800580195479e-06, + "loss": 23.8264, + "step": 168950 + }, + { + "epoch": 0.34130988982574934, + "grad_norm": 307.0376892089844, + "learning_rate": 8.372542884612324e-06, + "loss": 17.7892, + "step": 168960 + }, + { + "epoch": 0.34133009045843316, + "grad_norm": 209.58401489257812, + "learning_rate": 8.372285172591806e-06, + "loss": 24.8336, + "step": 168970 + }, + { + "epoch": 0.341350291091117, + "grad_norm": 416.9378662109375, + "learning_rate": 8.372027444135176e-06, + "loss": 25.3283, + "step": 168980 + }, + { + "epoch": 0.3413704917238008, + "grad_norm": 66.7175064086914, + "learning_rate": 8.371769699243694e-06, + "loss": 14.9726, + "step": 168990 + }, + { + "epoch": 0.3413906923564846, + "grad_norm": 1017.1961059570312, + "learning_rate": 8.371511937918616e-06, + "loss": 26.7788, + "step": 169000 + }, + { + "epoch": 0.34141089298916844, + "grad_norm": 16.84259033203125, + "learning_rate": 8.3712541601612e-06, + "loss": 9.0483, + "step": 169010 + }, + { + "epoch": 0.34143109362185226, + "grad_norm": 278.5416259765625, + "learning_rate": 8.370996365972698e-06, + "loss": 19.8899, + "step": 169020 + }, + { + "epoch": 0.3414512942545361, + "grad_norm": 206.6166534423828, + "learning_rate": 8.37073855535437e-06, + "loss": 19.9386, + "step": 169030 + }, + { + "epoch": 0.3414714948872199, + "grad_norm": 321.22039794921875, + "learning_rate": 8.370480728307469e-06, + "loss": 21.6634, + "step": 169040 + }, + { + "epoch": 0.34149169551990366, + "grad_norm": 167.9938201904297, + "learning_rate": 8.370222884833255e-06, + "loss": 13.9485, + "step": 169050 + }, + { + "epoch": 0.3415118961525875, + "grad_norm": 753.5398559570312, + "learning_rate": 8.369965024932983e-06, + "loss": 33.6936, + "step": 169060 + }, + { + "epoch": 0.3415320967852713, + "grad_norm": 22.527313232421875, + "learning_rate": 8.36970714860791e-06, + "loss": 20.0193, + "step": 169070 + }, + { + "epoch": 0.3415522974179551, + "grad_norm": 152.59750366210938, + "learning_rate": 8.369449255859294e-06, + "loss": 26.9117, + "step": 169080 + }, + { + "epoch": 0.34157249805063894, + "grad_norm": 450.9139099121094, + "learning_rate": 8.36919134668839e-06, + "loss": 9.5488, + "step": 169090 + }, + { + "epoch": 0.34159269868332276, + "grad_norm": 136.21226501464844, + "learning_rate": 8.368933421096454e-06, + "loss": 27.497, + "step": 169100 + }, + { + "epoch": 0.3416128993160066, + "grad_norm": 655.6229858398438, + "learning_rate": 8.368675479084749e-06, + "loss": 37.529, + "step": 169110 + }, + { + "epoch": 0.3416330999486904, + "grad_norm": 111.01313018798828, + "learning_rate": 8.368417520654526e-06, + "loss": 15.1597, + "step": 169120 + }, + { + "epoch": 0.3416533005813742, + "grad_norm": 258.2264404296875, + "learning_rate": 8.368159545807047e-06, + "loss": 21.8688, + "step": 169130 + }, + { + "epoch": 0.34167350121405804, + "grad_norm": 
248.80821228027344, + "learning_rate": 8.367901554543563e-06, + "loss": 14.1267, + "step": 169140 + }, + { + "epoch": 0.34169370184674186, + "grad_norm": 0.0, + "learning_rate": 8.367643546865339e-06, + "loss": 16.2312, + "step": 169150 + }, + { + "epoch": 0.3417139024794257, + "grad_norm": 121.87797546386719, + "learning_rate": 8.367385522773625e-06, + "loss": 14.3055, + "step": 169160 + }, + { + "epoch": 0.34173410311210944, + "grad_norm": 494.537841796875, + "learning_rate": 8.367127482269686e-06, + "loss": 20.4748, + "step": 169170 + }, + { + "epoch": 0.34175430374479326, + "grad_norm": 254.8951873779297, + "learning_rate": 8.366869425354774e-06, + "loss": 12.5494, + "step": 169180 + }, + { + "epoch": 0.3417745043774771, + "grad_norm": 348.20233154296875, + "learning_rate": 8.36661135203015e-06, + "loss": 12.4797, + "step": 169190 + }, + { + "epoch": 0.3417947050101609, + "grad_norm": 871.5491333007812, + "learning_rate": 8.366353262297069e-06, + "loss": 29.6144, + "step": 169200 + }, + { + "epoch": 0.3418149056428447, + "grad_norm": 497.8019104003906, + "learning_rate": 8.366095156156793e-06, + "loss": 22.8318, + "step": 169210 + }, + { + "epoch": 0.34183510627552854, + "grad_norm": 623.549072265625, + "learning_rate": 8.365837033610576e-06, + "loss": 38.8452, + "step": 169220 + }, + { + "epoch": 0.34185530690821236, + "grad_norm": 638.8836669921875, + "learning_rate": 8.365578894659677e-06, + "loss": 14.1556, + "step": 169230 + }, + { + "epoch": 0.3418755075408962, + "grad_norm": 375.0995178222656, + "learning_rate": 8.365320739305355e-06, + "loss": 19.7537, + "step": 169240 + }, + { + "epoch": 0.34189570817358, + "grad_norm": 932.085205078125, + "learning_rate": 8.365062567548868e-06, + "loss": 26.9566, + "step": 169250 + }, + { + "epoch": 0.3419159088062638, + "grad_norm": 45.37674331665039, + "learning_rate": 8.364804379391474e-06, + "loss": 33.6483, + "step": 169260 + }, + { + "epoch": 0.34193610943894764, + "grad_norm": 479.5264892578125, + "learning_rate": 8.364546174834431e-06, + "loss": 36.6652, + "step": 169270 + }, + { + "epoch": 0.34195631007163146, + "grad_norm": 696.8583374023438, + "learning_rate": 8.364287953879e-06, + "loss": 18.2568, + "step": 169280 + }, + { + "epoch": 0.3419765107043153, + "grad_norm": 393.2134094238281, + "learning_rate": 8.364029716526437e-06, + "loss": 13.7931, + "step": 169290 + }, + { + "epoch": 0.34199671133699905, + "grad_norm": 101.37198638916016, + "learning_rate": 8.363771462778e-06, + "loss": 29.9042, + "step": 169300 + }, + { + "epoch": 0.34201691196968287, + "grad_norm": 689.8870239257812, + "learning_rate": 8.36351319263495e-06, + "loss": 22.3282, + "step": 169310 + }, + { + "epoch": 0.3420371126023667, + "grad_norm": 436.01104736328125, + "learning_rate": 8.363254906098543e-06, + "loss": 24.0667, + "step": 169320 + }, + { + "epoch": 0.3420573132350505, + "grad_norm": 552.0741577148438, + "learning_rate": 8.36299660317004e-06, + "loss": 20.0332, + "step": 169330 + }, + { + "epoch": 0.34207751386773433, + "grad_norm": 387.7598876953125, + "learning_rate": 8.3627382838507e-06, + "loss": 28.4696, + "step": 169340 + }, + { + "epoch": 0.34209771450041815, + "grad_norm": 443.9676818847656, + "learning_rate": 8.36247994814178e-06, + "loss": 14.1799, + "step": 169350 + }, + { + "epoch": 0.34211791513310197, + "grad_norm": 539.8017578125, + "learning_rate": 8.362221596044542e-06, + "loss": 24.8048, + "step": 169360 + }, + { + "epoch": 0.3421381157657858, + "grad_norm": 235.52764892578125, + "learning_rate": 8.361963227560244e-06, + "loss": 
23.0377, + "step": 169370 + }, + { + "epoch": 0.3421583163984696, + "grad_norm": 80.1401596069336, + "learning_rate": 8.361704842690144e-06, + "loss": 16.5207, + "step": 169380 + }, + { + "epoch": 0.34217851703115343, + "grad_norm": 561.2789916992188, + "learning_rate": 8.361446441435503e-06, + "loss": 26.294, + "step": 169390 + }, + { + "epoch": 0.34219871766383725, + "grad_norm": 302.1050720214844, + "learning_rate": 8.361188023797581e-06, + "loss": 19.0365, + "step": 169400 + }, + { + "epoch": 0.34221891829652107, + "grad_norm": 692.5919799804688, + "learning_rate": 8.360929589777634e-06, + "loss": 29.2574, + "step": 169410 + }, + { + "epoch": 0.3422391189292049, + "grad_norm": 690.12646484375, + "learning_rate": 8.360671139376925e-06, + "loss": 13.6711, + "step": 169420 + }, + { + "epoch": 0.34225931956188865, + "grad_norm": 266.81707763671875, + "learning_rate": 8.360412672596712e-06, + "loss": 20.4914, + "step": 169430 + }, + { + "epoch": 0.3422795201945725, + "grad_norm": 605.1412353515625, + "learning_rate": 8.360154189438257e-06, + "loss": 18.8189, + "step": 169440 + }, + { + "epoch": 0.3422997208272563, + "grad_norm": 182.8216094970703, + "learning_rate": 8.359895689902815e-06, + "loss": 22.1485, + "step": 169450 + }, + { + "epoch": 0.3423199214599401, + "grad_norm": 551.4183959960938, + "learning_rate": 8.35963717399165e-06, + "loss": 15.8574, + "step": 169460 + }, + { + "epoch": 0.34234012209262393, + "grad_norm": 714.9756469726562, + "learning_rate": 8.359378641706021e-06, + "loss": 23.5323, + "step": 169470 + }, + { + "epoch": 0.34236032272530775, + "grad_norm": 460.0231018066406, + "learning_rate": 8.35912009304719e-06, + "loss": 19.5928, + "step": 169480 + }, + { + "epoch": 0.3423805233579916, + "grad_norm": 262.87603759765625, + "learning_rate": 8.358861528016413e-06, + "loss": 35.9306, + "step": 169490 + }, + { + "epoch": 0.3424007239906754, + "grad_norm": 171.7864990234375, + "learning_rate": 8.358602946614952e-06, + "loss": 24.3828, + "step": 169500 + }, + { + "epoch": 0.3424209246233592, + "grad_norm": 451.3740234375, + "learning_rate": 8.358344348844068e-06, + "loss": 18.9796, + "step": 169510 + }, + { + "epoch": 0.34244112525604303, + "grad_norm": 72.87115478515625, + "learning_rate": 8.358085734705021e-06, + "loss": 18.0904, + "step": 169520 + }, + { + "epoch": 0.34246132588872685, + "grad_norm": 197.48690795898438, + "learning_rate": 8.357827104199073e-06, + "loss": 18.9026, + "step": 169530 + }, + { + "epoch": 0.3424815265214107, + "grad_norm": 776.4761962890625, + "learning_rate": 8.357568457327481e-06, + "loss": 26.4053, + "step": 169540 + }, + { + "epoch": 0.3425017271540945, + "grad_norm": 186.80355834960938, + "learning_rate": 8.357309794091508e-06, + "loss": 21.6301, + "step": 169550 + }, + { + "epoch": 0.34252192778677826, + "grad_norm": 293.9378662109375, + "learning_rate": 8.357051114492414e-06, + "loss": 10.8797, + "step": 169560 + }, + { + "epoch": 0.3425421284194621, + "grad_norm": 304.8777160644531, + "learning_rate": 8.35679241853146e-06, + "loss": 15.5547, + "step": 169570 + }, + { + "epoch": 0.3425623290521459, + "grad_norm": 469.260498046875, + "learning_rate": 8.356533706209907e-06, + "loss": 18.2517, + "step": 169580 + }, + { + "epoch": 0.3425825296848297, + "grad_norm": 459.9218444824219, + "learning_rate": 8.356274977529015e-06, + "loss": 21.3798, + "step": 169590 + }, + { + "epoch": 0.34260273031751354, + "grad_norm": 429.17529296875, + "learning_rate": 8.356016232490047e-06, + "loss": 27.0815, + "step": 169600 + }, + { + "epoch": 
0.34262293095019736, + "grad_norm": 239.51206970214844, + "learning_rate": 8.355757471094263e-06, + "loss": 13.7177, + "step": 169610 + }, + { + "epoch": 0.3426431315828812, + "grad_norm": 382.5251159667969, + "learning_rate": 8.355498693342925e-06, + "loss": 25.9149, + "step": 169620 + }, + { + "epoch": 0.342663332215565, + "grad_norm": 422.1537780761719, + "learning_rate": 8.355239899237291e-06, + "loss": 24.0822, + "step": 169630 + }, + { + "epoch": 0.3426835328482488, + "grad_norm": 0.0, + "learning_rate": 8.354981088778626e-06, + "loss": 15.7907, + "step": 169640 + }, + { + "epoch": 0.34270373348093264, + "grad_norm": 763.3910522460938, + "learning_rate": 8.35472226196819e-06, + "loss": 22.5954, + "step": 169650 + }, + { + "epoch": 0.34272393411361646, + "grad_norm": 506.7947998046875, + "learning_rate": 8.354463418807245e-06, + "loss": 17.1152, + "step": 169660 + }, + { + "epoch": 0.3427441347463003, + "grad_norm": 408.3777770996094, + "learning_rate": 8.35420455929705e-06, + "loss": 14.6228, + "step": 169670 + }, + { + "epoch": 0.3427643353789841, + "grad_norm": 161.3364715576172, + "learning_rate": 8.35394568343887e-06, + "loss": 24.6751, + "step": 169680 + }, + { + "epoch": 0.34278453601166786, + "grad_norm": 251.61575317382812, + "learning_rate": 8.353686791233969e-06, + "loss": 15.8552, + "step": 169690 + }, + { + "epoch": 0.3428047366443517, + "grad_norm": 247.20155334472656, + "learning_rate": 8.353427882683601e-06, + "loss": 20.0482, + "step": 169700 + }, + { + "epoch": 0.3428249372770355, + "grad_norm": 528.7928466796875, + "learning_rate": 8.353168957789033e-06, + "loss": 27.7193, + "step": 169710 + }, + { + "epoch": 0.3428451379097193, + "grad_norm": 566.47705078125, + "learning_rate": 8.352910016551527e-06, + "loss": 28.6182, + "step": 169720 + }, + { + "epoch": 0.34286533854240314, + "grad_norm": 401.88116455078125, + "learning_rate": 8.352651058972344e-06, + "loss": 25.6401, + "step": 169730 + }, + { + "epoch": 0.34288553917508696, + "grad_norm": 815.401611328125, + "learning_rate": 8.352392085052748e-06, + "loss": 23.015, + "step": 169740 + }, + { + "epoch": 0.3429057398077708, + "grad_norm": 188.4127197265625, + "learning_rate": 8.352133094793996e-06, + "loss": 25.489, + "step": 169750 + }, + { + "epoch": 0.3429259404404546, + "grad_norm": 434.46868896484375, + "learning_rate": 8.351874088197356e-06, + "loss": 14.7717, + "step": 169760 + }, + { + "epoch": 0.3429461410731384, + "grad_norm": 288.19000244140625, + "learning_rate": 8.351615065264088e-06, + "loss": 28.5178, + "step": 169770 + }, + { + "epoch": 0.34296634170582224, + "grad_norm": 353.88775634765625, + "learning_rate": 8.351356025995454e-06, + "loss": 21.3087, + "step": 169780 + }, + { + "epoch": 0.34298654233850606, + "grad_norm": 359.5602111816406, + "learning_rate": 8.351096970392718e-06, + "loss": 20.6816, + "step": 169790 + }, + { + "epoch": 0.3430067429711899, + "grad_norm": 336.394775390625, + "learning_rate": 8.350837898457142e-06, + "loss": 32.5338, + "step": 169800 + }, + { + "epoch": 0.34302694360387365, + "grad_norm": 65.51998901367188, + "learning_rate": 8.350578810189988e-06, + "loss": 18.3302, + "step": 169810 + }, + { + "epoch": 0.34304714423655747, + "grad_norm": 410.8455505371094, + "learning_rate": 8.35031970559252e-06, + "loss": 14.3117, + "step": 169820 + }, + { + "epoch": 0.3430673448692413, + "grad_norm": 801.1245727539062, + "learning_rate": 8.350060584666e-06, + "loss": 27.8058, + "step": 169830 + }, + { + "epoch": 0.3430875455019251, + "grad_norm": 637.9276123046875, + 
"learning_rate": 8.34980144741169e-06, + "loss": 26.9451, + "step": 169840 + }, + { + "epoch": 0.3431077461346089, + "grad_norm": 320.0799560546875, + "learning_rate": 8.349542293830854e-06, + "loss": 20.0242, + "step": 169850 + }, + { + "epoch": 0.34312794676729275, + "grad_norm": 358.5277099609375, + "learning_rate": 8.349283123924756e-06, + "loss": 22.6337, + "step": 169860 + }, + { + "epoch": 0.34314814739997657, + "grad_norm": 101.61077117919922, + "learning_rate": 8.349023937694658e-06, + "loss": 23.6921, + "step": 169870 + }, + { + "epoch": 0.3431683480326604, + "grad_norm": 416.40826416015625, + "learning_rate": 8.348764735141823e-06, + "loss": 22.7895, + "step": 169880 + }, + { + "epoch": 0.3431885486653442, + "grad_norm": 31.98366928100586, + "learning_rate": 8.348505516267515e-06, + "loss": 39.1289, + "step": 169890 + }, + { + "epoch": 0.343208749298028, + "grad_norm": 885.4635620117188, + "learning_rate": 8.348246281072998e-06, + "loss": 34.4323, + "step": 169900 + }, + { + "epoch": 0.34322894993071185, + "grad_norm": 2524.4736328125, + "learning_rate": 8.347987029559534e-06, + "loss": 46.8328, + "step": 169910 + }, + { + "epoch": 0.34324915056339567, + "grad_norm": 428.1924743652344, + "learning_rate": 8.347727761728388e-06, + "loss": 16.0055, + "step": 169920 + }, + { + "epoch": 0.3432693511960795, + "grad_norm": 1.726176381111145, + "learning_rate": 8.347468477580822e-06, + "loss": 13.6668, + "step": 169930 + }, + { + "epoch": 0.34328955182876325, + "grad_norm": 56.078792572021484, + "learning_rate": 8.347209177118101e-06, + "loss": 16.26, + "step": 169940 + }, + { + "epoch": 0.34330975246144707, + "grad_norm": 474.4909362792969, + "learning_rate": 8.346949860341489e-06, + "loss": 22.5801, + "step": 169950 + }, + { + "epoch": 0.3433299530941309, + "grad_norm": 316.09130859375, + "learning_rate": 8.34669052725225e-06, + "loss": 14.1333, + "step": 169960 + }, + { + "epoch": 0.3433501537268147, + "grad_norm": 513.1192626953125, + "learning_rate": 8.346431177851645e-06, + "loss": 15.1452, + "step": 169970 + }, + { + "epoch": 0.34337035435949853, + "grad_norm": 257.24407958984375, + "learning_rate": 8.346171812140942e-06, + "loss": 11.9185, + "step": 169980 + }, + { + "epoch": 0.34339055499218235, + "grad_norm": 700.6121215820312, + "learning_rate": 8.345912430121403e-06, + "loss": 35.5041, + "step": 169990 + }, + { + "epoch": 0.34341075562486617, + "grad_norm": 217.2509002685547, + "learning_rate": 8.345653031794292e-06, + "loss": 15.1854, + "step": 170000 + }, + { + "epoch": 0.34343095625755, + "grad_norm": 188.48707580566406, + "learning_rate": 8.345393617160876e-06, + "loss": 17.3443, + "step": 170010 + }, + { + "epoch": 0.3434511568902338, + "grad_norm": 315.58544921875, + "learning_rate": 8.345134186222415e-06, + "loss": 30.7932, + "step": 170020 + }, + { + "epoch": 0.34347135752291763, + "grad_norm": 203.83631896972656, + "learning_rate": 8.344874738980175e-06, + "loss": 21.2119, + "step": 170030 + }, + { + "epoch": 0.34349155815560145, + "grad_norm": 211.39915466308594, + "learning_rate": 8.344615275435423e-06, + "loss": 31.5429, + "step": 170040 + }, + { + "epoch": 0.34351175878828527, + "grad_norm": 938.8517456054688, + "learning_rate": 8.34435579558942e-06, + "loss": 30.4447, + "step": 170050 + }, + { + "epoch": 0.3435319594209691, + "grad_norm": 530.2730712890625, + "learning_rate": 8.344096299443434e-06, + "loss": 19.6267, + "step": 170060 + }, + { + "epoch": 0.34355216005365286, + "grad_norm": 242.96951293945312, + "learning_rate": 8.34383678699873e-06, + "loss": 
28.5493, + "step": 170070 + }, + { + "epoch": 0.3435723606863367, + "grad_norm": 283.67205810546875, + "learning_rate": 8.343577258256567e-06, + "loss": 22.3562, + "step": 170080 + }, + { + "epoch": 0.3435925613190205, + "grad_norm": 599.8084106445312, + "learning_rate": 8.343317713218218e-06, + "loss": 18.2178, + "step": 170090 + }, + { + "epoch": 0.3436127619517043, + "grad_norm": 257.37677001953125, + "learning_rate": 8.343058151884942e-06, + "loss": 28.3873, + "step": 170100 + }, + { + "epoch": 0.34363296258438814, + "grad_norm": 1036.1143798828125, + "learning_rate": 8.342798574258005e-06, + "loss": 40.1173, + "step": 170110 + }, + { + "epoch": 0.34365316321707196, + "grad_norm": 321.11163330078125, + "learning_rate": 8.342538980338675e-06, + "loss": 24.2756, + "step": 170120 + }, + { + "epoch": 0.3436733638497558, + "grad_norm": 289.0916748046875, + "learning_rate": 8.342279370128215e-06, + "loss": 14.9477, + "step": 170130 + }, + { + "epoch": 0.3436935644824396, + "grad_norm": 1119.5335693359375, + "learning_rate": 8.34201974362789e-06, + "loss": 30.5628, + "step": 170140 + }, + { + "epoch": 0.3437137651151234, + "grad_norm": 419.0737609863281, + "learning_rate": 8.341760100838967e-06, + "loss": 12.6939, + "step": 170150 + }, + { + "epoch": 0.34373396574780724, + "grad_norm": 301.77398681640625, + "learning_rate": 8.341500441762708e-06, + "loss": 37.2242, + "step": 170160 + }, + { + "epoch": 0.34375416638049106, + "grad_norm": 295.69183349609375, + "learning_rate": 8.341240766400385e-06, + "loss": 19.4963, + "step": 170170 + }, + { + "epoch": 0.3437743670131749, + "grad_norm": 610.6740112304688, + "learning_rate": 8.340981074753258e-06, + "loss": 24.2911, + "step": 170180 + }, + { + "epoch": 0.3437945676458587, + "grad_norm": 407.9315185546875, + "learning_rate": 8.340721366822594e-06, + "loss": 14.1512, + "step": 170190 + }, + { + "epoch": 0.34381476827854246, + "grad_norm": 1188.9407958984375, + "learning_rate": 8.34046164260966e-06, + "loss": 24.054, + "step": 170200 + }, + { + "epoch": 0.3438349689112263, + "grad_norm": 221.2847900390625, + "learning_rate": 8.34020190211572e-06, + "loss": 26.5996, + "step": 170210 + }, + { + "epoch": 0.3438551695439101, + "grad_norm": 406.8226013183594, + "learning_rate": 8.33994214534204e-06, + "loss": 19.9345, + "step": 170220 + }, + { + "epoch": 0.3438753701765939, + "grad_norm": 574.2039794921875, + "learning_rate": 8.33968237228989e-06, + "loss": 18.2827, + "step": 170230 + }, + { + "epoch": 0.34389557080927774, + "grad_norm": 428.4683532714844, + "learning_rate": 8.339422582960533e-06, + "loss": 14.6638, + "step": 170240 + }, + { + "epoch": 0.34391577144196156, + "grad_norm": 1262.0887451171875, + "learning_rate": 8.339162777355233e-06, + "loss": 20.0347, + "step": 170250 + }, + { + "epoch": 0.3439359720746454, + "grad_norm": 535.7229614257812, + "learning_rate": 8.338902955475261e-06, + "loss": 18.6566, + "step": 170260 + }, + { + "epoch": 0.3439561727073292, + "grad_norm": 298.4888610839844, + "learning_rate": 8.33864311732188e-06, + "loss": 22.3802, + "step": 170270 + }, + { + "epoch": 0.343976373340013, + "grad_norm": 172.71926879882812, + "learning_rate": 8.338383262896357e-06, + "loss": 36.8636, + "step": 170280 + }, + { + "epoch": 0.34399657397269684, + "grad_norm": 452.70599365234375, + "learning_rate": 8.33812339219996e-06, + "loss": 9.8566, + "step": 170290 + }, + { + "epoch": 0.34401677460538066, + "grad_norm": 188.11831665039062, + "learning_rate": 8.337863505233954e-06, + "loss": 16.6795, + "step": 170300 + }, + { + "epoch": 
0.3440369752380645, + "grad_norm": 320.6652526855469, + "learning_rate": 8.337603601999605e-06, + "loss": 19.1898, + "step": 170310 + }, + { + "epoch": 0.34405717587074824, + "grad_norm": 454.8415222167969, + "learning_rate": 8.337343682498181e-06, + "loss": 20.4676, + "step": 170320 + }, + { + "epoch": 0.34407737650343206, + "grad_norm": 394.4899597167969, + "learning_rate": 8.33708374673095e-06, + "loss": 16.1026, + "step": 170330 + }, + { + "epoch": 0.3440975771361159, + "grad_norm": 160.0926055908203, + "learning_rate": 8.336823794699177e-06, + "loss": 37.1404, + "step": 170340 + }, + { + "epoch": 0.3441177777687997, + "grad_norm": 891.3941650390625, + "learning_rate": 8.336563826404129e-06, + "loss": 15.6676, + "step": 170350 + }, + { + "epoch": 0.3441379784014835, + "grad_norm": 397.9073486328125, + "learning_rate": 8.336303841847073e-06, + "loss": 22.0974, + "step": 170360 + }, + { + "epoch": 0.34415817903416734, + "grad_norm": 361.884033203125, + "learning_rate": 8.336043841029278e-06, + "loss": 21.7622, + "step": 170370 + }, + { + "epoch": 0.34417837966685116, + "grad_norm": 673.8883056640625, + "learning_rate": 8.33578382395201e-06, + "loss": 40.7657, + "step": 170380 + }, + { + "epoch": 0.344198580299535, + "grad_norm": 436.1302490234375, + "learning_rate": 8.335523790616536e-06, + "loss": 19.4727, + "step": 170390 + }, + { + "epoch": 0.3442187809322188, + "grad_norm": 349.3412780761719, + "learning_rate": 8.335263741024123e-06, + "loss": 20.2046, + "step": 170400 + }, + { + "epoch": 0.3442389815649026, + "grad_norm": 921.329345703125, + "learning_rate": 8.33500367517604e-06, + "loss": 32.5087, + "step": 170410 + }, + { + "epoch": 0.34425918219758644, + "grad_norm": 444.14471435546875, + "learning_rate": 8.334743593073553e-06, + "loss": 32.0047, + "step": 170420 + }, + { + "epoch": 0.34427938283027026, + "grad_norm": 563.543212890625, + "learning_rate": 8.33448349471793e-06, + "loss": 10.8298, + "step": 170430 + }, + { + "epoch": 0.3442995834629541, + "grad_norm": 953.4566650390625, + "learning_rate": 8.334223380110438e-06, + "loss": 28.8473, + "step": 170440 + }, + { + "epoch": 0.34431978409563785, + "grad_norm": 500.12701416015625, + "learning_rate": 8.333963249252347e-06, + "loss": 27.48, + "step": 170450 + }, + { + "epoch": 0.34433998472832167, + "grad_norm": 282.26861572265625, + "learning_rate": 8.333703102144924e-06, + "loss": 15.609, + "step": 170460 + }, + { + "epoch": 0.3443601853610055, + "grad_norm": 769.2520751953125, + "learning_rate": 8.333442938789435e-06, + "loss": 19.0537, + "step": 170470 + }, + { + "epoch": 0.3443803859936893, + "grad_norm": 1127.9569091796875, + "learning_rate": 8.333182759187151e-06, + "loss": 24.0776, + "step": 170480 + }, + { + "epoch": 0.34440058662637313, + "grad_norm": 518.5499877929688, + "learning_rate": 8.332922563339336e-06, + "loss": 20.4763, + "step": 170490 + }, + { + "epoch": 0.34442078725905695, + "grad_norm": 1037.2139892578125, + "learning_rate": 8.332662351247262e-06, + "loss": 22.9443, + "step": 170500 + }, + { + "epoch": 0.34444098789174077, + "grad_norm": 362.7684326171875, + "learning_rate": 8.332402122912198e-06, + "loss": 21.2911, + "step": 170510 + }, + { + "epoch": 0.3444611885244246, + "grad_norm": 114.61538696289062, + "learning_rate": 8.332141878335407e-06, + "loss": 39.4521, + "step": 170520 + }, + { + "epoch": 0.3444813891571084, + "grad_norm": 157.37367248535156, + "learning_rate": 8.331881617518163e-06, + "loss": 13.0495, + "step": 170530 + }, + { + "epoch": 0.34450158978979223, + "grad_norm": 
719.7300415039062, + "learning_rate": 8.331621340461731e-06, + "loss": 24.5822, + "step": 170540 + }, + { + "epoch": 0.34452179042247605, + "grad_norm": 991.0283813476562, + "learning_rate": 8.33136104716738e-06, + "loss": 35.9608, + "step": 170550 + }, + { + "epoch": 0.34454199105515987, + "grad_norm": 425.6492004394531, + "learning_rate": 8.331100737636381e-06, + "loss": 22.3316, + "step": 170560 + }, + { + "epoch": 0.3445621916878437, + "grad_norm": 193.59051513671875, + "learning_rate": 8.330840411869999e-06, + "loss": 21.6656, + "step": 170570 + }, + { + "epoch": 0.34458239232052745, + "grad_norm": 160.44122314453125, + "learning_rate": 8.330580069869506e-06, + "loss": 12.0201, + "step": 170580 + }, + { + "epoch": 0.3446025929532113, + "grad_norm": 287.67901611328125, + "learning_rate": 8.33031971163617e-06, + "loss": 20.8663, + "step": 170590 + }, + { + "epoch": 0.3446227935858951, + "grad_norm": 404.4389343261719, + "learning_rate": 8.33005933717126e-06, + "loss": 17.7059, + "step": 170600 + }, + { + "epoch": 0.3446429942185789, + "grad_norm": 782.0043334960938, + "learning_rate": 8.329798946476042e-06, + "loss": 24.3673, + "step": 170610 + }, + { + "epoch": 0.34466319485126273, + "grad_norm": 375.68548583984375, + "learning_rate": 8.329538539551791e-06, + "loss": 37.0092, + "step": 170620 + }, + { + "epoch": 0.34468339548394655, + "grad_norm": 1607.6585693359375, + "learning_rate": 8.32927811639977e-06, + "loss": 26.7318, + "step": 170630 + }, + { + "epoch": 0.3447035961166304, + "grad_norm": 743.4594116210938, + "learning_rate": 8.329017677021254e-06, + "loss": 14.8951, + "step": 170640 + }, + { + "epoch": 0.3447237967493142, + "grad_norm": 220.3754119873047, + "learning_rate": 8.328757221417507e-06, + "loss": 13.5273, + "step": 170650 + }, + { + "epoch": 0.344743997381998, + "grad_norm": 1711.363525390625, + "learning_rate": 8.328496749589803e-06, + "loss": 44.6478, + "step": 170660 + }, + { + "epoch": 0.34476419801468183, + "grad_norm": 283.867919921875, + "learning_rate": 8.328236261539411e-06, + "loss": 21.2721, + "step": 170670 + }, + { + "epoch": 0.34478439864736565, + "grad_norm": 352.4759826660156, + "learning_rate": 8.327975757267596e-06, + "loss": 18.2324, + "step": 170680 + }, + { + "epoch": 0.3448045992800495, + "grad_norm": 143.2894287109375, + "learning_rate": 8.327715236775634e-06, + "loss": 11.3667, + "step": 170690 + }, + { + "epoch": 0.3448247999127333, + "grad_norm": 104.88064575195312, + "learning_rate": 8.327454700064788e-06, + "loss": 13.2799, + "step": 170700 + }, + { + "epoch": 0.34484500054541706, + "grad_norm": 194.11032104492188, + "learning_rate": 8.327194147136332e-06, + "loss": 34.7384, + "step": 170710 + }, + { + "epoch": 0.3448652011781009, + "grad_norm": 1422.937744140625, + "learning_rate": 8.326933577991536e-06, + "loss": 31.012, + "step": 170720 + }, + { + "epoch": 0.3448854018107847, + "grad_norm": 322.70208740234375, + "learning_rate": 8.326672992631671e-06, + "loss": 28.5852, + "step": 170730 + }, + { + "epoch": 0.3449056024434685, + "grad_norm": 255.77743530273438, + "learning_rate": 8.326412391058003e-06, + "loss": 16.4344, + "step": 170740 + }, + { + "epoch": 0.34492580307615234, + "grad_norm": 208.72271728515625, + "learning_rate": 8.326151773271805e-06, + "loss": 21.4215, + "step": 170750 + }, + { + "epoch": 0.34494600370883616, + "grad_norm": 688.43896484375, + "learning_rate": 8.325891139274348e-06, + "loss": 25.3622, + "step": 170760 + }, + { + "epoch": 0.34496620434152, + "grad_norm": 878.5773315429688, + "learning_rate": 
8.325630489066899e-06, + "loss": 29.5908, + "step": 170770 + }, + { + "epoch": 0.3449864049742038, + "grad_norm": 315.8194580078125, + "learning_rate": 8.325369822650731e-06, + "loss": 18.6706, + "step": 170780 + }, + { + "epoch": 0.3450066056068876, + "grad_norm": 331.92791748046875, + "learning_rate": 8.325109140027115e-06, + "loss": 21.8603, + "step": 170790 + }, + { + "epoch": 0.34502680623957144, + "grad_norm": 233.86302185058594, + "learning_rate": 8.324848441197317e-06, + "loss": 24.5369, + "step": 170800 + }, + { + "epoch": 0.34504700687225526, + "grad_norm": 94.36721801757812, + "learning_rate": 8.324587726162614e-06, + "loss": 17.6402, + "step": 170810 + }, + { + "epoch": 0.3450672075049391, + "grad_norm": 102.23753356933594, + "learning_rate": 8.324326994924272e-06, + "loss": 18.1454, + "step": 170820 + }, + { + "epoch": 0.3450874081376229, + "grad_norm": 831.04296875, + "learning_rate": 8.324066247483565e-06, + "loss": 14.6294, + "step": 170830 + }, + { + "epoch": 0.34510760877030666, + "grad_norm": 124.45799255371094, + "learning_rate": 8.323805483841762e-06, + "loss": 28.6171, + "step": 170840 + }, + { + "epoch": 0.3451278094029905, + "grad_norm": 201.7651824951172, + "learning_rate": 8.323544704000134e-06, + "loss": 20.1691, + "step": 170850 + }, + { + "epoch": 0.3451480100356743, + "grad_norm": 556.4908447265625, + "learning_rate": 8.323283907959952e-06, + "loss": 12.2531, + "step": 170860 + }, + { + "epoch": 0.3451682106683581, + "grad_norm": 426.6864929199219, + "learning_rate": 8.323023095722486e-06, + "loss": 14.0918, + "step": 170870 + }, + { + "epoch": 0.34518841130104194, + "grad_norm": 429.3362121582031, + "learning_rate": 8.32276226728901e-06, + "loss": 25.1534, + "step": 170880 + }, + { + "epoch": 0.34520861193372576, + "grad_norm": 325.4744567871094, + "learning_rate": 8.322501422660794e-06, + "loss": 19.3162, + "step": 170890 + }, + { + "epoch": 0.3452288125664096, + "grad_norm": 2206.974853515625, + "learning_rate": 8.32224056183911e-06, + "loss": 53.0366, + "step": 170900 + }, + { + "epoch": 0.3452490131990934, + "grad_norm": 367.0641784667969, + "learning_rate": 8.321979684825225e-06, + "loss": 21.5768, + "step": 170910 + }, + { + "epoch": 0.3452692138317772, + "grad_norm": 653.0476684570312, + "learning_rate": 8.321718791620417e-06, + "loss": 22.0793, + "step": 170920 + }, + { + "epoch": 0.34528941446446104, + "grad_norm": 646.60888671875, + "learning_rate": 8.321457882225952e-06, + "loss": 9.5773, + "step": 170930 + }, + { + "epoch": 0.34530961509714486, + "grad_norm": 224.6949462890625, + "learning_rate": 8.321196956643107e-06, + "loss": 29.1476, + "step": 170940 + }, + { + "epoch": 0.3453298157298287, + "grad_norm": 324.59033203125, + "learning_rate": 8.320936014873148e-06, + "loss": 32.6603, + "step": 170950 + }, + { + "epoch": 0.34535001636251245, + "grad_norm": 179.57569885253906, + "learning_rate": 8.320675056917353e-06, + "loss": 20.4256, + "step": 170960 + }, + { + "epoch": 0.34537021699519627, + "grad_norm": 334.9712219238281, + "learning_rate": 8.320414082776987e-06, + "loss": 24.2263, + "step": 170970 + }, + { + "epoch": 0.3453904176278801, + "grad_norm": 157.3404998779297, + "learning_rate": 8.320153092453326e-06, + "loss": 22.3718, + "step": 170980 + }, + { + "epoch": 0.3454106182605639, + "grad_norm": 392.4839782714844, + "learning_rate": 8.319892085947643e-06, + "loss": 19.561, + "step": 170990 + }, + { + "epoch": 0.3454308188932477, + "grad_norm": 309.3358154296875, + "learning_rate": 8.319631063261209e-06, + "loss": 27.3685, + "step": 
171000 + }, + { + "epoch": 0.34545101952593155, + "grad_norm": 568.41552734375, + "learning_rate": 8.319370024395294e-06, + "loss": 20.216, + "step": 171010 + }, + { + "epoch": 0.34547122015861537, + "grad_norm": 484.2926025390625, + "learning_rate": 8.319108969351173e-06, + "loss": 27.3912, + "step": 171020 + }, + { + "epoch": 0.3454914207912992, + "grad_norm": 308.9615783691406, + "learning_rate": 8.318847898130118e-06, + "loss": 34.1446, + "step": 171030 + }, + { + "epoch": 0.345511621423983, + "grad_norm": 527.2250366210938, + "learning_rate": 8.318586810733401e-06, + "loss": 16.5062, + "step": 171040 + }, + { + "epoch": 0.3455318220566668, + "grad_norm": 219.6500244140625, + "learning_rate": 8.318325707162293e-06, + "loss": 10.3177, + "step": 171050 + }, + { + "epoch": 0.34555202268935065, + "grad_norm": 161.37271118164062, + "learning_rate": 8.318064587418068e-06, + "loss": 25.4439, + "step": 171060 + }, + { + "epoch": 0.34557222332203447, + "grad_norm": 203.05517578125, + "learning_rate": 8.317803451502e-06, + "loss": 12.2113, + "step": 171070 + }, + { + "epoch": 0.3455924239547183, + "grad_norm": 692.4461669921875, + "learning_rate": 8.31754229941536e-06, + "loss": 30.0388, + "step": 171080 + }, + { + "epoch": 0.34561262458740205, + "grad_norm": 449.1273193359375, + "learning_rate": 8.31728113115942e-06, + "loss": 21.1349, + "step": 171090 + }, + { + "epoch": 0.34563282522008587, + "grad_norm": 533.189453125, + "learning_rate": 8.317019946735456e-06, + "loss": 36.1625, + "step": 171100 + }, + { + "epoch": 0.3456530258527697, + "grad_norm": 79.7922134399414, + "learning_rate": 8.316758746144738e-06, + "loss": 17.4875, + "step": 171110 + }, + { + "epoch": 0.3456732264854535, + "grad_norm": 1937.9715576171875, + "learning_rate": 8.31649752938854e-06, + "loss": 34.8884, + "step": 171120 + }, + { + "epoch": 0.34569342711813733, + "grad_norm": 181.68194580078125, + "learning_rate": 8.316236296468135e-06, + "loss": 24.8936, + "step": 171130 + }, + { + "epoch": 0.34571362775082115, + "grad_norm": 318.2300720214844, + "learning_rate": 8.315975047384798e-06, + "loss": 16.2185, + "step": 171140 + }, + { + "epoch": 0.34573382838350497, + "grad_norm": 716.7720947265625, + "learning_rate": 8.315713782139801e-06, + "loss": 17.966, + "step": 171150 + }, + { + "epoch": 0.3457540290161888, + "grad_norm": 359.93780517578125, + "learning_rate": 8.315452500734415e-06, + "loss": 21.9639, + "step": 171160 + }, + { + "epoch": 0.3457742296488726, + "grad_norm": 214.55775451660156, + "learning_rate": 8.315191203169917e-06, + "loss": 26.7262, + "step": 171170 + }, + { + "epoch": 0.34579443028155643, + "grad_norm": 478.1524353027344, + "learning_rate": 8.314929889447578e-06, + "loss": 18.5229, + "step": 171180 + }, + { + "epoch": 0.34581463091424025, + "grad_norm": 901.6751098632812, + "learning_rate": 8.314668559568674e-06, + "loss": 14.8478, + "step": 171190 + }, + { + "epoch": 0.34583483154692407, + "grad_norm": 648.0640869140625, + "learning_rate": 8.314407213534477e-06, + "loss": 34.5751, + "step": 171200 + }, + { + "epoch": 0.3458550321796079, + "grad_norm": 72.91156005859375, + "learning_rate": 8.31414585134626e-06, + "loss": 27.2991, + "step": 171210 + }, + { + "epoch": 0.34587523281229166, + "grad_norm": 562.7615356445312, + "learning_rate": 8.3138844730053e-06, + "loss": 12.0175, + "step": 171220 + }, + { + "epoch": 0.3458954334449755, + "grad_norm": 354.0174865722656, + "learning_rate": 8.313623078512869e-06, + "loss": 21.4302, + "step": 171230 + }, + { + "epoch": 0.3459156340776593, + "grad_norm": 
894.1097412109375, + "learning_rate": 8.313361667870238e-06, + "loss": 30.4599, + "step": 171240 + }, + { + "epoch": 0.3459358347103431, + "grad_norm": 409.5519714355469, + "learning_rate": 8.313100241078689e-06, + "loss": 32.2646, + "step": 171250 + }, + { + "epoch": 0.34595603534302694, + "grad_norm": 815.2952880859375, + "learning_rate": 8.312838798139488e-06, + "loss": 25.4976, + "step": 171260 + }, + { + "epoch": 0.34597623597571076, + "grad_norm": 390.8360900878906, + "learning_rate": 8.312577339053914e-06, + "loss": 15.6623, + "step": 171270 + }, + { + "epoch": 0.3459964366083946, + "grad_norm": 341.1806945800781, + "learning_rate": 8.312315863823239e-06, + "loss": 11.5922, + "step": 171280 + }, + { + "epoch": 0.3460166372410784, + "grad_norm": 459.4844665527344, + "learning_rate": 8.312054372448737e-06, + "loss": 15.2012, + "step": 171290 + }, + { + "epoch": 0.3460368378737622, + "grad_norm": 334.7923583984375, + "learning_rate": 8.311792864931686e-06, + "loss": 31.4539, + "step": 171300 + }, + { + "epoch": 0.34605703850644604, + "grad_norm": 638.260986328125, + "learning_rate": 8.311531341273355e-06, + "loss": 14.77, + "step": 171310 + }, + { + "epoch": 0.34607723913912986, + "grad_norm": 464.9892578125, + "learning_rate": 8.311269801475026e-06, + "loss": 24.8364, + "step": 171320 + }, + { + "epoch": 0.3460974397718137, + "grad_norm": 618.2025756835938, + "learning_rate": 8.311008245537967e-06, + "loss": 18.2905, + "step": 171330 + }, + { + "epoch": 0.3461176404044975, + "grad_norm": 301.9693908691406, + "learning_rate": 8.310746673463456e-06, + "loss": 15.1808, + "step": 171340 + }, + { + "epoch": 0.34613784103718126, + "grad_norm": 899.296875, + "learning_rate": 8.310485085252767e-06, + "loss": 22.5531, + "step": 171350 + }, + { + "epoch": 0.3461580416698651, + "grad_norm": 747.182861328125, + "learning_rate": 8.310223480907176e-06, + "loss": 21.0325, + "step": 171360 + }, + { + "epoch": 0.3461782423025489, + "grad_norm": 68.04808807373047, + "learning_rate": 8.309961860427957e-06, + "loss": 17.6851, + "step": 171370 + }, + { + "epoch": 0.3461984429352327, + "grad_norm": 361.1148986816406, + "learning_rate": 8.309700223816385e-06, + "loss": 20.9249, + "step": 171380 + }, + { + "epoch": 0.34621864356791654, + "grad_norm": 475.031494140625, + "learning_rate": 8.309438571073734e-06, + "loss": 38.1675, + "step": 171390 + }, + { + "epoch": 0.34623884420060036, + "grad_norm": 575.501220703125, + "learning_rate": 8.309176902201283e-06, + "loss": 22.632, + "step": 171400 + }, + { + "epoch": 0.3462590448332842, + "grad_norm": 58.37808609008789, + "learning_rate": 8.308915217200305e-06, + "loss": 19.541, + "step": 171410 + }, + { + "epoch": 0.346279245465968, + "grad_norm": 258.83099365234375, + "learning_rate": 8.308653516072074e-06, + "loss": 14.9517, + "step": 171420 + }, + { + "epoch": 0.3462994460986518, + "grad_norm": 327.9186096191406, + "learning_rate": 8.30839179881787e-06, + "loss": 27.1265, + "step": 171430 + }, + { + "epoch": 0.34631964673133564, + "grad_norm": 489.5204162597656, + "learning_rate": 8.308130065438963e-06, + "loss": 20.5175, + "step": 171440 + }, + { + "epoch": 0.34633984736401946, + "grad_norm": 185.77662658691406, + "learning_rate": 8.307868315936631e-06, + "loss": 18.2377, + "step": 171450 + }, + { + "epoch": 0.3463600479967033, + "grad_norm": 529.444580078125, + "learning_rate": 8.307606550312152e-06, + "loss": 14.2224, + "step": 171460 + }, + { + "epoch": 0.3463802486293871, + "grad_norm": 798.6822509765625, + "learning_rate": 8.307344768566798e-06, + 
"loss": 32.2639, + "step": 171470 + }, + { + "epoch": 0.34640044926207086, + "grad_norm": 1000.7474975585938, + "learning_rate": 8.30708297070185e-06, + "loss": 34.3624, + "step": 171480 + }, + { + "epoch": 0.3464206498947547, + "grad_norm": 283.7931213378906, + "learning_rate": 8.306821156718577e-06, + "loss": 15.6855, + "step": 171490 + }, + { + "epoch": 0.3464408505274385, + "grad_norm": 435.1275939941406, + "learning_rate": 8.30655932661826e-06, + "loss": 41.0409, + "step": 171500 + }, + { + "epoch": 0.3464610511601223, + "grad_norm": 154.36695861816406, + "learning_rate": 8.306297480402175e-06, + "loss": 17.1858, + "step": 171510 + }, + { + "epoch": 0.34648125179280614, + "grad_norm": 392.240478515625, + "learning_rate": 8.306035618071595e-06, + "loss": 23.5365, + "step": 171520 + }, + { + "epoch": 0.34650145242548996, + "grad_norm": 331.3149108886719, + "learning_rate": 8.305773739627801e-06, + "loss": 9.4739, + "step": 171530 + }, + { + "epoch": 0.3465216530581738, + "grad_norm": 263.245849609375, + "learning_rate": 8.305511845072065e-06, + "loss": 20.0296, + "step": 171540 + }, + { + "epoch": 0.3465418536908576, + "grad_norm": 252.88711547851562, + "learning_rate": 8.305249934405663e-06, + "loss": 21.1912, + "step": 171550 + }, + { + "epoch": 0.3465620543235414, + "grad_norm": 213.32379150390625, + "learning_rate": 8.304988007629878e-06, + "loss": 15.5472, + "step": 171560 + }, + { + "epoch": 0.34658225495622524, + "grad_norm": 964.141357421875, + "learning_rate": 8.30472606474598e-06, + "loss": 41.3969, + "step": 171570 + }, + { + "epoch": 0.34660245558890906, + "grad_norm": 837.5604248046875, + "learning_rate": 8.304464105755248e-06, + "loss": 33.7858, + "step": 171580 + }, + { + "epoch": 0.3466226562215929, + "grad_norm": 223.40711975097656, + "learning_rate": 8.304202130658959e-06, + "loss": 23.8464, + "step": 171590 + }, + { + "epoch": 0.34664285685427665, + "grad_norm": 756.6446533203125, + "learning_rate": 8.303940139458389e-06, + "loss": 17.5319, + "step": 171600 + }, + { + "epoch": 0.34666305748696047, + "grad_norm": 1434.41455078125, + "learning_rate": 8.303678132154817e-06, + "loss": 44.3605, + "step": 171610 + }, + { + "epoch": 0.3466832581196443, + "grad_norm": 33.394493103027344, + "learning_rate": 8.303416108749516e-06, + "loss": 13.2621, + "step": 171620 + }, + { + "epoch": 0.3467034587523281, + "grad_norm": 435.3074645996094, + "learning_rate": 8.303154069243769e-06, + "loss": 27.7944, + "step": 171630 + }, + { + "epoch": 0.34672365938501193, + "grad_norm": 315.0975036621094, + "learning_rate": 8.302892013638846e-06, + "loss": 16.8905, + "step": 171640 + }, + { + "epoch": 0.34674386001769575, + "grad_norm": 1057.85693359375, + "learning_rate": 8.302629941936032e-06, + "loss": 18.137, + "step": 171650 + }, + { + "epoch": 0.34676406065037957, + "grad_norm": 426.70343017578125, + "learning_rate": 8.302367854136598e-06, + "loss": 18.8805, + "step": 171660 + }, + { + "epoch": 0.3467842612830634, + "grad_norm": 328.9032287597656, + "learning_rate": 8.302105750241822e-06, + "loss": 18.166, + "step": 171670 + }, + { + "epoch": 0.3468044619157472, + "grad_norm": 883.0125122070312, + "learning_rate": 8.301843630252986e-06, + "loss": 36.7881, + "step": 171680 + }, + { + "epoch": 0.34682466254843103, + "grad_norm": 266.7763671875, + "learning_rate": 8.301581494171363e-06, + "loss": 25.3635, + "step": 171690 + }, + { + "epoch": 0.34684486318111485, + "grad_norm": 963.8483276367188, + "learning_rate": 8.301319341998231e-06, + "loss": 12.9876, + "step": 171700 + }, + { + "epoch": 
0.34686506381379867, + "grad_norm": 364.8996887207031, + "learning_rate": 8.301057173734872e-06, + "loss": 34.6909, + "step": 171710 + }, + { + "epoch": 0.3468852644464825, + "grad_norm": 408.3586730957031, + "learning_rate": 8.300794989382559e-06, + "loss": 15.5905, + "step": 171720 + }, + { + "epoch": 0.34690546507916625, + "grad_norm": 330.177978515625, + "learning_rate": 8.300532788942571e-06, + "loss": 31.1517, + "step": 171730 + }, + { + "epoch": 0.3469256657118501, + "grad_norm": 621.7115478515625, + "learning_rate": 8.300270572416187e-06, + "loss": 23.2462, + "step": 171740 + }, + { + "epoch": 0.3469458663445339, + "grad_norm": 13.08733081817627, + "learning_rate": 8.300008339804686e-06, + "loss": 10.6224, + "step": 171750 + }, + { + "epoch": 0.3469660669772177, + "grad_norm": 24.804424285888672, + "learning_rate": 8.299746091109343e-06, + "loss": 20.0629, + "step": 171760 + }, + { + "epoch": 0.34698626760990153, + "grad_norm": 22.87811851501465, + "learning_rate": 8.299483826331436e-06, + "loss": 9.8308, + "step": 171770 + }, + { + "epoch": 0.34700646824258535, + "grad_norm": 201.6088409423828, + "learning_rate": 8.299221545472248e-06, + "loss": 24.3398, + "step": 171780 + }, + { + "epoch": 0.3470266688752692, + "grad_norm": 737.10400390625, + "learning_rate": 8.298959248533054e-06, + "loss": 24.2523, + "step": 171790 + }, + { + "epoch": 0.347046869507953, + "grad_norm": 690.1240844726562, + "learning_rate": 8.298696935515132e-06, + "loss": 36.7034, + "step": 171800 + }, + { + "epoch": 0.3470670701406368, + "grad_norm": 233.17715454101562, + "learning_rate": 8.29843460641976e-06, + "loss": 38.8903, + "step": 171810 + }, + { + "epoch": 0.34708727077332063, + "grad_norm": 299.7923278808594, + "learning_rate": 8.29817226124822e-06, + "loss": 15.2415, + "step": 171820 + }, + { + "epoch": 0.34710747140600445, + "grad_norm": 190.00265502929688, + "learning_rate": 8.297909900001787e-06, + "loss": 17.8699, + "step": 171830 + }, + { + "epoch": 0.3471276720386883, + "grad_norm": 581.3510131835938, + "learning_rate": 8.297647522681741e-06, + "loss": 29.8289, + "step": 171840 + }, + { + "epoch": 0.3471478726713721, + "grad_norm": 384.6495361328125, + "learning_rate": 8.297385129289361e-06, + "loss": 8.751, + "step": 171850 + }, + { + "epoch": 0.34716807330405586, + "grad_norm": 87.20552825927734, + "learning_rate": 8.297122719825928e-06, + "loss": 20.6711, + "step": 171860 + }, + { + "epoch": 0.3471882739367397, + "grad_norm": 578.283203125, + "learning_rate": 8.296860294292716e-06, + "loss": 19.5475, + "step": 171870 + }, + { + "epoch": 0.3472084745694235, + "grad_norm": 744.7264404296875, + "learning_rate": 8.296597852691008e-06, + "loss": 28.1937, + "step": 171880 + }, + { + "epoch": 0.3472286752021073, + "grad_norm": 40.18510818481445, + "learning_rate": 8.296335395022083e-06, + "loss": 22.0612, + "step": 171890 + }, + { + "epoch": 0.34724887583479114, + "grad_norm": 421.2962951660156, + "learning_rate": 8.296072921287217e-06, + "loss": 20.1413, + "step": 171900 + }, + { + "epoch": 0.34726907646747496, + "grad_norm": 715.844482421875, + "learning_rate": 8.295810431487692e-06, + "loss": 51.607, + "step": 171910 + }, + { + "epoch": 0.3472892771001588, + "grad_norm": 182.30955505371094, + "learning_rate": 8.295547925624786e-06, + "loss": 22.5666, + "step": 171920 + }, + { + "epoch": 0.3473094777328426, + "grad_norm": 459.4350891113281, + "learning_rate": 8.295285403699783e-06, + "loss": 21.2349, + "step": 171930 + }, + { + "epoch": 0.3473296783655264, + "grad_norm": 326.0625305175781, + 
"learning_rate": 8.295022865713955e-06, + "loss": 19.5352, + "step": 171940 + }, + { + "epoch": 0.34734987899821024, + "grad_norm": 505.67962646484375, + "learning_rate": 8.294760311668586e-06, + "loss": 21.1203, + "step": 171950 + }, + { + "epoch": 0.34737007963089406, + "grad_norm": 626.6988525390625, + "learning_rate": 8.294497741564953e-06, + "loss": 14.3073, + "step": 171960 + }, + { + "epoch": 0.3473902802635779, + "grad_norm": 62.619449615478516, + "learning_rate": 8.29423515540434e-06, + "loss": 20.2754, + "step": 171970 + }, + { + "epoch": 0.3474104808962617, + "grad_norm": 892.3823852539062, + "learning_rate": 8.293972553188023e-06, + "loss": 34.2372, + "step": 171980 + }, + { + "epoch": 0.34743068152894546, + "grad_norm": 332.57415771484375, + "learning_rate": 8.293709934917284e-06, + "loss": 19.2847, + "step": 171990 + }, + { + "epoch": 0.3474508821616293, + "grad_norm": 206.59849548339844, + "learning_rate": 8.293447300593402e-06, + "loss": 17.6362, + "step": 172000 + }, + { + "epoch": 0.3474710827943131, + "grad_norm": 433.46435546875, + "learning_rate": 8.293184650217657e-06, + "loss": 15.8899, + "step": 172010 + }, + { + "epoch": 0.3474912834269969, + "grad_norm": 529.408935546875, + "learning_rate": 8.292921983791332e-06, + "loss": 23.3935, + "step": 172020 + }, + { + "epoch": 0.34751148405968074, + "grad_norm": 312.68878173828125, + "learning_rate": 8.292659301315702e-06, + "loss": 21.1025, + "step": 172030 + }, + { + "epoch": 0.34753168469236456, + "grad_norm": 486.9510803222656, + "learning_rate": 8.29239660279205e-06, + "loss": 11.4331, + "step": 172040 + }, + { + "epoch": 0.3475518853250484, + "grad_norm": 630.8102416992188, + "learning_rate": 8.292133888221659e-06, + "loss": 24.9631, + "step": 172050 + }, + { + "epoch": 0.3475720859577322, + "grad_norm": 322.6904296875, + "learning_rate": 8.291871157605803e-06, + "loss": 22.8038, + "step": 172060 + }, + { + "epoch": 0.347592286590416, + "grad_norm": 263.4180603027344, + "learning_rate": 8.291608410945768e-06, + "loss": 31.7861, + "step": 172070 + }, + { + "epoch": 0.34761248722309984, + "grad_norm": 276.12310791015625, + "learning_rate": 8.291345648242832e-06, + "loss": 21.8643, + "step": 172080 + }, + { + "epoch": 0.34763268785578366, + "grad_norm": 558.2141723632812, + "learning_rate": 8.291082869498277e-06, + "loss": 22.0106, + "step": 172090 + }, + { + "epoch": 0.3476528884884675, + "grad_norm": 965.6648559570312, + "learning_rate": 8.290820074713383e-06, + "loss": 18.0912, + "step": 172100 + }, + { + "epoch": 0.3476730891211513, + "grad_norm": 154.92520141601562, + "learning_rate": 8.290557263889432e-06, + "loss": 3.3864, + "step": 172110 + }, + { + "epoch": 0.34769328975383507, + "grad_norm": 184.02687072753906, + "learning_rate": 8.290294437027704e-06, + "loss": 22.1731, + "step": 172120 + }, + { + "epoch": 0.3477134903865189, + "grad_norm": 547.7509765625, + "learning_rate": 8.29003159412948e-06, + "loss": 18.3219, + "step": 172130 + }, + { + "epoch": 0.3477336910192027, + "grad_norm": 92.7471923828125, + "learning_rate": 8.28976873519604e-06, + "loss": 10.7081, + "step": 172140 + }, + { + "epoch": 0.3477538916518865, + "grad_norm": 347.52655029296875, + "learning_rate": 8.289505860228666e-06, + "loss": 43.5869, + "step": 172150 + }, + { + "epoch": 0.34777409228457035, + "grad_norm": 875.1144409179688, + "learning_rate": 8.28924296922864e-06, + "loss": 29.2674, + "step": 172160 + }, + { + "epoch": 0.34779429291725417, + "grad_norm": 629.087646484375, + "learning_rate": 8.288980062197243e-06, + "loss": 
27.1192, + "step": 172170 + }, + { + "epoch": 0.347814493549938, + "grad_norm": 170.34738159179688, + "learning_rate": 8.288717139135755e-06, + "loss": 7.0775, + "step": 172180 + }, + { + "epoch": 0.3478346941826218, + "grad_norm": 381.4020690917969, + "learning_rate": 8.28845420004546e-06, + "loss": 21.7472, + "step": 172190 + }, + { + "epoch": 0.3478548948153056, + "grad_norm": 423.1697082519531, + "learning_rate": 8.288191244927637e-06, + "loss": 13.2788, + "step": 172200 + }, + { + "epoch": 0.34787509544798945, + "grad_norm": 616.6538696289062, + "learning_rate": 8.28792827378357e-06, + "loss": 12.0924, + "step": 172210 + }, + { + "epoch": 0.34789529608067327, + "grad_norm": 971.9646606445312, + "learning_rate": 8.287665286614538e-06, + "loss": 23.8575, + "step": 172220 + }, + { + "epoch": 0.3479154967133571, + "grad_norm": 748.2013549804688, + "learning_rate": 8.287402283421825e-06, + "loss": 20.1542, + "step": 172230 + }, + { + "epoch": 0.34793569734604085, + "grad_norm": 222.02059936523438, + "learning_rate": 8.287139264206712e-06, + "loss": 34.7346, + "step": 172240 + }, + { + "epoch": 0.34795589797872467, + "grad_norm": 258.45654296875, + "learning_rate": 8.28687622897048e-06, + "loss": 20.7514, + "step": 172250 + }, + { + "epoch": 0.3479760986114085, + "grad_norm": 440.2414245605469, + "learning_rate": 8.286613177714412e-06, + "loss": 23.3005, + "step": 172260 + }, + { + "epoch": 0.3479962992440923, + "grad_norm": 23.830753326416016, + "learning_rate": 8.28635011043979e-06, + "loss": 19.8647, + "step": 172270 + }, + { + "epoch": 0.34801649987677613, + "grad_norm": 302.4520263671875, + "learning_rate": 8.286087027147899e-06, + "loss": 18.095, + "step": 172280 + }, + { + "epoch": 0.34803670050945995, + "grad_norm": 561.2001342773438, + "learning_rate": 8.285823927840015e-06, + "loss": 16.7233, + "step": 172290 + }, + { + "epoch": 0.34805690114214377, + "grad_norm": 197.0220184326172, + "learning_rate": 8.285560812517423e-06, + "loss": 36.6479, + "step": 172300 + }, + { + "epoch": 0.3480771017748276, + "grad_norm": 525.0717163085938, + "learning_rate": 8.285297681181408e-06, + "loss": 20.3847, + "step": 172310 + }, + { + "epoch": 0.3480973024075114, + "grad_norm": 345.0289001464844, + "learning_rate": 8.285034533833251e-06, + "loss": 26.9689, + "step": 172320 + }, + { + "epoch": 0.34811750304019523, + "grad_norm": 576.8797607421875, + "learning_rate": 8.284771370474233e-06, + "loss": 25.3182, + "step": 172330 + }, + { + "epoch": 0.34813770367287905, + "grad_norm": 8.572928428649902, + "learning_rate": 8.284508191105638e-06, + "loss": 17.7019, + "step": 172340 + }, + { + "epoch": 0.34815790430556287, + "grad_norm": 217.74179077148438, + "learning_rate": 8.284244995728749e-06, + "loss": 23.8376, + "step": 172350 + }, + { + "epoch": 0.3481781049382467, + "grad_norm": 599.9877319335938, + "learning_rate": 8.283981784344847e-06, + "loss": 25.6374, + "step": 172360 + }, + { + "epoch": 0.34819830557093046, + "grad_norm": 356.48748779296875, + "learning_rate": 8.283718556955216e-06, + "loss": 19.1465, + "step": 172370 + }, + { + "epoch": 0.3482185062036143, + "grad_norm": 312.2649841308594, + "learning_rate": 8.283455313561141e-06, + "loss": 38.9632, + "step": 172380 + }, + { + "epoch": 0.3482387068362981, + "grad_norm": 724.2706909179688, + "learning_rate": 8.283192054163902e-06, + "loss": 20.6261, + "step": 172390 + }, + { + "epoch": 0.3482589074689819, + "grad_norm": 331.345458984375, + "learning_rate": 8.282928778764783e-06, + "loss": 17.2537, + "step": 172400 + }, + { + "epoch": 
0.34827910810166574, + "grad_norm": 688.9057006835938, + "learning_rate": 8.282665487365067e-06, + "loss": 10.3169, + "step": 172410 + }, + { + "epoch": 0.34829930873434956, + "grad_norm": 391.465087890625, + "learning_rate": 8.282402179966039e-06, + "loss": 28.3406, + "step": 172420 + }, + { + "epoch": 0.3483195093670334, + "grad_norm": 126.01844024658203, + "learning_rate": 8.282138856568978e-06, + "loss": 25.5566, + "step": 172430 + }, + { + "epoch": 0.3483397099997172, + "grad_norm": 267.91357421875, + "learning_rate": 8.281875517175173e-06, + "loss": 13.3885, + "step": 172440 + }, + { + "epoch": 0.348359910632401, + "grad_norm": 1057.038818359375, + "learning_rate": 8.281612161785903e-06, + "loss": 21.8697, + "step": 172450 + }, + { + "epoch": 0.34838011126508484, + "grad_norm": 983.1893310546875, + "learning_rate": 8.281348790402455e-06, + "loss": 33.7804, + "step": 172460 + }, + { + "epoch": 0.34840031189776866, + "grad_norm": 671.6742553710938, + "learning_rate": 8.28108540302611e-06, + "loss": 28.8767, + "step": 172470 + }, + { + "epoch": 0.3484205125304525, + "grad_norm": 2117.237548828125, + "learning_rate": 8.280821999658153e-06, + "loss": 47.7445, + "step": 172480 + }, + { + "epoch": 0.3484407131631363, + "grad_norm": 10.951056480407715, + "learning_rate": 8.280558580299868e-06, + "loss": 23.4332, + "step": 172490 + }, + { + "epoch": 0.34846091379582006, + "grad_norm": 597.7149047851562, + "learning_rate": 8.280295144952537e-06, + "loss": 29.9681, + "step": 172500 + }, + { + "epoch": 0.3484811144285039, + "grad_norm": 523.89501953125, + "learning_rate": 8.280031693617446e-06, + "loss": 27.9063, + "step": 172510 + }, + { + "epoch": 0.3485013150611877, + "grad_norm": 392.02764892578125, + "learning_rate": 8.27976822629588e-06, + "loss": 11.0087, + "step": 172520 + }, + { + "epoch": 0.3485215156938715, + "grad_norm": 211.89456176757812, + "learning_rate": 8.279504742989117e-06, + "loss": 26.7032, + "step": 172530 + }, + { + "epoch": 0.34854171632655534, + "grad_norm": 42.395484924316406, + "learning_rate": 8.27924124369845e-06, + "loss": 15.2733, + "step": 172540 + }, + { + "epoch": 0.34856191695923916, + "grad_norm": 602.2659912109375, + "learning_rate": 8.278977728425157e-06, + "loss": 31.1341, + "step": 172550 + }, + { + "epoch": 0.348582117591923, + "grad_norm": 629.5859375, + "learning_rate": 8.278714197170526e-06, + "loss": 24.7875, + "step": 172560 + }, + { + "epoch": 0.3486023182246068, + "grad_norm": 374.5086669921875, + "learning_rate": 8.278450649935838e-06, + "loss": 33.6697, + "step": 172570 + }, + { + "epoch": 0.3486225188572906, + "grad_norm": 175.51300048828125, + "learning_rate": 8.278187086722378e-06, + "loss": 17.7969, + "step": 172580 + }, + { + "epoch": 0.34864271948997444, + "grad_norm": 426.8929748535156, + "learning_rate": 8.277923507531434e-06, + "loss": 19.0821, + "step": 172590 + }, + { + "epoch": 0.34866292012265826, + "grad_norm": 427.6558837890625, + "learning_rate": 8.277659912364288e-06, + "loss": 18.8651, + "step": 172600 + }, + { + "epoch": 0.3486831207553421, + "grad_norm": 512.8453369140625, + "learning_rate": 8.277396301222223e-06, + "loss": 25.9066, + "step": 172610 + }, + { + "epoch": 0.3487033213880259, + "grad_norm": 471.2196044921875, + "learning_rate": 8.277132674106528e-06, + "loss": 14.5615, + "step": 172620 + }, + { + "epoch": 0.34872352202070966, + "grad_norm": 377.5659484863281, + "learning_rate": 8.276869031018486e-06, + "loss": 27.5667, + "step": 172630 + }, + { + "epoch": 0.3487437226533935, + "grad_norm": 327.4805908203125, + 
"learning_rate": 8.27660537195938e-06, + "loss": 25.7655, + "step": 172640 + }, + { + "epoch": 0.3487639232860773, + "grad_norm": 332.9518127441406, + "learning_rate": 8.276341696930499e-06, + "loss": 14.511, + "step": 172650 + }, + { + "epoch": 0.3487841239187611, + "grad_norm": 221.7066650390625, + "learning_rate": 8.276078005933125e-06, + "loss": 21.6163, + "step": 172660 + }, + { + "epoch": 0.34880432455144494, + "grad_norm": 447.6441345214844, + "learning_rate": 8.275814298968544e-06, + "loss": 15.6934, + "step": 172670 + }, + { + "epoch": 0.34882452518412876, + "grad_norm": 374.6302490234375, + "learning_rate": 8.275550576038043e-06, + "loss": 18.794, + "step": 172680 + }, + { + "epoch": 0.3488447258168126, + "grad_norm": 1010.4691162109375, + "learning_rate": 8.275286837142903e-06, + "loss": 24.7621, + "step": 172690 + }, + { + "epoch": 0.3488649264494964, + "grad_norm": 228.8806610107422, + "learning_rate": 8.275023082284413e-06, + "loss": 16.0672, + "step": 172700 + }, + { + "epoch": 0.3488851270821802, + "grad_norm": 494.7354431152344, + "learning_rate": 8.27475931146386e-06, + "loss": 13.6087, + "step": 172710 + }, + { + "epoch": 0.34890532771486404, + "grad_norm": 1394.575439453125, + "learning_rate": 8.274495524682524e-06, + "loss": 28.2938, + "step": 172720 + }, + { + "epoch": 0.34892552834754786, + "grad_norm": 315.2424011230469, + "learning_rate": 8.274231721941696e-06, + "loss": 25.0352, + "step": 172730 + }, + { + "epoch": 0.3489457289802317, + "grad_norm": 262.1417541503906, + "learning_rate": 8.273967903242659e-06, + "loss": 12.0646, + "step": 172740 + }, + { + "epoch": 0.3489659296129155, + "grad_norm": 698.4266967773438, + "learning_rate": 8.273704068586698e-06, + "loss": 22.4679, + "step": 172750 + }, + { + "epoch": 0.34898613024559927, + "grad_norm": 476.93621826171875, + "learning_rate": 8.273440217975103e-06, + "loss": 19.2444, + "step": 172760 + }, + { + "epoch": 0.3490063308782831, + "grad_norm": 266.60009765625, + "learning_rate": 8.273176351409157e-06, + "loss": 34.1924, + "step": 172770 + }, + { + "epoch": 0.3490265315109669, + "grad_norm": 159.0330047607422, + "learning_rate": 8.272912468890146e-06, + "loss": 17.7355, + "step": 172780 + }, + { + "epoch": 0.34904673214365073, + "grad_norm": 391.28729248046875, + "learning_rate": 8.272648570419357e-06, + "loss": 27.3779, + "step": 172790 + }, + { + "epoch": 0.34906693277633455, + "grad_norm": 66.09520721435547, + "learning_rate": 8.272384655998075e-06, + "loss": 19.4427, + "step": 172800 + }, + { + "epoch": 0.34908713340901837, + "grad_norm": 606.9044799804688, + "learning_rate": 8.272120725627588e-06, + "loss": 31.7733, + "step": 172810 + }, + { + "epoch": 0.3491073340417022, + "grad_norm": 778.39013671875, + "learning_rate": 8.27185677930918e-06, + "loss": 24.3503, + "step": 172820 + }, + { + "epoch": 0.349127534674386, + "grad_norm": 674.9252319335938, + "learning_rate": 8.27159281704414e-06, + "loss": 15.9073, + "step": 172830 + }, + { + "epoch": 0.34914773530706983, + "grad_norm": 793.4208984375, + "learning_rate": 8.271328838833753e-06, + "loss": 25.1093, + "step": 172840 + }, + { + "epoch": 0.34916793593975365, + "grad_norm": 695.896484375, + "learning_rate": 8.271064844679306e-06, + "loss": 30.2209, + "step": 172850 + }, + { + "epoch": 0.34918813657243747, + "grad_norm": 71.12458801269531, + "learning_rate": 8.270800834582087e-06, + "loss": 25.3217, + "step": 172860 + }, + { + "epoch": 0.3492083372051213, + "grad_norm": 674.998779296875, + "learning_rate": 8.270536808543379e-06, + "loss": 25.0619, + 
"step": 172870 + }, + { + "epoch": 0.34922853783780505, + "grad_norm": 566.4926147460938, + "learning_rate": 8.270272766564473e-06, + "loss": 30.4275, + "step": 172880 + }, + { + "epoch": 0.3492487384704889, + "grad_norm": 1088.8902587890625, + "learning_rate": 8.270008708646653e-06, + "loss": 28.3126, + "step": 172890 + }, + { + "epoch": 0.3492689391031727, + "grad_norm": 306.4773864746094, + "learning_rate": 8.269744634791207e-06, + "loss": 22.0916, + "step": 172900 + }, + { + "epoch": 0.3492891397358565, + "grad_norm": 357.73980712890625, + "learning_rate": 8.269480544999424e-06, + "loss": 9.0816, + "step": 172910 + }, + { + "epoch": 0.34930934036854033, + "grad_norm": 341.8287048339844, + "learning_rate": 8.26921643927259e-06, + "loss": 31.488, + "step": 172920 + }, + { + "epoch": 0.34932954100122415, + "grad_norm": 378.9211730957031, + "learning_rate": 8.268952317611989e-06, + "loss": 19.5464, + "step": 172930 + }, + { + "epoch": 0.349349741633908, + "grad_norm": 423.0436096191406, + "learning_rate": 8.268688180018911e-06, + "loss": 20.9778, + "step": 172940 + }, + { + "epoch": 0.3493699422665918, + "grad_norm": 500.8439636230469, + "learning_rate": 8.268424026494646e-06, + "loss": 16.0925, + "step": 172950 + }, + { + "epoch": 0.3493901428992756, + "grad_norm": 640.2675170898438, + "learning_rate": 8.268159857040475e-06, + "loss": 19.7851, + "step": 172960 + }, + { + "epoch": 0.34941034353195943, + "grad_norm": 375.4637451171875, + "learning_rate": 8.267895671657692e-06, + "loss": 22.5761, + "step": 172970 + }, + { + "epoch": 0.34943054416464325, + "grad_norm": 303.6288757324219, + "learning_rate": 8.26763147034758e-06, + "loss": 27.9879, + "step": 172980 + }, + { + "epoch": 0.3494507447973271, + "grad_norm": 216.30111694335938, + "learning_rate": 8.26736725311143e-06, + "loss": 19.6532, + "step": 172990 + }, + { + "epoch": 0.3494709454300109, + "grad_norm": 531.7112426757812, + "learning_rate": 8.267103019950529e-06, + "loss": 10.862, + "step": 173000 + }, + { + "epoch": 0.34949114606269466, + "grad_norm": 86.75955963134766, + "learning_rate": 8.266838770866162e-06, + "loss": 14.3546, + "step": 173010 + }, + { + "epoch": 0.3495113466953785, + "grad_norm": 170.6025848388672, + "learning_rate": 8.26657450585962e-06, + "loss": 16.5934, + "step": 173020 + }, + { + "epoch": 0.3495315473280623, + "grad_norm": 1053.049560546875, + "learning_rate": 8.266310224932191e-06, + "loss": 31.3053, + "step": 173030 + }, + { + "epoch": 0.3495517479607461, + "grad_norm": 134.47523498535156, + "learning_rate": 8.26604592808516e-06, + "loss": 87.7414, + "step": 173040 + }, + { + "epoch": 0.34957194859342994, + "grad_norm": 693.0878295898438, + "learning_rate": 8.26578161531982e-06, + "loss": 29.6518, + "step": 173050 + }, + { + "epoch": 0.34959214922611376, + "grad_norm": 654.5897216796875, + "learning_rate": 8.265517286637453e-06, + "loss": 23.5505, + "step": 173060 + }, + { + "epoch": 0.3496123498587976, + "grad_norm": 668.3735961914062, + "learning_rate": 8.265252942039352e-06, + "loss": 21.3319, + "step": 173070 + }, + { + "epoch": 0.3496325504914814, + "grad_norm": 212.28268432617188, + "learning_rate": 8.264988581526806e-06, + "loss": 14.6275, + "step": 173080 + }, + { + "epoch": 0.3496527511241652, + "grad_norm": 192.173583984375, + "learning_rate": 8.2647242051011e-06, + "loss": 17.1246, + "step": 173090 + }, + { + "epoch": 0.34967295175684904, + "grad_norm": 985.883544921875, + "learning_rate": 8.264459812763525e-06, + "loss": 23.9225, + "step": 173100 + }, + { + "epoch": 0.34969315238953286, + 
"grad_norm": 1110.9190673828125, + "learning_rate": 8.264195404515369e-06, + "loss": 43.0745, + "step": 173110 + }, + { + "epoch": 0.3497133530222167, + "grad_norm": 514.7064208984375, + "learning_rate": 8.26393098035792e-06, + "loss": 21.6134, + "step": 173120 + }, + { + "epoch": 0.3497335536549005, + "grad_norm": 338.3360290527344, + "learning_rate": 8.263666540292468e-06, + "loss": 25.7399, + "step": 173130 + }, + { + "epoch": 0.34975375428758426, + "grad_norm": 155.49681091308594, + "learning_rate": 8.263402084320299e-06, + "loss": 13.0151, + "step": 173140 + }, + { + "epoch": 0.3497739549202681, + "grad_norm": 485.2061767578125, + "learning_rate": 8.263137612442705e-06, + "loss": 29.5074, + "step": 173150 + }, + { + "epoch": 0.3497941555529519, + "grad_norm": 543.310302734375, + "learning_rate": 8.262873124660976e-06, + "loss": 28.8361, + "step": 173160 + }, + { + "epoch": 0.3498143561856357, + "grad_norm": 250.26800537109375, + "learning_rate": 8.262608620976396e-06, + "loss": 25.3538, + "step": 173170 + }, + { + "epoch": 0.34983455681831954, + "grad_norm": 0.0, + "learning_rate": 8.262344101390261e-06, + "loss": 30.8818, + "step": 173180 + }, + { + "epoch": 0.34985475745100336, + "grad_norm": 148.61407470703125, + "learning_rate": 8.262079565903853e-06, + "loss": 35.5837, + "step": 173190 + }, + { + "epoch": 0.3498749580836872, + "grad_norm": 1020.1525268554688, + "learning_rate": 8.261815014518465e-06, + "loss": 19.6972, + "step": 173200 + }, + { + "epoch": 0.349895158716371, + "grad_norm": 517.3057250976562, + "learning_rate": 8.261550447235389e-06, + "loss": 16.0533, + "step": 173210 + }, + { + "epoch": 0.3499153593490548, + "grad_norm": 372.4304504394531, + "learning_rate": 8.26128586405591e-06, + "loss": 14.5533, + "step": 173220 + }, + { + "epoch": 0.34993555998173864, + "grad_norm": 0.0, + "learning_rate": 8.26102126498132e-06, + "loss": 22.9425, + "step": 173230 + }, + { + "epoch": 0.34995576061442246, + "grad_norm": 264.6286926269531, + "learning_rate": 8.260756650012906e-06, + "loss": 22.0927, + "step": 173240 + }, + { + "epoch": 0.3499759612471063, + "grad_norm": 1.9577926397323608, + "learning_rate": 8.260492019151962e-06, + "loss": 22.1662, + "step": 173250 + }, + { + "epoch": 0.3499961618797901, + "grad_norm": 451.12725830078125, + "learning_rate": 8.260227372399773e-06, + "loss": 13.3866, + "step": 173260 + }, + { + "epoch": 0.35001636251247387, + "grad_norm": 585.6008911132812, + "learning_rate": 8.259962709757634e-06, + "loss": 37.5836, + "step": 173270 + }, + { + "epoch": 0.3500365631451577, + "grad_norm": 266.95361328125, + "learning_rate": 8.259698031226831e-06, + "loss": 19.4592, + "step": 173280 + }, + { + "epoch": 0.3500567637778415, + "grad_norm": 401.6283264160156, + "learning_rate": 8.259433336808653e-06, + "loss": 28.0473, + "step": 173290 + }, + { + "epoch": 0.3500769644105253, + "grad_norm": 1045.560546875, + "learning_rate": 8.259168626504395e-06, + "loss": 18.9615, + "step": 173300 + }, + { + "epoch": 0.35009716504320915, + "grad_norm": 498.0918884277344, + "learning_rate": 8.258903900315343e-06, + "loss": 19.7967, + "step": 173310 + }, + { + "epoch": 0.35011736567589297, + "grad_norm": 369.1413269042969, + "learning_rate": 8.25863915824279e-06, + "loss": 26.5213, + "step": 173320 + }, + { + "epoch": 0.3501375663085768, + "grad_norm": 144.392822265625, + "learning_rate": 8.258374400288022e-06, + "loss": 34.1343, + "step": 173330 + }, + { + "epoch": 0.3501577669412606, + "grad_norm": 487.2896728515625, + "learning_rate": 8.258109626452335e-06, + "loss": 
20.6067, + "step": 173340 + }, + { + "epoch": 0.3501779675739444, + "grad_norm": 62.003700256347656, + "learning_rate": 8.257844836737017e-06, + "loss": 15.0474, + "step": 173350 + }, + { + "epoch": 0.35019816820662825, + "grad_norm": 216.18478393554688, + "learning_rate": 8.257580031143357e-06, + "loss": 16.0873, + "step": 173360 + }, + { + "epoch": 0.35021836883931207, + "grad_norm": 295.86688232421875, + "learning_rate": 8.257315209672648e-06, + "loss": 20.4352, + "step": 173370 + }, + { + "epoch": 0.3502385694719959, + "grad_norm": 590.26953125, + "learning_rate": 8.257050372326179e-06, + "loss": 29.161, + "step": 173380 + }, + { + "epoch": 0.35025877010467965, + "grad_norm": 397.9071044921875, + "learning_rate": 8.256785519105242e-06, + "loss": 26.4877, + "step": 173390 + }, + { + "epoch": 0.35027897073736347, + "grad_norm": 365.5968933105469, + "learning_rate": 8.256520650011126e-06, + "loss": 25.2415, + "step": 173400 + }, + { + "epoch": 0.3502991713700473, + "grad_norm": 804.7587890625, + "learning_rate": 8.256255765045124e-06, + "loss": 33.8894, + "step": 173410 + }, + { + "epoch": 0.3503193720027311, + "grad_norm": 534.3924560546875, + "learning_rate": 8.255990864208529e-06, + "loss": 36.9681, + "step": 173420 + }, + { + "epoch": 0.35033957263541493, + "grad_norm": 283.88006591796875, + "learning_rate": 8.255725947502627e-06, + "loss": 29.9513, + "step": 173430 + }, + { + "epoch": 0.35035977326809875, + "grad_norm": 313.4808044433594, + "learning_rate": 8.255461014928713e-06, + "loss": 30.8336, + "step": 173440 + }, + { + "epoch": 0.35037997390078257, + "grad_norm": 634.5245361328125, + "learning_rate": 8.255196066488074e-06, + "loss": 44.8829, + "step": 173450 + }, + { + "epoch": 0.3504001745334664, + "grad_norm": 455.39520263671875, + "learning_rate": 8.254931102182007e-06, + "loss": 35.0474, + "step": 173460 + }, + { + "epoch": 0.3504203751661502, + "grad_norm": 136.81488037109375, + "learning_rate": 8.254666122011799e-06, + "loss": 14.8398, + "step": 173470 + }, + { + "epoch": 0.35044057579883403, + "grad_norm": 58.212120056152344, + "learning_rate": 8.254401125978744e-06, + "loss": 16.4767, + "step": 173480 + }, + { + "epoch": 0.35046077643151785, + "grad_norm": 218.75843811035156, + "learning_rate": 8.254136114084133e-06, + "loss": 10.1186, + "step": 173490 + }, + { + "epoch": 0.35048097706420167, + "grad_norm": 123.15180206298828, + "learning_rate": 8.253871086329255e-06, + "loss": 22.3891, + "step": 173500 + }, + { + "epoch": 0.3505011776968855, + "grad_norm": 397.4679260253906, + "learning_rate": 8.253606042715405e-06, + "loss": 39.2791, + "step": 173510 + }, + { + "epoch": 0.35052137832956926, + "grad_norm": 549.019287109375, + "learning_rate": 8.253340983243876e-06, + "loss": 22.0367, + "step": 173520 + }, + { + "epoch": 0.3505415789622531, + "grad_norm": 463.5042419433594, + "learning_rate": 8.253075907915955e-06, + "loss": 19.4606, + "step": 173530 + }, + { + "epoch": 0.3505617795949369, + "grad_norm": 47.17810821533203, + "learning_rate": 8.252810816732936e-06, + "loss": 18.1637, + "step": 173540 + }, + { + "epoch": 0.3505819802276207, + "grad_norm": 699.627685546875, + "learning_rate": 8.252545709696115e-06, + "loss": 33.326, + "step": 173550 + }, + { + "epoch": 0.35060218086030454, + "grad_norm": 786.27001953125, + "learning_rate": 8.252280586806778e-06, + "loss": 25.532, + "step": 173560 + }, + { + "epoch": 0.35062238149298836, + "grad_norm": 590.9194946289062, + "learning_rate": 8.25201544806622e-06, + "loss": 29.8604, + "step": 173570 + }, + { + "epoch": 
0.3506425821256722, + "grad_norm": 482.8412780761719, + "learning_rate": 8.251750293475735e-06, + "loss": 34.1533, + "step": 173580 + }, + { + "epoch": 0.350662782758356, + "grad_norm": 500.3822326660156, + "learning_rate": 8.25148512303661e-06, + "loss": 14.332, + "step": 173590 + }, + { + "epoch": 0.3506829833910398, + "grad_norm": 428.2358093261719, + "learning_rate": 8.251219936750145e-06, + "loss": 13.9377, + "step": 173600 + }, + { + "epoch": 0.35070318402372364, + "grad_norm": 180.1342010498047, + "learning_rate": 8.250954734617627e-06, + "loss": 23.4336, + "step": 173610 + }, + { + "epoch": 0.35072338465640746, + "grad_norm": 627.1963500976562, + "learning_rate": 8.250689516640349e-06, + "loss": 10.7365, + "step": 173620 + }, + { + "epoch": 0.3507435852890913, + "grad_norm": 533.58837890625, + "learning_rate": 8.250424282819604e-06, + "loss": 17.2078, + "step": 173630 + }, + { + "epoch": 0.3507637859217751, + "grad_norm": 643.8233642578125, + "learning_rate": 8.250159033156687e-06, + "loss": 27.9006, + "step": 173640 + }, + { + "epoch": 0.35078398655445886, + "grad_norm": 22.857276916503906, + "learning_rate": 8.249893767652888e-06, + "loss": 15.8945, + "step": 173650 + }, + { + "epoch": 0.3508041871871427, + "grad_norm": 132.8467254638672, + "learning_rate": 8.2496284863095e-06, + "loss": 21.7937, + "step": 173660 + }, + { + "epoch": 0.3508243878198265, + "grad_norm": 135.77993774414062, + "learning_rate": 8.249363189127818e-06, + "loss": 23.7345, + "step": 173670 + }, + { + "epoch": 0.3508445884525103, + "grad_norm": 657.5906372070312, + "learning_rate": 8.249097876109136e-06, + "loss": 13.2515, + "step": 173680 + }, + { + "epoch": 0.35086478908519414, + "grad_norm": 1219.5201416015625, + "learning_rate": 8.248832547254742e-06, + "loss": 28.1558, + "step": 173690 + }, + { + "epoch": 0.35088498971787796, + "grad_norm": 390.09503173828125, + "learning_rate": 8.248567202565934e-06, + "loss": 17.8971, + "step": 173700 + }, + { + "epoch": 0.3509051903505618, + "grad_norm": 468.7591552734375, + "learning_rate": 8.248301842044003e-06, + "loss": 25.077, + "step": 173710 + }, + { + "epoch": 0.3509253909832456, + "grad_norm": 320.3813781738281, + "learning_rate": 8.24803646569024e-06, + "loss": 25.8523, + "step": 173720 + }, + { + "epoch": 0.3509455916159294, + "grad_norm": 498.8067321777344, + "learning_rate": 8.247771073505946e-06, + "loss": 29.7495, + "step": 173730 + }, + { + "epoch": 0.35096579224861324, + "grad_norm": 1407.796142578125, + "learning_rate": 8.247505665492406e-06, + "loss": 25.3236, + "step": 173740 + }, + { + "epoch": 0.35098599288129706, + "grad_norm": 417.5876159667969, + "learning_rate": 8.247240241650918e-06, + "loss": 22.4682, + "step": 173750 + }, + { + "epoch": 0.3510061935139809, + "grad_norm": 289.0838623046875, + "learning_rate": 8.246974801982776e-06, + "loss": 20.5483, + "step": 173760 + }, + { + "epoch": 0.3510263941466647, + "grad_norm": 155.3359832763672, + "learning_rate": 8.246709346489272e-06, + "loss": 26.6778, + "step": 173770 + }, + { + "epoch": 0.35104659477934846, + "grad_norm": 440.09454345703125, + "learning_rate": 8.2464438751717e-06, + "loss": 28.5211, + "step": 173780 + }, + { + "epoch": 0.3510667954120323, + "grad_norm": 311.7047119140625, + "learning_rate": 8.246178388031355e-06, + "loss": 18.4436, + "step": 173790 + }, + { + "epoch": 0.3510869960447161, + "grad_norm": 30.6214656829834, + "learning_rate": 8.24591288506953e-06, + "loss": 10.6946, + "step": 173800 + }, + { + "epoch": 0.3511071966773999, + "grad_norm": 614.3140258789062, + 
"learning_rate": 8.24564736628752e-06, + "loss": 14.8239, + "step": 173810 + }, + { + "epoch": 0.35112739731008374, + "grad_norm": 182.3624267578125, + "learning_rate": 8.245381831686618e-06, + "loss": 16.5028, + "step": 173820 + }, + { + "epoch": 0.35114759794276756, + "grad_norm": 676.9864501953125, + "learning_rate": 8.245116281268119e-06, + "loss": 20.3585, + "step": 173830 + }, + { + "epoch": 0.3511677985754514, + "grad_norm": 1057.8885498046875, + "learning_rate": 8.244850715033316e-06, + "loss": 25.8648, + "step": 173840 + }, + { + "epoch": 0.3511879992081352, + "grad_norm": 497.7960510253906, + "learning_rate": 8.244585132983505e-06, + "loss": 17.4738, + "step": 173850 + }, + { + "epoch": 0.351208199840819, + "grad_norm": 179.4878387451172, + "learning_rate": 8.244319535119978e-06, + "loss": 20.1675, + "step": 173860 + }, + { + "epoch": 0.35122840047350284, + "grad_norm": 388.04949951171875, + "learning_rate": 8.244053921444034e-06, + "loss": 21.7078, + "step": 173870 + }, + { + "epoch": 0.35124860110618666, + "grad_norm": 362.1933898925781, + "learning_rate": 8.24378829195696e-06, + "loss": 13.1197, + "step": 173880 + }, + { + "epoch": 0.3512688017388705, + "grad_norm": 365.29107666015625, + "learning_rate": 8.243522646660058e-06, + "loss": 27.2235, + "step": 173890 + }, + { + "epoch": 0.3512890023715543, + "grad_norm": 743.2318115234375, + "learning_rate": 8.243256985554622e-06, + "loss": 22.8129, + "step": 173900 + }, + { + "epoch": 0.35130920300423807, + "grad_norm": 221.1050567626953, + "learning_rate": 8.24299130864194e-06, + "loss": 44.6234, + "step": 173910 + }, + { + "epoch": 0.3513294036369219, + "grad_norm": 380.4090576171875, + "learning_rate": 8.242725615923316e-06, + "loss": 19.9493, + "step": 173920 + }, + { + "epoch": 0.3513496042696057, + "grad_norm": 393.19842529296875, + "learning_rate": 8.24245990740004e-06, + "loss": 20.9912, + "step": 173930 + }, + { + "epoch": 0.35136980490228953, + "grad_norm": 153.64451599121094, + "learning_rate": 8.242194183073406e-06, + "loss": 30.5434, + "step": 173940 + }, + { + "epoch": 0.35139000553497335, + "grad_norm": 397.6885986328125, + "learning_rate": 8.24192844294471e-06, + "loss": 31.8051, + "step": 173950 + }, + { + "epoch": 0.35141020616765717, + "grad_norm": 1085.44189453125, + "learning_rate": 8.241662687015251e-06, + "loss": 18.7905, + "step": 173960 + }, + { + "epoch": 0.351430406800341, + "grad_norm": 169.8010711669922, + "learning_rate": 8.24139691528632e-06, + "loss": 19.0973, + "step": 173970 + }, + { + "epoch": 0.3514506074330248, + "grad_norm": 123.90522003173828, + "learning_rate": 8.241131127759214e-06, + "loss": 9.796, + "step": 173980 + }, + { + "epoch": 0.35147080806570863, + "grad_norm": 408.1794128417969, + "learning_rate": 8.240865324435225e-06, + "loss": 18.8127, + "step": 173990 + }, + { + "epoch": 0.35149100869839245, + "grad_norm": 47.10768508911133, + "learning_rate": 8.240599505315656e-06, + "loss": 21.2711, + "step": 174000 + }, + { + "epoch": 0.35151120933107627, + "grad_norm": 275.2547302246094, + "learning_rate": 8.240333670401795e-06, + "loss": 26.2684, + "step": 174010 + }, + { + "epoch": 0.3515314099637601, + "grad_norm": 513.5037841796875, + "learning_rate": 8.240067819694941e-06, + "loss": 23.2252, + "step": 174020 + }, + { + "epoch": 0.35155161059644385, + "grad_norm": 34.5965576171875, + "learning_rate": 8.23980195319639e-06, + "loss": 21.4563, + "step": 174030 + }, + { + "epoch": 0.3515718112291277, + "grad_norm": 365.20196533203125, + "learning_rate": 8.239536070907437e-06, + "loss": 
22.2469, + "step": 174040 + }, + { + "epoch": 0.3515920118618115, + "grad_norm": 288.6556701660156, + "learning_rate": 8.239270172829379e-06, + "loss": 13.4052, + "step": 174050 + }, + { + "epoch": 0.3516122124944953, + "grad_norm": 651.83837890625, + "learning_rate": 8.23900425896351e-06, + "loss": 16.056, + "step": 174060 + }, + { + "epoch": 0.35163241312717913, + "grad_norm": 306.2084045410156, + "learning_rate": 8.238738329311126e-06, + "loss": 17.5342, + "step": 174070 + }, + { + "epoch": 0.35165261375986295, + "grad_norm": 162.78701782226562, + "learning_rate": 8.238472383873528e-06, + "loss": 16.8793, + "step": 174080 + }, + { + "epoch": 0.3516728143925468, + "grad_norm": 1043.649658203125, + "learning_rate": 8.238206422652007e-06, + "loss": 17.1426, + "step": 174090 + }, + { + "epoch": 0.3516930150252306, + "grad_norm": 507.8601989746094, + "learning_rate": 8.237940445647858e-06, + "loss": 29.4015, + "step": 174100 + }, + { + "epoch": 0.3517132156579144, + "grad_norm": 371.531494140625, + "learning_rate": 8.23767445286238e-06, + "loss": 24.1237, + "step": 174110 + }, + { + "epoch": 0.35173341629059823, + "grad_norm": 502.83709716796875, + "learning_rate": 8.237408444296872e-06, + "loss": 21.8385, + "step": 174120 + }, + { + "epoch": 0.35175361692328205, + "grad_norm": 127.65411376953125, + "learning_rate": 8.237142419952628e-06, + "loss": 36.3535, + "step": 174130 + }, + { + "epoch": 0.3517738175559659, + "grad_norm": 155.60055541992188, + "learning_rate": 8.236876379830943e-06, + "loss": 27.9313, + "step": 174140 + }, + { + "epoch": 0.3517940181886497, + "grad_norm": 464.2092590332031, + "learning_rate": 8.236610323933115e-06, + "loss": 13.191, + "step": 174150 + }, + { + "epoch": 0.35181421882133346, + "grad_norm": 1012.05810546875, + "learning_rate": 8.236344252260442e-06, + "loss": 19.8992, + "step": 174160 + }, + { + "epoch": 0.3518344194540173, + "grad_norm": 28.210956573486328, + "learning_rate": 8.236078164814218e-06, + "loss": 32.3238, + "step": 174170 + }, + { + "epoch": 0.3518546200867011, + "grad_norm": 137.7019805908203, + "learning_rate": 8.235812061595742e-06, + "loss": 25.4323, + "step": 174180 + }, + { + "epoch": 0.3518748207193849, + "grad_norm": 491.35687255859375, + "learning_rate": 8.235545942606311e-06, + "loss": 20.8667, + "step": 174190 + }, + { + "epoch": 0.35189502135206874, + "grad_norm": 532.808349609375, + "learning_rate": 8.235279807847223e-06, + "loss": 20.3499, + "step": 174200 + }, + { + "epoch": 0.35191522198475256, + "grad_norm": 236.93276977539062, + "learning_rate": 8.235013657319772e-06, + "loss": 37.5176, + "step": 174210 + }, + { + "epoch": 0.3519354226174364, + "grad_norm": 674.0003051757812, + "learning_rate": 8.234747491025257e-06, + "loss": 24.63, + "step": 174220 + }, + { + "epoch": 0.3519556232501202, + "grad_norm": 282.430419921875, + "learning_rate": 8.234481308964975e-06, + "loss": 19.8445, + "step": 174230 + }, + { + "epoch": 0.351975823882804, + "grad_norm": 854.5878295898438, + "learning_rate": 8.234215111140222e-06, + "loss": 35.0627, + "step": 174240 + }, + { + "epoch": 0.35199602451548784, + "grad_norm": 476.0958557128906, + "learning_rate": 8.2339488975523e-06, + "loss": 14.2766, + "step": 174250 + }, + { + "epoch": 0.35201622514817166, + "grad_norm": 394.2736511230469, + "learning_rate": 8.2336826682025e-06, + "loss": 19.2981, + "step": 174260 + }, + { + "epoch": 0.3520364257808555, + "grad_norm": 125.087646484375, + "learning_rate": 8.233416423092124e-06, + "loss": 19.7714, + "step": 174270 + }, + { + "epoch": 
0.3520566264135393, + "grad_norm": 252.89723205566406, + "learning_rate": 8.23315016222247e-06, + "loss": 28.8608, + "step": 174280 + }, + { + "epoch": 0.35207682704622306, + "grad_norm": 167.74928283691406, + "learning_rate": 8.232883885594831e-06, + "loss": 14.6707, + "step": 174290 + }, + { + "epoch": 0.3520970276789069, + "grad_norm": 134.2716064453125, + "learning_rate": 8.232617593210512e-06, + "loss": 12.7259, + "step": 174300 + }, + { + "epoch": 0.3521172283115907, + "grad_norm": 160.16358947753906, + "learning_rate": 8.232351285070804e-06, + "loss": 15.1264, + "step": 174310 + }, + { + "epoch": 0.3521374289442745, + "grad_norm": 701.306640625, + "learning_rate": 8.23208496117701e-06, + "loss": 34.7873, + "step": 174320 + }, + { + "epoch": 0.35215762957695834, + "grad_norm": 222.68003845214844, + "learning_rate": 8.231818621530424e-06, + "loss": 34.4222, + "step": 174330 + }, + { + "epoch": 0.35217783020964216, + "grad_norm": 393.4009094238281, + "learning_rate": 8.231552266132346e-06, + "loss": 49.5541, + "step": 174340 + }, + { + "epoch": 0.352198030842326, + "grad_norm": 224.24929809570312, + "learning_rate": 8.231285894984076e-06, + "loss": 27.1999, + "step": 174350 + }, + { + "epoch": 0.3522182314750098, + "grad_norm": 215.3976593017578, + "learning_rate": 8.231019508086908e-06, + "loss": 18.1587, + "step": 174360 + }, + { + "epoch": 0.3522384321076936, + "grad_norm": 86.14173889160156, + "learning_rate": 8.230753105442145e-06, + "loss": 25.2225, + "step": 174370 + }, + { + "epoch": 0.35225863274037744, + "grad_norm": 467.911865234375, + "learning_rate": 8.230486687051082e-06, + "loss": 19.3863, + "step": 174380 + }, + { + "epoch": 0.35227883337306126, + "grad_norm": 515.731201171875, + "learning_rate": 8.23022025291502e-06, + "loss": 20.7277, + "step": 174390 + }, + { + "epoch": 0.3522990340057451, + "grad_norm": 491.38543701171875, + "learning_rate": 8.229953803035256e-06, + "loss": 20.9303, + "step": 174400 + }, + { + "epoch": 0.3523192346384289, + "grad_norm": 251.4536590576172, + "learning_rate": 8.229687337413087e-06, + "loss": 38.819, + "step": 174410 + }, + { + "epoch": 0.35233943527111267, + "grad_norm": 396.22802734375, + "learning_rate": 8.229420856049814e-06, + "loss": 24.1538, + "step": 174420 + }, + { + "epoch": 0.3523596359037965, + "grad_norm": 379.91949462890625, + "learning_rate": 8.229154358946739e-06, + "loss": 29.9421, + "step": 174430 + }, + { + "epoch": 0.3523798365364803, + "grad_norm": 520.71484375, + "learning_rate": 8.228887846105154e-06, + "loss": 17.0296, + "step": 174440 + }, + { + "epoch": 0.3524000371691641, + "grad_norm": 289.1027526855469, + "learning_rate": 8.228621317526362e-06, + "loss": 20.0793, + "step": 174450 + }, + { + "epoch": 0.35242023780184795, + "grad_norm": 453.0242919921875, + "learning_rate": 8.22835477321166e-06, + "loss": 20.1559, + "step": 174460 + }, + { + "epoch": 0.35244043843453177, + "grad_norm": 866.8163452148438, + "learning_rate": 8.22808821316235e-06, + "loss": 27.4743, + "step": 174470 + }, + { + "epoch": 0.3524606390672156, + "grad_norm": 399.63018798828125, + "learning_rate": 8.22782163737973e-06, + "loss": 15.5161, + "step": 174480 + }, + { + "epoch": 0.3524808396998994, + "grad_norm": 542.755859375, + "learning_rate": 8.227555045865097e-06, + "loss": 27.222, + "step": 174490 + }, + { + "epoch": 0.3525010403325832, + "grad_norm": 813.2581787109375, + "learning_rate": 8.227288438619754e-06, + "loss": 19.779, + "step": 174500 + }, + { + "epoch": 0.35252124096526705, + "grad_norm": 313.12493896484375, + 
"learning_rate": 8.227021815644998e-06, + "loss": 19.1409, + "step": 174510 + }, + { + "epoch": 0.35254144159795087, + "grad_norm": 606.32421875, + "learning_rate": 8.226755176942127e-06, + "loss": 18.7381, + "step": 174520 + }, + { + "epoch": 0.3525616422306347, + "grad_norm": 721.5530395507812, + "learning_rate": 8.226488522512445e-06, + "loss": 18.6823, + "step": 174530 + }, + { + "epoch": 0.3525818428633185, + "grad_norm": 443.0743713378906, + "learning_rate": 8.22622185235725e-06, + "loss": 16.3067, + "step": 174540 + }, + { + "epoch": 0.35260204349600227, + "grad_norm": 309.57989501953125, + "learning_rate": 8.22595516647784e-06, + "loss": 16.5351, + "step": 174550 + }, + { + "epoch": 0.3526222441286861, + "grad_norm": 266.3574523925781, + "learning_rate": 8.225688464875514e-06, + "loss": 10.3782, + "step": 174560 + }, + { + "epoch": 0.3526424447613699, + "grad_norm": 455.272216796875, + "learning_rate": 8.225421747551575e-06, + "loss": 13.2519, + "step": 174570 + }, + { + "epoch": 0.35266264539405373, + "grad_norm": 424.2519226074219, + "learning_rate": 8.225155014507322e-06, + "loss": 42.6528, + "step": 174580 + }, + { + "epoch": 0.35268284602673755, + "grad_norm": 1382.4771728515625, + "learning_rate": 8.224888265744055e-06, + "loss": 20.4178, + "step": 174590 + }, + { + "epoch": 0.35270304665942137, + "grad_norm": 114.04889678955078, + "learning_rate": 8.224621501263073e-06, + "loss": 12.4362, + "step": 174600 + }, + { + "epoch": 0.3527232472921052, + "grad_norm": 410.6907958984375, + "learning_rate": 8.224354721065676e-06, + "loss": 29.2127, + "step": 174610 + }, + { + "epoch": 0.352743447924789, + "grad_norm": 375.0404968261719, + "learning_rate": 8.224087925153167e-06, + "loss": 15.1587, + "step": 174620 + }, + { + "epoch": 0.35276364855747283, + "grad_norm": 661.4111938476562, + "learning_rate": 8.223821113526843e-06, + "loss": 31.2368, + "step": 174630 + }, + { + "epoch": 0.35278384919015665, + "grad_norm": 159.78857421875, + "learning_rate": 8.223554286188007e-06, + "loss": 17.7349, + "step": 174640 + }, + { + "epoch": 0.35280404982284047, + "grad_norm": 897.872314453125, + "learning_rate": 8.223287443137957e-06, + "loss": 20.3204, + "step": 174650 + }, + { + "epoch": 0.3528242504555243, + "grad_norm": 699.4429321289062, + "learning_rate": 8.223020584377997e-06, + "loss": 23.8734, + "step": 174660 + }, + { + "epoch": 0.35284445108820806, + "grad_norm": 632.2472534179688, + "learning_rate": 8.222753709909423e-06, + "loss": 19.7888, + "step": 174670 + }, + { + "epoch": 0.3528646517208919, + "grad_norm": 725.69189453125, + "learning_rate": 8.22248681973354e-06, + "loss": 40.8493, + "step": 174680 + }, + { + "epoch": 0.3528848523535757, + "grad_norm": 453.00372314453125, + "learning_rate": 8.222219913851647e-06, + "loss": 14.9936, + "step": 174690 + }, + { + "epoch": 0.3529050529862595, + "grad_norm": 1012.4638061523438, + "learning_rate": 8.221952992265046e-06, + "loss": 15.8687, + "step": 174700 + }, + { + "epoch": 0.35292525361894334, + "grad_norm": 430.6357727050781, + "learning_rate": 8.221686054975035e-06, + "loss": 29.4404, + "step": 174710 + }, + { + "epoch": 0.35294545425162716, + "grad_norm": 78.58820343017578, + "learning_rate": 8.221419101982917e-06, + "loss": 52.1942, + "step": 174720 + }, + { + "epoch": 0.352965654884311, + "grad_norm": 643.046142578125, + "learning_rate": 8.221152133289993e-06, + "loss": 27.5479, + "step": 174730 + }, + { + "epoch": 0.3529858555169948, + "grad_norm": 840.59521484375, + "learning_rate": 8.220885148897566e-06, + "loss": 17.2693, + 
"step": 174740 + }, + { + "epoch": 0.3530060561496786, + "grad_norm": 498.2879638671875, + "learning_rate": 8.220618148806934e-06, + "loss": 19.2516, + "step": 174750 + }, + { + "epoch": 0.35302625678236244, + "grad_norm": 94.1357650756836, + "learning_rate": 8.2203511330194e-06, + "loss": 13.9296, + "step": 174760 + }, + { + "epoch": 0.35304645741504626, + "grad_norm": 252.35238647460938, + "learning_rate": 8.220084101536264e-06, + "loss": 26.2823, + "step": 174770 + }, + { + "epoch": 0.3530666580477301, + "grad_norm": 392.3772888183594, + "learning_rate": 8.21981705435883e-06, + "loss": 15.2514, + "step": 174780 + }, + { + "epoch": 0.3530868586804139, + "grad_norm": 431.6820068359375, + "learning_rate": 8.219549991488398e-06, + "loss": 21.3616, + "step": 174790 + }, + { + "epoch": 0.35310705931309766, + "grad_norm": 511.37030029296875, + "learning_rate": 8.21928291292627e-06, + "loss": 23.538, + "step": 174800 + }, + { + "epoch": 0.3531272599457815, + "grad_norm": 491.3835144042969, + "learning_rate": 8.219015818673747e-06, + "loss": 37.6915, + "step": 174810 + }, + { + "epoch": 0.3531474605784653, + "grad_norm": 568.794921875, + "learning_rate": 8.218748708732131e-06, + "loss": 11.7215, + "step": 174820 + }, + { + "epoch": 0.3531676612111491, + "grad_norm": 198.10693359375, + "learning_rate": 8.218481583102725e-06, + "loss": 22.8137, + "step": 174830 + }, + { + "epoch": 0.35318786184383294, + "grad_norm": 369.39703369140625, + "learning_rate": 8.218214441786829e-06, + "loss": 21.6082, + "step": 174840 + }, + { + "epoch": 0.35320806247651676, + "grad_norm": 108.52118682861328, + "learning_rate": 8.217947284785748e-06, + "loss": 28.672, + "step": 174850 + }, + { + "epoch": 0.3532282631092006, + "grad_norm": 319.4595642089844, + "learning_rate": 8.217680112100782e-06, + "loss": 14.6211, + "step": 174860 + }, + { + "epoch": 0.3532484637418844, + "grad_norm": 550.907470703125, + "learning_rate": 8.217412923733232e-06, + "loss": 26.826, + "step": 174870 + }, + { + "epoch": 0.3532686643745682, + "grad_norm": 77.43846130371094, + "learning_rate": 8.217145719684403e-06, + "loss": 36.5522, + "step": 174880 + }, + { + "epoch": 0.35328886500725204, + "grad_norm": 162.79888916015625, + "learning_rate": 8.216878499955594e-06, + "loss": 12.6612, + "step": 174890 + }, + { + "epoch": 0.35330906563993586, + "grad_norm": 570.7067260742188, + "learning_rate": 8.21661126454811e-06, + "loss": 13.0498, + "step": 174900 + }, + { + "epoch": 0.3533292662726197, + "grad_norm": 539.6907348632812, + "learning_rate": 8.216344013463255e-06, + "loss": 16.5399, + "step": 174910 + }, + { + "epoch": 0.3533494669053035, + "grad_norm": 180.33628845214844, + "learning_rate": 8.216076746702327e-06, + "loss": 20.4061, + "step": 174920 + }, + { + "epoch": 0.35336966753798726, + "grad_norm": 309.32513427734375, + "learning_rate": 8.215809464266632e-06, + "loss": 14.4209, + "step": 174930 + }, + { + "epoch": 0.3533898681706711, + "grad_norm": 535.1676025390625, + "learning_rate": 8.21554216615747e-06, + "loss": 24.7506, + "step": 174940 + }, + { + "epoch": 0.3534100688033549, + "grad_norm": 377.8276062011719, + "learning_rate": 8.215274852376148e-06, + "loss": 18.0881, + "step": 174950 + }, + { + "epoch": 0.3534302694360387, + "grad_norm": 223.22373962402344, + "learning_rate": 8.215007522923965e-06, + "loss": 8.4666, + "step": 174960 + }, + { + "epoch": 0.35345047006872254, + "grad_norm": 621.6992797851562, + "learning_rate": 8.214740177802225e-06, + "loss": 33.3335, + "step": 174970 + }, + { + "epoch": 0.35347067070140636, + 
"grad_norm": 183.6868896484375, + "learning_rate": 8.214472817012232e-06, + "loss": 28.1381, + "step": 174980 + }, + { + "epoch": 0.3534908713340902, + "grad_norm": 445.21795654296875, + "learning_rate": 8.214205440555289e-06, + "loss": 11.1762, + "step": 174990 + }, + { + "epoch": 0.353511071966774, + "grad_norm": 191.78660583496094, + "learning_rate": 8.213938048432697e-06, + "loss": 13.0107, + "step": 175000 + }, + { + "epoch": 0.3535312725994578, + "grad_norm": 495.6348876953125, + "learning_rate": 8.213670640645762e-06, + "loss": 33.909, + "step": 175010 + }, + { + "epoch": 0.35355147323214164, + "grad_norm": 526.88916015625, + "learning_rate": 8.213403217195785e-06, + "loss": 21.3817, + "step": 175020 + }, + { + "epoch": 0.35357167386482546, + "grad_norm": 487.1705322265625, + "learning_rate": 8.21313577808407e-06, + "loss": 27.8439, + "step": 175030 + }, + { + "epoch": 0.3535918744975093, + "grad_norm": 373.7630310058594, + "learning_rate": 8.212868323311923e-06, + "loss": 9.8656, + "step": 175040 + }, + { + "epoch": 0.3536120751301931, + "grad_norm": 310.5169372558594, + "learning_rate": 8.212600852880644e-06, + "loss": 33.0556, + "step": 175050 + }, + { + "epoch": 0.35363227576287687, + "grad_norm": 214.21728515625, + "learning_rate": 8.212333366791539e-06, + "loss": 29.317, + "step": 175060 + }, + { + "epoch": 0.3536524763955607, + "grad_norm": 1133.0125732421875, + "learning_rate": 8.21206586504591e-06, + "loss": 19.4452, + "step": 175070 + }, + { + "epoch": 0.3536726770282445, + "grad_norm": 477.560302734375, + "learning_rate": 8.211798347645062e-06, + "loss": 15.9271, + "step": 175080 + }, + { + "epoch": 0.35369287766092833, + "grad_norm": 448.5748596191406, + "learning_rate": 8.211530814590298e-06, + "loss": 23.897, + "step": 175090 + }, + { + "epoch": 0.35371307829361215, + "grad_norm": 321.6637268066406, + "learning_rate": 8.211263265882923e-06, + "loss": 17.1155, + "step": 175100 + }, + { + "epoch": 0.35373327892629597, + "grad_norm": 157.0853271484375, + "learning_rate": 8.21099570152424e-06, + "loss": 31.9269, + "step": 175110 + }, + { + "epoch": 0.3537534795589798, + "grad_norm": 366.2945556640625, + "learning_rate": 8.210728121515552e-06, + "loss": 23.7279, + "step": 175120 + }, + { + "epoch": 0.3537736801916636, + "grad_norm": 212.07386779785156, + "learning_rate": 8.210460525858167e-06, + "loss": 25.1346, + "step": 175130 + }, + { + "epoch": 0.35379388082434743, + "grad_norm": 187.92138671875, + "learning_rate": 8.210192914553385e-06, + "loss": 20.5662, + "step": 175140 + }, + { + "epoch": 0.35381408145703125, + "grad_norm": 335.2757568359375, + "learning_rate": 8.209925287602513e-06, + "loss": 19.1828, + "step": 175150 + }, + { + "epoch": 0.35383428208971507, + "grad_norm": 333.5650634765625, + "learning_rate": 8.209657645006854e-06, + "loss": 11.0527, + "step": 175160 + }, + { + "epoch": 0.3538544827223989, + "grad_norm": 375.4122619628906, + "learning_rate": 8.209389986767713e-06, + "loss": 14.096, + "step": 175170 + }, + { + "epoch": 0.3538746833550827, + "grad_norm": 599.1802368164062, + "learning_rate": 8.209122312886394e-06, + "loss": 15.0332, + "step": 175180 + }, + { + "epoch": 0.3538948839877665, + "grad_norm": 830.013427734375, + "learning_rate": 8.208854623364202e-06, + "loss": 24.5616, + "step": 175190 + }, + { + "epoch": 0.3539150846204503, + "grad_norm": 2.863065719604492, + "learning_rate": 8.208586918202444e-06, + "loss": 21.1935, + "step": 175200 + }, + { + "epoch": 0.3539352852531341, + "grad_norm": 860.7329711914062, + "learning_rate": 
8.208319197402418e-06, + "loss": 23.9066, + "step": 175210 + }, + { + "epoch": 0.35395548588581793, + "grad_norm": 295.81427001953125, + "learning_rate": 8.208051460965438e-06, + "loss": 21.0558, + "step": 175220 + }, + { + "epoch": 0.35397568651850175, + "grad_norm": 357.52789306640625, + "learning_rate": 8.207783708892802e-06, + "loss": 12.3761, + "step": 175230 + }, + { + "epoch": 0.3539958871511856, + "grad_norm": 414.4925231933594, + "learning_rate": 8.207515941185818e-06, + "loss": 27.3326, + "step": 175240 + }, + { + "epoch": 0.3540160877838694, + "grad_norm": 600.637451171875, + "learning_rate": 8.20724815784579e-06, + "loss": 16.0749, + "step": 175250 + }, + { + "epoch": 0.3540362884165532, + "grad_norm": 128.98521423339844, + "learning_rate": 8.206980358874024e-06, + "loss": 20.8088, + "step": 175260 + }, + { + "epoch": 0.35405648904923703, + "grad_norm": 301.27325439453125, + "learning_rate": 8.206712544271825e-06, + "loss": 29.2206, + "step": 175270 + }, + { + "epoch": 0.35407668968192085, + "grad_norm": 520.8452758789062, + "learning_rate": 8.206444714040496e-06, + "loss": 17.5237, + "step": 175280 + }, + { + "epoch": 0.3540968903146047, + "grad_norm": 122.30708312988281, + "learning_rate": 8.206176868181346e-06, + "loss": 11.8924, + "step": 175290 + }, + { + "epoch": 0.3541170909472885, + "grad_norm": 161.1448516845703, + "learning_rate": 8.205909006695679e-06, + "loss": 16.651, + "step": 175300 + }, + { + "epoch": 0.35413729157997226, + "grad_norm": 214.54786682128906, + "learning_rate": 8.205641129584798e-06, + "loss": 11.0476, + "step": 175310 + }, + { + "epoch": 0.3541574922126561, + "grad_norm": 580.5718383789062, + "learning_rate": 8.205373236850013e-06, + "loss": 20.9196, + "step": 175320 + }, + { + "epoch": 0.3541776928453399, + "grad_norm": 736.0718383789062, + "learning_rate": 8.205105328492627e-06, + "loss": 23.4889, + "step": 175330 + }, + { + "epoch": 0.3541978934780237, + "grad_norm": 637.3290405273438, + "learning_rate": 8.204837404513946e-06, + "loss": 21.8905, + "step": 175340 + }, + { + "epoch": 0.35421809411070754, + "grad_norm": 1373.241943359375, + "learning_rate": 8.204569464915278e-06, + "loss": 22.8951, + "step": 175350 + }, + { + "epoch": 0.35423829474339136, + "grad_norm": 755.3607177734375, + "learning_rate": 8.204301509697925e-06, + "loss": 16.576, + "step": 175360 + }, + { + "epoch": 0.3542584953760752, + "grad_norm": 643.6798095703125, + "learning_rate": 8.204033538863196e-06, + "loss": 28.1972, + "step": 175370 + }, + { + "epoch": 0.354278696008759, + "grad_norm": 452.77093505859375, + "learning_rate": 8.203765552412396e-06, + "loss": 25.9113, + "step": 175380 + }, + { + "epoch": 0.3542988966414428, + "grad_norm": 437.998779296875, + "learning_rate": 8.203497550346832e-06, + "loss": 50.7171, + "step": 175390 + }, + { + "epoch": 0.35431909727412664, + "grad_norm": 688.136474609375, + "learning_rate": 8.203229532667808e-06, + "loss": 23.6534, + "step": 175400 + }, + { + "epoch": 0.35433929790681046, + "grad_norm": 570.303955078125, + "learning_rate": 8.202961499376633e-06, + "loss": 28.7329, + "step": 175410 + }, + { + "epoch": 0.3543594985394943, + "grad_norm": 364.986572265625, + "learning_rate": 8.202693450474611e-06, + "loss": 14.5151, + "step": 175420 + }, + { + "epoch": 0.3543796991721781, + "grad_norm": 765.8523559570312, + "learning_rate": 8.20242538596305e-06, + "loss": 23.3007, + "step": 175430 + }, + { + "epoch": 0.35439989980486186, + "grad_norm": 165.0301513671875, + "learning_rate": 8.202157305843256e-06, + "loss": 18.9969, + "step": 
175440 + }, + { + "epoch": 0.3544201004375457, + "grad_norm": 945.1859130859375, + "learning_rate": 8.201889210116536e-06, + "loss": 20.2072, + "step": 175450 + }, + { + "epoch": 0.3544403010702295, + "grad_norm": 311.2787780761719, + "learning_rate": 8.201621098784198e-06, + "loss": 27.8193, + "step": 175460 + }, + { + "epoch": 0.3544605017029133, + "grad_norm": 199.78689575195312, + "learning_rate": 8.201352971847544e-06, + "loss": 19.0399, + "step": 175470 + }, + { + "epoch": 0.35448070233559714, + "grad_norm": 453.7472229003906, + "learning_rate": 8.201084829307886e-06, + "loss": 15.9557, + "step": 175480 + }, + { + "epoch": 0.35450090296828096, + "grad_norm": 32.777862548828125, + "learning_rate": 8.200816671166529e-06, + "loss": 19.7851, + "step": 175490 + }, + { + "epoch": 0.3545211036009648, + "grad_norm": 177.6970672607422, + "learning_rate": 8.200548497424779e-06, + "loss": 36.1829, + "step": 175500 + }, + { + "epoch": 0.3545413042336486, + "grad_norm": 297.3965148925781, + "learning_rate": 8.200280308083944e-06, + "loss": 17.4511, + "step": 175510 + }, + { + "epoch": 0.3545615048663324, + "grad_norm": 346.8007507324219, + "learning_rate": 8.200012103145329e-06, + "loss": 13.4345, + "step": 175520 + }, + { + "epoch": 0.35458170549901624, + "grad_norm": 377.2947692871094, + "learning_rate": 8.199743882610245e-06, + "loss": 19.6327, + "step": 175530 + }, + { + "epoch": 0.35460190613170006, + "grad_norm": 135.6499481201172, + "learning_rate": 8.199475646479997e-06, + "loss": 17.9815, + "step": 175540 + }, + { + "epoch": 0.3546221067643839, + "grad_norm": 373.2859191894531, + "learning_rate": 8.199207394755892e-06, + "loss": 31.1193, + "step": 175550 + }, + { + "epoch": 0.3546423073970677, + "grad_norm": 282.8166198730469, + "learning_rate": 8.19893912743924e-06, + "loss": 14.4851, + "step": 175560 + }, + { + "epoch": 0.35466250802975147, + "grad_norm": 254.1454315185547, + "learning_rate": 8.198670844531345e-06, + "loss": 11.7371, + "step": 175570 + }, + { + "epoch": 0.3546827086624353, + "grad_norm": 293.01776123046875, + "learning_rate": 8.198402546033518e-06, + "loss": 27.6861, + "step": 175580 + }, + { + "epoch": 0.3547029092951191, + "grad_norm": 394.53094482421875, + "learning_rate": 8.198134231947064e-06, + "loss": 14.5366, + "step": 175590 + }, + { + "epoch": 0.3547231099278029, + "grad_norm": 313.37158203125, + "learning_rate": 8.197865902273291e-06, + "loss": 32.7562, + "step": 175600 + }, + { + "epoch": 0.35474331056048675, + "grad_norm": 265.4231872558594, + "learning_rate": 8.197597557013507e-06, + "loss": 23.1748, + "step": 175610 + }, + { + "epoch": 0.35476351119317057, + "grad_norm": 204.347412109375, + "learning_rate": 8.197329196169022e-06, + "loss": 28.9186, + "step": 175620 + }, + { + "epoch": 0.3547837118258544, + "grad_norm": 164.70802307128906, + "learning_rate": 8.197060819741141e-06, + "loss": 20.8136, + "step": 175630 + }, + { + "epoch": 0.3548039124585382, + "grad_norm": 434.8452453613281, + "learning_rate": 8.196792427731175e-06, + "loss": 21.0687, + "step": 175640 + }, + { + "epoch": 0.354824113091222, + "grad_norm": 770.203369140625, + "learning_rate": 8.196524020140428e-06, + "loss": 14.9363, + "step": 175650 + }, + { + "epoch": 0.35484431372390585, + "grad_norm": 111.00702667236328, + "learning_rate": 8.196255596970214e-06, + "loss": 26.4642, + "step": 175660 + }, + { + "epoch": 0.35486451435658967, + "grad_norm": 210.8970947265625, + "learning_rate": 8.195987158221835e-06, + "loss": 21.014, + "step": 175670 + }, + { + "epoch": 0.3548847149892735, + 
"grad_norm": 503.451171875, + "learning_rate": 8.195718703896603e-06, + "loss": 28.6416, + "step": 175680 + }, + { + "epoch": 0.3549049156219573, + "grad_norm": 220.35812377929688, + "learning_rate": 8.195450233995826e-06, + "loss": 15.3415, + "step": 175690 + }, + { + "epoch": 0.35492511625464107, + "grad_norm": 499.7003173828125, + "learning_rate": 8.19518174852081e-06, + "loss": 26.7256, + "step": 175700 + }, + { + "epoch": 0.3549453168873249, + "grad_norm": 136.46705627441406, + "learning_rate": 8.194913247472868e-06, + "loss": 12.9298, + "step": 175710 + }, + { + "epoch": 0.3549655175200087, + "grad_norm": 883.2164916992188, + "learning_rate": 8.194644730853307e-06, + "loss": 21.4403, + "step": 175720 + }, + { + "epoch": 0.35498571815269253, + "grad_norm": 638.2017822265625, + "learning_rate": 8.194376198663434e-06, + "loss": 30.9745, + "step": 175730 + }, + { + "epoch": 0.35500591878537635, + "grad_norm": 534.1513671875, + "learning_rate": 8.194107650904556e-06, + "loss": 20.47, + "step": 175740 + }, + { + "epoch": 0.35502611941806017, + "grad_norm": 417.97369384765625, + "learning_rate": 8.19383908757799e-06, + "loss": 16.4925, + "step": 175750 + }, + { + "epoch": 0.355046320050744, + "grad_norm": 704.442138671875, + "learning_rate": 8.193570508685035e-06, + "loss": 21.7582, + "step": 175760 + }, + { + "epoch": 0.3550665206834278, + "grad_norm": 309.06768798828125, + "learning_rate": 8.193301914227008e-06, + "loss": 13.6277, + "step": 175770 + }, + { + "epoch": 0.35508672131611163, + "grad_norm": 169.63116455078125, + "learning_rate": 8.193033304205213e-06, + "loss": 10.3619, + "step": 175780 + }, + { + "epoch": 0.35510692194879545, + "grad_norm": 461.3911437988281, + "learning_rate": 8.192764678620961e-06, + "loss": 27.736, + "step": 175790 + }, + { + "epoch": 0.35512712258147927, + "grad_norm": 453.21435546875, + "learning_rate": 8.192496037475562e-06, + "loss": 25.6383, + "step": 175800 + }, + { + "epoch": 0.3551473232141631, + "grad_norm": 696.3877563476562, + "learning_rate": 8.192227380770326e-06, + "loss": 32.5503, + "step": 175810 + }, + { + "epoch": 0.3551675238468469, + "grad_norm": 359.2447204589844, + "learning_rate": 8.191958708506557e-06, + "loss": 24.0632, + "step": 175820 + }, + { + "epoch": 0.3551877244795307, + "grad_norm": 448.9452819824219, + "learning_rate": 8.19169002068557e-06, + "loss": 16.0923, + "step": 175830 + }, + { + "epoch": 0.3552079251122145, + "grad_norm": 302.58843994140625, + "learning_rate": 8.191421317308674e-06, + "loss": 7.7536, + "step": 175840 + }, + { + "epoch": 0.3552281257448983, + "grad_norm": 317.3415222167969, + "learning_rate": 8.191152598377179e-06, + "loss": 26.0583, + "step": 175850 + }, + { + "epoch": 0.35524832637758214, + "grad_norm": 113.57054138183594, + "learning_rate": 8.19088386389239e-06, + "loss": 22.8452, + "step": 175860 + }, + { + "epoch": 0.35526852701026596, + "grad_norm": 515.0502319335938, + "learning_rate": 8.19061511385562e-06, + "loss": 17.7838, + "step": 175870 + }, + { + "epoch": 0.3552887276429498, + "grad_norm": 625.891357421875, + "learning_rate": 8.190346348268182e-06, + "loss": 23.5269, + "step": 175880 + }, + { + "epoch": 0.3553089282756336, + "grad_norm": 329.49493408203125, + "learning_rate": 8.190077567131381e-06, + "loss": 34.8531, + "step": 175890 + }, + { + "epoch": 0.3553291289083174, + "grad_norm": 644.8638305664062, + "learning_rate": 8.189808770446528e-06, + "loss": 33.901, + "step": 175900 + }, + { + "epoch": 0.35534932954100124, + "grad_norm": 254.66087341308594, + "learning_rate": 
8.189539958214934e-06, + "loss": 38.3538, + "step": 175910 + }, + { + "epoch": 0.35536953017368506, + "grad_norm": 767.3721923828125, + "learning_rate": 8.18927113043791e-06, + "loss": 20.6208, + "step": 175920 + }, + { + "epoch": 0.3553897308063689, + "grad_norm": 124.93978881835938, + "learning_rate": 8.189002287116765e-06, + "loss": 23.6206, + "step": 175930 + }, + { + "epoch": 0.3554099314390527, + "grad_norm": 368.7055969238281, + "learning_rate": 8.188733428252811e-06, + "loss": 18.4986, + "step": 175940 + }, + { + "epoch": 0.35543013207173646, + "grad_norm": 328.1405029296875, + "learning_rate": 8.188464553847356e-06, + "loss": 16.6194, + "step": 175950 + }, + { + "epoch": 0.3554503327044203, + "grad_norm": 617.9406127929688, + "learning_rate": 8.18819566390171e-06, + "loss": 17.6561, + "step": 175960 + }, + { + "epoch": 0.3554705333371041, + "grad_norm": 984.331787109375, + "learning_rate": 8.187926758417188e-06, + "loss": 23.4561, + "step": 175970 + }, + { + "epoch": 0.3554907339697879, + "grad_norm": 368.822021484375, + "learning_rate": 8.187657837395095e-06, + "loss": 14.7196, + "step": 175980 + }, + { + "epoch": 0.35551093460247174, + "grad_norm": 350.0384216308594, + "learning_rate": 8.187388900836745e-06, + "loss": 26.0618, + "step": 175990 + }, + { + "epoch": 0.35553113523515556, + "grad_norm": 405.1641540527344, + "learning_rate": 8.18711994874345e-06, + "loss": 14.5582, + "step": 176000 + }, + { + "epoch": 0.3555513358678394, + "grad_norm": 178.0616455078125, + "learning_rate": 8.186850981116516e-06, + "loss": 14.8914, + "step": 176010 + }, + { + "epoch": 0.3555715365005232, + "grad_norm": 223.12640380859375, + "learning_rate": 8.18658199795726e-06, + "loss": 14.7733, + "step": 176020 + }, + { + "epoch": 0.355591737133207, + "grad_norm": 642.9396362304688, + "learning_rate": 8.186312999266987e-06, + "loss": 19.4961, + "step": 176030 + }, + { + "epoch": 0.35561193776589084, + "grad_norm": 97.81071472167969, + "learning_rate": 8.186043985047012e-06, + "loss": 15.9184, + "step": 176040 + }, + { + "epoch": 0.35563213839857466, + "grad_norm": 331.9412841796875, + "learning_rate": 8.185774955298645e-06, + "loss": 21.9188, + "step": 176050 + }, + { + "epoch": 0.3556523390312585, + "grad_norm": 1437.18603515625, + "learning_rate": 8.185505910023196e-06, + "loss": 28.9128, + "step": 176060 + }, + { + "epoch": 0.3556725396639423, + "grad_norm": 440.3446350097656, + "learning_rate": 8.18523684922198e-06, + "loss": 34.4394, + "step": 176070 + }, + { + "epoch": 0.35569274029662606, + "grad_norm": 1181.7449951171875, + "learning_rate": 8.184967772896304e-06, + "loss": 21.1126, + "step": 176080 + }, + { + "epoch": 0.3557129409293099, + "grad_norm": 466.76519775390625, + "learning_rate": 8.184698681047482e-06, + "loss": 14.2977, + "step": 176090 + }, + { + "epoch": 0.3557331415619937, + "grad_norm": 668.67431640625, + "learning_rate": 8.184429573676825e-06, + "loss": 23.9406, + "step": 176100 + }, + { + "epoch": 0.3557533421946775, + "grad_norm": 235.19480895996094, + "learning_rate": 8.184160450785645e-06, + "loss": 29.0543, + "step": 176110 + }, + { + "epoch": 0.35577354282736134, + "grad_norm": 366.2788391113281, + "learning_rate": 8.183891312375251e-06, + "loss": 20.1827, + "step": 176120 + }, + { + "epoch": 0.35579374346004516, + "grad_norm": 100.78903198242188, + "learning_rate": 8.18362215844696e-06, + "loss": 22.2481, + "step": 176130 + }, + { + "epoch": 0.355813944092729, + "grad_norm": 290.5372314453125, + "learning_rate": 8.183352989002079e-06, + "loss": 17.5751, + "step": 
176140 + }, + { + "epoch": 0.3558341447254128, + "grad_norm": 265.5997619628906, + "learning_rate": 8.183083804041922e-06, + "loss": 19.2007, + "step": 176150 + }, + { + "epoch": 0.3558543453580966, + "grad_norm": 817.3016357421875, + "learning_rate": 8.1828146035678e-06, + "loss": 35.5227, + "step": 176160 + }, + { + "epoch": 0.35587454599078044, + "grad_norm": 647.1814575195312, + "learning_rate": 8.182545387581026e-06, + "loss": 20.4804, + "step": 176170 + }, + { + "epoch": 0.35589474662346426, + "grad_norm": 240.89344787597656, + "learning_rate": 8.182276156082911e-06, + "loss": 26.3593, + "step": 176180 + }, + { + "epoch": 0.3559149472561481, + "grad_norm": 830.0279541015625, + "learning_rate": 8.182006909074769e-06, + "loss": 38.3872, + "step": 176190 + }, + { + "epoch": 0.3559351478888319, + "grad_norm": 69.38955688476562, + "learning_rate": 8.181737646557912e-06, + "loss": 14.6388, + "step": 176200 + }, + { + "epoch": 0.35595534852151567, + "grad_norm": 361.1736145019531, + "learning_rate": 8.181468368533651e-06, + "loss": 28.375, + "step": 176210 + }, + { + "epoch": 0.3559755491541995, + "grad_norm": 322.6838684082031, + "learning_rate": 8.181199075003298e-06, + "loss": 20.5138, + "step": 176220 + }, + { + "epoch": 0.3559957497868833, + "grad_norm": 219.79002380371094, + "learning_rate": 8.180929765968168e-06, + "loss": 25.9195, + "step": 176230 + }, + { + "epoch": 0.35601595041956713, + "grad_norm": 498.863525390625, + "learning_rate": 8.18066044142957e-06, + "loss": 18.7417, + "step": 176240 + }, + { + "epoch": 0.35603615105225095, + "grad_norm": 692.880615234375, + "learning_rate": 8.18039110138882e-06, + "loss": 47.3885, + "step": 176250 + }, + { + "epoch": 0.35605635168493477, + "grad_norm": 502.8414001464844, + "learning_rate": 8.18012174584723e-06, + "loss": 20.0152, + "step": 176260 + }, + { + "epoch": 0.3560765523176186, + "grad_norm": 347.7989501953125, + "learning_rate": 8.179852374806112e-06, + "loss": 19.1384, + "step": 176270 + }, + { + "epoch": 0.3560967529503024, + "grad_norm": 599.8389892578125, + "learning_rate": 8.179582988266778e-06, + "loss": 15.0186, + "step": 176280 + }, + { + "epoch": 0.35611695358298623, + "grad_norm": 831.802001953125, + "learning_rate": 8.179313586230544e-06, + "loss": 27.5229, + "step": 176290 + }, + { + "epoch": 0.35613715421567005, + "grad_norm": 877.4141845703125, + "learning_rate": 8.179044168698722e-06, + "loss": 41.1221, + "step": 176300 + }, + { + "epoch": 0.35615735484835387, + "grad_norm": 1.0121227502822876, + "learning_rate": 8.178774735672622e-06, + "loss": 29.6882, + "step": 176310 + }, + { + "epoch": 0.3561775554810377, + "grad_norm": 466.5857849121094, + "learning_rate": 8.17850528715356e-06, + "loss": 29.7614, + "step": 176320 + }, + { + "epoch": 0.3561977561137215, + "grad_norm": 1201.7891845703125, + "learning_rate": 8.178235823142849e-06, + "loss": 20.21, + "step": 176330 + }, + { + "epoch": 0.3562179567464053, + "grad_norm": 674.99609375, + "learning_rate": 8.177966343641803e-06, + "loss": 22.0448, + "step": 176340 + }, + { + "epoch": 0.3562381573790891, + "grad_norm": 144.88600158691406, + "learning_rate": 8.177696848651733e-06, + "loss": 14.6154, + "step": 176350 + }, + { + "epoch": 0.3562583580117729, + "grad_norm": 424.9156799316406, + "learning_rate": 8.177427338173955e-06, + "loss": 17.7365, + "step": 176360 + }, + { + "epoch": 0.35627855864445673, + "grad_norm": 140.97987365722656, + "learning_rate": 8.17715781220978e-06, + "loss": 10.5915, + "step": 176370 + }, + { + "epoch": 0.35629875927714055, + 
"grad_norm": 670.3990478515625, + "learning_rate": 8.176888270760524e-06, + "loss": 37.206, + "step": 176380 + }, + { + "epoch": 0.3563189599098244, + "grad_norm": 335.6086730957031, + "learning_rate": 8.1766187138275e-06, + "loss": 16.479, + "step": 176390 + }, + { + "epoch": 0.3563391605425082, + "grad_norm": 543.1423950195312, + "learning_rate": 8.176349141412022e-06, + "loss": 27.8851, + "step": 176400 + }, + { + "epoch": 0.356359361175192, + "grad_norm": 407.7586975097656, + "learning_rate": 8.176079553515403e-06, + "loss": 21.3756, + "step": 176410 + }, + { + "epoch": 0.35637956180787583, + "grad_norm": 487.2771911621094, + "learning_rate": 8.175809950138958e-06, + "loss": 19.7585, + "step": 176420 + }, + { + "epoch": 0.35639976244055965, + "grad_norm": 1027.403076171875, + "learning_rate": 8.175540331284e-06, + "loss": 41.2479, + "step": 176430 + }, + { + "epoch": 0.3564199630732435, + "grad_norm": 2425.6884765625, + "learning_rate": 8.175270696951846e-06, + "loss": 27.228, + "step": 176440 + }, + { + "epoch": 0.3564401637059273, + "grad_norm": 364.4439392089844, + "learning_rate": 8.175001047143804e-06, + "loss": 14.7915, + "step": 176450 + }, + { + "epoch": 0.35646036433861106, + "grad_norm": 255.93695068359375, + "learning_rate": 8.174731381861194e-06, + "loss": 25.3969, + "step": 176460 + }, + { + "epoch": 0.3564805649712949, + "grad_norm": 1018.808349609375, + "learning_rate": 8.174461701105328e-06, + "loss": 60.0915, + "step": 176470 + }, + { + "epoch": 0.3565007656039787, + "grad_norm": 239.02467346191406, + "learning_rate": 8.17419200487752e-06, + "loss": 18.13, + "step": 176480 + }, + { + "epoch": 0.3565209662366625, + "grad_norm": 135.59262084960938, + "learning_rate": 8.173922293179086e-06, + "loss": 15.985, + "step": 176490 + }, + { + "epoch": 0.35654116686934634, + "grad_norm": 263.75164794921875, + "learning_rate": 8.173652566011339e-06, + "loss": 29.7916, + "step": 176500 + }, + { + "epoch": 0.35656136750203016, + "grad_norm": 424.6169738769531, + "learning_rate": 8.173382823375594e-06, + "loss": 22.1316, + "step": 176510 + }, + { + "epoch": 0.356581568134714, + "grad_norm": 487.9251708984375, + "learning_rate": 8.173113065273167e-06, + "loss": 12.1967, + "step": 176520 + }, + { + "epoch": 0.3566017687673978, + "grad_norm": 316.0293884277344, + "learning_rate": 8.17284329170537e-06, + "loss": 19.9682, + "step": 176530 + }, + { + "epoch": 0.3566219694000816, + "grad_norm": 613.0550537109375, + "learning_rate": 8.172573502673522e-06, + "loss": 12.1763, + "step": 176540 + }, + { + "epoch": 0.35664217003276544, + "grad_norm": 183.81101989746094, + "learning_rate": 8.172303698178934e-06, + "loss": 13.2585, + "step": 176550 + }, + { + "epoch": 0.35666237066544926, + "grad_norm": 183.7642364501953, + "learning_rate": 8.172033878222924e-06, + "loss": 21.5417, + "step": 176560 + }, + { + "epoch": 0.3566825712981331, + "grad_norm": 508.7245178222656, + "learning_rate": 8.171764042806804e-06, + "loss": 20.4488, + "step": 176570 + }, + { + "epoch": 0.3567027719308169, + "grad_norm": 192.7313232421875, + "learning_rate": 8.171494191931892e-06, + "loss": 22.8201, + "step": 176580 + }, + { + "epoch": 0.35672297256350066, + "grad_norm": 275.0957336425781, + "learning_rate": 8.171224325599502e-06, + "loss": 20.6334, + "step": 176590 + }, + { + "epoch": 0.3567431731961845, + "grad_norm": 459.2030029296875, + "learning_rate": 8.170954443810947e-06, + "loss": 19.7246, + "step": 176600 + }, + { + "epoch": 0.3567633738288683, + "grad_norm": 724.6907348632812, + "learning_rate": 
8.170684546567546e-06, + "loss": 48.7054, + "step": 176610 + }, + { + "epoch": 0.3567835744615521, + "grad_norm": 320.8407287597656, + "learning_rate": 8.170414633870617e-06, + "loss": 36.5331, + "step": 176620 + }, + { + "epoch": 0.35680377509423594, + "grad_norm": 379.14349365234375, + "learning_rate": 8.170144705721465e-06, + "loss": 34.0888, + "step": 176630 + }, + { + "epoch": 0.35682397572691976, + "grad_norm": 463.0989074707031, + "learning_rate": 8.169874762121416e-06, + "loss": 16.4816, + "step": 176640 + }, + { + "epoch": 0.3568441763596036, + "grad_norm": 297.63055419921875, + "learning_rate": 8.169604803071783e-06, + "loss": 15.3545, + "step": 176650 + }, + { + "epoch": 0.3568643769922874, + "grad_norm": 551.3389282226562, + "learning_rate": 8.169334828573878e-06, + "loss": 17.3687, + "step": 176660 + }, + { + "epoch": 0.3568845776249712, + "grad_norm": 367.9317932128906, + "learning_rate": 8.16906483862902e-06, + "loss": 18.374, + "step": 176670 + }, + { + "epoch": 0.35690477825765504, + "grad_norm": 951.7230834960938, + "learning_rate": 8.168794833238523e-06, + "loss": 36.017, + "step": 176680 + }, + { + "epoch": 0.35692497889033886, + "grad_norm": 297.3556823730469, + "learning_rate": 8.168524812403707e-06, + "loss": 22.7016, + "step": 176690 + }, + { + "epoch": 0.3569451795230227, + "grad_norm": 207.41456604003906, + "learning_rate": 8.168254776125883e-06, + "loss": 24.2357, + "step": 176700 + }, + { + "epoch": 0.3569653801557065, + "grad_norm": 428.276123046875, + "learning_rate": 8.167984724406371e-06, + "loss": 13.914, + "step": 176710 + }, + { + "epoch": 0.35698558078839027, + "grad_norm": 234.00286865234375, + "learning_rate": 8.167714657246486e-06, + "loss": 24.5092, + "step": 176720 + }, + { + "epoch": 0.3570057814210741, + "grad_norm": 7.838464736938477, + "learning_rate": 8.167444574647542e-06, + "loss": 17.7004, + "step": 176730 + }, + { + "epoch": 0.3570259820537579, + "grad_norm": 384.3555603027344, + "learning_rate": 8.16717447661086e-06, + "loss": 22.2544, + "step": 176740 + }, + { + "epoch": 0.3570461826864417, + "grad_norm": 132.55084228515625, + "learning_rate": 8.166904363137752e-06, + "loss": 19.6422, + "step": 176750 + }, + { + "epoch": 0.35706638331912555, + "grad_norm": 231.16519165039062, + "learning_rate": 8.166634234229535e-06, + "loss": 23.2205, + "step": 176760 + }, + { + "epoch": 0.35708658395180937, + "grad_norm": 488.4976501464844, + "learning_rate": 8.166364089887528e-06, + "loss": 26.6965, + "step": 176770 + }, + { + "epoch": 0.3571067845844932, + "grad_norm": 471.4288330078125, + "learning_rate": 8.166093930113048e-06, + "loss": 16.4289, + "step": 176780 + }, + { + "epoch": 0.357126985217177, + "grad_norm": 189.90719604492188, + "learning_rate": 8.16582375490741e-06, + "loss": 17.9405, + "step": 176790 + }, + { + "epoch": 0.3571471858498608, + "grad_norm": 333.07122802734375, + "learning_rate": 8.165553564271928e-06, + "loss": 20.4349, + "step": 176800 + }, + { + "epoch": 0.35716738648254465, + "grad_norm": 306.79095458984375, + "learning_rate": 8.165283358207924e-06, + "loss": 23.715, + "step": 176810 + }, + { + "epoch": 0.35718758711522847, + "grad_norm": 202.62399291992188, + "learning_rate": 8.165013136716714e-06, + "loss": 15.7137, + "step": 176820 + }, + { + "epoch": 0.3572077877479123, + "grad_norm": 1108.8873291015625, + "learning_rate": 8.164742899799612e-06, + "loss": 27.1154, + "step": 176830 + }, + { + "epoch": 0.3572279883805961, + "grad_norm": 481.61407470703125, + "learning_rate": 8.164472647457937e-06, + "loss": 9.8774, + 
"step": 176840 + }, + { + "epoch": 0.35724818901327987, + "grad_norm": 924.6329956054688, + "learning_rate": 8.164202379693008e-06, + "loss": 19.521, + "step": 176850 + }, + { + "epoch": 0.3572683896459637, + "grad_norm": 148.47378540039062, + "learning_rate": 8.163932096506137e-06, + "loss": 22.9844, + "step": 176860 + }, + { + "epoch": 0.3572885902786475, + "grad_norm": 513.387939453125, + "learning_rate": 8.163661797898647e-06, + "loss": 22.9582, + "step": 176870 + }, + { + "epoch": 0.35730879091133133, + "grad_norm": 0.0, + "learning_rate": 8.163391483871853e-06, + "loss": 11.2833, + "step": 176880 + }, + { + "epoch": 0.35732899154401515, + "grad_norm": 411.4132385253906, + "learning_rate": 8.163121154427073e-06, + "loss": 16.2289, + "step": 176890 + }, + { + "epoch": 0.35734919217669897, + "grad_norm": 213.57513427734375, + "learning_rate": 8.162850809565623e-06, + "loss": 22.0561, + "step": 176900 + }, + { + "epoch": 0.3573693928093828, + "grad_norm": 391.9679870605469, + "learning_rate": 8.162580449288822e-06, + "loss": 19.1955, + "step": 176910 + }, + { + "epoch": 0.3573895934420666, + "grad_norm": 525.6038818359375, + "learning_rate": 8.162310073597987e-06, + "loss": 38.1771, + "step": 176920 + }, + { + "epoch": 0.35740979407475043, + "grad_norm": 267.0684814453125, + "learning_rate": 8.162039682494438e-06, + "loss": 16.3052, + "step": 176930 + }, + { + "epoch": 0.35742999470743425, + "grad_norm": 350.752685546875, + "learning_rate": 8.16176927597949e-06, + "loss": 23.3383, + "step": 176940 + }, + { + "epoch": 0.35745019534011807, + "grad_norm": 502.6983642578125, + "learning_rate": 8.161498854054462e-06, + "loss": 11.7765, + "step": 176950 + }, + { + "epoch": 0.3574703959728019, + "grad_norm": 161.5146026611328, + "learning_rate": 8.161228416720673e-06, + "loss": 23.4157, + "step": 176960 + }, + { + "epoch": 0.3574905966054857, + "grad_norm": 789.0687866210938, + "learning_rate": 8.160957963979438e-06, + "loss": 20.3483, + "step": 176970 + }, + { + "epoch": 0.3575107972381695, + "grad_norm": 1065.0162353515625, + "learning_rate": 8.160687495832078e-06, + "loss": 25.1569, + "step": 176980 + }, + { + "epoch": 0.3575309978708533, + "grad_norm": 365.522705078125, + "learning_rate": 8.160417012279911e-06, + "loss": 19.2211, + "step": 176990 + }, + { + "epoch": 0.3575511985035371, + "grad_norm": 785.7221069335938, + "learning_rate": 8.160146513324256e-06, + "loss": 18.4823, + "step": 177000 + }, + { + "epoch": 0.35757139913622094, + "grad_norm": 177.9609375, + "learning_rate": 8.159875998966427e-06, + "loss": 15.5257, + "step": 177010 + }, + { + "epoch": 0.35759159976890476, + "grad_norm": 261.8163757324219, + "learning_rate": 8.15960546920775e-06, + "loss": 19.7163, + "step": 177020 + }, + { + "epoch": 0.3576118004015886, + "grad_norm": 622.7557373046875, + "learning_rate": 8.159334924049536e-06, + "loss": 24.4171, + "step": 177030 + }, + { + "epoch": 0.3576320010342724, + "grad_norm": 230.59262084960938, + "learning_rate": 8.159064363493106e-06, + "loss": 20.035, + "step": 177040 + }, + { + "epoch": 0.3576522016669562, + "grad_norm": 252.6420440673828, + "learning_rate": 8.158793787539782e-06, + "loss": 32.239, + "step": 177050 + }, + { + "epoch": 0.35767240229964004, + "grad_norm": 678.0930786132812, + "learning_rate": 8.158523196190879e-06, + "loss": 25.604, + "step": 177060 + }, + { + "epoch": 0.35769260293232386, + "grad_norm": 320.4878234863281, + "learning_rate": 8.158252589447717e-06, + "loss": 31.6477, + "step": 177070 + }, + { + "epoch": 0.3577128035650077, + "grad_norm": 
1682.7012939453125, + "learning_rate": 8.157981967311614e-06, + "loss": 19.9565, + "step": 177080 + }, + { + "epoch": 0.3577330041976915, + "grad_norm": 429.1732177734375, + "learning_rate": 8.15771132978389e-06, + "loss": 24.1278, + "step": 177090 + }, + { + "epoch": 0.35775320483037526, + "grad_norm": 230.71633911132812, + "learning_rate": 8.157440676865866e-06, + "loss": 12.2427, + "step": 177100 + }, + { + "epoch": 0.3577734054630591, + "grad_norm": 682.2756958007812, + "learning_rate": 8.15717000855886e-06, + "loss": 22.3429, + "step": 177110 + }, + { + "epoch": 0.3577936060957429, + "grad_norm": 234.9181671142578, + "learning_rate": 8.156899324864187e-06, + "loss": 22.0557, + "step": 177120 + }, + { + "epoch": 0.3578138067284267, + "grad_norm": 83.15522766113281, + "learning_rate": 8.15662862578317e-06, + "loss": 29.9858, + "step": 177130 + }, + { + "epoch": 0.35783400736111054, + "grad_norm": 206.202392578125, + "learning_rate": 8.15635791131713e-06, + "loss": 14.7609, + "step": 177140 + }, + { + "epoch": 0.35785420799379436, + "grad_norm": 492.3747863769531, + "learning_rate": 8.156087181467382e-06, + "loss": 25.9009, + "step": 177150 + }, + { + "epoch": 0.3578744086264782, + "grad_norm": 181.47689819335938, + "learning_rate": 8.15581643623525e-06, + "loss": 27.0679, + "step": 177160 + }, + { + "epoch": 0.357894609259162, + "grad_norm": 315.83404541015625, + "learning_rate": 8.155545675622049e-06, + "loss": 19.0138, + "step": 177170 + }, + { + "epoch": 0.3579148098918458, + "grad_norm": 234.9701385498047, + "learning_rate": 8.155274899629104e-06, + "loss": 12.2873, + "step": 177180 + }, + { + "epoch": 0.35793501052452964, + "grad_norm": 77.23339080810547, + "learning_rate": 8.155004108257731e-06, + "loss": 11.3722, + "step": 177190 + }, + { + "epoch": 0.35795521115721346, + "grad_norm": 954.743896484375, + "learning_rate": 8.154733301509249e-06, + "loss": 36.6601, + "step": 177200 + }, + { + "epoch": 0.3579754117898973, + "grad_norm": 144.52452087402344, + "learning_rate": 8.154462479384982e-06, + "loss": 23.6636, + "step": 177210 + }, + { + "epoch": 0.3579956124225811, + "grad_norm": 0.0, + "learning_rate": 8.154191641886244e-06, + "loss": 20.1353, + "step": 177220 + }, + { + "epoch": 0.35801581305526486, + "grad_norm": 503.2443542480469, + "learning_rate": 8.15392078901436e-06, + "loss": 21.068, + "step": 177230 + }, + { + "epoch": 0.3580360136879487, + "grad_norm": 317.6268310546875, + "learning_rate": 8.15364992077065e-06, + "loss": 13.95, + "step": 177240 + }, + { + "epoch": 0.3580562143206325, + "grad_norm": 106.98784637451172, + "learning_rate": 8.153379037156433e-06, + "loss": 16.8383, + "step": 177250 + }, + { + "epoch": 0.3580764149533163, + "grad_norm": 453.4849853515625, + "learning_rate": 8.153108138173027e-06, + "loss": 21.0666, + "step": 177260 + }, + { + "epoch": 0.35809661558600014, + "grad_norm": 1080.802001953125, + "learning_rate": 8.152837223821755e-06, + "loss": 27.8877, + "step": 177270 + }, + { + "epoch": 0.35811681621868396, + "grad_norm": 730.4622802734375, + "learning_rate": 8.152566294103936e-06, + "loss": 19.6041, + "step": 177280 + }, + { + "epoch": 0.3581370168513678, + "grad_norm": 451.93182373046875, + "learning_rate": 8.152295349020893e-06, + "loss": 18.5048, + "step": 177290 + }, + { + "epoch": 0.3581572174840516, + "grad_norm": 427.9884948730469, + "learning_rate": 8.152024388573945e-06, + "loss": 23.6541, + "step": 177300 + }, + { + "epoch": 0.3581774181167354, + "grad_norm": 3.375775098800659, + "learning_rate": 8.15175341276441e-06, + "loss": 
28.5684, + "step": 177310 + }, + { + "epoch": 0.35819761874941924, + "grad_norm": 328.9010314941406, + "learning_rate": 8.151482421593613e-06, + "loss": 31.7084, + "step": 177320 + }, + { + "epoch": 0.35821781938210306, + "grad_norm": 21.159927368164062, + "learning_rate": 8.151211415062872e-06, + "loss": 24.8793, + "step": 177330 + }, + { + "epoch": 0.3582380200147869, + "grad_norm": 321.2985534667969, + "learning_rate": 8.15094039317351e-06, + "loss": 26.8257, + "step": 177340 + }, + { + "epoch": 0.3582582206474707, + "grad_norm": 401.93707275390625, + "learning_rate": 8.150669355926848e-06, + "loss": 18.7087, + "step": 177350 + }, + { + "epoch": 0.35827842128015447, + "grad_norm": 57.58470916748047, + "learning_rate": 8.150398303324201e-06, + "loss": 13.7024, + "step": 177360 + }, + { + "epoch": 0.3582986219128383, + "grad_norm": 227.62020874023438, + "learning_rate": 8.150127235366897e-06, + "loss": 13.9598, + "step": 177370 + }, + { + "epoch": 0.3583188225455221, + "grad_norm": 197.00538635253906, + "learning_rate": 8.149856152056257e-06, + "loss": 27.529, + "step": 177380 + }, + { + "epoch": 0.35833902317820593, + "grad_norm": 167.59326171875, + "learning_rate": 8.149585053393599e-06, + "loss": 15.2473, + "step": 177390 + }, + { + "epoch": 0.35835922381088975, + "grad_norm": 160.30685424804688, + "learning_rate": 8.149313939380244e-06, + "loss": 22.6754, + "step": 177400 + }, + { + "epoch": 0.35837942444357357, + "grad_norm": 209.0167999267578, + "learning_rate": 8.149042810017515e-06, + "loss": 15.2747, + "step": 177410 + }, + { + "epoch": 0.3583996250762574, + "grad_norm": 112.47742462158203, + "learning_rate": 8.148771665306736e-06, + "loss": 20.4897, + "step": 177420 + }, + { + "epoch": 0.3584198257089412, + "grad_norm": 470.9197692871094, + "learning_rate": 8.148500505249224e-06, + "loss": 20.8236, + "step": 177430 + }, + { + "epoch": 0.35844002634162503, + "grad_norm": 538.292724609375, + "learning_rate": 8.1482293298463e-06, + "loss": 27.6507, + "step": 177440 + }, + { + "epoch": 0.35846022697430885, + "grad_norm": 415.1450500488281, + "learning_rate": 8.147958139099292e-06, + "loss": 28.701, + "step": 177450 + }, + { + "epoch": 0.35848042760699267, + "grad_norm": 379.1101989746094, + "learning_rate": 8.147686933009515e-06, + "loss": 16.573, + "step": 177460 + }, + { + "epoch": 0.3585006282396765, + "grad_norm": 2266.6806640625, + "learning_rate": 8.147415711578295e-06, + "loss": 44.919, + "step": 177470 + }, + { + "epoch": 0.3585208288723603, + "grad_norm": 393.7117614746094, + "learning_rate": 8.147144474806954e-06, + "loss": 16.6654, + "step": 177480 + }, + { + "epoch": 0.3585410295050441, + "grad_norm": 874.4924926757812, + "learning_rate": 8.14687322269681e-06, + "loss": 25.838, + "step": 177490 + }, + { + "epoch": 0.3585612301377279, + "grad_norm": 288.9904479980469, + "learning_rate": 8.146601955249187e-06, + "loss": 11.5666, + "step": 177500 + }, + { + "epoch": 0.3585814307704117, + "grad_norm": 838.0281372070312, + "learning_rate": 8.14633067246541e-06, + "loss": 30.3927, + "step": 177510 + }, + { + "epoch": 0.35860163140309553, + "grad_norm": 547.0402221679688, + "learning_rate": 8.146059374346798e-06, + "loss": 19.2305, + "step": 177520 + }, + { + "epoch": 0.35862183203577935, + "grad_norm": 509.0076904296875, + "learning_rate": 8.145788060894675e-06, + "loss": 14.9584, + "step": 177530 + }, + { + "epoch": 0.3586420326684632, + "grad_norm": 354.7601013183594, + "learning_rate": 8.145516732110362e-06, + "loss": 21.724, + "step": 177540 + }, + { + "epoch": 
0.358662233301147, + "grad_norm": 309.3062438964844, + "learning_rate": 8.14524538799518e-06, + "loss": 17.2265, + "step": 177550 + }, + { + "epoch": 0.3586824339338308, + "grad_norm": 715.89990234375, + "learning_rate": 8.144974028550456e-06, + "loss": 40.1115, + "step": 177560 + }, + { + "epoch": 0.35870263456651463, + "grad_norm": 586.0116577148438, + "learning_rate": 8.14470265377751e-06, + "loss": 33.4867, + "step": 177570 + }, + { + "epoch": 0.35872283519919845, + "grad_norm": 120.88922882080078, + "learning_rate": 8.144431263677663e-06, + "loss": 12.1822, + "step": 177580 + }, + { + "epoch": 0.3587430358318823, + "grad_norm": 298.3090515136719, + "learning_rate": 8.144159858252241e-06, + "loss": 14.616, + "step": 177590 + }, + { + "epoch": 0.3587632364645661, + "grad_norm": 692.1654052734375, + "learning_rate": 8.143888437502565e-06, + "loss": 29.6946, + "step": 177600 + }, + { + "epoch": 0.3587834370972499, + "grad_norm": 417.9906311035156, + "learning_rate": 8.143617001429957e-06, + "loss": 28.8346, + "step": 177610 + }, + { + "epoch": 0.3588036377299337, + "grad_norm": 487.2813415527344, + "learning_rate": 8.143345550035742e-06, + "loss": 16.8323, + "step": 177620 + }, + { + "epoch": 0.3588238383626175, + "grad_norm": 268.7579040527344, + "learning_rate": 8.14307408332124e-06, + "loss": 22.2511, + "step": 177630 + }, + { + "epoch": 0.3588440389953013, + "grad_norm": 4335.0478515625, + "learning_rate": 8.14280260128778e-06, + "loss": 49.9402, + "step": 177640 + }, + { + "epoch": 0.35886423962798514, + "grad_norm": 284.3321228027344, + "learning_rate": 8.142531103936677e-06, + "loss": 18.8025, + "step": 177650 + }, + { + "epoch": 0.35888444026066896, + "grad_norm": 593.8846435546875, + "learning_rate": 8.142259591269261e-06, + "loss": 15.7247, + "step": 177660 + }, + { + "epoch": 0.3589046408933528, + "grad_norm": 239.56578063964844, + "learning_rate": 8.141988063286853e-06, + "loss": 24.4761, + "step": 177670 + }, + { + "epoch": 0.3589248415260366, + "grad_norm": 29.926515579223633, + "learning_rate": 8.141716519990776e-06, + "loss": 31.3717, + "step": 177680 + }, + { + "epoch": 0.3589450421587204, + "grad_norm": 1262.7120361328125, + "learning_rate": 8.141444961382353e-06, + "loss": 33.6779, + "step": 177690 + }, + { + "epoch": 0.35896524279140424, + "grad_norm": 622.3956909179688, + "learning_rate": 8.141173387462908e-06, + "loss": 30.4117, + "step": 177700 + }, + { + "epoch": 0.35898544342408806, + "grad_norm": 427.6423645019531, + "learning_rate": 8.140901798233766e-06, + "loss": 21.7507, + "step": 177710 + }, + { + "epoch": 0.3590056440567719, + "grad_norm": 701.2109375, + "learning_rate": 8.14063019369625e-06, + "loss": 30.8782, + "step": 177720 + }, + { + "epoch": 0.3590258446894557, + "grad_norm": 27.665742874145508, + "learning_rate": 8.140358573851682e-06, + "loss": 27.4379, + "step": 177730 + }, + { + "epoch": 0.35904604532213946, + "grad_norm": 201.99661254882812, + "learning_rate": 8.140086938701387e-06, + "loss": 12.4247, + "step": 177740 + }, + { + "epoch": 0.3590662459548233, + "grad_norm": 178.28997802734375, + "learning_rate": 8.139815288246692e-06, + "loss": 17.5456, + "step": 177750 + }, + { + "epoch": 0.3590864465875071, + "grad_norm": 569.2922973632812, + "learning_rate": 8.139543622488914e-06, + "loss": 34.9385, + "step": 177760 + }, + { + "epoch": 0.3591066472201909, + "grad_norm": 293.24383544921875, + "learning_rate": 8.139271941429383e-06, + "loss": 17.1171, + "step": 177770 + }, + { + "epoch": 0.35912684785287474, + "grad_norm": 75.68390655517578, + 
"learning_rate": 8.139000245069421e-06, + "loss": 17.4726, + "step": 177780 + }, + { + "epoch": 0.35914704848555856, + "grad_norm": 401.88262939453125, + "learning_rate": 8.138728533410354e-06, + "loss": 22.3922, + "step": 177790 + }, + { + "epoch": 0.3591672491182424, + "grad_norm": 574.680419921875, + "learning_rate": 8.138456806453503e-06, + "loss": 13.5508, + "step": 177800 + }, + { + "epoch": 0.3591874497509262, + "grad_norm": 515.5734252929688, + "learning_rate": 8.138185064200195e-06, + "loss": 29.5561, + "step": 177810 + }, + { + "epoch": 0.35920765038361, + "grad_norm": 722.4268188476562, + "learning_rate": 8.137913306651754e-06, + "loss": 28.8704, + "step": 177820 + }, + { + "epoch": 0.35922785101629384, + "grad_norm": 499.84326171875, + "learning_rate": 8.137641533809503e-06, + "loss": 21.1517, + "step": 177830 + }, + { + "epoch": 0.35924805164897766, + "grad_norm": 451.6605224609375, + "learning_rate": 8.137369745674769e-06, + "loss": 13.4682, + "step": 177840 + }, + { + "epoch": 0.3592682522816615, + "grad_norm": 372.6636657714844, + "learning_rate": 8.137097942248875e-06, + "loss": 27.0819, + "step": 177850 + }, + { + "epoch": 0.3592884529143453, + "grad_norm": 22.09795379638672, + "learning_rate": 8.136826123533144e-06, + "loss": 22.3577, + "step": 177860 + }, + { + "epoch": 0.35930865354702907, + "grad_norm": 78.34536743164062, + "learning_rate": 8.136554289528906e-06, + "loss": 18.4124, + "step": 177870 + }, + { + "epoch": 0.3593288541797129, + "grad_norm": 683.2900390625, + "learning_rate": 8.136282440237481e-06, + "loss": 22.9591, + "step": 177880 + }, + { + "epoch": 0.3593490548123967, + "grad_norm": 596.79541015625, + "learning_rate": 8.136010575660197e-06, + "loss": 14.4441, + "step": 177890 + }, + { + "epoch": 0.3593692554450805, + "grad_norm": 287.7501220703125, + "learning_rate": 8.135738695798377e-06, + "loss": 19.8037, + "step": 177900 + }, + { + "epoch": 0.35938945607776435, + "grad_norm": 34.817054748535156, + "learning_rate": 8.135466800653347e-06, + "loss": 9.004, + "step": 177910 + }, + { + "epoch": 0.35940965671044817, + "grad_norm": 544.5115356445312, + "learning_rate": 8.135194890226432e-06, + "loss": 23.7276, + "step": 177920 + }, + { + "epoch": 0.359429857343132, + "grad_norm": 754.536865234375, + "learning_rate": 8.134922964518957e-06, + "loss": 34.7466, + "step": 177930 + }, + { + "epoch": 0.3594500579758158, + "grad_norm": 302.80645751953125, + "learning_rate": 8.134651023532249e-06, + "loss": 24.1984, + "step": 177940 + }, + { + "epoch": 0.3594702586084996, + "grad_norm": 314.097412109375, + "learning_rate": 8.13437906726763e-06, + "loss": 21.9839, + "step": 177950 + }, + { + "epoch": 0.35949045924118345, + "grad_norm": 627.1465454101562, + "learning_rate": 8.13410709572643e-06, + "loss": 42.2638, + "step": 177960 + }, + { + "epoch": 0.35951065987386727, + "grad_norm": 199.92535400390625, + "learning_rate": 8.13383510890997e-06, + "loss": 10.2202, + "step": 177970 + }, + { + "epoch": 0.3595308605065511, + "grad_norm": 678.3382568359375, + "learning_rate": 8.133563106819579e-06, + "loss": 20.0439, + "step": 177980 + }, + { + "epoch": 0.3595510611392349, + "grad_norm": 436.7383117675781, + "learning_rate": 8.13329108945658e-06, + "loss": 17.698, + "step": 177990 + }, + { + "epoch": 0.35957126177191867, + "grad_norm": 559.6670532226562, + "learning_rate": 8.133019056822303e-06, + "loss": 22.6511, + "step": 178000 + }, + { + "epoch": 0.3595914624046025, + "grad_norm": 306.81243896484375, + "learning_rate": 8.132747008918069e-06, + "loss": 31.3838, + 
"step": 178010 + }, + { + "epoch": 0.3596116630372863, + "grad_norm": 190.06283569335938, + "learning_rate": 8.132474945745207e-06, + "loss": 21.2258, + "step": 178020 + }, + { + "epoch": 0.35963186366997013, + "grad_norm": 36.86237716674805, + "learning_rate": 8.132202867305043e-06, + "loss": 11.6965, + "step": 178030 + }, + { + "epoch": 0.35965206430265395, + "grad_norm": 521.4072265625, + "learning_rate": 8.1319307735989e-06, + "loss": 28.9941, + "step": 178040 + }, + { + "epoch": 0.35967226493533777, + "grad_norm": 1079.01318359375, + "learning_rate": 8.131658664628108e-06, + "loss": 21.3401, + "step": 178050 + }, + { + "epoch": 0.3596924655680216, + "grad_norm": 300.1889953613281, + "learning_rate": 8.131386540393991e-06, + "loss": 24.1562, + "step": 178060 + }, + { + "epoch": 0.3597126662007054, + "grad_norm": 5.549616813659668, + "learning_rate": 8.131114400897874e-06, + "loss": 16.3086, + "step": 178070 + }, + { + "epoch": 0.35973286683338923, + "grad_norm": 557.4177856445312, + "learning_rate": 8.130842246141086e-06, + "loss": 25.6069, + "step": 178080 + }, + { + "epoch": 0.35975306746607305, + "grad_norm": 246.23886108398438, + "learning_rate": 8.130570076124954e-06, + "loss": 17.3673, + "step": 178090 + }, + { + "epoch": 0.35977326809875687, + "grad_norm": 338.0411376953125, + "learning_rate": 8.130297890850803e-06, + "loss": 24.9579, + "step": 178100 + }, + { + "epoch": 0.3597934687314407, + "grad_norm": 146.607421875, + "learning_rate": 8.130025690319958e-06, + "loss": 36.285, + "step": 178110 + }, + { + "epoch": 0.3598136693641245, + "grad_norm": 228.3867645263672, + "learning_rate": 8.129753474533749e-06, + "loss": 20.9564, + "step": 178120 + }, + { + "epoch": 0.3598338699968083, + "grad_norm": 380.2311096191406, + "learning_rate": 8.1294812434935e-06, + "loss": 20.4764, + "step": 178130 + }, + { + "epoch": 0.3598540706294921, + "grad_norm": 360.1344299316406, + "learning_rate": 8.129208997200539e-06, + "loss": 18.9098, + "step": 178140 + }, + { + "epoch": 0.3598742712621759, + "grad_norm": 486.223388671875, + "learning_rate": 8.128936735656195e-06, + "loss": 19.3291, + "step": 178150 + }, + { + "epoch": 0.35989447189485974, + "grad_norm": 258.1197204589844, + "learning_rate": 8.12866445886179e-06, + "loss": 17.2444, + "step": 178160 + }, + { + "epoch": 0.35991467252754356, + "grad_norm": 306.47991943359375, + "learning_rate": 8.128392166818655e-06, + "loss": 14.1862, + "step": 178170 + }, + { + "epoch": 0.3599348731602274, + "grad_norm": 343.256591796875, + "learning_rate": 8.128119859528116e-06, + "loss": 14.6788, + "step": 178180 + }, + { + "epoch": 0.3599550737929112, + "grad_norm": 157.1279296875, + "learning_rate": 8.127847536991501e-06, + "loss": 28.506, + "step": 178190 + }, + { + "epoch": 0.359975274425595, + "grad_norm": 647.3510131835938, + "learning_rate": 8.127575199210136e-06, + "loss": 24.6872, + "step": 178200 + }, + { + "epoch": 0.35999547505827884, + "grad_norm": 430.32830810546875, + "learning_rate": 8.127302846185348e-06, + "loss": 18.3237, + "step": 178210 + }, + { + "epoch": 0.36001567569096266, + "grad_norm": 307.2442932128906, + "learning_rate": 8.127030477918466e-06, + "loss": 28.0633, + "step": 178220 + }, + { + "epoch": 0.3600358763236465, + "grad_norm": 328.6007080078125, + "learning_rate": 8.126758094410816e-06, + "loss": 20.8106, + "step": 178230 + }, + { + "epoch": 0.3600560769563303, + "grad_norm": 205.92359924316406, + "learning_rate": 8.126485695663728e-06, + "loss": 18.6058, + "step": 178240 + }, + { + "epoch": 0.3600762775890141, + 
"grad_norm": 419.4658508300781, + "learning_rate": 8.126213281678527e-06, + "loss": 30.8753, + "step": 178250 + }, + { + "epoch": 0.3600964782216979, + "grad_norm": 575.1708374023438, + "learning_rate": 8.12594085245654e-06, + "loss": 14.9826, + "step": 178260 + }, + { + "epoch": 0.3601166788543817, + "grad_norm": 415.85113525390625, + "learning_rate": 8.1256684079991e-06, + "loss": 13.5627, + "step": 178270 + }, + { + "epoch": 0.3601368794870655, + "grad_norm": 374.0655212402344, + "learning_rate": 8.125395948307528e-06, + "loss": 13.7356, + "step": 178280 + }, + { + "epoch": 0.36015708011974934, + "grad_norm": 447.27825927734375, + "learning_rate": 8.125123473383156e-06, + "loss": 17.7901, + "step": 178290 + }, + { + "epoch": 0.36017728075243316, + "grad_norm": 611.2520751953125, + "learning_rate": 8.124850983227313e-06, + "loss": 30.2359, + "step": 178300 + }, + { + "epoch": 0.360197481385117, + "grad_norm": 398.5036926269531, + "learning_rate": 8.124578477841323e-06, + "loss": 17.1272, + "step": 178310 + }, + { + "epoch": 0.3602176820178008, + "grad_norm": 208.12890625, + "learning_rate": 8.124305957226518e-06, + "loss": 18.1814, + "step": 178320 + }, + { + "epoch": 0.3602378826504846, + "grad_norm": 405.2329406738281, + "learning_rate": 8.124033421384223e-06, + "loss": 13.769, + "step": 178330 + }, + { + "epoch": 0.36025808328316844, + "grad_norm": 451.02703857421875, + "learning_rate": 8.12376087031577e-06, + "loss": 26.2292, + "step": 178340 + }, + { + "epoch": 0.36027828391585226, + "grad_norm": 146.75135803222656, + "learning_rate": 8.123488304022485e-06, + "loss": 5.5689, + "step": 178350 + }, + { + "epoch": 0.3602984845485361, + "grad_norm": 279.85107421875, + "learning_rate": 8.123215722505695e-06, + "loss": 29.3162, + "step": 178360 + }, + { + "epoch": 0.3603186851812199, + "grad_norm": 279.0551452636719, + "learning_rate": 8.122943125766731e-06, + "loss": 24.3572, + "step": 178370 + }, + { + "epoch": 0.36033888581390366, + "grad_norm": 280.7283020019531, + "learning_rate": 8.122670513806924e-06, + "loss": 37.2789, + "step": 178380 + }, + { + "epoch": 0.3603590864465875, + "grad_norm": 534.52587890625, + "learning_rate": 8.122397886627596e-06, + "loss": 29.5953, + "step": 178390 + }, + { + "epoch": 0.3603792870792713, + "grad_norm": 439.2769775390625, + "learning_rate": 8.12212524423008e-06, + "loss": 12.9245, + "step": 178400 + }, + { + "epoch": 0.3603994877119551, + "grad_norm": 801.3646850585938, + "learning_rate": 8.121852586615705e-06, + "loss": 29.8669, + "step": 178410 + }, + { + "epoch": 0.36041968834463894, + "grad_norm": 461.25018310546875, + "learning_rate": 8.121579913785799e-06, + "loss": 23.2916, + "step": 178420 + }, + { + "epoch": 0.36043988897732276, + "grad_norm": 255.2156982421875, + "learning_rate": 8.12130722574169e-06, + "loss": 11.5908, + "step": 178430 + }, + { + "epoch": 0.3604600896100066, + "grad_norm": 20.481443405151367, + "learning_rate": 8.12103452248471e-06, + "loss": 22.8622, + "step": 178440 + }, + { + "epoch": 0.3604802902426904, + "grad_norm": 42.72632598876953, + "learning_rate": 8.120761804016186e-06, + "loss": 10.2893, + "step": 178450 + }, + { + "epoch": 0.3605004908753742, + "grad_norm": 182.3892822265625, + "learning_rate": 8.120489070337448e-06, + "loss": 30.1117, + "step": 178460 + }, + { + "epoch": 0.36052069150805804, + "grad_norm": 350.43414306640625, + "learning_rate": 8.120216321449823e-06, + "loss": 14.188, + "step": 178470 + }, + { + "epoch": 0.36054089214074186, + "grad_norm": 435.0140686035156, + "learning_rate": 
8.119943557354641e-06, + "loss": 12.9704, + "step": 178480 + }, + { + "epoch": 0.3605610927734257, + "grad_norm": 172.6223602294922, + "learning_rate": 8.119670778053236e-06, + "loss": 23.8365, + "step": 178490 + }, + { + "epoch": 0.3605812934061095, + "grad_norm": 447.8199157714844, + "learning_rate": 8.119397983546932e-06, + "loss": 13.7439, + "step": 178500 + }, + { + "epoch": 0.36060149403879327, + "grad_norm": 254.7931365966797, + "learning_rate": 8.119125173837061e-06, + "loss": 15.6657, + "step": 178510 + }, + { + "epoch": 0.3606216946714771, + "grad_norm": 245.6374969482422, + "learning_rate": 8.118852348924951e-06, + "loss": 25.515, + "step": 178520 + }, + { + "epoch": 0.3606418953041609, + "grad_norm": 205.4235382080078, + "learning_rate": 8.118579508811934e-06, + "loss": 30.7655, + "step": 178530 + }, + { + "epoch": 0.36066209593684473, + "grad_norm": 137.30201721191406, + "learning_rate": 8.118306653499339e-06, + "loss": 30.7389, + "step": 178540 + }, + { + "epoch": 0.36068229656952855, + "grad_norm": 428.897705078125, + "learning_rate": 8.118033782988496e-06, + "loss": 28.0444, + "step": 178550 + }, + { + "epoch": 0.36070249720221237, + "grad_norm": 592.7665405273438, + "learning_rate": 8.117760897280733e-06, + "loss": 43.9568, + "step": 178560 + }, + { + "epoch": 0.3607226978348962, + "grad_norm": 0.0, + "learning_rate": 8.117487996377383e-06, + "loss": 34.4643, + "step": 178570 + }, + { + "epoch": 0.36074289846758, + "grad_norm": 530.3194580078125, + "learning_rate": 8.117215080279774e-06, + "loss": 24.9329, + "step": 178580 + }, + { + "epoch": 0.36076309910026383, + "grad_norm": 512.5408325195312, + "learning_rate": 8.116942148989238e-06, + "loss": 29.8084, + "step": 178590 + }, + { + "epoch": 0.36078329973294765, + "grad_norm": 326.04730224609375, + "learning_rate": 8.116669202507102e-06, + "loss": 39.6674, + "step": 178600 + }, + { + "epoch": 0.36080350036563147, + "grad_norm": 1247.66943359375, + "learning_rate": 8.116396240834699e-06, + "loss": 23.1247, + "step": 178610 + }, + { + "epoch": 0.3608237009983153, + "grad_norm": 67.5570068359375, + "learning_rate": 8.116123263973359e-06, + "loss": 26.048, + "step": 178620 + }, + { + "epoch": 0.3608439016309991, + "grad_norm": 419.583984375, + "learning_rate": 8.11585027192441e-06, + "loss": 22.5238, + "step": 178630 + }, + { + "epoch": 0.3608641022636829, + "grad_norm": 253.14178466796875, + "learning_rate": 8.115577264689188e-06, + "loss": 17.6159, + "step": 178640 + }, + { + "epoch": 0.3608843028963667, + "grad_norm": 208.3504638671875, + "learning_rate": 8.115304242269017e-06, + "loss": 27.7183, + "step": 178650 + }, + { + "epoch": 0.3609045035290505, + "grad_norm": 266.0090026855469, + "learning_rate": 8.115031204665233e-06, + "loss": 15.1854, + "step": 178660 + }, + { + "epoch": 0.36092470416173433, + "grad_norm": 594.6963500976562, + "learning_rate": 8.114758151879164e-06, + "loss": 18.6671, + "step": 178670 + }, + { + "epoch": 0.36094490479441815, + "grad_norm": 677.4618530273438, + "learning_rate": 8.114485083912143e-06, + "loss": 16.8773, + "step": 178680 + }, + { + "epoch": 0.360965105427102, + "grad_norm": 788.9229125976562, + "learning_rate": 8.1142120007655e-06, + "loss": 27.072, + "step": 178690 + }, + { + "epoch": 0.3609853060597858, + "grad_norm": 360.16583251953125, + "learning_rate": 8.113938902440563e-06, + "loss": 29.9497, + "step": 178700 + }, + { + "epoch": 0.3610055066924696, + "grad_norm": 475.398193359375, + "learning_rate": 8.113665788938667e-06, + "loss": 14.3001, + "step": 178710 + }, + { + 
"epoch": 0.36102570732515343, + "grad_norm": 192.18301391601562, + "learning_rate": 8.113392660261141e-06, + "loss": 18.8154, + "step": 178720 + }, + { + "epoch": 0.36104590795783725, + "grad_norm": 462.969482421875, + "learning_rate": 8.113119516409315e-06, + "loss": 15.4176, + "step": 178730 + }, + { + "epoch": 0.3610661085905211, + "grad_norm": 345.4085693359375, + "learning_rate": 8.112846357384526e-06, + "loss": 10.2465, + "step": 178740 + }, + { + "epoch": 0.3610863092232049, + "grad_norm": 318.5648498535156, + "learning_rate": 8.112573183188099e-06, + "loss": 27.8682, + "step": 178750 + }, + { + "epoch": 0.3611065098558887, + "grad_norm": 376.4531555175781, + "learning_rate": 8.112299993821366e-06, + "loss": 23.3852, + "step": 178760 + }, + { + "epoch": 0.3611267104885725, + "grad_norm": 650.11279296875, + "learning_rate": 8.112026789285664e-06, + "loss": 17.0906, + "step": 178770 + }, + { + "epoch": 0.3611469111212563, + "grad_norm": 423.9063720703125, + "learning_rate": 8.111753569582319e-06, + "loss": 27.5014, + "step": 178780 + }, + { + "epoch": 0.3611671117539401, + "grad_norm": 793.5993041992188, + "learning_rate": 8.111480334712664e-06, + "loss": 18.9096, + "step": 178790 + }, + { + "epoch": 0.36118731238662394, + "grad_norm": 205.39283752441406, + "learning_rate": 8.111207084678033e-06, + "loss": 17.618, + "step": 178800 + }, + { + "epoch": 0.36120751301930776, + "grad_norm": 164.9736785888672, + "learning_rate": 8.110933819479754e-06, + "loss": 13.0748, + "step": 178810 + }, + { + "epoch": 0.3612277136519916, + "grad_norm": 243.2801513671875, + "learning_rate": 8.110660539119163e-06, + "loss": 5.6099, + "step": 178820 + }, + { + "epoch": 0.3612479142846754, + "grad_norm": 600.1412353515625, + "learning_rate": 8.110387243597588e-06, + "loss": 25.9991, + "step": 178830 + }, + { + "epoch": 0.3612681149173592, + "grad_norm": 203.08787536621094, + "learning_rate": 8.110113932916363e-06, + "loss": 18.2952, + "step": 178840 + }, + { + "epoch": 0.36128831555004304, + "grad_norm": 15.868556022644043, + "learning_rate": 8.10984060707682e-06, + "loss": 10.9692, + "step": 178850 + }, + { + "epoch": 0.36130851618272686, + "grad_norm": 365.302734375, + "learning_rate": 8.109567266080292e-06, + "loss": 32.2658, + "step": 178860 + }, + { + "epoch": 0.3613287168154107, + "grad_norm": 508.83551025390625, + "learning_rate": 8.109293909928111e-06, + "loss": 26.9549, + "step": 178870 + }, + { + "epoch": 0.3613489174480945, + "grad_norm": 230.3358154296875, + "learning_rate": 8.109020538621607e-06, + "loss": 29.653, + "step": 178880 + }, + { + "epoch": 0.3613691180807783, + "grad_norm": 532.472412109375, + "learning_rate": 8.108747152162113e-06, + "loss": 11.6277, + "step": 178890 + }, + { + "epoch": 0.3613893187134621, + "grad_norm": 239.40606689453125, + "learning_rate": 8.108473750550965e-06, + "loss": 25.3715, + "step": 178900 + }, + { + "epoch": 0.3614095193461459, + "grad_norm": 459.02783203125, + "learning_rate": 8.10820033378949e-06, + "loss": 26.4688, + "step": 178910 + }, + { + "epoch": 0.3614297199788297, + "grad_norm": 1088.55615234375, + "learning_rate": 8.107926901879027e-06, + "loss": 25.8571, + "step": 178920 + }, + { + "epoch": 0.36144992061151354, + "grad_norm": 209.74513244628906, + "learning_rate": 8.107653454820902e-06, + "loss": 8.8448, + "step": 178930 + }, + { + "epoch": 0.36147012124419736, + "grad_norm": 376.676513671875, + "learning_rate": 8.107379992616453e-06, + "loss": 37.7792, + "step": 178940 + }, + { + "epoch": 0.3614903218768812, + "grad_norm": 
506.7843017578125, + "learning_rate": 8.10710651526701e-06, + "loss": 13.5302, + "step": 178950 + }, + { + "epoch": 0.361510522509565, + "grad_norm": 483.62109375, + "learning_rate": 8.106833022773908e-06, + "loss": 18.1689, + "step": 178960 + }, + { + "epoch": 0.3615307231422488, + "grad_norm": 304.67926025390625, + "learning_rate": 8.106559515138477e-06, + "loss": 15.037, + "step": 178970 + }, + { + "epoch": 0.36155092377493264, + "grad_norm": 716.6126708984375, + "learning_rate": 8.106285992362052e-06, + "loss": 17.5214, + "step": 178980 + }, + { + "epoch": 0.36157112440761646, + "grad_norm": 160.1989288330078, + "learning_rate": 8.106012454445966e-06, + "loss": 27.9466, + "step": 178990 + }, + { + "epoch": 0.3615913250403003, + "grad_norm": 349.72882080078125, + "learning_rate": 8.105738901391553e-06, + "loss": 25.9572, + "step": 179000 + }, + { + "epoch": 0.3616115256729841, + "grad_norm": 366.52716064453125, + "learning_rate": 8.105465333200144e-06, + "loss": 20.5821, + "step": 179010 + }, + { + "epoch": 0.36163172630566787, + "grad_norm": 268.8068542480469, + "learning_rate": 8.105191749873075e-06, + "loss": 27.8748, + "step": 179020 + }, + { + "epoch": 0.3616519269383517, + "grad_norm": 67.10267639160156, + "learning_rate": 8.104918151411676e-06, + "loss": 12.8232, + "step": 179030 + }, + { + "epoch": 0.3616721275710355, + "grad_norm": 665.4845581054688, + "learning_rate": 8.104644537817284e-06, + "loss": 49.0935, + "step": 179040 + }, + { + "epoch": 0.3616923282037193, + "grad_norm": 449.352294921875, + "learning_rate": 8.10437090909123e-06, + "loss": 29.9721, + "step": 179050 + }, + { + "epoch": 0.36171252883640315, + "grad_norm": 290.5399475097656, + "learning_rate": 8.10409726523485e-06, + "loss": 13.8746, + "step": 179060 + }, + { + "epoch": 0.36173272946908697, + "grad_norm": 498.26446533203125, + "learning_rate": 8.103823606249476e-06, + "loss": 18.2015, + "step": 179070 + }, + { + "epoch": 0.3617529301017708, + "grad_norm": 556.02587890625, + "learning_rate": 8.103549932136442e-06, + "loss": 13.341, + "step": 179080 + }, + { + "epoch": 0.3617731307344546, + "grad_norm": 474.5405578613281, + "learning_rate": 8.10327624289708e-06, + "loss": 19.0943, + "step": 179090 + }, + { + "epoch": 0.3617933313671384, + "grad_norm": 139.08218383789062, + "learning_rate": 8.103002538532729e-06, + "loss": 15.2622, + "step": 179100 + }, + { + "epoch": 0.36181353199982225, + "grad_norm": 291.2727355957031, + "learning_rate": 8.102728819044718e-06, + "loss": 30.471, + "step": 179110 + }, + { + "epoch": 0.36183373263250607, + "grad_norm": 631.0053100585938, + "learning_rate": 8.102455084434385e-06, + "loss": 35.978, + "step": 179120 + }, + { + "epoch": 0.3618539332651899, + "grad_norm": 453.00311279296875, + "learning_rate": 8.102181334703061e-06, + "loss": 13.041, + "step": 179130 + }, + { + "epoch": 0.3618741338978737, + "grad_norm": 106.15189361572266, + "learning_rate": 8.101907569852081e-06, + "loss": 8.384, + "step": 179140 + }, + { + "epoch": 0.36189433453055747, + "grad_norm": 333.9896240234375, + "learning_rate": 8.101633789882781e-06, + "loss": 18.6485, + "step": 179150 + }, + { + "epoch": 0.3619145351632413, + "grad_norm": 400.0981140136719, + "learning_rate": 8.101359994796494e-06, + "loss": 20.3175, + "step": 179160 + }, + { + "epoch": 0.3619347357959251, + "grad_norm": 257.89410400390625, + "learning_rate": 8.101086184594554e-06, + "loss": 15.6535, + "step": 179170 + }, + { + "epoch": 0.36195493642860893, + "grad_norm": 28.253049850463867, + "learning_rate": 8.100812359278294e-06, 
+ "loss": 24.1828, + "step": 179180 + }, + { + "epoch": 0.36197513706129275, + "grad_norm": 553.4799194335938, + "learning_rate": 8.100538518849053e-06, + "loss": 13.5885, + "step": 179190 + }, + { + "epoch": 0.36199533769397657, + "grad_norm": 210.08636474609375, + "learning_rate": 8.100264663308165e-06, + "loss": 11.83, + "step": 179200 + }, + { + "epoch": 0.3620155383266604, + "grad_norm": 519.5848999023438, + "learning_rate": 8.09999079265696e-06, + "loss": 16.0559, + "step": 179210 + }, + { + "epoch": 0.3620357389593442, + "grad_norm": 285.53985595703125, + "learning_rate": 8.099716906896776e-06, + "loss": 42.4347, + "step": 179220 + }, + { + "epoch": 0.36205593959202803, + "grad_norm": 325.1741027832031, + "learning_rate": 8.099443006028948e-06, + "loss": 24.6796, + "step": 179230 + }, + { + "epoch": 0.36207614022471185, + "grad_norm": 671.6212158203125, + "learning_rate": 8.099169090054812e-06, + "loss": 23.3181, + "step": 179240 + }, + { + "epoch": 0.36209634085739567, + "grad_norm": 683.6408081054688, + "learning_rate": 8.098895158975702e-06, + "loss": 23.4439, + "step": 179250 + }, + { + "epoch": 0.3621165414900795, + "grad_norm": 384.7148132324219, + "learning_rate": 8.098621212792952e-06, + "loss": 23.5365, + "step": 179260 + }, + { + "epoch": 0.3621367421227633, + "grad_norm": 4892.22119140625, + "learning_rate": 8.098347251507896e-06, + "loss": 53.4412, + "step": 179270 + }, + { + "epoch": 0.3621569427554471, + "grad_norm": 223.54103088378906, + "learning_rate": 8.098073275121876e-06, + "loss": 17.9581, + "step": 179280 + }, + { + "epoch": 0.3621771433881309, + "grad_norm": 448.0371398925781, + "learning_rate": 8.097799283636219e-06, + "loss": 19.9835, + "step": 179290 + }, + { + "epoch": 0.3621973440208147, + "grad_norm": 550.0449829101562, + "learning_rate": 8.097525277052265e-06, + "loss": 16.1264, + "step": 179300 + }, + { + "epoch": 0.36221754465349854, + "grad_norm": 496.9193115234375, + "learning_rate": 8.097251255371347e-06, + "loss": 24.7837, + "step": 179310 + }, + { + "epoch": 0.36223774528618236, + "grad_norm": 415.5220031738281, + "learning_rate": 8.096977218594803e-06, + "loss": 20.3981, + "step": 179320 + }, + { + "epoch": 0.3622579459188662, + "grad_norm": 279.1328125, + "learning_rate": 8.09670316672397e-06, + "loss": 29.0992, + "step": 179330 + }, + { + "epoch": 0.36227814655155, + "grad_norm": 472.2081604003906, + "learning_rate": 8.096429099760176e-06, + "loss": 19.3455, + "step": 179340 + }, + { + "epoch": 0.3622983471842338, + "grad_norm": 279.03106689453125, + "learning_rate": 8.096155017704768e-06, + "loss": 22.1065, + "step": 179350 + }, + { + "epoch": 0.36231854781691764, + "grad_norm": 679.9822387695312, + "learning_rate": 8.095880920559073e-06, + "loss": 26.3839, + "step": 179360 + }, + { + "epoch": 0.36233874844960146, + "grad_norm": 461.66571044921875, + "learning_rate": 8.09560680832443e-06, + "loss": 30.5445, + "step": 179370 + }, + { + "epoch": 0.3623589490822853, + "grad_norm": 556.361083984375, + "learning_rate": 8.095332681002175e-06, + "loss": 33.7321, + "step": 179380 + }, + { + "epoch": 0.3623791497149691, + "grad_norm": 143.40692138671875, + "learning_rate": 8.095058538593645e-06, + "loss": 13.6774, + "step": 179390 + }, + { + "epoch": 0.3623993503476529, + "grad_norm": 733.951904296875, + "learning_rate": 8.094784381100174e-06, + "loss": 18.1463, + "step": 179400 + }, + { + "epoch": 0.3624195509803367, + "grad_norm": 611.850341796875, + "learning_rate": 8.094510208523099e-06, + "loss": 33.0224, + "step": 179410 + }, + { + "epoch": 
0.3624397516130205, + "grad_norm": 436.51654052734375, + "learning_rate": 8.094236020863758e-06, + "loss": 29.0462, + "step": 179420 + }, + { + "epoch": 0.3624599522457043, + "grad_norm": 880.206787109375, + "learning_rate": 8.093961818123483e-06, + "loss": 22.809, + "step": 179430 + }, + { + "epoch": 0.36248015287838814, + "grad_norm": 314.6743469238281, + "learning_rate": 8.093687600303616e-06, + "loss": 13.1, + "step": 179440 + }, + { + "epoch": 0.36250035351107196, + "grad_norm": 513.7949829101562, + "learning_rate": 8.09341336740549e-06, + "loss": 12.9643, + "step": 179450 + }, + { + "epoch": 0.3625205541437558, + "grad_norm": 377.70062255859375, + "learning_rate": 8.093139119430442e-06, + "loss": 20.9338, + "step": 179460 + }, + { + "epoch": 0.3625407547764396, + "grad_norm": 191.74209594726562, + "learning_rate": 8.09286485637981e-06, + "loss": 20.7988, + "step": 179470 + }, + { + "epoch": 0.3625609554091234, + "grad_norm": 546.7145385742188, + "learning_rate": 8.092590578254931e-06, + "loss": 25.0234, + "step": 179480 + }, + { + "epoch": 0.36258115604180724, + "grad_norm": 781.304931640625, + "learning_rate": 8.092316285057138e-06, + "loss": 24.1487, + "step": 179490 + }, + { + "epoch": 0.36260135667449106, + "grad_norm": 784.0794067382812, + "learning_rate": 8.092041976787772e-06, + "loss": 19.9087, + "step": 179500 + }, + { + "epoch": 0.3626215573071749, + "grad_norm": 461.7140197753906, + "learning_rate": 8.091767653448169e-06, + "loss": 21.3678, + "step": 179510 + }, + { + "epoch": 0.3626417579398587, + "grad_norm": 847.074951171875, + "learning_rate": 8.091493315039662e-06, + "loss": 27.9731, + "step": 179520 + }, + { + "epoch": 0.36266195857254246, + "grad_norm": 352.7601623535156, + "learning_rate": 8.091218961563593e-06, + "loss": 14.7476, + "step": 179530 + }, + { + "epoch": 0.3626821592052263, + "grad_norm": 465.680908203125, + "learning_rate": 8.0909445930213e-06, + "loss": 24.597, + "step": 179540 + }, + { + "epoch": 0.3627023598379101, + "grad_norm": 3518.7138671875, + "learning_rate": 8.090670209414117e-06, + "loss": 23.9385, + "step": 179550 + }, + { + "epoch": 0.3627225604705939, + "grad_norm": 509.2041931152344, + "learning_rate": 8.090395810743382e-06, + "loss": 21.9164, + "step": 179560 + }, + { + "epoch": 0.36274276110327774, + "grad_norm": 428.3517150878906, + "learning_rate": 8.090121397010432e-06, + "loss": 16.0016, + "step": 179570 + }, + { + "epoch": 0.36276296173596156, + "grad_norm": 173.6181640625, + "learning_rate": 8.089846968216605e-06, + "loss": 22.2588, + "step": 179580 + }, + { + "epoch": 0.3627831623686454, + "grad_norm": 205.15170288085938, + "learning_rate": 8.08957252436324e-06, + "loss": 10.1585, + "step": 179590 + }, + { + "epoch": 0.3628033630013292, + "grad_norm": 601.03564453125, + "learning_rate": 8.089298065451673e-06, + "loss": 28.1236, + "step": 179600 + }, + { + "epoch": 0.362823563634013, + "grad_norm": 50.00336456298828, + "learning_rate": 8.08902359148324e-06, + "loss": 25.6974, + "step": 179610 + }, + { + "epoch": 0.36284376426669684, + "grad_norm": 598.7530517578125, + "learning_rate": 8.088749102459284e-06, + "loss": 28.9201, + "step": 179620 + }, + { + "epoch": 0.36286396489938066, + "grad_norm": 235.47975158691406, + "learning_rate": 8.088474598381134e-06, + "loss": 16.8251, + "step": 179630 + }, + { + "epoch": 0.3628841655320645, + "grad_norm": 356.95037841796875, + "learning_rate": 8.088200079250139e-06, + "loss": 23.515, + "step": 179640 + }, + { + "epoch": 0.3629043661647483, + "grad_norm": 318.74761962890625, + 
"learning_rate": 8.08792554506763e-06, + "loss": 22.7573, + "step": 179650 + }, + { + "epoch": 0.36292456679743207, + "grad_norm": 356.8153991699219, + "learning_rate": 8.087650995834946e-06, + "loss": 11.2592, + "step": 179660 + }, + { + "epoch": 0.3629447674301159, + "grad_norm": 218.93699645996094, + "learning_rate": 8.087376431553425e-06, + "loss": 23.3865, + "step": 179670 + }, + { + "epoch": 0.3629649680627997, + "grad_norm": 1522.609375, + "learning_rate": 8.087101852224406e-06, + "loss": 18.9979, + "step": 179680 + }, + { + "epoch": 0.36298516869548353, + "grad_norm": 0.0, + "learning_rate": 8.086827257849225e-06, + "loss": 15.1281, + "step": 179690 + }, + { + "epoch": 0.36300536932816735, + "grad_norm": 311.6251525878906, + "learning_rate": 8.086552648429225e-06, + "loss": 22.289, + "step": 179700 + }, + { + "epoch": 0.36302556996085117, + "grad_norm": 191.47767639160156, + "learning_rate": 8.08627802396574e-06, + "loss": 22.3104, + "step": 179710 + }, + { + "epoch": 0.363045770593535, + "grad_norm": 458.1260070800781, + "learning_rate": 8.086003384460113e-06, + "loss": 23.9723, + "step": 179720 + }, + { + "epoch": 0.3630659712262188, + "grad_norm": 478.980712890625, + "learning_rate": 8.085728729913677e-06, + "loss": 17.0848, + "step": 179730 + }, + { + "epoch": 0.36308617185890263, + "grad_norm": 138.81344604492188, + "learning_rate": 8.085454060327775e-06, + "loss": 19.4853, + "step": 179740 + }, + { + "epoch": 0.36310637249158645, + "grad_norm": 374.0357666015625, + "learning_rate": 8.085179375703745e-06, + "loss": 31.6013, + "step": 179750 + }, + { + "epoch": 0.36312657312427027, + "grad_norm": 495.9537658691406, + "learning_rate": 8.084904676042923e-06, + "loss": 21.5826, + "step": 179760 + }, + { + "epoch": 0.3631467737569541, + "grad_norm": 540.7644653320312, + "learning_rate": 8.08462996134665e-06, + "loss": 16.5344, + "step": 179770 + }, + { + "epoch": 0.3631669743896379, + "grad_norm": 346.47137451171875, + "learning_rate": 8.084355231616266e-06, + "loss": 12.8951, + "step": 179780 + }, + { + "epoch": 0.3631871750223217, + "grad_norm": 294.4875793457031, + "learning_rate": 8.084080486853106e-06, + "loss": 24.5862, + "step": 179790 + }, + { + "epoch": 0.3632073756550055, + "grad_norm": 48.79007339477539, + "learning_rate": 8.083805727058514e-06, + "loss": 44.3199, + "step": 179800 + }, + { + "epoch": 0.3632275762876893, + "grad_norm": 1104.3382568359375, + "learning_rate": 8.083530952233826e-06, + "loss": 32.3153, + "step": 179810 + }, + { + "epoch": 0.36324777692037313, + "grad_norm": 501.9084167480469, + "learning_rate": 8.083256162380382e-06, + "loss": 15.4729, + "step": 179820 + }, + { + "epoch": 0.36326797755305695, + "grad_norm": 443.52587890625, + "learning_rate": 8.082981357499522e-06, + "loss": 14.8998, + "step": 179830 + }, + { + "epoch": 0.3632881781857408, + "grad_norm": 70.27091217041016, + "learning_rate": 8.082706537592585e-06, + "loss": 33.6036, + "step": 179840 + }, + { + "epoch": 0.3633083788184246, + "grad_norm": 348.7554626464844, + "learning_rate": 8.08243170266091e-06, + "loss": 19.2431, + "step": 179850 + }, + { + "epoch": 0.3633285794511084, + "grad_norm": 248.2569122314453, + "learning_rate": 8.082156852705837e-06, + "loss": 11.5737, + "step": 179860 + }, + { + "epoch": 0.36334878008379223, + "grad_norm": 190.74111938476562, + "learning_rate": 8.081881987728704e-06, + "loss": 10.9432, + "step": 179870 + }, + { + "epoch": 0.36336898071647605, + "grad_norm": 204.70008850097656, + "learning_rate": 8.081607107730853e-06, + "loss": 24.379, + "step": 
179880 + }, + { + "epoch": 0.3633891813491599, + "grad_norm": 538.6603393554688, + "learning_rate": 8.081332212713625e-06, + "loss": 32.2789, + "step": 179890 + }, + { + "epoch": 0.3634093819818437, + "grad_norm": 662.2504272460938, + "learning_rate": 8.081057302678352e-06, + "loss": 21.0323, + "step": 179900 + }, + { + "epoch": 0.3634295826145275, + "grad_norm": 397.1094665527344, + "learning_rate": 8.080782377626383e-06, + "loss": 15.8753, + "step": 179910 + }, + { + "epoch": 0.3634497832472113, + "grad_norm": 79.99320983886719, + "learning_rate": 8.080507437559056e-06, + "loss": 23.1658, + "step": 179920 + }, + { + "epoch": 0.3634699838798951, + "grad_norm": 710.0244140625, + "learning_rate": 8.080232482477705e-06, + "loss": 27.1346, + "step": 179930 + }, + { + "epoch": 0.3634901845125789, + "grad_norm": 476.2806091308594, + "learning_rate": 8.079957512383679e-06, + "loss": 18.4552, + "step": 179940 + }, + { + "epoch": 0.36351038514526274, + "grad_norm": 657.8384399414062, + "learning_rate": 8.07968252727831e-06, + "loss": 23.6445, + "step": 179950 + }, + { + "epoch": 0.36353058577794656, + "grad_norm": 268.9906005859375, + "learning_rate": 8.079407527162944e-06, + "loss": 31.5085, + "step": 179960 + }, + { + "epoch": 0.3635507864106304, + "grad_norm": 479.5155334472656, + "learning_rate": 8.079132512038917e-06, + "loss": 28.7466, + "step": 179970 + }, + { + "epoch": 0.3635709870433142, + "grad_norm": 266.0442199707031, + "learning_rate": 8.078857481907573e-06, + "loss": 11.4897, + "step": 179980 + }, + { + "epoch": 0.363591187675998, + "grad_norm": 248.67543029785156, + "learning_rate": 8.078582436770252e-06, + "loss": 31.0825, + "step": 179990 + }, + { + "epoch": 0.36361138830868184, + "grad_norm": 1310.8602294921875, + "learning_rate": 8.078307376628292e-06, + "loss": 26.8525, + "step": 180000 + }, + { + "epoch": 0.36363158894136566, + "grad_norm": 243.6558837890625, + "learning_rate": 8.078032301483036e-06, + "loss": 23.1223, + "step": 180010 + }, + { + "epoch": 0.3636517895740495, + "grad_norm": 484.58319091796875, + "learning_rate": 8.077757211335823e-06, + "loss": 29.5466, + "step": 180020 + }, + { + "epoch": 0.3636719902067333, + "grad_norm": 837.57177734375, + "learning_rate": 8.077482106187997e-06, + "loss": 26.9726, + "step": 180030 + }, + { + "epoch": 0.3636921908394171, + "grad_norm": 731.7139282226562, + "learning_rate": 8.077206986040894e-06, + "loss": 34.0762, + "step": 180040 + }, + { + "epoch": 0.3637123914721009, + "grad_norm": 653.1241455078125, + "learning_rate": 8.076931850895858e-06, + "loss": 34.7637, + "step": 180050 + }, + { + "epoch": 0.3637325921047847, + "grad_norm": 304.6058349609375, + "learning_rate": 8.07665670075423e-06, + "loss": 34.1574, + "step": 180060 + }, + { + "epoch": 0.3637527927374685, + "grad_norm": 318.70135498046875, + "learning_rate": 8.07638153561735e-06, + "loss": 21.1969, + "step": 180070 + }, + { + "epoch": 0.36377299337015234, + "grad_norm": 100.94208526611328, + "learning_rate": 8.076106355486559e-06, + "loss": 20.3431, + "step": 180080 + }, + { + "epoch": 0.36379319400283616, + "grad_norm": 540.5836791992188, + "learning_rate": 8.0758311603632e-06, + "loss": 28.3074, + "step": 180090 + }, + { + "epoch": 0.36381339463552, + "grad_norm": 741.1316528320312, + "learning_rate": 8.075555950248613e-06, + "loss": 24.8105, + "step": 180100 + }, + { + "epoch": 0.3638335952682038, + "grad_norm": 878.4921264648438, + "learning_rate": 8.075280725144136e-06, + "loss": 24.5495, + "step": 180110 + }, + { + "epoch": 0.3638537959008876, + 
"grad_norm": 423.2711486816406, + "learning_rate": 8.075005485051117e-06, + "loss": 27.7036, + "step": 180120 + }, + { + "epoch": 0.36387399653357144, + "grad_norm": 74.36318969726562, + "learning_rate": 8.074730229970895e-06, + "loss": 23.0773, + "step": 180130 + }, + { + "epoch": 0.36389419716625526, + "grad_norm": 713.6236572265625, + "learning_rate": 8.074454959904807e-06, + "loss": 25.2313, + "step": 180140 + }, + { + "epoch": 0.3639143977989391, + "grad_norm": 2.3272178173065186, + "learning_rate": 8.0741796748542e-06, + "loss": 11.2923, + "step": 180150 + }, + { + "epoch": 0.3639345984316229, + "grad_norm": 399.4425048828125, + "learning_rate": 8.073904374820416e-06, + "loss": 29.8339, + "step": 180160 + }, + { + "epoch": 0.36395479906430667, + "grad_norm": 551.170166015625, + "learning_rate": 8.073629059804795e-06, + "loss": 23.4163, + "step": 180170 + }, + { + "epoch": 0.3639749996969905, + "grad_norm": 61.851165771484375, + "learning_rate": 8.073353729808676e-06, + "loss": 13.2651, + "step": 180180 + }, + { + "epoch": 0.3639952003296743, + "grad_norm": 423.3778381347656, + "learning_rate": 8.073078384833406e-06, + "loss": 20.787, + "step": 180190 + }, + { + "epoch": 0.3640154009623581, + "grad_norm": 442.4151306152344, + "learning_rate": 8.072803024880322e-06, + "loss": 16.1326, + "step": 180200 + }, + { + "epoch": 0.36403560159504195, + "grad_norm": 663.5044555664062, + "learning_rate": 8.072527649950772e-06, + "loss": 16.5667, + "step": 180210 + }, + { + "epoch": 0.36405580222772577, + "grad_norm": 328.0487365722656, + "learning_rate": 8.072252260046094e-06, + "loss": 27.5722, + "step": 180220 + }, + { + "epoch": 0.3640760028604096, + "grad_norm": 474.4621276855469, + "learning_rate": 8.071976855167629e-06, + "loss": 27.4091, + "step": 180230 + }, + { + "epoch": 0.3640962034930934, + "grad_norm": 123.0985336303711, + "learning_rate": 8.071701435316724e-06, + "loss": 41.9142, + "step": 180240 + }, + { + "epoch": 0.3641164041257772, + "grad_norm": 86.01426696777344, + "learning_rate": 8.071426000494716e-06, + "loss": 19.347, + "step": 180250 + }, + { + "epoch": 0.36413660475846105, + "grad_norm": 239.38197326660156, + "learning_rate": 8.071150550702953e-06, + "loss": 34.2087, + "step": 180260 + }, + { + "epoch": 0.36415680539114487, + "grad_norm": 432.0234069824219, + "learning_rate": 8.070875085942772e-06, + "loss": 18.4752, + "step": 180270 + }, + { + "epoch": 0.3641770060238287, + "grad_norm": 36.60873794555664, + "learning_rate": 8.070599606215522e-06, + "loss": 23.4685, + "step": 180280 + }, + { + "epoch": 0.3641972066565125, + "grad_norm": 147.41677856445312, + "learning_rate": 8.070324111522538e-06, + "loss": 24.2534, + "step": 180290 + }, + { + "epoch": 0.36421740728919627, + "grad_norm": 460.8913269042969, + "learning_rate": 8.07004860186517e-06, + "loss": 31.2957, + "step": 180300 + }, + { + "epoch": 0.3642376079218801, + "grad_norm": 519.7053833007812, + "learning_rate": 8.069773077244756e-06, + "loss": 17.9148, + "step": 180310 + }, + { + "epoch": 0.3642578085545639, + "grad_norm": 198.05221557617188, + "learning_rate": 8.069497537662638e-06, + "loss": 20.4212, + "step": 180320 + }, + { + "epoch": 0.36427800918724773, + "grad_norm": 151.0083770751953, + "learning_rate": 8.069221983120164e-06, + "loss": 19.3584, + "step": 180330 + }, + { + "epoch": 0.36429820981993155, + "grad_norm": 270.20404052734375, + "learning_rate": 8.068946413618674e-06, + "loss": 18.1931, + "step": 180340 + }, + { + "epoch": 0.36431841045261537, + "grad_norm": 282.3987121582031, + 
"learning_rate": 8.068670829159511e-06, + "loss": 18.7832, + "step": 180350 + }, + { + "epoch": 0.3643386110852992, + "grad_norm": 449.9928894042969, + "learning_rate": 8.06839522974402e-06, + "loss": 43.0912, + "step": 180360 + }, + { + "epoch": 0.364358811717983, + "grad_norm": 634.7880859375, + "learning_rate": 8.068119615373541e-06, + "loss": 16.687, + "step": 180370 + }, + { + "epoch": 0.36437901235066683, + "grad_norm": 661.2182006835938, + "learning_rate": 8.06784398604942e-06, + "loss": 18.101, + "step": 180380 + }, + { + "epoch": 0.36439921298335065, + "grad_norm": 98.8208236694336, + "learning_rate": 8.067568341773e-06, + "loss": 22.6252, + "step": 180390 + }, + { + "epoch": 0.36441941361603447, + "grad_norm": 593.2794799804688, + "learning_rate": 8.067292682545622e-06, + "loss": 16.2175, + "step": 180400 + }, + { + "epoch": 0.3644396142487183, + "grad_norm": 192.91513061523438, + "learning_rate": 8.067017008368632e-06, + "loss": 9.7837, + "step": 180410 + }, + { + "epoch": 0.3644598148814021, + "grad_norm": 847.2479248046875, + "learning_rate": 8.066741319243374e-06, + "loss": 18.9218, + "step": 180420 + }, + { + "epoch": 0.3644800155140859, + "grad_norm": 501.9024963378906, + "learning_rate": 8.06646561517119e-06, + "loss": 27.7609, + "step": 180430 + }, + { + "epoch": 0.3645002161467697, + "grad_norm": 248.13670349121094, + "learning_rate": 8.066189896153425e-06, + "loss": 28.4785, + "step": 180440 + }, + { + "epoch": 0.3645204167794535, + "grad_norm": 249.80906677246094, + "learning_rate": 8.065914162191424e-06, + "loss": 13.8947, + "step": 180450 + }, + { + "epoch": 0.36454061741213734, + "grad_norm": 107.41999816894531, + "learning_rate": 8.065638413286526e-06, + "loss": 13.3479, + "step": 180460 + }, + { + "epoch": 0.36456081804482116, + "grad_norm": 236.6258087158203, + "learning_rate": 8.065362649440081e-06, + "loss": 19.4068, + "step": 180470 + }, + { + "epoch": 0.364581018677505, + "grad_norm": 50.60342788696289, + "learning_rate": 8.065086870653428e-06, + "loss": 17.5798, + "step": 180480 + }, + { + "epoch": 0.3646012193101888, + "grad_norm": 227.71481323242188, + "learning_rate": 8.064811076927915e-06, + "loss": 37.9688, + "step": 180490 + }, + { + "epoch": 0.3646214199428726, + "grad_norm": 542.9400634765625, + "learning_rate": 8.064535268264883e-06, + "loss": 22.8471, + "step": 180500 + }, + { + "epoch": 0.36464162057555644, + "grad_norm": 650.06640625, + "learning_rate": 8.064259444665678e-06, + "loss": 14.5285, + "step": 180510 + }, + { + "epoch": 0.36466182120824026, + "grad_norm": 169.16299438476562, + "learning_rate": 8.063983606131645e-06, + "loss": 30.553, + "step": 180520 + }, + { + "epoch": 0.3646820218409241, + "grad_norm": 260.5411682128906, + "learning_rate": 8.063707752664127e-06, + "loss": 16.6584, + "step": 180530 + }, + { + "epoch": 0.3647022224736079, + "grad_norm": 948.4075927734375, + "learning_rate": 8.06343188426447e-06, + "loss": 31.9307, + "step": 180540 + }, + { + "epoch": 0.3647224231062917, + "grad_norm": 178.24842834472656, + "learning_rate": 8.063156000934017e-06, + "loss": 35.5165, + "step": 180550 + }, + { + "epoch": 0.3647426237389755, + "grad_norm": 545.8712158203125, + "learning_rate": 8.062880102674112e-06, + "loss": 29.6667, + "step": 180560 + }, + { + "epoch": 0.3647628243716593, + "grad_norm": 259.62982177734375, + "learning_rate": 8.062604189486102e-06, + "loss": 35.8326, + "step": 180570 + }, + { + "epoch": 0.3647830250043431, + "grad_norm": 451.8420104980469, + "learning_rate": 8.06232826137133e-06, + "loss": 14.4411, + 
"step": 180580 + }, + { + "epoch": 0.36480322563702694, + "grad_norm": 222.64039611816406, + "learning_rate": 8.062052318331142e-06, + "loss": 17.8508, + "step": 180590 + }, + { + "epoch": 0.36482342626971076, + "grad_norm": 1076.473388671875, + "learning_rate": 8.061776360366883e-06, + "loss": 28.3678, + "step": 180600 + }, + { + "epoch": 0.3648436269023946, + "grad_norm": 338.62493896484375, + "learning_rate": 8.061500387479896e-06, + "loss": 25.0373, + "step": 180610 + }, + { + "epoch": 0.3648638275350784, + "grad_norm": 491.73956298828125, + "learning_rate": 8.06122439967153e-06, + "loss": 8.7945, + "step": 180620 + }, + { + "epoch": 0.3648840281677622, + "grad_norm": 250.841552734375, + "learning_rate": 8.060948396943125e-06, + "loss": 21.4569, + "step": 180630 + }, + { + "epoch": 0.36490422880044604, + "grad_norm": 600.4892578125, + "learning_rate": 8.060672379296028e-06, + "loss": 32.9581, + "step": 180640 + }, + { + "epoch": 0.36492442943312986, + "grad_norm": 579.3340454101562, + "learning_rate": 8.060396346731587e-06, + "loss": 23.5043, + "step": 180650 + }, + { + "epoch": 0.3649446300658137, + "grad_norm": 1081.14013671875, + "learning_rate": 8.060120299251145e-06, + "loss": 41.1831, + "step": 180660 + }, + { + "epoch": 0.3649648306984975, + "grad_norm": 260.2357177734375, + "learning_rate": 8.059844236856047e-06, + "loss": 22.1888, + "step": 180670 + }, + { + "epoch": 0.3649850313311813, + "grad_norm": 665.8071899414062, + "learning_rate": 8.05956815954764e-06, + "loss": 32.5861, + "step": 180680 + }, + { + "epoch": 0.3650052319638651, + "grad_norm": 539.4544677734375, + "learning_rate": 8.059292067327268e-06, + "loss": 23.4349, + "step": 180690 + }, + { + "epoch": 0.3650254325965489, + "grad_norm": 222.7199249267578, + "learning_rate": 8.05901596019628e-06, + "loss": 32.5975, + "step": 180700 + }, + { + "epoch": 0.3650456332292327, + "grad_norm": 653.107666015625, + "learning_rate": 8.058739838156018e-06, + "loss": 26.6703, + "step": 180710 + }, + { + "epoch": 0.36506583386191654, + "grad_norm": 131.5776824951172, + "learning_rate": 8.058463701207828e-06, + "loss": 26.4162, + "step": 180720 + }, + { + "epoch": 0.36508603449460036, + "grad_norm": 159.77001953125, + "learning_rate": 8.058187549353058e-06, + "loss": 17.4183, + "step": 180730 + }, + { + "epoch": 0.3651062351272842, + "grad_norm": 389.9031677246094, + "learning_rate": 8.057911382593052e-06, + "loss": 13.0312, + "step": 180740 + }, + { + "epoch": 0.365126435759968, + "grad_norm": 271.0975036621094, + "learning_rate": 8.057635200929157e-06, + "loss": 21.7841, + "step": 180750 + }, + { + "epoch": 0.3651466363926518, + "grad_norm": 256.2905578613281, + "learning_rate": 8.057359004362719e-06, + "loss": 18.0252, + "step": 180760 + }, + { + "epoch": 0.36516683702533564, + "grad_norm": 393.3341979980469, + "learning_rate": 8.057082792895084e-06, + "loss": 15.9911, + "step": 180770 + }, + { + "epoch": 0.36518703765801946, + "grad_norm": 1234.55810546875, + "learning_rate": 8.056806566527597e-06, + "loss": 50.5567, + "step": 180780 + }, + { + "epoch": 0.3652072382907033, + "grad_norm": 167.49339294433594, + "learning_rate": 8.056530325261607e-06, + "loss": 23.2267, + "step": 180790 + }, + { + "epoch": 0.3652274389233871, + "grad_norm": 1536.591796875, + "learning_rate": 8.05625406909846e-06, + "loss": 32.0553, + "step": 180800 + }, + { + "epoch": 0.36524763955607087, + "grad_norm": 460.4673767089844, + "learning_rate": 8.055977798039499e-06, + "loss": 18.0047, + "step": 180810 + }, + { + "epoch": 0.3652678401887547, + 
"grad_norm": 286.6156921386719, + "learning_rate": 8.055701512086073e-06, + "loss": 18.4002, + "step": 180820 + }, + { + "epoch": 0.3652880408214385, + "grad_norm": 507.64703369140625, + "learning_rate": 8.05542521123953e-06, + "loss": 34.6961, + "step": 180830 + }, + { + "epoch": 0.36530824145412233, + "grad_norm": 197.2182159423828, + "learning_rate": 8.055148895501214e-06, + "loss": 25.0815, + "step": 180840 + }, + { + "epoch": 0.36532844208680615, + "grad_norm": 241.7445068359375, + "learning_rate": 8.054872564872474e-06, + "loss": 19.066, + "step": 180850 + }, + { + "epoch": 0.36534864271948997, + "grad_norm": 405.707763671875, + "learning_rate": 8.054596219354655e-06, + "loss": 16.7721, + "step": 180860 + }, + { + "epoch": 0.3653688433521738, + "grad_norm": 652.0069580078125, + "learning_rate": 8.054319858949104e-06, + "loss": 16.9904, + "step": 180870 + }, + { + "epoch": 0.3653890439848576, + "grad_norm": 437.1751708984375, + "learning_rate": 8.054043483657169e-06, + "loss": 20.3146, + "step": 180880 + }, + { + "epoch": 0.36540924461754143, + "grad_norm": 276.588134765625, + "learning_rate": 8.053767093480196e-06, + "loss": 18.7465, + "step": 180890 + }, + { + "epoch": 0.36542944525022525, + "grad_norm": 67.01463317871094, + "learning_rate": 8.053490688419532e-06, + "loss": 27.196, + "step": 180900 + }, + { + "epoch": 0.36544964588290907, + "grad_norm": 532.4506225585938, + "learning_rate": 8.053214268476526e-06, + "loss": 35.5491, + "step": 180910 + }, + { + "epoch": 0.3654698465155929, + "grad_norm": 255.35911560058594, + "learning_rate": 8.052937833652524e-06, + "loss": 11.376, + "step": 180920 + }, + { + "epoch": 0.3654900471482767, + "grad_norm": 78.1842041015625, + "learning_rate": 8.052661383948873e-06, + "loss": 27.2627, + "step": 180930 + }, + { + "epoch": 0.3655102477809605, + "grad_norm": 225.21063232421875, + "learning_rate": 8.05238491936692e-06, + "loss": 19.6099, + "step": 180940 + }, + { + "epoch": 0.3655304484136443, + "grad_norm": 316.75054931640625, + "learning_rate": 8.052108439908014e-06, + "loss": 32.9721, + "step": 180950 + }, + { + "epoch": 0.3655506490463281, + "grad_norm": 331.910888671875, + "learning_rate": 8.0518319455735e-06, + "loss": 16.345, + "step": 180960 + }, + { + "epoch": 0.36557084967901193, + "grad_norm": 550.0551147460938, + "learning_rate": 8.051555436364728e-06, + "loss": 24.1847, + "step": 180970 + }, + { + "epoch": 0.36559105031169575, + "grad_norm": 383.29962158203125, + "learning_rate": 8.051278912283046e-06, + "loss": 15.6813, + "step": 180980 + }, + { + "epoch": 0.3656112509443796, + "grad_norm": 700.6581420898438, + "learning_rate": 8.0510023733298e-06, + "loss": 18.2657, + "step": 180990 + }, + { + "epoch": 0.3656314515770634, + "grad_norm": 1364.6787109375, + "learning_rate": 8.05072581950634e-06, + "loss": 24.5909, + "step": 181000 + }, + { + "epoch": 0.3656516522097472, + "grad_norm": 357.5721130371094, + "learning_rate": 8.05044925081401e-06, + "loss": 24.2228, + "step": 181010 + }, + { + "epoch": 0.36567185284243103, + "grad_norm": 374.67138671875, + "learning_rate": 8.050172667254161e-06, + "loss": 18.6345, + "step": 181020 + }, + { + "epoch": 0.36569205347511485, + "grad_norm": 587.245361328125, + "learning_rate": 8.04989606882814e-06, + "loss": 12.8158, + "step": 181030 + }, + { + "epoch": 0.3657122541077987, + "grad_norm": 69.31981658935547, + "learning_rate": 8.049619455537296e-06, + "loss": 15.0433, + "step": 181040 + }, + { + "epoch": 0.3657324547404825, + "grad_norm": 149.47796630859375, + "learning_rate": 
8.049342827382978e-06, + "loss": 17.1523, + "step": 181050 + }, + { + "epoch": 0.3657526553731663, + "grad_norm": 233.05543518066406, + "learning_rate": 8.049066184366532e-06, + "loss": 39.2642, + "step": 181060 + }, + { + "epoch": 0.3657728560058501, + "grad_norm": 609.6397094726562, + "learning_rate": 8.048789526489305e-06, + "loss": 21.6553, + "step": 181070 + }, + { + "epoch": 0.3657930566385339, + "grad_norm": 234.69485473632812, + "learning_rate": 8.04851285375265e-06, + "loss": 10.3634, + "step": 181080 + }, + { + "epoch": 0.3658132572712177, + "grad_norm": 1949.347900390625, + "learning_rate": 8.048236166157912e-06, + "loss": 23.8406, + "step": 181090 + }, + { + "epoch": 0.36583345790390154, + "grad_norm": 913.1204833984375, + "learning_rate": 8.047959463706441e-06, + "loss": 32.6902, + "step": 181100 + }, + { + "epoch": 0.36585365853658536, + "grad_norm": 311.9753723144531, + "learning_rate": 8.047682746399585e-06, + "loss": 17.295, + "step": 181110 + }, + { + "epoch": 0.3658738591692692, + "grad_norm": 548.8505249023438, + "learning_rate": 8.047406014238695e-06, + "loss": 20.3511, + "step": 181120 + }, + { + "epoch": 0.365894059801953, + "grad_norm": 177.647705078125, + "learning_rate": 8.047129267225116e-06, + "loss": 19.28, + "step": 181130 + }, + { + "epoch": 0.3659142604346368, + "grad_norm": 131.7987518310547, + "learning_rate": 8.046852505360196e-06, + "loss": 23.0578, + "step": 181140 + }, + { + "epoch": 0.36593446106732064, + "grad_norm": 0.0, + "learning_rate": 8.04657572864529e-06, + "loss": 22.0129, + "step": 181150 + }, + { + "epoch": 0.36595466170000446, + "grad_norm": 601.8718872070312, + "learning_rate": 8.046298937081742e-06, + "loss": 15.4517, + "step": 181160 + }, + { + "epoch": 0.3659748623326883, + "grad_norm": 386.86602783203125, + "learning_rate": 8.046022130670903e-06, + "loss": 19.1586, + "step": 181170 + }, + { + "epoch": 0.3659950629653721, + "grad_norm": 537.0209350585938, + "learning_rate": 8.045745309414122e-06, + "loss": 25.2806, + "step": 181180 + }, + { + "epoch": 0.3660152635980559, + "grad_norm": 49.718807220458984, + "learning_rate": 8.045468473312748e-06, + "loss": 8.4768, + "step": 181190 + }, + { + "epoch": 0.3660354642307397, + "grad_norm": 17.38374137878418, + "learning_rate": 8.045191622368128e-06, + "loss": 17.5699, + "step": 181200 + }, + { + "epoch": 0.3660556648634235, + "grad_norm": 759.65380859375, + "learning_rate": 8.044914756581614e-06, + "loss": 32.4582, + "step": 181210 + }, + { + "epoch": 0.3660758654961073, + "grad_norm": 363.1199035644531, + "learning_rate": 8.044637875954556e-06, + "loss": 13.3467, + "step": 181220 + }, + { + "epoch": 0.36609606612879114, + "grad_norm": 676.3450317382812, + "learning_rate": 8.044360980488302e-06, + "loss": 16.8875, + "step": 181230 + }, + { + "epoch": 0.36611626676147496, + "grad_norm": 570.9820556640625, + "learning_rate": 8.044084070184202e-06, + "loss": 24.6783, + "step": 181240 + }, + { + "epoch": 0.3661364673941588, + "grad_norm": 388.772216796875, + "learning_rate": 8.043807145043604e-06, + "loss": 16.8156, + "step": 181250 + }, + { + "epoch": 0.3661566680268426, + "grad_norm": 172.58050537109375, + "learning_rate": 8.04353020506786e-06, + "loss": 25.0926, + "step": 181260 + }, + { + "epoch": 0.3661768686595264, + "grad_norm": 1798.0380859375, + "learning_rate": 8.04325325025832e-06, + "loss": 36.0175, + "step": 181270 + }, + { + "epoch": 0.36619706929221024, + "grad_norm": 444.1122741699219, + "learning_rate": 8.04297628061633e-06, + "loss": 20.0588, + "step": 181280 + }, + { + 
"epoch": 0.36621726992489406, + "grad_norm": 432.8059387207031, + "learning_rate": 8.042699296143244e-06, + "loss": 28.8735, + "step": 181290 + }, + { + "epoch": 0.3662374705575779, + "grad_norm": 179.3892059326172, + "learning_rate": 8.04242229684041e-06, + "loss": 28.0737, + "step": 181300 + }, + { + "epoch": 0.3662576711902617, + "grad_norm": 382.18927001953125, + "learning_rate": 8.042145282709181e-06, + "loss": 11.817, + "step": 181310 + }, + { + "epoch": 0.3662778718229455, + "grad_norm": 231.79049682617188, + "learning_rate": 8.041868253750904e-06, + "loss": 14.6898, + "step": 181320 + }, + { + "epoch": 0.3662980724556293, + "grad_norm": 265.1534729003906, + "learning_rate": 8.04159120996693e-06, + "loss": 19.368, + "step": 181330 + }, + { + "epoch": 0.3663182730883131, + "grad_norm": 338.3877258300781, + "learning_rate": 8.04131415135861e-06, + "loss": 20.6414, + "step": 181340 + }, + { + "epoch": 0.3663384737209969, + "grad_norm": 543.4014282226562, + "learning_rate": 8.041037077927291e-06, + "loss": 23.0925, + "step": 181350 + }, + { + "epoch": 0.36635867435368075, + "grad_norm": 155.44252014160156, + "learning_rate": 8.040759989674328e-06, + "loss": 27.2844, + "step": 181360 + }, + { + "epoch": 0.36637887498636457, + "grad_norm": 349.7950744628906, + "learning_rate": 8.04048288660107e-06, + "loss": 30.5285, + "step": 181370 + }, + { + "epoch": 0.3663990756190484, + "grad_norm": 289.8055114746094, + "learning_rate": 8.040205768708866e-06, + "loss": 14.5676, + "step": 181380 + }, + { + "epoch": 0.3664192762517322, + "grad_norm": 568.9749145507812, + "learning_rate": 8.03992863599907e-06, + "loss": 22.2054, + "step": 181390 + }, + { + "epoch": 0.366439476884416, + "grad_norm": 610.5119018554688, + "learning_rate": 8.039651488473028e-06, + "loss": 22.5269, + "step": 181400 + }, + { + "epoch": 0.36645967751709985, + "grad_norm": 58.552066802978516, + "learning_rate": 8.039374326132095e-06, + "loss": 19.4534, + "step": 181410 + }, + { + "epoch": 0.36647987814978367, + "grad_norm": 413.19134521484375, + "learning_rate": 8.03909714897762e-06, + "loss": 44.6754, + "step": 181420 + }, + { + "epoch": 0.3665000787824675, + "grad_norm": 345.2781677246094, + "learning_rate": 8.038819957010953e-06, + "loss": 25.9225, + "step": 181430 + }, + { + "epoch": 0.3665202794151513, + "grad_norm": 76.79401397705078, + "learning_rate": 8.038542750233445e-06, + "loss": 21.2028, + "step": 181440 + }, + { + "epoch": 0.36654048004783507, + "grad_norm": 467.41796875, + "learning_rate": 8.03826552864645e-06, + "loss": 28.2208, + "step": 181450 + }, + { + "epoch": 0.3665606806805189, + "grad_norm": 118.24577331542969, + "learning_rate": 8.037988292251317e-06, + "loss": 29.4202, + "step": 181460 + }, + { + "epoch": 0.3665808813132027, + "grad_norm": 422.4919128417969, + "learning_rate": 8.037711041049398e-06, + "loss": 16.9169, + "step": 181470 + }, + { + "epoch": 0.36660108194588653, + "grad_norm": 159.02706909179688, + "learning_rate": 8.037433775042044e-06, + "loss": 21.7535, + "step": 181480 + }, + { + "epoch": 0.36662128257857035, + "grad_norm": 401.123779296875, + "learning_rate": 8.037156494230605e-06, + "loss": 22.9303, + "step": 181490 + }, + { + "epoch": 0.36664148321125417, + "grad_norm": 278.9327087402344, + "learning_rate": 8.036879198616434e-06, + "loss": 35.944, + "step": 181500 + }, + { + "epoch": 0.366661683843938, + "grad_norm": 426.00067138671875, + "learning_rate": 8.036601888200883e-06, + "loss": 25.5249, + "step": 181510 + }, + { + "epoch": 0.3666818844766218, + "grad_norm": 
1824.5927734375, + "learning_rate": 8.036324562985302e-06, + "loss": 42.9552, + "step": 181520 + }, + { + "epoch": 0.36670208510930563, + "grad_norm": 556.7666625976562, + "learning_rate": 8.036047222971043e-06, + "loss": 27.7041, + "step": 181530 + }, + { + "epoch": 0.36672228574198945, + "grad_norm": 337.087890625, + "learning_rate": 8.035769868159457e-06, + "loss": 14.2904, + "step": 181540 + }, + { + "epoch": 0.36674248637467327, + "grad_norm": 480.3008728027344, + "learning_rate": 8.0354924985519e-06, + "loss": 21.9382, + "step": 181550 + }, + { + "epoch": 0.3667626870073571, + "grad_norm": 701.7417602539062, + "learning_rate": 8.035215114149719e-06, + "loss": 14.3989, + "step": 181560 + }, + { + "epoch": 0.3667828876400409, + "grad_norm": 478.9831237792969, + "learning_rate": 8.034937714954267e-06, + "loss": 12.8167, + "step": 181570 + }, + { + "epoch": 0.3668030882727247, + "grad_norm": 2242.091796875, + "learning_rate": 8.034660300966898e-06, + "loss": 24.1358, + "step": 181580 + }, + { + "epoch": 0.3668232889054085, + "grad_norm": 383.171875, + "learning_rate": 8.034382872188961e-06, + "loss": 27.349, + "step": 181590 + }, + { + "epoch": 0.3668434895380923, + "grad_norm": 59.53066635131836, + "learning_rate": 8.034105428621812e-06, + "loss": 10.8993, + "step": 181600 + }, + { + "epoch": 0.36686369017077614, + "grad_norm": 516.87939453125, + "learning_rate": 8.0338279702668e-06, + "loss": 19.6811, + "step": 181610 + }, + { + "epoch": 0.36688389080345996, + "grad_norm": 535.0899658203125, + "learning_rate": 8.033550497125277e-06, + "loss": 14.4022, + "step": 181620 + }, + { + "epoch": 0.3669040914361438, + "grad_norm": 249.20155334472656, + "learning_rate": 8.0332730091986e-06, + "loss": 17.234, + "step": 181630 + }, + { + "epoch": 0.3669242920688276, + "grad_norm": 345.7061767578125, + "learning_rate": 8.032995506488117e-06, + "loss": 17.3267, + "step": 181640 + }, + { + "epoch": 0.3669444927015114, + "grad_norm": 341.51251220703125, + "learning_rate": 8.03271798899518e-06, + "loss": 13.1889, + "step": 181650 + }, + { + "epoch": 0.36696469333419524, + "grad_norm": 622.7710571289062, + "learning_rate": 8.032440456721145e-06, + "loss": 23.3273, + "step": 181660 + }, + { + "epoch": 0.36698489396687906, + "grad_norm": 1223.9803466796875, + "learning_rate": 8.032162909667363e-06, + "loss": 26.7826, + "step": 181670 + }, + { + "epoch": 0.3670050945995629, + "grad_norm": 697.6757202148438, + "learning_rate": 8.031885347835187e-06, + "loss": 26.0568, + "step": 181680 + }, + { + "epoch": 0.3670252952322467, + "grad_norm": 227.46725463867188, + "learning_rate": 8.031607771225969e-06, + "loss": 20.1208, + "step": 181690 + }, + { + "epoch": 0.3670454958649305, + "grad_norm": 624.7969970703125, + "learning_rate": 8.031330179841062e-06, + "loss": 22.6029, + "step": 181700 + }, + { + "epoch": 0.3670656964976143, + "grad_norm": 474.216552734375, + "learning_rate": 8.031052573681819e-06, + "loss": 18.4208, + "step": 181710 + }, + { + "epoch": 0.3670858971302981, + "grad_norm": 419.1664733886719, + "learning_rate": 8.030774952749596e-06, + "loss": 25.571, + "step": 181720 + }, + { + "epoch": 0.3671060977629819, + "grad_norm": 605.2910766601562, + "learning_rate": 8.03049731704574e-06, + "loss": 26.0323, + "step": 181730 + }, + { + "epoch": 0.36712629839566574, + "grad_norm": 268.3743896484375, + "learning_rate": 8.03021966657161e-06, + "loss": 20.9834, + "step": 181740 + }, + { + "epoch": 0.36714649902834956, + "grad_norm": 135.5460662841797, + "learning_rate": 8.029942001328555e-06, + "loss": 
28.5735, + "step": 181750 + }, + { + "epoch": 0.3671666996610334, + "grad_norm": 386.54852294921875, + "learning_rate": 8.029664321317932e-06, + "loss": 12.4357, + "step": 181760 + }, + { + "epoch": 0.3671869002937172, + "grad_norm": 591.2630004882812, + "learning_rate": 8.029386626541092e-06, + "loss": 14.311, + "step": 181770 + }, + { + "epoch": 0.367207100926401, + "grad_norm": 236.5461883544922, + "learning_rate": 8.02910891699939e-06, + "loss": 20.2664, + "step": 181780 + }, + { + "epoch": 0.36722730155908484, + "grad_norm": 310.11029052734375, + "learning_rate": 8.028831192694176e-06, + "loss": 6.558, + "step": 181790 + }, + { + "epoch": 0.36724750219176866, + "grad_norm": 530.2633666992188, + "learning_rate": 8.028553453626809e-06, + "loss": 11.7515, + "step": 181800 + }, + { + "epoch": 0.3672677028244525, + "grad_norm": 660.1512451171875, + "learning_rate": 8.028275699798638e-06, + "loss": 30.4409, + "step": 181810 + }, + { + "epoch": 0.3672879034571363, + "grad_norm": 528.1337280273438, + "learning_rate": 8.027997931211017e-06, + "loss": 24.7369, + "step": 181820 + }, + { + "epoch": 0.3673081040898201, + "grad_norm": 185.4175567626953, + "learning_rate": 8.027720147865304e-06, + "loss": 12.895, + "step": 181830 + }, + { + "epoch": 0.3673283047225039, + "grad_norm": 333.89111328125, + "learning_rate": 8.02744234976285e-06, + "loss": 17.903, + "step": 181840 + }, + { + "epoch": 0.3673485053551877, + "grad_norm": 6.933417320251465, + "learning_rate": 8.027164536905008e-06, + "loss": 20.3719, + "step": 181850 + }, + { + "epoch": 0.3673687059878715, + "grad_norm": 941.4908447265625, + "learning_rate": 8.026886709293133e-06, + "loss": 31.5246, + "step": 181860 + }, + { + "epoch": 0.36738890662055534, + "grad_norm": 345.4415283203125, + "learning_rate": 8.02660886692858e-06, + "loss": 12.1413, + "step": 181870 + }, + { + "epoch": 0.36740910725323916, + "grad_norm": 1225.314453125, + "learning_rate": 8.026331009812703e-06, + "loss": 26.2431, + "step": 181880 + }, + { + "epoch": 0.367429307885923, + "grad_norm": 773.1281127929688, + "learning_rate": 8.026053137946855e-06, + "loss": 30.959, + "step": 181890 + }, + { + "epoch": 0.3674495085186068, + "grad_norm": 127.11233520507812, + "learning_rate": 8.02577525133239e-06, + "loss": 24.5994, + "step": 181900 + }, + { + "epoch": 0.3674697091512906, + "grad_norm": 442.1996765136719, + "learning_rate": 8.025497349970666e-06, + "loss": 18.6258, + "step": 181910 + }, + { + "epoch": 0.36748990978397444, + "grad_norm": 191.32305908203125, + "learning_rate": 8.025219433863035e-06, + "loss": 16.1111, + "step": 181920 + }, + { + "epoch": 0.36751011041665826, + "grad_norm": 265.1795349121094, + "learning_rate": 8.024941503010848e-06, + "loss": 15.1225, + "step": 181930 + }, + { + "epoch": 0.3675303110493421, + "grad_norm": 273.599365234375, + "learning_rate": 8.024663557415466e-06, + "loss": 23.978, + "step": 181940 + }, + { + "epoch": 0.3675505116820259, + "grad_norm": 696.9441528320312, + "learning_rate": 8.024385597078239e-06, + "loss": 21.3629, + "step": 181950 + }, + { + "epoch": 0.3675707123147097, + "grad_norm": 429.6669921875, + "learning_rate": 8.024107622000524e-06, + "loss": 14.9189, + "step": 181960 + }, + { + "epoch": 0.3675909129473935, + "grad_norm": 283.3707580566406, + "learning_rate": 8.023829632183676e-06, + "loss": 30.5343, + "step": 181970 + }, + { + "epoch": 0.3676111135800773, + "grad_norm": 380.0705871582031, + "learning_rate": 8.023551627629047e-06, + "loss": 22.6781, + "step": 181980 + }, + { + "epoch": 0.36763131421276113, + 
"grad_norm": 620.158203125, + "learning_rate": 8.023273608337997e-06, + "loss": 17.7174, + "step": 181990 + }, + { + "epoch": 0.36765151484544495, + "grad_norm": 26.114177703857422, + "learning_rate": 8.022995574311876e-06, + "loss": 15.6572, + "step": 182000 + }, + { + "epoch": 0.36767171547812877, + "grad_norm": 745.3638305664062, + "learning_rate": 8.022717525552041e-06, + "loss": 22.326, + "step": 182010 + }, + { + "epoch": 0.3676919161108126, + "grad_norm": 901.6583862304688, + "learning_rate": 8.022439462059849e-06, + "loss": 24.4014, + "step": 182020 + }, + { + "epoch": 0.3677121167434964, + "grad_norm": 221.56088256835938, + "learning_rate": 8.022161383836652e-06, + "loss": 22.557, + "step": 182030 + }, + { + "epoch": 0.36773231737618023, + "grad_norm": 532.9639892578125, + "learning_rate": 8.021883290883808e-06, + "loss": 24.7744, + "step": 182040 + }, + { + "epoch": 0.36775251800886405, + "grad_norm": 524.7433471679688, + "learning_rate": 8.021605183202669e-06, + "loss": 28.2642, + "step": 182050 + }, + { + "epoch": 0.36777271864154787, + "grad_norm": 306.8576965332031, + "learning_rate": 8.021327060794597e-06, + "loss": 12.5226, + "step": 182060 + }, + { + "epoch": 0.3677929192742317, + "grad_norm": 289.05828857421875, + "learning_rate": 8.02104892366094e-06, + "loss": 36.4176, + "step": 182070 + }, + { + "epoch": 0.3678131199069155, + "grad_norm": 103.75401306152344, + "learning_rate": 8.02077077180306e-06, + "loss": 31.7933, + "step": 182080 + }, + { + "epoch": 0.3678333205395993, + "grad_norm": 198.9027557373047, + "learning_rate": 8.020492605222307e-06, + "loss": 26.2181, + "step": 182090 + }, + { + "epoch": 0.3678535211722831, + "grad_norm": 159.69471740722656, + "learning_rate": 8.020214423920039e-06, + "loss": 42.431, + "step": 182100 + }, + { + "epoch": 0.3678737218049669, + "grad_norm": 133.8673553466797, + "learning_rate": 8.019936227897614e-06, + "loss": 29.5133, + "step": 182110 + }, + { + "epoch": 0.36789392243765073, + "grad_norm": 344.2096252441406, + "learning_rate": 8.019658017156384e-06, + "loss": 30.8522, + "step": 182120 + }, + { + "epoch": 0.36791412307033455, + "grad_norm": 394.213623046875, + "learning_rate": 8.01937979169771e-06, + "loss": 23.3407, + "step": 182130 + }, + { + "epoch": 0.3679343237030184, + "grad_norm": 174.77699279785156, + "learning_rate": 8.019101551522942e-06, + "loss": 13.8389, + "step": 182140 + }, + { + "epoch": 0.3679545243357022, + "grad_norm": 699.2435913085938, + "learning_rate": 8.018823296633442e-06, + "loss": 20.0749, + "step": 182150 + }, + { + "epoch": 0.367974724968386, + "grad_norm": 355.6721496582031, + "learning_rate": 8.018545027030564e-06, + "loss": 18.3917, + "step": 182160 + }, + { + "epoch": 0.36799492560106983, + "grad_norm": 820.6582641601562, + "learning_rate": 8.01826674271566e-06, + "loss": 16.2338, + "step": 182170 + }, + { + "epoch": 0.36801512623375365, + "grad_norm": 326.598388671875, + "learning_rate": 8.017988443690092e-06, + "loss": 33.0122, + "step": 182180 + }, + { + "epoch": 0.3680353268664375, + "grad_norm": 461.7607727050781, + "learning_rate": 8.017710129955215e-06, + "loss": 12.0651, + "step": 182190 + }, + { + "epoch": 0.3680555274991213, + "grad_norm": 504.27825927734375, + "learning_rate": 8.017431801512384e-06, + "loss": 14.9889, + "step": 182200 + }, + { + "epoch": 0.3680757281318051, + "grad_norm": 12.012791633605957, + "learning_rate": 8.017153458362957e-06, + "loss": 23.0942, + "step": 182210 + }, + { + "epoch": 0.3680959287644889, + "grad_norm": 171.19497680664062, + "learning_rate": 
8.016875100508289e-06, + "loss": 20.3272, + "step": 182220 + }, + { + "epoch": 0.3681161293971727, + "grad_norm": 657.1697998046875, + "learning_rate": 8.016596727949737e-06, + "loss": 40.3708, + "step": 182230 + }, + { + "epoch": 0.3681363300298565, + "grad_norm": 25.94593620300293, + "learning_rate": 8.01631834068866e-06, + "loss": 40.1797, + "step": 182240 + }, + { + "epoch": 0.36815653066254034, + "grad_norm": 441.80059814453125, + "learning_rate": 8.016039938726413e-06, + "loss": 12.376, + "step": 182250 + }, + { + "epoch": 0.36817673129522416, + "grad_norm": 265.05938720703125, + "learning_rate": 8.015761522064353e-06, + "loss": 28.1768, + "step": 182260 + }, + { + "epoch": 0.368196931927908, + "grad_norm": 165.88687133789062, + "learning_rate": 8.015483090703837e-06, + "loss": 19.9978, + "step": 182270 + }, + { + "epoch": 0.3682171325605918, + "grad_norm": 318.32977294921875, + "learning_rate": 8.015204644646222e-06, + "loss": 28.3211, + "step": 182280 + }, + { + "epoch": 0.3682373331932756, + "grad_norm": 230.91534423828125, + "learning_rate": 8.014926183892867e-06, + "loss": 5.3446, + "step": 182290 + }, + { + "epoch": 0.36825753382595944, + "grad_norm": 254.33131408691406, + "learning_rate": 8.014647708445124e-06, + "loss": 26.3181, + "step": 182300 + }, + { + "epoch": 0.36827773445864326, + "grad_norm": 780.2587890625, + "learning_rate": 8.014369218304356e-06, + "loss": 14.7973, + "step": 182310 + }, + { + "epoch": 0.3682979350913271, + "grad_norm": 58.186317443847656, + "learning_rate": 8.014090713471917e-06, + "loss": 21.2951, + "step": 182320 + }, + { + "epoch": 0.3683181357240109, + "grad_norm": 1826.95654296875, + "learning_rate": 8.013812193949166e-06, + "loss": 29.8689, + "step": 182330 + }, + { + "epoch": 0.3683383363566947, + "grad_norm": 210.12147521972656, + "learning_rate": 8.01353365973746e-06, + "loss": 24.4536, + "step": 182340 + }, + { + "epoch": 0.3683585369893785, + "grad_norm": 297.1112976074219, + "learning_rate": 8.013255110838156e-06, + "loss": 33.6899, + "step": 182350 + }, + { + "epoch": 0.3683787376220623, + "grad_norm": 196.2924346923828, + "learning_rate": 8.012976547252614e-06, + "loss": 25.512, + "step": 182360 + }, + { + "epoch": 0.3683989382547461, + "grad_norm": 334.15185546875, + "learning_rate": 8.012697968982187e-06, + "loss": 13.9515, + "step": 182370 + }, + { + "epoch": 0.36841913888742994, + "grad_norm": 336.0948791503906, + "learning_rate": 8.012419376028237e-06, + "loss": 18.3184, + "step": 182380 + }, + { + "epoch": 0.36843933952011376, + "grad_norm": 238.00503540039062, + "learning_rate": 8.01214076839212e-06, + "loss": 24.894, + "step": 182390 + }, + { + "epoch": 0.3684595401527976, + "grad_norm": 240.62161254882812, + "learning_rate": 8.011862146075194e-06, + "loss": 13.059, + "step": 182400 + }, + { + "epoch": 0.3684797407854814, + "grad_norm": 417.91510009765625, + "learning_rate": 8.011583509078817e-06, + "loss": 17.8153, + "step": 182410 + }, + { + "epoch": 0.3684999414181652, + "grad_norm": 242.91551208496094, + "learning_rate": 8.011304857404347e-06, + "loss": 9.9211, + "step": 182420 + }, + { + "epoch": 0.36852014205084904, + "grad_norm": 291.0343017578125, + "learning_rate": 8.011026191053144e-06, + "loss": 14.2289, + "step": 182430 + }, + { + "epoch": 0.36854034268353286, + "grad_norm": 649.30908203125, + "learning_rate": 8.010747510026564e-06, + "loss": 27.8836, + "step": 182440 + }, + { + "epoch": 0.3685605433162167, + "grad_norm": 524.3955078125, + "learning_rate": 8.010468814325964e-06, + "loss": 32.7936, + "step": 182450 
+ }, + { + "epoch": 0.3685807439489005, + "grad_norm": 389.1418151855469, + "learning_rate": 8.010190103952706e-06, + "loss": 23.8763, + "step": 182460 + }, + { + "epoch": 0.3686009445815843, + "grad_norm": 329.4264831542969, + "learning_rate": 8.009911378908147e-06, + "loss": 22.2178, + "step": 182470 + }, + { + "epoch": 0.3686211452142681, + "grad_norm": 319.6559753417969, + "learning_rate": 8.009632639193643e-06, + "loss": 22.7205, + "step": 182480 + }, + { + "epoch": 0.3686413458469519, + "grad_norm": 75.72455596923828, + "learning_rate": 8.009353884810555e-06, + "loss": 36.0206, + "step": 182490 + }, + { + "epoch": 0.3686615464796357, + "grad_norm": 481.7266845703125, + "learning_rate": 8.009075115760243e-06, + "loss": 23.0295, + "step": 182500 + }, + { + "epoch": 0.36868174711231955, + "grad_norm": 350.93804931640625, + "learning_rate": 8.008796332044062e-06, + "loss": 27.9491, + "step": 182510 + }, + { + "epoch": 0.36870194774500337, + "grad_norm": 80.4354476928711, + "learning_rate": 8.008517533663372e-06, + "loss": 20.2024, + "step": 182520 + }, + { + "epoch": 0.3687221483776872, + "grad_norm": 594.6436157226562, + "learning_rate": 8.008238720619534e-06, + "loss": 15.693, + "step": 182530 + }, + { + "epoch": 0.368742349010371, + "grad_norm": 584.0489501953125, + "learning_rate": 8.007959892913906e-06, + "loss": 38.8679, + "step": 182540 + }, + { + "epoch": 0.3687625496430548, + "grad_norm": 404.3018493652344, + "learning_rate": 8.007681050547844e-06, + "loss": 22.9341, + "step": 182550 + }, + { + "epoch": 0.36878275027573865, + "grad_norm": 431.6450500488281, + "learning_rate": 8.007402193522711e-06, + "loss": 24.3709, + "step": 182560 + }, + { + "epoch": 0.36880295090842247, + "grad_norm": 330.33197021484375, + "learning_rate": 8.007123321839865e-06, + "loss": 30.0981, + "step": 182570 + }, + { + "epoch": 0.3688231515411063, + "grad_norm": 485.0052795410156, + "learning_rate": 8.006844435500663e-06, + "loss": 27.0596, + "step": 182580 + }, + { + "epoch": 0.3688433521737901, + "grad_norm": 253.75457763671875, + "learning_rate": 8.006565534506465e-06, + "loss": 16.497, + "step": 182590 + }, + { + "epoch": 0.36886355280647387, + "grad_norm": 260.1213073730469, + "learning_rate": 8.006286618858634e-06, + "loss": 18.9027, + "step": 182600 + }, + { + "epoch": 0.3688837534391577, + "grad_norm": 185.5214080810547, + "learning_rate": 8.006007688558526e-06, + "loss": 18.1766, + "step": 182610 + }, + { + "epoch": 0.3689039540718415, + "grad_norm": 362.86444091796875, + "learning_rate": 8.005728743607499e-06, + "loss": 25.2606, + "step": 182620 + }, + { + "epoch": 0.36892415470452533, + "grad_norm": 444.9572448730469, + "learning_rate": 8.005449784006917e-06, + "loss": 20.8313, + "step": 182630 + }, + { + "epoch": 0.36894435533720915, + "grad_norm": 296.40045166015625, + "learning_rate": 8.005170809758136e-06, + "loss": 15.4983, + "step": 182640 + }, + { + "epoch": 0.36896455596989297, + "grad_norm": 306.0716552734375, + "learning_rate": 8.004891820862516e-06, + "loss": 18.7421, + "step": 182650 + }, + { + "epoch": 0.3689847566025768, + "grad_norm": 522.7783813476562, + "learning_rate": 8.004612817321419e-06, + "loss": 24.4774, + "step": 182660 + }, + { + "epoch": 0.3690049572352606, + "grad_norm": 141.30459594726562, + "learning_rate": 8.004333799136206e-06, + "loss": 12.4619, + "step": 182670 + }, + { + "epoch": 0.36902515786794443, + "grad_norm": 36.370323181152344, + "learning_rate": 8.004054766308232e-06, + "loss": 7.8107, + "step": 182680 + }, + { + "epoch": 0.36904535850062825, + 
"grad_norm": 982.9057006835938, + "learning_rate": 8.003775718838859e-06, + "loss": 33.2967, + "step": 182690 + }, + { + "epoch": 0.36906555913331207, + "grad_norm": 378.8469543457031, + "learning_rate": 8.003496656729448e-06, + "loss": 23.5365, + "step": 182700 + }, + { + "epoch": 0.3690857597659959, + "grad_norm": 165.53233337402344, + "learning_rate": 8.003217579981358e-06, + "loss": 17.2538, + "step": 182710 + }, + { + "epoch": 0.3691059603986797, + "grad_norm": 318.7522888183594, + "learning_rate": 8.002938488595951e-06, + "loss": 31.0203, + "step": 182720 + }, + { + "epoch": 0.3691261610313635, + "grad_norm": 189.60289001464844, + "learning_rate": 8.002659382574584e-06, + "loss": 17.6615, + "step": 182730 + }, + { + "epoch": 0.3691463616640473, + "grad_norm": 206.4030303955078, + "learning_rate": 8.00238026191862e-06, + "loss": 13.3053, + "step": 182740 + }, + { + "epoch": 0.3691665622967311, + "grad_norm": 985.864013671875, + "learning_rate": 8.002101126629422e-06, + "loss": 36.234, + "step": 182750 + }, + { + "epoch": 0.36918676292941494, + "grad_norm": 334.8808288574219, + "learning_rate": 8.001821976708344e-06, + "loss": 14.8257, + "step": 182760 + }, + { + "epoch": 0.36920696356209876, + "grad_norm": 606.4573974609375, + "learning_rate": 8.001542812156751e-06, + "loss": 17.9043, + "step": 182770 + }, + { + "epoch": 0.3692271641947826, + "grad_norm": 377.3143615722656, + "learning_rate": 8.001263632976001e-06, + "loss": 49.0505, + "step": 182780 + }, + { + "epoch": 0.3692473648274664, + "grad_norm": 819.2080078125, + "learning_rate": 8.000984439167457e-06, + "loss": 28.7495, + "step": 182790 + }, + { + "epoch": 0.3692675654601502, + "grad_norm": 192.60838317871094, + "learning_rate": 8.000705230732478e-06, + "loss": 17.4605, + "step": 182800 + }, + { + "epoch": 0.36928776609283404, + "grad_norm": 375.5916442871094, + "learning_rate": 8.000426007672426e-06, + "loss": 12.4707, + "step": 182810 + }, + { + "epoch": 0.36930796672551786, + "grad_norm": 1288.135498046875, + "learning_rate": 8.000146769988662e-06, + "loss": 29.7924, + "step": 182820 + }, + { + "epoch": 0.3693281673582017, + "grad_norm": 432.34814453125, + "learning_rate": 7.999867517682547e-06, + "loss": 15.0319, + "step": 182830 + }, + { + "epoch": 0.3693483679908855, + "grad_norm": 353.95367431640625, + "learning_rate": 7.999588250755442e-06, + "loss": 7.9291, + "step": 182840 + }, + { + "epoch": 0.3693685686235693, + "grad_norm": 593.1589965820312, + "learning_rate": 7.999308969208705e-06, + "loss": 38.9186, + "step": 182850 + }, + { + "epoch": 0.3693887692562531, + "grad_norm": 573.551025390625, + "learning_rate": 7.999029673043703e-06, + "loss": 14.8998, + "step": 182860 + }, + { + "epoch": 0.3694089698889369, + "grad_norm": 268.9027404785156, + "learning_rate": 7.99875036226179e-06, + "loss": 21.6615, + "step": 182870 + }, + { + "epoch": 0.3694291705216207, + "grad_norm": 223.42030334472656, + "learning_rate": 7.998471036864336e-06, + "loss": 14.4676, + "step": 182880 + }, + { + "epoch": 0.36944937115430454, + "grad_norm": 374.5646667480469, + "learning_rate": 7.998191696852696e-06, + "loss": 15.6709, + "step": 182890 + }, + { + "epoch": 0.36946957178698836, + "grad_norm": 469.9173889160156, + "learning_rate": 7.997912342228232e-06, + "loss": 46.8033, + "step": 182900 + }, + { + "epoch": 0.3694897724196722, + "grad_norm": 630.7611694335938, + "learning_rate": 7.997632972992308e-06, + "loss": 20.4953, + "step": 182910 + }, + { + "epoch": 0.369509973052356, + "grad_norm": 295.6542663574219, + "learning_rate": 
7.997353589146284e-06, + "loss": 19.6812, + "step": 182920 + }, + { + "epoch": 0.3695301736850398, + "grad_norm": 312.4400939941406, + "learning_rate": 7.997074190691523e-06, + "loss": 19.4398, + "step": 182930 + }, + { + "epoch": 0.36955037431772364, + "grad_norm": 495.8638000488281, + "learning_rate": 7.996794777629386e-06, + "loss": 19.1701, + "step": 182940 + }, + { + "epoch": 0.36957057495040746, + "grad_norm": 415.1043395996094, + "learning_rate": 7.996515349961233e-06, + "loss": 25.876, + "step": 182950 + }, + { + "epoch": 0.3695907755830913, + "grad_norm": 257.7971496582031, + "learning_rate": 7.99623590768843e-06, + "loss": 24.0562, + "step": 182960 + }, + { + "epoch": 0.3696109762157751, + "grad_norm": 269.9927978515625, + "learning_rate": 7.995956450812335e-06, + "loss": 20.5977, + "step": 182970 + }, + { + "epoch": 0.3696311768484589, + "grad_norm": 272.4700622558594, + "learning_rate": 7.995676979334313e-06, + "loss": 16.3563, + "step": 182980 + }, + { + "epoch": 0.3696513774811427, + "grad_norm": 343.4119873046875, + "learning_rate": 7.995397493255723e-06, + "loss": 18.765, + "step": 182990 + }, + { + "epoch": 0.3696715781138265, + "grad_norm": 348.2850646972656, + "learning_rate": 7.99511799257793e-06, + "loss": 19.3499, + "step": 183000 + }, + { + "epoch": 0.3696917787465103, + "grad_norm": 963.1719970703125, + "learning_rate": 7.994838477302294e-06, + "loss": 33.3418, + "step": 183010 + }, + { + "epoch": 0.36971197937919414, + "grad_norm": 0.0, + "learning_rate": 7.99455894743018e-06, + "loss": 15.1736, + "step": 183020 + }, + { + "epoch": 0.36973218001187796, + "grad_norm": 772.2694091796875, + "learning_rate": 7.994279402962948e-06, + "loss": 31.3075, + "step": 183030 + }, + { + "epoch": 0.3697523806445618, + "grad_norm": 381.2769775390625, + "learning_rate": 7.993999843901963e-06, + "loss": 17.7624, + "step": 183040 + }, + { + "epoch": 0.3697725812772456, + "grad_norm": 168.1398162841797, + "learning_rate": 7.993720270248583e-06, + "loss": 27.4773, + "step": 183050 + }, + { + "epoch": 0.3697927819099294, + "grad_norm": 681.6910400390625, + "learning_rate": 7.993440682004176e-06, + "loss": 18.7589, + "step": 183060 + }, + { + "epoch": 0.36981298254261324, + "grad_norm": 389.6490783691406, + "learning_rate": 7.993161079170101e-06, + "loss": 11.7036, + "step": 183070 + }, + { + "epoch": 0.36983318317529706, + "grad_norm": 721.0125122070312, + "learning_rate": 7.992881461747721e-06, + "loss": 50.7051, + "step": 183080 + }, + { + "epoch": 0.3698533838079809, + "grad_norm": 90.48036193847656, + "learning_rate": 7.992601829738403e-06, + "loss": 31.4077, + "step": 183090 + }, + { + "epoch": 0.3698735844406647, + "grad_norm": 321.55548095703125, + "learning_rate": 7.992322183143504e-06, + "loss": 13.8314, + "step": 183100 + }, + { + "epoch": 0.3698937850733485, + "grad_norm": 6.5746660232543945, + "learning_rate": 7.99204252196439e-06, + "loss": 11.8207, + "step": 183110 + }, + { + "epoch": 0.3699139857060323, + "grad_norm": 368.2030334472656, + "learning_rate": 7.991762846202423e-06, + "loss": 21.1813, + "step": 183120 + }, + { + "epoch": 0.3699341863387161, + "grad_norm": 415.379638671875, + "learning_rate": 7.991483155858968e-06, + "loss": 16.6932, + "step": 183130 + }, + { + "epoch": 0.36995438697139993, + "grad_norm": 889.5968017578125, + "learning_rate": 7.991203450935385e-06, + "loss": 29.9055, + "step": 183140 + }, + { + "epoch": 0.36997458760408375, + "grad_norm": 579.6605224609375, + "learning_rate": 7.990923731433043e-06, + "loss": 18.9249, + "step": 183150 + }, + { + 
"epoch": 0.36999478823676757, + "grad_norm": 456.553955078125, + "learning_rate": 7.990643997353296e-06, + "loss": 30.3161, + "step": 183160 + }, + { + "epoch": 0.3700149888694514, + "grad_norm": 326.1436767578125, + "learning_rate": 7.990364248697517e-06, + "loss": 16.7434, + "step": 183170 + }, + { + "epoch": 0.3700351895021352, + "grad_norm": 422.5854187011719, + "learning_rate": 7.990084485467065e-06, + "loss": 14.8333, + "step": 183180 + }, + { + "epoch": 0.37005539013481903, + "grad_norm": 81.252197265625, + "learning_rate": 7.989804707663302e-06, + "loss": 11.8127, + "step": 183190 + }, + { + "epoch": 0.37007559076750285, + "grad_norm": 585.47802734375, + "learning_rate": 7.989524915287595e-06, + "loss": 27.9933, + "step": 183200 + }, + { + "epoch": 0.37009579140018667, + "grad_norm": 44.53159713745117, + "learning_rate": 7.989245108341305e-06, + "loss": 11.5468, + "step": 183210 + }, + { + "epoch": 0.3701159920328705, + "grad_norm": 1733.242919921875, + "learning_rate": 7.988965286825798e-06, + "loss": 20.0665, + "step": 183220 + }, + { + "epoch": 0.3701361926655543, + "grad_norm": 17.573762893676758, + "learning_rate": 7.988685450742438e-06, + "loss": 19.0419, + "step": 183230 + }, + { + "epoch": 0.3701563932982381, + "grad_norm": 299.3620300292969, + "learning_rate": 7.988405600092585e-06, + "loss": 19.412, + "step": 183240 + }, + { + "epoch": 0.3701765939309219, + "grad_norm": 182.89193725585938, + "learning_rate": 7.988125734877607e-06, + "loss": 19.0907, + "step": 183250 + }, + { + "epoch": 0.3701967945636057, + "grad_norm": 687.41455078125, + "learning_rate": 7.987845855098864e-06, + "loss": 19.0766, + "step": 183260 + }, + { + "epoch": 0.37021699519628953, + "grad_norm": 303.67315673828125, + "learning_rate": 7.987565960757726e-06, + "loss": 26.404, + "step": 183270 + }, + { + "epoch": 0.37023719582897335, + "grad_norm": 322.6246337890625, + "learning_rate": 7.987286051855552e-06, + "loss": 17.0765, + "step": 183280 + }, + { + "epoch": 0.3702573964616572, + "grad_norm": 255.3560791015625, + "learning_rate": 7.98700612839371e-06, + "loss": 12.1922, + "step": 183290 + }, + { + "epoch": 0.370277597094341, + "grad_norm": 520.8690185546875, + "learning_rate": 7.986726190373562e-06, + "loss": 13.4479, + "step": 183300 + }, + { + "epoch": 0.3702977977270248, + "grad_norm": 293.56982421875, + "learning_rate": 7.986446237796471e-06, + "loss": 18.1435, + "step": 183310 + }, + { + "epoch": 0.37031799835970863, + "grad_norm": 238.17291259765625, + "learning_rate": 7.986166270663805e-06, + "loss": 25.508, + "step": 183320 + }, + { + "epoch": 0.37033819899239245, + "grad_norm": 571.9578247070312, + "learning_rate": 7.985886288976926e-06, + "loss": 18.8088, + "step": 183330 + }, + { + "epoch": 0.3703583996250763, + "grad_norm": 94.60584259033203, + "learning_rate": 7.985606292737199e-06, + "loss": 15.2967, + "step": 183340 + }, + { + "epoch": 0.3703786002577601, + "grad_norm": 14.213637351989746, + "learning_rate": 7.985326281945988e-06, + "loss": 14.4738, + "step": 183350 + }, + { + "epoch": 0.3703988008904439, + "grad_norm": 331.3981018066406, + "learning_rate": 7.98504625660466e-06, + "loss": 15.4799, + "step": 183360 + }, + { + "epoch": 0.3704190015231277, + "grad_norm": 543.9615478515625, + "learning_rate": 7.98476621671458e-06, + "loss": 14.1034, + "step": 183370 + }, + { + "epoch": 0.3704392021558115, + "grad_norm": 718.8535766601562, + "learning_rate": 7.98448616227711e-06, + "loss": 23.0639, + "step": 183380 + }, + { + "epoch": 0.3704594027884953, + "grad_norm": 515.5036010742188, 
+ "learning_rate": 7.984206093293617e-06, + "loss": 13.5569, + "step": 183390 + }, + { + "epoch": 0.37047960342117914, + "grad_norm": 317.13751220703125, + "learning_rate": 7.983926009765464e-06, + "loss": 28.3156, + "step": 183400 + }, + { + "epoch": 0.37049980405386296, + "grad_norm": 455.3040466308594, + "learning_rate": 7.983645911694018e-06, + "loss": 25.6995, + "step": 183410 + }, + { + "epoch": 0.3705200046865468, + "grad_norm": 321.3277587890625, + "learning_rate": 7.983365799080645e-06, + "loss": 19.8978, + "step": 183420 + }, + { + "epoch": 0.3705402053192306, + "grad_norm": 262.7257385253906, + "learning_rate": 7.983085671926707e-06, + "loss": 16.4916, + "step": 183430 + }, + { + "epoch": 0.3705604059519144, + "grad_norm": 149.5018768310547, + "learning_rate": 7.982805530233573e-06, + "loss": 32.7754, + "step": 183440 + }, + { + "epoch": 0.37058060658459824, + "grad_norm": 77.14970397949219, + "learning_rate": 7.982525374002607e-06, + "loss": 26.9118, + "step": 183450 + }, + { + "epoch": 0.37060080721728206, + "grad_norm": 15.213008880615234, + "learning_rate": 7.982245203235172e-06, + "loss": 21.7054, + "step": 183460 + }, + { + "epoch": 0.3706210078499659, + "grad_norm": 399.4362487792969, + "learning_rate": 7.981965017932638e-06, + "loss": 18.1264, + "step": 183470 + }, + { + "epoch": 0.3706412084826497, + "grad_norm": 205.84544372558594, + "learning_rate": 7.981684818096367e-06, + "loss": 23.8543, + "step": 183480 + }, + { + "epoch": 0.3706614091153335, + "grad_norm": 661.1862182617188, + "learning_rate": 7.981404603727726e-06, + "loss": 21.7212, + "step": 183490 + }, + { + "epoch": 0.3706816097480173, + "grad_norm": 284.22589111328125, + "learning_rate": 7.981124374828079e-06, + "loss": 27.9908, + "step": 183500 + }, + { + "epoch": 0.3707018103807011, + "grad_norm": 636.8121337890625, + "learning_rate": 7.980844131398795e-06, + "loss": 21.8741, + "step": 183510 + }, + { + "epoch": 0.3707220110133849, + "grad_norm": 235.91986083984375, + "learning_rate": 7.980563873441239e-06, + "loss": 18.6829, + "step": 183520 + }, + { + "epoch": 0.37074221164606874, + "grad_norm": 34.92259979248047, + "learning_rate": 7.980283600956775e-06, + "loss": 21.6579, + "step": 183530 + }, + { + "epoch": 0.37076241227875256, + "grad_norm": 531.4364624023438, + "learning_rate": 7.98000331394677e-06, + "loss": 16.9734, + "step": 183540 + }, + { + "epoch": 0.3707826129114364, + "grad_norm": 314.3226013183594, + "learning_rate": 7.97972301241259e-06, + "loss": 20.7448, + "step": 183550 + }, + { + "epoch": 0.3708028135441202, + "grad_norm": 459.74737548828125, + "learning_rate": 7.979442696355601e-06, + "loss": 14.8336, + "step": 183560 + }, + { + "epoch": 0.370823014176804, + "grad_norm": 701.6031494140625, + "learning_rate": 7.979162365777173e-06, + "loss": 26.2635, + "step": 183570 + }, + { + "epoch": 0.37084321480948784, + "grad_norm": 99.23480987548828, + "learning_rate": 7.978882020678666e-06, + "loss": 14.496, + "step": 183580 + }, + { + "epoch": 0.37086341544217166, + "grad_norm": 511.1658630371094, + "learning_rate": 7.978601661061449e-06, + "loss": 26.4086, + "step": 183590 + }, + { + "epoch": 0.3708836160748555, + "grad_norm": 363.9020080566406, + "learning_rate": 7.978321286926892e-06, + "loss": 16.8137, + "step": 183600 + }, + { + "epoch": 0.3709038167075393, + "grad_norm": 549.7080078125, + "learning_rate": 7.978040898276353e-06, + "loss": 39.1137, + "step": 183610 + }, + { + "epoch": 0.3709240173402231, + "grad_norm": 875.7857666015625, + "learning_rate": 7.977760495111209e-06, + "loss": 
29.2739, + "step": 183620 + }, + { + "epoch": 0.3709442179729069, + "grad_norm": 273.4394836425781, + "learning_rate": 7.97748007743282e-06, + "loss": 11.0776, + "step": 183630 + }, + { + "epoch": 0.3709644186055907, + "grad_norm": 290.8146667480469, + "learning_rate": 7.977199645242553e-06, + "loss": 22.1111, + "step": 183640 + }, + { + "epoch": 0.3709846192382745, + "grad_norm": 750.2342529296875, + "learning_rate": 7.976919198541775e-06, + "loss": 38.7514, + "step": 183650 + }, + { + "epoch": 0.37100481987095835, + "grad_norm": 199.48460388183594, + "learning_rate": 7.976638737331855e-06, + "loss": 21.5552, + "step": 183660 + }, + { + "epoch": 0.37102502050364217, + "grad_norm": 3.6414854526519775, + "learning_rate": 7.97635826161416e-06, + "loss": 12.262, + "step": 183670 + }, + { + "epoch": 0.371045221136326, + "grad_norm": 324.2091979980469, + "learning_rate": 7.976077771390056e-06, + "loss": 18.6805, + "step": 183680 + }, + { + "epoch": 0.3710654217690098, + "grad_norm": 409.9293518066406, + "learning_rate": 7.975797266660908e-06, + "loss": 13.9024, + "step": 183690 + }, + { + "epoch": 0.3710856224016936, + "grad_norm": 556.5054321289062, + "learning_rate": 7.975516747428087e-06, + "loss": 19.1498, + "step": 183700 + }, + { + "epoch": 0.37110582303437745, + "grad_norm": 1997.013427734375, + "learning_rate": 7.975236213692956e-06, + "loss": 11.7474, + "step": 183710 + }, + { + "epoch": 0.37112602366706127, + "grad_norm": 1051.9697265625, + "learning_rate": 7.974955665456887e-06, + "loss": 15.8293, + "step": 183720 + }, + { + "epoch": 0.3711462242997451, + "grad_norm": 827.4751586914062, + "learning_rate": 7.974675102721244e-06, + "loss": 26.5937, + "step": 183730 + }, + { + "epoch": 0.3711664249324289, + "grad_norm": 525.66357421875, + "learning_rate": 7.974394525487395e-06, + "loss": 19.3954, + "step": 183740 + }, + { + "epoch": 0.3711866255651127, + "grad_norm": 850.7464599609375, + "learning_rate": 7.974113933756708e-06, + "loss": 14.5438, + "step": 183750 + }, + { + "epoch": 0.3712068261977965, + "grad_norm": 945.1376953125, + "learning_rate": 7.97383332753055e-06, + "loss": 23.5675, + "step": 183760 + }, + { + "epoch": 0.3712270268304803, + "grad_norm": 308.317626953125, + "learning_rate": 7.973552706810288e-06, + "loss": 18.4932, + "step": 183770 + }, + { + "epoch": 0.37124722746316413, + "grad_norm": 316.6867980957031, + "learning_rate": 7.973272071597293e-06, + "loss": 24.5264, + "step": 183780 + }, + { + "epoch": 0.37126742809584795, + "grad_norm": 395.24273681640625, + "learning_rate": 7.97299142189293e-06, + "loss": 9.765, + "step": 183790 + }, + { + "epoch": 0.37128762872853177, + "grad_norm": 195.8678436279297, + "learning_rate": 7.972710757698567e-06, + "loss": 15.1407, + "step": 183800 + }, + { + "epoch": 0.3713078293612156, + "grad_norm": 1529.356689453125, + "learning_rate": 7.972430079015572e-06, + "loss": 31.2506, + "step": 183810 + }, + { + "epoch": 0.3713280299938994, + "grad_norm": 325.48455810546875, + "learning_rate": 7.972149385845314e-06, + "loss": 18.5395, + "step": 183820 + }, + { + "epoch": 0.37134823062658323, + "grad_norm": 593.8530883789062, + "learning_rate": 7.97186867818916e-06, + "loss": 9.7227, + "step": 183830 + }, + { + "epoch": 0.37136843125926705, + "grad_norm": 644.62451171875, + "learning_rate": 7.971587956048479e-06, + "loss": 17.8562, + "step": 183840 + }, + { + "epoch": 0.37138863189195087, + "grad_norm": 591.8629760742188, + "learning_rate": 7.971307219424637e-06, + "loss": 18.2705, + "step": 183850 + }, + { + "epoch": 
0.3714088325246347, + "grad_norm": 545.7822875976562, + "learning_rate": 7.971026468319006e-06, + "loss": 24.0909, + "step": 183860 + }, + { + "epoch": 0.3714290331573185, + "grad_norm": 1182.5684814453125, + "learning_rate": 7.970745702732951e-06, + "loss": 27.9939, + "step": 183870 + }, + { + "epoch": 0.3714492337900023, + "grad_norm": 427.9289855957031, + "learning_rate": 7.970464922667842e-06, + "loss": 15.6524, + "step": 183880 + }, + { + "epoch": 0.3714694344226861, + "grad_norm": 50.75448226928711, + "learning_rate": 7.97018412812505e-06, + "loss": 23.8423, + "step": 183890 + }, + { + "epoch": 0.3714896350553699, + "grad_norm": 464.32373046875, + "learning_rate": 7.969903319105935e-06, + "loss": 19.8493, + "step": 183900 + }, + { + "epoch": 0.37150983568805374, + "grad_norm": 304.79669189453125, + "learning_rate": 7.969622495611877e-06, + "loss": 16.9334, + "step": 183910 + }, + { + "epoch": 0.37153003632073756, + "grad_norm": 206.56349182128906, + "learning_rate": 7.969341657644236e-06, + "loss": 15.2697, + "step": 183920 + }, + { + "epoch": 0.3715502369534214, + "grad_norm": 236.22669982910156, + "learning_rate": 7.969060805204385e-06, + "loss": 33.679, + "step": 183930 + }, + { + "epoch": 0.3715704375861052, + "grad_norm": 254.5858917236328, + "learning_rate": 7.968779938293691e-06, + "loss": 21.397, + "step": 183940 + }, + { + "epoch": 0.371590638218789, + "grad_norm": 440.5389404296875, + "learning_rate": 7.968499056913525e-06, + "loss": 21.9985, + "step": 183950 + }, + { + "epoch": 0.37161083885147284, + "grad_norm": 398.2488708496094, + "learning_rate": 7.968218161065253e-06, + "loss": 41.6348, + "step": 183960 + }, + { + "epoch": 0.37163103948415666, + "grad_norm": 54.56743240356445, + "learning_rate": 7.967937250750248e-06, + "loss": 12.1987, + "step": 183970 + }, + { + "epoch": 0.3716512401168405, + "grad_norm": 234.84764099121094, + "learning_rate": 7.967656325969875e-06, + "loss": 14.9494, + "step": 183980 + }, + { + "epoch": 0.3716714407495243, + "grad_norm": 228.147216796875, + "learning_rate": 7.967375386725505e-06, + "loss": 25.4243, + "step": 183990 + }, + { + "epoch": 0.3716916413822081, + "grad_norm": 123.51333618164062, + "learning_rate": 7.967094433018508e-06, + "loss": 13.0959, + "step": 184000 + }, + { + "epoch": 0.3717118420148919, + "grad_norm": 469.3757019042969, + "learning_rate": 7.966813464850252e-06, + "loss": 22.9168, + "step": 184010 + }, + { + "epoch": 0.3717320426475757, + "grad_norm": 250.6117401123047, + "learning_rate": 7.966532482222106e-06, + "loss": 23.3032, + "step": 184020 + }, + { + "epoch": 0.3717522432802595, + "grad_norm": 565.9239501953125, + "learning_rate": 7.966251485135443e-06, + "loss": 17.2938, + "step": 184030 + }, + { + "epoch": 0.37177244391294334, + "grad_norm": 226.63751220703125, + "learning_rate": 7.96597047359163e-06, + "loss": 15.2441, + "step": 184040 + }, + { + "epoch": 0.37179264454562716, + "grad_norm": 234.7274169921875, + "learning_rate": 7.965689447592034e-06, + "loss": 24.1701, + "step": 184050 + }, + { + "epoch": 0.371812845178311, + "grad_norm": 191.4552764892578, + "learning_rate": 7.96540840713803e-06, + "loss": 16.944, + "step": 184060 + }, + { + "epoch": 0.3718330458109948, + "grad_norm": 364.73040771484375, + "learning_rate": 7.965127352230984e-06, + "loss": 22.2877, + "step": 184070 + }, + { + "epoch": 0.3718532464436786, + "grad_norm": 229.94163513183594, + "learning_rate": 7.964846282872265e-06, + "loss": 21.0565, + "step": 184080 + }, + { + "epoch": 0.37187344707636244, + "grad_norm": 
437.13079833984375, + "learning_rate": 7.964565199063247e-06, + "loss": 20.9628, + "step": 184090 + }, + { + "epoch": 0.37189364770904626, + "grad_norm": 303.15606689453125, + "learning_rate": 7.964284100805297e-06, + "loss": 17.737, + "step": 184100 + }, + { + "epoch": 0.3719138483417301, + "grad_norm": 599.64990234375, + "learning_rate": 7.964002988099785e-06, + "loss": 31.0308, + "step": 184110 + }, + { + "epoch": 0.3719340489744139, + "grad_norm": 447.39349365234375, + "learning_rate": 7.963721860948085e-06, + "loss": 18.8614, + "step": 184120 + }, + { + "epoch": 0.3719542496070977, + "grad_norm": 253.20962524414062, + "learning_rate": 7.96344071935156e-06, + "loss": 23.9809, + "step": 184130 + }, + { + "epoch": 0.3719744502397815, + "grad_norm": 263.40814208984375, + "learning_rate": 7.963159563311587e-06, + "loss": 24.8219, + "step": 184140 + }, + { + "epoch": 0.3719946508724653, + "grad_norm": 187.50816345214844, + "learning_rate": 7.962878392829533e-06, + "loss": 27.1222, + "step": 184150 + }, + { + "epoch": 0.3720148515051491, + "grad_norm": 787.1674194335938, + "learning_rate": 7.96259720790677e-06, + "loss": 36.3851, + "step": 184160 + }, + { + "epoch": 0.37203505213783294, + "grad_norm": 309.73291015625, + "learning_rate": 7.962316008544666e-06, + "loss": 15.5864, + "step": 184170 + }, + { + "epoch": 0.37205525277051676, + "grad_norm": 2.7626118659973145, + "learning_rate": 7.962034794744594e-06, + "loss": 25.9718, + "step": 184180 + }, + { + "epoch": 0.3720754534032006, + "grad_norm": 233.19387817382812, + "learning_rate": 7.961753566507924e-06, + "loss": 13.7824, + "step": 184190 + }, + { + "epoch": 0.3720956540358844, + "grad_norm": 233.17050170898438, + "learning_rate": 7.961472323836025e-06, + "loss": 22.2851, + "step": 184200 + }, + { + "epoch": 0.3721158546685682, + "grad_norm": 683.0888061523438, + "learning_rate": 7.961191066730272e-06, + "loss": 31.4705, + "step": 184210 + }, + { + "epoch": 0.37213605530125204, + "grad_norm": 204.0174102783203, + "learning_rate": 7.960909795192029e-06, + "loss": 16.8607, + "step": 184220 + }, + { + "epoch": 0.37215625593393586, + "grad_norm": 126.80240631103516, + "learning_rate": 7.960628509222674e-06, + "loss": 12.3081, + "step": 184230 + }, + { + "epoch": 0.3721764565666197, + "grad_norm": 155.76065063476562, + "learning_rate": 7.960347208823572e-06, + "loss": 29.5864, + "step": 184240 + }, + { + "epoch": 0.3721966571993035, + "grad_norm": 575.4931030273438, + "learning_rate": 7.960065893996099e-06, + "loss": 24.3775, + "step": 184250 + }, + { + "epoch": 0.3722168578319873, + "grad_norm": 427.9136047363281, + "learning_rate": 7.959784564741622e-06, + "loss": 29.6747, + "step": 184260 + }, + { + "epoch": 0.3722370584646711, + "grad_norm": 384.1243591308594, + "learning_rate": 7.959503221061515e-06, + "loss": 27.1969, + "step": 184270 + }, + { + "epoch": 0.3722572590973549, + "grad_norm": 370.0421447753906, + "learning_rate": 7.959221862957149e-06, + "loss": 28.6106, + "step": 184280 + }, + { + "epoch": 0.37227745973003873, + "grad_norm": 414.8506774902344, + "learning_rate": 7.958940490429893e-06, + "loss": 34.214, + "step": 184290 + }, + { + "epoch": 0.37229766036272255, + "grad_norm": 85.80133819580078, + "learning_rate": 7.95865910348112e-06, + "loss": 17.7264, + "step": 184300 + }, + { + "epoch": 0.37231786099540637, + "grad_norm": 876.0701293945312, + "learning_rate": 7.958377702112204e-06, + "loss": 27.1869, + "step": 184310 + }, + { + "epoch": 0.3723380616280902, + "grad_norm": 346.3531494140625, + "learning_rate": 
7.95809628632451e-06, + "loss": 15.2381, + "step": 184320 + }, + { + "epoch": 0.372358262260774, + "grad_norm": 331.1015319824219, + "learning_rate": 7.957814856119416e-06, + "loss": 16.0605, + "step": 184330 + }, + { + "epoch": 0.37237846289345783, + "grad_norm": 305.2842102050781, + "learning_rate": 7.95753341149829e-06, + "loss": 21.837, + "step": 184340 + }, + { + "epoch": 0.37239866352614165, + "grad_norm": 529.4951171875, + "learning_rate": 7.957251952462506e-06, + "loss": 22.0057, + "step": 184350 + }, + { + "epoch": 0.37241886415882547, + "grad_norm": 549.4971923828125, + "learning_rate": 7.956970479013433e-06, + "loss": 20.2053, + "step": 184360 + }, + { + "epoch": 0.3724390647915093, + "grad_norm": 384.4616394042969, + "learning_rate": 7.956688991152446e-06, + "loss": 21.9653, + "step": 184370 + }, + { + "epoch": 0.3724592654241931, + "grad_norm": 411.3118591308594, + "learning_rate": 7.956407488880915e-06, + "loss": 27.5025, + "step": 184380 + }, + { + "epoch": 0.37247946605687693, + "grad_norm": 182.23385620117188, + "learning_rate": 7.956125972200212e-06, + "loss": 25.5676, + "step": 184390 + }, + { + "epoch": 0.3724996666895607, + "grad_norm": 218.42762756347656, + "learning_rate": 7.95584444111171e-06, + "loss": 14.4574, + "step": 184400 + }, + { + "epoch": 0.3725198673222445, + "grad_norm": 294.8897705078125, + "learning_rate": 7.955562895616782e-06, + "loss": 21.5955, + "step": 184410 + }, + { + "epoch": 0.37254006795492833, + "grad_norm": 634.1650390625, + "learning_rate": 7.955281335716797e-06, + "loss": 20.6862, + "step": 184420 + }, + { + "epoch": 0.37256026858761215, + "grad_norm": 450.59613037109375, + "learning_rate": 7.954999761413129e-06, + "loss": 39.3258, + "step": 184430 + }, + { + "epoch": 0.372580469220296, + "grad_norm": 412.40533447265625, + "learning_rate": 7.954718172707153e-06, + "loss": 16.5582, + "step": 184440 + }, + { + "epoch": 0.3726006698529798, + "grad_norm": 1051.6776123046875, + "learning_rate": 7.954436569600238e-06, + "loss": 18.6176, + "step": 184450 + }, + { + "epoch": 0.3726208704856636, + "grad_norm": 656.8740234375, + "learning_rate": 7.954154952093754e-06, + "loss": 26.7486, + "step": 184460 + }, + { + "epoch": 0.37264107111834743, + "grad_norm": 130.8815460205078, + "learning_rate": 7.95387332018908e-06, + "loss": 12.8114, + "step": 184470 + }, + { + "epoch": 0.37266127175103125, + "grad_norm": 92.77995300292969, + "learning_rate": 7.953591673887586e-06, + "loss": 30.7558, + "step": 184480 + }, + { + "epoch": 0.3726814723837151, + "grad_norm": 363.2783203125, + "learning_rate": 7.953310013190645e-06, + "loss": 19.3335, + "step": 184490 + }, + { + "epoch": 0.3727016730163989, + "grad_norm": 1399.116455078125, + "learning_rate": 7.953028338099628e-06, + "loss": 33.2493, + "step": 184500 + }, + { + "epoch": 0.3727218736490827, + "grad_norm": 227.3540802001953, + "learning_rate": 7.952746648615908e-06, + "loss": 18.106, + "step": 184510 + }, + { + "epoch": 0.3727420742817665, + "grad_norm": 507.50384521484375, + "learning_rate": 7.952464944740861e-06, + "loss": 21.1331, + "step": 184520 + }, + { + "epoch": 0.3727622749144503, + "grad_norm": 143.76055908203125, + "learning_rate": 7.952183226475858e-06, + "loss": 14.0552, + "step": 184530 + }, + { + "epoch": 0.3727824755471341, + "grad_norm": 132.0059051513672, + "learning_rate": 7.95190149382227e-06, + "loss": 25.0118, + "step": 184540 + }, + { + "epoch": 0.37280267617981794, + "grad_norm": 117.39488983154297, + "learning_rate": 7.951619746781474e-06, + "loss": 29.0449, + "step": 184550 + 
}, + { + "epoch": 0.37282287681250176, + "grad_norm": 579.1425170898438, + "learning_rate": 7.95133798535484e-06, + "loss": 22.7947, + "step": 184560 + }, + { + "epoch": 0.3728430774451856, + "grad_norm": 458.9516906738281, + "learning_rate": 7.951056209543744e-06, + "loss": 29.5082, + "step": 184570 + }, + { + "epoch": 0.3728632780778694, + "grad_norm": 534.38623046875, + "learning_rate": 7.950774419349557e-06, + "loss": 22.181, + "step": 184580 + }, + { + "epoch": 0.3728834787105532, + "grad_norm": 342.5497131347656, + "learning_rate": 7.950492614773653e-06, + "loss": 25.2034, + "step": 184590 + }, + { + "epoch": 0.37290367934323704, + "grad_norm": 0.0, + "learning_rate": 7.950210795817406e-06, + "loss": 17.5624, + "step": 184600 + }, + { + "epoch": 0.37292387997592086, + "grad_norm": 975.2837524414062, + "learning_rate": 7.949928962482191e-06, + "loss": 28.2033, + "step": 184610 + }, + { + "epoch": 0.3729440806086047, + "grad_norm": 490.6043395996094, + "learning_rate": 7.94964711476938e-06, + "loss": 18.7533, + "step": 184620 + }, + { + "epoch": 0.3729642812412885, + "grad_norm": 281.4523620605469, + "learning_rate": 7.949365252680343e-06, + "loss": 29.3018, + "step": 184630 + }, + { + "epoch": 0.3729844818739723, + "grad_norm": 476.4045715332031, + "learning_rate": 7.94908337621646e-06, + "loss": 25.1157, + "step": 184640 + }, + { + "epoch": 0.3730046825066561, + "grad_norm": 468.6783142089844, + "learning_rate": 7.948801485379103e-06, + "loss": 27.7701, + "step": 184650 + }, + { + "epoch": 0.3730248831393399, + "grad_norm": 316.97369384765625, + "learning_rate": 7.948519580169644e-06, + "loss": 30.6865, + "step": 184660 + }, + { + "epoch": 0.3730450837720237, + "grad_norm": 599.8587646484375, + "learning_rate": 7.94823766058946e-06, + "loss": 36.0465, + "step": 184670 + }, + { + "epoch": 0.37306528440470754, + "grad_norm": 415.52838134765625, + "learning_rate": 7.947955726639922e-06, + "loss": 20.1033, + "step": 184680 + }, + { + "epoch": 0.37308548503739136, + "grad_norm": 308.10943603515625, + "learning_rate": 7.947673778322405e-06, + "loss": 19.1519, + "step": 184690 + }, + { + "epoch": 0.3731056856700752, + "grad_norm": 364.94189453125, + "learning_rate": 7.947391815638284e-06, + "loss": 13.5899, + "step": 184700 + }, + { + "epoch": 0.373125886302759, + "grad_norm": 363.39337158203125, + "learning_rate": 7.947109838588932e-06, + "loss": 23.8721, + "step": 184710 + }, + { + "epoch": 0.3731460869354428, + "grad_norm": 872.3594970703125, + "learning_rate": 7.946827847175724e-06, + "loss": 14.1021, + "step": 184720 + }, + { + "epoch": 0.37316628756812664, + "grad_norm": 495.41619873046875, + "learning_rate": 7.946545841400035e-06, + "loss": 25.4238, + "step": 184730 + }, + { + "epoch": 0.37318648820081046, + "grad_norm": 903.4898681640625, + "learning_rate": 7.94626382126324e-06, + "loss": 20.8995, + "step": 184740 + }, + { + "epoch": 0.3732066888334943, + "grad_norm": 420.36553955078125, + "learning_rate": 7.945981786766712e-06, + "loss": 42.4123, + "step": 184750 + }, + { + "epoch": 0.3732268894661781, + "grad_norm": 45.977108001708984, + "learning_rate": 7.945699737911825e-06, + "loss": 20.0298, + "step": 184760 + }, + { + "epoch": 0.3732470900988619, + "grad_norm": 463.26324462890625, + "learning_rate": 7.945417674699954e-06, + "loss": 29.0649, + "step": 184770 + }, + { + "epoch": 0.3732672907315457, + "grad_norm": 300.8017272949219, + "learning_rate": 7.945135597132477e-06, + "loss": 21.3997, + "step": 184780 + }, + { + "epoch": 0.3732874913642295, + "grad_norm": 
373.9449462890625, + "learning_rate": 7.944853505210766e-06, + "loss": 23.3273, + "step": 184790 + }, + { + "epoch": 0.3733076919969133, + "grad_norm": 226.5374755859375, + "learning_rate": 7.944571398936193e-06, + "loss": 19.7992, + "step": 184800 + }, + { + "epoch": 0.37332789262959715, + "grad_norm": 325.9032287597656, + "learning_rate": 7.94428927831014e-06, + "loss": 23.8617, + "step": 184810 + }, + { + "epoch": 0.37334809326228097, + "grad_norm": 264.9305725097656, + "learning_rate": 7.944007143333976e-06, + "loss": 16.1416, + "step": 184820 + }, + { + "epoch": 0.3733682938949648, + "grad_norm": 396.746337890625, + "learning_rate": 7.943724994009078e-06, + "loss": 20.2477, + "step": 184830 + }, + { + "epoch": 0.3733884945276486, + "grad_norm": 514.2771606445312, + "learning_rate": 7.943442830336822e-06, + "loss": 28.5255, + "step": 184840 + }, + { + "epoch": 0.3734086951603324, + "grad_norm": 406.6208190917969, + "learning_rate": 7.943160652318585e-06, + "loss": 29.833, + "step": 184850 + }, + { + "epoch": 0.37342889579301625, + "grad_norm": 339.4564514160156, + "learning_rate": 7.942878459955737e-06, + "loss": 14.0227, + "step": 184860 + }, + { + "epoch": 0.37344909642570007, + "grad_norm": 1454.90673828125, + "learning_rate": 7.942596253249658e-06, + "loss": 25.755, + "step": 184870 + }, + { + "epoch": 0.3734692970583839, + "grad_norm": 338.5536804199219, + "learning_rate": 7.94231403220172e-06, + "loss": 12.4781, + "step": 184880 + }, + { + "epoch": 0.3734894976910677, + "grad_norm": 338.1551513671875, + "learning_rate": 7.942031796813302e-06, + "loss": 23.3433, + "step": 184890 + }, + { + "epoch": 0.3735096983237515, + "grad_norm": 311.31689453125, + "learning_rate": 7.941749547085778e-06, + "loss": 19.965, + "step": 184900 + }, + { + "epoch": 0.3735298989564353, + "grad_norm": 404.0726013183594, + "learning_rate": 7.941467283020521e-06, + "loss": 34.9207, + "step": 184910 + }, + { + "epoch": 0.3735500995891191, + "grad_norm": 159.56248474121094, + "learning_rate": 7.941185004618911e-06, + "loss": 20.8413, + "step": 184920 + }, + { + "epoch": 0.37357030022180293, + "grad_norm": 195.2562255859375, + "learning_rate": 7.940902711882321e-06, + "loss": 10.9093, + "step": 184930 + }, + { + "epoch": 0.37359050085448675, + "grad_norm": 180.9736328125, + "learning_rate": 7.940620404812129e-06, + "loss": 19.7429, + "step": 184940 + }, + { + "epoch": 0.37361070148717057, + "grad_norm": 115.31999206542969, + "learning_rate": 7.94033808340971e-06, + "loss": 16.3377, + "step": 184950 + }, + { + "epoch": 0.3736309021198544, + "grad_norm": 665.5597534179688, + "learning_rate": 7.940055747676439e-06, + "loss": 43.3205, + "step": 184960 + }, + { + "epoch": 0.3736511027525382, + "grad_norm": 128.6699676513672, + "learning_rate": 7.939773397613692e-06, + "loss": 23.3228, + "step": 184970 + }, + { + "epoch": 0.37367130338522203, + "grad_norm": 502.3322448730469, + "learning_rate": 7.939491033222848e-06, + "loss": 19.8535, + "step": 184980 + }, + { + "epoch": 0.37369150401790585, + "grad_norm": 211.36598205566406, + "learning_rate": 7.939208654505281e-06, + "loss": 11.8954, + "step": 184990 + }, + { + "epoch": 0.37371170465058967, + "grad_norm": 351.20111083984375, + "learning_rate": 7.938926261462366e-06, + "loss": 28.067, + "step": 185000 + }, + { + "epoch": 0.3737319052832735, + "grad_norm": 792.7965698242188, + "learning_rate": 7.938643854095482e-06, + "loss": 56.3682, + "step": 185010 + }, + { + "epoch": 0.3737521059159573, + "grad_norm": 714.4829711914062, + "learning_rate": 
7.938361432406005e-06, + "loss": 18.4576, + "step": 185020 + }, + { + "epoch": 0.37377230654864113, + "grad_norm": 186.06739807128906, + "learning_rate": 7.93807899639531e-06, + "loss": 23.9017, + "step": 185030 + }, + { + "epoch": 0.3737925071813249, + "grad_norm": 246.42047119140625, + "learning_rate": 7.937796546064773e-06, + "loss": 25.0836, + "step": 185040 + }, + { + "epoch": 0.3738127078140087, + "grad_norm": 913.75439453125, + "learning_rate": 7.937514081415773e-06, + "loss": 39.7503, + "step": 185050 + }, + { + "epoch": 0.37383290844669254, + "grad_norm": 215.49111938476562, + "learning_rate": 7.937231602449687e-06, + "loss": 21.732, + "step": 185060 + }, + { + "epoch": 0.37385310907937636, + "grad_norm": 629.5859985351562, + "learning_rate": 7.936949109167887e-06, + "loss": 42.6736, + "step": 185070 + }, + { + "epoch": 0.3738733097120602, + "grad_norm": 203.20004272460938, + "learning_rate": 7.936666601571756e-06, + "loss": 18.076, + "step": 185080 + }, + { + "epoch": 0.373893510344744, + "grad_norm": 25.225662231445312, + "learning_rate": 7.936384079662666e-06, + "loss": 20.4229, + "step": 185090 + }, + { + "epoch": 0.3739137109774278, + "grad_norm": 276.54180908203125, + "learning_rate": 7.936101543441998e-06, + "loss": 16.5489, + "step": 185100 + }, + { + "epoch": 0.37393391161011164, + "grad_norm": 256.1173095703125, + "learning_rate": 7.935818992911129e-06, + "loss": 34.7231, + "step": 185110 + }, + { + "epoch": 0.37395411224279546, + "grad_norm": 265.9543762207031, + "learning_rate": 7.935536428071431e-06, + "loss": 14.2963, + "step": 185120 + }, + { + "epoch": 0.3739743128754793, + "grad_norm": 230.64804077148438, + "learning_rate": 7.935253848924285e-06, + "loss": 16.0491, + "step": 185130 + }, + { + "epoch": 0.3739945135081631, + "grad_norm": 334.02545166015625, + "learning_rate": 7.93497125547107e-06, + "loss": 28.3974, + "step": 185140 + }, + { + "epoch": 0.3740147141408469, + "grad_norm": 468.404052734375, + "learning_rate": 7.934688647713158e-06, + "loss": 20.2318, + "step": 185150 + }, + { + "epoch": 0.3740349147735307, + "grad_norm": 645.5770874023438, + "learning_rate": 7.93440602565193e-06, + "loss": 18.7274, + "step": 185160 + }, + { + "epoch": 0.3740551154062145, + "grad_norm": 257.2210998535156, + "learning_rate": 7.934123389288765e-06, + "loss": 18.5803, + "step": 185170 + }, + { + "epoch": 0.3740753160388983, + "grad_norm": 185.73089599609375, + "learning_rate": 7.933840738625035e-06, + "loss": 10.0916, + "step": 185180 + }, + { + "epoch": 0.37409551667158214, + "grad_norm": 257.7461242675781, + "learning_rate": 7.933558073662125e-06, + "loss": 16.1209, + "step": 185190 + }, + { + "epoch": 0.37411571730426596, + "grad_norm": 42.35219955444336, + "learning_rate": 7.933275394401407e-06, + "loss": 13.1954, + "step": 185200 + }, + { + "epoch": 0.3741359179369498, + "grad_norm": 409.8907775878906, + "learning_rate": 7.932992700844261e-06, + "loss": 16.9948, + "step": 185210 + }, + { + "epoch": 0.3741561185696336, + "grad_norm": 494.62371826171875, + "learning_rate": 7.932709992992063e-06, + "loss": 27.6559, + "step": 185220 + }, + { + "epoch": 0.3741763192023174, + "grad_norm": 357.8368835449219, + "learning_rate": 7.932427270846194e-06, + "loss": 10.1764, + "step": 185230 + }, + { + "epoch": 0.37419651983500124, + "grad_norm": 504.85205078125, + "learning_rate": 7.932144534408028e-06, + "loss": 12.3351, + "step": 185240 + }, + { + "epoch": 0.37421672046768506, + "grad_norm": 248.2692108154297, + "learning_rate": 7.931861783678946e-06, + "loss": 21.6287, + 
"step": 185250 + }, + { + "epoch": 0.3742369211003689, + "grad_norm": 102.75997161865234, + "learning_rate": 7.931579018660327e-06, + "loss": 31.1875, + "step": 185260 + }, + { + "epoch": 0.3742571217330527, + "grad_norm": 194.78038024902344, + "learning_rate": 7.931296239353546e-06, + "loss": 9.991, + "step": 185270 + }, + { + "epoch": 0.3742773223657365, + "grad_norm": 449.73455810546875, + "learning_rate": 7.931013445759984e-06, + "loss": 21.2125, + "step": 185280 + }, + { + "epoch": 0.3742975229984203, + "grad_norm": 282.6627502441406, + "learning_rate": 7.930730637881016e-06, + "loss": 25.5474, + "step": 185290 + }, + { + "epoch": 0.3743177236311041, + "grad_norm": 27.483171463012695, + "learning_rate": 7.930447815718022e-06, + "loss": 18.2717, + "step": 185300 + }, + { + "epoch": 0.3743379242637879, + "grad_norm": 310.1260070800781, + "learning_rate": 7.93016497927238e-06, + "loss": 18.9379, + "step": 185310 + }, + { + "epoch": 0.37435812489647174, + "grad_norm": 296.8552551269531, + "learning_rate": 7.929882128545474e-06, + "loss": 31.4366, + "step": 185320 + }, + { + "epoch": 0.37437832552915556, + "grad_norm": 416.75634765625, + "learning_rate": 7.929599263538674e-06, + "loss": 12.4571, + "step": 185330 + }, + { + "epoch": 0.3743985261618394, + "grad_norm": 207.06988525390625, + "learning_rate": 7.929316384253363e-06, + "loss": 37.065, + "step": 185340 + }, + { + "epoch": 0.3744187267945232, + "grad_norm": 252.4696502685547, + "learning_rate": 7.929033490690921e-06, + "loss": 13.6604, + "step": 185350 + }, + { + "epoch": 0.374438927427207, + "grad_norm": 674.5070190429688, + "learning_rate": 7.928750582852722e-06, + "loss": 24.2581, + "step": 185360 + }, + { + "epoch": 0.37445912805989084, + "grad_norm": 218.89048767089844, + "learning_rate": 7.92846766074015e-06, + "loss": 12.1517, + "step": 185370 + }, + { + "epoch": 0.37447932869257466, + "grad_norm": 116.3596420288086, + "learning_rate": 7.928184724354581e-06, + "loss": 15.6534, + "step": 185380 + }, + { + "epoch": 0.3744995293252585, + "grad_norm": 236.49302673339844, + "learning_rate": 7.927901773697396e-06, + "loss": 19.4553, + "step": 185390 + }, + { + "epoch": 0.3745197299579423, + "grad_norm": 307.58331298828125, + "learning_rate": 7.927618808769971e-06, + "loss": 13.7072, + "step": 185400 + }, + { + "epoch": 0.3745399305906261, + "grad_norm": 116.84063720703125, + "learning_rate": 7.927335829573688e-06, + "loss": 27.0985, + "step": 185410 + }, + { + "epoch": 0.3745601312233099, + "grad_norm": 451.7219543457031, + "learning_rate": 7.927052836109925e-06, + "loss": 23.0556, + "step": 185420 + }, + { + "epoch": 0.3745803318559937, + "grad_norm": 488.555908203125, + "learning_rate": 7.926769828380062e-06, + "loss": 24.1628, + "step": 185430 + }, + { + "epoch": 0.37460053248867753, + "grad_norm": 421.19952392578125, + "learning_rate": 7.926486806385479e-06, + "loss": 13.3053, + "step": 185440 + }, + { + "epoch": 0.37462073312136135, + "grad_norm": 579.669189453125, + "learning_rate": 7.926203770127552e-06, + "loss": 16.6306, + "step": 185450 + }, + { + "epoch": 0.37464093375404517, + "grad_norm": 571.9569091796875, + "learning_rate": 7.925920719607663e-06, + "loss": 25.5336, + "step": 185460 + }, + { + "epoch": 0.374661134386729, + "grad_norm": 557.4139404296875, + "learning_rate": 7.925637654827192e-06, + "loss": 16.2948, + "step": 185470 + }, + { + "epoch": 0.3746813350194128, + "grad_norm": 279.9828186035156, + "learning_rate": 7.925354575787517e-06, + "loss": 28.4328, + "step": 185480 + }, + { + "epoch": 
0.37470153565209663, + "grad_norm": 1.3079050779342651, + "learning_rate": 7.925071482490018e-06, + "loss": 23.791, + "step": 185490 + }, + { + "epoch": 0.37472173628478045, + "grad_norm": 19.147375106811523, + "learning_rate": 7.92478837493608e-06, + "loss": 28.4207, + "step": 185500 + }, + { + "epoch": 0.37474193691746427, + "grad_norm": 133.83213806152344, + "learning_rate": 7.924505253127072e-06, + "loss": 18.1067, + "step": 185510 + }, + { + "epoch": 0.3747621375501481, + "grad_norm": 170.837890625, + "learning_rate": 7.924222117064385e-06, + "loss": 10.1431, + "step": 185520 + }, + { + "epoch": 0.3747823381828319, + "grad_norm": 694.3130493164062, + "learning_rate": 7.92393896674939e-06, + "loss": 12.2083, + "step": 185530 + }, + { + "epoch": 0.37480253881551573, + "grad_norm": 365.13720703125, + "learning_rate": 7.923655802183475e-06, + "loss": 25.7444, + "step": 185540 + }, + { + "epoch": 0.3748227394481995, + "grad_norm": 478.3114013671875, + "learning_rate": 7.923372623368014e-06, + "loss": 22.0276, + "step": 185550 + }, + { + "epoch": 0.3748429400808833, + "grad_norm": 540.748046875, + "learning_rate": 7.92308943030439e-06, + "loss": 26.7105, + "step": 185560 + }, + { + "epoch": 0.37486314071356713, + "grad_norm": 431.2481384277344, + "learning_rate": 7.922806222993981e-06, + "loss": 15.4215, + "step": 185570 + }, + { + "epoch": 0.37488334134625095, + "grad_norm": 311.3813781738281, + "learning_rate": 7.92252300143817e-06, + "loss": 23.4757, + "step": 185580 + }, + { + "epoch": 0.3749035419789348, + "grad_norm": 240.32777404785156, + "learning_rate": 7.922239765638338e-06, + "loss": 21.5707, + "step": 185590 + }, + { + "epoch": 0.3749237426116186, + "grad_norm": 359.6653747558594, + "learning_rate": 7.921956515595861e-06, + "loss": 22.6223, + "step": 185600 + }, + { + "epoch": 0.3749439432443024, + "grad_norm": 899.4860229492188, + "learning_rate": 7.921673251312124e-06, + "loss": 35.97, + "step": 185610 + }, + { + "epoch": 0.37496414387698623, + "grad_norm": 29.04227066040039, + "learning_rate": 7.921389972788505e-06, + "loss": 12.8657, + "step": 185620 + }, + { + "epoch": 0.37498434450967005, + "grad_norm": 263.94097900390625, + "learning_rate": 7.921106680026388e-06, + "loss": 18.596, + "step": 185630 + }, + { + "epoch": 0.3750045451423539, + "grad_norm": 283.1744384765625, + "learning_rate": 7.920823373027149e-06, + "loss": 14.0105, + "step": 185640 + }, + { + "epoch": 0.3750247457750377, + "grad_norm": 454.0027770996094, + "learning_rate": 7.920540051792171e-06, + "loss": 22.3368, + "step": 185650 + }, + { + "epoch": 0.3750449464077215, + "grad_norm": 525.3453979492188, + "learning_rate": 7.920256716322837e-06, + "loss": 10.4326, + "step": 185660 + }, + { + "epoch": 0.3750651470404053, + "grad_norm": 365.10662841796875, + "learning_rate": 7.919973366620525e-06, + "loss": 23.8483, + "step": 185670 + }, + { + "epoch": 0.3750853476730891, + "grad_norm": 431.7030334472656, + "learning_rate": 7.919690002686615e-06, + "loss": 20.7615, + "step": 185680 + }, + { + "epoch": 0.3751055483057729, + "grad_norm": 366.341552734375, + "learning_rate": 7.919406624522492e-06, + "loss": 32.8188, + "step": 185690 + }, + { + "epoch": 0.37512574893845674, + "grad_norm": 436.1279602050781, + "learning_rate": 7.919123232129535e-06, + "loss": 16.5177, + "step": 185700 + }, + { + "epoch": 0.37514594957114056, + "grad_norm": 488.3873291015625, + "learning_rate": 7.918839825509126e-06, + "loss": 19.1131, + "step": 185710 + }, + { + "epoch": 0.3751661502038244, + "grad_norm": 3.707166910171509, + 
"learning_rate": 7.918556404662645e-06, + "loss": 11.3262, + "step": 185720 + }, + { + "epoch": 0.3751863508365082, + "grad_norm": 423.8901062011719, + "learning_rate": 7.918272969591474e-06, + "loss": 32.1798, + "step": 185730 + }, + { + "epoch": 0.375206551469192, + "grad_norm": 550.1453857421875, + "learning_rate": 7.917989520296996e-06, + "loss": 18.4277, + "step": 185740 + }, + { + "epoch": 0.37522675210187584, + "grad_norm": 283.56903076171875, + "learning_rate": 7.917706056780588e-06, + "loss": 27.2015, + "step": 185750 + }, + { + "epoch": 0.37524695273455966, + "grad_norm": 608.2725219726562, + "learning_rate": 7.917422579043637e-06, + "loss": 11.4259, + "step": 185760 + }, + { + "epoch": 0.3752671533672435, + "grad_norm": 490.80792236328125, + "learning_rate": 7.91713908708752e-06, + "loss": 21.0511, + "step": 185770 + }, + { + "epoch": 0.3752873539999273, + "grad_norm": 448.4722900390625, + "learning_rate": 7.916855580913622e-06, + "loss": 24.288, + "step": 185780 + }, + { + "epoch": 0.3753075546326111, + "grad_norm": 232.2841796875, + "learning_rate": 7.916572060523326e-06, + "loss": 26.2219, + "step": 185790 + }, + { + "epoch": 0.3753277552652949, + "grad_norm": 77.63753509521484, + "learning_rate": 7.916288525918008e-06, + "loss": 20.1102, + "step": 185800 + }, + { + "epoch": 0.3753479558979787, + "grad_norm": 405.3289489746094, + "learning_rate": 7.916004977099054e-06, + "loss": 31.0604, + "step": 185810 + }, + { + "epoch": 0.3753681565306625, + "grad_norm": 761.98583984375, + "learning_rate": 7.915721414067847e-06, + "loss": 21.6656, + "step": 185820 + }, + { + "epoch": 0.37538835716334634, + "grad_norm": 342.01947021484375, + "learning_rate": 7.915437836825767e-06, + "loss": 27.4627, + "step": 185830 + }, + { + "epoch": 0.37540855779603016, + "grad_norm": 149.497314453125, + "learning_rate": 7.915154245374197e-06, + "loss": 12.2475, + "step": 185840 + }, + { + "epoch": 0.375428758428714, + "grad_norm": 264.66595458984375, + "learning_rate": 7.914870639714517e-06, + "loss": 17.7935, + "step": 185850 + }, + { + "epoch": 0.3754489590613978, + "grad_norm": 260.6634826660156, + "learning_rate": 7.914587019848113e-06, + "loss": 24.8694, + "step": 185860 + }, + { + "epoch": 0.3754691596940816, + "grad_norm": 575.6069946289062, + "learning_rate": 7.914303385776365e-06, + "loss": 14.7805, + "step": 185870 + }, + { + "epoch": 0.37548936032676544, + "grad_norm": 121.77664184570312, + "learning_rate": 7.914019737500655e-06, + "loss": 21.3474, + "step": 185880 + }, + { + "epoch": 0.37550956095944926, + "grad_norm": 368.7645263671875, + "learning_rate": 7.913736075022366e-06, + "loss": 18.4518, + "step": 185890 + }, + { + "epoch": 0.3755297615921331, + "grad_norm": 530.3685913085938, + "learning_rate": 7.913452398342882e-06, + "loss": 22.438, + "step": 185900 + }, + { + "epoch": 0.3755499622248169, + "grad_norm": 460.9221496582031, + "learning_rate": 7.913168707463583e-06, + "loss": 15.6313, + "step": 185910 + }, + { + "epoch": 0.3755701628575007, + "grad_norm": 575.2742309570312, + "learning_rate": 7.912885002385852e-06, + "loss": 27.6675, + "step": 185920 + }, + { + "epoch": 0.3755903634901845, + "grad_norm": 1062.7508544921875, + "learning_rate": 7.912601283111076e-06, + "loss": 25.1376, + "step": 185930 + }, + { + "epoch": 0.3756105641228683, + "grad_norm": 249.41693115234375, + "learning_rate": 7.912317549640632e-06, + "loss": 13.3535, + "step": 185940 + }, + { + "epoch": 0.3756307647555521, + "grad_norm": 436.9518737792969, + "learning_rate": 7.912033801975907e-06, + "loss": 
31.6522, + "step": 185950 + }, + { + "epoch": 0.37565096538823595, + "grad_norm": 534.3568115234375, + "learning_rate": 7.911750040118282e-06, + "loss": 25.9282, + "step": 185960 + }, + { + "epoch": 0.37567116602091977, + "grad_norm": 501.3494873046875, + "learning_rate": 7.91146626406914e-06, + "loss": 14.9841, + "step": 185970 + }, + { + "epoch": 0.3756913666536036, + "grad_norm": 855.99853515625, + "learning_rate": 7.911182473829865e-06, + "loss": 15.4411, + "step": 185980 + }, + { + "epoch": 0.3757115672862874, + "grad_norm": 342.9635925292969, + "learning_rate": 7.91089866940184e-06, + "loss": 26.5816, + "step": 185990 + }, + { + "epoch": 0.3757317679189712, + "grad_norm": 877.3455200195312, + "learning_rate": 7.910614850786448e-06, + "loss": 28.9508, + "step": 186000 + }, + { + "epoch": 0.37575196855165505, + "grad_norm": 675.5576171875, + "learning_rate": 7.910331017985072e-06, + "loss": 15.8867, + "step": 186010 + }, + { + "epoch": 0.37577216918433887, + "grad_norm": 598.6139526367188, + "learning_rate": 7.910047170999095e-06, + "loss": 30.0019, + "step": 186020 + }, + { + "epoch": 0.3757923698170227, + "grad_norm": 59.5485725402832, + "learning_rate": 7.9097633098299e-06, + "loss": 46.0081, + "step": 186030 + }, + { + "epoch": 0.3758125704497065, + "grad_norm": 273.9651794433594, + "learning_rate": 7.909479434478874e-06, + "loss": 16.2739, + "step": 186040 + }, + { + "epoch": 0.3758327710823903, + "grad_norm": 455.9841003417969, + "learning_rate": 7.909195544947398e-06, + "loss": 28.814, + "step": 186050 + }, + { + "epoch": 0.3758529717150741, + "grad_norm": 252.17733764648438, + "learning_rate": 7.908911641236855e-06, + "loss": 21.4245, + "step": 186060 + }, + { + "epoch": 0.3758731723477579, + "grad_norm": 173.3985137939453, + "learning_rate": 7.908627723348628e-06, + "loss": 23.1291, + "step": 186070 + }, + { + "epoch": 0.37589337298044173, + "grad_norm": 385.39447021484375, + "learning_rate": 7.908343791284104e-06, + "loss": 29.9944, + "step": 186080 + }, + { + "epoch": 0.37591357361312555, + "grad_norm": 375.59173583984375, + "learning_rate": 7.908059845044665e-06, + "loss": 13.6519, + "step": 186090 + }, + { + "epoch": 0.37593377424580937, + "grad_norm": 460.693603515625, + "learning_rate": 7.907775884631694e-06, + "loss": 17.0569, + "step": 186100 + }, + { + "epoch": 0.3759539748784932, + "grad_norm": 265.4934387207031, + "learning_rate": 7.907491910046578e-06, + "loss": 37.8087, + "step": 186110 + }, + { + "epoch": 0.375974175511177, + "grad_norm": 124.66250610351562, + "learning_rate": 7.907207921290698e-06, + "loss": 24.2959, + "step": 186120 + }, + { + "epoch": 0.37599437614386083, + "grad_norm": 80.29561614990234, + "learning_rate": 7.906923918365439e-06, + "loss": 26.6489, + "step": 186130 + }, + { + "epoch": 0.37601457677654465, + "grad_norm": 146.98678588867188, + "learning_rate": 7.906639901272183e-06, + "loss": 40.2186, + "step": 186140 + }, + { + "epoch": 0.37603477740922847, + "grad_norm": 333.65533447265625, + "learning_rate": 7.90635587001232e-06, + "loss": 25.5619, + "step": 186150 + }, + { + "epoch": 0.3760549780419123, + "grad_norm": 158.60435485839844, + "learning_rate": 7.906071824587231e-06, + "loss": 20.7498, + "step": 186160 + }, + { + "epoch": 0.3760751786745961, + "grad_norm": 178.82809448242188, + "learning_rate": 7.9057877649983e-06, + "loss": 25.6808, + "step": 186170 + }, + { + "epoch": 0.37609537930727993, + "grad_norm": 361.49884033203125, + "learning_rate": 7.905503691246909e-06, + "loss": 17.2704, + "step": 186180 + }, + { + "epoch": 
0.3761155799399637, + "grad_norm": 311.1093444824219, + "learning_rate": 7.905219603334449e-06, + "loss": 14.6666, + "step": 186190 + }, + { + "epoch": 0.3761357805726475, + "grad_norm": 72.5036849975586, + "learning_rate": 7.904935501262301e-06, + "loss": 17.7347, + "step": 186200 + }, + { + "epoch": 0.37615598120533134, + "grad_norm": 183.01370239257812, + "learning_rate": 7.904651385031847e-06, + "loss": 36.2247, + "step": 186210 + }, + { + "epoch": 0.37617618183801516, + "grad_norm": 903.3137817382812, + "learning_rate": 7.904367254644475e-06, + "loss": 20.9463, + "step": 186220 + }, + { + "epoch": 0.376196382470699, + "grad_norm": 272.78167724609375, + "learning_rate": 7.90408311010157e-06, + "loss": 21.3787, + "step": 186230 + }, + { + "epoch": 0.3762165831033828, + "grad_norm": 486.9576721191406, + "learning_rate": 7.903798951404518e-06, + "loss": 17.8778, + "step": 186240 + }, + { + "epoch": 0.3762367837360666, + "grad_norm": 321.8508605957031, + "learning_rate": 7.903514778554699e-06, + "loss": 26.1256, + "step": 186250 + }, + { + "epoch": 0.37625698436875044, + "grad_norm": 455.9219970703125, + "learning_rate": 7.903230591553504e-06, + "loss": 53.3584, + "step": 186260 + }, + { + "epoch": 0.37627718500143426, + "grad_norm": 829.6468505859375, + "learning_rate": 7.902946390402313e-06, + "loss": 32.9458, + "step": 186270 + }, + { + "epoch": 0.3762973856341181, + "grad_norm": 248.43630981445312, + "learning_rate": 7.902662175102514e-06, + "loss": 13.599, + "step": 186280 + }, + { + "epoch": 0.3763175862668019, + "grad_norm": 236.6037139892578, + "learning_rate": 7.90237794565549e-06, + "loss": 32.8911, + "step": 186290 + }, + { + "epoch": 0.3763377868994857, + "grad_norm": 403.4139404296875, + "learning_rate": 7.90209370206263e-06, + "loss": 16.999, + "step": 186300 + }, + { + "epoch": 0.3763579875321695, + "grad_norm": 303.2038269042969, + "learning_rate": 7.901809444325318e-06, + "loss": 13.2335, + "step": 186310 + }, + { + "epoch": 0.3763781881648533, + "grad_norm": 201.00413513183594, + "learning_rate": 7.901525172444938e-06, + "loss": 6.8179, + "step": 186320 + }, + { + "epoch": 0.3763983887975371, + "grad_norm": 1031.934814453125, + "learning_rate": 7.901240886422875e-06, + "loss": 37.6776, + "step": 186330 + }, + { + "epoch": 0.37641858943022094, + "grad_norm": 217.69149780273438, + "learning_rate": 7.900956586260516e-06, + "loss": 14.5887, + "step": 186340 + }, + { + "epoch": 0.37643879006290476, + "grad_norm": 391.96905517578125, + "learning_rate": 7.900672271959247e-06, + "loss": 14.6044, + "step": 186350 + }, + { + "epoch": 0.3764589906955886, + "grad_norm": 532.9293823242188, + "learning_rate": 7.900387943520453e-06, + "loss": 21.1594, + "step": 186360 + }, + { + "epoch": 0.3764791913282724, + "grad_norm": 449.4956970214844, + "learning_rate": 7.900103600945521e-06, + "loss": 26.3718, + "step": 186370 + }, + { + "epoch": 0.3764993919609562, + "grad_norm": 946.3658447265625, + "learning_rate": 7.899819244235835e-06, + "loss": 19.1424, + "step": 186380 + }, + { + "epoch": 0.37651959259364004, + "grad_norm": 440.26019287109375, + "learning_rate": 7.899534873392781e-06, + "loss": 12.1415, + "step": 186390 + }, + { + "epoch": 0.37653979322632386, + "grad_norm": 562.1298217773438, + "learning_rate": 7.899250488417746e-06, + "loss": 17.3849, + "step": 186400 + }, + { + "epoch": 0.3765599938590077, + "grad_norm": 213.060791015625, + "learning_rate": 7.898966089312117e-06, + "loss": 23.1838, + "step": 186410 + }, + { + "epoch": 0.3765801944916915, + "grad_norm": 
203.11265563964844, + "learning_rate": 7.898681676077278e-06, + "loss": 24.2536, + "step": 186420 + }, + { + "epoch": 0.3766003951243753, + "grad_norm": 50.743221282958984, + "learning_rate": 7.898397248714615e-06, + "loss": 28.5922, + "step": 186430 + }, + { + "epoch": 0.3766205957570591, + "grad_norm": 318.5630187988281, + "learning_rate": 7.898112807225517e-06, + "loss": 9.3861, + "step": 186440 + }, + { + "epoch": 0.3766407963897429, + "grad_norm": 493.37646484375, + "learning_rate": 7.897828351611368e-06, + "loss": 18.3657, + "step": 186450 + }, + { + "epoch": 0.3766609970224267, + "grad_norm": 669.4931640625, + "learning_rate": 7.897543881873555e-06, + "loss": 29.6392, + "step": 186460 + }, + { + "epoch": 0.37668119765511054, + "grad_norm": 449.8827819824219, + "learning_rate": 7.897259398013465e-06, + "loss": 19.5032, + "step": 186470 + }, + { + "epoch": 0.37670139828779436, + "grad_norm": 400.0908203125, + "learning_rate": 7.896974900032483e-06, + "loss": 54.8798, + "step": 186480 + }, + { + "epoch": 0.3767215989204782, + "grad_norm": 40.167388916015625, + "learning_rate": 7.896690387931997e-06, + "loss": 33.8369, + "step": 186490 + }, + { + "epoch": 0.376741799553162, + "grad_norm": 226.82191467285156, + "learning_rate": 7.896405861713393e-06, + "loss": 24.2288, + "step": 186500 + }, + { + "epoch": 0.3767620001858458, + "grad_norm": 322.2902526855469, + "learning_rate": 7.89612132137806e-06, + "loss": 20.8587, + "step": 186510 + }, + { + "epoch": 0.37678220081852964, + "grad_norm": 66.47044372558594, + "learning_rate": 7.895836766927383e-06, + "loss": 17.9935, + "step": 186520 + }, + { + "epoch": 0.37680240145121346, + "grad_norm": 228.1887969970703, + "learning_rate": 7.895552198362748e-06, + "loss": 6.859, + "step": 186530 + }, + { + "epoch": 0.3768226020838973, + "grad_norm": 336.7323303222656, + "learning_rate": 7.895267615685542e-06, + "loss": 16.1734, + "step": 186540 + }, + { + "epoch": 0.3768428027165811, + "grad_norm": 603.1956787109375, + "learning_rate": 7.894983018897153e-06, + "loss": 24.2695, + "step": 186550 + }, + { + "epoch": 0.3768630033492649, + "grad_norm": 325.160888671875, + "learning_rate": 7.89469840799897e-06, + "loss": 19.9261, + "step": 186560 + }, + { + "epoch": 0.3768832039819487, + "grad_norm": 351.0687255859375, + "learning_rate": 7.894413782992375e-06, + "loss": 20.6885, + "step": 186570 + }, + { + "epoch": 0.3769034046146325, + "grad_norm": 216.7996063232422, + "learning_rate": 7.894129143878758e-06, + "loss": 16.5278, + "step": 186580 + }, + { + "epoch": 0.37692360524731633, + "grad_norm": 539.2125854492188, + "learning_rate": 7.89384449065951e-06, + "loss": 16.7036, + "step": 186590 + }, + { + "epoch": 0.37694380588000015, + "grad_norm": 596.241943359375, + "learning_rate": 7.893559823336013e-06, + "loss": 25.1195, + "step": 186600 + }, + { + "epoch": 0.37696400651268397, + "grad_norm": 527.4559326171875, + "learning_rate": 7.893275141909655e-06, + "loss": 16.728, + "step": 186610 + }, + { + "epoch": 0.3769842071453678, + "grad_norm": 25.770774841308594, + "learning_rate": 7.892990446381828e-06, + "loss": 21.4469, + "step": 186620 + }, + { + "epoch": 0.3770044077780516, + "grad_norm": 642.983642578125, + "learning_rate": 7.892705736753913e-06, + "loss": 14.4212, + "step": 186630 + }, + { + "epoch": 0.37702460841073543, + "grad_norm": 278.6478271484375, + "learning_rate": 7.892421013027302e-06, + "loss": 22.8077, + "step": 186640 + }, + { + "epoch": 0.37704480904341925, + "grad_norm": 351.9095764160156, + "learning_rate": 7.892136275203383e-06, + 
"loss": 15.5565, + "step": 186650 + }, + { + "epoch": 0.37706500967610307, + "grad_norm": 694.929931640625, + "learning_rate": 7.891851523283542e-06, + "loss": 23.1556, + "step": 186660 + }, + { + "epoch": 0.3770852103087869, + "grad_norm": 393.3650817871094, + "learning_rate": 7.891566757269169e-06, + "loss": 33.6424, + "step": 186670 + }, + { + "epoch": 0.3771054109414707, + "grad_norm": 568.8635864257812, + "learning_rate": 7.891281977161648e-06, + "loss": 20.8657, + "step": 186680 + }, + { + "epoch": 0.37712561157415453, + "grad_norm": 675.2408447265625, + "learning_rate": 7.89099718296237e-06, + "loss": 23.8988, + "step": 186690 + }, + { + "epoch": 0.3771458122068383, + "grad_norm": 681.5302734375, + "learning_rate": 7.890712374672724e-06, + "loss": 14.7216, + "step": 186700 + }, + { + "epoch": 0.3771660128395221, + "grad_norm": 177.2668914794922, + "learning_rate": 7.890427552294093e-06, + "loss": 16.5428, + "step": 186710 + }, + { + "epoch": 0.37718621347220593, + "grad_norm": 616.7086181640625, + "learning_rate": 7.890142715827871e-06, + "loss": 19.9336, + "step": 186720 + }, + { + "epoch": 0.37720641410488975, + "grad_norm": 148.07113647460938, + "learning_rate": 7.889857865275445e-06, + "loss": 19.689, + "step": 186730 + }, + { + "epoch": 0.3772266147375736, + "grad_norm": 561.8738403320312, + "learning_rate": 7.8895730006382e-06, + "loss": 21.0662, + "step": 186740 + }, + { + "epoch": 0.3772468153702574, + "grad_norm": 583.3108520507812, + "learning_rate": 7.889288121917528e-06, + "loss": 30.6697, + "step": 186750 + }, + { + "epoch": 0.3772670160029412, + "grad_norm": 135.86241149902344, + "learning_rate": 7.889003229114816e-06, + "loss": 23.6817, + "step": 186760 + }, + { + "epoch": 0.37728721663562503, + "grad_norm": 152.9756317138672, + "learning_rate": 7.888718322231452e-06, + "loss": 11.7561, + "step": 186770 + }, + { + "epoch": 0.37730741726830885, + "grad_norm": 271.7252197265625, + "learning_rate": 7.888433401268825e-06, + "loss": 20.0215, + "step": 186780 + }, + { + "epoch": 0.3773276179009927, + "grad_norm": 18.141109466552734, + "learning_rate": 7.888148466228325e-06, + "loss": 20.4894, + "step": 186790 + }, + { + "epoch": 0.3773478185336765, + "grad_norm": 678.092529296875, + "learning_rate": 7.887863517111337e-06, + "loss": 27.0805, + "step": 186800 + }, + { + "epoch": 0.3773680191663603, + "grad_norm": 328.9731750488281, + "learning_rate": 7.887578553919256e-06, + "loss": 16.3972, + "step": 186810 + }, + { + "epoch": 0.37738821979904413, + "grad_norm": 484.07122802734375, + "learning_rate": 7.887293576653467e-06, + "loss": 19.7992, + "step": 186820 + }, + { + "epoch": 0.3774084204317279, + "grad_norm": 101.78085327148438, + "learning_rate": 7.887008585315358e-06, + "loss": 11.8989, + "step": 186830 + }, + { + "epoch": 0.3774286210644117, + "grad_norm": 602.8466796875, + "learning_rate": 7.88672357990632e-06, + "loss": 33.1497, + "step": 186840 + }, + { + "epoch": 0.37744882169709554, + "grad_norm": 647.81005859375, + "learning_rate": 7.88643856042774e-06, + "loss": 19.9207, + "step": 186850 + }, + { + "epoch": 0.37746902232977936, + "grad_norm": 705.634033203125, + "learning_rate": 7.886153526881011e-06, + "loss": 29.5627, + "step": 186860 + }, + { + "epoch": 0.3774892229624632, + "grad_norm": 383.0318298339844, + "learning_rate": 7.885868479267517e-06, + "loss": 9.3724, + "step": 186870 + }, + { + "epoch": 0.377509423595147, + "grad_norm": 65.01758575439453, + "learning_rate": 7.885583417588652e-06, + "loss": 20.5341, + "step": 186880 + }, + { + "epoch": 
0.3775296242278308, + "grad_norm": 398.67120361328125, + "learning_rate": 7.885298341845803e-06, + "loss": 19.8146, + "step": 186890 + }, + { + "epoch": 0.37754982486051464, + "grad_norm": 483.8502197265625, + "learning_rate": 7.88501325204036e-06, + "loss": 21.7544, + "step": 186900 + }, + { + "epoch": 0.37757002549319846, + "grad_norm": 431.016357421875, + "learning_rate": 7.88472814817371e-06, + "loss": 18.4124, + "step": 186910 + }, + { + "epoch": 0.3775902261258823, + "grad_norm": 407.1339111328125, + "learning_rate": 7.884443030247248e-06, + "loss": 8.0406, + "step": 186920 + }, + { + "epoch": 0.3776104267585661, + "grad_norm": 169.00169372558594, + "learning_rate": 7.88415789826236e-06, + "loss": 10.5729, + "step": 186930 + }, + { + "epoch": 0.3776306273912499, + "grad_norm": 445.7185363769531, + "learning_rate": 7.883872752220434e-06, + "loss": 25.9141, + "step": 186940 + }, + { + "epoch": 0.3776508280239337, + "grad_norm": 80.28812408447266, + "learning_rate": 7.883587592122864e-06, + "loss": 20.7481, + "step": 186950 + }, + { + "epoch": 0.3776710286566175, + "grad_norm": 347.7830505371094, + "learning_rate": 7.883302417971037e-06, + "loss": 15.599, + "step": 186960 + }, + { + "epoch": 0.3776912292893013, + "grad_norm": 65.90535736083984, + "learning_rate": 7.883017229766344e-06, + "loss": 14.5699, + "step": 186970 + }, + { + "epoch": 0.37771142992198514, + "grad_norm": 297.8687744140625, + "learning_rate": 7.882732027510174e-06, + "loss": 18.6648, + "step": 186980 + }, + { + "epoch": 0.37773163055466896, + "grad_norm": 289.4709167480469, + "learning_rate": 7.88244681120392e-06, + "loss": 14.5055, + "step": 186990 + }, + { + "epoch": 0.3777518311873528, + "grad_norm": 745.4989013671875, + "learning_rate": 7.882161580848966e-06, + "loss": 23.7828, + "step": 187000 + }, + { + "epoch": 0.3777720318200366, + "grad_norm": 388.3495178222656, + "learning_rate": 7.88187633644671e-06, + "loss": 23.081, + "step": 187010 + }, + { + "epoch": 0.3777922324527204, + "grad_norm": 296.5321960449219, + "learning_rate": 7.881591077998536e-06, + "loss": 29.1688, + "step": 187020 + }, + { + "epoch": 0.37781243308540424, + "grad_norm": 619.4912109375, + "learning_rate": 7.881305805505836e-06, + "loss": 26.354, + "step": 187030 + }, + { + "epoch": 0.37783263371808806, + "grad_norm": 281.350830078125, + "learning_rate": 7.881020518970003e-06, + "loss": 10.417, + "step": 187040 + }, + { + "epoch": 0.3778528343507719, + "grad_norm": 431.0904846191406, + "learning_rate": 7.880735218392424e-06, + "loss": 25.9254, + "step": 187050 + }, + { + "epoch": 0.3778730349834557, + "grad_norm": 740.3833618164062, + "learning_rate": 7.880449903774492e-06, + "loss": 52.8058, + "step": 187060 + }, + { + "epoch": 0.3778932356161395, + "grad_norm": 615.2548217773438, + "learning_rate": 7.880164575117596e-06, + "loss": 15.4073, + "step": 187070 + }, + { + "epoch": 0.3779134362488233, + "grad_norm": 236.98509216308594, + "learning_rate": 7.879879232423127e-06, + "loss": 15.5054, + "step": 187080 + }, + { + "epoch": 0.3779336368815071, + "grad_norm": 472.54852294921875, + "learning_rate": 7.879593875692476e-06, + "loss": 36.7834, + "step": 187090 + }, + { + "epoch": 0.3779538375141909, + "grad_norm": 390.18182373046875, + "learning_rate": 7.879308504927034e-06, + "loss": 17.5821, + "step": 187100 + }, + { + "epoch": 0.37797403814687475, + "grad_norm": 549.0300903320312, + "learning_rate": 7.879023120128191e-06, + "loss": 17.6275, + "step": 187110 + }, + { + "epoch": 0.37799423877955857, + "grad_norm": 570.148681640625, + 
"learning_rate": 7.87873772129734e-06, + "loss": 19.3192, + "step": 187120 + }, + { + "epoch": 0.3780144394122424, + "grad_norm": 462.4393005371094, + "learning_rate": 7.878452308435868e-06, + "loss": 20.0012, + "step": 187130 + }, + { + "epoch": 0.3780346400449262, + "grad_norm": 876.2901611328125, + "learning_rate": 7.878166881545171e-06, + "loss": 19.8765, + "step": 187140 + }, + { + "epoch": 0.37805484067761, + "grad_norm": 21940.1875, + "learning_rate": 7.877881440626635e-06, + "loss": 35.6072, + "step": 187150 + }, + { + "epoch": 0.37807504131029385, + "grad_norm": 544.189697265625, + "learning_rate": 7.877595985681656e-06, + "loss": 25.7395, + "step": 187160 + }, + { + "epoch": 0.37809524194297767, + "grad_norm": 674.6602783203125, + "learning_rate": 7.877310516711623e-06, + "loss": 27.2366, + "step": 187170 + }, + { + "epoch": 0.3781154425756615, + "grad_norm": 373.325439453125, + "learning_rate": 7.877025033717926e-06, + "loss": 25.4264, + "step": 187180 + }, + { + "epoch": 0.3781356432083453, + "grad_norm": 429.7815246582031, + "learning_rate": 7.876739536701961e-06, + "loss": 10.7018, + "step": 187190 + }, + { + "epoch": 0.3781558438410291, + "grad_norm": 79.63567352294922, + "learning_rate": 7.876454025665114e-06, + "loss": 18.9043, + "step": 187200 + }, + { + "epoch": 0.3781760444737129, + "grad_norm": 11.09257698059082, + "learning_rate": 7.87616850060878e-06, + "loss": 19.4218, + "step": 187210 + }, + { + "epoch": 0.3781962451063967, + "grad_norm": 999.5443725585938, + "learning_rate": 7.875882961534347e-06, + "loss": 42.3215, + "step": 187220 + }, + { + "epoch": 0.37821644573908053, + "grad_norm": 222.81358337402344, + "learning_rate": 7.875597408443212e-06, + "loss": 8.4141, + "step": 187230 + }, + { + "epoch": 0.37823664637176435, + "grad_norm": 252.15322875976562, + "learning_rate": 7.875311841336763e-06, + "loss": 20.9764, + "step": 187240 + }, + { + "epoch": 0.37825684700444817, + "grad_norm": 270.7761535644531, + "learning_rate": 7.875026260216395e-06, + "loss": 14.9763, + "step": 187250 + }, + { + "epoch": 0.378277047637132, + "grad_norm": 38126.50390625, + "learning_rate": 7.874740665083494e-06, + "loss": 58.4065, + "step": 187260 + }, + { + "epoch": 0.3782972482698158, + "grad_norm": 737.64892578125, + "learning_rate": 7.874455055939458e-06, + "loss": 35.3872, + "step": 187270 + }, + { + "epoch": 0.37831744890249963, + "grad_norm": 72.3812026977539, + "learning_rate": 7.874169432785677e-06, + "loss": 24.2148, + "step": 187280 + }, + { + "epoch": 0.37833764953518345, + "grad_norm": 80.65542602539062, + "learning_rate": 7.87388379562354e-06, + "loss": 26.749, + "step": 187290 + }, + { + "epoch": 0.37835785016786727, + "grad_norm": 792.4655151367188, + "learning_rate": 7.873598144454444e-06, + "loss": 20.2782, + "step": 187300 + }, + { + "epoch": 0.3783780508005511, + "grad_norm": 325.38720703125, + "learning_rate": 7.87331247927978e-06, + "loss": 13.6539, + "step": 187310 + }, + { + "epoch": 0.3783982514332349, + "grad_norm": 160.98269653320312, + "learning_rate": 7.873026800100937e-06, + "loss": 16.7994, + "step": 187320 + }, + { + "epoch": 0.37841845206591873, + "grad_norm": 297.0093994140625, + "learning_rate": 7.872741106919313e-06, + "loss": 24.4319, + "step": 187330 + }, + { + "epoch": 0.3784386526986025, + "grad_norm": 330.3622741699219, + "learning_rate": 7.872455399736295e-06, + "loss": 21.5576, + "step": 187340 + }, + { + "epoch": 0.3784588533312863, + "grad_norm": 475.5498046875, + "learning_rate": 7.872169678553279e-06, + "loss": 23.0462, + "step": 187350 
+ }, + { + "epoch": 0.37847905396397014, + "grad_norm": 209.27825927734375, + "learning_rate": 7.871883943371656e-06, + "loss": 15.8601, + "step": 187360 + }, + { + "epoch": 0.37849925459665396, + "grad_norm": 427.2062072753906, + "learning_rate": 7.871598194192817e-06, + "loss": 14.7591, + "step": 187370 + }, + { + "epoch": 0.3785194552293378, + "grad_norm": 235.0536651611328, + "learning_rate": 7.871312431018158e-06, + "loss": 9.4029, + "step": 187380 + }, + { + "epoch": 0.3785396558620216, + "grad_norm": 306.8526916503906, + "learning_rate": 7.871026653849071e-06, + "loss": 25.201, + "step": 187390 + }, + { + "epoch": 0.3785598564947054, + "grad_norm": 409.6279602050781, + "learning_rate": 7.87074086268695e-06, + "loss": 28.2371, + "step": 187400 + }, + { + "epoch": 0.37858005712738924, + "grad_norm": 281.5046081542969, + "learning_rate": 7.870455057533184e-06, + "loss": 16.8539, + "step": 187410 + }, + { + "epoch": 0.37860025776007306, + "grad_norm": 1179.369140625, + "learning_rate": 7.870169238389168e-06, + "loss": 38.7465, + "step": 187420 + }, + { + "epoch": 0.3786204583927569, + "grad_norm": 392.25579833984375, + "learning_rate": 7.869883405256296e-06, + "loss": 24.1973, + "step": 187430 + }, + { + "epoch": 0.3786406590254407, + "grad_norm": 377.63763427734375, + "learning_rate": 7.869597558135959e-06, + "loss": 20.7806, + "step": 187440 + }, + { + "epoch": 0.3786608596581245, + "grad_norm": 739.8941040039062, + "learning_rate": 7.869311697029553e-06, + "loss": 23.3404, + "step": 187450 + }, + { + "epoch": 0.37868106029080834, + "grad_norm": 282.66680908203125, + "learning_rate": 7.86902582193847e-06, + "loss": 25.5504, + "step": 187460 + }, + { + "epoch": 0.3787012609234921, + "grad_norm": 373.229736328125, + "learning_rate": 7.868739932864102e-06, + "loss": 31.0513, + "step": 187470 + }, + { + "epoch": 0.3787214615561759, + "grad_norm": 299.4689636230469, + "learning_rate": 7.868454029807843e-06, + "loss": 16.7641, + "step": 187480 + }, + { + "epoch": 0.37874166218885974, + "grad_norm": 183.34661865234375, + "learning_rate": 7.86816811277109e-06, + "loss": 12.0302, + "step": 187490 + }, + { + "epoch": 0.37876186282154356, + "grad_norm": 377.3712158203125, + "learning_rate": 7.86788218175523e-06, + "loss": 26.0073, + "step": 187500 + }, + { + "epoch": 0.3787820634542274, + "grad_norm": 522.1876831054688, + "learning_rate": 7.867596236761663e-06, + "loss": 28.2889, + "step": 187510 + }, + { + "epoch": 0.3788022640869112, + "grad_norm": 253.38307189941406, + "learning_rate": 7.867310277791778e-06, + "loss": 13.8486, + "step": 187520 + }, + { + "epoch": 0.378822464719595, + "grad_norm": 843.4441528320312, + "learning_rate": 7.867024304846971e-06, + "loss": 18.8832, + "step": 187530 + }, + { + "epoch": 0.37884266535227884, + "grad_norm": 611.51171875, + "learning_rate": 7.866738317928636e-06, + "loss": 21.2437, + "step": 187540 + }, + { + "epoch": 0.37886286598496266, + "grad_norm": 191.75840759277344, + "learning_rate": 7.866452317038164e-06, + "loss": 30.4904, + "step": 187550 + }, + { + "epoch": 0.3788830666176465, + "grad_norm": 491.4467468261719, + "learning_rate": 7.866166302176952e-06, + "loss": 20.1586, + "step": 187560 + }, + { + "epoch": 0.3789032672503303, + "grad_norm": 470.33642578125, + "learning_rate": 7.865880273346393e-06, + "loss": 20.3936, + "step": 187570 + }, + { + "epoch": 0.3789234678830141, + "grad_norm": 204.30572509765625, + "learning_rate": 7.865594230547882e-06, + "loss": 48.1156, + "step": 187580 + }, + { + "epoch": 0.3789436685156979, + "grad_norm": 
312.6099548339844, + "learning_rate": 7.865308173782812e-06, + "loss": 33.8019, + "step": 187590 + }, + { + "epoch": 0.3789638691483817, + "grad_norm": 699.95703125, + "learning_rate": 7.865022103052578e-06, + "loss": 31.6593, + "step": 187600 + }, + { + "epoch": 0.3789840697810655, + "grad_norm": 188.8434600830078, + "learning_rate": 7.864736018358571e-06, + "loss": 22.8804, + "step": 187610 + }, + { + "epoch": 0.37900427041374934, + "grad_norm": 341.70623779296875, + "learning_rate": 7.864449919702192e-06, + "loss": 27.6174, + "step": 187620 + }, + { + "epoch": 0.37902447104643316, + "grad_norm": 656.0889282226562, + "learning_rate": 7.864163807084831e-06, + "loss": 22.6183, + "step": 187630 + }, + { + "epoch": 0.379044671679117, + "grad_norm": 748.5330200195312, + "learning_rate": 7.863877680507879e-06, + "loss": 36.9047, + "step": 187640 + }, + { + "epoch": 0.3790648723118008, + "grad_norm": 476.5628662109375, + "learning_rate": 7.863591539972739e-06, + "loss": 20.3583, + "step": 187650 + }, + { + "epoch": 0.3790850729444846, + "grad_norm": 31.000638961791992, + "learning_rate": 7.863305385480798e-06, + "loss": 31.3927, + "step": 187660 + }, + { + "epoch": 0.37910527357716844, + "grad_norm": 685.523193359375, + "learning_rate": 7.863019217033456e-06, + "loss": 22.1542, + "step": 187670 + }, + { + "epoch": 0.37912547420985226, + "grad_norm": 569.0435791015625, + "learning_rate": 7.862733034632105e-06, + "loss": 23.0225, + "step": 187680 + }, + { + "epoch": 0.3791456748425361, + "grad_norm": 2024.16259765625, + "learning_rate": 7.862446838278139e-06, + "loss": 64.9608, + "step": 187690 + }, + { + "epoch": 0.3791658754752199, + "grad_norm": 159.88900756835938, + "learning_rate": 7.862160627972956e-06, + "loss": 23.9056, + "step": 187700 + }, + { + "epoch": 0.3791860761079037, + "grad_norm": 371.289306640625, + "learning_rate": 7.861874403717948e-06, + "loss": 12.7317, + "step": 187710 + }, + { + "epoch": 0.3792062767405875, + "grad_norm": 213.77850341796875, + "learning_rate": 7.86158816551451e-06, + "loss": 23.5957, + "step": 187720 + }, + { + "epoch": 0.3792264773732713, + "grad_norm": 389.7715759277344, + "learning_rate": 7.861301913364043e-06, + "loss": 31.0234, + "step": 187730 + }, + { + "epoch": 0.37924667800595513, + "grad_norm": 251.98199462890625, + "learning_rate": 7.861015647267934e-06, + "loss": 11.2755, + "step": 187740 + }, + { + "epoch": 0.37926687863863895, + "grad_norm": 139.1150360107422, + "learning_rate": 7.860729367227582e-06, + "loss": 23.1484, + "step": 187750 + }, + { + "epoch": 0.37928707927132277, + "grad_norm": 426.55010986328125, + "learning_rate": 7.860443073244383e-06, + "loss": 20.0323, + "step": 187760 + }, + { + "epoch": 0.3793072799040066, + "grad_norm": 647.5835571289062, + "learning_rate": 7.86015676531973e-06, + "loss": 20.186, + "step": 187770 + }, + { + "epoch": 0.3793274805366904, + "grad_norm": 164.36106872558594, + "learning_rate": 7.859870443455021e-06, + "loss": 27.9901, + "step": 187780 + }, + { + "epoch": 0.37934768116937423, + "grad_norm": 460.61383056640625, + "learning_rate": 7.85958410765165e-06, + "loss": 16.9639, + "step": 187790 + }, + { + "epoch": 0.37936788180205805, + "grad_norm": 141.99009704589844, + "learning_rate": 7.859297757911013e-06, + "loss": 13.2798, + "step": 187800 + }, + { + "epoch": 0.37938808243474187, + "grad_norm": 229.5868377685547, + "learning_rate": 7.859011394234506e-06, + "loss": 42.6979, + "step": 187810 + }, + { + "epoch": 0.3794082830674257, + "grad_norm": 331.86865234375, + "learning_rate": 
7.858725016623523e-06, + "loss": 9.1058, + "step": 187820 + }, + { + "epoch": 0.3794284837001095, + "grad_norm": 1173.534423828125, + "learning_rate": 7.85843862507946e-06, + "loss": 15.9746, + "step": 187830 + }, + { + "epoch": 0.37944868433279333, + "grad_norm": 498.7364196777344, + "learning_rate": 7.858152219603718e-06, + "loss": 21.3509, + "step": 187840 + }, + { + "epoch": 0.3794688849654771, + "grad_norm": 258.2376403808594, + "learning_rate": 7.857865800197684e-06, + "loss": 20.0005, + "step": 187850 + }, + { + "epoch": 0.3794890855981609, + "grad_norm": 440.5531005859375, + "learning_rate": 7.857579366862761e-06, + "loss": 17.516, + "step": 187860 + }, + { + "epoch": 0.37950928623084473, + "grad_norm": 453.6659240722656, + "learning_rate": 7.857292919600343e-06, + "loss": 18.621, + "step": 187870 + }, + { + "epoch": 0.37952948686352855, + "grad_norm": 253.365234375, + "learning_rate": 7.857006458411826e-06, + "loss": 22.758, + "step": 187880 + }, + { + "epoch": 0.3795496874962124, + "grad_norm": 468.333251953125, + "learning_rate": 7.856719983298606e-06, + "loss": 32.0136, + "step": 187890 + }, + { + "epoch": 0.3795698881288962, + "grad_norm": 334.6289367675781, + "learning_rate": 7.856433494262078e-06, + "loss": 15.2881, + "step": 187900 + }, + { + "epoch": 0.37959008876158, + "grad_norm": 213.6356658935547, + "learning_rate": 7.856146991303641e-06, + "loss": 24.8394, + "step": 187910 + }, + { + "epoch": 0.37961028939426383, + "grad_norm": 389.71392822265625, + "learning_rate": 7.85586047442469e-06, + "loss": 27.6617, + "step": 187920 + }, + { + "epoch": 0.37963049002694765, + "grad_norm": 776.3466796875, + "learning_rate": 7.85557394362662e-06, + "loss": 25.2853, + "step": 187930 + }, + { + "epoch": 0.3796506906596315, + "grad_norm": 360.78558349609375, + "learning_rate": 7.85528739891083e-06, + "loss": 15.2057, + "step": 187940 + }, + { + "epoch": 0.3796708912923153, + "grad_norm": 553.1041259765625, + "learning_rate": 7.855000840278715e-06, + "loss": 15.2743, + "step": 187950 + }, + { + "epoch": 0.3796910919249991, + "grad_norm": 150.0745849609375, + "learning_rate": 7.854714267731673e-06, + "loss": 14.223, + "step": 187960 + }, + { + "epoch": 0.37971129255768293, + "grad_norm": 296.0186462402344, + "learning_rate": 7.8544276812711e-06, + "loss": 33.1519, + "step": 187970 + }, + { + "epoch": 0.3797314931903667, + "grad_norm": 34.12297439575195, + "learning_rate": 7.85414108089839e-06, + "loss": 12.3409, + "step": 187980 + }, + { + "epoch": 0.3797516938230505, + "grad_norm": 495.8981018066406, + "learning_rate": 7.853854466614945e-06, + "loss": 26.7397, + "step": 187990 + }, + { + "epoch": 0.37977189445573434, + "grad_norm": 47.39965057373047, + "learning_rate": 7.85356783842216e-06, + "loss": 25.0049, + "step": 188000 + }, + { + "epoch": 0.37979209508841816, + "grad_norm": 214.90065002441406, + "learning_rate": 7.85328119632143e-06, + "loss": 10.1669, + "step": 188010 + }, + { + "epoch": 0.379812295721102, + "grad_norm": 449.6014099121094, + "learning_rate": 7.852994540314154e-06, + "loss": 18.693, + "step": 188020 + }, + { + "epoch": 0.3798324963537858, + "grad_norm": 692.6038208007812, + "learning_rate": 7.852707870401728e-06, + "loss": 18.3583, + "step": 188030 + }, + { + "epoch": 0.3798526969864696, + "grad_norm": 385.4586486816406, + "learning_rate": 7.85242118658555e-06, + "loss": 14.5829, + "step": 188040 + }, + { + "epoch": 0.37987289761915344, + "grad_norm": 284.35723876953125, + "learning_rate": 7.852134488867017e-06, + "loss": 6.5827, + "step": 188050 + }, + { + 
"epoch": 0.37989309825183726, + "grad_norm": 339.4539794921875, + "learning_rate": 7.851847777247528e-06, + "loss": 23.9021, + "step": 188060 + }, + { + "epoch": 0.3799132988845211, + "grad_norm": 331.1771545410156, + "learning_rate": 7.851561051728478e-06, + "loss": 24.8811, + "step": 188070 + }, + { + "epoch": 0.3799334995172049, + "grad_norm": 773.5575561523438, + "learning_rate": 7.851274312311266e-06, + "loss": 28.8373, + "step": 188080 + }, + { + "epoch": 0.3799537001498887, + "grad_norm": 521.5496826171875, + "learning_rate": 7.850987558997287e-06, + "loss": 30.2774, + "step": 188090 + }, + { + "epoch": 0.37997390078257254, + "grad_norm": 436.01220703125, + "learning_rate": 7.850700791787941e-06, + "loss": 27.6853, + "step": 188100 + }, + { + "epoch": 0.3799941014152563, + "grad_norm": 329.2882385253906, + "learning_rate": 7.850414010684626e-06, + "loss": 12.8973, + "step": 188110 + }, + { + "epoch": 0.3800143020479401, + "grad_norm": 623.2317504882812, + "learning_rate": 7.85012721568874e-06, + "loss": 25.3543, + "step": 188120 + }, + { + "epoch": 0.38003450268062394, + "grad_norm": 219.14224243164062, + "learning_rate": 7.849840406801676e-06, + "loss": 22.3661, + "step": 188130 + }, + { + "epoch": 0.38005470331330776, + "grad_norm": 538.2972412109375, + "learning_rate": 7.849553584024836e-06, + "loss": 26.4047, + "step": 188140 + }, + { + "epoch": 0.3800749039459916, + "grad_norm": 560.73388671875, + "learning_rate": 7.849266747359619e-06, + "loss": 35.3035, + "step": 188150 + }, + { + "epoch": 0.3800951045786754, + "grad_norm": 171.05557250976562, + "learning_rate": 7.848979896807422e-06, + "loss": 10.2947, + "step": 188160 + }, + { + "epoch": 0.3801153052113592, + "grad_norm": 250.48715209960938, + "learning_rate": 7.848693032369641e-06, + "loss": 11.1912, + "step": 188170 + }, + { + "epoch": 0.38013550584404304, + "grad_norm": 7.347401142120361, + "learning_rate": 7.848406154047677e-06, + "loss": 7.639, + "step": 188180 + }, + { + "epoch": 0.38015570647672686, + "grad_norm": 639.7820434570312, + "learning_rate": 7.848119261842926e-06, + "loss": 21.6782, + "step": 188190 + }, + { + "epoch": 0.3801759071094107, + "grad_norm": 395.41497802734375, + "learning_rate": 7.847832355756788e-06, + "loss": 16.0722, + "step": 188200 + }, + { + "epoch": 0.3801961077420945, + "grad_norm": 200.6482696533203, + "learning_rate": 7.84754543579066e-06, + "loss": 20.1189, + "step": 188210 + }, + { + "epoch": 0.3802163083747783, + "grad_norm": 895.510986328125, + "learning_rate": 7.84725850194594e-06, + "loss": 27.9294, + "step": 188220 + }, + { + "epoch": 0.3802365090074621, + "grad_norm": 23.38909149169922, + "learning_rate": 7.84697155422403e-06, + "loss": 14.721, + "step": 188230 + }, + { + "epoch": 0.3802567096401459, + "grad_norm": 57.09502029418945, + "learning_rate": 7.846684592626324e-06, + "loss": 17.3742, + "step": 188240 + }, + { + "epoch": 0.3802769102728297, + "grad_norm": 953.9886474609375, + "learning_rate": 7.846397617154223e-06, + "loss": 50.8312, + "step": 188250 + }, + { + "epoch": 0.38029711090551355, + "grad_norm": 270.4121398925781, + "learning_rate": 7.846110627809123e-06, + "loss": 22.7044, + "step": 188260 + }, + { + "epoch": 0.38031731153819737, + "grad_norm": 826.6783447265625, + "learning_rate": 7.845823624592427e-06, + "loss": 24.6503, + "step": 188270 + }, + { + "epoch": 0.3803375121708812, + "grad_norm": 792.712890625, + "learning_rate": 7.845536607505533e-06, + "loss": 23.5779, + "step": 188280 + }, + { + "epoch": 0.380357712803565, + "grad_norm": 543.3534545898438, 
+ "learning_rate": 7.845249576549836e-06, + "loss": 17.7022, + "step": 188290 + }, + { + "epoch": 0.3803779134362488, + "grad_norm": 524.1748657226562, + "learning_rate": 7.844962531726742e-06, + "loss": 15.8757, + "step": 188300 + }, + { + "epoch": 0.38039811406893265, + "grad_norm": 361.21148681640625, + "learning_rate": 7.844675473037641e-06, + "loss": 9.4886, + "step": 188310 + }, + { + "epoch": 0.38041831470161647, + "grad_norm": 777.5284423828125, + "learning_rate": 7.844388400483938e-06, + "loss": 27.2019, + "step": 188320 + }, + { + "epoch": 0.3804385153343003, + "grad_norm": 198.74984741210938, + "learning_rate": 7.844101314067031e-06, + "loss": 13.2818, + "step": 188330 + }, + { + "epoch": 0.3804587159669841, + "grad_norm": 793.528564453125, + "learning_rate": 7.843814213788322e-06, + "loss": 21.0352, + "step": 188340 + }, + { + "epoch": 0.3804789165996679, + "grad_norm": 568.158935546875, + "learning_rate": 7.843527099649204e-06, + "loss": 37.7767, + "step": 188350 + }, + { + "epoch": 0.3804991172323517, + "grad_norm": 539.360595703125, + "learning_rate": 7.84323997165108e-06, + "loss": 21.5503, + "step": 188360 + }, + { + "epoch": 0.3805193178650355, + "grad_norm": 513.9918823242188, + "learning_rate": 7.842952829795352e-06, + "loss": 32.8909, + "step": 188370 + }, + { + "epoch": 0.38053951849771933, + "grad_norm": 437.0247497558594, + "learning_rate": 7.842665674083413e-06, + "loss": 26.4054, + "step": 188380 + }, + { + "epoch": 0.38055971913040315, + "grad_norm": 344.40313720703125, + "learning_rate": 7.842378504516669e-06, + "loss": 15.8336, + "step": 188390 + }, + { + "epoch": 0.38057991976308697, + "grad_norm": 185.64108276367188, + "learning_rate": 7.842091321096515e-06, + "loss": 13.0182, + "step": 188400 + }, + { + "epoch": 0.3806001203957708, + "grad_norm": 197.11865234375, + "learning_rate": 7.841804123824354e-06, + "loss": 18.4705, + "step": 188410 + }, + { + "epoch": 0.3806203210284546, + "grad_norm": 168.9591522216797, + "learning_rate": 7.841516912701585e-06, + "loss": 33.6644, + "step": 188420 + }, + { + "epoch": 0.38064052166113843, + "grad_norm": 539.0736083984375, + "learning_rate": 7.841229687729606e-06, + "loss": 20.0488, + "step": 188430 + }, + { + "epoch": 0.38066072229382225, + "grad_norm": 7.903078556060791, + "learning_rate": 7.840942448909818e-06, + "loss": 22.0559, + "step": 188440 + }, + { + "epoch": 0.38068092292650607, + "grad_norm": 370.5745849609375, + "learning_rate": 7.84065519624362e-06, + "loss": 18.5808, + "step": 188450 + }, + { + "epoch": 0.3807011235591899, + "grad_norm": 0.0, + "learning_rate": 7.840367929732415e-06, + "loss": 9.9115, + "step": 188460 + }, + { + "epoch": 0.3807213241918737, + "grad_norm": 14.133622169494629, + "learning_rate": 7.840080649377602e-06, + "loss": 19.3093, + "step": 188470 + }, + { + "epoch": 0.38074152482455753, + "grad_norm": 271.5661315917969, + "learning_rate": 7.839793355180578e-06, + "loss": 22.0619, + "step": 188480 + }, + { + "epoch": 0.3807617254572413, + "grad_norm": 927.3042602539062, + "learning_rate": 7.839506047142747e-06, + "loss": 24.626, + "step": 188490 + }, + { + "epoch": 0.3807819260899251, + "grad_norm": 314.3900451660156, + "learning_rate": 7.839218725265507e-06, + "loss": 51.4573, + "step": 188500 + }, + { + "epoch": 0.38080212672260894, + "grad_norm": 221.56214904785156, + "learning_rate": 7.83893138955026e-06, + "loss": 13.1621, + "step": 188510 + }, + { + "epoch": 0.38082232735529276, + "grad_norm": 651.3807983398438, + "learning_rate": 7.838644039998405e-06, + "loss": 17.0011, + 
"step": 188520 + }, + { + "epoch": 0.3808425279879766, + "grad_norm": 400.63250732421875, + "learning_rate": 7.838356676611345e-06, + "loss": 22.624, + "step": 188530 + }, + { + "epoch": 0.3808627286206604, + "grad_norm": 308.29071044921875, + "learning_rate": 7.838069299390476e-06, + "loss": 13.878, + "step": 188540 + }, + { + "epoch": 0.3808829292533442, + "grad_norm": 62.3780517578125, + "learning_rate": 7.837781908337204e-06, + "loss": 11.383, + "step": 188550 + }, + { + "epoch": 0.38090312988602804, + "grad_norm": 837.8966064453125, + "learning_rate": 7.837494503452925e-06, + "loss": 25.1983, + "step": 188560 + }, + { + "epoch": 0.38092333051871186, + "grad_norm": 241.6901397705078, + "learning_rate": 7.837207084739044e-06, + "loss": 30.5917, + "step": 188570 + }, + { + "epoch": 0.3809435311513957, + "grad_norm": 537.393798828125, + "learning_rate": 7.83691965219696e-06, + "loss": 14.8654, + "step": 188580 + }, + { + "epoch": 0.3809637317840795, + "grad_norm": 0.5184242725372314, + "learning_rate": 7.836632205828072e-06, + "loss": 23.3382, + "step": 188590 + }, + { + "epoch": 0.3809839324167633, + "grad_norm": 377.3197937011719, + "learning_rate": 7.836344745633785e-06, + "loss": 26.2711, + "step": 188600 + }, + { + "epoch": 0.38100413304944714, + "grad_norm": 266.58966064453125, + "learning_rate": 7.836057271615496e-06, + "loss": 16.5561, + "step": 188610 + }, + { + "epoch": 0.3810243336821309, + "grad_norm": 604.7203369140625, + "learning_rate": 7.835769783774606e-06, + "loss": 29.1615, + "step": 188620 + }, + { + "epoch": 0.3810445343148147, + "grad_norm": 451.97906494140625, + "learning_rate": 7.83548228211252e-06, + "loss": 10.576, + "step": 188630 + }, + { + "epoch": 0.38106473494749854, + "grad_norm": 465.3996276855469, + "learning_rate": 7.835194766630638e-06, + "loss": 22.2135, + "step": 188640 + }, + { + "epoch": 0.38108493558018236, + "grad_norm": 342.02655029296875, + "learning_rate": 7.834907237330359e-06, + "loss": 13.0836, + "step": 188650 + }, + { + "epoch": 0.3811051362128662, + "grad_norm": 619.200927734375, + "learning_rate": 7.834619694213087e-06, + "loss": 17.6876, + "step": 188660 + }, + { + "epoch": 0.38112533684555, + "grad_norm": 657.8128662109375, + "learning_rate": 7.83433213728022e-06, + "loss": 18.6648, + "step": 188670 + }, + { + "epoch": 0.3811455374782338, + "grad_norm": 333.6239318847656, + "learning_rate": 7.834044566533166e-06, + "loss": 26.969, + "step": 188680 + }, + { + "epoch": 0.38116573811091764, + "grad_norm": 335.3335876464844, + "learning_rate": 7.833756981973321e-06, + "loss": 46.9242, + "step": 188690 + }, + { + "epoch": 0.38118593874360146, + "grad_norm": 618.9785766601562, + "learning_rate": 7.833469383602086e-06, + "loss": 20.7087, + "step": 188700 + }, + { + "epoch": 0.3812061393762853, + "grad_norm": 325.0849304199219, + "learning_rate": 7.833181771420869e-06, + "loss": 34.5737, + "step": 188710 + }, + { + "epoch": 0.3812263400089691, + "grad_norm": 586.0824584960938, + "learning_rate": 7.832894145431062e-06, + "loss": 14.6962, + "step": 188720 + }, + { + "epoch": 0.3812465406416529, + "grad_norm": 134.80641174316406, + "learning_rate": 7.832606505634077e-06, + "loss": 14.8707, + "step": 188730 + }, + { + "epoch": 0.3812667412743367, + "grad_norm": 677.0626831054688, + "learning_rate": 7.832318852031311e-06, + "loss": 39.5213, + "step": 188740 + }, + { + "epoch": 0.3812869419070205, + "grad_norm": 378.0164794921875, + "learning_rate": 7.832031184624165e-06, + "loss": 11.0117, + "step": 188750 + }, + { + "epoch": 0.3813071425397043, + 
"grad_norm": 555.2911987304688, + "learning_rate": 7.831743503414043e-06, + "loss": 24.0668, + "step": 188760 + }, + { + "epoch": 0.38132734317238814, + "grad_norm": 252.79132080078125, + "learning_rate": 7.831455808402348e-06, + "loss": 18.3286, + "step": 188770 + }, + { + "epoch": 0.38134754380507196, + "grad_norm": 151.18746948242188, + "learning_rate": 7.831168099590478e-06, + "loss": 12.5021, + "step": 188780 + }, + { + "epoch": 0.3813677444377558, + "grad_norm": 309.6161193847656, + "learning_rate": 7.83088037697984e-06, + "loss": 17.4055, + "step": 188790 + }, + { + "epoch": 0.3813879450704396, + "grad_norm": 1953.6256103515625, + "learning_rate": 7.830592640571833e-06, + "loss": 22.8445, + "step": 188800 + }, + { + "epoch": 0.3814081457031234, + "grad_norm": 371.9723205566406, + "learning_rate": 7.830304890367862e-06, + "loss": 12.5413, + "step": 188810 + }, + { + "epoch": 0.38142834633580724, + "grad_norm": 329.8029479980469, + "learning_rate": 7.83001712636933e-06, + "loss": 25.8127, + "step": 188820 + }, + { + "epoch": 0.38144854696849106, + "grad_norm": 619.4336547851562, + "learning_rate": 7.829729348577636e-06, + "loss": 27.3079, + "step": 188830 + }, + { + "epoch": 0.3814687476011749, + "grad_norm": 45.39575958251953, + "learning_rate": 7.829441556994182e-06, + "loss": 16.6073, + "step": 188840 + }, + { + "epoch": 0.3814889482338587, + "grad_norm": 319.5869140625, + "learning_rate": 7.829153751620375e-06, + "loss": 11.1007, + "step": 188850 + }, + { + "epoch": 0.3815091488665425, + "grad_norm": 255.23594665527344, + "learning_rate": 7.828865932457617e-06, + "loss": 34.4464, + "step": 188860 + }, + { + "epoch": 0.3815293494992263, + "grad_norm": 678.3028564453125, + "learning_rate": 7.828578099507308e-06, + "loss": 19.8113, + "step": 188870 + }, + { + "epoch": 0.3815495501319101, + "grad_norm": 205.38990783691406, + "learning_rate": 7.828290252770852e-06, + "loss": 25.729, + "step": 188880 + }, + { + "epoch": 0.38156975076459393, + "grad_norm": 359.9259338378906, + "learning_rate": 7.828002392249654e-06, + "loss": 19.4046, + "step": 188890 + }, + { + "epoch": 0.38158995139727775, + "grad_norm": 429.4687805175781, + "learning_rate": 7.827714517945116e-06, + "loss": 20.4328, + "step": 188900 + }, + { + "epoch": 0.38161015202996157, + "grad_norm": 564.55322265625, + "learning_rate": 7.827426629858636e-06, + "loss": 9.6021, + "step": 188910 + }, + { + "epoch": 0.3816303526626454, + "grad_norm": 803.4877319335938, + "learning_rate": 7.827138727991625e-06, + "loss": 20.2551, + "step": 188920 + }, + { + "epoch": 0.3816505532953292, + "grad_norm": 921.0742797851562, + "learning_rate": 7.826850812345484e-06, + "loss": 18.397, + "step": 188930 + }, + { + "epoch": 0.38167075392801303, + "grad_norm": 560.7493896484375, + "learning_rate": 7.826562882921613e-06, + "loss": 40.604, + "step": 188940 + }, + { + "epoch": 0.38169095456069685, + "grad_norm": 286.7036437988281, + "learning_rate": 7.826274939721417e-06, + "loss": 16.4252, + "step": 188950 + }, + { + "epoch": 0.38171115519338067, + "grad_norm": 304.8981628417969, + "learning_rate": 7.8259869827463e-06, + "loss": 24.3084, + "step": 188960 + }, + { + "epoch": 0.3817313558260645, + "grad_norm": 233.96810913085938, + "learning_rate": 7.825699011997665e-06, + "loss": 19.5671, + "step": 188970 + }, + { + "epoch": 0.3817515564587483, + "grad_norm": 263.20111083984375, + "learning_rate": 7.825411027476917e-06, + "loss": 19.3988, + "step": 188980 + }, + { + "epoch": 0.38177175709143213, + "grad_norm": 293.4358825683594, + "learning_rate": 
7.825123029185457e-06, + "loss": 29.9489, + "step": 188990 + }, + { + "epoch": 0.3817919577241159, + "grad_norm": 225.1694793701172, + "learning_rate": 7.82483501712469e-06, + "loss": 18.0091, + "step": 189000 + }, + { + "epoch": 0.3818121583567997, + "grad_norm": 581.0350952148438, + "learning_rate": 7.824546991296021e-06, + "loss": 26.3481, + "step": 189010 + }, + { + "epoch": 0.38183235898948353, + "grad_norm": 126.13812255859375, + "learning_rate": 7.824258951700852e-06, + "loss": 15.3107, + "step": 189020 + }, + { + "epoch": 0.38185255962216735, + "grad_norm": 272.8997497558594, + "learning_rate": 7.823970898340587e-06, + "loss": 24.682, + "step": 189030 + }, + { + "epoch": 0.3818727602548512, + "grad_norm": 316.7326354980469, + "learning_rate": 7.82368283121663e-06, + "loss": 24.832, + "step": 189040 + }, + { + "epoch": 0.381892960887535, + "grad_norm": 269.50006103515625, + "learning_rate": 7.823394750330386e-06, + "loss": 10.9026, + "step": 189050 + }, + { + "epoch": 0.3819131615202188, + "grad_norm": 538.9169921875, + "learning_rate": 7.823106655683259e-06, + "loss": 18.1382, + "step": 189060 + }, + { + "epoch": 0.38193336215290263, + "grad_norm": 234.98548889160156, + "learning_rate": 7.822818547276652e-06, + "loss": 12.2784, + "step": 189070 + }, + { + "epoch": 0.38195356278558645, + "grad_norm": 485.49481201171875, + "learning_rate": 7.822530425111969e-06, + "loss": 8.7811, + "step": 189080 + }, + { + "epoch": 0.3819737634182703, + "grad_norm": 375.4350891113281, + "learning_rate": 7.822242289190615e-06, + "loss": 20.5474, + "step": 189090 + }, + { + "epoch": 0.3819939640509541, + "grad_norm": 387.18035888671875, + "learning_rate": 7.821954139513997e-06, + "loss": 20.0211, + "step": 189100 + }, + { + "epoch": 0.3820141646836379, + "grad_norm": 20.593969345092773, + "learning_rate": 7.821665976083515e-06, + "loss": 19.7231, + "step": 189110 + }, + { + "epoch": 0.38203436531632173, + "grad_norm": 537.4613647460938, + "learning_rate": 7.821377798900574e-06, + "loss": 14.9787, + "step": 189120 + }, + { + "epoch": 0.3820545659490055, + "grad_norm": 420.259521484375, + "learning_rate": 7.82108960796658e-06, + "loss": 36.6597, + "step": 189130 + }, + { + "epoch": 0.3820747665816893, + "grad_norm": 981.8267211914062, + "learning_rate": 7.82080140328294e-06, + "loss": 20.7919, + "step": 189140 + }, + { + "epoch": 0.38209496721437314, + "grad_norm": 56.50301742553711, + "learning_rate": 7.820513184851052e-06, + "loss": 35.5867, + "step": 189150 + }, + { + "epoch": 0.38211516784705696, + "grad_norm": 23.353208541870117, + "learning_rate": 7.820224952672329e-06, + "loss": 19.9272, + "step": 189160 + }, + { + "epoch": 0.3821353684797408, + "grad_norm": 14.043118476867676, + "learning_rate": 7.819936706748168e-06, + "loss": 28.0067, + "step": 189170 + }, + { + "epoch": 0.3821555691124246, + "grad_norm": 138.26553344726562, + "learning_rate": 7.81964844707998e-06, + "loss": 28.2015, + "step": 189180 + }, + { + "epoch": 0.3821757697451084, + "grad_norm": 266.5179748535156, + "learning_rate": 7.819360173669168e-06, + "loss": 19.5376, + "step": 189190 + }, + { + "epoch": 0.38219597037779224, + "grad_norm": 461.5394287109375, + "learning_rate": 7.819071886517134e-06, + "loss": 16.4602, + "step": 189200 + }, + { + "epoch": 0.38221617101047606, + "grad_norm": 522.180908203125, + "learning_rate": 7.818783585625287e-06, + "loss": 40.6513, + "step": 189210 + }, + { + "epoch": 0.3822363716431599, + "grad_norm": 253.56768798828125, + "learning_rate": 7.818495270995031e-06, + "loss": 17.4649, + "step": 
189220 + }, + { + "epoch": 0.3822565722758437, + "grad_norm": 177.39678955078125, + "learning_rate": 7.81820694262777e-06, + "loss": 25.4444, + "step": 189230 + }, + { + "epoch": 0.3822767729085275, + "grad_norm": 196.9581298828125, + "learning_rate": 7.81791860052491e-06, + "loss": 11.0922, + "step": 189240 + }, + { + "epoch": 0.38229697354121134, + "grad_norm": 421.8256530761719, + "learning_rate": 7.817630244687857e-06, + "loss": 22.3353, + "step": 189250 + }, + { + "epoch": 0.3823171741738951, + "grad_norm": 413.8868103027344, + "learning_rate": 7.817341875118016e-06, + "loss": 22.3017, + "step": 189260 + }, + { + "epoch": 0.3823373748065789, + "grad_norm": 295.10400390625, + "learning_rate": 7.817053491816794e-06, + "loss": 15.9826, + "step": 189270 + }, + { + "epoch": 0.38235757543926274, + "grad_norm": 304.6611022949219, + "learning_rate": 7.816765094785593e-06, + "loss": 13.6321, + "step": 189280 + }, + { + "epoch": 0.38237777607194656, + "grad_norm": 149.24656677246094, + "learning_rate": 7.81647668402582e-06, + "loss": 15.778, + "step": 189290 + }, + { + "epoch": 0.3823979767046304, + "grad_norm": 195.9346160888672, + "learning_rate": 7.816188259538885e-06, + "loss": 18.8319, + "step": 189300 + }, + { + "epoch": 0.3824181773373142, + "grad_norm": 85.01852416992188, + "learning_rate": 7.815899821326185e-06, + "loss": 33.3886, + "step": 189310 + }, + { + "epoch": 0.382438377969998, + "grad_norm": 1904.7947998046875, + "learning_rate": 7.815611369389134e-06, + "loss": 20.0872, + "step": 189320 + }, + { + "epoch": 0.38245857860268184, + "grad_norm": 350.1895446777344, + "learning_rate": 7.815322903729133e-06, + "loss": 16.7494, + "step": 189330 + }, + { + "epoch": 0.38247877923536566, + "grad_norm": 396.7063293457031, + "learning_rate": 7.81503442434759e-06, + "loss": 13.4726, + "step": 189340 + }, + { + "epoch": 0.3824989798680495, + "grad_norm": 592.1580200195312, + "learning_rate": 7.814745931245911e-06, + "loss": 31.8094, + "step": 189350 + }, + { + "epoch": 0.3825191805007333, + "grad_norm": 358.4219665527344, + "learning_rate": 7.814457424425501e-06, + "loss": 22.111, + "step": 189360 + }, + { + "epoch": 0.3825393811334171, + "grad_norm": 7.479894161224365, + "learning_rate": 7.814168903887768e-06, + "loss": 16.0946, + "step": 189370 + }, + { + "epoch": 0.3825595817661009, + "grad_norm": 439.0794982910156, + "learning_rate": 7.813880369634114e-06, + "loss": 18.5301, + "step": 189380 + }, + { + "epoch": 0.3825797823987847, + "grad_norm": 388.0850524902344, + "learning_rate": 7.813591821665953e-06, + "loss": 23.1441, + "step": 189390 + }, + { + "epoch": 0.3825999830314685, + "grad_norm": 117.37612915039062, + "learning_rate": 7.813303259984685e-06, + "loss": 12.417, + "step": 189400 + }, + { + "epoch": 0.38262018366415235, + "grad_norm": 505.7078857421875, + "learning_rate": 7.813014684591718e-06, + "loss": 67.48, + "step": 189410 + }, + { + "epoch": 0.38264038429683617, + "grad_norm": 416.0635986328125, + "learning_rate": 7.812726095488457e-06, + "loss": 41.8872, + "step": 189420 + }, + { + "epoch": 0.38266058492952, + "grad_norm": 606.1459350585938, + "learning_rate": 7.812437492676312e-06, + "loss": 19.2614, + "step": 189430 + }, + { + "epoch": 0.3826807855622038, + "grad_norm": 346.3567199707031, + "learning_rate": 7.812148876156687e-06, + "loss": 17.6643, + "step": 189440 + }, + { + "epoch": 0.3827009861948876, + "grad_norm": 43.39950942993164, + "learning_rate": 7.81186024593099e-06, + "loss": 34.3869, + "step": 189450 + }, + { + "epoch": 0.38272118682757145, + "grad_norm": 
2429.51220703125, + "learning_rate": 7.811571602000628e-06, + "loss": 42.3883, + "step": 189460 + }, + { + "epoch": 0.38274138746025527, + "grad_norm": 26.305356979370117, + "learning_rate": 7.811282944367004e-06, + "loss": 14.7009, + "step": 189470 + }, + { + "epoch": 0.3827615880929391, + "grad_norm": 343.4811706542969, + "learning_rate": 7.810994273031532e-06, + "loss": 22.4447, + "step": 189480 + }, + { + "epoch": 0.3827817887256229, + "grad_norm": 330.87255859375, + "learning_rate": 7.81070558799561e-06, + "loss": 6.6134, + "step": 189490 + }, + { + "epoch": 0.3828019893583067, + "grad_norm": 556.8169555664062, + "learning_rate": 7.810416889260653e-06, + "loss": 27.3821, + "step": 189500 + }, + { + "epoch": 0.3828221899909905, + "grad_norm": 748.4137573242188, + "learning_rate": 7.810128176828065e-06, + "loss": 26.3715, + "step": 189510 + }, + { + "epoch": 0.3828423906236743, + "grad_norm": 293.07574462890625, + "learning_rate": 7.809839450699253e-06, + "loss": 16.7798, + "step": 189520 + }, + { + "epoch": 0.38286259125635813, + "grad_norm": 874.266357421875, + "learning_rate": 7.809550710875624e-06, + "loss": 16.807, + "step": 189530 + }, + { + "epoch": 0.38288279188904195, + "grad_norm": 379.8854675292969, + "learning_rate": 7.809261957358585e-06, + "loss": 20.7475, + "step": 189540 + }, + { + "epoch": 0.38290299252172577, + "grad_norm": 809.4370727539062, + "learning_rate": 7.808973190149544e-06, + "loss": 23.3438, + "step": 189550 + }, + { + "epoch": 0.3829231931544096, + "grad_norm": 867.1205444335938, + "learning_rate": 7.80868440924991e-06, + "loss": 21.947, + "step": 189560 + }, + { + "epoch": 0.3829433937870934, + "grad_norm": 23.575105667114258, + "learning_rate": 7.808395614661086e-06, + "loss": 4.3154, + "step": 189570 + }, + { + "epoch": 0.38296359441977723, + "grad_norm": 314.0118103027344, + "learning_rate": 7.808106806384484e-06, + "loss": 22.6902, + "step": 189580 + }, + { + "epoch": 0.38298379505246105, + "grad_norm": 475.39581298828125, + "learning_rate": 7.80781798442151e-06, + "loss": 31.9625, + "step": 189590 + }, + { + "epoch": 0.38300399568514487, + "grad_norm": 474.6445617675781, + "learning_rate": 7.807529148773572e-06, + "loss": 20.4564, + "step": 189600 + }, + { + "epoch": 0.3830241963178287, + "grad_norm": 422.025390625, + "learning_rate": 7.807240299442078e-06, + "loss": 16.3724, + "step": 189610 + }, + { + "epoch": 0.3830443969505125, + "grad_norm": 716.3912963867188, + "learning_rate": 7.806951436428433e-06, + "loss": 21.4261, + "step": 189620 + }, + { + "epoch": 0.38306459758319633, + "grad_norm": 161.43482971191406, + "learning_rate": 7.80666255973405e-06, + "loss": 34.0578, + "step": 189630 + }, + { + "epoch": 0.3830847982158801, + "grad_norm": 570.9780883789062, + "learning_rate": 7.806373669360332e-06, + "loss": 25.1385, + "step": 189640 + }, + { + "epoch": 0.3831049988485639, + "grad_norm": 509.7627868652344, + "learning_rate": 7.80608476530869e-06, + "loss": 14.3529, + "step": 189650 + }, + { + "epoch": 0.38312519948124774, + "grad_norm": 294.26800537109375, + "learning_rate": 7.80579584758053e-06, + "loss": 27.4212, + "step": 189660 + }, + { + "epoch": 0.38314540011393156, + "grad_norm": 307.59808349609375, + "learning_rate": 7.805506916177263e-06, + "loss": 8.6388, + "step": 189670 + }, + { + "epoch": 0.3831656007466154, + "grad_norm": 793.291259765625, + "learning_rate": 7.805217971100295e-06, + "loss": 36.2088, + "step": 189680 + }, + { + "epoch": 0.3831858013792992, + "grad_norm": 297.2881164550781, + "learning_rate": 7.804929012351034e-06, 
+ "loss": 12.2604, + "step": 189690 + }, + { + "epoch": 0.383206002011983, + "grad_norm": 621.73876953125, + "learning_rate": 7.80464003993089e-06, + "loss": 21.661, + "step": 189700 + }, + { + "epoch": 0.38322620264466684, + "grad_norm": 266.7144775390625, + "learning_rate": 7.80435105384127e-06, + "loss": 14.7892, + "step": 189710 + }, + { + "epoch": 0.38324640327735066, + "grad_norm": 389.62469482421875, + "learning_rate": 7.804062054083585e-06, + "loss": 15.0259, + "step": 189720 + }, + { + "epoch": 0.3832666039100345, + "grad_norm": 1625.658935546875, + "learning_rate": 7.803773040659239e-06, + "loss": 29.3019, + "step": 189730 + }, + { + "epoch": 0.3832868045427183, + "grad_norm": 314.7716064453125, + "learning_rate": 7.803484013569644e-06, + "loss": 22.3522, + "step": 189740 + }, + { + "epoch": 0.3833070051754021, + "grad_norm": 456.04412841796875, + "learning_rate": 7.80319497281621e-06, + "loss": 34.2024, + "step": 189750 + }, + { + "epoch": 0.38332720580808594, + "grad_norm": 0.025324681773781776, + "learning_rate": 7.802905918400342e-06, + "loss": 18.5097, + "step": 189760 + }, + { + "epoch": 0.3833474064407697, + "grad_norm": 363.8247375488281, + "learning_rate": 7.80261685032345e-06, + "loss": 18.735, + "step": 189770 + }, + { + "epoch": 0.3833676070734535, + "grad_norm": 132.87318420410156, + "learning_rate": 7.802327768586944e-06, + "loss": 10.1872, + "step": 189780 + }, + { + "epoch": 0.38338780770613734, + "grad_norm": 319.84515380859375, + "learning_rate": 7.802038673192233e-06, + "loss": 13.8941, + "step": 189790 + }, + { + "epoch": 0.38340800833882116, + "grad_norm": 166.3119659423828, + "learning_rate": 7.801749564140724e-06, + "loss": 38.4708, + "step": 189800 + }, + { + "epoch": 0.383428208971505, + "grad_norm": 551.60205078125, + "learning_rate": 7.801460441433828e-06, + "loss": 11.4967, + "step": 189810 + }, + { + "epoch": 0.3834484096041888, + "grad_norm": 486.2964782714844, + "learning_rate": 7.801171305072954e-06, + "loss": 54.4579, + "step": 189820 + }, + { + "epoch": 0.3834686102368726, + "grad_norm": 522.7513427734375, + "learning_rate": 7.80088215505951e-06, + "loss": 29.501, + "step": 189830 + }, + { + "epoch": 0.38348881086955644, + "grad_norm": 342.9999694824219, + "learning_rate": 7.800592991394906e-06, + "loss": 20.4978, + "step": 189840 + }, + { + "epoch": 0.38350901150224026, + "grad_norm": 182.85626220703125, + "learning_rate": 7.800303814080552e-06, + "loss": 15.3206, + "step": 189850 + }, + { + "epoch": 0.3835292121349241, + "grad_norm": 917.25830078125, + "learning_rate": 7.800014623117858e-06, + "loss": 39.3917, + "step": 189860 + }, + { + "epoch": 0.3835494127676079, + "grad_norm": 296.2469177246094, + "learning_rate": 7.799725418508231e-06, + "loss": 42.9546, + "step": 189870 + }, + { + "epoch": 0.3835696134002917, + "grad_norm": 525.8936767578125, + "learning_rate": 7.799436200253082e-06, + "loss": 17.0192, + "step": 189880 + }, + { + "epoch": 0.38358981403297554, + "grad_norm": 544.18359375, + "learning_rate": 7.79914696835382e-06, + "loss": 27.5664, + "step": 189890 + }, + { + "epoch": 0.3836100146656593, + "grad_norm": 309.77520751953125, + "learning_rate": 7.798857722811857e-06, + "loss": 33.1132, + "step": 189900 + }, + { + "epoch": 0.3836302152983431, + "grad_norm": 788.8636474609375, + "learning_rate": 7.798568463628597e-06, + "loss": 16.2416, + "step": 189910 + }, + { + "epoch": 0.38365041593102694, + "grad_norm": 363.8883972167969, + "learning_rate": 7.798279190805458e-06, + "loss": 16.0867, + "step": 189920 + }, + { + "epoch": 
0.38367061656371076, + "grad_norm": 475.0231018066406, + "learning_rate": 7.797989904343844e-06, + "loss": 21.7322, + "step": 189930 + }, + { + "epoch": 0.3836908171963946, + "grad_norm": 297.6060485839844, + "learning_rate": 7.797700604245166e-06, + "loss": 21.8392, + "step": 189940 + }, + { + "epoch": 0.3837110178290784, + "grad_norm": 308.3179016113281, + "learning_rate": 7.797411290510836e-06, + "loss": 24.0743, + "step": 189950 + }, + { + "epoch": 0.3837312184617622, + "grad_norm": 385.8530578613281, + "learning_rate": 7.797121963142263e-06, + "loss": 16.2976, + "step": 189960 + }, + { + "epoch": 0.38375141909444604, + "grad_norm": 850.6321411132812, + "learning_rate": 7.796832622140854e-06, + "loss": 17.7315, + "step": 189970 + }, + { + "epoch": 0.38377161972712986, + "grad_norm": 74.19915771484375, + "learning_rate": 7.796543267508023e-06, + "loss": 26.2938, + "step": 189980 + }, + { + "epoch": 0.3837918203598137, + "grad_norm": 147.29254150390625, + "learning_rate": 7.79625389924518e-06, + "loss": 19.7365, + "step": 189990 + }, + { + "epoch": 0.3838120209924975, + "grad_norm": 649.164306640625, + "learning_rate": 7.795964517353734e-06, + "loss": 26.2491, + "step": 190000 + }, + { + "epoch": 0.3838322216251813, + "grad_norm": 12.49026107788086, + "learning_rate": 7.795675121835099e-06, + "loss": 12.423, + "step": 190010 + }, + { + "epoch": 0.3838524222578651, + "grad_norm": 535.0386962890625, + "learning_rate": 7.795385712690678e-06, + "loss": 19.0475, + "step": 190020 + }, + { + "epoch": 0.3838726228905489, + "grad_norm": 294.6451721191406, + "learning_rate": 7.795096289921888e-06, + "loss": 40.8549, + "step": 190030 + }, + { + "epoch": 0.38389282352323273, + "grad_norm": 517.9992065429688, + "learning_rate": 7.794806853530139e-06, + "loss": 28.8375, + "step": 190040 + }, + { + "epoch": 0.38391302415591655, + "grad_norm": 400.79620361328125, + "learning_rate": 7.79451740351684e-06, + "loss": 26.9044, + "step": 190050 + }, + { + "epoch": 0.38393322478860037, + "grad_norm": 626.6098022460938, + "learning_rate": 7.7942279398834e-06, + "loss": 31.716, + "step": 190060 + }, + { + "epoch": 0.3839534254212842, + "grad_norm": 429.52264404296875, + "learning_rate": 7.793938462631233e-06, + "loss": 23.1419, + "step": 190070 + }, + { + "epoch": 0.383973626053968, + "grad_norm": 595.3565063476562, + "learning_rate": 7.79364897176175e-06, + "loss": 25.5712, + "step": 190080 + }, + { + "epoch": 0.38399382668665183, + "grad_norm": 801.8369750976562, + "learning_rate": 7.79335946727636e-06, + "loss": 26.2808, + "step": 190090 + }, + { + "epoch": 0.38401402731933565, + "grad_norm": 566.8032836914062, + "learning_rate": 7.793069949176474e-06, + "loss": 19.8818, + "step": 190100 + }, + { + "epoch": 0.38403422795201947, + "grad_norm": 537.2616577148438, + "learning_rate": 7.792780417463505e-06, + "loss": 33.2724, + "step": 190110 + }, + { + "epoch": 0.3840544285847033, + "grad_norm": 444.0425109863281, + "learning_rate": 7.792490872138861e-06, + "loss": 30.7525, + "step": 190120 + }, + { + "epoch": 0.3840746292173871, + "grad_norm": 251.57086181640625, + "learning_rate": 7.792201313203957e-06, + "loss": 24.0304, + "step": 190130 + }, + { + "epoch": 0.38409482985007093, + "grad_norm": 126.93512725830078, + "learning_rate": 7.791911740660203e-06, + "loss": 17.5722, + "step": 190140 + }, + { + "epoch": 0.3841150304827547, + "grad_norm": 23.59386444091797, + "learning_rate": 7.791622154509008e-06, + "loss": 25.2037, + "step": 190150 + }, + { + "epoch": 0.3841352311154385, + "grad_norm": 
170.85472106933594, + "learning_rate": 7.791332554751784e-06, + "loss": 12.2092, + "step": 190160 + }, + { + "epoch": 0.38415543174812233, + "grad_norm": 418.3758239746094, + "learning_rate": 7.791042941389948e-06, + "loss": 15.9761, + "step": 190170 + }, + { + "epoch": 0.38417563238080615, + "grad_norm": 331.4839172363281, + "learning_rate": 7.790753314424903e-06, + "loss": 30.2002, + "step": 190180 + }, + { + "epoch": 0.38419583301349, + "grad_norm": 347.7175598144531, + "learning_rate": 7.790463673858069e-06, + "loss": 31.4372, + "step": 190190 + }, + { + "epoch": 0.3842160336461738, + "grad_norm": 256.3693542480469, + "learning_rate": 7.79017401969085e-06, + "loss": 16.3942, + "step": 190200 + }, + { + "epoch": 0.3842362342788576, + "grad_norm": 512.9190063476562, + "learning_rate": 7.789884351924662e-06, + "loss": 23.2703, + "step": 190210 + }, + { + "epoch": 0.38425643491154143, + "grad_norm": 211.8291778564453, + "learning_rate": 7.789594670560917e-06, + "loss": 26.066, + "step": 190220 + }, + { + "epoch": 0.38427663554422525, + "grad_norm": 1608.3006591796875, + "learning_rate": 7.789304975601025e-06, + "loss": 23.6601, + "step": 190230 + }, + { + "epoch": 0.3842968361769091, + "grad_norm": 259.23809814453125, + "learning_rate": 7.789015267046399e-06, + "loss": 9.3303, + "step": 190240 + }, + { + "epoch": 0.3843170368095929, + "grad_norm": 403.8951110839844, + "learning_rate": 7.788725544898452e-06, + "loss": 22.6142, + "step": 190250 + }, + { + "epoch": 0.3843372374422767, + "grad_norm": 600.2481689453125, + "learning_rate": 7.788435809158593e-06, + "loss": 31.1368, + "step": 190260 + }, + { + "epoch": 0.38435743807496053, + "grad_norm": 284.01580810546875, + "learning_rate": 7.788146059828238e-06, + "loss": 26.3911, + "step": 190270 + }, + { + "epoch": 0.3843776387076443, + "grad_norm": 484.2990417480469, + "learning_rate": 7.787856296908795e-06, + "loss": 25.2612, + "step": 190280 + }, + { + "epoch": 0.3843978393403281, + "grad_norm": 481.3173522949219, + "learning_rate": 7.787566520401681e-06, + "loss": 53.1752, + "step": 190290 + }, + { + "epoch": 0.38441803997301194, + "grad_norm": 470.0443115234375, + "learning_rate": 7.787276730308304e-06, + "loss": 32.446, + "step": 190300 + }, + { + "epoch": 0.38443824060569576, + "grad_norm": 302.90643310546875, + "learning_rate": 7.786986926630079e-06, + "loss": 26.6058, + "step": 190310 + }, + { + "epoch": 0.3844584412383796, + "grad_norm": 445.5045166015625, + "learning_rate": 7.786697109368418e-06, + "loss": 18.3555, + "step": 190320 + }, + { + "epoch": 0.3844786418710634, + "grad_norm": 165.96913146972656, + "learning_rate": 7.786407278524733e-06, + "loss": 15.2357, + "step": 190330 + }, + { + "epoch": 0.3844988425037472, + "grad_norm": 256.4248046875, + "learning_rate": 7.786117434100438e-06, + "loss": 13.3223, + "step": 190340 + }, + { + "epoch": 0.38451904313643104, + "grad_norm": 491.4432067871094, + "learning_rate": 7.785827576096943e-06, + "loss": 16.5863, + "step": 190350 + }, + { + "epoch": 0.38453924376911486, + "grad_norm": 549.3370971679688, + "learning_rate": 7.785537704515662e-06, + "loss": 11.8801, + "step": 190360 + }, + { + "epoch": 0.3845594444017987, + "grad_norm": 1140.859619140625, + "learning_rate": 7.785247819358009e-06, + "loss": 26.7579, + "step": 190370 + }, + { + "epoch": 0.3845796450344825, + "grad_norm": 249.52108764648438, + "learning_rate": 7.784957920625396e-06, + "loss": 40.0003, + "step": 190380 + }, + { + "epoch": 0.3845998456671663, + "grad_norm": 897.1212768554688, + "learning_rate": 
7.784668008319235e-06, + "loss": 20.8859, + "step": 190390 + }, + { + "epoch": 0.38462004629985014, + "grad_norm": 297.2471618652344, + "learning_rate": 7.78437808244094e-06, + "loss": 24.5372, + "step": 190400 + }, + { + "epoch": 0.3846402469325339, + "grad_norm": 182.60113525390625, + "learning_rate": 7.784088142991926e-06, + "loss": 32.806, + "step": 190410 + }, + { + "epoch": 0.3846604475652177, + "grad_norm": 286.5628967285156, + "learning_rate": 7.783798189973601e-06, + "loss": 15.1996, + "step": 190420 + }, + { + "epoch": 0.38468064819790154, + "grad_norm": 292.6827392578125, + "learning_rate": 7.783508223387384e-06, + "loss": 8.7446, + "step": 190430 + }, + { + "epoch": 0.38470084883058536, + "grad_norm": 1819.3795166015625, + "learning_rate": 7.783218243234684e-06, + "loss": 17.3953, + "step": 190440 + }, + { + "epoch": 0.3847210494632692, + "grad_norm": 210.9979248046875, + "learning_rate": 7.782928249516915e-06, + "loss": 15.8199, + "step": 190450 + }, + { + "epoch": 0.384741250095953, + "grad_norm": 94.75699615478516, + "learning_rate": 7.782638242235493e-06, + "loss": 18.1524, + "step": 190460 + }, + { + "epoch": 0.3847614507286368, + "grad_norm": 125.23542022705078, + "learning_rate": 7.782348221391828e-06, + "loss": 18.0945, + "step": 190470 + }, + { + "epoch": 0.38478165136132064, + "grad_norm": 614.4030151367188, + "learning_rate": 7.782058186987337e-06, + "loss": 16.2525, + "step": 190480 + }, + { + "epoch": 0.38480185199400446, + "grad_norm": 588.0127563476562, + "learning_rate": 7.781768139023431e-06, + "loss": 18.1065, + "step": 190490 + }, + { + "epoch": 0.3848220526266883, + "grad_norm": 94.29621124267578, + "learning_rate": 7.781478077501526e-06, + "loss": 25.8049, + "step": 190500 + }, + { + "epoch": 0.3848422532593721, + "grad_norm": 663.5479736328125, + "learning_rate": 7.78118800242303e-06, + "loss": 17.9266, + "step": 190510 + }, + { + "epoch": 0.3848624538920559, + "grad_norm": 362.5411071777344, + "learning_rate": 7.780897913789364e-06, + "loss": 15.5323, + "step": 190520 + }, + { + "epoch": 0.38488265452473974, + "grad_norm": 860.7693481445312, + "learning_rate": 7.780607811601939e-06, + "loss": 31.4978, + "step": 190530 + }, + { + "epoch": 0.3849028551574235, + "grad_norm": 536.4878540039062, + "learning_rate": 7.78031769586217e-06, + "loss": 33.0594, + "step": 190540 + }, + { + "epoch": 0.3849230557901073, + "grad_norm": 372.718994140625, + "learning_rate": 7.780027566571467e-06, + "loss": 18.7467, + "step": 190550 + }, + { + "epoch": 0.38494325642279115, + "grad_norm": 238.12620544433594, + "learning_rate": 7.779737423731248e-06, + "loss": 14.4867, + "step": 190560 + }, + { + "epoch": 0.38496345705547497, + "grad_norm": 751.9949340820312, + "learning_rate": 7.779447267342926e-06, + "loss": 27.8818, + "step": 190570 + }, + { + "epoch": 0.3849836576881588, + "grad_norm": 244.15658569335938, + "learning_rate": 7.779157097407914e-06, + "loss": 22.7166, + "step": 190580 + }, + { + "epoch": 0.3850038583208426, + "grad_norm": 751.5225219726562, + "learning_rate": 7.77886691392763e-06, + "loss": 17.3041, + "step": 190590 + }, + { + "epoch": 0.3850240589535264, + "grad_norm": 578.5184936523438, + "learning_rate": 7.778576716903484e-06, + "loss": 20.246, + "step": 190600 + }, + { + "epoch": 0.38504425958621025, + "grad_norm": 264.69122314453125, + "learning_rate": 7.778286506336892e-06, + "loss": 12.2246, + "step": 190610 + }, + { + "epoch": 0.38506446021889407, + "grad_norm": 356.569091796875, + "learning_rate": 7.777996282229267e-06, + "loss": 13.9804, + "step": 
190620 + }, + { + "epoch": 0.3850846608515779, + "grad_norm": 850.6675415039062, + "learning_rate": 7.777706044582027e-06, + "loss": 24.7217, + "step": 190630 + }, + { + "epoch": 0.3851048614842617, + "grad_norm": 2920.63818359375, + "learning_rate": 7.777415793396585e-06, + "loss": 35.0636, + "step": 190640 + }, + { + "epoch": 0.3851250621169455, + "grad_norm": 1809.05517578125, + "learning_rate": 7.777125528674356e-06, + "loss": 19.7704, + "step": 190650 + }, + { + "epoch": 0.3851452627496293, + "grad_norm": 762.5745239257812, + "learning_rate": 7.776835250416752e-06, + "loss": 36.9752, + "step": 190660 + }, + { + "epoch": 0.3851654633823131, + "grad_norm": 122.67737579345703, + "learning_rate": 7.776544958625189e-06, + "loss": 21.3244, + "step": 190670 + }, + { + "epoch": 0.38518566401499693, + "grad_norm": 805.9771728515625, + "learning_rate": 7.776254653301086e-06, + "loss": 23.1274, + "step": 190680 + }, + { + "epoch": 0.38520586464768075, + "grad_norm": 739.9912719726562, + "learning_rate": 7.775964334445851e-06, + "loss": 28.9868, + "step": 190690 + }, + { + "epoch": 0.38522606528036457, + "grad_norm": 421.4423828125, + "learning_rate": 7.775674002060905e-06, + "loss": 27.7118, + "step": 190700 + }, + { + "epoch": 0.3852462659130484, + "grad_norm": 266.5092468261719, + "learning_rate": 7.775383656147659e-06, + "loss": 12.4261, + "step": 190710 + }, + { + "epoch": 0.3852664665457322, + "grad_norm": 389.5838928222656, + "learning_rate": 7.77509329670753e-06, + "loss": 17.9006, + "step": 190720 + }, + { + "epoch": 0.38528666717841603, + "grad_norm": 388.9902648925781, + "learning_rate": 7.774802923741936e-06, + "loss": 21.794, + "step": 190730 + }, + { + "epoch": 0.38530686781109985, + "grad_norm": 165.98974609375, + "learning_rate": 7.774512537252284e-06, + "loss": 25.8932, + "step": 190740 + }, + { + "epoch": 0.38532706844378367, + "grad_norm": 566.0869140625, + "learning_rate": 7.774222137239998e-06, + "loss": 13.8304, + "step": 190750 + }, + { + "epoch": 0.3853472690764675, + "grad_norm": 616.65185546875, + "learning_rate": 7.773931723706487e-06, + "loss": 17.8071, + "step": 190760 + }, + { + "epoch": 0.3853674697091513, + "grad_norm": 462.8968811035156, + "learning_rate": 7.773641296653171e-06, + "loss": 24.443, + "step": 190770 + }, + { + "epoch": 0.38538767034183513, + "grad_norm": 488.7454833984375, + "learning_rate": 7.773350856081464e-06, + "loss": 22.0167, + "step": 190780 + }, + { + "epoch": 0.3854078709745189, + "grad_norm": 218.26710510253906, + "learning_rate": 7.773060401992781e-06, + "loss": 15.0263, + "step": 190790 + }, + { + "epoch": 0.3854280716072027, + "grad_norm": 437.1186828613281, + "learning_rate": 7.772769934388537e-06, + "loss": 15.2703, + "step": 190800 + }, + { + "epoch": 0.38544827223988654, + "grad_norm": 188.64244079589844, + "learning_rate": 7.772479453270149e-06, + "loss": 16.9342, + "step": 190810 + }, + { + "epoch": 0.38546847287257036, + "grad_norm": 256.7730712890625, + "learning_rate": 7.772188958639034e-06, + "loss": 16.2013, + "step": 190820 + }, + { + "epoch": 0.3854886735052542, + "grad_norm": 299.796142578125, + "learning_rate": 7.771898450496605e-06, + "loss": 23.0266, + "step": 190830 + }, + { + "epoch": 0.385508874137938, + "grad_norm": 746.550048828125, + "learning_rate": 7.771607928844278e-06, + "loss": 17.1555, + "step": 190840 + }, + { + "epoch": 0.3855290747706218, + "grad_norm": 209.331298828125, + "learning_rate": 7.771317393683471e-06, + "loss": 18.055, + "step": 190850 + }, + { + "epoch": 0.38554927540330564, + "grad_norm": 
918.1318969726562, + "learning_rate": 7.7710268450156e-06, + "loss": 22.2411, + "step": 190860 + }, + { + "epoch": 0.38556947603598946, + "grad_norm": 396.0271911621094, + "learning_rate": 7.770736282842079e-06, + "loss": 29.7558, + "step": 190870 + }, + { + "epoch": 0.3855896766686733, + "grad_norm": 543.81494140625, + "learning_rate": 7.770445707164325e-06, + "loss": 22.1566, + "step": 190880 + }, + { + "epoch": 0.3856098773013571, + "grad_norm": 231.0266876220703, + "learning_rate": 7.770155117983757e-06, + "loss": 15.992, + "step": 190890 + }, + { + "epoch": 0.3856300779340409, + "grad_norm": 236.5028076171875, + "learning_rate": 7.769864515301787e-06, + "loss": 18.8548, + "step": 190900 + }, + { + "epoch": 0.38565027856672474, + "grad_norm": 553.9424438476562, + "learning_rate": 7.769573899119834e-06, + "loss": 32.5234, + "step": 190910 + }, + { + "epoch": 0.3856704791994085, + "grad_norm": 217.63796997070312, + "learning_rate": 7.769283269439314e-06, + "loss": 17.9023, + "step": 190920 + }, + { + "epoch": 0.3856906798320923, + "grad_norm": 265.1778564453125, + "learning_rate": 7.768992626261642e-06, + "loss": 20.8228, + "step": 190930 + }, + { + "epoch": 0.38571088046477614, + "grad_norm": 330.60870361328125, + "learning_rate": 7.768701969588237e-06, + "loss": 23.952, + "step": 190940 + }, + { + "epoch": 0.38573108109745996, + "grad_norm": 64.96464538574219, + "learning_rate": 7.768411299420513e-06, + "loss": 14.4169, + "step": 190950 + }, + { + "epoch": 0.3857512817301438, + "grad_norm": 468.0285339355469, + "learning_rate": 7.76812061575989e-06, + "loss": 20.3455, + "step": 190960 + }, + { + "epoch": 0.3857714823628276, + "grad_norm": 251.83633422851562, + "learning_rate": 7.767829918607782e-06, + "loss": 15.6292, + "step": 190970 + }, + { + "epoch": 0.3857916829955114, + "grad_norm": 352.02020263671875, + "learning_rate": 7.767539207965606e-06, + "loss": 20.8917, + "step": 190980 + }, + { + "epoch": 0.38581188362819524, + "grad_norm": 202.3304443359375, + "learning_rate": 7.767248483834781e-06, + "loss": 7.3944, + "step": 190990 + }, + { + "epoch": 0.38583208426087906, + "grad_norm": 782.37939453125, + "learning_rate": 7.76695774621672e-06, + "loss": 21.0491, + "step": 191000 + }, + { + "epoch": 0.3858522848935629, + "grad_norm": 323.6791076660156, + "learning_rate": 7.766666995112846e-06, + "loss": 22.5071, + "step": 191010 + }, + { + "epoch": 0.3858724855262467, + "grad_norm": 381.05926513671875, + "learning_rate": 7.76637623052457e-06, + "loss": 20.2027, + "step": 191020 + }, + { + "epoch": 0.3858926861589305, + "grad_norm": 833.758056640625, + "learning_rate": 7.766085452453312e-06, + "loss": 16.7291, + "step": 191030 + }, + { + "epoch": 0.38591288679161434, + "grad_norm": 652.1970825195312, + "learning_rate": 7.765794660900489e-06, + "loss": 25.2256, + "step": 191040 + }, + { + "epoch": 0.3859330874242981, + "grad_norm": 328.4167175292969, + "learning_rate": 7.76550385586752e-06, + "loss": 11.8474, + "step": 191050 + }, + { + "epoch": 0.3859532880569819, + "grad_norm": 767.3618774414062, + "learning_rate": 7.76521303735582e-06, + "loss": 26.2798, + "step": 191060 + }, + { + "epoch": 0.38597348868966574, + "grad_norm": 994.3623657226562, + "learning_rate": 7.764922205366807e-06, + "loss": 26.8572, + "step": 191070 + }, + { + "epoch": 0.38599368932234956, + "grad_norm": 280.192626953125, + "learning_rate": 7.764631359901897e-06, + "loss": 24.9527, + "step": 191080 + }, + { + "epoch": 0.3860138899550334, + "grad_norm": 405.3322448730469, + "learning_rate": 7.764340500962511e-06, 
+ "loss": 29.9454, + "step": 191090 + }, + { + "epoch": 0.3860340905877172, + "grad_norm": 217.43472290039062, + "learning_rate": 7.764049628550063e-06, + "loss": 24.0357, + "step": 191100 + }, + { + "epoch": 0.386054291220401, + "grad_norm": 278.47412109375, + "learning_rate": 7.763758742665973e-06, + "loss": 32.7479, + "step": 191110 + }, + { + "epoch": 0.38607449185308484, + "grad_norm": 332.62255859375, + "learning_rate": 7.763467843311658e-06, + "loss": 26.9294, + "step": 191120 + }, + { + "epoch": 0.38609469248576866, + "grad_norm": 353.11480712890625, + "learning_rate": 7.763176930488537e-06, + "loss": 25.5999, + "step": 191130 + }, + { + "epoch": 0.3861148931184525, + "grad_norm": 623.3184204101562, + "learning_rate": 7.762886004198024e-06, + "loss": 17.434, + "step": 191140 + }, + { + "epoch": 0.3861350937511363, + "grad_norm": 525.96923828125, + "learning_rate": 7.762595064441542e-06, + "loss": 16.3577, + "step": 191150 + }, + { + "epoch": 0.3861552943838201, + "grad_norm": 447.5262145996094, + "learning_rate": 7.762304111220506e-06, + "loss": 15.5832, + "step": 191160 + }, + { + "epoch": 0.38617549501650394, + "grad_norm": 373.88299560546875, + "learning_rate": 7.762013144536337e-06, + "loss": 30.258, + "step": 191170 + }, + { + "epoch": 0.3861956956491877, + "grad_norm": 342.3337707519531, + "learning_rate": 7.761722164390448e-06, + "loss": 16.9355, + "step": 191180 + }, + { + "epoch": 0.38621589628187153, + "grad_norm": 916.4569091796875, + "learning_rate": 7.761431170784261e-06, + "loss": 46.6152, + "step": 191190 + }, + { + "epoch": 0.38623609691455535, + "grad_norm": 630.3865356445312, + "learning_rate": 7.761140163719194e-06, + "loss": 19.4969, + "step": 191200 + }, + { + "epoch": 0.38625629754723917, + "grad_norm": 522.5431518554688, + "learning_rate": 7.760849143196664e-06, + "loss": 15.8899, + "step": 191210 + }, + { + "epoch": 0.386276498179923, + "grad_norm": 128.63572692871094, + "learning_rate": 7.76055810921809e-06, + "loss": 17.217, + "step": 191220 + }, + { + "epoch": 0.3862966988126068, + "grad_norm": 1020.7322387695312, + "learning_rate": 7.760267061784891e-06, + "loss": 25.6774, + "step": 191230 + }, + { + "epoch": 0.38631689944529063, + "grad_norm": 330.7348937988281, + "learning_rate": 7.759976000898486e-06, + "loss": 19.0391, + "step": 191240 + }, + { + "epoch": 0.38633710007797445, + "grad_norm": 156.26284790039062, + "learning_rate": 7.759684926560292e-06, + "loss": 14.8975, + "step": 191250 + }, + { + "epoch": 0.38635730071065827, + "grad_norm": 406.6961975097656, + "learning_rate": 7.759393838771728e-06, + "loss": 18.1821, + "step": 191260 + }, + { + "epoch": 0.3863775013433421, + "grad_norm": 175.15341186523438, + "learning_rate": 7.759102737534214e-06, + "loss": 27.0331, + "step": 191270 + }, + { + "epoch": 0.3863977019760259, + "grad_norm": 535.4419555664062, + "learning_rate": 7.758811622849167e-06, + "loss": 20.4541, + "step": 191280 + }, + { + "epoch": 0.38641790260870973, + "grad_norm": 560.9713134765625, + "learning_rate": 7.758520494718006e-06, + "loss": 27.5345, + "step": 191290 + }, + { + "epoch": 0.3864381032413935, + "grad_norm": 104.56470489501953, + "learning_rate": 7.758229353142153e-06, + "loss": 18.1173, + "step": 191300 + }, + { + "epoch": 0.3864583038740773, + "grad_norm": 896.4780883789062, + "learning_rate": 7.757938198123024e-06, + "loss": 17.6291, + "step": 191310 + }, + { + "epoch": 0.38647850450676113, + "grad_norm": 774.9434204101562, + "learning_rate": 7.757647029662037e-06, + "loss": 16.2023, + "step": 191320 + }, + { + 
"epoch": 0.38649870513944495, + "grad_norm": 441.3625793457031, + "learning_rate": 7.757355847760614e-06, + "loss": 13.2716, + "step": 191330 + }, + { + "epoch": 0.3865189057721288, + "grad_norm": 192.32289123535156, + "learning_rate": 7.757064652420172e-06, + "loss": 37.5933, + "step": 191340 + }, + { + "epoch": 0.3865391064048126, + "grad_norm": 221.5293426513672, + "learning_rate": 7.756773443642132e-06, + "loss": 58.5677, + "step": 191350 + }, + { + "epoch": 0.3865593070374964, + "grad_norm": 188.2999267578125, + "learning_rate": 7.756482221427914e-06, + "loss": 42.004, + "step": 191360 + }, + { + "epoch": 0.38657950767018023, + "grad_norm": 148.3327178955078, + "learning_rate": 7.756190985778933e-06, + "loss": 23.1077, + "step": 191370 + }, + { + "epoch": 0.38659970830286405, + "grad_norm": 871.6217041015625, + "learning_rate": 7.755899736696613e-06, + "loss": 24.2541, + "step": 191380 + }, + { + "epoch": 0.3866199089355479, + "grad_norm": 551.8317260742188, + "learning_rate": 7.755608474182372e-06, + "loss": 28.5641, + "step": 191390 + }, + { + "epoch": 0.3866401095682317, + "grad_norm": 79.60022735595703, + "learning_rate": 7.755317198237631e-06, + "loss": 23.9328, + "step": 191400 + }, + { + "epoch": 0.3866603102009155, + "grad_norm": 776.9974365234375, + "learning_rate": 7.755025908863807e-06, + "loss": 33.3793, + "step": 191410 + }, + { + "epoch": 0.38668051083359933, + "grad_norm": 207.62042236328125, + "learning_rate": 7.75473460606232e-06, + "loss": 11.9068, + "step": 191420 + }, + { + "epoch": 0.3867007114662831, + "grad_norm": 1.0078130960464478, + "learning_rate": 7.75444328983459e-06, + "loss": 18.8599, + "step": 191430 + }, + { + "epoch": 0.3867209120989669, + "grad_norm": 428.4453430175781, + "learning_rate": 7.75415196018204e-06, + "loss": 37.7331, + "step": 191440 + }, + { + "epoch": 0.38674111273165074, + "grad_norm": 509.4866638183594, + "learning_rate": 7.753860617106085e-06, + "loss": 19.8049, + "step": 191450 + }, + { + "epoch": 0.38676131336433456, + "grad_norm": 248.05801391601562, + "learning_rate": 7.75356926060815e-06, + "loss": 23.2241, + "step": 191460 + }, + { + "epoch": 0.3867815139970184, + "grad_norm": 185.79217529296875, + "learning_rate": 7.75327789068965e-06, + "loss": 23.705, + "step": 191470 + }, + { + "epoch": 0.3868017146297022, + "grad_norm": 459.7008972167969, + "learning_rate": 7.752986507352009e-06, + "loss": 21.4752, + "step": 191480 + }, + { + "epoch": 0.386821915262386, + "grad_norm": 524.0685424804688, + "learning_rate": 7.752695110596644e-06, + "loss": 18.9021, + "step": 191490 + }, + { + "epoch": 0.38684211589506984, + "grad_norm": 545.9414672851562, + "learning_rate": 7.752403700424978e-06, + "loss": 22.1293, + "step": 191500 + }, + { + "epoch": 0.38686231652775366, + "grad_norm": 491.1232604980469, + "learning_rate": 7.75211227683843e-06, + "loss": 33.53, + "step": 191510 + }, + { + "epoch": 0.3868825171604375, + "grad_norm": 428.9862976074219, + "learning_rate": 7.751820839838423e-06, + "loss": 18.6401, + "step": 191520 + }, + { + "epoch": 0.3869027177931213, + "grad_norm": 39.37624740600586, + "learning_rate": 7.751529389426372e-06, + "loss": 12.3774, + "step": 191530 + }, + { + "epoch": 0.3869229184258051, + "grad_norm": 361.1414794921875, + "learning_rate": 7.7512379256037e-06, + "loss": 29.8175, + "step": 191540 + }, + { + "epoch": 0.38694311905848894, + "grad_norm": 444.081787109375, + "learning_rate": 7.75094644837183e-06, + "loss": 10.0405, + "step": 191550 + }, + { + "epoch": 0.3869633196911727, + "grad_norm": 
337.3037414550781, + "learning_rate": 7.750654957732179e-06, + "loss": 15.4009, + "step": 191560 + }, + { + "epoch": 0.3869835203238565, + "grad_norm": 593.4365844726562, + "learning_rate": 7.75036345368617e-06, + "loss": 18.3733, + "step": 191570 + }, + { + "epoch": 0.38700372095654034, + "grad_norm": 437.93780517578125, + "learning_rate": 7.750071936235223e-06, + "loss": 23.0736, + "step": 191580 + }, + { + "epoch": 0.38702392158922416, + "grad_norm": 179.80548095703125, + "learning_rate": 7.74978040538076e-06, + "loss": 8.6473, + "step": 191590 + }, + { + "epoch": 0.387044122221908, + "grad_norm": 180.412353515625, + "learning_rate": 7.7494888611242e-06, + "loss": 21.2373, + "step": 191600 + }, + { + "epoch": 0.3870643228545918, + "grad_norm": 150.6357421875, + "learning_rate": 7.749197303466964e-06, + "loss": 23.9392, + "step": 191610 + }, + { + "epoch": 0.3870845234872756, + "grad_norm": 316.60296630859375, + "learning_rate": 7.748905732410475e-06, + "loss": 29.0779, + "step": 191620 + }, + { + "epoch": 0.38710472411995944, + "grad_norm": 552.203857421875, + "learning_rate": 7.748614147956153e-06, + "loss": 21.3639, + "step": 191630 + }, + { + "epoch": 0.38712492475264326, + "grad_norm": 399.39093017578125, + "learning_rate": 7.748322550105419e-06, + "loss": 16.7633, + "step": 191640 + }, + { + "epoch": 0.3871451253853271, + "grad_norm": 317.05401611328125, + "learning_rate": 7.748030938859692e-06, + "loss": 23.4956, + "step": 191650 + }, + { + "epoch": 0.3871653260180109, + "grad_norm": 502.89508056640625, + "learning_rate": 7.747739314220398e-06, + "loss": 14.9821, + "step": 191660 + }, + { + "epoch": 0.3871855266506947, + "grad_norm": 153.30264282226562, + "learning_rate": 7.747447676188955e-06, + "loss": 26.0995, + "step": 191670 + }, + { + "epoch": 0.38720572728337854, + "grad_norm": 213.2479248046875, + "learning_rate": 7.747156024766785e-06, + "loss": 33.2892, + "step": 191680 + }, + { + "epoch": 0.3872259279160623, + "grad_norm": 83.84458923339844, + "learning_rate": 7.74686435995531e-06, + "loss": 17.1863, + "step": 191690 + }, + { + "epoch": 0.3872461285487461, + "grad_norm": 280.7131042480469, + "learning_rate": 7.74657268175595e-06, + "loss": 18.4512, + "step": 191700 + }, + { + "epoch": 0.38726632918142995, + "grad_norm": 203.6442108154297, + "learning_rate": 7.74628099017013e-06, + "loss": 24.3327, + "step": 191710 + }, + { + "epoch": 0.38728652981411377, + "grad_norm": 260.60430908203125, + "learning_rate": 7.74598928519927e-06, + "loss": 25.9397, + "step": 191720 + }, + { + "epoch": 0.3873067304467976, + "grad_norm": 357.5342102050781, + "learning_rate": 7.745697566844788e-06, + "loss": 20.6397, + "step": 191730 + }, + { + "epoch": 0.3873269310794814, + "grad_norm": 65.1893539428711, + "learning_rate": 7.745405835108112e-06, + "loss": 22.6363, + "step": 191740 + }, + { + "epoch": 0.3873471317121652, + "grad_norm": 114.95198822021484, + "learning_rate": 7.74511408999066e-06, + "loss": 23.7369, + "step": 191750 + }, + { + "epoch": 0.38736733234484905, + "grad_norm": 304.2437438964844, + "learning_rate": 7.744822331493855e-06, + "loss": 11.8269, + "step": 191760 + }, + { + "epoch": 0.38738753297753287, + "grad_norm": 680.9169921875, + "learning_rate": 7.744530559619117e-06, + "loss": 29.2421, + "step": 191770 + }, + { + "epoch": 0.3874077336102167, + "grad_norm": 389.0510559082031, + "learning_rate": 7.744238774367873e-06, + "loss": 20.0267, + "step": 191780 + }, + { + "epoch": 0.3874279342429005, + "grad_norm": 356.3277587890625, + "learning_rate": 7.743946975741541e-06, 
+ "loss": 16.5415, + "step": 191790 + }, + { + "epoch": 0.3874481348755843, + "grad_norm": 185.0585479736328, + "learning_rate": 7.743655163741544e-06, + "loss": 26.0596, + "step": 191800 + }, + { + "epoch": 0.3874683355082681, + "grad_norm": 188.16104125976562, + "learning_rate": 7.743363338369303e-06, + "loss": 28.3094, + "step": 191810 + }, + { + "epoch": 0.3874885361409519, + "grad_norm": 147.9081573486328, + "learning_rate": 7.743071499626244e-06, + "loss": 16.0463, + "step": 191820 + }, + { + "epoch": 0.38750873677363573, + "grad_norm": 448.7315673828125, + "learning_rate": 7.742779647513785e-06, + "loss": 20.7494, + "step": 191830 + }, + { + "epoch": 0.38752893740631955, + "grad_norm": 196.2622833251953, + "learning_rate": 7.742487782033352e-06, + "loss": 16.3189, + "step": 191840 + }, + { + "epoch": 0.38754913803900337, + "grad_norm": 307.7751159667969, + "learning_rate": 7.742195903186366e-06, + "loss": 13.5797, + "step": 191850 + }, + { + "epoch": 0.3875693386716872, + "grad_norm": 263.0933837890625, + "learning_rate": 7.74190401097425e-06, + "loss": 21.196, + "step": 191860 + }, + { + "epoch": 0.387589539304371, + "grad_norm": 455.3670654296875, + "learning_rate": 7.741612105398429e-06, + "loss": 24.6287, + "step": 191870 + }, + { + "epoch": 0.38760973993705483, + "grad_norm": 136.99728393554688, + "learning_rate": 7.74132018646032e-06, + "loss": 14.6892, + "step": 191880 + }, + { + "epoch": 0.38762994056973865, + "grad_norm": 242.41091918945312, + "learning_rate": 7.741028254161349e-06, + "loss": 13.3496, + "step": 191890 + }, + { + "epoch": 0.38765014120242247, + "grad_norm": 267.5481262207031, + "learning_rate": 7.740736308502939e-06, + "loss": 20.801, + "step": 191900 + }, + { + "epoch": 0.3876703418351063, + "grad_norm": 184.3376007080078, + "learning_rate": 7.740444349486512e-06, + "loss": 14.7279, + "step": 191910 + }, + { + "epoch": 0.3876905424677901, + "grad_norm": 638.7349243164062, + "learning_rate": 7.740152377113493e-06, + "loss": 20.4639, + "step": 191920 + }, + { + "epoch": 0.38771074310047393, + "grad_norm": 517.0067749023438, + "learning_rate": 7.739860391385303e-06, + "loss": 26.5182, + "step": 191930 + }, + { + "epoch": 0.3877309437331577, + "grad_norm": 703.1930541992188, + "learning_rate": 7.739568392303364e-06, + "loss": 38.8584, + "step": 191940 + }, + { + "epoch": 0.3877511443658415, + "grad_norm": 519.8761596679688, + "learning_rate": 7.739276379869105e-06, + "loss": 23.3888, + "step": 191950 + }, + { + "epoch": 0.38777134499852534, + "grad_norm": 296.1531677246094, + "learning_rate": 7.738984354083942e-06, + "loss": 41.9171, + "step": 191960 + }, + { + "epoch": 0.38779154563120916, + "grad_norm": 523.1737060546875, + "learning_rate": 7.738692314949304e-06, + "loss": 22.122, + "step": 191970 + }, + { + "epoch": 0.387811746263893, + "grad_norm": 373.5809326171875, + "learning_rate": 7.738400262466608e-06, + "loss": 15.7975, + "step": 191980 + }, + { + "epoch": 0.3878319468965768, + "grad_norm": 316.2525939941406, + "learning_rate": 7.738108196637284e-06, + "loss": 24.286, + "step": 191990 + }, + { + "epoch": 0.3878521475292606, + "grad_norm": 323.22698974609375, + "learning_rate": 7.737816117462752e-06, + "loss": 32.5908, + "step": 192000 + }, + { + "epoch": 0.38787234816194444, + "grad_norm": 59.11886215209961, + "learning_rate": 7.737524024944437e-06, + "loss": 26.2432, + "step": 192010 + }, + { + "epoch": 0.38789254879462826, + "grad_norm": 574.417724609375, + "learning_rate": 7.737231919083761e-06, + "loss": 37.8521, + "step": 192020 + }, + { + 
"epoch": 0.3879127494273121, + "grad_norm": 552.07568359375, + "learning_rate": 7.736939799882149e-06, + "loss": 17.1835, + "step": 192030 + }, + { + "epoch": 0.3879329500599959, + "grad_norm": 455.1221618652344, + "learning_rate": 7.736647667341025e-06, + "loss": 36.4382, + "step": 192040 + }, + { + "epoch": 0.3879531506926797, + "grad_norm": 910.0355834960938, + "learning_rate": 7.736355521461812e-06, + "loss": 34.3768, + "step": 192050 + }, + { + "epoch": 0.38797335132536354, + "grad_norm": 832.4783325195312, + "learning_rate": 7.736063362245931e-06, + "loss": 30.0665, + "step": 192060 + }, + { + "epoch": 0.3879935519580473, + "grad_norm": 690.0657348632812, + "learning_rate": 7.735771189694813e-06, + "loss": 19.8979, + "step": 192070 + }, + { + "epoch": 0.3880137525907311, + "grad_norm": 220.23095703125, + "learning_rate": 7.735479003809876e-06, + "loss": 10.865, + "step": 192080 + }, + { + "epoch": 0.38803395322341494, + "grad_norm": 172.29656982421875, + "learning_rate": 7.735186804592548e-06, + "loss": 9.8104, + "step": 192090 + }, + { + "epoch": 0.38805415385609876, + "grad_norm": 558.6802368164062, + "learning_rate": 7.734894592044249e-06, + "loss": 26.7397, + "step": 192100 + }, + { + "epoch": 0.3880743544887826, + "grad_norm": 519.433349609375, + "learning_rate": 7.734602366166406e-06, + "loss": 16.3344, + "step": 192110 + }, + { + "epoch": 0.3880945551214664, + "grad_norm": 250.96353149414062, + "learning_rate": 7.734310126960444e-06, + "loss": 26.8956, + "step": 192120 + }, + { + "epoch": 0.3881147557541502, + "grad_norm": 271.8106384277344, + "learning_rate": 7.734017874427786e-06, + "loss": 20.7124, + "step": 192130 + }, + { + "epoch": 0.38813495638683404, + "grad_norm": 8.314308166503906, + "learning_rate": 7.733725608569856e-06, + "loss": 14.2017, + "step": 192140 + }, + { + "epoch": 0.38815515701951786, + "grad_norm": 426.02459716796875, + "learning_rate": 7.733433329388079e-06, + "loss": 14.6203, + "step": 192150 + }, + { + "epoch": 0.3881753576522017, + "grad_norm": 328.6076965332031, + "learning_rate": 7.733141036883878e-06, + "loss": 14.6803, + "step": 192160 + }, + { + "epoch": 0.3881955582848855, + "grad_norm": 632.5792236328125, + "learning_rate": 7.73284873105868e-06, + "loss": 27.803, + "step": 192170 + }, + { + "epoch": 0.3882157589175693, + "grad_norm": 295.1548767089844, + "learning_rate": 7.73255641191391e-06, + "loss": 23.968, + "step": 192180 + }, + { + "epoch": 0.38823595955025314, + "grad_norm": 366.90863037109375, + "learning_rate": 7.73226407945099e-06, + "loss": 12.7493, + "step": 192190 + }, + { + "epoch": 0.3882561601829369, + "grad_norm": 515.6712646484375, + "learning_rate": 7.731971733671347e-06, + "loss": 24.4788, + "step": 192200 + }, + { + "epoch": 0.3882763608156207, + "grad_norm": 517.9892578125, + "learning_rate": 7.731679374576404e-06, + "loss": 14.1814, + "step": 192210 + }, + { + "epoch": 0.38829656144830454, + "grad_norm": 318.1728210449219, + "learning_rate": 7.731387002167587e-06, + "loss": 15.2578, + "step": 192220 + }, + { + "epoch": 0.38831676208098836, + "grad_norm": 997.310302734375, + "learning_rate": 7.731094616446323e-06, + "loss": 26.4868, + "step": 192230 + }, + { + "epoch": 0.3883369627136722, + "grad_norm": 754.043212890625, + "learning_rate": 7.730802217414034e-06, + "loss": 21.1662, + "step": 192240 + }, + { + "epoch": 0.388357163346356, + "grad_norm": 342.50286865234375, + "learning_rate": 7.730509805072146e-06, + "loss": 24.6172, + "step": 192250 + }, + { + "epoch": 0.3883773639790398, + "grad_norm": 380.1715393066406, 
+ "learning_rate": 7.730217379422084e-06, + "loss": 36.2436, + "step": 192260 + }, + { + "epoch": 0.38839756461172364, + "grad_norm": 315.2763671875, + "learning_rate": 7.729924940465275e-06, + "loss": 17.2449, + "step": 192270 + }, + { + "epoch": 0.38841776524440746, + "grad_norm": 1045.7430419921875, + "learning_rate": 7.729632488203142e-06, + "loss": 40.8509, + "step": 192280 + }, + { + "epoch": 0.3884379658770913, + "grad_norm": 376.32586669921875, + "learning_rate": 7.729340022637111e-06, + "loss": 9.346, + "step": 192290 + }, + { + "epoch": 0.3884581665097751, + "grad_norm": 407.0836486816406, + "learning_rate": 7.729047543768608e-06, + "loss": 26.9397, + "step": 192300 + }, + { + "epoch": 0.3884783671424589, + "grad_norm": 1427.2691650390625, + "learning_rate": 7.72875505159906e-06, + "loss": 28.058, + "step": 192310 + }, + { + "epoch": 0.38849856777514274, + "grad_norm": 107.99658966064453, + "learning_rate": 7.728462546129888e-06, + "loss": 29.4344, + "step": 192320 + }, + { + "epoch": 0.3885187684078265, + "grad_norm": 236.956298828125, + "learning_rate": 7.728170027362523e-06, + "loss": 21.1595, + "step": 192330 + }, + { + "epoch": 0.38853896904051033, + "grad_norm": 213.91917419433594, + "learning_rate": 7.727877495298386e-06, + "loss": 36.8074, + "step": 192340 + }, + { + "epoch": 0.38855916967319415, + "grad_norm": 533.0302734375, + "learning_rate": 7.727584949938907e-06, + "loss": 22.6468, + "step": 192350 + }, + { + "epoch": 0.38857937030587797, + "grad_norm": 520.6922607421875, + "learning_rate": 7.727292391285507e-06, + "loss": 46.2784, + "step": 192360 + }, + { + "epoch": 0.3885995709385618, + "grad_norm": 232.45205688476562, + "learning_rate": 7.726999819339618e-06, + "loss": 40.3498, + "step": 192370 + }, + { + "epoch": 0.3886197715712456, + "grad_norm": 445.8552551269531, + "learning_rate": 7.72670723410266e-06, + "loss": 12.3434, + "step": 192380 + }, + { + "epoch": 0.38863997220392943, + "grad_norm": 425.1505126953125, + "learning_rate": 7.726414635576062e-06, + "loss": 22.3091, + "step": 192390 + }, + { + "epoch": 0.38866017283661325, + "grad_norm": 256.747802734375, + "learning_rate": 7.726122023761252e-06, + "loss": 18.3736, + "step": 192400 + }, + { + "epoch": 0.38868037346929707, + "grad_norm": 401.13604736328125, + "learning_rate": 7.72582939865965e-06, + "loss": 24.4971, + "step": 192410 + }, + { + "epoch": 0.3887005741019809, + "grad_norm": 253.91697692871094, + "learning_rate": 7.72553676027269e-06, + "loss": 17.021, + "step": 192420 + }, + { + "epoch": 0.3887207747346647, + "grad_norm": 109.07809448242188, + "learning_rate": 7.725244108601793e-06, + "loss": 34.0692, + "step": 192430 + }, + { + "epoch": 0.38874097536734853, + "grad_norm": 335.65625, + "learning_rate": 7.724951443648386e-06, + "loss": 17.4623, + "step": 192440 + }, + { + "epoch": 0.3887611760000323, + "grad_norm": 333.1289367675781, + "learning_rate": 7.724658765413897e-06, + "loss": 14.9412, + "step": 192450 + }, + { + "epoch": 0.3887813766327161, + "grad_norm": 62.65464782714844, + "learning_rate": 7.72436607389975e-06, + "loss": 28.4945, + "step": 192460 + }, + { + "epoch": 0.38880157726539993, + "grad_norm": 262.5960693359375, + "learning_rate": 7.724073369107376e-06, + "loss": 13.1065, + "step": 192470 + }, + { + "epoch": 0.38882177789808375, + "grad_norm": 345.6170959472656, + "learning_rate": 7.723780651038196e-06, + "loss": 31.3918, + "step": 192480 + }, + { + "epoch": 0.3888419785307676, + "grad_norm": 329.9227600097656, + "learning_rate": 7.723487919693642e-06, + "loss": 15.6708, + 
"step": 192490 + }, + { + "epoch": 0.3888621791634514, + "grad_norm": 561.7655639648438, + "learning_rate": 7.723195175075136e-06, + "loss": 21.4188, + "step": 192500 + }, + { + "epoch": 0.3888823797961352, + "grad_norm": 0.0, + "learning_rate": 7.722902417184109e-06, + "loss": 14.1593, + "step": 192510 + }, + { + "epoch": 0.38890258042881903, + "grad_norm": 409.5954284667969, + "learning_rate": 7.722609646021984e-06, + "loss": 22.6441, + "step": 192520 + }, + { + "epoch": 0.38892278106150285, + "grad_norm": 517.1251831054688, + "learning_rate": 7.72231686159019e-06, + "loss": 15.9057, + "step": 192530 + }, + { + "epoch": 0.3889429816941867, + "grad_norm": 144.67063903808594, + "learning_rate": 7.722024063890154e-06, + "loss": 14.5635, + "step": 192540 + }, + { + "epoch": 0.3889631823268705, + "grad_norm": 103.00260162353516, + "learning_rate": 7.721731252923305e-06, + "loss": 14.1731, + "step": 192550 + }, + { + "epoch": 0.3889833829595543, + "grad_norm": 426.76556396484375, + "learning_rate": 7.721438428691065e-06, + "loss": 27.5293, + "step": 192560 + }, + { + "epoch": 0.38900358359223813, + "grad_norm": 193.24110412597656, + "learning_rate": 7.721145591194865e-06, + "loss": 14.3015, + "step": 192570 + }, + { + "epoch": 0.3890237842249219, + "grad_norm": 259.59454345703125, + "learning_rate": 7.720852740436134e-06, + "loss": 16.3027, + "step": 192580 + }, + { + "epoch": 0.3890439848576057, + "grad_norm": 293.9356384277344, + "learning_rate": 7.720559876416293e-06, + "loss": 24.0709, + "step": 192590 + }, + { + "epoch": 0.38906418549028954, + "grad_norm": 293.2192687988281, + "learning_rate": 7.720266999136774e-06, + "loss": 25.922, + "step": 192600 + }, + { + "epoch": 0.38908438612297336, + "grad_norm": 728.6549072265625, + "learning_rate": 7.719974108599005e-06, + "loss": 24.7767, + "step": 192610 + }, + { + "epoch": 0.3891045867556572, + "grad_norm": 579.4147338867188, + "learning_rate": 7.719681204804413e-06, + "loss": 51.0215, + "step": 192620 + }, + { + "epoch": 0.389124787388341, + "grad_norm": 544.8616333007812, + "learning_rate": 7.71938828775442e-06, + "loss": 14.4531, + "step": 192630 + }, + { + "epoch": 0.3891449880210248, + "grad_norm": 413.74578857421875, + "learning_rate": 7.719095357450462e-06, + "loss": 20.1423, + "step": 192640 + }, + { + "epoch": 0.38916518865370864, + "grad_norm": 470.0211486816406, + "learning_rate": 7.718802413893963e-06, + "loss": 20.2963, + "step": 192650 + }, + { + "epoch": 0.38918538928639246, + "grad_norm": 65.1580810546875, + "learning_rate": 7.718509457086351e-06, + "loss": 22.7796, + "step": 192660 + }, + { + "epoch": 0.3892055899190763, + "grad_norm": 468.04290771484375, + "learning_rate": 7.718216487029051e-06, + "loss": 15.223, + "step": 192670 + }, + { + "epoch": 0.3892257905517601, + "grad_norm": 691.6110229492188, + "learning_rate": 7.717923503723496e-06, + "loss": 31.8396, + "step": 192680 + }, + { + "epoch": 0.3892459911844439, + "grad_norm": 285.8040771484375, + "learning_rate": 7.71763050717111e-06, + "loss": 20.0454, + "step": 192690 + }, + { + "epoch": 0.38926619181712774, + "grad_norm": 98.07317352294922, + "learning_rate": 7.717337497373324e-06, + "loss": 13.061, + "step": 192700 + }, + { + "epoch": 0.3892863924498115, + "grad_norm": 278.7805480957031, + "learning_rate": 7.717044474331565e-06, + "loss": 33.9224, + "step": 192710 + }, + { + "epoch": 0.3893065930824953, + "grad_norm": 951.0253295898438, + "learning_rate": 7.716751438047259e-06, + "loss": 18.3914, + "step": 192720 + }, + { + "epoch": 0.38932679371517914, + 
"grad_norm": 635.8330078125, + "learning_rate": 7.716458388521837e-06, + "loss": 40.7988, + "step": 192730 + }, + { + "epoch": 0.38934699434786296, + "grad_norm": 391.28814697265625, + "learning_rate": 7.716165325756727e-06, + "loss": 17.1659, + "step": 192740 + }, + { + "epoch": 0.3893671949805468, + "grad_norm": 255.3839569091797, + "learning_rate": 7.715872249753353e-06, + "loss": 13.6852, + "step": 192750 + }, + { + "epoch": 0.3893873956132306, + "grad_norm": 405.2212219238281, + "learning_rate": 7.715579160513152e-06, + "loss": 27.0415, + "step": 192760 + }, + { + "epoch": 0.3894075962459144, + "grad_norm": 38.79237747192383, + "learning_rate": 7.715286058037544e-06, + "loss": 20.1129, + "step": 192770 + }, + { + "epoch": 0.38942779687859824, + "grad_norm": 281.222412109375, + "learning_rate": 7.714992942327962e-06, + "loss": 17.8623, + "step": 192780 + }, + { + "epoch": 0.38944799751128206, + "grad_norm": 329.6753234863281, + "learning_rate": 7.714699813385834e-06, + "loss": 23.4179, + "step": 192790 + }, + { + "epoch": 0.3894681981439659, + "grad_norm": 270.1520080566406, + "learning_rate": 7.714406671212589e-06, + "loss": 39.082, + "step": 192800 + }, + { + "epoch": 0.3894883987766497, + "grad_norm": 484.9346923828125, + "learning_rate": 7.714113515809653e-06, + "loss": 15.3479, + "step": 192810 + }, + { + "epoch": 0.3895085994093335, + "grad_norm": 405.59246826171875, + "learning_rate": 7.71382034717846e-06, + "loss": 18.2914, + "step": 192820 + }, + { + "epoch": 0.38952880004201734, + "grad_norm": 92.56423950195312, + "learning_rate": 7.713527165320432e-06, + "loss": 26.7206, + "step": 192830 + }, + { + "epoch": 0.3895490006747011, + "grad_norm": 258.7493896484375, + "learning_rate": 7.713233970237004e-06, + "loss": 9.1603, + "step": 192840 + }, + { + "epoch": 0.3895692013073849, + "grad_norm": 52.760894775390625, + "learning_rate": 7.712940761929604e-06, + "loss": 15.5544, + "step": 192850 + }, + { + "epoch": 0.38958940194006875, + "grad_norm": 360.18975830078125, + "learning_rate": 7.712647540399658e-06, + "loss": 14.6412, + "step": 192860 + }, + { + "epoch": 0.38960960257275257, + "grad_norm": 476.0581359863281, + "learning_rate": 7.712354305648597e-06, + "loss": 14.0446, + "step": 192870 + }, + { + "epoch": 0.3896298032054364, + "grad_norm": 431.9054870605469, + "learning_rate": 7.71206105767785e-06, + "loss": 20.7469, + "step": 192880 + }, + { + "epoch": 0.3896500038381202, + "grad_norm": 445.0193786621094, + "learning_rate": 7.711767796488847e-06, + "loss": 13.5873, + "step": 192890 + }, + { + "epoch": 0.389670204470804, + "grad_norm": 973.5383911132812, + "learning_rate": 7.711474522083015e-06, + "loss": 25.6614, + "step": 192900 + }, + { + "epoch": 0.38969040510348785, + "grad_norm": 590.4380493164062, + "learning_rate": 7.711181234461786e-06, + "loss": 12.0582, + "step": 192910 + }, + { + "epoch": 0.38971060573617167, + "grad_norm": 314.5450744628906, + "learning_rate": 7.71088793362659e-06, + "loss": 19.7088, + "step": 192920 + }, + { + "epoch": 0.3897308063688555, + "grad_norm": 346.41595458984375, + "learning_rate": 7.710594619578853e-06, + "loss": 15.1289, + "step": 192930 + }, + { + "epoch": 0.3897510070015393, + "grad_norm": 643.9741821289062, + "learning_rate": 7.710301292320007e-06, + "loss": 18.48, + "step": 192940 + }, + { + "epoch": 0.3897712076342231, + "grad_norm": 338.6170654296875, + "learning_rate": 7.710007951851482e-06, + "loss": 13.5522, + "step": 192950 + }, + { + "epoch": 0.38979140826690695, + "grad_norm": 231.61537170410156, + "learning_rate": 
7.709714598174706e-06, + "loss": 10.4692, + "step": 192960 + }, + { + "epoch": 0.3898116088995907, + "grad_norm": 497.3166809082031, + "learning_rate": 7.709421231291112e-06, + "loss": 28.2675, + "step": 192970 + }, + { + "epoch": 0.38983180953227453, + "grad_norm": 1395.8343505859375, + "learning_rate": 7.709127851202126e-06, + "loss": 25.3676, + "step": 192980 + }, + { + "epoch": 0.38985201016495835, + "grad_norm": 246.81260681152344, + "learning_rate": 7.708834457909179e-06, + "loss": 15.7524, + "step": 192990 + }, + { + "epoch": 0.38987221079764217, + "grad_norm": 610.1190185546875, + "learning_rate": 7.7085410514137e-06, + "loss": 17.1402, + "step": 193000 + }, + { + "epoch": 0.389892411430326, + "grad_norm": 19.861980438232422, + "learning_rate": 7.708247631717122e-06, + "loss": 19.2926, + "step": 193010 + }, + { + "epoch": 0.3899126120630098, + "grad_norm": 415.28173828125, + "learning_rate": 7.707954198820873e-06, + "loss": 15.6091, + "step": 193020 + }, + { + "epoch": 0.38993281269569363, + "grad_norm": 84.7052001953125, + "learning_rate": 7.707660752726384e-06, + "loss": 16.5228, + "step": 193030 + }, + { + "epoch": 0.38995301332837745, + "grad_norm": 52.818817138671875, + "learning_rate": 7.707367293435086e-06, + "loss": 26.4588, + "step": 193040 + }, + { + "epoch": 0.38997321396106127, + "grad_norm": 332.837890625, + "learning_rate": 7.707073820948407e-06, + "loss": 16.7823, + "step": 193050 + }, + { + "epoch": 0.3899934145937451, + "grad_norm": 505.16143798828125, + "learning_rate": 7.706780335267778e-06, + "loss": 28.5368, + "step": 193060 + }, + { + "epoch": 0.3900136152264289, + "grad_norm": 519.2085571289062, + "learning_rate": 7.706486836394632e-06, + "loss": 29.9999, + "step": 193070 + }, + { + "epoch": 0.39003381585911273, + "grad_norm": 812.468017578125, + "learning_rate": 7.706193324330396e-06, + "loss": 32.646, + "step": 193080 + }, + { + "epoch": 0.3900540164917965, + "grad_norm": 126.26093292236328, + "learning_rate": 7.705899799076502e-06, + "loss": 16.4912, + "step": 193090 + }, + { + "epoch": 0.3900742171244803, + "grad_norm": 15.94903564453125, + "learning_rate": 7.70560626063438e-06, + "loss": 13.8317, + "step": 193100 + }, + { + "epoch": 0.39009441775716414, + "grad_norm": 410.4071960449219, + "learning_rate": 7.70531270900546e-06, + "loss": 22.1461, + "step": 193110 + }, + { + "epoch": 0.39011461838984796, + "grad_norm": 280.9446105957031, + "learning_rate": 7.705019144191178e-06, + "loss": 15.9059, + "step": 193120 + }, + { + "epoch": 0.3901348190225318, + "grad_norm": 121.53821563720703, + "learning_rate": 7.704725566192959e-06, + "loss": 9.9436, + "step": 193130 + }, + { + "epoch": 0.3901550196552156, + "grad_norm": 393.3848876953125, + "learning_rate": 7.704431975012234e-06, + "loss": 24.0284, + "step": 193140 + }, + { + "epoch": 0.3901752202878994, + "grad_norm": 205.39065551757812, + "learning_rate": 7.704138370650437e-06, + "loss": 20.3011, + "step": 193150 + }, + { + "epoch": 0.39019542092058324, + "grad_norm": 569.0725708007812, + "learning_rate": 7.703844753108997e-06, + "loss": 26.1612, + "step": 193160 + }, + { + "epoch": 0.39021562155326706, + "grad_norm": 1265.81396484375, + "learning_rate": 7.703551122389345e-06, + "loss": 21.4275, + "step": 193170 + }, + { + "epoch": 0.3902358221859509, + "grad_norm": 639.0967407226562, + "learning_rate": 7.703257478492915e-06, + "loss": 17.0219, + "step": 193180 + }, + { + "epoch": 0.3902560228186347, + "grad_norm": 619.8237915039062, + "learning_rate": 7.702963821421135e-06, + "loss": 19.2808, + "step": 
193190 + }, + { + "epoch": 0.3902762234513185, + "grad_norm": 208.57553100585938, + "learning_rate": 7.702670151175435e-06, + "loss": 20.6654, + "step": 193200 + }, + { + "epoch": 0.39029642408400234, + "grad_norm": 26.127004623413086, + "learning_rate": 7.70237646775725e-06, + "loss": 38.2808, + "step": 193210 + }, + { + "epoch": 0.3903166247166861, + "grad_norm": 897.0552368164062, + "learning_rate": 7.70208277116801e-06, + "loss": 24.5054, + "step": 193220 + }, + { + "epoch": 0.3903368253493699, + "grad_norm": 0.0, + "learning_rate": 7.701789061409148e-06, + "loss": 25.8468, + "step": 193230 + }, + { + "epoch": 0.39035702598205374, + "grad_norm": 482.36175537109375, + "learning_rate": 7.701495338482093e-06, + "loss": 24.4538, + "step": 193240 + }, + { + "epoch": 0.39037722661473756, + "grad_norm": 197.87158203125, + "learning_rate": 7.701201602388276e-06, + "loss": 28.4927, + "step": 193250 + }, + { + "epoch": 0.3903974272474214, + "grad_norm": 449.4877014160156, + "learning_rate": 7.70090785312913e-06, + "loss": 25.7932, + "step": 193260 + }, + { + "epoch": 0.3904176278801052, + "grad_norm": 384.9527893066406, + "learning_rate": 7.700614090706087e-06, + "loss": 54.9316, + "step": 193270 + }, + { + "epoch": 0.390437828512789, + "grad_norm": 582.7141723632812, + "learning_rate": 7.70032031512058e-06, + "loss": 16.4824, + "step": 193280 + }, + { + "epoch": 0.39045802914547284, + "grad_norm": 291.9700622558594, + "learning_rate": 7.700026526374038e-06, + "loss": 18.14, + "step": 193290 + }, + { + "epoch": 0.39047822977815666, + "grad_norm": 342.890380859375, + "learning_rate": 7.699732724467894e-06, + "loss": 34.7699, + "step": 193300 + }, + { + "epoch": 0.3904984304108405, + "grad_norm": 469.6289367675781, + "learning_rate": 7.69943890940358e-06, + "loss": 19.6911, + "step": 193310 + }, + { + "epoch": 0.3905186310435243, + "grad_norm": 337.8787841796875, + "learning_rate": 7.699145081182528e-06, + "loss": 8.6771, + "step": 193320 + }, + { + "epoch": 0.3905388316762081, + "grad_norm": 711.6795043945312, + "learning_rate": 7.69885123980617e-06, + "loss": 25.2997, + "step": 193330 + }, + { + "epoch": 0.39055903230889194, + "grad_norm": 521.74169921875, + "learning_rate": 7.69855738527594e-06, + "loss": 25.3729, + "step": 193340 + }, + { + "epoch": 0.3905792329415757, + "grad_norm": 870.8306884765625, + "learning_rate": 7.698263517593268e-06, + "loss": 35.401, + "step": 193350 + }, + { + "epoch": 0.3905994335742595, + "grad_norm": 312.2770080566406, + "learning_rate": 7.697969636759586e-06, + "loss": 36.4694, + "step": 193360 + }, + { + "epoch": 0.39061963420694334, + "grad_norm": 318.42279052734375, + "learning_rate": 7.69767574277633e-06, + "loss": 20.2815, + "step": 193370 + }, + { + "epoch": 0.39063983483962716, + "grad_norm": 171.6785125732422, + "learning_rate": 7.697381835644926e-06, + "loss": 19.937, + "step": 193380 + }, + { + "epoch": 0.390660035472311, + "grad_norm": 838.3765258789062, + "learning_rate": 7.697087915366811e-06, + "loss": 27.686, + "step": 193390 + }, + { + "epoch": 0.3906802361049948, + "grad_norm": 327.0580139160156, + "learning_rate": 7.696793981943418e-06, + "loss": 19.0435, + "step": 193400 + }, + { + "epoch": 0.3907004367376786, + "grad_norm": 377.91900634765625, + "learning_rate": 7.696500035376177e-06, + "loss": 25.9461, + "step": 193410 + }, + { + "epoch": 0.39072063737036244, + "grad_norm": 189.01881408691406, + "learning_rate": 7.696206075666523e-06, + "loss": 23.5916, + "step": 193420 + }, + { + "epoch": 0.39074083800304626, + "grad_norm": 
552.4807739257812, + "learning_rate": 7.695912102815886e-06, + "loss": 17.8163, + "step": 193430 + }, + { + "epoch": 0.3907610386357301, + "grad_norm": 327.3682861328125, + "learning_rate": 7.6956181168257e-06, + "loss": 22.9008, + "step": 193440 + }, + { + "epoch": 0.3907812392684139, + "grad_norm": 214.7379608154297, + "learning_rate": 7.695324117697401e-06, + "loss": 40.267, + "step": 193450 + }, + { + "epoch": 0.3908014399010977, + "grad_norm": 296.865234375, + "learning_rate": 7.695030105432417e-06, + "loss": 18.1039, + "step": 193460 + }, + { + "epoch": 0.39082164053378154, + "grad_norm": 265.7020263671875, + "learning_rate": 7.694736080032185e-06, + "loss": 14.8433, + "step": 193470 + }, + { + "epoch": 0.3908418411664653, + "grad_norm": 197.13731384277344, + "learning_rate": 7.694442041498133e-06, + "loss": 14.579, + "step": 193480 + }, + { + "epoch": 0.39086204179914913, + "grad_norm": 557.2015991210938, + "learning_rate": 7.6941479898317e-06, + "loss": 24.7952, + "step": 193490 + }, + { + "epoch": 0.39088224243183295, + "grad_norm": 260.6639099121094, + "learning_rate": 7.693853925034316e-06, + "loss": 29.1451, + "step": 193500 + }, + { + "epoch": 0.39090244306451677, + "grad_norm": 322.4444274902344, + "learning_rate": 7.693559847107415e-06, + "loss": 9.7132, + "step": 193510 + }, + { + "epoch": 0.3909226436972006, + "grad_norm": 284.7377624511719, + "learning_rate": 7.693265756052427e-06, + "loss": 23.6692, + "step": 193520 + }, + { + "epoch": 0.3909428443298844, + "grad_norm": 315.2529602050781, + "learning_rate": 7.692971651870793e-06, + "loss": 12.345, + "step": 193530 + }, + { + "epoch": 0.39096304496256823, + "grad_norm": 373.9523620605469, + "learning_rate": 7.692677534563939e-06, + "loss": 24.3102, + "step": 193540 + }, + { + "epoch": 0.39098324559525205, + "grad_norm": 697.5858154296875, + "learning_rate": 7.692383404133302e-06, + "loss": 25.1512, + "step": 193550 + }, + { + "epoch": 0.39100344622793587, + "grad_norm": 700.3854370117188, + "learning_rate": 7.692089260580315e-06, + "loss": 13.5812, + "step": 193560 + }, + { + "epoch": 0.3910236468606197, + "grad_norm": 254.58399963378906, + "learning_rate": 7.69179510390641e-06, + "loss": 17.3985, + "step": 193570 + }, + { + "epoch": 0.3910438474933035, + "grad_norm": 45.8987922668457, + "learning_rate": 7.691500934113022e-06, + "loss": 25.2974, + "step": 193580 + }, + { + "epoch": 0.39106404812598733, + "grad_norm": 662.2791748046875, + "learning_rate": 7.691206751201588e-06, + "loss": 23.1583, + "step": 193590 + }, + { + "epoch": 0.39108424875867115, + "grad_norm": 540.3480224609375, + "learning_rate": 7.690912555173536e-06, + "loss": 36.1515, + "step": 193600 + }, + { + "epoch": 0.3911044493913549, + "grad_norm": 897.8367919921875, + "learning_rate": 7.690618346030303e-06, + "loss": 27.4172, + "step": 193610 + }, + { + "epoch": 0.39112465002403873, + "grad_norm": 32.94099044799805, + "learning_rate": 7.690324123773324e-06, + "loss": 22.4372, + "step": 193620 + }, + { + "epoch": 0.39114485065672255, + "grad_norm": 360.4986877441406, + "learning_rate": 7.69002988840403e-06, + "loss": 14.5806, + "step": 193630 + }, + { + "epoch": 0.3911650512894064, + "grad_norm": 89.0041275024414, + "learning_rate": 7.689735639923857e-06, + "loss": 15.5969, + "step": 193640 + }, + { + "epoch": 0.3911852519220902, + "grad_norm": 516.5864868164062, + "learning_rate": 7.689441378334239e-06, + "loss": 15.9018, + "step": 193650 + }, + { + "epoch": 0.391205452554774, + "grad_norm": 640.2452392578125, + "learning_rate": 7.68914710363661e-06, + 
"loss": 20.0243, + "step": 193660 + }, + { + "epoch": 0.39122565318745783, + "grad_norm": 348.3353271484375, + "learning_rate": 7.688852815832405e-06, + "loss": 9.4373, + "step": 193670 + }, + { + "epoch": 0.39124585382014165, + "grad_norm": 556.3218383789062, + "learning_rate": 7.688558514923055e-06, + "loss": 26.3589, + "step": 193680 + }, + { + "epoch": 0.3912660544528255, + "grad_norm": 357.5398864746094, + "learning_rate": 7.688264200909998e-06, + "loss": 21.5004, + "step": 193690 + }, + { + "epoch": 0.3912862550855093, + "grad_norm": 262.3053894042969, + "learning_rate": 7.687969873794667e-06, + "loss": 23.7308, + "step": 193700 + }, + { + "epoch": 0.3913064557181931, + "grad_norm": 1059.08154296875, + "learning_rate": 7.687675533578497e-06, + "loss": 20.6328, + "step": 193710 + }, + { + "epoch": 0.39132665635087693, + "grad_norm": 896.2767333984375, + "learning_rate": 7.687381180262924e-06, + "loss": 37.2704, + "step": 193720 + }, + { + "epoch": 0.3913468569835607, + "grad_norm": 287.42926025390625, + "learning_rate": 7.687086813849378e-06, + "loss": 18.2111, + "step": 193730 + }, + { + "epoch": 0.3913670576162445, + "grad_norm": 247.09051513671875, + "learning_rate": 7.6867924343393e-06, + "loss": 23.5803, + "step": 193740 + }, + { + "epoch": 0.39138725824892834, + "grad_norm": 257.3861083984375, + "learning_rate": 7.686498041734121e-06, + "loss": 30.9001, + "step": 193750 + }, + { + "epoch": 0.39140745888161216, + "grad_norm": 68.65151977539062, + "learning_rate": 7.686203636035274e-06, + "loss": 15.7408, + "step": 193760 + }, + { + "epoch": 0.391427659514296, + "grad_norm": 1279.1070556640625, + "learning_rate": 7.685909217244198e-06, + "loss": 45.0109, + "step": 193770 + }, + { + "epoch": 0.3914478601469798, + "grad_norm": 137.1002960205078, + "learning_rate": 7.685614785362325e-06, + "loss": 19.9498, + "step": 193780 + }, + { + "epoch": 0.3914680607796636, + "grad_norm": 151.82278442382812, + "learning_rate": 7.685320340391093e-06, + "loss": 11.8919, + "step": 193790 + }, + { + "epoch": 0.39148826141234744, + "grad_norm": 265.51702880859375, + "learning_rate": 7.685025882331936e-06, + "loss": 32.6317, + "step": 193800 + }, + { + "epoch": 0.39150846204503126, + "grad_norm": 589.0602416992188, + "learning_rate": 7.684731411186285e-06, + "loss": 21.0638, + "step": 193810 + }, + { + "epoch": 0.3915286626777151, + "grad_norm": 2400.545166015625, + "learning_rate": 7.684436926955584e-06, + "loss": 50.3162, + "step": 193820 + }, + { + "epoch": 0.3915488633103989, + "grad_norm": 286.2929992675781, + "learning_rate": 7.684142429641258e-06, + "loss": 17.3957, + "step": 193830 + }, + { + "epoch": 0.3915690639430827, + "grad_norm": 363.6485595703125, + "learning_rate": 7.683847919244748e-06, + "loss": 19.5178, + "step": 193840 + }, + { + "epoch": 0.39158926457576654, + "grad_norm": 141.14840698242188, + "learning_rate": 7.683553395767492e-06, + "loss": 16.3106, + "step": 193850 + }, + { + "epoch": 0.3916094652084503, + "grad_norm": 747.1369018554688, + "learning_rate": 7.683258859210921e-06, + "loss": 38.7589, + "step": 193860 + }, + { + "epoch": 0.3916296658411341, + "grad_norm": 57.289024353027344, + "learning_rate": 7.68296430957647e-06, + "loss": 20.7131, + "step": 193870 + }, + { + "epoch": 0.39164986647381794, + "grad_norm": 173.58657836914062, + "learning_rate": 7.682669746865577e-06, + "loss": 28.5628, + "step": 193880 + }, + { + "epoch": 0.39167006710650176, + "grad_norm": 152.33546447753906, + "learning_rate": 7.682375171079677e-06, + "loss": 13.3818, + "step": 193890 + }, + { + 
"epoch": 0.3916902677391856, + "grad_norm": 517.7659912109375, + "learning_rate": 7.682080582220206e-06, + "loss": 25.9379, + "step": 193900 + }, + { + "epoch": 0.3917104683718694, + "grad_norm": 99.8304443359375, + "learning_rate": 7.681785980288601e-06, + "loss": 24.2838, + "step": 193910 + }, + { + "epoch": 0.3917306690045532, + "grad_norm": 317.5882873535156, + "learning_rate": 7.681491365286294e-06, + "loss": 15.5712, + "step": 193920 + }, + { + "epoch": 0.39175086963723704, + "grad_norm": 599.1548461914062, + "learning_rate": 7.681196737214725e-06, + "loss": 27.534, + "step": 193930 + }, + { + "epoch": 0.39177107026992086, + "grad_norm": 275.6092224121094, + "learning_rate": 7.680902096075327e-06, + "loss": 10.7184, + "step": 193940 + }, + { + "epoch": 0.3917912709026047, + "grad_norm": 411.04400634765625, + "learning_rate": 7.680607441869538e-06, + "loss": 14.6416, + "step": 193950 + }, + { + "epoch": 0.3918114715352885, + "grad_norm": 498.05950927734375, + "learning_rate": 7.680312774598794e-06, + "loss": 25.6096, + "step": 193960 + }, + { + "epoch": 0.3918316721679723, + "grad_norm": 96.87602996826172, + "learning_rate": 7.68001809426453e-06, + "loss": 18.0629, + "step": 193970 + }, + { + "epoch": 0.39185187280065614, + "grad_norm": 346.5243225097656, + "learning_rate": 7.679723400868181e-06, + "loss": 19.9727, + "step": 193980 + }, + { + "epoch": 0.3918720734333399, + "grad_norm": 462.3692321777344, + "learning_rate": 7.679428694411188e-06, + "loss": 17.1533, + "step": 193990 + }, + { + "epoch": 0.3918922740660237, + "grad_norm": 357.5788879394531, + "learning_rate": 7.679133974894984e-06, + "loss": 18.8631, + "step": 194000 + }, + { + "epoch": 0.39191247469870755, + "grad_norm": 872.04248046875, + "learning_rate": 7.678839242321005e-06, + "loss": 21.2325, + "step": 194010 + }, + { + "epoch": 0.39193267533139137, + "grad_norm": 219.5369415283203, + "learning_rate": 7.67854449669069e-06, + "loss": 17.9927, + "step": 194020 + }, + { + "epoch": 0.3919528759640752, + "grad_norm": 701.3224487304688, + "learning_rate": 7.678249738005473e-06, + "loss": 25.9722, + "step": 194030 + }, + { + "epoch": 0.391973076596759, + "grad_norm": 110.27085876464844, + "learning_rate": 7.677954966266791e-06, + "loss": 17.4507, + "step": 194040 + }, + { + "epoch": 0.3919932772294428, + "grad_norm": 402.5440673828125, + "learning_rate": 7.67766018147608e-06, + "loss": 19.4773, + "step": 194050 + }, + { + "epoch": 0.39201347786212665, + "grad_norm": 224.45831298828125, + "learning_rate": 7.677365383634782e-06, + "loss": 15.217, + "step": 194060 + }, + { + "epoch": 0.39203367849481047, + "grad_norm": 18.64542579650879, + "learning_rate": 7.677070572744327e-06, + "loss": 14.8555, + "step": 194070 + }, + { + "epoch": 0.3920538791274943, + "grad_norm": 448.8019104003906, + "learning_rate": 7.676775748806156e-06, + "loss": 16.4689, + "step": 194080 + }, + { + "epoch": 0.3920740797601781, + "grad_norm": 680.8139038085938, + "learning_rate": 7.676480911821705e-06, + "loss": 19.097, + "step": 194090 + }, + { + "epoch": 0.3920942803928619, + "grad_norm": 333.9926452636719, + "learning_rate": 7.676186061792408e-06, + "loss": 28.193, + "step": 194100 + }, + { + "epoch": 0.39211448102554575, + "grad_norm": 250.51133728027344, + "learning_rate": 7.675891198719707e-06, + "loss": 44.4848, + "step": 194110 + }, + { + "epoch": 0.3921346816582295, + "grad_norm": 350.780517578125, + "learning_rate": 7.675596322605036e-06, + "loss": 10.5025, + "step": 194120 + }, + { + "epoch": 0.39215488229091333, + "grad_norm": 
682.9677124023438, + "learning_rate": 7.675301433449833e-06, + "loss": 23.318, + "step": 194130 + }, + { + "epoch": 0.39217508292359715, + "grad_norm": 266.6212158203125, + "learning_rate": 7.675006531255537e-06, + "loss": 19.9699, + "step": 194140 + }, + { + "epoch": 0.39219528355628097, + "grad_norm": 493.7453308105469, + "learning_rate": 7.67471161602358e-06, + "loss": 23.1714, + "step": 194150 + }, + { + "epoch": 0.3922154841889648, + "grad_norm": 237.18321228027344, + "learning_rate": 7.674416687755406e-06, + "loss": 11.0524, + "step": 194160 + }, + { + "epoch": 0.3922356848216486, + "grad_norm": 233.67233276367188, + "learning_rate": 7.67412174645245e-06, + "loss": 26.9773, + "step": 194170 + }, + { + "epoch": 0.39225588545433243, + "grad_norm": 503.0744934082031, + "learning_rate": 7.673826792116146e-06, + "loss": 19.6552, + "step": 194180 + }, + { + "epoch": 0.39227608608701625, + "grad_norm": 1173.34814453125, + "learning_rate": 7.673531824747937e-06, + "loss": 33.5286, + "step": 194190 + }, + { + "epoch": 0.39229628671970007, + "grad_norm": 446.452880859375, + "learning_rate": 7.673236844349257e-06, + "loss": 18.7192, + "step": 194200 + }, + { + "epoch": 0.3923164873523839, + "grad_norm": 192.9727783203125, + "learning_rate": 7.672941850921545e-06, + "loss": 17.7934, + "step": 194210 + }, + { + "epoch": 0.3923366879850677, + "grad_norm": 660.9296264648438, + "learning_rate": 7.67264684446624e-06, + "loss": 16.9177, + "step": 194220 + }, + { + "epoch": 0.39235688861775153, + "grad_norm": 330.9119567871094, + "learning_rate": 7.672351824984777e-06, + "loss": 29.6802, + "step": 194230 + }, + { + "epoch": 0.39237708925043535, + "grad_norm": 202.8923797607422, + "learning_rate": 7.672056792478595e-06, + "loss": 11.5582, + "step": 194240 + }, + { + "epoch": 0.3923972898831191, + "grad_norm": 226.911865234375, + "learning_rate": 7.671761746949133e-06, + "loss": 27.6348, + "step": 194250 + }, + { + "epoch": 0.39241749051580294, + "grad_norm": 316.6273498535156, + "learning_rate": 7.671466688397828e-06, + "loss": 20.0882, + "step": 194260 + }, + { + "epoch": 0.39243769114848676, + "grad_norm": 129.1420135498047, + "learning_rate": 7.671171616826117e-06, + "loss": 19.2911, + "step": 194270 + }, + { + "epoch": 0.3924578917811706, + "grad_norm": 636.3347778320312, + "learning_rate": 7.670876532235444e-06, + "loss": 16.3546, + "step": 194280 + }, + { + "epoch": 0.3924780924138544, + "grad_norm": 334.3447265625, + "learning_rate": 7.670581434627237e-06, + "loss": 45.9473, + "step": 194290 + }, + { + "epoch": 0.3924982930465382, + "grad_norm": 63.227874755859375, + "learning_rate": 7.670286324002943e-06, + "loss": 8.8525, + "step": 194300 + }, + { + "epoch": 0.39251849367922204, + "grad_norm": 351.48858642578125, + "learning_rate": 7.669991200363997e-06, + "loss": 16.564, + "step": 194310 + }, + { + "epoch": 0.39253869431190586, + "grad_norm": 22.39614486694336, + "learning_rate": 7.669696063711837e-06, + "loss": 19.5439, + "step": 194320 + }, + { + "epoch": 0.3925588949445897, + "grad_norm": 349.4576110839844, + "learning_rate": 7.669400914047903e-06, + "loss": 17.9836, + "step": 194330 + }, + { + "epoch": 0.3925790955772735, + "grad_norm": 331.0430603027344, + "learning_rate": 7.669105751373633e-06, + "loss": 16.4443, + "step": 194340 + }, + { + "epoch": 0.3925992962099573, + "grad_norm": 414.0130920410156, + "learning_rate": 7.668810575690465e-06, + "loss": 21.053, + "step": 194350 + }, + { + "epoch": 0.39261949684264114, + "grad_norm": 350.8014831542969, + "learning_rate": 
7.668515386999837e-06, + "loss": 22.02, + "step": 194360 + }, + { + "epoch": 0.3926396974753249, + "grad_norm": 351.5557556152344, + "learning_rate": 7.66822018530319e-06, + "loss": 24.537, + "step": 194370 + }, + { + "epoch": 0.3926598981080087, + "grad_norm": 233.8751220703125, + "learning_rate": 7.667924970601961e-06, + "loss": 29.8172, + "step": 194380 + }, + { + "epoch": 0.39268009874069254, + "grad_norm": 894.9293823242188, + "learning_rate": 7.667629742897589e-06, + "loss": 30.2181, + "step": 194390 + }, + { + "epoch": 0.39270029937337636, + "grad_norm": 247.10067749023438, + "learning_rate": 7.667334502191514e-06, + "loss": 12.0517, + "step": 194400 + }, + { + "epoch": 0.3927205000060602, + "grad_norm": 409.8434143066406, + "learning_rate": 7.667039248485173e-06, + "loss": 15.6907, + "step": 194410 + }, + { + "epoch": 0.392740700638744, + "grad_norm": 364.4486389160156, + "learning_rate": 7.666743981780007e-06, + "loss": 20.5752, + "step": 194420 + }, + { + "epoch": 0.3927609012714278, + "grad_norm": 112.05078125, + "learning_rate": 7.666448702077454e-06, + "loss": 14.4298, + "step": 194430 + }, + { + "epoch": 0.39278110190411164, + "grad_norm": 246.4239501953125, + "learning_rate": 7.666153409378954e-06, + "loss": 13.8918, + "step": 194440 + }, + { + "epoch": 0.39280130253679546, + "grad_norm": 729.3032836914062, + "learning_rate": 7.665858103685944e-06, + "loss": 22.3902, + "step": 194450 + }, + { + "epoch": 0.3928215031694793, + "grad_norm": 496.1769714355469, + "learning_rate": 7.665562784999865e-06, + "loss": 28.3222, + "step": 194460 + }, + { + "epoch": 0.3928417038021631, + "grad_norm": 784.860595703125, + "learning_rate": 7.665267453322158e-06, + "loss": 18.3699, + "step": 194470 + }, + { + "epoch": 0.3928619044348469, + "grad_norm": 225.1392059326172, + "learning_rate": 7.664972108654261e-06, + "loss": 21.1566, + "step": 194480 + }, + { + "epoch": 0.39288210506753074, + "grad_norm": 371.5826721191406, + "learning_rate": 7.664676750997611e-06, + "loss": 18.9349, + "step": 194490 + }, + { + "epoch": 0.3929023057002145, + "grad_norm": 220.47027587890625, + "learning_rate": 7.66438138035365e-06, + "loss": 11.1904, + "step": 194500 + }, + { + "epoch": 0.3929225063328983, + "grad_norm": 239.3990478515625, + "learning_rate": 7.664085996723819e-06, + "loss": 16.0179, + "step": 194510 + }, + { + "epoch": 0.39294270696558214, + "grad_norm": 57.55672836303711, + "learning_rate": 7.663790600109554e-06, + "loss": 32.0592, + "step": 194520 + }, + { + "epoch": 0.39296290759826596, + "grad_norm": 682.9308471679688, + "learning_rate": 7.663495190512297e-06, + "loss": 27.1631, + "step": 194530 + }, + { + "epoch": 0.3929831082309498, + "grad_norm": 966.5193481445312, + "learning_rate": 7.663199767933489e-06, + "loss": 23.8877, + "step": 194540 + }, + { + "epoch": 0.3930033088636336, + "grad_norm": 568.3528442382812, + "learning_rate": 7.662904332374568e-06, + "loss": 32.1116, + "step": 194550 + }, + { + "epoch": 0.3930235094963174, + "grad_norm": 1156.96875, + "learning_rate": 7.662608883836975e-06, + "loss": 27.4185, + "step": 194560 + }, + { + "epoch": 0.39304371012900124, + "grad_norm": 294.32171630859375, + "learning_rate": 7.662313422322147e-06, + "loss": 19.2916, + "step": 194570 + }, + { + "epoch": 0.39306391076168506, + "grad_norm": 581.0121459960938, + "learning_rate": 7.662017947831528e-06, + "loss": 19.6493, + "step": 194580 + }, + { + "epoch": 0.3930841113943689, + "grad_norm": 762.0816650390625, + "learning_rate": 7.661722460366556e-06, + "loss": 36.2527, + "step": 194590 + }, + 
{ + "epoch": 0.3931043120270527, + "grad_norm": 326.9516906738281, + "learning_rate": 7.66142695992867e-06, + "loss": 15.3531, + "step": 194600 + }, + { + "epoch": 0.3931245126597365, + "grad_norm": 295.166259765625, + "learning_rate": 7.661131446519314e-06, + "loss": 24.2526, + "step": 194610 + }, + { + "epoch": 0.39314471329242034, + "grad_norm": 699.7713623046875, + "learning_rate": 7.660835920139926e-06, + "loss": 30.8892, + "step": 194620 + }, + { + "epoch": 0.3931649139251041, + "grad_norm": 365.5238952636719, + "learning_rate": 7.660540380791944e-06, + "loss": 13.6505, + "step": 194630 + }, + { + "epoch": 0.39318511455778793, + "grad_norm": 188.6708221435547, + "learning_rate": 7.660244828476812e-06, + "loss": 16.7058, + "step": 194640 + }, + { + "epoch": 0.39320531519047175, + "grad_norm": 623.6669311523438, + "learning_rate": 7.659949263195971e-06, + "loss": 31.2965, + "step": 194650 + }, + { + "epoch": 0.39322551582315557, + "grad_norm": 505.28240966796875, + "learning_rate": 7.659653684950859e-06, + "loss": 13.8678, + "step": 194660 + }, + { + "epoch": 0.3932457164558394, + "grad_norm": 212.126953125, + "learning_rate": 7.659358093742917e-06, + "loss": 13.8969, + "step": 194670 + }, + { + "epoch": 0.3932659170885232, + "grad_norm": 942.80322265625, + "learning_rate": 7.659062489573585e-06, + "loss": 29.1217, + "step": 194680 + }, + { + "epoch": 0.39328611772120703, + "grad_norm": 313.01116943359375, + "learning_rate": 7.658766872444307e-06, + "loss": 12.3671, + "step": 194690 + }, + { + "epoch": 0.39330631835389085, + "grad_norm": 293.50726318359375, + "learning_rate": 7.658471242356521e-06, + "loss": 15.6545, + "step": 194700 + }, + { + "epoch": 0.39332651898657467, + "grad_norm": 198.49395751953125, + "learning_rate": 7.658175599311667e-06, + "loss": 22.4372, + "step": 194710 + }, + { + "epoch": 0.3933467196192585, + "grad_norm": 486.50640869140625, + "learning_rate": 7.65787994331119e-06, + "loss": 34.3354, + "step": 194720 + }, + { + "epoch": 0.3933669202519423, + "grad_norm": 602.3167114257812, + "learning_rate": 7.657584274356529e-06, + "loss": 26.0346, + "step": 194730 + }, + { + "epoch": 0.39338712088462613, + "grad_norm": 655.1782836914062, + "learning_rate": 7.657288592449124e-06, + "loss": 23.3669, + "step": 194740 + }, + { + "epoch": 0.39340732151730995, + "grad_norm": 334.3829345703125, + "learning_rate": 7.656992897590416e-06, + "loss": 21.9736, + "step": 194750 + }, + { + "epoch": 0.3934275221499937, + "grad_norm": 420.82806396484375, + "learning_rate": 7.656697189781846e-06, + "loss": 14.5847, + "step": 194760 + }, + { + "epoch": 0.39344772278267753, + "grad_norm": 351.6455078125, + "learning_rate": 7.656401469024856e-06, + "loss": 17.5412, + "step": 194770 + }, + { + "epoch": 0.39346792341536135, + "grad_norm": 207.95065307617188, + "learning_rate": 7.65610573532089e-06, + "loss": 28.8794, + "step": 194780 + }, + { + "epoch": 0.3934881240480452, + "grad_norm": 291.03179931640625, + "learning_rate": 7.655809988671383e-06, + "loss": 50.9971, + "step": 194790 + }, + { + "epoch": 0.393508324680729, + "grad_norm": 432.8161926269531, + "learning_rate": 7.655514229077784e-06, + "loss": 17.4149, + "step": 194800 + }, + { + "epoch": 0.3935285253134128, + "grad_norm": 554.6521606445312, + "learning_rate": 7.65521845654153e-06, + "loss": 12.0385, + "step": 194810 + }, + { + "epoch": 0.39354872594609663, + "grad_norm": 356.1087341308594, + "learning_rate": 7.654922671064062e-06, + "loss": 21.1111, + "step": 194820 + }, + { + "epoch": 0.39356892657878045, + "grad_norm": 
248.7077178955078, + "learning_rate": 7.654626872646824e-06, + "loss": 26.6326, + "step": 194830 + }, + { + "epoch": 0.3935891272114643, + "grad_norm": 127.09835815429688, + "learning_rate": 7.654331061291254e-06, + "loss": 40.9527, + "step": 194840 + }, + { + "epoch": 0.3936093278441481, + "grad_norm": 168.29798889160156, + "learning_rate": 7.6540352369988e-06, + "loss": 30.0293, + "step": 194850 + }, + { + "epoch": 0.3936295284768319, + "grad_norm": 252.59275817871094, + "learning_rate": 7.653739399770897e-06, + "loss": 15.8375, + "step": 194860 + }, + { + "epoch": 0.39364972910951573, + "grad_norm": 602.8658447265625, + "learning_rate": 7.653443549608993e-06, + "loss": 37.0763, + "step": 194870 + }, + { + "epoch": 0.3936699297421995, + "grad_norm": 411.3060302734375, + "learning_rate": 7.653147686514523e-06, + "loss": 32.9975, + "step": 194880 + }, + { + "epoch": 0.3936901303748833, + "grad_norm": 624.1795043945312, + "learning_rate": 7.652851810488937e-06, + "loss": 33.5942, + "step": 194890 + }, + { + "epoch": 0.39371033100756714, + "grad_norm": 113.71038055419922, + "learning_rate": 7.652555921533671e-06, + "loss": 9.8372, + "step": 194900 + }, + { + "epoch": 0.39373053164025096, + "grad_norm": 820.1229858398438, + "learning_rate": 7.65226001965017e-06, + "loss": 25.7756, + "step": 194910 + }, + { + "epoch": 0.3937507322729348, + "grad_norm": 383.6437683105469, + "learning_rate": 7.651964104839876e-06, + "loss": 19.5417, + "step": 194920 + }, + { + "epoch": 0.3937709329056186, + "grad_norm": 464.3129577636719, + "learning_rate": 7.651668177104227e-06, + "loss": 15.4653, + "step": 194930 + }, + { + "epoch": 0.3937911335383024, + "grad_norm": 97.2220230102539, + "learning_rate": 7.651372236444673e-06, + "loss": 22.6917, + "step": 194940 + }, + { + "epoch": 0.39381133417098624, + "grad_norm": 459.4883117675781, + "learning_rate": 7.65107628286265e-06, + "loss": 21.9122, + "step": 194950 + }, + { + "epoch": 0.39383153480367006, + "grad_norm": 702.5147094726562, + "learning_rate": 7.650780316359604e-06, + "loss": 19.4244, + "step": 194960 + }, + { + "epoch": 0.3938517354363539, + "grad_norm": 127.46588897705078, + "learning_rate": 7.650484336936976e-06, + "loss": 14.7059, + "step": 194970 + }, + { + "epoch": 0.3938719360690377, + "grad_norm": 164.09481811523438, + "learning_rate": 7.650188344596207e-06, + "loss": 13.868, + "step": 194980 + }, + { + "epoch": 0.3938921367017215, + "grad_norm": 575.3648071289062, + "learning_rate": 7.649892339338743e-06, + "loss": 27.2575, + "step": 194990 + }, + { + "epoch": 0.39391233733440534, + "grad_norm": 111.28274536132812, + "learning_rate": 7.649596321166024e-06, + "loss": 15.7311, + "step": 195000 + }, + { + "epoch": 0.3939325379670891, + "grad_norm": 645.0296630859375, + "learning_rate": 7.649300290079497e-06, + "loss": 23.9902, + "step": 195010 + }, + { + "epoch": 0.3939527385997729, + "grad_norm": 1080.9793701171875, + "learning_rate": 7.6490042460806e-06, + "loss": 31.0383, + "step": 195020 + }, + { + "epoch": 0.39397293923245674, + "grad_norm": 611.94921875, + "learning_rate": 7.648708189170777e-06, + "loss": 30.8021, + "step": 195030 + }, + { + "epoch": 0.39399313986514056, + "grad_norm": 380.3797607421875, + "learning_rate": 7.648412119351471e-06, + "loss": 21.9114, + "step": 195040 + }, + { + "epoch": 0.3940133404978244, + "grad_norm": 234.54901123046875, + "learning_rate": 7.648116036624125e-06, + "loss": 36.2674, + "step": 195050 + }, + { + "epoch": 0.3940335411305082, + "grad_norm": 327.9686279296875, + "learning_rate": 
7.647819940990184e-06, + "loss": 15.7611, + "step": 195060 + }, + { + "epoch": 0.394053741763192, + "grad_norm": 187.89361572265625, + "learning_rate": 7.647523832451091e-06, + "loss": 28.8206, + "step": 195070 + }, + { + "epoch": 0.39407394239587584, + "grad_norm": 289.9301452636719, + "learning_rate": 7.647227711008288e-06, + "loss": 24.5769, + "step": 195080 + }, + { + "epoch": 0.39409414302855966, + "grad_norm": 3.1522254943847656, + "learning_rate": 7.646931576663215e-06, + "loss": 24.1421, + "step": 195090 + }, + { + "epoch": 0.3941143436612435, + "grad_norm": 4.271010398864746, + "learning_rate": 7.646635429417322e-06, + "loss": 12.4696, + "step": 195100 + }, + { + "epoch": 0.3941345442939273, + "grad_norm": 267.3883056640625, + "learning_rate": 7.646339269272045e-06, + "loss": 23.5687, + "step": 195110 + }, + { + "epoch": 0.3941547449266111, + "grad_norm": 338.36083984375, + "learning_rate": 7.646043096228835e-06, + "loss": 18.5252, + "step": 195120 + }, + { + "epoch": 0.39417494555929494, + "grad_norm": 473.6835632324219, + "learning_rate": 7.645746910289128e-06, + "loss": 19.1282, + "step": 195130 + }, + { + "epoch": 0.3941951461919787, + "grad_norm": 211.53900146484375, + "learning_rate": 7.645450711454377e-06, + "loss": 15.0999, + "step": 195140 + }, + { + "epoch": 0.3942153468246625, + "grad_norm": 588.5606079101562, + "learning_rate": 7.645154499726017e-06, + "loss": 23.0524, + "step": 195150 + }, + { + "epoch": 0.39423554745734635, + "grad_norm": 350.8065490722656, + "learning_rate": 7.644858275105494e-06, + "loss": 12.7395, + "step": 195160 + }, + { + "epoch": 0.39425574809003017, + "grad_norm": 261.5005798339844, + "learning_rate": 7.644562037594254e-06, + "loss": 16.985, + "step": 195170 + }, + { + "epoch": 0.394275948722714, + "grad_norm": 599.6815185546875, + "learning_rate": 7.644265787193739e-06, + "loss": 20.9377, + "step": 195180 + }, + { + "epoch": 0.3942961493553978, + "grad_norm": 790.2697143554688, + "learning_rate": 7.643969523905392e-06, + "loss": 23.1945, + "step": 195190 + }, + { + "epoch": 0.3943163499880816, + "grad_norm": 480.89324951171875, + "learning_rate": 7.64367324773066e-06, + "loss": 37.3124, + "step": 195200 + }, + { + "epoch": 0.39433655062076545, + "grad_norm": 286.4555358886719, + "learning_rate": 7.643376958670983e-06, + "loss": 22.1148, + "step": 195210 + }, + { + "epoch": 0.39435675125344927, + "grad_norm": 358.4879455566406, + "learning_rate": 7.643080656727809e-06, + "loss": 19.6391, + "step": 195220 + }, + { + "epoch": 0.3943769518861331, + "grad_norm": 455.17266845703125, + "learning_rate": 7.642784341902581e-06, + "loss": 13.485, + "step": 195230 + }, + { + "epoch": 0.3943971525188169, + "grad_norm": 251.19456481933594, + "learning_rate": 7.642488014196742e-06, + "loss": 25.8569, + "step": 195240 + }, + { + "epoch": 0.3944173531515007, + "grad_norm": 478.7985534667969, + "learning_rate": 7.642191673611737e-06, + "loss": 28.5158, + "step": 195250 + }, + { + "epoch": 0.39443755378418455, + "grad_norm": 328.29254150390625, + "learning_rate": 7.641895320149008e-06, + "loss": 21.5013, + "step": 195260 + }, + { + "epoch": 0.3944577544168683, + "grad_norm": 197.05569458007812, + "learning_rate": 7.641598953810006e-06, + "loss": 12.8383, + "step": 195270 + }, + { + "epoch": 0.39447795504955213, + "grad_norm": 427.639892578125, + "learning_rate": 7.641302574596168e-06, + "loss": 24.9733, + "step": 195280 + }, + { + "epoch": 0.39449815568223595, + "grad_norm": 310.03375244140625, + "learning_rate": 7.64100618250894e-06, + "loss": 29.2077, + 
"step": 195290 + }, + { + "epoch": 0.39451835631491977, + "grad_norm": 226.11190795898438, + "learning_rate": 7.640709777549773e-06, + "loss": 40.6232, + "step": 195300 + }, + { + "epoch": 0.3945385569476036, + "grad_norm": 1317.2608642578125, + "learning_rate": 7.640413359720105e-06, + "loss": 21.724, + "step": 195310 + }, + { + "epoch": 0.3945587575802874, + "grad_norm": 172.037841796875, + "learning_rate": 7.64011692902138e-06, + "loss": 11.937, + "step": 195320 + }, + { + "epoch": 0.39457895821297123, + "grad_norm": 579.522216796875, + "learning_rate": 7.639820485455047e-06, + "loss": 20.1632, + "step": 195330 + }, + { + "epoch": 0.39459915884565505, + "grad_norm": 1046.946533203125, + "learning_rate": 7.639524029022552e-06, + "loss": 30.2525, + "step": 195340 + }, + { + "epoch": 0.39461935947833887, + "grad_norm": 661.1083374023438, + "learning_rate": 7.639227559725333e-06, + "loss": 20.4884, + "step": 195350 + }, + { + "epoch": 0.3946395601110227, + "grad_norm": 864.996826171875, + "learning_rate": 7.63893107756484e-06, + "loss": 18.8866, + "step": 195360 + }, + { + "epoch": 0.3946597607437065, + "grad_norm": 448.4447021484375, + "learning_rate": 7.638634582542516e-06, + "loss": 16.8178, + "step": 195370 + }, + { + "epoch": 0.39467996137639033, + "grad_norm": 669.9586791992188, + "learning_rate": 7.63833807465981e-06, + "loss": 18.6679, + "step": 195380 + }, + { + "epoch": 0.39470016200907415, + "grad_norm": 263.8163146972656, + "learning_rate": 7.638041553918162e-06, + "loss": 17.5448, + "step": 195390 + }, + { + "epoch": 0.3947203626417579, + "grad_norm": 543.4058837890625, + "learning_rate": 7.637745020319019e-06, + "loss": 10.2975, + "step": 195400 + }, + { + "epoch": 0.39474056327444174, + "grad_norm": 430.3114318847656, + "learning_rate": 7.63744847386383e-06, + "loss": 18.436, + "step": 195410 + }, + { + "epoch": 0.39476076390712556, + "grad_norm": 474.70654296875, + "learning_rate": 7.637151914554033e-06, + "loss": 24.9709, + "step": 195420 + }, + { + "epoch": 0.3947809645398094, + "grad_norm": 733.6818237304688, + "learning_rate": 7.63685534239108e-06, + "loss": 19.4783, + "step": 195430 + }, + { + "epoch": 0.3948011651724932, + "grad_norm": 208.99607849121094, + "learning_rate": 7.636558757376413e-06, + "loss": 17.9064, + "step": 195440 + }, + { + "epoch": 0.394821365805177, + "grad_norm": 785.7476806640625, + "learning_rate": 7.636262159511479e-06, + "loss": 25.3541, + "step": 195450 + }, + { + "epoch": 0.39484156643786084, + "grad_norm": 7.1770548820495605, + "learning_rate": 7.63596554879772e-06, + "loss": 39.068, + "step": 195460 + }, + { + "epoch": 0.39486176707054466, + "grad_norm": 293.75311279296875, + "learning_rate": 7.635668925236588e-06, + "loss": 16.6436, + "step": 195470 + }, + { + "epoch": 0.3948819677032285, + "grad_norm": 455.05364990234375, + "learning_rate": 7.635372288829524e-06, + "loss": 29.1417, + "step": 195480 + }, + { + "epoch": 0.3949021683359123, + "grad_norm": 664.2986450195312, + "learning_rate": 7.635075639577976e-06, + "loss": 17.6787, + "step": 195490 + }, + { + "epoch": 0.3949223689685961, + "grad_norm": 1943.1253662109375, + "learning_rate": 7.634778977483389e-06, + "loss": 43.1422, + "step": 195500 + }, + { + "epoch": 0.39494256960127994, + "grad_norm": 351.3155212402344, + "learning_rate": 7.634482302547208e-06, + "loss": 28.7181, + "step": 195510 + }, + { + "epoch": 0.3949627702339637, + "grad_norm": 181.8381805419922, + "learning_rate": 7.63418561477088e-06, + "loss": 20.9259, + "step": 195520 + }, + { + "epoch": 0.3949829708666475, + 
"grad_norm": 194.98898315429688, + "learning_rate": 7.63388891415585e-06, + "loss": 17.6536, + "step": 195530 + }, + { + "epoch": 0.39500317149933134, + "grad_norm": 308.9329833984375, + "learning_rate": 7.633592200703566e-06, + "loss": 25.9925, + "step": 195540 + }, + { + "epoch": 0.39502337213201516, + "grad_norm": 702.1701049804688, + "learning_rate": 7.633295474415473e-06, + "loss": 22.3518, + "step": 195550 + }, + { + "epoch": 0.395043572764699, + "grad_norm": 698.7434692382812, + "learning_rate": 7.632998735293016e-06, + "loss": 20.4989, + "step": 195560 + }, + { + "epoch": 0.3950637733973828, + "grad_norm": 549.2517700195312, + "learning_rate": 7.632701983337645e-06, + "loss": 47.9753, + "step": 195570 + }, + { + "epoch": 0.3950839740300666, + "grad_norm": 686.4854125976562, + "learning_rate": 7.632405218550801e-06, + "loss": 21.3223, + "step": 195580 + }, + { + "epoch": 0.39510417466275044, + "grad_norm": 73.8600082397461, + "learning_rate": 7.632108440933934e-06, + "loss": 14.1037, + "step": 195590 + }, + { + "epoch": 0.39512437529543426, + "grad_norm": 224.45777893066406, + "learning_rate": 7.63181165048849e-06, + "loss": 13.8222, + "step": 195600 + }, + { + "epoch": 0.3951445759281181, + "grad_norm": 287.5701904296875, + "learning_rate": 7.631514847215914e-06, + "loss": 33.526, + "step": 195610 + }, + { + "epoch": 0.3951647765608019, + "grad_norm": 0.0, + "learning_rate": 7.631218031117658e-06, + "loss": 19.2528, + "step": 195620 + }, + { + "epoch": 0.3951849771934857, + "grad_norm": 795.55029296875, + "learning_rate": 7.630921202195161e-06, + "loss": 24.2244, + "step": 195630 + }, + { + "epoch": 0.39520517782616954, + "grad_norm": 187.6152801513672, + "learning_rate": 7.630624360449875e-06, + "loss": 13.1685, + "step": 195640 + }, + { + "epoch": 0.3952253784588533, + "grad_norm": 337.955078125, + "learning_rate": 7.630327505883243e-06, + "loss": 12.6366, + "step": 195650 + }, + { + "epoch": 0.3952455790915371, + "grad_norm": 153.82737731933594, + "learning_rate": 7.630030638496714e-06, + "loss": 17.0972, + "step": 195660 + }, + { + "epoch": 0.39526577972422094, + "grad_norm": 423.9061584472656, + "learning_rate": 7.629733758291736e-06, + "loss": 24.7043, + "step": 195670 + }, + { + "epoch": 0.39528598035690476, + "grad_norm": 843.265869140625, + "learning_rate": 7.629436865269753e-06, + "loss": 32.2621, + "step": 195680 + }, + { + "epoch": 0.3953061809895886, + "grad_norm": 294.4344482421875, + "learning_rate": 7.629139959432215e-06, + "loss": 16.6348, + "step": 195690 + }, + { + "epoch": 0.3953263816222724, + "grad_norm": 165.56494140625, + "learning_rate": 7.628843040780567e-06, + "loss": 20.1531, + "step": 195700 + }, + { + "epoch": 0.3953465822549562, + "grad_norm": 106.0355453491211, + "learning_rate": 7.628546109316257e-06, + "loss": 17.0529, + "step": 195710 + }, + { + "epoch": 0.39536678288764004, + "grad_norm": 368.1607666015625, + "learning_rate": 7.628249165040731e-06, + "loss": 19.9909, + "step": 195720 + }, + { + "epoch": 0.39538698352032386, + "grad_norm": 425.4073486328125, + "learning_rate": 7.627952207955439e-06, + "loss": 24.6675, + "step": 195730 + }, + { + "epoch": 0.3954071841530077, + "grad_norm": 375.9248962402344, + "learning_rate": 7.627655238061825e-06, + "loss": 34.5446, + "step": 195740 + }, + { + "epoch": 0.3954273847856915, + "grad_norm": 900.6353759765625, + "learning_rate": 7.627358255361339e-06, + "loss": 18.2016, + "step": 195750 + }, + { + "epoch": 0.3954475854183753, + "grad_norm": 351.25244140625, + "learning_rate": 7.627061259855428e-06, + 
"loss": 8.3365, + "step": 195760 + }, + { + "epoch": 0.39546778605105914, + "grad_norm": 367.19952392578125, + "learning_rate": 7.626764251545539e-06, + "loss": 21.8847, + "step": 195770 + }, + { + "epoch": 0.3954879866837429, + "grad_norm": 552.2587890625, + "learning_rate": 7.62646723043312e-06, + "loss": 23.0697, + "step": 195780 + }, + { + "epoch": 0.39550818731642673, + "grad_norm": 345.9349060058594, + "learning_rate": 7.626170196519618e-06, + "loss": 29.2348, + "step": 195790 + }, + { + "epoch": 0.39552838794911055, + "grad_norm": 613.0227661132812, + "learning_rate": 7.6258731498064796e-06, + "loss": 44.9344, + "step": 195800 + }, + { + "epoch": 0.39554858858179437, + "grad_norm": 109.75715637207031, + "learning_rate": 7.625576090295155e-06, + "loss": 27.0129, + "step": 195810 + }, + { + "epoch": 0.3955687892144782, + "grad_norm": 366.6320495605469, + "learning_rate": 7.625279017987091e-06, + "loss": 17.0775, + "step": 195820 + }, + { + "epoch": 0.395588989847162, + "grad_norm": 280.7791748046875, + "learning_rate": 7.624981932883735e-06, + "loss": 15.4656, + "step": 195830 + }, + { + "epoch": 0.39560919047984583, + "grad_norm": 61.66830062866211, + "learning_rate": 7.624684834986536e-06, + "loss": 15.0344, + "step": 195840 + }, + { + "epoch": 0.39562939111252965, + "grad_norm": 108.14071655273438, + "learning_rate": 7.624387724296941e-06, + "loss": 16.5607, + "step": 195850 + }, + { + "epoch": 0.39564959174521347, + "grad_norm": 224.29344177246094, + "learning_rate": 7.6240906008163985e-06, + "loss": 25.5896, + "step": 195860 + }, + { + "epoch": 0.3956697923778973, + "grad_norm": 128.72021484375, + "learning_rate": 7.623793464546359e-06, + "loss": 40.8091, + "step": 195870 + }, + { + "epoch": 0.3956899930105811, + "grad_norm": 780.8737182617188, + "learning_rate": 7.623496315488264e-06, + "loss": 35.4666, + "step": 195880 + }, + { + "epoch": 0.39571019364326493, + "grad_norm": 361.8275146484375, + "learning_rate": 7.623199153643569e-06, + "loss": 12.9063, + "step": 195890 + }, + { + "epoch": 0.39573039427594875, + "grad_norm": 228.5116729736328, + "learning_rate": 7.622901979013717e-06, + "loss": 15.9548, + "step": 195900 + }, + { + "epoch": 0.3957505949086325, + "grad_norm": 378.7085266113281, + "learning_rate": 7.6226047916001624e-06, + "loss": 18.3263, + "step": 195910 + }, + { + "epoch": 0.39577079554131633, + "grad_norm": 566.7664794921875, + "learning_rate": 7.622307591404347e-06, + "loss": 25.1457, + "step": 195920 + }, + { + "epoch": 0.39579099617400015, + "grad_norm": 454.21746826171875, + "learning_rate": 7.622010378427725e-06, + "loss": 28.153, + "step": 195930 + }, + { + "epoch": 0.395811196806684, + "grad_norm": 268.2760009765625, + "learning_rate": 7.621713152671742e-06, + "loss": 13.4463, + "step": 195940 + }, + { + "epoch": 0.3958313974393678, + "grad_norm": 234.8856964111328, + "learning_rate": 7.6214159141378465e-06, + "loss": 26.7978, + "step": 195950 + }, + { + "epoch": 0.3958515980720516, + "grad_norm": 562.821044921875, + "learning_rate": 7.621118662827487e-06, + "loss": 13.562, + "step": 195960 + }, + { + "epoch": 0.39587179870473543, + "grad_norm": 353.9815368652344, + "learning_rate": 7.620821398742114e-06, + "loss": 10.7441, + "step": 195970 + }, + { + "epoch": 0.39589199933741925, + "grad_norm": 1172.15966796875, + "learning_rate": 7.620524121883175e-06, + "loss": 16.3424, + "step": 195980 + }, + { + "epoch": 0.3959121999701031, + "grad_norm": 326.10137939453125, + "learning_rate": 7.62022683225212e-06, + "loss": 20.9486, + "step": 195990 + }, + { + 
"epoch": 0.3959324006027869, + "grad_norm": 10.76241397857666, + "learning_rate": 7.619929529850397e-06, + "loss": 26.3895, + "step": 196000 + }, + { + "epoch": 0.3959526012354707, + "grad_norm": 661.6845703125, + "learning_rate": 7.6196322146794534e-06, + "loss": 11.5033, + "step": 196010 + }, + { + "epoch": 0.39597280186815453, + "grad_norm": 655.6265258789062, + "learning_rate": 7.619334886740744e-06, + "loss": 14.49, + "step": 196020 + }, + { + "epoch": 0.39599300250083835, + "grad_norm": 820.6245727539062, + "learning_rate": 7.61903754603571e-06, + "loss": 25.1901, + "step": 196030 + }, + { + "epoch": 0.3960132031335221, + "grad_norm": 353.4188537597656, + "learning_rate": 7.618740192565806e-06, + "loss": 23.5844, + "step": 196040 + }, + { + "epoch": 0.39603340376620594, + "grad_norm": 731.4053344726562, + "learning_rate": 7.6184428263324815e-06, + "loss": 38.6164, + "step": 196050 + }, + { + "epoch": 0.39605360439888976, + "grad_norm": 323.4394226074219, + "learning_rate": 7.618145447337182e-06, + "loss": 16.8984, + "step": 196060 + }, + { + "epoch": 0.3960738050315736, + "grad_norm": 418.0279846191406, + "learning_rate": 7.617848055581361e-06, + "loss": 19.5996, + "step": 196070 + }, + { + "epoch": 0.3960940056642574, + "grad_norm": 267.29449462890625, + "learning_rate": 7.6175506510664645e-06, + "loss": 12.705, + "step": 196080 + }, + { + "epoch": 0.3961142062969412, + "grad_norm": 327.3512878417969, + "learning_rate": 7.617253233793944e-06, + "loss": 12.123, + "step": 196090 + }, + { + "epoch": 0.39613440692962504, + "grad_norm": 512.3671875, + "learning_rate": 7.616955803765249e-06, + "loss": 23.7502, + "step": 196100 + }, + { + "epoch": 0.39615460756230886, + "grad_norm": 817.8502807617188, + "learning_rate": 7.616658360981828e-06, + "loss": 19.767, + "step": 196110 + }, + { + "epoch": 0.3961748081949927, + "grad_norm": 197.24513244628906, + "learning_rate": 7.616360905445132e-06, + "loss": 36.9933, + "step": 196120 + }, + { + "epoch": 0.3961950088276765, + "grad_norm": 875.3771362304688, + "learning_rate": 7.616063437156611e-06, + "loss": 39.9314, + "step": 196130 + }, + { + "epoch": 0.3962152094603603, + "grad_norm": 288.16058349609375, + "learning_rate": 7.615765956117714e-06, + "loss": 10.6367, + "step": 196140 + }, + { + "epoch": 0.39623541009304414, + "grad_norm": 545.18603515625, + "learning_rate": 7.61546846232989e-06, + "loss": 21.7529, + "step": 196150 + }, + { + "epoch": 0.3962556107257279, + "grad_norm": 80.21540069580078, + "learning_rate": 7.615170955794592e-06, + "loss": 38.5269, + "step": 196160 + }, + { + "epoch": 0.3962758113584117, + "grad_norm": 321.4330749511719, + "learning_rate": 7.614873436513265e-06, + "loss": 15.3385, + "step": 196170 + }, + { + "epoch": 0.39629601199109554, + "grad_norm": 392.32110595703125, + "learning_rate": 7.614575904487365e-06, + "loss": 22.9912, + "step": 196180 + }, + { + "epoch": 0.39631621262377936, + "grad_norm": 468.4725646972656, + "learning_rate": 7.6142783597183365e-06, + "loss": 21.5545, + "step": 196190 + }, + { + "epoch": 0.3963364132564632, + "grad_norm": 502.376708984375, + "learning_rate": 7.613980802207633e-06, + "loss": 18.922, + "step": 196200 + }, + { + "epoch": 0.396356613889147, + "grad_norm": 578.26123046875, + "learning_rate": 7.613683231956705e-06, + "loss": 25.8956, + "step": 196210 + }, + { + "epoch": 0.3963768145218308, + "grad_norm": 328.1247863769531, + "learning_rate": 7.613385648967002e-06, + "loss": 18.3482, + "step": 196220 + }, + { + "epoch": 0.39639701515451464, + "grad_norm": 365.4832763671875, 
+ "learning_rate": 7.613088053239974e-06, + "loss": 12.2532, + "step": 196230 + }, + { + "epoch": 0.39641721578719846, + "grad_norm": 146.03553771972656, + "learning_rate": 7.612790444777072e-06, + "loss": 26.2427, + "step": 196240 + }, + { + "epoch": 0.3964374164198823, + "grad_norm": 614.1575927734375, + "learning_rate": 7.612492823579744e-06, + "loss": 19.1647, + "step": 196250 + }, + { + "epoch": 0.3964576170525661, + "grad_norm": 662.9046630859375, + "learning_rate": 7.612195189649445e-06, + "loss": 36.0849, + "step": 196260 + }, + { + "epoch": 0.3964778176852499, + "grad_norm": 194.89370727539062, + "learning_rate": 7.611897542987623e-06, + "loss": 15.8646, + "step": 196270 + }, + { + "epoch": 0.39649801831793374, + "grad_norm": 401.27874755859375, + "learning_rate": 7.611599883595731e-06, + "loss": 11.8066, + "step": 196280 + }, + { + "epoch": 0.3965182189506175, + "grad_norm": 207.72634887695312, + "learning_rate": 7.611302211475216e-06, + "loss": 19.1647, + "step": 196290 + }, + { + "epoch": 0.3965384195833013, + "grad_norm": 471.66790771484375, + "learning_rate": 7.6110045266275305e-06, + "loss": 15.0866, + "step": 196300 + }, + { + "epoch": 0.39655862021598515, + "grad_norm": 246.96693420410156, + "learning_rate": 7.610706829054126e-06, + "loss": 27.5003, + "step": 196310 + }, + { + "epoch": 0.39657882084866897, + "grad_norm": 401.9541320800781, + "learning_rate": 7.610409118756454e-06, + "loss": 14.7462, + "step": 196320 + }, + { + "epoch": 0.3965990214813528, + "grad_norm": 593.8477783203125, + "learning_rate": 7.610111395735962e-06, + "loss": 37.6239, + "step": 196330 + }, + { + "epoch": 0.3966192221140366, + "grad_norm": 453.9722900390625, + "learning_rate": 7.609813659994107e-06, + "loss": 14.7958, + "step": 196340 + }, + { + "epoch": 0.3966394227467204, + "grad_norm": 587.7510986328125, + "learning_rate": 7.6095159115323335e-06, + "loss": 20.1343, + "step": 196350 + }, + { + "epoch": 0.39665962337940425, + "grad_norm": 504.5757751464844, + "learning_rate": 7.609218150352098e-06, + "loss": 27.4714, + "step": 196360 + }, + { + "epoch": 0.39667982401208807, + "grad_norm": 404.4772644042969, + "learning_rate": 7.608920376454849e-06, + "loss": 20.5916, + "step": 196370 + }, + { + "epoch": 0.3967000246447719, + "grad_norm": 323.6693115234375, + "learning_rate": 7.608622589842039e-06, + "loss": 16.1455, + "step": 196380 + }, + { + "epoch": 0.3967202252774557, + "grad_norm": 316.3536682128906, + "learning_rate": 7.608324790515119e-06, + "loss": 18.9734, + "step": 196390 + }, + { + "epoch": 0.3967404259101395, + "grad_norm": 565.381591796875, + "learning_rate": 7.6080269784755405e-06, + "loss": 29.0074, + "step": 196400 + }, + { + "epoch": 0.39676062654282335, + "grad_norm": 323.6951599121094, + "learning_rate": 7.607729153724755e-06, + "loss": 17.4683, + "step": 196410 + }, + { + "epoch": 0.3967808271755071, + "grad_norm": 386.5226135253906, + "learning_rate": 7.607431316264211e-06, + "loss": 13.6444, + "step": 196420 + }, + { + "epoch": 0.39680102780819093, + "grad_norm": 136.9131317138672, + "learning_rate": 7.607133466095365e-06, + "loss": 11.536, + "step": 196430 + }, + { + "epoch": 0.39682122844087475, + "grad_norm": 366.0284423828125, + "learning_rate": 7.606835603219666e-06, + "loss": 17.8898, + "step": 196440 + }, + { + "epoch": 0.39684142907355857, + "grad_norm": 434.31982421875, + "learning_rate": 7.60653772763857e-06, + "loss": 16.1737, + "step": 196450 + }, + { + "epoch": 0.3968616297062424, + "grad_norm": 632.747802734375, + "learning_rate": 7.606239839353522e-06, + 
"loss": 18.743, + "step": 196460 + }, + { + "epoch": 0.3968818303389262, + "grad_norm": 513.1290283203125, + "learning_rate": 7.605941938365977e-06, + "loss": 14.2984, + "step": 196470 + }, + { + "epoch": 0.39690203097161003, + "grad_norm": 1058.4290771484375, + "learning_rate": 7.6056440246773884e-06, + "loss": 25.4774, + "step": 196480 + }, + { + "epoch": 0.39692223160429385, + "grad_norm": 674.5564575195312, + "learning_rate": 7.605346098289206e-06, + "loss": 17.9885, + "step": 196490 + }, + { + "epoch": 0.39694243223697767, + "grad_norm": 570.1978149414062, + "learning_rate": 7.605048159202884e-06, + "loss": 24.6575, + "step": 196500 + }, + { + "epoch": 0.3969626328696615, + "grad_norm": 621.2515258789062, + "learning_rate": 7.60475020741987e-06, + "loss": 31.1932, + "step": 196510 + }, + { + "epoch": 0.3969828335023453, + "grad_norm": 641.5540161132812, + "learning_rate": 7.604452242941622e-06, + "loss": 21.09, + "step": 196520 + }, + { + "epoch": 0.39700303413502913, + "grad_norm": 514.0419921875, + "learning_rate": 7.60415426576959e-06, + "loss": 21.2872, + "step": 196530 + }, + { + "epoch": 0.39702323476771295, + "grad_norm": 267.4769287109375, + "learning_rate": 7.603856275905223e-06, + "loss": 16.4014, + "step": 196540 + }, + { + "epoch": 0.3970434354003967, + "grad_norm": 793.6900634765625, + "learning_rate": 7.6035582733499805e-06, + "loss": 27.5892, + "step": 196550 + }, + { + "epoch": 0.39706363603308054, + "grad_norm": 972.1981811523438, + "learning_rate": 7.6032602581053075e-06, + "loss": 17.3954, + "step": 196560 + }, + { + "epoch": 0.39708383666576436, + "grad_norm": 312.2232360839844, + "learning_rate": 7.602962230172661e-06, + "loss": 13.5148, + "step": 196570 + }, + { + "epoch": 0.3971040372984482, + "grad_norm": 254.5087127685547, + "learning_rate": 7.6026641895534925e-06, + "loss": 22.2328, + "step": 196580 + }, + { + "epoch": 0.397124237931132, + "grad_norm": 351.739501953125, + "learning_rate": 7.602366136249254e-06, + "loss": 13.8995, + "step": 196590 + }, + { + "epoch": 0.3971444385638158, + "grad_norm": 587.6464233398438, + "learning_rate": 7.6020680702613995e-06, + "loss": 21.4347, + "step": 196600 + }, + { + "epoch": 0.39716463919649964, + "grad_norm": 348.7734680175781, + "learning_rate": 7.60176999159138e-06, + "loss": 21.451, + "step": 196610 + }, + { + "epoch": 0.39718483982918346, + "grad_norm": 483.9173583984375, + "learning_rate": 7.601471900240648e-06, + "loss": 21.8984, + "step": 196620 + }, + { + "epoch": 0.3972050404618673, + "grad_norm": 347.1903076171875, + "learning_rate": 7.601173796210659e-06, + "loss": 21.8069, + "step": 196630 + }, + { + "epoch": 0.3972252410945511, + "grad_norm": 169.56793212890625, + "learning_rate": 7.600875679502864e-06, + "loss": 17.1872, + "step": 196640 + }, + { + "epoch": 0.3972454417272349, + "grad_norm": 252.4035186767578, + "learning_rate": 7.6005775501187165e-06, + "loss": 19.6626, + "step": 196650 + }, + { + "epoch": 0.39726564235991874, + "grad_norm": 198.06736755371094, + "learning_rate": 7.60027940805967e-06, + "loss": 13.199, + "step": 196660 + }, + { + "epoch": 0.39728584299260256, + "grad_norm": 1069.942138671875, + "learning_rate": 7.5999812533271755e-06, + "loss": 15.3515, + "step": 196670 + }, + { + "epoch": 0.3973060436252863, + "grad_norm": 282.4885559082031, + "learning_rate": 7.599683085922689e-06, + "loss": 21.1991, + "step": 196680 + }, + { + "epoch": 0.39732624425797014, + "grad_norm": 131.6488037109375, + "learning_rate": 7.599384905847662e-06, + "loss": 23.1902, + "step": 196690 + }, + { + 
"epoch": 0.39734644489065396, + "grad_norm": 367.5249938964844, + "learning_rate": 7.5990867131035474e-06, + "loss": 12.6256, + "step": 196700 + }, + { + "epoch": 0.3973666455233378, + "grad_norm": 1070.554931640625, + "learning_rate": 7.598788507691801e-06, + "loss": 22.2413, + "step": 196710 + }, + { + "epoch": 0.3973868461560216, + "grad_norm": 386.4486999511719, + "learning_rate": 7.5984902896138736e-06, + "loss": 32.7206, + "step": 196720 + }, + { + "epoch": 0.3974070467887054, + "grad_norm": 75.12928009033203, + "learning_rate": 7.598192058871221e-06, + "loss": 25.8482, + "step": 196730 + }, + { + "epoch": 0.39742724742138924, + "grad_norm": 198.99949645996094, + "learning_rate": 7.597893815465294e-06, + "loss": 9.4323, + "step": 196740 + }, + { + "epoch": 0.39744744805407306, + "grad_norm": 417.2445983886719, + "learning_rate": 7.597595559397548e-06, + "loss": 18.515, + "step": 196750 + }, + { + "epoch": 0.3974676486867569, + "grad_norm": 304.7272644042969, + "learning_rate": 7.597297290669437e-06, + "loss": 13.3798, + "step": 196760 + }, + { + "epoch": 0.3974878493194407, + "grad_norm": 576.5830688476562, + "learning_rate": 7.596999009282413e-06, + "loss": 20.7862, + "step": 196770 + }, + { + "epoch": 0.3975080499521245, + "grad_norm": 580.6087036132812, + "learning_rate": 7.5967007152379305e-06, + "loss": 29.0417, + "step": 196780 + }, + { + "epoch": 0.39752825058480834, + "grad_norm": 351.1151428222656, + "learning_rate": 7.596402408537444e-06, + "loss": 23.2411, + "step": 196790 + }, + { + "epoch": 0.3975484512174921, + "grad_norm": 889.0579833984375, + "learning_rate": 7.596104089182408e-06, + "loss": 26.9643, + "step": 196800 + }, + { + "epoch": 0.3975686518501759, + "grad_norm": 710.7026977539062, + "learning_rate": 7.595805757174275e-06, + "loss": 22.4164, + "step": 196810 + }, + { + "epoch": 0.39758885248285974, + "grad_norm": 351.8357238769531, + "learning_rate": 7.5955074125145e-06, + "loss": 20.6835, + "step": 196820 + }, + { + "epoch": 0.39760905311554356, + "grad_norm": 509.480224609375, + "learning_rate": 7.595209055204534e-06, + "loss": 13.3233, + "step": 196830 + }, + { + "epoch": 0.3976292537482274, + "grad_norm": 145.74423217773438, + "learning_rate": 7.594910685245837e-06, + "loss": 25.3805, + "step": 196840 + }, + { + "epoch": 0.3976494543809112, + "grad_norm": 207.77796936035156, + "learning_rate": 7.594612302639859e-06, + "loss": 23.9481, + "step": 196850 + }, + { + "epoch": 0.397669655013595, + "grad_norm": 375.04638671875, + "learning_rate": 7.5943139073880555e-06, + "loss": 19.9612, + "step": 196860 + }, + { + "epoch": 0.39768985564627884, + "grad_norm": 433.2248229980469, + "learning_rate": 7.5940154994918806e-06, + "loss": 15.8358, + "step": 196870 + }, + { + "epoch": 0.39771005627896266, + "grad_norm": 187.95147705078125, + "learning_rate": 7.593717078952788e-06, + "loss": 24.1666, + "step": 196880 + }, + { + "epoch": 0.3977302569116465, + "grad_norm": 376.43994140625, + "learning_rate": 7.593418645772235e-06, + "loss": 28.1238, + "step": 196890 + }, + { + "epoch": 0.3977504575443303, + "grad_norm": 634.16455078125, + "learning_rate": 7.5931201999516715e-06, + "loss": 25.7747, + "step": 196900 + }, + { + "epoch": 0.3977706581770141, + "grad_norm": 931.7929077148438, + "learning_rate": 7.592821741492555e-06, + "loss": 22.2204, + "step": 196910 + }, + { + "epoch": 0.39779085880969794, + "grad_norm": 567.7821044921875, + "learning_rate": 7.592523270396342e-06, + "loss": 60.1065, + "step": 196920 + }, + { + "epoch": 0.3978110594423817, + "grad_norm": 
189.20652770996094, + "learning_rate": 7.592224786664484e-06, + "loss": 19.5442, + "step": 196930 + }, + { + "epoch": 0.39783126007506553, + "grad_norm": 277.2565002441406, + "learning_rate": 7.591926290298435e-06, + "loss": 23.0767, + "step": 196940 + }, + { + "epoch": 0.39785146070774935, + "grad_norm": 260.0182800292969, + "learning_rate": 7.591627781299654e-06, + "loss": 17.1038, + "step": 196950 + }, + { + "epoch": 0.39787166134043317, + "grad_norm": 338.0267333984375, + "learning_rate": 7.5913292596695906e-06, + "loss": 17.5577, + "step": 196960 + }, + { + "epoch": 0.397891861973117, + "grad_norm": 584.3551635742188, + "learning_rate": 7.5910307254097075e-06, + "loss": 10.1535, + "step": 196970 + }, + { + "epoch": 0.3979120626058008, + "grad_norm": 676.46630859375, + "learning_rate": 7.590732178521451e-06, + "loss": 18.7005, + "step": 196980 + }, + { + "epoch": 0.39793226323848463, + "grad_norm": 457.1989440917969, + "learning_rate": 7.590433619006281e-06, + "loss": 21.0176, + "step": 196990 + }, + { + "epoch": 0.39795246387116845, + "grad_norm": 198.6430206298828, + "learning_rate": 7.590135046865652e-06, + "loss": 18.1502, + "step": 197000 + }, + { + "epoch": 0.39797266450385227, + "grad_norm": 607.3787841796875, + "learning_rate": 7.589836462101019e-06, + "loss": 18.3786, + "step": 197010 + }, + { + "epoch": 0.3979928651365361, + "grad_norm": 383.4787292480469, + "learning_rate": 7.589537864713836e-06, + "loss": 21.7777, + "step": 197020 + }, + { + "epoch": 0.3980130657692199, + "grad_norm": 258.12158203125, + "learning_rate": 7.58923925470556e-06, + "loss": 12.7004, + "step": 197030 + }, + { + "epoch": 0.39803326640190373, + "grad_norm": 149.9590606689453, + "learning_rate": 7.588940632077647e-06, + "loss": 16.3806, + "step": 197040 + }, + { + "epoch": 0.39805346703458755, + "grad_norm": 42.28246307373047, + "learning_rate": 7.588641996831551e-06, + "loss": 16.3697, + "step": 197050 + }, + { + "epoch": 0.3980736676672713, + "grad_norm": 605.8641357421875, + "learning_rate": 7.588343348968728e-06, + "loss": 22.4684, + "step": 197060 + }, + { + "epoch": 0.39809386829995513, + "grad_norm": 631.8109130859375, + "learning_rate": 7.588044688490633e-06, + "loss": 19.5397, + "step": 197070 + }, + { + "epoch": 0.39811406893263895, + "grad_norm": 980.2217407226562, + "learning_rate": 7.587746015398723e-06, + "loss": 42.9501, + "step": 197080 + }, + { + "epoch": 0.3981342695653228, + "grad_norm": 256.7890319824219, + "learning_rate": 7.587447329694451e-06, + "loss": 12.6726, + "step": 197090 + }, + { + "epoch": 0.3981544701980066, + "grad_norm": 273.36187744140625, + "learning_rate": 7.587148631379276e-06, + "loss": 29.7439, + "step": 197100 + }, + { + "epoch": 0.3981746708306904, + "grad_norm": 142.3683319091797, + "learning_rate": 7.586849920454652e-06, + "loss": 18.1001, + "step": 197110 + }, + { + "epoch": 0.39819487146337423, + "grad_norm": 90.94284057617188, + "learning_rate": 7.586551196922034e-06, + "loss": 21.5349, + "step": 197120 + }, + { + "epoch": 0.39821507209605805, + "grad_norm": 1.5760222673416138, + "learning_rate": 7.586252460782882e-06, + "loss": 14.9537, + "step": 197130 + }, + { + "epoch": 0.3982352727287419, + "grad_norm": 346.44256591796875, + "learning_rate": 7.585953712038646e-06, + "loss": 9.2722, + "step": 197140 + }, + { + "epoch": 0.3982554733614257, + "grad_norm": 1218.5108642578125, + "learning_rate": 7.585654950690786e-06, + "loss": 35.6418, + "step": 197150 + }, + { + "epoch": 0.3982756739941095, + "grad_norm": 427.25762939453125, + "learning_rate": 
7.585356176740759e-06, + "loss": 17.8915, + "step": 197160 + }, + { + "epoch": 0.39829587462679333, + "grad_norm": 426.3423156738281, + "learning_rate": 7.5850573901900185e-06, + "loss": 19.4384, + "step": 197170 + }, + { + "epoch": 0.39831607525947715, + "grad_norm": 478.8867492675781, + "learning_rate": 7.584758591040022e-06, + "loss": 26.0589, + "step": 197180 + }, + { + "epoch": 0.3983362758921609, + "grad_norm": 616.3558959960938, + "learning_rate": 7.584459779292226e-06, + "loss": 22.0291, + "step": 197190 + }, + { + "epoch": 0.39835647652484474, + "grad_norm": 205.8076934814453, + "learning_rate": 7.5841609549480854e-06, + "loss": 29.4691, + "step": 197200 + }, + { + "epoch": 0.39837667715752856, + "grad_norm": 312.9051513671875, + "learning_rate": 7.583862118009058e-06, + "loss": 28.2658, + "step": 197210 + }, + { + "epoch": 0.3983968777902124, + "grad_norm": 771.70703125, + "learning_rate": 7.583563268476602e-06, + "loss": 24.9648, + "step": 197220 + }, + { + "epoch": 0.3984170784228962, + "grad_norm": 231.82586669921875, + "learning_rate": 7.583264406352169e-06, + "loss": 21.6845, + "step": 197230 + }, + { + "epoch": 0.39843727905558, + "grad_norm": 693.255615234375, + "learning_rate": 7.582965531637221e-06, + "loss": 16.0977, + "step": 197240 + }, + { + "epoch": 0.39845747968826384, + "grad_norm": 1050.954345703125, + "learning_rate": 7.58266664433321e-06, + "loss": 31.7357, + "step": 197250 + }, + { + "epoch": 0.39847768032094766, + "grad_norm": 264.2238464355469, + "learning_rate": 7.582367744441597e-06, + "loss": 17.3054, + "step": 197260 + }, + { + "epoch": 0.3984978809536315, + "grad_norm": 29.328338623046875, + "learning_rate": 7.582068831963836e-06, + "loss": 16.1974, + "step": 197270 + }, + { + "epoch": 0.3985180815863153, + "grad_norm": 90.00679016113281, + "learning_rate": 7.5817699069013835e-06, + "loss": 18.1018, + "step": 197280 + }, + { + "epoch": 0.3985382822189991, + "grad_norm": 230.550537109375, + "learning_rate": 7.5814709692557e-06, + "loss": 23.5122, + "step": 197290 + }, + { + "epoch": 0.39855848285168294, + "grad_norm": 338.776123046875, + "learning_rate": 7.581172019028238e-06, + "loss": 10.5504, + "step": 197300 + }, + { + "epoch": 0.39857868348436676, + "grad_norm": 313.533447265625, + "learning_rate": 7.580873056220458e-06, + "loss": 27.3461, + "step": 197310 + }, + { + "epoch": 0.3985988841170505, + "grad_norm": 162.95481872558594, + "learning_rate": 7.580574080833816e-06, + "loss": 14.9247, + "step": 197320 + }, + { + "epoch": 0.39861908474973434, + "grad_norm": 59.26035690307617, + "learning_rate": 7.580275092869766e-06, + "loss": 8.7449, + "step": 197330 + }, + { + "epoch": 0.39863928538241816, + "grad_norm": 303.17474365234375, + "learning_rate": 7.579976092329772e-06, + "loss": 19.3581, + "step": 197340 + }, + { + "epoch": 0.398659486015102, + "grad_norm": 45.15639114379883, + "learning_rate": 7.579677079215286e-06, + "loss": 21.2257, + "step": 197350 + }, + { + "epoch": 0.3986796866477858, + "grad_norm": 364.3130798339844, + "learning_rate": 7.5793780535277665e-06, + "loss": 16.8157, + "step": 197360 + }, + { + "epoch": 0.3986998872804696, + "grad_norm": 113.99015045166016, + "learning_rate": 7.579079015268671e-06, + "loss": 16.8319, + "step": 197370 + }, + { + "epoch": 0.39872008791315344, + "grad_norm": 816.494384765625, + "learning_rate": 7.5787799644394576e-06, + "loss": 32.5022, + "step": 197380 + }, + { + "epoch": 0.39874028854583726, + "grad_norm": 11.595226287841797, + "learning_rate": 7.578480901041583e-06, + "loss": 15.6218, + "step": 
197390 + }, + { + "epoch": 0.3987604891785211, + "grad_norm": 329.2650146484375, + "learning_rate": 7.578181825076506e-06, + "loss": 17.355, + "step": 197400 + }, + { + "epoch": 0.3987806898112049, + "grad_norm": 415.5538330078125, + "learning_rate": 7.577882736545683e-06, + "loss": 16.8613, + "step": 197410 + }, + { + "epoch": 0.3988008904438887, + "grad_norm": 156.80242919921875, + "learning_rate": 7.577583635450572e-06, + "loss": 39.1839, + "step": 197420 + }, + { + "epoch": 0.39882109107657254, + "grad_norm": 392.8519592285156, + "learning_rate": 7.577284521792632e-06, + "loss": 39.5167, + "step": 197430 + }, + { + "epoch": 0.3988412917092563, + "grad_norm": 160.69647216796875, + "learning_rate": 7.576985395573318e-06, + "loss": 18.4188, + "step": 197440 + }, + { + "epoch": 0.3988614923419401, + "grad_norm": 66.4518814086914, + "learning_rate": 7.576686256794092e-06, + "loss": 20.8966, + "step": 197450 + }, + { + "epoch": 0.39888169297462395, + "grad_norm": 1258.77783203125, + "learning_rate": 7.576387105456408e-06, + "loss": 22.6945, + "step": 197460 + }, + { + "epoch": 0.39890189360730777, + "grad_norm": 210.94589233398438, + "learning_rate": 7.576087941561725e-06, + "loss": 14.4726, + "step": 197470 + }, + { + "epoch": 0.3989220942399916, + "grad_norm": 321.79595947265625, + "learning_rate": 7.575788765111504e-06, + "loss": 20.7766, + "step": 197480 + }, + { + "epoch": 0.3989422948726754, + "grad_norm": 377.4902038574219, + "learning_rate": 7.5754895761072e-06, + "loss": 20.7367, + "step": 197490 + }, + { + "epoch": 0.3989624955053592, + "grad_norm": 3.9381821155548096, + "learning_rate": 7.575190374550272e-06, + "loss": 20.1107, + "step": 197500 + }, + { + "epoch": 0.39898269613804305, + "grad_norm": 1592.7799072265625, + "learning_rate": 7.574891160442179e-06, + "loss": 36.0749, + "step": 197510 + }, + { + "epoch": 0.39900289677072687, + "grad_norm": 616.9158325195312, + "learning_rate": 7.574591933784378e-06, + "loss": 27.1391, + "step": 197520 + }, + { + "epoch": 0.3990230974034107, + "grad_norm": 507.11236572265625, + "learning_rate": 7.574292694578329e-06, + "loss": 15.9596, + "step": 197530 + }, + { + "epoch": 0.3990432980360945, + "grad_norm": 16.916385650634766, + "learning_rate": 7.573993442825489e-06, + "loss": 15.5859, + "step": 197540 + }, + { + "epoch": 0.3990634986687783, + "grad_norm": 271.2107849121094, + "learning_rate": 7.573694178527317e-06, + "loss": 12.2203, + "step": 197550 + }, + { + "epoch": 0.39908369930146215, + "grad_norm": 187.93568420410156, + "learning_rate": 7.573394901685271e-06, + "loss": 10.1335, + "step": 197560 + }, + { + "epoch": 0.3991038999341459, + "grad_norm": 298.5325012207031, + "learning_rate": 7.573095612300813e-06, + "loss": 30.5151, + "step": 197570 + }, + { + "epoch": 0.39912410056682973, + "grad_norm": 109.72039031982422, + "learning_rate": 7.572796310375397e-06, + "loss": 27.6701, + "step": 197580 + }, + { + "epoch": 0.39914430119951355, + "grad_norm": 23.563430786132812, + "learning_rate": 7.5724969959104835e-06, + "loss": 13.946, + "step": 197590 + }, + { + "epoch": 0.39916450183219737, + "grad_norm": 168.8724365234375, + "learning_rate": 7.572197668907533e-06, + "loss": 35.5947, + "step": 197600 + }, + { + "epoch": 0.3991847024648812, + "grad_norm": 246.98825073242188, + "learning_rate": 7.571898329368004e-06, + "loss": 16.9728, + "step": 197610 + }, + { + "epoch": 0.399204903097565, + "grad_norm": 180.78948974609375, + "learning_rate": 7.571598977293351e-06, + "loss": 22.5512, + "step": 197620 + }, + { + "epoch": 
0.39922510373024883, + "grad_norm": 328.37451171875, + "learning_rate": 7.571299612685039e-06, + "loss": 19.8352, + "step": 197630 + }, + { + "epoch": 0.39924530436293265, + "grad_norm": 1071.57421875, + "learning_rate": 7.571000235544524e-06, + "loss": 23.538, + "step": 197640 + }, + { + "epoch": 0.39926550499561647, + "grad_norm": 11091.6884765625, + "learning_rate": 7.570700845873265e-06, + "loss": 38.0649, + "step": 197650 + }, + { + "epoch": 0.3992857056283003, + "grad_norm": 177.7987823486328, + "learning_rate": 7.570401443672723e-06, + "loss": 21.3488, + "step": 197660 + }, + { + "epoch": 0.3993059062609841, + "grad_norm": 206.8262939453125, + "learning_rate": 7.570102028944356e-06, + "loss": 31.9598, + "step": 197670 + }, + { + "epoch": 0.39932610689366793, + "grad_norm": 237.55679321289062, + "learning_rate": 7.569802601689623e-06, + "loss": 33.1679, + "step": 197680 + }, + { + "epoch": 0.39934630752635175, + "grad_norm": 552.0098266601562, + "learning_rate": 7.569503161909984e-06, + "loss": 12.7157, + "step": 197690 + }, + { + "epoch": 0.3993665081590355, + "grad_norm": 419.996337890625, + "learning_rate": 7.569203709606898e-06, + "loss": 31.8053, + "step": 197700 + }, + { + "epoch": 0.39938670879171934, + "grad_norm": 506.3651123046875, + "learning_rate": 7.568904244781825e-06, + "loss": 21.68, + "step": 197710 + }, + { + "epoch": 0.39940690942440316, + "grad_norm": 86.0506591796875, + "learning_rate": 7.568604767436225e-06, + "loss": 16.1703, + "step": 197720 + }, + { + "epoch": 0.399427110057087, + "grad_norm": 430.3340759277344, + "learning_rate": 7.5683052775715545e-06, + "loss": 29.1856, + "step": 197730 + }, + { + "epoch": 0.3994473106897708, + "grad_norm": 181.5095977783203, + "learning_rate": 7.568005775189278e-06, + "loss": 20.1455, + "step": 197740 + }, + { + "epoch": 0.3994675113224546, + "grad_norm": 352.21929931640625, + "learning_rate": 7.5677062602908515e-06, + "loss": 20.4361, + "step": 197750 + }, + { + "epoch": 0.39948771195513844, + "grad_norm": 361.66436767578125, + "learning_rate": 7.567406732877735e-06, + "loss": 14.4292, + "step": 197760 + }, + { + "epoch": 0.39950791258782226, + "grad_norm": 148.053955078125, + "learning_rate": 7.567107192951393e-06, + "loss": 16.3337, + "step": 197770 + }, + { + "epoch": 0.3995281132205061, + "grad_norm": 253.73312377929688, + "learning_rate": 7.566807640513278e-06, + "loss": 9.7145, + "step": 197780 + }, + { + "epoch": 0.3995483138531899, + "grad_norm": 0.0, + "learning_rate": 7.5665080755648575e-06, + "loss": 18.1261, + "step": 197790 + }, + { + "epoch": 0.3995685144858737, + "grad_norm": 211.78326416015625, + "learning_rate": 7.566208498107586e-06, + "loss": 20.9847, + "step": 197800 + }, + { + "epoch": 0.39958871511855754, + "grad_norm": 290.6148986816406, + "learning_rate": 7.5659089081429245e-06, + "loss": 24.0462, + "step": 197810 + }, + { + "epoch": 0.39960891575124136, + "grad_norm": 406.0163269042969, + "learning_rate": 7.565609305672336e-06, + "loss": 20.732, + "step": 197820 + }, + { + "epoch": 0.3996291163839251, + "grad_norm": 785.3375244140625, + "learning_rate": 7.565309690697279e-06, + "loss": 29.2806, + "step": 197830 + }, + { + "epoch": 0.39964931701660894, + "grad_norm": 258.56170654296875, + "learning_rate": 7.565010063219214e-06, + "loss": 16.6117, + "step": 197840 + }, + { + "epoch": 0.39966951764929276, + "grad_norm": 117.95482635498047, + "learning_rate": 7.5647104232395985e-06, + "loss": 20.0101, + "step": 197850 + }, + { + "epoch": 0.3996897182819766, + "grad_norm": 344.67987060546875, + 
"learning_rate": 7.564410770759897e-06, + "loss": 32.3461, + "step": 197860 + }, + { + "epoch": 0.3997099189146604, + "grad_norm": 283.5302429199219, + "learning_rate": 7.564111105781568e-06, + "loss": 26.5465, + "step": 197870 + }, + { + "epoch": 0.3997301195473442, + "grad_norm": 483.49658203125, + "learning_rate": 7.5638114283060735e-06, + "loss": 19.7947, + "step": 197880 + }, + { + "epoch": 0.39975032018002804, + "grad_norm": 43.05898666381836, + "learning_rate": 7.5635117383348725e-06, + "loss": 16.5961, + "step": 197890 + }, + { + "epoch": 0.39977052081271186, + "grad_norm": 311.8305969238281, + "learning_rate": 7.563212035869426e-06, + "loss": 14.6824, + "step": 197900 + }, + { + "epoch": 0.3997907214453957, + "grad_norm": 369.6077880859375, + "learning_rate": 7.5629123209111955e-06, + "loss": 21.0569, + "step": 197910 + }, + { + "epoch": 0.3998109220780795, + "grad_norm": 571.8080444335938, + "learning_rate": 7.56261259346164e-06, + "loss": 28.5138, + "step": 197920 + }, + { + "epoch": 0.3998311227107633, + "grad_norm": 815.8748168945312, + "learning_rate": 7.5623128535222224e-06, + "loss": 20.3018, + "step": 197930 + }, + { + "epoch": 0.39985132334344714, + "grad_norm": 466.9893493652344, + "learning_rate": 7.562013101094403e-06, + "loss": 11.5698, + "step": 197940 + }, + { + "epoch": 0.3998715239761309, + "grad_norm": 503.07440185546875, + "learning_rate": 7.561713336179642e-06, + "loss": 40.4079, + "step": 197950 + }, + { + "epoch": 0.3998917246088147, + "grad_norm": 1044.09619140625, + "learning_rate": 7.561413558779401e-06, + "loss": 39.9123, + "step": 197960 + }, + { + "epoch": 0.39991192524149854, + "grad_norm": 720.7599487304688, + "learning_rate": 7.5611137688951405e-06, + "loss": 27.5598, + "step": 197970 + }, + { + "epoch": 0.39993212587418236, + "grad_norm": 314.8182373046875, + "learning_rate": 7.560813966528323e-06, + "loss": 13.6287, + "step": 197980 + }, + { + "epoch": 0.3999523265068662, + "grad_norm": 356.9963073730469, + "learning_rate": 7.560514151680409e-06, + "loss": 16.692, + "step": 197990 + }, + { + "epoch": 0.39997252713955, + "grad_norm": 1151.5570068359375, + "learning_rate": 7.560214324352858e-06, + "loss": 32.3764, + "step": 198000 + }, + { + "epoch": 0.3999927277722338, + "grad_norm": 686.989013671875, + "learning_rate": 7.559914484547135e-06, + "loss": 26.3395, + "step": 198010 + }, + { + "epoch": 0.40001292840491764, + "grad_norm": 447.1755676269531, + "learning_rate": 7.559614632264698e-06, + "loss": 20.3976, + "step": 198020 + }, + { + "epoch": 0.40003312903760146, + "grad_norm": 280.5345764160156, + "learning_rate": 7.559314767507009e-06, + "loss": 30.6482, + "step": 198030 + }, + { + "epoch": 0.4000533296702853, + "grad_norm": 871.556396484375, + "learning_rate": 7.559014890275533e-06, + "loss": 19.0827, + "step": 198040 + }, + { + "epoch": 0.4000735303029691, + "grad_norm": 585.48828125, + "learning_rate": 7.5587150005717256e-06, + "loss": 32.5156, + "step": 198050 + }, + { + "epoch": 0.4000937309356529, + "grad_norm": 284.8545227050781, + "learning_rate": 7.558415098397054e-06, + "loss": 21.1683, + "step": 198060 + }, + { + "epoch": 0.40011393156833674, + "grad_norm": 694.2283325195312, + "learning_rate": 7.558115183752975e-06, + "loss": 16.6071, + "step": 198070 + }, + { + "epoch": 0.4001341322010205, + "grad_norm": 667.6007690429688, + "learning_rate": 7.557815256640954e-06, + "loss": 20.7687, + "step": 198080 + }, + { + "epoch": 0.40015433283370433, + "grad_norm": 318.7449645996094, + "learning_rate": 7.557515317062451e-06, + "loss": 
23.4089, + "step": 198090 + }, + { + "epoch": 0.40017453346638815, + "grad_norm": 1349.544921875, + "learning_rate": 7.55721536501893e-06, + "loss": 21.347, + "step": 198100 + }, + { + "epoch": 0.40019473409907197, + "grad_norm": 256.22198486328125, + "learning_rate": 7.556915400511853e-06, + "loss": 8.9897, + "step": 198110 + }, + { + "epoch": 0.4002149347317558, + "grad_norm": 86.92630767822266, + "learning_rate": 7.556615423542677e-06, + "loss": 28.3522, + "step": 198120 + }, + { + "epoch": 0.4002351353644396, + "grad_norm": 918.8314208984375, + "learning_rate": 7.5563154341128695e-06, + "loss": 21.2617, + "step": 198130 + }, + { + "epoch": 0.40025533599712343, + "grad_norm": 78.71343994140625, + "learning_rate": 7.55601543222389e-06, + "loss": 23.4041, + "step": 198140 + }, + { + "epoch": 0.40027553662980725, + "grad_norm": 143.3230743408203, + "learning_rate": 7.555715417877201e-06, + "loss": 16.1013, + "step": 198150 + }, + { + "epoch": 0.40029573726249107, + "grad_norm": 369.9261474609375, + "learning_rate": 7.5554153910742655e-06, + "loss": 19.2886, + "step": 198160 + }, + { + "epoch": 0.4003159378951749, + "grad_norm": 148.36329650878906, + "learning_rate": 7.555115351816545e-06, + "loss": 17.6967, + "step": 198170 + }, + { + "epoch": 0.4003361385278587, + "grad_norm": 618.6707153320312, + "learning_rate": 7.554815300105502e-06, + "loss": 23.1028, + "step": 198180 + }, + { + "epoch": 0.40035633916054253, + "grad_norm": 19.182809829711914, + "learning_rate": 7.5545152359426e-06, + "loss": 23.1774, + "step": 198190 + }, + { + "epoch": 0.40037653979322635, + "grad_norm": 145.69302368164062, + "learning_rate": 7.5542151593293e-06, + "loss": 14.8161, + "step": 198200 + }, + { + "epoch": 0.4003967404259101, + "grad_norm": 346.6979675292969, + "learning_rate": 7.553915070267065e-06, + "loss": 26.5464, + "step": 198210 + }, + { + "epoch": 0.40041694105859393, + "grad_norm": 475.8887939453125, + "learning_rate": 7.553614968757359e-06, + "loss": 18.5249, + "step": 198220 + }, + { + "epoch": 0.40043714169127775, + "grad_norm": 420.0718994140625, + "learning_rate": 7.553314854801641e-06, + "loss": 27.0639, + "step": 198230 + }, + { + "epoch": 0.4004573423239616, + "grad_norm": 45.04717254638672, + "learning_rate": 7.553014728401378e-06, + "loss": 21.8668, + "step": 198240 + }, + { + "epoch": 0.4004775429566454, + "grad_norm": 547.7006225585938, + "learning_rate": 7.55271458955803e-06, + "loss": 27.888, + "step": 198250 + }, + { + "epoch": 0.4004977435893292, + "grad_norm": 388.16845703125, + "learning_rate": 7.5524144382730605e-06, + "loss": 16.2497, + "step": 198260 + }, + { + "epoch": 0.40051794422201303, + "grad_norm": 276.40576171875, + "learning_rate": 7.552114274547933e-06, + "loss": 9.4322, + "step": 198270 + }, + { + "epoch": 0.40053814485469685, + "grad_norm": 989.9608154296875, + "learning_rate": 7.5518140983841095e-06, + "loss": 35.7655, + "step": 198280 + }, + { + "epoch": 0.4005583454873807, + "grad_norm": 530.4025268554688, + "learning_rate": 7.551513909783055e-06, + "loss": 20.6342, + "step": 198290 + }, + { + "epoch": 0.4005785461200645, + "grad_norm": 573.7498779296875, + "learning_rate": 7.55121370874623e-06, + "loss": 15.7636, + "step": 198300 + }, + { + "epoch": 0.4005987467527483, + "grad_norm": 514.5701293945312, + "learning_rate": 7.550913495275098e-06, + "loss": 22.8541, + "step": 198310 + }, + { + "epoch": 0.40061894738543213, + "grad_norm": 284.38702392578125, + "learning_rate": 7.550613269371124e-06, + "loss": 19.1488, + "step": 198320 + }, + { + "epoch": 
0.40063914801811595, + "grad_norm": 540.36767578125, + "learning_rate": 7.550313031035772e-06, + "loss": 14.1672, + "step": 198330 + }, + { + "epoch": 0.4006593486507997, + "grad_norm": 233.47023010253906, + "learning_rate": 7.550012780270499e-06, + "loss": 29.5194, + "step": 198340 + }, + { + "epoch": 0.40067954928348354, + "grad_norm": 634.5081787109375, + "learning_rate": 7.549712517076777e-06, + "loss": 20.172, + "step": 198350 + }, + { + "epoch": 0.40069974991616736, + "grad_norm": 816.7568359375, + "learning_rate": 7.5494122414560645e-06, + "loss": 20.3773, + "step": 198360 + }, + { + "epoch": 0.4007199505488512, + "grad_norm": 570.5978393554688, + "learning_rate": 7.549111953409827e-06, + "loss": 17.73, + "step": 198370 + }, + { + "epoch": 0.400740151181535, + "grad_norm": 329.0459289550781, + "learning_rate": 7.548811652939525e-06, + "loss": 18.9579, + "step": 198380 + }, + { + "epoch": 0.4007603518142188, + "grad_norm": 289.1005554199219, + "learning_rate": 7.548511340046625e-06, + "loss": 28.7145, + "step": 198390 + }, + { + "epoch": 0.40078055244690264, + "grad_norm": 455.5233154296875, + "learning_rate": 7.548211014732589e-06, + "loss": 15.8559, + "step": 198400 + }, + { + "epoch": 0.40080075307958646, + "grad_norm": 300.59869384765625, + "learning_rate": 7.547910676998883e-06, + "loss": 27.1662, + "step": 198410 + }, + { + "epoch": 0.4008209537122703, + "grad_norm": 676.0629272460938, + "learning_rate": 7.547610326846968e-06, + "loss": 20.6297, + "step": 198420 + }, + { + "epoch": 0.4008411543449541, + "grad_norm": 242.51876831054688, + "learning_rate": 7.547309964278311e-06, + "loss": 17.7566, + "step": 198430 + }, + { + "epoch": 0.4008613549776379, + "grad_norm": 141.97592163085938, + "learning_rate": 7.547009589294374e-06, + "loss": 16.042, + "step": 198440 + }, + { + "epoch": 0.40088155561032174, + "grad_norm": 27.422712326049805, + "learning_rate": 7.546709201896619e-06, + "loss": 10.796, + "step": 198450 + }, + { + "epoch": 0.40090175624300556, + "grad_norm": 127.43767547607422, + "learning_rate": 7.546408802086513e-06, + "loss": 16.8642, + "step": 198460 + }, + { + "epoch": 0.4009219568756893, + "grad_norm": 83.21980285644531, + "learning_rate": 7.546108389865521e-06, + "loss": 18.779, + "step": 198470 + }, + { + "epoch": 0.40094215750837314, + "grad_norm": 403.13580322265625, + "learning_rate": 7.545807965235103e-06, + "loss": 23.6962, + "step": 198480 + }, + { + "epoch": 0.40096235814105696, + "grad_norm": 24.013486862182617, + "learning_rate": 7.5455075281967295e-06, + "loss": 13.1861, + "step": 198490 + }, + { + "epoch": 0.4009825587737408, + "grad_norm": 0.0, + "learning_rate": 7.545207078751858e-06, + "loss": 21.689, + "step": 198500 + }, + { + "epoch": 0.4010027594064246, + "grad_norm": 506.35382080078125, + "learning_rate": 7.544906616901957e-06, + "loss": 16.2279, + "step": 198510 + }, + { + "epoch": 0.4010229600391084, + "grad_norm": 663.2824096679688, + "learning_rate": 7.544606142648489e-06, + "loss": 25.5661, + "step": 198520 + }, + { + "epoch": 0.40104316067179224, + "grad_norm": 71.92509460449219, + "learning_rate": 7.544305655992918e-06, + "loss": 30.7218, + "step": 198530 + }, + { + "epoch": 0.40106336130447606, + "grad_norm": 245.72608947753906, + "learning_rate": 7.544005156936713e-06, + "loss": 39.2497, + "step": 198540 + }, + { + "epoch": 0.4010835619371599, + "grad_norm": 454.83160400390625, + "learning_rate": 7.543704645481333e-06, + "loss": 22.8334, + "step": 198550 + }, + { + "epoch": 0.4011037625698437, + "grad_norm": 230.10459899902344, + 
"learning_rate": 7.543404121628246e-06, + "loss": 42.689, + "step": 198560 + }, + { + "epoch": 0.4011239632025275, + "grad_norm": 1047.18994140625, + "learning_rate": 7.543103585378917e-06, + "loss": 18.5602, + "step": 198570 + }, + { + "epoch": 0.40114416383521134, + "grad_norm": 450.0099792480469, + "learning_rate": 7.5428030367348075e-06, + "loss": 35.403, + "step": 198580 + }, + { + "epoch": 0.4011643644678951, + "grad_norm": 381.8701477050781, + "learning_rate": 7.542502475697385e-06, + "loss": 20.5552, + "step": 198590 + }, + { + "epoch": 0.4011845651005789, + "grad_norm": 155.0900421142578, + "learning_rate": 7.542201902268115e-06, + "loss": 17.0598, + "step": 198600 + }, + { + "epoch": 0.40120476573326275, + "grad_norm": 183.87039184570312, + "learning_rate": 7.541901316448459e-06, + "loss": 14.9127, + "step": 198610 + }, + { + "epoch": 0.40122496636594657, + "grad_norm": 467.0203552246094, + "learning_rate": 7.541600718239887e-06, + "loss": 13.6777, + "step": 198620 + }, + { + "epoch": 0.4012451669986304, + "grad_norm": 288.0802001953125, + "learning_rate": 7.541300107643858e-06, + "loss": 30.3727, + "step": 198630 + }, + { + "epoch": 0.4012653676313142, + "grad_norm": 256.5554504394531, + "learning_rate": 7.540999484661844e-06, + "loss": 17.2276, + "step": 198640 + }, + { + "epoch": 0.401285568263998, + "grad_norm": 321.2003173828125, + "learning_rate": 7.5406988492953045e-06, + "loss": 24.4281, + "step": 198650 + }, + { + "epoch": 0.40130576889668185, + "grad_norm": 98.70697021484375, + "learning_rate": 7.5403982015457065e-06, + "loss": 18.2068, + "step": 198660 + }, + { + "epoch": 0.40132596952936567, + "grad_norm": 342.7528991699219, + "learning_rate": 7.540097541414518e-06, + "loss": 26.4316, + "step": 198670 + }, + { + "epoch": 0.4013461701620495, + "grad_norm": 485.7228088378906, + "learning_rate": 7.5397968689032e-06, + "loss": 20.552, + "step": 198680 + }, + { + "epoch": 0.4013663707947333, + "grad_norm": 332.6778869628906, + "learning_rate": 7.539496184013222e-06, + "loss": 12.969, + "step": 198690 + }, + { + "epoch": 0.4013865714274171, + "grad_norm": 458.6863098144531, + "learning_rate": 7.539195486746047e-06, + "loss": 20.9644, + "step": 198700 + }, + { + "epoch": 0.40140677206010095, + "grad_norm": 325.9056091308594, + "learning_rate": 7.538894777103141e-06, + "loss": 14.1785, + "step": 198710 + }, + { + "epoch": 0.4014269726927847, + "grad_norm": 166.2456817626953, + "learning_rate": 7.538594055085971e-06, + "loss": 19.2282, + "step": 198720 + }, + { + "epoch": 0.40144717332546853, + "grad_norm": 163.84922790527344, + "learning_rate": 7.538293320696e-06, + "loss": 17.1688, + "step": 198730 + }, + { + "epoch": 0.40146737395815235, + "grad_norm": 269.4591369628906, + "learning_rate": 7.537992573934694e-06, + "loss": 17.306, + "step": 198740 + }, + { + "epoch": 0.40148757459083617, + "grad_norm": 124.59857177734375, + "learning_rate": 7.537691814803522e-06, + "loss": 13.5246, + "step": 198750 + }, + { + "epoch": 0.40150777522352, + "grad_norm": 431.02301025390625, + "learning_rate": 7.537391043303947e-06, + "loss": 20.7026, + "step": 198760 + }, + { + "epoch": 0.4015279758562038, + "grad_norm": 401.8933410644531, + "learning_rate": 7.537090259437436e-06, + "loss": 21.808, + "step": 198770 + }, + { + "epoch": 0.40154817648888763, + "grad_norm": 383.54058837890625, + "learning_rate": 7.536789463205455e-06, + "loss": 16.0117, + "step": 198780 + }, + { + "epoch": 0.40156837712157145, + "grad_norm": 258.8091735839844, + "learning_rate": 7.536488654609469e-06, + "loss": 
13.9646, + "step": 198790 + }, + { + "epoch": 0.40158857775425527, + "grad_norm": 128.9613037109375, + "learning_rate": 7.536187833650947e-06, + "loss": 25.6044, + "step": 198800 + }, + { + "epoch": 0.4016087783869391, + "grad_norm": 1039.2686767578125, + "learning_rate": 7.535887000331352e-06, + "loss": 39.5227, + "step": 198810 + }, + { + "epoch": 0.4016289790196229, + "grad_norm": 92.21083068847656, + "learning_rate": 7.53558615465215e-06, + "loss": 21.3274, + "step": 198820 + }, + { + "epoch": 0.40164917965230673, + "grad_norm": 507.3398742675781, + "learning_rate": 7.535285296614809e-06, + "loss": 22.0745, + "step": 198830 + }, + { + "epoch": 0.40166938028499055, + "grad_norm": 408.8777770996094, + "learning_rate": 7.534984426220795e-06, + "loss": 14.6964, + "step": 198840 + }, + { + "epoch": 0.4016895809176743, + "grad_norm": 532.9547729492188, + "learning_rate": 7.534683543471577e-06, + "loss": 27.8968, + "step": 198850 + }, + { + "epoch": 0.40170978155035814, + "grad_norm": 315.3031921386719, + "learning_rate": 7.534382648368617e-06, + "loss": 12.7511, + "step": 198860 + }, + { + "epoch": 0.40172998218304196, + "grad_norm": 578.9645385742188, + "learning_rate": 7.534081740913382e-06, + "loss": 29.401, + "step": 198870 + }, + { + "epoch": 0.4017501828157258, + "grad_norm": 537.0770874023438, + "learning_rate": 7.5337808211073415e-06, + "loss": 18.7072, + "step": 198880 + }, + { + "epoch": 0.4017703834484096, + "grad_norm": 600.1065673828125, + "learning_rate": 7.53347988895196e-06, + "loss": 13.651, + "step": 198890 + }, + { + "epoch": 0.4017905840810934, + "grad_norm": 227.85195922851562, + "learning_rate": 7.533178944448705e-06, + "loss": 24.1425, + "step": 198900 + }, + { + "epoch": 0.40181078471377724, + "grad_norm": 477.8645935058594, + "learning_rate": 7.532877987599043e-06, + "loss": 23.5492, + "step": 198910 + }, + { + "epoch": 0.40183098534646106, + "grad_norm": 130.61325073242188, + "learning_rate": 7.532577018404441e-06, + "loss": 21.4196, + "step": 198920 + }, + { + "epoch": 0.4018511859791449, + "grad_norm": 399.0851135253906, + "learning_rate": 7.532276036866366e-06, + "loss": 32.2595, + "step": 198930 + }, + { + "epoch": 0.4018713866118287, + "grad_norm": 641.951904296875, + "learning_rate": 7.531975042986285e-06, + "loss": 15.4523, + "step": 198940 + }, + { + "epoch": 0.4018915872445125, + "grad_norm": 821.94873046875, + "learning_rate": 7.531674036765662e-06, + "loss": 13.3444, + "step": 198950 + }, + { + "epoch": 0.40191178787719634, + "grad_norm": 337.8610534667969, + "learning_rate": 7.531373018205971e-06, + "loss": 7.4867, + "step": 198960 + }, + { + "epoch": 0.40193198850988016, + "grad_norm": 164.3924102783203, + "learning_rate": 7.531071987308672e-06, + "loss": 21.7905, + "step": 198970 + }, + { + "epoch": 0.4019521891425639, + "grad_norm": 172.9576873779297, + "learning_rate": 7.5307709440752364e-06, + "loss": 16.716, + "step": 198980 + }, + { + "epoch": 0.40197238977524774, + "grad_norm": 155.1348876953125, + "learning_rate": 7.530469888507131e-06, + "loss": 14.815, + "step": 198990 + }, + { + "epoch": 0.40199259040793156, + "grad_norm": 260.4828186035156, + "learning_rate": 7.530168820605819e-06, + "loss": 35.3219, + "step": 199000 + }, + { + "epoch": 0.4020127910406154, + "grad_norm": 258.833984375, + "learning_rate": 7.529867740372774e-06, + "loss": 20.1072, + "step": 199010 + }, + { + "epoch": 0.4020329916732992, + "grad_norm": 695.7155151367188, + "learning_rate": 7.52956664780946e-06, + "loss": 9.5076, + "step": 199020 + }, + { + "epoch": 
0.402053192305983, + "grad_norm": 399.386474609375, + "learning_rate": 7.529265542917343e-06, + "loss": 22.2962, + "step": 199030 + }, + { + "epoch": 0.40207339293866684, + "grad_norm": 462.0411376953125, + "learning_rate": 7.528964425697895e-06, + "loss": 24.6419, + "step": 199040 + }, + { + "epoch": 0.40209359357135066, + "grad_norm": 347.6090087890625, + "learning_rate": 7.5286632961525806e-06, + "loss": 30.8706, + "step": 199050 + }, + { + "epoch": 0.4021137942040345, + "grad_norm": 380.2854919433594, + "learning_rate": 7.528362154282868e-06, + "loss": 16.8862, + "step": 199060 + }, + { + "epoch": 0.4021339948367183, + "grad_norm": 551.378662109375, + "learning_rate": 7.528061000090223e-06, + "loss": 19.1845, + "step": 199070 + }, + { + "epoch": 0.4021541954694021, + "grad_norm": 264.61962890625, + "learning_rate": 7.527759833576118e-06, + "loss": 29.0762, + "step": 199080 + }, + { + "epoch": 0.40217439610208594, + "grad_norm": 918.442626953125, + "learning_rate": 7.527458654742017e-06, + "loss": 23.0314, + "step": 199090 + }, + { + "epoch": 0.40219459673476976, + "grad_norm": 550.7683715820312, + "learning_rate": 7.527157463589389e-06, + "loss": 26.4817, + "step": 199100 + }, + { + "epoch": 0.4022147973674535, + "grad_norm": 247.42665100097656, + "learning_rate": 7.526856260119702e-06, + "loss": 9.8326, + "step": 199110 + }, + { + "epoch": 0.40223499800013734, + "grad_norm": 532.6323852539062, + "learning_rate": 7.526555044334424e-06, + "loss": 21.7748, + "step": 199120 + }, + { + "epoch": 0.40225519863282116, + "grad_norm": 1574.6651611328125, + "learning_rate": 7.526253816235023e-06, + "loss": 20.4568, + "step": 199130 + }, + { + "epoch": 0.402275399265505, + "grad_norm": 65.26931762695312, + "learning_rate": 7.525952575822967e-06, + "loss": 18.6802, + "step": 199140 + }, + { + "epoch": 0.4022955998981888, + "grad_norm": 211.4417266845703, + "learning_rate": 7.525651323099725e-06, + "loss": 21.3643, + "step": 199150 + }, + { + "epoch": 0.4023158005308726, + "grad_norm": 315.5894775390625, + "learning_rate": 7.525350058066765e-06, + "loss": 14.3725, + "step": 199160 + }, + { + "epoch": 0.40233600116355644, + "grad_norm": 291.6868591308594, + "learning_rate": 7.525048780725554e-06, + "loss": 10.4822, + "step": 199170 + }, + { + "epoch": 0.40235620179624026, + "grad_norm": 387.8648986816406, + "learning_rate": 7.524747491077563e-06, + "loss": 24.4473, + "step": 199180 + }, + { + "epoch": 0.4023764024289241, + "grad_norm": 533.245361328125, + "learning_rate": 7.524446189124259e-06, + "loss": 19.3918, + "step": 199190 + }, + { + "epoch": 0.4023966030616079, + "grad_norm": 908.2100219726562, + "learning_rate": 7.52414487486711e-06, + "loss": 19.0767, + "step": 199200 + }, + { + "epoch": 0.4024168036942917, + "grad_norm": 717.2238159179688, + "learning_rate": 7.523843548307583e-06, + "loss": 16.1978, + "step": 199210 + }, + { + "epoch": 0.40243700432697554, + "grad_norm": 149.34889221191406, + "learning_rate": 7.523542209447152e-06, + "loss": 9.9471, + "step": 199220 + }, + { + "epoch": 0.4024572049596593, + "grad_norm": 330.5276794433594, + "learning_rate": 7.5232408582872805e-06, + "loss": 7.4752, + "step": 199230 + }, + { + "epoch": 0.40247740559234313, + "grad_norm": 464.214599609375, + "learning_rate": 7.52293949482944e-06, + "loss": 32.6366, + "step": 199240 + }, + { + "epoch": 0.40249760622502695, + "grad_norm": 396.9613952636719, + "learning_rate": 7.522638119075097e-06, + "loss": 22.1046, + "step": 199250 + }, + { + "epoch": 0.40251780685771077, + "grad_norm": 266.44549560546875, 
+ "learning_rate": 7.522336731025723e-06, + "loss": 28.5606, + "step": 199260 + }, + { + "epoch": 0.4025380074903946, + "grad_norm": 360.39642333984375, + "learning_rate": 7.522035330682784e-06, + "loss": 21.5936, + "step": 199270 + }, + { + "epoch": 0.4025582081230784, + "grad_norm": 866.492919921875, + "learning_rate": 7.521733918047753e-06, + "loss": 16.0734, + "step": 199280 + }, + { + "epoch": 0.40257840875576223, + "grad_norm": 750.3490600585938, + "learning_rate": 7.5214324931220936e-06, + "loss": 22.1805, + "step": 199290 + }, + { + "epoch": 0.40259860938844605, + "grad_norm": 543.060791015625, + "learning_rate": 7.521131055907283e-06, + "loss": 13.1011, + "step": 199300 + }, + { + "epoch": 0.40261881002112987, + "grad_norm": 199.49288940429688, + "learning_rate": 7.520829606404781e-06, + "loss": 20.3131, + "step": 199310 + }, + { + "epoch": 0.4026390106538137, + "grad_norm": 691.2821655273438, + "learning_rate": 7.520528144616063e-06, + "loss": 17.9429, + "step": 199320 + }, + { + "epoch": 0.4026592112864975, + "grad_norm": 686.4976806640625, + "learning_rate": 7.520226670542597e-06, + "loss": 19.5697, + "step": 199330 + }, + { + "epoch": 0.40267941191918133, + "grad_norm": 581.8713989257812, + "learning_rate": 7.519925184185851e-06, + "loss": 20.2, + "step": 199340 + }, + { + "epoch": 0.40269961255186515, + "grad_norm": 351.9085388183594, + "learning_rate": 7.5196236855472945e-06, + "loss": 11.4018, + "step": 199350 + }, + { + "epoch": 0.4027198131845489, + "grad_norm": 242.6192169189453, + "learning_rate": 7.519322174628399e-06, + "loss": 30.2951, + "step": 199360 + }, + { + "epoch": 0.40274001381723273, + "grad_norm": 330.2269287109375, + "learning_rate": 7.519020651430633e-06, + "loss": 14.6469, + "step": 199370 + }, + { + "epoch": 0.40276021444991655, + "grad_norm": 130.55140686035156, + "learning_rate": 7.518719115955464e-06, + "loss": 13.2948, + "step": 199380 + }, + { + "epoch": 0.4027804150826004, + "grad_norm": 163.21871948242188, + "learning_rate": 7.518417568204366e-06, + "loss": 21.737, + "step": 199390 + }, + { + "epoch": 0.4028006157152842, + "grad_norm": 394.7507019042969, + "learning_rate": 7.518116008178805e-06, + "loss": 22.8714, + "step": 199400 + }, + { + "epoch": 0.402820816347968, + "grad_norm": 305.2882080078125, + "learning_rate": 7.517814435880252e-06, + "loss": 23.5342, + "step": 199410 + }, + { + "epoch": 0.40284101698065183, + "grad_norm": 523.1074829101562, + "learning_rate": 7.517512851310178e-06, + "loss": 18.615, + "step": 199420 + }, + { + "epoch": 0.40286121761333565, + "grad_norm": 75.26525115966797, + "learning_rate": 7.517211254470049e-06, + "loss": 7.9642, + "step": 199430 + }, + { + "epoch": 0.4028814182460195, + "grad_norm": 309.88348388671875, + "learning_rate": 7.51690964536134e-06, + "loss": 23.2928, + "step": 199440 + }, + { + "epoch": 0.4029016188787033, + "grad_norm": 556.8353271484375, + "learning_rate": 7.516608023985516e-06, + "loss": 34.7362, + "step": 199450 + }, + { + "epoch": 0.4029218195113871, + "grad_norm": 327.7669982910156, + "learning_rate": 7.516306390344052e-06, + "loss": 24.0189, + "step": 199460 + }, + { + "epoch": 0.40294202014407093, + "grad_norm": 654.4342041015625, + "learning_rate": 7.516004744438417e-06, + "loss": 23.8425, + "step": 199470 + }, + { + "epoch": 0.40296222077675475, + "grad_norm": 313.38922119140625, + "learning_rate": 7.515703086270078e-06, + "loss": 21.5069, + "step": 199480 + }, + { + "epoch": 0.4029824214094385, + "grad_norm": 588.447509765625, + "learning_rate": 7.515401415840509e-06, + 
"loss": 22.1496, + "step": 199490 + }, + { + "epoch": 0.40300262204212234, + "grad_norm": 579.3743286132812, + "learning_rate": 7.515099733151177e-06, + "loss": 16.3075, + "step": 199500 + }, + { + "epoch": 0.40302282267480616, + "grad_norm": 661.6896362304688, + "learning_rate": 7.514798038203553e-06, + "loss": 34.3176, + "step": 199510 + }, + { + "epoch": 0.40304302330749, + "grad_norm": 94.56706237792969, + "learning_rate": 7.514496330999111e-06, + "loss": 22.348, + "step": 199520 + }, + { + "epoch": 0.4030632239401738, + "grad_norm": 840.5774536132812, + "learning_rate": 7.514194611539316e-06, + "loss": 15.5795, + "step": 199530 + }, + { + "epoch": 0.4030834245728576, + "grad_norm": 887.9461669921875, + "learning_rate": 7.513892879825643e-06, + "loss": 26.4058, + "step": 199540 + }, + { + "epoch": 0.40310362520554144, + "grad_norm": 363.53839111328125, + "learning_rate": 7.5135911358595615e-06, + "loss": 22.4984, + "step": 199550 + }, + { + "epoch": 0.40312382583822526, + "grad_norm": 180.75489807128906, + "learning_rate": 7.513289379642541e-06, + "loss": 16.9116, + "step": 199560 + }, + { + "epoch": 0.4031440264709091, + "grad_norm": 2551.956298828125, + "learning_rate": 7.512987611176052e-06, + "loss": 28.4475, + "step": 199570 + }, + { + "epoch": 0.4031642271035929, + "grad_norm": 702.0039672851562, + "learning_rate": 7.512685830461568e-06, + "loss": 20.512, + "step": 199580 + }, + { + "epoch": 0.4031844277362767, + "grad_norm": 497.1060791015625, + "learning_rate": 7.5123840375005565e-06, + "loss": 23.2045, + "step": 199590 + }, + { + "epoch": 0.40320462836896054, + "grad_norm": 1770.20166015625, + "learning_rate": 7.512082232294491e-06, + "loss": 37.5429, + "step": 199600 + }, + { + "epoch": 0.40322482900164436, + "grad_norm": 0.0, + "learning_rate": 7.51178041484484e-06, + "loss": 31.8172, + "step": 199610 + }, + { + "epoch": 0.4032450296343281, + "grad_norm": 513.242919921875, + "learning_rate": 7.511478585153078e-06, + "loss": 21.8842, + "step": 199620 + }, + { + "epoch": 0.40326523026701194, + "grad_norm": 404.1490478515625, + "learning_rate": 7.511176743220672e-06, + "loss": 24.6893, + "step": 199630 + }, + { + "epoch": 0.40328543089969576, + "grad_norm": 112.2563705444336, + "learning_rate": 7.510874889049096e-06, + "loss": 16.5327, + "step": 199640 + }, + { + "epoch": 0.4033056315323796, + "grad_norm": 346.9835510253906, + "learning_rate": 7.51057302263982e-06, + "loss": 18.7282, + "step": 199650 + }, + { + "epoch": 0.4033258321650634, + "grad_norm": 1143.9215087890625, + "learning_rate": 7.510271143994317e-06, + "loss": 23.472, + "step": 199660 + }, + { + "epoch": 0.4033460327977472, + "grad_norm": 0.3586396276950836, + "learning_rate": 7.509969253114056e-06, + "loss": 16.7748, + "step": 199670 + }, + { + "epoch": 0.40336623343043104, + "grad_norm": 607.0986938476562, + "learning_rate": 7.509667350000509e-06, + "loss": 16.4035, + "step": 199680 + }, + { + "epoch": 0.40338643406311486, + "grad_norm": 347.8395690917969, + "learning_rate": 7.509365434655147e-06, + "loss": 17.4465, + "step": 199690 + }, + { + "epoch": 0.4034066346957987, + "grad_norm": 252.93080139160156, + "learning_rate": 7.509063507079443e-06, + "loss": 17.7776, + "step": 199700 + }, + { + "epoch": 0.4034268353284825, + "grad_norm": 140.82369995117188, + "learning_rate": 7.508761567274868e-06, + "loss": 26.0802, + "step": 199710 + }, + { + "epoch": 0.4034470359611663, + "grad_norm": 346.9831237792969, + "learning_rate": 7.5084596152428925e-06, + "loss": 17.0384, + "step": 199720 + }, + { + "epoch": 
0.40346723659385014, + "grad_norm": 828.2061767578125, + "learning_rate": 7.508157650984991e-06, + "loss": 25.8712, + "step": 199730 + }, + { + "epoch": 0.40348743722653396, + "grad_norm": 204.78436279296875, + "learning_rate": 7.507855674502631e-06, + "loss": 19.5967, + "step": 199740 + }, + { + "epoch": 0.4035076378592177, + "grad_norm": 159.3317413330078, + "learning_rate": 7.507553685797288e-06, + "loss": 17.1598, + "step": 199750 + }, + { + "epoch": 0.40352783849190155, + "grad_norm": 1737.2420654296875, + "learning_rate": 7.507251684870433e-06, + "loss": 18.5982, + "step": 199760 + }, + { + "epoch": 0.40354803912458537, + "grad_norm": 303.4064636230469, + "learning_rate": 7.506949671723534e-06, + "loss": 21.3745, + "step": 199770 + }, + { + "epoch": 0.4035682397572692, + "grad_norm": 1069.7166748046875, + "learning_rate": 7.506647646358069e-06, + "loss": 22.2233, + "step": 199780 + }, + { + "epoch": 0.403588440389953, + "grad_norm": 639.1734008789062, + "learning_rate": 7.5063456087755085e-06, + "loss": 28.4787, + "step": 199790 + }, + { + "epoch": 0.4036086410226368, + "grad_norm": 151.21701049804688, + "learning_rate": 7.5060435589773215e-06, + "loss": 21.6127, + "step": 199800 + }, + { + "epoch": 0.40362884165532065, + "grad_norm": 479.32763671875, + "learning_rate": 7.505741496964984e-06, + "loss": 23.1587, + "step": 199810 + }, + { + "epoch": 0.40364904228800447, + "grad_norm": 754.65234375, + "learning_rate": 7.505439422739964e-06, + "loss": 18.3213, + "step": 199820 + }, + { + "epoch": 0.4036692429206883, + "grad_norm": 440.26763916015625, + "learning_rate": 7.505137336303739e-06, + "loss": 42.7359, + "step": 199830 + }, + { + "epoch": 0.4036894435533721, + "grad_norm": 416.4516906738281, + "learning_rate": 7.504835237657776e-06, + "loss": 25.8692, + "step": 199840 + }, + { + "epoch": 0.4037096441860559, + "grad_norm": 772.91552734375, + "learning_rate": 7.5045331268035505e-06, + "loss": 26.3638, + "step": 199850 + }, + { + "epoch": 0.40372984481873975, + "grad_norm": 1250.84765625, + "learning_rate": 7.5042310037425345e-06, + "loss": 34.9652, + "step": 199860 + }, + { + "epoch": 0.4037500454514235, + "grad_norm": 749.7967529296875, + "learning_rate": 7.5039288684762e-06, + "loss": 24.5819, + "step": 199870 + }, + { + "epoch": 0.40377024608410733, + "grad_norm": 0.0, + "learning_rate": 7.50362672100602e-06, + "loss": 19.5698, + "step": 199880 + }, + { + "epoch": 0.40379044671679115, + "grad_norm": 914.7378540039062, + "learning_rate": 7.503324561333467e-06, + "loss": 18.7178, + "step": 199890 + }, + { + "epoch": 0.40381064734947497, + "grad_norm": 216.0771026611328, + "learning_rate": 7.503022389460014e-06, + "loss": 20.7132, + "step": 199900 + }, + { + "epoch": 0.4038308479821588, + "grad_norm": 357.29656982421875, + "learning_rate": 7.502720205387133e-06, + "loss": 19.8191, + "step": 199910 + }, + { + "epoch": 0.4038510486148426, + "grad_norm": 609.9511108398438, + "learning_rate": 7.5024180091162976e-06, + "loss": 20.7704, + "step": 199920 + }, + { + "epoch": 0.40387124924752643, + "grad_norm": 3.1290600299835205, + "learning_rate": 7.50211580064898e-06, + "loss": 19.2354, + "step": 199930 + }, + { + "epoch": 0.40389144988021025, + "grad_norm": 367.80419921875, + "learning_rate": 7.501813579986655e-06, + "loss": 20.0358, + "step": 199940 + }, + { + "epoch": 0.40391165051289407, + "grad_norm": 107.23719787597656, + "learning_rate": 7.501511347130792e-06, + "loss": 19.2733, + "step": 199950 + }, + { + "epoch": 0.4039318511455779, + "grad_norm": 326.3555908203125, + 
"learning_rate": 7.501209102082867e-06, + "loss": 10.1754, + "step": 199960 + }, + { + "epoch": 0.4039520517782617, + "grad_norm": 425.93548583984375, + "learning_rate": 7.500906844844352e-06, + "loss": 24.8486, + "step": 199970 + }, + { + "epoch": 0.40397225241094553, + "grad_norm": 371.8782653808594, + "learning_rate": 7.5006045754167216e-06, + "loss": 17.8385, + "step": 199980 + }, + { + "epoch": 0.40399245304362935, + "grad_norm": 564.3142700195312, + "learning_rate": 7.5003022938014445e-06, + "loss": 23.0844, + "step": 199990 + }, + { + "epoch": 0.4040126536763131, + "grad_norm": 365.4569396972656, + "learning_rate": 7.500000000000001e-06, + "loss": 27.8551, + "step": 200000 + }, + { + "epoch": 0.40403285430899694, + "grad_norm": 907.5307006835938, + "learning_rate": 7.499697694013859e-06, + "loss": 19.9156, + "step": 200010 + }, + { + "epoch": 0.40405305494168076, + "grad_norm": 483.8946228027344, + "learning_rate": 7.499395375844494e-06, + "loss": 12.2011, + "step": 200020 + }, + { + "epoch": 0.4040732555743646, + "grad_norm": 534.7166748046875, + "learning_rate": 7.49909304549338e-06, + "loss": 25.33, + "step": 200030 + }, + { + "epoch": 0.4040934562070484, + "grad_norm": 531.9784545898438, + "learning_rate": 7.498790702961987e-06, + "loss": 9.2343, + "step": 200040 + }, + { + "epoch": 0.4041136568397322, + "grad_norm": 289.97991943359375, + "learning_rate": 7.498488348251793e-06, + "loss": 26.1014, + "step": 200050 + }, + { + "epoch": 0.40413385747241604, + "grad_norm": 354.9217834472656, + "learning_rate": 7.498185981364268e-06, + "loss": 18.9162, + "step": 200060 + }, + { + "epoch": 0.40415405810509986, + "grad_norm": 0.0, + "learning_rate": 7.497883602300891e-06, + "loss": 23.4203, + "step": 200070 + }, + { + "epoch": 0.4041742587377837, + "grad_norm": 511.7737731933594, + "learning_rate": 7.49758121106313e-06, + "loss": 19.8826, + "step": 200080 + }, + { + "epoch": 0.4041944593704675, + "grad_norm": 74.34274291992188, + "learning_rate": 7.49727880765246e-06, + "loss": 44.2725, + "step": 200090 + }, + { + "epoch": 0.4042146600031513, + "grad_norm": 709.9552612304688, + "learning_rate": 7.496976392070358e-06, + "loss": 46.4911, + "step": 200100 + }, + { + "epoch": 0.40423486063583514, + "grad_norm": 334.0511169433594, + "learning_rate": 7.496673964318295e-06, + "loss": 8.786, + "step": 200110 + }, + { + "epoch": 0.40425506126851896, + "grad_norm": 404.2134704589844, + "learning_rate": 7.496371524397747e-06, + "loss": 48.092, + "step": 200120 + }, + { + "epoch": 0.4042752619012027, + "grad_norm": 151.0152130126953, + "learning_rate": 7.496069072310185e-06, + "loss": 13.3238, + "step": 200130 + }, + { + "epoch": 0.40429546253388654, + "grad_norm": 386.9094543457031, + "learning_rate": 7.495766608057087e-06, + "loss": 19.2344, + "step": 200140 + }, + { + "epoch": 0.40431566316657036, + "grad_norm": 292.6839294433594, + "learning_rate": 7.495464131639923e-06, + "loss": 12.1242, + "step": 200150 + }, + { + "epoch": 0.4043358637992542, + "grad_norm": 356.8404235839844, + "learning_rate": 7.495161643060171e-06, + "loss": 26.1107, + "step": 200160 + }, + { + "epoch": 0.404356064431938, + "grad_norm": 375.3106994628906, + "learning_rate": 7.494859142319304e-06, + "loss": 30.0069, + "step": 200170 + }, + { + "epoch": 0.4043762650646218, + "grad_norm": 203.5823516845703, + "learning_rate": 7.494556629418796e-06, + "loss": 29.5559, + "step": 200180 + }, + { + "epoch": 0.40439646569730564, + "grad_norm": 428.19873046875, + "learning_rate": 7.494254104360121e-06, + "loss": 25.7056, + "step": 
200190 + }, + { + "epoch": 0.40441666632998946, + "grad_norm": 584.8751220703125, + "learning_rate": 7.493951567144755e-06, + "loss": 16.5882, + "step": 200200 + }, + { + "epoch": 0.4044368669626733, + "grad_norm": 160.39398193359375, + "learning_rate": 7.493649017774172e-06, + "loss": 15.0911, + "step": 200210 + }, + { + "epoch": 0.4044570675953571, + "grad_norm": 42.888275146484375, + "learning_rate": 7.493346456249844e-06, + "loss": 16.9504, + "step": 200220 + }, + { + "epoch": 0.4044772682280409, + "grad_norm": 650.8150634765625, + "learning_rate": 7.493043882573249e-06, + "loss": 14.835, + "step": 200230 + }, + { + "epoch": 0.40449746886072474, + "grad_norm": 686.9885864257812, + "learning_rate": 7.49274129674586e-06, + "loss": 30.2144, + "step": 200240 + }, + { + "epoch": 0.40451766949340856, + "grad_norm": 366.8212585449219, + "learning_rate": 7.4924386987691525e-06, + "loss": 17.2971, + "step": 200250 + }, + { + "epoch": 0.4045378701260923, + "grad_norm": 526.153564453125, + "learning_rate": 7.492136088644601e-06, + "loss": 14.0622, + "step": 200260 + }, + { + "epoch": 0.40455807075877614, + "grad_norm": 352.774658203125, + "learning_rate": 7.4918334663736805e-06, + "loss": 26.5112, + "step": 200270 + }, + { + "epoch": 0.40457827139145996, + "grad_norm": 319.0962829589844, + "learning_rate": 7.491530831957866e-06, + "loss": 26.2128, + "step": 200280 + }, + { + "epoch": 0.4045984720241438, + "grad_norm": 406.3224182128906, + "learning_rate": 7.491228185398633e-06, + "loss": 26.2267, + "step": 200290 + }, + { + "epoch": 0.4046186726568276, + "grad_norm": 596.3237915039062, + "learning_rate": 7.490925526697455e-06, + "loss": 33.5153, + "step": 200300 + }, + { + "epoch": 0.4046388732895114, + "grad_norm": 728.4494018554688, + "learning_rate": 7.490622855855808e-06, + "loss": 34.1726, + "step": 200310 + }, + { + "epoch": 0.40465907392219524, + "grad_norm": 289.8499755859375, + "learning_rate": 7.490320172875169e-06, + "loss": 24.2974, + "step": 200320 + }, + { + "epoch": 0.40467927455487906, + "grad_norm": 2.1218676567077637, + "learning_rate": 7.490017477757009e-06, + "loss": 15.4083, + "step": 200330 + }, + { + "epoch": 0.4046994751875629, + "grad_norm": 231.6231689453125, + "learning_rate": 7.489714770502807e-06, + "loss": 20.2343, + "step": 200340 + }, + { + "epoch": 0.4047196758202467, + "grad_norm": 241.0740966796875, + "learning_rate": 7.489412051114038e-06, + "loss": 21.3217, + "step": 200350 + }, + { + "epoch": 0.4047398764529305, + "grad_norm": 409.89190673828125, + "learning_rate": 7.4891093195921764e-06, + "loss": 12.4392, + "step": 200360 + }, + { + "epoch": 0.40476007708561434, + "grad_norm": 415.6175537109375, + "learning_rate": 7.488806575938697e-06, + "loss": 13.2343, + "step": 200370 + }, + { + "epoch": 0.40478027771829816, + "grad_norm": 519.8626708984375, + "learning_rate": 7.488503820155075e-06, + "loss": 20.2115, + "step": 200380 + }, + { + "epoch": 0.40480047835098193, + "grad_norm": 227.9258270263672, + "learning_rate": 7.488201052242791e-06, + "loss": 28.8321, + "step": 200390 + }, + { + "epoch": 0.40482067898366575, + "grad_norm": 391.5074768066406, + "learning_rate": 7.487898272203314e-06, + "loss": 19.1745, + "step": 200400 + }, + { + "epoch": 0.40484087961634957, + "grad_norm": 165.18077087402344, + "learning_rate": 7.487595480038122e-06, + "loss": 23.5488, + "step": 200410 + }, + { + "epoch": 0.4048610802490334, + "grad_norm": 390.2011413574219, + "learning_rate": 7.487292675748693e-06, + "loss": 22.4712, + "step": 200420 + }, + { + "epoch": 
0.4048812808817172, + "grad_norm": 182.7901153564453, + "learning_rate": 7.4869898593365e-06, + "loss": 15.2531, + "step": 200430 + }, + { + "epoch": 0.40490148151440103, + "grad_norm": 410.7278747558594, + "learning_rate": 7.4866870308030215e-06, + "loss": 26.6576, + "step": 200440 + }, + { + "epoch": 0.40492168214708485, + "grad_norm": 160.73777770996094, + "learning_rate": 7.486384190149731e-06, + "loss": 29.0196, + "step": 200450 + }, + { + "epoch": 0.40494188277976867, + "grad_norm": 514.1658325195312, + "learning_rate": 7.486081337378106e-06, + "loss": 15.2621, + "step": 200460 + }, + { + "epoch": 0.4049620834124525, + "grad_norm": 843.35400390625, + "learning_rate": 7.485778472489622e-06, + "loss": 16.1729, + "step": 200470 + }, + { + "epoch": 0.4049822840451363, + "grad_norm": 721.1657104492188, + "learning_rate": 7.485475595485756e-06, + "loss": 17.5695, + "step": 200480 + }, + { + "epoch": 0.40500248467782013, + "grad_norm": 520.6153564453125, + "learning_rate": 7.4851727063679806e-06, + "loss": 9.6996, + "step": 200490 + }, + { + "epoch": 0.40502268531050395, + "grad_norm": 641.8265991210938, + "learning_rate": 7.484869805137778e-06, + "loss": 26.9172, + "step": 200500 + }, + { + "epoch": 0.4050428859431877, + "grad_norm": 266.42669677734375, + "learning_rate": 7.48456689179662e-06, + "loss": 18.7096, + "step": 200510 + }, + { + "epoch": 0.40506308657587153, + "grad_norm": 246.48928833007812, + "learning_rate": 7.484263966345984e-06, + "loss": 13.7408, + "step": 200520 + }, + { + "epoch": 0.40508328720855535, + "grad_norm": 632.6735229492188, + "learning_rate": 7.483961028787346e-06, + "loss": 16.9427, + "step": 200530 + }, + { + "epoch": 0.4051034878412392, + "grad_norm": 308.1974182128906, + "learning_rate": 7.483658079122185e-06, + "loss": 16.8318, + "step": 200540 + }, + { + "epoch": 0.405123688473923, + "grad_norm": 579.98291015625, + "learning_rate": 7.483355117351976e-06, + "loss": 19.9662, + "step": 200550 + }, + { + "epoch": 0.4051438891066068, + "grad_norm": 698.7310791015625, + "learning_rate": 7.483052143478193e-06, + "loss": 17.7118, + "step": 200560 + }, + { + "epoch": 0.40516408973929063, + "grad_norm": 295.27142333984375, + "learning_rate": 7.482749157502317e-06, + "loss": 29.2573, + "step": 200570 + }, + { + "epoch": 0.40518429037197445, + "grad_norm": 530.3827514648438, + "learning_rate": 7.482446159425822e-06, + "loss": 10.686, + "step": 200580 + }, + { + "epoch": 0.4052044910046583, + "grad_norm": 868.5879516601562, + "learning_rate": 7.482143149250185e-06, + "loss": 24.0939, + "step": 200590 + }, + { + "epoch": 0.4052246916373421, + "grad_norm": 235.33192443847656, + "learning_rate": 7.481840126976885e-06, + "loss": 11.6707, + "step": 200600 + }, + { + "epoch": 0.4052448922700259, + "grad_norm": 474.0995788574219, + "learning_rate": 7.481537092607396e-06, + "loss": 15.2922, + "step": 200610 + }, + { + "epoch": 0.40526509290270973, + "grad_norm": 6.325074672698975, + "learning_rate": 7.4812340461431965e-06, + "loss": 24.3909, + "step": 200620 + }, + { + "epoch": 0.40528529353539355, + "grad_norm": 270.37689208984375, + "learning_rate": 7.480930987585763e-06, + "loss": 17.6095, + "step": 200630 + }, + { + "epoch": 0.4053054941680773, + "grad_norm": 1066.9154052734375, + "learning_rate": 7.480627916936574e-06, + "loss": 26.306, + "step": 200640 + }, + { + "epoch": 0.40532569480076114, + "grad_norm": 179.23202514648438, + "learning_rate": 7.480324834197103e-06, + "loss": 21.1797, + "step": 200650 + }, + { + "epoch": 0.40534589543344496, + "grad_norm": 
63.762760162353516, + "learning_rate": 7.480021739368831e-06, + "loss": 17.4896, + "step": 200660 + }, + { + "epoch": 0.4053660960661288, + "grad_norm": 713.467041015625, + "learning_rate": 7.479718632453233e-06, + "loss": 21.3761, + "step": 200670 + }, + { + "epoch": 0.4053862966988126, + "grad_norm": 127.81704711914062, + "learning_rate": 7.4794155134517885e-06, + "loss": 17.1732, + "step": 200680 + }, + { + "epoch": 0.4054064973314964, + "grad_norm": 322.3304138183594, + "learning_rate": 7.479112382365973e-06, + "loss": 24.931, + "step": 200690 + }, + { + "epoch": 0.40542669796418024, + "grad_norm": 431.6565246582031, + "learning_rate": 7.478809239197264e-06, + "loss": 30.5781, + "step": 200700 + }, + { + "epoch": 0.40544689859686406, + "grad_norm": 237.55812072753906, + "learning_rate": 7.47850608394714e-06, + "loss": 14.3789, + "step": 200710 + }, + { + "epoch": 0.4054670992295479, + "grad_norm": 520.4130859375, + "learning_rate": 7.478202916617077e-06, + "loss": 12.677, + "step": 200720 + }, + { + "epoch": 0.4054872998622317, + "grad_norm": 1962.9849853515625, + "learning_rate": 7.477899737208555e-06, + "loss": 30.0419, + "step": 200730 + }, + { + "epoch": 0.4055075004949155, + "grad_norm": 549.2138061523438, + "learning_rate": 7.477596545723049e-06, + "loss": 23.2541, + "step": 200740 + }, + { + "epoch": 0.40552770112759934, + "grad_norm": 294.5437316894531, + "learning_rate": 7.477293342162038e-06, + "loss": 20.2328, + "step": 200750 + }, + { + "epoch": 0.40554790176028316, + "grad_norm": 220.3087615966797, + "learning_rate": 7.476990126527e-06, + "loss": 17.5602, + "step": 200760 + }, + { + "epoch": 0.4055681023929669, + "grad_norm": 121.65559387207031, + "learning_rate": 7.476686898819414e-06, + "loss": 8.0995, + "step": 200770 + }, + { + "epoch": 0.40558830302565074, + "grad_norm": 59.63343048095703, + "learning_rate": 7.476383659040754e-06, + "loss": 20.0355, + "step": 200780 + }, + { + "epoch": 0.40560850365833456, + "grad_norm": 244.6937255859375, + "learning_rate": 7.476080407192502e-06, + "loss": 16.1754, + "step": 200790 + }, + { + "epoch": 0.4056287042910184, + "grad_norm": 117.62286376953125, + "learning_rate": 7.475777143276133e-06, + "loss": 14.7477, + "step": 200800 + }, + { + "epoch": 0.4056489049237022, + "grad_norm": 641.1858520507812, + "learning_rate": 7.475473867293127e-06, + "loss": 23.3399, + "step": 200810 + }, + { + "epoch": 0.405669105556386, + "grad_norm": 204.1864471435547, + "learning_rate": 7.475170579244964e-06, + "loss": 17.1633, + "step": 200820 + }, + { + "epoch": 0.40568930618906984, + "grad_norm": 1005.5781860351562, + "learning_rate": 7.474867279133115e-06, + "loss": 33.7334, + "step": 200830 + }, + { + "epoch": 0.40570950682175366, + "grad_norm": 489.997802734375, + "learning_rate": 7.474563966959068e-06, + "loss": 15.5835, + "step": 200840 + }, + { + "epoch": 0.4057297074544375, + "grad_norm": 38.442405700683594, + "learning_rate": 7.4742606427242935e-06, + "loss": 24.9425, + "step": 200850 + }, + { + "epoch": 0.4057499080871213, + "grad_norm": 481.94927978515625, + "learning_rate": 7.473957306430273e-06, + "loss": 32.2975, + "step": 200860 + }, + { + "epoch": 0.4057701087198051, + "grad_norm": 259.0964660644531, + "learning_rate": 7.473653958078484e-06, + "loss": 21.2508, + "step": 200870 + }, + { + "epoch": 0.40579030935248894, + "grad_norm": 425.4049072265625, + "learning_rate": 7.473350597670407e-06, + "loss": 37.0715, + "step": 200880 + }, + { + "epoch": 0.40581050998517276, + "grad_norm": 83.00106811523438, + "learning_rate": 
7.47304722520752e-06, + "loss": 14.0333, + "step": 200890 + }, + { + "epoch": 0.4058307106178565, + "grad_norm": 356.96600341796875, + "learning_rate": 7.4727438406912986e-06, + "loss": 21.6185, + "step": 200900 + }, + { + "epoch": 0.40585091125054035, + "grad_norm": 457.668212890625, + "learning_rate": 7.472440444123224e-06, + "loss": 19.1185, + "step": 200910 + }, + { + "epoch": 0.40587111188322417, + "grad_norm": 234.08029174804688, + "learning_rate": 7.472137035504776e-06, + "loss": 12.4808, + "step": 200920 + }, + { + "epoch": 0.405891312515908, + "grad_norm": 446.756103515625, + "learning_rate": 7.471833614837431e-06, + "loss": 22.8839, + "step": 200930 + }, + { + "epoch": 0.4059115131485918, + "grad_norm": 197.4498748779297, + "learning_rate": 7.471530182122668e-06, + "loss": 26.2008, + "step": 200940 + }, + { + "epoch": 0.4059317137812756, + "grad_norm": 658.5452880859375, + "learning_rate": 7.471226737361968e-06, + "loss": 21.0591, + "step": 200950 + }, + { + "epoch": 0.40595191441395945, + "grad_norm": 351.7759094238281, + "learning_rate": 7.470923280556808e-06, + "loss": 24.4132, + "step": 200960 + }, + { + "epoch": 0.40597211504664327, + "grad_norm": 521.5507202148438, + "learning_rate": 7.4706198117086685e-06, + "loss": 11.5193, + "step": 200970 + }, + { + "epoch": 0.4059923156793271, + "grad_norm": 410.0379333496094, + "learning_rate": 7.4703163308190275e-06, + "loss": 13.6378, + "step": 200980 + }, + { + "epoch": 0.4060125163120109, + "grad_norm": 816.7725219726562, + "learning_rate": 7.470012837889362e-06, + "loss": 33.3381, + "step": 200990 + }, + { + "epoch": 0.4060327169446947, + "grad_norm": 483.8327941894531, + "learning_rate": 7.469709332921155e-06, + "loss": 15.425, + "step": 201000 + }, + { + "epoch": 0.40605291757737855, + "grad_norm": 437.8172912597656, + "learning_rate": 7.469405815915885e-06, + "loss": 22.8705, + "step": 201010 + }, + { + "epoch": 0.4060731182100623, + "grad_norm": 654.0513916015625, + "learning_rate": 7.469102286875029e-06, + "loss": 22.8105, + "step": 201020 + }, + { + "epoch": 0.40609331884274613, + "grad_norm": 539.8277587890625, + "learning_rate": 7.46879874580007e-06, + "loss": 15.6053, + "step": 201030 + }, + { + "epoch": 0.40611351947542995, + "grad_norm": 568.4539184570312, + "learning_rate": 7.468495192692484e-06, + "loss": 28.7137, + "step": 201040 + }, + { + "epoch": 0.40613372010811377, + "grad_norm": 343.5415344238281, + "learning_rate": 7.468191627553752e-06, + "loss": 15.7525, + "step": 201050 + }, + { + "epoch": 0.4061539207407976, + "grad_norm": 756.5459594726562, + "learning_rate": 7.467888050385355e-06, + "loss": 31.3906, + "step": 201060 + }, + { + "epoch": 0.4061741213734814, + "grad_norm": 61.31733703613281, + "learning_rate": 7.467584461188769e-06, + "loss": 15.9251, + "step": 201070 + }, + { + "epoch": 0.40619432200616523, + "grad_norm": 246.74839782714844, + "learning_rate": 7.467280859965476e-06, + "loss": 21.4076, + "step": 201080 + }, + { + "epoch": 0.40621452263884905, + "grad_norm": 276.4493103027344, + "learning_rate": 7.4669772467169555e-06, + "loss": 19.7777, + "step": 201090 + }, + { + "epoch": 0.40623472327153287, + "grad_norm": 397.8076171875, + "learning_rate": 7.4666736214446855e-06, + "loss": 14.9288, + "step": 201100 + }, + { + "epoch": 0.4062549239042167, + "grad_norm": 230.67694091796875, + "learning_rate": 7.46636998415015e-06, + "loss": 31.7875, + "step": 201110 + }, + { + "epoch": 0.4062751245369005, + "grad_norm": 400.58160400390625, + "learning_rate": 7.466066334834825e-06, + "loss": 13.756, + 
"step": 201120 + }, + { + "epoch": 0.40629532516958433, + "grad_norm": 223.71376037597656, + "learning_rate": 7.465762673500192e-06, + "loss": 23.8489, + "step": 201130 + }, + { + "epoch": 0.40631552580226815, + "grad_norm": 425.3001708984375, + "learning_rate": 7.465459000147731e-06, + "loss": 16.3558, + "step": 201140 + }, + { + "epoch": 0.4063357264349519, + "grad_norm": 302.7783203125, + "learning_rate": 7.46515531477892e-06, + "loss": 15.9492, + "step": 201150 + }, + { + "epoch": 0.40635592706763574, + "grad_norm": 142.73504638671875, + "learning_rate": 7.464851617395244e-06, + "loss": 16.9348, + "step": 201160 + }, + { + "epoch": 0.40637612770031956, + "grad_norm": 261.61956787109375, + "learning_rate": 7.464547907998179e-06, + "loss": 22.927, + "step": 201170 + }, + { + "epoch": 0.4063963283330034, + "grad_norm": 456.87176513671875, + "learning_rate": 7.464244186589206e-06, + "loss": 19.6105, + "step": 201180 + }, + { + "epoch": 0.4064165289656872, + "grad_norm": 995.794921875, + "learning_rate": 7.4639404531698054e-06, + "loss": 20.3693, + "step": 201190 + }, + { + "epoch": 0.406436729598371, + "grad_norm": 491.23028564453125, + "learning_rate": 7.463636707741458e-06, + "loss": 24.6229, + "step": 201200 + }, + { + "epoch": 0.40645693023105484, + "grad_norm": 263.67669677734375, + "learning_rate": 7.463332950305646e-06, + "loss": 23.7628, + "step": 201210 + }, + { + "epoch": 0.40647713086373866, + "grad_norm": 564.0216674804688, + "learning_rate": 7.463029180863846e-06, + "loss": 20.9312, + "step": 201220 + }, + { + "epoch": 0.4064973314964225, + "grad_norm": 1087.8101806640625, + "learning_rate": 7.462725399417541e-06, + "loss": 34.9737, + "step": 201230 + }, + { + "epoch": 0.4065175321291063, + "grad_norm": 827.571044921875, + "learning_rate": 7.4624216059682106e-06, + "loss": 38.8245, + "step": 201240 + }, + { + "epoch": 0.4065377327617901, + "grad_norm": 457.0788269042969, + "learning_rate": 7.462117800517337e-06, + "loss": 26.9808, + "step": 201250 + }, + { + "epoch": 0.40655793339447394, + "grad_norm": 381.0020446777344, + "learning_rate": 7.461813983066398e-06, + "loss": 27.545, + "step": 201260 + }, + { + "epoch": 0.40657813402715776, + "grad_norm": 159.75901794433594, + "learning_rate": 7.461510153616879e-06, + "loss": 12.4726, + "step": 201270 + }, + { + "epoch": 0.4065983346598415, + "grad_norm": 2.2260327339172363, + "learning_rate": 7.461206312170255e-06, + "loss": 14.3518, + "step": 201280 + }, + { + "epoch": 0.40661853529252534, + "grad_norm": 371.715576171875, + "learning_rate": 7.460902458728012e-06, + "loss": 22.9897, + "step": 201290 + }, + { + "epoch": 0.40663873592520916, + "grad_norm": 188.25341796875, + "learning_rate": 7.460598593291628e-06, + "loss": 30.0347, + "step": 201300 + }, + { + "epoch": 0.406658936557893, + "grad_norm": 311.2943420410156, + "learning_rate": 7.460294715862586e-06, + "loss": 24.9204, + "step": 201310 + }, + { + "epoch": 0.4066791371905768, + "grad_norm": 603.1354370117188, + "learning_rate": 7.459990826442366e-06, + "loss": 27.4273, + "step": 201320 + }, + { + "epoch": 0.4066993378232606, + "grad_norm": 315.0934143066406, + "learning_rate": 7.459686925032446e-06, + "loss": 23.9535, + "step": 201330 + }, + { + "epoch": 0.40671953845594444, + "grad_norm": 188.21473693847656, + "learning_rate": 7.459383011634314e-06, + "loss": 15.5692, + "step": 201340 + }, + { + "epoch": 0.40673973908862826, + "grad_norm": 724.6349487304688, + "learning_rate": 7.459079086249445e-06, + "loss": 27.3631, + "step": 201350 + }, + { + "epoch": 
0.4067599397213121, + "grad_norm": 1370.4459228515625, + "learning_rate": 7.458775148879325e-06, + "loss": 28.1437, + "step": 201360 + }, + { + "epoch": 0.4067801403539959, + "grad_norm": 494.84234619140625, + "learning_rate": 7.458471199525431e-06, + "loss": 24.9262, + "step": 201370 + }, + { + "epoch": 0.4068003409866797, + "grad_norm": 374.4007263183594, + "learning_rate": 7.458167238189249e-06, + "loss": 30.2492, + "step": 201380 + }, + { + "epoch": 0.40682054161936354, + "grad_norm": 439.54937744140625, + "learning_rate": 7.457863264872256e-06, + "loss": 14.7385, + "step": 201390 + }, + { + "epoch": 0.40684074225204736, + "grad_norm": 522.9287719726562, + "learning_rate": 7.4575592795759356e-06, + "loss": 12.2373, + "step": 201400 + }, + { + "epoch": 0.4068609428847311, + "grad_norm": 175.83682250976562, + "learning_rate": 7.45725528230177e-06, + "loss": 14.9058, + "step": 201410 + }, + { + "epoch": 0.40688114351741494, + "grad_norm": 491.727294921875, + "learning_rate": 7.456951273051239e-06, + "loss": 15.4886, + "step": 201420 + }, + { + "epoch": 0.40690134415009876, + "grad_norm": 448.6680908203125, + "learning_rate": 7.456647251825828e-06, + "loss": 31.4303, + "step": 201430 + }, + { + "epoch": 0.4069215447827826, + "grad_norm": 98.84780883789062, + "learning_rate": 7.4563432186270135e-06, + "loss": 11.6186, + "step": 201440 + }, + { + "epoch": 0.4069417454154664, + "grad_norm": 104.43572235107422, + "learning_rate": 7.456039173456282e-06, + "loss": 13.9035, + "step": 201450 + }, + { + "epoch": 0.4069619460481502, + "grad_norm": 390.1668395996094, + "learning_rate": 7.455735116315113e-06, + "loss": 20.1905, + "step": 201460 + }, + { + "epoch": 0.40698214668083404, + "grad_norm": 138.4781036376953, + "learning_rate": 7.455431047204988e-06, + "loss": 15.5138, + "step": 201470 + }, + { + "epoch": 0.40700234731351786, + "grad_norm": 214.82301330566406, + "learning_rate": 7.455126966127392e-06, + "loss": 15.5275, + "step": 201480 + }, + { + "epoch": 0.4070225479462017, + "grad_norm": 151.06898498535156, + "learning_rate": 7.4548228730838025e-06, + "loss": 12.9611, + "step": 201490 + }, + { + "epoch": 0.4070427485788855, + "grad_norm": 435.325927734375, + "learning_rate": 7.454518768075705e-06, + "loss": 17.9772, + "step": 201500 + }, + { + "epoch": 0.4070629492115693, + "grad_norm": 255.72364807128906, + "learning_rate": 7.454214651104581e-06, + "loss": 11.7055, + "step": 201510 + }, + { + "epoch": 0.40708314984425314, + "grad_norm": 502.916748046875, + "learning_rate": 7.453910522171912e-06, + "loss": 17.7385, + "step": 201520 + }, + { + "epoch": 0.40710335047693696, + "grad_norm": 325.4402160644531, + "learning_rate": 7.453606381279181e-06, + "loss": 19.5053, + "step": 201530 + }, + { + "epoch": 0.40712355110962073, + "grad_norm": 407.8590087890625, + "learning_rate": 7.45330222842787e-06, + "loss": 25.5274, + "step": 201540 + }, + { + "epoch": 0.40714375174230455, + "grad_norm": 993.3406372070312, + "learning_rate": 7.452998063619461e-06, + "loss": 11.7498, + "step": 201550 + }, + { + "epoch": 0.40716395237498837, + "grad_norm": 646.5173950195312, + "learning_rate": 7.452693886855438e-06, + "loss": 21.7291, + "step": 201560 + }, + { + "epoch": 0.4071841530076722, + "grad_norm": 223.51336669921875, + "learning_rate": 7.452389698137281e-06, + "loss": 41.3807, + "step": 201570 + }, + { + "epoch": 0.407204353640356, + "grad_norm": 137.25628662109375, + "learning_rate": 7.452085497466476e-06, + "loss": 16.8889, + "step": 201580 + }, + { + "epoch": 0.40722455427303983, + "grad_norm": 
266.11163330078125, + "learning_rate": 7.451781284844503e-06, + "loss": 7.3088, + "step": 201590 + }, + { + "epoch": 0.40724475490572365, + "grad_norm": 160.74749755859375, + "learning_rate": 7.451477060272844e-06, + "loss": 19.159, + "step": 201600 + }, + { + "epoch": 0.40726495553840747, + "grad_norm": 104.86251831054688, + "learning_rate": 7.4511728237529845e-06, + "loss": 23.7645, + "step": 201610 + }, + { + "epoch": 0.4072851561710913, + "grad_norm": 908.7479858398438, + "learning_rate": 7.450868575286405e-06, + "loss": 31.8811, + "step": 201620 + }, + { + "epoch": 0.4073053568037751, + "grad_norm": 309.2737121582031, + "learning_rate": 7.450564314874591e-06, + "loss": 20.9828, + "step": 201630 + }, + { + "epoch": 0.40732555743645893, + "grad_norm": 568.97998046875, + "learning_rate": 7.450260042519022e-06, + "loss": 43.8262, + "step": 201640 + }, + { + "epoch": 0.40734575806914275, + "grad_norm": 167.1912078857422, + "learning_rate": 7.449955758221184e-06, + "loss": 16.0625, + "step": 201650 + }, + { + "epoch": 0.4073659587018265, + "grad_norm": 339.8267517089844, + "learning_rate": 7.449651461982559e-06, + "loss": 32.839, + "step": 201660 + }, + { + "epoch": 0.40738615933451033, + "grad_norm": 259.1501770019531, + "learning_rate": 7.449347153804628e-06, + "loss": 17.2005, + "step": 201670 + }, + { + "epoch": 0.40740635996719415, + "grad_norm": 476.617431640625, + "learning_rate": 7.4490428336888775e-06, + "loss": 30.358, + "step": 201680 + }, + { + "epoch": 0.407426560599878, + "grad_norm": 350.8307189941406, + "learning_rate": 7.4487385016367885e-06, + "loss": 45.564, + "step": 201690 + }, + { + "epoch": 0.4074467612325618, + "grad_norm": 433.2794494628906, + "learning_rate": 7.448434157649846e-06, + "loss": 25.2486, + "step": 201700 + }, + { + "epoch": 0.4074669618652456, + "grad_norm": 518.6293334960938, + "learning_rate": 7.448129801729531e-06, + "loss": 25.7643, + "step": 201710 + }, + { + "epoch": 0.40748716249792943, + "grad_norm": 223.87640380859375, + "learning_rate": 7.447825433877329e-06, + "loss": 14.6394, + "step": 201720 + }, + { + "epoch": 0.40750736313061325, + "grad_norm": 452.998779296875, + "learning_rate": 7.447521054094723e-06, + "loss": 14.4795, + "step": 201730 + }, + { + "epoch": 0.4075275637632971, + "grad_norm": 251.29132080078125, + "learning_rate": 7.447216662383196e-06, + "loss": 29.2115, + "step": 201740 + }, + { + "epoch": 0.4075477643959809, + "grad_norm": 325.8575134277344, + "learning_rate": 7.446912258744232e-06, + "loss": 22.5336, + "step": 201750 + }, + { + "epoch": 0.4075679650286647, + "grad_norm": 897.65869140625, + "learning_rate": 7.446607843179314e-06, + "loss": 26.6089, + "step": 201760 + }, + { + "epoch": 0.40758816566134853, + "grad_norm": 784.4694213867188, + "learning_rate": 7.446303415689927e-06, + "loss": 25.6471, + "step": 201770 + }, + { + "epoch": 0.40760836629403235, + "grad_norm": 187.5708770751953, + "learning_rate": 7.4459989762775516e-06, + "loss": 26.8016, + "step": 201780 + }, + { + "epoch": 0.4076285669267161, + "grad_norm": 268.2976989746094, + "learning_rate": 7.445694524943677e-06, + "loss": 16.3666, + "step": 201790 + }, + { + "epoch": 0.40764876755939994, + "grad_norm": 446.0238952636719, + "learning_rate": 7.445390061689782e-06, + "loss": 13.2344, + "step": 201800 + }, + { + "epoch": 0.40766896819208376, + "grad_norm": 361.369140625, + "learning_rate": 7.4450855865173534e-06, + "loss": 23.7117, + "step": 201810 + }, + { + "epoch": 0.4076891688247676, + "grad_norm": 383.89849853515625, + "learning_rate": 
7.444781099427873e-06, + "loss": 18.8971, + "step": 201820 + }, + { + "epoch": 0.4077093694574514, + "grad_norm": 353.0237121582031, + "learning_rate": 7.444476600422827e-06, + "loss": 18.4254, + "step": 201830 + }, + { + "epoch": 0.4077295700901352, + "grad_norm": 268.77392578125, + "learning_rate": 7.444172089503698e-06, + "loss": 8.8478, + "step": 201840 + }, + { + "epoch": 0.40774977072281904, + "grad_norm": 63.98426818847656, + "learning_rate": 7.443867566671971e-06, + "loss": 8.1564, + "step": 201850 + }, + { + "epoch": 0.40776997135550286, + "grad_norm": 307.0477294921875, + "learning_rate": 7.4435630319291295e-06, + "loss": 21.182, + "step": 201860 + }, + { + "epoch": 0.4077901719881867, + "grad_norm": 103.76622009277344, + "learning_rate": 7.4432584852766575e-06, + "loss": 19.0367, + "step": 201870 + }, + { + "epoch": 0.4078103726208705, + "grad_norm": 468.2156066894531, + "learning_rate": 7.442953926716042e-06, + "loss": 27.5066, + "step": 201880 + }, + { + "epoch": 0.4078305732535543, + "grad_norm": 362.2070617675781, + "learning_rate": 7.442649356248765e-06, + "loss": 23.6275, + "step": 201890 + }, + { + "epoch": 0.40785077388623814, + "grad_norm": 389.12847900390625, + "learning_rate": 7.44234477387631e-06, + "loss": 13.1216, + "step": 201900 + }, + { + "epoch": 0.40787097451892196, + "grad_norm": 328.23065185546875, + "learning_rate": 7.442040179600163e-06, + "loss": 12.5546, + "step": 201910 + }, + { + "epoch": 0.4078911751516057, + "grad_norm": 1921.57666015625, + "learning_rate": 7.4417355734218085e-06, + "loss": 46.7711, + "step": 201920 + }, + { + "epoch": 0.40791137578428954, + "grad_norm": 721.4837646484375, + "learning_rate": 7.441430955342733e-06, + "loss": 17.6815, + "step": 201930 + }, + { + "epoch": 0.40793157641697336, + "grad_norm": 728.987548828125, + "learning_rate": 7.441126325364415e-06, + "loss": 19.4633, + "step": 201940 + }, + { + "epoch": 0.4079517770496572, + "grad_norm": 739.6961059570312, + "learning_rate": 7.440821683488346e-06, + "loss": 25.4722, + "step": 201950 + }, + { + "epoch": 0.407971977682341, + "grad_norm": 329.321044921875, + "learning_rate": 7.440517029716008e-06, + "loss": 31.0787, + "step": 201960 + }, + { + "epoch": 0.4079921783150248, + "grad_norm": 332.2442626953125, + "learning_rate": 7.4402123640488855e-06, + "loss": 21.4414, + "step": 201970 + }, + { + "epoch": 0.40801237894770864, + "grad_norm": 202.59873962402344, + "learning_rate": 7.439907686488463e-06, + "loss": 16.7013, + "step": 201980 + }, + { + "epoch": 0.40803257958039246, + "grad_norm": 1010.4586181640625, + "learning_rate": 7.439602997036229e-06, + "loss": 15.1833, + "step": 201990 + }, + { + "epoch": 0.4080527802130763, + "grad_norm": 505.4180908203125, + "learning_rate": 7.4392982956936644e-06, + "loss": 30.9323, + "step": 202000 + }, + { + "epoch": 0.4080729808457601, + "grad_norm": 5731.296875, + "learning_rate": 7.438993582462255e-06, + "loss": 44.6847, + "step": 202010 + }, + { + "epoch": 0.4080931814784439, + "grad_norm": 286.26513671875, + "learning_rate": 7.438688857343488e-06, + "loss": 16.6637, + "step": 202020 + }, + { + "epoch": 0.40811338211112774, + "grad_norm": 488.8510437011719, + "learning_rate": 7.438384120338846e-06, + "loss": 14.8836, + "step": 202030 + }, + { + "epoch": 0.40813358274381156, + "grad_norm": 224.29898071289062, + "learning_rate": 7.4380793714498175e-06, + "loss": 13.4785, + "step": 202040 + }, + { + "epoch": 0.4081537833764953, + "grad_norm": 664.1687622070312, + "learning_rate": 7.437774610677884e-06, + "loss": 19.8974, + "step": 
202050 + }, + { + "epoch": 0.40817398400917915, + "grad_norm": 215.94056701660156, + "learning_rate": 7.437469838024534e-06, + "loss": 17.0218, + "step": 202060 + }, + { + "epoch": 0.40819418464186297, + "grad_norm": 560.67626953125, + "learning_rate": 7.437165053491251e-06, + "loss": 19.3743, + "step": 202070 + }, + { + "epoch": 0.4082143852745468, + "grad_norm": 84.43445587158203, + "learning_rate": 7.43686025707952e-06, + "loss": 13.7687, + "step": 202080 + }, + { + "epoch": 0.4082345859072306, + "grad_norm": 563.4779052734375, + "learning_rate": 7.43655544879083e-06, + "loss": 11.2371, + "step": 202090 + }, + { + "epoch": 0.4082547865399144, + "grad_norm": 316.088623046875, + "learning_rate": 7.436250628626662e-06, + "loss": 16.9678, + "step": 202100 + }, + { + "epoch": 0.40827498717259825, + "grad_norm": 846.0938720703125, + "learning_rate": 7.4359457965885066e-06, + "loss": 24.9185, + "step": 202110 + }, + { + "epoch": 0.40829518780528207, + "grad_norm": 394.32635498046875, + "learning_rate": 7.435640952677844e-06, + "loss": 24.0668, + "step": 202120 + }, + { + "epoch": 0.4083153884379659, + "grad_norm": 517.8413696289062, + "learning_rate": 7.435336096896164e-06, + "loss": 33.0063, + "step": 202130 + }, + { + "epoch": 0.4083355890706497, + "grad_norm": 13.747394561767578, + "learning_rate": 7.435031229244951e-06, + "loss": 22.4625, + "step": 202140 + }, + { + "epoch": 0.4083557897033335, + "grad_norm": 348.4352722167969, + "learning_rate": 7.434726349725692e-06, + "loss": 27.3487, + "step": 202150 + }, + { + "epoch": 0.40837599033601735, + "grad_norm": 224.69532775878906, + "learning_rate": 7.434421458339871e-06, + "loss": 20.6076, + "step": 202160 + }, + { + "epoch": 0.40839619096870117, + "grad_norm": 278.6298828125, + "learning_rate": 7.434116555088975e-06, + "loss": 23.4357, + "step": 202170 + }, + { + "epoch": 0.40841639160138493, + "grad_norm": 404.5762939453125, + "learning_rate": 7.4338116399744905e-06, + "loss": 23.1307, + "step": 202180 + }, + { + "epoch": 0.40843659223406875, + "grad_norm": 538.7630615234375, + "learning_rate": 7.433506712997903e-06, + "loss": 27.1231, + "step": 202190 + }, + { + "epoch": 0.40845679286675257, + "grad_norm": 552.8690795898438, + "learning_rate": 7.433201774160701e-06, + "loss": 16.8527, + "step": 202200 + }, + { + "epoch": 0.4084769934994364, + "grad_norm": 463.61944580078125, + "learning_rate": 7.432896823464366e-06, + "loss": 24.177, + "step": 202210 + }, + { + "epoch": 0.4084971941321202, + "grad_norm": 583.8330688476562, + "learning_rate": 7.432591860910389e-06, + "loss": 31.2486, + "step": 202220 + }, + { + "epoch": 0.40851739476480403, + "grad_norm": 605.05322265625, + "learning_rate": 7.432286886500253e-06, + "loss": 30.0312, + "step": 202230 + }, + { + "epoch": 0.40853759539748785, + "grad_norm": 1038.759765625, + "learning_rate": 7.431981900235446e-06, + "loss": 20.2549, + "step": 202240 + }, + { + "epoch": 0.40855779603017167, + "grad_norm": 293.982666015625, + "learning_rate": 7.431676902117453e-06, + "loss": 15.2225, + "step": 202250 + }, + { + "epoch": 0.4085779966628555, + "grad_norm": 417.32220458984375, + "learning_rate": 7.431371892147763e-06, + "loss": 21.8134, + "step": 202260 + }, + { + "epoch": 0.4085981972955393, + "grad_norm": 28.197507858276367, + "learning_rate": 7.431066870327861e-06, + "loss": 19.4645, + "step": 202270 + }, + { + "epoch": 0.40861839792822313, + "grad_norm": 867.1630859375, + "learning_rate": 7.430761836659235e-06, + "loss": 25.0237, + "step": 202280 + }, + { + "epoch": 0.40863859856090695, + 
"grad_norm": 347.8146057128906, + "learning_rate": 7.430456791143369e-06, + "loss": 29.2887, + "step": 202290 + }, + { + "epoch": 0.4086587991935907, + "grad_norm": 331.7054748535156, + "learning_rate": 7.430151733781752e-06, + "loss": 15.065, + "step": 202300 + }, + { + "epoch": 0.40867899982627454, + "grad_norm": 270.35186767578125, + "learning_rate": 7.42984666457587e-06, + "loss": 13.1611, + "step": 202310 + }, + { + "epoch": 0.40869920045895836, + "grad_norm": 352.6448974609375, + "learning_rate": 7.42954158352721e-06, + "loss": 34.8578, + "step": 202320 + }, + { + "epoch": 0.4087194010916422, + "grad_norm": 236.7410430908203, + "learning_rate": 7.42923649063726e-06, + "loss": 20.2191, + "step": 202330 + }, + { + "epoch": 0.408739601724326, + "grad_norm": 839.5408325195312, + "learning_rate": 7.428931385907505e-06, + "loss": 22.1993, + "step": 202340 + }, + { + "epoch": 0.4087598023570098, + "grad_norm": 260.7296142578125, + "learning_rate": 7.428626269339433e-06, + "loss": 18.8646, + "step": 202350 + }, + { + "epoch": 0.40878000298969364, + "grad_norm": 479.09130859375, + "learning_rate": 7.428321140934532e-06, + "loss": 33.4774, + "step": 202360 + }, + { + "epoch": 0.40880020362237746, + "grad_norm": 350.2244567871094, + "learning_rate": 7.428016000694287e-06, + "loss": 16.9029, + "step": 202370 + }, + { + "epoch": 0.4088204042550613, + "grad_norm": 0.0, + "learning_rate": 7.427710848620188e-06, + "loss": 21.7193, + "step": 202380 + }, + { + "epoch": 0.4088406048877451, + "grad_norm": 780.4125366210938, + "learning_rate": 7.4274056847137185e-06, + "loss": 32.7082, + "step": 202390 + }, + { + "epoch": 0.4088608055204289, + "grad_norm": 229.0189666748047, + "learning_rate": 7.42710050897637e-06, + "loss": 43.0292, + "step": 202400 + }, + { + "epoch": 0.40888100615311274, + "grad_norm": 182.1045379638672, + "learning_rate": 7.426795321409628e-06, + "loss": 26.0979, + "step": 202410 + }, + { + "epoch": 0.40890120678579656, + "grad_norm": 473.6593017578125, + "learning_rate": 7.42649012201498e-06, + "loss": 30.5279, + "step": 202420 + }, + { + "epoch": 0.4089214074184803, + "grad_norm": 315.952880859375, + "learning_rate": 7.426184910793914e-06, + "loss": 11.5901, + "step": 202430 + }, + { + "epoch": 0.40894160805116414, + "grad_norm": 510.8643493652344, + "learning_rate": 7.425879687747915e-06, + "loss": 18.8998, + "step": 202440 + }, + { + "epoch": 0.40896180868384796, + "grad_norm": 170.85086059570312, + "learning_rate": 7.425574452878474e-06, + "loss": 21.0382, + "step": 202450 + }, + { + "epoch": 0.4089820093165318, + "grad_norm": 567.182861328125, + "learning_rate": 7.425269206187076e-06, + "loss": 16.5877, + "step": 202460 + }, + { + "epoch": 0.4090022099492156, + "grad_norm": 449.30023193359375, + "learning_rate": 7.42496394767521e-06, + "loss": 11.9506, + "step": 202470 + }, + { + "epoch": 0.4090224105818994, + "grad_norm": 165.76919555664062, + "learning_rate": 7.424658677344365e-06, + "loss": 18.0442, + "step": 202480 + }, + { + "epoch": 0.40904261121458324, + "grad_norm": 166.05328369140625, + "learning_rate": 7.424353395196029e-06, + "loss": 9.8056, + "step": 202490 + }, + { + "epoch": 0.40906281184726706, + "grad_norm": 460.68206787109375, + "learning_rate": 7.424048101231687e-06, + "loss": 26.734, + "step": 202500 + }, + { + "epoch": 0.4090830124799509, + "grad_norm": 479.8690185546875, + "learning_rate": 7.423742795452827e-06, + "loss": 18.3991, + "step": 202510 + }, + { + "epoch": 0.4091032131126347, + "grad_norm": 385.1668701171875, + "learning_rate": 
7.423437477860941e-06, + "loss": 16.2015, + "step": 202520 + }, + { + "epoch": 0.4091234137453185, + "grad_norm": 265.1182556152344, + "learning_rate": 7.423132148457512e-06, + "loss": 16.7979, + "step": 202530 + }, + { + "epoch": 0.40914361437800234, + "grad_norm": 124.39286804199219, + "learning_rate": 7.422826807244034e-06, + "loss": 21.9892, + "step": 202540 + }, + { + "epoch": 0.40916381501068616, + "grad_norm": 457.02484130859375, + "learning_rate": 7.42252145422199e-06, + "loss": 25.6926, + "step": 202550 + }, + { + "epoch": 0.4091840156433699, + "grad_norm": 365.1929931640625, + "learning_rate": 7.422216089392872e-06, + "loss": 23.4752, + "step": 202560 + }, + { + "epoch": 0.40920421627605374, + "grad_norm": 217.96307373046875, + "learning_rate": 7.421910712758165e-06, + "loss": 17.9261, + "step": 202570 + }, + { + "epoch": 0.40922441690873756, + "grad_norm": 339.1838073730469, + "learning_rate": 7.421605324319359e-06, + "loss": 21.7752, + "step": 202580 + }, + { + "epoch": 0.4092446175414214, + "grad_norm": 667.7643432617188, + "learning_rate": 7.421299924077943e-06, + "loss": 41.6153, + "step": 202590 + }, + { + "epoch": 0.4092648181741052, + "grad_norm": 114.67329406738281, + "learning_rate": 7.4209945120354045e-06, + "loss": 17.4382, + "step": 202600 + }, + { + "epoch": 0.409285018806789, + "grad_norm": 217.40821838378906, + "learning_rate": 7.420689088193232e-06, + "loss": 24.6881, + "step": 202610 + }, + { + "epoch": 0.40930521943947284, + "grad_norm": 512.017822265625, + "learning_rate": 7.420383652552915e-06, + "loss": 15.7967, + "step": 202620 + }, + { + "epoch": 0.40932542007215666, + "grad_norm": 673.688232421875, + "learning_rate": 7.420078205115942e-06, + "loss": 32.7925, + "step": 202630 + }, + { + "epoch": 0.4093456207048405, + "grad_norm": 621.5011596679688, + "learning_rate": 7.4197727458837995e-06, + "loss": 20.3245, + "step": 202640 + }, + { + "epoch": 0.4093658213375243, + "grad_norm": 547.8865356445312, + "learning_rate": 7.419467274857981e-06, + "loss": 13.0149, + "step": 202650 + }, + { + "epoch": 0.4093860219702081, + "grad_norm": 565.602783203125, + "learning_rate": 7.419161792039969e-06, + "loss": 21.422, + "step": 202660 + }, + { + "epoch": 0.40940622260289194, + "grad_norm": 1128.8487548828125, + "learning_rate": 7.41885629743126e-06, + "loss": 25.6424, + "step": 202670 + }, + { + "epoch": 0.40942642323557576, + "grad_norm": 213.93804931640625, + "learning_rate": 7.418550791033335e-06, + "loss": 34.2035, + "step": 202680 + }, + { + "epoch": 0.40944662386825953, + "grad_norm": 231.7338409423828, + "learning_rate": 7.418245272847688e-06, + "loss": 25.0876, + "step": 202690 + }, + { + "epoch": 0.40946682450094335, + "grad_norm": 461.5252685546875, + "learning_rate": 7.4179397428758085e-06, + "loss": 24.8995, + "step": 202700 + }, + { + "epoch": 0.40948702513362717, + "grad_norm": 127.33111572265625, + "learning_rate": 7.4176342011191816e-06, + "loss": 15.6913, + "step": 202710 + }, + { + "epoch": 0.409507225766311, + "grad_norm": 473.0237731933594, + "learning_rate": 7.417328647579301e-06, + "loss": 16.9951, + "step": 202720 + }, + { + "epoch": 0.4095274263989948, + "grad_norm": 535.3770141601562, + "learning_rate": 7.417023082257653e-06, + "loss": 20.9535, + "step": 202730 + }, + { + "epoch": 0.40954762703167863, + "grad_norm": 281.7862243652344, + "learning_rate": 7.416717505155726e-06, + "loss": 24.6127, + "step": 202740 + }, + { + "epoch": 0.40956782766436245, + "grad_norm": 207.27316284179688, + "learning_rate": 7.416411916275012e-06, + "loss": 
13.6992, + "step": 202750 + }, + { + "epoch": 0.40958802829704627, + "grad_norm": 315.2615051269531, + "learning_rate": 7.416106315617e-06, + "loss": 13.0337, + "step": 202760 + }, + { + "epoch": 0.4096082289297301, + "grad_norm": 365.64556884765625, + "learning_rate": 7.415800703183179e-06, + "loss": 17.1957, + "step": 202770 + }, + { + "epoch": 0.4096284295624139, + "grad_norm": 591.7393188476562, + "learning_rate": 7.415495078975038e-06, + "loss": 24.1295, + "step": 202780 + }, + { + "epoch": 0.40964863019509773, + "grad_norm": 523.6378784179688, + "learning_rate": 7.415189442994066e-06, + "loss": 11.2499, + "step": 202790 + }, + { + "epoch": 0.40966883082778155, + "grad_norm": 30.62116241455078, + "learning_rate": 7.414883795241754e-06, + "loss": 23.3721, + "step": 202800 + }, + { + "epoch": 0.40968903146046537, + "grad_norm": 253.954345703125, + "learning_rate": 7.4145781357195936e-06, + "loss": 14.5089, + "step": 202810 + }, + { + "epoch": 0.40970923209314913, + "grad_norm": 1037.10302734375, + "learning_rate": 7.414272464429068e-06, + "loss": 22.0036, + "step": 202820 + }, + { + "epoch": 0.40972943272583295, + "grad_norm": 915.9358520507812, + "learning_rate": 7.413966781371676e-06, + "loss": 22.3646, + "step": 202830 + }, + { + "epoch": 0.4097496333585168, + "grad_norm": 1416.9718017578125, + "learning_rate": 7.413661086548899e-06, + "loss": 22.3141, + "step": 202840 + }, + { + "epoch": 0.4097698339912006, + "grad_norm": 597.7158813476562, + "learning_rate": 7.413355379962231e-06, + "loss": 35.5899, + "step": 202850 + }, + { + "epoch": 0.4097900346238844, + "grad_norm": 335.5372009277344, + "learning_rate": 7.413049661613163e-06, + "loss": 21.6018, + "step": 202860 + }, + { + "epoch": 0.40981023525656823, + "grad_norm": 534.3197631835938, + "learning_rate": 7.412743931503182e-06, + "loss": 37.7867, + "step": 202870 + }, + { + "epoch": 0.40983043588925205, + "grad_norm": 350.77685546875, + "learning_rate": 7.412438189633781e-06, + "loss": 16.7635, + "step": 202880 + }, + { + "epoch": 0.4098506365219359, + "grad_norm": 210.72047424316406, + "learning_rate": 7.412132436006449e-06, + "loss": 12.5931, + "step": 202890 + }, + { + "epoch": 0.4098708371546197, + "grad_norm": 342.9144287109375, + "learning_rate": 7.411826670622676e-06, + "loss": 16.952, + "step": 202900 + }, + { + "epoch": 0.4098910377873035, + "grad_norm": 328.1678161621094, + "learning_rate": 7.411520893483952e-06, + "loss": 8.9762, + "step": 202910 + }, + { + "epoch": 0.40991123841998733, + "grad_norm": 291.2751159667969, + "learning_rate": 7.411215104591767e-06, + "loss": 10.1271, + "step": 202920 + }, + { + "epoch": 0.40993143905267115, + "grad_norm": 152.46485900878906, + "learning_rate": 7.410909303947613e-06, + "loss": 22.4722, + "step": 202930 + }, + { + "epoch": 0.4099516396853549, + "grad_norm": 168.26895141601562, + "learning_rate": 7.4106034915529786e-06, + "loss": 27.4402, + "step": 202940 + }, + { + "epoch": 0.40997184031803874, + "grad_norm": 3.0440235137939453, + "learning_rate": 7.410297667409356e-06, + "loss": 14.0275, + "step": 202950 + }, + { + "epoch": 0.40999204095072256, + "grad_norm": 204.01937866210938, + "learning_rate": 7.409991831518235e-06, + "loss": 23.0723, + "step": 202960 + }, + { + "epoch": 0.4100122415834064, + "grad_norm": 221.46669006347656, + "learning_rate": 7.409685983881107e-06, + "loss": 16.941, + "step": 202970 + }, + { + "epoch": 0.4100324422160902, + "grad_norm": 815.1734008789062, + "learning_rate": 7.409380124499459e-06, + "loss": 26.154, + "step": 202980 + }, + { + "epoch": 
0.410052642848774, + "grad_norm": 491.68438720703125, + "learning_rate": 7.409074253374786e-06, + "loss": 15.2505, + "step": 202990 + }, + { + "epoch": 0.41007284348145784, + "grad_norm": 342.01397705078125, + "learning_rate": 7.408768370508577e-06, + "loss": 19.1597, + "step": 203000 + }, + { + "epoch": 0.41009304411414166, + "grad_norm": 346.72747802734375, + "learning_rate": 7.408462475902324e-06, + "loss": 23.4766, + "step": 203010 + }, + { + "epoch": 0.4101132447468255, + "grad_norm": 901.693603515625, + "learning_rate": 7.408156569557515e-06, + "loss": 24.8032, + "step": 203020 + }, + { + "epoch": 0.4101334453795093, + "grad_norm": 868.064453125, + "learning_rate": 7.407850651475645e-06, + "loss": 30.5301, + "step": 203030 + }, + { + "epoch": 0.4101536460121931, + "grad_norm": 533.5048828125, + "learning_rate": 7.407544721658203e-06, + "loss": 18.0294, + "step": 203040 + }, + { + "epoch": 0.41017384664487694, + "grad_norm": 32.24917984008789, + "learning_rate": 7.407238780106679e-06, + "loss": 18.1304, + "step": 203050 + }, + { + "epoch": 0.41019404727756076, + "grad_norm": 316.50787353515625, + "learning_rate": 7.406932826822565e-06, + "loss": 22.6199, + "step": 203060 + }, + { + "epoch": 0.4102142479102445, + "grad_norm": 281.2059326171875, + "learning_rate": 7.406626861807352e-06, + "loss": 20.8594, + "step": 203070 + }, + { + "epoch": 0.41023444854292834, + "grad_norm": 350.651611328125, + "learning_rate": 7.406320885062532e-06, + "loss": 14.0463, + "step": 203080 + }, + { + "epoch": 0.41025464917561216, + "grad_norm": 98.49825286865234, + "learning_rate": 7.406014896589597e-06, + "loss": 13.0616, + "step": 203090 + }, + { + "epoch": 0.410274849808296, + "grad_norm": 459.5002746582031, + "learning_rate": 7.405708896390037e-06, + "loss": 32.8267, + "step": 203100 + }, + { + "epoch": 0.4102950504409798, + "grad_norm": 359.4190979003906, + "learning_rate": 7.405402884465342e-06, + "loss": 16.3583, + "step": 203110 + }, + { + "epoch": 0.4103152510736636, + "grad_norm": 454.3992919921875, + "learning_rate": 7.405096860817007e-06, + "loss": 27.9597, + "step": 203120 + }, + { + "epoch": 0.41033545170634744, + "grad_norm": 197.53135681152344, + "learning_rate": 7.404790825446522e-06, + "loss": 20.3972, + "step": 203130 + }, + { + "epoch": 0.41035565233903126, + "grad_norm": 158.32972717285156, + "learning_rate": 7.404484778355374e-06, + "loss": 21.0419, + "step": 203140 + }, + { + "epoch": 0.4103758529717151, + "grad_norm": 706.7918090820312, + "learning_rate": 7.404178719545063e-06, + "loss": 17.9008, + "step": 203150 + }, + { + "epoch": 0.4103960536043989, + "grad_norm": 496.1558837890625, + "learning_rate": 7.403872649017074e-06, + "loss": 12.4259, + "step": 203160 + }, + { + "epoch": 0.4104162542370827, + "grad_norm": 0.0, + "learning_rate": 7.403566566772904e-06, + "loss": 40.8558, + "step": 203170 + }, + { + "epoch": 0.41043645486976654, + "grad_norm": 491.2930908203125, + "learning_rate": 7.40326047281404e-06, + "loss": 27.3981, + "step": 203180 + }, + { + "epoch": 0.41045665550245036, + "grad_norm": 792.5803833007812, + "learning_rate": 7.4029543671419765e-06, + "loss": 39.0564, + "step": 203190 + }, + { + "epoch": 0.4104768561351341, + "grad_norm": 429.5687255859375, + "learning_rate": 7.402648249758204e-06, + "loss": 13.5491, + "step": 203200 + }, + { + "epoch": 0.41049705676781795, + "grad_norm": 488.1636657714844, + "learning_rate": 7.4023421206642176e-06, + "loss": 26.4762, + "step": 203210 + }, + { + "epoch": 0.41051725740050177, + "grad_norm": 776.623779296875, + 
"learning_rate": 7.402035979861505e-06, + "loss": 22.4744, + "step": 203220 + }, + { + "epoch": 0.4105374580331856, + "grad_norm": 1082.503662109375, + "learning_rate": 7.401729827351561e-06, + "loss": 17.9678, + "step": 203230 + }, + { + "epoch": 0.4105576586658694, + "grad_norm": 374.99774169921875, + "learning_rate": 7.401423663135877e-06, + "loss": 14.9524, + "step": 203240 + }, + { + "epoch": 0.4105778592985532, + "grad_norm": 184.60501098632812, + "learning_rate": 7.401117487215945e-06, + "loss": 19.6489, + "step": 203250 + }, + { + "epoch": 0.41059805993123705, + "grad_norm": 299.70745849609375, + "learning_rate": 7.400811299593259e-06, + "loss": 28.3008, + "step": 203260 + }, + { + "epoch": 0.41061826056392087, + "grad_norm": 298.9436340332031, + "learning_rate": 7.400505100269307e-06, + "loss": 15.3563, + "step": 203270 + }, + { + "epoch": 0.4106384611966047, + "grad_norm": 468.0664367675781, + "learning_rate": 7.4001988892455875e-06, + "loss": 24.0958, + "step": 203280 + }, + { + "epoch": 0.4106586618292885, + "grad_norm": 408.8467102050781, + "learning_rate": 7.399892666523588e-06, + "loss": 17.696, + "step": 203290 + }, + { + "epoch": 0.4106788624619723, + "grad_norm": 351.5890197753906, + "learning_rate": 7.3995864321048036e-06, + "loss": 22.1796, + "step": 203300 + }, + { + "epoch": 0.41069906309465615, + "grad_norm": 796.6549682617188, + "learning_rate": 7.399280185990726e-06, + "loss": 27.2794, + "step": 203310 + }, + { + "epoch": 0.41071926372733997, + "grad_norm": 521.300537109375, + "learning_rate": 7.3989739281828466e-06, + "loss": 30.68, + "step": 203320 + }, + { + "epoch": 0.41073946436002373, + "grad_norm": 210.74981689453125, + "learning_rate": 7.39866765868266e-06, + "loss": 16.6639, + "step": 203330 + }, + { + "epoch": 0.41075966499270755, + "grad_norm": 38.808624267578125, + "learning_rate": 7.398361377491659e-06, + "loss": 17.412, + "step": 203340 + }, + { + "epoch": 0.41077986562539137, + "grad_norm": 12.65576171875, + "learning_rate": 7.398055084611333e-06, + "loss": 14.3816, + "step": 203350 + }, + { + "epoch": 0.4108000662580752, + "grad_norm": 308.3738708496094, + "learning_rate": 7.397748780043179e-06, + "loss": 15.9892, + "step": 203360 + }, + { + "epoch": 0.410820266890759, + "grad_norm": 350.11297607421875, + "learning_rate": 7.3974424637886886e-06, + "loss": 14.9189, + "step": 203370 + }, + { + "epoch": 0.41084046752344283, + "grad_norm": 687.0380249023438, + "learning_rate": 7.397136135849354e-06, + "loss": 13.4597, + "step": 203380 + }, + { + "epoch": 0.41086066815612665, + "grad_norm": 36.86468505859375, + "learning_rate": 7.3968297962266696e-06, + "loss": 30.9209, + "step": 203390 + }, + { + "epoch": 0.41088086878881047, + "grad_norm": 58.81327438354492, + "learning_rate": 7.396523444922126e-06, + "loss": 13.2363, + "step": 203400 + }, + { + "epoch": 0.4109010694214943, + "grad_norm": 379.4734802246094, + "learning_rate": 7.396217081937218e-06, + "loss": 11.533, + "step": 203410 + }, + { + "epoch": 0.4109212700541781, + "grad_norm": 233.38462829589844, + "learning_rate": 7.395910707273439e-06, + "loss": 11.755, + "step": 203420 + }, + { + "epoch": 0.41094147068686193, + "grad_norm": 419.86407470703125, + "learning_rate": 7.395604320932281e-06, + "loss": 12.0955, + "step": 203430 + }, + { + "epoch": 0.41096167131954575, + "grad_norm": 139.32289123535156, + "learning_rate": 7.39529792291524e-06, + "loss": 15.9278, + "step": 203440 + }, + { + "epoch": 0.41098187195222957, + "grad_norm": 145.92091369628906, + "learning_rate": 7.394991513223806e-06, + 
"loss": 30.9979, + "step": 203450 + }, + { + "epoch": 0.41100207258491334, + "grad_norm": 341.9765625, + "learning_rate": 7.394685091859474e-06, + "loss": 22.7676, + "step": 203460 + }, + { + "epoch": 0.41102227321759716, + "grad_norm": 581.0768432617188, + "learning_rate": 7.394378658823738e-06, + "loss": 12.4343, + "step": 203470 + }, + { + "epoch": 0.411042473850281, + "grad_norm": 407.8321533203125, + "learning_rate": 7.3940722141180885e-06, + "loss": 13.9842, + "step": 203480 + }, + { + "epoch": 0.4110626744829648, + "grad_norm": 830.1275024414062, + "learning_rate": 7.393765757744025e-06, + "loss": 14.1395, + "step": 203490 + }, + { + "epoch": 0.4110828751156486, + "grad_norm": 361.3041076660156, + "learning_rate": 7.393459289703035e-06, + "loss": 7.2721, + "step": 203500 + }, + { + "epoch": 0.41110307574833244, + "grad_norm": 262.95428466796875, + "learning_rate": 7.393152809996616e-06, + "loss": 19.7618, + "step": 203510 + }, + { + "epoch": 0.41112327638101626, + "grad_norm": 261.2764587402344, + "learning_rate": 7.392846318626259e-06, + "loss": 22.2981, + "step": 203520 + }, + { + "epoch": 0.4111434770137001, + "grad_norm": 610.3558349609375, + "learning_rate": 7.392539815593459e-06, + "loss": 15.0741, + "step": 203530 + }, + { + "epoch": 0.4111636776463839, + "grad_norm": 364.45721435546875, + "learning_rate": 7.392233300899712e-06, + "loss": 13.466, + "step": 203540 + }, + { + "epoch": 0.4111838782790677, + "grad_norm": 537.528076171875, + "learning_rate": 7.391926774546509e-06, + "loss": 24.3812, + "step": 203550 + }, + { + "epoch": 0.41120407891175154, + "grad_norm": 1105.9019775390625, + "learning_rate": 7.391620236535345e-06, + "loss": 18.9171, + "step": 203560 + }, + { + "epoch": 0.41122427954443536, + "grad_norm": 430.5234069824219, + "learning_rate": 7.391313686867715e-06, + "loss": 24.4805, + "step": 203570 + }, + { + "epoch": 0.4112444801771191, + "grad_norm": 337.411376953125, + "learning_rate": 7.391007125545111e-06, + "loss": 15.8487, + "step": 203580 + }, + { + "epoch": 0.41126468080980294, + "grad_norm": 203.77084350585938, + "learning_rate": 7.390700552569028e-06, + "loss": 12.0691, + "step": 203590 + }, + { + "epoch": 0.41128488144248676, + "grad_norm": 363.8847351074219, + "learning_rate": 7.390393967940962e-06, + "loss": 25.2019, + "step": 203600 + }, + { + "epoch": 0.4113050820751706, + "grad_norm": 200.1789093017578, + "learning_rate": 7.390087371662403e-06, + "loss": 11.106, + "step": 203610 + }, + { + "epoch": 0.4113252827078544, + "grad_norm": 1082.9810791015625, + "learning_rate": 7.389780763734851e-06, + "loss": 28.1564, + "step": 203620 + }, + { + "epoch": 0.4113454833405382, + "grad_norm": 235.7948455810547, + "learning_rate": 7.3894741441597965e-06, + "loss": 17.6264, + "step": 203630 + }, + { + "epoch": 0.41136568397322204, + "grad_norm": 765.6566772460938, + "learning_rate": 7.389167512938734e-06, + "loss": 29.9497, + "step": 203640 + }, + { + "epoch": 0.41138588460590586, + "grad_norm": 976.2962646484375, + "learning_rate": 7.38886087007316e-06, + "loss": 21.5795, + "step": 203650 + }, + { + "epoch": 0.4114060852385897, + "grad_norm": 342.59521484375, + "learning_rate": 7.388554215564567e-06, + "loss": 18.6348, + "step": 203660 + }, + { + "epoch": 0.4114262858712735, + "grad_norm": 465.6590576171875, + "learning_rate": 7.388247549414451e-06, + "loss": 27.5429, + "step": 203670 + }, + { + "epoch": 0.4114464865039573, + "grad_norm": 959.4237670898438, + "learning_rate": 7.387940871624306e-06, + "loss": 13.0305, + "step": 203680 + }, + { + "epoch": 
0.41146668713664114, + "grad_norm": 947.7572021484375, + "learning_rate": 7.387634182195626e-06, + "loss": 20.6679, + "step": 203690 + }, + { + "epoch": 0.41148688776932496, + "grad_norm": 185.0061798095703, + "learning_rate": 7.3873274811299065e-06, + "loss": 15.733, + "step": 203700 + }, + { + "epoch": 0.4115070884020087, + "grad_norm": 338.54742431640625, + "learning_rate": 7.387020768428644e-06, + "loss": 10.8132, + "step": 203710 + }, + { + "epoch": 0.41152728903469254, + "grad_norm": 238.6177215576172, + "learning_rate": 7.386714044093331e-06, + "loss": 36.3459, + "step": 203720 + }, + { + "epoch": 0.41154748966737636, + "grad_norm": 186.13943481445312, + "learning_rate": 7.3864073081254625e-06, + "loss": 22.4083, + "step": 203730 + }, + { + "epoch": 0.4115676903000602, + "grad_norm": 471.1614990234375, + "learning_rate": 7.386100560526537e-06, + "loss": 34.5945, + "step": 203740 + }, + { + "epoch": 0.411587890932744, + "grad_norm": 320.0458068847656, + "learning_rate": 7.3857938012980425e-06, + "loss": 21.0611, + "step": 203750 + }, + { + "epoch": 0.4116080915654278, + "grad_norm": 251.6160125732422, + "learning_rate": 7.385487030441482e-06, + "loss": 13.937, + "step": 203760 + }, + { + "epoch": 0.41162829219811164, + "grad_norm": 14.12028694152832, + "learning_rate": 7.385180247958343e-06, + "loss": 16.3978, + "step": 203770 + }, + { + "epoch": 0.41164849283079546, + "grad_norm": 354.54119873046875, + "learning_rate": 7.384873453850128e-06, + "loss": 14.0409, + "step": 203780 + }, + { + "epoch": 0.4116686934634793, + "grad_norm": 515.3600463867188, + "learning_rate": 7.3845666481183285e-06, + "loss": 17.1991, + "step": 203790 + }, + { + "epoch": 0.4116888940961631, + "grad_norm": 668.2739868164062, + "learning_rate": 7.3842598307644396e-06, + "loss": 18.2747, + "step": 203800 + }, + { + "epoch": 0.4117090947288469, + "grad_norm": 690.9266357421875, + "learning_rate": 7.38395300178996e-06, + "loss": 17.0975, + "step": 203810 + }, + { + "epoch": 0.41172929536153074, + "grad_norm": 632.6204223632812, + "learning_rate": 7.383646161196379e-06, + "loss": 25.168, + "step": 203820 + }, + { + "epoch": 0.41174949599421456, + "grad_norm": 91.12115478515625, + "learning_rate": 7.3833393089851975e-06, + "loss": 36.994, + "step": 203830 + }, + { + "epoch": 0.41176969662689833, + "grad_norm": 303.2684326171875, + "learning_rate": 7.383032445157909e-06, + "loss": 15.9723, + "step": 203840 + }, + { + "epoch": 0.41178989725958215, + "grad_norm": 187.7299041748047, + "learning_rate": 7.382725569716009e-06, + "loss": 32.0824, + "step": 203850 + }, + { + "epoch": 0.41181009789226597, + "grad_norm": 52.39496994018555, + "learning_rate": 7.3824186826609945e-06, + "loss": 16.3438, + "step": 203860 + }, + { + "epoch": 0.4118302985249498, + "grad_norm": 155.1835479736328, + "learning_rate": 7.382111783994361e-06, + "loss": 26.1586, + "step": 203870 + }, + { + "epoch": 0.4118504991576336, + "grad_norm": 748.326904296875, + "learning_rate": 7.381804873717602e-06, + "loss": 34.8786, + "step": 203880 + }, + { + "epoch": 0.41187069979031743, + "grad_norm": 42.789878845214844, + "learning_rate": 7.381497951832215e-06, + "loss": 29.3765, + "step": 203890 + }, + { + "epoch": 0.41189090042300125, + "grad_norm": 10.138392448425293, + "learning_rate": 7.381191018339697e-06, + "loss": 33.3606, + "step": 203900 + }, + { + "epoch": 0.41191110105568507, + "grad_norm": 463.3682861328125, + "learning_rate": 7.380884073241541e-06, + "loss": 17.2549, + "step": 203910 + }, + { + "epoch": 0.4119313016883689, + "grad_norm": 
349.8173828125, + "learning_rate": 7.380577116539247e-06, + "loss": 17.862, + "step": 203920 + }, + { + "epoch": 0.4119515023210527, + "grad_norm": 213.31219482421875, + "learning_rate": 7.380270148234306e-06, + "loss": 15.8991, + "step": 203930 + }, + { + "epoch": 0.41197170295373653, + "grad_norm": 257.1610107421875, + "learning_rate": 7.379963168328219e-06, + "loss": 20.6672, + "step": 203940 + }, + { + "epoch": 0.41199190358642035, + "grad_norm": 474.9005432128906, + "learning_rate": 7.379656176822481e-06, + "loss": 26.1415, + "step": 203950 + }, + { + "epoch": 0.41201210421910417, + "grad_norm": 368.1659240722656, + "learning_rate": 7.379349173718585e-06, + "loss": 7.9902, + "step": 203960 + }, + { + "epoch": 0.41203230485178793, + "grad_norm": 339.0737609863281, + "learning_rate": 7.379042159018031e-06, + "loss": 26.1943, + "step": 203970 + }, + { + "epoch": 0.41205250548447175, + "grad_norm": 911.4879150390625, + "learning_rate": 7.378735132722313e-06, + "loss": 29.2985, + "step": 203980 + }, + { + "epoch": 0.4120727061171556, + "grad_norm": 334.9006652832031, + "learning_rate": 7.3784280948329315e-06, + "loss": 28.405, + "step": 203990 + }, + { + "epoch": 0.4120929067498394, + "grad_norm": 32.65821838378906, + "learning_rate": 7.378121045351378e-06, + "loss": 13.0551, + "step": 204000 + }, + { + "epoch": 0.4121131073825232, + "grad_norm": 719.0232543945312, + "learning_rate": 7.37781398427915e-06, + "loss": 23.5942, + "step": 204010 + }, + { + "epoch": 0.41213330801520703, + "grad_norm": 433.3742370605469, + "learning_rate": 7.377506911617746e-06, + "loss": 12.329, + "step": 204020 + }, + { + "epoch": 0.41215350864789085, + "grad_norm": 441.4324645996094, + "learning_rate": 7.3771998273686615e-06, + "loss": 11.4904, + "step": 204030 + }, + { + "epoch": 0.4121737092805747, + "grad_norm": 204.50839233398438, + "learning_rate": 7.376892731533392e-06, + "loss": 35.7096, + "step": 204040 + }, + { + "epoch": 0.4121939099132585, + "grad_norm": 705.2774658203125, + "learning_rate": 7.376585624113438e-06, + "loss": 24.6124, + "step": 204050 + }, + { + "epoch": 0.4122141105459423, + "grad_norm": 232.0381317138672, + "learning_rate": 7.3762785051102924e-06, + "loss": 12.1146, + "step": 204060 + }, + { + "epoch": 0.41223431117862613, + "grad_norm": 329.8414001464844, + "learning_rate": 7.375971374525454e-06, + "loss": 26.7676, + "step": 204070 + }, + { + "epoch": 0.41225451181130995, + "grad_norm": 147.4716033935547, + "learning_rate": 7.375664232360421e-06, + "loss": 53.1563, + "step": 204080 + }, + { + "epoch": 0.4122747124439937, + "grad_norm": 7.299252986907959, + "learning_rate": 7.375357078616685e-06, + "loss": 22.3826, + "step": 204090 + }, + { + "epoch": 0.41229491307667754, + "grad_norm": 28.4975528717041, + "learning_rate": 7.37504991329575e-06, + "loss": 7.3029, + "step": 204100 + }, + { + "epoch": 0.41231511370936136, + "grad_norm": 830.8320922851562, + "learning_rate": 7.374742736399108e-06, + "loss": 25.8122, + "step": 204110 + }, + { + "epoch": 0.4123353143420452, + "grad_norm": 170.08763122558594, + "learning_rate": 7.374435547928258e-06, + "loss": 25.8492, + "step": 204120 + }, + { + "epoch": 0.412355514974729, + "grad_norm": 265.1971130371094, + "learning_rate": 7.374128347884698e-06, + "loss": 20.4346, + "step": 204130 + }, + { + "epoch": 0.4123757156074128, + "grad_norm": 27.740524291992188, + "learning_rate": 7.373821136269924e-06, + "loss": 30.3226, + "step": 204140 + }, + { + "epoch": 0.41239591624009664, + "grad_norm": 388.4397888183594, + "learning_rate": 
7.373513913085434e-06, + "loss": 12.6602, + "step": 204150 + }, + { + "epoch": 0.41241611687278046, + "grad_norm": 503.6349792480469, + "learning_rate": 7.3732066783327246e-06, + "loss": 30.4778, + "step": 204160 + }, + { + "epoch": 0.4124363175054643, + "grad_norm": 281.2956237792969, + "learning_rate": 7.372899432013294e-06, + "loss": 16.2556, + "step": 204170 + }, + { + "epoch": 0.4124565181381481, + "grad_norm": 784.6923828125, + "learning_rate": 7.372592174128639e-06, + "loss": 23.285, + "step": 204180 + }, + { + "epoch": 0.4124767187708319, + "grad_norm": 442.5145263671875, + "learning_rate": 7.372284904680258e-06, + "loss": 24.7501, + "step": 204190 + }, + { + "epoch": 0.41249691940351574, + "grad_norm": 516.041015625, + "learning_rate": 7.371977623669646e-06, + "loss": 38.6752, + "step": 204200 + }, + { + "epoch": 0.41251712003619956, + "grad_norm": 43.92538833618164, + "learning_rate": 7.371670331098306e-06, + "loss": 21.5389, + "step": 204210 + }, + { + "epoch": 0.4125373206688833, + "grad_norm": 530.768310546875, + "learning_rate": 7.371363026967731e-06, + "loss": 25.1033, + "step": 204220 + }, + { + "epoch": 0.41255752130156714, + "grad_norm": 267.6069641113281, + "learning_rate": 7.3710557112794205e-06, + "loss": 9.8462, + "step": 204230 + }, + { + "epoch": 0.41257772193425096, + "grad_norm": 301.1824035644531, + "learning_rate": 7.370748384034871e-06, + "loss": 18.6672, + "step": 204240 + }, + { + "epoch": 0.4125979225669348, + "grad_norm": 239.70164489746094, + "learning_rate": 7.370441045235581e-06, + "loss": 19.2322, + "step": 204250 + }, + { + "epoch": 0.4126181231996186, + "grad_norm": 341.6642761230469, + "learning_rate": 7.370133694883052e-06, + "loss": 15.1475, + "step": 204260 + }, + { + "epoch": 0.4126383238323024, + "grad_norm": 203.58834838867188, + "learning_rate": 7.369826332978776e-06, + "loss": 26.8191, + "step": 204270 + }, + { + "epoch": 0.41265852446498624, + "grad_norm": 358.1466979980469, + "learning_rate": 7.369518959524254e-06, + "loss": 18.3549, + "step": 204280 + }, + { + "epoch": 0.41267872509767006, + "grad_norm": 822.9906616210938, + "learning_rate": 7.369211574520985e-06, + "loss": 16.4689, + "step": 204290 + }, + { + "epoch": 0.4126989257303539, + "grad_norm": 6.4609479904174805, + "learning_rate": 7.368904177970466e-06, + "loss": 12.5418, + "step": 204300 + }, + { + "epoch": 0.4127191263630377, + "grad_norm": 289.052978515625, + "learning_rate": 7.368596769874194e-06, + "loss": 30.5566, + "step": 204310 + }, + { + "epoch": 0.4127393269957215, + "grad_norm": 598.248291015625, + "learning_rate": 7.368289350233672e-06, + "loss": 15.5053, + "step": 204320 + }, + { + "epoch": 0.41275952762840534, + "grad_norm": 766.3134155273438, + "learning_rate": 7.367981919050392e-06, + "loss": 40.4611, + "step": 204330 + }, + { + "epoch": 0.41277972826108916, + "grad_norm": 271.3908386230469, + "learning_rate": 7.367674476325856e-06, + "loss": 23.5434, + "step": 204340 + }, + { + "epoch": 0.4127999288937729, + "grad_norm": 845.899169921875, + "learning_rate": 7.3673670220615615e-06, + "loss": 142.2028, + "step": 204350 + }, + { + "epoch": 0.41282012952645675, + "grad_norm": 355.33087158203125, + "learning_rate": 7.367059556259008e-06, + "loss": 25.4397, + "step": 204360 + }, + { + "epoch": 0.41284033015914057, + "grad_norm": 779.6798095703125, + "learning_rate": 7.366752078919693e-06, + "loss": 17.3048, + "step": 204370 + }, + { + "epoch": 0.4128605307918244, + "grad_norm": 256.36920166015625, + "learning_rate": 7.366444590045114e-06, + "loss": 11.5476, + "step": 
204380 + }, + { + "epoch": 0.4128807314245082, + "grad_norm": 46.427059173583984, + "learning_rate": 7.366137089636774e-06, + "loss": 31.0527, + "step": 204390 + }, + { + "epoch": 0.412900932057192, + "grad_norm": 299.9348449707031, + "learning_rate": 7.365829577696166e-06, + "loss": 13.5647, + "step": 204400 + }, + { + "epoch": 0.41292113268987585, + "grad_norm": 702.4867553710938, + "learning_rate": 7.365522054224793e-06, + "loss": 24.8403, + "step": 204410 + }, + { + "epoch": 0.41294133332255967, + "grad_norm": 198.4513702392578, + "learning_rate": 7.365214519224153e-06, + "loss": 27.4059, + "step": 204420 + }, + { + "epoch": 0.4129615339552435, + "grad_norm": 616.6324462890625, + "learning_rate": 7.3649069726957426e-06, + "loss": 13.2356, + "step": 204430 + }, + { + "epoch": 0.4129817345879273, + "grad_norm": 379.34423828125, + "learning_rate": 7.364599414641064e-06, + "loss": 20.9384, + "step": 204440 + }, + { + "epoch": 0.4130019352206111, + "grad_norm": 588.1680908203125, + "learning_rate": 7.364291845061613e-06, + "loss": 14.3176, + "step": 204450 + }, + { + "epoch": 0.41302213585329495, + "grad_norm": 152.29234313964844, + "learning_rate": 7.3639842639588924e-06, + "loss": 20.8452, + "step": 204460 + }, + { + "epoch": 0.41304233648597877, + "grad_norm": 768.8727416992188, + "learning_rate": 7.363676671334397e-06, + "loss": 25.8743, + "step": 204470 + }, + { + "epoch": 0.41306253711866253, + "grad_norm": 883.4291381835938, + "learning_rate": 7.36336906718963e-06, + "loss": 64.602, + "step": 204480 + }, + { + "epoch": 0.41308273775134635, + "grad_norm": 394.97369384765625, + "learning_rate": 7.363061451526088e-06, + "loss": 32.4078, + "step": 204490 + }, + { + "epoch": 0.41310293838403017, + "grad_norm": 362.8598327636719, + "learning_rate": 7.362753824345271e-06, + "loss": 16.4351, + "step": 204500 + }, + { + "epoch": 0.413123139016714, + "grad_norm": 93.06488800048828, + "learning_rate": 7.362446185648678e-06, + "loss": 16.3617, + "step": 204510 + }, + { + "epoch": 0.4131433396493978, + "grad_norm": 252.51776123046875, + "learning_rate": 7.362138535437809e-06, + "loss": 13.6326, + "step": 204520 + }, + { + "epoch": 0.41316354028208163, + "grad_norm": 688.4585571289062, + "learning_rate": 7.361830873714165e-06, + "loss": 16.4144, + "step": 204530 + }, + { + "epoch": 0.41318374091476545, + "grad_norm": 846.5159912109375, + "learning_rate": 7.361523200479241e-06, + "loss": 25.2794, + "step": 204540 + }, + { + "epoch": 0.41320394154744927, + "grad_norm": 293.8544006347656, + "learning_rate": 7.361215515734542e-06, + "loss": 19.9846, + "step": 204550 + }, + { + "epoch": 0.4132241421801331, + "grad_norm": 524.58203125, + "learning_rate": 7.360907819481564e-06, + "loss": 24.4492, + "step": 204560 + }, + { + "epoch": 0.4132443428128169, + "grad_norm": 319.6919860839844, + "learning_rate": 7.360600111721807e-06, + "loss": 19.3178, + "step": 204570 + }, + { + "epoch": 0.41326454344550073, + "grad_norm": 254.0984344482422, + "learning_rate": 7.360292392456772e-06, + "loss": 19.386, + "step": 204580 + }, + { + "epoch": 0.41328474407818455, + "grad_norm": 647.4578857421875, + "learning_rate": 7.359984661687957e-06, + "loss": 19.9465, + "step": 204590 + }, + { + "epoch": 0.41330494471086837, + "grad_norm": 395.6432189941406, + "learning_rate": 7.3596769194168646e-06, + "loss": 13.5074, + "step": 204600 + }, + { + "epoch": 0.41332514534355214, + "grad_norm": 427.6363220214844, + "learning_rate": 7.359369165644992e-06, + "loss": 39.348, + "step": 204610 + }, + { + "epoch": 0.41334534597623596, + 
"grad_norm": 679.1809692382812, + "learning_rate": 7.359061400373841e-06, + "loss": 11.2077, + "step": 204620 + }, + { + "epoch": 0.4133655466089198, + "grad_norm": 305.5386047363281, + "learning_rate": 7.358753623604911e-06, + "loss": 14.5615, + "step": 204630 + }, + { + "epoch": 0.4133857472416036, + "grad_norm": 625.0375366210938, + "learning_rate": 7.358445835339702e-06, + "loss": 26.0267, + "step": 204640 + }, + { + "epoch": 0.4134059478742874, + "grad_norm": 71.08431243896484, + "learning_rate": 7.358138035579712e-06, + "loss": 18.9844, + "step": 204650 + }, + { + "epoch": 0.41342614850697124, + "grad_norm": 156.96670532226562, + "learning_rate": 7.357830224326445e-06, + "loss": 20.113, + "step": 204660 + }, + { + "epoch": 0.41344634913965506, + "grad_norm": 176.9730682373047, + "learning_rate": 7.357522401581398e-06, + "loss": 24.6944, + "step": 204670 + }, + { + "epoch": 0.4134665497723389, + "grad_norm": 215.47763061523438, + "learning_rate": 7.357214567346075e-06, + "loss": 17.0557, + "step": 204680 + }, + { + "epoch": 0.4134867504050227, + "grad_norm": 331.2965393066406, + "learning_rate": 7.356906721621974e-06, + "loss": 20.3168, + "step": 204690 + }, + { + "epoch": 0.4135069510377065, + "grad_norm": 643.3457641601562, + "learning_rate": 7.3565988644105926e-06, + "loss": 27.3129, + "step": 204700 + }, + { + "epoch": 0.41352715167039034, + "grad_norm": 25.501609802246094, + "learning_rate": 7.356290995713436e-06, + "loss": 27.9145, + "step": 204710 + }, + { + "epoch": 0.41354735230307416, + "grad_norm": 334.7339172363281, + "learning_rate": 7.355983115532004e-06, + "loss": 13.3175, + "step": 204720 + }, + { + "epoch": 0.4135675529357579, + "grad_norm": 157.26942443847656, + "learning_rate": 7.355675223867794e-06, + "loss": 20.8501, + "step": 204730 + }, + { + "epoch": 0.41358775356844174, + "grad_norm": 515.1536254882812, + "learning_rate": 7.35536732072231e-06, + "loss": 13.5202, + "step": 204740 + }, + { + "epoch": 0.41360795420112556, + "grad_norm": 380.37646484375, + "learning_rate": 7.35505940609705e-06, + "loss": 31.1653, + "step": 204750 + }, + { + "epoch": 0.4136281548338094, + "grad_norm": 253.8275146484375, + "learning_rate": 7.354751479993518e-06, + "loss": 22.4868, + "step": 204760 + }, + { + "epoch": 0.4136483554664932, + "grad_norm": 19.62946128845215, + "learning_rate": 7.354443542413212e-06, + "loss": 18.2909, + "step": 204770 + }, + { + "epoch": 0.413668556099177, + "grad_norm": 424.73797607421875, + "learning_rate": 7.3541355933576345e-06, + "loss": 20.9551, + "step": 204780 + }, + { + "epoch": 0.41368875673186084, + "grad_norm": 446.65283203125, + "learning_rate": 7.353827632828283e-06, + "loss": 15.9439, + "step": 204790 + }, + { + "epoch": 0.41370895736454466, + "grad_norm": 501.2140197753906, + "learning_rate": 7.353519660826665e-06, + "loss": 16.7541, + "step": 204800 + }, + { + "epoch": 0.4137291579972285, + "grad_norm": 446.319580078125, + "learning_rate": 7.353211677354274e-06, + "loss": 14.0886, + "step": 204810 + }, + { + "epoch": 0.4137493586299123, + "grad_norm": 302.3290710449219, + "learning_rate": 7.352903682412617e-06, + "loss": 14.2707, + "step": 204820 + }, + { + "epoch": 0.4137695592625961, + "grad_norm": 765.657958984375, + "learning_rate": 7.352595676003191e-06, + "loss": 18.2536, + "step": 204830 + }, + { + "epoch": 0.41378975989527994, + "grad_norm": 307.65966796875, + "learning_rate": 7.3522876581275e-06, + "loss": 22.2144, + "step": 204840 + }, + { + "epoch": 0.41380996052796376, + "grad_norm": 0.0, + "learning_rate": 
7.351979628787045e-06, + "loss": 23.47, + "step": 204850 + }, + { + "epoch": 0.4138301611606475, + "grad_norm": 699.7431030273438, + "learning_rate": 7.351671587983325e-06, + "loss": 12.5187, + "step": 204860 + }, + { + "epoch": 0.41385036179333134, + "grad_norm": 138.17953491210938, + "learning_rate": 7.351363535717845e-06, + "loss": 10.1403, + "step": 204870 + }, + { + "epoch": 0.41387056242601516, + "grad_norm": 425.6673583984375, + "learning_rate": 7.3510554719921015e-06, + "loss": 30.1279, + "step": 204880 + }, + { + "epoch": 0.413890763058699, + "grad_norm": 376.48590087890625, + "learning_rate": 7.350747396807601e-06, + "loss": 14.3699, + "step": 204890 + }, + { + "epoch": 0.4139109636913828, + "grad_norm": 459.9209899902344, + "learning_rate": 7.350439310165842e-06, + "loss": 13.465, + "step": 204900 + }, + { + "epoch": 0.4139311643240666, + "grad_norm": 226.81353759765625, + "learning_rate": 7.350131212068328e-06, + "loss": 21.4474, + "step": 204910 + }, + { + "epoch": 0.41395136495675044, + "grad_norm": 971.2474975585938, + "learning_rate": 7.349823102516558e-06, + "loss": 21.9557, + "step": 204920 + }, + { + "epoch": 0.41397156558943426, + "grad_norm": 251.871826171875, + "learning_rate": 7.349514981512036e-06, + "loss": 17.9313, + "step": 204930 + }, + { + "epoch": 0.4139917662221181, + "grad_norm": 283.456787109375, + "learning_rate": 7.349206849056263e-06, + "loss": 28.0992, + "step": 204940 + }, + { + "epoch": 0.4140119668548019, + "grad_norm": 262.2191467285156, + "learning_rate": 7.34889870515074e-06, + "loss": 16.7783, + "step": 204950 + }, + { + "epoch": 0.4140321674874857, + "grad_norm": 489.7118835449219, + "learning_rate": 7.34859054979697e-06, + "loss": 48.0792, + "step": 204960 + }, + { + "epoch": 0.41405236812016954, + "grad_norm": 160.91929626464844, + "learning_rate": 7.348282382996454e-06, + "loss": 29.146, + "step": 204970 + }, + { + "epoch": 0.41407256875285336, + "grad_norm": 370.2843933105469, + "learning_rate": 7.3479742047506955e-06, + "loss": 29.2165, + "step": 204980 + }, + { + "epoch": 0.41409276938553713, + "grad_norm": 593.0636596679688, + "learning_rate": 7.347666015061195e-06, + "loss": 27.1644, + "step": 204990 + }, + { + "epoch": 0.41411297001822095, + "grad_norm": 196.46533203125, + "learning_rate": 7.347357813929455e-06, + "loss": 21.9118, + "step": 205000 + }, + { + "epoch": 0.41413317065090477, + "grad_norm": 634.07275390625, + "learning_rate": 7.347049601356977e-06, + "loss": 28.3629, + "step": 205010 + }, + { + "epoch": 0.4141533712835886, + "grad_norm": 368.7333068847656, + "learning_rate": 7.346741377345264e-06, + "loss": 25.4299, + "step": 205020 + }, + { + "epoch": 0.4141735719162724, + "grad_norm": 1.2591146230697632, + "learning_rate": 7.34643314189582e-06, + "loss": 12.8976, + "step": 205030 + }, + { + "epoch": 0.41419377254895623, + "grad_norm": 340.10321044921875, + "learning_rate": 7.346124895010144e-06, + "loss": 12.9368, + "step": 205040 + }, + { + "epoch": 0.41421397318164005, + "grad_norm": 436.80450439453125, + "learning_rate": 7.345816636689741e-06, + "loss": 23.3278, + "step": 205050 + }, + { + "epoch": 0.41423417381432387, + "grad_norm": 202.7328338623047, + "learning_rate": 7.345508366936111e-06, + "loss": 12.795, + "step": 205060 + }, + { + "epoch": 0.4142543744470077, + "grad_norm": 275.9850158691406, + "learning_rate": 7.345200085750758e-06, + "loss": 9.8136, + "step": 205070 + }, + { + "epoch": 0.4142745750796915, + "grad_norm": 193.0493927001953, + "learning_rate": 7.344891793135184e-06, + "loss": 49.3537, + "step": 
205080 + }, + { + "epoch": 0.41429477571237533, + "grad_norm": 431.5648498535156, + "learning_rate": 7.344583489090893e-06, + "loss": 18.3269, + "step": 205090 + }, + { + "epoch": 0.41431497634505915, + "grad_norm": 426.5965576171875, + "learning_rate": 7.344275173619385e-06, + "loss": 12.8332, + "step": 205100 + }, + { + "epoch": 0.41433517697774297, + "grad_norm": 283.1705322265625, + "learning_rate": 7.343966846722164e-06, + "loss": 32.939, + "step": 205110 + }, + { + "epoch": 0.41435537761042673, + "grad_norm": 325.40460205078125, + "learning_rate": 7.343658508400734e-06, + "loss": 15.246, + "step": 205120 + }, + { + "epoch": 0.41437557824311055, + "grad_norm": 63.674461364746094, + "learning_rate": 7.343350158656596e-06, + "loss": 27.4282, + "step": 205130 + }, + { + "epoch": 0.4143957788757944, + "grad_norm": 145.14161682128906, + "learning_rate": 7.343041797491253e-06, + "loss": 13.7614, + "step": 205140 + }, + { + "epoch": 0.4144159795084782, + "grad_norm": 712.3728637695312, + "learning_rate": 7.3427334249062085e-06, + "loss": 36.2831, + "step": 205150 + }, + { + "epoch": 0.414436180141162, + "grad_norm": 161.9210968017578, + "learning_rate": 7.342425040902967e-06, + "loss": 18.0161, + "step": 205160 + }, + { + "epoch": 0.41445638077384583, + "grad_norm": 1166.4088134765625, + "learning_rate": 7.3421166454830295e-06, + "loss": 28.6236, + "step": 205170 + }, + { + "epoch": 0.41447658140652965, + "grad_norm": 690.5027465820312, + "learning_rate": 7.341808238647898e-06, + "loss": 25.2683, + "step": 205180 + }, + { + "epoch": 0.4144967820392135, + "grad_norm": 449.7722473144531, + "learning_rate": 7.3414998203990784e-06, + "loss": 15.3232, + "step": 205190 + }, + { + "epoch": 0.4145169826718973, + "grad_norm": 261.26495361328125, + "learning_rate": 7.341191390738073e-06, + "loss": 18.6568, + "step": 205200 + }, + { + "epoch": 0.4145371833045811, + "grad_norm": 285.2146301269531, + "learning_rate": 7.340882949666385e-06, + "loss": 18.5354, + "step": 205210 + }, + { + "epoch": 0.41455738393726493, + "grad_norm": 307.7693786621094, + "learning_rate": 7.340574497185516e-06, + "loss": 18.4317, + "step": 205220 + }, + { + "epoch": 0.41457758456994875, + "grad_norm": 848.750244140625, + "learning_rate": 7.340266033296972e-06, + "loss": 26.6416, + "step": 205230 + }, + { + "epoch": 0.4145977852026326, + "grad_norm": 227.63143920898438, + "learning_rate": 7.339957558002254e-06, + "loss": 24.8482, + "step": 205240 + }, + { + "epoch": 0.41461798583531634, + "grad_norm": 1408.728271484375, + "learning_rate": 7.3396490713028674e-06, + "loss": 41.6129, + "step": 205250 + }, + { + "epoch": 0.41463818646800016, + "grad_norm": 381.8595886230469, + "learning_rate": 7.339340573200314e-06, + "loss": 17.6669, + "step": 205260 + }, + { + "epoch": 0.414658387100684, + "grad_norm": 645.9013061523438, + "learning_rate": 7.339032063696101e-06, + "loss": 17.5308, + "step": 205270 + }, + { + "epoch": 0.4146785877333678, + "grad_norm": 701.4525756835938, + "learning_rate": 7.3387235427917266e-06, + "loss": 15.9599, + "step": 205280 + }, + { + "epoch": 0.4146987883660516, + "grad_norm": 317.63165283203125, + "learning_rate": 7.338415010488699e-06, + "loss": 16.3648, + "step": 205290 + }, + { + "epoch": 0.41471898899873544, + "grad_norm": 39.73927307128906, + "learning_rate": 7.33810646678852e-06, + "loss": 12.4666, + "step": 205300 + }, + { + "epoch": 0.41473918963141926, + "grad_norm": 380.56182861328125, + "learning_rate": 7.3377979116926925e-06, + "loss": 15.8573, + "step": 205310 + }, + { + "epoch": 
0.4147593902641031, + "grad_norm": 866.0729370117188, + "learning_rate": 7.337489345202723e-06, + "loss": 23.2634, + "step": 205320 + }, + { + "epoch": 0.4147795908967869, + "grad_norm": 888.8053588867188, + "learning_rate": 7.337180767320113e-06, + "loss": 24.4546, + "step": 205330 + }, + { + "epoch": 0.4147997915294707, + "grad_norm": 651.4313354492188, + "learning_rate": 7.336872178046368e-06, + "loss": 38.669, + "step": 205340 + }, + { + "epoch": 0.41481999216215454, + "grad_norm": 531.4393310546875, + "learning_rate": 7.33656357738299e-06, + "loss": 30.526, + "step": 205350 + }, + { + "epoch": 0.41484019279483836, + "grad_norm": 167.88613891601562, + "learning_rate": 7.336254965331486e-06, + "loss": 11.7034, + "step": 205360 + }, + { + "epoch": 0.4148603934275221, + "grad_norm": 653.5435180664062, + "learning_rate": 7.335946341893359e-06, + "loss": 24.5979, + "step": 205370 + }, + { + "epoch": 0.41488059406020594, + "grad_norm": 614.8929443359375, + "learning_rate": 7.335637707070111e-06, + "loss": 44.128, + "step": 205380 + }, + { + "epoch": 0.41490079469288976, + "grad_norm": 488.77435302734375, + "learning_rate": 7.33532906086325e-06, + "loss": 10.9534, + "step": 205390 + }, + { + "epoch": 0.4149209953255736, + "grad_norm": 341.00799560546875, + "learning_rate": 7.335020403274277e-06, + "loss": 19.0022, + "step": 205400 + }, + { + "epoch": 0.4149411959582574, + "grad_norm": 342.5754699707031, + "learning_rate": 7.334711734304698e-06, + "loss": 31.824, + "step": 205410 + }, + { + "epoch": 0.4149613965909412, + "grad_norm": 273.30535888671875, + "learning_rate": 7.334403053956018e-06, + "loss": 20.4858, + "step": 205420 + }, + { + "epoch": 0.41498159722362504, + "grad_norm": 287.59893798828125, + "learning_rate": 7.334094362229741e-06, + "loss": 14.5842, + "step": 205430 + }, + { + "epoch": 0.41500179785630886, + "grad_norm": 404.0746154785156, + "learning_rate": 7.333785659127371e-06, + "loss": 15.8078, + "step": 205440 + }, + { + "epoch": 0.4150219984889927, + "grad_norm": 263.71044921875, + "learning_rate": 7.333476944650411e-06, + "loss": 20.3386, + "step": 205450 + }, + { + "epoch": 0.4150421991216765, + "grad_norm": 399.9277038574219, + "learning_rate": 7.333168218800369e-06, + "loss": 18.7517, + "step": 205460 + }, + { + "epoch": 0.4150623997543603, + "grad_norm": 364.37640380859375, + "learning_rate": 7.332859481578747e-06, + "loss": 22.8713, + "step": 205470 + }, + { + "epoch": 0.41508260038704414, + "grad_norm": 348.4693603515625, + "learning_rate": 7.332550732987051e-06, + "loss": 21.0402, + "step": 205480 + }, + { + "epoch": 0.41510280101972796, + "grad_norm": 477.7831115722656, + "learning_rate": 7.332241973026786e-06, + "loss": 20.8785, + "step": 205490 + }, + { + "epoch": 0.4151230016524117, + "grad_norm": 327.65472412109375, + "learning_rate": 7.3319332016994575e-06, + "loss": 12.5089, + "step": 205500 + }, + { + "epoch": 0.41514320228509555, + "grad_norm": 609.080078125, + "learning_rate": 7.331624419006568e-06, + "loss": 16.9733, + "step": 205510 + }, + { + "epoch": 0.41516340291777937, + "grad_norm": 362.18572998046875, + "learning_rate": 7.331315624949624e-06, + "loss": 53.9419, + "step": 205520 + }, + { + "epoch": 0.4151836035504632, + "grad_norm": 1026.9534912109375, + "learning_rate": 7.33100681953013e-06, + "loss": 15.2135, + "step": 205530 + }, + { + "epoch": 0.415203804183147, + "grad_norm": 483.15625, + "learning_rate": 7.330698002749593e-06, + "loss": 13.6422, + "step": 205540 + }, + { + "epoch": 0.4152240048158308, + "grad_norm": 187.79249572753906, + 
"learning_rate": 7.330389174609516e-06, + "loss": 13.0211, + "step": 205550 + }, + { + "epoch": 0.41524420544851465, + "grad_norm": 716.1710815429688, + "learning_rate": 7.330080335111405e-06, + "loss": 14.7922, + "step": 205560 + }, + { + "epoch": 0.41526440608119847, + "grad_norm": 1296.7987060546875, + "learning_rate": 7.329771484256764e-06, + "loss": 16.539, + "step": 205570 + }, + { + "epoch": 0.4152846067138823, + "grad_norm": 603.66357421875, + "learning_rate": 7.3294626220470984e-06, + "loss": 14.3115, + "step": 205580 + }, + { + "epoch": 0.4153048073465661, + "grad_norm": 154.29220581054688, + "learning_rate": 7.329153748483918e-06, + "loss": 17.6815, + "step": 205590 + }, + { + "epoch": 0.4153250079792499, + "grad_norm": 451.9192199707031, + "learning_rate": 7.3288448635687215e-06, + "loss": 21.3467, + "step": 205600 + }, + { + "epoch": 0.41534520861193375, + "grad_norm": 376.01995849609375, + "learning_rate": 7.32853596730302e-06, + "loss": 22.9711, + "step": 205610 + }, + { + "epoch": 0.41536540924461757, + "grad_norm": 186.4207763671875, + "learning_rate": 7.3282270596883155e-06, + "loss": 21.0603, + "step": 205620 + }, + { + "epoch": 0.41538560987730133, + "grad_norm": 544.6901245117188, + "learning_rate": 7.327918140726115e-06, + "loss": 14.387, + "step": 205630 + }, + { + "epoch": 0.41540581050998515, + "grad_norm": 295.2100524902344, + "learning_rate": 7.327609210417923e-06, + "loss": 22.3907, + "step": 205640 + }, + { + "epoch": 0.41542601114266897, + "grad_norm": 300.23260498046875, + "learning_rate": 7.327300268765246e-06, + "loss": 17.4491, + "step": 205650 + }, + { + "epoch": 0.4154462117753528, + "grad_norm": 274.13958740234375, + "learning_rate": 7.3269913157695915e-06, + "loss": 37.3402, + "step": 205660 + }, + { + "epoch": 0.4154664124080366, + "grad_norm": 464.6728515625, + "learning_rate": 7.326682351432462e-06, + "loss": 31.4462, + "step": 205670 + }, + { + "epoch": 0.41548661304072043, + "grad_norm": 350.0167541503906, + "learning_rate": 7.326373375755365e-06, + "loss": 15.5198, + "step": 205680 + }, + { + "epoch": 0.41550681367340425, + "grad_norm": 514.5641479492188, + "learning_rate": 7.326064388739806e-06, + "loss": 21.8145, + "step": 205690 + }, + { + "epoch": 0.41552701430608807, + "grad_norm": 393.8197937011719, + "learning_rate": 7.325755390387293e-06, + "loss": 21.2698, + "step": 205700 + }, + { + "epoch": 0.4155472149387719, + "grad_norm": 215.42408752441406, + "learning_rate": 7.325446380699329e-06, + "loss": 31.1187, + "step": 205710 + }, + { + "epoch": 0.4155674155714557, + "grad_norm": 161.3758544921875, + "learning_rate": 7.3251373596774214e-06, + "loss": 16.2595, + "step": 205720 + }, + { + "epoch": 0.41558761620413953, + "grad_norm": 538.1591186523438, + "learning_rate": 7.324828327323077e-06, + "loss": 16.8449, + "step": 205730 + }, + { + "epoch": 0.41560781683682335, + "grad_norm": 828.3040161132812, + "learning_rate": 7.3245192836378e-06, + "loss": 44.7471, + "step": 205740 + }, + { + "epoch": 0.41562801746950717, + "grad_norm": 438.98577880859375, + "learning_rate": 7.3242102286231e-06, + "loss": 11.2179, + "step": 205750 + }, + { + "epoch": 0.41564821810219094, + "grad_norm": 98.83910369873047, + "learning_rate": 7.323901162280478e-06, + "loss": 12.6262, + "step": 205760 + }, + { + "epoch": 0.41566841873487476, + "grad_norm": 450.12481689453125, + "learning_rate": 7.323592084611446e-06, + "loss": 19.8717, + "step": 205770 + }, + { + "epoch": 0.4156886193675586, + "grad_norm": 304.15716552734375, + "learning_rate": 7.3232829956175074e-06, 
+ "loss": 13.5696, + "step": 205780 + }, + { + "epoch": 0.4157088200002424, + "grad_norm": 448.3150939941406, + "learning_rate": 7.32297389530017e-06, + "loss": 25.9163, + "step": 205790 + }, + { + "epoch": 0.4157290206329262, + "grad_norm": 359.23626708984375, + "learning_rate": 7.32266478366094e-06, + "loss": 12.7628, + "step": 205800 + }, + { + "epoch": 0.41574922126561004, + "grad_norm": 876.4931030273438, + "learning_rate": 7.322355660701321e-06, + "loss": 22.1785, + "step": 205810 + }, + { + "epoch": 0.41576942189829386, + "grad_norm": 443.26605224609375, + "learning_rate": 7.322046526422824e-06, + "loss": 17.2462, + "step": 205820 + }, + { + "epoch": 0.4157896225309777, + "grad_norm": 388.3150329589844, + "learning_rate": 7.321737380826954e-06, + "loss": 22.4893, + "step": 205830 + }, + { + "epoch": 0.4158098231636615, + "grad_norm": 145.12567138671875, + "learning_rate": 7.321428223915217e-06, + "loss": 32.9967, + "step": 205840 + }, + { + "epoch": 0.4158300237963453, + "grad_norm": 71.86688995361328, + "learning_rate": 7.321119055689121e-06, + "loss": 17.2356, + "step": 205850 + }, + { + "epoch": 0.41585022442902914, + "grad_norm": 348.533203125, + "learning_rate": 7.3208098761501714e-06, + "loss": 20.6987, + "step": 205860 + }, + { + "epoch": 0.41587042506171296, + "grad_norm": 816.877685546875, + "learning_rate": 7.320500685299876e-06, + "loss": 21.9663, + "step": 205870 + }, + { + "epoch": 0.4158906256943968, + "grad_norm": 206.76580810546875, + "learning_rate": 7.320191483139742e-06, + "loss": 22.9791, + "step": 205880 + }, + { + "epoch": 0.41591082632708054, + "grad_norm": 96.70581817626953, + "learning_rate": 7.319882269671277e-06, + "loss": 20.0063, + "step": 205890 + }, + { + "epoch": 0.41593102695976436, + "grad_norm": 191.33203125, + "learning_rate": 7.319573044895986e-06, + "loss": 18.062, + "step": 205900 + }, + { + "epoch": 0.4159512275924482, + "grad_norm": 419.3578186035156, + "learning_rate": 7.319263808815378e-06, + "loss": 19.1683, + "step": 205910 + }, + { + "epoch": 0.415971428225132, + "grad_norm": 457.2339782714844, + "learning_rate": 7.318954561430959e-06, + "loss": 13.6817, + "step": 205920 + }, + { + "epoch": 0.4159916288578158, + "grad_norm": 1044.152587890625, + "learning_rate": 7.318645302744237e-06, + "loss": 24.3602, + "step": 205930 + }, + { + "epoch": 0.41601182949049964, + "grad_norm": 928.43212890625, + "learning_rate": 7.318336032756717e-06, + "loss": 26.4416, + "step": 205940 + }, + { + "epoch": 0.41603203012318346, + "grad_norm": 520.3009033203125, + "learning_rate": 7.318026751469912e-06, + "loss": 10.3366, + "step": 205950 + }, + { + "epoch": 0.4160522307558673, + "grad_norm": 283.0570068359375, + "learning_rate": 7.317717458885324e-06, + "loss": 21.4138, + "step": 205960 + }, + { + "epoch": 0.4160724313885511, + "grad_norm": 201.0565643310547, + "learning_rate": 7.317408155004462e-06, + "loss": 22.4877, + "step": 205970 + }, + { + "epoch": 0.4160926320212349, + "grad_norm": 93.96663665771484, + "learning_rate": 7.317098839828835e-06, + "loss": 31.8869, + "step": 205980 + }, + { + "epoch": 0.41611283265391874, + "grad_norm": 414.8111572265625, + "learning_rate": 7.316789513359948e-06, + "loss": 23.2044, + "step": 205990 + }, + { + "epoch": 0.41613303328660256, + "grad_norm": 429.84356689453125, + "learning_rate": 7.31648017559931e-06, + "loss": 17.882, + "step": 206000 + }, + { + "epoch": 0.4161532339192863, + "grad_norm": 496.6199645996094, + "learning_rate": 7.316170826548428e-06, + "loss": 23.4283, + "step": 206010 + }, + { + "epoch": 
0.41617343455197015, + "grad_norm": 97.62599182128906, + "learning_rate": 7.315861466208811e-06, + "loss": 14.062, + "step": 206020 + }, + { + "epoch": 0.41619363518465397, + "grad_norm": 98.52935028076172, + "learning_rate": 7.315552094581966e-06, + "loss": 20.5307, + "step": 206030 + }, + { + "epoch": 0.4162138358173378, + "grad_norm": 410.72332763671875, + "learning_rate": 7.315242711669401e-06, + "loss": 23.0629, + "step": 206040 + }, + { + "epoch": 0.4162340364500216, + "grad_norm": 222.60693359375, + "learning_rate": 7.3149333174726246e-06, + "loss": 10.9954, + "step": 206050 + }, + { + "epoch": 0.4162542370827054, + "grad_norm": 446.1466979980469, + "learning_rate": 7.314623911993143e-06, + "loss": 27.7812, + "step": 206060 + }, + { + "epoch": 0.41627443771538924, + "grad_norm": 0.9989863634109497, + "learning_rate": 7.314314495232467e-06, + "loss": 22.6591, + "step": 206070 + }, + { + "epoch": 0.41629463834807306, + "grad_norm": 431.9134216308594, + "learning_rate": 7.314005067192099e-06, + "loss": 27.4647, + "step": 206080 + }, + { + "epoch": 0.4163148389807569, + "grad_norm": 1403.6568603515625, + "learning_rate": 7.313695627873554e-06, + "loss": 24.585, + "step": 206090 + }, + { + "epoch": 0.4163350396134407, + "grad_norm": 194.90769958496094, + "learning_rate": 7.313386177278335e-06, + "loss": 23.6103, + "step": 206100 + }, + { + "epoch": 0.4163552402461245, + "grad_norm": 306.5736389160156, + "learning_rate": 7.3130767154079555e-06, + "loss": 32.0867, + "step": 206110 + }, + { + "epoch": 0.41637544087880834, + "grad_norm": 298.9986877441406, + "learning_rate": 7.312767242263919e-06, + "loss": 12.8305, + "step": 206120 + }, + { + "epoch": 0.41639564151149216, + "grad_norm": 477.2821960449219, + "learning_rate": 7.312457757847734e-06, + "loss": 16.3254, + "step": 206130 + }, + { + "epoch": 0.41641584214417593, + "grad_norm": 595.7302856445312, + "learning_rate": 7.312148262160913e-06, + "loss": 22.1576, + "step": 206140 + }, + { + "epoch": 0.41643604277685975, + "grad_norm": 911.7860107421875, + "learning_rate": 7.31183875520496e-06, + "loss": 8.1722, + "step": 206150 + }, + { + "epoch": 0.41645624340954357, + "grad_norm": 777.1712036132812, + "learning_rate": 7.311529236981385e-06, + "loss": 64.3875, + "step": 206160 + }, + { + "epoch": 0.4164764440422274, + "grad_norm": 711.8444213867188, + "learning_rate": 7.3112197074916975e-06, + "loss": 22.7231, + "step": 206170 + }, + { + "epoch": 0.4164966446749112, + "grad_norm": 364.54669189453125, + "learning_rate": 7.310910166737406e-06, + "loss": 20.7396, + "step": 206180 + }, + { + "epoch": 0.41651684530759503, + "grad_norm": 412.8030090332031, + "learning_rate": 7.3106006147200185e-06, + "loss": 20.6863, + "step": 206190 + }, + { + "epoch": 0.41653704594027885, + "grad_norm": 1091.5716552734375, + "learning_rate": 7.310291051441044e-06, + "loss": 26.541, + "step": 206200 + }, + { + "epoch": 0.41655724657296267, + "grad_norm": 249.6706085205078, + "learning_rate": 7.30998147690199e-06, + "loss": 16.1993, + "step": 206210 + }, + { + "epoch": 0.4165774472056465, + "grad_norm": 93.3351821899414, + "learning_rate": 7.3096718911043675e-06, + "loss": 14.1391, + "step": 206220 + }, + { + "epoch": 0.4165976478383303, + "grad_norm": 278.6354675292969, + "learning_rate": 7.309362294049683e-06, + "loss": 25.293, + "step": 206230 + }, + { + "epoch": 0.41661784847101413, + "grad_norm": 215.47840881347656, + "learning_rate": 7.309052685739448e-06, + "loss": 16.4743, + "step": 206240 + }, + { + "epoch": 0.41663804910369795, + "grad_norm": 
186.58836364746094, + "learning_rate": 7.308743066175172e-06, + "loss": 25.0049, + "step": 206250 + }, + { + "epoch": 0.41665824973638177, + "grad_norm": 348.30169677734375, + "learning_rate": 7.308433435358357e-06, + "loss": 17.7779, + "step": 206260 + }, + { + "epoch": 0.41667845036906553, + "grad_norm": 296.0768737792969, + "learning_rate": 7.308123793290523e-06, + "loss": 20.9964, + "step": 206270 + }, + { + "epoch": 0.41669865100174935, + "grad_norm": 0.0, + "learning_rate": 7.307814139973171e-06, + "loss": 8.9119, + "step": 206280 + }, + { + "epoch": 0.4167188516344332, + "grad_norm": 364.7955322265625, + "learning_rate": 7.307504475407813e-06, + "loss": 30.5145, + "step": 206290 + }, + { + "epoch": 0.416739052267117, + "grad_norm": 131.2972869873047, + "learning_rate": 7.307194799595958e-06, + "loss": 22.1773, + "step": 206300 + }, + { + "epoch": 0.4167592528998008, + "grad_norm": 510.60003662109375, + "learning_rate": 7.306885112539116e-06, + "loss": 21.3477, + "step": 206310 + }, + { + "epoch": 0.41677945353248463, + "grad_norm": 113.77836608886719, + "learning_rate": 7.306575414238794e-06, + "loss": 16.8134, + "step": 206320 + }, + { + "epoch": 0.41679965416516845, + "grad_norm": 958.2135009765625, + "learning_rate": 7.306265704696505e-06, + "loss": 32.4911, + "step": 206330 + }, + { + "epoch": 0.4168198547978523, + "grad_norm": 89.57183074951172, + "learning_rate": 7.305955983913756e-06, + "loss": 20.0299, + "step": 206340 + }, + { + "epoch": 0.4168400554305361, + "grad_norm": 273.4055480957031, + "learning_rate": 7.305646251892058e-06, + "loss": 13.8955, + "step": 206350 + }, + { + "epoch": 0.4168602560632199, + "grad_norm": 126.69972229003906, + "learning_rate": 7.30533650863292e-06, + "loss": 17.1203, + "step": 206360 + }, + { + "epoch": 0.41688045669590373, + "grad_norm": 609.8422241210938, + "learning_rate": 7.305026754137849e-06, + "loss": 14.8693, + "step": 206370 + }, + { + "epoch": 0.41690065732858755, + "grad_norm": 927.921875, + "learning_rate": 7.304716988408359e-06, + "loss": 20.7629, + "step": 206380 + }, + { + "epoch": 0.4169208579612714, + "grad_norm": 590.805419921875, + "learning_rate": 7.3044072114459585e-06, + "loss": 28.4767, + "step": 206390 + }, + { + "epoch": 0.41694105859395514, + "grad_norm": 683.1563720703125, + "learning_rate": 7.3040974232521555e-06, + "loss": 20.3865, + "step": 206400 + }, + { + "epoch": 0.41696125922663896, + "grad_norm": 528.8218994140625, + "learning_rate": 7.3037876238284625e-06, + "loss": 12.0202, + "step": 206410 + }, + { + "epoch": 0.4169814598593228, + "grad_norm": 326.8551940917969, + "learning_rate": 7.303477813176385e-06, + "loss": 23.3848, + "step": 206420 + }, + { + "epoch": 0.4170016604920066, + "grad_norm": 75.90850067138672, + "learning_rate": 7.303167991297439e-06, + "loss": 21.5874, + "step": 206430 + }, + { + "epoch": 0.4170218611246904, + "grad_norm": 499.3680419921875, + "learning_rate": 7.302858158193131e-06, + "loss": 24.6299, + "step": 206440 + }, + { + "epoch": 0.41704206175737424, + "grad_norm": 365.0440979003906, + "learning_rate": 7.302548313864971e-06, + "loss": 13.7051, + "step": 206450 + }, + { + "epoch": 0.41706226239005806, + "grad_norm": 277.8636169433594, + "learning_rate": 7.30223845831447e-06, + "loss": 14.326, + "step": 206460 + }, + { + "epoch": 0.4170824630227419, + "grad_norm": 320.0940246582031, + "learning_rate": 7.301928591543137e-06, + "loss": 22.6345, + "step": 206470 + }, + { + "epoch": 0.4171026636554257, + "grad_norm": 675.6278686523438, + "learning_rate": 7.301618713552485e-06, + 
"loss": 18.9414, + "step": 206480 + }, + { + "epoch": 0.4171228642881095, + "grad_norm": 519.8043823242188, + "learning_rate": 7.301308824344022e-06, + "loss": 22.9404, + "step": 206490 + }, + { + "epoch": 0.41714306492079334, + "grad_norm": 419.30902099609375, + "learning_rate": 7.300998923919259e-06, + "loss": 25.8645, + "step": 206500 + }, + { + "epoch": 0.41716326555347716, + "grad_norm": 125.47054290771484, + "learning_rate": 7.300689012279706e-06, + "loss": 21.1088, + "step": 206510 + }, + { + "epoch": 0.4171834661861609, + "grad_norm": 140.15902709960938, + "learning_rate": 7.300379089426874e-06, + "loss": 22.0836, + "step": 206520 + }, + { + "epoch": 0.41720366681884474, + "grad_norm": 225.854248046875, + "learning_rate": 7.300069155362272e-06, + "loss": 25.7466, + "step": 206530 + }, + { + "epoch": 0.41722386745152856, + "grad_norm": 439.0897216796875, + "learning_rate": 7.299759210087415e-06, + "loss": 14.7174, + "step": 206540 + }, + { + "epoch": 0.4172440680842124, + "grad_norm": 538.1312866210938, + "learning_rate": 7.299449253603807e-06, + "loss": 30.5284, + "step": 206550 + }, + { + "epoch": 0.4172642687168962, + "grad_norm": 41.05196762084961, + "learning_rate": 7.299139285912965e-06, + "loss": 14.0157, + "step": 206560 + }, + { + "epoch": 0.41728446934958, + "grad_norm": 579.4649047851562, + "learning_rate": 7.298829307016395e-06, + "loss": 11.66, + "step": 206570 + }, + { + "epoch": 0.41730466998226384, + "grad_norm": 91.174560546875, + "learning_rate": 7.298519316915611e-06, + "loss": 34.2421, + "step": 206580 + }, + { + "epoch": 0.41732487061494766, + "grad_norm": 239.1872100830078, + "learning_rate": 7.298209315612123e-06, + "loss": 20.6073, + "step": 206590 + }, + { + "epoch": 0.4173450712476315, + "grad_norm": 366.6288146972656, + "learning_rate": 7.297899303107441e-06, + "loss": 24.0675, + "step": 206600 + }, + { + "epoch": 0.4173652718803153, + "grad_norm": 1000.869384765625, + "learning_rate": 7.297589279403076e-06, + "loss": 27.9361, + "step": 206610 + }, + { + "epoch": 0.4173854725129991, + "grad_norm": 326.3446044921875, + "learning_rate": 7.297279244500539e-06, + "loss": 23.5082, + "step": 206620 + }, + { + "epoch": 0.41740567314568294, + "grad_norm": 209.549560546875, + "learning_rate": 7.296969198401342e-06, + "loss": 14.9825, + "step": 206630 + }, + { + "epoch": 0.41742587377836676, + "grad_norm": 510.73297119140625, + "learning_rate": 7.296659141106996e-06, + "loss": 31.6884, + "step": 206640 + }, + { + "epoch": 0.4174460744110505, + "grad_norm": 230.9799041748047, + "learning_rate": 7.2963490726190134e-06, + "loss": 10.3942, + "step": 206650 + }, + { + "epoch": 0.41746627504373435, + "grad_norm": 722.6381225585938, + "learning_rate": 7.296038992938902e-06, + "loss": 29.7932, + "step": 206660 + }, + { + "epoch": 0.41748647567641817, + "grad_norm": 743.6947631835938, + "learning_rate": 7.2957289020681755e-06, + "loss": 43.5345, + "step": 206670 + }, + { + "epoch": 0.417506676309102, + "grad_norm": 614.7973022460938, + "learning_rate": 7.295418800008345e-06, + "loss": 31.6349, + "step": 206680 + }, + { + "epoch": 0.4175268769417858, + "grad_norm": 286.641357421875, + "learning_rate": 7.295108686760921e-06, + "loss": 21.4192, + "step": 206690 + }, + { + "epoch": 0.4175470775744696, + "grad_norm": 567.8367919921875, + "learning_rate": 7.294798562327417e-06, + "loss": 17.8248, + "step": 206700 + }, + { + "epoch": 0.41756727820715345, + "grad_norm": 883.4342651367188, + "learning_rate": 7.2944884267093405e-06, + "loss": 19.8862, + "step": 206710 + }, + { + 
"epoch": 0.41758747883983727, + "grad_norm": 72.49630737304688, + "learning_rate": 7.294178279908208e-06, + "loss": 45.195, + "step": 206720 + }, + { + "epoch": 0.4176076794725211, + "grad_norm": 236.8124237060547, + "learning_rate": 7.293868121925528e-06, + "loss": 15.5517, + "step": 206730 + }, + { + "epoch": 0.4176278801052049, + "grad_norm": 251.05450439453125, + "learning_rate": 7.293557952762813e-06, + "loss": 15.2094, + "step": 206740 + }, + { + "epoch": 0.4176480807378887, + "grad_norm": 461.9261169433594, + "learning_rate": 7.293247772421577e-06, + "loss": 17.8967, + "step": 206750 + }, + { + "epoch": 0.41766828137057255, + "grad_norm": 335.471923828125, + "learning_rate": 7.292937580903326e-06, + "loss": 14.177, + "step": 206760 + }, + { + "epoch": 0.41768848200325637, + "grad_norm": 17.844736099243164, + "learning_rate": 7.2926273782095766e-06, + "loss": 24.8497, + "step": 206770 + }, + { + "epoch": 0.41770868263594013, + "grad_norm": 225.99588012695312, + "learning_rate": 7.29231716434184e-06, + "loss": 18.3583, + "step": 206780 + }, + { + "epoch": 0.41772888326862395, + "grad_norm": 281.2262878417969, + "learning_rate": 7.292006939301627e-06, + "loss": 28.0654, + "step": 206790 + }, + { + "epoch": 0.41774908390130777, + "grad_norm": 174.10264587402344, + "learning_rate": 7.291696703090449e-06, + "loss": 12.1054, + "step": 206800 + }, + { + "epoch": 0.4177692845339916, + "grad_norm": 354.1954650878906, + "learning_rate": 7.291386455709823e-06, + "loss": 18.5032, + "step": 206810 + }, + { + "epoch": 0.4177894851666754, + "grad_norm": 581.7930908203125, + "learning_rate": 7.291076197161253e-06, + "loss": 21.4246, + "step": 206820 + }, + { + "epoch": 0.41780968579935923, + "grad_norm": 353.8553771972656, + "learning_rate": 7.290765927446258e-06, + "loss": 25.3988, + "step": 206830 + }, + { + "epoch": 0.41782988643204305, + "grad_norm": 274.6319274902344, + "learning_rate": 7.290455646566347e-06, + "loss": 31.9479, + "step": 206840 + }, + { + "epoch": 0.41785008706472687, + "grad_norm": 414.32049560546875, + "learning_rate": 7.2901453545230325e-06, + "loss": 8.159, + "step": 206850 + }, + { + "epoch": 0.4178702876974107, + "grad_norm": 627.198974609375, + "learning_rate": 7.289835051317828e-06, + "loss": 14.0879, + "step": 206860 + }, + { + "epoch": 0.4178904883300945, + "grad_norm": 970.3900146484375, + "learning_rate": 7.289524736952245e-06, + "loss": 18.2681, + "step": 206870 + }, + { + "epoch": 0.41791068896277833, + "grad_norm": 439.5081481933594, + "learning_rate": 7.289214411427796e-06, + "loss": 15.3993, + "step": 206880 + }, + { + "epoch": 0.41793088959546215, + "grad_norm": 1673.9371337890625, + "learning_rate": 7.288904074745994e-06, + "loss": 25.6307, + "step": 206890 + }, + { + "epoch": 0.41795109022814597, + "grad_norm": 235.52084350585938, + "learning_rate": 7.288593726908351e-06, + "loss": 15.1453, + "step": 206900 + }, + { + "epoch": 0.41797129086082974, + "grad_norm": 480.03887939453125, + "learning_rate": 7.28828336791638e-06, + "loss": 28.3371, + "step": 206910 + }, + { + "epoch": 0.41799149149351356, + "grad_norm": 323.4153747558594, + "learning_rate": 7.287972997771592e-06, + "loss": 20.6894, + "step": 206920 + }, + { + "epoch": 0.4180116921261974, + "grad_norm": 28.118074417114258, + "learning_rate": 7.287662616475504e-06, + "loss": 19.9836, + "step": 206930 + }, + { + "epoch": 0.4180318927588812, + "grad_norm": 639.7236938476562, + "learning_rate": 7.287352224029623e-06, + "loss": 37.9913, + "step": 206940 + }, + { + "epoch": 0.418052093391565, + 
"grad_norm": 411.906494140625, + "learning_rate": 7.287041820435465e-06, + "loss": 30.1956, + "step": 206950 + }, + { + "epoch": 0.41807229402424884, + "grad_norm": 184.6607208251953, + "learning_rate": 7.286731405694544e-06, + "loss": 4.7745, + "step": 206960 + }, + { + "epoch": 0.41809249465693266, + "grad_norm": 59.70170211791992, + "learning_rate": 7.28642097980837e-06, + "loss": 24.4321, + "step": 206970 + }, + { + "epoch": 0.4181126952896165, + "grad_norm": 468.37042236328125, + "learning_rate": 7.286110542778459e-06, + "loss": 25.9108, + "step": 206980 + }, + { + "epoch": 0.4181328959223003, + "grad_norm": 249.84591674804688, + "learning_rate": 7.285800094606321e-06, + "loss": 11.4053, + "step": 206990 + }, + { + "epoch": 0.4181530965549841, + "grad_norm": 310.65936279296875, + "learning_rate": 7.285489635293472e-06, + "loss": 24.1084, + "step": 207000 + }, + { + "epoch": 0.41817329718766794, + "grad_norm": 688.833984375, + "learning_rate": 7.2851791648414226e-06, + "loss": 25.7734, + "step": 207010 + }, + { + "epoch": 0.41819349782035176, + "grad_norm": 172.11061096191406, + "learning_rate": 7.284868683251688e-06, + "loss": 16.902, + "step": 207020 + }, + { + "epoch": 0.4182136984530356, + "grad_norm": 112.6848373413086, + "learning_rate": 7.284558190525779e-06, + "loss": 16.9506, + "step": 207030 + }, + { + "epoch": 0.41823389908571934, + "grad_norm": 275.7078857421875, + "learning_rate": 7.284247686665212e-06, + "loss": 16.2917, + "step": 207040 + }, + { + "epoch": 0.41825409971840316, + "grad_norm": 659.5950317382812, + "learning_rate": 7.283937171671498e-06, + "loss": 21.892, + "step": 207050 + }, + { + "epoch": 0.418274300351087, + "grad_norm": 126.5652084350586, + "learning_rate": 7.283626645546152e-06, + "loss": 19.0833, + "step": 207060 + }, + { + "epoch": 0.4182945009837708, + "grad_norm": 438.12249755859375, + "learning_rate": 7.283316108290685e-06, + "loss": 13.0235, + "step": 207070 + }, + { + "epoch": 0.4183147016164546, + "grad_norm": 363.8396301269531, + "learning_rate": 7.283005559906614e-06, + "loss": 20.077, + "step": 207080 + }, + { + "epoch": 0.41833490224913844, + "grad_norm": 288.7080383300781, + "learning_rate": 7.282695000395451e-06, + "loss": 20.4268, + "step": 207090 + }, + { + "epoch": 0.41835510288182226, + "grad_norm": 188.05029296875, + "learning_rate": 7.282384429758709e-06, + "loss": 14.1814, + "step": 207100 + }, + { + "epoch": 0.4183753035145061, + "grad_norm": 396.8157653808594, + "learning_rate": 7.282073847997901e-06, + "loss": 11.6911, + "step": 207110 + }, + { + "epoch": 0.4183955041471899, + "grad_norm": 17.411792755126953, + "learning_rate": 7.281763255114542e-06, + "loss": 22.379, + "step": 207120 + }, + { + "epoch": 0.4184157047798737, + "grad_norm": 575.6268310546875, + "learning_rate": 7.281452651110148e-06, + "loss": 19.024, + "step": 207130 + }, + { + "epoch": 0.41843590541255754, + "grad_norm": 583.982177734375, + "learning_rate": 7.281142035986227e-06, + "loss": 19.9401, + "step": 207140 + }, + { + "epoch": 0.41845610604524136, + "grad_norm": 332.6950378417969, + "learning_rate": 7.280831409744299e-06, + "loss": 20.0233, + "step": 207150 + }, + { + "epoch": 0.4184763066779251, + "grad_norm": 557.7929077148438, + "learning_rate": 7.280520772385875e-06, + "loss": 18.3228, + "step": 207160 + }, + { + "epoch": 0.41849650731060895, + "grad_norm": 857.3450927734375, + "learning_rate": 7.280210123912468e-06, + "loss": 21.378, + "step": 207170 + }, + { + "epoch": 0.41851670794329277, + "grad_norm": 391.34783935546875, + "learning_rate": 
7.2798994643255945e-06, + "loss": 19.3676, + "step": 207180 + }, + { + "epoch": 0.4185369085759766, + "grad_norm": 865.761962890625, + "learning_rate": 7.279588793626767e-06, + "loss": 27.0067, + "step": 207190 + }, + { + "epoch": 0.4185571092086604, + "grad_norm": 467.2917785644531, + "learning_rate": 7.279278111817502e-06, + "loss": 18.5604, + "step": 207200 + }, + { + "epoch": 0.4185773098413442, + "grad_norm": 372.0297546386719, + "learning_rate": 7.2789674188993096e-06, + "loss": 21.0548, + "step": 207210 + }, + { + "epoch": 0.41859751047402805, + "grad_norm": 9.171235084533691, + "learning_rate": 7.278656714873707e-06, + "loss": 17.0036, + "step": 207220 + }, + { + "epoch": 0.41861771110671187, + "grad_norm": 406.84490966796875, + "learning_rate": 7.2783459997422075e-06, + "loss": 24.6613, + "step": 207230 + }, + { + "epoch": 0.4186379117393957, + "grad_norm": 585.8035278320312, + "learning_rate": 7.278035273506327e-06, + "loss": 26.4588, + "step": 207240 + }, + { + "epoch": 0.4186581123720795, + "grad_norm": 464.6855773925781, + "learning_rate": 7.2777245361675786e-06, + "loss": 24.9899, + "step": 207250 + }, + { + "epoch": 0.4186783130047633, + "grad_norm": 176.9896697998047, + "learning_rate": 7.277413787727478e-06, + "loss": 21.9758, + "step": 207260 + }, + { + "epoch": 0.41869851363744715, + "grad_norm": 680.6339721679688, + "learning_rate": 7.277103028187536e-06, + "loss": 35.1897, + "step": 207270 + }, + { + "epoch": 0.41871871427013097, + "grad_norm": 520.2926025390625, + "learning_rate": 7.276792257549273e-06, + "loss": 19.2672, + "step": 207280 + }, + { + "epoch": 0.41873891490281473, + "grad_norm": 194.8145751953125, + "learning_rate": 7.276481475814199e-06, + "loss": 23.8696, + "step": 207290 + }, + { + "epoch": 0.41875911553549855, + "grad_norm": 370.65338134765625, + "learning_rate": 7.27617068298383e-06, + "loss": 24.2894, + "step": 207300 + }, + { + "epoch": 0.41877931616818237, + "grad_norm": 1000.7235107421875, + "learning_rate": 7.2758598790596836e-06, + "loss": 35.6905, + "step": 207310 + }, + { + "epoch": 0.4187995168008662, + "grad_norm": 95.40765380859375, + "learning_rate": 7.275549064043269e-06, + "loss": 22.1614, + "step": 207320 + }, + { + "epoch": 0.41881971743355, + "grad_norm": 466.3887939453125, + "learning_rate": 7.275238237936106e-06, + "loss": 20.3975, + "step": 207330 + }, + { + "epoch": 0.41883991806623383, + "grad_norm": 421.66552734375, + "learning_rate": 7.274927400739708e-06, + "loss": 22.5977, + "step": 207340 + }, + { + "epoch": 0.41886011869891765, + "grad_norm": 304.6739807128906, + "learning_rate": 7.274616552455589e-06, + "loss": 20.5304, + "step": 207350 + }, + { + "epoch": 0.41888031933160147, + "grad_norm": 114.920654296875, + "learning_rate": 7.274305693085266e-06, + "loss": 11.689, + "step": 207360 + }, + { + "epoch": 0.4189005199642853, + "grad_norm": 495.1329345703125, + "learning_rate": 7.273994822630251e-06, + "loss": 17.1498, + "step": 207370 + }, + { + "epoch": 0.4189207205969691, + "grad_norm": 562.6421508789062, + "learning_rate": 7.273683941092063e-06, + "loss": 23.1622, + "step": 207380 + }, + { + "epoch": 0.41894092122965293, + "grad_norm": 428.80682373046875, + "learning_rate": 7.273373048472214e-06, + "loss": 17.5005, + "step": 207390 + }, + { + "epoch": 0.41896112186233675, + "grad_norm": 417.3091735839844, + "learning_rate": 7.27306214477222e-06, + "loss": 23.5517, + "step": 207400 + }, + { + "epoch": 0.41898132249502057, + "grad_norm": 293.5716857910156, + "learning_rate": 7.272751229993598e-06, + "loss": 11.2909, + 
"step": 207410 + }, + { + "epoch": 0.41900152312770433, + "grad_norm": 741.0191040039062, + "learning_rate": 7.272440304137862e-06, + "loss": 36.1299, + "step": 207420 + }, + { + "epoch": 0.41902172376038815, + "grad_norm": 219.34278869628906, + "learning_rate": 7.2721293672065275e-06, + "loss": 33.6884, + "step": 207430 + }, + { + "epoch": 0.419041924393072, + "grad_norm": 801.2993774414062, + "learning_rate": 7.27181841920111e-06, + "loss": 18.6123, + "step": 207440 + }, + { + "epoch": 0.4190621250257558, + "grad_norm": 406.53662109375, + "learning_rate": 7.271507460123124e-06, + "loss": 14.4973, + "step": 207450 + }, + { + "epoch": 0.4190823256584396, + "grad_norm": 239.15638732910156, + "learning_rate": 7.271196489974087e-06, + "loss": 26.1185, + "step": 207460 + }, + { + "epoch": 0.41910252629112343, + "grad_norm": 225.15029907226562, + "learning_rate": 7.270885508755515e-06, + "loss": 23.1628, + "step": 207470 + }, + { + "epoch": 0.41912272692380725, + "grad_norm": 285.8903503417969, + "learning_rate": 7.27057451646892e-06, + "loss": 13.9068, + "step": 207480 + }, + { + "epoch": 0.4191429275564911, + "grad_norm": 179.57513427734375, + "learning_rate": 7.270263513115823e-06, + "loss": 10.4213, + "step": 207490 + }, + { + "epoch": 0.4191631281891749, + "grad_norm": 209.47906494140625, + "learning_rate": 7.269952498697734e-06, + "loss": 10.3899, + "step": 207500 + }, + { + "epoch": 0.4191833288218587, + "grad_norm": 430.1376953125, + "learning_rate": 7.269641473216174e-06, + "loss": 22.5931, + "step": 207510 + }, + { + "epoch": 0.41920352945454253, + "grad_norm": 270.9019775390625, + "learning_rate": 7.269330436672656e-06, + "loss": 14.631, + "step": 207520 + }, + { + "epoch": 0.41922373008722635, + "grad_norm": 424.13189697265625, + "learning_rate": 7.269019389068697e-06, + "loss": 27.2419, + "step": 207530 + }, + { + "epoch": 0.4192439307199102, + "grad_norm": 557.1117553710938, + "learning_rate": 7.2687083304058125e-06, + "loss": 28.1627, + "step": 207540 + }, + { + "epoch": 0.41926413135259394, + "grad_norm": 68.83080291748047, + "learning_rate": 7.268397260685518e-06, + "loss": 20.6843, + "step": 207550 + }, + { + "epoch": 0.41928433198527776, + "grad_norm": 120.33457946777344, + "learning_rate": 7.268086179909331e-06, + "loss": 15.7439, + "step": 207560 + }, + { + "epoch": 0.4193045326179616, + "grad_norm": 428.87127685546875, + "learning_rate": 7.267775088078768e-06, + "loss": 19.5306, + "step": 207570 + }, + { + "epoch": 0.4193247332506454, + "grad_norm": 291.04327392578125, + "learning_rate": 7.267463985195343e-06, + "loss": 32.1417, + "step": 207580 + }, + { + "epoch": 0.4193449338833292, + "grad_norm": 647.67578125, + "learning_rate": 7.267152871260573e-06, + "loss": 22.1257, + "step": 207590 + }, + { + "epoch": 0.41936513451601304, + "grad_norm": 301.8692321777344, + "learning_rate": 7.266841746275977e-06, + "loss": 22.967, + "step": 207600 + }, + { + "epoch": 0.41938533514869686, + "grad_norm": 243.04318237304688, + "learning_rate": 7.266530610243068e-06, + "loss": 12.8156, + "step": 207610 + }, + { + "epoch": 0.4194055357813807, + "grad_norm": 64.8611831665039, + "learning_rate": 7.266219463163363e-06, + "loss": 23.5269, + "step": 207620 + }, + { + "epoch": 0.4194257364140645, + "grad_norm": 27.306312561035156, + "learning_rate": 7.265908305038381e-06, + "loss": 11.3525, + "step": 207630 + }, + { + "epoch": 0.4194459370467483, + "grad_norm": 267.2720642089844, + "learning_rate": 7.265597135869635e-06, + "loss": 19.133, + "step": 207640 + }, + { + "epoch": 
0.41946613767943214, + "grad_norm": 967.9885864257812, + "learning_rate": 7.265285955658645e-06, + "loss": 30.2306, + "step": 207650 + }, + { + "epoch": 0.41948633831211596, + "grad_norm": 400.0880126953125, + "learning_rate": 7.264974764406924e-06, + "loss": 9.7783, + "step": 207660 + }, + { + "epoch": 0.4195065389447998, + "grad_norm": 588.6100463867188, + "learning_rate": 7.2646635621159925e-06, + "loss": 18.3165, + "step": 207670 + }, + { + "epoch": 0.41952673957748354, + "grad_norm": 767.2931518554688, + "learning_rate": 7.264352348787364e-06, + "loss": 19.7032, + "step": 207680 + }, + { + "epoch": 0.41954694021016736, + "grad_norm": 408.10540771484375, + "learning_rate": 7.2640411244225576e-06, + "loss": 15.6501, + "step": 207690 + }, + { + "epoch": 0.4195671408428512, + "grad_norm": 282.24578857421875, + "learning_rate": 7.26372988902309e-06, + "loss": 33.6294, + "step": 207700 + }, + { + "epoch": 0.419587341475535, + "grad_norm": 467.11175537109375, + "learning_rate": 7.263418642590476e-06, + "loss": 13.9205, + "step": 207710 + }, + { + "epoch": 0.4196075421082188, + "grad_norm": 139.88189697265625, + "learning_rate": 7.263107385126236e-06, + "loss": 16.2457, + "step": 207720 + }, + { + "epoch": 0.41962774274090264, + "grad_norm": 795.6160888671875, + "learning_rate": 7.262796116631882e-06, + "loss": 16.2437, + "step": 207730 + }, + { + "epoch": 0.41964794337358646, + "grad_norm": 294.77490234375, + "learning_rate": 7.262484837108937e-06, + "loss": 34.8987, + "step": 207740 + }, + { + "epoch": 0.4196681440062703, + "grad_norm": 246.17417907714844, + "learning_rate": 7.262173546558914e-06, + "loss": 17.3762, + "step": 207750 + }, + { + "epoch": 0.4196883446389541, + "grad_norm": 418.432861328125, + "learning_rate": 7.261862244983333e-06, + "loss": 22.3757, + "step": 207760 + }, + { + "epoch": 0.4197085452716379, + "grad_norm": 8.907363891601562, + "learning_rate": 7.261550932383707e-06, + "loss": 14.0214, + "step": 207770 + }, + { + "epoch": 0.41972874590432174, + "grad_norm": 158.2021026611328, + "learning_rate": 7.2612396087615586e-06, + "loss": 15.8603, + "step": 207780 + }, + { + "epoch": 0.41974894653700556, + "grad_norm": 262.1641540527344, + "learning_rate": 7.260928274118402e-06, + "loss": 14.9543, + "step": 207790 + }, + { + "epoch": 0.4197691471696893, + "grad_norm": 486.50714111328125, + "learning_rate": 7.260616928455754e-06, + "loss": 19.0735, + "step": 207800 + }, + { + "epoch": 0.41978934780237315, + "grad_norm": 444.6836853027344, + "learning_rate": 7.260305571775135e-06, + "loss": 31.9929, + "step": 207810 + }, + { + "epoch": 0.41980954843505697, + "grad_norm": 290.2806091308594, + "learning_rate": 7.2599942040780605e-06, + "loss": 23.0906, + "step": 207820 + }, + { + "epoch": 0.4198297490677408, + "grad_norm": 895.4974365234375, + "learning_rate": 7.259682825366047e-06, + "loss": 16.3634, + "step": 207830 + }, + { + "epoch": 0.4198499497004246, + "grad_norm": 632.72900390625, + "learning_rate": 7.2593714356406146e-06, + "loss": 21.4046, + "step": 207840 + }, + { + "epoch": 0.4198701503331084, + "grad_norm": 235.52828979492188, + "learning_rate": 7.259060034903278e-06, + "loss": 16.791, + "step": 207850 + }, + { + "epoch": 0.41989035096579225, + "grad_norm": 63.5164794921875, + "learning_rate": 7.258748623155558e-06, + "loss": 15.0879, + "step": 207860 + }, + { + "epoch": 0.41991055159847607, + "grad_norm": 203.7410430908203, + "learning_rate": 7.258437200398974e-06, + "loss": 31.3387, + "step": 207870 + }, + { + "epoch": 0.4199307522311599, + "grad_norm": 
671.734375, + "learning_rate": 7.258125766635038e-06, + "loss": 19.7088, + "step": 207880 + }, + { + "epoch": 0.4199509528638437, + "grad_norm": 142.62875366210938, + "learning_rate": 7.257814321865271e-06, + "loss": 12.4566, + "step": 207890 + }, + { + "epoch": 0.4199711534965275, + "grad_norm": 352.7214660644531, + "learning_rate": 7.257502866091192e-06, + "loss": 32.3652, + "step": 207900 + }, + { + "epoch": 0.41999135412921135, + "grad_norm": 602.37060546875, + "learning_rate": 7.257191399314315e-06, + "loss": 15.4602, + "step": 207910 + }, + { + "epoch": 0.42001155476189517, + "grad_norm": 93.40857696533203, + "learning_rate": 7.256879921536164e-06, + "loss": 23.5006, + "step": 207920 + }, + { + "epoch": 0.42003175539457893, + "grad_norm": 191.13961791992188, + "learning_rate": 7.256568432758252e-06, + "loss": 15.4048, + "step": 207930 + }, + { + "epoch": 0.42005195602726275, + "grad_norm": 463.3614196777344, + "learning_rate": 7.256256932982101e-06, + "loss": 19.8775, + "step": 207940 + }, + { + "epoch": 0.42007215665994657, + "grad_norm": 31.436904907226562, + "learning_rate": 7.2559454222092265e-06, + "loss": 16.4579, + "step": 207950 + }, + { + "epoch": 0.4200923572926304, + "grad_norm": 541.3646240234375, + "learning_rate": 7.255633900441147e-06, + "loss": 27.1186, + "step": 207960 + }, + { + "epoch": 0.4201125579253142, + "grad_norm": 807.3328857421875, + "learning_rate": 7.255322367679382e-06, + "loss": 21.5338, + "step": 207970 + }, + { + "epoch": 0.42013275855799803, + "grad_norm": 220.16342163085938, + "learning_rate": 7.255010823925448e-06, + "loss": 25.4912, + "step": 207980 + }, + { + "epoch": 0.42015295919068185, + "grad_norm": 342.37786865234375, + "learning_rate": 7.254699269180867e-06, + "loss": 16.0834, + "step": 207990 + }, + { + "epoch": 0.42017315982336567, + "grad_norm": 484.2235412597656, + "learning_rate": 7.254387703447154e-06, + "loss": 21.7082, + "step": 208000 + }, + { + "epoch": 0.4201933604560495, + "grad_norm": 647.9937133789062, + "learning_rate": 7.25407612672583e-06, + "loss": 13.444, + "step": 208010 + }, + { + "epoch": 0.4202135610887333, + "grad_norm": 243.7660369873047, + "learning_rate": 7.253764539018411e-06, + "loss": 21.8181, + "step": 208020 + }, + { + "epoch": 0.42023376172141713, + "grad_norm": 9.34383487701416, + "learning_rate": 7.253452940326418e-06, + "loss": 21.1636, + "step": 208030 + }, + { + "epoch": 0.42025396235410095, + "grad_norm": 402.57147216796875, + "learning_rate": 7.253141330651367e-06, + "loss": 18.6828, + "step": 208040 + }, + { + "epoch": 0.42027416298678477, + "grad_norm": 186.9805145263672, + "learning_rate": 7.2528297099947796e-06, + "loss": 18.4708, + "step": 208050 + }, + { + "epoch": 0.42029436361946854, + "grad_norm": 484.6163330078125, + "learning_rate": 7.252518078358173e-06, + "loss": 29.91, + "step": 208060 + }, + { + "epoch": 0.42031456425215236, + "grad_norm": 602.1930541992188, + "learning_rate": 7.252206435743067e-06, + "loss": 18.7558, + "step": 208070 + }, + { + "epoch": 0.4203347648848362, + "grad_norm": 233.40078735351562, + "learning_rate": 7.251894782150981e-06, + "loss": 11.5505, + "step": 208080 + }, + { + "epoch": 0.42035496551752, + "grad_norm": 353.1018371582031, + "learning_rate": 7.251583117583429e-06, + "loss": 17.2449, + "step": 208090 + }, + { + "epoch": 0.4203751661502038, + "grad_norm": 531.6416015625, + "learning_rate": 7.251271442041938e-06, + "loss": 32.751, + "step": 208100 + }, + { + "epoch": 0.42039536678288764, + "grad_norm": 508.6883850097656, + "learning_rate": 
7.250959755528022e-06, + "loss": 16.1892, + "step": 208110 + }, + { + "epoch": 0.42041556741557146, + "grad_norm": 713.8525390625, + "learning_rate": 7.2506480580432005e-06, + "loss": 23.175, + "step": 208120 + }, + { + "epoch": 0.4204357680482553, + "grad_norm": 1152.8912353515625, + "learning_rate": 7.250336349588995e-06, + "loss": 26.7371, + "step": 208130 + }, + { + "epoch": 0.4204559686809391, + "grad_norm": 48.779632568359375, + "learning_rate": 7.250024630166921e-06, + "loss": 24.1488, + "step": 208140 + }, + { + "epoch": 0.4204761693136229, + "grad_norm": 537.6607055664062, + "learning_rate": 7.2497128997785e-06, + "loss": 10.6233, + "step": 208150 + }, + { + "epoch": 0.42049636994630674, + "grad_norm": 312.4154968261719, + "learning_rate": 7.249401158425252e-06, + "loss": 22.843, + "step": 208160 + }, + { + "epoch": 0.42051657057899056, + "grad_norm": 279.8778076171875, + "learning_rate": 7.249089406108696e-06, + "loss": 13.4447, + "step": 208170 + }, + { + "epoch": 0.4205367712116744, + "grad_norm": 114.59381103515625, + "learning_rate": 7.248777642830351e-06, + "loss": 11.0693, + "step": 208180 + }, + { + "epoch": 0.42055697184435814, + "grad_norm": 373.0764465332031, + "learning_rate": 7.248465868591735e-06, + "loss": 30.5604, + "step": 208190 + }, + { + "epoch": 0.42057717247704196, + "grad_norm": 227.80795288085938, + "learning_rate": 7.24815408339437e-06, + "loss": 7.9865, + "step": 208200 + }, + { + "epoch": 0.4205973731097258, + "grad_norm": 383.9432067871094, + "learning_rate": 7.247842287239775e-06, + "loss": 16.1091, + "step": 208210 + }, + { + "epoch": 0.4206175737424096, + "grad_norm": 51.92649841308594, + "learning_rate": 7.247530480129469e-06, + "loss": 15.9713, + "step": 208220 + }, + { + "epoch": 0.4206377743750934, + "grad_norm": 510.3590087890625, + "learning_rate": 7.247218662064972e-06, + "loss": 31.5859, + "step": 208230 + }, + { + "epoch": 0.42065797500777724, + "grad_norm": 514.0302124023438, + "learning_rate": 7.2469068330478046e-06, + "loss": 13.0219, + "step": 208240 + }, + { + "epoch": 0.42067817564046106, + "grad_norm": 243.36740112304688, + "learning_rate": 7.246594993079483e-06, + "loss": 17.2838, + "step": 208250 + }, + { + "epoch": 0.4206983762731449, + "grad_norm": 24.595321655273438, + "learning_rate": 7.246283142161533e-06, + "loss": 10.2763, + "step": 208260 + }, + { + "epoch": 0.4207185769058287, + "grad_norm": 34.77042770385742, + "learning_rate": 7.245971280295469e-06, + "loss": 24.0394, + "step": 208270 + }, + { + "epoch": 0.4207387775385125, + "grad_norm": 246.41275024414062, + "learning_rate": 7.245659407482815e-06, + "loss": 17.2448, + "step": 208280 + }, + { + "epoch": 0.42075897817119634, + "grad_norm": 8.182950019836426, + "learning_rate": 7.2453475237250895e-06, + "loss": 19.6814, + "step": 208290 + }, + { + "epoch": 0.42077917880388016, + "grad_norm": 793.640869140625, + "learning_rate": 7.245035629023812e-06, + "loss": 22.2856, + "step": 208300 + }, + { + "epoch": 0.420799379436564, + "grad_norm": 274.09912109375, + "learning_rate": 7.244723723380504e-06, + "loss": 27.2288, + "step": 208310 + }, + { + "epoch": 0.42081958006924775, + "grad_norm": 265.5552673339844, + "learning_rate": 7.244411806796684e-06, + "loss": 16.1567, + "step": 208320 + }, + { + "epoch": 0.42083978070193157, + "grad_norm": 243.61830139160156, + "learning_rate": 7.244099879273873e-06, + "loss": 27.1939, + "step": 208330 + }, + { + "epoch": 0.4208599813346154, + "grad_norm": 134.2394256591797, + "learning_rate": 7.243787940813591e-06, + "loss": 22.9721, + 
"step": 208340 + }, + { + "epoch": 0.4208801819672992, + "grad_norm": 142.01174926757812, + "learning_rate": 7.24347599141736e-06, + "loss": 19.7979, + "step": 208350 + }, + { + "epoch": 0.420900382599983, + "grad_norm": 439.92041015625, + "learning_rate": 7.243164031086697e-06, + "loss": 30.2856, + "step": 208360 + }, + { + "epoch": 0.42092058323266685, + "grad_norm": 182.24444580078125, + "learning_rate": 7.242852059823127e-06, + "loss": 15.2457, + "step": 208370 + }, + { + "epoch": 0.42094078386535067, + "grad_norm": 265.6875, + "learning_rate": 7.2425400776281665e-06, + "loss": 20.2317, + "step": 208380 + }, + { + "epoch": 0.4209609844980345, + "grad_norm": 683.1009521484375, + "learning_rate": 7.242228084503338e-06, + "loss": 12.1677, + "step": 208390 + }, + { + "epoch": 0.4209811851307183, + "grad_norm": 416.8982849121094, + "learning_rate": 7.241916080450163e-06, + "loss": 10.0464, + "step": 208400 + }, + { + "epoch": 0.4210013857634021, + "grad_norm": 471.6433410644531, + "learning_rate": 7.241604065470158e-06, + "loss": 21.2547, + "step": 208410 + }, + { + "epoch": 0.42102158639608595, + "grad_norm": 158.52459716796875, + "learning_rate": 7.24129203956485e-06, + "loss": 14.5647, + "step": 208420 + }, + { + "epoch": 0.42104178702876977, + "grad_norm": 573.9026489257812, + "learning_rate": 7.240980002735754e-06, + "loss": 13.6013, + "step": 208430 + }, + { + "epoch": 0.42106198766145353, + "grad_norm": 577.913330078125, + "learning_rate": 7.240667954984395e-06, + "loss": 25.1335, + "step": 208440 + }, + { + "epoch": 0.42108218829413735, + "grad_norm": 652.5313110351562, + "learning_rate": 7.24035589631229e-06, + "loss": 23.0305, + "step": 208450 + }, + { + "epoch": 0.42110238892682117, + "grad_norm": 265.8028869628906, + "learning_rate": 7.240043826720964e-06, + "loss": 25.7539, + "step": 208460 + }, + { + "epoch": 0.421122589559505, + "grad_norm": 3482.27294921875, + "learning_rate": 7.239731746211936e-06, + "loss": 34.0574, + "step": 208470 + }, + { + "epoch": 0.4211427901921888, + "grad_norm": 20.080074310302734, + "learning_rate": 7.239419654786727e-06, + "loss": 11.3123, + "step": 208480 + }, + { + "epoch": 0.42116299082487263, + "grad_norm": 551.6575927734375, + "learning_rate": 7.239107552446858e-06, + "loss": 21.158, + "step": 208490 + }, + { + "epoch": 0.42118319145755645, + "grad_norm": 288.7182312011719, + "learning_rate": 7.238795439193849e-06, + "loss": 12.6363, + "step": 208500 + }, + { + "epoch": 0.42120339209024027, + "grad_norm": 268.54833984375, + "learning_rate": 7.2384833150292234e-06, + "loss": 25.1454, + "step": 208510 + }, + { + "epoch": 0.4212235927229241, + "grad_norm": 347.07891845703125, + "learning_rate": 7.238171179954502e-06, + "loss": 24.0564, + "step": 208520 + }, + { + "epoch": 0.4212437933556079, + "grad_norm": 584.1370239257812, + "learning_rate": 7.237859033971206e-06, + "loss": 17.0298, + "step": 208530 + }, + { + "epoch": 0.42126399398829173, + "grad_norm": 951.8050537109375, + "learning_rate": 7.2375468770808555e-06, + "loss": 35.244, + "step": 208540 + }, + { + "epoch": 0.42128419462097555, + "grad_norm": 492.3121032714844, + "learning_rate": 7.2372347092849744e-06, + "loss": 17.6317, + "step": 208550 + }, + { + "epoch": 0.42130439525365937, + "grad_norm": 295.8072814941406, + "learning_rate": 7.236922530585082e-06, + "loss": 17.6025, + "step": 208560 + }, + { + "epoch": 0.42132459588634313, + "grad_norm": 238.51760864257812, + "learning_rate": 7.236610340982699e-06, + "loss": 13.1738, + "step": 208570 + }, + { + "epoch": 0.42134479651902695, 
+ "grad_norm": 559.2195434570312, + "learning_rate": 7.236298140479352e-06, + "loss": 25.8419, + "step": 208580 + }, + { + "epoch": 0.4213649971517108, + "grad_norm": 61.331138610839844, + "learning_rate": 7.235985929076556e-06, + "loss": 21.84, + "step": 208590 + }, + { + "epoch": 0.4213851977843946, + "grad_norm": 585.8510131835938, + "learning_rate": 7.235673706775837e-06, + "loss": 18.3465, + "step": 208600 + }, + { + "epoch": 0.4214053984170784, + "grad_norm": 221.32998657226562, + "learning_rate": 7.235361473578715e-06, + "loss": 15.0191, + "step": 208610 + }, + { + "epoch": 0.42142559904976223, + "grad_norm": 461.74273681640625, + "learning_rate": 7.235049229486713e-06, + "loss": 27.2304, + "step": 208620 + }, + { + "epoch": 0.42144579968244605, + "grad_norm": 382.2008972167969, + "learning_rate": 7.23473697450135e-06, + "loss": 14.0785, + "step": 208630 + }, + { + "epoch": 0.4214660003151299, + "grad_norm": 367.16229248046875, + "learning_rate": 7.234424708624152e-06, + "loss": 22.0599, + "step": 208640 + }, + { + "epoch": 0.4214862009478137, + "grad_norm": 436.0984191894531, + "learning_rate": 7.234112431856639e-06, + "loss": 14.0695, + "step": 208650 + }, + { + "epoch": 0.4215064015804975, + "grad_norm": 142.49688720703125, + "learning_rate": 7.233800144200332e-06, + "loss": 5.8185, + "step": 208660 + }, + { + "epoch": 0.42152660221318133, + "grad_norm": 224.3113555908203, + "learning_rate": 7.233487845656755e-06, + "loss": 24.458, + "step": 208670 + }, + { + "epoch": 0.42154680284586515, + "grad_norm": 207.14895629882812, + "learning_rate": 7.233175536227428e-06, + "loss": 23.031, + "step": 208680 + }, + { + "epoch": 0.421567003478549, + "grad_norm": 445.96478271484375, + "learning_rate": 7.2328632159138764e-06, + "loss": 17.6489, + "step": 208690 + }, + { + "epoch": 0.42158720411123274, + "grad_norm": 636.1257934570312, + "learning_rate": 7.2325508847176175e-06, + "loss": 21.732, + "step": 208700 + }, + { + "epoch": 0.42160740474391656, + "grad_norm": 154.71957397460938, + "learning_rate": 7.232238542640178e-06, + "loss": 11.7602, + "step": 208710 + }, + { + "epoch": 0.4216276053766004, + "grad_norm": 234.51773071289062, + "learning_rate": 7.23192618968308e-06, + "loss": 26.8346, + "step": 208720 + }, + { + "epoch": 0.4216478060092842, + "grad_norm": 572.9177856445312, + "learning_rate": 7.231613825847842e-06, + "loss": 22.7497, + "step": 208730 + }, + { + "epoch": 0.421668006641968, + "grad_norm": 146.1887969970703, + "learning_rate": 7.23130145113599e-06, + "loss": 15.8045, + "step": 208740 + }, + { + "epoch": 0.42168820727465184, + "grad_norm": 230.3306427001953, + "learning_rate": 7.2309890655490446e-06, + "loss": 14.3457, + "step": 208750 + }, + { + "epoch": 0.42170840790733566, + "grad_norm": 407.8550720214844, + "learning_rate": 7.23067666908853e-06, + "loss": 16.3858, + "step": 208760 + }, + { + "epoch": 0.4217286085400195, + "grad_norm": 160.23910522460938, + "learning_rate": 7.230364261755967e-06, + "loss": 17.3511, + "step": 208770 + }, + { + "epoch": 0.4217488091727033, + "grad_norm": 521.4282836914062, + "learning_rate": 7.230051843552879e-06, + "loss": 14.473, + "step": 208780 + }, + { + "epoch": 0.4217690098053871, + "grad_norm": 364.94281005859375, + "learning_rate": 7.22973941448079e-06, + "loss": 13.961, + "step": 208790 + }, + { + "epoch": 0.42178921043807094, + "grad_norm": 436.3430480957031, + "learning_rate": 7.2294269745412214e-06, + "loss": 16.7884, + "step": 208800 + }, + { + "epoch": 0.42180941107075476, + "grad_norm": 1169.0938720703125, + 
"learning_rate": 7.229114523735695e-06, + "loss": 30.58, + "step": 208810 + }, + { + "epoch": 0.4218296117034386, + "grad_norm": 610.2642822265625, + "learning_rate": 7.228802062065735e-06, + "loss": 37.7107, + "step": 208820 + }, + { + "epoch": 0.42184981233612234, + "grad_norm": 236.06874084472656, + "learning_rate": 7.228489589532865e-06, + "loss": 14.797, + "step": 208830 + }, + { + "epoch": 0.42187001296880616, + "grad_norm": 321.25360107421875, + "learning_rate": 7.228177106138605e-06, + "loss": 16.5439, + "step": 208840 + }, + { + "epoch": 0.42189021360149, + "grad_norm": 1895.261474609375, + "learning_rate": 7.227864611884483e-06, + "loss": 34.6209, + "step": 208850 + }, + { + "epoch": 0.4219104142341738, + "grad_norm": 433.1759033203125, + "learning_rate": 7.227552106772015e-06, + "loss": 34.8761, + "step": 208860 + }, + { + "epoch": 0.4219306148668576, + "grad_norm": 634.3358764648438, + "learning_rate": 7.227239590802733e-06, + "loss": 10.7749, + "step": 208870 + }, + { + "epoch": 0.42195081549954144, + "grad_norm": 316.2372131347656, + "learning_rate": 7.226927063978153e-06, + "loss": 21.7075, + "step": 208880 + }, + { + "epoch": 0.42197101613222526, + "grad_norm": 693.2899169921875, + "learning_rate": 7.2266145262998e-06, + "loss": 35.3719, + "step": 208890 + }, + { + "epoch": 0.4219912167649091, + "grad_norm": 550.0910034179688, + "learning_rate": 7.226301977769199e-06, + "loss": 18.3152, + "step": 208900 + }, + { + "epoch": 0.4220114173975929, + "grad_norm": 472.962890625, + "learning_rate": 7.225989418387871e-06, + "loss": 14.3691, + "step": 208910 + }, + { + "epoch": 0.4220316180302767, + "grad_norm": 194.69569396972656, + "learning_rate": 7.2256768481573414e-06, + "loss": 9.0484, + "step": 208920 + }, + { + "epoch": 0.42205181866296054, + "grad_norm": 331.1741638183594, + "learning_rate": 7.225364267079134e-06, + "loss": 22.7028, + "step": 208930 + }, + { + "epoch": 0.42207201929564436, + "grad_norm": 153.98699951171875, + "learning_rate": 7.225051675154768e-06, + "loss": 25.4647, + "step": 208940 + }, + { + "epoch": 0.4220922199283282, + "grad_norm": 788.9549560546875, + "learning_rate": 7.224739072385773e-06, + "loss": 14.5946, + "step": 208950 + }, + { + "epoch": 0.42211242056101195, + "grad_norm": 644.8583984375, + "learning_rate": 7.224426458773668e-06, + "loss": 46.768, + "step": 208960 + }, + { + "epoch": 0.42213262119369577, + "grad_norm": 439.9338073730469, + "learning_rate": 7.224113834319978e-06, + "loss": 21.0104, + "step": 208970 + }, + { + "epoch": 0.4221528218263796, + "grad_norm": 719.8021850585938, + "learning_rate": 7.223801199026228e-06, + "loss": 20.113, + "step": 208980 + }, + { + "epoch": 0.4221730224590634, + "grad_norm": 76.28775024414062, + "learning_rate": 7.22348855289394e-06, + "loss": 21.883, + "step": 208990 + }, + { + "epoch": 0.4221932230917472, + "grad_norm": 182.53370666503906, + "learning_rate": 7.223175895924638e-06, + "loss": 16.6704, + "step": 209000 + }, + { + "epoch": 0.42221342372443105, + "grad_norm": 1.7606089115142822, + "learning_rate": 7.2228632281198475e-06, + "loss": 22.5764, + "step": 209010 + }, + { + "epoch": 0.42223362435711487, + "grad_norm": 396.4153137207031, + "learning_rate": 7.22255054948109e-06, + "loss": 11.976, + "step": 209020 + }, + { + "epoch": 0.4222538249897987, + "grad_norm": 252.0966339111328, + "learning_rate": 7.222237860009892e-06, + "loss": 16.9671, + "step": 209030 + }, + { + "epoch": 0.4222740256224825, + "grad_norm": 152.38465881347656, + "learning_rate": 7.2219251597077745e-06, + "loss": 20.7021, 
+ "step": 209040 + }, + { + "epoch": 0.4222942262551663, + "grad_norm": 130.76669311523438, + "learning_rate": 7.221612448576265e-06, + "loss": 15.2044, + "step": 209050 + }, + { + "epoch": 0.42231442688785015, + "grad_norm": 325.18719482421875, + "learning_rate": 7.221299726616885e-06, + "loss": 20.5442, + "step": 209060 + }, + { + "epoch": 0.42233462752053397, + "grad_norm": 598.7401733398438, + "learning_rate": 7.220986993831159e-06, + "loss": 20.5388, + "step": 209070 + }, + { + "epoch": 0.42235482815321773, + "grad_norm": 260.79327392578125, + "learning_rate": 7.220674250220614e-06, + "loss": 12.8102, + "step": 209080 + }, + { + "epoch": 0.42237502878590155, + "grad_norm": 499.1023254394531, + "learning_rate": 7.220361495786769e-06, + "loss": 29.819, + "step": 209090 + }, + { + "epoch": 0.42239522941858537, + "grad_norm": 362.8083190917969, + "learning_rate": 7.220048730531154e-06, + "loss": 12.4547, + "step": 209100 + }, + { + "epoch": 0.4224154300512692, + "grad_norm": 28.141883850097656, + "learning_rate": 7.219735954455289e-06, + "loss": 23.9655, + "step": 209110 + }, + { + "epoch": 0.422435630683953, + "grad_norm": 450.5405578613281, + "learning_rate": 7.219423167560701e-06, + "loss": 25.0005, + "step": 209120 + }, + { + "epoch": 0.42245583131663683, + "grad_norm": 167.74432373046875, + "learning_rate": 7.219110369848913e-06, + "loss": 17.3343, + "step": 209130 + }, + { + "epoch": 0.42247603194932065, + "grad_norm": 11.758810043334961, + "learning_rate": 7.218797561321451e-06, + "loss": 16.1645, + "step": 209140 + }, + { + "epoch": 0.42249623258200447, + "grad_norm": 345.144775390625, + "learning_rate": 7.2184847419798384e-06, + "loss": 19.0552, + "step": 209150 + }, + { + "epoch": 0.4225164332146883, + "grad_norm": 1046.2261962890625, + "learning_rate": 7.2181719118256e-06, + "loss": 17.4823, + "step": 209160 + }, + { + "epoch": 0.4225366338473721, + "grad_norm": 372.1084289550781, + "learning_rate": 7.217859070860261e-06, + "loss": 21.2658, + "step": 209170 + }, + { + "epoch": 0.42255683448005593, + "grad_norm": 227.75784301757812, + "learning_rate": 7.217546219085346e-06, + "loss": 14.4277, + "step": 209180 + }, + { + "epoch": 0.42257703511273975, + "grad_norm": 382.9267578125, + "learning_rate": 7.21723335650238e-06, + "loss": 26.5891, + "step": 209190 + }, + { + "epoch": 0.42259723574542357, + "grad_norm": 97.09908294677734, + "learning_rate": 7.216920483112886e-06, + "loss": 21.0516, + "step": 209200 + }, + { + "epoch": 0.42261743637810734, + "grad_norm": 480.6952819824219, + "learning_rate": 7.216607598918392e-06, + "loss": 21.1206, + "step": 209210 + }, + { + "epoch": 0.42263763701079116, + "grad_norm": 27.950443267822266, + "learning_rate": 7.216294703920421e-06, + "loss": 24.9184, + "step": 209220 + }, + { + "epoch": 0.422657837643475, + "grad_norm": 238.2189483642578, + "learning_rate": 7.215981798120499e-06, + "loss": 25.0528, + "step": 209230 + }, + { + "epoch": 0.4226780382761588, + "grad_norm": 367.7483215332031, + "learning_rate": 7.215668881520149e-06, + "loss": 15.9533, + "step": 209240 + }, + { + "epoch": 0.4226982389088426, + "grad_norm": 801.534912109375, + "learning_rate": 7.215355954120899e-06, + "loss": 28.8105, + "step": 209250 + }, + { + "epoch": 0.42271843954152644, + "grad_norm": 325.34014892578125, + "learning_rate": 7.2150430159242724e-06, + "loss": 23.3782, + "step": 209260 + }, + { + "epoch": 0.42273864017421026, + "grad_norm": 157.3614044189453, + "learning_rate": 7.214730066931794e-06, + "loss": 15.9653, + "step": 209270 + }, + { + "epoch": 
0.4227588408068941, + "grad_norm": 698.5846557617188, + "learning_rate": 7.214417107144991e-06, + "loss": 20.0662, + "step": 209280 + }, + { + "epoch": 0.4227790414395779, + "grad_norm": 110.46583557128906, + "learning_rate": 7.2141041365653876e-06, + "loss": 22.2963, + "step": 209290 + }, + { + "epoch": 0.4227992420722617, + "grad_norm": 479.1408386230469, + "learning_rate": 7.21379115519451e-06, + "loss": 31.6837, + "step": 209300 + }, + { + "epoch": 0.42281944270494554, + "grad_norm": 337.5665588378906, + "learning_rate": 7.213478163033879e-06, + "loss": 10.4446, + "step": 209310 + }, + { + "epoch": 0.42283964333762936, + "grad_norm": 174.47479248046875, + "learning_rate": 7.213165160085027e-06, + "loss": 22.3634, + "step": 209320 + }, + { + "epoch": 0.4228598439703132, + "grad_norm": 577.8895263671875, + "learning_rate": 7.212852146349476e-06, + "loss": 17.4193, + "step": 209330 + }, + { + "epoch": 0.42288004460299694, + "grad_norm": 371.3006591796875, + "learning_rate": 7.212539121828752e-06, + "loss": 24.541, + "step": 209340 + }, + { + "epoch": 0.42290024523568076, + "grad_norm": 270.4233703613281, + "learning_rate": 7.212226086524381e-06, + "loss": 15.2102, + "step": 209350 + }, + { + "epoch": 0.4229204458683646, + "grad_norm": 735.9139404296875, + "learning_rate": 7.211913040437887e-06, + "loss": 32.0948, + "step": 209360 + }, + { + "epoch": 0.4229406465010484, + "grad_norm": 74.67229461669922, + "learning_rate": 7.211599983570799e-06, + "loss": 11.3681, + "step": 209370 + }, + { + "epoch": 0.4229608471337322, + "grad_norm": 114.5218734741211, + "learning_rate": 7.211286915924639e-06, + "loss": 27.2411, + "step": 209380 + }, + { + "epoch": 0.42298104776641604, + "grad_norm": 680.1890258789062, + "learning_rate": 7.210973837500936e-06, + "loss": 20.7182, + "step": 209390 + }, + { + "epoch": 0.42300124839909986, + "grad_norm": 565.722412109375, + "learning_rate": 7.210660748301214e-06, + "loss": 20.5261, + "step": 209400 + }, + { + "epoch": 0.4230214490317837, + "grad_norm": 731.522705078125, + "learning_rate": 7.210347648327001e-06, + "loss": 23.8383, + "step": 209410 + }, + { + "epoch": 0.4230416496644675, + "grad_norm": 363.3365478515625, + "learning_rate": 7.21003453757982e-06, + "loss": 28.7912, + "step": 209420 + }, + { + "epoch": 0.4230618502971513, + "grad_norm": 588.8091430664062, + "learning_rate": 7.209721416061199e-06, + "loss": 20.4795, + "step": 209430 + }, + { + "epoch": 0.42308205092983514, + "grad_norm": 286.3702087402344, + "learning_rate": 7.209408283772664e-06, + "loss": 16.4857, + "step": 209440 + }, + { + "epoch": 0.42310225156251896, + "grad_norm": 560.5650634765625, + "learning_rate": 7.209095140715742e-06, + "loss": 19.5949, + "step": 209450 + }, + { + "epoch": 0.4231224521952028, + "grad_norm": 596.6691284179688, + "learning_rate": 7.208781986891957e-06, + "loss": 30.0939, + "step": 209460 + }, + { + "epoch": 0.42314265282788655, + "grad_norm": 393.65325927734375, + "learning_rate": 7.208468822302837e-06, + "loss": 24.3944, + "step": 209470 + }, + { + "epoch": 0.42316285346057037, + "grad_norm": 542.9483642578125, + "learning_rate": 7.208155646949908e-06, + "loss": 22.9175, + "step": 209480 + }, + { + "epoch": 0.4231830540932542, + "grad_norm": 229.46798706054688, + "learning_rate": 7.207842460834695e-06, + "loss": 25.6597, + "step": 209490 + }, + { + "epoch": 0.423203254725938, + "grad_norm": 907.5210571289062, + "learning_rate": 7.207529263958727e-06, + "loss": 22.0783, + "step": 209500 + }, + { + "epoch": 0.4232234553586218, + "grad_norm": 134.958984375, 
+ "learning_rate": 7.2072160563235285e-06, + "loss": 9.5013, + "step": 209510 + }, + { + "epoch": 0.42324365599130565, + "grad_norm": 199.20062255859375, + "learning_rate": 7.206902837930626e-06, + "loss": 14.9864, + "step": 209520 + }, + { + "epoch": 0.42326385662398947, + "grad_norm": 440.49334716796875, + "learning_rate": 7.206589608781549e-06, + "loss": 40.4692, + "step": 209530 + }, + { + "epoch": 0.4232840572566733, + "grad_norm": 201.7251434326172, + "learning_rate": 7.206276368877821e-06, + "loss": 17.232, + "step": 209540 + }, + { + "epoch": 0.4233042578893571, + "grad_norm": 188.55267333984375, + "learning_rate": 7.2059631182209676e-06, + "loss": 25.9212, + "step": 209550 + }, + { + "epoch": 0.4233244585220409, + "grad_norm": 387.6357116699219, + "learning_rate": 7.205649856812519e-06, + "loss": 26.5822, + "step": 209560 + }, + { + "epoch": 0.42334465915472475, + "grad_norm": 587.2101440429688, + "learning_rate": 7.205336584653999e-06, + "loss": 37.4754, + "step": 209570 + }, + { + "epoch": 0.42336485978740857, + "grad_norm": 305.0256042480469, + "learning_rate": 7.205023301746938e-06, + "loss": 34.4858, + "step": 209580 + }, + { + "epoch": 0.42338506042009233, + "grad_norm": 531.5726928710938, + "learning_rate": 7.204710008092861e-06, + "loss": 24.7794, + "step": 209590 + }, + { + "epoch": 0.42340526105277615, + "grad_norm": 801.3169555664062, + "learning_rate": 7.2043967036932935e-06, + "loss": 23.223, + "step": 209600 + }, + { + "epoch": 0.42342546168545997, + "grad_norm": 270.9145202636719, + "learning_rate": 7.204083388549764e-06, + "loss": 16.9073, + "step": 209610 + }, + { + "epoch": 0.4234456623181438, + "grad_norm": 499.89617919921875, + "learning_rate": 7.203770062663801e-06, + "loss": 31.6552, + "step": 209620 + }, + { + "epoch": 0.4234658629508276, + "grad_norm": 526.369873046875, + "learning_rate": 7.203456726036927e-06, + "loss": 17.5986, + "step": 209630 + }, + { + "epoch": 0.42348606358351143, + "grad_norm": 102.6902847290039, + "learning_rate": 7.203143378670675e-06, + "loss": 20.5156, + "step": 209640 + }, + { + "epoch": 0.42350626421619525, + "grad_norm": 241.0461883544922, + "learning_rate": 7.202830020566567e-06, + "loss": 20.2259, + "step": 209650 + }, + { + "epoch": 0.42352646484887907, + "grad_norm": 265.1438903808594, + "learning_rate": 7.202516651726135e-06, + "loss": 28.8852, + "step": 209660 + }, + { + "epoch": 0.4235466654815629, + "grad_norm": 581.31298828125, + "learning_rate": 7.202203272150903e-06, + "loss": 31.6967, + "step": 209670 + }, + { + "epoch": 0.4235668661142467, + "grad_norm": 109.85911560058594, + "learning_rate": 7.2018898818423985e-06, + "loss": 13.9738, + "step": 209680 + }, + { + "epoch": 0.42358706674693053, + "grad_norm": 360.0051574707031, + "learning_rate": 7.201576480802151e-06, + "loss": 23.8676, + "step": 209690 + }, + { + "epoch": 0.42360726737961435, + "grad_norm": 174.0601348876953, + "learning_rate": 7.201263069031686e-06, + "loss": 22.7484, + "step": 209700 + }, + { + "epoch": 0.42362746801229817, + "grad_norm": 380.489990234375, + "learning_rate": 7.200949646532532e-06, + "loss": 19.4527, + "step": 209710 + }, + { + "epoch": 0.42364766864498193, + "grad_norm": 725.2266235351562, + "learning_rate": 7.200636213306216e-06, + "loss": 23.2591, + "step": 209720 + }, + { + "epoch": 0.42366786927766575, + "grad_norm": 435.45306396484375, + "learning_rate": 7.200322769354267e-06, + "loss": 26.7146, + "step": 209730 + }, + { + "epoch": 0.4236880699103496, + "grad_norm": 195.5260467529297, + "learning_rate": 7.20000931467821e-06, 
+ "loss": 13.8227, + "step": 209740 + }, + { + "epoch": 0.4237082705430334, + "grad_norm": 554.610107421875, + "learning_rate": 7.199695849279576e-06, + "loss": 20.7622, + "step": 209750 + }, + { + "epoch": 0.4237284711757172, + "grad_norm": 190.65818786621094, + "learning_rate": 7.199382373159891e-06, + "loss": 11.9765, + "step": 209760 + }, + { + "epoch": 0.42374867180840103, + "grad_norm": 52.058013916015625, + "learning_rate": 7.1990688863206835e-06, + "loss": 20.5043, + "step": 209770 + }, + { + "epoch": 0.42376887244108485, + "grad_norm": 529.6057739257812, + "learning_rate": 7.19875538876348e-06, + "loss": 32.3624, + "step": 209780 + }, + { + "epoch": 0.4237890730737687, + "grad_norm": 450.0621032714844, + "learning_rate": 7.198441880489809e-06, + "loss": 15.9361, + "step": 209790 + }, + { + "epoch": 0.4238092737064525, + "grad_norm": 248.64169311523438, + "learning_rate": 7.1981283615012e-06, + "loss": 11.9052, + "step": 209800 + }, + { + "epoch": 0.4238294743391363, + "grad_norm": 275.8608093261719, + "learning_rate": 7.197814831799179e-06, + "loss": 14.8522, + "step": 209810 + }, + { + "epoch": 0.42384967497182013, + "grad_norm": 565.5928955078125, + "learning_rate": 7.197501291385276e-06, + "loss": 26.5751, + "step": 209820 + }, + { + "epoch": 0.42386987560450395, + "grad_norm": 161.0192413330078, + "learning_rate": 7.197187740261018e-06, + "loss": 17.3746, + "step": 209830 + }, + { + "epoch": 0.4238900762371878, + "grad_norm": 982.380126953125, + "learning_rate": 7.196874178427933e-06, + "loss": 19.237, + "step": 209840 + }, + { + "epoch": 0.42391027686987154, + "grad_norm": 99.38864135742188, + "learning_rate": 7.196560605887551e-06, + "loss": 16.0603, + "step": 209850 + }, + { + "epoch": 0.42393047750255536, + "grad_norm": 367.59686279296875, + "learning_rate": 7.196247022641398e-06, + "loss": 11.7232, + "step": 209860 + }, + { + "epoch": 0.4239506781352392, + "grad_norm": 590.4494018554688, + "learning_rate": 7.195933428691003e-06, + "loss": 25.6529, + "step": 209870 + }, + { + "epoch": 0.423970878767923, + "grad_norm": 760.5358276367188, + "learning_rate": 7.195619824037895e-06, + "loss": 21.0403, + "step": 209880 + }, + { + "epoch": 0.4239910794006068, + "grad_norm": 73.87620544433594, + "learning_rate": 7.195306208683602e-06, + "loss": 19.5883, + "step": 209890 + }, + { + "epoch": 0.42401128003329064, + "grad_norm": 566.1576538085938, + "learning_rate": 7.194992582629654e-06, + "loss": 31.2496, + "step": 209900 + }, + { + "epoch": 0.42403148066597446, + "grad_norm": 525.5848999023438, + "learning_rate": 7.194678945877578e-06, + "loss": 16.0137, + "step": 209910 + }, + { + "epoch": 0.4240516812986583, + "grad_norm": 22.791072845458984, + "learning_rate": 7.194365298428901e-06, + "loss": 24.1958, + "step": 209920 + }, + { + "epoch": 0.4240718819313421, + "grad_norm": 12.593621253967285, + "learning_rate": 7.194051640285156e-06, + "loss": 24.5946, + "step": 209930 + }, + { + "epoch": 0.4240920825640259, + "grad_norm": 670.6082153320312, + "learning_rate": 7.1937379714478696e-06, + "loss": 14.8021, + "step": 209940 + }, + { + "epoch": 0.42411228319670974, + "grad_norm": 218.0416259765625, + "learning_rate": 7.19342429191857e-06, + "loss": 20.3079, + "step": 209950 + }, + { + "epoch": 0.42413248382939356, + "grad_norm": 522.1910400390625, + "learning_rate": 7.193110601698785e-06, + "loss": 34.548, + "step": 209960 + }, + { + "epoch": 0.4241526844620774, + "grad_norm": 818.6900634765625, + "learning_rate": 7.192796900790046e-06, + "loss": 29.4781, + "step": 209970 + }, + { + 
"epoch": 0.42417288509476114, + "grad_norm": 166.05030822753906, + "learning_rate": 7.192483189193881e-06, + "loss": 21.9706, + "step": 209980 + }, + { + "epoch": 0.42419308572744496, + "grad_norm": 167.2766876220703, + "learning_rate": 7.192169466911818e-06, + "loss": 10.6179, + "step": 209990 + }, + { + "epoch": 0.4242132863601288, + "grad_norm": 123.68822479248047, + "learning_rate": 7.191855733945388e-06, + "loss": 16.7535, + "step": 210000 + }, + { + "epoch": 0.4242334869928126, + "grad_norm": 215.9828338623047, + "learning_rate": 7.191541990296118e-06, + "loss": 32.7292, + "step": 210010 + }, + { + "epoch": 0.4242536876254964, + "grad_norm": 98.85929107666016, + "learning_rate": 7.191228235965539e-06, + "loss": 10.6004, + "step": 210020 + }, + { + "epoch": 0.42427388825818024, + "grad_norm": 414.95037841796875, + "learning_rate": 7.190914470955179e-06, + "loss": 13.2742, + "step": 210030 + }, + { + "epoch": 0.42429408889086406, + "grad_norm": 69.71099090576172, + "learning_rate": 7.190600695266567e-06, + "loss": 25.0969, + "step": 210040 + }, + { + "epoch": 0.4243142895235479, + "grad_norm": 138.5528564453125, + "learning_rate": 7.190286908901234e-06, + "loss": 14.603, + "step": 210050 + }, + { + "epoch": 0.4243344901562317, + "grad_norm": 169.1397247314453, + "learning_rate": 7.189973111860708e-06, + "loss": 17.6189, + "step": 210060 + }, + { + "epoch": 0.4243546907889155, + "grad_norm": 357.02044677734375, + "learning_rate": 7.189659304146519e-06, + "loss": 11.5942, + "step": 210070 + }, + { + "epoch": 0.42437489142159934, + "grad_norm": 197.739013671875, + "learning_rate": 7.189345485760194e-06, + "loss": 19.1052, + "step": 210080 + }, + { + "epoch": 0.42439509205428316, + "grad_norm": 1462.9423828125, + "learning_rate": 7.189031656703267e-06, + "loss": 19.1876, + "step": 210090 + }, + { + "epoch": 0.424415292686967, + "grad_norm": 365.7261657714844, + "learning_rate": 7.188717816977264e-06, + "loss": 23.9252, + "step": 210100 + }, + { + "epoch": 0.42443549331965075, + "grad_norm": 276.45184326171875, + "learning_rate": 7.1884039665837165e-06, + "loss": 9.0559, + "step": 210110 + }, + { + "epoch": 0.42445569395233457, + "grad_norm": 36.809783935546875, + "learning_rate": 7.188090105524152e-06, + "loss": 30.0834, + "step": 210120 + }, + { + "epoch": 0.4244758945850184, + "grad_norm": 273.6554260253906, + "learning_rate": 7.187776233800104e-06, + "loss": 17.0771, + "step": 210130 + }, + { + "epoch": 0.4244960952177022, + "grad_norm": 232.99725341796875, + "learning_rate": 7.187462351413099e-06, + "loss": 22.8462, + "step": 210140 + }, + { + "epoch": 0.424516295850386, + "grad_norm": 169.66261291503906, + "learning_rate": 7.187148458364668e-06, + "loss": 9.2088, + "step": 210150 + }, + { + "epoch": 0.42453649648306985, + "grad_norm": 355.7333068847656, + "learning_rate": 7.18683455465634e-06, + "loss": 11.4464, + "step": 210160 + }, + { + "epoch": 0.42455669711575367, + "grad_norm": 401.5683288574219, + "learning_rate": 7.1865206402896455e-06, + "loss": 17.6637, + "step": 210170 + }, + { + "epoch": 0.4245768977484375, + "grad_norm": 116.06808471679688, + "learning_rate": 7.1862067152661155e-06, + "loss": 13.2952, + "step": 210180 + }, + { + "epoch": 0.4245970983811213, + "grad_norm": 587.5383911132812, + "learning_rate": 7.185892779587278e-06, + "loss": 21.3133, + "step": 210190 + }, + { + "epoch": 0.4246172990138051, + "grad_norm": 443.3553161621094, + "learning_rate": 7.185578833254665e-06, + "loss": 14.619, + "step": 210200 + }, + { + "epoch": 0.42463749964648895, + "grad_norm": 
262.3222961425781, + "learning_rate": 7.185264876269806e-06, + "loss": 22.8874, + "step": 210210 + }, + { + "epoch": 0.42465770027917277, + "grad_norm": 417.6518859863281, + "learning_rate": 7.18495090863423e-06, + "loss": 20.7733, + "step": 210220 + }, + { + "epoch": 0.42467790091185653, + "grad_norm": 900.1268920898438, + "learning_rate": 7.184636930349471e-06, + "loss": 26.1569, + "step": 210230 + }, + { + "epoch": 0.42469810154454035, + "grad_norm": 806.3509521484375, + "learning_rate": 7.184322941417052e-06, + "loss": 21.0926, + "step": 210240 + }, + { + "epoch": 0.42471830217722417, + "grad_norm": 370.7440185546875, + "learning_rate": 7.184008941838512e-06, + "loss": 26.7139, + "step": 210250 + }, + { + "epoch": 0.424738502809908, + "grad_norm": 26.355684280395508, + "learning_rate": 7.183694931615374e-06, + "loss": 7.9719, + "step": 210260 + }, + { + "epoch": 0.4247587034425918, + "grad_norm": 629.7391357421875, + "learning_rate": 7.183380910749176e-06, + "loss": 21.9063, + "step": 210270 + }, + { + "epoch": 0.42477890407527563, + "grad_norm": 467.73724365234375, + "learning_rate": 7.1830668792414405e-06, + "loss": 15.0258, + "step": 210280 + }, + { + "epoch": 0.42479910470795945, + "grad_norm": 182.96475219726562, + "learning_rate": 7.182752837093704e-06, + "loss": 20.7013, + "step": 210290 + }, + { + "epoch": 0.42481930534064327, + "grad_norm": 226.529541015625, + "learning_rate": 7.182438784307495e-06, + "loss": 18.3346, + "step": 210300 + }, + { + "epoch": 0.4248395059733271, + "grad_norm": 404.8439025878906, + "learning_rate": 7.182124720884342e-06, + "loss": 13.0509, + "step": 210310 + }, + { + "epoch": 0.4248597066060109, + "grad_norm": 478.815673828125, + "learning_rate": 7.181810646825779e-06, + "loss": 17.1866, + "step": 210320 + }, + { + "epoch": 0.42487990723869473, + "grad_norm": 646.7852783203125, + "learning_rate": 7.1814965621333355e-06, + "loss": 31.5088, + "step": 210330 + }, + { + "epoch": 0.42490010787137855, + "grad_norm": 278.8764953613281, + "learning_rate": 7.181182466808542e-06, + "loss": 20.0878, + "step": 210340 + }, + { + "epoch": 0.42492030850406237, + "grad_norm": 598.1981811523438, + "learning_rate": 7.18086836085293e-06, + "loss": 17.7833, + "step": 210350 + }, + { + "epoch": 0.42494050913674614, + "grad_norm": 44.72614288330078, + "learning_rate": 7.18055424426803e-06, + "loss": 17.147, + "step": 210360 + }, + { + "epoch": 0.42496070976942996, + "grad_norm": 136.76609802246094, + "learning_rate": 7.180240117055372e-06, + "loss": 11.8497, + "step": 210370 + }, + { + "epoch": 0.4249809104021138, + "grad_norm": 460.1715393066406, + "learning_rate": 7.1799259792164914e-06, + "loss": 17.4917, + "step": 210380 + }, + { + "epoch": 0.4250011110347976, + "grad_norm": 305.7075500488281, + "learning_rate": 7.179611830752914e-06, + "loss": 18.4208, + "step": 210390 + }, + { + "epoch": 0.4250213116674814, + "grad_norm": 5.594964027404785, + "learning_rate": 7.179297671666171e-06, + "loss": 8.8133, + "step": 210400 + }, + { + "epoch": 0.42504151230016524, + "grad_norm": 369.8493347167969, + "learning_rate": 7.178983501957798e-06, + "loss": 20.4684, + "step": 210410 + }, + { + "epoch": 0.42506171293284906, + "grad_norm": 89.79129028320312, + "learning_rate": 7.178669321629321e-06, + "loss": 20.433, + "step": 210420 + }, + { + "epoch": 0.4250819135655329, + "grad_norm": 602.5833740234375, + "learning_rate": 7.178355130682278e-06, + "loss": 18.3495, + "step": 210430 + }, + { + "epoch": 0.4251021141982167, + "grad_norm": 429.21966552734375, + "learning_rate": 
7.178040929118193e-06, + "loss": 94.1416, + "step": 210440 + }, + { + "epoch": 0.4251223148309005, + "grad_norm": 383.83087158203125, + "learning_rate": 7.177726716938602e-06, + "loss": 21.4504, + "step": 210450 + }, + { + "epoch": 0.42514251546358434, + "grad_norm": 571.14892578125, + "learning_rate": 7.177412494145035e-06, + "loss": 21.3484, + "step": 210460 + }, + { + "epoch": 0.42516271609626816, + "grad_norm": 1431.5240478515625, + "learning_rate": 7.177098260739024e-06, + "loss": 15.8618, + "step": 210470 + }, + { + "epoch": 0.425182916728952, + "grad_norm": 670.5843505859375, + "learning_rate": 7.176784016722099e-06, + "loss": 17.4151, + "step": 210480 + }, + { + "epoch": 0.42520311736163574, + "grad_norm": 317.0255432128906, + "learning_rate": 7.1764697620957935e-06, + "loss": 16.6094, + "step": 210490 + }, + { + "epoch": 0.42522331799431956, + "grad_norm": 232.52720642089844, + "learning_rate": 7.176155496861639e-06, + "loss": 22.0382, + "step": 210500 + }, + { + "epoch": 0.4252435186270034, + "grad_norm": 313.4559020996094, + "learning_rate": 7.175841221021165e-06, + "loss": 10.5647, + "step": 210510 + }, + { + "epoch": 0.4252637192596872, + "grad_norm": 479.8163146972656, + "learning_rate": 7.175526934575906e-06, + "loss": 25.453, + "step": 210520 + }, + { + "epoch": 0.425283919892371, + "grad_norm": 444.48052978515625, + "learning_rate": 7.175212637527391e-06, + "loss": 16.5115, + "step": 210530 + }, + { + "epoch": 0.42530412052505484, + "grad_norm": 647.677734375, + "learning_rate": 7.174898329877156e-06, + "loss": 20.0601, + "step": 210540 + }, + { + "epoch": 0.42532432115773866, + "grad_norm": 585.613037109375, + "learning_rate": 7.174584011626729e-06, + "loss": 17.7973, + "step": 210550 + }, + { + "epoch": 0.4253445217904225, + "grad_norm": 314.0742492675781, + "learning_rate": 7.1742696827776415e-06, + "loss": 22.3381, + "step": 210560 + }, + { + "epoch": 0.4253647224231063, + "grad_norm": 751.9611206054688, + "learning_rate": 7.17395534333143e-06, + "loss": 29.1542, + "step": 210570 + }, + { + "epoch": 0.4253849230557901, + "grad_norm": 453.81414794921875, + "learning_rate": 7.173640993289621e-06, + "loss": 19.3853, + "step": 210580 + }, + { + "epoch": 0.42540512368847394, + "grad_norm": 48.85789489746094, + "learning_rate": 7.173326632653752e-06, + "loss": 14.1212, + "step": 210590 + }, + { + "epoch": 0.42542532432115776, + "grad_norm": 999.33984375, + "learning_rate": 7.173012261425352e-06, + "loss": 19.0004, + "step": 210600 + }, + { + "epoch": 0.4254455249538416, + "grad_norm": 519.2509155273438, + "learning_rate": 7.172697879605954e-06, + "loss": 16.9589, + "step": 210610 + }, + { + "epoch": 0.42546572558652535, + "grad_norm": 534.8131103515625, + "learning_rate": 7.1723834871970885e-06, + "loss": 25.6091, + "step": 210620 + }, + { + "epoch": 0.42548592621920917, + "grad_norm": 212.594482421875, + "learning_rate": 7.172069084200291e-06, + "loss": 19.7887, + "step": 210630 + }, + { + "epoch": 0.425506126851893, + "grad_norm": 195.0337677001953, + "learning_rate": 7.171754670617093e-06, + "loss": 21.1371, + "step": 210640 + }, + { + "epoch": 0.4255263274845768, + "grad_norm": 1047.99169921875, + "learning_rate": 7.171440246449024e-06, + "loss": 32.4524, + "step": 210650 + }, + { + "epoch": 0.4255465281172606, + "grad_norm": 702.249267578125, + "learning_rate": 7.171125811697619e-06, + "loss": 17.065, + "step": 210660 + }, + { + "epoch": 0.42556672874994445, + "grad_norm": 239.98944091796875, + "learning_rate": 7.1708113663644105e-06, + "loss": 27.1717, + "step": 210670 
+ }, + { + "epoch": 0.42558692938262827, + "grad_norm": 579.8043823242188, + "learning_rate": 7.170496910450932e-06, + "loss": 23.0293, + "step": 210680 + }, + { + "epoch": 0.4256071300153121, + "grad_norm": 227.3568572998047, + "learning_rate": 7.170182443958712e-06, + "loss": 36.0902, + "step": 210690 + }, + { + "epoch": 0.4256273306479959, + "grad_norm": 145.62576293945312, + "learning_rate": 7.169867966889288e-06, + "loss": 26.828, + "step": 210700 + }, + { + "epoch": 0.4256475312806797, + "grad_norm": 800.6478881835938, + "learning_rate": 7.16955347924419e-06, + "loss": 31.4572, + "step": 210710 + }, + { + "epoch": 0.42566773191336355, + "grad_norm": 350.2894287109375, + "learning_rate": 7.169238981024952e-06, + "loss": 18.8867, + "step": 210720 + }, + { + "epoch": 0.42568793254604737, + "grad_norm": 293.75341796875, + "learning_rate": 7.168924472233107e-06, + "loss": 18.7785, + "step": 210730 + }, + { + "epoch": 0.4257081331787312, + "grad_norm": 168.0781707763672, + "learning_rate": 7.168609952870185e-06, + "loss": 17.6992, + "step": 210740 + }, + { + "epoch": 0.42572833381141495, + "grad_norm": 618.8905029296875, + "learning_rate": 7.168295422937723e-06, + "loss": 19.3183, + "step": 210750 + }, + { + "epoch": 0.42574853444409877, + "grad_norm": 753.1238403320312, + "learning_rate": 7.167980882437251e-06, + "loss": 24.5493, + "step": 210760 + }, + { + "epoch": 0.4257687350767826, + "grad_norm": 360.7587585449219, + "learning_rate": 7.167666331370303e-06, + "loss": 33.7452, + "step": 210770 + }, + { + "epoch": 0.4257889357094664, + "grad_norm": 635.6531372070312, + "learning_rate": 7.167351769738413e-06, + "loss": 9.7641, + "step": 210780 + }, + { + "epoch": 0.42580913634215023, + "grad_norm": 382.7436828613281, + "learning_rate": 7.167037197543112e-06, + "loss": 16.3177, + "step": 210790 + }, + { + "epoch": 0.42582933697483405, + "grad_norm": 212.64776611328125, + "learning_rate": 7.166722614785937e-06, + "loss": 12.0685, + "step": 210800 + }, + { + "epoch": 0.42584953760751787, + "grad_norm": 328.70526123046875, + "learning_rate": 7.1664080214684176e-06, + "loss": 22.4504, + "step": 210810 + }, + { + "epoch": 0.4258697382402017, + "grad_norm": 444.7970275878906, + "learning_rate": 7.166093417592087e-06, + "loss": 13.8742, + "step": 210820 + }, + { + "epoch": 0.4258899388728855, + "grad_norm": 488.2218933105469, + "learning_rate": 7.165778803158481e-06, + "loss": 15.0398, + "step": 210830 + }, + { + "epoch": 0.42591013950556933, + "grad_norm": 432.7123718261719, + "learning_rate": 7.165464178169133e-06, + "loss": 15.9308, + "step": 210840 + }, + { + "epoch": 0.42593034013825315, + "grad_norm": 310.0197448730469, + "learning_rate": 7.1651495426255725e-06, + "loss": 18.5798, + "step": 210850 + }, + { + "epoch": 0.42595054077093697, + "grad_norm": 202.10557556152344, + "learning_rate": 7.164834896529338e-06, + "loss": 19.17, + "step": 210860 + }, + { + "epoch": 0.42597074140362073, + "grad_norm": 305.1272277832031, + "learning_rate": 7.164520239881958e-06, + "loss": 31.638, + "step": 210870 + }, + { + "epoch": 0.42599094203630455, + "grad_norm": 411.7037353515625, + "learning_rate": 7.164205572684971e-06, + "loss": 20.7471, + "step": 210880 + }, + { + "epoch": 0.4260111426689884, + "grad_norm": 108.20954895019531, + "learning_rate": 7.163890894939909e-06, + "loss": 18.4649, + "step": 210890 + }, + { + "epoch": 0.4260313433016722, + "grad_norm": 544.68798828125, + "learning_rate": 7.1635762066483035e-06, + "loss": 17.9135, + "step": 210900 + }, + { + "epoch": 0.426051543934356, + 
"grad_norm": 302.5722961425781, + "learning_rate": 7.163261507811692e-06, + "loss": 26.4492, + "step": 210910 + }, + { + "epoch": 0.42607174456703983, + "grad_norm": 552.00146484375, + "learning_rate": 7.162946798431605e-06, + "loss": 21.0426, + "step": 210920 + }, + { + "epoch": 0.42609194519972365, + "grad_norm": 883.0687866210938, + "learning_rate": 7.162632078509578e-06, + "loss": 24.7735, + "step": 210930 + }, + { + "epoch": 0.4261121458324075, + "grad_norm": 166.25155639648438, + "learning_rate": 7.162317348047144e-06, + "loss": 10.7019, + "step": 210940 + }, + { + "epoch": 0.4261323464650913, + "grad_norm": 274.6679382324219, + "learning_rate": 7.162002607045838e-06, + "loss": 16.6136, + "step": 210950 + }, + { + "epoch": 0.4261525470977751, + "grad_norm": 536.052978515625, + "learning_rate": 7.161687855507193e-06, + "loss": 12.0968, + "step": 210960 + }, + { + "epoch": 0.42617274773045893, + "grad_norm": 575.5294189453125, + "learning_rate": 7.161373093432745e-06, + "loss": 17.3558, + "step": 210970 + }, + { + "epoch": 0.42619294836314275, + "grad_norm": 120.9773178100586, + "learning_rate": 7.161058320824026e-06, + "loss": 21.9131, + "step": 210980 + }, + { + "epoch": 0.4262131489958266, + "grad_norm": 953.031982421875, + "learning_rate": 7.160743537682569e-06, + "loss": 32.6143, + "step": 210990 + }, + { + "epoch": 0.42623334962851034, + "grad_norm": 469.15252685546875, + "learning_rate": 7.160428744009913e-06, + "loss": 13.7952, + "step": 211000 + }, + { + "epoch": 0.42625355026119416, + "grad_norm": 175.22052001953125, + "learning_rate": 7.160113939807587e-06, + "loss": 15.7662, + "step": 211010 + }, + { + "epoch": 0.426273750893878, + "grad_norm": 91.44915008544922, + "learning_rate": 7.159799125077129e-06, + "loss": 9.9428, + "step": 211020 + }, + { + "epoch": 0.4262939515265618, + "grad_norm": 388.4784240722656, + "learning_rate": 7.159484299820071e-06, + "loss": 21.2014, + "step": 211030 + }, + { + "epoch": 0.4263141521592456, + "grad_norm": 279.7063903808594, + "learning_rate": 7.15916946403795e-06, + "loss": 20.4712, + "step": 211040 + }, + { + "epoch": 0.42633435279192944, + "grad_norm": 411.2039794921875, + "learning_rate": 7.1588546177322975e-06, + "loss": 22.7108, + "step": 211050 + }, + { + "epoch": 0.42635455342461326, + "grad_norm": 331.2437438964844, + "learning_rate": 7.158539760904649e-06, + "loss": 14.3297, + "step": 211060 + }, + { + "epoch": 0.4263747540572971, + "grad_norm": 70.88005065917969, + "learning_rate": 7.158224893556541e-06, + "loss": 29.7413, + "step": 211070 + }, + { + "epoch": 0.4263949546899809, + "grad_norm": 229.89979553222656, + "learning_rate": 7.157910015689505e-06, + "loss": 6.9969, + "step": 211080 + }, + { + "epoch": 0.4264151553226647, + "grad_norm": 278.6521911621094, + "learning_rate": 7.157595127305079e-06, + "loss": 41.8598, + "step": 211090 + }, + { + "epoch": 0.42643535595534854, + "grad_norm": 196.2091064453125, + "learning_rate": 7.157280228404796e-06, + "loss": 15.4831, + "step": 211100 + }, + { + "epoch": 0.42645555658803236, + "grad_norm": 130.30809020996094, + "learning_rate": 7.15696531899019e-06, + "loss": 15.3417, + "step": 211110 + }, + { + "epoch": 0.4264757572207162, + "grad_norm": 479.2704772949219, + "learning_rate": 7.156650399062797e-06, + "loss": 21.1902, + "step": 211120 + }, + { + "epoch": 0.42649595785339994, + "grad_norm": 488.7560119628906, + "learning_rate": 7.156335468624151e-06, + "loss": 19.0772, + "step": 211130 + }, + { + "epoch": 0.42651615848608376, + "grad_norm": 377.4590759277344, + 
"learning_rate": 7.156020527675788e-06, + "loss": 18.6304, + "step": 211140 + }, + { + "epoch": 0.4265363591187676, + "grad_norm": 243.23861694335938, + "learning_rate": 7.155705576219242e-06, + "loss": 11.6741, + "step": 211150 + }, + { + "epoch": 0.4265565597514514, + "grad_norm": 336.5179748535156, + "learning_rate": 7.155390614256048e-06, + "loss": 18.4848, + "step": 211160 + }, + { + "epoch": 0.4265767603841352, + "grad_norm": 153.4668731689453, + "learning_rate": 7.1550756417877436e-06, + "loss": 26.9486, + "step": 211170 + }, + { + "epoch": 0.42659696101681904, + "grad_norm": 420.29925537109375, + "learning_rate": 7.15476065881586e-06, + "loss": 24.4821, + "step": 211180 + }, + { + "epoch": 0.42661716164950286, + "grad_norm": 197.16571044921875, + "learning_rate": 7.154445665341933e-06, + "loss": 22.205, + "step": 211190 + }, + { + "epoch": 0.4266373622821867, + "grad_norm": 552.336669921875, + "learning_rate": 7.154130661367503e-06, + "loss": 16.2936, + "step": 211200 + }, + { + "epoch": 0.4266575629148705, + "grad_norm": 732.9390869140625, + "learning_rate": 7.1538156468940986e-06, + "loss": 30.0069, + "step": 211210 + }, + { + "epoch": 0.4266777635475543, + "grad_norm": 412.35076904296875, + "learning_rate": 7.15350062192326e-06, + "loss": 11.5509, + "step": 211220 + }, + { + "epoch": 0.42669796418023814, + "grad_norm": 425.7041015625, + "learning_rate": 7.153185586456518e-06, + "loss": 17.0695, + "step": 211230 + }, + { + "epoch": 0.42671816481292196, + "grad_norm": 22.26059341430664, + "learning_rate": 7.152870540495413e-06, + "loss": 15.4676, + "step": 211240 + }, + { + "epoch": 0.4267383654456058, + "grad_norm": 94.48267364501953, + "learning_rate": 7.1525554840414765e-06, + "loss": 17.9319, + "step": 211250 + }, + { + "epoch": 0.42675856607828955, + "grad_norm": 156.0415496826172, + "learning_rate": 7.152240417096247e-06, + "loss": 17.3309, + "step": 211260 + }, + { + "epoch": 0.42677876671097337, + "grad_norm": 350.1610107421875, + "learning_rate": 7.151925339661256e-06, + "loss": 27.3246, + "step": 211270 + }, + { + "epoch": 0.4267989673436572, + "grad_norm": 381.147705078125, + "learning_rate": 7.151610251738045e-06, + "loss": 12.8145, + "step": 211280 + }, + { + "epoch": 0.426819167976341, + "grad_norm": 291.07183837890625, + "learning_rate": 7.151295153328146e-06, + "loss": 17.6673, + "step": 211290 + }, + { + "epoch": 0.4268393686090248, + "grad_norm": 319.807373046875, + "learning_rate": 7.150980044433094e-06, + "loss": 39.6193, + "step": 211300 + }, + { + "epoch": 0.42685956924170865, + "grad_norm": 845.2122802734375, + "learning_rate": 7.150664925054427e-06, + "loss": 21.6314, + "step": 211310 + }, + { + "epoch": 0.42687976987439247, + "grad_norm": 166.87449645996094, + "learning_rate": 7.1503497951936794e-06, + "loss": 28.2948, + "step": 211320 + }, + { + "epoch": 0.4268999705070763, + "grad_norm": 449.9621887207031, + "learning_rate": 7.1500346548523894e-06, + "loss": 34.1296, + "step": 211330 + }, + { + "epoch": 0.4269201711397601, + "grad_norm": 339.46661376953125, + "learning_rate": 7.14971950403209e-06, + "loss": 9.2132, + "step": 211340 + }, + { + "epoch": 0.4269403717724439, + "grad_norm": 417.22430419921875, + "learning_rate": 7.149404342734317e-06, + "loss": 25.15, + "step": 211350 + }, + { + "epoch": 0.42696057240512775, + "grad_norm": 607.6698608398438, + "learning_rate": 7.14908917096061e-06, + "loss": 19.1785, + "step": 211360 + }, + { + "epoch": 0.42698077303781157, + "grad_norm": 250.4014129638672, + "learning_rate": 7.148773988712503e-06, + "loss": 
23.7206, + "step": 211370 + }, + { + "epoch": 0.4270009736704954, + "grad_norm": 21.49866485595703, + "learning_rate": 7.148458795991531e-06, + "loss": 21.4286, + "step": 211380 + }, + { + "epoch": 0.42702117430317915, + "grad_norm": 386.2162170410156, + "learning_rate": 7.148143592799232e-06, + "loss": 15.9567, + "step": 211390 + }, + { + "epoch": 0.42704137493586297, + "grad_norm": 230.6778106689453, + "learning_rate": 7.1478283791371415e-06, + "loss": 36.8292, + "step": 211400 + }, + { + "epoch": 0.4270615755685468, + "grad_norm": 367.3896789550781, + "learning_rate": 7.147513155006798e-06, + "loss": 17.0131, + "step": 211410 + }, + { + "epoch": 0.4270817762012306, + "grad_norm": 784.0731201171875, + "learning_rate": 7.147197920409733e-06, + "loss": 24.7425, + "step": 211420 + }, + { + "epoch": 0.42710197683391443, + "grad_norm": 171.8694305419922, + "learning_rate": 7.146882675347486e-06, + "loss": 18.0884, + "step": 211430 + }, + { + "epoch": 0.42712217746659825, + "grad_norm": 596.0709838867188, + "learning_rate": 7.146567419821595e-06, + "loss": 26.5218, + "step": 211440 + }, + { + "epoch": 0.42714237809928207, + "grad_norm": 591.632080078125, + "learning_rate": 7.146252153833594e-06, + "loss": 33.4292, + "step": 211450 + }, + { + "epoch": 0.4271625787319659, + "grad_norm": 115.9808578491211, + "learning_rate": 7.145936877385019e-06, + "loss": 28.2537, + "step": 211460 + }, + { + "epoch": 0.4271827793646497, + "grad_norm": 542.8873901367188, + "learning_rate": 7.145621590477409e-06, + "loss": 17.5467, + "step": 211470 + }, + { + "epoch": 0.42720297999733353, + "grad_norm": 87.06926727294922, + "learning_rate": 7.1453062931123e-06, + "loss": 10.7627, + "step": 211480 + }, + { + "epoch": 0.42722318063001735, + "grad_norm": 290.7402648925781, + "learning_rate": 7.144990985291228e-06, + "loss": 20.0356, + "step": 211490 + }, + { + "epoch": 0.42724338126270117, + "grad_norm": 271.3774108886719, + "learning_rate": 7.1446756670157306e-06, + "loss": 22.2469, + "step": 211500 + }, + { + "epoch": 0.42726358189538494, + "grad_norm": 615.0104370117188, + "learning_rate": 7.144360338287343e-06, + "loss": 20.2854, + "step": 211510 + }, + { + "epoch": 0.42728378252806876, + "grad_norm": 226.7115478515625, + "learning_rate": 7.1440449991076045e-06, + "loss": 25.3044, + "step": 211520 + }, + { + "epoch": 0.4273039831607526, + "grad_norm": 281.998046875, + "learning_rate": 7.143729649478049e-06, + "loss": 25.6546, + "step": 211530 + }, + { + "epoch": 0.4273241837934364, + "grad_norm": 300.0990905761719, + "learning_rate": 7.143414289400217e-06, + "loss": 11.4684, + "step": 211540 + }, + { + "epoch": 0.4273443844261202, + "grad_norm": 495.08087158203125, + "learning_rate": 7.143098918875643e-06, + "loss": 13.4182, + "step": 211550 + }, + { + "epoch": 0.42736458505880404, + "grad_norm": 566.4196166992188, + "learning_rate": 7.142783537905864e-06, + "loss": 16.3631, + "step": 211560 + }, + { + "epoch": 0.42738478569148786, + "grad_norm": 490.5919494628906, + "learning_rate": 7.1424681464924185e-06, + "loss": 25.3629, + "step": 211570 + }, + { + "epoch": 0.4274049863241717, + "grad_norm": 836.9344482421875, + "learning_rate": 7.142152744636843e-06, + "loss": 30.2841, + "step": 211580 + }, + { + "epoch": 0.4274251869568555, + "grad_norm": 356.6913146972656, + "learning_rate": 7.141837332340675e-06, + "loss": 22.8689, + "step": 211590 + }, + { + "epoch": 0.4274453875895393, + "grad_norm": 977.859130859375, + "learning_rate": 7.141521909605452e-06, + "loss": 34.7252, + "step": 211600 + }, + { + "epoch": 
0.42746558822222314, + "grad_norm": 822.9716186523438, + "learning_rate": 7.141206476432711e-06, + "loss": 21.0304, + "step": 211610 + }, + { + "epoch": 0.42748578885490696, + "grad_norm": 24.67584228515625, + "learning_rate": 7.140891032823989e-06, + "loss": 25.4088, + "step": 211620 + }, + { + "epoch": 0.4275059894875908, + "grad_norm": 194.58030700683594, + "learning_rate": 7.140575578780824e-06, + "loss": 26.2878, + "step": 211630 + }, + { + "epoch": 0.42752619012027454, + "grad_norm": 323.7582702636719, + "learning_rate": 7.1402601143047514e-06, + "loss": 26.4877, + "step": 211640 + }, + { + "epoch": 0.42754639075295836, + "grad_norm": 456.35797119140625, + "learning_rate": 7.139944639397313e-06, + "loss": 24.8844, + "step": 211650 + }, + { + "epoch": 0.4275665913856422, + "grad_norm": 201.45193481445312, + "learning_rate": 7.1396291540600435e-06, + "loss": 29.4343, + "step": 211660 + }, + { + "epoch": 0.427586792018326, + "grad_norm": 303.1866455078125, + "learning_rate": 7.13931365829448e-06, + "loss": 21.7534, + "step": 211670 + }, + { + "epoch": 0.4276069926510098, + "grad_norm": 368.20196533203125, + "learning_rate": 7.138998152102162e-06, + "loss": 17.7064, + "step": 211680 + }, + { + "epoch": 0.42762719328369364, + "grad_norm": 306.7176513671875, + "learning_rate": 7.138682635484626e-06, + "loss": 21.6391, + "step": 211690 + }, + { + "epoch": 0.42764739391637746, + "grad_norm": 1134.7449951171875, + "learning_rate": 7.138367108443411e-06, + "loss": 18.8113, + "step": 211700 + }, + { + "epoch": 0.4276675945490613, + "grad_norm": 337.86138916015625, + "learning_rate": 7.138051570980053e-06, + "loss": 19.6044, + "step": 211710 + }, + { + "epoch": 0.4276877951817451, + "grad_norm": 422.91522216796875, + "learning_rate": 7.137736023096091e-06, + "loss": 20.632, + "step": 211720 + }, + { + "epoch": 0.4277079958144289, + "grad_norm": 871.5953979492188, + "learning_rate": 7.1374204647930636e-06, + "loss": 29.7215, + "step": 211730 + }, + { + "epoch": 0.42772819644711274, + "grad_norm": 688.8816528320312, + "learning_rate": 7.137104896072508e-06, + "loss": 22.6805, + "step": 211740 + }, + { + "epoch": 0.42774839707979656, + "grad_norm": 27.244319915771484, + "learning_rate": 7.1367893169359636e-06, + "loss": 13.1682, + "step": 211750 + }, + { + "epoch": 0.4277685977124804, + "grad_norm": 232.0713653564453, + "learning_rate": 7.136473727384965e-06, + "loss": 27.5891, + "step": 211760 + }, + { + "epoch": 0.42778879834516415, + "grad_norm": 317.5654602050781, + "learning_rate": 7.136158127421053e-06, + "loss": 20.855, + "step": 211770 + }, + { + "epoch": 0.42780899897784797, + "grad_norm": 3.613424777984619, + "learning_rate": 7.1358425170457655e-06, + "loss": 12.7244, + "step": 211780 + }, + { + "epoch": 0.4278291996105318, + "grad_norm": 705.9700927734375, + "learning_rate": 7.135526896260643e-06, + "loss": 23.082, + "step": 211790 + }, + { + "epoch": 0.4278494002432156, + "grad_norm": 416.4897766113281, + "learning_rate": 7.135211265067217e-06, + "loss": 10.2001, + "step": 211800 + }, + { + "epoch": 0.4278696008758994, + "grad_norm": 384.7234802246094, + "learning_rate": 7.1348956234670345e-06, + "loss": 19.6609, + "step": 211810 + }, + { + "epoch": 0.42788980150858325, + "grad_norm": 73.60569763183594, + "learning_rate": 7.134579971461627e-06, + "loss": 19.9937, + "step": 211820 + }, + { + "epoch": 0.42791000214126707, + "grad_norm": 281.4009094238281, + "learning_rate": 7.134264309052537e-06, + "loss": 17.6055, + "step": 211830 + }, + { + "epoch": 0.4279302027739509, + "grad_norm": 
114.01895904541016, + "learning_rate": 7.1339486362413005e-06, + "loss": 17.1725, + "step": 211840 + }, + { + "epoch": 0.4279504034066347, + "grad_norm": 450.64923095703125, + "learning_rate": 7.133632953029457e-06, + "loss": 9.1914, + "step": 211850 + }, + { + "epoch": 0.4279706040393185, + "grad_norm": 319.7637023925781, + "learning_rate": 7.133317259418546e-06, + "loss": 27.361, + "step": 211860 + }, + { + "epoch": 0.42799080467200235, + "grad_norm": 377.401611328125, + "learning_rate": 7.133001555410106e-06, + "loss": 20.2308, + "step": 211870 + }, + { + "epoch": 0.42801100530468617, + "grad_norm": 696.2091064453125, + "learning_rate": 7.132685841005674e-06, + "loss": 18.5949, + "step": 211880 + }, + { + "epoch": 0.42803120593737, + "grad_norm": 883.6140747070312, + "learning_rate": 7.1323701162067905e-06, + "loss": 32.1629, + "step": 211890 + }, + { + "epoch": 0.42805140657005375, + "grad_norm": 539.1190185546875, + "learning_rate": 7.1320543810149945e-06, + "loss": 19.7657, + "step": 211900 + }, + { + "epoch": 0.42807160720273757, + "grad_norm": 252.74114990234375, + "learning_rate": 7.131738635431822e-06, + "loss": 25.9459, + "step": 211910 + }, + { + "epoch": 0.4280918078354214, + "grad_norm": 230.0977020263672, + "learning_rate": 7.131422879458815e-06, + "loss": 15.9849, + "step": 211920 + }, + { + "epoch": 0.4281120084681052, + "grad_norm": 1002.0272216796875, + "learning_rate": 7.131107113097512e-06, + "loss": 25.0592, + "step": 211930 + }, + { + "epoch": 0.42813220910078903, + "grad_norm": 390.1118469238281, + "learning_rate": 7.13079133634945e-06, + "loss": 17.789, + "step": 211940 + }, + { + "epoch": 0.42815240973347285, + "grad_norm": 1.0614250898361206, + "learning_rate": 7.130475549216171e-06, + "loss": 14.2543, + "step": 211950 + }, + { + "epoch": 0.42817261036615667, + "grad_norm": 644.6792602539062, + "learning_rate": 7.130159751699211e-06, + "loss": 19.5148, + "step": 211960 + }, + { + "epoch": 0.4281928109988405, + "grad_norm": 489.6703796386719, + "learning_rate": 7.129843943800112e-06, + "loss": 23.6921, + "step": 211970 + }, + { + "epoch": 0.4282130116315243, + "grad_norm": 54.380401611328125, + "learning_rate": 7.129528125520411e-06, + "loss": 23.5899, + "step": 211980 + }, + { + "epoch": 0.42823321226420813, + "grad_norm": 252.82147216796875, + "learning_rate": 7.129212296861649e-06, + "loss": 13.7989, + "step": 211990 + }, + { + "epoch": 0.42825341289689195, + "grad_norm": 300.24127197265625, + "learning_rate": 7.128896457825364e-06, + "loss": 24.0652, + "step": 212000 + }, + { + "epoch": 0.42827361352957577, + "grad_norm": 414.4104309082031, + "learning_rate": 7.128580608413096e-06, + "loss": 16.4752, + "step": 212010 + }, + { + "epoch": 0.4282938141622596, + "grad_norm": 400.53228759765625, + "learning_rate": 7.128264748626385e-06, + "loss": 12.5373, + "step": 212020 + }, + { + "epoch": 0.42831401479494335, + "grad_norm": 431.4949951171875, + "learning_rate": 7.127948878466768e-06, + "loss": 21.7802, + "step": 212030 + }, + { + "epoch": 0.4283342154276272, + "grad_norm": 115.11209869384766, + "learning_rate": 7.127632997935787e-06, + "loss": 24.0653, + "step": 212040 + }, + { + "epoch": 0.428354416060311, + "grad_norm": 335.8630065917969, + "learning_rate": 7.127317107034982e-06, + "loss": 20.3889, + "step": 212050 + }, + { + "epoch": 0.4283746166929948, + "grad_norm": 1286.7281494140625, + "learning_rate": 7.12700120576589e-06, + "loss": 29.3023, + "step": 212060 + }, + { + "epoch": 0.42839481732567863, + "grad_norm": 167.96475219726562, + "learning_rate": 
7.126685294130053e-06, + "loss": 12.2335, + "step": 212070 + }, + { + "epoch": 0.42841501795836245, + "grad_norm": 413.0950622558594, + "learning_rate": 7.126369372129009e-06, + "loss": 25.9105, + "step": 212080 + }, + { + "epoch": 0.4284352185910463, + "grad_norm": 5.152590274810791, + "learning_rate": 7.1260534397643e-06, + "loss": 6.9625, + "step": 212090 + }, + { + "epoch": 0.4284554192237301, + "grad_norm": 264.45416259765625, + "learning_rate": 7.125737497037464e-06, + "loss": 11.4568, + "step": 212100 + }, + { + "epoch": 0.4284756198564139, + "grad_norm": 473.06207275390625, + "learning_rate": 7.125421543950039e-06, + "loss": 13.1764, + "step": 212110 + }, + { + "epoch": 0.42849582048909773, + "grad_norm": 243.21975708007812, + "learning_rate": 7.12510558050357e-06, + "loss": 30.7551, + "step": 212120 + }, + { + "epoch": 0.42851602112178155, + "grad_norm": 164.69775390625, + "learning_rate": 7.124789606699594e-06, + "loss": 17.1487, + "step": 212130 + }, + { + "epoch": 0.4285362217544654, + "grad_norm": 1040.53369140625, + "learning_rate": 7.1244736225396485e-06, + "loss": 22.3604, + "step": 212140 + }, + { + "epoch": 0.42855642238714914, + "grad_norm": 641.1317138671875, + "learning_rate": 7.124157628025279e-06, + "loss": 28.1907, + "step": 212150 + }, + { + "epoch": 0.42857662301983296, + "grad_norm": 159.34039306640625, + "learning_rate": 7.123841623158021e-06, + "loss": 26.2785, + "step": 212160 + }, + { + "epoch": 0.4285968236525168, + "grad_norm": 812.2888793945312, + "learning_rate": 7.123525607939418e-06, + "loss": 30.7258, + "step": 212170 + }, + { + "epoch": 0.4286170242852006, + "grad_norm": 245.14108276367188, + "learning_rate": 7.1232095823710064e-06, + "loss": 10.38, + "step": 212180 + }, + { + "epoch": 0.4286372249178844, + "grad_norm": 109.20542907714844, + "learning_rate": 7.1228935464543325e-06, + "loss": 15.28, + "step": 212190 + }, + { + "epoch": 0.42865742555056824, + "grad_norm": 442.0393371582031, + "learning_rate": 7.12257750019093e-06, + "loss": 24.887, + "step": 212200 + }, + { + "epoch": 0.42867762618325206, + "grad_norm": 99.32170104980469, + "learning_rate": 7.122261443582343e-06, + "loss": 23.0774, + "step": 212210 + }, + { + "epoch": 0.4286978268159359, + "grad_norm": 324.8145751953125, + "learning_rate": 7.12194537663011e-06, + "loss": 26.7955, + "step": 212220 + }, + { + "epoch": 0.4287180274486197, + "grad_norm": 288.45196533203125, + "learning_rate": 7.121629299335775e-06, + "loss": 24.4507, + "step": 212230 + }, + { + "epoch": 0.4287382280813035, + "grad_norm": 0.0, + "learning_rate": 7.121313211700875e-06, + "loss": 16.4163, + "step": 212240 + }, + { + "epoch": 0.42875842871398734, + "grad_norm": 152.95367431640625, + "learning_rate": 7.120997113726951e-06, + "loss": 12.9391, + "step": 212250 + }, + { + "epoch": 0.42877862934667116, + "grad_norm": 65.94784545898438, + "learning_rate": 7.120681005415546e-06, + "loss": 13.3712, + "step": 212260 + }, + { + "epoch": 0.428798829979355, + "grad_norm": 579.9019165039062, + "learning_rate": 7.120364886768197e-06, + "loss": 16.0123, + "step": 212270 + }, + { + "epoch": 0.42881903061203874, + "grad_norm": 595.8623046875, + "learning_rate": 7.120048757786448e-06, + "loss": 33.4327, + "step": 212280 + }, + { + "epoch": 0.42883923124472256, + "grad_norm": 543.7451782226562, + "learning_rate": 7.119732618471838e-06, + "loss": 37.4489, + "step": 212290 + }, + { + "epoch": 0.4288594318774064, + "grad_norm": 290.9337463378906, + "learning_rate": 7.119416468825908e-06, + "loss": 30.5408, + "step": 212300 + }, + { 
+ "epoch": 0.4288796325100902, + "grad_norm": 342.37957763671875, + "learning_rate": 7.119100308850201e-06, + "loss": 36.2474, + "step": 212310 + }, + { + "epoch": 0.428899833142774, + "grad_norm": 753.0562744140625, + "learning_rate": 7.118784138546254e-06, + "loss": 14.3913, + "step": 212320 + }, + { + "epoch": 0.42892003377545784, + "grad_norm": 6.931411266326904, + "learning_rate": 7.1184679579156115e-06, + "loss": 29.2693, + "step": 212330 + }, + { + "epoch": 0.42894023440814166, + "grad_norm": 412.5704040527344, + "learning_rate": 7.118151766959811e-06, + "loss": 25.7069, + "step": 212340 + }, + { + "epoch": 0.4289604350408255, + "grad_norm": 226.2316131591797, + "learning_rate": 7.117835565680399e-06, + "loss": 24.1271, + "step": 212350 + }, + { + "epoch": 0.4289806356735093, + "grad_norm": 186.11773681640625, + "learning_rate": 7.11751935407891e-06, + "loss": 23.0482, + "step": 212360 + }, + { + "epoch": 0.4290008363061931, + "grad_norm": 198.1680145263672, + "learning_rate": 7.11720313215689e-06, + "loss": 23.9003, + "step": 212370 + }, + { + "epoch": 0.42902103693887694, + "grad_norm": 295.8449401855469, + "learning_rate": 7.116886899915879e-06, + "loss": 26.7899, + "step": 212380 + }, + { + "epoch": 0.42904123757156076, + "grad_norm": 847.6760864257812, + "learning_rate": 7.116570657357418e-06, + "loss": 33.5373, + "step": 212390 + }, + { + "epoch": 0.4290614382042446, + "grad_norm": 423.29107666015625, + "learning_rate": 7.116254404483049e-06, + "loss": 11.6476, + "step": 212400 + }, + { + "epoch": 0.42908163883692835, + "grad_norm": 222.54244995117188, + "learning_rate": 7.115938141294309e-06, + "loss": 31.9477, + "step": 212410 + }, + { + "epoch": 0.42910183946961217, + "grad_norm": 274.11883544921875, + "learning_rate": 7.1156218677927465e-06, + "loss": 23.6792, + "step": 212420 + }, + { + "epoch": 0.429122040102296, + "grad_norm": 451.3059387207031, + "learning_rate": 7.115305583979899e-06, + "loss": 15.9437, + "step": 212430 + }, + { + "epoch": 0.4291422407349798, + "grad_norm": 12.71606731414795, + "learning_rate": 7.114989289857308e-06, + "loss": 26.5223, + "step": 212440 + }, + { + "epoch": 0.4291624413676636, + "grad_norm": 350.2530517578125, + "learning_rate": 7.114672985426516e-06, + "loss": 26.1803, + "step": 212450 + }, + { + "epoch": 0.42918264200034745, + "grad_norm": 310.4293518066406, + "learning_rate": 7.114356670689065e-06, + "loss": 11.0326, + "step": 212460 + }, + { + "epoch": 0.42920284263303127, + "grad_norm": 361.52862548828125, + "learning_rate": 7.114040345646497e-06, + "loss": 23.1904, + "step": 212470 + }, + { + "epoch": 0.4292230432657151, + "grad_norm": 143.16583251953125, + "learning_rate": 7.113724010300351e-06, + "loss": 21.7281, + "step": 212480 + }, + { + "epoch": 0.4292432438983989, + "grad_norm": 404.5968017578125, + "learning_rate": 7.113407664652171e-06, + "loss": 13.2178, + "step": 212490 + }, + { + "epoch": 0.4292634445310827, + "grad_norm": 77.86985778808594, + "learning_rate": 7.113091308703498e-06, + "loss": 17.3989, + "step": 212500 + }, + { + "epoch": 0.42928364516376655, + "grad_norm": 38.63712692260742, + "learning_rate": 7.1127749424558755e-06, + "loss": 20.7504, + "step": 212510 + }, + { + "epoch": 0.42930384579645037, + "grad_norm": 424.34197998046875, + "learning_rate": 7.112458565910841e-06, + "loss": 11.3512, + "step": 212520 + }, + { + "epoch": 0.4293240464291342, + "grad_norm": 761.510986328125, + "learning_rate": 7.112142179069943e-06, + "loss": 25.2012, + "step": 212530 + }, + { + "epoch": 0.42934424706181795, + 
"grad_norm": 1095.1092529296875, + "learning_rate": 7.111825781934719e-06, + "loss": 25.1316, + "step": 212540 + }, + { + "epoch": 0.42936444769450177, + "grad_norm": 602.100341796875, + "learning_rate": 7.111509374506712e-06, + "loss": 22.8112, + "step": 212550 + }, + { + "epoch": 0.4293846483271856, + "grad_norm": 318.4169921875, + "learning_rate": 7.111192956787466e-06, + "loss": 12.117, + "step": 212560 + }, + { + "epoch": 0.4294048489598694, + "grad_norm": 289.4682922363281, + "learning_rate": 7.1108765287785185e-06, + "loss": 20.1972, + "step": 212570 + }, + { + "epoch": 0.42942504959255323, + "grad_norm": 292.43341064453125, + "learning_rate": 7.110560090481418e-06, + "loss": 19.0301, + "step": 212580 + }, + { + "epoch": 0.42944525022523705, + "grad_norm": 341.2841491699219, + "learning_rate": 7.1102436418977e-06, + "loss": 36.331, + "step": 212590 + }, + { + "epoch": 0.42946545085792087, + "grad_norm": 897.2501220703125, + "learning_rate": 7.1099271830289155e-06, + "loss": 28.4771, + "step": 212600 + }, + { + "epoch": 0.4294856514906047, + "grad_norm": 22.196765899658203, + "learning_rate": 7.109610713876598e-06, + "loss": 21.6898, + "step": 212610 + }, + { + "epoch": 0.4295058521232885, + "grad_norm": 151.3219451904297, + "learning_rate": 7.109294234442294e-06, + "loss": 22.8881, + "step": 212620 + }, + { + "epoch": 0.42952605275597233, + "grad_norm": 210.76492309570312, + "learning_rate": 7.108977744727548e-06, + "loss": 19.365, + "step": 212630 + }, + { + "epoch": 0.42954625338865615, + "grad_norm": 822.7898559570312, + "learning_rate": 7.108661244733899e-06, + "loss": 22.5107, + "step": 212640 + }, + { + "epoch": 0.42956645402133997, + "grad_norm": 119.3055648803711, + "learning_rate": 7.10834473446289e-06, + "loss": 22.6131, + "step": 212650 + }, + { + "epoch": 0.42958665465402374, + "grad_norm": 1025.1552734375, + "learning_rate": 7.108028213916065e-06, + "loss": 41.4985, + "step": 212660 + }, + { + "epoch": 0.42960685528670756, + "grad_norm": 417.2041931152344, + "learning_rate": 7.107711683094966e-06, + "loss": 18.0833, + "step": 212670 + }, + { + "epoch": 0.4296270559193914, + "grad_norm": 419.1497497558594, + "learning_rate": 7.107395142001135e-06, + "loss": 25.933, + "step": 212680 + }, + { + "epoch": 0.4296472565520752, + "grad_norm": 1319.3250732421875, + "learning_rate": 7.107078590636118e-06, + "loss": 27.9086, + "step": 212690 + }, + { + "epoch": 0.429667457184759, + "grad_norm": 900.4307250976562, + "learning_rate": 7.106762029001455e-06, + "loss": 11.1124, + "step": 212700 + }, + { + "epoch": 0.42968765781744284, + "grad_norm": 374.51129150390625, + "learning_rate": 7.106445457098687e-06, + "loss": 15.3615, + "step": 212710 + }, + { + "epoch": 0.42970785845012666, + "grad_norm": 485.9091796875, + "learning_rate": 7.106128874929364e-06, + "loss": 24.1599, + "step": 212720 + }, + { + "epoch": 0.4297280590828105, + "grad_norm": 438.72784423828125, + "learning_rate": 7.10581228249502e-06, + "loss": 17.5406, + "step": 212730 + }, + { + "epoch": 0.4297482597154943, + "grad_norm": 211.18467712402344, + "learning_rate": 7.105495679797203e-06, + "loss": 24.0315, + "step": 212740 + }, + { + "epoch": 0.4297684603481781, + "grad_norm": 250.50799560546875, + "learning_rate": 7.105179066837456e-06, + "loss": 16.8138, + "step": 212750 + }, + { + "epoch": 0.42978866098086194, + "grad_norm": 179.66966247558594, + "learning_rate": 7.104862443617322e-06, + "loss": 14.6905, + "step": 212760 + }, + { + "epoch": 0.42980886161354576, + "grad_norm": 509.5496826171875, + "learning_rate": 
7.104545810138343e-06, + "loss": 16.3801, + "step": 212770 + }, + { + "epoch": 0.4298290622462296, + "grad_norm": 397.1598205566406, + "learning_rate": 7.1042291664020635e-06, + "loss": 22.1303, + "step": 212780 + }, + { + "epoch": 0.42984926287891334, + "grad_norm": 522.1380004882812, + "learning_rate": 7.103912512410025e-06, + "loss": 23.0236, + "step": 212790 + }, + { + "epoch": 0.42986946351159716, + "grad_norm": 464.6538391113281, + "learning_rate": 7.103595848163775e-06, + "loss": 24.2334, + "step": 212800 + }, + { + "epoch": 0.429889664144281, + "grad_norm": 73.5645523071289, + "learning_rate": 7.103279173664851e-06, + "loss": 17.8165, + "step": 212810 + }, + { + "epoch": 0.4299098647769648, + "grad_norm": 469.95623779296875, + "learning_rate": 7.1029624889148005e-06, + "loss": 16.158, + "step": 212820 + }, + { + "epoch": 0.4299300654096486, + "grad_norm": 427.65716552734375, + "learning_rate": 7.102645793915166e-06, + "loss": 28.5236, + "step": 212830 + }, + { + "epoch": 0.42995026604233244, + "grad_norm": 367.964111328125, + "learning_rate": 7.10232908866749e-06, + "loss": 29.3951, + "step": 212840 + }, + { + "epoch": 0.42997046667501626, + "grad_norm": 376.3992614746094, + "learning_rate": 7.102012373173319e-06, + "loss": 27.3425, + "step": 212850 + }, + { + "epoch": 0.4299906673077001, + "grad_norm": 258.5666809082031, + "learning_rate": 7.101695647434193e-06, + "loss": 16.5746, + "step": 212860 + }, + { + "epoch": 0.4300108679403839, + "grad_norm": 212.1715087890625, + "learning_rate": 7.101378911451659e-06, + "loss": 12.4041, + "step": 212870 + }, + { + "epoch": 0.4300310685730677, + "grad_norm": 590.6844482421875, + "learning_rate": 7.101062165227257e-06, + "loss": 20.8692, + "step": 212880 + }, + { + "epoch": 0.43005126920575154, + "grad_norm": 489.03302001953125, + "learning_rate": 7.100745408762534e-06, + "loss": 14.9576, + "step": 212890 + }, + { + "epoch": 0.43007146983843536, + "grad_norm": 1098.0113525390625, + "learning_rate": 7.100428642059033e-06, + "loss": 29.7588, + "step": 212900 + }, + { + "epoch": 0.4300916704711192, + "grad_norm": 201.09103393554688, + "learning_rate": 7.100111865118295e-06, + "loss": 23.0751, + "step": 212910 + }, + { + "epoch": 0.43011187110380295, + "grad_norm": 607.2546997070312, + "learning_rate": 7.099795077941869e-06, + "loss": 26.2209, + "step": 212920 + }, + { + "epoch": 0.43013207173648677, + "grad_norm": 51.52604675292969, + "learning_rate": 7.099478280531296e-06, + "loss": 10.3928, + "step": 212930 + }, + { + "epoch": 0.4301522723691706, + "grad_norm": 291.2106018066406, + "learning_rate": 7.0991614728881205e-06, + "loss": 24.6395, + "step": 212940 + }, + { + "epoch": 0.4301724730018544, + "grad_norm": 292.82940673828125, + "learning_rate": 7.098844655013886e-06, + "loss": 19.3948, + "step": 212950 + }, + { + "epoch": 0.4301926736345382, + "grad_norm": 143.66151428222656, + "learning_rate": 7.098527826910138e-06, + "loss": 46.2533, + "step": 212960 + }, + { + "epoch": 0.43021287426722205, + "grad_norm": 370.6066589355469, + "learning_rate": 7.09821098857842e-06, + "loss": 22.9495, + "step": 212970 + }, + { + "epoch": 0.43023307489990587, + "grad_norm": 434.4966125488281, + "learning_rate": 7.097894140020276e-06, + "loss": 36.5388, + "step": 212980 + }, + { + "epoch": 0.4302532755325897, + "grad_norm": 313.8411865234375, + "learning_rate": 7.097577281237249e-06, + "loss": 17.4724, + "step": 212990 + }, + { + "epoch": 0.4302734761652735, + "grad_norm": 517.5624389648438, + "learning_rate": 7.0972604122308865e-06, + "loss": 19.9412, + 
"step": 213000 + }, + { + "epoch": 0.4302936767979573, + "grad_norm": 317.34381103515625, + "learning_rate": 7.096943533002732e-06, + "loss": 17.9368, + "step": 213010 + }, + { + "epoch": 0.43031387743064115, + "grad_norm": 239.87741088867188, + "learning_rate": 7.096626643554325e-06, + "loss": 20.8212, + "step": 213020 + }, + { + "epoch": 0.43033407806332497, + "grad_norm": 402.55206298828125, + "learning_rate": 7.0963097438872175e-06, + "loss": 41.616, + "step": 213030 + }, + { + "epoch": 0.4303542786960088, + "grad_norm": 178.4177703857422, + "learning_rate": 7.09599283400295e-06, + "loss": 35.7992, + "step": 213040 + }, + { + "epoch": 0.43037447932869255, + "grad_norm": 157.42201232910156, + "learning_rate": 7.095675913903067e-06, + "loss": 20.6485, + "step": 213050 + }, + { + "epoch": 0.43039467996137637, + "grad_norm": 179.64877319335938, + "learning_rate": 7.095358983589115e-06, + "loss": 10.9177, + "step": 213060 + }, + { + "epoch": 0.4304148805940602, + "grad_norm": 318.3880310058594, + "learning_rate": 7.095042043062635e-06, + "loss": 14.0704, + "step": 213070 + }, + { + "epoch": 0.430435081226744, + "grad_norm": 198.04489135742188, + "learning_rate": 7.094725092325177e-06, + "loss": 28.2684, + "step": 213080 + }, + { + "epoch": 0.43045528185942783, + "grad_norm": 610.4573974609375, + "learning_rate": 7.094408131378281e-06, + "loss": 21.1807, + "step": 213090 + }, + { + "epoch": 0.43047548249211165, + "grad_norm": 487.8459167480469, + "learning_rate": 7.094091160223493e-06, + "loss": 13.2935, + "step": 213100 + }, + { + "epoch": 0.43049568312479547, + "grad_norm": 259.0301818847656, + "learning_rate": 7.093774178862361e-06, + "loss": 13.0684, + "step": 213110 + }, + { + "epoch": 0.4305158837574793, + "grad_norm": 276.094970703125, + "learning_rate": 7.0934571872964265e-06, + "loss": 32.0942, + "step": 213120 + }, + { + "epoch": 0.4305360843901631, + "grad_norm": 206.2740020751953, + "learning_rate": 7.093140185527236e-06, + "loss": 19.2888, + "step": 213130 + }, + { + "epoch": 0.43055628502284693, + "grad_norm": 381.5347595214844, + "learning_rate": 7.092823173556333e-06, + "loss": 20.5193, + "step": 213140 + }, + { + "epoch": 0.43057648565553075, + "grad_norm": 174.14508056640625, + "learning_rate": 7.092506151385265e-06, + "loss": 16.89, + "step": 213150 + }, + { + "epoch": 0.43059668628821457, + "grad_norm": 175.1168975830078, + "learning_rate": 7.092189119015575e-06, + "loss": 14.0268, + "step": 213160 + }, + { + "epoch": 0.4306168869208984, + "grad_norm": 184.75022888183594, + "learning_rate": 7.09187207644881e-06, + "loss": 21.9329, + "step": 213170 + }, + { + "epoch": 0.43063708755358215, + "grad_norm": 201.6131591796875, + "learning_rate": 7.091555023686512e-06, + "loss": 32.6859, + "step": 213180 + }, + { + "epoch": 0.430657288186266, + "grad_norm": 438.4466247558594, + "learning_rate": 7.091237960730231e-06, + "loss": 21.0089, + "step": 213190 + }, + { + "epoch": 0.4306774888189498, + "grad_norm": 287.1460876464844, + "learning_rate": 7.090920887581507e-06, + "loss": 21.9322, + "step": 213200 + }, + { + "epoch": 0.4306976894516336, + "grad_norm": 270.5567321777344, + "learning_rate": 7.090603804241892e-06, + "loss": 14.682, + "step": 213210 + }, + { + "epoch": 0.43071789008431743, + "grad_norm": 1029.9796142578125, + "learning_rate": 7.090286710712925e-06, + "loss": 34.4511, + "step": 213220 + }, + { + "epoch": 0.43073809071700125, + "grad_norm": 413.9538879394531, + "learning_rate": 7.089969606996155e-06, + "loss": 9.93, + "step": 213230 + }, + { + "epoch": 
0.4307582913496851, + "grad_norm": 948.9886474609375, + "learning_rate": 7.089652493093127e-06, + "loss": 36.8057, + "step": 213240 + }, + { + "epoch": 0.4307784919823689, + "grad_norm": 321.8413391113281, + "learning_rate": 7.0893353690053845e-06, + "loss": 16.7714, + "step": 213250 + }, + { + "epoch": 0.4307986926150527, + "grad_norm": 447.0562744140625, + "learning_rate": 7.089018234734476e-06, + "loss": 15.2927, + "step": 213260 + }, + { + "epoch": 0.43081889324773653, + "grad_norm": 176.10293579101562, + "learning_rate": 7.088701090281945e-06, + "loss": 11.8446, + "step": 213270 + }, + { + "epoch": 0.43083909388042035, + "grad_norm": 560.9588012695312, + "learning_rate": 7.088383935649339e-06, + "loss": 21.9758, + "step": 213280 + }, + { + "epoch": 0.4308592945131042, + "grad_norm": 1164.3033447265625, + "learning_rate": 7.088066770838204e-06, + "loss": 21.1519, + "step": 213290 + }, + { + "epoch": 0.43087949514578794, + "grad_norm": 766.023681640625, + "learning_rate": 7.087749595850084e-06, + "loss": 28.1324, + "step": 213300 + }, + { + "epoch": 0.43089969577847176, + "grad_norm": 567.0196533203125, + "learning_rate": 7.087432410686526e-06, + "loss": 23.1312, + "step": 213310 + }, + { + "epoch": 0.4309198964111556, + "grad_norm": 62.32025146484375, + "learning_rate": 7.087115215349074e-06, + "loss": 19.6072, + "step": 213320 + }, + { + "epoch": 0.4309400970438394, + "grad_norm": 195.3494873046875, + "learning_rate": 7.086798009839278e-06, + "loss": 11.3675, + "step": 213330 + }, + { + "epoch": 0.4309602976765232, + "grad_norm": 226.8108673095703, + "learning_rate": 7.086480794158679e-06, + "loss": 15.9095, + "step": 213340 + }, + { + "epoch": 0.43098049830920704, + "grad_norm": 442.96417236328125, + "learning_rate": 7.086163568308828e-06, + "loss": 12.637, + "step": 213350 + }, + { + "epoch": 0.43100069894189086, + "grad_norm": 369.91412353515625, + "learning_rate": 7.085846332291267e-06, + "loss": 32.5252, + "step": 213360 + }, + { + "epoch": 0.4310208995745747, + "grad_norm": 833.627685546875, + "learning_rate": 7.085529086107545e-06, + "loss": 27.3388, + "step": 213370 + }, + { + "epoch": 0.4310411002072585, + "grad_norm": 211.1463623046875, + "learning_rate": 7.085211829759207e-06, + "loss": 21.9671, + "step": 213380 + }, + { + "epoch": 0.4310613008399423, + "grad_norm": 556.7465209960938, + "learning_rate": 7.084894563247798e-06, + "loss": 17.2722, + "step": 213390 + }, + { + "epoch": 0.43108150147262614, + "grad_norm": 69.29907989501953, + "learning_rate": 7.0845772865748684e-06, + "loss": 10.2243, + "step": 213400 + }, + { + "epoch": 0.43110170210530996, + "grad_norm": 80.62250518798828, + "learning_rate": 7.0842599997419606e-06, + "loss": 33.0582, + "step": 213410 + }, + { + "epoch": 0.4311219027379938, + "grad_norm": 718.1709594726562, + "learning_rate": 7.083942702750622e-06, + "loss": 11.4429, + "step": 213420 + }, + { + "epoch": 0.43114210337067754, + "grad_norm": 159.78565979003906, + "learning_rate": 7.083625395602401e-06, + "loss": 21.0411, + "step": 213430 + }, + { + "epoch": 0.43116230400336136, + "grad_norm": 380.5772399902344, + "learning_rate": 7.08330807829884e-06, + "loss": 15.6969, + "step": 213440 + }, + { + "epoch": 0.4311825046360452, + "grad_norm": 401.239990234375, + "learning_rate": 7.0829907508414894e-06, + "loss": 25.3976, + "step": 213450 + }, + { + "epoch": 0.431202705268729, + "grad_norm": 280.7538146972656, + "learning_rate": 7.082673413231896e-06, + "loss": 20.0536, + "step": 213460 + }, + { + "epoch": 0.4312229059014128, + "grad_norm": 
96.32061004638672, + "learning_rate": 7.082356065471603e-06, + "loss": 20.6452, + "step": 213470 + }, + { + "epoch": 0.43124310653409664, + "grad_norm": 28.504817962646484, + "learning_rate": 7.082038707562159e-06, + "loss": 15.115, + "step": 213480 + }, + { + "epoch": 0.43126330716678046, + "grad_norm": 421.4388122558594, + "learning_rate": 7.081721339505112e-06, + "loss": 24.9443, + "step": 213490 + }, + { + "epoch": 0.4312835077994643, + "grad_norm": 3.3708393573760986, + "learning_rate": 7.081403961302007e-06, + "loss": 14.0149, + "step": 213500 + }, + { + "epoch": 0.4313037084321481, + "grad_norm": 347.34014892578125, + "learning_rate": 7.081086572954392e-06, + "loss": 21.9458, + "step": 213510 + }, + { + "epoch": 0.4313239090648319, + "grad_norm": 647.5054321289062, + "learning_rate": 7.080769174463812e-06, + "loss": 30.6609, + "step": 213520 + }, + { + "epoch": 0.43134410969751574, + "grad_norm": 280.7157287597656, + "learning_rate": 7.080451765831817e-06, + "loss": 15.5381, + "step": 213530 + }, + { + "epoch": 0.43136431033019956, + "grad_norm": 252.02383422851562, + "learning_rate": 7.0801343470599525e-06, + "loss": 19.6196, + "step": 213540 + }, + { + "epoch": 0.4313845109628834, + "grad_norm": 768.6023559570312, + "learning_rate": 7.079816918149765e-06, + "loss": 25.3262, + "step": 213550 + }, + { + "epoch": 0.43140471159556715, + "grad_norm": 810.6908569335938, + "learning_rate": 7.079499479102802e-06, + "loss": 26.0697, + "step": 213560 + }, + { + "epoch": 0.43142491222825097, + "grad_norm": 624.0357055664062, + "learning_rate": 7.07918202992061e-06, + "loss": 10.6303, + "step": 213570 + }, + { + "epoch": 0.4314451128609348, + "grad_norm": 20.6352481842041, + "learning_rate": 7.0788645706047384e-06, + "loss": 10.3129, + "step": 213580 + }, + { + "epoch": 0.4314653134936186, + "grad_norm": 226.48268127441406, + "learning_rate": 7.078547101156732e-06, + "loss": 18.2315, + "step": 213590 + }, + { + "epoch": 0.4314855141263024, + "grad_norm": 416.14483642578125, + "learning_rate": 7.07822962157814e-06, + "loss": 30.8123, + "step": 213600 + }, + { + "epoch": 0.43150571475898625, + "grad_norm": 302.602294921875, + "learning_rate": 7.077912131870508e-06, + "loss": 19.0619, + "step": 213610 + }, + { + "epoch": 0.43152591539167007, + "grad_norm": 163.70896911621094, + "learning_rate": 7.077594632035385e-06, + "loss": 13.1436, + "step": 213620 + }, + { + "epoch": 0.4315461160243539, + "grad_norm": 254.6735076904297, + "learning_rate": 7.077277122074317e-06, + "loss": 21.3446, + "step": 213630 + }, + { + "epoch": 0.4315663166570377, + "grad_norm": 123.6911849975586, + "learning_rate": 7.076959601988853e-06, + "loss": 12.5261, + "step": 213640 + }, + { + "epoch": 0.4315865172897215, + "grad_norm": 49.97380065917969, + "learning_rate": 7.076642071780539e-06, + "loss": 16.6701, + "step": 213650 + }, + { + "epoch": 0.43160671792240535, + "grad_norm": 507.8262939453125, + "learning_rate": 7.076324531450924e-06, + "loss": 10.1495, + "step": 213660 + }, + { + "epoch": 0.43162691855508917, + "grad_norm": 296.35711669921875, + "learning_rate": 7.076006981001556e-06, + "loss": 24.1735, + "step": 213670 + }, + { + "epoch": 0.431647119187773, + "grad_norm": 341.41241455078125, + "learning_rate": 7.07568942043398e-06, + "loss": 38.0018, + "step": 213680 + }, + { + "epoch": 0.43166731982045675, + "grad_norm": 360.9331359863281, + "learning_rate": 7.075371849749747e-06, + "loss": 25.1891, + "step": 213690 + }, + { + "epoch": 0.43168752045314057, + "grad_norm": 497.16473388671875, + "learning_rate": 
7.075054268950402e-06, + "loss": 21.5986, + "step": 213700 + }, + { + "epoch": 0.4317077210858244, + "grad_norm": 211.05453491210938, + "learning_rate": 7.074736678037495e-06, + "loss": 20.4163, + "step": 213710 + }, + { + "epoch": 0.4317279217185082, + "grad_norm": 682.28515625, + "learning_rate": 7.074419077012572e-06, + "loss": 11.8788, + "step": 213720 + }, + { + "epoch": 0.43174812235119203, + "grad_norm": 1382.1717529296875, + "learning_rate": 7.074101465877183e-06, + "loss": 30.4152, + "step": 213730 + }, + { + "epoch": 0.43176832298387585, + "grad_norm": 346.9513854980469, + "learning_rate": 7.073783844632875e-06, + "loss": 25.7935, + "step": 213740 + }, + { + "epoch": 0.43178852361655967, + "grad_norm": 723.1521606445312, + "learning_rate": 7.073466213281196e-06, + "loss": 14.6055, + "step": 213750 + }, + { + "epoch": 0.4318087242492435, + "grad_norm": 324.0091857910156, + "learning_rate": 7.073148571823694e-06, + "loss": 18.7438, + "step": 213760 + }, + { + "epoch": 0.4318289248819273, + "grad_norm": 470.0965576171875, + "learning_rate": 7.072830920261918e-06, + "loss": 23.5092, + "step": 213770 + }, + { + "epoch": 0.43184912551461113, + "grad_norm": 656.0071411132812, + "learning_rate": 7.072513258597416e-06, + "loss": 19.2885, + "step": 213780 + }, + { + "epoch": 0.43186932614729495, + "grad_norm": 203.9122314453125, + "learning_rate": 7.072195586831733e-06, + "loss": 15.3756, + "step": 213790 + }, + { + "epoch": 0.43188952677997877, + "grad_norm": 0.7824701070785522, + "learning_rate": 7.071877904966422e-06, + "loss": 14.3607, + "step": 213800 + }, + { + "epoch": 0.4319097274126626, + "grad_norm": 383.6852111816406, + "learning_rate": 7.071560213003028e-06, + "loss": 17.5401, + "step": 213810 + }, + { + "epoch": 0.43192992804534636, + "grad_norm": 121.68672180175781, + "learning_rate": 7.071242510943101e-06, + "loss": 24.731, + "step": 213820 + }, + { + "epoch": 0.4319501286780302, + "grad_norm": 113.0888442993164, + "learning_rate": 7.070924798788191e-06, + "loss": 12.6736, + "step": 213830 + }, + { + "epoch": 0.431970329310714, + "grad_norm": 411.6690368652344, + "learning_rate": 7.070607076539844e-06, + "loss": 15.2181, + "step": 213840 + }, + { + "epoch": 0.4319905299433978, + "grad_norm": 593.9186401367188, + "learning_rate": 7.070289344199609e-06, + "loss": 28.4739, + "step": 213850 + }, + { + "epoch": 0.43201073057608164, + "grad_norm": 522.96728515625, + "learning_rate": 7.069971601769035e-06, + "loss": 19.7131, + "step": 213860 + }, + { + "epoch": 0.43203093120876546, + "grad_norm": 49.98808288574219, + "learning_rate": 7.069653849249671e-06, + "loss": 15.4424, + "step": 213870 + }, + { + "epoch": 0.4320511318414493, + "grad_norm": 1380.20703125, + "learning_rate": 7.069336086643064e-06, + "loss": 16.537, + "step": 213880 + }, + { + "epoch": 0.4320713324741331, + "grad_norm": 413.2056579589844, + "learning_rate": 7.0690183139507625e-06, + "loss": 10.349, + "step": 213890 + }, + { + "epoch": 0.4320915331068169, + "grad_norm": 285.13067626953125, + "learning_rate": 7.0687005311743195e-06, + "loss": 31.0651, + "step": 213900 + }, + { + "epoch": 0.43211173373950074, + "grad_norm": 174.30828857421875, + "learning_rate": 7.068382738315281e-06, + "loss": 10.5683, + "step": 213910 + }, + { + "epoch": 0.43213193437218456, + "grad_norm": 1726.405517578125, + "learning_rate": 7.068064935375194e-06, + "loss": 23.6315, + "step": 213920 + }, + { + "epoch": 0.4321521350048684, + "grad_norm": 246.17745971679688, + "learning_rate": 7.06774712235561e-06, + "loss": 25.6117, + "step": 
213930 + }, + { + "epoch": 0.43217233563755214, + "grad_norm": 181.28305053710938, + "learning_rate": 7.067429299258078e-06, + "loss": 10.6982, + "step": 213940 + }, + { + "epoch": 0.43219253627023596, + "grad_norm": 255.8702392578125, + "learning_rate": 7.0671114660841455e-06, + "loss": 10.8665, + "step": 213950 + }, + { + "epoch": 0.4322127369029198, + "grad_norm": 411.35986328125, + "learning_rate": 7.066793622835364e-06, + "loss": 27.6026, + "step": 213960 + }, + { + "epoch": 0.4322329375356036, + "grad_norm": 200.5729217529297, + "learning_rate": 7.066475769513278e-06, + "loss": 12.6426, + "step": 213970 + }, + { + "epoch": 0.4322531381682874, + "grad_norm": 305.8490905761719, + "learning_rate": 7.066157906119442e-06, + "loss": 16.995, + "step": 213980 + }, + { + "epoch": 0.43227333880097124, + "grad_norm": 178.36602783203125, + "learning_rate": 7.0658400326554025e-06, + "loss": 19.5498, + "step": 213990 + }, + { + "epoch": 0.43229353943365506, + "grad_norm": 426.994140625, + "learning_rate": 7.06552214912271e-06, + "loss": 24.935, + "step": 214000 + }, + { + "epoch": 0.4323137400663389, + "grad_norm": 309.235107421875, + "learning_rate": 7.065204255522913e-06, + "loss": 15.2317, + "step": 214010 + }, + { + "epoch": 0.4323339406990227, + "grad_norm": 325.0115966796875, + "learning_rate": 7.064886351857559e-06, + "loss": 18.5206, + "step": 214020 + }, + { + "epoch": 0.4323541413317065, + "grad_norm": 691.6421508789062, + "learning_rate": 7.064568438128201e-06, + "loss": 27.4744, + "step": 214030 + }, + { + "epoch": 0.43237434196439034, + "grad_norm": 431.8277587890625, + "learning_rate": 7.064250514336386e-06, + "loss": 18.0513, + "step": 214040 + }, + { + "epoch": 0.43239454259707416, + "grad_norm": 308.5205993652344, + "learning_rate": 7.063932580483665e-06, + "loss": 32.0444, + "step": 214050 + }, + { + "epoch": 0.432414743229758, + "grad_norm": 221.98826599121094, + "learning_rate": 7.063614636571586e-06, + "loss": 19.9235, + "step": 214060 + }, + { + "epoch": 0.43243494386244175, + "grad_norm": 1.0397236347198486, + "learning_rate": 7.063296682601701e-06, + "loss": 31.5466, + "step": 214070 + }, + { + "epoch": 0.43245514449512557, + "grad_norm": 97.03449249267578, + "learning_rate": 7.062978718575558e-06, + "loss": 25.4533, + "step": 214080 + }, + { + "epoch": 0.4324753451278094, + "grad_norm": 291.8382873535156, + "learning_rate": 7.062660744494706e-06, + "loss": 34.5286, + "step": 214090 + }, + { + "epoch": 0.4324955457604932, + "grad_norm": 122.28335571289062, + "learning_rate": 7.0623427603606965e-06, + "loss": 17.019, + "step": 214100 + }, + { + "epoch": 0.432515746393177, + "grad_norm": 444.2857971191406, + "learning_rate": 7.062024766175077e-06, + "loss": 16.7993, + "step": 214110 + }, + { + "epoch": 0.43253594702586085, + "grad_norm": 1357.626708984375, + "learning_rate": 7.061706761939402e-06, + "loss": 22.7957, + "step": 214120 + }, + { + "epoch": 0.43255614765854467, + "grad_norm": 509.154052734375, + "learning_rate": 7.0613887476552145e-06, + "loss": 16.4702, + "step": 214130 + }, + { + "epoch": 0.4325763482912285, + "grad_norm": 234.2130584716797, + "learning_rate": 7.061070723324071e-06, + "loss": 11.7002, + "step": 214140 + }, + { + "epoch": 0.4325965489239123, + "grad_norm": 493.207763671875, + "learning_rate": 7.060752688947519e-06, + "loss": 17.9663, + "step": 214150 + }, + { + "epoch": 0.4326167495565961, + "grad_norm": 579.5936279296875, + "learning_rate": 7.060434644527105e-06, + "loss": 24.1063, + "step": 214160 + }, + { + "epoch": 0.43263695018927995, + 
"grad_norm": 264.56549072265625, + "learning_rate": 7.060116590064385e-06, + "loss": 18.8271, + "step": 214170 + }, + { + "epoch": 0.43265715082196377, + "grad_norm": 257.78607177734375, + "learning_rate": 7.059798525560907e-06, + "loss": 20.6671, + "step": 214180 + }, + { + "epoch": 0.4326773514546476, + "grad_norm": 192.1004638671875, + "learning_rate": 7.05948045101822e-06, + "loss": 9.7443, + "step": 214190 + }, + { + "epoch": 0.43269755208733135, + "grad_norm": 178.0238494873047, + "learning_rate": 7.059162366437875e-06, + "loss": 18.8894, + "step": 214200 + }, + { + "epoch": 0.43271775272001517, + "grad_norm": 490.48089599609375, + "learning_rate": 7.058844271821423e-06, + "loss": 21.5204, + "step": 214210 + }, + { + "epoch": 0.432737953352699, + "grad_norm": 7.368875503540039, + "learning_rate": 7.058526167170413e-06, + "loss": 17.0344, + "step": 214220 + }, + { + "epoch": 0.4327581539853828, + "grad_norm": 458.9912414550781, + "learning_rate": 7.058208052486397e-06, + "loss": 14.8924, + "step": 214230 + }, + { + "epoch": 0.43277835461806663, + "grad_norm": 453.0133056640625, + "learning_rate": 7.057889927770922e-06, + "loss": 10.8933, + "step": 214240 + }, + { + "epoch": 0.43279855525075045, + "grad_norm": 331.8202819824219, + "learning_rate": 7.057571793025545e-06, + "loss": 11.8419, + "step": 214250 + }, + { + "epoch": 0.43281875588343427, + "grad_norm": 729.1732177734375, + "learning_rate": 7.057253648251811e-06, + "loss": 26.4337, + "step": 214260 + }, + { + "epoch": 0.4328389565161181, + "grad_norm": 293.8382568359375, + "learning_rate": 7.056935493451271e-06, + "loss": 11.2388, + "step": 214270 + }, + { + "epoch": 0.4328591571488019, + "grad_norm": 139.239501953125, + "learning_rate": 7.056617328625479e-06, + "loss": 28.4436, + "step": 214280 + }, + { + "epoch": 0.43287935778148573, + "grad_norm": 132.2022247314453, + "learning_rate": 7.056299153775981e-06, + "loss": 17.8826, + "step": 214290 + }, + { + "epoch": 0.43289955841416955, + "grad_norm": 458.7197265625, + "learning_rate": 7.0559809689043325e-06, + "loss": 17.1688, + "step": 214300 + }, + { + "epoch": 0.43291975904685337, + "grad_norm": 20.315086364746094, + "learning_rate": 7.055662774012081e-06, + "loss": 22.9634, + "step": 214310 + }, + { + "epoch": 0.4329399596795372, + "grad_norm": 779.4647827148438, + "learning_rate": 7.0553445691007785e-06, + "loss": 24.1353, + "step": 214320 + }, + { + "epoch": 0.43296016031222095, + "grad_norm": 702.3358764648438, + "learning_rate": 7.055026354171976e-06, + "loss": 22.2894, + "step": 214330 + }, + { + "epoch": 0.4329803609449048, + "grad_norm": 211.43646240234375, + "learning_rate": 7.054708129227225e-06, + "loss": 30.6347, + "step": 214340 + }, + { + "epoch": 0.4330005615775886, + "grad_norm": 431.9747619628906, + "learning_rate": 7.0543898942680755e-06, + "loss": 31.5493, + "step": 214350 + }, + { + "epoch": 0.4330207622102724, + "grad_norm": 178.3709716796875, + "learning_rate": 7.054071649296078e-06, + "loss": 20.351, + "step": 214360 + }, + { + "epoch": 0.43304096284295623, + "grad_norm": 476.716552734375, + "learning_rate": 7.053753394312786e-06, + "loss": 29.231, + "step": 214370 + }, + { + "epoch": 0.43306116347564005, + "grad_norm": 254.08023071289062, + "learning_rate": 7.053435129319746e-06, + "loss": 15.2788, + "step": 214380 + }, + { + "epoch": 0.4330813641083239, + "grad_norm": 185.78001403808594, + "learning_rate": 7.0531168543185155e-06, + "loss": 24.5804, + "step": 214390 + }, + { + "epoch": 0.4331015647410077, + "grad_norm": 118.17011260986328, + 
"learning_rate": 7.052798569310641e-06, + "loss": 13.5243, + "step": 214400 + }, + { + "epoch": 0.4331217653736915, + "grad_norm": 284.7645263671875, + "learning_rate": 7.052480274297675e-06, + "loss": 23.3654, + "step": 214410 + }, + { + "epoch": 0.43314196600637533, + "grad_norm": 198.52272033691406, + "learning_rate": 7.0521619692811705e-06, + "loss": 21.7649, + "step": 214420 + }, + { + "epoch": 0.43316216663905915, + "grad_norm": 321.36224365234375, + "learning_rate": 7.051843654262675e-06, + "loss": 11.9821, + "step": 214430 + }, + { + "epoch": 0.433182367271743, + "grad_norm": 102.5936050415039, + "learning_rate": 7.051525329243745e-06, + "loss": 11.121, + "step": 214440 + }, + { + "epoch": 0.4332025679044268, + "grad_norm": 235.19683837890625, + "learning_rate": 7.0512069942259275e-06, + "loss": 18.9256, + "step": 214450 + }, + { + "epoch": 0.43322276853711056, + "grad_norm": 1039.6514892578125, + "learning_rate": 7.050888649210778e-06, + "loss": 23.6549, + "step": 214460 + }, + { + "epoch": 0.4332429691697944, + "grad_norm": 251.51922607421875, + "learning_rate": 7.050570294199843e-06, + "loss": 27.8066, + "step": 214470 + }, + { + "epoch": 0.4332631698024782, + "grad_norm": 223.6874237060547, + "learning_rate": 7.050251929194679e-06, + "loss": 23.5122, + "step": 214480 + }, + { + "epoch": 0.433283370435162, + "grad_norm": 283.97772216796875, + "learning_rate": 7.049933554196835e-06, + "loss": 31.5258, + "step": 214490 + }, + { + "epoch": 0.43330357106784584, + "grad_norm": 320.8291320800781, + "learning_rate": 7.049615169207864e-06, + "loss": 11.4516, + "step": 214500 + }, + { + "epoch": 0.43332377170052966, + "grad_norm": 694.3058471679688, + "learning_rate": 7.049296774229317e-06, + "loss": 19.1876, + "step": 214510 + }, + { + "epoch": 0.4333439723332135, + "grad_norm": 541.5520629882812, + "learning_rate": 7.048978369262747e-06, + "loss": 11.8673, + "step": 214520 + }, + { + "epoch": 0.4333641729658973, + "grad_norm": 609.4490356445312, + "learning_rate": 7.048659954309704e-06, + "loss": 26.1409, + "step": 214530 + }, + { + "epoch": 0.4333843735985811, + "grad_norm": 574.5306396484375, + "learning_rate": 7.048341529371741e-06, + "loss": 18.618, + "step": 214540 + }, + { + "epoch": 0.43340457423126494, + "grad_norm": 80.67208862304688, + "learning_rate": 7.048023094450412e-06, + "loss": 26.0304, + "step": 214550 + }, + { + "epoch": 0.43342477486394876, + "grad_norm": 655.6376953125, + "learning_rate": 7.047704649547263e-06, + "loss": 31.5926, + "step": 214560 + }, + { + "epoch": 0.4334449754966326, + "grad_norm": 287.29541015625, + "learning_rate": 7.047386194663852e-06, + "loss": 38.228, + "step": 214570 + }, + { + "epoch": 0.43346517612931634, + "grad_norm": 146.16346740722656, + "learning_rate": 7.047067729801727e-06, + "loss": 26.6311, + "step": 214580 + }, + { + "epoch": 0.43348537676200016, + "grad_norm": 62.65180969238281, + "learning_rate": 7.046749254962445e-06, + "loss": 27.0513, + "step": 214590 + }, + { + "epoch": 0.433505577394684, + "grad_norm": 327.2711181640625, + "learning_rate": 7.0464307701475544e-06, + "loss": 22.0265, + "step": 214600 + }, + { + "epoch": 0.4335257780273678, + "grad_norm": 269.0719299316406, + "learning_rate": 7.046112275358607e-06, + "loss": 12.5422, + "step": 214610 + }, + { + "epoch": 0.4335459786600516, + "grad_norm": 302.6663513183594, + "learning_rate": 7.045793770597159e-06, + "loss": 14.6392, + "step": 214620 + }, + { + "epoch": 0.43356617929273544, + "grad_norm": 785.456298828125, + "learning_rate": 7.045475255864757e-06, + "loss": 
15.9883, + "step": 214630 + }, + { + "epoch": 0.43358637992541926, + "grad_norm": 496.3552551269531, + "learning_rate": 7.0451567311629596e-06, + "loss": 22.0847, + "step": 214640 + }, + { + "epoch": 0.4336065805581031, + "grad_norm": 567.821533203125, + "learning_rate": 7.044838196493315e-06, + "loss": 16.1361, + "step": 214650 + }, + { + "epoch": 0.4336267811907869, + "grad_norm": 1357.6578369140625, + "learning_rate": 7.044519651857378e-06, + "loss": 27.535, + "step": 214660 + }, + { + "epoch": 0.4336469818234707, + "grad_norm": 354.6550598144531, + "learning_rate": 7.0442010972567e-06, + "loss": 41.6993, + "step": 214670 + }, + { + "epoch": 0.43366718245615454, + "grad_norm": 406.94598388671875, + "learning_rate": 7.043882532692834e-06, + "loss": 20.9676, + "step": 214680 + }, + { + "epoch": 0.43368738308883836, + "grad_norm": 806.630126953125, + "learning_rate": 7.043563958167331e-06, + "loss": 24.1425, + "step": 214690 + }, + { + "epoch": 0.4337075837215222, + "grad_norm": 221.11691284179688, + "learning_rate": 7.043245373681746e-06, + "loss": 29.4682, + "step": 214700 + }, + { + "epoch": 0.43372778435420595, + "grad_norm": 399.14971923828125, + "learning_rate": 7.042926779237631e-06, + "loss": 18.2937, + "step": 214710 + }, + { + "epoch": 0.43374798498688977, + "grad_norm": 538.7844848632812, + "learning_rate": 7.0426081748365386e-06, + "loss": 26.3239, + "step": 214720 + }, + { + "epoch": 0.4337681856195736, + "grad_norm": 319.4942932128906, + "learning_rate": 7.042289560480023e-06, + "loss": 8.8327, + "step": 214730 + }, + { + "epoch": 0.4337883862522574, + "grad_norm": 118.76900482177734, + "learning_rate": 7.041970936169632e-06, + "loss": 12.8371, + "step": 214740 + }, + { + "epoch": 0.4338085868849412, + "grad_norm": 332.2370910644531, + "learning_rate": 7.041652301906925e-06, + "loss": 12.5893, + "step": 214750 + }, + { + "epoch": 0.43382878751762505, + "grad_norm": 34.194908142089844, + "learning_rate": 7.041333657693452e-06, + "loss": 28.0618, + "step": 214760 + }, + { + "epoch": 0.43384898815030887, + "grad_norm": 770.1193237304688, + "learning_rate": 7.041015003530766e-06, + "loss": 22.473, + "step": 214770 + }, + { + "epoch": 0.4338691887829927, + "grad_norm": 317.9777526855469, + "learning_rate": 7.040696339420421e-06, + "loss": 16.2811, + "step": 214780 + }, + { + "epoch": 0.4338893894156765, + "grad_norm": 439.4205627441406, + "learning_rate": 7.0403776653639685e-06, + "loss": 62.0859, + "step": 214790 + }, + { + "epoch": 0.4339095900483603, + "grad_norm": 434.8787841796875, + "learning_rate": 7.0400589813629645e-06, + "loss": 29.797, + "step": 214800 + }, + { + "epoch": 0.43392979068104415, + "grad_norm": 132.6191864013672, + "learning_rate": 7.039740287418959e-06, + "loss": 10.511, + "step": 214810 + }, + { + "epoch": 0.43394999131372797, + "grad_norm": 264.0240478515625, + "learning_rate": 7.039421583533508e-06, + "loss": 28.529, + "step": 214820 + }, + { + "epoch": 0.4339701919464118, + "grad_norm": 341.5426940917969, + "learning_rate": 7.039102869708162e-06, + "loss": 25.6444, + "step": 214830 + }, + { + "epoch": 0.43399039257909555, + "grad_norm": 432.90618896484375, + "learning_rate": 7.038784145944477e-06, + "loss": 11.9233, + "step": 214840 + }, + { + "epoch": 0.43401059321177937, + "grad_norm": 631.9166870117188, + "learning_rate": 7.038465412244005e-06, + "loss": 17.7358, + "step": 214850 + }, + { + "epoch": 0.4340307938444632, + "grad_norm": 231.7225341796875, + "learning_rate": 7.0381466686083e-06, + "loss": 21.0268, + "step": 214860 + }, + { + "epoch": 
0.434050994477147, + "grad_norm": 366.8880920410156, + "learning_rate": 7.037827915038915e-06, + "loss": 16.1726, + "step": 214870 + }, + { + "epoch": 0.43407119510983083, + "grad_norm": 320.87042236328125, + "learning_rate": 7.037509151537404e-06, + "loss": 16.9134, + "step": 214880 + }, + { + "epoch": 0.43409139574251465, + "grad_norm": 163.25094604492188, + "learning_rate": 7.0371903781053215e-06, + "loss": 34.1174, + "step": 214890 + }, + { + "epoch": 0.43411159637519847, + "grad_norm": 1105.0751953125, + "learning_rate": 7.036871594744218e-06, + "loss": 26.4776, + "step": 214900 + }, + { + "epoch": 0.4341317970078823, + "grad_norm": 586.7778930664062, + "learning_rate": 7.036552801455652e-06, + "loss": 16.3743, + "step": 214910 + }, + { + "epoch": 0.4341519976405661, + "grad_norm": 141.29327392578125, + "learning_rate": 7.0362339982411735e-06, + "loss": 19.7854, + "step": 214920 + }, + { + "epoch": 0.43417219827324993, + "grad_norm": 428.3002014160156, + "learning_rate": 7.035915185102337e-06, + "loss": 32.9638, + "step": 214930 + }, + { + "epoch": 0.43419239890593375, + "grad_norm": 609.0887451171875, + "learning_rate": 7.035596362040697e-06, + "loss": 25.8629, + "step": 214940 + }, + { + "epoch": 0.43421259953861757, + "grad_norm": 614.7772216796875, + "learning_rate": 7.035277529057807e-06, + "loss": 18.8787, + "step": 214950 + }, + { + "epoch": 0.4342328001713014, + "grad_norm": 168.0975799560547, + "learning_rate": 7.034958686155222e-06, + "loss": 13.9449, + "step": 214960 + }, + { + "epoch": 0.43425300080398516, + "grad_norm": 537.1471557617188, + "learning_rate": 7.0346398333344945e-06, + "loss": 19.1792, + "step": 214970 + }, + { + "epoch": 0.434273201436669, + "grad_norm": 922.3345336914062, + "learning_rate": 7.03432097059718e-06, + "loss": 21.4447, + "step": 214980 + }, + { + "epoch": 0.4342934020693528, + "grad_norm": 476.7845458984375, + "learning_rate": 7.03400209794483e-06, + "loss": 21.6334, + "step": 214990 + }, + { + "epoch": 0.4343136027020366, + "grad_norm": 291.10595703125, + "learning_rate": 7.033683215379002e-06, + "loss": 18.2016, + "step": 215000 + }, + { + "epoch": 0.43433380333472044, + "grad_norm": 514.0755004882812, + "learning_rate": 7.033364322901248e-06, + "loss": 23.4727, + "step": 215010 + }, + { + "epoch": 0.43435400396740426, + "grad_norm": 285.6300048828125, + "learning_rate": 7.0330454205131235e-06, + "loss": 22.901, + "step": 215020 + }, + { + "epoch": 0.4343742046000881, + "grad_norm": 875.8580932617188, + "learning_rate": 7.032726508216182e-06, + "loss": 21.1357, + "step": 215030 + }, + { + "epoch": 0.4343944052327719, + "grad_norm": 328.4040222167969, + "learning_rate": 7.032407586011978e-06, + "loss": 18.7843, + "step": 215040 + }, + { + "epoch": 0.4344146058654557, + "grad_norm": 1020.8289794921875, + "learning_rate": 7.0320886539020674e-06, + "loss": 22.0729, + "step": 215050 + }, + { + "epoch": 0.43443480649813954, + "grad_norm": 462.8836364746094, + "learning_rate": 7.031769711887999e-06, + "loss": 32.8359, + "step": 215060 + }, + { + "epoch": 0.43445500713082336, + "grad_norm": 214.752197265625, + "learning_rate": 7.031450759971335e-06, + "loss": 13.637, + "step": 215070 + }, + { + "epoch": 0.4344752077635072, + "grad_norm": 255.45242309570312, + "learning_rate": 7.031131798153625e-06, + "loss": 22.2565, + "step": 215080 + }, + { + "epoch": 0.434495408396191, + "grad_norm": 161.22865295410156, + "learning_rate": 7.030812826436426e-06, + "loss": 18.9131, + "step": 215090 + }, + { + "epoch": 0.43451560902887476, + "grad_norm": 
231.98321533203125, + "learning_rate": 7.030493844821291e-06, + "loss": 18.8109, + "step": 215100 + }, + { + "epoch": 0.4345358096615586, + "grad_norm": 290.0347900390625, + "learning_rate": 7.0301748533097745e-06, + "loss": 19.043, + "step": 215110 + }, + { + "epoch": 0.4345560102942424, + "grad_norm": 517.3612060546875, + "learning_rate": 7.029855851903433e-06, + "loss": 28.0285, + "step": 215120 + }, + { + "epoch": 0.4345762109269262, + "grad_norm": 255.6366729736328, + "learning_rate": 7.029536840603821e-06, + "loss": 20.5103, + "step": 215130 + }, + { + "epoch": 0.43459641155961004, + "grad_norm": 461.9790954589844, + "learning_rate": 7.029217819412491e-06, + "loss": 18.1368, + "step": 215140 + }, + { + "epoch": 0.43461661219229386, + "grad_norm": 342.81768798828125, + "learning_rate": 7.028898788331e-06, + "loss": 15.0202, + "step": 215150 + }, + { + "epoch": 0.4346368128249777, + "grad_norm": 196.1928253173828, + "learning_rate": 7.028579747360903e-06, + "loss": 7.83, + "step": 215160 + }, + { + "epoch": 0.4346570134576615, + "grad_norm": 275.57867431640625, + "learning_rate": 7.028260696503754e-06, + "loss": 15.5337, + "step": 215170 + }, + { + "epoch": 0.4346772140903453, + "grad_norm": 387.40374755859375, + "learning_rate": 7.027941635761109e-06, + "loss": 11.6378, + "step": 215180 + }, + { + "epoch": 0.43469741472302914, + "grad_norm": 224.36241149902344, + "learning_rate": 7.02762256513452e-06, + "loss": 20.288, + "step": 215190 + }, + { + "epoch": 0.43471761535571296, + "grad_norm": 324.6615295410156, + "learning_rate": 7.027303484625547e-06, + "loss": 14.4402, + "step": 215200 + }, + { + "epoch": 0.4347378159883968, + "grad_norm": 1438.098876953125, + "learning_rate": 7.026984394235742e-06, + "loss": 22.0813, + "step": 215210 + }, + { + "epoch": 0.43475801662108055, + "grad_norm": 643.7030029296875, + "learning_rate": 7.0266652939666605e-06, + "loss": 15.3364, + "step": 215220 + }, + { + "epoch": 0.43477821725376437, + "grad_norm": 738.4295043945312, + "learning_rate": 7.026346183819859e-06, + "loss": 28.5164, + "step": 215230 + }, + { + "epoch": 0.4347984178864482, + "grad_norm": 210.78024291992188, + "learning_rate": 7.026027063796891e-06, + "loss": 22.1025, + "step": 215240 + }, + { + "epoch": 0.434818618519132, + "grad_norm": 348.28533935546875, + "learning_rate": 7.025707933899314e-06, + "loss": 15.153, + "step": 215250 + }, + { + "epoch": 0.4348388191518158, + "grad_norm": 639.33203125, + "learning_rate": 7.0253887941286816e-06, + "loss": 16.5251, + "step": 215260 + }, + { + "epoch": 0.43485901978449965, + "grad_norm": 374.78662109375, + "learning_rate": 7.025069644486549e-06, + "loss": 24.9781, + "step": 215270 + }, + { + "epoch": 0.43487922041718347, + "grad_norm": 248.15109252929688, + "learning_rate": 7.024750484974473e-06, + "loss": 16.7164, + "step": 215280 + }, + { + "epoch": 0.4348994210498673, + "grad_norm": 412.46148681640625, + "learning_rate": 7.02443131559401e-06, + "loss": 43.2204, + "step": 215290 + }, + { + "epoch": 0.4349196216825511, + "grad_norm": 514.1223754882812, + "learning_rate": 7.024112136346713e-06, + "loss": 28.6112, + "step": 215300 + }, + { + "epoch": 0.4349398223152349, + "grad_norm": 399.0684814453125, + "learning_rate": 7.023792947234139e-06, + "loss": 17.6464, + "step": 215310 + }, + { + "epoch": 0.43496002294791875, + "grad_norm": 405.3045654296875, + "learning_rate": 7.023473748257844e-06, + "loss": 13.2634, + "step": 215320 + }, + { + "epoch": 0.43498022358060257, + "grad_norm": 205.40782165527344, + "learning_rate": 
7.023154539419383e-06, + "loss": 12.7478, + "step": 215330 + }, + { + "epoch": 0.4350004242132864, + "grad_norm": 507.6519470214844, + "learning_rate": 7.0228353207203136e-06, + "loss": 16.6266, + "step": 215340 + }, + { + "epoch": 0.43502062484597015, + "grad_norm": 561.2252197265625, + "learning_rate": 7.022516092162187e-06, + "loss": 18.6296, + "step": 215350 + }, + { + "epoch": 0.43504082547865397, + "grad_norm": 462.3230285644531, + "learning_rate": 7.022196853746565e-06, + "loss": 21.4073, + "step": 215360 + }, + { + "epoch": 0.4350610261113378, + "grad_norm": 945.115478515625, + "learning_rate": 7.0218776054750004e-06, + "loss": 15.2625, + "step": 215370 + }, + { + "epoch": 0.4350812267440216, + "grad_norm": 993.54443359375, + "learning_rate": 7.021558347349049e-06, + "loss": 16.8026, + "step": 215380 + }, + { + "epoch": 0.43510142737670543, + "grad_norm": 605.0009155273438, + "learning_rate": 7.0212390793702675e-06, + "loss": 18.7984, + "step": 215390 + }, + { + "epoch": 0.43512162800938925, + "grad_norm": 86.10658264160156, + "learning_rate": 7.0209198015402115e-06, + "loss": 17.231, + "step": 215400 + }, + { + "epoch": 0.43514182864207307, + "grad_norm": 92.33135223388672, + "learning_rate": 7.02060051386044e-06, + "loss": 20.2986, + "step": 215410 + }, + { + "epoch": 0.4351620292747569, + "grad_norm": 1052.357177734375, + "learning_rate": 7.020281216332503e-06, + "loss": 24.0735, + "step": 215420 + }, + { + "epoch": 0.4351822299074407, + "grad_norm": 511.3278503417969, + "learning_rate": 7.019961908957962e-06, + "loss": 33.2723, + "step": 215430 + }, + { + "epoch": 0.43520243054012453, + "grad_norm": 548.94921875, + "learning_rate": 7.019642591738372e-06, + "loss": 20.2783, + "step": 215440 + }, + { + "epoch": 0.43522263117280835, + "grad_norm": 207.590087890625, + "learning_rate": 7.01932326467529e-06, + "loss": 19.8643, + "step": 215450 + }, + { + "epoch": 0.43524283180549217, + "grad_norm": 499.21429443359375, + "learning_rate": 7.0190039277702685e-06, + "loss": 13.7157, + "step": 215460 + }, + { + "epoch": 0.435263032438176, + "grad_norm": 287.5592346191406, + "learning_rate": 7.018684581024868e-06, + "loss": 13.873, + "step": 215470 + }, + { + "epoch": 0.43528323307085975, + "grad_norm": 971.934814453125, + "learning_rate": 7.018365224440644e-06, + "loss": 36.0666, + "step": 215480 + }, + { + "epoch": 0.4353034337035436, + "grad_norm": 443.7062683105469, + "learning_rate": 7.018045858019154e-06, + "loss": 46.004, + "step": 215490 + }, + { + "epoch": 0.4353236343362274, + "grad_norm": 612.0048217773438, + "learning_rate": 7.0177264817619514e-06, + "loss": 28.7301, + "step": 215500 + }, + { + "epoch": 0.4353438349689112, + "grad_norm": 1242.58740234375, + "learning_rate": 7.017407095670594e-06, + "loss": 24.5226, + "step": 215510 + }, + { + "epoch": 0.43536403560159503, + "grad_norm": 31.768735885620117, + "learning_rate": 7.0170876997466406e-06, + "loss": 21.7206, + "step": 215520 + }, + { + "epoch": 0.43538423623427885, + "grad_norm": 445.52276611328125, + "learning_rate": 7.016768293991647e-06, + "loss": 14.7207, + "step": 215530 + }, + { + "epoch": 0.4354044368669627, + "grad_norm": 141.01197814941406, + "learning_rate": 7.016448878407167e-06, + "loss": 15.1699, + "step": 215540 + }, + { + "epoch": 0.4354246374996465, + "grad_norm": 396.162109375, + "learning_rate": 7.016129452994761e-06, + "loss": 18.822, + "step": 215550 + }, + { + "epoch": 0.4354448381323303, + "grad_norm": 728.1718139648438, + "learning_rate": 7.015810017755985e-06, + "loss": 16.6068, + "step": 215560 
+ }, + { + "epoch": 0.43546503876501413, + "grad_norm": 348.1802673339844, + "learning_rate": 7.015490572692396e-06, + "loss": 23.2858, + "step": 215570 + }, + { + "epoch": 0.43548523939769795, + "grad_norm": 184.91587829589844, + "learning_rate": 7.015171117805549e-06, + "loss": 14.2151, + "step": 215580 + }, + { + "epoch": 0.4355054400303818, + "grad_norm": 417.75201416015625, + "learning_rate": 7.014851653097003e-06, + "loss": 20.5845, + "step": 215590 + }, + { + "epoch": 0.4355256406630656, + "grad_norm": 115.35167694091797, + "learning_rate": 7.014532178568314e-06, + "loss": 14.5059, + "step": 215600 + }, + { + "epoch": 0.43554584129574936, + "grad_norm": 2028.61865234375, + "learning_rate": 7.014212694221041e-06, + "loss": 37.7481, + "step": 215610 + }, + { + "epoch": 0.4355660419284332, + "grad_norm": 1195.914306640625, + "learning_rate": 7.013893200056736e-06, + "loss": 32.3363, + "step": 215620 + }, + { + "epoch": 0.435586242561117, + "grad_norm": 465.3851623535156, + "learning_rate": 7.013573696076964e-06, + "loss": 12.0181, + "step": 215630 + }, + { + "epoch": 0.4356064431938008, + "grad_norm": 441.920166015625, + "learning_rate": 7.013254182283275e-06, + "loss": 12.5004, + "step": 215640 + }, + { + "epoch": 0.43562664382648464, + "grad_norm": 9.876458168029785, + "learning_rate": 7.01293465867723e-06, + "loss": 10.3304, + "step": 215650 + }, + { + "epoch": 0.43564684445916846, + "grad_norm": 187.7135009765625, + "learning_rate": 7.012615125260388e-06, + "loss": 10.8502, + "step": 215660 + }, + { + "epoch": 0.4356670450918523, + "grad_norm": 384.7646484375, + "learning_rate": 7.012295582034301e-06, + "loss": 29.0559, + "step": 215670 + }, + { + "epoch": 0.4356872457245361, + "grad_norm": 507.1191711425781, + "learning_rate": 7.011976029000531e-06, + "loss": 24.8476, + "step": 215680 + }, + { + "epoch": 0.4357074463572199, + "grad_norm": 37.014530181884766, + "learning_rate": 7.011656466160633e-06, + "loss": 15.289, + "step": 215690 + }, + { + "epoch": 0.43572764698990374, + "grad_norm": 1234.5159912109375, + "learning_rate": 7.011336893516167e-06, + "loss": 17.1629, + "step": 215700 + }, + { + "epoch": 0.43574784762258756, + "grad_norm": 187.715087890625, + "learning_rate": 7.011017311068686e-06, + "loss": 16.9608, + "step": 215710 + }, + { + "epoch": 0.4357680482552714, + "grad_norm": 1021.8418579101562, + "learning_rate": 7.010697718819753e-06, + "loss": 18.2547, + "step": 215720 + }, + { + "epoch": 0.43578824888795514, + "grad_norm": 48.73601531982422, + "learning_rate": 7.0103781167709224e-06, + "loss": 8.0319, + "step": 215730 + }, + { + "epoch": 0.43580844952063896, + "grad_norm": 460.2627868652344, + "learning_rate": 7.010058504923753e-06, + "loss": 18.5694, + "step": 215740 + }, + { + "epoch": 0.4358286501533228, + "grad_norm": 301.5263977050781, + "learning_rate": 7.009738883279802e-06, + "loss": 19.9788, + "step": 215750 + }, + { + "epoch": 0.4358488507860066, + "grad_norm": 260.9577331542969, + "learning_rate": 7.009419251840627e-06, + "loss": 23.0658, + "step": 215760 + }, + { + "epoch": 0.4358690514186904, + "grad_norm": 200.27781677246094, + "learning_rate": 7.0090996106077855e-06, + "loss": 16.2399, + "step": 215770 + }, + { + "epoch": 0.43588925205137424, + "grad_norm": 558.4069213867188, + "learning_rate": 7.008779959582838e-06, + "loss": 33.8017, + "step": 215780 + }, + { + "epoch": 0.43590945268405806, + "grad_norm": 409.48443603515625, + "learning_rate": 7.008460298767341e-06, + "loss": 17.3062, + "step": 215790 + }, + { + "epoch": 0.4359296533167419, + 
"grad_norm": 188.6319122314453, + "learning_rate": 7.008140628162851e-06, + "loss": 27.7887, + "step": 215800 + }, + { + "epoch": 0.4359498539494257, + "grad_norm": 521.9065551757812, + "learning_rate": 7.007820947770927e-06, + "loss": 13.1792, + "step": 215810 + }, + { + "epoch": 0.4359700545821095, + "grad_norm": 1069.1414794921875, + "learning_rate": 7.007501257593126e-06, + "loss": 28.8951, + "step": 215820 + }, + { + "epoch": 0.43599025521479334, + "grad_norm": 448.6017761230469, + "learning_rate": 7.0071815576310085e-06, + "loss": 22.1577, + "step": 215830 + }, + { + "epoch": 0.43601045584747716, + "grad_norm": 487.28936767578125, + "learning_rate": 7.006861847886133e-06, + "loss": 18.2779, + "step": 215840 + }, + { + "epoch": 0.436030656480161, + "grad_norm": 676.8814697265625, + "learning_rate": 7.006542128360054e-06, + "loss": 20.9958, + "step": 215850 + }, + { + "epoch": 0.43605085711284475, + "grad_norm": 53.80535125732422, + "learning_rate": 7.006222399054334e-06, + "loss": 14.9174, + "step": 215860 + }, + { + "epoch": 0.43607105774552857, + "grad_norm": 377.7115783691406, + "learning_rate": 7.005902659970528e-06, + "loss": 22.759, + "step": 215870 + }, + { + "epoch": 0.4360912583782124, + "grad_norm": 105.19438934326172, + "learning_rate": 7.0055829111101954e-06, + "loss": 22.7473, + "step": 215880 + }, + { + "epoch": 0.4361114590108962, + "grad_norm": 594.7811889648438, + "learning_rate": 7.005263152474896e-06, + "loss": 17.0541, + "step": 215890 + }, + { + "epoch": 0.43613165964358, + "grad_norm": 73.36730194091797, + "learning_rate": 7.0049433840661875e-06, + "loss": 13.0328, + "step": 215900 + }, + { + "epoch": 0.43615186027626385, + "grad_norm": 130.21876525878906, + "learning_rate": 7.004623605885628e-06, + "loss": 17.4, + "step": 215910 + }, + { + "epoch": 0.43617206090894767, + "grad_norm": 804.5039672851562, + "learning_rate": 7.004303817934775e-06, + "loss": 17.1268, + "step": 215920 + }, + { + "epoch": 0.4361922615416315, + "grad_norm": 992.1928100585938, + "learning_rate": 7.003984020215189e-06, + "loss": 16.6337, + "step": 215930 + }, + { + "epoch": 0.4362124621743153, + "grad_norm": 72.60612487792969, + "learning_rate": 7.003664212728427e-06, + "loss": 15.7889, + "step": 215940 + }, + { + "epoch": 0.4362326628069991, + "grad_norm": 757.6968383789062, + "learning_rate": 7.003344395476051e-06, + "loss": 18.0091, + "step": 215950 + }, + { + "epoch": 0.43625286343968295, + "grad_norm": 1845.9464111328125, + "learning_rate": 7.003024568459614e-06, + "loss": 37.3103, + "step": 215960 + }, + { + "epoch": 0.43627306407236677, + "grad_norm": 293.3792724609375, + "learning_rate": 7.002704731680682e-06, + "loss": 19.8846, + "step": 215970 + }, + { + "epoch": 0.4362932647050506, + "grad_norm": 773.8984985351562, + "learning_rate": 7.0023848851408076e-06, + "loss": 19.4425, + "step": 215980 + }, + { + "epoch": 0.43631346533773435, + "grad_norm": 584.5510864257812, + "learning_rate": 7.002065028841553e-06, + "loss": 13.9668, + "step": 215990 + }, + { + "epoch": 0.43633366597041817, + "grad_norm": 253.05511474609375, + "learning_rate": 7.0017451627844765e-06, + "loss": 14.3967, + "step": 216000 + }, + { + "epoch": 0.436353866603102, + "grad_norm": 199.73092651367188, + "learning_rate": 7.001425286971135e-06, + "loss": 15.918, + "step": 216010 + }, + { + "epoch": 0.4363740672357858, + "grad_norm": 307.1466369628906, + "learning_rate": 7.001105401403091e-06, + "loss": 14.1764, + "step": 216020 + }, + { + "epoch": 0.43639426786846963, + "grad_norm": 338.7720947265625, + 
"learning_rate": 7.000785506081902e-06, + "loss": 26.9864, + "step": 216030 + }, + { + "epoch": 0.43641446850115345, + "grad_norm": 511.8108825683594, + "learning_rate": 7.000465601009126e-06, + "loss": 15.8379, + "step": 216040 + }, + { + "epoch": 0.43643466913383727, + "grad_norm": 679.4248657226562, + "learning_rate": 7.0001456861863236e-06, + "loss": 23.5186, + "step": 216050 + }, + { + "epoch": 0.4364548697665211, + "grad_norm": 295.2950439453125, + "learning_rate": 6.999825761615055e-06, + "loss": 15.3735, + "step": 216060 + }, + { + "epoch": 0.4364750703992049, + "grad_norm": 46.097312927246094, + "learning_rate": 6.999505827296877e-06, + "loss": 18.0488, + "step": 216070 + }, + { + "epoch": 0.43649527103188873, + "grad_norm": 473.28607177734375, + "learning_rate": 6.999185883233351e-06, + "loss": 30.239, + "step": 216080 + }, + { + "epoch": 0.43651547166457255, + "grad_norm": 194.43380737304688, + "learning_rate": 6.998865929426035e-06, + "loss": 8.9002, + "step": 216090 + }, + { + "epoch": 0.43653567229725637, + "grad_norm": 463.8258361816406, + "learning_rate": 6.998545965876489e-06, + "loss": 16.6704, + "step": 216100 + }, + { + "epoch": 0.4365558729299402, + "grad_norm": 449.43377685546875, + "learning_rate": 6.998225992586273e-06, + "loss": 28.3768, + "step": 216110 + }, + { + "epoch": 0.43657607356262396, + "grad_norm": 538.9699096679688, + "learning_rate": 6.997906009556943e-06, + "loss": 19.3899, + "step": 216120 + }, + { + "epoch": 0.4365962741953078, + "grad_norm": 500.4964599609375, + "learning_rate": 6.997586016790065e-06, + "loss": 12.6349, + "step": 216130 + }, + { + "epoch": 0.4366164748279916, + "grad_norm": 287.2041320800781, + "learning_rate": 6.997266014287193e-06, + "loss": 20.6508, + "step": 216140 + }, + { + "epoch": 0.4366366754606754, + "grad_norm": 228.54196166992188, + "learning_rate": 6.996946002049889e-06, + "loss": 14.7713, + "step": 216150 + }, + { + "epoch": 0.43665687609335924, + "grad_norm": 634.4945068359375, + "learning_rate": 6.9966259800797124e-06, + "loss": 29.2191, + "step": 216160 + }, + { + "epoch": 0.43667707672604306, + "grad_norm": 224.03050231933594, + "learning_rate": 6.996305948378223e-06, + "loss": 18.9435, + "step": 216170 + }, + { + "epoch": 0.4366972773587269, + "grad_norm": 117.95034790039062, + "learning_rate": 6.995985906946982e-06, + "loss": 34.8892, + "step": 216180 + }, + { + "epoch": 0.4367174779914107, + "grad_norm": 339.0070495605469, + "learning_rate": 6.995665855787546e-06, + "loss": 13.897, + "step": 216190 + }, + { + "epoch": 0.4367376786240945, + "grad_norm": 437.2546691894531, + "learning_rate": 6.995345794901477e-06, + "loss": 13.7998, + "step": 216200 + }, + { + "epoch": 0.43675787925677834, + "grad_norm": 409.4122619628906, + "learning_rate": 6.995025724290334e-06, + "loss": 11.4524, + "step": 216210 + }, + { + "epoch": 0.43677807988946216, + "grad_norm": 552.8837890625, + "learning_rate": 6.994705643955678e-06, + "loss": 21.521, + "step": 216220 + }, + { + "epoch": 0.436798280522146, + "grad_norm": 403.9027099609375, + "learning_rate": 6.994385553899069e-06, + "loss": 20.7522, + "step": 216230 + }, + { + "epoch": 0.4368184811548298, + "grad_norm": 280.6409912109375, + "learning_rate": 6.9940654541220675e-06, + "loss": 26.3352, + "step": 216240 + }, + { + "epoch": 0.43683868178751356, + "grad_norm": 434.9540100097656, + "learning_rate": 6.993745344626232e-06, + "loss": 28.3306, + "step": 216250 + }, + { + "epoch": 0.4368588824201974, + "grad_norm": 170.8972625732422, + "learning_rate": 6.993425225413123e-06, + 
"loss": 20.7315, + "step": 216260 + }, + { + "epoch": 0.4368790830528812, + "grad_norm": 142.85736083984375, + "learning_rate": 6.993105096484303e-06, + "loss": 32.1516, + "step": 216270 + }, + { + "epoch": 0.436899283685565, + "grad_norm": 128.5509796142578, + "learning_rate": 6.992784957841328e-06, + "loss": 11.9072, + "step": 216280 + }, + { + "epoch": 0.43691948431824884, + "grad_norm": 327.773193359375, + "learning_rate": 6.9924648094857625e-06, + "loss": 23.4574, + "step": 216290 + }, + { + "epoch": 0.43693968495093266, + "grad_norm": 327.75848388671875, + "learning_rate": 6.992144651419163e-06, + "loss": 11.3785, + "step": 216300 + }, + { + "epoch": 0.4369598855836165, + "grad_norm": 161.98997497558594, + "learning_rate": 6.991824483643095e-06, + "loss": 20.4968, + "step": 216310 + }, + { + "epoch": 0.4369800862163003, + "grad_norm": 442.284423828125, + "learning_rate": 6.991504306159115e-06, + "loss": 10.4146, + "step": 216320 + }, + { + "epoch": 0.4370002868489841, + "grad_norm": 244.4092559814453, + "learning_rate": 6.991184118968783e-06, + "loss": 20.7462, + "step": 216330 + }, + { + "epoch": 0.43702048748166794, + "grad_norm": 0.0, + "learning_rate": 6.990863922073664e-06, + "loss": 15.3136, + "step": 216340 + }, + { + "epoch": 0.43704068811435176, + "grad_norm": 487.59368896484375, + "learning_rate": 6.990543715475314e-06, + "loss": 21.1463, + "step": 216350 + }, + { + "epoch": 0.4370608887470356, + "grad_norm": 315.670654296875, + "learning_rate": 6.9902234991752945e-06, + "loss": 14.6671, + "step": 216360 + }, + { + "epoch": 0.43708108937971935, + "grad_norm": 662.347900390625, + "learning_rate": 6.989903273175169e-06, + "loss": 68.7131, + "step": 216370 + }, + { + "epoch": 0.43710129001240317, + "grad_norm": 312.63812255859375, + "learning_rate": 6.989583037476495e-06, + "loss": 18.9875, + "step": 216380 + }, + { + "epoch": 0.437121490645087, + "grad_norm": 1367.5877685546875, + "learning_rate": 6.989262792080834e-06, + "loss": 22.7746, + "step": 216390 + }, + { + "epoch": 0.4371416912777708, + "grad_norm": 18.924123764038086, + "learning_rate": 6.98894253698975e-06, + "loss": 18.3445, + "step": 216400 + }, + { + "epoch": 0.4371618919104546, + "grad_norm": 238.9813690185547, + "learning_rate": 6.988622272204799e-06, + "loss": 23.9051, + "step": 216410 + }, + { + "epoch": 0.43718209254313845, + "grad_norm": 231.65159606933594, + "learning_rate": 6.9883019977275446e-06, + "loss": 20.7348, + "step": 216420 + }, + { + "epoch": 0.43720229317582227, + "grad_norm": 356.8558349609375, + "learning_rate": 6.987981713559548e-06, + "loss": 19.909, + "step": 216430 + }, + { + "epoch": 0.4372224938085061, + "grad_norm": 564.163330078125, + "learning_rate": 6.98766141970237e-06, + "loss": 12.3083, + "step": 216440 + }, + { + "epoch": 0.4372426944411899, + "grad_norm": 188.4165802001953, + "learning_rate": 6.987341116157571e-06, + "loss": 15.2723, + "step": 216450 + }, + { + "epoch": 0.4372628950738737, + "grad_norm": 297.6818542480469, + "learning_rate": 6.987020802926711e-06, + "loss": 20.2872, + "step": 216460 + }, + { + "epoch": 0.43728309570655755, + "grad_norm": 532.7806396484375, + "learning_rate": 6.986700480011353e-06, + "loss": 13.1729, + "step": 216470 + }, + { + "epoch": 0.43730329633924137, + "grad_norm": 70.1969985961914, + "learning_rate": 6.986380147413059e-06, + "loss": 18.947, + "step": 216480 + }, + { + "epoch": 0.4373234969719252, + "grad_norm": 120.35257720947266, + "learning_rate": 6.986059805133389e-06, + "loss": 13.2831, + "step": 216490 + }, + { + "epoch": 
0.43734369760460895, + "grad_norm": 105.15730285644531, + "learning_rate": 6.985739453173903e-06, + "loss": 19.2324, + "step": 216500 + }, + { + "epoch": 0.43736389823729277, + "grad_norm": 7.287929534912109, + "learning_rate": 6.985419091536167e-06, + "loss": 50.7144, + "step": 216510 + }, + { + "epoch": 0.4373840988699766, + "grad_norm": 258.4720458984375, + "learning_rate": 6.985098720221736e-06, + "loss": 35.159, + "step": 216520 + }, + { + "epoch": 0.4374042995026604, + "grad_norm": 632.69677734375, + "learning_rate": 6.984778339232174e-06, + "loss": 18.8524, + "step": 216530 + }, + { + "epoch": 0.43742450013534423, + "grad_norm": 504.6958312988281, + "learning_rate": 6.984457948569045e-06, + "loss": 23.1049, + "step": 216540 + }, + { + "epoch": 0.43744470076802805, + "grad_norm": 799.5957641601562, + "learning_rate": 6.984137548233909e-06, + "loss": 19.1404, + "step": 216550 + }, + { + "epoch": 0.43746490140071187, + "grad_norm": 452.9576721191406, + "learning_rate": 6.983817138228327e-06, + "loss": 24.7578, + "step": 216560 + }, + { + "epoch": 0.4374851020333957, + "grad_norm": 709.0180053710938, + "learning_rate": 6.983496718553859e-06, + "loss": 25.9065, + "step": 216570 + }, + { + "epoch": 0.4375053026660795, + "grad_norm": 290.4211730957031, + "learning_rate": 6.98317628921207e-06, + "loss": 16.1081, + "step": 216580 + }, + { + "epoch": 0.43752550329876333, + "grad_norm": 191.25637817382812, + "learning_rate": 6.982855850204521e-06, + "loss": 13.7262, + "step": 216590 + }, + { + "epoch": 0.43754570393144715, + "grad_norm": 530.4448852539062, + "learning_rate": 6.9825354015327715e-06, + "loss": 41.3313, + "step": 216600 + }, + { + "epoch": 0.43756590456413097, + "grad_norm": 155.168212890625, + "learning_rate": 6.9822149431983865e-06, + "loss": 20.3135, + "step": 216610 + }, + { + "epoch": 0.4375861051968148, + "grad_norm": 409.7743835449219, + "learning_rate": 6.981894475202924e-06, + "loss": 19.4899, + "step": 216620 + }, + { + "epoch": 0.43760630582949855, + "grad_norm": 627.4774169921875, + "learning_rate": 6.981573997547951e-06, + "loss": 22.509, + "step": 216630 + }, + { + "epoch": 0.4376265064621824, + "grad_norm": 423.2906494140625, + "learning_rate": 6.981253510235025e-06, + "loss": 26.1528, + "step": 216640 + }, + { + "epoch": 0.4376467070948662, + "grad_norm": 391.2195129394531, + "learning_rate": 6.98093301326571e-06, + "loss": 41.3183, + "step": 216650 + }, + { + "epoch": 0.43766690772755, + "grad_norm": 507.5215759277344, + "learning_rate": 6.980612506641567e-06, + "loss": 14.901, + "step": 216660 + }, + { + "epoch": 0.43768710836023383, + "grad_norm": 401.25927734375, + "learning_rate": 6.9802919903641605e-06, + "loss": 11.358, + "step": 216670 + }, + { + "epoch": 0.43770730899291765, + "grad_norm": 378.6192626953125, + "learning_rate": 6.9799714644350504e-06, + "loss": 23.1479, + "step": 216680 + }, + { + "epoch": 0.4377275096256015, + "grad_norm": 244.60025024414062, + "learning_rate": 6.979650928855799e-06, + "loss": 18.0241, + "step": 216690 + }, + { + "epoch": 0.4377477102582853, + "grad_norm": 659.0491333007812, + "learning_rate": 6.979330383627969e-06, + "loss": 30.2683, + "step": 216700 + }, + { + "epoch": 0.4377679108909691, + "grad_norm": 385.2704772949219, + "learning_rate": 6.9790098287531225e-06, + "loss": 8.4695, + "step": 216710 + }, + { + "epoch": 0.43778811152365293, + "grad_norm": 0.0, + "learning_rate": 6.978689264232824e-06, + "loss": 28.6678, + "step": 216720 + }, + { + "epoch": 0.43780831215633675, + "grad_norm": 408.3829345703125, + 
"learning_rate": 6.978368690068632e-06, + "loss": 13.1668, + "step": 216730 + }, + { + "epoch": 0.4378285127890206, + "grad_norm": 107.53667449951172, + "learning_rate": 6.9780481062621115e-06, + "loss": 23.9724, + "step": 216740 + }, + { + "epoch": 0.4378487134217044, + "grad_norm": 707.2608032226562, + "learning_rate": 6.977727512814826e-06, + "loss": 31.421, + "step": 216750 + }, + { + "epoch": 0.43786891405438816, + "grad_norm": 634.0804443359375, + "learning_rate": 6.977406909728335e-06, + "loss": 22.6222, + "step": 216760 + }, + { + "epoch": 0.437889114687072, + "grad_norm": 9.855452537536621, + "learning_rate": 6.9770862970042015e-06, + "loss": 11.3352, + "step": 216770 + }, + { + "epoch": 0.4379093153197558, + "grad_norm": 424.2359924316406, + "learning_rate": 6.97676567464399e-06, + "loss": 17.6337, + "step": 216780 + }, + { + "epoch": 0.4379295159524396, + "grad_norm": 414.2570495605469, + "learning_rate": 6.976445042649265e-06, + "loss": 13.9813, + "step": 216790 + }, + { + "epoch": 0.43794971658512344, + "grad_norm": 434.4364318847656, + "learning_rate": 6.976124401021583e-06, + "loss": 54.4137, + "step": 216800 + }, + { + "epoch": 0.43796991721780726, + "grad_norm": 276.7896423339844, + "learning_rate": 6.975803749762512e-06, + "loss": 16.4736, + "step": 216810 + }, + { + "epoch": 0.4379901178504911, + "grad_norm": 531.3176879882812, + "learning_rate": 6.975483088873613e-06, + "loss": 29.0812, + "step": 216820 + }, + { + "epoch": 0.4380103184831749, + "grad_norm": 423.0602111816406, + "learning_rate": 6.975162418356448e-06, + "loss": 36.8104, + "step": 216830 + }, + { + "epoch": 0.4380305191158587, + "grad_norm": 3095.189453125, + "learning_rate": 6.974841738212581e-06, + "loss": 26.0828, + "step": 216840 + }, + { + "epoch": 0.43805071974854254, + "grad_norm": 116.5901870727539, + "learning_rate": 6.974521048443577e-06, + "loss": 17.5627, + "step": 216850 + }, + { + "epoch": 0.43807092038122636, + "grad_norm": 175.02687072753906, + "learning_rate": 6.974200349050996e-06, + "loss": 12.1302, + "step": 216860 + }, + { + "epoch": 0.4380911210139102, + "grad_norm": 485.75958251953125, + "learning_rate": 6.9738796400364005e-06, + "loss": 17.7153, + "step": 216870 + }, + { + "epoch": 0.438111321646594, + "grad_norm": 255.5109100341797, + "learning_rate": 6.973558921401356e-06, + "loss": 17.5615, + "step": 216880 + }, + { + "epoch": 0.43813152227927776, + "grad_norm": 117.12784576416016, + "learning_rate": 6.973238193147425e-06, + "loss": 20.0242, + "step": 216890 + }, + { + "epoch": 0.4381517229119616, + "grad_norm": 122.49821472167969, + "learning_rate": 6.97291745527617e-06, + "loss": 31.9352, + "step": 216900 + }, + { + "epoch": 0.4381719235446454, + "grad_norm": 884.3511962890625, + "learning_rate": 6.972596707789154e-06, + "loss": 21.7809, + "step": 216910 + }, + { + "epoch": 0.4381921241773292, + "grad_norm": 9.070160865783691, + "learning_rate": 6.972275950687942e-06, + "loss": 20.1085, + "step": 216920 + }, + { + "epoch": 0.43821232481001304, + "grad_norm": 374.5420227050781, + "learning_rate": 6.9719551839740964e-06, + "loss": 13.6864, + "step": 216930 + }, + { + "epoch": 0.43823252544269686, + "grad_norm": 780.5281982421875, + "learning_rate": 6.971634407649179e-06, + "loss": 25.9084, + "step": 216940 + }, + { + "epoch": 0.4382527260753807, + "grad_norm": 31.735071182250977, + "learning_rate": 6.971313621714756e-06, + "loss": 23.8748, + "step": 216950 + }, + { + "epoch": 0.4382729267080645, + "grad_norm": 383.90399169921875, + "learning_rate": 6.970992826172389e-06, + 
"loss": 44.4712, + "step": 216960 + }, + { + "epoch": 0.4382931273407483, + "grad_norm": 389.1132507324219, + "learning_rate": 6.970672021023641e-06, + "loss": 12.3641, + "step": 216970 + }, + { + "epoch": 0.43831332797343214, + "grad_norm": 405.3794860839844, + "learning_rate": 6.970351206270079e-06, + "loss": 30.3496, + "step": 216980 + }, + { + "epoch": 0.43833352860611596, + "grad_norm": 328.0166015625, + "learning_rate": 6.970030381913262e-06, + "loss": 20.1953, + "step": 216990 + }, + { + "epoch": 0.4383537292387998, + "grad_norm": 261.0073547363281, + "learning_rate": 6.9697095479547564e-06, + "loss": 17.1022, + "step": 217000 + }, + { + "epoch": 0.43837392987148355, + "grad_norm": 274.3747863769531, + "learning_rate": 6.969388704396126e-06, + "loss": 20.3654, + "step": 217010 + }, + { + "epoch": 0.43839413050416737, + "grad_norm": 481.7867126464844, + "learning_rate": 6.969067851238933e-06, + "loss": 14.2299, + "step": 217020 + }, + { + "epoch": 0.4384143311368512, + "grad_norm": 18.3186092376709, + "learning_rate": 6.968746988484742e-06, + "loss": 20.9748, + "step": 217030 + }, + { + "epoch": 0.438434531769535, + "grad_norm": 316.3203125, + "learning_rate": 6.968426116135118e-06, + "loss": 27.5381, + "step": 217040 + }, + { + "epoch": 0.4384547324022188, + "grad_norm": 562.3090209960938, + "learning_rate": 6.968105234191623e-06, + "loss": 31.5549, + "step": 217050 + }, + { + "epoch": 0.43847493303490265, + "grad_norm": 532.0624389648438, + "learning_rate": 6.9677843426558235e-06, + "loss": 23.4905, + "step": 217060 + }, + { + "epoch": 0.43849513366758647, + "grad_norm": 282.7160339355469, + "learning_rate": 6.967463441529278e-06, + "loss": 22.0337, + "step": 217070 + }, + { + "epoch": 0.4385153343002703, + "grad_norm": 542.9619140625, + "learning_rate": 6.967142530813558e-06, + "loss": 22.199, + "step": 217080 + }, + { + "epoch": 0.4385355349329541, + "grad_norm": 66.30181884765625, + "learning_rate": 6.966821610510222e-06, + "loss": 19.1166, + "step": 217090 + }, + { + "epoch": 0.4385557355656379, + "grad_norm": 444.0246887207031, + "learning_rate": 6.966500680620837e-06, + "loss": 31.164, + "step": 217100 + }, + { + "epoch": 0.43857593619832175, + "grad_norm": 257.4731140136719, + "learning_rate": 6.966179741146966e-06, + "loss": 15.1363, + "step": 217110 + }, + { + "epoch": 0.43859613683100557, + "grad_norm": 441.1191101074219, + "learning_rate": 6.965858792090174e-06, + "loss": 15.4256, + "step": 217120 + }, + { + "epoch": 0.4386163374636894, + "grad_norm": 88.1796646118164, + "learning_rate": 6.965537833452024e-06, + "loss": 30.0568, + "step": 217130 + }, + { + "epoch": 0.43863653809637315, + "grad_norm": 404.3753967285156, + "learning_rate": 6.9652168652340804e-06, + "loss": 23.0397, + "step": 217140 + }, + { + "epoch": 0.43865673872905697, + "grad_norm": 726.0257568359375, + "learning_rate": 6.9648958874379084e-06, + "loss": 17.76, + "step": 217150 + }, + { + "epoch": 0.4386769393617408, + "grad_norm": 195.17135620117188, + "learning_rate": 6.964574900065072e-06, + "loss": 16.7495, + "step": 217160 + }, + { + "epoch": 0.4386971399944246, + "grad_norm": 626.55029296875, + "learning_rate": 6.964253903117138e-06, + "loss": 14.5461, + "step": 217170 + }, + { + "epoch": 0.43871734062710843, + "grad_norm": 391.9192810058594, + "learning_rate": 6.963932896595665e-06, + "loss": 34.7739, + "step": 217180 + }, + { + "epoch": 0.43873754125979225, + "grad_norm": 13.609729766845703, + "learning_rate": 6.963611880502225e-06, + "loss": 102.5089, + "step": 217190 + }, + { + "epoch": 
0.43875774189247607, + "grad_norm": 424.7698669433594, + "learning_rate": 6.963290854838376e-06, + "loss": 17.1916, + "step": 217200 + }, + { + "epoch": 0.4387779425251599, + "grad_norm": 598.4286499023438, + "learning_rate": 6.962969819605686e-06, + "loss": 16.3738, + "step": 217210 + }, + { + "epoch": 0.4387981431578437, + "grad_norm": 51.94447708129883, + "learning_rate": 6.96264877480572e-06, + "loss": 14.15, + "step": 217220 + }, + { + "epoch": 0.43881834379052753, + "grad_norm": 5.548483371734619, + "learning_rate": 6.96232772044004e-06, + "loss": 22.1524, + "step": 217230 + }, + { + "epoch": 0.43883854442321135, + "grad_norm": 883.7322998046875, + "learning_rate": 6.962006656510216e-06, + "loss": 24.5439, + "step": 217240 + }, + { + "epoch": 0.43885874505589517, + "grad_norm": 505.0666809082031, + "learning_rate": 6.961685583017808e-06, + "loss": 13.2514, + "step": 217250 + }, + { + "epoch": 0.438878945688579, + "grad_norm": 446.1742858886719, + "learning_rate": 6.961364499964383e-06, + "loss": 20.9679, + "step": 217260 + }, + { + "epoch": 0.43889914632126276, + "grad_norm": 307.6526184082031, + "learning_rate": 6.961043407351505e-06, + "loss": 29.1635, + "step": 217270 + }, + { + "epoch": 0.4389193469539466, + "grad_norm": 246.08106994628906, + "learning_rate": 6.960722305180737e-06, + "loss": 17.7958, + "step": 217280 + }, + { + "epoch": 0.4389395475866304, + "grad_norm": 671.756591796875, + "learning_rate": 6.9604011934536495e-06, + "loss": 26.1312, + "step": 217290 + }, + { + "epoch": 0.4389597482193142, + "grad_norm": 52.21709442138672, + "learning_rate": 6.960080072171802e-06, + "loss": 11.9219, + "step": 217300 + }, + { + "epoch": 0.43897994885199804, + "grad_norm": 382.4366455078125, + "learning_rate": 6.959758941336762e-06, + "loss": 26.336, + "step": 217310 + }, + { + "epoch": 0.43900014948468186, + "grad_norm": 495.8280334472656, + "learning_rate": 6.959437800950097e-06, + "loss": 25.7054, + "step": 217320 + }, + { + "epoch": 0.4390203501173657, + "grad_norm": 1479.1221923828125, + "learning_rate": 6.959116651013369e-06, + "loss": 46.8976, + "step": 217330 + }, + { + "epoch": 0.4390405507500495, + "grad_norm": 2274.858642578125, + "learning_rate": 6.958795491528142e-06, + "loss": 46.3309, + "step": 217340 + }, + { + "epoch": 0.4390607513827333, + "grad_norm": 584.8311157226562, + "learning_rate": 6.9584743224959846e-06, + "loss": 17.9077, + "step": 217350 + }, + { + "epoch": 0.43908095201541714, + "grad_norm": 705.3248901367188, + "learning_rate": 6.958153143918462e-06, + "loss": 14.4178, + "step": 217360 + }, + { + "epoch": 0.43910115264810096, + "grad_norm": 208.1329803466797, + "learning_rate": 6.957831955797137e-06, + "loss": 14.8227, + "step": 217370 + }, + { + "epoch": 0.4391213532807848, + "grad_norm": 682.8501586914062, + "learning_rate": 6.957510758133579e-06, + "loss": 27.366, + "step": 217380 + }, + { + "epoch": 0.4391415539134686, + "grad_norm": 204.91993713378906, + "learning_rate": 6.957189550929346e-06, + "loss": 21.1082, + "step": 217390 + }, + { + "epoch": 0.43916175454615236, + "grad_norm": 384.63140869140625, + "learning_rate": 6.9568683341860135e-06, + "loss": 15.1485, + "step": 217400 + }, + { + "epoch": 0.4391819551788362, + "grad_norm": 491.8144836425781, + "learning_rate": 6.9565471079051395e-06, + "loss": 19.7581, + "step": 217410 + }, + { + "epoch": 0.43920215581152, + "grad_norm": 814.9802856445312, + "learning_rate": 6.956225872088292e-06, + "loss": 11.183, + "step": 217420 + }, + { + "epoch": 0.4392223564442038, + "grad_norm": 
157.53225708007812, + "learning_rate": 6.9559046267370375e-06, + "loss": 18.0584, + "step": 217430 + }, + { + "epoch": 0.43924255707688764, + "grad_norm": 151.10498046875, + "learning_rate": 6.955583371852942e-06, + "loss": 17.8701, + "step": 217440 + }, + { + "epoch": 0.43926275770957146, + "grad_norm": 848.97607421875, + "learning_rate": 6.95526210743757e-06, + "loss": 26.1495, + "step": 217450 + }, + { + "epoch": 0.4392829583422553, + "grad_norm": 218.88198852539062, + "learning_rate": 6.954940833492487e-06, + "loss": 12.4294, + "step": 217460 + }, + { + "epoch": 0.4393031589749391, + "grad_norm": 418.4537048339844, + "learning_rate": 6.954619550019259e-06, + "loss": 22.6387, + "step": 217470 + }, + { + "epoch": 0.4393233596076229, + "grad_norm": 504.738525390625, + "learning_rate": 6.954298257019454e-06, + "loss": 15.7061, + "step": 217480 + }, + { + "epoch": 0.43934356024030674, + "grad_norm": 1237.7802734375, + "learning_rate": 6.953976954494636e-06, + "loss": 24.4484, + "step": 217490 + }, + { + "epoch": 0.43936376087299056, + "grad_norm": 202.9380645751953, + "learning_rate": 6.953655642446368e-06, + "loss": 22.1801, + "step": 217500 + }, + { + "epoch": 0.4393839615056744, + "grad_norm": 251.24620056152344, + "learning_rate": 6.953334320876224e-06, + "loss": 16.4192, + "step": 217510 + }, + { + "epoch": 0.4394041621383582, + "grad_norm": 571.8933715820312, + "learning_rate": 6.9530129897857626e-06, + "loss": 17.6492, + "step": 217520 + }, + { + "epoch": 0.43942436277104197, + "grad_norm": 291.2666015625, + "learning_rate": 6.952691649176554e-06, + "loss": 39.2445, + "step": 217530 + }, + { + "epoch": 0.4394445634037258, + "grad_norm": 227.22714233398438, + "learning_rate": 6.952370299050163e-06, + "loss": 15.9309, + "step": 217540 + }, + { + "epoch": 0.4394647640364096, + "grad_norm": 772.4575805664062, + "learning_rate": 6.952048939408156e-06, + "loss": 27.742, + "step": 217550 + }, + { + "epoch": 0.4394849646690934, + "grad_norm": 367.7803649902344, + "learning_rate": 6.9517275702521e-06, + "loss": 13.9836, + "step": 217560 + }, + { + "epoch": 0.43950516530177725, + "grad_norm": 1871.306396484375, + "learning_rate": 6.9514061915835584e-06, + "loss": 47.7978, + "step": 217570 + }, + { + "epoch": 0.43952536593446107, + "grad_norm": 611.9116821289062, + "learning_rate": 6.9510848034041e-06, + "loss": 44.3175, + "step": 217580 + }, + { + "epoch": 0.4395455665671449, + "grad_norm": 442.2686462402344, + "learning_rate": 6.950763405715292e-06, + "loss": 25.4758, + "step": 217590 + }, + { + "epoch": 0.4395657671998287, + "grad_norm": 166.6293182373047, + "learning_rate": 6.950441998518699e-06, + "loss": 21.7418, + "step": 217600 + }, + { + "epoch": 0.4395859678325125, + "grad_norm": 649.2433471679688, + "learning_rate": 6.950120581815889e-06, + "loss": 46.6833, + "step": 217610 + }, + { + "epoch": 0.43960616846519635, + "grad_norm": 182.2321014404297, + "learning_rate": 6.9497991556084275e-06, + "loss": 15.3815, + "step": 217620 + }, + { + "epoch": 0.43962636909788017, + "grad_norm": 702.5481567382812, + "learning_rate": 6.9494777198978815e-06, + "loss": 16.7604, + "step": 217630 + }, + { + "epoch": 0.439646569730564, + "grad_norm": 124.78889465332031, + "learning_rate": 6.949156274685818e-06, + "loss": 15.8259, + "step": 217640 + }, + { + "epoch": 0.43966677036324775, + "grad_norm": 327.0403137207031, + "learning_rate": 6.948834819973803e-06, + "loss": 19.646, + "step": 217650 + }, + { + "epoch": 0.43968697099593157, + "grad_norm": 238.36106872558594, + "learning_rate": 
6.948513355763402e-06, + "loss": 24.9178, + "step": 217660 + }, + { + "epoch": 0.4397071716286154, + "grad_norm": 343.8514404296875, + "learning_rate": 6.948191882056185e-06, + "loss": 28.2836, + "step": 217670 + }, + { + "epoch": 0.4397273722612992, + "grad_norm": 1091.2371826171875, + "learning_rate": 6.947870398853716e-06, + "loss": 37.9444, + "step": 217680 + }, + { + "epoch": 0.43974757289398303, + "grad_norm": 5.781420707702637, + "learning_rate": 6.947548906157563e-06, + "loss": 18.4193, + "step": 217690 + }, + { + "epoch": 0.43976777352666685, + "grad_norm": 294.8966064453125, + "learning_rate": 6.947227403969293e-06, + "loss": 17.5239, + "step": 217700 + }, + { + "epoch": 0.43978797415935067, + "grad_norm": 380.2729187011719, + "learning_rate": 6.946905892290473e-06, + "loss": 30.6968, + "step": 217710 + }, + { + "epoch": 0.4398081747920345, + "grad_norm": 121.35004425048828, + "learning_rate": 6.946584371122671e-06, + "loss": 8.2712, + "step": 217720 + }, + { + "epoch": 0.4398283754247183, + "grad_norm": 403.33270263671875, + "learning_rate": 6.94626284046745e-06, + "loss": 17.1258, + "step": 217730 + }, + { + "epoch": 0.43984857605740213, + "grad_norm": 856.6432495117188, + "learning_rate": 6.945941300326382e-06, + "loss": 26.1067, + "step": 217740 + }, + { + "epoch": 0.43986877669008595, + "grad_norm": 500.9569091796875, + "learning_rate": 6.9456197507010315e-06, + "loss": 26.9629, + "step": 217750 + }, + { + "epoch": 0.43988897732276977, + "grad_norm": 193.00595092773438, + "learning_rate": 6.945298191592967e-06, + "loss": 23.0274, + "step": 217760 + }, + { + "epoch": 0.4399091779554536, + "grad_norm": 256.3966369628906, + "learning_rate": 6.944976623003754e-06, + "loss": 33.6679, + "step": 217770 + }, + { + "epoch": 0.43992937858813735, + "grad_norm": 1096.0462646484375, + "learning_rate": 6.944655044934962e-06, + "loss": 18.0047, + "step": 217780 + }, + { + "epoch": 0.4399495792208212, + "grad_norm": 347.51373291015625, + "learning_rate": 6.944333457388156e-06, + "loss": 21.4259, + "step": 217790 + }, + { + "epoch": 0.439969779853505, + "grad_norm": 256.20904541015625, + "learning_rate": 6.944011860364905e-06, + "loss": 30.3539, + "step": 217800 + }, + { + "epoch": 0.4399899804861888, + "grad_norm": 359.13592529296875, + "learning_rate": 6.9436902538667775e-06, + "loss": 20.0677, + "step": 217810 + }, + { + "epoch": 0.44001018111887263, + "grad_norm": 349.65802001953125, + "learning_rate": 6.943368637895338e-06, + "loss": 27.8022, + "step": 217820 + }, + { + "epoch": 0.44003038175155645, + "grad_norm": 624.5690307617188, + "learning_rate": 6.943047012452156e-06, + "loss": 12.4032, + "step": 217830 + }, + { + "epoch": 0.4400505823842403, + "grad_norm": 460.8302917480469, + "learning_rate": 6.942725377538797e-06, + "loss": 28.531, + "step": 217840 + }, + { + "epoch": 0.4400707830169241, + "grad_norm": 397.0054016113281, + "learning_rate": 6.942403733156832e-06, + "loss": 17.3211, + "step": 217850 + }, + { + "epoch": 0.4400909836496079, + "grad_norm": 149.7487335205078, + "learning_rate": 6.942082079307826e-06, + "loss": 20.93, + "step": 217860 + }, + { + "epoch": 0.44011118428229173, + "grad_norm": 281.08526611328125, + "learning_rate": 6.941760415993346e-06, + "loss": 19.6336, + "step": 217870 + }, + { + "epoch": 0.44013138491497555, + "grad_norm": 406.5328674316406, + "learning_rate": 6.941438743214963e-06, + "loss": 16.7679, + "step": 217880 + }, + { + "epoch": 0.4401515855476594, + "grad_norm": 326.6893005371094, + "learning_rate": 6.941117060974243e-06, + "loss": 23.4696, 
+ "step": 217890 + }, + { + "epoch": 0.4401717861803432, + "grad_norm": 362.5688781738281, + "learning_rate": 6.940795369272754e-06, + "loss": 25.0945, + "step": 217900 + }, + { + "epoch": 0.44019198681302696, + "grad_norm": 204.62315368652344, + "learning_rate": 6.940473668112063e-06, + "loss": 25.728, + "step": 217910 + }, + { + "epoch": 0.4402121874457108, + "grad_norm": 332.57855224609375, + "learning_rate": 6.940151957493739e-06, + "loss": 21.0858, + "step": 217920 + }, + { + "epoch": 0.4402323880783946, + "grad_norm": 79.63465881347656, + "learning_rate": 6.939830237419349e-06, + "loss": 15.6943, + "step": 217930 + }, + { + "epoch": 0.4402525887110784, + "grad_norm": 238.8327178955078, + "learning_rate": 6.939508507890464e-06, + "loss": 20.8919, + "step": 217940 + }, + { + "epoch": 0.44027278934376224, + "grad_norm": 681.3981323242188, + "learning_rate": 6.939186768908647e-06, + "loss": 39.2945, + "step": 217950 + }, + { + "epoch": 0.44029298997644606, + "grad_norm": 474.71160888671875, + "learning_rate": 6.938865020475471e-06, + "loss": 34.5285, + "step": 217960 + }, + { + "epoch": 0.4403131906091299, + "grad_norm": 596.3753051757812, + "learning_rate": 6.9385432625925006e-06, + "loss": 20.1723, + "step": 217970 + }, + { + "epoch": 0.4403333912418137, + "grad_norm": 409.4012145996094, + "learning_rate": 6.938221495261306e-06, + "loss": 19.905, + "step": 217980 + }, + { + "epoch": 0.4403535918744975, + "grad_norm": 393.483154296875, + "learning_rate": 6.937899718483456e-06, + "loss": 17.1246, + "step": 217990 + }, + { + "epoch": 0.44037379250718134, + "grad_norm": 278.825927734375, + "learning_rate": 6.9375779322605154e-06, + "loss": 22.2015, + "step": 218000 + }, + { + "epoch": 0.44039399313986516, + "grad_norm": 643.759765625, + "learning_rate": 6.937256136594057e-06, + "loss": 20.2364, + "step": 218010 + }, + { + "epoch": 0.440414193772549, + "grad_norm": 357.60296630859375, + "learning_rate": 6.936934331485646e-06, + "loss": 18.5588, + "step": 218020 + }, + { + "epoch": 0.4404343944052328, + "grad_norm": 159.6121826171875, + "learning_rate": 6.936612516936852e-06, + "loss": 24.4171, + "step": 218030 + }, + { + "epoch": 0.44045459503791656, + "grad_norm": 198.94183349609375, + "learning_rate": 6.936290692949243e-06, + "loss": 16.4759, + "step": 218040 + }, + { + "epoch": 0.4404747956706004, + "grad_norm": 256.5325622558594, + "learning_rate": 6.935968859524389e-06, + "loss": 24.06, + "step": 218050 + }, + { + "epoch": 0.4404949963032842, + "grad_norm": 238.43252563476562, + "learning_rate": 6.935647016663859e-06, + "loss": 34.2544, + "step": 218060 + }, + { + "epoch": 0.440515196935968, + "grad_norm": 321.5218811035156, + "learning_rate": 6.935325164369219e-06, + "loss": 32.8428, + "step": 218070 + }, + { + "epoch": 0.44053539756865184, + "grad_norm": 54.25544357299805, + "learning_rate": 6.935003302642038e-06, + "loss": 19.1141, + "step": 218080 + }, + { + "epoch": 0.44055559820133566, + "grad_norm": 422.23712158203125, + "learning_rate": 6.934681431483886e-06, + "loss": 23.3319, + "step": 218090 + }, + { + "epoch": 0.4405757988340195, + "grad_norm": 150.90109252929688, + "learning_rate": 6.934359550896332e-06, + "loss": 21.2584, + "step": 218100 + }, + { + "epoch": 0.4405959994667033, + "grad_norm": 514.95751953125, + "learning_rate": 6.934037660880942e-06, + "loss": 21.9861, + "step": 218110 + }, + { + "epoch": 0.4406162000993871, + "grad_norm": 735.240966796875, + "learning_rate": 6.93371576143929e-06, + "loss": 26.3322, + "step": 218120 + }, + { + "epoch": 
0.44063640073207094, + "grad_norm": 220.24258422851562, + "learning_rate": 6.9333938525729396e-06, + "loss": 17.7656, + "step": 218130 + }, + { + "epoch": 0.44065660136475476, + "grad_norm": 145.22857666015625, + "learning_rate": 6.9330719342834644e-06, + "loss": 21.6354, + "step": 218140 + }, + { + "epoch": 0.4406768019974386, + "grad_norm": 143.77865600585938, + "learning_rate": 6.932750006572428e-06, + "loss": 26.5324, + "step": 218150 + }, + { + "epoch": 0.4406970026301224, + "grad_norm": 286.95574951171875, + "learning_rate": 6.932428069441405e-06, + "loss": 12.5363, + "step": 218160 + }, + { + "epoch": 0.44071720326280617, + "grad_norm": 207.35633850097656, + "learning_rate": 6.932106122891961e-06, + "loss": 29.8767, + "step": 218170 + }, + { + "epoch": 0.44073740389549, + "grad_norm": 323.6469421386719, + "learning_rate": 6.931784166925667e-06, + "loss": 23.2252, + "step": 218180 + }, + { + "epoch": 0.4407576045281738, + "grad_norm": 99.18583679199219, + "learning_rate": 6.93146220154409e-06, + "loss": 12.3409, + "step": 218190 + }, + { + "epoch": 0.4407778051608576, + "grad_norm": 283.7394104003906, + "learning_rate": 6.9311402267488004e-06, + "loss": 18.442, + "step": 218200 + }, + { + "epoch": 0.44079800579354145, + "grad_norm": 216.76153564453125, + "learning_rate": 6.930818242541368e-06, + "loss": 18.5097, + "step": 218210 + }, + { + "epoch": 0.44081820642622527, + "grad_norm": 311.0485534667969, + "learning_rate": 6.9304962489233615e-06, + "loss": 12.1743, + "step": 218220 + }, + { + "epoch": 0.4408384070589091, + "grad_norm": 643.7454833984375, + "learning_rate": 6.930174245896352e-06, + "loss": 23.9982, + "step": 218230 + }, + { + "epoch": 0.4408586076915929, + "grad_norm": 1482.6033935546875, + "learning_rate": 6.929852233461906e-06, + "loss": 52.5193, + "step": 218240 + }, + { + "epoch": 0.4408788083242767, + "grad_norm": 505.75299072265625, + "learning_rate": 6.929530211621593e-06, + "loss": 29.9107, + "step": 218250 + }, + { + "epoch": 0.44089900895696055, + "grad_norm": 690.9946899414062, + "learning_rate": 6.9292081803769865e-06, + "loss": 25.7389, + "step": 218260 + }, + { + "epoch": 0.44091920958964437, + "grad_norm": 690.6004638671875, + "learning_rate": 6.928886139729652e-06, + "loss": 20.0891, + "step": 218270 + }, + { + "epoch": 0.4409394102223282, + "grad_norm": 255.3621826171875, + "learning_rate": 6.92856408968116e-06, + "loss": 19.1689, + "step": 218280 + }, + { + "epoch": 0.44095961085501195, + "grad_norm": 188.1383514404297, + "learning_rate": 6.92824203023308e-06, + "loss": 11.7769, + "step": 218290 + }, + { + "epoch": 0.44097981148769577, + "grad_norm": 297.4443359375, + "learning_rate": 6.927919961386984e-06, + "loss": 23.7473, + "step": 218300 + }, + { + "epoch": 0.4410000121203796, + "grad_norm": 460.88055419921875, + "learning_rate": 6.927597883144439e-06, + "loss": 15.4438, + "step": 218310 + }, + { + "epoch": 0.4410202127530634, + "grad_norm": 400.71966552734375, + "learning_rate": 6.9272757955070146e-06, + "loss": 27.2158, + "step": 218320 + }, + { + "epoch": 0.44104041338574723, + "grad_norm": 285.7626953125, + "learning_rate": 6.926953698476284e-06, + "loss": 15.3149, + "step": 218330 + }, + { + "epoch": 0.44106061401843105, + "grad_norm": 693.8849487304688, + "learning_rate": 6.926631592053812e-06, + "loss": 25.8479, + "step": 218340 + }, + { + "epoch": 0.44108081465111487, + "grad_norm": 493.76361083984375, + "learning_rate": 6.926309476241174e-06, + "loss": 10.8633, + "step": 218350 + }, + { + "epoch": 0.4411010152837987, + "grad_norm": 
228.04771423339844, + "learning_rate": 6.925987351039936e-06, + "loss": 21.5013, + "step": 218360 + }, + { + "epoch": 0.4411212159164825, + "grad_norm": 432.6797790527344, + "learning_rate": 6.925665216451669e-06, + "loss": 12.5181, + "step": 218370 + }, + { + "epoch": 0.44114141654916633, + "grad_norm": 462.1549377441406, + "learning_rate": 6.925343072477943e-06, + "loss": 18.1943, + "step": 218380 + }, + { + "epoch": 0.44116161718185015, + "grad_norm": 445.35955810546875, + "learning_rate": 6.925020919120331e-06, + "loss": 14.6549, + "step": 218390 + }, + { + "epoch": 0.44118181781453397, + "grad_norm": 518.8099365234375, + "learning_rate": 6.924698756380398e-06, + "loss": 23.1013, + "step": 218400 + }, + { + "epoch": 0.4412020184472178, + "grad_norm": 496.41107177734375, + "learning_rate": 6.924376584259718e-06, + "loss": 13.32, + "step": 218410 + }, + { + "epoch": 0.44122221907990156, + "grad_norm": 1270.318359375, + "learning_rate": 6.924054402759858e-06, + "loss": 24.7812, + "step": 218420 + }, + { + "epoch": 0.4412424197125854, + "grad_norm": 10.630510330200195, + "learning_rate": 6.923732211882391e-06, + "loss": 19.0398, + "step": 218430 + }, + { + "epoch": 0.4412626203452692, + "grad_norm": 528.0415649414062, + "learning_rate": 6.9234100116288886e-06, + "loss": 19.2299, + "step": 218440 + }, + { + "epoch": 0.441282820977953, + "grad_norm": 986.3802490234375, + "learning_rate": 6.923087802000916e-06, + "loss": 23.4119, + "step": 218450 + }, + { + "epoch": 0.44130302161063684, + "grad_norm": 0.0, + "learning_rate": 6.9227655830000485e-06, + "loss": 38.5934, + "step": 218460 + }, + { + "epoch": 0.44132322224332066, + "grad_norm": 287.9686279296875, + "learning_rate": 6.922443354627855e-06, + "loss": 20.5077, + "step": 218470 + }, + { + "epoch": 0.4413434228760045, + "grad_norm": 509.0263671875, + "learning_rate": 6.922121116885905e-06, + "loss": 18.8017, + "step": 218480 + }, + { + "epoch": 0.4413636235086883, + "grad_norm": 413.716552734375, + "learning_rate": 6.921798869775769e-06, + "loss": 15.485, + "step": 218490 + }, + { + "epoch": 0.4413838241413721, + "grad_norm": 306.12896728515625, + "learning_rate": 6.921476613299018e-06, + "loss": 14.0145, + "step": 218500 + }, + { + "epoch": 0.44140402477405594, + "grad_norm": 273.29681396484375, + "learning_rate": 6.921154347457226e-06, + "loss": 19.0963, + "step": 218510 + }, + { + "epoch": 0.44142422540673976, + "grad_norm": 433.2372131347656, + "learning_rate": 6.9208320722519594e-06, + "loss": 23.0324, + "step": 218520 + }, + { + "epoch": 0.4414444260394236, + "grad_norm": 486.3014221191406, + "learning_rate": 6.920509787684789e-06, + "loss": 18.287, + "step": 218530 + }, + { + "epoch": 0.4414646266721074, + "grad_norm": 655.7376098632812, + "learning_rate": 6.920187493757288e-06, + "loss": 21.3021, + "step": 218540 + }, + { + "epoch": 0.44148482730479116, + "grad_norm": 1174.218505859375, + "learning_rate": 6.919865190471027e-06, + "loss": 36.7262, + "step": 218550 + }, + { + "epoch": 0.441505027937475, + "grad_norm": 222.35841369628906, + "learning_rate": 6.919542877827573e-06, + "loss": 14.9355, + "step": 218560 + }, + { + "epoch": 0.4415252285701588, + "grad_norm": 563.6325073242188, + "learning_rate": 6.919220555828502e-06, + "loss": 24.8963, + "step": 218570 + }, + { + "epoch": 0.4415454292028426, + "grad_norm": 188.27462768554688, + "learning_rate": 6.918898224475382e-06, + "loss": 26.0509, + "step": 218580 + }, + { + "epoch": 0.44156562983552644, + "grad_norm": 415.9721984863281, + "learning_rate": 6.918575883769784e-06, + 
"loss": 17.2536, + "step": 218590 + }, + { + "epoch": 0.44158583046821026, + "grad_norm": 365.4659423828125, + "learning_rate": 6.9182535337132824e-06, + "loss": 17.1436, + "step": 218600 + }, + { + "epoch": 0.4416060311008941, + "grad_norm": 134.54942321777344, + "learning_rate": 6.917931174307444e-06, + "loss": 17.5852, + "step": 218610 + }, + { + "epoch": 0.4416262317335779, + "grad_norm": 482.1897277832031, + "learning_rate": 6.917608805553843e-06, + "loss": 19.3952, + "step": 218620 + }, + { + "epoch": 0.4416464323662617, + "grad_norm": 648.8010864257812, + "learning_rate": 6.917286427454048e-06, + "loss": 21.4208, + "step": 218630 + }, + { + "epoch": 0.44166663299894554, + "grad_norm": 309.67236328125, + "learning_rate": 6.916964040009631e-06, + "loss": 20.092, + "step": 218640 + }, + { + "epoch": 0.44168683363162936, + "grad_norm": 170.3636474609375, + "learning_rate": 6.9166416432221636e-06, + "loss": 20.4238, + "step": 218650 + }, + { + "epoch": 0.4417070342643132, + "grad_norm": 241.44003295898438, + "learning_rate": 6.916319237093219e-06, + "loss": 25.8699, + "step": 218660 + }, + { + "epoch": 0.441727234896997, + "grad_norm": 174.6161346435547, + "learning_rate": 6.915996821624366e-06, + "loss": 28.2445, + "step": 218670 + }, + { + "epoch": 0.44174743552968077, + "grad_norm": 156.77670288085938, + "learning_rate": 6.915674396817177e-06, + "loss": 23.5601, + "step": 218680 + }, + { + "epoch": 0.4417676361623646, + "grad_norm": 440.65863037109375, + "learning_rate": 6.9153519626732225e-06, + "loss": 28.8264, + "step": 218690 + }, + { + "epoch": 0.4417878367950484, + "grad_norm": 316.6114501953125, + "learning_rate": 6.915029519194076e-06, + "loss": 17.6017, + "step": 218700 + }, + { + "epoch": 0.4418080374277322, + "grad_norm": 210.33470153808594, + "learning_rate": 6.914707066381308e-06, + "loss": 21.3246, + "step": 218710 + }, + { + "epoch": 0.44182823806041605, + "grad_norm": 116.99150848388672, + "learning_rate": 6.914384604236488e-06, + "loss": 14.9222, + "step": 218720 + }, + { + "epoch": 0.44184843869309987, + "grad_norm": 176.82261657714844, + "learning_rate": 6.914062132761192e-06, + "loss": 30.5506, + "step": 218730 + }, + { + "epoch": 0.4418686393257837, + "grad_norm": 772.8134765625, + "learning_rate": 6.913739651956989e-06, + "loss": 16.5471, + "step": 218740 + }, + { + "epoch": 0.4418888399584675, + "grad_norm": 251.96591186523438, + "learning_rate": 6.913417161825449e-06, + "loss": 10.0527, + "step": 218750 + }, + { + "epoch": 0.4419090405911513, + "grad_norm": 480.7281799316406, + "learning_rate": 6.913094662368147e-06, + "loss": 28.3984, + "step": 218760 + }, + { + "epoch": 0.44192924122383515, + "grad_norm": 144.40867614746094, + "learning_rate": 6.912772153586654e-06, + "loss": 12.8837, + "step": 218770 + }, + { + "epoch": 0.44194944185651897, + "grad_norm": 181.55413818359375, + "learning_rate": 6.9124496354825435e-06, + "loss": 17.7858, + "step": 218780 + }, + { + "epoch": 0.4419696424892028, + "grad_norm": 337.3177185058594, + "learning_rate": 6.912127108057381e-06, + "loss": 20.0702, + "step": 218790 + }, + { + "epoch": 0.44198984312188655, + "grad_norm": 575.0049438476562, + "learning_rate": 6.911804571312746e-06, + "loss": 45.3986, + "step": 218800 + }, + { + "epoch": 0.44201004375457037, + "grad_norm": 318.1145935058594, + "learning_rate": 6.911482025250207e-06, + "loss": 18.0525, + "step": 218810 + }, + { + "epoch": 0.4420302443872542, + "grad_norm": 183.11764526367188, + "learning_rate": 6.911159469871335e-06, + "loss": 22.6033, + "step": 218820 + }, + 
{ + "epoch": 0.442050445019938, + "grad_norm": 275.8320007324219, + "learning_rate": 6.9108369051777045e-06, + "loss": 28.4958, + "step": 218830 + }, + { + "epoch": 0.44207064565262183, + "grad_norm": 471.90777587890625, + "learning_rate": 6.910514331170888e-06, + "loss": 17.0451, + "step": 218840 + }, + { + "epoch": 0.44209084628530565, + "grad_norm": 406.13275146484375, + "learning_rate": 6.910191747852455e-06, + "loss": 33.3191, + "step": 218850 + }, + { + "epoch": 0.44211104691798947, + "grad_norm": 95.6614990234375, + "learning_rate": 6.909869155223978e-06, + "loss": 20.2539, + "step": 218860 + }, + { + "epoch": 0.4421312475506733, + "grad_norm": 217.68885803222656, + "learning_rate": 6.909546553287032e-06, + "loss": 33.5004, + "step": 218870 + }, + { + "epoch": 0.4421514481833571, + "grad_norm": 498.65191650390625, + "learning_rate": 6.909223942043187e-06, + "loss": 17.5515, + "step": 218880 + }, + { + "epoch": 0.44217164881604093, + "grad_norm": 545.9909057617188, + "learning_rate": 6.908901321494017e-06, + "loss": 25.4967, + "step": 218890 + }, + { + "epoch": 0.44219184944872475, + "grad_norm": 597.2545166015625, + "learning_rate": 6.908578691641092e-06, + "loss": 16.2348, + "step": 218900 + }, + { + "epoch": 0.44221205008140857, + "grad_norm": 441.7108459472656, + "learning_rate": 6.9082560524859875e-06, + "loss": 17.0352, + "step": 218910 + }, + { + "epoch": 0.4422322507140924, + "grad_norm": 46.86520767211914, + "learning_rate": 6.907933404030274e-06, + "loss": 11.285, + "step": 218920 + }, + { + "epoch": 0.44225245134677615, + "grad_norm": 321.8306579589844, + "learning_rate": 6.907610746275524e-06, + "loss": 16.6901, + "step": 218930 + }, + { + "epoch": 0.44227265197946, + "grad_norm": 546.273193359375, + "learning_rate": 6.907288079223311e-06, + "loss": 24.1094, + "step": 218940 + }, + { + "epoch": 0.4422928526121438, + "grad_norm": 152.28660583496094, + "learning_rate": 6.906965402875207e-06, + "loss": 13.4603, + "step": 218950 + }, + { + "epoch": 0.4423130532448276, + "grad_norm": 220.2197723388672, + "learning_rate": 6.906642717232786e-06, + "loss": 9.6003, + "step": 218960 + }, + { + "epoch": 0.44233325387751143, + "grad_norm": 496.92596435546875, + "learning_rate": 6.906320022297618e-06, + "loss": 17.6962, + "step": 218970 + }, + { + "epoch": 0.44235345451019525, + "grad_norm": 561.6021728515625, + "learning_rate": 6.905997318071278e-06, + "loss": 33.4298, + "step": 218980 + }, + { + "epoch": 0.4423736551428791, + "grad_norm": 459.9938049316406, + "learning_rate": 6.905674604555337e-06, + "loss": 13.3946, + "step": 218990 + }, + { + "epoch": 0.4423938557755629, + "grad_norm": 73.86894989013672, + "learning_rate": 6.905351881751372e-06, + "loss": 29.4571, + "step": 219000 + }, + { + "epoch": 0.4424140564082467, + "grad_norm": 339.7744445800781, + "learning_rate": 6.905029149660951e-06, + "loss": 14.2523, + "step": 219010 + }, + { + "epoch": 0.44243425704093053, + "grad_norm": 366.2799987792969, + "learning_rate": 6.904706408285649e-06, + "loss": 22.1335, + "step": 219020 + }, + { + "epoch": 0.44245445767361435, + "grad_norm": 750.3668823242188, + "learning_rate": 6.90438365762704e-06, + "loss": 16.5124, + "step": 219030 + }, + { + "epoch": 0.4424746583062982, + "grad_norm": 103.09490203857422, + "learning_rate": 6.904060897686695e-06, + "loss": 14.9539, + "step": 219040 + }, + { + "epoch": 0.442494858938982, + "grad_norm": 1246.744873046875, + "learning_rate": 6.903738128466189e-06, + "loss": 31.4349, + "step": 219050 + }, + { + "epoch": 0.44251505957166576, + 
"grad_norm": 225.55918884277344, + "learning_rate": 6.903415349967092e-06, + "loss": 19.1666, + "step": 219060 + }, + { + "epoch": 0.4425352602043496, + "grad_norm": 290.3597106933594, + "learning_rate": 6.903092562190983e-06, + "loss": 17.0541, + "step": 219070 + }, + { + "epoch": 0.4425554608370334, + "grad_norm": 400.8343200683594, + "learning_rate": 6.902769765139429e-06, + "loss": 19.3249, + "step": 219080 + }, + { + "epoch": 0.4425756614697172, + "grad_norm": 175.66539001464844, + "learning_rate": 6.902446958814007e-06, + "loss": 14.8605, + "step": 219090 + }, + { + "epoch": 0.44259586210240104, + "grad_norm": 979.9549560546875, + "learning_rate": 6.9021241432162886e-06, + "loss": 24.2692, + "step": 219100 + }, + { + "epoch": 0.44261606273508486, + "grad_norm": 302.62091064453125, + "learning_rate": 6.901801318347848e-06, + "loss": 27.2775, + "step": 219110 + }, + { + "epoch": 0.4426362633677687, + "grad_norm": 633.2466430664062, + "learning_rate": 6.90147848421026e-06, + "loss": 50.8793, + "step": 219120 + }, + { + "epoch": 0.4426564640004525, + "grad_norm": 130.0436553955078, + "learning_rate": 6.901155640805095e-06, + "loss": 26.6634, + "step": 219130 + }, + { + "epoch": 0.4426766646331363, + "grad_norm": 346.499755859375, + "learning_rate": 6.900832788133928e-06, + "loss": 17.779, + "step": 219140 + }, + { + "epoch": 0.44269686526582014, + "grad_norm": 549.9331665039062, + "learning_rate": 6.900509926198332e-06, + "loss": 42.8939, + "step": 219150 + }, + { + "epoch": 0.44271706589850396, + "grad_norm": 501.3892517089844, + "learning_rate": 6.900187054999883e-06, + "loss": 24.4063, + "step": 219160 + }, + { + "epoch": 0.4427372665311878, + "grad_norm": 452.5372314453125, + "learning_rate": 6.899864174540151e-06, + "loss": 16.4124, + "step": 219170 + }, + { + "epoch": 0.4427574671638716, + "grad_norm": 458.61090087890625, + "learning_rate": 6.899541284820712e-06, + "loss": 24.1362, + "step": 219180 + }, + { + "epoch": 0.44277766779655536, + "grad_norm": 415.2669982910156, + "learning_rate": 6.899218385843139e-06, + "loss": 11.8202, + "step": 219190 + }, + { + "epoch": 0.4427978684292392, + "grad_norm": 659.0968017578125, + "learning_rate": 6.898895477609007e-06, + "loss": 26.5903, + "step": 219200 + }, + { + "epoch": 0.442818069061923, + "grad_norm": 1159.6007080078125, + "learning_rate": 6.898572560119888e-06, + "loss": 44.2738, + "step": 219210 + }, + { + "epoch": 0.4428382696946068, + "grad_norm": 209.72499084472656, + "learning_rate": 6.898249633377355e-06, + "loss": 15.2883, + "step": 219220 + }, + { + "epoch": 0.44285847032729064, + "grad_norm": 231.04542541503906, + "learning_rate": 6.897926697382986e-06, + "loss": 16.2038, + "step": 219230 + }, + { + "epoch": 0.44287867095997446, + "grad_norm": 271.1341857910156, + "learning_rate": 6.897603752138351e-06, + "loss": 27.1958, + "step": 219240 + }, + { + "epoch": 0.4428988715926583, + "grad_norm": 824.4793701171875, + "learning_rate": 6.897280797645026e-06, + "loss": 23.8028, + "step": 219250 + }, + { + "epoch": 0.4429190722253421, + "grad_norm": 307.6236877441406, + "learning_rate": 6.8969578339045855e-06, + "loss": 23.5481, + "step": 219260 + }, + { + "epoch": 0.4429392728580259, + "grad_norm": 413.1080627441406, + "learning_rate": 6.8966348609186005e-06, + "loss": 22.1092, + "step": 219270 + }, + { + "epoch": 0.44295947349070974, + "grad_norm": 480.2059020996094, + "learning_rate": 6.896311878688648e-06, + "loss": 25.5122, + "step": 219280 + }, + { + "epoch": 0.44297967412339356, + "grad_norm": 254.49496459960938, + 
"learning_rate": 6.895988887216303e-06, + "loss": 13.311, + "step": 219290 + }, + { + "epoch": 0.4429998747560774, + "grad_norm": 784.9171752929688, + "learning_rate": 6.895665886503136e-06, + "loss": 23.6449, + "step": 219300 + }, + { + "epoch": 0.4430200753887612, + "grad_norm": 355.4720764160156, + "learning_rate": 6.895342876550724e-06, + "loss": 9.4687, + "step": 219310 + }, + { + "epoch": 0.44304027602144497, + "grad_norm": 431.9592590332031, + "learning_rate": 6.895019857360641e-06, + "loss": 36.8282, + "step": 219320 + }, + { + "epoch": 0.4430604766541288, + "grad_norm": 108.13883209228516, + "learning_rate": 6.8946968289344605e-06, + "loss": 11.7617, + "step": 219330 + }, + { + "epoch": 0.4430806772868126, + "grad_norm": 415.89080810546875, + "learning_rate": 6.894373791273758e-06, + "loss": 18.5024, + "step": 219340 + }, + { + "epoch": 0.4431008779194964, + "grad_norm": 3.8527114391326904, + "learning_rate": 6.8940507443801076e-06, + "loss": 15.8009, + "step": 219350 + }, + { + "epoch": 0.44312107855218025, + "grad_norm": 230.16094970703125, + "learning_rate": 6.893727688255083e-06, + "loss": 18.3914, + "step": 219360 + }, + { + "epoch": 0.44314127918486407, + "grad_norm": 476.1678161621094, + "learning_rate": 6.8934046229002605e-06, + "loss": 11.7016, + "step": 219370 + }, + { + "epoch": 0.4431614798175479, + "grad_norm": 706.7972412109375, + "learning_rate": 6.893081548317212e-06, + "loss": 26.295, + "step": 219380 + }, + { + "epoch": 0.4431816804502317, + "grad_norm": 592.8897705078125, + "learning_rate": 6.8927584645075154e-06, + "loss": 10.9637, + "step": 219390 + }, + { + "epoch": 0.4432018810829155, + "grad_norm": 326.7542724609375, + "learning_rate": 6.892435371472741e-06, + "loss": 21.9441, + "step": 219400 + }, + { + "epoch": 0.44322208171559935, + "grad_norm": 340.3212585449219, + "learning_rate": 6.892112269214468e-06, + "loss": 41.4171, + "step": 219410 + }, + { + "epoch": 0.44324228234828317, + "grad_norm": 194.18162536621094, + "learning_rate": 6.8917891577342685e-06, + "loss": 17.9069, + "step": 219420 + }, + { + "epoch": 0.443262482980967, + "grad_norm": 225.82171630859375, + "learning_rate": 6.891466037033718e-06, + "loss": 41.562, + "step": 219430 + }, + { + "epoch": 0.44328268361365075, + "grad_norm": 573.9820556640625, + "learning_rate": 6.891142907114392e-06, + "loss": 23.5243, + "step": 219440 + }, + { + "epoch": 0.44330288424633457, + "grad_norm": 1166.9981689453125, + "learning_rate": 6.890819767977865e-06, + "loss": 24.1516, + "step": 219450 + }, + { + "epoch": 0.4433230848790184, + "grad_norm": 253.97682189941406, + "learning_rate": 6.890496619625713e-06, + "loss": 23.7371, + "step": 219460 + }, + { + "epoch": 0.4433432855117022, + "grad_norm": 495.72625732421875, + "learning_rate": 6.890173462059508e-06, + "loss": 23.6401, + "step": 219470 + }, + { + "epoch": 0.44336348614438603, + "grad_norm": 618.368408203125, + "learning_rate": 6.889850295280827e-06, + "loss": 10.3683, + "step": 219480 + }, + { + "epoch": 0.44338368677706985, + "grad_norm": 560.4053955078125, + "learning_rate": 6.8895271192912435e-06, + "loss": 18.8768, + "step": 219490 + }, + { + "epoch": 0.44340388740975367, + "grad_norm": 249.0582733154297, + "learning_rate": 6.889203934092337e-06, + "loss": 25.7241, + "step": 219500 + }, + { + "epoch": 0.4434240880424375, + "grad_norm": 222.59014892578125, + "learning_rate": 6.888880739685677e-06, + "loss": 12.0046, + "step": 219510 + }, + { + "epoch": 0.4434442886751213, + "grad_norm": 619.3270874023438, + "learning_rate": 
6.888557536072843e-06, + "loss": 21.5745, + "step": 219520 + }, + { + "epoch": 0.44346448930780513, + "grad_norm": 507.25189208984375, + "learning_rate": 6.888234323255408e-06, + "loss": 29.5272, + "step": 219530 + }, + { + "epoch": 0.44348468994048895, + "grad_norm": 303.3486022949219, + "learning_rate": 6.8879111012349475e-06, + "loss": 21.201, + "step": 219540 + }, + { + "epoch": 0.44350489057317277, + "grad_norm": 186.70721435546875, + "learning_rate": 6.887587870013039e-06, + "loss": 14.6315, + "step": 219550 + }, + { + "epoch": 0.4435250912058566, + "grad_norm": 179.59512329101562, + "learning_rate": 6.887264629591254e-06, + "loss": 23.6044, + "step": 219560 + }, + { + "epoch": 0.44354529183854036, + "grad_norm": 166.09658813476562, + "learning_rate": 6.886941379971172e-06, + "loss": 25.149, + "step": 219570 + }, + { + "epoch": 0.4435654924712242, + "grad_norm": 434.411865234375, + "learning_rate": 6.886618121154364e-06, + "loss": 19.2683, + "step": 219580 + }, + { + "epoch": 0.443585693103908, + "grad_norm": 0.0, + "learning_rate": 6.88629485314241e-06, + "loss": 24.9773, + "step": 219590 + }, + { + "epoch": 0.4436058937365918, + "grad_norm": 530.9507446289062, + "learning_rate": 6.885971575936884e-06, + "loss": 22.3455, + "step": 219600 + }, + { + "epoch": 0.44362609436927564, + "grad_norm": 517.7140502929688, + "learning_rate": 6.885648289539362e-06, + "loss": 13.0993, + "step": 219610 + }, + { + "epoch": 0.44364629500195946, + "grad_norm": 580.7538452148438, + "learning_rate": 6.8853249939514165e-06, + "loss": 17.3157, + "step": 219620 + }, + { + "epoch": 0.4436664956346433, + "grad_norm": 0.0, + "learning_rate": 6.885001689174627e-06, + "loss": 31.4602, + "step": 219630 + }, + { + "epoch": 0.4436866962673271, + "grad_norm": 295.22637939453125, + "learning_rate": 6.884678375210568e-06, + "loss": 13.4412, + "step": 219640 + }, + { + "epoch": 0.4437068969000109, + "grad_norm": 632.1878051757812, + "learning_rate": 6.884355052060814e-06, + "loss": 23.5613, + "step": 219650 + }, + { + "epoch": 0.44372709753269474, + "grad_norm": 657.80029296875, + "learning_rate": 6.884031719726943e-06, + "loss": 32.8196, + "step": 219660 + }, + { + "epoch": 0.44374729816537856, + "grad_norm": 558.5362548828125, + "learning_rate": 6.8837083782105296e-06, + "loss": 15.4952, + "step": 219670 + }, + { + "epoch": 0.4437674987980624, + "grad_norm": 266.2539367675781, + "learning_rate": 6.883385027513151e-06, + "loss": 29.817, + "step": 219680 + }, + { + "epoch": 0.4437876994307462, + "grad_norm": 463.96905517578125, + "learning_rate": 6.88306166763638e-06, + "loss": 14.0948, + "step": 219690 + }, + { + "epoch": 0.44380790006342996, + "grad_norm": 404.00384521484375, + "learning_rate": 6.882738298581797e-06, + "loss": 26.9113, + "step": 219700 + }, + { + "epoch": 0.4438281006961138, + "grad_norm": 637.414794921875, + "learning_rate": 6.882414920350975e-06, + "loss": 22.0788, + "step": 219710 + }, + { + "epoch": 0.4438483013287976, + "grad_norm": 785.491455078125, + "learning_rate": 6.882091532945491e-06, + "loss": 22.8759, + "step": 219720 + }, + { + "epoch": 0.4438685019614814, + "grad_norm": 506.03900146484375, + "learning_rate": 6.881768136366922e-06, + "loss": 19.3762, + "step": 219730 + }, + { + "epoch": 0.44388870259416524, + "grad_norm": 1084.349853515625, + "learning_rate": 6.881444730616842e-06, + "loss": 19.5856, + "step": 219740 + }, + { + "epoch": 0.44390890322684906, + "grad_norm": 337.2403869628906, + "learning_rate": 6.881121315696828e-06, + "loss": 28.647, + "step": 219750 + }, + { + 
"epoch": 0.4439291038595329, + "grad_norm": 615.3025512695312, + "learning_rate": 6.880797891608458e-06, + "loss": 25.024, + "step": 219760 + }, + { + "epoch": 0.4439493044922167, + "grad_norm": 195.57460021972656, + "learning_rate": 6.880474458353309e-06, + "loss": 22.0026, + "step": 219770 + }, + { + "epoch": 0.4439695051249005, + "grad_norm": 328.4720764160156, + "learning_rate": 6.880151015932952e-06, + "loss": 18.3296, + "step": 219780 + }, + { + "epoch": 0.44398970575758434, + "grad_norm": 192.4298553466797, + "learning_rate": 6.87982756434897e-06, + "loss": 10.177, + "step": 219790 + }, + { + "epoch": 0.44400990639026816, + "grad_norm": 484.88909912109375, + "learning_rate": 6.879504103602934e-06, + "loss": 19.0882, + "step": 219800 + }, + { + "epoch": 0.444030107022952, + "grad_norm": 285.41278076171875, + "learning_rate": 6.879180633696425e-06, + "loss": 8.9307, + "step": 219810 + }, + { + "epoch": 0.4440503076556358, + "grad_norm": 945.0198974609375, + "learning_rate": 6.878857154631016e-06, + "loss": 46.455, + "step": 219820 + }, + { + "epoch": 0.44407050828831957, + "grad_norm": 355.7682800292969, + "learning_rate": 6.878533666408286e-06, + "loss": 13.8178, + "step": 219830 + }, + { + "epoch": 0.4440907089210034, + "grad_norm": 1002.5101318359375, + "learning_rate": 6.878210169029811e-06, + "loss": 29.3335, + "step": 219840 + }, + { + "epoch": 0.4441109095536872, + "grad_norm": 97.81233978271484, + "learning_rate": 6.877886662497165e-06, + "loss": 19.1359, + "step": 219850 + }, + { + "epoch": 0.444131110186371, + "grad_norm": 103.65803527832031, + "learning_rate": 6.877563146811931e-06, + "loss": 24.5209, + "step": 219860 + }, + { + "epoch": 0.44415131081905485, + "grad_norm": 601.9344482421875, + "learning_rate": 6.87723962197568e-06, + "loss": 12.1354, + "step": 219870 + }, + { + "epoch": 0.44417151145173867, + "grad_norm": 181.5896759033203, + "learning_rate": 6.87691608798999e-06, + "loss": 32.641, + "step": 219880 + }, + { + "epoch": 0.4441917120844225, + "grad_norm": 541.8621215820312, + "learning_rate": 6.87659254485644e-06, + "loss": 21.6538, + "step": 219890 + }, + { + "epoch": 0.4442119127171063, + "grad_norm": 790.8692016601562, + "learning_rate": 6.876268992576605e-06, + "loss": 18.3269, + "step": 219900 + }, + { + "epoch": 0.4442321133497901, + "grad_norm": 397.9839172363281, + "learning_rate": 6.875945431152063e-06, + "loss": 24.26, + "step": 219910 + }, + { + "epoch": 0.44425231398247395, + "grad_norm": 789.2221069335938, + "learning_rate": 6.875621860584389e-06, + "loss": 19.794, + "step": 219920 + }, + { + "epoch": 0.44427251461515777, + "grad_norm": 249.17762756347656, + "learning_rate": 6.875298280875163e-06, + "loss": 27.8191, + "step": 219930 + }, + { + "epoch": 0.4442927152478416, + "grad_norm": 268.7455749511719, + "learning_rate": 6.874974692025959e-06, + "loss": 27.2405, + "step": 219940 + }, + { + "epoch": 0.4443129158805254, + "grad_norm": 305.5923156738281, + "learning_rate": 6.874651094038358e-06, + "loss": 11.3509, + "step": 219950 + }, + { + "epoch": 0.44433311651320917, + "grad_norm": 430.91143798828125, + "learning_rate": 6.874327486913933e-06, + "loss": 24.2091, + "step": 219960 + }, + { + "epoch": 0.444353317145893, + "grad_norm": 457.19342041015625, + "learning_rate": 6.874003870654265e-06, + "loss": 16.9818, + "step": 219970 + }, + { + "epoch": 0.4443735177785768, + "grad_norm": 1346.923583984375, + "learning_rate": 6.873680245260929e-06, + "loss": 29.6735, + "step": 219980 + }, + { + "epoch": 0.44439371841126063, + "grad_norm": 
722.2505493164062, + "learning_rate": 6.8733566107355e-06, + "loss": 15.069, + "step": 219990 + }, + { + "epoch": 0.44441391904394445, + "grad_norm": 642.849853515625, + "learning_rate": 6.873032967079562e-06, + "loss": 10.8828, + "step": 220000 + }, + { + "epoch": 0.44443411967662827, + "grad_norm": 240.14439392089844, + "learning_rate": 6.872709314294685e-06, + "loss": 22.4157, + "step": 220010 + }, + { + "epoch": 0.4444543203093121, + "grad_norm": 381.982421875, + "learning_rate": 6.872385652382452e-06, + "loss": 21.5925, + "step": 220020 + }, + { + "epoch": 0.4444745209419959, + "grad_norm": 540.565673828125, + "learning_rate": 6.872061981344438e-06, + "loss": 18.8366, + "step": 220030 + }, + { + "epoch": 0.44449472157467973, + "grad_norm": 931.19921875, + "learning_rate": 6.871738301182221e-06, + "loss": 26.1174, + "step": 220040 + }, + { + "epoch": 0.44451492220736355, + "grad_norm": 309.64837646484375, + "learning_rate": 6.87141461189738e-06, + "loss": 33.8266, + "step": 220050 + }, + { + "epoch": 0.44453512284004737, + "grad_norm": 339.34674072265625, + "learning_rate": 6.87109091349149e-06, + "loss": 17.4143, + "step": 220060 + }, + { + "epoch": 0.4445553234727312, + "grad_norm": 538.2361450195312, + "learning_rate": 6.870767205966128e-06, + "loss": 28.7001, + "step": 220070 + }, + { + "epoch": 0.44457552410541495, + "grad_norm": 506.4960021972656, + "learning_rate": 6.870443489322875e-06, + "loss": 20.3924, + "step": 220080 + }, + { + "epoch": 0.4445957247380988, + "grad_norm": 381.004150390625, + "learning_rate": 6.870119763563308e-06, + "loss": 24.2316, + "step": 220090 + }, + { + "epoch": 0.4446159253707826, + "grad_norm": 149.9341278076172, + "learning_rate": 6.869796028689002e-06, + "loss": 10.0959, + "step": 220100 + }, + { + "epoch": 0.4446361260034664, + "grad_norm": 496.5892028808594, + "learning_rate": 6.86947228470154e-06, + "loss": 12.7109, + "step": 220110 + }, + { + "epoch": 0.44465632663615023, + "grad_norm": 401.718505859375, + "learning_rate": 6.8691485316024945e-06, + "loss": 19.4941, + "step": 220120 + }, + { + "epoch": 0.44467652726883405, + "grad_norm": 302.5918884277344, + "learning_rate": 6.8688247693934465e-06, + "loss": 6.8404, + "step": 220130 + }, + { + "epoch": 0.4446967279015179, + "grad_norm": 346.9261474609375, + "learning_rate": 6.868500998075973e-06, + "loss": 40.9869, + "step": 220140 + }, + { + "epoch": 0.4447169285342017, + "grad_norm": 426.3053894042969, + "learning_rate": 6.8681772176516525e-06, + "loss": 32.7499, + "step": 220150 + }, + { + "epoch": 0.4447371291668855, + "grad_norm": 525.0901489257812, + "learning_rate": 6.867853428122063e-06, + "loss": 16.9985, + "step": 220160 + }, + { + "epoch": 0.44475732979956933, + "grad_norm": 332.4767761230469, + "learning_rate": 6.867529629488782e-06, + "loss": 19.9982, + "step": 220170 + }, + { + "epoch": 0.44477753043225315, + "grad_norm": 349.4854736328125, + "learning_rate": 6.867205821753389e-06, + "loss": 33.0529, + "step": 220180 + }, + { + "epoch": 0.444797731064937, + "grad_norm": 8.316431999206543, + "learning_rate": 6.86688200491746e-06, + "loss": 12.1505, + "step": 220190 + }, + { + "epoch": 0.4448179316976208, + "grad_norm": 544.8107299804688, + "learning_rate": 6.866558178982575e-06, + "loss": 8.8382, + "step": 220200 + }, + { + "epoch": 0.44483813233030456, + "grad_norm": 677.3682250976562, + "learning_rate": 6.866234343950312e-06, + "loss": 30.8832, + "step": 220210 + }, + { + "epoch": 0.4448583329629884, + "grad_norm": 361.1523742675781, + "learning_rate": 6.86591049982225e-06, + 
"loss": 30.8473, + "step": 220220 + }, + { + "epoch": 0.4448785335956722, + "grad_norm": 596.5050659179688, + "learning_rate": 6.865586646599965e-06, + "loss": 27.6241, + "step": 220230 + }, + { + "epoch": 0.444898734228356, + "grad_norm": 196.83078002929688, + "learning_rate": 6.8652627842850374e-06, + "loss": 17.0317, + "step": 220240 + }, + { + "epoch": 0.44491893486103984, + "grad_norm": 213.5590362548828, + "learning_rate": 6.8649389128790455e-06, + "loss": 13.1203, + "step": 220250 + }, + { + "epoch": 0.44493913549372366, + "grad_norm": 319.96099853515625, + "learning_rate": 6.864615032383567e-06, + "loss": 9.0173, + "step": 220260 + }, + { + "epoch": 0.4449593361264075, + "grad_norm": 687.814453125, + "learning_rate": 6.864291142800183e-06, + "loss": 16.3831, + "step": 220270 + }, + { + "epoch": 0.4449795367590913, + "grad_norm": 492.3997497558594, + "learning_rate": 6.863967244130467e-06, + "loss": 14.2196, + "step": 220280 + }, + { + "epoch": 0.4449997373917751, + "grad_norm": 398.268310546875, + "learning_rate": 6.8636433363760025e-06, + "loss": 15.2738, + "step": 220290 + }, + { + "epoch": 0.44501993802445894, + "grad_norm": 95.0443115234375, + "learning_rate": 6.863319419538366e-06, + "loss": 22.0605, + "step": 220300 + }, + { + "epoch": 0.44504013865714276, + "grad_norm": 98.15335083007812, + "learning_rate": 6.862995493619137e-06, + "loss": 17.2972, + "step": 220310 + }, + { + "epoch": 0.4450603392898266, + "grad_norm": 43.280120849609375, + "learning_rate": 6.862671558619894e-06, + "loss": 26.5221, + "step": 220320 + }, + { + "epoch": 0.4450805399225104, + "grad_norm": 417.2856750488281, + "learning_rate": 6.862347614542214e-06, + "loss": 31.7517, + "step": 220330 + }, + { + "epoch": 0.44510074055519416, + "grad_norm": 396.2840576171875, + "learning_rate": 6.86202366138768e-06, + "loss": 27.3912, + "step": 220340 + }, + { + "epoch": 0.445120941187878, + "grad_norm": 568.3402099609375, + "learning_rate": 6.861699699157868e-06, + "loss": 17.0884, + "step": 220350 + }, + { + "epoch": 0.4451411418205618, + "grad_norm": 148.70721435546875, + "learning_rate": 6.861375727854356e-06, + "loss": 21.2353, + "step": 220360 + }, + { + "epoch": 0.4451613424532456, + "grad_norm": 509.0034484863281, + "learning_rate": 6.861051747478727e-06, + "loss": 20.7965, + "step": 220370 + }, + { + "epoch": 0.44518154308592944, + "grad_norm": 309.30206298828125, + "learning_rate": 6.860727758032555e-06, + "loss": 23.0757, + "step": 220380 + }, + { + "epoch": 0.44520174371861326, + "grad_norm": 29.304731369018555, + "learning_rate": 6.860403759517422e-06, + "loss": 9.9736, + "step": 220390 + }, + { + "epoch": 0.4452219443512971, + "grad_norm": 858.1065063476562, + "learning_rate": 6.860079751934908e-06, + "loss": 31.402, + "step": 220400 + }, + { + "epoch": 0.4452421449839809, + "grad_norm": 444.6431884765625, + "learning_rate": 6.859755735286589e-06, + "loss": 17.8292, + "step": 220410 + }, + { + "epoch": 0.4452623456166647, + "grad_norm": 171.51792907714844, + "learning_rate": 6.859431709574048e-06, + "loss": 18.3242, + "step": 220420 + }, + { + "epoch": 0.44528254624934854, + "grad_norm": 310.8061828613281, + "learning_rate": 6.859107674798863e-06, + "loss": 41.8775, + "step": 220430 + }, + { + "epoch": 0.44530274688203236, + "grad_norm": 431.1271057128906, + "learning_rate": 6.85878363096261e-06, + "loss": 14.0809, + "step": 220440 + }, + { + "epoch": 0.4453229475147162, + "grad_norm": 518.21826171875, + "learning_rate": 6.858459578066873e-06, + "loss": 26.5326, + "step": 220450 + }, + { + "epoch": 
0.4453431481474, + "grad_norm": 476.1693420410156, + "learning_rate": 6.858135516113226e-06, + "loss": 33.7109, + "step": 220460 + }, + { + "epoch": 0.44536334878008377, + "grad_norm": 565.9754638671875, + "learning_rate": 6.857811445103257e-06, + "loss": 24.4651, + "step": 220470 + }, + { + "epoch": 0.4453835494127676, + "grad_norm": 838.7970581054688, + "learning_rate": 6.857487365038537e-06, + "loss": 29.9847, + "step": 220480 + }, + { + "epoch": 0.4454037500454514, + "grad_norm": 55.276100158691406, + "learning_rate": 6.857163275920651e-06, + "loss": 17.5076, + "step": 220490 + }, + { + "epoch": 0.4454239506781352, + "grad_norm": 852.9766845703125, + "learning_rate": 6.856839177751175e-06, + "loss": 19.5113, + "step": 220500 + }, + { + "epoch": 0.44544415131081905, + "grad_norm": 701.2528076171875, + "learning_rate": 6.85651507053169e-06, + "loss": 15.4285, + "step": 220510 + }, + { + "epoch": 0.44546435194350287, + "grad_norm": 863.6895141601562, + "learning_rate": 6.856190954263776e-06, + "loss": 20.8058, + "step": 220520 + }, + { + "epoch": 0.4454845525761867, + "grad_norm": 336.2017822265625, + "learning_rate": 6.8558668289490126e-06, + "loss": 32.8068, + "step": 220530 + }, + { + "epoch": 0.4455047532088705, + "grad_norm": 598.2466430664062, + "learning_rate": 6.855542694588979e-06, + "loss": 20.53, + "step": 220540 + }, + { + "epoch": 0.4455249538415543, + "grad_norm": 662.9794311523438, + "learning_rate": 6.8552185511852555e-06, + "loss": 9.7046, + "step": 220550 + }, + { + "epoch": 0.44554515447423815, + "grad_norm": 0.0, + "learning_rate": 6.854894398739422e-06, + "loss": 12.0585, + "step": 220560 + }, + { + "epoch": 0.44556535510692197, + "grad_norm": 249.34548950195312, + "learning_rate": 6.854570237253059e-06, + "loss": 13.9427, + "step": 220570 + }, + { + "epoch": 0.4455855557396058, + "grad_norm": 1056.8892822265625, + "learning_rate": 6.854246066727743e-06, + "loss": 30.9743, + "step": 220580 + }, + { + "epoch": 0.4456057563722896, + "grad_norm": 420.1027526855469, + "learning_rate": 6.8539218871650605e-06, + "loss": 20.473, + "step": 220590 + }, + { + "epoch": 0.44562595700497337, + "grad_norm": 560.8649291992188, + "learning_rate": 6.853597698566583e-06, + "loss": 20.4571, + "step": 220600 + }, + { + "epoch": 0.4456461576376572, + "grad_norm": 421.0732421875, + "learning_rate": 6.853273500933899e-06, + "loss": 25.3511, + "step": 220610 + }, + { + "epoch": 0.445666358270341, + "grad_norm": 424.44268798828125, + "learning_rate": 6.852949294268582e-06, + "loss": 26.6552, + "step": 220620 + }, + { + "epoch": 0.44568655890302483, + "grad_norm": 814.9902954101562, + "learning_rate": 6.852625078572217e-06, + "loss": 25.6921, + "step": 220630 + }, + { + "epoch": 0.44570675953570865, + "grad_norm": 214.76417541503906, + "learning_rate": 6.852300853846381e-06, + "loss": 10.8756, + "step": 220640 + }, + { + "epoch": 0.44572696016839247, + "grad_norm": 639.0645141601562, + "learning_rate": 6.851976620092655e-06, + "loss": 18.1716, + "step": 220650 + }, + { + "epoch": 0.4457471608010763, + "grad_norm": 479.9458312988281, + "learning_rate": 6.851652377312621e-06, + "loss": 30.2227, + "step": 220660 + }, + { + "epoch": 0.4457673614337601, + "grad_norm": 64.48029327392578, + "learning_rate": 6.851328125507856e-06, + "loss": 43.543, + "step": 220670 + }, + { + "epoch": 0.44578756206644393, + "grad_norm": 961.3707275390625, + "learning_rate": 6.851003864679943e-06, + "loss": 24.3358, + "step": 220680 + }, + { + "epoch": 0.44580776269912775, + "grad_norm": 1021.1846923828125, + 
"learning_rate": 6.850679594830461e-06, + "loss": 28.7365, + "step": 220690 + }, + { + "epoch": 0.44582796333181157, + "grad_norm": 1020.2935791015625, + "learning_rate": 6.850355315960992e-06, + "loss": 19.7658, + "step": 220700 + }, + { + "epoch": 0.4458481639644954, + "grad_norm": 511.8146667480469, + "learning_rate": 6.850031028073115e-06, + "loss": 19.4434, + "step": 220710 + }, + { + "epoch": 0.44586836459717916, + "grad_norm": 129.0845489501953, + "learning_rate": 6.849706731168413e-06, + "loss": 16.4215, + "step": 220720 + }, + { + "epoch": 0.445888565229863, + "grad_norm": 483.9500732421875, + "learning_rate": 6.849382425248463e-06, + "loss": 20.441, + "step": 220730 + }, + { + "epoch": 0.4459087658625468, + "grad_norm": 215.6471405029297, + "learning_rate": 6.849058110314848e-06, + "loss": 8.7657, + "step": 220740 + }, + { + "epoch": 0.4459289664952306, + "grad_norm": 588.5631713867188, + "learning_rate": 6.848733786369147e-06, + "loss": 19.9991, + "step": 220750 + }, + { + "epoch": 0.44594916712791444, + "grad_norm": 94.149169921875, + "learning_rate": 6.848409453412943e-06, + "loss": 23.1848, + "step": 220760 + }, + { + "epoch": 0.44596936776059826, + "grad_norm": 458.5713806152344, + "learning_rate": 6.848085111447815e-06, + "loss": 11.2409, + "step": 220770 + }, + { + "epoch": 0.4459895683932821, + "grad_norm": 333.8248596191406, + "learning_rate": 6.847760760475344e-06, + "loss": 23.8046, + "step": 220780 + }, + { + "epoch": 0.4460097690259659, + "grad_norm": 186.26602172851562, + "learning_rate": 6.8474364004971115e-06, + "loss": 16.6932, + "step": 220790 + }, + { + "epoch": 0.4460299696586497, + "grad_norm": 570.8720092773438, + "learning_rate": 6.847112031514698e-06, + "loss": 21.8735, + "step": 220800 + }, + { + "epoch": 0.44605017029133354, + "grad_norm": 262.8045959472656, + "learning_rate": 6.8467876535296855e-06, + "loss": 8.6507, + "step": 220810 + }, + { + "epoch": 0.44607037092401736, + "grad_norm": 310.4090576171875, + "learning_rate": 6.846463266543653e-06, + "loss": 34.8979, + "step": 220820 + }, + { + "epoch": 0.4460905715567012, + "grad_norm": 473.4156188964844, + "learning_rate": 6.846138870558181e-06, + "loss": 19.9906, + "step": 220830 + }, + { + "epoch": 0.446110772189385, + "grad_norm": 256.7278137207031, + "learning_rate": 6.845814465574855e-06, + "loss": 17.3198, + "step": 220840 + }, + { + "epoch": 0.44613097282206876, + "grad_norm": 511.5408630371094, + "learning_rate": 6.845490051595252e-06, + "loss": 13.9553, + "step": 220850 + }, + { + "epoch": 0.4461511734547526, + "grad_norm": 214.9493408203125, + "learning_rate": 6.8451656286209535e-06, + "loss": 34.4136, + "step": 220860 + }, + { + "epoch": 0.4461713740874364, + "grad_norm": 467.59490966796875, + "learning_rate": 6.844841196653541e-06, + "loss": 9.1946, + "step": 220870 + }, + { + "epoch": 0.4461915747201202, + "grad_norm": 576.91650390625, + "learning_rate": 6.844516755694599e-06, + "loss": 17.083, + "step": 220880 + }, + { + "epoch": 0.44621177535280404, + "grad_norm": 195.52423095703125, + "learning_rate": 6.844192305745702e-06, + "loss": 21.3189, + "step": 220890 + }, + { + "epoch": 0.44623197598548786, + "grad_norm": 591.4985961914062, + "learning_rate": 6.843867846808438e-06, + "loss": 39.7037, + "step": 220900 + }, + { + "epoch": 0.4462521766181717, + "grad_norm": 275.8883056640625, + "learning_rate": 6.8435433788843865e-06, + "loss": 10.8072, + "step": 220910 + }, + { + "epoch": 0.4462723772508555, + "grad_norm": 129.10862731933594, + "learning_rate": 6.843218901975127e-06, + "loss": 
21.2382, + "step": 220920 + }, + { + "epoch": 0.4462925778835393, + "grad_norm": 384.60406494140625, + "learning_rate": 6.842894416082243e-06, + "loss": 15.3655, + "step": 220930 + }, + { + "epoch": 0.44631277851622314, + "grad_norm": 249.91744995117188, + "learning_rate": 6.842569921207314e-06, + "loss": 13.1972, + "step": 220940 + }, + { + "epoch": 0.44633297914890696, + "grad_norm": 83.22366333007812, + "learning_rate": 6.842245417351923e-06, + "loss": 11.8011, + "step": 220950 + }, + { + "epoch": 0.4463531797815908, + "grad_norm": 585.205810546875, + "learning_rate": 6.841920904517652e-06, + "loss": 20.8354, + "step": 220960 + }, + { + "epoch": 0.4463733804142746, + "grad_norm": 316.5137939453125, + "learning_rate": 6.84159638270608e-06, + "loss": 7.4139, + "step": 220970 + }, + { + "epoch": 0.44639358104695837, + "grad_norm": 281.46441650390625, + "learning_rate": 6.8412718519187916e-06, + "loss": 19.4551, + "step": 220980 + }, + { + "epoch": 0.4464137816796422, + "grad_norm": 1354.536865234375, + "learning_rate": 6.840947312157367e-06, + "loss": 22.3534, + "step": 220990 + }, + { + "epoch": 0.446433982312326, + "grad_norm": 616.942626953125, + "learning_rate": 6.840622763423391e-06, + "loss": 21.2377, + "step": 221000 + }, + { + "epoch": 0.4464541829450098, + "grad_norm": 1267.2545166015625, + "learning_rate": 6.840298205718441e-06, + "loss": 34.4087, + "step": 221010 + }, + { + "epoch": 0.44647438357769365, + "grad_norm": 317.268310546875, + "learning_rate": 6.839973639044101e-06, + "loss": 18.1336, + "step": 221020 + }, + { + "epoch": 0.44649458421037747, + "grad_norm": 410.1036682128906, + "learning_rate": 6.839649063401952e-06, + "loss": 22.5897, + "step": 221030 + }, + { + "epoch": 0.4465147848430613, + "grad_norm": 146.34117126464844, + "learning_rate": 6.8393244787935775e-06, + "loss": 16.7547, + "step": 221040 + }, + { + "epoch": 0.4465349854757451, + "grad_norm": 494.58782958984375, + "learning_rate": 6.838999885220558e-06, + "loss": 29.628, + "step": 221050 + }, + { + "epoch": 0.4465551861084289, + "grad_norm": 530.4630737304688, + "learning_rate": 6.838675282684477e-06, + "loss": 26.4836, + "step": 221060 + }, + { + "epoch": 0.44657538674111275, + "grad_norm": 259.78839111328125, + "learning_rate": 6.838350671186914e-06, + "loss": 21.3799, + "step": 221070 + }, + { + "epoch": 0.44659558737379657, + "grad_norm": 490.98724365234375, + "learning_rate": 6.838026050729454e-06, + "loss": 15.9772, + "step": 221080 + }, + { + "epoch": 0.4466157880064804, + "grad_norm": 2814.173095703125, + "learning_rate": 6.837701421313677e-06, + "loss": 40.5265, + "step": 221090 + }, + { + "epoch": 0.4466359886391642, + "grad_norm": 393.2685546875, + "learning_rate": 6.837376782941168e-06, + "loss": 29.2508, + "step": 221100 + }, + { + "epoch": 0.44665618927184797, + "grad_norm": 194.7854461669922, + "learning_rate": 6.837052135613507e-06, + "loss": 12.3636, + "step": 221110 + }, + { + "epoch": 0.4466763899045318, + "grad_norm": 524.756103515625, + "learning_rate": 6.8367274793322745e-06, + "loss": 9.756, + "step": 221120 + }, + { + "epoch": 0.4466965905372156, + "grad_norm": 458.5486145019531, + "learning_rate": 6.836402814099057e-06, + "loss": 21.9322, + "step": 221130 + }, + { + "epoch": 0.44671679116989943, + "grad_norm": 1.953729510307312, + "learning_rate": 6.836078139915434e-06, + "loss": 20.1561, + "step": 221140 + }, + { + "epoch": 0.44673699180258325, + "grad_norm": 325.73577880859375, + "learning_rate": 6.83575345678299e-06, + "loss": 15.4104, + "step": 221150 + }, + { + "epoch": 
0.44675719243526707, + "grad_norm": 199.3097686767578, + "learning_rate": 6.8354287647033046e-06, + "loss": 16.0741, + "step": 221160 + }, + { + "epoch": 0.4467773930679509, + "grad_norm": 321.97662353515625, + "learning_rate": 6.835104063677964e-06, + "loss": 24.8787, + "step": 221170 + }, + { + "epoch": 0.4467975937006347, + "grad_norm": 488.0107421875, + "learning_rate": 6.8347793537085474e-06, + "loss": 37.2962, + "step": 221180 + }, + { + "epoch": 0.44681779433331853, + "grad_norm": 371.7288513183594, + "learning_rate": 6.834454634796639e-06, + "loss": 13.6815, + "step": 221190 + }, + { + "epoch": 0.44683799496600235, + "grad_norm": 293.863525390625, + "learning_rate": 6.834129906943822e-06, + "loss": 17.7457, + "step": 221200 + }, + { + "epoch": 0.44685819559868617, + "grad_norm": 390.8061828613281, + "learning_rate": 6.833805170151676e-06, + "loss": 36.4255, + "step": 221210 + }, + { + "epoch": 0.44687839623137, + "grad_norm": 511.901123046875, + "learning_rate": 6.8334804244217885e-06, + "loss": 19.4919, + "step": 221220 + }, + { + "epoch": 0.4468985968640538, + "grad_norm": 391.38848876953125, + "learning_rate": 6.833155669755738e-06, + "loss": 22.7092, + "step": 221230 + }, + { + "epoch": 0.4469187974967376, + "grad_norm": 413.0695495605469, + "learning_rate": 6.8328309061551105e-06, + "loss": 18.0782, + "step": 221240 + }, + { + "epoch": 0.4469389981294214, + "grad_norm": 606.9026489257812, + "learning_rate": 6.832506133621487e-06, + "loss": 19.8127, + "step": 221250 + }, + { + "epoch": 0.4469591987621052, + "grad_norm": 770.3826904296875, + "learning_rate": 6.832181352156451e-06, + "loss": 24.4051, + "step": 221260 + }, + { + "epoch": 0.44697939939478903, + "grad_norm": 594.4346313476562, + "learning_rate": 6.831856561761585e-06, + "loss": 31.0357, + "step": 221270 + }, + { + "epoch": 0.44699960002747285, + "grad_norm": 301.72650146484375, + "learning_rate": 6.831531762438472e-06, + "loss": 22.9298, + "step": 221280 + }, + { + "epoch": 0.4470198006601567, + "grad_norm": 61.547115325927734, + "learning_rate": 6.8312069541886964e-06, + "loss": 20.4524, + "step": 221290 + }, + { + "epoch": 0.4470400012928405, + "grad_norm": 266.6343994140625, + "learning_rate": 6.830882137013839e-06, + "loss": 25.9055, + "step": 221300 + }, + { + "epoch": 0.4470602019255243, + "grad_norm": 550.8693237304688, + "learning_rate": 6.830557310915484e-06, + "loss": 39.7968, + "step": 221310 + }, + { + "epoch": 0.44708040255820813, + "grad_norm": 444.9831237792969, + "learning_rate": 6.830232475895215e-06, + "loss": 34.9837, + "step": 221320 + }, + { + "epoch": 0.44710060319089195, + "grad_norm": 189.2568817138672, + "learning_rate": 6.829907631954618e-06, + "loss": 17.8648, + "step": 221330 + }, + { + "epoch": 0.4471208038235758, + "grad_norm": 554.810791015625, + "learning_rate": 6.829582779095269e-06, + "loss": 29.1685, + "step": 221340 + }, + { + "epoch": 0.4471410044562596, + "grad_norm": 81.38941955566406, + "learning_rate": 6.829257917318757e-06, + "loss": 16.7415, + "step": 221350 + }, + { + "epoch": 0.44716120508894336, + "grad_norm": 115.8245849609375, + "learning_rate": 6.8289330466266635e-06, + "loss": 14.2278, + "step": 221360 + }, + { + "epoch": 0.4471814057216272, + "grad_norm": 247.37832641601562, + "learning_rate": 6.828608167020572e-06, + "loss": 15.3853, + "step": 221370 + }, + { + "epoch": 0.447201606354311, + "grad_norm": 292.91973876953125, + "learning_rate": 6.828283278502067e-06, + "loss": 21.835, + "step": 221380 + }, + { + "epoch": 0.4472218069869948, + "grad_norm": 
28.080339431762695, + "learning_rate": 6.827958381072729e-06, + "loss": 23.6165, + "step": 221390 + }, + { + "epoch": 0.44724200761967864, + "grad_norm": 355.7489318847656, + "learning_rate": 6.827633474734145e-06, + "loss": 19.5352, + "step": 221400 + }, + { + "epoch": 0.44726220825236246, + "grad_norm": 1030.5733642578125, + "learning_rate": 6.827308559487897e-06, + "loss": 23.4393, + "step": 221410 + }, + { + "epoch": 0.4472824088850463, + "grad_norm": 181.12928771972656, + "learning_rate": 6.826983635335569e-06, + "loss": 17.1553, + "step": 221420 + }, + { + "epoch": 0.4473026095177301, + "grad_norm": 205.83755493164062, + "learning_rate": 6.826658702278745e-06, + "loss": 13.2372, + "step": 221430 + }, + { + "epoch": 0.4473228101504139, + "grad_norm": 560.5516967773438, + "learning_rate": 6.826333760319006e-06, + "loss": 26.3961, + "step": 221440 + }, + { + "epoch": 0.44734301078309774, + "grad_norm": 43.00218200683594, + "learning_rate": 6.82600880945794e-06, + "loss": 29.598, + "step": 221450 + }, + { + "epoch": 0.44736321141578156, + "grad_norm": 88.67811584472656, + "learning_rate": 6.825683849697127e-06, + "loss": 19.7466, + "step": 221460 + }, + { + "epoch": 0.4473834120484654, + "grad_norm": 367.19464111328125, + "learning_rate": 6.825358881038153e-06, + "loss": 14.5548, + "step": 221470 + }, + { + "epoch": 0.4474036126811492, + "grad_norm": 220.63648986816406, + "learning_rate": 6.825033903482601e-06, + "loss": 30.0329, + "step": 221480 + }, + { + "epoch": 0.44742381331383296, + "grad_norm": 340.2915344238281, + "learning_rate": 6.824708917032056e-06, + "loss": 17.1634, + "step": 221490 + }, + { + "epoch": 0.4474440139465168, + "grad_norm": 290.98419189453125, + "learning_rate": 6.824383921688098e-06, + "loss": 12.8677, + "step": 221500 + }, + { + "epoch": 0.4474642145792006, + "grad_norm": 252.28880310058594, + "learning_rate": 6.824058917452318e-06, + "loss": 21.9886, + "step": 221510 + }, + { + "epoch": 0.4474844152118844, + "grad_norm": 209.39837646484375, + "learning_rate": 6.823733904326293e-06, + "loss": 15.2154, + "step": 221520 + }, + { + "epoch": 0.44750461584456824, + "grad_norm": 715.6478271484375, + "learning_rate": 6.823408882311612e-06, + "loss": 26.3106, + "step": 221530 + }, + { + "epoch": 0.44752481647725206, + "grad_norm": 783.3381958007812, + "learning_rate": 6.823083851409857e-06, + "loss": 23.6285, + "step": 221540 + }, + { + "epoch": 0.4475450171099359, + "grad_norm": 1062.1817626953125, + "learning_rate": 6.822758811622611e-06, + "loss": 32.6851, + "step": 221550 + }, + { + "epoch": 0.4475652177426197, + "grad_norm": 37.7314338684082, + "learning_rate": 6.8224337629514615e-06, + "loss": 13.1089, + "step": 221560 + }, + { + "epoch": 0.4475854183753035, + "grad_norm": 552.5439453125, + "learning_rate": 6.8221087053979894e-06, + "loss": 11.862, + "step": 221570 + }, + { + "epoch": 0.44760561900798734, + "grad_norm": 888.235107421875, + "learning_rate": 6.821783638963782e-06, + "loss": 35.1384, + "step": 221580 + }, + { + "epoch": 0.44762581964067116, + "grad_norm": 267.38714599609375, + "learning_rate": 6.82145856365042e-06, + "loss": 13.1128, + "step": 221590 + }, + { + "epoch": 0.447646020273355, + "grad_norm": 205.28140258789062, + "learning_rate": 6.821133479459492e-06, + "loss": 15.3797, + "step": 221600 + }, + { + "epoch": 0.4476662209060388, + "grad_norm": 265.0107421875, + "learning_rate": 6.820808386392579e-06, + "loss": 10.0269, + "step": 221610 + }, + { + "epoch": 0.44768642153872257, + "grad_norm": 30.379993438720703, + "learning_rate": 
6.820483284451267e-06, + "loss": 23.4607, + "step": 221620 + }, + { + "epoch": 0.4477066221714064, + "grad_norm": 330.9953308105469, + "learning_rate": 6.820158173637142e-06, + "loss": 19.6689, + "step": 221630 + }, + { + "epoch": 0.4477268228040902, + "grad_norm": 201.62255859375, + "learning_rate": 6.819833053951783e-06, + "loss": 19.4625, + "step": 221640 + }, + { + "epoch": 0.447747023436774, + "grad_norm": 118.86680603027344, + "learning_rate": 6.819507925396782e-06, + "loss": 13.1599, + "step": 221650 + }, + { + "epoch": 0.44776722406945785, + "grad_norm": 616.4024658203125, + "learning_rate": 6.819182787973717e-06, + "loss": 21.929, + "step": 221660 + }, + { + "epoch": 0.44778742470214167, + "grad_norm": 148.64752197265625, + "learning_rate": 6.818857641684179e-06, + "loss": 22.8448, + "step": 221670 + }, + { + "epoch": 0.4478076253348255, + "grad_norm": 361.9415588378906, + "learning_rate": 6.8185324865297475e-06, + "loss": 18.2555, + "step": 221680 + }, + { + "epoch": 0.4478278259675093, + "grad_norm": 137.9051513671875, + "learning_rate": 6.81820732251201e-06, + "loss": 11.5808, + "step": 221690 + }, + { + "epoch": 0.4478480266001931, + "grad_norm": 661.150634765625, + "learning_rate": 6.81788214963255e-06, + "loss": 29.458, + "step": 221700 + }, + { + "epoch": 0.44786822723287695, + "grad_norm": 193.84556579589844, + "learning_rate": 6.817556967892953e-06, + "loss": 12.4532, + "step": 221710 + }, + { + "epoch": 0.44788842786556077, + "grad_norm": 1289.806884765625, + "learning_rate": 6.817231777294804e-06, + "loss": 30.7461, + "step": 221720 + }, + { + "epoch": 0.4479086284982446, + "grad_norm": 181.6709442138672, + "learning_rate": 6.816906577839688e-06, + "loss": 15.6548, + "step": 221730 + }, + { + "epoch": 0.4479288291309284, + "grad_norm": 422.0000305175781, + "learning_rate": 6.816581369529189e-06, + "loss": 27.0599, + "step": 221740 + }, + { + "epoch": 0.44794902976361217, + "grad_norm": 529.2969970703125, + "learning_rate": 6.816256152364893e-06, + "loss": 17.3799, + "step": 221750 + }, + { + "epoch": 0.447969230396296, + "grad_norm": 374.6127624511719, + "learning_rate": 6.815930926348384e-06, + "loss": 21.8464, + "step": 221760 + }, + { + "epoch": 0.4479894310289798, + "grad_norm": 108.05406188964844, + "learning_rate": 6.8156056914812486e-06, + "loss": 15.8189, + "step": 221770 + }, + { + "epoch": 0.44800963166166363, + "grad_norm": 197.88031005859375, + "learning_rate": 6.815280447765073e-06, + "loss": 9.593, + "step": 221780 + }, + { + "epoch": 0.44802983229434745, + "grad_norm": 366.7749328613281, + "learning_rate": 6.814955195201438e-06, + "loss": 21.9934, + "step": 221790 + }, + { + "epoch": 0.44805003292703127, + "grad_norm": 324.75244140625, + "learning_rate": 6.814629933791932e-06, + "loss": 15.2373, + "step": 221800 + }, + { + "epoch": 0.4480702335597151, + "grad_norm": 403.6629943847656, + "learning_rate": 6.814304663538142e-06, + "loss": 14.6087, + "step": 221810 + }, + { + "epoch": 0.4480904341923989, + "grad_norm": 423.86431884765625, + "learning_rate": 6.813979384441648e-06, + "loss": 31.6844, + "step": 221820 + }, + { + "epoch": 0.44811063482508273, + "grad_norm": 2697.802734375, + "learning_rate": 6.813654096504041e-06, + "loss": 27.9102, + "step": 221830 + }, + { + "epoch": 0.44813083545776655, + "grad_norm": 500.7284240722656, + "learning_rate": 6.813328799726901e-06, + "loss": 29.9251, + "step": 221840 + }, + { + "epoch": 0.44815103609045037, + "grad_norm": 699.9358520507812, + "learning_rate": 6.8130034941118185e-06, + "loss": 38.8371, + "step": 
221850 + }, + { + "epoch": 0.4481712367231342, + "grad_norm": 858.3467407226562, + "learning_rate": 6.812678179660377e-06, + "loss": 33.7234, + "step": 221860 + }, + { + "epoch": 0.44819143735581796, + "grad_norm": 297.064453125, + "learning_rate": 6.812352856374162e-06, + "loss": 23.5032, + "step": 221870 + }, + { + "epoch": 0.4482116379885018, + "grad_norm": 71.46343231201172, + "learning_rate": 6.812027524254758e-06, + "loss": 19.5076, + "step": 221880 + }, + { + "epoch": 0.4482318386211856, + "grad_norm": 15.395904541015625, + "learning_rate": 6.8117021833037514e-06, + "loss": 21.745, + "step": 221890 + }, + { + "epoch": 0.4482520392538694, + "grad_norm": 252.263671875, + "learning_rate": 6.811376833522729e-06, + "loss": 21.7261, + "step": 221900 + }, + { + "epoch": 0.44827223988655324, + "grad_norm": 505.7231140136719, + "learning_rate": 6.811051474913275e-06, + "loss": 19.5103, + "step": 221910 + }, + { + "epoch": 0.44829244051923706, + "grad_norm": 532.5568237304688, + "learning_rate": 6.810726107476977e-06, + "loss": 20.0245, + "step": 221920 + }, + { + "epoch": 0.4483126411519209, + "grad_norm": 585.125244140625, + "learning_rate": 6.8104007312154185e-06, + "loss": 15.5491, + "step": 221930 + }, + { + "epoch": 0.4483328417846047, + "grad_norm": 240.69189453125, + "learning_rate": 6.810075346130187e-06, + "loss": 21.6393, + "step": 221940 + }, + { + "epoch": 0.4483530424172885, + "grad_norm": 363.57904052734375, + "learning_rate": 6.809749952222867e-06, + "loss": 15.7399, + "step": 221950 + }, + { + "epoch": 0.44837324304997234, + "grad_norm": 270.8522033691406, + "learning_rate": 6.809424549495045e-06, + "loss": 15.6743, + "step": 221960 + }, + { + "epoch": 0.44839344368265616, + "grad_norm": 632.1456909179688, + "learning_rate": 6.809099137948309e-06, + "loss": 19.7913, + "step": 221970 + }, + { + "epoch": 0.44841364431534, + "grad_norm": 312.0612487792969, + "learning_rate": 6.80877371758424e-06, + "loss": 24.0879, + "step": 221980 + }, + { + "epoch": 0.4484338449480238, + "grad_norm": 1596.5499267578125, + "learning_rate": 6.808448288404431e-06, + "loss": 33.8926, + "step": 221990 + }, + { + "epoch": 0.44845404558070756, + "grad_norm": 172.30804443359375, + "learning_rate": 6.808122850410461e-06, + "loss": 29.8875, + "step": 222000 + }, + { + "epoch": 0.4484742462133914, + "grad_norm": 106.3112564086914, + "learning_rate": 6.807797403603923e-06, + "loss": 23.2489, + "step": 222010 + }, + { + "epoch": 0.4484944468460752, + "grad_norm": 206.79513549804688, + "learning_rate": 6.8074719479863974e-06, + "loss": 41.5476, + "step": 222020 + }, + { + "epoch": 0.448514647478759, + "grad_norm": 358.0508728027344, + "learning_rate": 6.8071464835594735e-06, + "loss": 17.2828, + "step": 222030 + }, + { + "epoch": 0.44853484811144284, + "grad_norm": 276.592529296875, + "learning_rate": 6.806821010324738e-06, + "loss": 30.7488, + "step": 222040 + }, + { + "epoch": 0.44855504874412666, + "grad_norm": 282.1035461425781, + "learning_rate": 6.806495528283772e-06, + "loss": 40.0614, + "step": 222050 + }, + { + "epoch": 0.4485752493768105, + "grad_norm": 157.2418670654297, + "learning_rate": 6.80617003743817e-06, + "loss": 11.9531, + "step": 222060 + }, + { + "epoch": 0.4485954500094943, + "grad_norm": 390.024169921875, + "learning_rate": 6.805844537789512e-06, + "loss": 15.1986, + "step": 222070 + }, + { + "epoch": 0.4486156506421781, + "grad_norm": 398.4321594238281, + "learning_rate": 6.805519029339388e-06, + "loss": 16.9989, + "step": 222080 + }, + { + "epoch": 0.44863585127486194, + 
"grad_norm": 608.80712890625, + "learning_rate": 6.805193512089383e-06, + "loss": 20.4634, + "step": 222090 + }, + { + "epoch": 0.44865605190754576, + "grad_norm": 582.0148315429688, + "learning_rate": 6.804867986041084e-06, + "loss": 18.6298, + "step": 222100 + }, + { + "epoch": 0.4486762525402296, + "grad_norm": 667.9501342773438, + "learning_rate": 6.804542451196075e-06, + "loss": 20.4488, + "step": 222110 + }, + { + "epoch": 0.4486964531729134, + "grad_norm": 121.29228973388672, + "learning_rate": 6.804216907555948e-06, + "loss": 17.3846, + "step": 222120 + }, + { + "epoch": 0.44871665380559717, + "grad_norm": 431.90704345703125, + "learning_rate": 6.8038913551222864e-06, + "loss": 36.2312, + "step": 222130 + }, + { + "epoch": 0.448736854438281, + "grad_norm": 271.4874572753906, + "learning_rate": 6.803565793896676e-06, + "loss": 19.1641, + "step": 222140 + }, + { + "epoch": 0.4487570550709648, + "grad_norm": 335.38275146484375, + "learning_rate": 6.803240223880705e-06, + "loss": 24.8926, + "step": 222150 + }, + { + "epoch": 0.4487772557036486, + "grad_norm": 389.165283203125, + "learning_rate": 6.802914645075959e-06, + "loss": 10.8841, + "step": 222160 + }, + { + "epoch": 0.44879745633633245, + "grad_norm": 753.0094604492188, + "learning_rate": 6.802589057484027e-06, + "loss": 22.3376, + "step": 222170 + }, + { + "epoch": 0.44881765696901627, + "grad_norm": 184.64968872070312, + "learning_rate": 6.8022634611064945e-06, + "loss": 18.6461, + "step": 222180 + }, + { + "epoch": 0.4488378576017001, + "grad_norm": 349.143310546875, + "learning_rate": 6.801937855944946e-06, + "loss": 13.9254, + "step": 222190 + }, + { + "epoch": 0.4488580582343839, + "grad_norm": 148.9762725830078, + "learning_rate": 6.8016122420009745e-06, + "loss": 22.2679, + "step": 222200 + }, + { + "epoch": 0.4488782588670677, + "grad_norm": 294.5908508300781, + "learning_rate": 6.801286619276161e-06, + "loss": 21.1456, + "step": 222210 + }, + { + "epoch": 0.44889845949975155, + "grad_norm": 742.6663208007812, + "learning_rate": 6.800960987772096e-06, + "loss": 16.5063, + "step": 222220 + }, + { + "epoch": 0.44891866013243537, + "grad_norm": 649.3998413085938, + "learning_rate": 6.800635347490365e-06, + "loss": 22.3299, + "step": 222230 + }, + { + "epoch": 0.4489388607651192, + "grad_norm": 689.6530151367188, + "learning_rate": 6.800309698432557e-06, + "loss": 13.688, + "step": 222240 + }, + { + "epoch": 0.448959061397803, + "grad_norm": 453.74615478515625, + "learning_rate": 6.799984040600257e-06, + "loss": 19.618, + "step": 222250 + }, + { + "epoch": 0.44897926203048677, + "grad_norm": 727.8592529296875, + "learning_rate": 6.799658373995054e-06, + "loss": 17.2367, + "step": 222260 + }, + { + "epoch": 0.4489994626631706, + "grad_norm": 170.2805633544922, + "learning_rate": 6.7993326986185315e-06, + "loss": 32.9658, + "step": 222270 + }, + { + "epoch": 0.4490196632958544, + "grad_norm": 346.58978271484375, + "learning_rate": 6.799007014472283e-06, + "loss": 13.2505, + "step": 222280 + }, + { + "epoch": 0.44903986392853823, + "grad_norm": 317.31060791015625, + "learning_rate": 6.798681321557891e-06, + "loss": 10.2113, + "step": 222290 + }, + { + "epoch": 0.44906006456122205, + "grad_norm": 23.51372718811035, + "learning_rate": 6.798355619876944e-06, + "loss": 9.2266, + "step": 222300 + }, + { + "epoch": 0.44908026519390587, + "grad_norm": 1001.768798828125, + "learning_rate": 6.798029909431031e-06, + "loss": 20.929, + "step": 222310 + }, + { + "epoch": 0.4491004658265897, + "grad_norm": 255.8820037841797, + 
"learning_rate": 6.797704190221737e-06, + "loss": 27.7979, + "step": 222320 + }, + { + "epoch": 0.4491206664592735, + "grad_norm": 380.12359619140625, + "learning_rate": 6.797378462250653e-06, + "loss": 25.8956, + "step": 222330 + }, + { + "epoch": 0.44914086709195733, + "grad_norm": 352.653564453125, + "learning_rate": 6.797052725519362e-06, + "loss": 15.2067, + "step": 222340 + }, + { + "epoch": 0.44916106772464115, + "grad_norm": 254.15542602539062, + "learning_rate": 6.796726980029454e-06, + "loss": 13.2863, + "step": 222350 + }, + { + "epoch": 0.44918126835732497, + "grad_norm": 330.99774169921875, + "learning_rate": 6.796401225782517e-06, + "loss": 13.3235, + "step": 222360 + }, + { + "epoch": 0.4492014689900088, + "grad_norm": 496.7027282714844, + "learning_rate": 6.796075462780139e-06, + "loss": 19.9533, + "step": 222370 + }, + { + "epoch": 0.4492216696226926, + "grad_norm": 85.53564453125, + "learning_rate": 6.7957496910239075e-06, + "loss": 23.17, + "step": 222380 + }, + { + "epoch": 0.4492418702553764, + "grad_norm": 216.68374633789062, + "learning_rate": 6.7954239105154084e-06, + "loss": 24.8749, + "step": 222390 + }, + { + "epoch": 0.4492620708880602, + "grad_norm": 365.98907470703125, + "learning_rate": 6.7950981212562315e-06, + "loss": 16.9925, + "step": 222400 + }, + { + "epoch": 0.449282271520744, + "grad_norm": 433.2200622558594, + "learning_rate": 6.794772323247965e-06, + "loss": 14.6022, + "step": 222410 + }, + { + "epoch": 0.44930247215342783, + "grad_norm": 888.5516967773438, + "learning_rate": 6.794446516492195e-06, + "loss": 26.1488, + "step": 222420 + }, + { + "epoch": 0.44932267278611165, + "grad_norm": 703.0285034179688, + "learning_rate": 6.794120700990509e-06, + "loss": 14.7055, + "step": 222430 + }, + { + "epoch": 0.4493428734187955, + "grad_norm": 9.99472713470459, + "learning_rate": 6.793794876744499e-06, + "loss": 13.5452, + "step": 222440 + }, + { + "epoch": 0.4493630740514793, + "grad_norm": 421.5027770996094, + "learning_rate": 6.793469043755747e-06, + "loss": 40.9224, + "step": 222450 + }, + { + "epoch": 0.4493832746841631, + "grad_norm": 555.3636474609375, + "learning_rate": 6.793143202025848e-06, + "loss": 23.906, + "step": 222460 + }, + { + "epoch": 0.44940347531684693, + "grad_norm": 242.57211303710938, + "learning_rate": 6.792817351556384e-06, + "loss": 13.0388, + "step": 222470 + }, + { + "epoch": 0.44942367594953075, + "grad_norm": 1042.263427734375, + "learning_rate": 6.792491492348947e-06, + "loss": 15.2862, + "step": 222480 + }, + { + "epoch": 0.4494438765822146, + "grad_norm": 423.35992431640625, + "learning_rate": 6.792165624405124e-06, + "loss": 20.0324, + "step": 222490 + }, + { + "epoch": 0.4494640772148984, + "grad_norm": 726.4552612304688, + "learning_rate": 6.7918397477265e-06, + "loss": 16.9545, + "step": 222500 + }, + { + "epoch": 0.44948427784758216, + "grad_norm": 502.55914306640625, + "learning_rate": 6.791513862314672e-06, + "loss": 29.1973, + "step": 222510 + }, + { + "epoch": 0.449504478480266, + "grad_norm": 272.6805725097656, + "learning_rate": 6.791187968171219e-06, + "loss": 10.5815, + "step": 222520 + }, + { + "epoch": 0.4495246791129498, + "grad_norm": 459.9281005859375, + "learning_rate": 6.790862065297733e-06, + "loss": 21.6328, + "step": 222530 + }, + { + "epoch": 0.4495448797456336, + "grad_norm": 335.8197021484375, + "learning_rate": 6.7905361536958035e-06, + "loss": 17.3794, + "step": 222540 + }, + { + "epoch": 0.44956508037831744, + "grad_norm": 535.16259765625, + "learning_rate": 6.7902102333670185e-06, + "loss": 
8.7648, + "step": 222550 + }, + { + "epoch": 0.44958528101100126, + "grad_norm": 263.1183166503906, + "learning_rate": 6.789884304312965e-06, + "loss": 17.3051, + "step": 222560 + }, + { + "epoch": 0.4496054816436851, + "grad_norm": 445.25994873046875, + "learning_rate": 6.789558366535232e-06, + "loss": 16.7604, + "step": 222570 + }, + { + "epoch": 0.4496256822763689, + "grad_norm": 459.8729553222656, + "learning_rate": 6.78923242003541e-06, + "loss": 14.4448, + "step": 222580 + }, + { + "epoch": 0.4496458829090527, + "grad_norm": 62.0632438659668, + "learning_rate": 6.788906464815085e-06, + "loss": 26.4532, + "step": 222590 + }, + { + "epoch": 0.44966608354173654, + "grad_norm": 547.4466552734375, + "learning_rate": 6.788580500875848e-06, + "loss": 28.4726, + "step": 222600 + }, + { + "epoch": 0.44968628417442036, + "grad_norm": 6680.791015625, + "learning_rate": 6.788254528219285e-06, + "loss": 76.5322, + "step": 222610 + }, + { + "epoch": 0.4497064848071042, + "grad_norm": 291.7486877441406, + "learning_rate": 6.787928546846987e-06, + "loss": 18.212, + "step": 222620 + }, + { + "epoch": 0.449726685439788, + "grad_norm": 520.095947265625, + "learning_rate": 6.787602556760542e-06, + "loss": 28.5444, + "step": 222630 + }, + { + "epoch": 0.44974688607247176, + "grad_norm": 343.7140197753906, + "learning_rate": 6.78727655796154e-06, + "loss": 31.3997, + "step": 222640 + }, + { + "epoch": 0.4497670867051556, + "grad_norm": 850.1187133789062, + "learning_rate": 6.786950550451568e-06, + "loss": 23.2061, + "step": 222650 + }, + { + "epoch": 0.4497872873378394, + "grad_norm": 671.8289184570312, + "learning_rate": 6.786624534232215e-06, + "loss": 21.8656, + "step": 222660 + }, + { + "epoch": 0.4498074879705232, + "grad_norm": 340.3204650878906, + "learning_rate": 6.786298509305072e-06, + "loss": 27.215, + "step": 222670 + }, + { + "epoch": 0.44982768860320704, + "grad_norm": 291.07452392578125, + "learning_rate": 6.785972475671726e-06, + "loss": 16.2252, + "step": 222680 + }, + { + "epoch": 0.44984788923589086, + "grad_norm": 305.812255859375, + "learning_rate": 6.785646433333767e-06, + "loss": 25.1343, + "step": 222690 + }, + { + "epoch": 0.4498680898685747, + "grad_norm": 67.56078338623047, + "learning_rate": 6.785320382292783e-06, + "loss": 12.049, + "step": 222700 + }, + { + "epoch": 0.4498882905012585, + "grad_norm": 29.37464714050293, + "learning_rate": 6.784994322550367e-06, + "loss": 17.6114, + "step": 222710 + }, + { + "epoch": 0.4499084911339423, + "grad_norm": 37.972652435302734, + "learning_rate": 6.7846682541081024e-06, + "loss": 22.8585, + "step": 222720 + }, + { + "epoch": 0.44992869176662614, + "grad_norm": 216.0361328125, + "learning_rate": 6.784342176967581e-06, + "loss": 9.1523, + "step": 222730 + }, + { + "epoch": 0.44994889239930996, + "grad_norm": 780.3121337890625, + "learning_rate": 6.784016091130393e-06, + "loss": 32.2775, + "step": 222740 + }, + { + "epoch": 0.4499690930319938, + "grad_norm": 1125.4534912109375, + "learning_rate": 6.783689996598126e-06, + "loss": 25.3061, + "step": 222750 + }, + { + "epoch": 0.4499892936646776, + "grad_norm": 264.4049987792969, + "learning_rate": 6.783363893372372e-06, + "loss": 23.4717, + "step": 222760 + }, + { + "epoch": 0.45000949429736137, + "grad_norm": 358.88214111328125, + "learning_rate": 6.783037781454718e-06, + "loss": 20.7717, + "step": 222770 + }, + { + "epoch": 0.4500296949300452, + "grad_norm": 525.8593139648438, + "learning_rate": 6.782711660846755e-06, + "loss": 12.3507, + "step": 222780 + }, + { + "epoch": 
0.450049895562729, + "grad_norm": 816.92236328125, + "learning_rate": 6.78238553155007e-06, + "loss": 29.601, + "step": 222790 + }, + { + "epoch": 0.4500700961954128, + "grad_norm": 49.18048858642578, + "learning_rate": 6.782059393566254e-06, + "loss": 17.1982, + "step": 222800 + }, + { + "epoch": 0.45009029682809665, + "grad_norm": 210.97425842285156, + "learning_rate": 6.781733246896898e-06, + "loss": 20.1037, + "step": 222810 + }, + { + "epoch": 0.45011049746078047, + "grad_norm": 1031.457763671875, + "learning_rate": 6.781407091543589e-06, + "loss": 28.7679, + "step": 222820 + }, + { + "epoch": 0.4501306980934643, + "grad_norm": 925.4967041015625, + "learning_rate": 6.781080927507919e-06, + "loss": 12.5863, + "step": 222830 + }, + { + "epoch": 0.4501508987261481, + "grad_norm": 544.8528442382812, + "learning_rate": 6.780754754791476e-06, + "loss": 19.6443, + "step": 222840 + }, + { + "epoch": 0.4501710993588319, + "grad_norm": 33.40279769897461, + "learning_rate": 6.7804285733958495e-06, + "loss": 32.5941, + "step": 222850 + }, + { + "epoch": 0.45019129999151575, + "grad_norm": 529.589599609375, + "learning_rate": 6.780102383322631e-06, + "loss": 29.3027, + "step": 222860 + }, + { + "epoch": 0.45021150062419957, + "grad_norm": 750.5447998046875, + "learning_rate": 6.7797761845734115e-06, + "loss": 19.2539, + "step": 222870 + }, + { + "epoch": 0.4502317012568834, + "grad_norm": 384.02264404296875, + "learning_rate": 6.779449977149774e-06, + "loss": 9.5737, + "step": 222880 + }, + { + "epoch": 0.4502519018895672, + "grad_norm": 490.29400634765625, + "learning_rate": 6.779123761053317e-06, + "loss": 19.3089, + "step": 222890 + }, + { + "epoch": 0.45027210252225097, + "grad_norm": 31.462303161621094, + "learning_rate": 6.778797536285625e-06, + "loss": 13.2727, + "step": 222900 + }, + { + "epoch": 0.4502923031549348, + "grad_norm": 362.80755615234375, + "learning_rate": 6.778471302848291e-06, + "loss": 11.4522, + "step": 222910 + }, + { + "epoch": 0.4503125037876186, + "grad_norm": 336.7137756347656, + "learning_rate": 6.778145060742902e-06, + "loss": 21.5893, + "step": 222920 + }, + { + "epoch": 0.45033270442030243, + "grad_norm": 529.6342163085938, + "learning_rate": 6.777818809971048e-06, + "loss": 22.9397, + "step": 222930 + }, + { + "epoch": 0.45035290505298625, + "grad_norm": 222.5026397705078, + "learning_rate": 6.777492550534325e-06, + "loss": 22.5689, + "step": 222940 + }, + { + "epoch": 0.45037310568567007, + "grad_norm": 647.3722534179688, + "learning_rate": 6.777166282434316e-06, + "loss": 21.2208, + "step": 222950 + }, + { + "epoch": 0.4503933063183539, + "grad_norm": 389.1189880371094, + "learning_rate": 6.776840005672615e-06, + "loss": 12.6924, + "step": 222960 + }, + { + "epoch": 0.4504135069510377, + "grad_norm": 128.2410888671875, + "learning_rate": 6.77651372025081e-06, + "loss": 15.6193, + "step": 222970 + }, + { + "epoch": 0.45043370758372153, + "grad_norm": 279.0683898925781, + "learning_rate": 6.776187426170494e-06, + "loss": 21.5215, + "step": 222980 + }, + { + "epoch": 0.45045390821640535, + "grad_norm": 1050.976806640625, + "learning_rate": 6.775861123433256e-06, + "loss": 28.3963, + "step": 222990 + }, + { + "epoch": 0.45047410884908917, + "grad_norm": 435.2922668457031, + "learning_rate": 6.775534812040686e-06, + "loss": 13.145, + "step": 223000 + }, + { + "epoch": 0.450494309481773, + "grad_norm": 228.21713256835938, + "learning_rate": 6.775208491994375e-06, + "loss": 16.705, + "step": 223010 + }, + { + "epoch": 0.4505145101144568, + "grad_norm": 
339.0024108886719, + "learning_rate": 6.7748821632959126e-06, + "loss": 17.5873, + "step": 223020 + }, + { + "epoch": 0.4505347107471406, + "grad_norm": 99.26470947265625, + "learning_rate": 6.774555825946889e-06, + "loss": 25.5734, + "step": 223030 + }, + { + "epoch": 0.4505549113798244, + "grad_norm": 343.356689453125, + "learning_rate": 6.7742294799488965e-06, + "loss": 22.6062, + "step": 223040 + }, + { + "epoch": 0.4505751120125082, + "grad_norm": 416.54144287109375, + "learning_rate": 6.773903125303525e-06, + "loss": 28.9425, + "step": 223050 + }, + { + "epoch": 0.45059531264519204, + "grad_norm": 243.22572326660156, + "learning_rate": 6.773576762012365e-06, + "loss": 33.0823, + "step": 223060 + }, + { + "epoch": 0.45061551327787586, + "grad_norm": 371.2916564941406, + "learning_rate": 6.773250390077006e-06, + "loss": 12.2203, + "step": 223070 + }, + { + "epoch": 0.4506357139105597, + "grad_norm": 217.40724182128906, + "learning_rate": 6.77292400949904e-06, + "loss": 19.6364, + "step": 223080 + }, + { + "epoch": 0.4506559145432435, + "grad_norm": 237.15167236328125, + "learning_rate": 6.772597620280057e-06, + "loss": 16.4248, + "step": 223090 + }, + { + "epoch": 0.4506761151759273, + "grad_norm": 509.1220397949219, + "learning_rate": 6.772271222421649e-06, + "loss": 26.5038, + "step": 223100 + }, + { + "epoch": 0.45069631580861114, + "grad_norm": 391.43890380859375, + "learning_rate": 6.771944815925405e-06, + "loss": 16.6508, + "step": 223110 + }, + { + "epoch": 0.45071651644129496, + "grad_norm": 521.5532836914062, + "learning_rate": 6.771618400792919e-06, + "loss": 22.183, + "step": 223120 + }, + { + "epoch": 0.4507367170739788, + "grad_norm": 277.815185546875, + "learning_rate": 6.771291977025778e-06, + "loss": 15.3895, + "step": 223130 + }, + { + "epoch": 0.4507569177066626, + "grad_norm": 247.5813751220703, + "learning_rate": 6.770965544625574e-06, + "loss": 18.2332, + "step": 223140 + }, + { + "epoch": 0.45077711833934636, + "grad_norm": 1147.00390625, + "learning_rate": 6.7706391035939e-06, + "loss": 39.6065, + "step": 223150 + }, + { + "epoch": 0.4507973189720302, + "grad_norm": 293.8579406738281, + "learning_rate": 6.770312653932346e-06, + "loss": 22.2251, + "step": 223160 + }, + { + "epoch": 0.450817519604714, + "grad_norm": 230.84765625, + "learning_rate": 6.769986195642503e-06, + "loss": 16.5617, + "step": 223170 + }, + { + "epoch": 0.4508377202373978, + "grad_norm": 288.72161865234375, + "learning_rate": 6.76965972872596e-06, + "loss": 12.05, + "step": 223180 + }, + { + "epoch": 0.45085792087008164, + "grad_norm": 717.0557861328125, + "learning_rate": 6.769333253184312e-06, + "loss": 10.6119, + "step": 223190 + }, + { + "epoch": 0.45087812150276546, + "grad_norm": 377.36029052734375, + "learning_rate": 6.769006769019147e-06, + "loss": 16.843, + "step": 223200 + }, + { + "epoch": 0.4508983221354493, + "grad_norm": 542.556640625, + "learning_rate": 6.76868027623206e-06, + "loss": 16.8681, + "step": 223210 + }, + { + "epoch": 0.4509185227681331, + "grad_norm": 870.3871459960938, + "learning_rate": 6.768353774824636e-06, + "loss": 12.3779, + "step": 223220 + }, + { + "epoch": 0.4509387234008169, + "grad_norm": 58.22195816040039, + "learning_rate": 6.7680272647984734e-06, + "loss": 14.6801, + "step": 223230 + }, + { + "epoch": 0.45095892403350074, + "grad_norm": 460.4647521972656, + "learning_rate": 6.767700746155159e-06, + "loss": 26.8794, + "step": 223240 + }, + { + "epoch": 0.45097912466618456, + "grad_norm": 291.2055969238281, + "learning_rate": 6.767374218896286e-06, + 
"loss": 19.5938, + "step": 223250 + }, + { + "epoch": 0.4509993252988684, + "grad_norm": 308.76104736328125, + "learning_rate": 6.767047683023447e-06, + "loss": 30.2626, + "step": 223260 + }, + { + "epoch": 0.4510195259315522, + "grad_norm": 183.4350128173828, + "learning_rate": 6.766721138538228e-06, + "loss": 13.9079, + "step": 223270 + }, + { + "epoch": 0.45103972656423597, + "grad_norm": 255.0755157470703, + "learning_rate": 6.766394585442228e-06, + "loss": 12.5789, + "step": 223280 + }, + { + "epoch": 0.4510599271969198, + "grad_norm": 522.11181640625, + "learning_rate": 6.766068023737034e-06, + "loss": 31.9759, + "step": 223290 + }, + { + "epoch": 0.4510801278296036, + "grad_norm": 246.31863403320312, + "learning_rate": 6.765741453424237e-06, + "loss": 17.7365, + "step": 223300 + }, + { + "epoch": 0.4511003284622874, + "grad_norm": 165.8690948486328, + "learning_rate": 6.765414874505431e-06, + "loss": 17.5843, + "step": 223310 + }, + { + "epoch": 0.45112052909497125, + "grad_norm": 350.4022216796875, + "learning_rate": 6.765088286982209e-06, + "loss": 47.494, + "step": 223320 + }, + { + "epoch": 0.45114072972765507, + "grad_norm": 133.50668334960938, + "learning_rate": 6.7647616908561595e-06, + "loss": 13.5222, + "step": 223330 + }, + { + "epoch": 0.4511609303603389, + "grad_norm": 479.4945373535156, + "learning_rate": 6.764435086128876e-06, + "loss": 13.6611, + "step": 223340 + }, + { + "epoch": 0.4511811309930227, + "grad_norm": 202.6797332763672, + "learning_rate": 6.764108472801949e-06, + "loss": 10.2501, + "step": 223350 + }, + { + "epoch": 0.4512013316257065, + "grad_norm": 836.3070678710938, + "learning_rate": 6.763781850876972e-06, + "loss": 11.0629, + "step": 223360 + }, + { + "epoch": 0.45122153225839035, + "grad_norm": 213.8543243408203, + "learning_rate": 6.763455220355536e-06, + "loss": 23.6617, + "step": 223370 + }, + { + "epoch": 0.45124173289107417, + "grad_norm": 1030.3858642578125, + "learning_rate": 6.763128581239231e-06, + "loss": 40.2191, + "step": 223380 + }, + { + "epoch": 0.451261933523758, + "grad_norm": 424.92510986328125, + "learning_rate": 6.762801933529655e-06, + "loss": 13.2672, + "step": 223390 + }, + { + "epoch": 0.4512821341564418, + "grad_norm": 393.67071533203125, + "learning_rate": 6.762475277228393e-06, + "loss": 15.9861, + "step": 223400 + }, + { + "epoch": 0.45130233478912557, + "grad_norm": 439.38031005859375, + "learning_rate": 6.762148612337042e-06, + "loss": 27.0009, + "step": 223410 + }, + { + "epoch": 0.4513225354218094, + "grad_norm": 544.7018432617188, + "learning_rate": 6.761821938857191e-06, + "loss": 14.2156, + "step": 223420 + }, + { + "epoch": 0.4513427360544932, + "grad_norm": 557.1956787109375, + "learning_rate": 6.761495256790434e-06, + "loss": 15.6513, + "step": 223430 + }, + { + "epoch": 0.45136293668717703, + "grad_norm": 203.35861206054688, + "learning_rate": 6.761168566138366e-06, + "loss": 15.0615, + "step": 223440 + }, + { + "epoch": 0.45138313731986085, + "grad_norm": 349.0697326660156, + "learning_rate": 6.760841866902572e-06, + "loss": 49.9705, + "step": 223450 + }, + { + "epoch": 0.45140333795254467, + "grad_norm": 235.16610717773438, + "learning_rate": 6.7605151590846494e-06, + "loss": 19.9045, + "step": 223460 + }, + { + "epoch": 0.4514235385852285, + "grad_norm": 417.69879150390625, + "learning_rate": 6.760188442686189e-06, + "loss": 19.2494, + "step": 223470 + }, + { + "epoch": 0.4514437392179123, + "grad_norm": 127.5810775756836, + "learning_rate": 6.759861717708785e-06, + "loss": 10.0971, + "step": 223480 + }, + 
{ + "epoch": 0.45146393985059613, + "grad_norm": 486.7557373046875, + "learning_rate": 6.759534984154027e-06, + "loss": 15.5324, + "step": 223490 + }, + { + "epoch": 0.45148414048327995, + "grad_norm": 117.85852813720703, + "learning_rate": 6.759208242023509e-06, + "loss": 14.1374, + "step": 223500 + }, + { + "epoch": 0.45150434111596377, + "grad_norm": 442.956298828125, + "learning_rate": 6.758881491318825e-06, + "loss": 14.8639, + "step": 223510 + }, + { + "epoch": 0.4515245417486476, + "grad_norm": 617.3826904296875, + "learning_rate": 6.758554732041564e-06, + "loss": 19.7456, + "step": 223520 + }, + { + "epoch": 0.4515447423813314, + "grad_norm": 732.301025390625, + "learning_rate": 6.758227964193323e-06, + "loss": 19.6029, + "step": 223530 + }, + { + "epoch": 0.4515649430140152, + "grad_norm": 489.2425231933594, + "learning_rate": 6.757901187775689e-06, + "loss": 12.1993, + "step": 223540 + }, + { + "epoch": 0.451585143646699, + "grad_norm": 610.5255737304688, + "learning_rate": 6.75757440279026e-06, + "loss": 17.4517, + "step": 223550 + }, + { + "epoch": 0.4516053442793828, + "grad_norm": 476.91864013671875, + "learning_rate": 6.757247609238625e-06, + "loss": 10.3758, + "step": 223560 + }, + { + "epoch": 0.45162554491206663, + "grad_norm": 163.54229736328125, + "learning_rate": 6.75692080712238e-06, + "loss": 15.9277, + "step": 223570 + }, + { + "epoch": 0.45164574554475045, + "grad_norm": 721.86865234375, + "learning_rate": 6.756593996443115e-06, + "loss": 23.1408, + "step": 223580 + }, + { + "epoch": 0.4516659461774343, + "grad_norm": 372.91082763671875, + "learning_rate": 6.756267177202425e-06, + "loss": 6.0081, + "step": 223590 + }, + { + "epoch": 0.4516861468101181, + "grad_norm": 226.74749755859375, + "learning_rate": 6.755940349401901e-06, + "loss": 42.1689, + "step": 223600 + }, + { + "epoch": 0.4517063474428019, + "grad_norm": 339.08819580078125, + "learning_rate": 6.755613513043136e-06, + "loss": 21.0595, + "step": 223610 + }, + { + "epoch": 0.45172654807548573, + "grad_norm": 179.5376739501953, + "learning_rate": 6.755286668127724e-06, + "loss": 15.6352, + "step": 223620 + }, + { + "epoch": 0.45174674870816955, + "grad_norm": 1601.601806640625, + "learning_rate": 6.7549598146572584e-06, + "loss": 32.942, + "step": 223630 + }, + { + "epoch": 0.4517669493408534, + "grad_norm": 425.022216796875, + "learning_rate": 6.7546329526333305e-06, + "loss": 20.5134, + "step": 223640 + }, + { + "epoch": 0.4517871499735372, + "grad_norm": 255.98838806152344, + "learning_rate": 6.754306082057534e-06, + "loss": 20.1227, + "step": 223650 + }, + { + "epoch": 0.451807350606221, + "grad_norm": 275.3392639160156, + "learning_rate": 6.753979202931466e-06, + "loss": 13.5676, + "step": 223660 + }, + { + "epoch": 0.4518275512389048, + "grad_norm": 196.0421142578125, + "learning_rate": 6.753652315256712e-06, + "loss": 14.2024, + "step": 223670 + }, + { + "epoch": 0.4518477518715886, + "grad_norm": 374.6175842285156, + "learning_rate": 6.753325419034871e-06, + "loss": 21.9876, + "step": 223680 + }, + { + "epoch": 0.4518679525042724, + "grad_norm": 272.6880187988281, + "learning_rate": 6.752998514267534e-06, + "loss": 17.4269, + "step": 223690 + }, + { + "epoch": 0.45188815313695624, + "grad_norm": 476.31658935546875, + "learning_rate": 6.752671600956295e-06, + "loss": 26.3126, + "step": 223700 + }, + { + "epoch": 0.45190835376964006, + "grad_norm": 440.00982666015625, + "learning_rate": 6.752344679102749e-06, + "loss": 62.6821, + "step": 223710 + }, + { + "epoch": 0.4519285544023239, + "grad_norm": 
282.3997497558594, + "learning_rate": 6.752017748708485e-06, + "loss": 15.0602, + "step": 223720 + }, + { + "epoch": 0.4519487550350077, + "grad_norm": 643.2767944335938, + "learning_rate": 6.7516908097751e-06, + "loss": 26.9476, + "step": 223730 + }, + { + "epoch": 0.4519689556676915, + "grad_norm": 494.6185302734375, + "learning_rate": 6.751363862304186e-06, + "loss": 16.5201, + "step": 223740 + }, + { + "epoch": 0.45198915630037534, + "grad_norm": 165.56671142578125, + "learning_rate": 6.751036906297338e-06, + "loss": 22.5521, + "step": 223750 + }, + { + "epoch": 0.45200935693305916, + "grad_norm": 467.59576416015625, + "learning_rate": 6.750709941756147e-06, + "loss": 14.1566, + "step": 223760 + }, + { + "epoch": 0.452029557565743, + "grad_norm": 461.00543212890625, + "learning_rate": 6.7503829686822095e-06, + "loss": 25.7327, + "step": 223770 + }, + { + "epoch": 0.4520497581984268, + "grad_norm": 448.5977783203125, + "learning_rate": 6.750055987077118e-06, + "loss": 21.1888, + "step": 223780 + }, + { + "epoch": 0.45206995883111056, + "grad_norm": 605.9522705078125, + "learning_rate": 6.749728996942465e-06, + "loss": 14.3863, + "step": 223790 + }, + { + "epoch": 0.4520901594637944, + "grad_norm": 416.8888854980469, + "learning_rate": 6.749401998279845e-06, + "loss": 26.7473, + "step": 223800 + }, + { + "epoch": 0.4521103600964782, + "grad_norm": 558.3778686523438, + "learning_rate": 6.749074991090852e-06, + "loss": 24.0431, + "step": 223810 + }, + { + "epoch": 0.452130560729162, + "grad_norm": 659.2818603515625, + "learning_rate": 6.74874797537708e-06, + "loss": 20.5936, + "step": 223820 + }, + { + "epoch": 0.45215076136184584, + "grad_norm": 584.5877075195312, + "learning_rate": 6.748420951140121e-06, + "loss": 36.6595, + "step": 223830 + }, + { + "epoch": 0.45217096199452966, + "grad_norm": 650.2181396484375, + "learning_rate": 6.748093918381572e-06, + "loss": 21.7985, + "step": 223840 + }, + { + "epoch": 0.4521911626272135, + "grad_norm": 249.53504943847656, + "learning_rate": 6.747766877103025e-06, + "loss": 21.3749, + "step": 223850 + }, + { + "epoch": 0.4522113632598973, + "grad_norm": 280.7706298828125, + "learning_rate": 6.7474398273060725e-06, + "loss": 14.0659, + "step": 223860 + }, + { + "epoch": 0.4522315638925811, + "grad_norm": 329.1269836425781, + "learning_rate": 6.747112768992313e-06, + "loss": 13.8137, + "step": 223870 + }, + { + "epoch": 0.45225176452526494, + "grad_norm": 1071.5306396484375, + "learning_rate": 6.7467857021633354e-06, + "loss": 34.511, + "step": 223880 + }, + { + "epoch": 0.45227196515794876, + "grad_norm": 534.2124633789062, + "learning_rate": 6.746458626820738e-06, + "loss": 20.4756, + "step": 223890 + }, + { + "epoch": 0.4522921657906326, + "grad_norm": 224.7541046142578, + "learning_rate": 6.746131542966112e-06, + "loss": 24.4721, + "step": 223900 + }, + { + "epoch": 0.4523123664233164, + "grad_norm": 214.91180419921875, + "learning_rate": 6.745804450601053e-06, + "loss": 13.8816, + "step": 223910 + }, + { + "epoch": 0.45233256705600017, + "grad_norm": 539.7750244140625, + "learning_rate": 6.745477349727154e-06, + "loss": 19.0091, + "step": 223920 + }, + { + "epoch": 0.452352767688684, + "grad_norm": 20.45575523376465, + "learning_rate": 6.74515024034601e-06, + "loss": 18.5851, + "step": 223930 + }, + { + "epoch": 0.4523729683213678, + "grad_norm": 295.1483459472656, + "learning_rate": 6.744823122459217e-06, + "loss": 11.5418, + "step": 223940 + }, + { + "epoch": 0.4523931689540516, + "grad_norm": 123.01981353759766, + "learning_rate": 
6.744495996068367e-06, + "loss": 11.6932, + "step": 223950 + }, + { + "epoch": 0.45241336958673545, + "grad_norm": 325.3513488769531, + "learning_rate": 6.744168861175056e-06, + "loss": 15.107, + "step": 223960 + }, + { + "epoch": 0.45243357021941927, + "grad_norm": 332.8097839355469, + "learning_rate": 6.743841717780876e-06, + "loss": 7.7247, + "step": 223970 + }, + { + "epoch": 0.4524537708521031, + "grad_norm": 283.2469482421875, + "learning_rate": 6.743514565887424e-06, + "loss": 14.3612, + "step": 223980 + }, + { + "epoch": 0.4524739714847869, + "grad_norm": 493.7138366699219, + "learning_rate": 6.743187405496292e-06, + "loss": 28.5663, + "step": 223990 + }, + { + "epoch": 0.4524941721174707, + "grad_norm": 248.72547912597656, + "learning_rate": 6.7428602366090764e-06, + "loss": 16.5097, + "step": 224000 + }, + { + "epoch": 0.45251437275015455, + "grad_norm": 521.6533813476562, + "learning_rate": 6.742533059227372e-06, + "loss": 12.9653, + "step": 224010 + }, + { + "epoch": 0.45253457338283837, + "grad_norm": 376.3984375, + "learning_rate": 6.742205873352773e-06, + "loss": 13.4344, + "step": 224020 + }, + { + "epoch": 0.4525547740155222, + "grad_norm": 913.5752563476562, + "learning_rate": 6.741878678986873e-06, + "loss": 26.3094, + "step": 224030 + }, + { + "epoch": 0.452574974648206, + "grad_norm": 478.7392272949219, + "learning_rate": 6.741551476131269e-06, + "loss": 30.4023, + "step": 224040 + }, + { + "epoch": 0.45259517528088977, + "grad_norm": 262.20526123046875, + "learning_rate": 6.741224264787553e-06, + "loss": 23.2195, + "step": 224050 + }, + { + "epoch": 0.4526153759135736, + "grad_norm": 560.4160766601562, + "learning_rate": 6.740897044957322e-06, + "loss": 22.4177, + "step": 224060 + }, + { + "epoch": 0.4526355765462574, + "grad_norm": 435.0744323730469, + "learning_rate": 6.74056981664217e-06, + "loss": 20.3772, + "step": 224070 + }, + { + "epoch": 0.45265577717894123, + "grad_norm": 4965.35400390625, + "learning_rate": 6.740242579843691e-06, + "loss": 73.2704, + "step": 224080 + }, + { + "epoch": 0.45267597781162505, + "grad_norm": 402.28302001953125, + "learning_rate": 6.739915334563481e-06, + "loss": 28.474, + "step": 224090 + }, + { + "epoch": 0.45269617844430887, + "grad_norm": 104.3423080444336, + "learning_rate": 6.739588080803134e-06, + "loss": 12.5469, + "step": 224100 + }, + { + "epoch": 0.4527163790769927, + "grad_norm": 491.0937805175781, + "learning_rate": 6.739260818564248e-06, + "loss": 17.2166, + "step": 224110 + }, + { + "epoch": 0.4527365797096765, + "grad_norm": 181.10830688476562, + "learning_rate": 6.738933547848414e-06, + "loss": 19.3685, + "step": 224120 + }, + { + "epoch": 0.45275678034236033, + "grad_norm": 435.29534912109375, + "learning_rate": 6.7386062686572286e-06, + "loss": 22.2347, + "step": 224130 + }, + { + "epoch": 0.45277698097504415, + "grad_norm": 51.51202392578125, + "learning_rate": 6.738278980992289e-06, + "loss": 21.777, + "step": 224140 + }, + { + "epoch": 0.45279718160772797, + "grad_norm": 428.5145568847656, + "learning_rate": 6.737951684855185e-06, + "loss": 16.9964, + "step": 224150 + }, + { + "epoch": 0.4528173822404118, + "grad_norm": 715.14306640625, + "learning_rate": 6.737624380247519e-06, + "loss": 15.9501, + "step": 224160 + }, + { + "epoch": 0.4528375828730956, + "grad_norm": 42.78643798828125, + "learning_rate": 6.737297067170879e-06, + "loss": 20.7435, + "step": 224170 + }, + { + "epoch": 0.4528577835057794, + "grad_norm": 387.38189697265625, + "learning_rate": 6.736969745626867e-06, + "loss": 16.737, + "step": 
224180 + }, + { + "epoch": 0.4528779841384632, + "grad_norm": 52.91683578491211, + "learning_rate": 6.736642415617073e-06, + "loss": 15.4142, + "step": 224190 + }, + { + "epoch": 0.452898184771147, + "grad_norm": 2.1474220752716064, + "learning_rate": 6.736315077143095e-06, + "loss": 27.8739, + "step": 224200 + }, + { + "epoch": 0.45291838540383084, + "grad_norm": 271.73291015625, + "learning_rate": 6.735987730206529e-06, + "loss": 13.633, + "step": 224210 + }, + { + "epoch": 0.45293858603651466, + "grad_norm": 542.3934326171875, + "learning_rate": 6.735660374808969e-06, + "loss": 25.4332, + "step": 224220 + }, + { + "epoch": 0.4529587866691985, + "grad_norm": 7.41579532623291, + "learning_rate": 6.73533301095201e-06, + "loss": 15.1197, + "step": 224230 + }, + { + "epoch": 0.4529789873018823, + "grad_norm": 362.191650390625, + "learning_rate": 6.7350056386372485e-06, + "loss": 18.961, + "step": 224240 + }, + { + "epoch": 0.4529991879345661, + "grad_norm": 606.1195068359375, + "learning_rate": 6.7346782578662795e-06, + "loss": 23.9658, + "step": 224250 + }, + { + "epoch": 0.45301938856724994, + "grad_norm": 488.9588623046875, + "learning_rate": 6.7343508686407e-06, + "loss": 28.3714, + "step": 224260 + }, + { + "epoch": 0.45303958919993376, + "grad_norm": 119.45907592773438, + "learning_rate": 6.734023470962106e-06, + "loss": 22.3117, + "step": 224270 + }, + { + "epoch": 0.4530597898326176, + "grad_norm": 89.31884002685547, + "learning_rate": 6.733696064832089e-06, + "loss": 21.5681, + "step": 224280 + }, + { + "epoch": 0.4530799904653014, + "grad_norm": 162.55581665039062, + "learning_rate": 6.733368650252249e-06, + "loss": 18.6347, + "step": 224290 + }, + { + "epoch": 0.4531001910979852, + "grad_norm": 354.7278747558594, + "learning_rate": 6.733041227224182e-06, + "loss": 37.7039, + "step": 224300 + }, + { + "epoch": 0.453120391730669, + "grad_norm": 279.0623474121094, + "learning_rate": 6.732713795749479e-06, + "loss": 20.6569, + "step": 224310 + }, + { + "epoch": 0.4531405923633528, + "grad_norm": 291.3842468261719, + "learning_rate": 6.732386355829742e-06, + "loss": 22.4131, + "step": 224320 + }, + { + "epoch": 0.4531607929960366, + "grad_norm": 306.61767578125, + "learning_rate": 6.7320589074665606e-06, + "loss": 25.2244, + "step": 224330 + }, + { + "epoch": 0.45318099362872044, + "grad_norm": 435.5203552246094, + "learning_rate": 6.7317314506615385e-06, + "loss": 10.8718, + "step": 224340 + }, + { + "epoch": 0.45320119426140426, + "grad_norm": 598.0634765625, + "learning_rate": 6.731403985416265e-06, + "loss": 21.7381, + "step": 224350 + }, + { + "epoch": 0.4532213948940881, + "grad_norm": 50.27971267700195, + "learning_rate": 6.731076511732338e-06, + "loss": 21.2074, + "step": 224360 + }, + { + "epoch": 0.4532415955267719, + "grad_norm": 279.5244445800781, + "learning_rate": 6.730749029611354e-06, + "loss": 12.9486, + "step": 224370 + }, + { + "epoch": 0.4532617961594557, + "grad_norm": 84.92265319824219, + "learning_rate": 6.730421539054911e-06, + "loss": 17.9265, + "step": 224380 + }, + { + "epoch": 0.45328199679213954, + "grad_norm": 196.0972137451172, + "learning_rate": 6.730094040064602e-06, + "loss": 43.3192, + "step": 224390 + }, + { + "epoch": 0.45330219742482336, + "grad_norm": 481.4926452636719, + "learning_rate": 6.729766532642024e-06, + "loss": 22.348, + "step": 224400 + }, + { + "epoch": 0.4533223980575072, + "grad_norm": 509.6544189453125, + "learning_rate": 6.729439016788774e-06, + "loss": 14.9963, + "step": 224410 + }, + { + "epoch": 0.453342598690191, + "grad_norm": 
81.28437805175781, + "learning_rate": 6.72911149250645e-06, + "loss": 9.7053, + "step": 224420 + }, + { + "epoch": 0.45336279932287477, + "grad_norm": 365.74481201171875, + "learning_rate": 6.7287839597966444e-06, + "loss": 22.3584, + "step": 224430 + }, + { + "epoch": 0.4533829999555586, + "grad_norm": 287.0074462890625, + "learning_rate": 6.728456418660954e-06, + "loss": 17.6767, + "step": 224440 + }, + { + "epoch": 0.4534032005882424, + "grad_norm": 754.4468994140625, + "learning_rate": 6.7281288691009795e-06, + "loss": 25.3387, + "step": 224450 + }, + { + "epoch": 0.4534234012209262, + "grad_norm": 496.4479675292969, + "learning_rate": 6.727801311118314e-06, + "loss": 23.3791, + "step": 224460 + }, + { + "epoch": 0.45344360185361005, + "grad_norm": 401.7496643066406, + "learning_rate": 6.727473744714554e-06, + "loss": 14.0117, + "step": 224470 + }, + { + "epoch": 0.45346380248629387, + "grad_norm": 221.49588012695312, + "learning_rate": 6.727146169891297e-06, + "loss": 20.1051, + "step": 224480 + }, + { + "epoch": 0.4534840031189777, + "grad_norm": 53.74786376953125, + "learning_rate": 6.726818586650137e-06, + "loss": 20.79, + "step": 224490 + }, + { + "epoch": 0.4535042037516615, + "grad_norm": 272.5646667480469, + "learning_rate": 6.7264909949926735e-06, + "loss": 16.9446, + "step": 224500 + }, + { + "epoch": 0.4535244043843453, + "grad_norm": 380.64703369140625, + "learning_rate": 6.726163394920503e-06, + "loss": 42.3083, + "step": 224510 + }, + { + "epoch": 0.45354460501702915, + "grad_norm": 193.45034790039062, + "learning_rate": 6.725835786435222e-06, + "loss": 16.3565, + "step": 224520 + }, + { + "epoch": 0.45356480564971297, + "grad_norm": 494.8837585449219, + "learning_rate": 6.725508169538425e-06, + "loss": 30.3393, + "step": 224530 + }, + { + "epoch": 0.4535850062823968, + "grad_norm": 393.85882568359375, + "learning_rate": 6.725180544231711e-06, + "loss": 14.4976, + "step": 224540 + }, + { + "epoch": 0.4536052069150806, + "grad_norm": 404.4794921875, + "learning_rate": 6.7248529105166785e-06, + "loss": 15.4334, + "step": 224550 + }, + { + "epoch": 0.45362540754776437, + "grad_norm": 846.5159301757812, + "learning_rate": 6.724525268394919e-06, + "loss": 25.1088, + "step": 224560 + }, + { + "epoch": 0.4536456081804482, + "grad_norm": 659.883056640625, + "learning_rate": 6.7241976178680335e-06, + "loss": 23.7296, + "step": 224570 + }, + { + "epoch": 0.453665808813132, + "grad_norm": 489.0628967285156, + "learning_rate": 6.723869958937619e-06, + "loss": 17.5904, + "step": 224580 + }, + { + "epoch": 0.45368600944581583, + "grad_norm": 516.4916381835938, + "learning_rate": 6.723542291605271e-06, + "loss": 16.8007, + "step": 224590 + }, + { + "epoch": 0.45370621007849965, + "grad_norm": 22.261503219604492, + "learning_rate": 6.723214615872585e-06, + "loss": 15.365, + "step": 224600 + }, + { + "epoch": 0.45372641071118347, + "grad_norm": 216.9540557861328, + "learning_rate": 6.722886931741163e-06, + "loss": 21.4706, + "step": 224610 + }, + { + "epoch": 0.4537466113438673, + "grad_norm": 172.14511108398438, + "learning_rate": 6.7225592392125975e-06, + "loss": 23.9545, + "step": 224620 + }, + { + "epoch": 0.4537668119765511, + "grad_norm": 213.1126251220703, + "learning_rate": 6.722231538288486e-06, + "loss": 18.9931, + "step": 224630 + }, + { + "epoch": 0.45378701260923493, + "grad_norm": 397.17864990234375, + "learning_rate": 6.7219038289704294e-06, + "loss": 17.2522, + "step": 224640 + }, + { + "epoch": 0.45380721324191875, + "grad_norm": 338.5043640136719, + "learning_rate": 
6.72157611126002e-06, + "loss": 18.2936, + "step": 224650 + }, + { + "epoch": 0.45382741387460257, + "grad_norm": 69.46562194824219, + "learning_rate": 6.721248385158859e-06, + "loss": 12.1464, + "step": 224660 + }, + { + "epoch": 0.4538476145072864, + "grad_norm": 509.01617431640625, + "learning_rate": 6.720920650668542e-06, + "loss": 20.8537, + "step": 224670 + }, + { + "epoch": 0.4538678151399702, + "grad_norm": 457.7559509277344, + "learning_rate": 6.720592907790667e-06, + "loss": 17.0811, + "step": 224680 + }, + { + "epoch": 0.453888015772654, + "grad_norm": 399.3314208984375, + "learning_rate": 6.720265156526828e-06, + "loss": 15.585, + "step": 224690 + }, + { + "epoch": 0.4539082164053378, + "grad_norm": 176.3326873779297, + "learning_rate": 6.719937396878628e-06, + "loss": 20.0065, + "step": 224700 + }, + { + "epoch": 0.4539284170380216, + "grad_norm": 99.04472351074219, + "learning_rate": 6.719609628847662e-06, + "loss": 10.5466, + "step": 224710 + }, + { + "epoch": 0.45394861767070543, + "grad_norm": 11.641637802124023, + "learning_rate": 6.7192818524355266e-06, + "loss": 14.1317, + "step": 224720 + }, + { + "epoch": 0.45396881830338925, + "grad_norm": 356.7647399902344, + "learning_rate": 6.7189540676438195e-06, + "loss": 16.1533, + "step": 224730 + }, + { + "epoch": 0.4539890189360731, + "grad_norm": 532.47705078125, + "learning_rate": 6.718626274474138e-06, + "loss": 16.446, + "step": 224740 + }, + { + "epoch": 0.4540092195687569, + "grad_norm": 143.8042755126953, + "learning_rate": 6.718298472928082e-06, + "loss": 24.8809, + "step": 224750 + }, + { + "epoch": 0.4540294202014407, + "grad_norm": 692.6828002929688, + "learning_rate": 6.717970663007245e-06, + "loss": 14.8526, + "step": 224760 + }, + { + "epoch": 0.45404962083412453, + "grad_norm": 855.0104370117188, + "learning_rate": 6.71764284471323e-06, + "loss": 22.105, + "step": 224770 + }, + { + "epoch": 0.45406982146680835, + "grad_norm": 213.2781219482422, + "learning_rate": 6.717315018047631e-06, + "loss": 19.6701, + "step": 224780 + }, + { + "epoch": 0.4540900220994922, + "grad_norm": 436.2740173339844, + "learning_rate": 6.716987183012048e-06, + "loss": 21.4793, + "step": 224790 + }, + { + "epoch": 0.454110222732176, + "grad_norm": 242.42498779296875, + "learning_rate": 6.716659339608077e-06, + "loss": 27.442, + "step": 224800 + }, + { + "epoch": 0.4541304233648598, + "grad_norm": 389.03875732421875, + "learning_rate": 6.7163314878373166e-06, + "loss": 19.0603, + "step": 224810 + }, + { + "epoch": 0.4541506239975436, + "grad_norm": 16.420045852661133, + "learning_rate": 6.716003627701365e-06, + "loss": 23.3658, + "step": 224820 + }, + { + "epoch": 0.4541708246302274, + "grad_norm": 807.7119750976562, + "learning_rate": 6.71567575920182e-06, + "loss": 20.549, + "step": 224830 + }, + { + "epoch": 0.4541910252629112, + "grad_norm": 405.46063232421875, + "learning_rate": 6.715347882340278e-06, + "loss": 20.5764, + "step": 224840 + }, + { + "epoch": 0.45421122589559504, + "grad_norm": 301.35699462890625, + "learning_rate": 6.7150199971183395e-06, + "loss": 27.6921, + "step": 224850 + }, + { + "epoch": 0.45423142652827886, + "grad_norm": 528.4838256835938, + "learning_rate": 6.714692103537601e-06, + "loss": 15.2521, + "step": 224860 + }, + { + "epoch": 0.4542516271609627, + "grad_norm": 877.6928100585938, + "learning_rate": 6.7143642015996626e-06, + "loss": 21.6614, + "step": 224870 + }, + { + "epoch": 0.4542718277936465, + "grad_norm": 28.3657169342041, + "learning_rate": 6.714036291306121e-06, + "loss": 18.8145, + "step": 
224880 + }, + { + "epoch": 0.4542920284263303, + "grad_norm": 690.2528686523438, + "learning_rate": 6.7137083726585724e-06, + "loss": 17.7149, + "step": 224890 + }, + { + "epoch": 0.45431222905901414, + "grad_norm": 767.5856323242188, + "learning_rate": 6.713380445658618e-06, + "loss": 11.7709, + "step": 224900 + }, + { + "epoch": 0.45433242969169796, + "grad_norm": 504.2879638671875, + "learning_rate": 6.713052510307856e-06, + "loss": 15.2106, + "step": 224910 + }, + { + "epoch": 0.4543526303243818, + "grad_norm": 431.6560363769531, + "learning_rate": 6.712724566607882e-06, + "loss": 14.1891, + "step": 224920 + }, + { + "epoch": 0.4543728309570656, + "grad_norm": 486.3409118652344, + "learning_rate": 6.712396614560298e-06, + "loss": 21.3397, + "step": 224930 + }, + { + "epoch": 0.45439303158974936, + "grad_norm": 399.1619567871094, + "learning_rate": 6.712068654166699e-06, + "loss": 20.931, + "step": 224940 + }, + { + "epoch": 0.4544132322224332, + "grad_norm": 874.1525268554688, + "learning_rate": 6.711740685428687e-06, + "loss": 18.339, + "step": 224950 + }, + { + "epoch": 0.454433432855117, + "grad_norm": 626.2977294921875, + "learning_rate": 6.711412708347857e-06, + "loss": 41.3339, + "step": 224960 + }, + { + "epoch": 0.4544536334878008, + "grad_norm": 573.0205688476562, + "learning_rate": 6.711084722925809e-06, + "loss": 26.5318, + "step": 224970 + }, + { + "epoch": 0.45447383412048464, + "grad_norm": 427.6914367675781, + "learning_rate": 6.7107567291641425e-06, + "loss": 23.2325, + "step": 224980 + }, + { + "epoch": 0.45449403475316846, + "grad_norm": 544.6525268554688, + "learning_rate": 6.710428727064454e-06, + "loss": 10.5366, + "step": 224990 + }, + { + "epoch": 0.4545142353858523, + "grad_norm": 89.42131805419922, + "learning_rate": 6.710100716628345e-06, + "loss": 16.2111, + "step": 225000 + }, + { + "epoch": 0.4545344360185361, + "grad_norm": 193.01864624023438, + "learning_rate": 6.709772697857411e-06, + "loss": 20.8958, + "step": 225010 + }, + { + "epoch": 0.4545546366512199, + "grad_norm": 55.706871032714844, + "learning_rate": 6.709444670753252e-06, + "loss": 15.9876, + "step": 225020 + }, + { + "epoch": 0.45457483728390374, + "grad_norm": 261.5452880859375, + "learning_rate": 6.709116635317469e-06, + "loss": 20.595, + "step": 225030 + }, + { + "epoch": 0.45459503791658756, + "grad_norm": 322.2333679199219, + "learning_rate": 6.708788591551658e-06, + "loss": 10.3576, + "step": 225040 + }, + { + "epoch": 0.4546152385492714, + "grad_norm": 182.4540252685547, + "learning_rate": 6.708460539457418e-06, + "loss": 27.0368, + "step": 225050 + }, + { + "epoch": 0.4546354391819552, + "grad_norm": 252.28440856933594, + "learning_rate": 6.708132479036349e-06, + "loss": 18.9177, + "step": 225060 + }, + { + "epoch": 0.45465563981463897, + "grad_norm": 294.22955322265625, + "learning_rate": 6.707804410290049e-06, + "loss": 16.5476, + "step": 225070 + }, + { + "epoch": 0.4546758404473228, + "grad_norm": 678.5457153320312, + "learning_rate": 6.707476333220116e-06, + "loss": 18.4188, + "step": 225080 + }, + { + "epoch": 0.4546960410800066, + "grad_norm": 1026.339599609375, + "learning_rate": 6.707148247828153e-06, + "loss": 26.8379, + "step": 225090 + }, + { + "epoch": 0.4547162417126904, + "grad_norm": 236.086181640625, + "learning_rate": 6.7068201541157555e-06, + "loss": 24.4431, + "step": 225100 + }, + { + "epoch": 0.45473644234537425, + "grad_norm": 733.8051147460938, + "learning_rate": 6.706492052084524e-06, + "loss": 20.2767, + "step": 225110 + }, + { + "epoch": 0.45475664297805807, 
+ "grad_norm": 366.6125183105469, + "learning_rate": 6.706163941736057e-06, + "loss": 17.8546, + "step": 225120 + }, + { + "epoch": 0.4547768436107419, + "grad_norm": 425.5577087402344, + "learning_rate": 6.705835823071953e-06, + "loss": 15.3798, + "step": 225130 + }, + { + "epoch": 0.4547970442434257, + "grad_norm": 246.91073608398438, + "learning_rate": 6.7055076960938135e-06, + "loss": 11.608, + "step": 225140 + }, + { + "epoch": 0.4548172448761095, + "grad_norm": 77.5411148071289, + "learning_rate": 6.705179560803236e-06, + "loss": 7.9056, + "step": 225150 + }, + { + "epoch": 0.45483744550879335, + "grad_norm": 2.3171474933624268, + "learning_rate": 6.704851417201821e-06, + "loss": 15.201, + "step": 225160 + }, + { + "epoch": 0.45485764614147717, + "grad_norm": 199.99977111816406, + "learning_rate": 6.704523265291165e-06, + "loss": 13.8719, + "step": 225170 + }, + { + "epoch": 0.454877846774161, + "grad_norm": 611.3469848632812, + "learning_rate": 6.704195105072871e-06, + "loss": 11.1166, + "step": 225180 + }, + { + "epoch": 0.4548980474068448, + "grad_norm": 300.5711669921875, + "learning_rate": 6.703866936548534e-06, + "loss": 20.5433, + "step": 225190 + }, + { + "epoch": 0.45491824803952857, + "grad_norm": 583.619873046875, + "learning_rate": 6.70353875971976e-06, + "loss": 26.941, + "step": 225200 + }, + { + "epoch": 0.4549384486722124, + "grad_norm": 640.599853515625, + "learning_rate": 6.703210574588142e-06, + "loss": 16.2709, + "step": 225210 + }, + { + "epoch": 0.4549586493048962, + "grad_norm": 740.7960815429688, + "learning_rate": 6.702882381155283e-06, + "loss": 26.6456, + "step": 225220 + }, + { + "epoch": 0.45497884993758003, + "grad_norm": 45.49562072753906, + "learning_rate": 6.702554179422782e-06, + "loss": 18.9018, + "step": 225230 + }, + { + "epoch": 0.45499905057026385, + "grad_norm": 732.8751831054688, + "learning_rate": 6.702225969392238e-06, + "loss": 20.7339, + "step": 225240 + }, + { + "epoch": 0.45501925120294767, + "grad_norm": 1911.6829833984375, + "learning_rate": 6.701897751065251e-06, + "loss": 21.852, + "step": 225250 + }, + { + "epoch": 0.4550394518356315, + "grad_norm": 242.33111572265625, + "learning_rate": 6.701569524443421e-06, + "loss": 28.6399, + "step": 225260 + }, + { + "epoch": 0.4550596524683153, + "grad_norm": 728.8468017578125, + "learning_rate": 6.701241289528348e-06, + "loss": 17.6148, + "step": 225270 + }, + { + "epoch": 0.45507985310099913, + "grad_norm": 755.6907958984375, + "learning_rate": 6.700913046321631e-06, + "loss": 16.1734, + "step": 225280 + }, + { + "epoch": 0.45510005373368295, + "grad_norm": 472.9110412597656, + "learning_rate": 6.700584794824871e-06, + "loss": 29.4159, + "step": 225290 + }, + { + "epoch": 0.45512025436636677, + "grad_norm": 404.24493408203125, + "learning_rate": 6.700256535039665e-06, + "loss": 25.2351, + "step": 225300 + }, + { + "epoch": 0.4551404549990506, + "grad_norm": 634.1502075195312, + "learning_rate": 6.6999282669676155e-06, + "loss": 13.6189, + "step": 225310 + }, + { + "epoch": 0.4551606556317344, + "grad_norm": 1.6085678339004517, + "learning_rate": 6.699599990610324e-06, + "loss": 19.2426, + "step": 225320 + }, + { + "epoch": 0.4551808562644182, + "grad_norm": 300.082763671875, + "learning_rate": 6.699271705969386e-06, + "loss": 19.8193, + "step": 225330 + }, + { + "epoch": 0.455201056897102, + "grad_norm": 119.96390533447266, + "learning_rate": 6.698943413046404e-06, + "loss": 23.3312, + "step": 225340 + }, + { + "epoch": 0.4552212575297858, + "grad_norm": 372.2552795410156, + 
"learning_rate": 6.698615111842977e-06, + "loss": 19.8844, + "step": 225350 + }, + { + "epoch": 0.45524145816246964, + "grad_norm": 392.03778076171875, + "learning_rate": 6.698286802360708e-06, + "loss": 22.2316, + "step": 225360 + }, + { + "epoch": 0.45526165879515346, + "grad_norm": 127.15750122070312, + "learning_rate": 6.697958484601193e-06, + "loss": 22.944, + "step": 225370 + }, + { + "epoch": 0.4552818594278373, + "grad_norm": 162.3270263671875, + "learning_rate": 6.697630158566038e-06, + "loss": 14.8616, + "step": 225380 + }, + { + "epoch": 0.4553020600605211, + "grad_norm": 462.6418762207031, + "learning_rate": 6.697301824256836e-06, + "loss": 31.5704, + "step": 225390 + }, + { + "epoch": 0.4553222606932049, + "grad_norm": 575.5155639648438, + "learning_rate": 6.6969734816751906e-06, + "loss": 14.8419, + "step": 225400 + }, + { + "epoch": 0.45534246132588874, + "grad_norm": 397.850830078125, + "learning_rate": 6.696645130822704e-06, + "loss": 20.0939, + "step": 225410 + }, + { + "epoch": 0.45536266195857256, + "grad_norm": 236.34840393066406, + "learning_rate": 6.6963167717009745e-06, + "loss": 14.465, + "step": 225420 + }, + { + "epoch": 0.4553828625912564, + "grad_norm": 277.0977783203125, + "learning_rate": 6.695988404311603e-06, + "loss": 28.5895, + "step": 225430 + }, + { + "epoch": 0.4554030632239402, + "grad_norm": 553.6280517578125, + "learning_rate": 6.695660028656189e-06, + "loss": 22.1155, + "step": 225440 + }, + { + "epoch": 0.455423263856624, + "grad_norm": 379.44427490234375, + "learning_rate": 6.6953316447363335e-06, + "loss": 26.2365, + "step": 225450 + }, + { + "epoch": 0.4554434644893078, + "grad_norm": 708.2862548828125, + "learning_rate": 6.695003252553638e-06, + "loss": 21.1336, + "step": 225460 + }, + { + "epoch": 0.4554636651219916, + "grad_norm": 75.57585144042969, + "learning_rate": 6.694674852109701e-06, + "loss": 9.9841, + "step": 225470 + }, + { + "epoch": 0.4554838657546754, + "grad_norm": 230.75897216796875, + "learning_rate": 6.694346443406126e-06, + "loss": 16.4379, + "step": 225480 + }, + { + "epoch": 0.45550406638735924, + "grad_norm": 595.7857666015625, + "learning_rate": 6.694018026444511e-06, + "loss": 21.4264, + "step": 225490 + }, + { + "epoch": 0.45552426702004306, + "grad_norm": 300.95037841796875, + "learning_rate": 6.693689601226458e-06, + "loss": 11.351, + "step": 225500 + }, + { + "epoch": 0.4555444676527269, + "grad_norm": 620.47412109375, + "learning_rate": 6.693361167753567e-06, + "loss": 44.0126, + "step": 225510 + }, + { + "epoch": 0.4555646682854107, + "grad_norm": 602.3052978515625, + "learning_rate": 6.693032726027438e-06, + "loss": 23.7361, + "step": 225520 + }, + { + "epoch": 0.4555848689180945, + "grad_norm": 14.683794975280762, + "learning_rate": 6.692704276049674e-06, + "loss": 17.5383, + "step": 225530 + }, + { + "epoch": 0.45560506955077834, + "grad_norm": 403.2425842285156, + "learning_rate": 6.6923758178218756e-06, + "loss": 8.8611, + "step": 225540 + }, + { + "epoch": 0.45562527018346216, + "grad_norm": 289.0656433105469, + "learning_rate": 6.692047351345641e-06, + "loss": 30.0383, + "step": 225550 + }, + { + "epoch": 0.455645470816146, + "grad_norm": 423.5158386230469, + "learning_rate": 6.6917188766225736e-06, + "loss": 9.0815, + "step": 225560 + }, + { + "epoch": 0.4556656714488298, + "grad_norm": 630.5218505859375, + "learning_rate": 6.691390393654274e-06, + "loss": 31.1418, + "step": 225570 + }, + { + "epoch": 0.45568587208151357, + "grad_norm": 403.40777587890625, + "learning_rate": 6.691061902442342e-06, + 
"loss": 31.6541, + "step": 225580 + }, + { + "epoch": 0.4557060727141974, + "grad_norm": 296.65716552734375, + "learning_rate": 6.69073340298838e-06, + "loss": 9.8349, + "step": 225590 + }, + { + "epoch": 0.4557262733468812, + "grad_norm": 360.7072448730469, + "learning_rate": 6.690404895293987e-06, + "loss": 16.6198, + "step": 225600 + }, + { + "epoch": 0.455746473979565, + "grad_norm": 168.9312286376953, + "learning_rate": 6.690076379360767e-06, + "loss": 7.6697, + "step": 225610 + }, + { + "epoch": 0.45576667461224885, + "grad_norm": 1287.2730712890625, + "learning_rate": 6.689747855190319e-06, + "loss": 36.319, + "step": 225620 + }, + { + "epoch": 0.45578687524493267, + "grad_norm": 1017.265380859375, + "learning_rate": 6.689419322784245e-06, + "loss": 22.6657, + "step": 225630 + }, + { + "epoch": 0.4558070758776165, + "grad_norm": 391.20941162109375, + "learning_rate": 6.689090782144146e-06, + "loss": 7.4136, + "step": 225640 + }, + { + "epoch": 0.4558272765103003, + "grad_norm": 758.8549194335938, + "learning_rate": 6.688762233271625e-06, + "loss": 19.2401, + "step": 225650 + }, + { + "epoch": 0.4558474771429841, + "grad_norm": 501.3313293457031, + "learning_rate": 6.68843367616828e-06, + "loss": 24.5548, + "step": 225660 + }, + { + "epoch": 0.45586767777566795, + "grad_norm": 1015.6280517578125, + "learning_rate": 6.6881051108357146e-06, + "loss": 19.7456, + "step": 225670 + }, + { + "epoch": 0.45588787840835177, + "grad_norm": 314.453125, + "learning_rate": 6.68777653727553e-06, + "loss": 27.9557, + "step": 225680 + }, + { + "epoch": 0.4559080790410356, + "grad_norm": 257.3055725097656, + "learning_rate": 6.687447955489326e-06, + "loss": 23.2862, + "step": 225690 + }, + { + "epoch": 0.4559282796737194, + "grad_norm": 604.030029296875, + "learning_rate": 6.687119365478707e-06, + "loss": 27.7463, + "step": 225700 + }, + { + "epoch": 0.45594848030640317, + "grad_norm": 238.85536193847656, + "learning_rate": 6.68679076724527e-06, + "loss": 18.7839, + "step": 225710 + }, + { + "epoch": 0.455968680939087, + "grad_norm": 575.19287109375, + "learning_rate": 6.686462160790623e-06, + "loss": 17.8359, + "step": 225720 + }, + { + "epoch": 0.4559888815717708, + "grad_norm": 370.54681396484375, + "learning_rate": 6.686133546116363e-06, + "loss": 17.924, + "step": 225730 + }, + { + "epoch": 0.45600908220445463, + "grad_norm": 100.73015594482422, + "learning_rate": 6.685804923224091e-06, + "loss": 12.0782, + "step": 225740 + }, + { + "epoch": 0.45602928283713845, + "grad_norm": 471.8533630371094, + "learning_rate": 6.685476292115411e-06, + "loss": 14.371, + "step": 225750 + }, + { + "epoch": 0.45604948346982227, + "grad_norm": 667.66650390625, + "learning_rate": 6.6851476527919235e-06, + "loss": 12.8826, + "step": 225760 + }, + { + "epoch": 0.4560696841025061, + "grad_norm": 221.37005615234375, + "learning_rate": 6.684819005255232e-06, + "loss": 30.8509, + "step": 225770 + }, + { + "epoch": 0.4560898847351899, + "grad_norm": 447.97076416015625, + "learning_rate": 6.684490349506937e-06, + "loss": 21.0851, + "step": 225780 + }, + { + "epoch": 0.45611008536787373, + "grad_norm": 219.70057678222656, + "learning_rate": 6.6841616855486395e-06, + "loss": 18.8927, + "step": 225790 + }, + { + "epoch": 0.45613028600055755, + "grad_norm": 265.75439453125, + "learning_rate": 6.683833013381942e-06, + "loss": 15.736, + "step": 225800 + }, + { + "epoch": 0.45615048663324137, + "grad_norm": 383.4543151855469, + "learning_rate": 6.683504333008448e-06, + "loss": 21.2642, + "step": 225810 + }, + { + "epoch": 
0.4561706872659252, + "grad_norm": 231.30374145507812, + "learning_rate": 6.683175644429756e-06, + "loss": 25.2174, + "step": 225820 + }, + { + "epoch": 0.456190887898609, + "grad_norm": 736.73583984375, + "learning_rate": 6.682846947647472e-06, + "loss": 12.7884, + "step": 225830 + }, + { + "epoch": 0.4562110885312928, + "grad_norm": 321.27716064453125, + "learning_rate": 6.682518242663195e-06, + "loss": 16.2992, + "step": 225840 + }, + { + "epoch": 0.4562312891639766, + "grad_norm": 222.0, + "learning_rate": 6.682189529478528e-06, + "loss": 18.1253, + "step": 225850 + }, + { + "epoch": 0.4562514897966604, + "grad_norm": 425.1649475097656, + "learning_rate": 6.681860808095074e-06, + "loss": 31.4157, + "step": 225860 + }, + { + "epoch": 0.45627169042934423, + "grad_norm": 445.70001220703125, + "learning_rate": 6.681532078514434e-06, + "loss": 21.3062, + "step": 225870 + }, + { + "epoch": 0.45629189106202805, + "grad_norm": 12.19681167602539, + "learning_rate": 6.681203340738212e-06, + "loss": 20.5821, + "step": 225880 + }, + { + "epoch": 0.4563120916947119, + "grad_norm": 615.259521484375, + "learning_rate": 6.680874594768006e-06, + "loss": 24.3428, + "step": 225890 + }, + { + "epoch": 0.4563322923273957, + "grad_norm": 291.0129699707031, + "learning_rate": 6.680545840605423e-06, + "loss": 34.5789, + "step": 225900 + }, + { + "epoch": 0.4563524929600795, + "grad_norm": 379.67230224609375, + "learning_rate": 6.680217078252063e-06, + "loss": 14.0362, + "step": 225910 + }, + { + "epoch": 0.45637269359276333, + "grad_norm": 347.0411376953125, + "learning_rate": 6.6798883077095276e-06, + "loss": 21.1224, + "step": 225920 + }, + { + "epoch": 0.45639289422544715, + "grad_norm": 96.60426330566406, + "learning_rate": 6.679559528979423e-06, + "loss": 24.0811, + "step": 225930 + }, + { + "epoch": 0.456413094858131, + "grad_norm": 246.80841064453125, + "learning_rate": 6.679230742063347e-06, + "loss": 17.1332, + "step": 225940 + }, + { + "epoch": 0.4564332954908148, + "grad_norm": 203.31515502929688, + "learning_rate": 6.6789019469629034e-06, + "loss": 27.6699, + "step": 225950 + }, + { + "epoch": 0.4564534961234986, + "grad_norm": 1652.2716064453125, + "learning_rate": 6.678573143679696e-06, + "loss": 40.7394, + "step": 225960 + }, + { + "epoch": 0.4564736967561824, + "grad_norm": 1629.622314453125, + "learning_rate": 6.678244332215329e-06, + "loss": 29.8662, + "step": 225970 + }, + { + "epoch": 0.4564938973888662, + "grad_norm": 305.15765380859375, + "learning_rate": 6.677915512571399e-06, + "loss": 14.7993, + "step": 225980 + }, + { + "epoch": 0.45651409802155, + "grad_norm": 489.23284912109375, + "learning_rate": 6.6775866847495155e-06, + "loss": 18.2312, + "step": 225990 + }, + { + "epoch": 0.45653429865423384, + "grad_norm": 248.98838806152344, + "learning_rate": 6.677257848751276e-06, + "loss": 28.0202, + "step": 226000 + }, + { + "epoch": 0.45655449928691766, + "grad_norm": 723.9705810546875, + "learning_rate": 6.676929004578286e-06, + "loss": 21.031, + "step": 226010 + }, + { + "epoch": 0.4565746999196015, + "grad_norm": 1153.4454345703125, + "learning_rate": 6.676600152232147e-06, + "loss": 26.4975, + "step": 226020 + }, + { + "epoch": 0.4565949005522853, + "grad_norm": 252.0647735595703, + "learning_rate": 6.676271291714461e-06, + "loss": 25.1416, + "step": 226030 + }, + { + "epoch": 0.4566151011849691, + "grad_norm": 438.14453125, + "learning_rate": 6.675942423026834e-06, + "loss": 15.9802, + "step": 226040 + }, + { + "epoch": 0.45663530181765294, + "grad_norm": 505.6163635253906, + 
"learning_rate": 6.675613546170866e-06, + "loss": 21.4501, + "step": 226050 + }, + { + "epoch": 0.45665550245033676, + "grad_norm": 724.9590454101562, + "learning_rate": 6.675284661148162e-06, + "loss": 18.4366, + "step": 226060 + }, + { + "epoch": 0.4566757030830206, + "grad_norm": 144.9850616455078, + "learning_rate": 6.6749557679603225e-06, + "loss": 14.9486, + "step": 226070 + }, + { + "epoch": 0.4566959037157044, + "grad_norm": 391.7661437988281, + "learning_rate": 6.674626866608951e-06, + "loss": 12.2022, + "step": 226080 + }, + { + "epoch": 0.4567161043483882, + "grad_norm": 2.0429892539978027, + "learning_rate": 6.674297957095652e-06, + "loss": 14.1796, + "step": 226090 + }, + { + "epoch": 0.456736304981072, + "grad_norm": 697.7444458007812, + "learning_rate": 6.673969039422029e-06, + "loss": 20.4768, + "step": 226100 + }, + { + "epoch": 0.4567565056137558, + "grad_norm": 382.4043884277344, + "learning_rate": 6.673640113589683e-06, + "loss": 15.7105, + "step": 226110 + }, + { + "epoch": 0.4567767062464396, + "grad_norm": 588.4155883789062, + "learning_rate": 6.673311179600218e-06, + "loss": 29.4429, + "step": 226120 + }, + { + "epoch": 0.45679690687912344, + "grad_norm": 756.751220703125, + "learning_rate": 6.672982237455238e-06, + "loss": 13.6926, + "step": 226130 + }, + { + "epoch": 0.45681710751180726, + "grad_norm": 431.67742919921875, + "learning_rate": 6.672653287156345e-06, + "loss": 24.9859, + "step": 226140 + }, + { + "epoch": 0.4568373081444911, + "grad_norm": 438.0408935546875, + "learning_rate": 6.672324328705142e-06, + "loss": 24.0636, + "step": 226150 + }, + { + "epoch": 0.4568575087771749, + "grad_norm": 113.45287322998047, + "learning_rate": 6.671995362103233e-06, + "loss": 15.0136, + "step": 226160 + }, + { + "epoch": 0.4568777094098587, + "grad_norm": 843.9808349609375, + "learning_rate": 6.671666387352223e-06, + "loss": 60.3752, + "step": 226170 + }, + { + "epoch": 0.45689791004254254, + "grad_norm": 517.6583862304688, + "learning_rate": 6.671337404453713e-06, + "loss": 21.3241, + "step": 226180 + }, + { + "epoch": 0.45691811067522636, + "grad_norm": 383.1948547363281, + "learning_rate": 6.671008413409306e-06, + "loss": 16.1406, + "step": 226190 + }, + { + "epoch": 0.4569383113079102, + "grad_norm": 370.60137939453125, + "learning_rate": 6.6706794142206085e-06, + "loss": 25.1204, + "step": 226200 + }, + { + "epoch": 0.456958511940594, + "grad_norm": 252.67543029785156, + "learning_rate": 6.67035040688922e-06, + "loss": 26.6554, + "step": 226210 + }, + { + "epoch": 0.45697871257327777, + "grad_norm": 417.4878234863281, + "learning_rate": 6.6700213914167485e-06, + "loss": 23.3417, + "step": 226220 + }, + { + "epoch": 0.4569989132059616, + "grad_norm": 382.7886047363281, + "learning_rate": 6.669692367804795e-06, + "loss": 25.7658, + "step": 226230 + }, + { + "epoch": 0.4570191138386454, + "grad_norm": 274.5790710449219, + "learning_rate": 6.6693633360549615e-06, + "loss": 37.9028, + "step": 226240 + }, + { + "epoch": 0.4570393144713292, + "grad_norm": 443.1168518066406, + "learning_rate": 6.669034296168855e-06, + "loss": 8.0102, + "step": 226250 + }, + { + "epoch": 0.45705951510401305, + "grad_norm": 38.52375411987305, + "learning_rate": 6.668705248148079e-06, + "loss": 15.8677, + "step": 226260 + }, + { + "epoch": 0.45707971573669687, + "grad_norm": 512.3353271484375, + "learning_rate": 6.668376191994234e-06, + "loss": 26.6839, + "step": 226270 + }, + { + "epoch": 0.4570999163693807, + "grad_norm": 57.37399673461914, + "learning_rate": 6.668047127708927e-06, + 
"loss": 25.7264, + "step": 226280 + }, + { + "epoch": 0.4571201170020645, + "grad_norm": 101.6399917602539, + "learning_rate": 6.667718055293759e-06, + "loss": 15.4918, + "step": 226290 + }, + { + "epoch": 0.4571403176347483, + "grad_norm": 349.11993408203125, + "learning_rate": 6.6673889747503364e-06, + "loss": 18.6506, + "step": 226300 + }, + { + "epoch": 0.45716051826743215, + "grad_norm": 308.615966796875, + "learning_rate": 6.667059886080263e-06, + "loss": 18.3663, + "step": 226310 + }, + { + "epoch": 0.45718071890011597, + "grad_norm": 138.72930908203125, + "learning_rate": 6.66673078928514e-06, + "loss": 12.4551, + "step": 226320 + }, + { + "epoch": 0.4572009195327998, + "grad_norm": 262.0975646972656, + "learning_rate": 6.666401684366575e-06, + "loss": 16.5631, + "step": 226330 + }, + { + "epoch": 0.4572211201654836, + "grad_norm": 270.44305419921875, + "learning_rate": 6.66607257132617e-06, + "loss": 22.5709, + "step": 226340 + }, + { + "epoch": 0.45724132079816737, + "grad_norm": 727.0762329101562, + "learning_rate": 6.665743450165528e-06, + "loss": 13.9652, + "step": 226350 + }, + { + "epoch": 0.4572615214308512, + "grad_norm": 270.78704833984375, + "learning_rate": 6.665414320886256e-06, + "loss": 10.4592, + "step": 226360 + }, + { + "epoch": 0.457281722063535, + "grad_norm": 568.377197265625, + "learning_rate": 6.665085183489955e-06, + "loss": 18.0877, + "step": 226370 + }, + { + "epoch": 0.45730192269621883, + "grad_norm": 613.829833984375, + "learning_rate": 6.664756037978233e-06, + "loss": 17.4813, + "step": 226380 + }, + { + "epoch": 0.45732212332890265, + "grad_norm": 503.44622802734375, + "learning_rate": 6.664426884352691e-06, + "loss": 30.1703, + "step": 226390 + }, + { + "epoch": 0.45734232396158647, + "grad_norm": 189.15289306640625, + "learning_rate": 6.664097722614934e-06, + "loss": 10.3167, + "step": 226400 + }, + { + "epoch": 0.4573625245942703, + "grad_norm": 450.5589294433594, + "learning_rate": 6.663768552766566e-06, + "loss": 32.9329, + "step": 226410 + }, + { + "epoch": 0.4573827252269541, + "grad_norm": 104.55570220947266, + "learning_rate": 6.663439374809194e-06, + "loss": 28.3364, + "step": 226420 + }, + { + "epoch": 0.45740292585963793, + "grad_norm": 301.110595703125, + "learning_rate": 6.663110188744417e-06, + "loss": 15.3579, + "step": 226430 + }, + { + "epoch": 0.45742312649232175, + "grad_norm": 536.1530151367188, + "learning_rate": 6.662780994573846e-06, + "loss": 17.2823, + "step": 226440 + }, + { + "epoch": 0.45744332712500557, + "grad_norm": 612.8405151367188, + "learning_rate": 6.6624517922990795e-06, + "loss": 25.9868, + "step": 226450 + }, + { + "epoch": 0.4574635277576894, + "grad_norm": 389.212158203125, + "learning_rate": 6.662122581921726e-06, + "loss": 23.0075, + "step": 226460 + }, + { + "epoch": 0.4574837283903732, + "grad_norm": 421.0704345703125, + "learning_rate": 6.661793363443389e-06, + "loss": 22.7034, + "step": 226470 + }, + { + "epoch": 0.457503929023057, + "grad_norm": 280.3844299316406, + "learning_rate": 6.661464136865671e-06, + "loss": 14.6026, + "step": 226480 + }, + { + "epoch": 0.4575241296557408, + "grad_norm": 525.3418579101562, + "learning_rate": 6.6611349021901795e-06, + "loss": 15.6766, + "step": 226490 + }, + { + "epoch": 0.4575443302884246, + "grad_norm": 506.8186340332031, + "learning_rate": 6.6608056594185166e-06, + "loss": 23.4258, + "step": 226500 + }, + { + "epoch": 0.45756453092110844, + "grad_norm": 372.74200439453125, + "learning_rate": 6.66047640855229e-06, + "loss": 22.0078, + "step": 226510 + }, + { + 
"epoch": 0.45758473155379226, + "grad_norm": 74.90923309326172, + "learning_rate": 6.660147149593102e-06, + "loss": 20.9625, + "step": 226520 + }, + { + "epoch": 0.4576049321864761, + "grad_norm": 52.75895309448242, + "learning_rate": 6.659817882542559e-06, + "loss": 26.064, + "step": 226530 + }, + { + "epoch": 0.4576251328191599, + "grad_norm": 638.3198852539062, + "learning_rate": 6.659488607402265e-06, + "loss": 37.9739, + "step": 226540 + }, + { + "epoch": 0.4576453334518437, + "grad_norm": 1.6899328231811523, + "learning_rate": 6.659159324173823e-06, + "loss": 14.513, + "step": 226550 + }, + { + "epoch": 0.45766553408452754, + "grad_norm": 185.67491149902344, + "learning_rate": 6.658830032858841e-06, + "loss": 11.7518, + "step": 226560 + }, + { + "epoch": 0.45768573471721136, + "grad_norm": 451.95306396484375, + "learning_rate": 6.658500733458922e-06, + "loss": 20.5763, + "step": 226570 + }, + { + "epoch": 0.4577059353498952, + "grad_norm": 817.3878173828125, + "learning_rate": 6.658171425975673e-06, + "loss": 19.2115, + "step": 226580 + }, + { + "epoch": 0.457726135982579, + "grad_norm": 373.3409423828125, + "learning_rate": 6.657842110410695e-06, + "loss": 31.3388, + "step": 226590 + }, + { + "epoch": 0.4577463366152628, + "grad_norm": 236.69320678710938, + "learning_rate": 6.657512786765599e-06, + "loss": 10.0431, + "step": 226600 + }, + { + "epoch": 0.4577665372479466, + "grad_norm": 173.1103057861328, + "learning_rate": 6.657183455041984e-06, + "loss": 16.616, + "step": 226610 + }, + { + "epoch": 0.4577867378806304, + "grad_norm": 672.570556640625, + "learning_rate": 6.656854115241458e-06, + "loss": 17.7288, + "step": 226620 + }, + { + "epoch": 0.4578069385133142, + "grad_norm": 399.2940368652344, + "learning_rate": 6.656524767365629e-06, + "loss": 19.0576, + "step": 226630 + }, + { + "epoch": 0.45782713914599804, + "grad_norm": 275.1471862792969, + "learning_rate": 6.656195411416094e-06, + "loss": 9.5872, + "step": 226640 + }, + { + "epoch": 0.45784733977868186, + "grad_norm": 281.56378173828125, + "learning_rate": 6.655866047394468e-06, + "loss": 15.1387, + "step": 226650 + }, + { + "epoch": 0.4578675404113657, + "grad_norm": 1057.09326171875, + "learning_rate": 6.655536675302349e-06, + "loss": 29.3993, + "step": 226660 + }, + { + "epoch": 0.4578877410440495, + "grad_norm": 365.1173400878906, + "learning_rate": 6.655207295141346e-06, + "loss": 13.6611, + "step": 226670 + }, + { + "epoch": 0.4579079416767333, + "grad_norm": 83.75810241699219, + "learning_rate": 6.654877906913064e-06, + "loss": 17.9194, + "step": 226680 + }, + { + "epoch": 0.45792814230941714, + "grad_norm": 437.2760314941406, + "learning_rate": 6.654548510619108e-06, + "loss": 24.4468, + "step": 226690 + }, + { + "epoch": 0.45794834294210096, + "grad_norm": 246.66741943359375, + "learning_rate": 6.654219106261082e-06, + "loss": 20.8572, + "step": 226700 + }, + { + "epoch": 0.4579685435747848, + "grad_norm": 113.89134216308594, + "learning_rate": 6.6538896938405935e-06, + "loss": 10.9767, + "step": 226710 + }, + { + "epoch": 0.4579887442074686, + "grad_norm": 366.5423889160156, + "learning_rate": 6.6535602733592465e-06, + "loss": 22.3123, + "step": 226720 + }, + { + "epoch": 0.4580089448401524, + "grad_norm": 701.4179077148438, + "learning_rate": 6.653230844818648e-06, + "loss": 15.3845, + "step": 226730 + }, + { + "epoch": 0.4580291454728362, + "grad_norm": 414.45611572265625, + "learning_rate": 6.6529014082204025e-06, + "loss": 15.3358, + "step": 226740 + }, + { + "epoch": 0.45804934610552, + "grad_norm": 
425.706787109375, + "learning_rate": 6.652571963566116e-06, + "loss": 12.4742, + "step": 226750 + }, + { + "epoch": 0.4580695467382038, + "grad_norm": 746.2068481445312, + "learning_rate": 6.652242510857395e-06, + "loss": 29.1822, + "step": 226760 + }, + { + "epoch": 0.45808974737088765, + "grad_norm": 695.0816040039062, + "learning_rate": 6.651913050095842e-06, + "loss": 20.5539, + "step": 226770 + }, + { + "epoch": 0.45810994800357147, + "grad_norm": 553.7020874023438, + "learning_rate": 6.651583581283068e-06, + "loss": 21.6167, + "step": 226780 + }, + { + "epoch": 0.4581301486362553, + "grad_norm": 190.50221252441406, + "learning_rate": 6.651254104420674e-06, + "loss": 16.591, + "step": 226790 + }, + { + "epoch": 0.4581503492689391, + "grad_norm": 173.61050415039062, + "learning_rate": 6.6509246195102685e-06, + "loss": 13.1653, + "step": 226800 + }, + { + "epoch": 0.4581705499016229, + "grad_norm": 338.4618225097656, + "learning_rate": 6.650595126553459e-06, + "loss": 15.7322, + "step": 226810 + }, + { + "epoch": 0.45819075053430675, + "grad_norm": 3.598544120788574, + "learning_rate": 6.6502656255518435e-06, + "loss": 28.801, + "step": 226820 + }, + { + "epoch": 0.45821095116699057, + "grad_norm": 188.61846923828125, + "learning_rate": 6.649936116507039e-06, + "loss": 24.9716, + "step": 226830 + }, + { + "epoch": 0.4582311517996744, + "grad_norm": 576.9122924804688, + "learning_rate": 6.649606599420643e-06, + "loss": 36.46, + "step": 226840 + }, + { + "epoch": 0.4582513524323582, + "grad_norm": 161.2891845703125, + "learning_rate": 6.649277074294265e-06, + "loss": 25.622, + "step": 226850 + }, + { + "epoch": 0.45827155306504197, + "grad_norm": 216.29861450195312, + "learning_rate": 6.648947541129511e-06, + "loss": 28.6866, + "step": 226860 + }, + { + "epoch": 0.4582917536977258, + "grad_norm": 488.2857360839844, + "learning_rate": 6.648617999927986e-06, + "loss": 20.2214, + "step": 226870 + }, + { + "epoch": 0.4583119543304096, + "grad_norm": 314.35699462890625, + "learning_rate": 6.648288450691298e-06, + "loss": 22.3733, + "step": 226880 + }, + { + "epoch": 0.45833215496309343, + "grad_norm": 1308.2734375, + "learning_rate": 6.647958893421051e-06, + "loss": 22.9058, + "step": 226890 + }, + { + "epoch": 0.45835235559577725, + "grad_norm": 525.2289428710938, + "learning_rate": 6.647629328118852e-06, + "loss": 26.3447, + "step": 226900 + }, + { + "epoch": 0.45837255622846107, + "grad_norm": 586.7451171875, + "learning_rate": 6.647299754786308e-06, + "loss": 19.6053, + "step": 226910 + }, + { + "epoch": 0.4583927568611449, + "grad_norm": 796.2258911132812, + "learning_rate": 6.646970173425026e-06, + "loss": 31.2414, + "step": 226920 + }, + { + "epoch": 0.4584129574938287, + "grad_norm": 226.70315551757812, + "learning_rate": 6.646640584036609e-06, + "loss": 24.8402, + "step": 226930 + }, + { + "epoch": 0.45843315812651253, + "grad_norm": 2001.764404296875, + "learning_rate": 6.6463109866226675e-06, + "loss": 11.9321, + "step": 226940 + }, + { + "epoch": 0.45845335875919635, + "grad_norm": 695.1205444335938, + "learning_rate": 6.645981381184804e-06, + "loss": 22.2517, + "step": 226950 + }, + { + "epoch": 0.45847355939188017, + "grad_norm": 575.7254638671875, + "learning_rate": 6.645651767724628e-06, + "loss": 16.088, + "step": 226960 + }, + { + "epoch": 0.458493760024564, + "grad_norm": 365.404052734375, + "learning_rate": 6.645322146243744e-06, + "loss": 16.3673, + "step": 226970 + }, + { + "epoch": 0.4585139606572478, + "grad_norm": 152.20553588867188, + "learning_rate": 
6.6449925167437604e-06, + "loss": 16.6647, + "step": 226980 + }, + { + "epoch": 0.4585341612899316, + "grad_norm": 698.5183715820312, + "learning_rate": 6.644662879226282e-06, + "loss": 26.9254, + "step": 226990 + }, + { + "epoch": 0.4585543619226154, + "grad_norm": 123.05801391601562, + "learning_rate": 6.644333233692917e-06, + "loss": 9.6099, + "step": 227000 + }, + { + "epoch": 0.4585745625552992, + "grad_norm": 378.0152282714844, + "learning_rate": 6.6440035801452705e-06, + "loss": 26.5745, + "step": 227010 + }, + { + "epoch": 0.45859476318798303, + "grad_norm": 508.7265319824219, + "learning_rate": 6.643673918584951e-06, + "loss": 15.1178, + "step": 227020 + }, + { + "epoch": 0.45861496382066685, + "grad_norm": 447.2159423828125, + "learning_rate": 6.643344249013562e-06, + "loss": 25.3622, + "step": 227030 + }, + { + "epoch": 0.4586351644533507, + "grad_norm": 1060.10986328125, + "learning_rate": 6.643014571432715e-06, + "loss": 27.1324, + "step": 227040 + }, + { + "epoch": 0.4586553650860345, + "grad_norm": 620.6658325195312, + "learning_rate": 6.642684885844013e-06, + "loss": 18.6872, + "step": 227050 + }, + { + "epoch": 0.4586755657187183, + "grad_norm": 566.2651977539062, + "learning_rate": 6.642355192249065e-06, + "loss": 14.9459, + "step": 227060 + }, + { + "epoch": 0.45869576635140213, + "grad_norm": 620.1211547851562, + "learning_rate": 6.642025490649475e-06, + "loss": 19.6324, + "step": 227070 + }, + { + "epoch": 0.45871596698408595, + "grad_norm": 571.9847412109375, + "learning_rate": 6.6416957810468555e-06, + "loss": 24.1564, + "step": 227080 + }, + { + "epoch": 0.4587361676167698, + "grad_norm": 301.05316162109375, + "learning_rate": 6.641366063442806e-06, + "loss": 10.7162, + "step": 227090 + }, + { + "epoch": 0.4587563682494536, + "grad_norm": 515.2897338867188, + "learning_rate": 6.64103633783894e-06, + "loss": 14.8232, + "step": 227100 + }, + { + "epoch": 0.4587765688821374, + "grad_norm": 167.2313995361328, + "learning_rate": 6.64070660423686e-06, + "loss": 7.9229, + "step": 227110 + }, + { + "epoch": 0.4587967695148212, + "grad_norm": 423.76812744140625, + "learning_rate": 6.640376862638176e-06, + "loss": 20.9157, + "step": 227120 + }, + { + "epoch": 0.458816970147505, + "grad_norm": 487.71142578125, + "learning_rate": 6.640047113044493e-06, + "loss": 19.3634, + "step": 227130 + }, + { + "epoch": 0.4588371707801888, + "grad_norm": 193.6186981201172, + "learning_rate": 6.63971735545742e-06, + "loss": 26.5241, + "step": 227140 + }, + { + "epoch": 0.45885737141287264, + "grad_norm": 93.84468078613281, + "learning_rate": 6.6393875898785655e-06, + "loss": 24.1329, + "step": 227150 + }, + { + "epoch": 0.45887757204555646, + "grad_norm": 403.93365478515625, + "learning_rate": 6.639057816309532e-06, + "loss": 16.5268, + "step": 227160 + }, + { + "epoch": 0.4588977726782403, + "grad_norm": 106.60189056396484, + "learning_rate": 6.638728034751931e-06, + "loss": 12.1873, + "step": 227170 + }, + { + "epoch": 0.4589179733109241, + "grad_norm": 219.35626220703125, + "learning_rate": 6.638398245207367e-06, + "loss": 11.526, + "step": 227180 + }, + { + "epoch": 0.4589381739436079, + "grad_norm": 569.699951171875, + "learning_rate": 6.638068447677449e-06, + "loss": 26.8491, + "step": 227190 + }, + { + "epoch": 0.45895837457629174, + "grad_norm": 655.5698852539062, + "learning_rate": 6.637738642163785e-06, + "loss": 24.5225, + "step": 227200 + }, + { + "epoch": 0.45897857520897556, + "grad_norm": 374.3693542480469, + "learning_rate": 6.637408828667982e-06, + "loss": 19.3083, + "step": 
227210 + }, + { + "epoch": 0.4589987758416594, + "grad_norm": 121.1509780883789, + "learning_rate": 6.6370790071916456e-06, + "loss": 15.769, + "step": 227220 + }, + { + "epoch": 0.4590189764743432, + "grad_norm": 385.0738525390625, + "learning_rate": 6.6367491777363845e-06, + "loss": 19.7486, + "step": 227230 + }, + { + "epoch": 0.459039177107027, + "grad_norm": 286.7579345703125, + "learning_rate": 6.636419340303808e-06, + "loss": 35.0856, + "step": 227240 + }, + { + "epoch": 0.4590593777397108, + "grad_norm": 497.38653564453125, + "learning_rate": 6.63608949489552e-06, + "loss": 22.0173, + "step": 227250 + }, + { + "epoch": 0.4590795783723946, + "grad_norm": 49.78174591064453, + "learning_rate": 6.635759641513132e-06, + "loss": 23.5303, + "step": 227260 + }, + { + "epoch": 0.4590997790050784, + "grad_norm": 488.2474365234375, + "learning_rate": 6.635429780158248e-06, + "loss": 18.4013, + "step": 227270 + }, + { + "epoch": 0.45911997963776224, + "grad_norm": 582.5839233398438, + "learning_rate": 6.63509991083248e-06, + "loss": 14.5305, + "step": 227280 + }, + { + "epoch": 0.45914018027044606, + "grad_norm": 505.1718444824219, + "learning_rate": 6.634770033537432e-06, + "loss": 43.9581, + "step": 227290 + }, + { + "epoch": 0.4591603809031299, + "grad_norm": 431.9947814941406, + "learning_rate": 6.634440148274712e-06, + "loss": 16.2944, + "step": 227300 + }, + { + "epoch": 0.4591805815358137, + "grad_norm": 713.578857421875, + "learning_rate": 6.634110255045931e-06, + "loss": 14.6432, + "step": 227310 + }, + { + "epoch": 0.4592007821684975, + "grad_norm": 388.4759216308594, + "learning_rate": 6.633780353852695e-06, + "loss": 25.6398, + "step": 227320 + }, + { + "epoch": 0.45922098280118134, + "grad_norm": 1456.6639404296875, + "learning_rate": 6.6334504446966095e-06, + "loss": 24.7451, + "step": 227330 + }, + { + "epoch": 0.45924118343386516, + "grad_norm": 248.88380432128906, + "learning_rate": 6.633120527579286e-06, + "loss": 22.3931, + "step": 227340 + }, + { + "epoch": 0.459261384066549, + "grad_norm": 345.763916015625, + "learning_rate": 6.632790602502331e-06, + "loss": 37.7585, + "step": 227350 + }, + { + "epoch": 0.4592815846992328, + "grad_norm": 503.90740966796875, + "learning_rate": 6.632460669467353e-06, + "loss": 11.0966, + "step": 227360 + }, + { + "epoch": 0.4593017853319166, + "grad_norm": 771.942626953125, + "learning_rate": 6.632130728475961e-06, + "loss": 15.9836, + "step": 227370 + }, + { + "epoch": 0.4593219859646004, + "grad_norm": 521.92529296875, + "learning_rate": 6.631800779529759e-06, + "loss": 14.3884, + "step": 227380 + }, + { + "epoch": 0.4593421865972842, + "grad_norm": 194.33541870117188, + "learning_rate": 6.6314708226303596e-06, + "loss": 10.5635, + "step": 227390 + }, + { + "epoch": 0.459362387229968, + "grad_norm": 233.3264923095703, + "learning_rate": 6.631140857779368e-06, + "loss": 14.9375, + "step": 227400 + }, + { + "epoch": 0.45938258786265185, + "grad_norm": 571.1205444335938, + "learning_rate": 6.6308108849783936e-06, + "loss": 16.6, + "step": 227410 + }, + { + "epoch": 0.45940278849533567, + "grad_norm": 977.36669921875, + "learning_rate": 6.630480904229047e-06, + "loss": 23.7895, + "step": 227420 + }, + { + "epoch": 0.4594229891280195, + "grad_norm": 753.84130859375, + "learning_rate": 6.6301509155329315e-06, + "loss": 19.1079, + "step": 227430 + }, + { + "epoch": 0.4594431897607033, + "grad_norm": 268.7891845703125, + "learning_rate": 6.629820918891661e-06, + "loss": 23.4215, + "step": 227440 + }, + { + "epoch": 0.4594633903933871, + 
"grad_norm": 312.3785400390625, + "learning_rate": 6.629490914306839e-06, + "loss": 19.9039, + "step": 227450 + }, + { + "epoch": 0.45948359102607095, + "grad_norm": 374.58233642578125, + "learning_rate": 6.629160901780076e-06, + "loss": 29.6784, + "step": 227460 + }, + { + "epoch": 0.45950379165875477, + "grad_norm": 196.46884155273438, + "learning_rate": 6.62883088131298e-06, + "loss": 14.1337, + "step": 227470 + }, + { + "epoch": 0.4595239922914386, + "grad_norm": 387.6122741699219, + "learning_rate": 6.6285008529071615e-06, + "loss": 17.6004, + "step": 227480 + }, + { + "epoch": 0.4595441929241224, + "grad_norm": 421.771484375, + "learning_rate": 6.628170816564227e-06, + "loss": 22.1367, + "step": 227490 + }, + { + "epoch": 0.45956439355680617, + "grad_norm": 334.7427978515625, + "learning_rate": 6.627840772285784e-06, + "loss": 17.2208, + "step": 227500 + }, + { + "epoch": 0.45958459418949, + "grad_norm": 100.2383804321289, + "learning_rate": 6.627510720073443e-06, + "loss": 18.1849, + "step": 227510 + }, + { + "epoch": 0.4596047948221738, + "grad_norm": 1945.5546875, + "learning_rate": 6.627180659928812e-06, + "loss": 20.4495, + "step": 227520 + }, + { + "epoch": 0.45962499545485763, + "grad_norm": 441.0237121582031, + "learning_rate": 6.626850591853502e-06, + "loss": 32.192, + "step": 227530 + }, + { + "epoch": 0.45964519608754145, + "grad_norm": 317.0296936035156, + "learning_rate": 6.626520515849117e-06, + "loss": 27.84, + "step": 227540 + }, + { + "epoch": 0.45966539672022527, + "grad_norm": 362.31488037109375, + "learning_rate": 6.62619043191727e-06, + "loss": 25.4779, + "step": 227550 + }, + { + "epoch": 0.4596855973529091, + "grad_norm": 173.9813995361328, + "learning_rate": 6.625860340059567e-06, + "loss": 8.8232, + "step": 227560 + }, + { + "epoch": 0.4597057979855929, + "grad_norm": 383.3540344238281, + "learning_rate": 6.6255302402776175e-06, + "loss": 20.7251, + "step": 227570 + }, + { + "epoch": 0.45972599861827673, + "grad_norm": 107.57487487792969, + "learning_rate": 6.625200132573032e-06, + "loss": 12.143, + "step": 227580 + }, + { + "epoch": 0.45974619925096055, + "grad_norm": 222.10330200195312, + "learning_rate": 6.624870016947417e-06, + "loss": 19.9065, + "step": 227590 + }, + { + "epoch": 0.45976639988364437, + "grad_norm": 23.899377822875977, + "learning_rate": 6.624539893402383e-06, + "loss": 17.0291, + "step": 227600 + }, + { + "epoch": 0.4597866005163282, + "grad_norm": 578.3126831054688, + "learning_rate": 6.624209761939539e-06, + "loss": 12.0289, + "step": 227610 + }, + { + "epoch": 0.459806801149012, + "grad_norm": 296.71173095703125, + "learning_rate": 6.623879622560493e-06, + "loss": 16.731, + "step": 227620 + }, + { + "epoch": 0.4598270017816958, + "grad_norm": 255.4971466064453, + "learning_rate": 6.623549475266855e-06, + "loss": 22.7688, + "step": 227630 + }, + { + "epoch": 0.4598472024143796, + "grad_norm": 30.626436233520508, + "learning_rate": 6.6232193200602335e-06, + "loss": 12.6875, + "step": 227640 + }, + { + "epoch": 0.4598674030470634, + "grad_norm": 6.265193939208984, + "learning_rate": 6.622889156942239e-06, + "loss": 9.0322, + "step": 227650 + }, + { + "epoch": 0.45988760367974724, + "grad_norm": 318.8857421875, + "learning_rate": 6.622558985914478e-06, + "loss": 50.1292, + "step": 227660 + }, + { + "epoch": 0.45990780431243106, + "grad_norm": 100.06502532958984, + "learning_rate": 6.622228806978562e-06, + "loss": 24.8582, + "step": 227670 + }, + { + "epoch": 0.4599280049451149, + "grad_norm": 244.34930419921875, + "learning_rate": 
6.6218986201361e-06, + "loss": 16.2447, + "step": 227680 + }, + { + "epoch": 0.4599482055777987, + "grad_norm": 411.37603759765625, + "learning_rate": 6.621568425388701e-06, + "loss": 13.6881, + "step": 227690 + }, + { + "epoch": 0.4599684062104825, + "grad_norm": 79.83688354492188, + "learning_rate": 6.6212382227379726e-06, + "loss": 27.1068, + "step": 227700 + }, + { + "epoch": 0.45998860684316634, + "grad_norm": 278.11492919921875, + "learning_rate": 6.620908012185528e-06, + "loss": 13.4156, + "step": 227710 + }, + { + "epoch": 0.46000880747585016, + "grad_norm": 158.6947479248047, + "learning_rate": 6.6205777937329715e-06, + "loss": 17.897, + "step": 227720 + }, + { + "epoch": 0.460029008108534, + "grad_norm": 377.27593994140625, + "learning_rate": 6.620247567381918e-06, + "loss": 37.6702, + "step": 227730 + }, + { + "epoch": 0.4600492087412178, + "grad_norm": 372.9764709472656, + "learning_rate": 6.619917333133973e-06, + "loss": 32.4328, + "step": 227740 + }, + { + "epoch": 0.4600694093739016, + "grad_norm": 268.5848083496094, + "learning_rate": 6.619587090990748e-06, + "loss": 16.2318, + "step": 227750 + }, + { + "epoch": 0.4600896100065854, + "grad_norm": 558.2783203125, + "learning_rate": 6.619256840953852e-06, + "loss": 32.6995, + "step": 227760 + }, + { + "epoch": 0.4601098106392692, + "grad_norm": 357.1181335449219, + "learning_rate": 6.618926583024894e-06, + "loss": 21.9979, + "step": 227770 + }, + { + "epoch": 0.460130011271953, + "grad_norm": 887.5396118164062, + "learning_rate": 6.618596317205485e-06, + "loss": 16.884, + "step": 227780 + }, + { + "epoch": 0.46015021190463684, + "grad_norm": 393.46160888671875, + "learning_rate": 6.6182660434972325e-06, + "loss": 27.323, + "step": 227790 + }, + { + "epoch": 0.46017041253732066, + "grad_norm": 1260.691162109375, + "learning_rate": 6.617935761901748e-06, + "loss": 31.7005, + "step": 227800 + }, + { + "epoch": 0.4601906131700045, + "grad_norm": 57.322059631347656, + "learning_rate": 6.61760547242064e-06, + "loss": 39.236, + "step": 227810 + }, + { + "epoch": 0.4602108138026883, + "grad_norm": 257.4314270019531, + "learning_rate": 6.617275175055522e-06, + "loss": 39.7078, + "step": 227820 + }, + { + "epoch": 0.4602310144353721, + "grad_norm": 430.6855773925781, + "learning_rate": 6.616944869807999e-06, + "loss": 45.0984, + "step": 227830 + }, + { + "epoch": 0.46025121506805594, + "grad_norm": 118.204345703125, + "learning_rate": 6.616614556679684e-06, + "loss": 25.9735, + "step": 227840 + }, + { + "epoch": 0.46027141570073976, + "grad_norm": 189.1195526123047, + "learning_rate": 6.616284235672184e-06, + "loss": 10.9118, + "step": 227850 + }, + { + "epoch": 0.4602916163334236, + "grad_norm": 497.6725158691406, + "learning_rate": 6.6159539067871114e-06, + "loss": 24.2337, + "step": 227860 + }, + { + "epoch": 0.4603118169661074, + "grad_norm": 55.70745086669922, + "learning_rate": 6.615623570026076e-06, + "loss": 16.5123, + "step": 227870 + }, + { + "epoch": 0.4603320175987912, + "grad_norm": 752.7064819335938, + "learning_rate": 6.615293225390686e-06, + "loss": 29.5757, + "step": 227880 + }, + { + "epoch": 0.460352218231475, + "grad_norm": 137.93495178222656, + "learning_rate": 6.6149628728825535e-06, + "loss": 16.8261, + "step": 227890 + }, + { + "epoch": 0.4603724188641588, + "grad_norm": 127.68555450439453, + "learning_rate": 6.614632512503289e-06, + "loss": 21.0727, + "step": 227900 + }, + { + "epoch": 0.4603926194968426, + "grad_norm": 883.1185913085938, + "learning_rate": 6.614302144254498e-06, + "loss": 25.1303, + "step": 
227910 + }, + { + "epoch": 0.46041282012952645, + "grad_norm": 485.72283935546875, + "learning_rate": 6.613971768137799e-06, + "loss": 19.8868, + "step": 227920 + }, + { + "epoch": 0.46043302076221027, + "grad_norm": 580.4060668945312, + "learning_rate": 6.613641384154794e-06, + "loss": 12.1131, + "step": 227930 + }, + { + "epoch": 0.4604532213948941, + "grad_norm": 318.0347900390625, + "learning_rate": 6.613310992307097e-06, + "loss": 22.3772, + "step": 227940 + }, + { + "epoch": 0.4604734220275779, + "grad_norm": 928.0330200195312, + "learning_rate": 6.612980592596319e-06, + "loss": 18.6223, + "step": 227950 + }, + { + "epoch": 0.4604936226602617, + "grad_norm": 196.65948486328125, + "learning_rate": 6.612650185024068e-06, + "loss": 20.8005, + "step": 227960 + }, + { + "epoch": 0.46051382329294555, + "grad_norm": 203.98886108398438, + "learning_rate": 6.612319769591955e-06, + "loss": 24.5998, + "step": 227970 + }, + { + "epoch": 0.46053402392562937, + "grad_norm": 447.0224914550781, + "learning_rate": 6.611989346301594e-06, + "loss": 19.2729, + "step": 227980 + }, + { + "epoch": 0.4605542245583132, + "grad_norm": 9.080937385559082, + "learning_rate": 6.611658915154589e-06, + "loss": 13.6669, + "step": 227990 + }, + { + "epoch": 0.460574425190997, + "grad_norm": 334.2945251464844, + "learning_rate": 6.611328476152557e-06, + "loss": 18.59, + "step": 228000 + }, + { + "epoch": 0.46059462582368077, + "grad_norm": 392.900390625, + "learning_rate": 6.610998029297103e-06, + "loss": 17.4096, + "step": 228010 + }, + { + "epoch": 0.4606148264563646, + "grad_norm": 375.4248962402344, + "learning_rate": 6.610667574589841e-06, + "loss": 27.4371, + "step": 228020 + }, + { + "epoch": 0.4606350270890484, + "grad_norm": 266.73040771484375, + "learning_rate": 6.610337112032381e-06, + "loss": 23.259, + "step": 228030 + }, + { + "epoch": 0.46065522772173223, + "grad_norm": 22.642879486083984, + "learning_rate": 6.610006641626332e-06, + "loss": 8.5521, + "step": 228040 + }, + { + "epoch": 0.46067542835441605, + "grad_norm": 181.7366485595703, + "learning_rate": 6.6096761633733065e-06, + "loss": 27.2741, + "step": 228050 + }, + { + "epoch": 0.46069562898709987, + "grad_norm": 358.1246643066406, + "learning_rate": 6.6093456772749155e-06, + "loss": 15.0568, + "step": 228060 + }, + { + "epoch": 0.4607158296197837, + "grad_norm": 423.1742858886719, + "learning_rate": 6.609015183332767e-06, + "loss": 18.3091, + "step": 228070 + }, + { + "epoch": 0.4607360302524675, + "grad_norm": 274.8656921386719, + "learning_rate": 6.608684681548475e-06, + "loss": 27.7, + "step": 228080 + }, + { + "epoch": 0.46075623088515133, + "grad_norm": 284.6326599121094, + "learning_rate": 6.608354171923649e-06, + "loss": 56.2296, + "step": 228090 + }, + { + "epoch": 0.46077643151783515, + "grad_norm": 419.8416748046875, + "learning_rate": 6.6080236544599e-06, + "loss": 32.4822, + "step": 228100 + }, + { + "epoch": 0.46079663215051897, + "grad_norm": 161.27183532714844, + "learning_rate": 6.6076931291588375e-06, + "loss": 29.2, + "step": 228110 + }, + { + "epoch": 0.4608168327832028, + "grad_norm": 602.0552368164062, + "learning_rate": 6.607362596022074e-06, + "loss": 27.7334, + "step": 228120 + }, + { + "epoch": 0.4608370334158866, + "grad_norm": 424.15740966796875, + "learning_rate": 6.607032055051221e-06, + "loss": 11.4215, + "step": 228130 + }, + { + "epoch": 0.4608572340485704, + "grad_norm": 76.31950378417969, + "learning_rate": 6.606701506247889e-06, + "loss": 10.4816, + "step": 228140 + }, + { + "epoch": 0.4608774346812542, + 
"grad_norm": 395.2456970214844, + "learning_rate": 6.606370949613688e-06, + "loss": 11.1584, + "step": 228150 + }, + { + "epoch": 0.460897635313938, + "grad_norm": 574.4743041992188, + "learning_rate": 6.60604038515023e-06, + "loss": 24.3671, + "step": 228160 + }, + { + "epoch": 0.46091783594662183, + "grad_norm": 318.00225830078125, + "learning_rate": 6.605709812859126e-06, + "loss": 11.456, + "step": 228170 + }, + { + "epoch": 0.46093803657930565, + "grad_norm": 676.1705932617188, + "learning_rate": 6.605379232741986e-06, + "loss": 11.9738, + "step": 228180 + }, + { + "epoch": 0.4609582372119895, + "grad_norm": 306.6190490722656, + "learning_rate": 6.605048644800425e-06, + "loss": 11.9495, + "step": 228190 + }, + { + "epoch": 0.4609784378446733, + "grad_norm": 686.2730712890625, + "learning_rate": 6.604718049036047e-06, + "loss": 9.0638, + "step": 228200 + }, + { + "epoch": 0.4609986384773571, + "grad_norm": 309.8995361328125, + "learning_rate": 6.604387445450472e-06, + "loss": 8.2225, + "step": 228210 + }, + { + "epoch": 0.46101883911004093, + "grad_norm": 328.4812927246094, + "learning_rate": 6.604056834045306e-06, + "loss": 23.514, + "step": 228220 + }, + { + "epoch": 0.46103903974272475, + "grad_norm": 361.141357421875, + "learning_rate": 6.603726214822161e-06, + "loss": 25.217, + "step": 228230 + }, + { + "epoch": 0.4610592403754086, + "grad_norm": 428.0789794921875, + "learning_rate": 6.6033955877826495e-06, + "loss": 14.9675, + "step": 228240 + }, + { + "epoch": 0.4610794410080924, + "grad_norm": 147.8428955078125, + "learning_rate": 6.603064952928382e-06, + "loss": 38.2697, + "step": 228250 + }, + { + "epoch": 0.4610996416407762, + "grad_norm": 849.7745361328125, + "learning_rate": 6.6027343102609705e-06, + "loss": 18.2229, + "step": 228260 + }, + { + "epoch": 0.46111984227346, + "grad_norm": 338.63262939453125, + "learning_rate": 6.602403659782026e-06, + "loss": 19.8484, + "step": 228270 + }, + { + "epoch": 0.4611400429061438, + "grad_norm": 272.4281005859375, + "learning_rate": 6.602073001493161e-06, + "loss": 13.1957, + "step": 228280 + }, + { + "epoch": 0.4611602435388276, + "grad_norm": 341.6337890625, + "learning_rate": 6.601742335395987e-06, + "loss": 13.4541, + "step": 228290 + }, + { + "epoch": 0.46118044417151144, + "grad_norm": 440.431640625, + "learning_rate": 6.601411661492114e-06, + "loss": 18.1025, + "step": 228300 + }, + { + "epoch": 0.46120064480419526, + "grad_norm": 63.806026458740234, + "learning_rate": 6.601080979783155e-06, + "loss": 10.9558, + "step": 228310 + }, + { + "epoch": 0.4612208454368791, + "grad_norm": 375.86090087890625, + "learning_rate": 6.600750290270722e-06, + "loss": 36.3965, + "step": 228320 + }, + { + "epoch": 0.4612410460695629, + "grad_norm": 280.7741394042969, + "learning_rate": 6.600419592956427e-06, + "loss": 30.3312, + "step": 228330 + }, + { + "epoch": 0.4612612467022467, + "grad_norm": 32.59760665893555, + "learning_rate": 6.600088887841879e-06, + "loss": 16.0028, + "step": 228340 + }, + { + "epoch": 0.46128144733493054, + "grad_norm": 2014.97412109375, + "learning_rate": 6.599758174928692e-06, + "loss": 37.2913, + "step": 228350 + }, + { + "epoch": 0.46130164796761436, + "grad_norm": 282.12762451171875, + "learning_rate": 6.599427454218479e-06, + "loss": 23.0998, + "step": 228360 + }, + { + "epoch": 0.4613218486002982, + "grad_norm": 764.3007202148438, + "learning_rate": 6.59909672571285e-06, + "loss": 50.8008, + "step": 228370 + }, + { + "epoch": 0.461342049232982, + "grad_norm": 117.80643463134766, + "learning_rate": 
6.598765989413419e-06, + "loss": 8.7945, + "step": 228380 + }, + { + "epoch": 0.4613622498656658, + "grad_norm": 164.44436645507812, + "learning_rate": 6.598435245321794e-06, + "loss": 36.3478, + "step": 228390 + }, + { + "epoch": 0.4613824504983496, + "grad_norm": 1353.674072265625, + "learning_rate": 6.59810449343959e-06, + "loss": 31.4991, + "step": 228400 + }, + { + "epoch": 0.4614026511310334, + "grad_norm": 244.39486694335938, + "learning_rate": 6.597773733768419e-06, + "loss": 9.2037, + "step": 228410 + }, + { + "epoch": 0.4614228517637172, + "grad_norm": 318.91986083984375, + "learning_rate": 6.597442966309893e-06, + "loss": 18.2771, + "step": 228420 + }, + { + "epoch": 0.46144305239640104, + "grad_norm": 593.6725463867188, + "learning_rate": 6.5971121910656245e-06, + "loss": 16.5071, + "step": 228430 + }, + { + "epoch": 0.46146325302908486, + "grad_norm": 297.6046447753906, + "learning_rate": 6.5967814080372224e-06, + "loss": 25.4071, + "step": 228440 + }, + { + "epoch": 0.4614834536617687, + "grad_norm": 351.06573486328125, + "learning_rate": 6.596450617226303e-06, + "loss": 25.4135, + "step": 228450 + }, + { + "epoch": 0.4615036542944525, + "grad_norm": 239.96615600585938, + "learning_rate": 6.596119818634478e-06, + "loss": 19.2743, + "step": 228460 + }, + { + "epoch": 0.4615238549271363, + "grad_norm": 339.3836364746094, + "learning_rate": 6.595789012263356e-06, + "loss": 12.7965, + "step": 228470 + }, + { + "epoch": 0.46154405555982014, + "grad_norm": 513.2958374023438, + "learning_rate": 6.5954581981145536e-06, + "loss": 27.8609, + "step": 228480 + }, + { + "epoch": 0.46156425619250396, + "grad_norm": 80.27755737304688, + "learning_rate": 6.5951273761896794e-06, + "loss": 15.2299, + "step": 228490 + }, + { + "epoch": 0.4615844568251878, + "grad_norm": 939.7469482421875, + "learning_rate": 6.594796546490351e-06, + "loss": 26.2262, + "step": 228500 + }, + { + "epoch": 0.4616046574578716, + "grad_norm": 262.9824523925781, + "learning_rate": 6.594465709018175e-06, + "loss": 11.674, + "step": 228510 + }, + { + "epoch": 0.4616248580905554, + "grad_norm": 1236.662353515625, + "learning_rate": 6.594134863774768e-06, + "loss": 24.4545, + "step": 228520 + }, + { + "epoch": 0.4616450587232392, + "grad_norm": 291.9767150878906, + "learning_rate": 6.593804010761742e-06, + "loss": 27.847, + "step": 228530 + }, + { + "epoch": 0.461665259355923, + "grad_norm": 806.2813720703125, + "learning_rate": 6.593473149980707e-06, + "loss": 24.3102, + "step": 228540 + }, + { + "epoch": 0.46168545998860683, + "grad_norm": 698.558837890625, + "learning_rate": 6.593142281433277e-06, + "loss": 30.923, + "step": 228550 + }, + { + "epoch": 0.46170566062129065, + "grad_norm": 240.34564208984375, + "learning_rate": 6.592811405121064e-06, + "loss": 26.97, + "step": 228560 + }, + { + "epoch": 0.46172586125397447, + "grad_norm": 55.996376037597656, + "learning_rate": 6.592480521045683e-06, + "loss": 44.2196, + "step": 228570 + }, + { + "epoch": 0.4617460618866583, + "grad_norm": 530.7982788085938, + "learning_rate": 6.592149629208744e-06, + "loss": 27.613, + "step": 228580 + }, + { + "epoch": 0.4617662625193421, + "grad_norm": 235.2656707763672, + "learning_rate": 6.591818729611863e-06, + "loss": 12.0532, + "step": 228590 + }, + { + "epoch": 0.46178646315202593, + "grad_norm": 327.0687561035156, + "learning_rate": 6.591487822256648e-06, + "loss": 14.905, + "step": 228600 + }, + { + "epoch": 0.46180666378470975, + "grad_norm": 698.6280517578125, + "learning_rate": 6.591156907144716e-06, + "loss": 25.5789, + 
"step": 228610 + }, + { + "epoch": 0.46182686441739357, + "grad_norm": 624.7056884765625, + "learning_rate": 6.590825984277677e-06, + "loss": 34.0949, + "step": 228620 + }, + { + "epoch": 0.4618470650500774, + "grad_norm": 99.8488540649414, + "learning_rate": 6.590495053657145e-06, + "loss": 12.8664, + "step": 228630 + }, + { + "epoch": 0.4618672656827612, + "grad_norm": 396.48907470703125, + "learning_rate": 6.590164115284734e-06, + "loss": 15.4031, + "step": 228640 + }, + { + "epoch": 0.46188746631544497, + "grad_norm": 361.1356506347656, + "learning_rate": 6.589833169162055e-06, + "loss": 23.8085, + "step": 228650 + }, + { + "epoch": 0.4619076669481288, + "grad_norm": 763.4234619140625, + "learning_rate": 6.589502215290723e-06, + "loss": 19.241, + "step": 228660 + }, + { + "epoch": 0.4619278675808126, + "grad_norm": 871.5690307617188, + "learning_rate": 6.5891712536723495e-06, + "loss": 24.2829, + "step": 228670 + }, + { + "epoch": 0.46194806821349643, + "grad_norm": 497.9557189941406, + "learning_rate": 6.588840284308548e-06, + "loss": 29.1579, + "step": 228680 + }, + { + "epoch": 0.46196826884618025, + "grad_norm": 176.30181884765625, + "learning_rate": 6.588509307200932e-06, + "loss": 24.5342, + "step": 228690 + }, + { + "epoch": 0.46198846947886407, + "grad_norm": 193.85894775390625, + "learning_rate": 6.588178322351113e-06, + "loss": 28.7056, + "step": 228700 + }, + { + "epoch": 0.4620086701115479, + "grad_norm": 47.11268997192383, + "learning_rate": 6.587847329760708e-06, + "loss": 26.5757, + "step": 228710 + }, + { + "epoch": 0.4620288707442317, + "grad_norm": 516.3763427734375, + "learning_rate": 6.587516329431326e-06, + "loss": 14.0678, + "step": 228720 + }, + { + "epoch": 0.46204907137691553, + "grad_norm": 57.38300323486328, + "learning_rate": 6.587185321364582e-06, + "loss": 12.3186, + "step": 228730 + }, + { + "epoch": 0.46206927200959935, + "grad_norm": 596.0176391601562, + "learning_rate": 6.5868543055620895e-06, + "loss": 14.9874, + "step": 228740 + }, + { + "epoch": 0.46208947264228317, + "grad_norm": 2.5874407291412354, + "learning_rate": 6.586523282025462e-06, + "loss": 45.7023, + "step": 228750 + }, + { + "epoch": 0.462109673274967, + "grad_norm": 102.36325073242188, + "learning_rate": 6.586192250756312e-06, + "loss": 28.644, + "step": 228760 + }, + { + "epoch": 0.4621298739076508, + "grad_norm": 295.92864990234375, + "learning_rate": 6.585861211756253e-06, + "loss": 19.5812, + "step": 228770 + }, + { + "epoch": 0.4621500745403346, + "grad_norm": 271.26055908203125, + "learning_rate": 6.585530165026899e-06, + "loss": 30.6603, + "step": 228780 + }, + { + "epoch": 0.4621702751730184, + "grad_norm": 250.38360595703125, + "learning_rate": 6.585199110569863e-06, + "loss": 10.7057, + "step": 228790 + }, + { + "epoch": 0.4621904758057022, + "grad_norm": 258.42181396484375, + "learning_rate": 6.58486804838676e-06, + "loss": 14.4258, + "step": 228800 + }, + { + "epoch": 0.46221067643838604, + "grad_norm": 288.42681884765625, + "learning_rate": 6.5845369784792e-06, + "loss": 32.4417, + "step": 228810 + }, + { + "epoch": 0.46223087707106986, + "grad_norm": 205.974853515625, + "learning_rate": 6.584205900848801e-06, + "loss": 22.5565, + "step": 228820 + }, + { + "epoch": 0.4622510777037537, + "grad_norm": 526.0134887695312, + "learning_rate": 6.583874815497174e-06, + "loss": 19.7797, + "step": 228830 + }, + { + "epoch": 0.4622712783364375, + "grad_norm": 112.11205291748047, + "learning_rate": 6.583543722425934e-06, + "loss": 11.1141, + "step": 228840 + }, + { + "epoch": 
0.4622914789691213, + "grad_norm": 596.88818359375, + "learning_rate": 6.583212621636693e-06, + "loss": 19.2989, + "step": 228850 + }, + { + "epoch": 0.46231167960180514, + "grad_norm": 76.48033905029297, + "learning_rate": 6.582881513131065e-06, + "loss": 16.9819, + "step": 228860 + }, + { + "epoch": 0.46233188023448896, + "grad_norm": 341.16387939453125, + "learning_rate": 6.5825503969106675e-06, + "loss": 26.5284, + "step": 228870 + }, + { + "epoch": 0.4623520808671728, + "grad_norm": 133.86705017089844, + "learning_rate": 6.582219272977108e-06, + "loss": 29.1873, + "step": 228880 + }, + { + "epoch": 0.4623722814998566, + "grad_norm": 453.6415100097656, + "learning_rate": 6.581888141332004e-06, + "loss": 19.164, + "step": 228890 + }, + { + "epoch": 0.4623924821325404, + "grad_norm": 143.84637451171875, + "learning_rate": 6.58155700197697e-06, + "loss": 20.8901, + "step": 228900 + }, + { + "epoch": 0.4624126827652242, + "grad_norm": 113.33052825927734, + "learning_rate": 6.581225854913621e-06, + "loss": 21.8837, + "step": 228910 + }, + { + "epoch": 0.462432883397908, + "grad_norm": 339.52899169921875, + "learning_rate": 6.580894700143565e-06, + "loss": 20.7568, + "step": 228920 + }, + { + "epoch": 0.4624530840305918, + "grad_norm": 173.9373321533203, + "learning_rate": 6.580563537668423e-06, + "loss": 14.7747, + "step": 228930 + }, + { + "epoch": 0.46247328466327564, + "grad_norm": 280.2523193359375, + "learning_rate": 6.580232367489805e-06, + "loss": 20.7438, + "step": 228940 + }, + { + "epoch": 0.46249348529595946, + "grad_norm": 355.7958068847656, + "learning_rate": 6.579901189609325e-06, + "loss": 18.4877, + "step": 228950 + }, + { + "epoch": 0.4625136859286433, + "grad_norm": 247.1247100830078, + "learning_rate": 6.5795700040286014e-06, + "loss": 18.1621, + "step": 228960 + }, + { + "epoch": 0.4625338865613271, + "grad_norm": 499.34686279296875, + "learning_rate": 6.579238810749241e-06, + "loss": 19.4621, + "step": 228970 + }, + { + "epoch": 0.4625540871940109, + "grad_norm": 497.579833984375, + "learning_rate": 6.578907609772866e-06, + "loss": 43.1037, + "step": 228980 + }, + { + "epoch": 0.46257428782669474, + "grad_norm": 186.67184448242188, + "learning_rate": 6.578576401101084e-06, + "loss": 16.3681, + "step": 228990 + }, + { + "epoch": 0.46259448845937856, + "grad_norm": 256.8697814941406, + "learning_rate": 6.578245184735513e-06, + "loss": 12.9051, + "step": 229000 + }, + { + "epoch": 0.4626146890920624, + "grad_norm": 166.88180541992188, + "learning_rate": 6.577913960677766e-06, + "loss": 12.9451, + "step": 229010 + }, + { + "epoch": 0.4626348897247462, + "grad_norm": 606.2781372070312, + "learning_rate": 6.577582728929458e-06, + "loss": 20.379, + "step": 229020 + }, + { + "epoch": 0.46265509035743, + "grad_norm": 271.86834716796875, + "learning_rate": 6.5772514894922034e-06, + "loss": 11.8683, + "step": 229030 + }, + { + "epoch": 0.4626752909901138, + "grad_norm": 218.47808837890625, + "learning_rate": 6.576920242367617e-06, + "loss": 23.3083, + "step": 229040 + }, + { + "epoch": 0.4626954916227976, + "grad_norm": 333.4864807128906, + "learning_rate": 6.576588987557312e-06, + "loss": 19.5914, + "step": 229050 + }, + { + "epoch": 0.4627156922554814, + "grad_norm": 265.64501953125, + "learning_rate": 6.576257725062903e-06, + "loss": 11.2602, + "step": 229060 + }, + { + "epoch": 0.46273589288816525, + "grad_norm": 109.08692169189453, + "learning_rate": 6.575926454886005e-06, + "loss": 39.5215, + "step": 229070 + }, + { + "epoch": 0.46275609352084907, + "grad_norm": 
620.376220703125, + "learning_rate": 6.575595177028233e-06, + "loss": 14.2546, + "step": 229080 + }, + { + "epoch": 0.4627762941535329, + "grad_norm": 280.4469299316406, + "learning_rate": 6.575263891491203e-06, + "loss": 15.089, + "step": 229090 + }, + { + "epoch": 0.4627964947862167, + "grad_norm": 257.61138916015625, + "learning_rate": 6.574932598276524e-06, + "loss": 24.787, + "step": 229100 + }, + { + "epoch": 0.4628166954189005, + "grad_norm": 239.0186004638672, + "learning_rate": 6.574601297385817e-06, + "loss": 13.9121, + "step": 229110 + }, + { + "epoch": 0.46283689605158435, + "grad_norm": 178.89210510253906, + "learning_rate": 6.574269988820694e-06, + "loss": 15.2393, + "step": 229120 + }, + { + "epoch": 0.46285709668426817, + "grad_norm": 774.9666137695312, + "learning_rate": 6.573938672582769e-06, + "loss": 11.8043, + "step": 229130 + }, + { + "epoch": 0.462877297316952, + "grad_norm": 909.0729370117188, + "learning_rate": 6.5736073486736606e-06, + "loss": 13.9207, + "step": 229140 + }, + { + "epoch": 0.4628974979496358, + "grad_norm": 745.9268188476562, + "learning_rate": 6.573276017094977e-06, + "loss": 21.3982, + "step": 229150 + }, + { + "epoch": 0.4629176985823196, + "grad_norm": 373.2799987792969, + "learning_rate": 6.5729446778483395e-06, + "loss": 28.3705, + "step": 229160 + }, + { + "epoch": 0.4629378992150034, + "grad_norm": 72.70282745361328, + "learning_rate": 6.57261333093536e-06, + "loss": 16.3056, + "step": 229170 + }, + { + "epoch": 0.4629580998476872, + "grad_norm": 103.14777374267578, + "learning_rate": 6.5722819763576525e-06, + "loss": 18.3655, + "step": 229180 + }, + { + "epoch": 0.46297830048037103, + "grad_norm": 327.900634765625, + "learning_rate": 6.571950614116835e-06, + "loss": 20.0955, + "step": 229190 + }, + { + "epoch": 0.46299850111305485, + "grad_norm": 519.2888793945312, + "learning_rate": 6.571619244214521e-06, + "loss": 14.4365, + "step": 229200 + }, + { + "epoch": 0.46301870174573867, + "grad_norm": 105.24832153320312, + "learning_rate": 6.571287866652325e-06, + "loss": 18.3877, + "step": 229210 + }, + { + "epoch": 0.4630389023784225, + "grad_norm": 464.1259460449219, + "learning_rate": 6.570956481431862e-06, + "loss": 23.1351, + "step": 229220 + }, + { + "epoch": 0.4630591030111063, + "grad_norm": 163.41204833984375, + "learning_rate": 6.570625088554747e-06, + "loss": 19.7771, + "step": 229230 + }, + { + "epoch": 0.46307930364379013, + "grad_norm": 194.02304077148438, + "learning_rate": 6.570293688022597e-06, + "loss": 20.9636, + "step": 229240 + }, + { + "epoch": 0.46309950427647395, + "grad_norm": 550.7429809570312, + "learning_rate": 6.569962279837025e-06, + "loss": 13.2235, + "step": 229250 + }, + { + "epoch": 0.46311970490915777, + "grad_norm": 485.3947448730469, + "learning_rate": 6.5696308639996475e-06, + "loss": 21.1499, + "step": 229260 + }, + { + "epoch": 0.4631399055418416, + "grad_norm": 318.07037353515625, + "learning_rate": 6.569299440512081e-06, + "loss": 19.188, + "step": 229270 + }, + { + "epoch": 0.4631601061745254, + "grad_norm": 390.4097595214844, + "learning_rate": 6.568968009375938e-06, + "loss": 15.8124, + "step": 229280 + }, + { + "epoch": 0.4631803068072092, + "grad_norm": 128.84483337402344, + "learning_rate": 6.568636570592835e-06, + "loss": 14.8368, + "step": 229290 + }, + { + "epoch": 0.463200507439893, + "grad_norm": 303.23114013671875, + "learning_rate": 6.5683051241643894e-06, + "loss": 16.1054, + "step": 229300 + }, + { + "epoch": 0.4632207080725768, + "grad_norm": 401.9518127441406, + "learning_rate": 
6.567973670092212e-06, + "loss": 12.3538, + "step": 229310 + }, + { + "epoch": 0.46324090870526063, + "grad_norm": 130.42996215820312, + "learning_rate": 6.567642208377924e-06, + "loss": 20.0103, + "step": 229320 + }, + { + "epoch": 0.46326110933794445, + "grad_norm": 159.85983276367188, + "learning_rate": 6.567310739023136e-06, + "loss": 18.0162, + "step": 229330 + }, + { + "epoch": 0.4632813099706283, + "grad_norm": 8.865955352783203, + "learning_rate": 6.566979262029467e-06, + "loss": 21.846, + "step": 229340 + }, + { + "epoch": 0.4633015106033121, + "grad_norm": 691.6495971679688, + "learning_rate": 6.566647777398529e-06, + "loss": 22.9702, + "step": 229350 + }, + { + "epoch": 0.4633217112359959, + "grad_norm": 735.4181518554688, + "learning_rate": 6.566316285131943e-06, + "loss": 34.7296, + "step": 229360 + }, + { + "epoch": 0.46334191186867973, + "grad_norm": 192.17556762695312, + "learning_rate": 6.5659847852313184e-06, + "loss": 12.5642, + "step": 229370 + }, + { + "epoch": 0.46336211250136355, + "grad_norm": 338.44293212890625, + "learning_rate": 6.5656532776982765e-06, + "loss": 26.8633, + "step": 229380 + }, + { + "epoch": 0.4633823131340474, + "grad_norm": 92.99593353271484, + "learning_rate": 6.565321762534428e-06, + "loss": 13.8771, + "step": 229390 + }, + { + "epoch": 0.4634025137667312, + "grad_norm": 810.649658203125, + "learning_rate": 6.5649902397413915e-06, + "loss": 22.4203, + "step": 229400 + }, + { + "epoch": 0.463422714399415, + "grad_norm": 543.7352905273438, + "learning_rate": 6.564658709320783e-06, + "loss": 22.6507, + "step": 229410 + }, + { + "epoch": 0.4634429150320988, + "grad_norm": 236.32470703125, + "learning_rate": 6.564327171274217e-06, + "loss": 24.3408, + "step": 229420 + }, + { + "epoch": 0.4634631156647826, + "grad_norm": 389.9688415527344, + "learning_rate": 6.563995625603312e-06, + "loss": 18.6648, + "step": 229430 + }, + { + "epoch": 0.4634833162974664, + "grad_norm": 594.2128295898438, + "learning_rate": 6.56366407230968e-06, + "loss": 22.824, + "step": 229440 + }, + { + "epoch": 0.46350351693015024, + "grad_norm": 431.28973388671875, + "learning_rate": 6.5633325113949395e-06, + "loss": 21.5923, + "step": 229450 + }, + { + "epoch": 0.46352371756283406, + "grad_norm": 763.2753295898438, + "learning_rate": 6.5630009428607065e-06, + "loss": 23.1118, + "step": 229460 + }, + { + "epoch": 0.4635439181955179, + "grad_norm": 183.33277893066406, + "learning_rate": 6.562669366708596e-06, + "loss": 13.4126, + "step": 229470 + }, + { + "epoch": 0.4635641188282017, + "grad_norm": 403.41351318359375, + "learning_rate": 6.562337782940224e-06, + "loss": 26.614, + "step": 229480 + }, + { + "epoch": 0.4635843194608855, + "grad_norm": 673.7896118164062, + "learning_rate": 6.562006191557209e-06, + "loss": 20.5815, + "step": 229490 + }, + { + "epoch": 0.46360452009356934, + "grad_norm": 188.78221130371094, + "learning_rate": 6.561674592561164e-06, + "loss": 20.2226, + "step": 229500 + }, + { + "epoch": 0.46362472072625316, + "grad_norm": 211.59988403320312, + "learning_rate": 6.561342985953706e-06, + "loss": 11.8335, + "step": 229510 + }, + { + "epoch": 0.463644921358937, + "grad_norm": 391.2408142089844, + "learning_rate": 6.561011371736452e-06, + "loss": 10.9201, + "step": 229520 + }, + { + "epoch": 0.4636651219916208, + "grad_norm": 197.4485321044922, + "learning_rate": 6.560679749911018e-06, + "loss": 13.5148, + "step": 229530 + }, + { + "epoch": 0.4636853226243046, + "grad_norm": 439.3060302734375, + "learning_rate": 6.560348120479021e-06, + "loss": 31.6213, + 
"step": 229540 + }, + { + "epoch": 0.4637055232569884, + "grad_norm": 495.3293151855469, + "learning_rate": 6.5600164834420754e-06, + "loss": 18.4101, + "step": 229550 + }, + { + "epoch": 0.4637257238896722, + "grad_norm": 244.08631896972656, + "learning_rate": 6.559684838801798e-06, + "loss": 39.2556, + "step": 229560 + }, + { + "epoch": 0.463745924522356, + "grad_norm": 453.98846435546875, + "learning_rate": 6.559353186559808e-06, + "loss": 16.6811, + "step": 229570 + }, + { + "epoch": 0.46376612515503984, + "grad_norm": 868.4921875, + "learning_rate": 6.559021526717717e-06, + "loss": 25.1985, + "step": 229580 + }, + { + "epoch": 0.46378632578772366, + "grad_norm": 325.8453674316406, + "learning_rate": 6.558689859277148e-06, + "loss": 18.4847, + "step": 229590 + }, + { + "epoch": 0.4638065264204075, + "grad_norm": 184.70367431640625, + "learning_rate": 6.558358184239709e-06, + "loss": 22.3115, + "step": 229600 + }, + { + "epoch": 0.4638267270530913, + "grad_norm": 184.07736206054688, + "learning_rate": 6.5580265016070245e-06, + "loss": 37.0262, + "step": 229610 + }, + { + "epoch": 0.4638469276857751, + "grad_norm": 866.11669921875, + "learning_rate": 6.557694811380707e-06, + "loss": 32.8891, + "step": 229620 + }, + { + "epoch": 0.46386712831845894, + "grad_norm": 275.5018615722656, + "learning_rate": 6.5573631135623736e-06, + "loss": 21.9039, + "step": 229630 + }, + { + "epoch": 0.46388732895114276, + "grad_norm": 197.97389221191406, + "learning_rate": 6.557031408153642e-06, + "loss": 20.2326, + "step": 229640 + }, + { + "epoch": 0.4639075295838266, + "grad_norm": 481.6684265136719, + "learning_rate": 6.556699695156128e-06, + "loss": 18.4726, + "step": 229650 + }, + { + "epoch": 0.4639277302165104, + "grad_norm": 607.4542236328125, + "learning_rate": 6.556367974571448e-06, + "loss": 16.4958, + "step": 229660 + }, + { + "epoch": 0.4639479308491942, + "grad_norm": 517.954345703125, + "learning_rate": 6.556036246401218e-06, + "loss": 18.0444, + "step": 229670 + }, + { + "epoch": 0.463968131481878, + "grad_norm": 388.73162841796875, + "learning_rate": 6.555704510647059e-06, + "loss": 12.3879, + "step": 229680 + }, + { + "epoch": 0.4639883321145618, + "grad_norm": 66.10597229003906, + "learning_rate": 6.555372767310582e-06, + "loss": 27.1608, + "step": 229690 + }, + { + "epoch": 0.46400853274724563, + "grad_norm": 634.7944946289062, + "learning_rate": 6.55504101639341e-06, + "loss": 26.6912, + "step": 229700 + }, + { + "epoch": 0.46402873337992945, + "grad_norm": 320.9844665527344, + "learning_rate": 6.554709257897153e-06, + "loss": 14.519, + "step": 229710 + }, + { + "epoch": 0.46404893401261327, + "grad_norm": 155.81512451171875, + "learning_rate": 6.554377491823434e-06, + "loss": 18.6841, + "step": 229720 + }, + { + "epoch": 0.4640691346452971, + "grad_norm": 466.6041564941406, + "learning_rate": 6.554045718173867e-06, + "loss": 26.129, + "step": 229730 + }, + { + "epoch": 0.4640893352779809, + "grad_norm": 1391.3883056640625, + "learning_rate": 6.5537139369500705e-06, + "loss": 34.4066, + "step": 229740 + }, + { + "epoch": 0.46410953591066473, + "grad_norm": 235.05258178710938, + "learning_rate": 6.55338214815366e-06, + "loss": 19.4836, + "step": 229750 + }, + { + "epoch": 0.46412973654334855, + "grad_norm": 365.6845703125, + "learning_rate": 6.553050351786252e-06, + "loss": 22.4419, + "step": 229760 + }, + { + "epoch": 0.46414993717603237, + "grad_norm": 543.2626953125, + "learning_rate": 6.552718547849467e-06, + "loss": 12.3587, + "step": 229770 + }, + { + "epoch": 0.4641701378087162, + 
"grad_norm": 613.8858032226562, + "learning_rate": 6.55238673634492e-06, + "loss": 20.5729, + "step": 229780 + }, + { + "epoch": 0.4641903384414, + "grad_norm": 747.0809936523438, + "learning_rate": 6.552054917274226e-06, + "loss": 20.9929, + "step": 229790 + }, + { + "epoch": 0.46421053907408383, + "grad_norm": 855.6516723632812, + "learning_rate": 6.551723090639008e-06, + "loss": 25.5416, + "step": 229800 + }, + { + "epoch": 0.4642307397067676, + "grad_norm": 356.35699462890625, + "learning_rate": 6.551391256440877e-06, + "loss": 33.6675, + "step": 229810 + }, + { + "epoch": 0.4642509403394514, + "grad_norm": 330.6403503417969, + "learning_rate": 6.551059414681455e-06, + "loss": 19.6082, + "step": 229820 + }, + { + "epoch": 0.46427114097213523, + "grad_norm": 749.244140625, + "learning_rate": 6.550727565362357e-06, + "loss": 22.3681, + "step": 229830 + }, + { + "epoch": 0.46429134160481905, + "grad_norm": 367.9692687988281, + "learning_rate": 6.5503957084852e-06, + "loss": 26.3204, + "step": 229840 + }, + { + "epoch": 0.46431154223750287, + "grad_norm": 314.1671142578125, + "learning_rate": 6.550063844051603e-06, + "loss": 16.2764, + "step": 229850 + }, + { + "epoch": 0.4643317428701867, + "grad_norm": 263.6180725097656, + "learning_rate": 6.549731972063183e-06, + "loss": 15.8397, + "step": 229860 + }, + { + "epoch": 0.4643519435028705, + "grad_norm": 573.0430908203125, + "learning_rate": 6.549400092521557e-06, + "loss": 14.4382, + "step": 229870 + }, + { + "epoch": 0.46437214413555433, + "grad_norm": 1.8975880146026611, + "learning_rate": 6.549068205428344e-06, + "loss": 22.1632, + "step": 229880 + }, + { + "epoch": 0.46439234476823815, + "grad_norm": 777.577880859375, + "learning_rate": 6.54873631078516e-06, + "loss": 23.0008, + "step": 229890 + }, + { + "epoch": 0.46441254540092197, + "grad_norm": 492.24761962890625, + "learning_rate": 6.548404408593622e-06, + "loss": 17.066, + "step": 229900 + }, + { + "epoch": 0.4644327460336058, + "grad_norm": 152.51919555664062, + "learning_rate": 6.54807249885535e-06, + "loss": 34.6613, + "step": 229910 + }, + { + "epoch": 0.4644529466662896, + "grad_norm": 428.359619140625, + "learning_rate": 6.547740581571959e-06, + "loss": 18.024, + "step": 229920 + }, + { + "epoch": 0.4644731472989734, + "grad_norm": 293.5292053222656, + "learning_rate": 6.547408656745069e-06, + "loss": 11.4237, + "step": 229930 + }, + { + "epoch": 0.4644933479316572, + "grad_norm": 388.554931640625, + "learning_rate": 6.547076724376296e-06, + "loss": 15.9199, + "step": 229940 + }, + { + "epoch": 0.464513548564341, + "grad_norm": 921.075439453125, + "learning_rate": 6.546744784467261e-06, + "loss": 23.9284, + "step": 229950 + }, + { + "epoch": 0.46453374919702484, + "grad_norm": 239.83203125, + "learning_rate": 6.546412837019577e-06, + "loss": 28.0474, + "step": 229960 + }, + { + "epoch": 0.46455394982970866, + "grad_norm": 221.23048400878906, + "learning_rate": 6.546080882034866e-06, + "loss": 15.9598, + "step": 229970 + }, + { + "epoch": 0.4645741504623925, + "grad_norm": 1186.389892578125, + "learning_rate": 6.545748919514743e-06, + "loss": 21.552, + "step": 229980 + }, + { + "epoch": 0.4645943510950763, + "grad_norm": 376.7621765136719, + "learning_rate": 6.545416949460828e-06, + "loss": 27.2045, + "step": 229990 + }, + { + "epoch": 0.4646145517277601, + "grad_norm": 584.0380249023438, + "learning_rate": 6.545084971874738e-06, + "loss": 23.3419, + "step": 230000 + }, + { + "epoch": 0.46463475236044394, + "grad_norm": 573.6217041015625, + "learning_rate": 
6.544752986758092e-06, + "loss": 64.9675, + "step": 230010 + }, + { + "epoch": 0.46465495299312776, + "grad_norm": 594.05615234375, + "learning_rate": 6.5444209941125056e-06, + "loss": 10.5585, + "step": 230020 + }, + { + "epoch": 0.4646751536258116, + "grad_norm": 236.2047119140625, + "learning_rate": 6.544088993939599e-06, + "loss": 11.2551, + "step": 230030 + }, + { + "epoch": 0.4646953542584954, + "grad_norm": 867.623779296875, + "learning_rate": 6.543756986240992e-06, + "loss": 21.3639, + "step": 230040 + }, + { + "epoch": 0.4647155548911792, + "grad_norm": 252.49024963378906, + "learning_rate": 6.543424971018298e-06, + "loss": 24.9629, + "step": 230050 + }, + { + "epoch": 0.464735755523863, + "grad_norm": 161.431396484375, + "learning_rate": 6.54309294827314e-06, + "loss": 7.0627, + "step": 230060 + }, + { + "epoch": 0.4647559561565468, + "grad_norm": 388.12982177734375, + "learning_rate": 6.542760918007133e-06, + "loss": 15.0796, + "step": 230070 + }, + { + "epoch": 0.4647761567892306, + "grad_norm": 631.7340087890625, + "learning_rate": 6.542428880221896e-06, + "loss": 28.6985, + "step": 230080 + }, + { + "epoch": 0.46479635742191444, + "grad_norm": 211.25621032714844, + "learning_rate": 6.542096834919049e-06, + "loss": 11.3219, + "step": 230090 + }, + { + "epoch": 0.46481655805459826, + "grad_norm": 331.23504638671875, + "learning_rate": 6.541764782100208e-06, + "loss": 13.1166, + "step": 230100 + }, + { + "epoch": 0.4648367586872821, + "grad_norm": 148.95367431640625, + "learning_rate": 6.541432721766994e-06, + "loss": 32.3821, + "step": 230110 + }, + { + "epoch": 0.4648569593199659, + "grad_norm": 206.0067596435547, + "learning_rate": 6.541100653921022e-06, + "loss": 24.1519, + "step": 230120 + }, + { + "epoch": 0.4648771599526497, + "grad_norm": 371.63330078125, + "learning_rate": 6.540768578563913e-06, + "loss": 30.2664, + "step": 230130 + }, + { + "epoch": 0.46489736058533354, + "grad_norm": 370.63690185546875, + "learning_rate": 6.540436495697284e-06, + "loss": 30.1104, + "step": 230140 + }, + { + "epoch": 0.46491756121801736, + "grad_norm": 398.7852783203125, + "learning_rate": 6.540104405322757e-06, + "loss": 24.7988, + "step": 230150 + }, + { + "epoch": 0.4649377618507012, + "grad_norm": 116.38355255126953, + "learning_rate": 6.5397723074419454e-06, + "loss": 26.7308, + "step": 230160 + }, + { + "epoch": 0.464957962483385, + "grad_norm": 715.37841796875, + "learning_rate": 6.53944020205647e-06, + "loss": 18.5669, + "step": 230170 + }, + { + "epoch": 0.4649781631160688, + "grad_norm": 595.7755737304688, + "learning_rate": 6.539108089167953e-06, + "loss": 29.9836, + "step": 230180 + }, + { + "epoch": 0.4649983637487526, + "grad_norm": 656.9431762695312, + "learning_rate": 6.538775968778006e-06, + "loss": 15.6107, + "step": 230190 + }, + { + "epoch": 0.4650185643814364, + "grad_norm": 298.2591552734375, + "learning_rate": 6.538443840888254e-06, + "loss": 20.1788, + "step": 230200 + }, + { + "epoch": 0.4650387650141202, + "grad_norm": 668.5206909179688, + "learning_rate": 6.538111705500312e-06, + "loss": 18.9093, + "step": 230210 + }, + { + "epoch": 0.46505896564680405, + "grad_norm": 294.69122314453125, + "learning_rate": 6.537779562615801e-06, + "loss": 13.5656, + "step": 230220 + }, + { + "epoch": 0.46507916627948787, + "grad_norm": 174.49681091308594, + "learning_rate": 6.537447412236338e-06, + "loss": 23.3252, + "step": 230230 + }, + { + "epoch": 0.4650993669121717, + "grad_norm": 190.39427185058594, + "learning_rate": 6.537115254363544e-06, + "loss": 16.2051, + "step": 
230240 + }, + { + "epoch": 0.4651195675448555, + "grad_norm": 368.1281433105469, + "learning_rate": 6.536783088999037e-06, + "loss": 17.0869, + "step": 230250 + }, + { + "epoch": 0.4651397681775393, + "grad_norm": 471.6187438964844, + "learning_rate": 6.536450916144435e-06, + "loss": 23.9824, + "step": 230260 + }, + { + "epoch": 0.46515996881022315, + "grad_norm": 669.5811767578125, + "learning_rate": 6.536118735801356e-06, + "loss": 19.156, + "step": 230270 + }, + { + "epoch": 0.46518016944290697, + "grad_norm": 586.4672241210938, + "learning_rate": 6.535786547971421e-06, + "loss": 22.2844, + "step": 230280 + }, + { + "epoch": 0.4652003700755908, + "grad_norm": 519.2525634765625, + "learning_rate": 6.53545435265625e-06, + "loss": 20.4509, + "step": 230290 + }, + { + "epoch": 0.4652205707082746, + "grad_norm": 474.3525085449219, + "learning_rate": 6.53512214985746e-06, + "loss": 22.7389, + "step": 230300 + }, + { + "epoch": 0.4652407713409584, + "grad_norm": 334.4240417480469, + "learning_rate": 6.534789939576672e-06, + "loss": 10.9933, + "step": 230310 + }, + { + "epoch": 0.4652609719736422, + "grad_norm": 486.189208984375, + "learning_rate": 6.534457721815502e-06, + "loss": 11.7563, + "step": 230320 + }, + { + "epoch": 0.465281172606326, + "grad_norm": 174.62841796875, + "learning_rate": 6.534125496575573e-06, + "loss": 12.3943, + "step": 230330 + }, + { + "epoch": 0.46530137323900983, + "grad_norm": 411.12823486328125, + "learning_rate": 6.533793263858501e-06, + "loss": 20.1812, + "step": 230340 + }, + { + "epoch": 0.46532157387169365, + "grad_norm": 295.273193359375, + "learning_rate": 6.533461023665907e-06, + "loss": 32.0141, + "step": 230350 + }, + { + "epoch": 0.46534177450437747, + "grad_norm": 161.42010498046875, + "learning_rate": 6.533128775999411e-06, + "loss": 22.363, + "step": 230360 + }, + { + "epoch": 0.4653619751370613, + "grad_norm": 328.110107421875, + "learning_rate": 6.532796520860629e-06, + "loss": 15.8858, + "step": 230370 + }, + { + "epoch": 0.4653821757697451, + "grad_norm": 536.037353515625, + "learning_rate": 6.532464258251185e-06, + "loss": 13.6617, + "step": 230380 + }, + { + "epoch": 0.46540237640242893, + "grad_norm": 361.0971374511719, + "learning_rate": 6.532131988172695e-06, + "loss": 10.4187, + "step": 230390 + }, + { + "epoch": 0.46542257703511275, + "grad_norm": 623.8751220703125, + "learning_rate": 6.53179971062678e-06, + "loss": 18.2029, + "step": 230400 + }, + { + "epoch": 0.46544277766779657, + "grad_norm": 470.6343688964844, + "learning_rate": 6.531467425615059e-06, + "loss": 15.9785, + "step": 230410 + }, + { + "epoch": 0.4654629783004804, + "grad_norm": 180.88169860839844, + "learning_rate": 6.531135133139152e-06, + "loss": 17.0625, + "step": 230420 + }, + { + "epoch": 0.4654831789331642, + "grad_norm": 638.9075317382812, + "learning_rate": 6.530802833200677e-06, + "loss": 26.4864, + "step": 230430 + }, + { + "epoch": 0.46550337956584803, + "grad_norm": 375.173828125, + "learning_rate": 6.530470525801254e-06, + "loss": 22.1161, + "step": 230440 + }, + { + "epoch": 0.4655235801985318, + "grad_norm": 154.70245361328125, + "learning_rate": 6.530138210942505e-06, + "loss": 9.1623, + "step": 230450 + }, + { + "epoch": 0.4655437808312156, + "grad_norm": 100.7583999633789, + "learning_rate": 6.529805888626046e-06, + "loss": 15.7135, + "step": 230460 + }, + { + "epoch": 0.46556398146389943, + "grad_norm": 315.9190368652344, + "learning_rate": 6.529473558853501e-06, + "loss": 23.7646, + "step": 230470 + }, + { + "epoch": 0.46558418209658325, + 
"grad_norm": 69.15559387207031, + "learning_rate": 6.529141221626485e-06, + "loss": 13.7641, + "step": 230480 + }, + { + "epoch": 0.4656043827292671, + "grad_norm": 472.7928161621094, + "learning_rate": 6.528808876946622e-06, + "loss": 10.4498, + "step": 230490 + }, + { + "epoch": 0.4656245833619509, + "grad_norm": 363.53656005859375, + "learning_rate": 6.5284765248155295e-06, + "loss": 18.3428, + "step": 230500 + }, + { + "epoch": 0.4656447839946347, + "grad_norm": 180.32015991210938, + "learning_rate": 6.5281441652348266e-06, + "loss": 12.5083, + "step": 230510 + }, + { + "epoch": 0.46566498462731853, + "grad_norm": 503.92083740234375, + "learning_rate": 6.527811798206136e-06, + "loss": 20.311, + "step": 230520 + }, + { + "epoch": 0.46568518526000235, + "grad_norm": 516.410888671875, + "learning_rate": 6.527479423731074e-06, + "loss": 14.5096, + "step": 230530 + }, + { + "epoch": 0.4657053858926862, + "grad_norm": 426.80767822265625, + "learning_rate": 6.527147041811266e-06, + "loss": 22.1971, + "step": 230540 + }, + { + "epoch": 0.46572558652537, + "grad_norm": 491.8183288574219, + "learning_rate": 6.526814652448325e-06, + "loss": 17.7891, + "step": 230550 + }, + { + "epoch": 0.4657457871580538, + "grad_norm": 562.8279418945312, + "learning_rate": 6.526482255643877e-06, + "loss": 23.9458, + "step": 230560 + }, + { + "epoch": 0.4657659877907376, + "grad_norm": 51.70831298828125, + "learning_rate": 6.526149851399538e-06, + "loss": 16.1752, + "step": 230570 + }, + { + "epoch": 0.4657861884234214, + "grad_norm": 501.7652587890625, + "learning_rate": 6.525817439716932e-06, + "loss": 15.4438, + "step": 230580 + }, + { + "epoch": 0.4658063890561052, + "grad_norm": 321.99163818359375, + "learning_rate": 6.525485020597675e-06, + "loss": 10.7271, + "step": 230590 + }, + { + "epoch": 0.46582658968878904, + "grad_norm": 174.6129150390625, + "learning_rate": 6.525152594043389e-06, + "loss": 15.1069, + "step": 230600 + }, + { + "epoch": 0.46584679032147286, + "grad_norm": 311.6827697753906, + "learning_rate": 6.524820160055694e-06, + "loss": 22.2453, + "step": 230610 + }, + { + "epoch": 0.4658669909541567, + "grad_norm": 23.80679702758789, + "learning_rate": 6.5244877186362095e-06, + "loss": 19.2407, + "step": 230620 + }, + { + "epoch": 0.4658871915868405, + "grad_norm": 463.9993896484375, + "learning_rate": 6.52415526978656e-06, + "loss": 31.6258, + "step": 230630 + }, + { + "epoch": 0.4659073922195243, + "grad_norm": 226.607421875, + "learning_rate": 6.523822813508359e-06, + "loss": 19.5136, + "step": 230640 + }, + { + "epoch": 0.46592759285220814, + "grad_norm": 212.09646606445312, + "learning_rate": 6.5234903498032345e-06, + "loss": 16.432, + "step": 230650 + }, + { + "epoch": 0.46594779348489196, + "grad_norm": 125.5946273803711, + "learning_rate": 6.523157878672799e-06, + "loss": 17.5256, + "step": 230660 + }, + { + "epoch": 0.4659679941175758, + "grad_norm": 484.95208740234375, + "learning_rate": 6.522825400118679e-06, + "loss": 26.0193, + "step": 230670 + }, + { + "epoch": 0.4659881947502596, + "grad_norm": 571.1227416992188, + "learning_rate": 6.5224929141424906e-06, + "loss": 23.7222, + "step": 230680 + }, + { + "epoch": 0.4660083953829434, + "grad_norm": 333.61602783203125, + "learning_rate": 6.522160420745857e-06, + "loss": 23.6479, + "step": 230690 + }, + { + "epoch": 0.4660285960156272, + "grad_norm": 619.595458984375, + "learning_rate": 6.5218279199304014e-06, + "loss": 21.6691, + "step": 230700 + }, + { + "epoch": 0.466048796648311, + "grad_norm": 124.25101470947266, + 
"learning_rate": 6.521495411697738e-06, + "loss": 16.7347, + "step": 230710 + }, + { + "epoch": 0.4660689972809948, + "grad_norm": 165.28903198242188, + "learning_rate": 6.521162896049491e-06, + "loss": 17.5535, + "step": 230720 + }, + { + "epoch": 0.46608919791367864, + "grad_norm": 380.7362060546875, + "learning_rate": 6.52083037298728e-06, + "loss": 15.7127, + "step": 230730 + }, + { + "epoch": 0.46610939854636246, + "grad_norm": 42.50484085083008, + "learning_rate": 6.520497842512728e-06, + "loss": 9.2298, + "step": 230740 + }, + { + "epoch": 0.4661295991790463, + "grad_norm": 203.9617919921875, + "learning_rate": 6.520165304627452e-06, + "loss": 22.1434, + "step": 230750 + }, + { + "epoch": 0.4661497998117301, + "grad_norm": 470.6728515625, + "learning_rate": 6.519832759333076e-06, + "loss": 29.8831, + "step": 230760 + }, + { + "epoch": 0.4661700004444139, + "grad_norm": 306.70489501953125, + "learning_rate": 6.519500206631218e-06, + "loss": 31.5376, + "step": 230770 + }, + { + "epoch": 0.46619020107709774, + "grad_norm": 368.716064453125, + "learning_rate": 6.519167646523501e-06, + "loss": 9.9128, + "step": 230780 + }, + { + "epoch": 0.46621040170978156, + "grad_norm": 586.248779296875, + "learning_rate": 6.518835079011548e-06, + "loss": 17.9391, + "step": 230790 + }, + { + "epoch": 0.4662306023424654, + "grad_norm": 96.07538604736328, + "learning_rate": 6.518502504096972e-06, + "loss": 24.7334, + "step": 230800 + }, + { + "epoch": 0.4662508029751492, + "grad_norm": 231.39561462402344, + "learning_rate": 6.5181699217814025e-06, + "loss": 18.3506, + "step": 230810 + }, + { + "epoch": 0.466271003607833, + "grad_norm": 397.648193359375, + "learning_rate": 6.517837332066455e-06, + "loss": 25.5665, + "step": 230820 + }, + { + "epoch": 0.4662912042405168, + "grad_norm": 285.528564453125, + "learning_rate": 6.5175047349537535e-06, + "loss": 25.5306, + "step": 230830 + }, + { + "epoch": 0.4663114048732006, + "grad_norm": 320.0257263183594, + "learning_rate": 6.517172130444918e-06, + "loss": 10.6003, + "step": 230840 + }, + { + "epoch": 0.46633160550588443, + "grad_norm": 410.9398498535156, + "learning_rate": 6.516839518541569e-06, + "loss": 21.0209, + "step": 230850 + }, + { + "epoch": 0.46635180613856825, + "grad_norm": 269.03729248046875, + "learning_rate": 6.516506899245329e-06, + "loss": 10.5766, + "step": 230860 + }, + { + "epoch": 0.46637200677125207, + "grad_norm": 585.1954956054688, + "learning_rate": 6.516174272557817e-06, + "loss": 22.4784, + "step": 230870 + }, + { + "epoch": 0.4663922074039359, + "grad_norm": 508.3545837402344, + "learning_rate": 6.515841638480656e-06, + "loss": 13.3804, + "step": 230880 + }, + { + "epoch": 0.4664124080366197, + "grad_norm": 423.1375427246094, + "learning_rate": 6.515508997015467e-06, + "loss": 22.1213, + "step": 230890 + }, + { + "epoch": 0.46643260866930353, + "grad_norm": 372.9940490722656, + "learning_rate": 6.5151763481638705e-06, + "loss": 14.8557, + "step": 230900 + }, + { + "epoch": 0.46645280930198735, + "grad_norm": 250.304931640625, + "learning_rate": 6.5148436919274884e-06, + "loss": 36.5328, + "step": 230910 + }, + { + "epoch": 0.46647300993467117, + "grad_norm": 300.4207763671875, + "learning_rate": 6.514511028307943e-06, + "loss": 13.9809, + "step": 230920 + }, + { + "epoch": 0.466493210567355, + "grad_norm": 238.9508514404297, + "learning_rate": 6.5141783573068525e-06, + "loss": 15.1482, + "step": 230930 + }, + { + "epoch": 0.4665134112000388, + "grad_norm": 311.8534240722656, + "learning_rate": 6.513845678925842e-06, + "loss": 
31.2771, + "step": 230940 + }, + { + "epoch": 0.46653361183272263, + "grad_norm": 696.7138061523438, + "learning_rate": 6.5135129931665305e-06, + "loss": 34.5944, + "step": 230950 + }, + { + "epoch": 0.4665538124654064, + "grad_norm": 573.7080688476562, + "learning_rate": 6.5131803000305405e-06, + "loss": 47.3493, + "step": 230960 + }, + { + "epoch": 0.4665740130980902, + "grad_norm": 638.0385131835938, + "learning_rate": 6.512847599519494e-06, + "loss": 16.6461, + "step": 230970 + }, + { + "epoch": 0.46659421373077403, + "grad_norm": 460.6133117675781, + "learning_rate": 6.5125148916350086e-06, + "loss": 15.5097, + "step": 230980 + }, + { + "epoch": 0.46661441436345785, + "grad_norm": 531.8438110351562, + "learning_rate": 6.512182176378713e-06, + "loss": 29.2572, + "step": 230990 + }, + { + "epoch": 0.4666346149961417, + "grad_norm": 670.6133422851562, + "learning_rate": 6.5118494537522235e-06, + "loss": 17.1781, + "step": 231000 + }, + { + "epoch": 0.4666548156288255, + "grad_norm": 283.95538330078125, + "learning_rate": 6.511516723757163e-06, + "loss": 30.5466, + "step": 231010 + }, + { + "epoch": 0.4666750162615093, + "grad_norm": 516.88330078125, + "learning_rate": 6.511183986395153e-06, + "loss": 21.4761, + "step": 231020 + }, + { + "epoch": 0.46669521689419313, + "grad_norm": 562.6792602539062, + "learning_rate": 6.510851241667816e-06, + "loss": 12.9656, + "step": 231030 + }, + { + "epoch": 0.46671541752687695, + "grad_norm": 134.06849670410156, + "learning_rate": 6.510518489576774e-06, + "loss": 30.0356, + "step": 231040 + }, + { + "epoch": 0.4667356181595608, + "grad_norm": 541.930908203125, + "learning_rate": 6.510185730123646e-06, + "loss": 15.1213, + "step": 231050 + }, + { + "epoch": 0.4667558187922446, + "grad_norm": 326.9928894042969, + "learning_rate": 6.509852963310057e-06, + "loss": 19.1856, + "step": 231060 + }, + { + "epoch": 0.4667760194249284, + "grad_norm": 66.58686828613281, + "learning_rate": 6.509520189137628e-06, + "loss": 23.8234, + "step": 231070 + }, + { + "epoch": 0.4667962200576122, + "grad_norm": 116.25985717773438, + "learning_rate": 6.509187407607981e-06, + "loss": 18.7779, + "step": 231080 + }, + { + "epoch": 0.466816420690296, + "grad_norm": 350.497802734375, + "learning_rate": 6.508854618722735e-06, + "loss": 19.0562, + "step": 231090 + }, + { + "epoch": 0.4668366213229798, + "grad_norm": 1122.8433837890625, + "learning_rate": 6.508521822483518e-06, + "loss": 30.5725, + "step": 231100 + }, + { + "epoch": 0.46685682195566364, + "grad_norm": 347.474365234375, + "learning_rate": 6.508189018891948e-06, + "loss": 17.4511, + "step": 231110 + }, + { + "epoch": 0.46687702258834746, + "grad_norm": 139.44105529785156, + "learning_rate": 6.507856207949647e-06, + "loss": 19.9628, + "step": 231120 + }, + { + "epoch": 0.4668972232210313, + "grad_norm": 510.9095153808594, + "learning_rate": 6.507523389658238e-06, + "loss": 9.4397, + "step": 231130 + }, + { + "epoch": 0.4669174238537151, + "grad_norm": 329.23095703125, + "learning_rate": 6.507190564019341e-06, + "loss": 11.1078, + "step": 231140 + }, + { + "epoch": 0.4669376244863989, + "grad_norm": 528.880615234375, + "learning_rate": 6.506857731034582e-06, + "loss": 24.5451, + "step": 231150 + }, + { + "epoch": 0.46695782511908274, + "grad_norm": 266.6522521972656, + "learning_rate": 6.506524890705581e-06, + "loss": 23.1448, + "step": 231160 + }, + { + "epoch": 0.46697802575176656, + "grad_norm": 708.8331298828125, + "learning_rate": 6.50619204303396e-06, + "loss": 18.703, + "step": 231170 + }, + { + "epoch": 
0.4669982263844504, + "grad_norm": 283.2425537109375, + "learning_rate": 6.5058591880213414e-06, + "loss": 15.9134, + "step": 231180 + }, + { + "epoch": 0.4670184270171342, + "grad_norm": 66.12435913085938, + "learning_rate": 6.505526325669348e-06, + "loss": 14.2454, + "step": 231190 + }, + { + "epoch": 0.467038627649818, + "grad_norm": 340.502197265625, + "learning_rate": 6.505193455979603e-06, + "loss": 35.1657, + "step": 231200 + }, + { + "epoch": 0.4670588282825018, + "grad_norm": 302.19989013671875, + "learning_rate": 6.504860578953727e-06, + "loss": 20.8116, + "step": 231210 + }, + { + "epoch": 0.4670790289151856, + "grad_norm": 443.4750671386719, + "learning_rate": 6.504527694593342e-06, + "loss": 17.1766, + "step": 231220 + }, + { + "epoch": 0.4670992295478694, + "grad_norm": 376.025390625, + "learning_rate": 6.504194802900072e-06, + "loss": 11.6998, + "step": 231230 + }, + { + "epoch": 0.46711943018055324, + "grad_norm": 254.54635620117188, + "learning_rate": 6.50386190387554e-06, + "loss": 7.7445, + "step": 231240 + }, + { + "epoch": 0.46713963081323706, + "grad_norm": 1528.227783203125, + "learning_rate": 6.503528997521365e-06, + "loss": 27.1577, + "step": 231250 + }, + { + "epoch": 0.4671598314459209, + "grad_norm": 499.67095947265625, + "learning_rate": 6.503196083839175e-06, + "loss": 26.5145, + "step": 231260 + }, + { + "epoch": 0.4671800320786047, + "grad_norm": 544.8344116210938, + "learning_rate": 6.502863162830589e-06, + "loss": 21.0486, + "step": 231270 + }, + { + "epoch": 0.4672002327112885, + "grad_norm": 452.4975891113281, + "learning_rate": 6.502530234497229e-06, + "loss": 15.3538, + "step": 231280 + }, + { + "epoch": 0.46722043334397234, + "grad_norm": 130.9033966064453, + "learning_rate": 6.50219729884072e-06, + "loss": 19.6417, + "step": 231290 + }, + { + "epoch": 0.46724063397665616, + "grad_norm": 264.6180725097656, + "learning_rate": 6.501864355862682e-06, + "loss": 16.1505, + "step": 231300 + }, + { + "epoch": 0.46726083460934, + "grad_norm": 202.18988037109375, + "learning_rate": 6.50153140556474e-06, + "loss": 18.2257, + "step": 231310 + }, + { + "epoch": 0.4672810352420238, + "grad_norm": 120.94511413574219, + "learning_rate": 6.5011984479485165e-06, + "loss": 32.4436, + "step": 231320 + }, + { + "epoch": 0.4673012358747076, + "grad_norm": 350.2729797363281, + "learning_rate": 6.500865483015634e-06, + "loss": 19.1796, + "step": 231330 + }, + { + "epoch": 0.4673214365073914, + "grad_norm": 214.12326049804688, + "learning_rate": 6.5005325107677145e-06, + "loss": 22.4059, + "step": 231340 + }, + { + "epoch": 0.4673416371400752, + "grad_norm": 198.7315673828125, + "learning_rate": 6.500199531206381e-06, + "loss": 14.6877, + "step": 231350 + }, + { + "epoch": 0.467361837772759, + "grad_norm": 354.2320251464844, + "learning_rate": 6.4998665443332575e-06, + "loss": 56.6931, + "step": 231360 + }, + { + "epoch": 0.46738203840544285, + "grad_norm": 292.1263122558594, + "learning_rate": 6.499533550149968e-06, + "loss": 17.7263, + "step": 231370 + }, + { + "epoch": 0.46740223903812667, + "grad_norm": 258.74591064453125, + "learning_rate": 6.499200548658132e-06, + "loss": 20.6822, + "step": 231380 + }, + { + "epoch": 0.4674224396708105, + "grad_norm": 295.50738525390625, + "learning_rate": 6.498867539859375e-06, + "loss": 31.1578, + "step": 231390 + }, + { + "epoch": 0.4674426403034943, + "grad_norm": 316.6698303222656, + "learning_rate": 6.49853452375532e-06, + "loss": 10.2732, + "step": 231400 + }, + { + "epoch": 0.4674628409361781, + "grad_norm": 1588.327880859375, 
+ "learning_rate": 6.498201500347587e-06, + "loss": 33.3242, + "step": 231410 + }, + { + "epoch": 0.46748304156886195, + "grad_norm": 536.5111694335938, + "learning_rate": 6.497868469637805e-06, + "loss": 31.4936, + "step": 231420 + }, + { + "epoch": 0.46750324220154577, + "grad_norm": 499.2315979003906, + "learning_rate": 6.497535431627591e-06, + "loss": 29.7853, + "step": 231430 + }, + { + "epoch": 0.4675234428342296, + "grad_norm": 315.7078552246094, + "learning_rate": 6.497202386318573e-06, + "loss": 27.0389, + "step": 231440 + }, + { + "epoch": 0.4675436434669134, + "grad_norm": 646.50927734375, + "learning_rate": 6.49686933371237e-06, + "loss": 10.0395, + "step": 231450 + }, + { + "epoch": 0.4675638440995972, + "grad_norm": 666.9247436523438, + "learning_rate": 6.496536273810609e-06, + "loss": 23.477, + "step": 231460 + }, + { + "epoch": 0.467584044732281, + "grad_norm": 448.87274169921875, + "learning_rate": 6.496203206614912e-06, + "loss": 17.5754, + "step": 231470 + }, + { + "epoch": 0.4676042453649648, + "grad_norm": 479.738525390625, + "learning_rate": 6.495870132126901e-06, + "loss": 21.3026, + "step": 231480 + }, + { + "epoch": 0.46762444599764863, + "grad_norm": 262.3644104003906, + "learning_rate": 6.495537050348201e-06, + "loss": 17.3864, + "step": 231490 + }, + { + "epoch": 0.46764464663033245, + "grad_norm": 439.9342956542969, + "learning_rate": 6.495203961280434e-06, + "loss": 17.5187, + "step": 231500 + }, + { + "epoch": 0.46766484726301627, + "grad_norm": 542.8916015625, + "learning_rate": 6.494870864925225e-06, + "loss": 16.6481, + "step": 231510 + }, + { + "epoch": 0.4676850478957001, + "grad_norm": 467.0146179199219, + "learning_rate": 6.494537761284197e-06, + "loss": 15.2403, + "step": 231520 + }, + { + "epoch": 0.4677052485283839, + "grad_norm": 363.3900146484375, + "learning_rate": 6.494204650358973e-06, + "loss": 11.0023, + "step": 231530 + }, + { + "epoch": 0.46772544916106773, + "grad_norm": 192.93788146972656, + "learning_rate": 6.493871532151176e-06, + "loss": 26.1454, + "step": 231540 + }, + { + "epoch": 0.46774564979375155, + "grad_norm": 643.8673095703125, + "learning_rate": 6.493538406662429e-06, + "loss": 15.8792, + "step": 231550 + }, + { + "epoch": 0.46776585042643537, + "grad_norm": 546.372314453125, + "learning_rate": 6.493205273894361e-06, + "loss": 17.0198, + "step": 231560 + }, + { + "epoch": 0.4677860510591192, + "grad_norm": 22.62688446044922, + "learning_rate": 6.492872133848588e-06, + "loss": 15.3674, + "step": 231570 + }, + { + "epoch": 0.467806251691803, + "grad_norm": 274.5195007324219, + "learning_rate": 6.49253898652674e-06, + "loss": 22.0984, + "step": 231580 + }, + { + "epoch": 0.46782645232448683, + "grad_norm": 873.9613647460938, + "learning_rate": 6.492205831930435e-06, + "loss": 19.9182, + "step": 231590 + }, + { + "epoch": 0.4678466529571706, + "grad_norm": 208.98648071289062, + "learning_rate": 6.491872670061302e-06, + "loss": 29.9326, + "step": 231600 + }, + { + "epoch": 0.4678668535898544, + "grad_norm": 234.10870361328125, + "learning_rate": 6.491539500920962e-06, + "loss": 20.4067, + "step": 231610 + }, + { + "epoch": 0.46788705422253823, + "grad_norm": 90.78465270996094, + "learning_rate": 6.491206324511039e-06, + "loss": 8.997, + "step": 231620 + }, + { + "epoch": 0.46790725485522205, + "grad_norm": 0.0, + "learning_rate": 6.490873140833158e-06, + "loss": 28.0562, + "step": 231630 + }, + { + "epoch": 0.4679274554879059, + "grad_norm": 563.7119750976562, + "learning_rate": 6.490539949888942e-06, + "loss": 25.4656, + "step": 
231640 + }, + { + "epoch": 0.4679476561205897, + "grad_norm": 307.3618469238281, + "learning_rate": 6.490206751680015e-06, + "loss": 19.8245, + "step": 231650 + }, + { + "epoch": 0.4679678567532735, + "grad_norm": 498.5671081542969, + "learning_rate": 6.489873546208e-06, + "loss": 24.3951, + "step": 231660 + }, + { + "epoch": 0.46798805738595733, + "grad_norm": 1016.558349609375, + "learning_rate": 6.489540333474522e-06, + "loss": 31.5882, + "step": 231670 + }, + { + "epoch": 0.46800825801864115, + "grad_norm": 422.6470642089844, + "learning_rate": 6.4892071134812065e-06, + "loss": 21.794, + "step": 231680 + }, + { + "epoch": 0.468028458651325, + "grad_norm": 89.04255676269531, + "learning_rate": 6.4888738862296765e-06, + "loss": 16.5311, + "step": 231690 + }, + { + "epoch": 0.4680486592840088, + "grad_norm": 9.757550239562988, + "learning_rate": 6.4885406517215535e-06, + "loss": 17.0194, + "step": 231700 + }, + { + "epoch": 0.4680688599166926, + "grad_norm": 326.01116943359375, + "learning_rate": 6.488207409958466e-06, + "loss": 16.9723, + "step": 231710 + }, + { + "epoch": 0.4680890605493764, + "grad_norm": 95.29586791992188, + "learning_rate": 6.487874160942035e-06, + "loss": 30.1869, + "step": 231720 + }, + { + "epoch": 0.4681092611820602, + "grad_norm": 422.89642333984375, + "learning_rate": 6.487540904673886e-06, + "loss": 21.0942, + "step": 231730 + }, + { + "epoch": 0.468129461814744, + "grad_norm": 326.724609375, + "learning_rate": 6.4872076411556436e-06, + "loss": 12.9984, + "step": 231740 + }, + { + "epoch": 0.46814966244742784, + "grad_norm": 118.9988784790039, + "learning_rate": 6.48687437038893e-06, + "loss": 14.6035, + "step": 231750 + }, + { + "epoch": 0.46816986308011166, + "grad_norm": 640.1672973632812, + "learning_rate": 6.486541092375374e-06, + "loss": 14.3454, + "step": 231760 + }, + { + "epoch": 0.4681900637127955, + "grad_norm": 1.1657824516296387, + "learning_rate": 6.486207807116593e-06, + "loss": 20.8807, + "step": 231770 + }, + { + "epoch": 0.4682102643454793, + "grad_norm": 1463.0650634765625, + "learning_rate": 6.485874514614219e-06, + "loss": 25.1981, + "step": 231780 + }, + { + "epoch": 0.4682304649781631, + "grad_norm": 79.5938949584961, + "learning_rate": 6.4855412148698704e-06, + "loss": 9.8355, + "step": 231790 + }, + { + "epoch": 0.46825066561084694, + "grad_norm": 933.3094482421875, + "learning_rate": 6.485207907885175e-06, + "loss": 27.1165, + "step": 231800 + }, + { + "epoch": 0.46827086624353076, + "grad_norm": 319.1587219238281, + "learning_rate": 6.484874593661757e-06, + "loss": 19.2754, + "step": 231810 + }, + { + "epoch": 0.4682910668762146, + "grad_norm": 182.3033447265625, + "learning_rate": 6.484541272201239e-06, + "loss": 28.2243, + "step": 231820 + }, + { + "epoch": 0.4683112675088984, + "grad_norm": 468.4356689453125, + "learning_rate": 6.484207943505249e-06, + "loss": 17.5454, + "step": 231830 + }, + { + "epoch": 0.4683314681415822, + "grad_norm": 511.0302734375, + "learning_rate": 6.483874607575407e-06, + "loss": 17.7681, + "step": 231840 + }, + { + "epoch": 0.468351668774266, + "grad_norm": 397.6280517578125, + "learning_rate": 6.483541264413342e-06, + "loss": 20.0303, + "step": 231850 + }, + { + "epoch": 0.4683718694069498, + "grad_norm": 184.28292846679688, + "learning_rate": 6.483207914020675e-06, + "loss": 20.3128, + "step": 231860 + }, + { + "epoch": 0.4683920700396336, + "grad_norm": 248.45782470703125, + "learning_rate": 6.4828745563990345e-06, + "loss": 25.6381, + "step": 231870 + }, + { + "epoch": 0.46841227067231744, + 
"grad_norm": 478.2823791503906, + "learning_rate": 6.482541191550042e-06, + "loss": 30.6648, + "step": 231880 + }, + { + "epoch": 0.46843247130500126, + "grad_norm": 233.37548828125, + "learning_rate": 6.482207819475324e-06, + "loss": 11.1748, + "step": 231890 + }, + { + "epoch": 0.4684526719376851, + "grad_norm": 274.9101257324219, + "learning_rate": 6.481874440176506e-06, + "loss": 10.3556, + "step": 231900 + }, + { + "epoch": 0.4684728725703689, + "grad_norm": 147.09332275390625, + "learning_rate": 6.481541053655209e-06, + "loss": 30.1691, + "step": 231910 + }, + { + "epoch": 0.4684930732030527, + "grad_norm": 805.603515625, + "learning_rate": 6.481207659913062e-06, + "loss": 24.9236, + "step": 231920 + }, + { + "epoch": 0.46851327383573654, + "grad_norm": 211.09478759765625, + "learning_rate": 6.4808742589516895e-06, + "loss": 35.2855, + "step": 231930 + }, + { + "epoch": 0.46853347446842036, + "grad_norm": 349.984130859375, + "learning_rate": 6.480540850772714e-06, + "loss": 21.1277, + "step": 231940 + }, + { + "epoch": 0.4685536751011042, + "grad_norm": 667.3995361328125, + "learning_rate": 6.4802074353777625e-06, + "loss": 12.8221, + "step": 231950 + }, + { + "epoch": 0.468573875733788, + "grad_norm": 82.84886932373047, + "learning_rate": 6.479874012768459e-06, + "loss": 23.2999, + "step": 231960 + }, + { + "epoch": 0.4685940763664718, + "grad_norm": 388.8245544433594, + "learning_rate": 6.479540582946431e-06, + "loss": 24.1587, + "step": 231970 + }, + { + "epoch": 0.4686142769991556, + "grad_norm": 190.8773956298828, + "learning_rate": 6.4792071459133e-06, + "loss": 14.9611, + "step": 231980 + }, + { + "epoch": 0.4686344776318394, + "grad_norm": 443.36151123046875, + "learning_rate": 6.478873701670693e-06, + "loss": 36.3784, + "step": 231990 + }, + { + "epoch": 0.46865467826452323, + "grad_norm": 67.5339584350586, + "learning_rate": 6.4785402502202345e-06, + "loss": 22.0931, + "step": 232000 + }, + { + "epoch": 0.46867487889720705, + "grad_norm": 460.312744140625, + "learning_rate": 6.47820679156355e-06, + "loss": 21.1524, + "step": 232010 + }, + { + "epoch": 0.46869507952989087, + "grad_norm": 754.8125, + "learning_rate": 6.477873325702265e-06, + "loss": 13.3966, + "step": 232020 + }, + { + "epoch": 0.4687152801625747, + "grad_norm": 234.09780883789062, + "learning_rate": 6.477539852638006e-06, + "loss": 10.7828, + "step": 232030 + }, + { + "epoch": 0.4687354807952585, + "grad_norm": 301.2977600097656, + "learning_rate": 6.477206372372396e-06, + "loss": 10.8494, + "step": 232040 + }, + { + "epoch": 0.46875568142794233, + "grad_norm": 1041.564208984375, + "learning_rate": 6.476872884907061e-06, + "loss": 32.9868, + "step": 232050 + }, + { + "epoch": 0.46877588206062615, + "grad_norm": 717.1864013671875, + "learning_rate": 6.476539390243627e-06, + "loss": 38.6186, + "step": 232060 + }, + { + "epoch": 0.46879608269330997, + "grad_norm": 424.36114501953125, + "learning_rate": 6.47620588838372e-06, + "loss": 14.4405, + "step": 232070 + }, + { + "epoch": 0.4688162833259938, + "grad_norm": 99.09552764892578, + "learning_rate": 6.475872379328964e-06, + "loss": 21.134, + "step": 232080 + }, + { + "epoch": 0.4688364839586776, + "grad_norm": 40.13679504394531, + "learning_rate": 6.475538863080985e-06, + "loss": 34.1756, + "step": 232090 + }, + { + "epoch": 0.46885668459136143, + "grad_norm": 166.4034881591797, + "learning_rate": 6.4752053396414075e-06, + "loss": 10.8068, + "step": 232100 + }, + { + "epoch": 0.4688768852240452, + "grad_norm": 345.4293518066406, + "learning_rate": 
6.474871809011858e-06, + "loss": 21.3451, + "step": 232110 + }, + { + "epoch": 0.468897085856729, + "grad_norm": 188.2816619873047, + "learning_rate": 6.474538271193963e-06, + "loss": 25.8371, + "step": 232120 + }, + { + "epoch": 0.46891728648941283, + "grad_norm": 328.0193786621094, + "learning_rate": 6.4742047261893485e-06, + "loss": 11.4861, + "step": 232130 + }, + { + "epoch": 0.46893748712209665, + "grad_norm": 221.19754028320312, + "learning_rate": 6.473871173999637e-06, + "loss": 17.6138, + "step": 232140 + }, + { + "epoch": 0.4689576877547805, + "grad_norm": 13.92487907409668, + "learning_rate": 6.473537614626457e-06, + "loss": 13.3422, + "step": 232150 + }, + { + "epoch": 0.4689778883874643, + "grad_norm": 3.4051811695098877, + "learning_rate": 6.473204048071433e-06, + "loss": 25.9466, + "step": 232160 + }, + { + "epoch": 0.4689980890201481, + "grad_norm": 426.1505126953125, + "learning_rate": 6.472870474336192e-06, + "loss": 21.1222, + "step": 232170 + }, + { + "epoch": 0.46901828965283193, + "grad_norm": 475.679931640625, + "learning_rate": 6.472536893422359e-06, + "loss": 29.9708, + "step": 232180 + }, + { + "epoch": 0.46903849028551575, + "grad_norm": 247.21754455566406, + "learning_rate": 6.472203305331559e-06, + "loss": 16.6034, + "step": 232190 + }, + { + "epoch": 0.4690586909181996, + "grad_norm": 332.2191467285156, + "learning_rate": 6.471869710065418e-06, + "loss": 21.3127, + "step": 232200 + }, + { + "epoch": 0.4690788915508834, + "grad_norm": 336.51300048828125, + "learning_rate": 6.471536107625565e-06, + "loss": 23.8785, + "step": 232210 + }, + { + "epoch": 0.4690990921835672, + "grad_norm": 659.5236206054688, + "learning_rate": 6.4712024980136215e-06, + "loss": 35.8184, + "step": 232220 + }, + { + "epoch": 0.46911929281625103, + "grad_norm": 129.71156311035156, + "learning_rate": 6.470868881231215e-06, + "loss": 14.7575, + "step": 232230 + }, + { + "epoch": 0.4691394934489348, + "grad_norm": 256.0760803222656, + "learning_rate": 6.470535257279974e-06, + "loss": 28.4628, + "step": 232240 + }, + { + "epoch": 0.4691596940816186, + "grad_norm": 171.26637268066406, + "learning_rate": 6.47020162616152e-06, + "loss": 14.8522, + "step": 232250 + }, + { + "epoch": 0.46917989471430244, + "grad_norm": 358.0352783203125, + "learning_rate": 6.469867987877484e-06, + "loss": 18.9665, + "step": 232260 + }, + { + "epoch": 0.46920009534698626, + "grad_norm": 452.86004638671875, + "learning_rate": 6.469534342429489e-06, + "loss": 20.4396, + "step": 232270 + }, + { + "epoch": 0.4692202959796701, + "grad_norm": 326.33551025390625, + "learning_rate": 6.469200689819161e-06, + "loss": 15.69, + "step": 232280 + }, + { + "epoch": 0.4692404966123539, + "grad_norm": 127.25655364990234, + "learning_rate": 6.468867030048127e-06, + "loss": 17.9054, + "step": 232290 + }, + { + "epoch": 0.4692606972450377, + "grad_norm": 549.651611328125, + "learning_rate": 6.4685333631180145e-06, + "loss": 31.6002, + "step": 232300 + }, + { + "epoch": 0.46928089787772154, + "grad_norm": 684.8788452148438, + "learning_rate": 6.468199689030448e-06, + "loss": 20.9694, + "step": 232310 + }, + { + "epoch": 0.46930109851040536, + "grad_norm": 249.0142364501953, + "learning_rate": 6.467866007787053e-06, + "loss": 17.6288, + "step": 232320 + }, + { + "epoch": 0.4693212991430892, + "grad_norm": 173.32032775878906, + "learning_rate": 6.467532319389458e-06, + "loss": 23.8902, + "step": 232330 + }, + { + "epoch": 0.469341499775773, + "grad_norm": 417.7165832519531, + "learning_rate": 6.467198623839288e-06, + "loss": 16.1906, + 
"step": 232340 + }, + { + "epoch": 0.4693617004084568, + "grad_norm": 436.4908447265625, + "learning_rate": 6.466864921138171e-06, + "loss": 29.664, + "step": 232350 + }, + { + "epoch": 0.4693819010411406, + "grad_norm": 123.5679931640625, + "learning_rate": 6.4665312112877325e-06, + "loss": 14.6397, + "step": 232360 + }, + { + "epoch": 0.4694021016738244, + "grad_norm": 414.0674743652344, + "learning_rate": 6.466197494289598e-06, + "loss": 24.4675, + "step": 232370 + }, + { + "epoch": 0.4694223023065082, + "grad_norm": 621.0228881835938, + "learning_rate": 6.465863770145394e-06, + "loss": 22.5904, + "step": 232380 + }, + { + "epoch": 0.46944250293919204, + "grad_norm": 57.8876838684082, + "learning_rate": 6.4655300388567485e-06, + "loss": 13.7389, + "step": 232390 + }, + { + "epoch": 0.46946270357187586, + "grad_norm": 732.6996459960938, + "learning_rate": 6.465196300425287e-06, + "loss": 30.7162, + "step": 232400 + }, + { + "epoch": 0.4694829042045597, + "grad_norm": 599.5005493164062, + "learning_rate": 6.464862554852638e-06, + "loss": 31.595, + "step": 232410 + }, + { + "epoch": 0.4695031048372435, + "grad_norm": 547.4244995117188, + "learning_rate": 6.464528802140425e-06, + "loss": 17.4058, + "step": 232420 + }, + { + "epoch": 0.4695233054699273, + "grad_norm": 567.2337646484375, + "learning_rate": 6.464195042290277e-06, + "loss": 17.078, + "step": 232430 + }, + { + "epoch": 0.46954350610261114, + "grad_norm": 141.6189422607422, + "learning_rate": 6.463861275303819e-06, + "loss": 14.2407, + "step": 232440 + }, + { + "epoch": 0.46956370673529496, + "grad_norm": 1400.07861328125, + "learning_rate": 6.463527501182679e-06, + "loss": 31.0269, + "step": 232450 + }, + { + "epoch": 0.4695839073679788, + "grad_norm": 473.0020751953125, + "learning_rate": 6.4631937199284845e-06, + "loss": 12.3634, + "step": 232460 + }, + { + "epoch": 0.4696041080006626, + "grad_norm": 1.2929986715316772, + "learning_rate": 6.46285993154286e-06, + "loss": 10.6551, + "step": 232470 + }, + { + "epoch": 0.4696243086333464, + "grad_norm": 431.5660400390625, + "learning_rate": 6.462526136027435e-06, + "loss": 17.7755, + "step": 232480 + }, + { + "epoch": 0.4696445092660302, + "grad_norm": 192.18548583984375, + "learning_rate": 6.462192333383834e-06, + "loss": 13.9088, + "step": 232490 + }, + { + "epoch": 0.469664709898714, + "grad_norm": 379.816650390625, + "learning_rate": 6.461858523613684e-06, + "loss": 34.9893, + "step": 232500 + }, + { + "epoch": 0.4696849105313978, + "grad_norm": 66.27000427246094, + "learning_rate": 6.461524706718615e-06, + "loss": 12.5309, + "step": 232510 + }, + { + "epoch": 0.46970511116408165, + "grad_norm": 12.445015907287598, + "learning_rate": 6.4611908827002504e-06, + "loss": 26.512, + "step": 232520 + }, + { + "epoch": 0.46972531179676547, + "grad_norm": 149.20140075683594, + "learning_rate": 6.460857051560219e-06, + "loss": 13.1563, + "step": 232530 + }, + { + "epoch": 0.4697455124294493, + "grad_norm": 261.16400146484375, + "learning_rate": 6.4605232133001474e-06, + "loss": 17.0952, + "step": 232540 + }, + { + "epoch": 0.4697657130621331, + "grad_norm": 249.5340118408203, + "learning_rate": 6.460189367921663e-06, + "loss": 14.9214, + "step": 232550 + }, + { + "epoch": 0.4697859136948169, + "grad_norm": 364.1491394042969, + "learning_rate": 6.459855515426392e-06, + "loss": 28.4356, + "step": 232560 + }, + { + "epoch": 0.46980611432750075, + "grad_norm": 661.8367919921875, + "learning_rate": 6.459521655815962e-06, + "loss": 21.1061, + "step": 232570 + }, + { + "epoch": 
0.46982631496018457, + "grad_norm": 31.491783142089844, + "learning_rate": 6.459187789092003e-06, + "loss": 17.2922, + "step": 232580 + }, + { + "epoch": 0.4698465155928684, + "grad_norm": 816.8544921875, + "learning_rate": 6.4588539152561384e-06, + "loss": 29.5527, + "step": 232590 + }, + { + "epoch": 0.4698667162255522, + "grad_norm": 295.40594482421875, + "learning_rate": 6.458520034309995e-06, + "loss": 43.5878, + "step": 232600 + }, + { + "epoch": 0.469886916858236, + "grad_norm": 474.0346374511719, + "learning_rate": 6.458186146255203e-06, + "loss": 34.6073, + "step": 232610 + }, + { + "epoch": 0.4699071174909198, + "grad_norm": 348.7222595214844, + "learning_rate": 6.457852251093391e-06, + "loss": 12.7735, + "step": 232620 + }, + { + "epoch": 0.4699273181236036, + "grad_norm": 727.2521362304688, + "learning_rate": 6.4575183488261795e-06, + "loss": 44.7666, + "step": 232630 + }, + { + "epoch": 0.46994751875628743, + "grad_norm": 194.83692932128906, + "learning_rate": 6.457184439455204e-06, + "loss": 19.4839, + "step": 232640 + }, + { + "epoch": 0.46996771938897125, + "grad_norm": 595.656494140625, + "learning_rate": 6.456850522982086e-06, + "loss": 14.4586, + "step": 232650 + }, + { + "epoch": 0.46998792002165507, + "grad_norm": 530.2792358398438, + "learning_rate": 6.456516599408457e-06, + "loss": 17.6902, + "step": 232660 + }, + { + "epoch": 0.4700081206543389, + "grad_norm": 15.545038223266602, + "learning_rate": 6.456182668735941e-06, + "loss": 20.8662, + "step": 232670 + }, + { + "epoch": 0.4700283212870227, + "grad_norm": 284.81988525390625, + "learning_rate": 6.455848730966168e-06, + "loss": 39.0358, + "step": 232680 + }, + { + "epoch": 0.47004852191970653, + "grad_norm": 609.2344360351562, + "learning_rate": 6.455514786100766e-06, + "loss": 13.1211, + "step": 232690 + }, + { + "epoch": 0.47006872255239035, + "grad_norm": 836.0491943359375, + "learning_rate": 6.455180834141359e-06, + "loss": 20.7911, + "step": 232700 + }, + { + "epoch": 0.47008892318507417, + "grad_norm": 499.107666015625, + "learning_rate": 6.45484687508958e-06, + "loss": 18.5463, + "step": 232710 + }, + { + "epoch": 0.470109123817758, + "grad_norm": 301.69720458984375, + "learning_rate": 6.454512908947052e-06, + "loss": 9.9112, + "step": 232720 + }, + { + "epoch": 0.4701293244504418, + "grad_norm": 328.8058166503906, + "learning_rate": 6.454178935715405e-06, + "loss": 16.6532, + "step": 232730 + }, + { + "epoch": 0.47014952508312563, + "grad_norm": 451.3977966308594, + "learning_rate": 6.453844955396265e-06, + "loss": 20.1564, + "step": 232740 + }, + { + "epoch": 0.4701697257158094, + "grad_norm": 58.86642074584961, + "learning_rate": 6.453510967991263e-06, + "loss": 19.8341, + "step": 232750 + }, + { + "epoch": 0.4701899263484932, + "grad_norm": 240.06796264648438, + "learning_rate": 6.453176973502024e-06, + "loss": 31.4836, + "step": 232760 + }, + { + "epoch": 0.47021012698117703, + "grad_norm": 611.53173828125, + "learning_rate": 6.4528429719301764e-06, + "loss": 17.5353, + "step": 232770 + }, + { + "epoch": 0.47023032761386085, + "grad_norm": 603.2828369140625, + "learning_rate": 6.452508963277348e-06, + "loss": 15.3629, + "step": 232780 + }, + { + "epoch": 0.4702505282465447, + "grad_norm": 93.18273162841797, + "learning_rate": 6.452174947545169e-06, + "loss": 16.2431, + "step": 232790 + }, + { + "epoch": 0.4702707288792285, + "grad_norm": 113.83590698242188, + "learning_rate": 6.451840924735264e-06, + "loss": 27.0131, + "step": 232800 + }, + { + "epoch": 0.4702909295119123, + "grad_norm": 
4.318406581878662, + "learning_rate": 6.451506894849262e-06, + "loss": 9.0512, + "step": 232810 + }, + { + "epoch": 0.47031113014459613, + "grad_norm": 69.14533996582031, + "learning_rate": 6.451172857888792e-06, + "loss": 19.8808, + "step": 232820 + }, + { + "epoch": 0.47033133077727995, + "grad_norm": 780.8925170898438, + "learning_rate": 6.450838813855482e-06, + "loss": 22.9376, + "step": 232830 + }, + { + "epoch": 0.4703515314099638, + "grad_norm": 415.5531311035156, + "learning_rate": 6.450504762750959e-06, + "loss": 27.1629, + "step": 232840 + }, + { + "epoch": 0.4703717320426476, + "grad_norm": 351.7154846191406, + "learning_rate": 6.4501707045768524e-06, + "loss": 42.6802, + "step": 232850 + }, + { + "epoch": 0.4703919326753314, + "grad_norm": 242.80615234375, + "learning_rate": 6.449836639334788e-06, + "loss": 20.4225, + "step": 232860 + }, + { + "epoch": 0.47041213330801523, + "grad_norm": 526.6072387695312, + "learning_rate": 6.449502567026398e-06, + "loss": 22.3538, + "step": 232870 + }, + { + "epoch": 0.470432333940699, + "grad_norm": 86.08837127685547, + "learning_rate": 6.449168487653305e-06, + "loss": 22.0289, + "step": 232880 + }, + { + "epoch": 0.4704525345733828, + "grad_norm": 244.63906860351562, + "learning_rate": 6.448834401217143e-06, + "loss": 12.4748, + "step": 232890 + }, + { + "epoch": 0.47047273520606664, + "grad_norm": 541.3038330078125, + "learning_rate": 6.448500307719537e-06, + "loss": 24.2339, + "step": 232900 + }, + { + "epoch": 0.47049293583875046, + "grad_norm": 239.39019775390625, + "learning_rate": 6.448166207162119e-06, + "loss": 15.4841, + "step": 232910 + }, + { + "epoch": 0.4705131364714343, + "grad_norm": 338.6671142578125, + "learning_rate": 6.447832099546512e-06, + "loss": 12.1724, + "step": 232920 + }, + { + "epoch": 0.4705333371041181, + "grad_norm": 416.7694091796875, + "learning_rate": 6.4474979848743455e-06, + "loss": 16.8376, + "step": 232930 + }, + { + "epoch": 0.4705535377368019, + "grad_norm": 503.6643371582031, + "learning_rate": 6.447163863147251e-06, + "loss": 27.0924, + "step": 232940 + }, + { + "epoch": 0.47057373836948574, + "grad_norm": 351.7838439941406, + "learning_rate": 6.446829734366855e-06, + "loss": 30.1301, + "step": 232950 + }, + { + "epoch": 0.47059393900216956, + "grad_norm": 394.0791931152344, + "learning_rate": 6.446495598534787e-06, + "loss": 32.1762, + "step": 232960 + }, + { + "epoch": 0.4706141396348534, + "grad_norm": 664.0892333984375, + "learning_rate": 6.446161455652674e-06, + "loss": 21.3008, + "step": 232970 + }, + { + "epoch": 0.4706343402675372, + "grad_norm": 419.5210876464844, + "learning_rate": 6.445827305722148e-06, + "loss": 8.2237, + "step": 232980 + }, + { + "epoch": 0.470654540900221, + "grad_norm": 240.32484436035156, + "learning_rate": 6.445493148744832e-06, + "loss": 17.5572, + "step": 232990 + }, + { + "epoch": 0.4706747415329048, + "grad_norm": 127.21864318847656, + "learning_rate": 6.445158984722358e-06, + "loss": 13.9824, + "step": 233000 + }, + { + "epoch": 0.4706949421655886, + "grad_norm": 535.8229370117188, + "learning_rate": 6.444824813656356e-06, + "loss": 24.5616, + "step": 233010 + }, + { + "epoch": 0.4707151427982724, + "grad_norm": 567.6218872070312, + "learning_rate": 6.444490635548451e-06, + "loss": 16.5069, + "step": 233020 + }, + { + "epoch": 0.47073534343095624, + "grad_norm": 982.9053955078125, + "learning_rate": 6.444156450400276e-06, + "loss": 11.4735, + "step": 233030 + }, + { + "epoch": 0.47075554406364006, + "grad_norm": 616.2341918945312, + "learning_rate": 
6.443822258213457e-06, + "loss": 21.5657, + "step": 233040 + }, + { + "epoch": 0.4707757446963239, + "grad_norm": 132.17913818359375, + "learning_rate": 6.443488058989623e-06, + "loss": 16.1341, + "step": 233050 + }, + { + "epoch": 0.4707959453290077, + "grad_norm": 271.3033447265625, + "learning_rate": 6.443153852730404e-06, + "loss": 20.707, + "step": 233060 + }, + { + "epoch": 0.4708161459616915, + "grad_norm": 282.2535095214844, + "learning_rate": 6.4428196394374275e-06, + "loss": 23.6165, + "step": 233070 + }, + { + "epoch": 0.47083634659437534, + "grad_norm": 43.08664321899414, + "learning_rate": 6.442485419112322e-06, + "loss": 16.8165, + "step": 233080 + }, + { + "epoch": 0.47085654722705916, + "grad_norm": 310.480224609375, + "learning_rate": 6.44215119175672e-06, + "loss": 31.9848, + "step": 233090 + }, + { + "epoch": 0.470876747859743, + "grad_norm": 231.5115966796875, + "learning_rate": 6.441816957372247e-06, + "loss": 21.1428, + "step": 233100 + }, + { + "epoch": 0.4708969484924268, + "grad_norm": 263.9593811035156, + "learning_rate": 6.441482715960532e-06, + "loss": 13.3092, + "step": 233110 + }, + { + "epoch": 0.4709171491251106, + "grad_norm": 534.0245971679688, + "learning_rate": 6.441148467523206e-06, + "loss": 21.6064, + "step": 233120 + }, + { + "epoch": 0.4709373497577944, + "grad_norm": 38.840782165527344, + "learning_rate": 6.440814212061897e-06, + "loss": 62.0073, + "step": 233130 + }, + { + "epoch": 0.4709575503904782, + "grad_norm": 383.1385803222656, + "learning_rate": 6.440479949578234e-06, + "loss": 16.9425, + "step": 233140 + }, + { + "epoch": 0.47097775102316203, + "grad_norm": 328.57696533203125, + "learning_rate": 6.440145680073847e-06, + "loss": 14.7871, + "step": 233150 + }, + { + "epoch": 0.47099795165584585, + "grad_norm": 221.1512908935547, + "learning_rate": 6.4398114035503644e-06, + "loss": 14.7821, + "step": 233160 + }, + { + "epoch": 0.47101815228852967, + "grad_norm": 716.730224609375, + "learning_rate": 6.4394771200094156e-06, + "loss": 19.8269, + "step": 233170 + }, + { + "epoch": 0.4710383529212135, + "grad_norm": 356.3135681152344, + "learning_rate": 6.439142829452629e-06, + "loss": 21.0467, + "step": 233180 + }, + { + "epoch": 0.4710585535538973, + "grad_norm": 279.4208068847656, + "learning_rate": 6.438808531881637e-06, + "loss": 24.2735, + "step": 233190 + }, + { + "epoch": 0.47107875418658113, + "grad_norm": 310.1206359863281, + "learning_rate": 6.438474227298065e-06, + "loss": 16.764, + "step": 233200 + }, + { + "epoch": 0.47109895481926495, + "grad_norm": 325.89068603515625, + "learning_rate": 6.438139915703544e-06, + "loss": 19.6593, + "step": 233210 + }, + { + "epoch": 0.47111915545194877, + "grad_norm": 230.9476776123047, + "learning_rate": 6.437805597099704e-06, + "loss": 40.1894, + "step": 233220 + }, + { + "epoch": 0.4711393560846326, + "grad_norm": 389.3857116699219, + "learning_rate": 6.437471271488174e-06, + "loss": 10.1524, + "step": 233230 + }, + { + "epoch": 0.4711595567173164, + "grad_norm": 348.37701416015625, + "learning_rate": 6.437136938870583e-06, + "loss": 19.6702, + "step": 233240 + }, + { + "epoch": 0.47117975735000023, + "grad_norm": 791.6915283203125, + "learning_rate": 6.4368025992485615e-06, + "loss": 19.8193, + "step": 233250 + }, + { + "epoch": 0.471199957982684, + "grad_norm": 0.0, + "learning_rate": 6.436468252623738e-06, + "loss": 10.4314, + "step": 233260 + }, + { + "epoch": 0.4712201586153678, + "grad_norm": 883.5969848632812, + "learning_rate": 6.436133898997742e-06, + "loss": 33.7937, + "step": 233270 + 
}, + { + "epoch": 0.47124035924805163, + "grad_norm": 662.7801513671875, + "learning_rate": 6.4357995383722025e-06, + "loss": 12.4613, + "step": 233280 + }, + { + "epoch": 0.47126055988073545, + "grad_norm": 454.23809814453125, + "learning_rate": 6.435465170748753e-06, + "loss": 10.3335, + "step": 233290 + }, + { + "epoch": 0.4712807605134193, + "grad_norm": 128.9053497314453, + "learning_rate": 6.435130796129019e-06, + "loss": 11.611, + "step": 233300 + }, + { + "epoch": 0.4713009611461031, + "grad_norm": 222.30667114257812, + "learning_rate": 6.43479641451463e-06, + "loss": 24.3128, + "step": 233310 + }, + { + "epoch": 0.4713211617787869, + "grad_norm": 512.7349243164062, + "learning_rate": 6.43446202590722e-06, + "loss": 15.7066, + "step": 233320 + }, + { + "epoch": 0.47134136241147073, + "grad_norm": 1404.3638916015625, + "learning_rate": 6.434127630308415e-06, + "loss": 34.325, + "step": 233330 + }, + { + "epoch": 0.47136156304415455, + "grad_norm": 303.31195068359375, + "learning_rate": 6.433793227719845e-06, + "loss": 23.5098, + "step": 233340 + }, + { + "epoch": 0.4713817636768384, + "grad_norm": 454.6119079589844, + "learning_rate": 6.4334588181431424e-06, + "loss": 22.5974, + "step": 233350 + }, + { + "epoch": 0.4714019643095222, + "grad_norm": 658.8758544921875, + "learning_rate": 6.433124401579936e-06, + "loss": 10.9955, + "step": 233360 + }, + { + "epoch": 0.471422164942206, + "grad_norm": 393.7095947265625, + "learning_rate": 6.432789978031852e-06, + "loss": 13.9028, + "step": 233370 + }, + { + "epoch": 0.47144236557488983, + "grad_norm": 268.5219421386719, + "learning_rate": 6.432455547500525e-06, + "loss": 12.0937, + "step": 233380 + }, + { + "epoch": 0.4714625662075736, + "grad_norm": 206.73312377929688, + "learning_rate": 6.432121109987584e-06, + "loss": 38.5037, + "step": 233390 + }, + { + "epoch": 0.4714827668402574, + "grad_norm": 417.64654541015625, + "learning_rate": 6.431786665494657e-06, + "loss": 12.2168, + "step": 233400 + }, + { + "epoch": 0.47150296747294124, + "grad_norm": 756.30615234375, + "learning_rate": 6.431452214023377e-06, + "loss": 19.4205, + "step": 233410 + }, + { + "epoch": 0.47152316810562506, + "grad_norm": 21.44666862487793, + "learning_rate": 6.431117755575371e-06, + "loss": 14.3705, + "step": 233420 + }, + { + "epoch": 0.4715433687383089, + "grad_norm": 223.37791442871094, + "learning_rate": 6.430783290152272e-06, + "loss": 18.1995, + "step": 233430 + }, + { + "epoch": 0.4715635693709927, + "grad_norm": 299.15960693359375, + "learning_rate": 6.430448817755708e-06, + "loss": 13.9114, + "step": 233440 + }, + { + "epoch": 0.4715837700036765, + "grad_norm": 111.82083129882812, + "learning_rate": 6.43011433838731e-06, + "loss": 9.9036, + "step": 233450 + }, + { + "epoch": 0.47160397063636034, + "grad_norm": 395.947998046875, + "learning_rate": 6.429779852048709e-06, + "loss": 16.2541, + "step": 233460 + }, + { + "epoch": 0.47162417126904416, + "grad_norm": 320.6033020019531, + "learning_rate": 6.429445358741533e-06, + "loss": 18.566, + "step": 233470 + }, + { + "epoch": 0.471644371901728, + "grad_norm": 0.041875384747982025, + "learning_rate": 6.429110858467414e-06, + "loss": 12.0097, + "step": 233480 + }, + { + "epoch": 0.4716645725344118, + "grad_norm": 554.3045043945312, + "learning_rate": 6.428776351227984e-06, + "loss": 23.7955, + "step": 233490 + }, + { + "epoch": 0.4716847731670956, + "grad_norm": 627.7864990234375, + "learning_rate": 6.428441837024868e-06, + "loss": 18.7712, + "step": 233500 + }, + { + "epoch": 0.47170497379977944, + 
"grad_norm": 320.4233703613281, + "learning_rate": 6.428107315859702e-06, + "loss": 21.9909, + "step": 233510 + }, + { + "epoch": 0.4717251744324632, + "grad_norm": 382.1122741699219, + "learning_rate": 6.427772787734114e-06, + "loss": 24.3574, + "step": 233520 + }, + { + "epoch": 0.471745375065147, + "grad_norm": 340.03533935546875, + "learning_rate": 6.4274382526497335e-06, + "loss": 18.4787, + "step": 233530 + }, + { + "epoch": 0.47176557569783084, + "grad_norm": 311.5838928222656, + "learning_rate": 6.427103710608193e-06, + "loss": 20.3582, + "step": 233540 + }, + { + "epoch": 0.47178577633051466, + "grad_norm": 575.9215698242188, + "learning_rate": 6.426769161611122e-06, + "loss": 21.8079, + "step": 233550 + }, + { + "epoch": 0.4718059769631985, + "grad_norm": 239.95668029785156, + "learning_rate": 6.426434605660151e-06, + "loss": 16.3507, + "step": 233560 + }, + { + "epoch": 0.4718261775958823, + "grad_norm": 160.08863830566406, + "learning_rate": 6.426100042756912e-06, + "loss": 20.5992, + "step": 233570 + }, + { + "epoch": 0.4718463782285661, + "grad_norm": 149.10824584960938, + "learning_rate": 6.425765472903032e-06, + "loss": 23.4933, + "step": 233580 + }, + { + "epoch": 0.47186657886124994, + "grad_norm": 652.010986328125, + "learning_rate": 6.425430896100145e-06, + "loss": 23.7501, + "step": 233590 + }, + { + "epoch": 0.47188677949393376, + "grad_norm": 285.45513916015625, + "learning_rate": 6.425096312349881e-06, + "loss": 11.7523, + "step": 233600 + }, + { + "epoch": 0.4719069801266176, + "grad_norm": 456.1563720703125, + "learning_rate": 6.424761721653871e-06, + "loss": 33.2282, + "step": 233610 + }, + { + "epoch": 0.4719271807593014, + "grad_norm": 529.824951171875, + "learning_rate": 6.4244271240137435e-06, + "loss": 20.6466, + "step": 233620 + }, + { + "epoch": 0.4719473813919852, + "grad_norm": 483.4859619140625, + "learning_rate": 6.424092519431132e-06, + "loss": 29.0304, + "step": 233630 + }, + { + "epoch": 0.471967582024669, + "grad_norm": 487.1600341796875, + "learning_rate": 6.423757907907667e-06, + "loss": 13.2332, + "step": 233640 + }, + { + "epoch": 0.4719877826573528, + "grad_norm": 618.8986206054688, + "learning_rate": 6.423423289444978e-06, + "loss": 19.602, + "step": 233650 + }, + { + "epoch": 0.4720079832900366, + "grad_norm": 1082.475830078125, + "learning_rate": 6.423088664044696e-06, + "loss": 24.639, + "step": 233660 + }, + { + "epoch": 0.47202818392272045, + "grad_norm": 0.0, + "learning_rate": 6.422754031708453e-06, + "loss": 17.1816, + "step": 233670 + }, + { + "epoch": 0.47204838455540427, + "grad_norm": 472.9530029296875, + "learning_rate": 6.422419392437879e-06, + "loss": 16.0823, + "step": 233680 + }, + { + "epoch": 0.4720685851880881, + "grad_norm": 11.948860168457031, + "learning_rate": 6.422084746234605e-06, + "loss": 15.01, + "step": 233690 + }, + { + "epoch": 0.4720887858207719, + "grad_norm": 82.46209716796875, + "learning_rate": 6.421750093100264e-06, + "loss": 11.0773, + "step": 233700 + }, + { + "epoch": 0.4721089864534557, + "grad_norm": 339.6632995605469, + "learning_rate": 6.421415433036484e-06, + "loss": 26.4501, + "step": 233710 + }, + { + "epoch": 0.47212918708613955, + "grad_norm": 376.2914123535156, + "learning_rate": 6.421080766044898e-06, + "loss": 28.2944, + "step": 233720 + }, + { + "epoch": 0.47214938771882337, + "grad_norm": 724.7662963867188, + "learning_rate": 6.420746092127138e-06, + "loss": 30.572, + "step": 233730 + }, + { + "epoch": 0.4721695883515072, + "grad_norm": 336.7801208496094, + "learning_rate": 
6.420411411284831e-06, + "loss": 24.8382, + "step": 233740 + }, + { + "epoch": 0.472189788984191, + "grad_norm": 318.15106201171875, + "learning_rate": 6.420076723519615e-06, + "loss": 13.2367, + "step": 233750 + }, + { + "epoch": 0.4722099896168748, + "grad_norm": 161.84442138671875, + "learning_rate": 6.419742028833114e-06, + "loss": 18.2227, + "step": 233760 + }, + { + "epoch": 0.4722301902495586, + "grad_norm": 792.44140625, + "learning_rate": 6.419407327226963e-06, + "loss": 24.4637, + "step": 233770 + }, + { + "epoch": 0.4722503908822424, + "grad_norm": 566.54443359375, + "learning_rate": 6.419072618702794e-06, + "loss": 23.9833, + "step": 233780 + }, + { + "epoch": 0.47227059151492623, + "grad_norm": 690.1046142578125, + "learning_rate": 6.4187379032622355e-06, + "loss": 30.394, + "step": 233790 + }, + { + "epoch": 0.47229079214761005, + "grad_norm": 372.13397216796875, + "learning_rate": 6.418403180906923e-06, + "loss": 29.5859, + "step": 233800 + }, + { + "epoch": 0.47231099278029387, + "grad_norm": 73.77745819091797, + "learning_rate": 6.418068451638484e-06, + "loss": 33.5413, + "step": 233810 + }, + { + "epoch": 0.4723311934129777, + "grad_norm": 362.9255676269531, + "learning_rate": 6.4177337154585514e-06, + "loss": 10.622, + "step": 233820 + }, + { + "epoch": 0.4723513940456615, + "grad_norm": 509.4954528808594, + "learning_rate": 6.417398972368756e-06, + "loss": 25.9802, + "step": 233830 + }, + { + "epoch": 0.47237159467834533, + "grad_norm": 346.18218994140625, + "learning_rate": 6.41706422237073e-06, + "loss": 18.6955, + "step": 233840 + }, + { + "epoch": 0.47239179531102915, + "grad_norm": 19.44637107849121, + "learning_rate": 6.416729465466106e-06, + "loss": 48.4805, + "step": 233850 + }, + { + "epoch": 0.47241199594371297, + "grad_norm": 788.625244140625, + "learning_rate": 6.416394701656514e-06, + "loss": 17.8316, + "step": 233860 + }, + { + "epoch": 0.4724321965763968, + "grad_norm": 17.05004119873047, + "learning_rate": 6.416059930943586e-06, + "loss": 21.2076, + "step": 233870 + }, + { + "epoch": 0.4724523972090806, + "grad_norm": 653.5442504882812, + "learning_rate": 6.415725153328953e-06, + "loss": 34.2984, + "step": 233880 + }, + { + "epoch": 0.47247259784176443, + "grad_norm": 792.2357788085938, + "learning_rate": 6.41539036881425e-06, + "loss": 19.9054, + "step": 233890 + }, + { + "epoch": 0.4724927984744482, + "grad_norm": 198.41261291503906, + "learning_rate": 6.415055577401101e-06, + "loss": 23.0116, + "step": 233900 + }, + { + "epoch": 0.472512999107132, + "grad_norm": 1255.3897705078125, + "learning_rate": 6.414720779091147e-06, + "loss": 33.3987, + "step": 233910 + }, + { + "epoch": 0.47253319973981583, + "grad_norm": 705.5669555664062, + "learning_rate": 6.414385973886012e-06, + "loss": 16.4641, + "step": 233920 + }, + { + "epoch": 0.47255340037249965, + "grad_norm": 1978.4207763671875, + "learning_rate": 6.414051161787334e-06, + "loss": 24.5816, + "step": 233930 + }, + { + "epoch": 0.4725736010051835, + "grad_norm": 384.2996826171875, + "learning_rate": 6.4137163427967415e-06, + "loss": 26.6482, + "step": 233940 + }, + { + "epoch": 0.4725938016378673, + "grad_norm": 217.927490234375, + "learning_rate": 6.413381516915868e-06, + "loss": 25.0769, + "step": 233950 + }, + { + "epoch": 0.4726140022705511, + "grad_norm": 209.63067626953125, + "learning_rate": 6.413046684146343e-06, + "loss": 19.4135, + "step": 233960 + }, + { + "epoch": 0.47263420290323493, + "grad_norm": 7.532628536224365, + "learning_rate": 6.412711844489801e-06, + "loss": 15.6465, + "step": 
233970 + }, + { + "epoch": 0.47265440353591875, + "grad_norm": 361.89581298828125, + "learning_rate": 6.4123769979478715e-06, + "loss": 31.4909, + "step": 233980 + }, + { + "epoch": 0.4726746041686026, + "grad_norm": 448.7689208984375, + "learning_rate": 6.412042144522188e-06, + "loss": 16.613, + "step": 233990 + }, + { + "epoch": 0.4726948048012864, + "grad_norm": 454.71240234375, + "learning_rate": 6.411707284214384e-06, + "loss": 22.9245, + "step": 234000 + }, + { + "epoch": 0.4727150054339702, + "grad_norm": 209.35000610351562, + "learning_rate": 6.411372417026087e-06, + "loss": 31.0641, + "step": 234010 + }, + { + "epoch": 0.47273520606665403, + "grad_norm": 932.4186401367188, + "learning_rate": 6.411037542958935e-06, + "loss": 32.0332, + "step": 234020 + }, + { + "epoch": 0.4727554066993378, + "grad_norm": 49.623966217041016, + "learning_rate": 6.410702662014554e-06, + "loss": 20.1529, + "step": 234030 + }, + { + "epoch": 0.4727756073320216, + "grad_norm": 179.2115478515625, + "learning_rate": 6.410367774194583e-06, + "loss": 18.3027, + "step": 234040 + }, + { + "epoch": 0.47279580796470544, + "grad_norm": 573.8726196289062, + "learning_rate": 6.410032879500647e-06, + "loss": 20.464, + "step": 234050 + }, + { + "epoch": 0.47281600859738926, + "grad_norm": 353.9385986328125, + "learning_rate": 6.409697977934384e-06, + "loss": 32.2044, + "step": 234060 + }, + { + "epoch": 0.4728362092300731, + "grad_norm": 297.4496765136719, + "learning_rate": 6.409363069497424e-06, + "loss": 11.7943, + "step": 234070 + }, + { + "epoch": 0.4728564098627569, + "grad_norm": 558.5277099609375, + "learning_rate": 6.4090281541913975e-06, + "loss": 18.8906, + "step": 234080 + }, + { + "epoch": 0.4728766104954407, + "grad_norm": 306.1007080078125, + "learning_rate": 6.408693232017942e-06, + "loss": 17.146, + "step": 234090 + }, + { + "epoch": 0.47289681112812454, + "grad_norm": 295.1012878417969, + "learning_rate": 6.408358302978683e-06, + "loss": 31.9165, + "step": 234100 + }, + { + "epoch": 0.47291701176080836, + "grad_norm": 766.9622802734375, + "learning_rate": 6.408023367075258e-06, + "loss": 21.9427, + "step": 234110 + }, + { + "epoch": 0.4729372123934922, + "grad_norm": 239.5560302734375, + "learning_rate": 6.4076884243092975e-06, + "loss": 18.5846, + "step": 234120 + }, + { + "epoch": 0.472957413026176, + "grad_norm": 99.82518768310547, + "learning_rate": 6.407353474682436e-06, + "loss": 11.0251, + "step": 234130 + }, + { + "epoch": 0.4729776136588598, + "grad_norm": 246.14927673339844, + "learning_rate": 6.407018518196303e-06, + "loss": 27.4023, + "step": 234140 + }, + { + "epoch": 0.4729978142915436, + "grad_norm": 343.39886474609375, + "learning_rate": 6.406683554852532e-06, + "loss": 26.9017, + "step": 234150 + }, + { + "epoch": 0.4730180149242274, + "grad_norm": 508.7539367675781, + "learning_rate": 6.406348584652756e-06, + "loss": 23.6979, + "step": 234160 + }, + { + "epoch": 0.4730382155569112, + "grad_norm": 1494.3701171875, + "learning_rate": 6.4060136075986076e-06, + "loss": 20.9132, + "step": 234170 + }, + { + "epoch": 0.47305841618959504, + "grad_norm": 256.2373962402344, + "learning_rate": 6.405678623691721e-06, + "loss": 17.7332, + "step": 234180 + }, + { + "epoch": 0.47307861682227886, + "grad_norm": 583.6516723632812, + "learning_rate": 6.405343632933725e-06, + "loss": 9.6859, + "step": 234190 + }, + { + "epoch": 0.4730988174549627, + "grad_norm": 395.7854919433594, + "learning_rate": 6.4050086353262565e-06, + "loss": 13.114, + "step": 234200 + }, + { + "epoch": 0.4731190180876465, + 
"grad_norm": 945.669189453125, + "learning_rate": 6.404673630870946e-06, + "loss": 14.7819, + "step": 234210 + }, + { + "epoch": 0.4731392187203303, + "grad_norm": 281.0338134765625, + "learning_rate": 6.404338619569425e-06, + "loss": 34.6639, + "step": 234220 + }, + { + "epoch": 0.47315941935301414, + "grad_norm": 171.36349487304688, + "learning_rate": 6.40400360142333e-06, + "loss": 9.9086, + "step": 234230 + }, + { + "epoch": 0.47317961998569796, + "grad_norm": 601.5171508789062, + "learning_rate": 6.403668576434289e-06, + "loss": 17.3577, + "step": 234240 + }, + { + "epoch": 0.4731998206183818, + "grad_norm": 301.04840087890625, + "learning_rate": 6.40333354460394e-06, + "loss": 15.3694, + "step": 234250 + }, + { + "epoch": 0.4732200212510656, + "grad_norm": 215.11199951171875, + "learning_rate": 6.402998505933913e-06, + "loss": 18.9071, + "step": 234260 + }, + { + "epoch": 0.4732402218837494, + "grad_norm": 186.56710815429688, + "learning_rate": 6.4026634604258404e-06, + "loss": 16.7488, + "step": 234270 + }, + { + "epoch": 0.4732604225164332, + "grad_norm": 447.1479187011719, + "learning_rate": 6.402328408081358e-06, + "loss": 20.4462, + "step": 234280 + }, + { + "epoch": 0.473280623149117, + "grad_norm": 558.9010620117188, + "learning_rate": 6.401993348902095e-06, + "loss": 21.6987, + "step": 234290 + }, + { + "epoch": 0.47330082378180083, + "grad_norm": 360.9781188964844, + "learning_rate": 6.401658282889689e-06, + "loss": 23.5205, + "step": 234300 + }, + { + "epoch": 0.47332102441448465, + "grad_norm": 217.78128051757812, + "learning_rate": 6.401323210045768e-06, + "loss": 23.361, + "step": 234310 + }, + { + "epoch": 0.47334122504716847, + "grad_norm": 386.0395812988281, + "learning_rate": 6.400988130371969e-06, + "loss": 19.8657, + "step": 234320 + }, + { + "epoch": 0.4733614256798523, + "grad_norm": 235.39915466308594, + "learning_rate": 6.400653043869924e-06, + "loss": 18.4033, + "step": 234330 + }, + { + "epoch": 0.4733816263125361, + "grad_norm": 509.7830505371094, + "learning_rate": 6.400317950541265e-06, + "loss": 30.7332, + "step": 234340 + }, + { + "epoch": 0.47340182694521993, + "grad_norm": 585.4219360351562, + "learning_rate": 6.399982850387625e-06, + "loss": 19.8323, + "step": 234350 + }, + { + "epoch": 0.47342202757790375, + "grad_norm": 1028.9339599609375, + "learning_rate": 6.3996477434106405e-06, + "loss": 24.8702, + "step": 234360 + }, + { + "epoch": 0.47344222821058757, + "grad_norm": 802.591064453125, + "learning_rate": 6.399312629611941e-06, + "loss": 24.8163, + "step": 234370 + }, + { + "epoch": 0.4734624288432714, + "grad_norm": 0.0, + "learning_rate": 6.398977508993164e-06, + "loss": 8.7213, + "step": 234380 + }, + { + "epoch": 0.4734826294759552, + "grad_norm": 668.1257934570312, + "learning_rate": 6.3986423815559386e-06, + "loss": 26.5238, + "step": 234390 + }, + { + "epoch": 0.47350283010863903, + "grad_norm": 252.56785583496094, + "learning_rate": 6.3983072473019e-06, + "loss": 7.3386, + "step": 234400 + }, + { + "epoch": 0.4735230307413228, + "grad_norm": 457.1835021972656, + "learning_rate": 6.3979721062326815e-06, + "loss": 28.3048, + "step": 234410 + }, + { + "epoch": 0.4735432313740066, + "grad_norm": 412.34381103515625, + "learning_rate": 6.397636958349918e-06, + "loss": 11.678, + "step": 234420 + }, + { + "epoch": 0.47356343200669043, + "grad_norm": 648.4039916992188, + "learning_rate": 6.397301803655239e-06, + "loss": 20.3825, + "step": 234430 + }, + { + "epoch": 0.47358363263937425, + "grad_norm": 105.66046905517578, + "learning_rate": 
6.396966642150282e-06, + "loss": 19.9678, + "step": 234440 + }, + { + "epoch": 0.4736038332720581, + "grad_norm": 358.0011291503906, + "learning_rate": 6.396631473836677e-06, + "loss": 27.233, + "step": 234450 + }, + { + "epoch": 0.4736240339047419, + "grad_norm": 141.00003051757812, + "learning_rate": 6.396296298716061e-06, + "loss": 15.7124, + "step": 234460 + }, + { + "epoch": 0.4736442345374257, + "grad_norm": 604.2069702148438, + "learning_rate": 6.3959611167900685e-06, + "loss": 20.4789, + "step": 234470 + }, + { + "epoch": 0.47366443517010953, + "grad_norm": 471.7048645019531, + "learning_rate": 6.395625928060328e-06, + "loss": 13.4348, + "step": 234480 + }, + { + "epoch": 0.47368463580279335, + "grad_norm": 99.30219268798828, + "learning_rate": 6.395290732528476e-06, + "loss": 9.8516, + "step": 234490 + }, + { + "epoch": 0.4737048364354772, + "grad_norm": 121.78633117675781, + "learning_rate": 6.3949555301961474e-06, + "loss": 32.6659, + "step": 234500 + }, + { + "epoch": 0.473725037068161, + "grad_norm": 192.4600372314453, + "learning_rate": 6.3946203210649734e-06, + "loss": 21.0008, + "step": 234510 + }, + { + "epoch": 0.4737452377008448, + "grad_norm": 320.25244140625, + "learning_rate": 6.39428510513659e-06, + "loss": 14.6393, + "step": 234520 + }, + { + "epoch": 0.47376543833352863, + "grad_norm": 491.1825256347656, + "learning_rate": 6.393949882412629e-06, + "loss": 34.5892, + "step": 234530 + }, + { + "epoch": 0.4737856389662124, + "grad_norm": 678.1868286132812, + "learning_rate": 6.393614652894727e-06, + "loss": 16.4696, + "step": 234540 + }, + { + "epoch": 0.4738058395988962, + "grad_norm": 449.6884765625, + "learning_rate": 6.3932794165845156e-06, + "loss": 19.0783, + "step": 234550 + }, + { + "epoch": 0.47382604023158004, + "grad_norm": 326.8847351074219, + "learning_rate": 6.39294417348363e-06, + "loss": 14.5435, + "step": 234560 + }, + { + "epoch": 0.47384624086426386, + "grad_norm": 517.9143676757812, + "learning_rate": 6.392608923593703e-06, + "loss": 16.6445, + "step": 234570 + }, + { + "epoch": 0.4738664414969477, + "grad_norm": 56.59879684448242, + "learning_rate": 6.392273666916369e-06, + "loss": 12.9108, + "step": 234580 + }, + { + "epoch": 0.4738866421296315, + "grad_norm": 247.04637145996094, + "learning_rate": 6.391938403453262e-06, + "loss": 20.0154, + "step": 234590 + }, + { + "epoch": 0.4739068427623153, + "grad_norm": 277.41070556640625, + "learning_rate": 6.391603133206015e-06, + "loss": 19.3081, + "step": 234600 + }, + { + "epoch": 0.47392704339499914, + "grad_norm": 304.9445495605469, + "learning_rate": 6.391267856176263e-06, + "loss": 23.4097, + "step": 234610 + }, + { + "epoch": 0.47394724402768296, + "grad_norm": 638.6124267578125, + "learning_rate": 6.390932572365641e-06, + "loss": 32.8187, + "step": 234620 + }, + { + "epoch": 0.4739674446603668, + "grad_norm": 279.404052734375, + "learning_rate": 6.390597281775783e-06, + "loss": 17.2522, + "step": 234630 + }, + { + "epoch": 0.4739876452930506, + "grad_norm": 719.03173828125, + "learning_rate": 6.390261984408322e-06, + "loss": 24.9276, + "step": 234640 + }, + { + "epoch": 0.4740078459257344, + "grad_norm": 469.1488952636719, + "learning_rate": 6.389926680264893e-06, + "loss": 20.5542, + "step": 234650 + }, + { + "epoch": 0.47402804655841824, + "grad_norm": 165.4766082763672, + "learning_rate": 6.389591369347129e-06, + "loss": 38.6157, + "step": 234660 + }, + { + "epoch": 0.474048247191102, + "grad_norm": 217.4148712158203, + "learning_rate": 6.389256051656665e-06, + "loss": 20.6019, + "step": 
234670 + }, + { + "epoch": 0.4740684478237858, + "grad_norm": 348.1098937988281, + "learning_rate": 6.388920727195138e-06, + "loss": 14.3781, + "step": 234680 + }, + { + "epoch": 0.47408864845646964, + "grad_norm": 523.4564208984375, + "learning_rate": 6.3885853959641765e-06, + "loss": 14.9741, + "step": 234690 + }, + { + "epoch": 0.47410884908915346, + "grad_norm": 287.4259948730469, + "learning_rate": 6.388250057965421e-06, + "loss": 19.5019, + "step": 234700 + }, + { + "epoch": 0.4741290497218373, + "grad_norm": 714.7274780273438, + "learning_rate": 6.387914713200502e-06, + "loss": 21.1032, + "step": 234710 + }, + { + "epoch": 0.4741492503545211, + "grad_norm": 599.4790649414062, + "learning_rate": 6.387579361671054e-06, + "loss": 21.3773, + "step": 234720 + }, + { + "epoch": 0.4741694509872049, + "grad_norm": 457.8786315917969, + "learning_rate": 6.387244003378713e-06, + "loss": 25.5746, + "step": 234730 + }, + { + "epoch": 0.47418965161988874, + "grad_norm": 130.84654235839844, + "learning_rate": 6.386908638325114e-06, + "loss": 22.1176, + "step": 234740 + }, + { + "epoch": 0.47420985225257256, + "grad_norm": 177.48048400878906, + "learning_rate": 6.386573266511891e-06, + "loss": 29.9546, + "step": 234750 + }, + { + "epoch": 0.4742300528852564, + "grad_norm": 354.9969177246094, + "learning_rate": 6.3862378879406765e-06, + "loss": 36.502, + "step": 234760 + }, + { + "epoch": 0.4742502535179402, + "grad_norm": 642.2314453125, + "learning_rate": 6.385902502613106e-06, + "loss": 20.1804, + "step": 234770 + }, + { + "epoch": 0.474270454150624, + "grad_norm": 561.5035400390625, + "learning_rate": 6.385567110530816e-06, + "loss": 23.3928, + "step": 234780 + }, + { + "epoch": 0.4742906547833078, + "grad_norm": 469.0497131347656, + "learning_rate": 6.385231711695441e-06, + "loss": 27.8145, + "step": 234790 + }, + { + "epoch": 0.4743108554159916, + "grad_norm": 353.9854431152344, + "learning_rate": 6.384896306108612e-06, + "loss": 19.0264, + "step": 234800 + }, + { + "epoch": 0.4743310560486754, + "grad_norm": 304.20697021484375, + "learning_rate": 6.384560893771968e-06, + "loss": 17.292, + "step": 234810 + }, + { + "epoch": 0.47435125668135925, + "grad_norm": 342.96832275390625, + "learning_rate": 6.3842254746871424e-06, + "loss": 29.5318, + "step": 234820 + }, + { + "epoch": 0.47437145731404307, + "grad_norm": 763.3519287109375, + "learning_rate": 6.3838900488557695e-06, + "loss": 18.7512, + "step": 234830 + }, + { + "epoch": 0.4743916579467269, + "grad_norm": 170.58364868164062, + "learning_rate": 6.383554616279485e-06, + "loss": 21.7464, + "step": 234840 + }, + { + "epoch": 0.4744118585794107, + "grad_norm": 248.70811462402344, + "learning_rate": 6.383219176959921e-06, + "loss": 15.3511, + "step": 234850 + }, + { + "epoch": 0.4744320592120945, + "grad_norm": 243.34239196777344, + "learning_rate": 6.382883730898717e-06, + "loss": 39.5096, + "step": 234860 + }, + { + "epoch": 0.47445225984477835, + "grad_norm": 437.47540283203125, + "learning_rate": 6.382548278097503e-06, + "loss": 22.9937, + "step": 234870 + }, + { + "epoch": 0.47447246047746217, + "grad_norm": 127.16067504882812, + "learning_rate": 6.382212818557918e-06, + "loss": 13.2628, + "step": 234880 + }, + { + "epoch": 0.474492661110146, + "grad_norm": 453.02056884765625, + "learning_rate": 6.381877352281594e-06, + "loss": 19.5698, + "step": 234890 + }, + { + "epoch": 0.4745128617428298, + "grad_norm": 460.7250671386719, + "learning_rate": 6.3815418792701686e-06, + "loss": 17.8669, + "step": 234900 + }, + { + "epoch": 
0.4745330623755136, + "grad_norm": 26.176515579223633, + "learning_rate": 6.381206399525276e-06, + "loss": 10.9431, + "step": 234910 + }, + { + "epoch": 0.4745532630081974, + "grad_norm": 156.2840576171875, + "learning_rate": 6.38087091304855e-06, + "loss": 19.0455, + "step": 234920 + }, + { + "epoch": 0.4745734636408812, + "grad_norm": 272.0287170410156, + "learning_rate": 6.380535419841627e-06, + "loss": 10.9583, + "step": 234930 + }, + { + "epoch": 0.47459366427356503, + "grad_norm": 241.13514709472656, + "learning_rate": 6.380199919906141e-06, + "loss": 13.9992, + "step": 234940 + }, + { + "epoch": 0.47461386490624885, + "grad_norm": 980.8489379882812, + "learning_rate": 6.3798644132437304e-06, + "loss": 28.1995, + "step": 234950 + }, + { + "epoch": 0.47463406553893267, + "grad_norm": 162.93238830566406, + "learning_rate": 6.379528899856025e-06, + "loss": 41.1193, + "step": 234960 + }, + { + "epoch": 0.4746542661716165, + "grad_norm": 282.31512451171875, + "learning_rate": 6.3791933797446644e-06, + "loss": 28.5602, + "step": 234970 + }, + { + "epoch": 0.4746744668043003, + "grad_norm": 285.1656494140625, + "learning_rate": 6.378857852911283e-06, + "loss": 20.8154, + "step": 234980 + }, + { + "epoch": 0.47469466743698413, + "grad_norm": 280.6553955078125, + "learning_rate": 6.378522319357515e-06, + "loss": 12.6222, + "step": 234990 + }, + { + "epoch": 0.47471486806966795, + "grad_norm": 257.03887939453125, + "learning_rate": 6.378186779084996e-06, + "loss": 21.6741, + "step": 235000 + }, + { + "epoch": 0.47473506870235177, + "grad_norm": 260.839599609375, + "learning_rate": 6.377851232095362e-06, + "loss": 13.5192, + "step": 235010 + }, + { + "epoch": 0.4747552693350356, + "grad_norm": 37.195953369140625, + "learning_rate": 6.37751567839025e-06, + "loss": 20.0982, + "step": 235020 + }, + { + "epoch": 0.4747754699677194, + "grad_norm": 309.1211853027344, + "learning_rate": 6.377180117971292e-06, + "loss": 12.8097, + "step": 235030 + }, + { + "epoch": 0.47479567060040323, + "grad_norm": 271.5387878417969, + "learning_rate": 6.376844550840126e-06, + "loss": 20.8148, + "step": 235040 + }, + { + "epoch": 0.474815871233087, + "grad_norm": 221.25985717773438, + "learning_rate": 6.376508976998385e-06, + "loss": 24.5826, + "step": 235050 + }, + { + "epoch": 0.4748360718657708, + "grad_norm": 555.2584228515625, + "learning_rate": 6.3761733964477066e-06, + "loss": 18.8689, + "step": 235060 + }, + { + "epoch": 0.47485627249845463, + "grad_norm": 29.00701141357422, + "learning_rate": 6.375837809189726e-06, + "loss": 18.4605, + "step": 235070 + }, + { + "epoch": 0.47487647313113845, + "grad_norm": 520.4061279296875, + "learning_rate": 6.375502215226082e-06, + "loss": 16.8058, + "step": 235080 + }, + { + "epoch": 0.4748966737638223, + "grad_norm": 232.8343963623047, + "learning_rate": 6.375166614558404e-06, + "loss": 13.5121, + "step": 235090 + }, + { + "epoch": 0.4749168743965061, + "grad_norm": 981.6873779296875, + "learning_rate": 6.374831007188331e-06, + "loss": 23.061, + "step": 235100 + }, + { + "epoch": 0.4749370750291899, + "grad_norm": 643.386474609375, + "learning_rate": 6.374495393117499e-06, + "loss": 32.28, + "step": 235110 + }, + { + "epoch": 0.47495727566187373, + "grad_norm": 336.3600769042969, + "learning_rate": 6.374159772347541e-06, + "loss": 16.8826, + "step": 235120 + }, + { + "epoch": 0.47497747629455755, + "grad_norm": 307.8014831542969, + "learning_rate": 6.373824144880099e-06, + "loss": 12.2089, + "step": 235130 + }, + { + "epoch": 0.4749976769272414, + "grad_norm": 
633.9989624023438, + "learning_rate": 6.3734885107168e-06, + "loss": 13.723, + "step": 235140 + }, + { + "epoch": 0.4750178775599252, + "grad_norm": 506.0127868652344, + "learning_rate": 6.373152869859288e-06, + "loss": 20.6466, + "step": 235150 + }, + { + "epoch": 0.475038078192609, + "grad_norm": 538.2627563476562, + "learning_rate": 6.372817222309194e-06, + "loss": 23.1575, + "step": 235160 + }, + { + "epoch": 0.47505827882529283, + "grad_norm": 259.8069152832031, + "learning_rate": 6.372481568068156e-06, + "loss": 29.974, + "step": 235170 + }, + { + "epoch": 0.4750784794579766, + "grad_norm": 889.163818359375, + "learning_rate": 6.37214590713781e-06, + "loss": 31.5299, + "step": 235180 + }, + { + "epoch": 0.4750986800906604, + "grad_norm": 150.32533264160156, + "learning_rate": 6.37181023951979e-06, + "loss": 13.2184, + "step": 235190 + }, + { + "epoch": 0.47511888072334424, + "grad_norm": 514.8124389648438, + "learning_rate": 6.371474565215734e-06, + "loss": 21.9721, + "step": 235200 + }, + { + "epoch": 0.47513908135602806, + "grad_norm": 331.0513610839844, + "learning_rate": 6.371138884227277e-06, + "loss": 18.7461, + "step": 235210 + }, + { + "epoch": 0.4751592819887119, + "grad_norm": 310.6734313964844, + "learning_rate": 6.3708031965560545e-06, + "loss": 33.3973, + "step": 235220 + }, + { + "epoch": 0.4751794826213957, + "grad_norm": 627.3316040039062, + "learning_rate": 6.370467502203704e-06, + "loss": 24.008, + "step": 235230 + }, + { + "epoch": 0.4751996832540795, + "grad_norm": 507.3321838378906, + "learning_rate": 6.370131801171863e-06, + "loss": 23.7958, + "step": 235240 + }, + { + "epoch": 0.47521988388676334, + "grad_norm": 509.7972412109375, + "learning_rate": 6.369796093462164e-06, + "loss": 21.6889, + "step": 235250 + }, + { + "epoch": 0.47524008451944716, + "grad_norm": 0.0, + "learning_rate": 6.369460379076244e-06, + "loss": 20.57, + "step": 235260 + }, + { + "epoch": 0.475260285152131, + "grad_norm": 533.4564819335938, + "learning_rate": 6.369124658015742e-06, + "loss": 10.4068, + "step": 235270 + }, + { + "epoch": 0.4752804857848148, + "grad_norm": 143.0695343017578, + "learning_rate": 6.368788930282292e-06, + "loss": 22.7546, + "step": 235280 + }, + { + "epoch": 0.4753006864174986, + "grad_norm": 21.005603790283203, + "learning_rate": 6.368453195877531e-06, + "loss": 24.8897, + "step": 235290 + }, + { + "epoch": 0.47532088705018244, + "grad_norm": 187.2565155029297, + "learning_rate": 6.368117454803093e-06, + "loss": 12.8817, + "step": 235300 + }, + { + "epoch": 0.4753410876828662, + "grad_norm": 341.4812316894531, + "learning_rate": 6.36778170706062e-06, + "loss": 28.8768, + "step": 235310 + }, + { + "epoch": 0.47536128831555, + "grad_norm": 273.7339782714844, + "learning_rate": 6.367445952651742e-06, + "loss": 38.7234, + "step": 235320 + }, + { + "epoch": 0.47538148894823384, + "grad_norm": 520.7974243164062, + "learning_rate": 6.367110191578099e-06, + "loss": 25.4437, + "step": 235330 + }, + { + "epoch": 0.47540168958091766, + "grad_norm": 475.5674133300781, + "learning_rate": 6.366774423841326e-06, + "loss": 27.3372, + "step": 235340 + }, + { + "epoch": 0.4754218902136015, + "grad_norm": 122.72430419921875, + "learning_rate": 6.366438649443062e-06, + "loss": 10.3798, + "step": 235350 + }, + { + "epoch": 0.4754420908462853, + "grad_norm": 371.865478515625, + "learning_rate": 6.366102868384942e-06, + "loss": 18.0436, + "step": 235360 + }, + { + "epoch": 0.4754622914789691, + "grad_norm": 194.8443145751953, + "learning_rate": 6.365767080668601e-06, + "loss": 
30.0203, + "step": 235370 + }, + { + "epoch": 0.47548249211165294, + "grad_norm": 261.30224609375, + "learning_rate": 6.365431286295677e-06, + "loss": 20.0374, + "step": 235380 + }, + { + "epoch": 0.47550269274433676, + "grad_norm": 298.7358703613281, + "learning_rate": 6.365095485267807e-06, + "loss": 10.4789, + "step": 235390 + }, + { + "epoch": 0.4755228933770206, + "grad_norm": 209.228759765625, + "learning_rate": 6.364759677586627e-06, + "loss": 15.6559, + "step": 235400 + }, + { + "epoch": 0.4755430940097044, + "grad_norm": 0.0, + "learning_rate": 6.364423863253772e-06, + "loss": 16.6951, + "step": 235410 + }, + { + "epoch": 0.4755632946423882, + "grad_norm": 508.7265625, + "learning_rate": 6.364088042270884e-06, + "loss": 16.7934, + "step": 235420 + }, + { + "epoch": 0.475583495275072, + "grad_norm": 396.3540954589844, + "learning_rate": 6.363752214639595e-06, + "loss": 15.8091, + "step": 235430 + }, + { + "epoch": 0.4756036959077558, + "grad_norm": 6.936513423919678, + "learning_rate": 6.363416380361542e-06, + "loss": 15.8172, + "step": 235440 + }, + { + "epoch": 0.47562389654043963, + "grad_norm": 148.58872985839844, + "learning_rate": 6.363080539438364e-06, + "loss": 16.6924, + "step": 235450 + }, + { + "epoch": 0.47564409717312345, + "grad_norm": 146.972900390625, + "learning_rate": 6.3627446918716965e-06, + "loss": 18.2104, + "step": 235460 + }, + { + "epoch": 0.47566429780580727, + "grad_norm": 856.9461669921875, + "learning_rate": 6.362408837663177e-06, + "loss": 25.8049, + "step": 235470 + }, + { + "epoch": 0.4756844984384911, + "grad_norm": 262.8402404785156, + "learning_rate": 6.3620729768144415e-06, + "loss": 22.2888, + "step": 235480 + }, + { + "epoch": 0.4757046990711749, + "grad_norm": 412.86749267578125, + "learning_rate": 6.361737109327128e-06, + "loss": 20.7995, + "step": 235490 + }, + { + "epoch": 0.47572489970385873, + "grad_norm": 110.34162902832031, + "learning_rate": 6.361401235202872e-06, + "loss": 21.2092, + "step": 235500 + }, + { + "epoch": 0.47574510033654255, + "grad_norm": 345.8625183105469, + "learning_rate": 6.361065354443312e-06, + "loss": 12.1448, + "step": 235510 + }, + { + "epoch": 0.47576530096922637, + "grad_norm": 269.2815246582031, + "learning_rate": 6.360729467050086e-06, + "loss": 16.7931, + "step": 235520 + }, + { + "epoch": 0.4757855016019102, + "grad_norm": 1235.232421875, + "learning_rate": 6.360393573024828e-06, + "loss": 27.6663, + "step": 235530 + }, + { + "epoch": 0.475805702234594, + "grad_norm": 136.81576538085938, + "learning_rate": 6.360057672369177e-06, + "loss": 20.3639, + "step": 235540 + }, + { + "epoch": 0.47582590286727783, + "grad_norm": 524.9102172851562, + "learning_rate": 6.35972176508477e-06, + "loss": 17.5077, + "step": 235550 + }, + { + "epoch": 0.4758461034999616, + "grad_norm": 235.1743621826172, + "learning_rate": 6.3593858511732446e-06, + "loss": 24.1391, + "step": 235560 + }, + { + "epoch": 0.4758663041326454, + "grad_norm": 696.58984375, + "learning_rate": 6.359049930636235e-06, + "loss": 17.0567, + "step": 235570 + }, + { + "epoch": 0.47588650476532923, + "grad_norm": 602.1285400390625, + "learning_rate": 6.3587140034753836e-06, + "loss": 19.6464, + "step": 235580 + }, + { + "epoch": 0.47590670539801305, + "grad_norm": 289.7513122558594, + "learning_rate": 6.358378069692324e-06, + "loss": 24.3361, + "step": 235590 + }, + { + "epoch": 0.4759269060306969, + "grad_norm": 367.5796203613281, + "learning_rate": 6.358042129288694e-06, + "loss": 23.8514, + "step": 235600 + }, + { + "epoch": 0.4759471066633807, + 
"grad_norm": 189.98202514648438, + "learning_rate": 6.3577061822661326e-06, + "loss": 14.4241, + "step": 235610 + }, + { + "epoch": 0.4759673072960645, + "grad_norm": 156.3752899169922, + "learning_rate": 6.357370228626274e-06, + "loss": 11.945, + "step": 235620 + }, + { + "epoch": 0.47598750792874833, + "grad_norm": 111.87229919433594, + "learning_rate": 6.3570342683707595e-06, + "loss": 18.5781, + "step": 235630 + }, + { + "epoch": 0.47600770856143215, + "grad_norm": 161.64215087890625, + "learning_rate": 6.356698301501224e-06, + "loss": 40.5645, + "step": 235640 + }, + { + "epoch": 0.476027909194116, + "grad_norm": 281.6654357910156, + "learning_rate": 6.356362328019305e-06, + "loss": 15.8512, + "step": 235650 + }, + { + "epoch": 0.4760481098267998, + "grad_norm": 608.2230224609375, + "learning_rate": 6.35602634792664e-06, + "loss": 31.2502, + "step": 235660 + }, + { + "epoch": 0.4760683104594836, + "grad_norm": 292.78631591796875, + "learning_rate": 6.355690361224869e-06, + "loss": 19.3586, + "step": 235670 + }, + { + "epoch": 0.47608851109216743, + "grad_norm": 299.5194396972656, + "learning_rate": 6.355354367915626e-06, + "loss": 18.2044, + "step": 235680 + }, + { + "epoch": 0.4761087117248512, + "grad_norm": 342.9618225097656, + "learning_rate": 6.355018368000552e-06, + "loss": 12.7424, + "step": 235690 + }, + { + "epoch": 0.476128912357535, + "grad_norm": 465.0326232910156, + "learning_rate": 6.35468236148128e-06, + "loss": 21.8289, + "step": 235700 + }, + { + "epoch": 0.47614911299021884, + "grad_norm": 442.16748046875, + "learning_rate": 6.354346348359452e-06, + "loss": 17.4674, + "step": 235710 + }, + { + "epoch": 0.47616931362290266, + "grad_norm": 489.3575134277344, + "learning_rate": 6.354010328636705e-06, + "loss": 18.5888, + "step": 235720 + }, + { + "epoch": 0.4761895142555865, + "grad_norm": 228.85992431640625, + "learning_rate": 6.3536743023146744e-06, + "loss": 8.9061, + "step": 235730 + }, + { + "epoch": 0.4762097148882703, + "grad_norm": 532.1124877929688, + "learning_rate": 6.353338269395e-06, + "loss": 19.8909, + "step": 235740 + }, + { + "epoch": 0.4762299155209541, + "grad_norm": 942.0388793945312, + "learning_rate": 6.353002229879318e-06, + "loss": 34.5855, + "step": 235750 + }, + { + "epoch": 0.47625011615363794, + "grad_norm": 314.45513916015625, + "learning_rate": 6.352666183769269e-06, + "loss": 10.9754, + "step": 235760 + }, + { + "epoch": 0.47627031678632176, + "grad_norm": 633.0263671875, + "learning_rate": 6.352330131066489e-06, + "loss": 18.347, + "step": 235770 + }, + { + "epoch": 0.4762905174190056, + "grad_norm": 891.4623413085938, + "learning_rate": 6.351994071772615e-06, + "loss": 20.04, + "step": 235780 + }, + { + "epoch": 0.4763107180516894, + "grad_norm": 462.0434265136719, + "learning_rate": 6.351658005889286e-06, + "loss": 18.791, + "step": 235790 + }, + { + "epoch": 0.4763309186843732, + "grad_norm": 370.8762512207031, + "learning_rate": 6.35132193341814e-06, + "loss": 22.8174, + "step": 235800 + }, + { + "epoch": 0.47635111931705704, + "grad_norm": 257.43975830078125, + "learning_rate": 6.350985854360815e-06, + "loss": 42.8529, + "step": 235810 + }, + { + "epoch": 0.4763713199497408, + "grad_norm": 713.8231811523438, + "learning_rate": 6.350649768718948e-06, + "loss": 22.5843, + "step": 235820 + }, + { + "epoch": 0.4763915205824246, + "grad_norm": 171.21658325195312, + "learning_rate": 6.3503136764941786e-06, + "loss": 13.8042, + "step": 235830 + }, + { + "epoch": 0.47641172121510844, + "grad_norm": 57.68280029296875, + "learning_rate": 
6.349977577688144e-06, + "loss": 19.2037, + "step": 235840 + }, + { + "epoch": 0.47643192184779226, + "grad_norm": 431.3582763671875, + "learning_rate": 6.349641472302484e-06, + "loss": 19.0848, + "step": 235850 + }, + { + "epoch": 0.4764521224804761, + "grad_norm": 1326.8524169921875, + "learning_rate": 6.349305360338832e-06, + "loss": 20.5687, + "step": 235860 + }, + { + "epoch": 0.4764723231131599, + "grad_norm": 556.9224243164062, + "learning_rate": 6.34896924179883e-06, + "loss": 37.0349, + "step": 235870 + }, + { + "epoch": 0.4764925237458437, + "grad_norm": 208.58554077148438, + "learning_rate": 6.348633116684117e-06, + "loss": 18.2168, + "step": 235880 + }, + { + "epoch": 0.47651272437852754, + "grad_norm": 28.971874237060547, + "learning_rate": 6.348296984996329e-06, + "loss": 15.1086, + "step": 235890 + }, + { + "epoch": 0.47653292501121136, + "grad_norm": 0.8386023044586182, + "learning_rate": 6.3479608467371055e-06, + "loss": 21.6404, + "step": 235900 + }, + { + "epoch": 0.4765531256438952, + "grad_norm": 479.8161315917969, + "learning_rate": 6.3476247019080826e-06, + "loss": 19.4333, + "step": 235910 + }, + { + "epoch": 0.476573326276579, + "grad_norm": 383.649169921875, + "learning_rate": 6.347288550510903e-06, + "loss": 24.6602, + "step": 235920 + }, + { + "epoch": 0.4765935269092628, + "grad_norm": 11.255179405212402, + "learning_rate": 6.346952392547201e-06, + "loss": 14.3584, + "step": 235930 + }, + { + "epoch": 0.47661372754194664, + "grad_norm": 181.38755798339844, + "learning_rate": 6.3466162280186164e-06, + "loss": 11.1322, + "step": 235940 + }, + { + "epoch": 0.4766339281746304, + "grad_norm": 320.8212585449219, + "learning_rate": 6.346280056926788e-06, + "loss": 17.2683, + "step": 235950 + }, + { + "epoch": 0.4766541288073142, + "grad_norm": 41.16318893432617, + "learning_rate": 6.345943879273353e-06, + "loss": 15.3891, + "step": 235960 + }, + { + "epoch": 0.47667432943999805, + "grad_norm": 157.09657287597656, + "learning_rate": 6.3456076950599525e-06, + "loss": 13.0402, + "step": 235970 + }, + { + "epoch": 0.47669453007268187, + "grad_norm": 592.3854370117188, + "learning_rate": 6.345271504288222e-06, + "loss": 20.3103, + "step": 235980 + }, + { + "epoch": 0.4767147307053657, + "grad_norm": 531.6246337890625, + "learning_rate": 6.344935306959801e-06, + "loss": 14.9921, + "step": 235990 + }, + { + "epoch": 0.4767349313380495, + "grad_norm": 775.4092407226562, + "learning_rate": 6.344599103076329e-06, + "loss": 30.2433, + "step": 236000 + }, + { + "epoch": 0.4767551319707333, + "grad_norm": 804.9729614257812, + "learning_rate": 6.3442628926394455e-06, + "loss": 18.611, + "step": 236010 + }, + { + "epoch": 0.47677533260341715, + "grad_norm": 399.3684387207031, + "learning_rate": 6.3439266756507846e-06, + "loss": 14.1972, + "step": 236020 + }, + { + "epoch": 0.47679553323610097, + "grad_norm": 257.4033203125, + "learning_rate": 6.343590452111991e-06, + "loss": 25.993, + "step": 236030 + }, + { + "epoch": 0.4768157338687848, + "grad_norm": 291.25286865234375, + "learning_rate": 6.343254222024699e-06, + "loss": 13.1766, + "step": 236040 + }, + { + "epoch": 0.4768359345014686, + "grad_norm": 0.25358515977859497, + "learning_rate": 6.3429179853905485e-06, + "loss": 27.1687, + "step": 236050 + }, + { + "epoch": 0.4768561351341524, + "grad_norm": 1633.2650146484375, + "learning_rate": 6.34258174221118e-06, + "loss": 26.8737, + "step": 236060 + }, + { + "epoch": 0.4768763357668362, + "grad_norm": 472.8817138671875, + "learning_rate": 6.342245492488228e-06, + "loss": 19.0416, 
+ "step": 236070 + }, + { + "epoch": 0.47689653639952, + "grad_norm": 280.98504638671875, + "learning_rate": 6.341909236223338e-06, + "loss": 21.0544, + "step": 236080 + }, + { + "epoch": 0.47691673703220383, + "grad_norm": 631.6600341796875, + "learning_rate": 6.341572973418143e-06, + "loss": 17.0014, + "step": 236090 + }, + { + "epoch": 0.47693693766488765, + "grad_norm": 416.7015686035156, + "learning_rate": 6.341236704074285e-06, + "loss": 24.6626, + "step": 236100 + }, + { + "epoch": 0.47695713829757147, + "grad_norm": 370.92547607421875, + "learning_rate": 6.340900428193401e-06, + "loss": 14.3205, + "step": 236110 + }, + { + "epoch": 0.4769773389302553, + "grad_norm": 154.2776336669922, + "learning_rate": 6.340564145777131e-06, + "loss": 17.0812, + "step": 236120 + }, + { + "epoch": 0.4769975395629391, + "grad_norm": 414.4328308105469, + "learning_rate": 6.340227856827116e-06, + "loss": 9.2969, + "step": 236130 + }, + { + "epoch": 0.47701774019562293, + "grad_norm": 434.3299865722656, + "learning_rate": 6.339891561344991e-06, + "loss": 19.4123, + "step": 236140 + }, + { + "epoch": 0.47703794082830675, + "grad_norm": 108.04666900634766, + "learning_rate": 6.339555259332398e-06, + "loss": 23.9393, + "step": 236150 + }, + { + "epoch": 0.47705814146099057, + "grad_norm": 1590.479248046875, + "learning_rate": 6.339218950790973e-06, + "loss": 45.2912, + "step": 236160 + }, + { + "epoch": 0.4770783420936744, + "grad_norm": 473.1038818359375, + "learning_rate": 6.33888263572236e-06, + "loss": 34.732, + "step": 236170 + }, + { + "epoch": 0.4770985427263582, + "grad_norm": 116.94279479980469, + "learning_rate": 6.338546314128193e-06, + "loss": 9.5652, + "step": 236180 + }, + { + "epoch": 0.47711874335904203, + "grad_norm": 158.46392822265625, + "learning_rate": 6.3382099860101154e-06, + "loss": 16.3361, + "step": 236190 + }, + { + "epoch": 0.4771389439917258, + "grad_norm": 113.47792053222656, + "learning_rate": 6.337873651369764e-06, + "loss": 20.9386, + "step": 236200 + }, + { + "epoch": 0.4771591446244096, + "grad_norm": 356.636474609375, + "learning_rate": 6.337537310208779e-06, + "loss": 15.0066, + "step": 236210 + }, + { + "epoch": 0.47717934525709343, + "grad_norm": 1007.4440307617188, + "learning_rate": 6.3372009625288e-06, + "loss": 23.4929, + "step": 236220 + }, + { + "epoch": 0.47719954588977725, + "grad_norm": 389.4663391113281, + "learning_rate": 6.336864608331463e-06, + "loss": 14.9125, + "step": 236230 + }, + { + "epoch": 0.4772197465224611, + "grad_norm": 567.827880859375, + "learning_rate": 6.336528247618413e-06, + "loss": 17.2285, + "step": 236240 + }, + { + "epoch": 0.4772399471551449, + "grad_norm": 433.66021728515625, + "learning_rate": 6.336191880391285e-06, + "loss": 43.0203, + "step": 236250 + }, + { + "epoch": 0.4772601477878287, + "grad_norm": 317.4508361816406, + "learning_rate": 6.335855506651721e-06, + "loss": 21.6446, + "step": 236260 + }, + { + "epoch": 0.47728034842051253, + "grad_norm": 261.9701232910156, + "learning_rate": 6.335519126401357e-06, + "loss": 22.2381, + "step": 236270 + }, + { + "epoch": 0.47730054905319635, + "grad_norm": 583.7076416015625, + "learning_rate": 6.335182739641837e-06, + "loss": 22.632, + "step": 236280 + }, + { + "epoch": 0.4773207496858802, + "grad_norm": 650.14111328125, + "learning_rate": 6.334846346374797e-06, + "loss": 17.9627, + "step": 236290 + }, + { + "epoch": 0.477340950318564, + "grad_norm": 286.3322448730469, + "learning_rate": 6.334509946601879e-06, + "loss": 19.6228, + "step": 236300 + }, + { + "epoch": 
0.4773611509512478, + "grad_norm": 632.01123046875, + "learning_rate": 6.334173540324721e-06, + "loss": 20.1217, + "step": 236310 + }, + { + "epoch": 0.47738135158393163, + "grad_norm": 154.14942932128906, + "learning_rate": 6.3338371275449614e-06, + "loss": 17.7689, + "step": 236320 + }, + { + "epoch": 0.4774015522166154, + "grad_norm": 4.355168342590332, + "learning_rate": 6.333500708264243e-06, + "loss": 21.0828, + "step": 236330 + }, + { + "epoch": 0.4774217528492992, + "grad_norm": 437.45831298828125, + "learning_rate": 6.3331642824842034e-06, + "loss": 17.5286, + "step": 236340 + }, + { + "epoch": 0.47744195348198304, + "grad_norm": 130.97360229492188, + "learning_rate": 6.3328278502064835e-06, + "loss": 17.0982, + "step": 236350 + }, + { + "epoch": 0.47746215411466686, + "grad_norm": 417.3623046875, + "learning_rate": 6.3324914114327206e-06, + "loss": 16.7422, + "step": 236360 + }, + { + "epoch": 0.4774823547473507, + "grad_norm": 349.117431640625, + "learning_rate": 6.332154966164558e-06, + "loss": 18.2595, + "step": 236370 + }, + { + "epoch": 0.4775025553800345, + "grad_norm": 579.3184814453125, + "learning_rate": 6.3318185144036325e-06, + "loss": 35.9562, + "step": 236380 + }, + { + "epoch": 0.4775227560127183, + "grad_norm": 364.48846435546875, + "learning_rate": 6.3314820561515854e-06, + "loss": 16.838, + "step": 236390 + }, + { + "epoch": 0.47754295664540214, + "grad_norm": 101.16961669921875, + "learning_rate": 6.331145591410057e-06, + "loss": 18.6463, + "step": 236400 + }, + { + "epoch": 0.47756315727808596, + "grad_norm": 483.03875732421875, + "learning_rate": 6.330809120180685e-06, + "loss": 15.5044, + "step": 236410 + }, + { + "epoch": 0.4775833579107698, + "grad_norm": 60.906856536865234, + "learning_rate": 6.330472642465113e-06, + "loss": 19.4903, + "step": 236420 + }, + { + "epoch": 0.4776035585434536, + "grad_norm": 60.40165328979492, + "learning_rate": 6.330136158264977e-06, + "loss": 14.2993, + "step": 236430 + }, + { + "epoch": 0.4776237591761374, + "grad_norm": 666.2178955078125, + "learning_rate": 6.329799667581918e-06, + "loss": 28.4868, + "step": 236440 + }, + { + "epoch": 0.47764395980882124, + "grad_norm": 441.0329895019531, + "learning_rate": 6.329463170417578e-06, + "loss": 25.9949, + "step": 236450 + }, + { + "epoch": 0.477664160441505, + "grad_norm": 627.822998046875, + "learning_rate": 6.329126666773596e-06, + "loss": 22.5725, + "step": 236460 + }, + { + "epoch": 0.4776843610741888, + "grad_norm": 1271.912109375, + "learning_rate": 6.328790156651611e-06, + "loss": 9.4034, + "step": 236470 + }, + { + "epoch": 0.47770456170687264, + "grad_norm": 181.57403564453125, + "learning_rate": 6.328453640053264e-06, + "loss": 20.7942, + "step": 236480 + }, + { + "epoch": 0.47772476233955646, + "grad_norm": 412.6011962890625, + "learning_rate": 6.3281171169801944e-06, + "loss": 19.137, + "step": 236490 + }, + { + "epoch": 0.4777449629722403, + "grad_norm": 345.78594970703125, + "learning_rate": 6.327780587434045e-06, + "loss": 20.7337, + "step": 236500 + }, + { + "epoch": 0.4777651636049241, + "grad_norm": 280.6048889160156, + "learning_rate": 6.3274440514164535e-06, + "loss": 25.2685, + "step": 236510 + }, + { + "epoch": 0.4777853642376079, + "grad_norm": 302.62237548828125, + "learning_rate": 6.327107508929059e-06, + "loss": 22.2104, + "step": 236520 + }, + { + "epoch": 0.47780556487029174, + "grad_norm": 321.1506042480469, + "learning_rate": 6.326770959973505e-06, + "loss": 12.1764, + "step": 236530 + }, + { + "epoch": 0.47782576550297556, + "grad_norm": 
647.4296264648438, + "learning_rate": 6.32643440455143e-06, + "loss": 20.1817, + "step": 236540 + }, + { + "epoch": 0.4778459661356594, + "grad_norm": 369.7336120605469, + "learning_rate": 6.3260978426644735e-06, + "loss": 18.1902, + "step": 236550 + }, + { + "epoch": 0.4778661667683432, + "grad_norm": 767.9469604492188, + "learning_rate": 6.325761274314279e-06, + "loss": 23.7414, + "step": 236560 + }, + { + "epoch": 0.477886367401027, + "grad_norm": 589.987548828125, + "learning_rate": 6.325424699502482e-06, + "loss": 22.7409, + "step": 236570 + }, + { + "epoch": 0.47790656803371084, + "grad_norm": 301.1730041503906, + "learning_rate": 6.3250881182307285e-06, + "loss": 23.0615, + "step": 236580 + }, + { + "epoch": 0.4779267686663946, + "grad_norm": 195.98422241210938, + "learning_rate": 6.324751530500656e-06, + "loss": 27.2593, + "step": 236590 + }, + { + "epoch": 0.47794696929907843, + "grad_norm": 376.743896484375, + "learning_rate": 6.324414936313904e-06, + "loss": 11.7994, + "step": 236600 + }, + { + "epoch": 0.47796716993176225, + "grad_norm": 369.4232482910156, + "learning_rate": 6.324078335672115e-06, + "loss": 20.3665, + "step": 236610 + }, + { + "epoch": 0.47798737056444607, + "grad_norm": 194.3318634033203, + "learning_rate": 6.323741728576928e-06, + "loss": 16.0633, + "step": 236620 + }, + { + "epoch": 0.4780075711971299, + "grad_norm": 119.3884506225586, + "learning_rate": 6.323405115029986e-06, + "loss": 14.5608, + "step": 236630 + }, + { + "epoch": 0.4780277718298137, + "grad_norm": 848.0132446289062, + "learning_rate": 6.323068495032927e-06, + "loss": 27.1935, + "step": 236640 + }, + { + "epoch": 0.47804797246249753, + "grad_norm": 700.3411865234375, + "learning_rate": 6.3227318685873915e-06, + "loss": 21.1771, + "step": 236650 + }, + { + "epoch": 0.47806817309518135, + "grad_norm": 371.2930908203125, + "learning_rate": 6.322395235695022e-06, + "loss": 7.3183, + "step": 236660 + }, + { + "epoch": 0.47808837372786517, + "grad_norm": 523.4465942382812, + "learning_rate": 6.32205859635746e-06, + "loss": 20.1393, + "step": 236670 + }, + { + "epoch": 0.478108574360549, + "grad_norm": 454.619384765625, + "learning_rate": 6.3217219505763426e-06, + "loss": 19.4088, + "step": 236680 + }, + { + "epoch": 0.4781287749932328, + "grad_norm": 205.50643920898438, + "learning_rate": 6.321385298353314e-06, + "loss": 27.9032, + "step": 236690 + }, + { + "epoch": 0.47814897562591663, + "grad_norm": 320.5482177734375, + "learning_rate": 6.321048639690013e-06, + "loss": 16.4477, + "step": 236700 + }, + { + "epoch": 0.4781691762586004, + "grad_norm": 257.9223937988281, + "learning_rate": 6.320711974588082e-06, + "loss": 15.9631, + "step": 236710 + }, + { + "epoch": 0.4781893768912842, + "grad_norm": 469.4799499511719, + "learning_rate": 6.32037530304916e-06, + "loss": 15.2386, + "step": 236720 + }, + { + "epoch": 0.47820957752396803, + "grad_norm": 460.84051513671875, + "learning_rate": 6.320038625074889e-06, + "loss": 21.4524, + "step": 236730 + }, + { + "epoch": 0.47822977815665185, + "grad_norm": 277.2516784667969, + "learning_rate": 6.319701940666911e-06, + "loss": 13.5471, + "step": 236740 + }, + { + "epoch": 0.4782499787893357, + "grad_norm": 256.4875183105469, + "learning_rate": 6.3193652498268656e-06, + "loss": 23.3701, + "step": 236750 + }, + { + "epoch": 0.4782701794220195, + "grad_norm": 153.87103271484375, + "learning_rate": 6.319028552556393e-06, + "loss": 16.1749, + "step": 236760 + }, + { + "epoch": 0.4782903800547033, + "grad_norm": 174.68017578125, + "learning_rate": 
6.318691848857136e-06, + "loss": 16.622, + "step": 236770 + }, + { + "epoch": 0.47831058068738713, + "grad_norm": 131.1770782470703, + "learning_rate": 6.318355138730735e-06, + "loss": 23.3666, + "step": 236780 + }, + { + "epoch": 0.47833078132007095, + "grad_norm": 239.27786254882812, + "learning_rate": 6.318018422178829e-06, + "loss": 23.2029, + "step": 236790 + }, + { + "epoch": 0.4783509819527548, + "grad_norm": 374.8211364746094, + "learning_rate": 6.317681699203065e-06, + "loss": 25.8021, + "step": 236800 + }, + { + "epoch": 0.4783711825854386, + "grad_norm": 264.9538269042969, + "learning_rate": 6.317344969805077e-06, + "loss": 18.8978, + "step": 236810 + }, + { + "epoch": 0.4783913832181224, + "grad_norm": 338.7409973144531, + "learning_rate": 6.317008233986509e-06, + "loss": 11.726, + "step": 236820 + }, + { + "epoch": 0.47841158385080623, + "grad_norm": 551.9564208984375, + "learning_rate": 6.316671491749005e-06, + "loss": 21.1559, + "step": 236830 + }, + { + "epoch": 0.47843178448349, + "grad_norm": 0.0, + "learning_rate": 6.316334743094201e-06, + "loss": 15.9817, + "step": 236840 + }, + { + "epoch": 0.4784519851161738, + "grad_norm": 500.169677734375, + "learning_rate": 6.315997988023744e-06, + "loss": 24.3848, + "step": 236850 + }, + { + "epoch": 0.47847218574885764, + "grad_norm": 380.1037902832031, + "learning_rate": 6.31566122653927e-06, + "loss": 21.1684, + "step": 236860 + }, + { + "epoch": 0.47849238638154146, + "grad_norm": 146.63589477539062, + "learning_rate": 6.315324458642424e-06, + "loss": 8.5004, + "step": 236870 + }, + { + "epoch": 0.4785125870142253, + "grad_norm": 357.74359130859375, + "learning_rate": 6.314987684334847e-06, + "loss": 33.0014, + "step": 236880 + }, + { + "epoch": 0.4785327876469091, + "grad_norm": 745.3784790039062, + "learning_rate": 6.314650903618178e-06, + "loss": 23.9109, + "step": 236890 + }, + { + "epoch": 0.4785529882795929, + "grad_norm": 444.4207458496094, + "learning_rate": 6.314314116494061e-06, + "loss": 12.2537, + "step": 236900 + }, + { + "epoch": 0.47857318891227674, + "grad_norm": 856.6294555664062, + "learning_rate": 6.313977322964136e-06, + "loss": 48.7711, + "step": 236910 + }, + { + "epoch": 0.47859338954496056, + "grad_norm": 329.8413391113281, + "learning_rate": 6.313640523030045e-06, + "loss": 19.5563, + "step": 236920 + }, + { + "epoch": 0.4786135901776444, + "grad_norm": 443.108642578125, + "learning_rate": 6.313303716693428e-06, + "loss": 19.4823, + "step": 236930 + }, + { + "epoch": 0.4786337908103282, + "grad_norm": 453.31756591796875, + "learning_rate": 6.3129669039559295e-06, + "loss": 15.88, + "step": 236940 + }, + { + "epoch": 0.478653991443012, + "grad_norm": 512.3207397460938, + "learning_rate": 6.312630084819189e-06, + "loss": 17.9663, + "step": 236950 + }, + { + "epoch": 0.47867419207569584, + "grad_norm": 206.0601043701172, + "learning_rate": 6.3122932592848495e-06, + "loss": 16.0576, + "step": 236960 + }, + { + "epoch": 0.4786943927083796, + "grad_norm": 483.4388122558594, + "learning_rate": 6.311956427354552e-06, + "loss": 33.9894, + "step": 236970 + }, + { + "epoch": 0.4787145933410634, + "grad_norm": 236.13270568847656, + "learning_rate": 6.311619589029937e-06, + "loss": 24.7015, + "step": 236980 + }, + { + "epoch": 0.47873479397374724, + "grad_norm": 624.0055541992188, + "learning_rate": 6.311282744312647e-06, + "loss": 18.0067, + "step": 236990 + }, + { + "epoch": 0.47875499460643106, + "grad_norm": 808.8917846679688, + "learning_rate": 6.310945893204324e-06, + "loss": 27.9448, + "step": 237000 + }, + 
{ + "epoch": 0.4787751952391149, + "grad_norm": 343.2776184082031, + "learning_rate": 6.310609035706611e-06, + "loss": 18.2245, + "step": 237010 + }, + { + "epoch": 0.4787953958717987, + "grad_norm": 345.72296142578125, + "learning_rate": 6.310272171821145e-06, + "loss": 24.3908, + "step": 237020 + }, + { + "epoch": 0.4788155965044825, + "grad_norm": 198.91836547851562, + "learning_rate": 6.3099353015495766e-06, + "loss": 13.6703, + "step": 237030 + }, + { + "epoch": 0.47883579713716634, + "grad_norm": 339.0745849609375, + "learning_rate": 6.309598424893539e-06, + "loss": 13.3374, + "step": 237040 + }, + { + "epoch": 0.47885599776985016, + "grad_norm": 782.8482666015625, + "learning_rate": 6.309261541854679e-06, + "loss": 23.3637, + "step": 237050 + }, + { + "epoch": 0.478876198402534, + "grad_norm": 586.35595703125, + "learning_rate": 6.308924652434636e-06, + "loss": 19.0398, + "step": 237060 + }, + { + "epoch": 0.4788963990352178, + "grad_norm": 368.9461364746094, + "learning_rate": 6.308587756635054e-06, + "loss": 20.1478, + "step": 237070 + }, + { + "epoch": 0.4789165996679016, + "grad_norm": 290.1398010253906, + "learning_rate": 6.308250854457572e-06, + "loss": 19.0407, + "step": 237080 + }, + { + "epoch": 0.47893680030058544, + "grad_norm": 19.14689826965332, + "learning_rate": 6.307913945903836e-06, + "loss": 18.9975, + "step": 237090 + }, + { + "epoch": 0.4789570009332692, + "grad_norm": 569.9417724609375, + "learning_rate": 6.307577030975485e-06, + "loss": 27.712, + "step": 237100 + }, + { + "epoch": 0.478977201565953, + "grad_norm": 1169.605224609375, + "learning_rate": 6.307240109674162e-06, + "loss": 25.0702, + "step": 237110 + }, + { + "epoch": 0.47899740219863685, + "grad_norm": 411.0592956542969, + "learning_rate": 6.3069031820015116e-06, + "loss": 48.7659, + "step": 237120 + }, + { + "epoch": 0.47901760283132067, + "grad_norm": 402.3951416015625, + "learning_rate": 6.306566247959169e-06, + "loss": 26.0601, + "step": 237130 + }, + { + "epoch": 0.4790378034640045, + "grad_norm": 657.840576171875, + "learning_rate": 6.3062293075487854e-06, + "loss": 29.0505, + "step": 237140 + }, + { + "epoch": 0.4790580040966883, + "grad_norm": 524.6390991210938, + "learning_rate": 6.305892360771997e-06, + "loss": 16.1245, + "step": 237150 + }, + { + "epoch": 0.4790782047293721, + "grad_norm": 527.5153198242188, + "learning_rate": 6.305555407630447e-06, + "loss": 30.8938, + "step": 237160 + }, + { + "epoch": 0.47909840536205595, + "grad_norm": 299.8638000488281, + "learning_rate": 6.3052184481257795e-06, + "loss": 23.2073, + "step": 237170 + }, + { + "epoch": 0.47911860599473977, + "grad_norm": 525.2657470703125, + "learning_rate": 6.304881482259634e-06, + "loss": 29.5828, + "step": 237180 + }, + { + "epoch": 0.4791388066274236, + "grad_norm": 337.80889892578125, + "learning_rate": 6.304544510033656e-06, + "loss": 17.2977, + "step": 237190 + }, + { + "epoch": 0.4791590072601074, + "grad_norm": 735.634765625, + "learning_rate": 6.304207531449486e-06, + "loss": 19.0968, + "step": 237200 + }, + { + "epoch": 0.4791792078927912, + "grad_norm": 121.38893127441406, + "learning_rate": 6.303870546508766e-06, + "loss": 19.7644, + "step": 237210 + }, + { + "epoch": 0.479199408525475, + "grad_norm": 702.9309692382812, + "learning_rate": 6.3035335552131395e-06, + "loss": 16.5914, + "step": 237220 + }, + { + "epoch": 0.4792196091581588, + "grad_norm": 433.74114990234375, + "learning_rate": 6.303196557564249e-06, + "loss": 13.2496, + "step": 237230 + }, + { + "epoch": 0.47923980979084263, + "grad_norm": 
472.25042724609375, + "learning_rate": 6.302859553563736e-06, + "loss": 26.2887, + "step": 237240 + }, + { + "epoch": 0.47926001042352645, + "grad_norm": 454.13775634765625, + "learning_rate": 6.3025225432132434e-06, + "loss": 21.894, + "step": 237250 + }, + { + "epoch": 0.47928021105621027, + "grad_norm": 189.90338134765625, + "learning_rate": 6.302185526514413e-06, + "loss": 20.9103, + "step": 237260 + }, + { + "epoch": 0.4793004116888941, + "grad_norm": 306.26824951171875, + "learning_rate": 6.301848503468889e-06, + "loss": 17.7821, + "step": 237270 + }, + { + "epoch": 0.4793206123215779, + "grad_norm": 384.7181701660156, + "learning_rate": 6.301511474078315e-06, + "loss": 22.2573, + "step": 237280 + }, + { + "epoch": 0.47934081295426173, + "grad_norm": 432.1720275878906, + "learning_rate": 6.301174438344329e-06, + "loss": 19.971, + "step": 237290 + }, + { + "epoch": 0.47936101358694555, + "grad_norm": 407.7176513671875, + "learning_rate": 6.3008373962685785e-06, + "loss": 13.3768, + "step": 237300 + }, + { + "epoch": 0.47938121421962937, + "grad_norm": 49.56460952758789, + "learning_rate": 6.3005003478527036e-06, + "loss": 18.5751, + "step": 237310 + }, + { + "epoch": 0.4794014148523132, + "grad_norm": 408.12225341796875, + "learning_rate": 6.300163293098348e-06, + "loss": 28.2353, + "step": 237320 + }, + { + "epoch": 0.479421615484997, + "grad_norm": 376.2996826171875, + "learning_rate": 6.2998262320071546e-06, + "loss": 20.6713, + "step": 237330 + }, + { + "epoch": 0.47944181611768083, + "grad_norm": 380.1006164550781, + "learning_rate": 6.299489164580765e-06, + "loss": 17.1596, + "step": 237340 + }, + { + "epoch": 0.4794620167503646, + "grad_norm": 699.7108764648438, + "learning_rate": 6.2991520908208235e-06, + "loss": 27.1828, + "step": 237350 + }, + { + "epoch": 0.4794822173830484, + "grad_norm": 355.4895935058594, + "learning_rate": 6.298815010728972e-06, + "loss": 19.8443, + "step": 237360 + }, + { + "epoch": 0.47950241801573223, + "grad_norm": 225.16407775878906, + "learning_rate": 6.298477924306854e-06, + "loss": 13.6358, + "step": 237370 + }, + { + "epoch": 0.47952261864841605, + "grad_norm": 517.8504028320312, + "learning_rate": 6.298140831556112e-06, + "loss": 15.2491, + "step": 237380 + }, + { + "epoch": 0.4795428192810999, + "grad_norm": 415.0279235839844, + "learning_rate": 6.2978037324783894e-06, + "loss": 19.1637, + "step": 237390 + }, + { + "epoch": 0.4795630199137837, + "grad_norm": 187.1905975341797, + "learning_rate": 6.297466627075327e-06, + "loss": 18.4905, + "step": 237400 + }, + { + "epoch": 0.4795832205464675, + "grad_norm": 791.8858642578125, + "learning_rate": 6.2971295153485725e-06, + "loss": 19.4019, + "step": 237410 + }, + { + "epoch": 0.47960342117915133, + "grad_norm": 618.9378662109375, + "learning_rate": 6.296792397299764e-06, + "loss": 26.3401, + "step": 237420 + }, + { + "epoch": 0.47962362181183515, + "grad_norm": 310.3319091796875, + "learning_rate": 6.296455272930546e-06, + "loss": 38.8718, + "step": 237430 + }, + { + "epoch": 0.479643822444519, + "grad_norm": 164.62466430664062, + "learning_rate": 6.2961181422425645e-06, + "loss": 18.0805, + "step": 237440 + }, + { + "epoch": 0.4796640230772028, + "grad_norm": 294.67950439453125, + "learning_rate": 6.295781005237458e-06, + "loss": 15.5688, + "step": 237450 + }, + { + "epoch": 0.4796842237098866, + "grad_norm": 301.582275390625, + "learning_rate": 6.295443861916875e-06, + "loss": 21.2663, + "step": 237460 + }, + { + "epoch": 0.47970442434257043, + "grad_norm": 540.5686645507812, + 
"learning_rate": 6.2951067122824515e-06, + "loss": 25.5175, + "step": 237470 + }, + { + "epoch": 0.4797246249752542, + "grad_norm": 337.8367614746094, + "learning_rate": 6.294769556335839e-06, + "loss": 12.4853, + "step": 237480 + }, + { + "epoch": 0.479744825607938, + "grad_norm": 233.81678771972656, + "learning_rate": 6.294432394078675e-06, + "loss": 12.7851, + "step": 237490 + }, + { + "epoch": 0.47976502624062184, + "grad_norm": 697.8088989257812, + "learning_rate": 6.294095225512604e-06, + "loss": 18.0509, + "step": 237500 + }, + { + "epoch": 0.47978522687330566, + "grad_norm": 693.8338623046875, + "learning_rate": 6.293758050639272e-06, + "loss": 16.9721, + "step": 237510 + }, + { + "epoch": 0.4798054275059895, + "grad_norm": 637.6820068359375, + "learning_rate": 6.293420869460318e-06, + "loss": 18.5767, + "step": 237520 + }, + { + "epoch": 0.4798256281386733, + "grad_norm": 714.9810180664062, + "learning_rate": 6.2930836819773874e-06, + "loss": 22.6833, + "step": 237530 + }, + { + "epoch": 0.4798458287713571, + "grad_norm": 301.07733154296875, + "learning_rate": 6.292746488192125e-06, + "loss": 16.1072, + "step": 237540 + }, + { + "epoch": 0.47986602940404094, + "grad_norm": 62.74266815185547, + "learning_rate": 6.292409288106173e-06, + "loss": 13.7772, + "step": 237550 + }, + { + "epoch": 0.47988623003672476, + "grad_norm": 208.51495361328125, + "learning_rate": 6.292072081721173e-06, + "loss": 23.2082, + "step": 237560 + }, + { + "epoch": 0.4799064306694086, + "grad_norm": 435.7124938964844, + "learning_rate": 6.291734869038773e-06, + "loss": 16.1793, + "step": 237570 + }, + { + "epoch": 0.4799266313020924, + "grad_norm": 360.6304931640625, + "learning_rate": 6.291397650060613e-06, + "loss": 19.7922, + "step": 237580 + }, + { + "epoch": 0.4799468319347762, + "grad_norm": 36.94089889526367, + "learning_rate": 6.291060424788336e-06, + "loss": 19.1318, + "step": 237590 + }, + { + "epoch": 0.47996703256746004, + "grad_norm": 500.7837219238281, + "learning_rate": 6.290723193223589e-06, + "loss": 22.2277, + "step": 237600 + }, + { + "epoch": 0.4799872332001438, + "grad_norm": 213.74905395507812, + "learning_rate": 6.290385955368012e-06, + "loss": 29.0919, + "step": 237610 + }, + { + "epoch": 0.4800074338328276, + "grad_norm": 241.23556518554688, + "learning_rate": 6.2900487112232534e-06, + "loss": 23.703, + "step": 237620 + }, + { + "epoch": 0.48002763446551144, + "grad_norm": 177.02154541015625, + "learning_rate": 6.289711460790951e-06, + "loss": 7.9541, + "step": 237630 + }, + { + "epoch": 0.48004783509819526, + "grad_norm": 276.55133056640625, + "learning_rate": 6.289374204072752e-06, + "loss": 24.3936, + "step": 237640 + }, + { + "epoch": 0.4800680357308791, + "grad_norm": 375.5913391113281, + "learning_rate": 6.2890369410703e-06, + "loss": 18.9819, + "step": 237650 + }, + { + "epoch": 0.4800882363635629, + "grad_norm": 297.7219543457031, + "learning_rate": 6.2886996717852374e-06, + "loss": 23.8997, + "step": 237660 + }, + { + "epoch": 0.4801084369962467, + "grad_norm": 21.32270050048828, + "learning_rate": 6.28836239621921e-06, + "loss": 14.397, + "step": 237670 + }, + { + "epoch": 0.48012863762893054, + "grad_norm": 306.45330810546875, + "learning_rate": 6.288025114373862e-06, + "loss": 20.0237, + "step": 237680 + }, + { + "epoch": 0.48014883826161436, + "grad_norm": 252.57337951660156, + "learning_rate": 6.287687826250832e-06, + "loss": 11.4426, + "step": 237690 + }, + { + "epoch": 0.4801690388942982, + "grad_norm": 421.5823669433594, + "learning_rate": 6.28735053185177e-06, + 
"loss": 24.9128, + "step": 237700 + }, + { + "epoch": 0.480189239526982, + "grad_norm": 171.04920959472656, + "learning_rate": 6.287013231178316e-06, + "loss": 11.0669, + "step": 237710 + }, + { + "epoch": 0.4802094401596658, + "grad_norm": 303.7307434082031, + "learning_rate": 6.286675924232117e-06, + "loss": 19.8289, + "step": 237720 + }, + { + "epoch": 0.48022964079234964, + "grad_norm": 521.14453125, + "learning_rate": 6.286338611014817e-06, + "loss": 14.5308, + "step": 237730 + }, + { + "epoch": 0.4802498414250334, + "grad_norm": 226.71551513671875, + "learning_rate": 6.286001291528056e-06, + "loss": 14.6215, + "step": 237740 + }, + { + "epoch": 0.48027004205771723, + "grad_norm": 337.58154296875, + "learning_rate": 6.285663965773482e-06, + "loss": 30.0312, + "step": 237750 + }, + { + "epoch": 0.48029024269040105, + "grad_norm": 348.5039367675781, + "learning_rate": 6.285326633752737e-06, + "loss": 18.0105, + "step": 237760 + }, + { + "epoch": 0.48031044332308487, + "grad_norm": 0.5514386296272278, + "learning_rate": 6.284989295467466e-06, + "loss": 16.2173, + "step": 237770 + }, + { + "epoch": 0.4803306439557687, + "grad_norm": 97.46475219726562, + "learning_rate": 6.284651950919315e-06, + "loss": 18.553, + "step": 237780 + }, + { + "epoch": 0.4803508445884525, + "grad_norm": 512.9586181640625, + "learning_rate": 6.284314600109923e-06, + "loss": 23.7827, + "step": 237790 + }, + { + "epoch": 0.48037104522113633, + "grad_norm": 259.66802978515625, + "learning_rate": 6.28397724304094e-06, + "loss": 13.5792, + "step": 237800 + }, + { + "epoch": 0.48039124585382015, + "grad_norm": 293.5666198730469, + "learning_rate": 6.283639879714006e-06, + "loss": 32.8681, + "step": 237810 + }, + { + "epoch": 0.48041144648650397, + "grad_norm": 240.16566467285156, + "learning_rate": 6.283302510130768e-06, + "loss": 16.7174, + "step": 237820 + }, + { + "epoch": 0.4804316471191878, + "grad_norm": 550.2548217773438, + "learning_rate": 6.282965134292869e-06, + "loss": 14.6657, + "step": 237830 + }, + { + "epoch": 0.4804518477518716, + "grad_norm": 408.5176696777344, + "learning_rate": 6.282627752201953e-06, + "loss": 27.4887, + "step": 237840 + }, + { + "epoch": 0.48047204838455543, + "grad_norm": 0.1315782070159912, + "learning_rate": 6.2822903638596654e-06, + "loss": 14.9212, + "step": 237850 + }, + { + "epoch": 0.4804922490172392, + "grad_norm": 614.2935180664062, + "learning_rate": 6.28195296926765e-06, + "loss": 28.5032, + "step": 237860 + }, + { + "epoch": 0.480512449649923, + "grad_norm": 370.79547119140625, + "learning_rate": 6.281615568427551e-06, + "loss": 36.7822, + "step": 237870 + }, + { + "epoch": 0.48053265028260683, + "grad_norm": 413.24578857421875, + "learning_rate": 6.281278161341013e-06, + "loss": 17.2436, + "step": 237880 + }, + { + "epoch": 0.48055285091529065, + "grad_norm": 16.01764488220215, + "learning_rate": 6.280940748009682e-06, + "loss": 23.5995, + "step": 237890 + }, + { + "epoch": 0.4805730515479745, + "grad_norm": 494.7488098144531, + "learning_rate": 6.280603328435199e-06, + "loss": 18.618, + "step": 237900 + }, + { + "epoch": 0.4805932521806583, + "grad_norm": 82.51104736328125, + "learning_rate": 6.2802659026192124e-06, + "loss": 13.8566, + "step": 237910 + }, + { + "epoch": 0.4806134528133421, + "grad_norm": 429.82122802734375, + "learning_rate": 6.279928470563365e-06, + "loss": 18.2455, + "step": 237920 + }, + { + "epoch": 0.48063365344602593, + "grad_norm": 132.4311065673828, + "learning_rate": 6.2795910322693e-06, + "loss": 28.503, + "step": 237930 + }, + { + 
"epoch": 0.48065385407870975, + "grad_norm": 102.37493133544922, + "learning_rate": 6.279253587738664e-06, + "loss": 17.0488, + "step": 237940 + }, + { + "epoch": 0.4806740547113936, + "grad_norm": 705.259765625, + "learning_rate": 6.278916136973102e-06, + "loss": 22.8581, + "step": 237950 + }, + { + "epoch": 0.4806942553440774, + "grad_norm": 400.8526611328125, + "learning_rate": 6.278578679974259e-06, + "loss": 19.653, + "step": 237960 + }, + { + "epoch": 0.4807144559767612, + "grad_norm": 221.90017700195312, + "learning_rate": 6.278241216743777e-06, + "loss": 16.4257, + "step": 237970 + }, + { + "epoch": 0.48073465660944503, + "grad_norm": 318.0158996582031, + "learning_rate": 6.277903747283302e-06, + "loss": 19.8692, + "step": 237980 + }, + { + "epoch": 0.4807548572421288, + "grad_norm": 603.3626098632812, + "learning_rate": 6.277566271594478e-06, + "loss": 16.3699, + "step": 237990 + }, + { + "epoch": 0.4807750578748126, + "grad_norm": 581.0230712890625, + "learning_rate": 6.277228789678953e-06, + "loss": 25.7005, + "step": 238000 + }, + { + "epoch": 0.48079525850749644, + "grad_norm": 185.18804931640625, + "learning_rate": 6.2768913015383696e-06, + "loss": 19.0532, + "step": 238010 + }, + { + "epoch": 0.48081545914018026, + "grad_norm": 130.06170654296875, + "learning_rate": 6.276553807174373e-06, + "loss": 37.6985, + "step": 238020 + }, + { + "epoch": 0.4808356597728641, + "grad_norm": 544.4740600585938, + "learning_rate": 6.276216306588607e-06, + "loss": 14.2061, + "step": 238030 + }, + { + "epoch": 0.4808558604055479, + "grad_norm": 461.09722900390625, + "learning_rate": 6.275878799782719e-06, + "loss": 21.4767, + "step": 238040 + }, + { + "epoch": 0.4808760610382317, + "grad_norm": 212.8883819580078, + "learning_rate": 6.275541286758352e-06, + "loss": 20.6277, + "step": 238050 + }, + { + "epoch": 0.48089626167091554, + "grad_norm": 262.19769287109375, + "learning_rate": 6.2752037675171495e-06, + "loss": 27.4836, + "step": 238060 + }, + { + "epoch": 0.48091646230359936, + "grad_norm": 230.4353485107422, + "learning_rate": 6.274866242060761e-06, + "loss": 19.0922, + "step": 238070 + }, + { + "epoch": 0.4809366629362832, + "grad_norm": 290.68408203125, + "learning_rate": 6.274528710390826e-06, + "loss": 19.4623, + "step": 238080 + }, + { + "epoch": 0.480956863568967, + "grad_norm": 68.90953063964844, + "learning_rate": 6.274191172508996e-06, + "loss": 22.9101, + "step": 238090 + }, + { + "epoch": 0.4809770642016508, + "grad_norm": 164.39871215820312, + "learning_rate": 6.273853628416911e-06, + "loss": 11.4798, + "step": 238100 + }, + { + "epoch": 0.48099726483433464, + "grad_norm": 342.7237548828125, + "learning_rate": 6.273516078116218e-06, + "loss": 14.394, + "step": 238110 + }, + { + "epoch": 0.4810174654670184, + "grad_norm": 293.46392822265625, + "learning_rate": 6.273178521608564e-06, + "loss": 28.5658, + "step": 238120 + }, + { + "epoch": 0.4810376660997022, + "grad_norm": 436.5867004394531, + "learning_rate": 6.272840958895591e-06, + "loss": 14.1033, + "step": 238130 + }, + { + "epoch": 0.48105786673238604, + "grad_norm": 237.17486572265625, + "learning_rate": 6.272503389978945e-06, + "loss": 30.1534, + "step": 238140 + }, + { + "epoch": 0.48107806736506986, + "grad_norm": 598.215576171875, + "learning_rate": 6.272165814860272e-06, + "loss": 27.7402, + "step": 238150 + }, + { + "epoch": 0.4810982679977537, + "grad_norm": 324.96685791015625, + "learning_rate": 6.271828233541218e-06, + "loss": 15.9308, + "step": 238160 + }, + { + "epoch": 0.4811184686304375, + "grad_norm": 
179.6580352783203, + "learning_rate": 6.271490646023426e-06, + "loss": 11.6563, + "step": 238170 + }, + { + "epoch": 0.4811386692631213, + "grad_norm": 430.6047058105469, + "learning_rate": 6.271153052308544e-06, + "loss": 19.4031, + "step": 238180 + }, + { + "epoch": 0.48115886989580514, + "grad_norm": 1718.493896484375, + "learning_rate": 6.2708154523982155e-06, + "loss": 45.0198, + "step": 238190 + }, + { + "epoch": 0.48117907052848896, + "grad_norm": 456.6887512207031, + "learning_rate": 6.270477846294086e-06, + "loss": 20.1995, + "step": 238200 + }, + { + "epoch": 0.4811992711611728, + "grad_norm": 291.59515380859375, + "learning_rate": 6.270140233997803e-06, + "loss": 11.6422, + "step": 238210 + }, + { + "epoch": 0.4812194717938566, + "grad_norm": 400.2921447753906, + "learning_rate": 6.269802615511009e-06, + "loss": 10.4124, + "step": 238220 + }, + { + "epoch": 0.4812396724265404, + "grad_norm": 87.04499816894531, + "learning_rate": 6.269464990835353e-06, + "loss": 11.0158, + "step": 238230 + }, + { + "epoch": 0.48125987305922424, + "grad_norm": 492.2239990234375, + "learning_rate": 6.2691273599724765e-06, + "loss": 17.3587, + "step": 238240 + }, + { + "epoch": 0.481280073691908, + "grad_norm": 536.57666015625, + "learning_rate": 6.268789722924029e-06, + "loss": 17.8366, + "step": 238250 + }, + { + "epoch": 0.4813002743245918, + "grad_norm": 226.37255859375, + "learning_rate": 6.268452079691654e-06, + "loss": 17.9656, + "step": 238260 + }, + { + "epoch": 0.48132047495727565, + "grad_norm": 184.16017150878906, + "learning_rate": 6.268114430276996e-06, + "loss": 20.7677, + "step": 238270 + }, + { + "epoch": 0.48134067558995947, + "grad_norm": 448.76898193359375, + "learning_rate": 6.267776774681703e-06, + "loss": 25.1282, + "step": 238280 + }, + { + "epoch": 0.4813608762226433, + "grad_norm": 783.249755859375, + "learning_rate": 6.26743911290742e-06, + "loss": 18.0381, + "step": 238290 + }, + { + "epoch": 0.4813810768553271, + "grad_norm": 144.61526489257812, + "learning_rate": 6.267101444955792e-06, + "loss": 28.28, + "step": 238300 + }, + { + "epoch": 0.4814012774880109, + "grad_norm": 57.12580490112305, + "learning_rate": 6.2667637708284655e-06, + "loss": 42.403, + "step": 238310 + }, + { + "epoch": 0.48142147812069475, + "grad_norm": 272.0799255371094, + "learning_rate": 6.266426090527087e-06, + "loss": 6.1611, + "step": 238320 + }, + { + "epoch": 0.48144167875337857, + "grad_norm": 225.3270263671875, + "learning_rate": 6.2660884040533e-06, + "loss": 54.5982, + "step": 238330 + }, + { + "epoch": 0.4814618793860624, + "grad_norm": 162.77500915527344, + "learning_rate": 6.2657507114087525e-06, + "loss": 16.513, + "step": 238340 + }, + { + "epoch": 0.4814820800187462, + "grad_norm": 381.93902587890625, + "learning_rate": 6.265413012595088e-06, + "loss": 26.8493, + "step": 238350 + }, + { + "epoch": 0.48150228065143, + "grad_norm": 395.8524475097656, + "learning_rate": 6.265075307613956e-06, + "loss": 24.0672, + "step": 238360 + }, + { + "epoch": 0.48152248128411385, + "grad_norm": 332.8500061035156, + "learning_rate": 6.264737596466999e-06, + "loss": 33.6149, + "step": 238370 + }, + { + "epoch": 0.4815426819167976, + "grad_norm": 264.1964416503906, + "learning_rate": 6.264399879155865e-06, + "loss": 24.1436, + "step": 238380 + }, + { + "epoch": 0.48156288254948143, + "grad_norm": 519.498779296875, + "learning_rate": 6.2640621556822e-06, + "loss": 15.3351, + "step": 238390 + }, + { + "epoch": 0.48158308318216525, + "grad_norm": 542.74365234375, + "learning_rate": 
6.2637244260476474e-06, + "loss": 21.5293, + "step": 238400 + }, + { + "epoch": 0.48160328381484907, + "grad_norm": 609.6380615234375, + "learning_rate": 6.2633866902538564e-06, + "loss": 12.4965, + "step": 238410 + }, + { + "epoch": 0.4816234844475329, + "grad_norm": 614.1182250976562, + "learning_rate": 6.263048948302471e-06, + "loss": 21.9679, + "step": 238420 + }, + { + "epoch": 0.4816436850802167, + "grad_norm": 547.7650756835938, + "learning_rate": 6.26271120019514e-06, + "loss": 24.0661, + "step": 238430 + }, + { + "epoch": 0.48166388571290053, + "grad_norm": 548.20458984375, + "learning_rate": 6.262373445933506e-06, + "loss": 14.4791, + "step": 238440 + }, + { + "epoch": 0.48168408634558435, + "grad_norm": 257.6809997558594, + "learning_rate": 6.262035685519218e-06, + "loss": 17.3105, + "step": 238450 + }, + { + "epoch": 0.48170428697826817, + "grad_norm": 262.88385009765625, + "learning_rate": 6.261697918953922e-06, + "loss": 20.1521, + "step": 238460 + }, + { + "epoch": 0.481724487610952, + "grad_norm": 409.8096008300781, + "learning_rate": 6.261360146239261e-06, + "loss": 18.6579, + "step": 238470 + }, + { + "epoch": 0.4817446882436358, + "grad_norm": 352.5834045410156, + "learning_rate": 6.261022367376886e-06, + "loss": 14.1261, + "step": 238480 + }, + { + "epoch": 0.48176488887631963, + "grad_norm": 32.62371826171875, + "learning_rate": 6.260684582368439e-06, + "loss": 39.7104, + "step": 238490 + }, + { + "epoch": 0.4817850895090034, + "grad_norm": 437.8738708496094, + "learning_rate": 6.26034679121557e-06, + "loss": 19.1444, + "step": 238500 + }, + { + "epoch": 0.4818052901416872, + "grad_norm": 340.09320068359375, + "learning_rate": 6.260008993919922e-06, + "loss": 15.8661, + "step": 238510 + }, + { + "epoch": 0.48182549077437103, + "grad_norm": 305.18121337890625, + "learning_rate": 6.259671190483143e-06, + "loss": 13.8897, + "step": 238520 + }, + { + "epoch": 0.48184569140705485, + "grad_norm": 117.7383804321289, + "learning_rate": 6.25933338090688e-06, + "loss": 15.7199, + "step": 238530 + }, + { + "epoch": 0.4818658920397387, + "grad_norm": 441.84149169921875, + "learning_rate": 6.258995565192779e-06, + "loss": 16.5689, + "step": 238540 + }, + { + "epoch": 0.4818860926724225, + "grad_norm": 120.50872039794922, + "learning_rate": 6.258657743342486e-06, + "loss": 18.1548, + "step": 238550 + }, + { + "epoch": 0.4819062933051063, + "grad_norm": 168.04864501953125, + "learning_rate": 6.258319915357648e-06, + "loss": 18.8593, + "step": 238560 + }, + { + "epoch": 0.48192649393779013, + "grad_norm": 27.290313720703125, + "learning_rate": 6.257982081239912e-06, + "loss": 15.7458, + "step": 238570 + }, + { + "epoch": 0.48194669457047395, + "grad_norm": 677.3406982421875, + "learning_rate": 6.257644240990923e-06, + "loss": 21.2616, + "step": 238580 + }, + { + "epoch": 0.4819668952031578, + "grad_norm": 853.0466918945312, + "learning_rate": 6.257306394612328e-06, + "loss": 30.1345, + "step": 238590 + }, + { + "epoch": 0.4819870958358416, + "grad_norm": 482.6874084472656, + "learning_rate": 6.256968542105775e-06, + "loss": 19.709, + "step": 238600 + }, + { + "epoch": 0.4820072964685254, + "grad_norm": 713.67578125, + "learning_rate": 6.2566306834729095e-06, + "loss": 29.0597, + "step": 238610 + }, + { + "epoch": 0.48202749710120923, + "grad_norm": 72.65042114257812, + "learning_rate": 6.256292818715378e-06, + "loss": 29.3256, + "step": 238620 + }, + { + "epoch": 0.482047697733893, + "grad_norm": 707.7577514648438, + "learning_rate": 6.255954947834831e-06, + "loss": 13.887, + "step": 
238630 + }, + { + "epoch": 0.4820678983665768, + "grad_norm": 587.9662475585938, + "learning_rate": 6.255617070832908e-06, + "loss": 22.2096, + "step": 238640 + }, + { + "epoch": 0.48208809899926064, + "grad_norm": 1038.5172119140625, + "learning_rate": 6.25527918771126e-06, + "loss": 42.1712, + "step": 238650 + }, + { + "epoch": 0.48210829963194446, + "grad_norm": 178.3859100341797, + "learning_rate": 6.254941298471535e-06, + "loss": 20.0805, + "step": 238660 + }, + { + "epoch": 0.4821285002646283, + "grad_norm": 423.32806396484375, + "learning_rate": 6.254603403115377e-06, + "loss": 25.6779, + "step": 238670 + }, + { + "epoch": 0.4821487008973121, + "grad_norm": 209.841552734375, + "learning_rate": 6.254265501644435e-06, + "loss": 16.6777, + "step": 238680 + }, + { + "epoch": 0.4821689015299959, + "grad_norm": 612.9212036132812, + "learning_rate": 6.253927594060354e-06, + "loss": 32.2635, + "step": 238690 + }, + { + "epoch": 0.48218910216267974, + "grad_norm": 688.3139038085938, + "learning_rate": 6.2535896803647845e-06, + "loss": 15.5334, + "step": 238700 + }, + { + "epoch": 0.48220930279536356, + "grad_norm": 152.21890258789062, + "learning_rate": 6.253251760559369e-06, + "loss": 18.4623, + "step": 238710 + }, + { + "epoch": 0.4822295034280474, + "grad_norm": 249.14146423339844, + "learning_rate": 6.252913834645757e-06, + "loss": 32.7701, + "step": 238720 + }, + { + "epoch": 0.4822497040607312, + "grad_norm": 576.8262329101562, + "learning_rate": 6.252575902625595e-06, + "loss": 17.6127, + "step": 238730 + }, + { + "epoch": 0.482269904693415, + "grad_norm": 291.13018798828125, + "learning_rate": 6.25223796450053e-06, + "loss": 10.3005, + "step": 238740 + }, + { + "epoch": 0.48229010532609884, + "grad_norm": 155.48092651367188, + "learning_rate": 6.251900020272208e-06, + "loss": 17.9134, + "step": 238750 + }, + { + "epoch": 0.4823103059587826, + "grad_norm": 942.543212890625, + "learning_rate": 6.2515620699422775e-06, + "loss": 31.9345, + "step": 238760 + }, + { + "epoch": 0.4823305065914664, + "grad_norm": 1022.882080078125, + "learning_rate": 6.2512241135123856e-06, + "loss": 22.4983, + "step": 238770 + }, + { + "epoch": 0.48235070722415024, + "grad_norm": 345.819091796875, + "learning_rate": 6.250886150984179e-06, + "loss": 18.4888, + "step": 238780 + }, + { + "epoch": 0.48237090785683406, + "grad_norm": 420.4615783691406, + "learning_rate": 6.2505481823593065e-06, + "loss": 21.7047, + "step": 238790 + }, + { + "epoch": 0.4823911084895179, + "grad_norm": 175.2515869140625, + "learning_rate": 6.250210207639411e-06, + "loss": 18.4462, + "step": 238800 + }, + { + "epoch": 0.4824113091222017, + "grad_norm": 582.9957275390625, + "learning_rate": 6.249872226826145e-06, + "loss": 13.5178, + "step": 238810 + }, + { + "epoch": 0.4824315097548855, + "grad_norm": 409.69757080078125, + "learning_rate": 6.249534239921154e-06, + "loss": 20.5632, + "step": 238820 + }, + { + "epoch": 0.48245171038756934, + "grad_norm": 496.896240234375, + "learning_rate": 6.24919624692608e-06, + "loss": 20.3248, + "step": 238830 + }, + { + "epoch": 0.48247191102025316, + "grad_norm": 838.0579223632812, + "learning_rate": 6.2488582478425795e-06, + "loss": 29.6529, + "step": 238840 + }, + { + "epoch": 0.482492111652937, + "grad_norm": 346.7917175292969, + "learning_rate": 6.2485202426722925e-06, + "loss": 38.4679, + "step": 238850 + }, + { + "epoch": 0.4825123122856208, + "grad_norm": 212.93829345703125, + "learning_rate": 6.248182231416872e-06, + "loss": 13.1648, + "step": 238860 + }, + { + "epoch": 
0.4825325129183046, + "grad_norm": 1920.0400390625, + "learning_rate": 6.247844214077962e-06, + "loss": 33.3601, + "step": 238870 + }, + { + "epoch": 0.48255271355098844, + "grad_norm": 658.6197509765625, + "learning_rate": 6.247506190657209e-06, + "loss": 24.9037, + "step": 238880 + }, + { + "epoch": 0.4825729141836722, + "grad_norm": 281.5344543457031, + "learning_rate": 6.247168161156264e-06, + "loss": 20.6129, + "step": 238890 + }, + { + "epoch": 0.48259311481635603, + "grad_norm": 356.06890869140625, + "learning_rate": 6.24683012557677e-06, + "loss": 14.7911, + "step": 238900 + }, + { + "epoch": 0.48261331544903985, + "grad_norm": 580.4341430664062, + "learning_rate": 6.2464920839203805e-06, + "loss": 22.8333, + "step": 238910 + }, + { + "epoch": 0.48263351608172367, + "grad_norm": 341.4069519042969, + "learning_rate": 6.2461540361887386e-06, + "loss": 11.302, + "step": 238920 + }, + { + "epoch": 0.4826537167144075, + "grad_norm": 412.54620361328125, + "learning_rate": 6.245815982383492e-06, + "loss": 24.5794, + "step": 238930 + }, + { + "epoch": 0.4826739173470913, + "grad_norm": 245.6103515625, + "learning_rate": 6.24547792250629e-06, + "loss": 18.9905, + "step": 238940 + }, + { + "epoch": 0.48269411797977513, + "grad_norm": 530.7523193359375, + "learning_rate": 6.24513985655878e-06, + "loss": 29.9537, + "step": 238950 + }, + { + "epoch": 0.48271431861245895, + "grad_norm": 271.4397888183594, + "learning_rate": 6.244801784542609e-06, + "loss": 25.8615, + "step": 238960 + }, + { + "epoch": 0.48273451924514277, + "grad_norm": 275.6568603515625, + "learning_rate": 6.244463706459426e-06, + "loss": 16.2746, + "step": 238970 + }, + { + "epoch": 0.4827547198778266, + "grad_norm": 192.43406677246094, + "learning_rate": 6.244125622310877e-06, + "loss": 17.3146, + "step": 238980 + }, + { + "epoch": 0.4827749205105104, + "grad_norm": 382.0212097167969, + "learning_rate": 6.243787532098611e-06, + "loss": 15.6681, + "step": 238990 + }, + { + "epoch": 0.48279512114319423, + "grad_norm": 333.97064208984375, + "learning_rate": 6.243449435824276e-06, + "loss": 16.0727, + "step": 239000 + }, + { + "epoch": 0.48281532177587805, + "grad_norm": 186.2528533935547, + "learning_rate": 6.243111333489516e-06, + "loss": 5.797, + "step": 239010 + }, + { + "epoch": 0.4828355224085618, + "grad_norm": 275.0234680175781, + "learning_rate": 6.242773225095986e-06, + "loss": 15.1636, + "step": 239020 + }, + { + "epoch": 0.48285572304124563, + "grad_norm": 2234.659423828125, + "learning_rate": 6.242435110645328e-06, + "loss": 30.0221, + "step": 239030 + }, + { + "epoch": 0.48287592367392945, + "grad_norm": 297.8598937988281, + "learning_rate": 6.242096990139192e-06, + "loss": 34.7394, + "step": 239040 + }, + { + "epoch": 0.4828961243066133, + "grad_norm": 379.6488952636719, + "learning_rate": 6.241758863579227e-06, + "loss": 38.5201, + "step": 239050 + }, + { + "epoch": 0.4829163249392971, + "grad_norm": 465.7540283203125, + "learning_rate": 6.241420730967079e-06, + "loss": 21.9894, + "step": 239060 + }, + { + "epoch": 0.4829365255719809, + "grad_norm": 590.3614501953125, + "learning_rate": 6.241082592304398e-06, + "loss": 18.3803, + "step": 239070 + }, + { + "epoch": 0.48295672620466473, + "grad_norm": 251.89111328125, + "learning_rate": 6.24074444759283e-06, + "loss": 18.2033, + "step": 239080 + }, + { + "epoch": 0.48297692683734855, + "grad_norm": 609.8746337890625, + "learning_rate": 6.240406296834024e-06, + "loss": 26.0109, + "step": 239090 + }, + { + "epoch": 0.4829971274700324, + "grad_norm": 150.3726806640625, 
+ "learning_rate": 6.240068140029628e-06, + "loss": 18.8098, + "step": 239100 + }, + { + "epoch": 0.4830173281027162, + "grad_norm": 306.13739013671875, + "learning_rate": 6.2397299771812925e-06, + "loss": 22.8141, + "step": 239110 + }, + { + "epoch": 0.4830375287354, + "grad_norm": 483.5748596191406, + "learning_rate": 6.23939180829066e-06, + "loss": 17.9819, + "step": 239120 + }, + { + "epoch": 0.48305772936808383, + "grad_norm": 285.5124206542969, + "learning_rate": 6.239053633359384e-06, + "loss": 12.5764, + "step": 239130 + }, + { + "epoch": 0.4830779300007676, + "grad_norm": 551.8235473632812, + "learning_rate": 6.2387154523891115e-06, + "loss": 20.5377, + "step": 239140 + }, + { + "epoch": 0.4830981306334514, + "grad_norm": 1192.864013671875, + "learning_rate": 6.238377265381489e-06, + "loss": 33.758, + "step": 239150 + }, + { + "epoch": 0.48311833126613524, + "grad_norm": 126.6272201538086, + "learning_rate": 6.2380390723381666e-06, + "loss": 11.3716, + "step": 239160 + }, + { + "epoch": 0.48313853189881906, + "grad_norm": 402.6373291015625, + "learning_rate": 6.23770087326079e-06, + "loss": 14.0015, + "step": 239170 + }, + { + "epoch": 0.4831587325315029, + "grad_norm": 408.8807067871094, + "learning_rate": 6.237362668151013e-06, + "loss": 20.2099, + "step": 239180 + }, + { + "epoch": 0.4831789331641867, + "grad_norm": 1.8079912662506104, + "learning_rate": 6.237024457010478e-06, + "loss": 21.0077, + "step": 239190 + }, + { + "epoch": 0.4831991337968705, + "grad_norm": 341.6095275878906, + "learning_rate": 6.236686239840836e-06, + "loss": 22.3597, + "step": 239200 + }, + { + "epoch": 0.48321933442955434, + "grad_norm": 251.46356201171875, + "learning_rate": 6.236348016643735e-06, + "loss": 19.5792, + "step": 239210 + }, + { + "epoch": 0.48323953506223816, + "grad_norm": 217.58238220214844, + "learning_rate": 6.236009787420824e-06, + "loss": 19.7335, + "step": 239220 + }, + { + "epoch": 0.483259735694922, + "grad_norm": 21.656570434570312, + "learning_rate": 6.235671552173752e-06, + "loss": 18.0882, + "step": 239230 + }, + { + "epoch": 0.4832799363276058, + "grad_norm": 303.45770263671875, + "learning_rate": 6.2353333109041655e-06, + "loss": 14.4671, + "step": 239240 + }, + { + "epoch": 0.4833001369602896, + "grad_norm": 430.2806701660156, + "learning_rate": 6.234995063613716e-06, + "loss": 17.2651, + "step": 239250 + }, + { + "epoch": 0.48332033759297344, + "grad_norm": 926.8755493164062, + "learning_rate": 6.234656810304048e-06, + "loss": 26.6614, + "step": 239260 + }, + { + "epoch": 0.4833405382256572, + "grad_norm": 159.474853515625, + "learning_rate": 6.234318550976815e-06, + "loss": 17.417, + "step": 239270 + }, + { + "epoch": 0.483360738858341, + "grad_norm": 194.66876220703125, + "learning_rate": 6.233980285633661e-06, + "loss": 19.7228, + "step": 239280 + }, + { + "epoch": 0.48338093949102484, + "grad_norm": 384.2812194824219, + "learning_rate": 6.233642014276238e-06, + "loss": 20.2494, + "step": 239290 + }, + { + "epoch": 0.48340114012370866, + "grad_norm": 193.2677001953125, + "learning_rate": 6.233303736906193e-06, + "loss": 24.9815, + "step": 239300 + }, + { + "epoch": 0.4834213407563925, + "grad_norm": 496.50653076171875, + "learning_rate": 6.232965453525175e-06, + "loss": 21.5377, + "step": 239310 + }, + { + "epoch": 0.4834415413890763, + "grad_norm": 177.0562286376953, + "learning_rate": 6.2326271641348325e-06, + "loss": 13.9061, + "step": 239320 + }, + { + "epoch": 0.4834617420217601, + "grad_norm": 369.60211181640625, + "learning_rate": 6.232288868736816e-06, + 
"loss": 11.1647, + "step": 239330 + }, + { + "epoch": 0.48348194265444394, + "grad_norm": 730.1735229492188, + "learning_rate": 6.231950567332773e-06, + "loss": 18.1089, + "step": 239340 + }, + { + "epoch": 0.48350214328712776, + "grad_norm": 169.8363494873047, + "learning_rate": 6.231612259924351e-06, + "loss": 10.8247, + "step": 239350 + }, + { + "epoch": 0.4835223439198116, + "grad_norm": 234.20614624023438, + "learning_rate": 6.231273946513201e-06, + "loss": 15.2445, + "step": 239360 + }, + { + "epoch": 0.4835425445524954, + "grad_norm": 383.68255615234375, + "learning_rate": 6.23093562710097e-06, + "loss": 40.6892, + "step": 239370 + }, + { + "epoch": 0.4835627451851792, + "grad_norm": 396.31842041015625, + "learning_rate": 6.23059730168931e-06, + "loss": 26.1263, + "step": 239380 + }, + { + "epoch": 0.48358294581786304, + "grad_norm": 251.06167602539062, + "learning_rate": 6.230258970279867e-06, + "loss": 10.0616, + "step": 239390 + }, + { + "epoch": 0.4836031464505468, + "grad_norm": 24.388273239135742, + "learning_rate": 6.229920632874291e-06, + "loss": 26.5303, + "step": 239400 + }, + { + "epoch": 0.4836233470832306, + "grad_norm": 123.3339614868164, + "learning_rate": 6.229582289474231e-06, + "loss": 23.5905, + "step": 239410 + }, + { + "epoch": 0.48364354771591445, + "grad_norm": 373.2325134277344, + "learning_rate": 6.229243940081336e-06, + "loss": 28.0521, + "step": 239420 + }, + { + "epoch": 0.48366374834859827, + "grad_norm": 304.2738952636719, + "learning_rate": 6.228905584697254e-06, + "loss": 21.776, + "step": 239430 + }, + { + "epoch": 0.4836839489812821, + "grad_norm": 659.3280029296875, + "learning_rate": 6.228567223323637e-06, + "loss": 17.784, + "step": 239440 + }, + { + "epoch": 0.4837041496139659, + "grad_norm": 370.91400146484375, + "learning_rate": 6.228228855962133e-06, + "loss": 15.5512, + "step": 239450 + }, + { + "epoch": 0.4837243502466497, + "grad_norm": 281.05889892578125, + "learning_rate": 6.227890482614388e-06, + "loss": 27.7278, + "step": 239460 + }, + { + "epoch": 0.48374455087933355, + "grad_norm": 1949.37158203125, + "learning_rate": 6.227552103282056e-06, + "loss": 39.1714, + "step": 239470 + }, + { + "epoch": 0.48376475151201737, + "grad_norm": 98.6017074584961, + "learning_rate": 6.227213717966782e-06, + "loss": 18.2789, + "step": 239480 + }, + { + "epoch": 0.4837849521447012, + "grad_norm": 112.09862518310547, + "learning_rate": 6.226875326670218e-06, + "loss": 9.7645, + "step": 239490 + }, + { + "epoch": 0.483805152777385, + "grad_norm": 549.0416259765625, + "learning_rate": 6.2265369293940135e-06, + "loss": 20.0345, + "step": 239500 + }, + { + "epoch": 0.4838253534100688, + "grad_norm": 436.4242858886719, + "learning_rate": 6.226198526139815e-06, + "loss": 23.6544, + "step": 239510 + }, + { + "epoch": 0.48384555404275265, + "grad_norm": 631.5547485351562, + "learning_rate": 6.225860116909276e-06, + "loss": 17.9584, + "step": 239520 + }, + { + "epoch": 0.4838657546754364, + "grad_norm": 726.14794921875, + "learning_rate": 6.225521701704042e-06, + "loss": 21.1192, + "step": 239530 + }, + { + "epoch": 0.48388595530812023, + "grad_norm": 714.0602416992188, + "learning_rate": 6.225183280525763e-06, + "loss": 22.2412, + "step": 239540 + }, + { + "epoch": 0.48390615594080405, + "grad_norm": 56.499000549316406, + "learning_rate": 6.2248448533760895e-06, + "loss": 16.5303, + "step": 239550 + }, + { + "epoch": 0.48392635657348787, + "grad_norm": 0.0, + "learning_rate": 6.224506420256673e-06, + "loss": 16.9147, + "step": 239560 + }, + { + "epoch": 
0.4839465572061717, + "grad_norm": 402.64794921875, + "learning_rate": 6.2241679811691595e-06, + "loss": 16.0278, + "step": 239570 + }, + { + "epoch": 0.4839667578388555, + "grad_norm": 761.3997192382812, + "learning_rate": 6.223829536115198e-06, + "loss": 17.4052, + "step": 239580 + }, + { + "epoch": 0.48398695847153933, + "grad_norm": 267.6705627441406, + "learning_rate": 6.223491085096441e-06, + "loss": 19.644, + "step": 239590 + }, + { + "epoch": 0.48400715910422315, + "grad_norm": 468.30657958984375, + "learning_rate": 6.223152628114537e-06, + "loss": 13.8627, + "step": 239600 + }, + { + "epoch": 0.48402735973690697, + "grad_norm": 818.8458862304688, + "learning_rate": 6.222814165171136e-06, + "loss": 28.966, + "step": 239610 + }, + { + "epoch": 0.4840475603695908, + "grad_norm": 420.2396240234375, + "learning_rate": 6.222475696267885e-06, + "loss": 14.0035, + "step": 239620 + }, + { + "epoch": 0.4840677610022746, + "grad_norm": 150.33786010742188, + "learning_rate": 6.222137221406439e-06, + "loss": 15.3682, + "step": 239630 + }, + { + "epoch": 0.48408796163495843, + "grad_norm": 581.35302734375, + "learning_rate": 6.221798740588442e-06, + "loss": 31.8241, + "step": 239640 + }, + { + "epoch": 0.48410816226764225, + "grad_norm": 177.21458435058594, + "learning_rate": 6.221460253815546e-06, + "loss": 14.8487, + "step": 239650 + }, + { + "epoch": 0.484128362900326, + "grad_norm": 217.68978881835938, + "learning_rate": 6.221121761089402e-06, + "loss": 23.5491, + "step": 239660 + }, + { + "epoch": 0.48414856353300983, + "grad_norm": 184.8630828857422, + "learning_rate": 6.220783262411658e-06, + "loss": 10.7315, + "step": 239670 + }, + { + "epoch": 0.48416876416569365, + "grad_norm": 1146.76904296875, + "learning_rate": 6.220444757783966e-06, + "loss": 21.2471, + "step": 239680 + }, + { + "epoch": 0.4841889647983775, + "grad_norm": 361.0469665527344, + "learning_rate": 6.220106247207972e-06, + "loss": 17.7106, + "step": 239690 + }, + { + "epoch": 0.4842091654310613, + "grad_norm": 628.46337890625, + "learning_rate": 6.219767730685329e-06, + "loss": 25.8659, + "step": 239700 + }, + { + "epoch": 0.4842293660637451, + "grad_norm": 135.90708923339844, + "learning_rate": 6.219429208217685e-06, + "loss": 15.7509, + "step": 239710 + }, + { + "epoch": 0.48424956669642893, + "grad_norm": 542.4644165039062, + "learning_rate": 6.219090679806694e-06, + "loss": 26.3743, + "step": 239720 + }, + { + "epoch": 0.48426976732911275, + "grad_norm": 179.53170776367188, + "learning_rate": 6.218752145453999e-06, + "loss": 26.2833, + "step": 239730 + }, + { + "epoch": 0.4842899679617966, + "grad_norm": 508.2477722167969, + "learning_rate": 6.218413605161258e-06, + "loss": 21.0299, + "step": 239740 + }, + { + "epoch": 0.4843101685944804, + "grad_norm": 378.9328918457031, + "learning_rate": 6.218075058930113e-06, + "loss": 10.2118, + "step": 239750 + }, + { + "epoch": 0.4843303692271642, + "grad_norm": 445.72991943359375, + "learning_rate": 6.217736506762219e-06, + "loss": 24.4518, + "step": 239760 + }, + { + "epoch": 0.48435056985984803, + "grad_norm": 657.2543334960938, + "learning_rate": 6.217397948659228e-06, + "loss": 29.7278, + "step": 239770 + }, + { + "epoch": 0.4843707704925318, + "grad_norm": 297.7481689453125, + "learning_rate": 6.217059384622782e-06, + "loss": 14.4522, + "step": 239780 + }, + { + "epoch": 0.4843909711252156, + "grad_norm": 375.36016845703125, + "learning_rate": 6.21672081465454e-06, + "loss": 19.0674, + "step": 239790 + }, + { + "epoch": 0.48441117175789944, + "grad_norm": 
203.6333770751953, + "learning_rate": 6.216382238756147e-06, + "loss": 14.1113, + "step": 239800 + }, + { + "epoch": 0.48443137239058326, + "grad_norm": 638.8234252929688, + "learning_rate": 6.216043656929254e-06, + "loss": 25.8044, + "step": 239810 + }, + { + "epoch": 0.4844515730232671, + "grad_norm": 379.4268798828125, + "learning_rate": 6.215705069175513e-06, + "loss": 29.2119, + "step": 239820 + }, + { + "epoch": 0.4844717736559509, + "grad_norm": 475.16302490234375, + "learning_rate": 6.215366475496572e-06, + "loss": 25.8795, + "step": 239830 + }, + { + "epoch": 0.4844919742886347, + "grad_norm": 366.3997802734375, + "learning_rate": 6.215027875894082e-06, + "loss": 15.6054, + "step": 239840 + }, + { + "epoch": 0.48451217492131854, + "grad_norm": 191.73252868652344, + "learning_rate": 6.214689270369694e-06, + "loss": 14.7612, + "step": 239850 + }, + { + "epoch": 0.48453237555400236, + "grad_norm": 343.63763427734375, + "learning_rate": 6.214350658925058e-06, + "loss": 8.4783, + "step": 239860 + }, + { + "epoch": 0.4845525761866862, + "grad_norm": 418.5944519042969, + "learning_rate": 6.2140120415618235e-06, + "loss": 24.3871, + "step": 239870 + }, + { + "epoch": 0.48457277681937, + "grad_norm": 386.5837097167969, + "learning_rate": 6.213673418281643e-06, + "loss": 18.3405, + "step": 239880 + }, + { + "epoch": 0.4845929774520538, + "grad_norm": 754.9140014648438, + "learning_rate": 6.213334789086163e-06, + "loss": 22.8767, + "step": 239890 + }, + { + "epoch": 0.48461317808473764, + "grad_norm": 166.3883056640625, + "learning_rate": 6.212996153977038e-06, + "loss": 14.9799, + "step": 239900 + }, + { + "epoch": 0.4846333787174214, + "grad_norm": 257.137939453125, + "learning_rate": 6.212657512955916e-06, + "loss": 17.5402, + "step": 239910 + }, + { + "epoch": 0.4846535793501052, + "grad_norm": 306.62982177734375, + "learning_rate": 6.212318866024449e-06, + "loss": 22.9998, + "step": 239920 + }, + { + "epoch": 0.48467377998278904, + "grad_norm": 242.8882293701172, + "learning_rate": 6.211980213184287e-06, + "loss": 18.9199, + "step": 239930 + }, + { + "epoch": 0.48469398061547286, + "grad_norm": 327.93304443359375, + "learning_rate": 6.21164155443708e-06, + "loss": 9.0374, + "step": 239940 + }, + { + "epoch": 0.4847141812481567, + "grad_norm": 179.9681854248047, + "learning_rate": 6.21130288978448e-06, + "loss": 16.0011, + "step": 239950 + }, + { + "epoch": 0.4847343818808405, + "grad_norm": 571.282958984375, + "learning_rate": 6.210964219228135e-06, + "loss": 27.4455, + "step": 239960 + }, + { + "epoch": 0.4847545825135243, + "grad_norm": 372.53173828125, + "learning_rate": 6.2106255427697e-06, + "loss": 32.171, + "step": 239970 + }, + { + "epoch": 0.48477478314620814, + "grad_norm": 132.0978240966797, + "learning_rate": 6.21028686041082e-06, + "loss": 25.9145, + "step": 239980 + }, + { + "epoch": 0.48479498377889196, + "grad_norm": 34.037010192871094, + "learning_rate": 6.20994817215315e-06, + "loss": 9.1799, + "step": 239990 + }, + { + "epoch": 0.4848151844115758, + "grad_norm": 269.9684753417969, + "learning_rate": 6.209609477998339e-06, + "loss": 14.7286, + "step": 240000 + }, + { + "epoch": 0.4848353850442596, + "grad_norm": 61.488460540771484, + "learning_rate": 6.209270777948038e-06, + "loss": 9.3359, + "step": 240010 + }, + { + "epoch": 0.4848555856769434, + "grad_norm": 1023.6925048828125, + "learning_rate": 6.208932072003899e-06, + "loss": 26.4617, + "step": 240020 + }, + { + "epoch": 0.48487578630962724, + "grad_norm": 127.65845489501953, + "learning_rate": 
6.208593360167571e-06, + "loss": 16.4814, + "step": 240030 + }, + { + "epoch": 0.484895986942311, + "grad_norm": 337.93463134765625, + "learning_rate": 6.208254642440705e-06, + "loss": 21.1301, + "step": 240040 + }, + { + "epoch": 0.48491618757499483, + "grad_norm": 431.2547607421875, + "learning_rate": 6.207915918824952e-06, + "loss": 29.0501, + "step": 240050 + }, + { + "epoch": 0.48493638820767865, + "grad_norm": 950.545166015625, + "learning_rate": 6.207577189321965e-06, + "loss": 20.9198, + "step": 240060 + }, + { + "epoch": 0.48495658884036247, + "grad_norm": 599.9775390625, + "learning_rate": 6.2072384539333914e-06, + "loss": 27.4738, + "step": 240070 + }, + { + "epoch": 0.4849767894730463, + "grad_norm": 252.0543975830078, + "learning_rate": 6.206899712660887e-06, + "loss": 26.734, + "step": 240080 + }, + { + "epoch": 0.4849969901057301, + "grad_norm": 203.58958435058594, + "learning_rate": 6.206560965506097e-06, + "loss": 21.193, + "step": 240090 + }, + { + "epoch": 0.48501719073841393, + "grad_norm": 541.8986206054688, + "learning_rate": 6.206222212470675e-06, + "loss": 20.3465, + "step": 240100 + }, + { + "epoch": 0.48503739137109775, + "grad_norm": 22.384321212768555, + "learning_rate": 6.205883453556274e-06, + "loss": 26.7709, + "step": 240110 + }, + { + "epoch": 0.48505759200378157, + "grad_norm": 559.81982421875, + "learning_rate": 6.205544688764542e-06, + "loss": 27.0594, + "step": 240120 + }, + { + "epoch": 0.4850777926364654, + "grad_norm": 466.8587951660156, + "learning_rate": 6.205205918097133e-06, + "loss": 21.3222, + "step": 240130 + }, + { + "epoch": 0.4850979932691492, + "grad_norm": 707.7942504882812, + "learning_rate": 6.204867141555695e-06, + "loss": 17.0808, + "step": 240140 + }, + { + "epoch": 0.48511819390183303, + "grad_norm": 265.2966613769531, + "learning_rate": 6.20452835914188e-06, + "loss": 28.1372, + "step": 240150 + }, + { + "epoch": 0.48513839453451685, + "grad_norm": 248.39361572265625, + "learning_rate": 6.204189570857342e-06, + "loss": 26.3115, + "step": 240160 + }, + { + "epoch": 0.4851585951672006, + "grad_norm": 284.2188720703125, + "learning_rate": 6.2038507767037295e-06, + "loss": 19.0415, + "step": 240170 + }, + { + "epoch": 0.48517879579988443, + "grad_norm": 466.6912536621094, + "learning_rate": 6.2035119766826935e-06, + "loss": 16.9674, + "step": 240180 + }, + { + "epoch": 0.48519899643256825, + "grad_norm": 381.26611328125, + "learning_rate": 6.203173170795887e-06, + "loss": 19.6884, + "step": 240190 + }, + { + "epoch": 0.4852191970652521, + "grad_norm": 532.6866455078125, + "learning_rate": 6.202834359044959e-06, + "loss": 17.3738, + "step": 240200 + }, + { + "epoch": 0.4852393976979359, + "grad_norm": 239.88803100585938, + "learning_rate": 6.2024955414315634e-06, + "loss": 13.0791, + "step": 240210 + }, + { + "epoch": 0.4852595983306197, + "grad_norm": 295.5515441894531, + "learning_rate": 6.202156717957351e-06, + "loss": 26.7166, + "step": 240220 + }, + { + "epoch": 0.48527979896330353, + "grad_norm": 314.4953918457031, + "learning_rate": 6.2018178886239695e-06, + "loss": 23.1536, + "step": 240230 + }, + { + "epoch": 0.48529999959598735, + "grad_norm": 420.4693908691406, + "learning_rate": 6.201479053433077e-06, + "loss": 17.1124, + "step": 240240 + }, + { + "epoch": 0.4853202002286712, + "grad_norm": 90.6326904296875, + "learning_rate": 6.2011402123863194e-06, + "loss": 25.9831, + "step": 240250 + }, + { + "epoch": 0.485340400861355, + "grad_norm": 519.2074584960938, + "learning_rate": 6.2008013654853505e-06, + "loss": 19.4891, + 
"step": 240260 + }, + { + "epoch": 0.4853606014940388, + "grad_norm": 337.56329345703125, + "learning_rate": 6.200462512731821e-06, + "loss": 14.9792, + "step": 240270 + }, + { + "epoch": 0.48538080212672263, + "grad_norm": 106.90280151367188, + "learning_rate": 6.200123654127383e-06, + "loss": 22.7102, + "step": 240280 + }, + { + "epoch": 0.4854010027594064, + "grad_norm": 149.78073120117188, + "learning_rate": 6.199784789673689e-06, + "loss": 23.2156, + "step": 240290 + }, + { + "epoch": 0.4854212033920902, + "grad_norm": 868.6480712890625, + "learning_rate": 6.199445919372388e-06, + "loss": 43.4462, + "step": 240300 + }, + { + "epoch": 0.48544140402477404, + "grad_norm": 400.4078063964844, + "learning_rate": 6.199107043225134e-06, + "loss": 12.8822, + "step": 240310 + }, + { + "epoch": 0.48546160465745786, + "grad_norm": 384.9847717285156, + "learning_rate": 6.198768161233577e-06, + "loss": 24.6104, + "step": 240320 + }, + { + "epoch": 0.4854818052901417, + "grad_norm": 174.50941467285156, + "learning_rate": 6.19842927339937e-06, + "loss": 24.4269, + "step": 240330 + }, + { + "epoch": 0.4855020059228255, + "grad_norm": 378.19415283203125, + "learning_rate": 6.198090379724163e-06, + "loss": 7.8148, + "step": 240340 + }, + { + "epoch": 0.4855222065555093, + "grad_norm": 239.51068115234375, + "learning_rate": 6.1977514802096105e-06, + "loss": 10.5392, + "step": 240350 + }, + { + "epoch": 0.48554240718819314, + "grad_norm": 537.36328125, + "learning_rate": 6.197412574857361e-06, + "loss": 17.7848, + "step": 240360 + }, + { + "epoch": 0.48556260782087696, + "grad_norm": 346.7205505371094, + "learning_rate": 6.197073663669069e-06, + "loss": 14.8186, + "step": 240370 + }, + { + "epoch": 0.4855828084535608, + "grad_norm": 267.4868469238281, + "learning_rate": 6.196734746646384e-06, + "loss": 26.4634, + "step": 240380 + }, + { + "epoch": 0.4856030090862446, + "grad_norm": 225.48048400878906, + "learning_rate": 6.19639582379096e-06, + "loss": 10.4006, + "step": 240390 + }, + { + "epoch": 0.4856232097189284, + "grad_norm": 335.59796142578125, + "learning_rate": 6.1960568951044475e-06, + "loss": 20.1079, + "step": 240400 + }, + { + "epoch": 0.48564341035161224, + "grad_norm": 686.9068603515625, + "learning_rate": 6.195717960588499e-06, + "loss": 17.1123, + "step": 240410 + }, + { + "epoch": 0.485663610984296, + "grad_norm": 449.8243103027344, + "learning_rate": 6.195379020244765e-06, + "loss": 16.0279, + "step": 240420 + }, + { + "epoch": 0.4856838116169798, + "grad_norm": 833.7225952148438, + "learning_rate": 6.195040074074899e-06, + "loss": 29.0912, + "step": 240430 + }, + { + "epoch": 0.48570401224966364, + "grad_norm": 206.11856079101562, + "learning_rate": 6.1947011220805535e-06, + "loss": 9.9603, + "step": 240440 + }, + { + "epoch": 0.48572421288234746, + "grad_norm": 652.8059692382812, + "learning_rate": 6.19436216426338e-06, + "loss": 23.0246, + "step": 240450 + }, + { + "epoch": 0.4857444135150313, + "grad_norm": 392.7476806640625, + "learning_rate": 6.194023200625029e-06, + "loss": 11.9411, + "step": 240460 + }, + { + "epoch": 0.4857646141477151, + "grad_norm": 333.5895690917969, + "learning_rate": 6.193684231167154e-06, + "loss": 21.2221, + "step": 240470 + }, + { + "epoch": 0.4857848147803989, + "grad_norm": 475.376708984375, + "learning_rate": 6.193345255891407e-06, + "loss": 22.862, + "step": 240480 + }, + { + "epoch": 0.48580501541308274, + "grad_norm": 651.0830688476562, + "learning_rate": 6.19300627479944e-06, + "loss": 17.0758, + "step": 240490 + }, + { + "epoch": 
0.48582521604576656, + "grad_norm": 192.7631072998047, + "learning_rate": 6.192667287892905e-06, + "loss": 28.367, + "step": 240500 + }, + { + "epoch": 0.4858454166784504, + "grad_norm": 296.6648254394531, + "learning_rate": 6.192328295173455e-06, + "loss": 18.921, + "step": 240510 + }, + { + "epoch": 0.4858656173111342, + "grad_norm": 175.9440460205078, + "learning_rate": 6.191989296642741e-06, + "loss": 34.9542, + "step": 240520 + }, + { + "epoch": 0.485885817943818, + "grad_norm": 734.977783203125, + "learning_rate": 6.1916502923024145e-06, + "loss": 29.6474, + "step": 240530 + }, + { + "epoch": 0.48590601857650184, + "grad_norm": 586.5077514648438, + "learning_rate": 6.191311282154131e-06, + "loss": 26.4853, + "step": 240540 + }, + { + "epoch": 0.4859262192091856, + "grad_norm": 535.6624145507812, + "learning_rate": 6.1909722661995394e-06, + "loss": 14.4432, + "step": 240550 + }, + { + "epoch": 0.4859464198418694, + "grad_norm": 276.8780212402344, + "learning_rate": 6.190633244440295e-06, + "loss": 11.2932, + "step": 240560 + }, + { + "epoch": 0.48596662047455325, + "grad_norm": 98.0551528930664, + "learning_rate": 6.190294216878045e-06, + "loss": 11.7655, + "step": 240570 + }, + { + "epoch": 0.48598682110723707, + "grad_norm": 275.4180908203125, + "learning_rate": 6.189955183514449e-06, + "loss": 19.0767, + "step": 240580 + }, + { + "epoch": 0.4860070217399209, + "grad_norm": 438.4539489746094, + "learning_rate": 6.1896161443511546e-06, + "loss": 28.8467, + "step": 240590 + }, + { + "epoch": 0.4860272223726047, + "grad_norm": 488.70843505859375, + "learning_rate": 6.189277099389816e-06, + "loss": 31.3845, + "step": 240600 + }, + { + "epoch": 0.4860474230052885, + "grad_norm": 647.655029296875, + "learning_rate": 6.188938048632084e-06, + "loss": 23.5975, + "step": 240610 + }, + { + "epoch": 0.48606762363797235, + "grad_norm": 404.51593017578125, + "learning_rate": 6.188598992079613e-06, + "loss": 15.501, + "step": 240620 + }, + { + "epoch": 0.48608782427065617, + "grad_norm": 723.1797485351562, + "learning_rate": 6.188259929734054e-06, + "loss": 29.2327, + "step": 240630 + }, + { + "epoch": 0.48610802490334, + "grad_norm": 179.34674072265625, + "learning_rate": 6.187920861597061e-06, + "loss": 27.3343, + "step": 240640 + }, + { + "epoch": 0.4861282255360238, + "grad_norm": 1395.5333251953125, + "learning_rate": 6.187581787670285e-06, + "loss": 24.1373, + "step": 240650 + }, + { + "epoch": 0.4861484261687076, + "grad_norm": 304.010009765625, + "learning_rate": 6.18724270795538e-06, + "loss": 17.1675, + "step": 240660 + }, + { + "epoch": 0.48616862680139145, + "grad_norm": 1029.99658203125, + "learning_rate": 6.186903622453998e-06, + "loss": 26.6351, + "step": 240670 + }, + { + "epoch": 0.4861888274340752, + "grad_norm": 5.324008941650391, + "learning_rate": 6.18656453116779e-06, + "loss": 17.6654, + "step": 240680 + }, + { + "epoch": 0.48620902806675903, + "grad_norm": 240.3070068359375, + "learning_rate": 6.186225434098413e-06, + "loss": 38.494, + "step": 240690 + }, + { + "epoch": 0.48622922869944285, + "grad_norm": 137.70303344726562, + "learning_rate": 6.185886331247516e-06, + "loss": 11.0379, + "step": 240700 + }, + { + "epoch": 0.48624942933212667, + "grad_norm": 0.020675495266914368, + "learning_rate": 6.1855472226167525e-06, + "loss": 14.5225, + "step": 240710 + }, + { + "epoch": 0.4862696299648105, + "grad_norm": 137.741943359375, + "learning_rate": 6.185208108207776e-06, + "loss": 10.1962, + "step": 240720 + }, + { + "epoch": 0.4862898305974943, + "grad_norm": 576.734375, + 
"learning_rate": 6.184868988022238e-06, + "loss": 32.971, + "step": 240730 + }, + { + "epoch": 0.48631003123017813, + "grad_norm": 975.5730590820312, + "learning_rate": 6.184529862061794e-06, + "loss": 31.4935, + "step": 240740 + }, + { + "epoch": 0.48633023186286195, + "grad_norm": 686.66259765625, + "learning_rate": 6.184190730328095e-06, + "loss": 15.9249, + "step": 240750 + }, + { + "epoch": 0.48635043249554577, + "grad_norm": 419.7538146972656, + "learning_rate": 6.1838515928227925e-06, + "loss": 36.5501, + "step": 240760 + }, + { + "epoch": 0.4863706331282296, + "grad_norm": 249.2308807373047, + "learning_rate": 6.1835124495475415e-06, + "loss": 28.6281, + "step": 240770 + }, + { + "epoch": 0.4863908337609134, + "grad_norm": 269.2325744628906, + "learning_rate": 6.183173300503995e-06, + "loss": 14.3527, + "step": 240780 + }, + { + "epoch": 0.48641103439359723, + "grad_norm": 336.5515441894531, + "learning_rate": 6.182834145693805e-06, + "loss": 34.5664, + "step": 240790 + }, + { + "epoch": 0.48643123502628105, + "grad_norm": 146.33477783203125, + "learning_rate": 6.182494985118625e-06, + "loss": 13.8386, + "step": 240800 + }, + { + "epoch": 0.4864514356589648, + "grad_norm": 209.32386779785156, + "learning_rate": 6.182155818780107e-06, + "loss": 21.1546, + "step": 240810 + }, + { + "epoch": 0.48647163629164863, + "grad_norm": 245.36766052246094, + "learning_rate": 6.181816646679904e-06, + "loss": 22.4551, + "step": 240820 + }, + { + "epoch": 0.48649183692433245, + "grad_norm": 66.82134246826172, + "learning_rate": 6.181477468819673e-06, + "loss": 6.197, + "step": 240830 + }, + { + "epoch": 0.4865120375570163, + "grad_norm": 560.7149047851562, + "learning_rate": 6.181138285201062e-06, + "loss": 30.5613, + "step": 240840 + }, + { + "epoch": 0.4865322381897001, + "grad_norm": 463.9530944824219, + "learning_rate": 6.180799095825727e-06, + "loss": 11.7607, + "step": 240850 + }, + { + "epoch": 0.4865524388223839, + "grad_norm": 480.4827575683594, + "learning_rate": 6.18045990069532e-06, + "loss": 18.891, + "step": 240860 + }, + { + "epoch": 0.48657263945506773, + "grad_norm": 701.4447021484375, + "learning_rate": 6.180120699811495e-06, + "loss": 17.7352, + "step": 240870 + }, + { + "epoch": 0.48659284008775155, + "grad_norm": 586.0391845703125, + "learning_rate": 6.179781493175906e-06, + "loss": 17.9909, + "step": 240880 + }, + { + "epoch": 0.4866130407204354, + "grad_norm": 673.2279663085938, + "learning_rate": 6.179442280790202e-06, + "loss": 24.3892, + "step": 240890 + }, + { + "epoch": 0.4866332413531192, + "grad_norm": 284.8859558105469, + "learning_rate": 6.179103062656042e-06, + "loss": 26.0115, + "step": 240900 + }, + { + "epoch": 0.486653441985803, + "grad_norm": 129.76742553710938, + "learning_rate": 6.178763838775076e-06, + "loss": 12.9221, + "step": 240910 + }, + { + "epoch": 0.48667364261848683, + "grad_norm": 338.1567687988281, + "learning_rate": 6.178424609148957e-06, + "loss": 13.301, + "step": 240920 + }, + { + "epoch": 0.4866938432511706, + "grad_norm": 299.5407409667969, + "learning_rate": 6.178085373779341e-06, + "loss": 20.4077, + "step": 240930 + }, + { + "epoch": 0.4867140438838544, + "grad_norm": 341.9033508300781, + "learning_rate": 6.17774613266788e-06, + "loss": 12.6328, + "step": 240940 + }, + { + "epoch": 0.48673424451653824, + "grad_norm": 285.49285888671875, + "learning_rate": 6.177406885816224e-06, + "loss": 30.9322, + "step": 240950 + }, + { + "epoch": 0.48675444514922206, + "grad_norm": 589.399658203125, + "learning_rate": 6.177067633226034e-06, + "loss": 
20.0082, + "step": 240960 + }, + { + "epoch": 0.4867746457819059, + "grad_norm": 269.603271484375, + "learning_rate": 6.1767283748989555e-06, + "loss": 20.4365, + "step": 240970 + }, + { + "epoch": 0.4867948464145897, + "grad_norm": 522.3572387695312, + "learning_rate": 6.176389110836647e-06, + "loss": 35.5067, + "step": 240980 + }, + { + "epoch": 0.4868150470472735, + "grad_norm": 204.86964416503906, + "learning_rate": 6.176049841040762e-06, + "loss": 21.5063, + "step": 240990 + }, + { + "epoch": 0.48683524767995734, + "grad_norm": 370.3238525390625, + "learning_rate": 6.17571056551295e-06, + "loss": 16.0572, + "step": 241000 + }, + { + "epoch": 0.48685544831264116, + "grad_norm": 608.7770385742188, + "learning_rate": 6.1753712842548695e-06, + "loss": 21.5328, + "step": 241010 + }, + { + "epoch": 0.486875648945325, + "grad_norm": 394.47198486328125, + "learning_rate": 6.175031997268171e-06, + "loss": 21.9332, + "step": 241020 + }, + { + "epoch": 0.4868958495780088, + "grad_norm": 174.228271484375, + "learning_rate": 6.174692704554509e-06, + "loss": 9.0828, + "step": 241030 + }, + { + "epoch": 0.4869160502106926, + "grad_norm": 536.2742309570312, + "learning_rate": 6.174353406115537e-06, + "loss": 34.6836, + "step": 241040 + }, + { + "epoch": 0.48693625084337644, + "grad_norm": 281.7669677734375, + "learning_rate": 6.17401410195291e-06, + "loss": 33.572, + "step": 241050 + }, + { + "epoch": 0.4869564514760602, + "grad_norm": 15.558895111083984, + "learning_rate": 6.17367479206828e-06, + "loss": 12.0758, + "step": 241060 + }, + { + "epoch": 0.486976652108744, + "grad_norm": 274.0773010253906, + "learning_rate": 6.173335476463303e-06, + "loss": 49.4286, + "step": 241070 + }, + { + "epoch": 0.48699685274142784, + "grad_norm": 286.81341552734375, + "learning_rate": 6.172996155139629e-06, + "loss": 14.5024, + "step": 241080 + }, + { + "epoch": 0.48701705337411166, + "grad_norm": 158.656494140625, + "learning_rate": 6.172656828098914e-06, + "loss": 20.9697, + "step": 241090 + }, + { + "epoch": 0.4870372540067955, + "grad_norm": 190.72592163085938, + "learning_rate": 6.172317495342812e-06, + "loss": 40.965, + "step": 241100 + }, + { + "epoch": 0.4870574546394793, + "grad_norm": 232.5026092529297, + "learning_rate": 6.171978156872978e-06, + "loss": 20.3812, + "step": 241110 + }, + { + "epoch": 0.4870776552721631, + "grad_norm": 78.13396453857422, + "learning_rate": 6.171638812691065e-06, + "loss": 28.8399, + "step": 241120 + }, + { + "epoch": 0.48709785590484694, + "grad_norm": 442.35137939453125, + "learning_rate": 6.171299462798725e-06, + "loss": 25.7488, + "step": 241130 + }, + { + "epoch": 0.48711805653753076, + "grad_norm": 113.7076416015625, + "learning_rate": 6.170960107197613e-06, + "loss": 10.6443, + "step": 241140 + }, + { + "epoch": 0.4871382571702146, + "grad_norm": 169.17816162109375, + "learning_rate": 6.1706207458893855e-06, + "loss": 13.0223, + "step": 241150 + }, + { + "epoch": 0.4871584578028984, + "grad_norm": 236.05850219726562, + "learning_rate": 6.170281378875692e-06, + "loss": 28.1193, + "step": 241160 + }, + { + "epoch": 0.4871786584355822, + "grad_norm": 343.2547302246094, + "learning_rate": 6.169942006158192e-06, + "loss": 22.3624, + "step": 241170 + }, + { + "epoch": 0.48719885906826604, + "grad_norm": 254.20347595214844, + "learning_rate": 6.169602627738533e-06, + "loss": 18.9047, + "step": 241180 + }, + { + "epoch": 0.4872190597009498, + "grad_norm": 315.96368408203125, + "learning_rate": 6.169263243618375e-06, + "loss": 10.5766, + "step": 241190 + }, + { + "epoch": 
0.48723926033363363, + "grad_norm": 284.27606201171875, + "learning_rate": 6.168923853799369e-06, + "loss": 24.1813, + "step": 241200 + }, + { + "epoch": 0.48725946096631745, + "grad_norm": 629.637451171875, + "learning_rate": 6.16858445828317e-06, + "loss": 33.6193, + "step": 241210 + }, + { + "epoch": 0.48727966159900127, + "grad_norm": 573.7402954101562, + "learning_rate": 6.168245057071434e-06, + "loss": 26.8433, + "step": 241220 + }, + { + "epoch": 0.4872998622316851, + "grad_norm": 338.18426513671875, + "learning_rate": 6.167905650165811e-06, + "loss": 18.7747, + "step": 241230 + }, + { + "epoch": 0.4873200628643689, + "grad_norm": 467.04095458984375, + "learning_rate": 6.167566237567957e-06, + "loss": 30.4401, + "step": 241240 + }, + { + "epoch": 0.48734026349705273, + "grad_norm": 233.25152587890625, + "learning_rate": 6.1672268192795285e-06, + "loss": 15.6139, + "step": 241250 + }, + { + "epoch": 0.48736046412973655, + "grad_norm": 622.6288452148438, + "learning_rate": 6.166887395302177e-06, + "loss": 47.6286, + "step": 241260 + }, + { + "epoch": 0.48738066476242037, + "grad_norm": 102.83345031738281, + "learning_rate": 6.166547965637557e-06, + "loss": 20.3463, + "step": 241270 + }, + { + "epoch": 0.4874008653951042, + "grad_norm": 530.1830444335938, + "learning_rate": 6.166208530287327e-06, + "loss": 33.7022, + "step": 241280 + }, + { + "epoch": 0.487421066027788, + "grad_norm": 496.9211120605469, + "learning_rate": 6.165869089253134e-06, + "loss": 17.6051, + "step": 241290 + }, + { + "epoch": 0.48744126666047183, + "grad_norm": 454.7920837402344, + "learning_rate": 6.16552964253664e-06, + "loss": 13.1026, + "step": 241300 + }, + { + "epoch": 0.48746146729315565, + "grad_norm": 367.9190979003906, + "learning_rate": 6.165190190139494e-06, + "loss": 12.3061, + "step": 241310 + }, + { + "epoch": 0.4874816679258394, + "grad_norm": 107.29808807373047, + "learning_rate": 6.164850732063352e-06, + "loss": 15.9814, + "step": 241320 + }, + { + "epoch": 0.48750186855852323, + "grad_norm": 325.2901611328125, + "learning_rate": 6.164511268309871e-06, + "loss": 13.5714, + "step": 241330 + }, + { + "epoch": 0.48752206919120705, + "grad_norm": 538.7465209960938, + "learning_rate": 6.1641717988807006e-06, + "loss": 20.7551, + "step": 241340 + }, + { + "epoch": 0.4875422698238909, + "grad_norm": 165.80274963378906, + "learning_rate": 6.163832323777499e-06, + "loss": 20.0998, + "step": 241350 + }, + { + "epoch": 0.4875624704565747, + "grad_norm": 793.2183837890625, + "learning_rate": 6.16349284300192e-06, + "loss": 21.3794, + "step": 241360 + }, + { + "epoch": 0.4875826710892585, + "grad_norm": 228.20997619628906, + "learning_rate": 6.1631533565556175e-06, + "loss": 17.5305, + "step": 241370 + }, + { + "epoch": 0.48760287172194233, + "grad_norm": 377.90380859375, + "learning_rate": 6.162813864440247e-06, + "loss": 10.1957, + "step": 241380 + }, + { + "epoch": 0.48762307235462615, + "grad_norm": 180.87107849121094, + "learning_rate": 6.162474366657464e-06, + "loss": 16.6347, + "step": 241390 + }, + { + "epoch": 0.48764327298731, + "grad_norm": 209.7317352294922, + "learning_rate": 6.1621348632089205e-06, + "loss": 24.1603, + "step": 241400 + }, + { + "epoch": 0.4876634736199938, + "grad_norm": 400.8587951660156, + "learning_rate": 6.161795354096273e-06, + "loss": 32.7475, + "step": 241410 + }, + { + "epoch": 0.4876836742526776, + "grad_norm": 380.6274719238281, + "learning_rate": 6.161455839321175e-06, + "loss": 17.9665, + "step": 241420 + }, + { + "epoch": 0.48770387488536143, + "grad_norm": 
490.2958984375, + "learning_rate": 6.161116318885283e-06, + "loss": 22.4291, + "step": 241430 + }, + { + "epoch": 0.48772407551804525, + "grad_norm": 253.66799926757812, + "learning_rate": 6.160776792790252e-06, + "loss": 10.5034, + "step": 241440 + }, + { + "epoch": 0.487744276150729, + "grad_norm": 435.06976318359375, + "learning_rate": 6.1604372610377335e-06, + "loss": 8.3813, + "step": 241450 + }, + { + "epoch": 0.48776447678341284, + "grad_norm": 684.455322265625, + "learning_rate": 6.160097723629387e-06, + "loss": 13.18, + "step": 241460 + }, + { + "epoch": 0.48778467741609666, + "grad_norm": 356.3554382324219, + "learning_rate": 6.159758180566863e-06, + "loss": 14.033, + "step": 241470 + }, + { + "epoch": 0.4878048780487805, + "grad_norm": 360.5289306640625, + "learning_rate": 6.159418631851818e-06, + "loss": 24.9207, + "step": 241480 + }, + { + "epoch": 0.4878250786814643, + "grad_norm": 573.3186645507812, + "learning_rate": 6.159079077485909e-06, + "loss": 22.123, + "step": 241490 + }, + { + "epoch": 0.4878452793141481, + "grad_norm": 21.944501876831055, + "learning_rate": 6.158739517470786e-06, + "loss": 10.3445, + "step": 241500 + }, + { + "epoch": 0.48786547994683194, + "grad_norm": 457.6626281738281, + "learning_rate": 6.158399951808111e-06, + "loss": 12.3422, + "step": 241510 + }, + { + "epoch": 0.48788568057951576, + "grad_norm": 420.6189270019531, + "learning_rate": 6.158060380499533e-06, + "loss": 11.9613, + "step": 241520 + }, + { + "epoch": 0.4879058812121996, + "grad_norm": 453.964111328125, + "learning_rate": 6.1577208035467095e-06, + "loss": 21.848, + "step": 241530 + }, + { + "epoch": 0.4879260818448834, + "grad_norm": 832.23486328125, + "learning_rate": 6.157381220951295e-06, + "loss": 25.5689, + "step": 241540 + }, + { + "epoch": 0.4879462824775672, + "grad_norm": 364.0274963378906, + "learning_rate": 6.157041632714945e-06, + "loss": 20.257, + "step": 241550 + }, + { + "epoch": 0.48796648311025104, + "grad_norm": 420.5585632324219, + "learning_rate": 6.1567020388393155e-06, + "loss": 59.8669, + "step": 241560 + }, + { + "epoch": 0.4879866837429348, + "grad_norm": 215.24520874023438, + "learning_rate": 6.156362439326059e-06, + "loss": 16.7804, + "step": 241570 + }, + { + "epoch": 0.4880068843756186, + "grad_norm": 529.3853149414062, + "learning_rate": 6.156022834176832e-06, + "loss": 19.6015, + "step": 241580 + }, + { + "epoch": 0.48802708500830244, + "grad_norm": 113.60696411132812, + "learning_rate": 6.155683223393291e-06, + "loss": 12.9144, + "step": 241590 + }, + { + "epoch": 0.48804728564098626, + "grad_norm": 218.59947204589844, + "learning_rate": 6.155343606977091e-06, + "loss": 18.9772, + "step": 241600 + }, + { + "epoch": 0.4880674862736701, + "grad_norm": 187.24163818359375, + "learning_rate": 6.155003984929883e-06, + "loss": 12.2726, + "step": 241610 + }, + { + "epoch": 0.4880876869063539, + "grad_norm": 284.59564208984375, + "learning_rate": 6.15466435725333e-06, + "loss": 9.7764, + "step": 241620 + }, + { + "epoch": 0.4881078875390377, + "grad_norm": 40.67657470703125, + "learning_rate": 6.154324723949079e-06, + "loss": 11.5925, + "step": 241630 + }, + { + "epoch": 0.48812808817172154, + "grad_norm": 166.55136108398438, + "learning_rate": 6.153985085018792e-06, + "loss": 29.3433, + "step": 241640 + }, + { + "epoch": 0.48814828880440536, + "grad_norm": 546.1761474609375, + "learning_rate": 6.15364544046412e-06, + "loss": 36.4613, + "step": 241650 + }, + { + "epoch": 0.4881684894370892, + "grad_norm": 199.06268310546875, + "learning_rate": 
6.153305790286721e-06, + "loss": 29.4845, + "step": 241660 + }, + { + "epoch": 0.488188690069773, + "grad_norm": 409.25390625, + "learning_rate": 6.15296613448825e-06, + "loss": 15.4448, + "step": 241670 + }, + { + "epoch": 0.4882088907024568, + "grad_norm": 119.15308380126953, + "learning_rate": 6.152626473070361e-06, + "loss": 23.0436, + "step": 241680 + }, + { + "epoch": 0.48822909133514064, + "grad_norm": 1010.7807006835938, + "learning_rate": 6.152286806034711e-06, + "loss": 29.5118, + "step": 241690 + }, + { + "epoch": 0.4882492919678244, + "grad_norm": 545.5733032226562, + "learning_rate": 6.151947133382954e-06, + "loss": 24.7407, + "step": 241700 + }, + { + "epoch": 0.4882694926005082, + "grad_norm": 398.5425109863281, + "learning_rate": 6.151607455116746e-06, + "loss": 18.063, + "step": 241710 + }, + { + "epoch": 0.48828969323319205, + "grad_norm": 266.5617370605469, + "learning_rate": 6.1512677712377435e-06, + "loss": 23.8374, + "step": 241720 + }, + { + "epoch": 0.48830989386587587, + "grad_norm": 431.0079040527344, + "learning_rate": 6.150928081747603e-06, + "loss": 31.6731, + "step": 241730 + }, + { + "epoch": 0.4883300944985597, + "grad_norm": 576.68212890625, + "learning_rate": 6.150588386647977e-06, + "loss": 19.0013, + "step": 241740 + }, + { + "epoch": 0.4883502951312435, + "grad_norm": 172.9222412109375, + "learning_rate": 6.150248685940523e-06, + "loss": 21.7811, + "step": 241750 + }, + { + "epoch": 0.4883704957639273, + "grad_norm": 424.4622802734375, + "learning_rate": 6.149908979626897e-06, + "loss": 19.5431, + "step": 241760 + }, + { + "epoch": 0.48839069639661115, + "grad_norm": 827.7529907226562, + "learning_rate": 6.149569267708752e-06, + "loss": 34.5501, + "step": 241770 + }, + { + "epoch": 0.48841089702929497, + "grad_norm": 297.3861083984375, + "learning_rate": 6.149229550187748e-06, + "loss": 24.225, + "step": 241780 + }, + { + "epoch": 0.4884310976619788, + "grad_norm": 812.0133666992188, + "learning_rate": 6.148889827065538e-06, + "loss": 26.0939, + "step": 241790 + }, + { + "epoch": 0.4884512982946626, + "grad_norm": 118.50092315673828, + "learning_rate": 6.148550098343778e-06, + "loss": 8.6631, + "step": 241800 + }, + { + "epoch": 0.4884714989273464, + "grad_norm": 176.11004638671875, + "learning_rate": 6.148210364024125e-06, + "loss": 15.2653, + "step": 241810 + }, + { + "epoch": 0.48849169956003025, + "grad_norm": 289.4030456542969, + "learning_rate": 6.147870624108233e-06, + "loss": 12.5906, + "step": 241820 + }, + { + "epoch": 0.488511900192714, + "grad_norm": 653.380126953125, + "learning_rate": 6.147530878597761e-06, + "loss": 25.3767, + "step": 241830 + }, + { + "epoch": 0.48853210082539783, + "grad_norm": 577.6995239257812, + "learning_rate": 6.14719112749436e-06, + "loss": 24.4381, + "step": 241840 + }, + { + "epoch": 0.48855230145808165, + "grad_norm": 5.02866268157959, + "learning_rate": 6.146851370799689e-06, + "loss": 24.4571, + "step": 241850 + }, + { + "epoch": 0.48857250209076547, + "grad_norm": 338.2427673339844, + "learning_rate": 6.146511608515404e-06, + "loss": 14.1267, + "step": 241860 + }, + { + "epoch": 0.4885927027234493, + "grad_norm": 259.8116149902344, + "learning_rate": 6.146171840643161e-06, + "loss": 24.3192, + "step": 241870 + }, + { + "epoch": 0.4886129033561331, + "grad_norm": 146.1227264404297, + "learning_rate": 6.145832067184614e-06, + "loss": 19.1257, + "step": 241880 + }, + { + "epoch": 0.48863310398881693, + "grad_norm": 338.2033386230469, + "learning_rate": 6.145492288141422e-06, + "loss": 15.5932, + "step": 241890 
+ }, + { + "epoch": 0.48865330462150075, + "grad_norm": 264.390380859375, + "learning_rate": 6.145152503515239e-06, + "loss": 11.5019, + "step": 241900 + }, + { + "epoch": 0.48867350525418457, + "grad_norm": 366.4673156738281, + "learning_rate": 6.144812713307721e-06, + "loss": 18.0583, + "step": 241910 + }, + { + "epoch": 0.4886937058868684, + "grad_norm": 97.84915161132812, + "learning_rate": 6.144472917520526e-06, + "loss": 8.6361, + "step": 241920 + }, + { + "epoch": 0.4887139065195522, + "grad_norm": 435.5666198730469, + "learning_rate": 6.1441331161553065e-06, + "loss": 8.8293, + "step": 241930 + }, + { + "epoch": 0.48873410715223603, + "grad_norm": 257.6641540527344, + "learning_rate": 6.143793309213724e-06, + "loss": 23.476, + "step": 241940 + }, + { + "epoch": 0.48875430778491985, + "grad_norm": 55.174983978271484, + "learning_rate": 6.143453496697428e-06, + "loss": 10.6772, + "step": 241950 + }, + { + "epoch": 0.4887745084176036, + "grad_norm": 374.9102478027344, + "learning_rate": 6.143113678608081e-06, + "loss": 36.4494, + "step": 241960 + }, + { + "epoch": 0.48879470905028743, + "grad_norm": 4.087224006652832, + "learning_rate": 6.142773854947336e-06, + "loss": 31.4217, + "step": 241970 + }, + { + "epoch": 0.48881490968297125, + "grad_norm": 553.54541015625, + "learning_rate": 6.14243402571685e-06, + "loss": 40.1237, + "step": 241980 + }, + { + "epoch": 0.4888351103156551, + "grad_norm": 262.3894348144531, + "learning_rate": 6.142094190918279e-06, + "loss": 15.21, + "step": 241990 + }, + { + "epoch": 0.4888553109483389, + "grad_norm": 632.6431274414062, + "learning_rate": 6.141754350553279e-06, + "loss": 23.2838, + "step": 242000 + }, + { + "epoch": 0.4888755115810227, + "grad_norm": 398.20086669921875, + "learning_rate": 6.141414504623509e-06, + "loss": 24.0013, + "step": 242010 + }, + { + "epoch": 0.48889571221370653, + "grad_norm": 867.7739868164062, + "learning_rate": 6.14107465313062e-06, + "loss": 27.5092, + "step": 242020 + }, + { + "epoch": 0.48891591284639035, + "grad_norm": 356.7303771972656, + "learning_rate": 6.140734796076273e-06, + "loss": 15.2382, + "step": 242030 + }, + { + "epoch": 0.4889361134790742, + "grad_norm": 274.17041015625, + "learning_rate": 6.1403949334621215e-06, + "loss": 13.1964, + "step": 242040 + }, + { + "epoch": 0.488956314111758, + "grad_norm": 189.7613983154297, + "learning_rate": 6.140055065289826e-06, + "loss": 14.1268, + "step": 242050 + }, + { + "epoch": 0.4889765147444418, + "grad_norm": 364.79498291015625, + "learning_rate": 6.139715191561038e-06, + "loss": 29.4825, + "step": 242060 + }, + { + "epoch": 0.48899671537712563, + "grad_norm": 617.8101196289062, + "learning_rate": 6.139375312277418e-06, + "loss": 17.7693, + "step": 242070 + }, + { + "epoch": 0.48901691600980945, + "grad_norm": 470.8154296875, + "learning_rate": 6.1390354274406205e-06, + "loss": 18.9127, + "step": 242080 + }, + { + "epoch": 0.4890371166424932, + "grad_norm": 274.7874755859375, + "learning_rate": 6.138695537052301e-06, + "loss": 30.696, + "step": 242090 + }, + { + "epoch": 0.48905731727517704, + "grad_norm": 307.2875671386719, + "learning_rate": 6.138355641114121e-06, + "loss": 33.591, + "step": 242100 + }, + { + "epoch": 0.48907751790786086, + "grad_norm": 295.16082763671875, + "learning_rate": 6.138015739627731e-06, + "loss": 37.4238, + "step": 242110 + }, + { + "epoch": 0.4890977185405447, + "grad_norm": 630.1863403320312, + "learning_rate": 6.137675832594792e-06, + "loss": 24.3865, + "step": 242120 + }, + { + "epoch": 0.4891179191732285, + "grad_norm": 
339.9185791015625, + "learning_rate": 6.137335920016957e-06, + "loss": 17.0401, + "step": 242130 + }, + { + "epoch": 0.4891381198059123, + "grad_norm": 404.68560791015625, + "learning_rate": 6.136996001895885e-06, + "loss": 28.5077, + "step": 242140 + }, + { + "epoch": 0.48915832043859614, + "grad_norm": 204.63204956054688, + "learning_rate": 6.136656078233233e-06, + "loss": 20.071, + "step": 242150 + }, + { + "epoch": 0.48917852107127996, + "grad_norm": 730.635009765625, + "learning_rate": 6.136316149030657e-06, + "loss": 16.8284, + "step": 242160 + }, + { + "epoch": 0.4891987217039638, + "grad_norm": 550.9608154296875, + "learning_rate": 6.135976214289814e-06, + "loss": 30.223, + "step": 242170 + }, + { + "epoch": 0.4892189223366476, + "grad_norm": 313.05218505859375, + "learning_rate": 6.135636274012361e-06, + "loss": 11.7001, + "step": 242180 + }, + { + "epoch": 0.4892391229693314, + "grad_norm": 329.033203125, + "learning_rate": 6.135296328199954e-06, + "loss": 31.2961, + "step": 242190 + }, + { + "epoch": 0.48925932360201524, + "grad_norm": 642.3125610351562, + "learning_rate": 6.134956376854251e-06, + "loss": 31.3009, + "step": 242200 + }, + { + "epoch": 0.489279524234699, + "grad_norm": 190.77426147460938, + "learning_rate": 6.134616419976908e-06, + "loss": 25.9056, + "step": 242210 + }, + { + "epoch": 0.4892997248673828, + "grad_norm": 554.1141967773438, + "learning_rate": 6.134276457569581e-06, + "loss": 33.5025, + "step": 242220 + }, + { + "epoch": 0.48931992550006664, + "grad_norm": 361.7659912109375, + "learning_rate": 6.133936489633929e-06, + "loss": 14.0255, + "step": 242230 + }, + { + "epoch": 0.48934012613275046, + "grad_norm": 389.94696044921875, + "learning_rate": 6.133596516171609e-06, + "loss": 28.9949, + "step": 242240 + }, + { + "epoch": 0.4893603267654343, + "grad_norm": 161.18128967285156, + "learning_rate": 6.133256537184276e-06, + "loss": 19.2994, + "step": 242250 + }, + { + "epoch": 0.4893805273981181, + "grad_norm": 258.78485107421875, + "learning_rate": 6.132916552673588e-06, + "loss": 25.4497, + "step": 242260 + }, + { + "epoch": 0.4894007280308019, + "grad_norm": 90.20409393310547, + "learning_rate": 6.132576562641203e-06, + "loss": 18.4331, + "step": 242270 + }, + { + "epoch": 0.48942092866348574, + "grad_norm": 187.44119262695312, + "learning_rate": 6.132236567088777e-06, + "loss": 18.9665, + "step": 242280 + }, + { + "epoch": 0.48944112929616956, + "grad_norm": 179.86375427246094, + "learning_rate": 6.131896566017967e-06, + "loss": 15.6383, + "step": 242290 + }, + { + "epoch": 0.4894613299288534, + "grad_norm": 829.7676391601562, + "learning_rate": 6.13155655943043e-06, + "loss": 19.8648, + "step": 242300 + }, + { + "epoch": 0.4894815305615372, + "grad_norm": 247.04991149902344, + "learning_rate": 6.131216547327824e-06, + "loss": 10.0677, + "step": 242310 + }, + { + "epoch": 0.489501731194221, + "grad_norm": 341.4189453125, + "learning_rate": 6.130876529711806e-06, + "loss": 28.5142, + "step": 242320 + }, + { + "epoch": 0.48952193182690484, + "grad_norm": 138.14256286621094, + "learning_rate": 6.130536506584032e-06, + "loss": 32.4667, + "step": 242330 + }, + { + "epoch": 0.4895421324595886, + "grad_norm": 206.76641845703125, + "learning_rate": 6.130196477946162e-06, + "loss": 22.5495, + "step": 242340 + }, + { + "epoch": 0.48956233309227243, + "grad_norm": 207.0066375732422, + "learning_rate": 6.12985644379985e-06, + "loss": 22.5523, + "step": 242350 + }, + { + "epoch": 0.48958253372495625, + "grad_norm": 156.70924377441406, + "learning_rate": 
6.1295164041467545e-06, + "loss": 17.6584, + "step": 242360 + }, + { + "epoch": 0.48960273435764007, + "grad_norm": 151.94192504882812, + "learning_rate": 6.129176358988535e-06, + "loss": 19.6377, + "step": 242370 + }, + { + "epoch": 0.4896229349903239, + "grad_norm": 36.215518951416016, + "learning_rate": 6.128836308326844e-06, + "loss": 15.9161, + "step": 242380 + }, + { + "epoch": 0.4896431356230077, + "grad_norm": 188.47653198242188, + "learning_rate": 6.128496252163344e-06, + "loss": 19.3989, + "step": 242390 + }, + { + "epoch": 0.48966333625569153, + "grad_norm": 352.2079772949219, + "learning_rate": 6.128156190499688e-06, + "loss": 14.3782, + "step": 242400 + }, + { + "epoch": 0.48968353688837535, + "grad_norm": 754.5715942382812, + "learning_rate": 6.127816123337538e-06, + "loss": 17.6178, + "step": 242410 + }, + { + "epoch": 0.48970373752105917, + "grad_norm": 93.8069076538086, + "learning_rate": 6.127476050678548e-06, + "loss": 21.4399, + "step": 242420 + }, + { + "epoch": 0.489723938153743, + "grad_norm": 91.12076568603516, + "learning_rate": 6.127135972524376e-06, + "loss": 15.9388, + "step": 242430 + }, + { + "epoch": 0.4897441387864268, + "grad_norm": 537.439208984375, + "learning_rate": 6.126795888876681e-06, + "loss": 25.951, + "step": 242440 + }, + { + "epoch": 0.48976433941911063, + "grad_norm": 0.15321671962738037, + "learning_rate": 6.1264557997371185e-06, + "loss": 11.6078, + "step": 242450 + }, + { + "epoch": 0.48978454005179445, + "grad_norm": 500.2206726074219, + "learning_rate": 6.126115705107347e-06, + "loss": 10.685, + "step": 242460 + }, + { + "epoch": 0.4898047406844782, + "grad_norm": 956.3645629882812, + "learning_rate": 6.125775604989025e-06, + "loss": 20.7941, + "step": 242470 + }, + { + "epoch": 0.48982494131716203, + "grad_norm": 65.6939468383789, + "learning_rate": 6.125435499383808e-06, + "loss": 18.1657, + "step": 242480 + }, + { + "epoch": 0.48984514194984585, + "grad_norm": 326.1251525878906, + "learning_rate": 6.125095388293356e-06, + "loss": 18.04, + "step": 242490 + }, + { + "epoch": 0.4898653425825297, + "grad_norm": 208.84498596191406, + "learning_rate": 6.124755271719326e-06, + "loss": 7.6182, + "step": 242500 + }, + { + "epoch": 0.4898855432152135, + "grad_norm": 89.06510925292969, + "learning_rate": 6.124415149663374e-06, + "loss": 14.4377, + "step": 242510 + }, + { + "epoch": 0.4899057438478973, + "grad_norm": 541.0916137695312, + "learning_rate": 6.12407502212716e-06, + "loss": 22.0965, + "step": 242520 + }, + { + "epoch": 0.48992594448058113, + "grad_norm": 142.01412963867188, + "learning_rate": 6.12373488911234e-06, + "loss": 11.4386, + "step": 242530 + }, + { + "epoch": 0.48994614511326495, + "grad_norm": 1395.0645751953125, + "learning_rate": 6.123394750620571e-06, + "loss": 22.3274, + "step": 242540 + }, + { + "epoch": 0.4899663457459488, + "grad_norm": 91.57501983642578, + "learning_rate": 6.123054606653515e-06, + "loss": 27.7396, + "step": 242550 + }, + { + "epoch": 0.4899865463786326, + "grad_norm": 351.1894226074219, + "learning_rate": 6.122714457212825e-06, + "loss": 19.8754, + "step": 242560 + }, + { + "epoch": 0.4900067470113164, + "grad_norm": 672.3131103515625, + "learning_rate": 6.122374302300162e-06, + "loss": 47.4908, + "step": 242570 + }, + { + "epoch": 0.49002694764400023, + "grad_norm": 161.06809997558594, + "learning_rate": 6.122034141917183e-06, + "loss": 28.3776, + "step": 242580 + }, + { + "epoch": 0.49004714827668405, + "grad_norm": 273.0909118652344, + "learning_rate": 6.121693976065545e-06, + "loss": 21.7286, + 
"step": 242590 + }, + { + "epoch": 0.4900673489093678, + "grad_norm": 341.38665771484375, + "learning_rate": 6.121353804746907e-06, + "loss": 12.567, + "step": 242600 + }, + { + "epoch": 0.49008754954205164, + "grad_norm": 514.90234375, + "learning_rate": 6.121013627962925e-06, + "loss": 31.1853, + "step": 242610 + }, + { + "epoch": 0.49010775017473546, + "grad_norm": 1056.0909423828125, + "learning_rate": 6.1206734457152615e-06, + "loss": 28.3193, + "step": 242620 + }, + { + "epoch": 0.4901279508074193, + "grad_norm": 118.93157196044922, + "learning_rate": 6.12033325800557e-06, + "loss": 17.8808, + "step": 242630 + }, + { + "epoch": 0.4901481514401031, + "grad_norm": 204.60494995117188, + "learning_rate": 6.119993064835509e-06, + "loss": 12.2071, + "step": 242640 + }, + { + "epoch": 0.4901683520727869, + "grad_norm": 410.5423583984375, + "learning_rate": 6.119652866206739e-06, + "loss": 20.8945, + "step": 242650 + }, + { + "epoch": 0.49018855270547074, + "grad_norm": 348.9383544921875, + "learning_rate": 6.119312662120916e-06, + "loss": 13.519, + "step": 242660 + }, + { + "epoch": 0.49020875333815456, + "grad_norm": 170.8218994140625, + "learning_rate": 6.118972452579699e-06, + "loss": 15.5436, + "step": 242670 + }, + { + "epoch": 0.4902289539708384, + "grad_norm": 2507.953857421875, + "learning_rate": 6.118632237584748e-06, + "loss": 32.6182, + "step": 242680 + }, + { + "epoch": 0.4902491546035222, + "grad_norm": 864.6946411132812, + "learning_rate": 6.118292017137716e-06, + "loss": 20.8481, + "step": 242690 + }, + { + "epoch": 0.490269355236206, + "grad_norm": 607.1465454101562, + "learning_rate": 6.117951791240265e-06, + "loss": 25.8953, + "step": 242700 + }, + { + "epoch": 0.49028955586888984, + "grad_norm": 1.6512856483459473, + "learning_rate": 6.117611559894054e-06, + "loss": 20.952, + "step": 242710 + }, + { + "epoch": 0.49030975650157366, + "grad_norm": 491.7100830078125, + "learning_rate": 6.117271323100739e-06, + "loss": 18.1705, + "step": 242720 + }, + { + "epoch": 0.4903299571342574, + "grad_norm": 297.95159912109375, + "learning_rate": 6.116931080861979e-06, + "loss": 27.1697, + "step": 242730 + }, + { + "epoch": 0.49035015776694124, + "grad_norm": 85.9748764038086, + "learning_rate": 6.116590833179432e-06, + "loss": 30.2963, + "step": 242740 + }, + { + "epoch": 0.49037035839962506, + "grad_norm": 49.89543914794922, + "learning_rate": 6.116250580054758e-06, + "loss": 17.0189, + "step": 242750 + }, + { + "epoch": 0.4903905590323089, + "grad_norm": 111.95653533935547, + "learning_rate": 6.115910321489613e-06, + "loss": 18.4601, + "step": 242760 + }, + { + "epoch": 0.4904107596649927, + "grad_norm": 389.2860412597656, + "learning_rate": 6.115570057485656e-06, + "loss": 13.8408, + "step": 242770 + }, + { + "epoch": 0.4904309602976765, + "grad_norm": 355.4328918457031, + "learning_rate": 6.1152297880445476e-06, + "loss": 15.4722, + "step": 242780 + }, + { + "epoch": 0.49045116093036034, + "grad_norm": 432.9563903808594, + "learning_rate": 6.114889513167943e-06, + "loss": 19.0556, + "step": 242790 + }, + { + "epoch": 0.49047136156304416, + "grad_norm": 464.92437744140625, + "learning_rate": 6.114549232857503e-06, + "loss": 17.4327, + "step": 242800 + }, + { + "epoch": 0.490491562195728, + "grad_norm": 285.1131591796875, + "learning_rate": 6.114208947114883e-06, + "loss": 23.6502, + "step": 242810 + }, + { + "epoch": 0.4905117628284118, + "grad_norm": 493.794677734375, + "learning_rate": 6.113868655941747e-06, + "loss": 27.5541, + "step": 242820 + }, + { + "epoch": 
0.4905319634610956, + "grad_norm": 34.96025848388672, + "learning_rate": 6.1135283593397475e-06, + "loss": 21.0633, + "step": 242830 + }, + { + "epoch": 0.49055216409377944, + "grad_norm": 262.76287841796875, + "learning_rate": 6.113188057310548e-06, + "loss": 15.749, + "step": 242840 + }, + { + "epoch": 0.4905723647264632, + "grad_norm": 321.22674560546875, + "learning_rate": 6.112847749855804e-06, + "loss": 18.4851, + "step": 242850 + }, + { + "epoch": 0.490592565359147, + "grad_norm": 383.23345947265625, + "learning_rate": 6.112507436977175e-06, + "loss": 26.9607, + "step": 242860 + }, + { + "epoch": 0.49061276599183085, + "grad_norm": 1.3280316591262817, + "learning_rate": 6.112167118676321e-06, + "loss": 16.2132, + "step": 242870 + }, + { + "epoch": 0.49063296662451467, + "grad_norm": 410.2691650390625, + "learning_rate": 6.111826794954896e-06, + "loss": 32.892, + "step": 242880 + }, + { + "epoch": 0.4906531672571985, + "grad_norm": 213.3309326171875, + "learning_rate": 6.1114864658145655e-06, + "loss": 22.2195, + "step": 242890 + }, + { + "epoch": 0.4906733678898823, + "grad_norm": 489.11309814453125, + "learning_rate": 6.111146131256983e-06, + "loss": 15.1283, + "step": 242900 + }, + { + "epoch": 0.4906935685225661, + "grad_norm": 139.47201538085938, + "learning_rate": 6.110805791283809e-06, + "loss": 14.9618, + "step": 242910 + }, + { + "epoch": 0.49071376915524995, + "grad_norm": 233.48304748535156, + "learning_rate": 6.110465445896703e-06, + "loss": 24.2609, + "step": 242920 + }, + { + "epoch": 0.49073396978793377, + "grad_norm": 133.65975952148438, + "learning_rate": 6.110125095097323e-06, + "loss": 22.8066, + "step": 242930 + }, + { + "epoch": 0.4907541704206176, + "grad_norm": 431.73089599609375, + "learning_rate": 6.109784738887327e-06, + "loss": 13.4274, + "step": 242940 + }, + { + "epoch": 0.4907743710533014, + "grad_norm": 384.90521240234375, + "learning_rate": 6.109444377268376e-06, + "loss": 31.618, + "step": 242950 + }, + { + "epoch": 0.4907945716859852, + "grad_norm": 913.1565551757812, + "learning_rate": 6.109104010242127e-06, + "loss": 30.0312, + "step": 242960 + }, + { + "epoch": 0.49081477231866905, + "grad_norm": 501.4960632324219, + "learning_rate": 6.10876363781024e-06, + "loss": 25.9956, + "step": 242970 + }, + { + "epoch": 0.4908349729513528, + "grad_norm": 190.05662536621094, + "learning_rate": 6.108423259974375e-06, + "loss": 12.8584, + "step": 242980 + }, + { + "epoch": 0.49085517358403663, + "grad_norm": 281.55047607421875, + "learning_rate": 6.108082876736185e-06, + "loss": 17.4782, + "step": 242990 + }, + { + "epoch": 0.49087537421672045, + "grad_norm": 406.5660705566406, + "learning_rate": 6.107742488097338e-06, + "loss": 27.0723, + "step": 243000 + }, + { + "epoch": 0.49089557484940427, + "grad_norm": 318.7551574707031, + "learning_rate": 6.107402094059485e-06, + "loss": 14.0688, + "step": 243010 + }, + { + "epoch": 0.4909157754820881, + "grad_norm": 64.70513153076172, + "learning_rate": 6.107061694624291e-06, + "loss": 10.8414, + "step": 243020 + }, + { + "epoch": 0.4909359761147719, + "grad_norm": 131.3190155029297, + "learning_rate": 6.1067212897934115e-06, + "loss": 29.9489, + "step": 243030 + }, + { + "epoch": 0.49095617674745573, + "grad_norm": 686.302001953125, + "learning_rate": 6.106380879568507e-06, + "loss": 26.9483, + "step": 243040 + }, + { + "epoch": 0.49097637738013955, + "grad_norm": 609.8250732421875, + "learning_rate": 6.106040463951237e-06, + "loss": 26.0329, + "step": 243050 + }, + { + "epoch": 0.49099657801282337, + "grad_norm": 
180.33126831054688, + "learning_rate": 6.105700042943258e-06, + "loss": 30.505, + "step": 243060 + }, + { + "epoch": 0.4910167786455072, + "grad_norm": 273.23455810546875, + "learning_rate": 6.105359616546232e-06, + "loss": 13.8137, + "step": 243070 + }, + { + "epoch": 0.491036979278191, + "grad_norm": 520.9368286132812, + "learning_rate": 6.105019184761818e-06, + "loss": 19.1669, + "step": 243080 + }, + { + "epoch": 0.49105717991087483, + "grad_norm": 85.96605682373047, + "learning_rate": 6.104678747591674e-06, + "loss": 13.7416, + "step": 243090 + }, + { + "epoch": 0.49107738054355865, + "grad_norm": 491.14019775390625, + "learning_rate": 6.10433830503746e-06, + "loss": 10.9353, + "step": 243100 + }, + { + "epoch": 0.4910975811762424, + "grad_norm": 325.0634460449219, + "learning_rate": 6.1039978571008355e-06, + "loss": 16.0997, + "step": 243110 + }, + { + "epoch": 0.49111778180892623, + "grad_norm": 607.64111328125, + "learning_rate": 6.103657403783458e-06, + "loss": 17.9357, + "step": 243120 + }, + { + "epoch": 0.49113798244161005, + "grad_norm": 355.9501953125, + "learning_rate": 6.103316945086989e-06, + "loss": 16.597, + "step": 243130 + }, + { + "epoch": 0.4911581830742939, + "grad_norm": 448.55682373046875, + "learning_rate": 6.102976481013086e-06, + "loss": 30.567, + "step": 243140 + }, + { + "epoch": 0.4911783837069777, + "grad_norm": 284.8009033203125, + "learning_rate": 6.102636011563411e-06, + "loss": 23.6241, + "step": 243150 + }, + { + "epoch": 0.4911985843396615, + "grad_norm": 155.9999542236328, + "learning_rate": 6.102295536739622e-06, + "loss": 13.3552, + "step": 243160 + }, + { + "epoch": 0.49121878497234533, + "grad_norm": 208.71463012695312, + "learning_rate": 6.101955056543376e-06, + "loss": 33.184, + "step": 243170 + }, + { + "epoch": 0.49123898560502915, + "grad_norm": 539.50341796875, + "learning_rate": 6.101614570976336e-06, + "loss": 25.0127, + "step": 243180 + }, + { + "epoch": 0.491259186237713, + "grad_norm": 100.99124145507812, + "learning_rate": 6.101274080040161e-06, + "loss": 27.4514, + "step": 243190 + }, + { + "epoch": 0.4912793868703968, + "grad_norm": 693.3148193359375, + "learning_rate": 6.100933583736508e-06, + "loss": 17.5249, + "step": 243200 + }, + { + "epoch": 0.4912995875030806, + "grad_norm": 150.2967987060547, + "learning_rate": 6.10059308206704e-06, + "loss": 29.5014, + "step": 243210 + }, + { + "epoch": 0.49131978813576443, + "grad_norm": 358.312255859375, + "learning_rate": 6.100252575033413e-06, + "loss": 28.4867, + "step": 243220 + }, + { + "epoch": 0.49133998876844825, + "grad_norm": 285.40460205078125, + "learning_rate": 6.0999120626372895e-06, + "loss": 30.495, + "step": 243230 + }, + { + "epoch": 0.491360189401132, + "grad_norm": 596.3458862304688, + "learning_rate": 6.099571544880328e-06, + "loss": 30.6144, + "step": 243240 + }, + { + "epoch": 0.49138039003381584, + "grad_norm": 765.2393188476562, + "learning_rate": 6.099231021764188e-06, + "loss": 35.0226, + "step": 243250 + }, + { + "epoch": 0.49140059066649966, + "grad_norm": 424.7465515136719, + "learning_rate": 6.098890493290529e-06, + "loss": 15.3136, + "step": 243260 + }, + { + "epoch": 0.4914207912991835, + "grad_norm": 634.1060180664062, + "learning_rate": 6.0985499594610136e-06, + "loss": 13.1761, + "step": 243270 + }, + { + "epoch": 0.4914409919318673, + "grad_norm": 74.19820404052734, + "learning_rate": 6.098209420277294e-06, + "loss": 13.936, + "step": 243280 + }, + { + "epoch": 0.4914611925645511, + "grad_norm": 488.5048522949219, + "learning_rate": 
6.097868875741039e-06, + "loss": 25.0321, + "step": 243290 + }, + { + "epoch": 0.49148139319723494, + "grad_norm": 130.7450714111328, + "learning_rate": 6.097528325853903e-06, + "loss": 7.7867, + "step": 243300 + }, + { + "epoch": 0.49150159382991876, + "grad_norm": 580.2894287109375, + "learning_rate": 6.0971877706175465e-06, + "loss": 16.2283, + "step": 243310 + }, + { + "epoch": 0.4915217944626026, + "grad_norm": 281.7629089355469, + "learning_rate": 6.09684721003363e-06, + "loss": 17.5864, + "step": 243320 + }, + { + "epoch": 0.4915419950952864, + "grad_norm": 540.7857055664062, + "learning_rate": 6.096506644103813e-06, + "loss": 16.8208, + "step": 243330 + }, + { + "epoch": 0.4915621957279702, + "grad_norm": 92.08300018310547, + "learning_rate": 6.096166072829757e-06, + "loss": 11.1677, + "step": 243340 + }, + { + "epoch": 0.49158239636065404, + "grad_norm": 549.4891357421875, + "learning_rate": 6.095825496213119e-06, + "loss": 15.3587, + "step": 243350 + }, + { + "epoch": 0.4916025969933378, + "grad_norm": 290.5025634765625, + "learning_rate": 6.095484914255561e-06, + "loss": 28.3565, + "step": 243360 + }, + { + "epoch": 0.4916227976260216, + "grad_norm": 1124.6158447265625, + "learning_rate": 6.0951443269587426e-06, + "loss": 22.2388, + "step": 243370 + }, + { + "epoch": 0.49164299825870544, + "grad_norm": 138.3731231689453, + "learning_rate": 6.094803734324324e-06, + "loss": 9.4421, + "step": 243380 + }, + { + "epoch": 0.49166319889138926, + "grad_norm": 135.51327514648438, + "learning_rate": 6.094463136353964e-06, + "loss": 18.7498, + "step": 243390 + }, + { + "epoch": 0.4916833995240731, + "grad_norm": 226.020263671875, + "learning_rate": 6.094122533049324e-06, + "loss": 31.4517, + "step": 243400 + }, + { + "epoch": 0.4917036001567569, + "grad_norm": 2.206104278564453, + "learning_rate": 6.093781924412063e-06, + "loss": 12.6963, + "step": 243410 + }, + { + "epoch": 0.4917238007894407, + "grad_norm": 43.28728485107422, + "learning_rate": 6.093441310443842e-06, + "loss": 10.0299, + "step": 243420 + }, + { + "epoch": 0.49174400142212454, + "grad_norm": 497.6747741699219, + "learning_rate": 6.093100691146321e-06, + "loss": 23.7381, + "step": 243430 + }, + { + "epoch": 0.49176420205480836, + "grad_norm": 508.7585144042969, + "learning_rate": 6.0927600665211575e-06, + "loss": 22.6063, + "step": 243440 + }, + { + "epoch": 0.4917844026874922, + "grad_norm": 144.22523498535156, + "learning_rate": 6.092419436570016e-06, + "loss": 10.8433, + "step": 243450 + }, + { + "epoch": 0.491804603320176, + "grad_norm": 283.6905822753906, + "learning_rate": 6.092078801294554e-06, + "loss": 9.281, + "step": 243460 + }, + { + "epoch": 0.4918248039528598, + "grad_norm": 548.2412719726562, + "learning_rate": 6.091738160696433e-06, + "loss": 27.9931, + "step": 243470 + }, + { + "epoch": 0.49184500458554364, + "grad_norm": 353.6029357910156, + "learning_rate": 6.091397514777313e-06, + "loss": 18.0369, + "step": 243480 + }, + { + "epoch": 0.4918652052182274, + "grad_norm": 95.61614227294922, + "learning_rate": 6.091056863538851e-06, + "loss": 16.5977, + "step": 243490 + }, + { + "epoch": 0.49188540585091123, + "grad_norm": 160.9171600341797, + "learning_rate": 6.090716206982714e-06, + "loss": 24.112, + "step": 243500 + }, + { + "epoch": 0.49190560648359505, + "grad_norm": 591.6284790039062, + "learning_rate": 6.090375545110556e-06, + "loss": 29.5164, + "step": 243510 + }, + { + "epoch": 0.49192580711627887, + "grad_norm": 400.39166259765625, + "learning_rate": 6.090034877924041e-06, + "loss": 23.6562, + 
"step": 243520 + }, + { + "epoch": 0.4919460077489627, + "grad_norm": 3.3571367263793945, + "learning_rate": 6.089694205424827e-06, + "loss": 15.9371, + "step": 243530 + }, + { + "epoch": 0.4919662083816465, + "grad_norm": 420.609375, + "learning_rate": 6.089353527614577e-06, + "loss": 14.1265, + "step": 243540 + }, + { + "epoch": 0.49198640901433033, + "grad_norm": 522.5365600585938, + "learning_rate": 6.08901284449495e-06, + "loss": 22.5624, + "step": 243550 + }, + { + "epoch": 0.49200660964701415, + "grad_norm": 277.09735107421875, + "learning_rate": 6.088672156067607e-06, + "loss": 17.8786, + "step": 243560 + }, + { + "epoch": 0.49202681027969797, + "grad_norm": 254.58782958984375, + "learning_rate": 6.088331462334206e-06, + "loss": 17.6637, + "step": 243570 + }, + { + "epoch": 0.4920470109123818, + "grad_norm": 76.53742980957031, + "learning_rate": 6.0879907632964095e-06, + "loss": 21.8426, + "step": 243580 + }, + { + "epoch": 0.4920672115450656, + "grad_norm": 231.54798889160156, + "learning_rate": 6.087650058955879e-06, + "loss": 18.5493, + "step": 243590 + }, + { + "epoch": 0.49208741217774943, + "grad_norm": 326.02880859375, + "learning_rate": 6.087309349314275e-06, + "loss": 18.1888, + "step": 243600 + }, + { + "epoch": 0.49210761281043325, + "grad_norm": 459.2764892578125, + "learning_rate": 6.086968634373256e-06, + "loss": 38.9905, + "step": 243610 + }, + { + "epoch": 0.492127813443117, + "grad_norm": 6.301657676696777, + "learning_rate": 6.086627914134482e-06, + "loss": 34.4115, + "step": 243620 + }, + { + "epoch": 0.49214801407580083, + "grad_norm": 271.61883544921875, + "learning_rate": 6.086287188599617e-06, + "loss": 8.9168, + "step": 243630 + }, + { + "epoch": 0.49216821470848465, + "grad_norm": 144.28807067871094, + "learning_rate": 6.085946457770321e-06, + "loss": 21.0509, + "step": 243640 + }, + { + "epoch": 0.4921884153411685, + "grad_norm": 667.0874633789062, + "learning_rate": 6.085605721648253e-06, + "loss": 20.3682, + "step": 243650 + }, + { + "epoch": 0.4922086159738523, + "grad_norm": 364.631103515625, + "learning_rate": 6.085264980235075e-06, + "loss": 19.8767, + "step": 243660 + }, + { + "epoch": 0.4922288166065361, + "grad_norm": 464.1360168457031, + "learning_rate": 6.084924233532444e-06, + "loss": 12.8749, + "step": 243670 + }, + { + "epoch": 0.49224901723921993, + "grad_norm": 260.3604736328125, + "learning_rate": 6.084583481542028e-06, + "loss": 10.4999, + "step": 243680 + }, + { + "epoch": 0.49226921787190375, + "grad_norm": 134.682373046875, + "learning_rate": 6.084242724265481e-06, + "loss": 31.0584, + "step": 243690 + }, + { + "epoch": 0.4922894185045876, + "grad_norm": 706.572265625, + "learning_rate": 6.083901961704467e-06, + "loss": 22.7602, + "step": 243700 + }, + { + "epoch": 0.4923096191372714, + "grad_norm": 399.3299865722656, + "learning_rate": 6.083561193860646e-06, + "loss": 18.2616, + "step": 243710 + }, + { + "epoch": 0.4923298197699552, + "grad_norm": 220.2206268310547, + "learning_rate": 6.083220420735681e-06, + "loss": 21.117, + "step": 243720 + }, + { + "epoch": 0.49235002040263903, + "grad_norm": 304.3521423339844, + "learning_rate": 6.08287964233123e-06, + "loss": 15.1435, + "step": 243730 + }, + { + "epoch": 0.49237022103532285, + "grad_norm": 2.1963284015655518, + "learning_rate": 6.082538858648954e-06, + "loss": 17.5348, + "step": 243740 + }, + { + "epoch": 0.4923904216680066, + "grad_norm": 200.71510314941406, + "learning_rate": 6.0821980696905145e-06, + "loss": 21.6039, + "step": 243750 + }, + { + "epoch": 0.49241062230069044, + 
"grad_norm": 290.66455078125, + "learning_rate": 6.081857275457574e-06, + "loss": 20.226, + "step": 243760 + }, + { + "epoch": 0.49243082293337426, + "grad_norm": 276.683349609375, + "learning_rate": 6.081516475951793e-06, + "loss": 20.0125, + "step": 243770 + }, + { + "epoch": 0.4924510235660581, + "grad_norm": 239.0572967529297, + "learning_rate": 6.081175671174831e-06, + "loss": 29.0291, + "step": 243780 + }, + { + "epoch": 0.4924712241987419, + "grad_norm": 219.093994140625, + "learning_rate": 6.0808348611283505e-06, + "loss": 15.7579, + "step": 243790 + }, + { + "epoch": 0.4924914248314257, + "grad_norm": 328.8896484375, + "learning_rate": 6.080494045814011e-06, + "loss": 12.5564, + "step": 243800 + }, + { + "epoch": 0.49251162546410954, + "grad_norm": 2166.62646484375, + "learning_rate": 6.080153225233475e-06, + "loss": 22.8739, + "step": 243810 + }, + { + "epoch": 0.49253182609679336, + "grad_norm": 174.4705810546875, + "learning_rate": 6.079812399388404e-06, + "loss": 9.6937, + "step": 243820 + }, + { + "epoch": 0.4925520267294772, + "grad_norm": 1052.1917724609375, + "learning_rate": 6.079471568280456e-06, + "loss": 20.7519, + "step": 243830 + }, + { + "epoch": 0.492572227362161, + "grad_norm": 328.778564453125, + "learning_rate": 6.079130731911298e-06, + "loss": 18.7361, + "step": 243840 + }, + { + "epoch": 0.4925924279948448, + "grad_norm": 151.9322509765625, + "learning_rate": 6.078789890282585e-06, + "loss": 29.0582, + "step": 243850 + }, + { + "epoch": 0.49261262862752864, + "grad_norm": 42.79109191894531, + "learning_rate": 6.078449043395982e-06, + "loss": 23.3692, + "step": 243860 + }, + { + "epoch": 0.49263282926021246, + "grad_norm": 350.9899597167969, + "learning_rate": 6.078108191253148e-06, + "loss": 21.4167, + "step": 243870 + }, + { + "epoch": 0.4926530298928962, + "grad_norm": 176.45204162597656, + "learning_rate": 6.077767333855748e-06, + "loss": 14.1641, + "step": 243880 + }, + { + "epoch": 0.49267323052558004, + "grad_norm": 81.61347961425781, + "learning_rate": 6.077426471205439e-06, + "loss": 18.7199, + "step": 243890 + }, + { + "epoch": 0.49269343115826386, + "grad_norm": 381.9842224121094, + "learning_rate": 6.077085603303883e-06, + "loss": 22.57, + "step": 243900 + }, + { + "epoch": 0.4927136317909477, + "grad_norm": 185.9649200439453, + "learning_rate": 6.076744730152744e-06, + "loss": 23.6399, + "step": 243910 + }, + { + "epoch": 0.4927338324236315, + "grad_norm": 548.74755859375, + "learning_rate": 6.07640385175368e-06, + "loss": 28.5055, + "step": 243920 + }, + { + "epoch": 0.4927540330563153, + "grad_norm": 489.41876220703125, + "learning_rate": 6.076062968108357e-06, + "loss": 16.5337, + "step": 243930 + }, + { + "epoch": 0.49277423368899914, + "grad_norm": 278.4815673828125, + "learning_rate": 6.0757220792184314e-06, + "loss": 23.2221, + "step": 243940 + }, + { + "epoch": 0.49279443432168296, + "grad_norm": 220.00218200683594, + "learning_rate": 6.075381185085568e-06, + "loss": 16.0409, + "step": 243950 + }, + { + "epoch": 0.4928146349543668, + "grad_norm": 15.607489585876465, + "learning_rate": 6.075040285711427e-06, + "loss": 18.7818, + "step": 243960 + }, + { + "epoch": 0.4928348355870506, + "grad_norm": 366.9059753417969, + "learning_rate": 6.074699381097669e-06, + "loss": 16.8994, + "step": 243970 + }, + { + "epoch": 0.4928550362197344, + "grad_norm": 417.60693359375, + "learning_rate": 6.074358471245957e-06, + "loss": 9.3807, + "step": 243980 + }, + { + "epoch": 0.49287523685241824, + "grad_norm": 746.2815551757812, + "learning_rate": 
6.074017556157952e-06, + "loss": 31.5113, + "step": 243990 + }, + { + "epoch": 0.492895437485102, + "grad_norm": 147.13330078125, + "learning_rate": 6.073676635835317e-06, + "loss": 19.8945, + "step": 244000 + }, + { + "epoch": 0.4929156381177858, + "grad_norm": 546.3687744140625, + "learning_rate": 6.073335710279711e-06, + "loss": 19.3393, + "step": 244010 + }, + { + "epoch": 0.49293583875046965, + "grad_norm": 1691.22998046875, + "learning_rate": 6.072994779492798e-06, + "loss": 27.1752, + "step": 244020 + }, + { + "epoch": 0.49295603938315347, + "grad_norm": 597.4349365234375, + "learning_rate": 6.072653843476237e-06, + "loss": 24.8999, + "step": 244030 + }, + { + "epoch": 0.4929762400158373, + "grad_norm": 486.0323791503906, + "learning_rate": 6.072312902231692e-06, + "loss": 17.299, + "step": 244040 + }, + { + "epoch": 0.4929964406485211, + "grad_norm": 390.08251953125, + "learning_rate": 6.071971955760823e-06, + "loss": 29.5478, + "step": 244050 + }, + { + "epoch": 0.4930166412812049, + "grad_norm": 837.5003662109375, + "learning_rate": 6.071631004065296e-06, + "loss": 17.5977, + "step": 244060 + }, + { + "epoch": 0.49303684191388875, + "grad_norm": 456.85968017578125, + "learning_rate": 6.071290047146767e-06, + "loss": 25.1069, + "step": 244070 + }, + { + "epoch": 0.49305704254657257, + "grad_norm": 499.1066589355469, + "learning_rate": 6.0709490850069e-06, + "loss": 18.4598, + "step": 244080 + }, + { + "epoch": 0.4930772431792564, + "grad_norm": 422.30560302734375, + "learning_rate": 6.070608117647359e-06, + "loss": 20.2345, + "step": 244090 + }, + { + "epoch": 0.4930974438119402, + "grad_norm": 369.7128601074219, + "learning_rate": 6.0702671450698005e-06, + "loss": 14.9042, + "step": 244100 + }, + { + "epoch": 0.493117644444624, + "grad_norm": 386.27862548828125, + "learning_rate": 6.069926167275893e-06, + "loss": 14.8961, + "step": 244110 + }, + { + "epoch": 0.49313784507730785, + "grad_norm": 621.8338012695312, + "learning_rate": 6.069585184267292e-06, + "loss": 20.4106, + "step": 244120 + }, + { + "epoch": 0.4931580457099916, + "grad_norm": 261.2494201660156, + "learning_rate": 6.069244196045666e-06, + "loss": 12.0623, + "step": 244130 + }, + { + "epoch": 0.49317824634267543, + "grad_norm": 512.4036254882812, + "learning_rate": 6.068903202612672e-06, + "loss": 22.5381, + "step": 244140 + }, + { + "epoch": 0.49319844697535925, + "grad_norm": 678.3642578125, + "learning_rate": 6.068562203969972e-06, + "loss": 19.9685, + "step": 244150 + }, + { + "epoch": 0.49321864760804307, + "grad_norm": 988.5474243164062, + "learning_rate": 6.068221200119232e-06, + "loss": 12.505, + "step": 244160 + }, + { + "epoch": 0.4932388482407269, + "grad_norm": 586.0654907226562, + "learning_rate": 6.06788019106211e-06, + "loss": 16.9205, + "step": 244170 + }, + { + "epoch": 0.4932590488734107, + "grad_norm": 863.3224487304688, + "learning_rate": 6.067539176800269e-06, + "loss": 13.7263, + "step": 244180 + }, + { + "epoch": 0.49327924950609453, + "grad_norm": 410.1658935546875, + "learning_rate": 6.067198157335372e-06, + "loss": 12.6717, + "step": 244190 + }, + { + "epoch": 0.49329945013877835, + "grad_norm": 1063.82666015625, + "learning_rate": 6.066857132669081e-06, + "loss": 24.6441, + "step": 244200 + }, + { + "epoch": 0.49331965077146217, + "grad_norm": 192.57086181640625, + "learning_rate": 6.066516102803057e-06, + "loss": 22.914, + "step": 244210 + }, + { + "epoch": 0.493339851404146, + "grad_norm": 11.151764869689941, + "learning_rate": 6.066175067738964e-06, + "loss": 14.5301, + "step": 244220 
+ }, + { + "epoch": 0.4933600520368298, + "grad_norm": 318.1488952636719, + "learning_rate": 6.065834027478462e-06, + "loss": 22.5188, + "step": 244230 + }, + { + "epoch": 0.49338025266951363, + "grad_norm": 526.05078125, + "learning_rate": 6.0654929820232146e-06, + "loss": 11.8208, + "step": 244240 + }, + { + "epoch": 0.49340045330219745, + "grad_norm": 362.418212890625, + "learning_rate": 6.065151931374884e-06, + "loss": 11.3675, + "step": 244250 + }, + { + "epoch": 0.4934206539348812, + "grad_norm": 324.4506530761719, + "learning_rate": 6.0648108755351305e-06, + "loss": 23.5681, + "step": 244260 + }, + { + "epoch": 0.49344085456756503, + "grad_norm": 296.7850646972656, + "learning_rate": 6.06446981450562e-06, + "loss": 16.1006, + "step": 244270 + }, + { + "epoch": 0.49346105520024885, + "grad_norm": 910.4213256835938, + "learning_rate": 6.0641287482880105e-06, + "loss": 24.6717, + "step": 244280 + }, + { + "epoch": 0.4934812558329327, + "grad_norm": 241.55018615722656, + "learning_rate": 6.0637876768839696e-06, + "loss": 20.5909, + "step": 244290 + }, + { + "epoch": 0.4935014564656165, + "grad_norm": 1268.6630859375, + "learning_rate": 6.0634466002951545e-06, + "loss": 44.0809, + "step": 244300 + }, + { + "epoch": 0.4935216570983003, + "grad_norm": 61.69091033935547, + "learning_rate": 6.06310551852323e-06, + "loss": 18.8025, + "step": 244310 + }, + { + "epoch": 0.49354185773098413, + "grad_norm": 571.124755859375, + "learning_rate": 6.0627644315698575e-06, + "loss": 24.0975, + "step": 244320 + }, + { + "epoch": 0.49356205836366795, + "grad_norm": 273.9856872558594, + "learning_rate": 6.062423339436701e-06, + "loss": 26.4877, + "step": 244330 + }, + { + "epoch": 0.4935822589963518, + "grad_norm": 248.83633422851562, + "learning_rate": 6.062082242125422e-06, + "loss": 27.1512, + "step": 244340 + }, + { + "epoch": 0.4936024596290356, + "grad_norm": 507.28875732421875, + "learning_rate": 6.061741139637682e-06, + "loss": 21.4276, + "step": 244350 + }, + { + "epoch": 0.4936226602617194, + "grad_norm": 1421.45849609375, + "learning_rate": 6.061400031975147e-06, + "loss": 22.241, + "step": 244360 + }, + { + "epoch": 0.49364286089440323, + "grad_norm": 259.8501892089844, + "learning_rate": 6.061058919139474e-06, + "loss": 17.8136, + "step": 244370 + }, + { + "epoch": 0.49366306152708705, + "grad_norm": 608.4784545898438, + "learning_rate": 6.060717801132329e-06, + "loss": 24.4676, + "step": 244380 + }, + { + "epoch": 0.4936832621597708, + "grad_norm": 439.1976623535156, + "learning_rate": 6.060376677955375e-06, + "loss": 11.4727, + "step": 244390 + }, + { + "epoch": 0.49370346279245464, + "grad_norm": 355.0526428222656, + "learning_rate": 6.060035549610275e-06, + "loss": 29.9297, + "step": 244400 + }, + { + "epoch": 0.49372366342513846, + "grad_norm": 439.33917236328125, + "learning_rate": 6.0596944160986885e-06, + "loss": 26.6631, + "step": 244410 + }, + { + "epoch": 0.4937438640578223, + "grad_norm": 313.279541015625, + "learning_rate": 6.0593532774222796e-06, + "loss": 15.9866, + "step": 244420 + }, + { + "epoch": 0.4937640646905061, + "grad_norm": 462.3720703125, + "learning_rate": 6.059012133582713e-06, + "loss": 31.3824, + "step": 244430 + }, + { + "epoch": 0.4937842653231899, + "grad_norm": 561.0098876953125, + "learning_rate": 6.058670984581647e-06, + "loss": 17.5214, + "step": 244440 + }, + { + "epoch": 0.49380446595587374, + "grad_norm": 229.17897033691406, + "learning_rate": 6.058329830420749e-06, + "loss": 20.2344, + "step": 244450 + }, + { + "epoch": 0.49382466658855756, + 
"grad_norm": 624.6414794921875, + "learning_rate": 6.057988671101679e-06, + "loss": 16.3288, + "step": 244460 + }, + { + "epoch": 0.4938448672212414, + "grad_norm": 838.2815551757812, + "learning_rate": 6.057647506626101e-06, + "loss": 24.2822, + "step": 244470 + }, + { + "epoch": 0.4938650678539252, + "grad_norm": 188.3896484375, + "learning_rate": 6.057306336995677e-06, + "loss": 7.962, + "step": 244480 + }, + { + "epoch": 0.493885268486609, + "grad_norm": 261.6092529296875, + "learning_rate": 6.056965162212072e-06, + "loss": 22.7007, + "step": 244490 + }, + { + "epoch": 0.49390546911929284, + "grad_norm": 3.9006917476654053, + "learning_rate": 6.056623982276945e-06, + "loss": 14.9307, + "step": 244500 + }, + { + "epoch": 0.49392566975197666, + "grad_norm": 634.9256591796875, + "learning_rate": 6.05628279719196e-06, + "loss": 25.0206, + "step": 244510 + }, + { + "epoch": 0.4939458703846604, + "grad_norm": 368.6424255371094, + "learning_rate": 6.0559416069587814e-06, + "loss": 11.8245, + "step": 244520 + }, + { + "epoch": 0.49396607101734424, + "grad_norm": 246.29586791992188, + "learning_rate": 6.055600411579072e-06, + "loss": 13.7323, + "step": 244530 + }, + { + "epoch": 0.49398627165002806, + "grad_norm": 645.8884887695312, + "learning_rate": 6.055259211054496e-06, + "loss": 25.4784, + "step": 244540 + }, + { + "epoch": 0.4940064722827119, + "grad_norm": 189.35472106933594, + "learning_rate": 6.0549180053867114e-06, + "loss": 11.4991, + "step": 244550 + }, + { + "epoch": 0.4940266729153957, + "grad_norm": 3.043370008468628, + "learning_rate": 6.054576794577387e-06, + "loss": 14.5078, + "step": 244560 + }, + { + "epoch": 0.4940468735480795, + "grad_norm": 370.82861328125, + "learning_rate": 6.054235578628181e-06, + "loss": 24.295, + "step": 244570 + }, + { + "epoch": 0.49406707418076334, + "grad_norm": 152.5736846923828, + "learning_rate": 6.053894357540761e-06, + "loss": 9.8684, + "step": 244580 + }, + { + "epoch": 0.49408727481344716, + "grad_norm": 544.0726318359375, + "learning_rate": 6.053553131316785e-06, + "loss": 18.8866, + "step": 244590 + }, + { + "epoch": 0.494107475446131, + "grad_norm": 434.5020751953125, + "learning_rate": 6.0532118999579206e-06, + "loss": 22.273, + "step": 244600 + }, + { + "epoch": 0.4941276760788148, + "grad_norm": 438.2577209472656, + "learning_rate": 6.052870663465829e-06, + "loss": 19.9347, + "step": 244610 + }, + { + "epoch": 0.4941478767114986, + "grad_norm": 195.21304321289062, + "learning_rate": 6.0525294218421735e-06, + "loss": 12.3461, + "step": 244620 + }, + { + "epoch": 0.49416807734418244, + "grad_norm": 554.744140625, + "learning_rate": 6.052188175088617e-06, + "loss": 29.516, + "step": 244630 + }, + { + "epoch": 0.4941882779768662, + "grad_norm": 140.444580078125, + "learning_rate": 6.051846923206824e-06, + "loss": 10.9421, + "step": 244640 + }, + { + "epoch": 0.49420847860955003, + "grad_norm": 567.934814453125, + "learning_rate": 6.051505666198454e-06, + "loss": 18.0941, + "step": 244650 + }, + { + "epoch": 0.49422867924223385, + "grad_norm": 302.12542724609375, + "learning_rate": 6.051164404065175e-06, + "loss": 17.2545, + "step": 244660 + }, + { + "epoch": 0.49424887987491767, + "grad_norm": 167.5221405029297, + "learning_rate": 6.050823136808649e-06, + "loss": 16.8512, + "step": 244670 + }, + { + "epoch": 0.4942690805076015, + "grad_norm": 256.09210205078125, + "learning_rate": 6.050481864430536e-06, + "loss": 22.8571, + "step": 244680 + }, + { + "epoch": 0.4942892811402853, + "grad_norm": 413.8460388183594, + "learning_rate": 
6.050140586932504e-06, + "loss": 21.3646, + "step": 244690 + }, + { + "epoch": 0.49430948177296913, + "grad_norm": 259.9608459472656, + "learning_rate": 6.049799304316214e-06, + "loss": 18.28, + "step": 244700 + }, + { + "epoch": 0.49432968240565295, + "grad_norm": 309.09161376953125, + "learning_rate": 6.0494580165833275e-06, + "loss": 12.0999, + "step": 244710 + }, + { + "epoch": 0.49434988303833677, + "grad_norm": 35.23161315917969, + "learning_rate": 6.049116723735512e-06, + "loss": 22.8078, + "step": 244720 + }, + { + "epoch": 0.4943700836710206, + "grad_norm": 24.270263671875, + "learning_rate": 6.048775425774426e-06, + "loss": 18.4349, + "step": 244730 + }, + { + "epoch": 0.4943902843037044, + "grad_norm": 253.10581970214844, + "learning_rate": 6.048434122701738e-06, + "loss": 23.2509, + "step": 244740 + }, + { + "epoch": 0.49441048493638823, + "grad_norm": 193.04750061035156, + "learning_rate": 6.048092814519109e-06, + "loss": 14.6667, + "step": 244750 + }, + { + "epoch": 0.49443068556907205, + "grad_norm": 55.17918395996094, + "learning_rate": 6.047751501228203e-06, + "loss": 16.3477, + "step": 244760 + }, + { + "epoch": 0.4944508862017558, + "grad_norm": 168.7065887451172, + "learning_rate": 6.047410182830684e-06, + "loss": 23.7309, + "step": 244770 + }, + { + "epoch": 0.49447108683443963, + "grad_norm": 811.4564208984375, + "learning_rate": 6.047068859328213e-06, + "loss": 25.548, + "step": 244780 + }, + { + "epoch": 0.49449128746712345, + "grad_norm": 322.5340270996094, + "learning_rate": 6.046727530722456e-06, + "loss": 17.0893, + "step": 244790 + }, + { + "epoch": 0.4945114880998073, + "grad_norm": 417.874755859375, + "learning_rate": 6.046386197015076e-06, + "loss": 39.5569, + "step": 244800 + }, + { + "epoch": 0.4945316887324911, + "grad_norm": 485.04205322265625, + "learning_rate": 6.046044858207737e-06, + "loss": 19.9152, + "step": 244810 + }, + { + "epoch": 0.4945518893651749, + "grad_norm": 586.47802734375, + "learning_rate": 6.045703514302101e-06, + "loss": 30.8003, + "step": 244820 + }, + { + "epoch": 0.49457208999785873, + "grad_norm": 0.0, + "learning_rate": 6.045362165299835e-06, + "loss": 14.0122, + "step": 244830 + }, + { + "epoch": 0.49459229063054255, + "grad_norm": 497.6436462402344, + "learning_rate": 6.0450208112026e-06, + "loss": 24.4296, + "step": 244840 + }, + { + "epoch": 0.4946124912632264, + "grad_norm": 107.70179748535156, + "learning_rate": 6.044679452012059e-06, + "loss": 21.6951, + "step": 244850 + }, + { + "epoch": 0.4946326918959102, + "grad_norm": 1035.3358154296875, + "learning_rate": 6.044338087729878e-06, + "loss": 39.922, + "step": 244860 + }, + { + "epoch": 0.494652892528594, + "grad_norm": 615.2496948242188, + "learning_rate": 6.04399671835772e-06, + "loss": 26.6206, + "step": 244870 + }, + { + "epoch": 0.49467309316127783, + "grad_norm": 252.15406799316406, + "learning_rate": 6.043655343897249e-06, + "loss": 14.9972, + "step": 244880 + }, + { + "epoch": 0.49469329379396165, + "grad_norm": 9.638592720031738, + "learning_rate": 6.043313964350126e-06, + "loss": 10.2801, + "step": 244890 + }, + { + "epoch": 0.4947134944266454, + "grad_norm": 297.4844970703125, + "learning_rate": 6.04297257971802e-06, + "loss": 21.5559, + "step": 244900 + }, + { + "epoch": 0.49473369505932924, + "grad_norm": 33.992958068847656, + "learning_rate": 6.0426311900025905e-06, + "loss": 26.3419, + "step": 244910 + }, + { + "epoch": 0.49475389569201306, + "grad_norm": 322.1072998046875, + "learning_rate": 6.042289795205504e-06, + "loss": 23.7903, + "step": 244920 + }, 
+ { + "epoch": 0.4947740963246969, + "grad_norm": 347.6963806152344, + "learning_rate": 6.041948395328423e-06, + "loss": 15.5776, + "step": 244930 + }, + { + "epoch": 0.4947942969573807, + "grad_norm": 481.201416015625, + "learning_rate": 6.041606990373012e-06, + "loss": 18.0799, + "step": 244940 + }, + { + "epoch": 0.4948144975900645, + "grad_norm": 299.6612548828125, + "learning_rate": 6.041265580340935e-06, + "loss": 18.2105, + "step": 244950 + }, + { + "epoch": 0.49483469822274834, + "grad_norm": 0.0, + "learning_rate": 6.040924165233856e-06, + "loss": 14.3527, + "step": 244960 + }, + { + "epoch": 0.49485489885543216, + "grad_norm": 501.0664978027344, + "learning_rate": 6.040582745053438e-06, + "loss": 18.6845, + "step": 244970 + }, + { + "epoch": 0.494875099488116, + "grad_norm": 568.0910034179688, + "learning_rate": 6.040241319801346e-06, + "loss": 19.2887, + "step": 244980 + }, + { + "epoch": 0.4948953001207998, + "grad_norm": 417.7993469238281, + "learning_rate": 6.039899889479246e-06, + "loss": 17.6804, + "step": 244990 + }, + { + "epoch": 0.4949155007534836, + "grad_norm": 128.64581298828125, + "learning_rate": 6.039558454088796e-06, + "loss": 6.8771, + "step": 245000 + }, + { + "epoch": 0.49493570138616744, + "grad_norm": 101.71302032470703, + "learning_rate": 6.039217013631668e-06, + "loss": 13.3712, + "step": 245010 + }, + { + "epoch": 0.49495590201885126, + "grad_norm": 80.38015747070312, + "learning_rate": 6.0388755681095216e-06, + "loss": 19.9883, + "step": 245020 + }, + { + "epoch": 0.494976102651535, + "grad_norm": 143.52117919921875, + "learning_rate": 6.038534117524021e-06, + "loss": 19.8221, + "step": 245030 + }, + { + "epoch": 0.49499630328421884, + "grad_norm": 601.686767578125, + "learning_rate": 6.038192661876832e-06, + "loss": 25.4875, + "step": 245040 + }, + { + "epoch": 0.49501650391690266, + "grad_norm": 464.03424072265625, + "learning_rate": 6.0378512011696155e-06, + "loss": 14.3996, + "step": 245050 + }, + { + "epoch": 0.4950367045495865, + "grad_norm": 484.98480224609375, + "learning_rate": 6.03750973540404e-06, + "loss": 15.8818, + "step": 245060 + }, + { + "epoch": 0.4950569051822703, + "grad_norm": 37.089900970458984, + "learning_rate": 6.037168264581767e-06, + "loss": 22.9769, + "step": 245070 + }, + { + "epoch": 0.4950771058149541, + "grad_norm": 60.080257415771484, + "learning_rate": 6.036826788704463e-06, + "loss": 11.9262, + "step": 245080 + }, + { + "epoch": 0.49509730644763794, + "grad_norm": 528.3023681640625, + "learning_rate": 6.03648530777379e-06, + "loss": 24.5875, + "step": 245090 + }, + { + "epoch": 0.49511750708032176, + "grad_norm": 225.19271850585938, + "learning_rate": 6.036143821791413e-06, + "loss": 10.0997, + "step": 245100 + }, + { + "epoch": 0.4951377077130056, + "grad_norm": 266.2395324707031, + "learning_rate": 6.035802330758997e-06, + "loss": 15.1098, + "step": 245110 + }, + { + "epoch": 0.4951579083456894, + "grad_norm": 207.41989135742188, + "learning_rate": 6.0354608346782075e-06, + "loss": 32.8293, + "step": 245120 + }, + { + "epoch": 0.4951781089783732, + "grad_norm": 154.92733764648438, + "learning_rate": 6.035119333550705e-06, + "loss": 15.8353, + "step": 245130 + }, + { + "epoch": 0.49519830961105704, + "grad_norm": 770.8162231445312, + "learning_rate": 6.034777827378157e-06, + "loss": 31.304, + "step": 245140 + }, + { + "epoch": 0.49521851024374086, + "grad_norm": 2.7887516021728516, + "learning_rate": 6.03443631616223e-06, + "loss": 12.0799, + "step": 245150 + }, + { + "epoch": 0.4952387108764246, + "grad_norm": 
568.9775390625, + "learning_rate": 6.034094799904583e-06, + "loss": 24.7265, + "step": 245160 + }, + { + "epoch": 0.49525891150910845, + "grad_norm": 582.31640625, + "learning_rate": 6.0337532786068846e-06, + "loss": 33.4081, + "step": 245170 + }, + { + "epoch": 0.49527911214179227, + "grad_norm": 827.0950317382812, + "learning_rate": 6.033411752270798e-06, + "loss": 16.2784, + "step": 245180 + }, + { + "epoch": 0.4952993127744761, + "grad_norm": 376.9583740234375, + "learning_rate": 6.033070220897988e-06, + "loss": 26.7607, + "step": 245190 + }, + { + "epoch": 0.4953195134071599, + "grad_norm": 850.7732543945312, + "learning_rate": 6.032728684490118e-06, + "loss": 29.897, + "step": 245200 + }, + { + "epoch": 0.4953397140398437, + "grad_norm": 354.1410217285156, + "learning_rate": 6.032387143048853e-06, + "loss": 20.4953, + "step": 245210 + }, + { + "epoch": 0.49535991467252755, + "grad_norm": 787.7244873046875, + "learning_rate": 6.032045596575862e-06, + "loss": 25.6331, + "step": 245220 + }, + { + "epoch": 0.49538011530521137, + "grad_norm": 369.9317626953125, + "learning_rate": 6.031704045072803e-06, + "loss": 12.0731, + "step": 245230 + }, + { + "epoch": 0.4954003159378952, + "grad_norm": 848.404296875, + "learning_rate": 6.031362488541344e-06, + "loss": 21.9786, + "step": 245240 + }, + { + "epoch": 0.495420516570579, + "grad_norm": 169.3268280029297, + "learning_rate": 6.031020926983149e-06, + "loss": 14.8847, + "step": 245250 + }, + { + "epoch": 0.4954407172032628, + "grad_norm": 28.975000381469727, + "learning_rate": 6.030679360399883e-06, + "loss": 20.2139, + "step": 245260 + }, + { + "epoch": 0.49546091783594665, + "grad_norm": 76.87944793701172, + "learning_rate": 6.030337788793212e-06, + "loss": 17.1178, + "step": 245270 + }, + { + "epoch": 0.4954811184686304, + "grad_norm": 118.92074584960938, + "learning_rate": 6.029996212164799e-06, + "loss": 18.049, + "step": 245280 + }, + { + "epoch": 0.49550131910131423, + "grad_norm": 865.7176513671875, + "learning_rate": 6.029654630516308e-06, + "loss": 24.4408, + "step": 245290 + }, + { + "epoch": 0.49552151973399805, + "grad_norm": 629.9857177734375, + "learning_rate": 6.029313043849407e-06, + "loss": 28.9345, + "step": 245300 + }, + { + "epoch": 0.49554172036668187, + "grad_norm": 344.41119384765625, + "learning_rate": 6.02897145216576e-06, + "loss": 19.0488, + "step": 245310 + }, + { + "epoch": 0.4955619209993657, + "grad_norm": 280.0577697753906, + "learning_rate": 6.0286298554670275e-06, + "loss": 20.772, + "step": 245320 + }, + { + "epoch": 0.4955821216320495, + "grad_norm": 757.3346557617188, + "learning_rate": 6.028288253754882e-06, + "loss": 26.6095, + "step": 245330 + }, + { + "epoch": 0.49560232226473333, + "grad_norm": 819.9552001953125, + "learning_rate": 6.02794664703098e-06, + "loss": 20.3762, + "step": 245340 + }, + { + "epoch": 0.49562252289741715, + "grad_norm": 885.9320068359375, + "learning_rate": 6.027605035296994e-06, + "loss": 40.4341, + "step": 245350 + }, + { + "epoch": 0.49564272353010097, + "grad_norm": 553.4578247070312, + "learning_rate": 6.027263418554585e-06, + "loss": 18.8369, + "step": 245360 + }, + { + "epoch": 0.4956629241627848, + "grad_norm": 336.39227294921875, + "learning_rate": 6.026921796805417e-06, + "loss": 24.2733, + "step": 245370 + }, + { + "epoch": 0.4956831247954686, + "grad_norm": 319.5438537597656, + "learning_rate": 6.026580170051158e-06, + "loss": 17.5498, + "step": 245380 + }, + { + "epoch": 0.49570332542815243, + "grad_norm": 568.3425903320312, + "learning_rate": 
6.026238538293472e-06, + "loss": 20.4626, + "step": 245390 + }, + { + "epoch": 0.49572352606083625, + "grad_norm": 375.4813537597656, + "learning_rate": 6.025896901534023e-06, + "loss": 17.9613, + "step": 245400 + }, + { + "epoch": 0.49574372669352, + "grad_norm": 345.9901428222656, + "learning_rate": 6.025555259774478e-06, + "loss": 29.2855, + "step": 245410 + }, + { + "epoch": 0.49576392732620383, + "grad_norm": 692.1480102539062, + "learning_rate": 6.025213613016501e-06, + "loss": 21.2442, + "step": 245420 + }, + { + "epoch": 0.49578412795888765, + "grad_norm": 4.171411514282227, + "learning_rate": 6.024871961261756e-06, + "loss": 16.2202, + "step": 245430 + }, + { + "epoch": 0.4958043285915715, + "grad_norm": 748.0377197265625, + "learning_rate": 6.024530304511911e-06, + "loss": 23.804, + "step": 245440 + }, + { + "epoch": 0.4958245292242553, + "grad_norm": 177.20375061035156, + "learning_rate": 6.024188642768628e-06, + "loss": 17.9429, + "step": 245450 + }, + { + "epoch": 0.4958447298569391, + "grad_norm": 311.2812194824219, + "learning_rate": 6.023846976033574e-06, + "loss": 13.1427, + "step": 245460 + }, + { + "epoch": 0.49586493048962293, + "grad_norm": 366.9995422363281, + "learning_rate": 6.0235053043084155e-06, + "loss": 38.7233, + "step": 245470 + }, + { + "epoch": 0.49588513112230675, + "grad_norm": 176.72760009765625, + "learning_rate": 6.023163627594813e-06, + "loss": 24.8769, + "step": 245480 + }, + { + "epoch": 0.4959053317549906, + "grad_norm": 608.5573120117188, + "learning_rate": 6.022821945894439e-06, + "loss": 25.3785, + "step": 245490 + }, + { + "epoch": 0.4959255323876744, + "grad_norm": 260.3470764160156, + "learning_rate": 6.022480259208951e-06, + "loss": 28.7517, + "step": 245500 + }, + { + "epoch": 0.4959457330203582, + "grad_norm": 43.951263427734375, + "learning_rate": 6.022138567540023e-06, + "loss": 16.2127, + "step": 245510 + }, + { + "epoch": 0.49596593365304203, + "grad_norm": 650.9738159179688, + "learning_rate": 6.021796870889311e-06, + "loss": 20.0046, + "step": 245520 + }, + { + "epoch": 0.49598613428572585, + "grad_norm": 903.1184692382812, + "learning_rate": 6.0214551692584875e-06, + "loss": 34.6528, + "step": 245530 + }, + { + "epoch": 0.4960063349184096, + "grad_norm": 209.4825439453125, + "learning_rate": 6.021113462649215e-06, + "loss": 14.7326, + "step": 245540 + }, + { + "epoch": 0.49602653555109344, + "grad_norm": 390.3143615722656, + "learning_rate": 6.020771751063159e-06, + "loss": 22.1797, + "step": 245550 + }, + { + "epoch": 0.49604673618377726, + "grad_norm": 700.5403442382812, + "learning_rate": 6.020430034501986e-06, + "loss": 24.8418, + "step": 245560 + }, + { + "epoch": 0.4960669368164611, + "grad_norm": 308.3287048339844, + "learning_rate": 6.02008831296736e-06, + "loss": 18.195, + "step": 245570 + }, + { + "epoch": 0.4960871374491449, + "grad_norm": 594.5611572265625, + "learning_rate": 6.019746586460947e-06, + "loss": 19.572, + "step": 245580 + }, + { + "epoch": 0.4961073380818287, + "grad_norm": 128.37220764160156, + "learning_rate": 6.019404854984413e-06, + "loss": 18.2383, + "step": 245590 + }, + { + "epoch": 0.49612753871451254, + "grad_norm": 186.79930114746094, + "learning_rate": 6.019063118539425e-06, + "loss": 10.4839, + "step": 245600 + }, + { + "epoch": 0.49614773934719636, + "grad_norm": 349.23187255859375, + "learning_rate": 6.018721377127644e-06, + "loss": 24.1001, + "step": 245610 + }, + { + "epoch": 0.4961679399798802, + "grad_norm": 326.8674621582031, + "learning_rate": 6.018379630750741e-06, + "loss": 13.3707, + 
"step": 245620 + }, + { + "epoch": 0.496188140612564, + "grad_norm": 289.19384765625, + "learning_rate": 6.018037879410379e-06, + "loss": 21.4503, + "step": 245630 + }, + { + "epoch": 0.4962083412452478, + "grad_norm": 779.3234252929688, + "learning_rate": 6.017696123108223e-06, + "loss": 22.3518, + "step": 245640 + }, + { + "epoch": 0.49622854187793164, + "grad_norm": 441.6877746582031, + "learning_rate": 6.01735436184594e-06, + "loss": 34.47, + "step": 245650 + }, + { + "epoch": 0.49624874251061546, + "grad_norm": 401.6028137207031, + "learning_rate": 6.0170125956251935e-06, + "loss": 14.7449, + "step": 245660 + }, + { + "epoch": 0.4962689431432992, + "grad_norm": 321.62713623046875, + "learning_rate": 6.016670824447653e-06, + "loss": 15.5, + "step": 245670 + }, + { + "epoch": 0.49628914377598304, + "grad_norm": 222.53184509277344, + "learning_rate": 6.0163290483149826e-06, + "loss": 14.4939, + "step": 245680 + }, + { + "epoch": 0.49630934440866686, + "grad_norm": 392.558349609375, + "learning_rate": 6.0159872672288464e-06, + "loss": 16.2634, + "step": 245690 + }, + { + "epoch": 0.4963295450413507, + "grad_norm": 132.4796905517578, + "learning_rate": 6.015645481190912e-06, + "loss": 12.5828, + "step": 245700 + }, + { + "epoch": 0.4963497456740345, + "grad_norm": 474.45721435546875, + "learning_rate": 6.0153036902028435e-06, + "loss": 28.608, + "step": 245710 + }, + { + "epoch": 0.4963699463067183, + "grad_norm": 367.1876525878906, + "learning_rate": 6.01496189426631e-06, + "loss": 28.0048, + "step": 245720 + }, + { + "epoch": 0.49639014693940214, + "grad_norm": 465.5663146972656, + "learning_rate": 6.014620093382975e-06, + "loss": 24.2542, + "step": 245730 + }, + { + "epoch": 0.49641034757208596, + "grad_norm": 300.58331298828125, + "learning_rate": 6.014278287554503e-06, + "loss": 23.917, + "step": 245740 + }, + { + "epoch": 0.4964305482047698, + "grad_norm": 362.2306823730469, + "learning_rate": 6.013936476782563e-06, + "loss": 23.4033, + "step": 245750 + }, + { + "epoch": 0.4964507488374536, + "grad_norm": 233.8451385498047, + "learning_rate": 6.01359466106882e-06, + "loss": 17.0515, + "step": 245760 + }, + { + "epoch": 0.4964709494701374, + "grad_norm": 190.70643615722656, + "learning_rate": 6.013252840414938e-06, + "loss": 15.8295, + "step": 245770 + }, + { + "epoch": 0.49649115010282124, + "grad_norm": 432.47198486328125, + "learning_rate": 6.012911014822586e-06, + "loss": 18.9468, + "step": 245780 + }, + { + "epoch": 0.49651135073550506, + "grad_norm": 445.8046875, + "learning_rate": 6.012569184293427e-06, + "loss": 16.7549, + "step": 245790 + }, + { + "epoch": 0.49653155136818883, + "grad_norm": 206.34117126464844, + "learning_rate": 6.01222734882913e-06, + "loss": 17.8298, + "step": 245800 + }, + { + "epoch": 0.49655175200087265, + "grad_norm": 230.3909454345703, + "learning_rate": 6.0118855084313595e-06, + "loss": 16.422, + "step": 245810 + }, + { + "epoch": 0.49657195263355647, + "grad_norm": 522.9651489257812, + "learning_rate": 6.011543663101781e-06, + "loss": 23.1855, + "step": 245820 + }, + { + "epoch": 0.4965921532662403, + "grad_norm": 408.2215881347656, + "learning_rate": 6.011201812842062e-06, + "loss": 10.4352, + "step": 245830 + }, + { + "epoch": 0.4966123538989241, + "grad_norm": 618.5833740234375, + "learning_rate": 6.010859957653869e-06, + "loss": 28.716, + "step": 245840 + }, + { + "epoch": 0.49663255453160793, + "grad_norm": 220.0326690673828, + "learning_rate": 6.010518097538866e-06, + "loss": 11.8949, + "step": 245850 + }, + { + "epoch": 0.49665275516429175, + 
"grad_norm": 402.2642822265625, + "learning_rate": 6.010176232498719e-06, + "loss": 22.2256, + "step": 245860 + }, + { + "epoch": 0.49667295579697557, + "grad_norm": 377.92431640625, + "learning_rate": 6.009834362535097e-06, + "loss": 18.0756, + "step": 245870 + }, + { + "epoch": 0.4966931564296594, + "grad_norm": 485.857421875, + "learning_rate": 6.009492487649666e-06, + "loss": 16.4151, + "step": 245880 + }, + { + "epoch": 0.4967133570623432, + "grad_norm": 932.8764038085938, + "learning_rate": 6.00915060784409e-06, + "loss": 23.2241, + "step": 245890 + }, + { + "epoch": 0.49673355769502703, + "grad_norm": 539.0462036132812, + "learning_rate": 6.008808723120035e-06, + "loss": 22.4672, + "step": 245900 + }, + { + "epoch": 0.49675375832771085, + "grad_norm": 514.6181030273438, + "learning_rate": 6.0084668334791695e-06, + "loss": 13.4019, + "step": 245910 + }, + { + "epoch": 0.4967739589603946, + "grad_norm": 276.28265380859375, + "learning_rate": 6.0081249389231615e-06, + "loss": 15.6526, + "step": 245920 + }, + { + "epoch": 0.49679415959307843, + "grad_norm": 175.6931610107422, + "learning_rate": 6.00778303945367e-06, + "loss": 11.918, + "step": 245930 + }, + { + "epoch": 0.49681436022576225, + "grad_norm": 493.6187438964844, + "learning_rate": 6.007441135072371e-06, + "loss": 36.7857, + "step": 245940 + }, + { + "epoch": 0.4968345608584461, + "grad_norm": 29.675748825073242, + "learning_rate": 6.007099225780922e-06, + "loss": 26.3928, + "step": 245950 + }, + { + "epoch": 0.4968547614911299, + "grad_norm": 336.9743957519531, + "learning_rate": 6.0067573115809965e-06, + "loss": 29.182, + "step": 245960 + }, + { + "epoch": 0.4968749621238137, + "grad_norm": 319.0456237792969, + "learning_rate": 6.006415392474256e-06, + "loss": 22.9448, + "step": 245970 + }, + { + "epoch": 0.49689516275649753, + "grad_norm": 780.112548828125, + "learning_rate": 6.00607346846237e-06, + "loss": 29.6289, + "step": 245980 + }, + { + "epoch": 0.49691536338918135, + "grad_norm": 108.16007232666016, + "learning_rate": 6.005731539547004e-06, + "loss": 23.4389, + "step": 245990 + }, + { + "epoch": 0.4969355640218652, + "grad_norm": 53.6325569152832, + "learning_rate": 6.005389605729824e-06, + "loss": 13.6468, + "step": 246000 + }, + { + "epoch": 0.496955764654549, + "grad_norm": 2831.445556640625, + "learning_rate": 6.005047667012498e-06, + "loss": 34.1367, + "step": 246010 + }, + { + "epoch": 0.4969759652872328, + "grad_norm": 519.7115478515625, + "learning_rate": 6.00470572339669e-06, + "loss": 36.3964, + "step": 246020 + }, + { + "epoch": 0.49699616591991663, + "grad_norm": 222.3622589111328, + "learning_rate": 6.004363774884069e-06, + "loss": 11.4347, + "step": 246030 + }, + { + "epoch": 0.49701636655260045, + "grad_norm": 364.50537109375, + "learning_rate": 6.0040218214763e-06, + "loss": 27.0293, + "step": 246040 + }, + { + "epoch": 0.4970365671852842, + "grad_norm": 283.5711669921875, + "learning_rate": 6.003679863175052e-06, + "loss": 14.0792, + "step": 246050 + }, + { + "epoch": 0.49705676781796804, + "grad_norm": 416.9042053222656, + "learning_rate": 6.003337899981989e-06, + "loss": 14.5198, + "step": 246060 + }, + { + "epoch": 0.49707696845065186, + "grad_norm": 477.2958679199219, + "learning_rate": 6.002995931898779e-06, + "loss": 25.2685, + "step": 246070 + }, + { + "epoch": 0.4970971690833357, + "grad_norm": 440.80322265625, + "learning_rate": 6.00265395892709e-06, + "loss": 21.909, + "step": 246080 + }, + { + "epoch": 0.4971173697160195, + "grad_norm": 544.5230102539062, + "learning_rate": 
6.002311981068584e-06, + "loss": 27.2489, + "step": 246090 + }, + { + "epoch": 0.4971375703487033, + "grad_norm": 136.77589416503906, + "learning_rate": 6.001969998324932e-06, + "loss": 31.3055, + "step": 246100 + }, + { + "epoch": 0.49715777098138714, + "grad_norm": 658.7988891601562, + "learning_rate": 6.0016280106978e-06, + "loss": 31.7758, + "step": 246110 + }, + { + "epoch": 0.49717797161407096, + "grad_norm": 379.6677551269531, + "learning_rate": 6.001286018188856e-06, + "loss": 13.2379, + "step": 246120 + }, + { + "epoch": 0.4971981722467548, + "grad_norm": 260.6626892089844, + "learning_rate": 6.000944020799764e-06, + "loss": 8.8193, + "step": 246130 + }, + { + "epoch": 0.4972183728794386, + "grad_norm": 377.12347412109375, + "learning_rate": 6.000602018532193e-06, + "loss": 26.2481, + "step": 246140 + }, + { + "epoch": 0.4972385735121224, + "grad_norm": 6.269888877868652, + "learning_rate": 6.000260011387809e-06, + "loss": 17.3226, + "step": 246150 + }, + { + "epoch": 0.49725877414480624, + "grad_norm": 206.4148406982422, + "learning_rate": 5.999917999368278e-06, + "loss": 22.2819, + "step": 246160 + }, + { + "epoch": 0.49727897477749006, + "grad_norm": 248.40609741210938, + "learning_rate": 5.999575982475269e-06, + "loss": 26.5883, + "step": 246170 + }, + { + "epoch": 0.4972991754101738, + "grad_norm": 159.02389526367188, + "learning_rate": 5.999233960710447e-06, + "loss": 15.2111, + "step": 246180 + }, + { + "epoch": 0.49731937604285764, + "grad_norm": 361.6206359863281, + "learning_rate": 5.99889193407548e-06, + "loss": 13.3076, + "step": 246190 + }, + { + "epoch": 0.49733957667554146, + "grad_norm": 463.2092590332031, + "learning_rate": 5.9985499025720354e-06, + "loss": 16.4278, + "step": 246200 + }, + { + "epoch": 0.4973597773082253, + "grad_norm": 1122.7664794921875, + "learning_rate": 5.998207866201781e-06, + "loss": 51.8289, + "step": 246210 + }, + { + "epoch": 0.4973799779409091, + "grad_norm": 850.292236328125, + "learning_rate": 5.99786582496638e-06, + "loss": 25.6876, + "step": 246220 + }, + { + "epoch": 0.4974001785735929, + "grad_norm": 540.2310180664062, + "learning_rate": 5.9975237788675034e-06, + "loss": 20.2985, + "step": 246230 + }, + { + "epoch": 0.49742037920627674, + "grad_norm": 601.8073120117188, + "learning_rate": 5.997181727906816e-06, + "loss": 27.2044, + "step": 246240 + }, + { + "epoch": 0.49744057983896056, + "grad_norm": 1.2072869539260864, + "learning_rate": 5.996839672085986e-06, + "loss": 17.3348, + "step": 246250 + }, + { + "epoch": 0.4974607804716444, + "grad_norm": 472.5961608886719, + "learning_rate": 5.996497611406682e-06, + "loss": 20.9375, + "step": 246260 + }, + { + "epoch": 0.4974809811043282, + "grad_norm": 66.0411605834961, + "learning_rate": 5.996155545870566e-06, + "loss": 7.7181, + "step": 246270 + }, + { + "epoch": 0.497501181737012, + "grad_norm": 386.55584716796875, + "learning_rate": 5.995813475479313e-06, + "loss": 10.0573, + "step": 246280 + }, + { + "epoch": 0.49752138236969584, + "grad_norm": 406.94683837890625, + "learning_rate": 5.995471400234584e-06, + "loss": 13.8961, + "step": 246290 + }, + { + "epoch": 0.49754158300237966, + "grad_norm": 317.17633056640625, + "learning_rate": 5.995129320138047e-06, + "loss": 11.3014, + "step": 246300 + }, + { + "epoch": 0.4975617836350634, + "grad_norm": 399.6728820800781, + "learning_rate": 5.994787235191372e-06, + "loss": 13.9557, + "step": 246310 + }, + { + "epoch": 0.49758198426774725, + "grad_norm": 367.6829833984375, + "learning_rate": 5.994445145396223e-06, + "loss": 23.8694, + 
"step": 246320 + }, + { + "epoch": 0.49760218490043107, + "grad_norm": 453.9792785644531, + "learning_rate": 5.994103050754271e-06, + "loss": 22.6538, + "step": 246330 + }, + { + "epoch": 0.4976223855331149, + "grad_norm": 559.6007690429688, + "learning_rate": 5.99376095126718e-06, + "loss": 26.9563, + "step": 246340 + }, + { + "epoch": 0.4976425861657987, + "grad_norm": 649.7808227539062, + "learning_rate": 5.993418846936619e-06, + "loss": 35.5436, + "step": 246350 + }, + { + "epoch": 0.4976627867984825, + "grad_norm": 264.1085510253906, + "learning_rate": 5.993076737764254e-06, + "loss": 28.9309, + "step": 246360 + }, + { + "epoch": 0.49768298743116635, + "grad_norm": 277.17120361328125, + "learning_rate": 5.9927346237517554e-06, + "loss": 17.9807, + "step": 246370 + }, + { + "epoch": 0.49770318806385017, + "grad_norm": 223.109130859375, + "learning_rate": 5.992392504900786e-06, + "loss": 11.1396, + "step": 246380 + }, + { + "epoch": 0.497723388696534, + "grad_norm": 549.0020751953125, + "learning_rate": 5.9920503812130196e-06, + "loss": 20.5239, + "step": 246390 + }, + { + "epoch": 0.4977435893292178, + "grad_norm": 256.79583740234375, + "learning_rate": 5.991708252690117e-06, + "loss": 22.6415, + "step": 246400 + }, + { + "epoch": 0.4977637899619016, + "grad_norm": 874.1477661132812, + "learning_rate": 5.991366119333749e-06, + "loss": 19.1742, + "step": 246410 + }, + { + "epoch": 0.49778399059458545, + "grad_norm": 209.1161346435547, + "learning_rate": 5.991023981145585e-06, + "loss": 16.0386, + "step": 246420 + }, + { + "epoch": 0.4978041912272692, + "grad_norm": 412.027587890625, + "learning_rate": 5.990681838127287e-06, + "loss": 10.3335, + "step": 246430 + }, + { + "epoch": 0.49782439185995303, + "grad_norm": 180.9374542236328, + "learning_rate": 5.990339690280528e-06, + "loss": 15.9683, + "step": 246440 + }, + { + "epoch": 0.49784459249263685, + "grad_norm": 772.1251831054688, + "learning_rate": 5.989997537606973e-06, + "loss": 27.9863, + "step": 246450 + }, + { + "epoch": 0.49786479312532067, + "grad_norm": 225.46536254882812, + "learning_rate": 5.9896553801082906e-06, + "loss": 14.2124, + "step": 246460 + }, + { + "epoch": 0.4978849937580045, + "grad_norm": 174.2469482421875, + "learning_rate": 5.989313217786146e-06, + "loss": 18.991, + "step": 246470 + }, + { + "epoch": 0.4979051943906883, + "grad_norm": 240.0699462890625, + "learning_rate": 5.988971050642211e-06, + "loss": 22.1914, + "step": 246480 + }, + { + "epoch": 0.49792539502337213, + "grad_norm": 232.75013732910156, + "learning_rate": 5.98862887867815e-06, + "loss": 20.932, + "step": 246490 + }, + { + "epoch": 0.49794559565605595, + "grad_norm": 236.2301788330078, + "learning_rate": 5.988286701895631e-06, + "loss": 29.7626, + "step": 246500 + }, + { + "epoch": 0.49796579628873977, + "grad_norm": 1126.671875, + "learning_rate": 5.987944520296324e-06, + "loss": 17.8402, + "step": 246510 + }, + { + "epoch": 0.4979859969214236, + "grad_norm": 151.82958984375, + "learning_rate": 5.987602333881894e-06, + "loss": 6.5217, + "step": 246520 + }, + { + "epoch": 0.4980061975541074, + "grad_norm": 62.12467956542969, + "learning_rate": 5.987260142654013e-06, + "loss": 18.377, + "step": 246530 + }, + { + "epoch": 0.49802639818679123, + "grad_norm": 371.71392822265625, + "learning_rate": 5.986917946614341e-06, + "loss": 10.6581, + "step": 246540 + }, + { + "epoch": 0.49804659881947505, + "grad_norm": 400.0055847167969, + "learning_rate": 5.986575745764553e-06, + "loss": 22.4769, + "step": 246550 + }, + { + "epoch": 0.4980667994521588, 
+ "grad_norm": 335.1949157714844, + "learning_rate": 5.986233540106315e-06, + "loss": 21.2484, + "step": 246560 + }, + { + "epoch": 0.49808700008484263, + "grad_norm": 464.89678955078125, + "learning_rate": 5.985891329641294e-06, + "loss": 23.757, + "step": 246570 + }, + { + "epoch": 0.49810720071752645, + "grad_norm": 457.4198303222656, + "learning_rate": 5.985549114371158e-06, + "loss": 27.9353, + "step": 246580 + }, + { + "epoch": 0.4981274013502103, + "grad_norm": 350.82025146484375, + "learning_rate": 5.985206894297575e-06, + "loss": 25.8677, + "step": 246590 + }, + { + "epoch": 0.4981476019828941, + "grad_norm": 319.5579833984375, + "learning_rate": 5.984864669422214e-06, + "loss": 21.2135, + "step": 246600 + }, + { + "epoch": 0.4981678026155779, + "grad_norm": 274.120849609375, + "learning_rate": 5.9845224397467415e-06, + "loss": 20.1567, + "step": 246610 + }, + { + "epoch": 0.49818800324826173, + "grad_norm": 688.2853393554688, + "learning_rate": 5.984180205272826e-06, + "loss": 36.7907, + "step": 246620 + }, + { + "epoch": 0.49820820388094555, + "grad_norm": 60.4741096496582, + "learning_rate": 5.9838379660021354e-06, + "loss": 22.8639, + "step": 246630 + }, + { + "epoch": 0.4982284045136294, + "grad_norm": 178.92361450195312, + "learning_rate": 5.983495721936337e-06, + "loss": 18.5735, + "step": 246640 + }, + { + "epoch": 0.4982486051463132, + "grad_norm": 42.693153381347656, + "learning_rate": 5.9831534730771e-06, + "loss": 18.6057, + "step": 246650 + }, + { + "epoch": 0.498268805778997, + "grad_norm": 119.7516860961914, + "learning_rate": 5.982811219426095e-06, + "loss": 8.0343, + "step": 246660 + }, + { + "epoch": 0.49828900641168083, + "grad_norm": 23.96198844909668, + "learning_rate": 5.982468960984984e-06, + "loss": 23.6935, + "step": 246670 + }, + { + "epoch": 0.49830920704436465, + "grad_norm": 732.6522827148438, + "learning_rate": 5.9821266977554395e-06, + "loss": 24.4514, + "step": 246680 + }, + { + "epoch": 0.4983294076770484, + "grad_norm": 169.86744689941406, + "learning_rate": 5.981784429739129e-06, + "loss": 8.1248, + "step": 246690 + }, + { + "epoch": 0.49834960830973224, + "grad_norm": 861.8601684570312, + "learning_rate": 5.98144215693772e-06, + "loss": 22.5713, + "step": 246700 + }, + { + "epoch": 0.49836980894241606, + "grad_norm": 105.1708984375, + "learning_rate": 5.981099879352882e-06, + "loss": 11.4246, + "step": 246710 + }, + { + "epoch": 0.4983900095750999, + "grad_norm": 41.55343246459961, + "learning_rate": 5.9807575969862796e-06, + "loss": 12.4922, + "step": 246720 + }, + { + "epoch": 0.4984102102077837, + "grad_norm": 314.224853515625, + "learning_rate": 5.980415309839586e-06, + "loss": 21.6313, + "step": 246730 + }, + { + "epoch": 0.4984304108404675, + "grad_norm": 238.9948272705078, + "learning_rate": 5.9800730179144665e-06, + "loss": 18.5021, + "step": 246740 + }, + { + "epoch": 0.49845061147315134, + "grad_norm": 353.32952880859375, + "learning_rate": 5.979730721212589e-06, + "loss": 20.3609, + "step": 246750 + }, + { + "epoch": 0.49847081210583516, + "grad_norm": 521.1556396484375, + "learning_rate": 5.979388419735625e-06, + "loss": 12.2863, + "step": 246760 + }, + { + "epoch": 0.498491012738519, + "grad_norm": 656.0430297851562, + "learning_rate": 5.979046113485237e-06, + "loss": 21.5405, + "step": 246770 + }, + { + "epoch": 0.4985112133712028, + "grad_norm": 234.09738159179688, + "learning_rate": 5.978703802463101e-06, + "loss": 22.0934, + "step": 246780 + }, + { + "epoch": 0.4985314140038866, + "grad_norm": 338.0233459472656, + 
"learning_rate": 5.9783614866708785e-06, + "loss": 27.4046, + "step": 246790 + }, + { + "epoch": 0.49855161463657044, + "grad_norm": 473.8854064941406, + "learning_rate": 5.978019166110242e-06, + "loss": 16.6194, + "step": 246800 + }, + { + "epoch": 0.49857181526925426, + "grad_norm": 235.27053833007812, + "learning_rate": 5.977676840782858e-06, + "loss": 29.6591, + "step": 246810 + }, + { + "epoch": 0.498592015901938, + "grad_norm": 297.90838623046875, + "learning_rate": 5.977334510690397e-06, + "loss": 21.9008, + "step": 246820 + }, + { + "epoch": 0.49861221653462184, + "grad_norm": 1039.3126220703125, + "learning_rate": 5.9769921758345254e-06, + "loss": 26.4737, + "step": 246830 + }, + { + "epoch": 0.49863241716730566, + "grad_norm": 552.1608276367188, + "learning_rate": 5.976649836216912e-06, + "loss": 15.7347, + "step": 246840 + }, + { + "epoch": 0.4986526177999895, + "grad_norm": 323.9834289550781, + "learning_rate": 5.976307491839226e-06, + "loss": 31.6713, + "step": 246850 + }, + { + "epoch": 0.4986728184326733, + "grad_norm": 341.1391906738281, + "learning_rate": 5.975965142703135e-06, + "loss": 37.8399, + "step": 246860 + }, + { + "epoch": 0.4986930190653571, + "grad_norm": 399.14068603515625, + "learning_rate": 5.97562278881031e-06, + "loss": 16.2871, + "step": 246870 + }, + { + "epoch": 0.49871321969804094, + "grad_norm": 130.58828735351562, + "learning_rate": 5.975280430162416e-06, + "loss": 21.46, + "step": 246880 + }, + { + "epoch": 0.49873342033072476, + "grad_norm": 412.4732666015625, + "learning_rate": 5.974938066761124e-06, + "loss": 17.2512, + "step": 246890 + }, + { + "epoch": 0.4987536209634086, + "grad_norm": 516.6444091796875, + "learning_rate": 5.974595698608103e-06, + "loss": 10.0092, + "step": 246900 + }, + { + "epoch": 0.4987738215960924, + "grad_norm": 509.6756896972656, + "learning_rate": 5.974253325705021e-06, + "loss": 16.4154, + "step": 246910 + }, + { + "epoch": 0.4987940222287762, + "grad_norm": 544.9634399414062, + "learning_rate": 5.973910948053545e-06, + "loss": 231.9874, + "step": 246920 + }, + { + "epoch": 0.49881422286146004, + "grad_norm": 778.5720825195312, + "learning_rate": 5.973568565655345e-06, + "loss": 32.9222, + "step": 246930 + }, + { + "epoch": 0.49883442349414386, + "grad_norm": 67.91162109375, + "learning_rate": 5.973226178512093e-06, + "loss": 11.5947, + "step": 246940 + }, + { + "epoch": 0.49885462412682763, + "grad_norm": 171.77670288085938, + "learning_rate": 5.972883786625452e-06, + "loss": 20.4505, + "step": 246950 + }, + { + "epoch": 0.49887482475951145, + "grad_norm": 252.06382751464844, + "learning_rate": 5.972541389997093e-06, + "loss": 28.5218, + "step": 246960 + }, + { + "epoch": 0.49889502539219527, + "grad_norm": 397.5555114746094, + "learning_rate": 5.972198988628686e-06, + "loss": 18.3177, + "step": 246970 + }, + { + "epoch": 0.4989152260248791, + "grad_norm": 260.4038391113281, + "learning_rate": 5.9718565825219e-06, + "loss": 15.8065, + "step": 246980 + }, + { + "epoch": 0.4989354266575629, + "grad_norm": 368.58477783203125, + "learning_rate": 5.971514171678401e-06, + "loss": 14.7694, + "step": 246990 + }, + { + "epoch": 0.49895562729024673, + "grad_norm": 570.1238403320312, + "learning_rate": 5.97117175609986e-06, + "loss": 22.3038, + "step": 247000 + }, + { + "epoch": 0.49897582792293055, + "grad_norm": 369.3289489746094, + "learning_rate": 5.970829335787946e-06, + "loss": 14.8231, + "step": 247010 + }, + { + "epoch": 0.49899602855561437, + "grad_norm": 505.1964416503906, + "learning_rate": 5.9704869107443285e-06, + 
"loss": 25.5647, + "step": 247020 + }, + { + "epoch": 0.4990162291882982, + "grad_norm": 618.31396484375, + "learning_rate": 5.970144480970676e-06, + "loss": 19.858, + "step": 247030 + }, + { + "epoch": 0.499036429820982, + "grad_norm": 393.52008056640625, + "learning_rate": 5.969802046468655e-06, + "loss": 29.8178, + "step": 247040 + }, + { + "epoch": 0.49905663045366583, + "grad_norm": 172.65785217285156, + "learning_rate": 5.969459607239938e-06, + "loss": 9.4711, + "step": 247050 + }, + { + "epoch": 0.49907683108634965, + "grad_norm": 759.6141357421875, + "learning_rate": 5.969117163286191e-06, + "loss": 46.934, + "step": 247060 + }, + { + "epoch": 0.4990970317190334, + "grad_norm": 377.9792785644531, + "learning_rate": 5.968774714609086e-06, + "loss": 29.1313, + "step": 247070 + }, + { + "epoch": 0.49911723235171723, + "grad_norm": 218.08351135253906, + "learning_rate": 5.96843226121029e-06, + "loss": 13.6984, + "step": 247080 + }, + { + "epoch": 0.49913743298440105, + "grad_norm": 219.00875854492188, + "learning_rate": 5.968089803091471e-06, + "loss": 44.8839, + "step": 247090 + }, + { + "epoch": 0.4991576336170849, + "grad_norm": 50.55318069458008, + "learning_rate": 5.967747340254303e-06, + "loss": 19.8831, + "step": 247100 + }, + { + "epoch": 0.4991778342497687, + "grad_norm": 2.389132499694824, + "learning_rate": 5.967404872700449e-06, + "loss": 23.1413, + "step": 247110 + }, + { + "epoch": 0.4991980348824525, + "grad_norm": 376.08905029296875, + "learning_rate": 5.967062400431583e-06, + "loss": 21.1722, + "step": 247120 + }, + { + "epoch": 0.49921823551513633, + "grad_norm": 579.8134155273438, + "learning_rate": 5.96671992344937e-06, + "loss": 17.7002, + "step": 247130 + }, + { + "epoch": 0.49923843614782015, + "grad_norm": 359.7259826660156, + "learning_rate": 5.966377441755482e-06, + "loss": 24.0683, + "step": 247140 + }, + { + "epoch": 0.499258636780504, + "grad_norm": 246.16751098632812, + "learning_rate": 5.966034955351588e-06, + "loss": 19.4919, + "step": 247150 + }, + { + "epoch": 0.4992788374131878, + "grad_norm": 211.00054931640625, + "learning_rate": 5.965692464239358e-06, + "loss": 22.6834, + "step": 247160 + }, + { + "epoch": 0.4992990380458716, + "grad_norm": 0.9173119068145752, + "learning_rate": 5.965349968420458e-06, + "loss": 14.9866, + "step": 247170 + }, + { + "epoch": 0.49931923867855543, + "grad_norm": 418.03851318359375, + "learning_rate": 5.965007467896561e-06, + "loss": 25.7016, + "step": 247180 + }, + { + "epoch": 0.49933943931123925, + "grad_norm": 340.6460876464844, + "learning_rate": 5.9646649626693335e-06, + "loss": 11.2745, + "step": 247190 + }, + { + "epoch": 0.499359639943923, + "grad_norm": 226.1961669921875, + "learning_rate": 5.964322452740445e-06, + "loss": 27.3938, + "step": 247200 + }, + { + "epoch": 0.49937984057660684, + "grad_norm": 790.119384765625, + "learning_rate": 5.963979938111569e-06, + "loss": 28.7273, + "step": 247210 + }, + { + "epoch": 0.49940004120929066, + "grad_norm": 382.8558654785156, + "learning_rate": 5.9636374187843686e-06, + "loss": 29.2865, + "step": 247220 + }, + { + "epoch": 0.4994202418419745, + "grad_norm": 1164.3673095703125, + "learning_rate": 5.963294894760518e-06, + "loss": 22.1883, + "step": 247230 + }, + { + "epoch": 0.4994404424746583, + "grad_norm": 700.4686889648438, + "learning_rate": 5.962952366041685e-06, + "loss": 16.1192, + "step": 247240 + }, + { + "epoch": 0.4994606431073421, + "grad_norm": 296.78546142578125, + "learning_rate": 5.962609832629538e-06, + "loss": 14.6575, + "step": 247250 + }, + { + 
"epoch": 0.49948084374002594, + "grad_norm": 8.724777221679688, + "learning_rate": 5.962267294525747e-06, + "loss": 26.685, + "step": 247260 + }, + { + "epoch": 0.49950104437270976, + "grad_norm": 466.71270751953125, + "learning_rate": 5.961924751731985e-06, + "loss": 27.2313, + "step": 247270 + }, + { + "epoch": 0.4995212450053936, + "grad_norm": 497.3843078613281, + "learning_rate": 5.961582204249915e-06, + "loss": 30.4072, + "step": 247280 + }, + { + "epoch": 0.4995414456380774, + "grad_norm": 196.78329467773438, + "learning_rate": 5.961239652081211e-06, + "loss": 13.2455, + "step": 247290 + }, + { + "epoch": 0.4995616462707612, + "grad_norm": 404.5957946777344, + "learning_rate": 5.960897095227541e-06, + "loss": 12.8591, + "step": 247300 + }, + { + "epoch": 0.49958184690344504, + "grad_norm": 254.23377990722656, + "learning_rate": 5.960554533690576e-06, + "loss": 27.1197, + "step": 247310 + }, + { + "epoch": 0.49960204753612886, + "grad_norm": 0.0, + "learning_rate": 5.9602119674719846e-06, + "loss": 27.6494, + "step": 247320 + }, + { + "epoch": 0.4996222481688126, + "grad_norm": 210.02853393554688, + "learning_rate": 5.959869396573435e-06, + "loss": 12.8636, + "step": 247330 + }, + { + "epoch": 0.49964244880149644, + "grad_norm": 314.3268737792969, + "learning_rate": 5.959526820996602e-06, + "loss": 17.5871, + "step": 247340 + }, + { + "epoch": 0.49966264943418026, + "grad_norm": 675.576904296875, + "learning_rate": 5.959184240743149e-06, + "loss": 21.24, + "step": 247350 + }, + { + "epoch": 0.4996828500668641, + "grad_norm": 470.36102294921875, + "learning_rate": 5.958841655814749e-06, + "loss": 9.8373, + "step": 247360 + }, + { + "epoch": 0.4997030506995479, + "grad_norm": 463.9069519042969, + "learning_rate": 5.958499066213071e-06, + "loss": 18.167, + "step": 247370 + }, + { + "epoch": 0.4997232513322317, + "grad_norm": 44.60001754760742, + "learning_rate": 5.958156471939783e-06, + "loss": 18.1484, + "step": 247380 + }, + { + "epoch": 0.49974345196491554, + "grad_norm": 105.177490234375, + "learning_rate": 5.95781387299656e-06, + "loss": 23.1097, + "step": 247390 + }, + { + "epoch": 0.49976365259759936, + "grad_norm": 214.38209533691406, + "learning_rate": 5.957471269385065e-06, + "loss": 13.2777, + "step": 247400 + }, + { + "epoch": 0.4997838532302832, + "grad_norm": 262.89080810546875, + "learning_rate": 5.957128661106973e-06, + "loss": 32.9925, + "step": 247410 + }, + { + "epoch": 0.499804053862967, + "grad_norm": 761.3569946289062, + "learning_rate": 5.956786048163951e-06, + "loss": 35.6258, + "step": 247420 + }, + { + "epoch": 0.4998242544956508, + "grad_norm": 776.53662109375, + "learning_rate": 5.9564434305576726e-06, + "loss": 10.4372, + "step": 247430 + }, + { + "epoch": 0.49984445512833464, + "grad_norm": 380.9548034667969, + "learning_rate": 5.956100808289802e-06, + "loss": 8.5099, + "step": 247440 + }, + { + "epoch": 0.49986465576101846, + "grad_norm": 481.3288269042969, + "learning_rate": 5.955758181362012e-06, + "loss": 23.8243, + "step": 247450 + }, + { + "epoch": 0.4998848563937022, + "grad_norm": 415.660888671875, + "learning_rate": 5.955415549775975e-06, + "loss": 34.6802, + "step": 247460 + }, + { + "epoch": 0.49990505702638605, + "grad_norm": 307.20465087890625, + "learning_rate": 5.955072913533357e-06, + "loss": 26.1964, + "step": 247470 + }, + { + "epoch": 0.49992525765906987, + "grad_norm": 128.6803741455078, + "learning_rate": 5.954730272635829e-06, + "loss": 19.4235, + "step": 247480 + }, + { + "epoch": 0.4999454582917537, + "grad_norm": 169.08700561523438, 
+ "learning_rate": 5.954387627085061e-06, + "loss": 13.3309, + "step": 247490 + }, + { + "epoch": 0.4999656589244375, + "grad_norm": 7.09989070892334, + "learning_rate": 5.954044976882725e-06, + "loss": 10.8221, + "step": 247500 + }, + { + "epoch": 0.4999858595571213, + "grad_norm": 669.5408325195312, + "learning_rate": 5.953702322030489e-06, + "loss": 12.0256, + "step": 247510 + }, + { + "epoch": 0.5000060601898051, + "grad_norm": 289.7550964355469, + "learning_rate": 5.9533596625300224e-06, + "loss": 12.2125, + "step": 247520 + }, + { + "epoch": 0.5000262608224889, + "grad_norm": 31.32509422302246, + "learning_rate": 5.9530169983829974e-06, + "loss": 17.0451, + "step": 247530 + }, + { + "epoch": 0.5000464614551727, + "grad_norm": 276.2655334472656, + "learning_rate": 5.952674329591083e-06, + "loss": 13.5936, + "step": 247540 + }, + { + "epoch": 0.5000666620878566, + "grad_norm": 341.00665283203125, + "learning_rate": 5.952331656155951e-06, + "loss": 18.6661, + "step": 247550 + }, + { + "epoch": 0.5000868627205404, + "grad_norm": 1379.0341796875, + "learning_rate": 5.951988978079268e-06, + "loss": 19.0653, + "step": 247560 + }, + { + "epoch": 0.5001070633532242, + "grad_norm": 607.9564208984375, + "learning_rate": 5.951646295362706e-06, + "loss": 22.1378, + "step": 247570 + }, + { + "epoch": 0.500127263985908, + "grad_norm": 489.8138732910156, + "learning_rate": 5.951303608007936e-06, + "loss": 27.8729, + "step": 247580 + }, + { + "epoch": 0.5001474646185918, + "grad_norm": 111.98165130615234, + "learning_rate": 5.950960916016629e-06, + "loss": 21.304, + "step": 247590 + }, + { + "epoch": 0.5001676652512757, + "grad_norm": 435.2226257324219, + "learning_rate": 5.950618219390451e-06, + "loss": 19.9937, + "step": 247600 + }, + { + "epoch": 0.5001878658839595, + "grad_norm": 249.712646484375, + "learning_rate": 5.9502755181310774e-06, + "loss": 12.5553, + "step": 247610 + }, + { + "epoch": 0.5002080665166433, + "grad_norm": 714.4030151367188, + "learning_rate": 5.949932812240176e-06, + "loss": 14.9255, + "step": 247620 + }, + { + "epoch": 0.5002282671493271, + "grad_norm": 16.345062255859375, + "learning_rate": 5.949590101719416e-06, + "loss": 18.0893, + "step": 247630 + }, + { + "epoch": 0.5002484677820109, + "grad_norm": 10.792337417602539, + "learning_rate": 5.949247386570471e-06, + "loss": 27.6014, + "step": 247640 + }, + { + "epoch": 0.5002686684146948, + "grad_norm": 188.4339599609375, + "learning_rate": 5.948904666795007e-06, + "loss": 22.4003, + "step": 247650 + }, + { + "epoch": 0.5002888690473786, + "grad_norm": 235.25634765625, + "learning_rate": 5.948561942394698e-06, + "loss": 12.6585, + "step": 247660 + }, + { + "epoch": 0.5003090696800624, + "grad_norm": 179.9253692626953, + "learning_rate": 5.948219213371212e-06, + "loss": 20.8502, + "step": 247670 + }, + { + "epoch": 0.5003292703127462, + "grad_norm": 292.9441833496094, + "learning_rate": 5.9478764797262225e-06, + "loss": 14.198, + "step": 247680 + }, + { + "epoch": 0.50034947094543, + "grad_norm": 399.9299621582031, + "learning_rate": 5.947533741461398e-06, + "loss": 24.5034, + "step": 247690 + }, + { + "epoch": 0.5003696715781139, + "grad_norm": 0.9099647402763367, + "learning_rate": 5.947190998578407e-06, + "loss": 18.4859, + "step": 247700 + }, + { + "epoch": 0.5003898722107977, + "grad_norm": 853.4749755859375, + "learning_rate": 5.946848251078924e-06, + "loss": 29.3719, + "step": 247710 + }, + { + "epoch": 0.5004100728434815, + "grad_norm": 336.0867614746094, + "learning_rate": 5.946505498964616e-06, + "loss": 9.8737, 
+ "step": 247720 + }, + { + "epoch": 0.5004302734761653, + "grad_norm": 517.2442016601562, + "learning_rate": 5.9461627422371545e-06, + "loss": 20.1705, + "step": 247730 + }, + { + "epoch": 0.5004504741088491, + "grad_norm": 265.9022216796875, + "learning_rate": 5.945819980898212e-06, + "loss": 26.8305, + "step": 247740 + }, + { + "epoch": 0.500470674741533, + "grad_norm": 350.12762451171875, + "learning_rate": 5.945477214949457e-06, + "loss": 16.4993, + "step": 247750 + }, + { + "epoch": 0.5004908753742168, + "grad_norm": 357.43536376953125, + "learning_rate": 5.945134444392561e-06, + "loss": 12.5029, + "step": 247760 + }, + { + "epoch": 0.5005110760069006, + "grad_norm": 229.77108764648438, + "learning_rate": 5.944791669229195e-06, + "loss": 36.2875, + "step": 247770 + }, + { + "epoch": 0.5005312766395843, + "grad_norm": 132.93592834472656, + "learning_rate": 5.944448889461027e-06, + "loss": 18.5758, + "step": 247780 + }, + { + "epoch": 0.5005514772722681, + "grad_norm": 164.86135864257812, + "learning_rate": 5.9441061050897304e-06, + "loss": 19.3645, + "step": 247790 + }, + { + "epoch": 0.5005716779049519, + "grad_norm": 75.09053802490234, + "learning_rate": 5.943763316116977e-06, + "loss": 13.1365, + "step": 247800 + }, + { + "epoch": 0.5005918785376358, + "grad_norm": 34.7217903137207, + "learning_rate": 5.943420522544433e-06, + "loss": 12.3629, + "step": 247810 + }, + { + "epoch": 0.5006120791703196, + "grad_norm": 379.7716064453125, + "learning_rate": 5.9430777243737744e-06, + "loss": 22.4949, + "step": 247820 + }, + { + "epoch": 0.5006322798030034, + "grad_norm": 560.445068359375, + "learning_rate": 5.942734921606667e-06, + "loss": 32.3667, + "step": 247830 + }, + { + "epoch": 0.5006524804356872, + "grad_norm": 584.4889526367188, + "learning_rate": 5.942392114244786e-06, + "loss": 16.6033, + "step": 247840 + }, + { + "epoch": 0.500672681068371, + "grad_norm": 98.14608764648438, + "learning_rate": 5.942049302289798e-06, + "loss": 21.5487, + "step": 247850 + }, + { + "epoch": 0.5006928817010549, + "grad_norm": 156.64923095703125, + "learning_rate": 5.941706485743377e-06, + "loss": 19.767, + "step": 247860 + }, + { + "epoch": 0.5007130823337387, + "grad_norm": 192.50341796875, + "learning_rate": 5.941363664607193e-06, + "loss": 24.0534, + "step": 247870 + }, + { + "epoch": 0.5007332829664225, + "grad_norm": 495.8795471191406, + "learning_rate": 5.9410208388829174e-06, + "loss": 18.2998, + "step": 247880 + }, + { + "epoch": 0.5007534835991063, + "grad_norm": 505.1311950683594, + "learning_rate": 5.9406780085722194e-06, + "loss": 24.7423, + "step": 247890 + }, + { + "epoch": 0.5007736842317901, + "grad_norm": 491.1490478515625, + "learning_rate": 5.94033517367677e-06, + "loss": 24.222, + "step": 247900 + }, + { + "epoch": 0.500793884864474, + "grad_norm": 531.7823486328125, + "learning_rate": 5.939992334198242e-06, + "loss": 18.489, + "step": 247910 + }, + { + "epoch": 0.5008140854971578, + "grad_norm": 11.863853454589844, + "learning_rate": 5.939649490138305e-06, + "loss": 18.112, + "step": 247920 + }, + { + "epoch": 0.5008342861298416, + "grad_norm": 398.89837646484375, + "learning_rate": 5.939306641498632e-06, + "loss": 12.7243, + "step": 247930 + }, + { + "epoch": 0.5008544867625254, + "grad_norm": 379.3194274902344, + "learning_rate": 5.938963788280889e-06, + "loss": 21.9992, + "step": 247940 + }, + { + "epoch": 0.5008746873952092, + "grad_norm": 519.755859375, + "learning_rate": 5.938620930486754e-06, + "loss": 22.7895, + "step": 247950 + }, + { + "epoch": 0.5008948880278931, + 
"grad_norm": 734.8353271484375, + "learning_rate": 5.9382780681178935e-06, + "loss": 20.345, + "step": 247960 + }, + { + "epoch": 0.5009150886605769, + "grad_norm": 435.9640197753906, + "learning_rate": 5.9379352011759775e-06, + "loss": 18.0929, + "step": 247970 + }, + { + "epoch": 0.5009352892932607, + "grad_norm": 293.85467529296875, + "learning_rate": 5.9375923296626815e-06, + "loss": 17.6831, + "step": 247980 + }, + { + "epoch": 0.5009554899259445, + "grad_norm": 534.4434204101562, + "learning_rate": 5.937249453579672e-06, + "loss": 22.6555, + "step": 247990 + }, + { + "epoch": 0.5009756905586283, + "grad_norm": 575.6947631835938, + "learning_rate": 5.936906572928625e-06, + "loss": 15.8428, + "step": 248000 + }, + { + "epoch": 0.5009958911913122, + "grad_norm": 724.4385986328125, + "learning_rate": 5.936563687711206e-06, + "loss": 26.7899, + "step": 248010 + }, + { + "epoch": 0.501016091823996, + "grad_norm": 549.7718505859375, + "learning_rate": 5.936220797929091e-06, + "loss": 17.8817, + "step": 248020 + }, + { + "epoch": 0.5010362924566797, + "grad_norm": 364.80584716796875, + "learning_rate": 5.935877903583949e-06, + "loss": 16.6788, + "step": 248030 + }, + { + "epoch": 0.5010564930893635, + "grad_norm": 204.8345489501953, + "learning_rate": 5.9355350046774515e-06, + "loss": 6.9864, + "step": 248040 + }, + { + "epoch": 0.5010766937220473, + "grad_norm": 596.9667358398438, + "learning_rate": 5.93519210121127e-06, + "loss": 39.3945, + "step": 248050 + }, + { + "epoch": 0.5010968943547311, + "grad_norm": 300.4778137207031, + "learning_rate": 5.934849193187075e-06, + "loss": 26.5876, + "step": 248060 + }, + { + "epoch": 0.501117094987415, + "grad_norm": 248.50503540039062, + "learning_rate": 5.93450628060654e-06, + "loss": 13.7055, + "step": 248070 + }, + { + "epoch": 0.5011372956200988, + "grad_norm": 480.83367919921875, + "learning_rate": 5.934163363471333e-06, + "loss": 16.9877, + "step": 248080 + }, + { + "epoch": 0.5011574962527826, + "grad_norm": 191.77659606933594, + "learning_rate": 5.933820441783129e-06, + "loss": 16.6442, + "step": 248090 + }, + { + "epoch": 0.5011776968854664, + "grad_norm": 324.451904296875, + "learning_rate": 5.933477515543595e-06, + "loss": 23.7166, + "step": 248100 + }, + { + "epoch": 0.5011978975181502, + "grad_norm": 454.1504821777344, + "learning_rate": 5.933134584754407e-06, + "loss": 19.2951, + "step": 248110 + }, + { + "epoch": 0.5012180981508341, + "grad_norm": 866.717529296875, + "learning_rate": 5.932791649417233e-06, + "loss": 31.053, + "step": 248120 + }, + { + "epoch": 0.5012382987835179, + "grad_norm": 1051.307861328125, + "learning_rate": 5.932448709533746e-06, + "loss": 19.5458, + "step": 248130 + }, + { + "epoch": 0.5012584994162017, + "grad_norm": 777.1536254882812, + "learning_rate": 5.932105765105618e-06, + "loss": 15.0236, + "step": 248140 + }, + { + "epoch": 0.5012787000488855, + "grad_norm": 476.7993469238281, + "learning_rate": 5.931762816134517e-06, + "loss": 25.5232, + "step": 248150 + }, + { + "epoch": 0.5012989006815693, + "grad_norm": 572.0438842773438, + "learning_rate": 5.9314198626221185e-06, + "loss": 16.7977, + "step": 248160 + }, + { + "epoch": 0.5013191013142532, + "grad_norm": 373.8711242675781, + "learning_rate": 5.931076904570094e-06, + "loss": 16.8697, + "step": 248170 + }, + { + "epoch": 0.501339301946937, + "grad_norm": 1195.9730224609375, + "learning_rate": 5.930733941980111e-06, + "loss": 26.0967, + "step": 248180 + }, + { + "epoch": 0.5013595025796208, + "grad_norm": 405.83148193359375, + "learning_rate": 
5.9303909748538444e-06, + "loss": 22.255, + "step": 248190 + }, + { + "epoch": 0.5013797032123046, + "grad_norm": 368.86798095703125, + "learning_rate": 5.930048003192965e-06, + "loss": 24.7736, + "step": 248200 + }, + { + "epoch": 0.5013999038449884, + "grad_norm": 348.43695068359375, + "learning_rate": 5.929705026999145e-06, + "loss": 22.9456, + "step": 248210 + }, + { + "epoch": 0.5014201044776723, + "grad_norm": 545.2449340820312, + "learning_rate": 5.929362046274057e-06, + "loss": 17.2474, + "step": 248220 + }, + { + "epoch": 0.5014403051103561, + "grad_norm": 556.1205444335938, + "learning_rate": 5.929019061019369e-06, + "loss": 18.0067, + "step": 248230 + }, + { + "epoch": 0.5014605057430399, + "grad_norm": 574.1353149414062, + "learning_rate": 5.928676071236756e-06, + "loss": 24.04, + "step": 248240 + }, + { + "epoch": 0.5014807063757237, + "grad_norm": 158.31907653808594, + "learning_rate": 5.928333076927888e-06, + "loss": 16.593, + "step": 248250 + }, + { + "epoch": 0.5015009070084075, + "grad_norm": 493.0557556152344, + "learning_rate": 5.927990078094435e-06, + "loss": 20.1031, + "step": 248260 + }, + { + "epoch": 0.5015211076410914, + "grad_norm": 412.5963134765625, + "learning_rate": 5.927647074738074e-06, + "loss": 15.0178, + "step": 248270 + }, + { + "epoch": 0.5015413082737752, + "grad_norm": 653.412841796875, + "learning_rate": 5.927304066860471e-06, + "loss": 20.8307, + "step": 248280 + }, + { + "epoch": 0.5015615089064589, + "grad_norm": 467.2701416015625, + "learning_rate": 5.926961054463303e-06, + "loss": 21.3891, + "step": 248290 + }, + { + "epoch": 0.5015817095391427, + "grad_norm": 370.8492431640625, + "learning_rate": 5.926618037548237e-06, + "loss": 13.9218, + "step": 248300 + }, + { + "epoch": 0.5016019101718265, + "grad_norm": 258.85211181640625, + "learning_rate": 5.926275016116949e-06, + "loss": 18.0736, + "step": 248310 + }, + { + "epoch": 0.5016221108045104, + "grad_norm": 275.01434326171875, + "learning_rate": 5.925931990171109e-06, + "loss": 17.8099, + "step": 248320 + }, + { + "epoch": 0.5016423114371942, + "grad_norm": 391.4570007324219, + "learning_rate": 5.925588959712387e-06, + "loss": 13.1118, + "step": 248330 + }, + { + "epoch": 0.501662512069878, + "grad_norm": 655.0743408203125, + "learning_rate": 5.925245924742458e-06, + "loss": 30.9306, + "step": 248340 + }, + { + "epoch": 0.5016827127025618, + "grad_norm": 332.4405822753906, + "learning_rate": 5.924902885262992e-06, + "loss": 17.6808, + "step": 248350 + }, + { + "epoch": 0.5017029133352456, + "grad_norm": 107.97994995117188, + "learning_rate": 5.924559841275661e-06, + "loss": 41.9068, + "step": 248360 + }, + { + "epoch": 0.5017231139679295, + "grad_norm": 515.9050903320312, + "learning_rate": 5.924216792782138e-06, + "loss": 29.0406, + "step": 248370 + }, + { + "epoch": 0.5017433146006133, + "grad_norm": 110.2987289428711, + "learning_rate": 5.9238737397840966e-06, + "loss": 11.4013, + "step": 248380 + }, + { + "epoch": 0.5017635152332971, + "grad_norm": 131.7434844970703, + "learning_rate": 5.923530682283204e-06, + "loss": 21.3107, + "step": 248390 + }, + { + "epoch": 0.5017837158659809, + "grad_norm": 645.4819946289062, + "learning_rate": 5.923187620281135e-06, + "loss": 35.8132, + "step": 248400 + }, + { + "epoch": 0.5018039164986647, + "grad_norm": 295.7440185546875, + "learning_rate": 5.922844553779563e-06, + "loss": 21.2253, + "step": 248410 + }, + { + "epoch": 0.5018241171313486, + "grad_norm": 754.93212890625, + "learning_rate": 5.922501482780156e-06, + "loss": 18.5812, + "step": 248420 
+ }, + { + "epoch": 0.5018443177640324, + "grad_norm": 88.51136779785156, + "learning_rate": 5.9221584072845914e-06, + "loss": 17.2722, + "step": 248430 + }, + { + "epoch": 0.5018645183967162, + "grad_norm": 553.53466796875, + "learning_rate": 5.921815327294537e-06, + "loss": 15.6545, + "step": 248440 + }, + { + "epoch": 0.5018847190294, + "grad_norm": 233.0700225830078, + "learning_rate": 5.9214722428116675e-06, + "loss": 22.5623, + "step": 248450 + }, + { + "epoch": 0.5019049196620838, + "grad_norm": 180.30812072753906, + "learning_rate": 5.921129153837654e-06, + "loss": 17.7093, + "step": 248460 + }, + { + "epoch": 0.5019251202947677, + "grad_norm": 178.22335815429688, + "learning_rate": 5.9207860603741674e-06, + "loss": 13.4829, + "step": 248470 + }, + { + "epoch": 0.5019453209274515, + "grad_norm": 268.0635070800781, + "learning_rate": 5.920442962422883e-06, + "loss": 20.989, + "step": 248480 + }, + { + "epoch": 0.5019655215601353, + "grad_norm": 407.7901306152344, + "learning_rate": 5.920099859985469e-06, + "loss": 12.4519, + "step": 248490 + }, + { + "epoch": 0.5019857221928191, + "grad_norm": 552.140380859375, + "learning_rate": 5.919756753063601e-06, + "loss": 18.7923, + "step": 248500 + }, + { + "epoch": 0.5020059228255029, + "grad_norm": 363.47967529296875, + "learning_rate": 5.919413641658951e-06, + "loss": 15.1383, + "step": 248510 + }, + { + "epoch": 0.5020261234581868, + "grad_norm": 680.4986572265625, + "learning_rate": 5.91907052577319e-06, + "loss": 32.3416, + "step": 248520 + }, + { + "epoch": 0.5020463240908706, + "grad_norm": 870.9769897460938, + "learning_rate": 5.9187274054079895e-06, + "loss": 29.1395, + "step": 248530 + }, + { + "epoch": 0.5020665247235544, + "grad_norm": 207.03797912597656, + "learning_rate": 5.918384280565025e-06, + "loss": 11.6947, + "step": 248540 + }, + { + "epoch": 0.5020867253562381, + "grad_norm": 300.1256103515625, + "learning_rate": 5.9180411512459655e-06, + "loss": 22.3148, + "step": 248550 + }, + { + "epoch": 0.5021069259889219, + "grad_norm": 122.85758209228516, + "learning_rate": 5.917698017452484e-06, + "loss": 18.0818, + "step": 248560 + }, + { + "epoch": 0.5021271266216057, + "grad_norm": 18.23651695251465, + "learning_rate": 5.9173548791862556e-06, + "loss": 19.688, + "step": 248570 + }, + { + "epoch": 0.5021473272542896, + "grad_norm": 423.9542541503906, + "learning_rate": 5.91701173644895e-06, + "loss": 18.1402, + "step": 248580 + }, + { + "epoch": 0.5021675278869734, + "grad_norm": 1059.9700927734375, + "learning_rate": 5.916668589242241e-06, + "loss": 29.2666, + "step": 248590 + }, + { + "epoch": 0.5021877285196572, + "grad_norm": 482.63092041015625, + "learning_rate": 5.9163254375677995e-06, + "loss": 22.8889, + "step": 248600 + }, + { + "epoch": 0.502207929152341, + "grad_norm": 797.1237182617188, + "learning_rate": 5.9159822814272995e-06, + "loss": 21.7161, + "step": 248610 + }, + { + "epoch": 0.5022281297850248, + "grad_norm": 147.08187866210938, + "learning_rate": 5.915639120822413e-06, + "loss": 29.4135, + "step": 248620 + }, + { + "epoch": 0.5022483304177087, + "grad_norm": 324.49053955078125, + "learning_rate": 5.915295955754812e-06, + "loss": 8.4809, + "step": 248630 + }, + { + "epoch": 0.5022685310503925, + "grad_norm": 2.9765026569366455, + "learning_rate": 5.914952786226169e-06, + "loss": 29.6125, + "step": 248640 + }, + { + "epoch": 0.5022887316830763, + "grad_norm": 431.4524841308594, + "learning_rate": 5.914609612238159e-06, + "loss": 13.3484, + "step": 248650 + }, + { + "epoch": 0.5023089323157601, + 
"grad_norm": 364.72161865234375, + "learning_rate": 5.914266433792452e-06, + "loss": 31.8002, + "step": 248660 + }, + { + "epoch": 0.502329132948444, + "grad_norm": 282.8038330078125, + "learning_rate": 5.913923250890721e-06, + "loss": 15.4929, + "step": 248670 + }, + { + "epoch": 0.5023493335811278, + "grad_norm": 420.2165832519531, + "learning_rate": 5.9135800635346385e-06, + "loss": 17.169, + "step": 248680 + }, + { + "epoch": 0.5023695342138116, + "grad_norm": 516.7526245117188, + "learning_rate": 5.913236871725877e-06, + "loss": 26.3079, + "step": 248690 + }, + { + "epoch": 0.5023897348464954, + "grad_norm": 637.6509399414062, + "learning_rate": 5.912893675466112e-06, + "loss": 23.7227, + "step": 248700 + }, + { + "epoch": 0.5024099354791792, + "grad_norm": 174.11920166015625, + "learning_rate": 5.912550474757011e-06, + "loss": 23.6315, + "step": 248710 + }, + { + "epoch": 0.502430136111863, + "grad_norm": 190.7968292236328, + "learning_rate": 5.912207269600252e-06, + "loss": 37.7386, + "step": 248720 + }, + { + "epoch": 0.5024503367445469, + "grad_norm": 326.266845703125, + "learning_rate": 5.911864059997504e-06, + "loss": 13.011, + "step": 248730 + }, + { + "epoch": 0.5024705373772307, + "grad_norm": 2355.5771484375, + "learning_rate": 5.911520845950442e-06, + "loss": 22.8896, + "step": 248740 + }, + { + "epoch": 0.5024907380099145, + "grad_norm": 413.2026062011719, + "learning_rate": 5.911177627460739e-06, + "loss": 19.1259, + "step": 248750 + }, + { + "epoch": 0.5025109386425983, + "grad_norm": 515.444091796875, + "learning_rate": 5.910834404530064e-06, + "loss": 22.6092, + "step": 248760 + }, + { + "epoch": 0.5025311392752821, + "grad_norm": 353.85015869140625, + "learning_rate": 5.910491177160094e-06, + "loss": 17.7772, + "step": 248770 + }, + { + "epoch": 0.502551339907966, + "grad_norm": 1164.6044921875, + "learning_rate": 5.910147945352501e-06, + "loss": 28.7933, + "step": 248780 + }, + { + "epoch": 0.5025715405406498, + "grad_norm": 325.5775451660156, + "learning_rate": 5.909804709108957e-06, + "loss": 41.7351, + "step": 248790 + }, + { + "epoch": 0.5025917411733335, + "grad_norm": 345.25152587890625, + "learning_rate": 5.909461468431135e-06, + "loss": 25.2505, + "step": 248800 + }, + { + "epoch": 0.5026119418060173, + "grad_norm": 671.2576904296875, + "learning_rate": 5.9091182233207075e-06, + "loss": 23.5605, + "step": 248810 + }, + { + "epoch": 0.5026321424387011, + "grad_norm": 381.11334228515625, + "learning_rate": 5.90877497377935e-06, + "loss": 29.831, + "step": 248820 + }, + { + "epoch": 0.502652343071385, + "grad_norm": 727.1109619140625, + "learning_rate": 5.908431719808731e-06, + "loss": 20.2548, + "step": 248830 + }, + { + "epoch": 0.5026725437040688, + "grad_norm": 5.139888286590576, + "learning_rate": 5.908088461410529e-06, + "loss": 45.1709, + "step": 248840 + }, + { + "epoch": 0.5026927443367526, + "grad_norm": 257.7228088378906, + "learning_rate": 5.907745198586411e-06, + "loss": 24.0451, + "step": 248850 + }, + { + "epoch": 0.5027129449694364, + "grad_norm": 268.2099609375, + "learning_rate": 5.907401931338055e-06, + "loss": 11.4164, + "step": 248860 + }, + { + "epoch": 0.5027331456021202, + "grad_norm": 350.30584716796875, + "learning_rate": 5.90705865966713e-06, + "loss": 28.8541, + "step": 248870 + }, + { + "epoch": 0.502753346234804, + "grad_norm": 721.6278686523438, + "learning_rate": 5.9067153835753125e-06, + "loss": 9.0915, + "step": 248880 + }, + { + "epoch": 0.5027735468674879, + "grad_norm": 492.8309020996094, + "learning_rate": 
5.906372103064274e-06, + "loss": 11.1449, + "step": 248890 + }, + { + "epoch": 0.5027937475001717, + "grad_norm": 524.3689575195312, + "learning_rate": 5.906028818135687e-06, + "loss": 22.7391, + "step": 248900 + }, + { + "epoch": 0.5028139481328555, + "grad_norm": 220.1304931640625, + "learning_rate": 5.905685528791226e-06, + "loss": 7.5041, + "step": 248910 + }, + { + "epoch": 0.5028341487655393, + "grad_norm": 437.30859375, + "learning_rate": 5.905342235032564e-06, + "loss": 38.2453, + "step": 248920 + }, + { + "epoch": 0.5028543493982232, + "grad_norm": 430.1686096191406, + "learning_rate": 5.904998936861375e-06, + "loss": 22.9035, + "step": 248930 + }, + { + "epoch": 0.502874550030907, + "grad_norm": 306.7922058105469, + "learning_rate": 5.904655634279328e-06, + "loss": 25.6524, + "step": 248940 + }, + { + "epoch": 0.5028947506635908, + "grad_norm": 0.8928514719009399, + "learning_rate": 5.904312327288101e-06, + "loss": 22.5635, + "step": 248950 + }, + { + "epoch": 0.5029149512962746, + "grad_norm": 590.5730590820312, + "learning_rate": 5.903969015889365e-06, + "loss": 13.5564, + "step": 248960 + }, + { + "epoch": 0.5029351519289584, + "grad_norm": 627.0333862304688, + "learning_rate": 5.903625700084794e-06, + "loss": 15.467, + "step": 248970 + }, + { + "epoch": 0.5029553525616423, + "grad_norm": 83.18756866455078, + "learning_rate": 5.9032823798760595e-06, + "loss": 21.6662, + "step": 248980 + }, + { + "epoch": 0.5029755531943261, + "grad_norm": 730.4940795898438, + "learning_rate": 5.902939055264838e-06, + "loss": 25.6206, + "step": 248990 + }, + { + "epoch": 0.5029957538270099, + "grad_norm": 174.6398468017578, + "learning_rate": 5.902595726252801e-06, + "loss": 12.4195, + "step": 249000 + }, + { + "epoch": 0.5030159544596937, + "grad_norm": 294.2000427246094, + "learning_rate": 5.902252392841621e-06, + "loss": 21.5018, + "step": 249010 + }, + { + "epoch": 0.5030361550923775, + "grad_norm": 544.2066650390625, + "learning_rate": 5.901909055032974e-06, + "loss": 30.3573, + "step": 249020 + }, + { + "epoch": 0.5030563557250614, + "grad_norm": 297.4364013671875, + "learning_rate": 5.901565712828528e-06, + "loss": 16.5915, + "step": 249030 + }, + { + "epoch": 0.5030765563577452, + "grad_norm": 126.29948425292969, + "learning_rate": 5.901222366229964e-06, + "loss": 19.41, + "step": 249040 + }, + { + "epoch": 0.503096756990429, + "grad_norm": 149.2904510498047, + "learning_rate": 5.900879015238948e-06, + "loss": 19.3939, + "step": 249050 + }, + { + "epoch": 0.5031169576231127, + "grad_norm": 426.88946533203125, + "learning_rate": 5.90053565985716e-06, + "loss": 16.0953, + "step": 249060 + }, + { + "epoch": 0.5031371582557965, + "grad_norm": 497.24383544921875, + "learning_rate": 5.900192300086268e-06, + "loss": 19.2877, + "step": 249070 + }, + { + "epoch": 0.5031573588884803, + "grad_norm": 187.9085693359375, + "learning_rate": 5.89984893592795e-06, + "loss": 18.1587, + "step": 249080 + }, + { + "epoch": 0.5031775595211642, + "grad_norm": 241.67221069335938, + "learning_rate": 5.899505567383877e-06, + "loss": 33.4961, + "step": 249090 + }, + { + "epoch": 0.503197760153848, + "grad_norm": 216.5790557861328, + "learning_rate": 5.8991621944557224e-06, + "loss": 26.2352, + "step": 249100 + }, + { + "epoch": 0.5032179607865318, + "grad_norm": 249.6385498046875, + "learning_rate": 5.8988188171451596e-06, + "loss": 14.572, + "step": 249110 + }, + { + "epoch": 0.5032381614192156, + "grad_norm": 397.8631286621094, + "learning_rate": 5.898475435453863e-06, + "loss": 10.3243, + "step": 249120 + }, + 
{ + "epoch": 0.5032583620518994, + "grad_norm": 245.76161193847656, + "learning_rate": 5.898132049383507e-06, + "loss": 11.1525, + "step": 249130 + }, + { + "epoch": 0.5032785626845833, + "grad_norm": 15.009568214416504, + "learning_rate": 5.897788658935764e-06, + "loss": 30.3441, + "step": 249140 + }, + { + "epoch": 0.5032987633172671, + "grad_norm": 262.6147766113281, + "learning_rate": 5.897445264112309e-06, + "loss": 11.2807, + "step": 249150 + }, + { + "epoch": 0.5033189639499509, + "grad_norm": 1088.070068359375, + "learning_rate": 5.897101864914814e-06, + "loss": 30.4854, + "step": 249160 + }, + { + "epoch": 0.5033391645826347, + "grad_norm": 177.8507080078125, + "learning_rate": 5.8967584613449525e-06, + "loss": 15.7606, + "step": 249170 + }, + { + "epoch": 0.5033593652153185, + "grad_norm": 207.3918914794922, + "learning_rate": 5.896415053404399e-06, + "loss": 15.7608, + "step": 249180 + }, + { + "epoch": 0.5033795658480024, + "grad_norm": 315.90283203125, + "learning_rate": 5.896071641094827e-06, + "loss": 16.8296, + "step": 249190 + }, + { + "epoch": 0.5033997664806862, + "grad_norm": 424.2550354003906, + "learning_rate": 5.8957282244179125e-06, + "loss": 15.4169, + "step": 249200 + }, + { + "epoch": 0.50341996711337, + "grad_norm": 205.88717651367188, + "learning_rate": 5.895384803375325e-06, + "loss": 26.7485, + "step": 249210 + }, + { + "epoch": 0.5034401677460538, + "grad_norm": 21.79161834716797, + "learning_rate": 5.895041377968742e-06, + "loss": 21.0781, + "step": 249220 + }, + { + "epoch": 0.5034603683787376, + "grad_norm": 265.3016357421875, + "learning_rate": 5.894697948199836e-06, + "loss": 8.8661, + "step": 249230 + }, + { + "epoch": 0.5034805690114215, + "grad_norm": 210.5745849609375, + "learning_rate": 5.89435451407028e-06, + "loss": 11.2092, + "step": 249240 + }, + { + "epoch": 0.5035007696441053, + "grad_norm": 152.37144470214844, + "learning_rate": 5.8940110755817484e-06, + "loss": 32.8861, + "step": 249250 + }, + { + "epoch": 0.5035209702767891, + "grad_norm": 545.8831787109375, + "learning_rate": 5.893667632735915e-06, + "loss": 12.1267, + "step": 249260 + }, + { + "epoch": 0.5035411709094729, + "grad_norm": 262.19146728515625, + "learning_rate": 5.893324185534456e-06, + "loss": 7.8067, + "step": 249270 + }, + { + "epoch": 0.5035613715421567, + "grad_norm": 417.0307312011719, + "learning_rate": 5.892980733979041e-06, + "loss": 17.7089, + "step": 249280 + }, + { + "epoch": 0.5035815721748406, + "grad_norm": 840.2435302734375, + "learning_rate": 5.892637278071347e-06, + "loss": 13.7266, + "step": 249290 + }, + { + "epoch": 0.5036017728075244, + "grad_norm": 406.7433166503906, + "learning_rate": 5.892293817813048e-06, + "loss": 18.5383, + "step": 249300 + }, + { + "epoch": 0.5036219734402081, + "grad_norm": 691.2929077148438, + "learning_rate": 5.891950353205817e-06, + "loss": 25.414, + "step": 249310 + }, + { + "epoch": 0.5036421740728919, + "grad_norm": 112.32616424560547, + "learning_rate": 5.891606884251326e-06, + "loss": 9.8455, + "step": 249320 + }, + { + "epoch": 0.5036623747055757, + "grad_norm": 497.7851257324219, + "learning_rate": 5.8912634109512534e-06, + "loss": 19.669, + "step": 249330 + }, + { + "epoch": 0.5036825753382596, + "grad_norm": 263.6553955078125, + "learning_rate": 5.89091993330727e-06, + "loss": 20.7168, + "step": 249340 + }, + { + "epoch": 0.5037027759709434, + "grad_norm": 568.7107543945312, + "learning_rate": 5.89057645132105e-06, + "loss": 14.7107, + "step": 249350 + }, + { + "epoch": 0.5037229766036272, + "grad_norm": 
129.31324768066406, + "learning_rate": 5.8902329649942715e-06, + "loss": 45.3528, + "step": 249360 + }, + { + "epoch": 0.503743177236311, + "grad_norm": 178.8046417236328, + "learning_rate": 5.889889474328603e-06, + "loss": 16.6972, + "step": 249370 + }, + { + "epoch": 0.5037633778689948, + "grad_norm": 346.4656066894531, + "learning_rate": 5.889545979325722e-06, + "loss": 20.1498, + "step": 249380 + }, + { + "epoch": 0.5037835785016787, + "grad_norm": 207.06436157226562, + "learning_rate": 5.889202479987301e-06, + "loss": 32.255, + "step": 249390 + }, + { + "epoch": 0.5038037791343625, + "grad_norm": 495.1197509765625, + "learning_rate": 5.8888589763150165e-06, + "loss": 20.4992, + "step": 249400 + }, + { + "epoch": 0.5038239797670463, + "grad_norm": 109.89571380615234, + "learning_rate": 5.8885154683105395e-06, + "loss": 19.9023, + "step": 249410 + }, + { + "epoch": 0.5038441803997301, + "grad_norm": 454.82562255859375, + "learning_rate": 5.8881719559755454e-06, + "loss": 16.5533, + "step": 249420 + }, + { + "epoch": 0.5038643810324139, + "grad_norm": 327.36627197265625, + "learning_rate": 5.887828439311712e-06, + "loss": 19.4268, + "step": 249430 + }, + { + "epoch": 0.5038845816650978, + "grad_norm": 348.3185729980469, + "learning_rate": 5.887484918320708e-06, + "loss": 11.3746, + "step": 249440 + }, + { + "epoch": 0.5039047822977816, + "grad_norm": 779.6866455078125, + "learning_rate": 5.887141393004211e-06, + "loss": 37.914, + "step": 249450 + }, + { + "epoch": 0.5039249829304654, + "grad_norm": 275.3816833496094, + "learning_rate": 5.8867978633638935e-06, + "loss": 10.3895, + "step": 249460 + }, + { + "epoch": 0.5039451835631492, + "grad_norm": 832.8724975585938, + "learning_rate": 5.886454329401432e-06, + "loss": 36.9358, + "step": 249470 + }, + { + "epoch": 0.503965384195833, + "grad_norm": 519.391357421875, + "learning_rate": 5.8861107911184975e-06, + "loss": 12.3229, + "step": 249480 + }, + { + "epoch": 0.5039855848285169, + "grad_norm": 265.33380126953125, + "learning_rate": 5.885767248516769e-06, + "loss": 35.1115, + "step": 249490 + }, + { + "epoch": 0.5040057854612007, + "grad_norm": 675.9965209960938, + "learning_rate": 5.885423701597918e-06, + "loss": 30.9103, + "step": 249500 + }, + { + "epoch": 0.5040259860938845, + "grad_norm": 1510.18603515625, + "learning_rate": 5.885080150363618e-06, + "loss": 27.4959, + "step": 249510 + }, + { + "epoch": 0.5040461867265683, + "grad_norm": 1.5162192583084106, + "learning_rate": 5.884736594815545e-06, + "loss": 25.0254, + "step": 249520 + }, + { + "epoch": 0.5040663873592521, + "grad_norm": 606.4171142578125, + "learning_rate": 5.884393034955373e-06, + "loss": 14.7794, + "step": 249530 + }, + { + "epoch": 0.504086587991936, + "grad_norm": 14.640703201293945, + "learning_rate": 5.8840494707847786e-06, + "loss": 12.6323, + "step": 249540 + }, + { + "epoch": 0.5041067886246198, + "grad_norm": 12.581164360046387, + "learning_rate": 5.883705902305432e-06, + "loss": 15.9409, + "step": 249550 + }, + { + "epoch": 0.5041269892573036, + "grad_norm": 697.4895629882812, + "learning_rate": 5.8833623295190104e-06, + "loss": 21.9033, + "step": 249560 + }, + { + "epoch": 0.5041471898899873, + "grad_norm": 276.90704345703125, + "learning_rate": 5.883018752427189e-06, + "loss": 15.0847, + "step": 249570 + }, + { + "epoch": 0.5041673905226711, + "grad_norm": 244.14535522460938, + "learning_rate": 5.8826751710316395e-06, + "loss": 13.4849, + "step": 249580 + }, + { + "epoch": 0.5041875911553549, + "grad_norm": 1217.48193359375, + "learning_rate": 
5.882331585334039e-06, + "loss": 38.1977, + "step": 249590 + }, + { + "epoch": 0.5042077917880388, + "grad_norm": 53.94611358642578, + "learning_rate": 5.881987995336062e-06, + "loss": 14.0265, + "step": 249600 + }, + { + "epoch": 0.5042279924207226, + "grad_norm": 332.9171447753906, + "learning_rate": 5.881644401039382e-06, + "loss": 11.8391, + "step": 249610 + }, + { + "epoch": 0.5042481930534064, + "grad_norm": 562.229736328125, + "learning_rate": 5.881300802445675e-06, + "loss": 14.8923, + "step": 249620 + }, + { + "epoch": 0.5042683936860902, + "grad_norm": 260.7309875488281, + "learning_rate": 5.880957199556615e-06, + "loss": 17.2081, + "step": 249630 + }, + { + "epoch": 0.504288594318774, + "grad_norm": 221.5662078857422, + "learning_rate": 5.880613592373874e-06, + "loss": 9.9935, + "step": 249640 + }, + { + "epoch": 0.5043087949514579, + "grad_norm": 240.99302673339844, + "learning_rate": 5.880269980899132e-06, + "loss": 14.6933, + "step": 249650 + }, + { + "epoch": 0.5043289955841417, + "grad_norm": 270.6983947753906, + "learning_rate": 5.879926365134059e-06, + "loss": 14.4931, + "step": 249660 + }, + { + "epoch": 0.5043491962168255, + "grad_norm": 0.22573330998420715, + "learning_rate": 5.879582745080333e-06, + "loss": 28.6685, + "step": 249670 + }, + { + "epoch": 0.5043693968495093, + "grad_norm": 395.80535888671875, + "learning_rate": 5.879239120739626e-06, + "loss": 17.6597, + "step": 249680 + }, + { + "epoch": 0.5043895974821931, + "grad_norm": 159.0031280517578, + "learning_rate": 5.878895492113614e-06, + "loss": 16.4434, + "step": 249690 + }, + { + "epoch": 0.504409798114877, + "grad_norm": 802.7413940429688, + "learning_rate": 5.878551859203974e-06, + "loss": 21.5858, + "step": 249700 + }, + { + "epoch": 0.5044299987475608, + "grad_norm": 29.40521812438965, + "learning_rate": 5.878208222012377e-06, + "loss": 15.8672, + "step": 249710 + }, + { + "epoch": 0.5044501993802446, + "grad_norm": 1343.5350341796875, + "learning_rate": 5.8778645805405e-06, + "loss": 28.0506, + "step": 249720 + }, + { + "epoch": 0.5044704000129284, + "grad_norm": 246.5369873046875, + "learning_rate": 5.8775209347900174e-06, + "loss": 16.6771, + "step": 249730 + }, + { + "epoch": 0.5044906006456122, + "grad_norm": 355.10162353515625, + "learning_rate": 5.877177284762605e-06, + "loss": 24.7707, + "step": 249740 + }, + { + "epoch": 0.5045108012782961, + "grad_norm": 145.2834930419922, + "learning_rate": 5.876833630459936e-06, + "loss": 11.5092, + "step": 249750 + }, + { + "epoch": 0.5045310019109799, + "grad_norm": 130.55377197265625, + "learning_rate": 5.876489971883688e-06, + "loss": 18.376, + "step": 249760 + }, + { + "epoch": 0.5045512025436637, + "grad_norm": 125.49608612060547, + "learning_rate": 5.876146309035532e-06, + "loss": 12.7758, + "step": 249770 + }, + { + "epoch": 0.5045714031763475, + "grad_norm": 174.88815307617188, + "learning_rate": 5.8758026419171455e-06, + "loss": 27.505, + "step": 249780 + }, + { + "epoch": 0.5045916038090313, + "grad_norm": 312.1806335449219, + "learning_rate": 5.875458970530204e-06, + "loss": 22.9433, + "step": 249790 + }, + { + "epoch": 0.5046118044417152, + "grad_norm": 50.86980438232422, + "learning_rate": 5.8751152948763815e-06, + "loss": 12.9315, + "step": 249800 + }, + { + "epoch": 0.504632005074399, + "grad_norm": 197.81698608398438, + "learning_rate": 5.874771614957353e-06, + "loss": 26.7791, + "step": 249810 + }, + { + "epoch": 0.5046522057070827, + "grad_norm": 535.0093383789062, + "learning_rate": 5.874427930774792e-06, + "loss": 23.5996, + "step": 
249820 + }, + { + "epoch": 0.5046724063397665, + "grad_norm": 416.4787292480469, + "learning_rate": 5.874084242330378e-06, + "loss": 23.4198, + "step": 249830 + }, + { + "epoch": 0.5046926069724503, + "grad_norm": 259.2166442871094, + "learning_rate": 5.873740549625783e-06, + "loss": 15.2562, + "step": 249840 + }, + { + "epoch": 0.5047128076051342, + "grad_norm": 533.5535888671875, + "learning_rate": 5.873396852662682e-06, + "loss": 21.7211, + "step": 249850 + }, + { + "epoch": 0.504733008237818, + "grad_norm": 166.01058959960938, + "learning_rate": 5.873053151442749e-06, + "loss": 26.0265, + "step": 249860 + }, + { + "epoch": 0.5047532088705018, + "grad_norm": 476.9961242675781, + "learning_rate": 5.872709445967662e-06, + "loss": 23.062, + "step": 249870 + }, + { + "epoch": 0.5047734095031856, + "grad_norm": 233.0247802734375, + "learning_rate": 5.872365736239097e-06, + "loss": 7.2233, + "step": 249880 + }, + { + "epoch": 0.5047936101358694, + "grad_norm": 439.13409423828125, + "learning_rate": 5.872022022258726e-06, + "loss": 25.7809, + "step": 249890 + }, + { + "epoch": 0.5048138107685533, + "grad_norm": 262.7398681640625, + "learning_rate": 5.871678304028224e-06, + "loss": 14.5744, + "step": 249900 + }, + { + "epoch": 0.5048340114012371, + "grad_norm": 228.85159301757812, + "learning_rate": 5.8713345815492695e-06, + "loss": 10.7308, + "step": 249910 + }, + { + "epoch": 0.5048542120339209, + "grad_norm": 158.81594848632812, + "learning_rate": 5.8709908548235365e-06, + "loss": 12.7449, + "step": 249920 + }, + { + "epoch": 0.5048744126666047, + "grad_norm": 414.7752685546875, + "learning_rate": 5.870647123852696e-06, + "loss": 16.0366, + "step": 249930 + }, + { + "epoch": 0.5048946132992885, + "grad_norm": 259.3028869628906, + "learning_rate": 5.870303388638431e-06, + "loss": 14.5456, + "step": 249940 + }, + { + "epoch": 0.5049148139319724, + "grad_norm": 281.9948425292969, + "learning_rate": 5.86995964918241e-06, + "loss": 17.1693, + "step": 249950 + }, + { + "epoch": 0.5049350145646562, + "grad_norm": 413.28155517578125, + "learning_rate": 5.869615905486313e-06, + "loss": 16.3743, + "step": 249960 + }, + { + "epoch": 0.50495521519734, + "grad_norm": 92.13639831542969, + "learning_rate": 5.869272157551814e-06, + "loss": 18.2124, + "step": 249970 + }, + { + "epoch": 0.5049754158300238, + "grad_norm": 530.0686645507812, + "learning_rate": 5.868928405380585e-06, + "loss": 19.9189, + "step": 249980 + }, + { + "epoch": 0.5049956164627076, + "grad_norm": 446.3638916015625, + "learning_rate": 5.868584648974308e-06, + "loss": 14.097, + "step": 249990 + }, + { + "epoch": 0.5050158170953915, + "grad_norm": 23.526782989501953, + "learning_rate": 5.8682408883346535e-06, + "loss": 20.2342, + "step": 250000 + }, + { + "epoch": 0.5050360177280753, + "grad_norm": 3428.04833984375, + "learning_rate": 5.8678971234632965e-06, + "loss": 36.5053, + "step": 250010 + }, + { + "epoch": 0.5050562183607591, + "grad_norm": 387.45599365234375, + "learning_rate": 5.8675533543619155e-06, + "loss": 9.7146, + "step": 250020 + }, + { + "epoch": 0.5050764189934429, + "grad_norm": 1221.8453369140625, + "learning_rate": 5.867209581032184e-06, + "loss": 35.8046, + "step": 250030 + }, + { + "epoch": 0.5050966196261267, + "grad_norm": 359.765625, + "learning_rate": 5.8668658034757795e-06, + "loss": 16.4776, + "step": 250040 + }, + { + "epoch": 0.5051168202588106, + "grad_norm": 617.654052734375, + "learning_rate": 5.866522021694376e-06, + "loss": 22.2486, + "step": 250050 + }, + { + "epoch": 0.5051370208914944, + "grad_norm": 
417.5396728515625, + "learning_rate": 5.866178235689648e-06, + "loss": 17.4365, + "step": 250060 + }, + { + "epoch": 0.5051572215241782, + "grad_norm": 445.7264404296875, + "learning_rate": 5.865834445463273e-06, + "loss": 17.3608, + "step": 250070 + }, + { + "epoch": 0.5051774221568619, + "grad_norm": 95.29048919677734, + "learning_rate": 5.865490651016927e-06, + "loss": 19.9819, + "step": 250080 + }, + { + "epoch": 0.5051976227895457, + "grad_norm": 512.3966674804688, + "learning_rate": 5.865146852352283e-06, + "loss": 18.7347, + "step": 250090 + }, + { + "epoch": 0.5052178234222295, + "grad_norm": 603.2786865234375, + "learning_rate": 5.8648030494710195e-06, + "loss": 17.141, + "step": 250100 + }, + { + "epoch": 0.5052380240549134, + "grad_norm": 1903.7728271484375, + "learning_rate": 5.864459242374809e-06, + "loss": 28.3695, + "step": 250110 + }, + { + "epoch": 0.5052582246875972, + "grad_norm": 370.3611755371094, + "learning_rate": 5.86411543106533e-06, + "loss": 18.9759, + "step": 250120 + }, + { + "epoch": 0.505278425320281, + "grad_norm": 186.92010498046875, + "learning_rate": 5.863771615544258e-06, + "loss": 11.4139, + "step": 250130 + }, + { + "epoch": 0.5052986259529648, + "grad_norm": 348.0819396972656, + "learning_rate": 5.863427795813266e-06, + "loss": 24.1246, + "step": 250140 + }, + { + "epoch": 0.5053188265856486, + "grad_norm": 291.275390625, + "learning_rate": 5.863083971874034e-06, + "loss": 19.7654, + "step": 250150 + }, + { + "epoch": 0.5053390272183325, + "grad_norm": 517.228271484375, + "learning_rate": 5.8627401437282334e-06, + "loss": 26.8513, + "step": 250160 + }, + { + "epoch": 0.5053592278510163, + "grad_norm": 224.11453247070312, + "learning_rate": 5.862396311377543e-06, + "loss": 13.223, + "step": 250170 + }, + { + "epoch": 0.5053794284837001, + "grad_norm": 0.0, + "learning_rate": 5.862052474823637e-06, + "loss": 20.4579, + "step": 250180 + }, + { + "epoch": 0.5053996291163839, + "grad_norm": 254.05532836914062, + "learning_rate": 5.861708634068193e-06, + "loss": 15.5208, + "step": 250190 + }, + { + "epoch": 0.5054198297490677, + "grad_norm": 451.5887145996094, + "learning_rate": 5.8613647891128845e-06, + "loss": 17.1956, + "step": 250200 + }, + { + "epoch": 0.5054400303817516, + "grad_norm": 34.32612609863281, + "learning_rate": 5.861020939959389e-06, + "loss": 13.5152, + "step": 250210 + }, + { + "epoch": 0.5054602310144354, + "grad_norm": 694.2782592773438, + "learning_rate": 5.860677086609381e-06, + "loss": 18.3581, + "step": 250220 + }, + { + "epoch": 0.5054804316471192, + "grad_norm": 176.43052673339844, + "learning_rate": 5.860333229064539e-06, + "loss": 13.6246, + "step": 250230 + }, + { + "epoch": 0.505500632279803, + "grad_norm": 393.8753662109375, + "learning_rate": 5.859989367326535e-06, + "loss": 16.0354, + "step": 250240 + }, + { + "epoch": 0.5055208329124868, + "grad_norm": 754.1749877929688, + "learning_rate": 5.859645501397048e-06, + "loss": 14.098, + "step": 250250 + }, + { + "epoch": 0.5055410335451707, + "grad_norm": 317.06646728515625, + "learning_rate": 5.859301631277754e-06, + "loss": 23.7296, + "step": 250260 + }, + { + "epoch": 0.5055612341778545, + "grad_norm": 153.16233825683594, + "learning_rate": 5.858957756970326e-06, + "loss": 24.6719, + "step": 250270 + }, + { + "epoch": 0.5055814348105383, + "grad_norm": 282.1354675292969, + "learning_rate": 5.858613878476445e-06, + "loss": 16.8805, + "step": 250280 + }, + { + "epoch": 0.5056016354432221, + "grad_norm": 244.73968505859375, + "learning_rate": 5.858269995797781e-06, + "loss": 
11.7205, + "step": 250290 + }, + { + "epoch": 0.5056218360759059, + "grad_norm": 478.359130859375, + "learning_rate": 5.857926108936015e-06, + "loss": 34.8297, + "step": 250300 + }, + { + "epoch": 0.5056420367085898, + "grad_norm": 168.78770446777344, + "learning_rate": 5.8575822178928225e-06, + "loss": 21.1783, + "step": 250310 + }, + { + "epoch": 0.5056622373412736, + "grad_norm": 524.9314575195312, + "learning_rate": 5.857238322669875e-06, + "loss": 38.7921, + "step": 250320 + }, + { + "epoch": 0.5056824379739574, + "grad_norm": 714.8871459960938, + "learning_rate": 5.8568944232688554e-06, + "loss": 18.5099, + "step": 250330 + }, + { + "epoch": 0.5057026386066411, + "grad_norm": 373.43292236328125, + "learning_rate": 5.856550519691433e-06, + "loss": 22.0461, + "step": 250340 + }, + { + "epoch": 0.5057228392393249, + "grad_norm": 1048.020751953125, + "learning_rate": 5.856206611939289e-06, + "loss": 34.6704, + "step": 250350 + }, + { + "epoch": 0.5057430398720087, + "grad_norm": 454.6194152832031, + "learning_rate": 5.855862700014096e-06, + "loss": 13.2749, + "step": 250360 + }, + { + "epoch": 0.5057632405046926, + "grad_norm": 81.307373046875, + "learning_rate": 5.855518783917535e-06, + "loss": 10.3817, + "step": 250370 + }, + { + "epoch": 0.5057834411373764, + "grad_norm": 193.7640380859375, + "learning_rate": 5.855174863651279e-06, + "loss": 21.205, + "step": 250380 + }, + { + "epoch": 0.5058036417700602, + "grad_norm": 0.0, + "learning_rate": 5.854830939217002e-06, + "loss": 15.3016, + "step": 250390 + }, + { + "epoch": 0.505823842402744, + "grad_norm": 604.971923828125, + "learning_rate": 5.854487010616384e-06, + "loss": 25.737, + "step": 250400 + }, + { + "epoch": 0.5058440430354278, + "grad_norm": 413.559326171875, + "learning_rate": 5.8541430778511e-06, + "loss": 21.6659, + "step": 250410 + }, + { + "epoch": 0.5058642436681117, + "grad_norm": 13.928187370300293, + "learning_rate": 5.853799140922827e-06, + "loss": 11.1542, + "step": 250420 + }, + { + "epoch": 0.5058844443007955, + "grad_norm": 465.09954833984375, + "learning_rate": 5.853455199833238e-06, + "loss": 16.7608, + "step": 250430 + }, + { + "epoch": 0.5059046449334793, + "grad_norm": 211.7120819091797, + "learning_rate": 5.853111254584014e-06, + "loss": 19.3896, + "step": 250440 + }, + { + "epoch": 0.5059248455661631, + "grad_norm": 299.7850646972656, + "learning_rate": 5.852767305176829e-06, + "loss": 15.0662, + "step": 250450 + }, + { + "epoch": 0.505945046198847, + "grad_norm": 503.6282958984375, + "learning_rate": 5.852423351613359e-06, + "loss": 26.4411, + "step": 250460 + }, + { + "epoch": 0.5059652468315308, + "grad_norm": 141.0774383544922, + "learning_rate": 5.852079393895281e-06, + "loss": 12.8351, + "step": 250470 + }, + { + "epoch": 0.5059854474642146, + "grad_norm": 458.46051025390625, + "learning_rate": 5.85173543202427e-06, + "loss": 23.6347, + "step": 250480 + }, + { + "epoch": 0.5060056480968984, + "grad_norm": 17.644128799438477, + "learning_rate": 5.851391466002008e-06, + "loss": 21.8069, + "step": 250490 + }, + { + "epoch": 0.5060258487295822, + "grad_norm": 280.13995361328125, + "learning_rate": 5.851047495830163e-06, + "loss": 23.8772, + "step": 250500 + }, + { + "epoch": 0.506046049362266, + "grad_norm": 475.9883117675781, + "learning_rate": 5.850703521510418e-06, + "loss": 27.7783, + "step": 250510 + }, + { + "epoch": 0.5060662499949499, + "grad_norm": 284.2987365722656, + "learning_rate": 5.850359543044446e-06, + "loss": 11.7047, + "step": 250520 + }, + { + "epoch": 0.5060864506276337, + 
"grad_norm": 470.3309020996094, + "learning_rate": 5.850015560433926e-06, + "loss": 20.9905, + "step": 250530 + }, + { + "epoch": 0.5061066512603175, + "grad_norm": 375.3161926269531, + "learning_rate": 5.849671573680532e-06, + "loss": 9.8238, + "step": 250540 + }, + { + "epoch": 0.5061268518930013, + "grad_norm": 307.17529296875, + "learning_rate": 5.849327582785943e-06, + "loss": 12.0072, + "step": 250550 + }, + { + "epoch": 0.5061470525256851, + "grad_norm": 270.5221862792969, + "learning_rate": 5.848983587751833e-06, + "loss": 24.3678, + "step": 250560 + }, + { + "epoch": 0.506167253158369, + "grad_norm": 412.85595703125, + "learning_rate": 5.848639588579881e-06, + "loss": 16.4555, + "step": 250570 + }, + { + "epoch": 0.5061874537910528, + "grad_norm": 292.0421142578125, + "learning_rate": 5.848295585271764e-06, + "loss": 22.1581, + "step": 250580 + }, + { + "epoch": 0.5062076544237365, + "grad_norm": 634.596435546875, + "learning_rate": 5.847951577829153e-06, + "loss": 16.357, + "step": 250590 + }, + { + "epoch": 0.5062278550564203, + "grad_norm": 366.3712463378906, + "learning_rate": 5.847607566253732e-06, + "loss": 19.6015, + "step": 250600 + }, + { + "epoch": 0.5062480556891041, + "grad_norm": 319.39727783203125, + "learning_rate": 5.847263550547174e-06, + "loss": 9.9481, + "step": 250610 + }, + { + "epoch": 0.506268256321788, + "grad_norm": 373.22998046875, + "learning_rate": 5.8469195307111555e-06, + "loss": 22.338, + "step": 250620 + }, + { + "epoch": 0.5062884569544718, + "grad_norm": 552.7167358398438, + "learning_rate": 5.846575506747355e-06, + "loss": 22.6938, + "step": 250630 + }, + { + "epoch": 0.5063086575871556, + "grad_norm": 81.49649810791016, + "learning_rate": 5.846231478657447e-06, + "loss": 11.8021, + "step": 250640 + }, + { + "epoch": 0.5063288582198394, + "grad_norm": 186.39144897460938, + "learning_rate": 5.8458874464431115e-06, + "loss": 16.271, + "step": 250650 + }, + { + "epoch": 0.5063490588525232, + "grad_norm": 485.9855041503906, + "learning_rate": 5.845543410106021e-06, + "loss": 27.9823, + "step": 250660 + }, + { + "epoch": 0.5063692594852071, + "grad_norm": 348.76275634765625, + "learning_rate": 5.845199369647856e-06, + "loss": 19.1944, + "step": 250670 + }, + { + "epoch": 0.5063894601178909, + "grad_norm": 426.27056884765625, + "learning_rate": 5.84485532507029e-06, + "loss": 10.4866, + "step": 250680 + }, + { + "epoch": 0.5064096607505747, + "grad_norm": 452.5116271972656, + "learning_rate": 5.844511276375003e-06, + "loss": 19.0644, + "step": 250690 + }, + { + "epoch": 0.5064298613832585, + "grad_norm": 496.2027893066406, + "learning_rate": 5.844167223563669e-06, + "loss": 15.054, + "step": 250700 + }, + { + "epoch": 0.5064500620159423, + "grad_norm": 269.6355285644531, + "learning_rate": 5.8438231666379685e-06, + "loss": 12.0207, + "step": 250710 + }, + { + "epoch": 0.5064702626486262, + "grad_norm": 226.2335662841797, + "learning_rate": 5.843479105599576e-06, + "loss": 22.7242, + "step": 250720 + }, + { + "epoch": 0.50649046328131, + "grad_norm": 126.28277587890625, + "learning_rate": 5.843135040450168e-06, + "loss": 16.7569, + "step": 250730 + }, + { + "epoch": 0.5065106639139938, + "grad_norm": 127.23039245605469, + "learning_rate": 5.842790971191422e-06, + "loss": 19.1778, + "step": 250740 + }, + { + "epoch": 0.5065308645466776, + "grad_norm": 298.5974426269531, + "learning_rate": 5.842446897825014e-06, + "loss": 39.9161, + "step": 250750 + }, + { + "epoch": 0.5065510651793614, + "grad_norm": 450.5945739746094, + "learning_rate": 
5.842102820352623e-06, + "loss": 12.08, + "step": 250760 + }, + { + "epoch": 0.5065712658120453, + "grad_norm": 257.14300537109375, + "learning_rate": 5.841758738775923e-06, + "loss": 13.8731, + "step": 250770 + }, + { + "epoch": 0.5065914664447291, + "grad_norm": 373.70355224609375, + "learning_rate": 5.841414653096597e-06, + "loss": 25.917, + "step": 250780 + }, + { + "epoch": 0.5066116670774129, + "grad_norm": 348.8895568847656, + "learning_rate": 5.841070563316316e-06, + "loss": 19.5707, + "step": 250790 + }, + { + "epoch": 0.5066318677100967, + "grad_norm": 2.082367181777954, + "learning_rate": 5.840726469436758e-06, + "loss": 32.1478, + "step": 250800 + }, + { + "epoch": 0.5066520683427805, + "grad_norm": 360.0582275390625, + "learning_rate": 5.840382371459603e-06, + "loss": 10.6786, + "step": 250810 + }, + { + "epoch": 0.5066722689754644, + "grad_norm": 214.38682556152344, + "learning_rate": 5.8400382693865255e-06, + "loss": 19.9817, + "step": 250820 + }, + { + "epoch": 0.5066924696081482, + "grad_norm": 155.58001708984375, + "learning_rate": 5.839694163219203e-06, + "loss": 12.8881, + "step": 250830 + }, + { + "epoch": 0.506712670240832, + "grad_norm": 572.53662109375, + "learning_rate": 5.839350052959313e-06, + "loss": 26.189, + "step": 250840 + }, + { + "epoch": 0.5067328708735157, + "grad_norm": 106.50894927978516, + "learning_rate": 5.839005938608533e-06, + "loss": 12.8005, + "step": 250850 + }, + { + "epoch": 0.5067530715061995, + "grad_norm": 314.3960266113281, + "learning_rate": 5.838661820168539e-06, + "loss": 21.8497, + "step": 250860 + }, + { + "epoch": 0.5067732721388833, + "grad_norm": 221.12440490722656, + "learning_rate": 5.838317697641011e-06, + "loss": 12.328, + "step": 250870 + }, + { + "epoch": 0.5067934727715672, + "grad_norm": 362.97442626953125, + "learning_rate": 5.837973571027621e-06, + "loss": 29.0551, + "step": 250880 + }, + { + "epoch": 0.506813673404251, + "grad_norm": 234.4754180908203, + "learning_rate": 5.837629440330053e-06, + "loss": 16.8788, + "step": 250890 + }, + { + "epoch": 0.5068338740369348, + "grad_norm": 167.98399353027344, + "learning_rate": 5.837285305549978e-06, + "loss": 16.4452, + "step": 250900 + }, + { + "epoch": 0.5068540746696186, + "grad_norm": 617.789306640625, + "learning_rate": 5.836941166689077e-06, + "loss": 25.1946, + "step": 250910 + }, + { + "epoch": 0.5068742753023024, + "grad_norm": 176.49386596679688, + "learning_rate": 5.836597023749028e-06, + "loss": 6.8574, + "step": 250920 + }, + { + "epoch": 0.5068944759349863, + "grad_norm": 235.6635284423828, + "learning_rate": 5.836252876731503e-06, + "loss": 20.3856, + "step": 250930 + }, + { + "epoch": 0.5069146765676701, + "grad_norm": 480.59088134765625, + "learning_rate": 5.835908725638186e-06, + "loss": 26.9011, + "step": 250940 + }, + { + "epoch": 0.5069348772003539, + "grad_norm": 165.0316619873047, + "learning_rate": 5.83556457047075e-06, + "loss": 20.5717, + "step": 250950 + }, + { + "epoch": 0.5069550778330377, + "grad_norm": 48.574119567871094, + "learning_rate": 5.835220411230873e-06, + "loss": 23.1372, + "step": 250960 + }, + { + "epoch": 0.5069752784657215, + "grad_norm": 476.2835693359375, + "learning_rate": 5.834876247920233e-06, + "loss": 15.4405, + "step": 250970 + }, + { + "epoch": 0.5069954790984054, + "grad_norm": 425.82135009765625, + "learning_rate": 5.83453208054051e-06, + "loss": 19.0542, + "step": 250980 + }, + { + "epoch": 0.5070156797310892, + "grad_norm": 557.5946044921875, + "learning_rate": 5.834187909093376e-06, + "loss": 22.1702, + "step": 250990 
+ }, + { + "epoch": 0.507035880363773, + "grad_norm": 140.038330078125, + "learning_rate": 5.8338437335805124e-06, + "loss": 13.9805, + "step": 251000 + }, + { + "epoch": 0.5070560809964568, + "grad_norm": 126.53459930419922, + "learning_rate": 5.833499554003596e-06, + "loss": 16.044, + "step": 251010 + }, + { + "epoch": 0.5070762816291406, + "grad_norm": 181.307861328125, + "learning_rate": 5.833155370364302e-06, + "loss": 13.4199, + "step": 251020 + }, + { + "epoch": 0.5070964822618245, + "grad_norm": 483.58795166015625, + "learning_rate": 5.832811182664312e-06, + "loss": 11.5067, + "step": 251030 + }, + { + "epoch": 0.5071166828945083, + "grad_norm": 498.8536071777344, + "learning_rate": 5.832466990905299e-06, + "loss": 15.1246, + "step": 251040 + }, + { + "epoch": 0.5071368835271921, + "grad_norm": 667.5546264648438, + "learning_rate": 5.8321227950889455e-06, + "loss": 20.355, + "step": 251050 + }, + { + "epoch": 0.5071570841598759, + "grad_norm": 239.9335174560547, + "learning_rate": 5.8317785952169245e-06, + "loss": 19.6082, + "step": 251060 + }, + { + "epoch": 0.5071772847925597, + "grad_norm": 232.27671813964844, + "learning_rate": 5.8314343912909165e-06, + "loss": 15.5812, + "step": 251070 + }, + { + "epoch": 0.5071974854252436, + "grad_norm": 221.61940002441406, + "learning_rate": 5.831090183312599e-06, + "loss": 9.0896, + "step": 251080 + }, + { + "epoch": 0.5072176860579274, + "grad_norm": 337.8977966308594, + "learning_rate": 5.830745971283646e-06, + "loss": 28.5588, + "step": 251090 + }, + { + "epoch": 0.5072378866906111, + "grad_norm": 1154.0655517578125, + "learning_rate": 5.83040175520574e-06, + "loss": 26.3353, + "step": 251100 + }, + { + "epoch": 0.5072580873232949, + "grad_norm": 484.496826171875, + "learning_rate": 5.8300575350805555e-06, + "loss": 18.4101, + "step": 251110 + }, + { + "epoch": 0.5072782879559787, + "grad_norm": 516.824462890625, + "learning_rate": 5.8297133109097715e-06, + "loss": 23.6727, + "step": 251120 + }, + { + "epoch": 0.5072984885886626, + "grad_norm": 518.7554931640625, + "learning_rate": 5.829369082695066e-06, + "loss": 29.8526, + "step": 251130 + }, + { + "epoch": 0.5073186892213464, + "grad_norm": 459.8927307128906, + "learning_rate": 5.8290248504381165e-06, + "loss": 18.8124, + "step": 251140 + }, + { + "epoch": 0.5073388898540302, + "grad_norm": 227.15487670898438, + "learning_rate": 5.828680614140599e-06, + "loss": 15.377, + "step": 251150 + }, + { + "epoch": 0.507359090486714, + "grad_norm": 309.8097229003906, + "learning_rate": 5.8283363738041945e-06, + "loss": 23.4304, + "step": 251160 + }, + { + "epoch": 0.5073792911193978, + "grad_norm": 334.9891357421875, + "learning_rate": 5.827992129430578e-06, + "loss": 28.0611, + "step": 251170 + }, + { + "epoch": 0.5073994917520817, + "grad_norm": 742.1702270507812, + "learning_rate": 5.827647881021428e-06, + "loss": 7.7474, + "step": 251180 + }, + { + "epoch": 0.5074196923847655, + "grad_norm": 364.31622314453125, + "learning_rate": 5.827303628578424e-06, + "loss": 14.3056, + "step": 251190 + }, + { + "epoch": 0.5074398930174493, + "grad_norm": 50.556453704833984, + "learning_rate": 5.826959372103239e-06, + "loss": 21.4795, + "step": 251200 + }, + { + "epoch": 0.5074600936501331, + "grad_norm": 287.05853271484375, + "learning_rate": 5.826615111597558e-06, + "loss": 16.0893, + "step": 251210 + }, + { + "epoch": 0.5074802942828169, + "grad_norm": 238.78404235839844, + "learning_rate": 5.826270847063053e-06, + "loss": 26.9163, + "step": 251220 + }, + { + "epoch": 0.5075004949155008, + 
"grad_norm": 780.0631713867188, + "learning_rate": 5.8259265785014054e-06, + "loss": 27.5146, + "step": 251230 + }, + { + "epoch": 0.5075206955481846, + "grad_norm": 262.5494689941406, + "learning_rate": 5.82558230591429e-06, + "loss": 20.3833, + "step": 251240 + }, + { + "epoch": 0.5075408961808684, + "grad_norm": 645.9493408203125, + "learning_rate": 5.825238029303388e-06, + "loss": 22.9718, + "step": 251250 + }, + { + "epoch": 0.5075610968135522, + "grad_norm": 572.199462890625, + "learning_rate": 5.824893748670377e-06, + "loss": 16.7412, + "step": 251260 + }, + { + "epoch": 0.507581297446236, + "grad_norm": 315.9741516113281, + "learning_rate": 5.824549464016933e-06, + "loss": 23.4883, + "step": 251270 + }, + { + "epoch": 0.5076014980789199, + "grad_norm": 11.374568939208984, + "learning_rate": 5.824205175344735e-06, + "loss": 24.5944, + "step": 251280 + }, + { + "epoch": 0.5076216987116037, + "grad_norm": 324.0670471191406, + "learning_rate": 5.82386088265546e-06, + "loss": 29.2769, + "step": 251290 + }, + { + "epoch": 0.5076418993442875, + "grad_norm": 503.2557067871094, + "learning_rate": 5.823516585950787e-06, + "loss": 25.7352, + "step": 251300 + }, + { + "epoch": 0.5076620999769713, + "grad_norm": 333.05328369140625, + "learning_rate": 5.823172285232394e-06, + "loss": 17.1377, + "step": 251310 + }, + { + "epoch": 0.5076823006096551, + "grad_norm": 811.625732421875, + "learning_rate": 5.822827980501962e-06, + "loss": 21.4771, + "step": 251320 + }, + { + "epoch": 0.507702501242339, + "grad_norm": 373.1739196777344, + "learning_rate": 5.822483671761164e-06, + "loss": 13.2281, + "step": 251330 + }, + { + "epoch": 0.5077227018750228, + "grad_norm": 341.700439453125, + "learning_rate": 5.82213935901168e-06, + "loss": 11.4042, + "step": 251340 + }, + { + "epoch": 0.5077429025077066, + "grad_norm": 473.21063232421875, + "learning_rate": 5.821795042255189e-06, + "loss": 22.6602, + "step": 251350 + }, + { + "epoch": 0.5077631031403903, + "grad_norm": 268.0250244140625, + "learning_rate": 5.8214507214933666e-06, + "loss": 18.5216, + "step": 251360 + }, + { + "epoch": 0.5077833037730741, + "grad_norm": 366.32745361328125, + "learning_rate": 5.821106396727897e-06, + "loss": 10.8505, + "step": 251370 + }, + { + "epoch": 0.5078035044057579, + "grad_norm": 469.3739318847656, + "learning_rate": 5.820762067960451e-06, + "loss": 24.6283, + "step": 251380 + }, + { + "epoch": 0.5078237050384418, + "grad_norm": 320.2749328613281, + "learning_rate": 5.820417735192712e-06, + "loss": 31.9227, + "step": 251390 + }, + { + "epoch": 0.5078439056711256, + "grad_norm": 76.45555114746094, + "learning_rate": 5.8200733984263556e-06, + "loss": 22.3928, + "step": 251400 + }, + { + "epoch": 0.5078641063038094, + "grad_norm": 402.581787109375, + "learning_rate": 5.819729057663062e-06, + "loss": 25.8906, + "step": 251410 + }, + { + "epoch": 0.5078843069364932, + "grad_norm": 353.9494934082031, + "learning_rate": 5.819384712904508e-06, + "loss": 20.1779, + "step": 251420 + }, + { + "epoch": 0.507904507569177, + "grad_norm": 234.92645263671875, + "learning_rate": 5.819040364152372e-06, + "loss": 24.8317, + "step": 251430 + }, + { + "epoch": 0.5079247082018609, + "grad_norm": 440.35650634765625, + "learning_rate": 5.8186960114083325e-06, + "loss": 15.5205, + "step": 251440 + }, + { + "epoch": 0.5079449088345447, + "grad_norm": 169.0201873779297, + "learning_rate": 5.818351654674067e-06, + "loss": 13.7567, + "step": 251450 + }, + { + "epoch": 0.5079651094672285, + "grad_norm": 264.24188232421875, + "learning_rate": 
5.818007293951255e-06, + "loss": 32.0262, + "step": 251460 + }, + { + "epoch": 0.5079853100999123, + "grad_norm": 0.0, + "learning_rate": 5.817662929241576e-06, + "loss": 24.136, + "step": 251470 + }, + { + "epoch": 0.5080055107325961, + "grad_norm": 789.7282104492188, + "learning_rate": 5.817318560546708e-06, + "loss": 42.3933, + "step": 251480 + }, + { + "epoch": 0.50802571136528, + "grad_norm": 419.7528381347656, + "learning_rate": 5.8169741878683265e-06, + "loss": 13.2167, + "step": 251490 + }, + { + "epoch": 0.5080459119979638, + "grad_norm": 12.599907875061035, + "learning_rate": 5.816629811208112e-06, + "loss": 22.8365, + "step": 251500 + }, + { + "epoch": 0.5080661126306476, + "grad_norm": 86.8816909790039, + "learning_rate": 5.816285430567743e-06, + "loss": 19.1378, + "step": 251510 + }, + { + "epoch": 0.5080863132633314, + "grad_norm": 216.23777770996094, + "learning_rate": 5.815941045948898e-06, + "loss": 21.5719, + "step": 251520 + }, + { + "epoch": 0.5081065138960152, + "grad_norm": 589.9947509765625, + "learning_rate": 5.815596657353257e-06, + "loss": 10.7729, + "step": 251530 + }, + { + "epoch": 0.5081267145286991, + "grad_norm": 20.174942016601562, + "learning_rate": 5.815252264782493e-06, + "loss": 18.9093, + "step": 251540 + }, + { + "epoch": 0.5081469151613829, + "grad_norm": 552.2474365234375, + "learning_rate": 5.814907868238291e-06, + "loss": 51.3108, + "step": 251550 + }, + { + "epoch": 0.5081671157940667, + "grad_norm": 189.50640869140625, + "learning_rate": 5.814563467722328e-06, + "loss": 16.073, + "step": 251560 + }, + { + "epoch": 0.5081873164267505, + "grad_norm": 664.8473510742188, + "learning_rate": 5.8142190632362785e-06, + "loss": 30.1821, + "step": 251570 + }, + { + "epoch": 0.5082075170594343, + "grad_norm": 403.3890075683594, + "learning_rate": 5.813874654781825e-06, + "loss": 21.0588, + "step": 251580 + }, + { + "epoch": 0.5082277176921182, + "grad_norm": 323.449951171875, + "learning_rate": 5.813530242360647e-06, + "loss": 15.1011, + "step": 251590 + }, + { + "epoch": 0.508247918324802, + "grad_norm": 279.55914306640625, + "learning_rate": 5.813185825974419e-06, + "loss": 18.119, + "step": 251600 + }, + { + "epoch": 0.5082681189574858, + "grad_norm": 26.502635955810547, + "learning_rate": 5.812841405624823e-06, + "loss": 14.0624, + "step": 251610 + }, + { + "epoch": 0.5082883195901695, + "grad_norm": 614.7074584960938, + "learning_rate": 5.812496981313536e-06, + "loss": 22.1417, + "step": 251620 + }, + { + "epoch": 0.5083085202228533, + "grad_norm": 539.9589233398438, + "learning_rate": 5.8121525530422375e-06, + "loss": 23.0432, + "step": 251630 + }, + { + "epoch": 0.5083287208555372, + "grad_norm": 368.3820495605469, + "learning_rate": 5.811808120812607e-06, + "loss": 14.6848, + "step": 251640 + }, + { + "epoch": 0.508348921488221, + "grad_norm": 301.2969665527344, + "learning_rate": 5.811463684626319e-06, + "loss": 16.6718, + "step": 251650 + }, + { + "epoch": 0.5083691221209048, + "grad_norm": 349.03167724609375, + "learning_rate": 5.8111192444850586e-06, + "loss": 18.4352, + "step": 251660 + }, + { + "epoch": 0.5083893227535886, + "grad_norm": 709.9803466796875, + "learning_rate": 5.8107748003905e-06, + "loss": 30.3211, + "step": 251670 + }, + { + "epoch": 0.5084095233862724, + "grad_norm": 171.62179565429688, + "learning_rate": 5.810430352344324e-06, + "loss": 24.3387, + "step": 251680 + }, + { + "epoch": 0.5084297240189563, + "grad_norm": 160.02499389648438, + "learning_rate": 5.810085900348209e-06, + "loss": 24.2539, + "step": 251690 + }, + { + 
"epoch": 0.5084499246516401, + "grad_norm": 749.2161865234375, + "learning_rate": 5.809741444403831e-06, + "loss": 19.7885, + "step": 251700 + }, + { + "epoch": 0.5084701252843239, + "grad_norm": 212.47677612304688, + "learning_rate": 5.809396984512875e-06, + "loss": 26.7362, + "step": 251710 + }, + { + "epoch": 0.5084903259170077, + "grad_norm": 433.6282958984375, + "learning_rate": 5.8090525206770145e-06, + "loss": 14.2857, + "step": 251720 + }, + { + "epoch": 0.5085105265496915, + "grad_norm": 226.4599609375, + "learning_rate": 5.808708052897931e-06, + "loss": 18.0523, + "step": 251730 + }, + { + "epoch": 0.5085307271823754, + "grad_norm": 71.1861572265625, + "learning_rate": 5.808363581177301e-06, + "loss": 16.9545, + "step": 251740 + }, + { + "epoch": 0.5085509278150592, + "grad_norm": 45.71283721923828, + "learning_rate": 5.8080191055168064e-06, + "loss": 22.1804, + "step": 251750 + }, + { + "epoch": 0.508571128447743, + "grad_norm": 186.9651641845703, + "learning_rate": 5.807674625918125e-06, + "loss": 13.9291, + "step": 251760 + }, + { + "epoch": 0.5085913290804268, + "grad_norm": 365.13323974609375, + "learning_rate": 5.807330142382934e-06, + "loss": 13.7237, + "step": 251770 + }, + { + "epoch": 0.5086115297131106, + "grad_norm": 274.8147277832031, + "learning_rate": 5.806985654912915e-06, + "loss": 23.8939, + "step": 251780 + }, + { + "epoch": 0.5086317303457945, + "grad_norm": 50.02983856201172, + "learning_rate": 5.806641163509744e-06, + "loss": 12.7504, + "step": 251790 + }, + { + "epoch": 0.5086519309784783, + "grad_norm": 92.12311553955078, + "learning_rate": 5.8062966681751046e-06, + "loss": 9.1826, + "step": 251800 + }, + { + "epoch": 0.5086721316111621, + "grad_norm": 304.03076171875, + "learning_rate": 5.805952168910669e-06, + "loss": 22.5051, + "step": 251810 + }, + { + "epoch": 0.5086923322438459, + "grad_norm": 247.33364868164062, + "learning_rate": 5.805607665718124e-06, + "loss": 20.1043, + "step": 251820 + }, + { + "epoch": 0.5087125328765297, + "grad_norm": 162.25067138671875, + "learning_rate": 5.805263158599143e-06, + "loss": 11.2727, + "step": 251830 + }, + { + "epoch": 0.5087327335092136, + "grad_norm": 436.57501220703125, + "learning_rate": 5.804918647555408e-06, + "loss": 18.7904, + "step": 251840 + }, + { + "epoch": 0.5087529341418974, + "grad_norm": 581.3582763671875, + "learning_rate": 5.8045741325885965e-06, + "loss": 20.1834, + "step": 251850 + }, + { + "epoch": 0.5087731347745812, + "grad_norm": 701.9457397460938, + "learning_rate": 5.804229613700389e-06, + "loss": 37.8337, + "step": 251860 + }, + { + "epoch": 0.5087933354072649, + "grad_norm": 313.239501953125, + "learning_rate": 5.803885090892464e-06, + "loss": 21.4028, + "step": 251870 + }, + { + "epoch": 0.5088135360399487, + "grad_norm": 387.37603759765625, + "learning_rate": 5.8035405641665e-06, + "loss": 28.1262, + "step": 251880 + }, + { + "epoch": 0.5088337366726325, + "grad_norm": 724.362548828125, + "learning_rate": 5.803196033524176e-06, + "loss": 19.9967, + "step": 251890 + }, + { + "epoch": 0.5088539373053164, + "grad_norm": 1209.24462890625, + "learning_rate": 5.802851498967173e-06, + "loss": 34.1658, + "step": 251900 + }, + { + "epoch": 0.5088741379380002, + "grad_norm": 819.9832153320312, + "learning_rate": 5.802506960497168e-06, + "loss": 33.4126, + "step": 251910 + }, + { + "epoch": 0.508894338570684, + "grad_norm": 118.30169677734375, + "learning_rate": 5.802162418115842e-06, + "loss": 17.9593, + "step": 251920 + }, + { + "epoch": 0.5089145392033678, + "grad_norm": 
468.00677490234375, + "learning_rate": 5.801817871824876e-06, + "loss": 17.9862, + "step": 251930 + }, + { + "epoch": 0.5089347398360516, + "grad_norm": 931.7262573242188, + "learning_rate": 5.801473321625944e-06, + "loss": 8.3095, + "step": 251940 + }, + { + "epoch": 0.5089549404687355, + "grad_norm": 596.0464477539062, + "learning_rate": 5.80112876752073e-06, + "loss": 18.7461, + "step": 251950 + }, + { + "epoch": 0.5089751411014193, + "grad_norm": 527.8135375976562, + "learning_rate": 5.80078420951091e-06, + "loss": 32.7817, + "step": 251960 + }, + { + "epoch": 0.5089953417341031, + "grad_norm": 1024.0352783203125, + "learning_rate": 5.800439647598165e-06, + "loss": 24.7098, + "step": 251970 + }, + { + "epoch": 0.5090155423667869, + "grad_norm": 235.0797882080078, + "learning_rate": 5.800095081784176e-06, + "loss": 25.3951, + "step": 251980 + }, + { + "epoch": 0.5090357429994707, + "grad_norm": 352.7430419921875, + "learning_rate": 5.799750512070618e-06, + "loss": 16.6318, + "step": 251990 + }, + { + "epoch": 0.5090559436321546, + "grad_norm": 589.234130859375, + "learning_rate": 5.799405938459175e-06, + "loss": 29.4818, + "step": 252000 + }, + { + "epoch": 0.5090761442648384, + "grad_norm": 716.2567138671875, + "learning_rate": 5.7990613609515235e-06, + "loss": 19.7052, + "step": 252010 + }, + { + "epoch": 0.5090963448975222, + "grad_norm": 107.27327728271484, + "learning_rate": 5.798716779549344e-06, + "loss": 31.656, + "step": 252020 + }, + { + "epoch": 0.509116545530206, + "grad_norm": 171.72300720214844, + "learning_rate": 5.798372194254317e-06, + "loss": 25.5078, + "step": 252030 + }, + { + "epoch": 0.5091367461628898, + "grad_norm": 158.5902557373047, + "learning_rate": 5.7980276050681195e-06, + "loss": 17.2632, + "step": 252040 + }, + { + "epoch": 0.5091569467955737, + "grad_norm": 378.5718078613281, + "learning_rate": 5.797683011992432e-06, + "loss": 8.9759, + "step": 252050 + }, + { + "epoch": 0.5091771474282575, + "grad_norm": 105.90921783447266, + "learning_rate": 5.797338415028934e-06, + "loss": 25.6245, + "step": 252060 + }, + { + "epoch": 0.5091973480609413, + "grad_norm": 2.521341562271118, + "learning_rate": 5.796993814179307e-06, + "loss": 25.0027, + "step": 252070 + }, + { + "epoch": 0.5092175486936251, + "grad_norm": 0.7935197353363037, + "learning_rate": 5.796649209445227e-06, + "loss": 16.7548, + "step": 252080 + }, + { + "epoch": 0.5092377493263089, + "grad_norm": 747.4075927734375, + "learning_rate": 5.7963046008283775e-06, + "loss": 28.5457, + "step": 252090 + }, + { + "epoch": 0.5092579499589928, + "grad_norm": 412.5313720703125, + "learning_rate": 5.795959988330434e-06, + "loss": 22.6423, + "step": 252100 + }, + { + "epoch": 0.5092781505916766, + "grad_norm": 444.43450927734375, + "learning_rate": 5.795615371953078e-06, + "loss": 23.1515, + "step": 252110 + }, + { + "epoch": 0.5092983512243604, + "grad_norm": 58.15255355834961, + "learning_rate": 5.795270751697991e-06, + "loss": 14.8879, + "step": 252120 + }, + { + "epoch": 0.5093185518570441, + "grad_norm": 1216.7862548828125, + "learning_rate": 5.794926127566849e-06, + "loss": 15.4683, + "step": 252130 + }, + { + "epoch": 0.5093387524897279, + "grad_norm": 250.36837768554688, + "learning_rate": 5.794581499561335e-06, + "loss": 22.2441, + "step": 252140 + }, + { + "epoch": 0.5093589531224118, + "grad_norm": 504.7484130859375, + "learning_rate": 5.794236867683125e-06, + "loss": 17.2118, + "step": 252150 + }, + { + "epoch": 0.5093791537550956, + "grad_norm": 375.27716064453125, + "learning_rate": 
5.793892231933903e-06, + "loss": 18.3234, + "step": 252160 + }, + { + "epoch": 0.5093993543877794, + "grad_norm": 766.3939208984375, + "learning_rate": 5.793547592315345e-06, + "loss": 45.5151, + "step": 252170 + }, + { + "epoch": 0.5094195550204632, + "grad_norm": 1855.5390625, + "learning_rate": 5.793202948829133e-06, + "loss": 28.2413, + "step": 252180 + }, + { + "epoch": 0.509439755653147, + "grad_norm": 433.4533386230469, + "learning_rate": 5.792858301476946e-06, + "loss": 21.4306, + "step": 252190 + }, + { + "epoch": 0.5094599562858309, + "grad_norm": 436.69561767578125, + "learning_rate": 5.792513650260465e-06, + "loss": 18.5973, + "step": 252200 + }, + { + "epoch": 0.5094801569185147, + "grad_norm": 900.4495849609375, + "learning_rate": 5.792168995181366e-06, + "loss": 30.9446, + "step": 252210 + }, + { + "epoch": 0.5095003575511985, + "grad_norm": 286.23773193359375, + "learning_rate": 5.791824336241334e-06, + "loss": 8.588, + "step": 252220 + }, + { + "epoch": 0.5095205581838823, + "grad_norm": 244.795166015625, + "learning_rate": 5.791479673442044e-06, + "loss": 34.9819, + "step": 252230 + }, + { + "epoch": 0.5095407588165661, + "grad_norm": 282.7953186035156, + "learning_rate": 5.791135006785179e-06, + "loss": 10.0487, + "step": 252240 + }, + { + "epoch": 0.50956095944925, + "grad_norm": 665.7476806640625, + "learning_rate": 5.7907903362724195e-06, + "loss": 22.644, + "step": 252250 + }, + { + "epoch": 0.5095811600819338, + "grad_norm": 290.9110107421875, + "learning_rate": 5.790445661905441e-06, + "loss": 21.4393, + "step": 252260 + }, + { + "epoch": 0.5096013607146176, + "grad_norm": 214.25326538085938, + "learning_rate": 5.790100983685928e-06, + "loss": 14.4459, + "step": 252270 + }, + { + "epoch": 0.5096215613473014, + "grad_norm": 601.002197265625, + "learning_rate": 5.789756301615558e-06, + "loss": 15.1507, + "step": 252280 + }, + { + "epoch": 0.5096417619799852, + "grad_norm": 554.1863403320312, + "learning_rate": 5.7894116156960115e-06, + "loss": 15.7812, + "step": 252290 + }, + { + "epoch": 0.509661962612669, + "grad_norm": 393.3742370605469, + "learning_rate": 5.78906692592897e-06, + "loss": 16.9574, + "step": 252300 + }, + { + "epoch": 0.5096821632453529, + "grad_norm": 228.7798614501953, + "learning_rate": 5.788722232316109e-06, + "loss": 20.7032, + "step": 252310 + }, + { + "epoch": 0.5097023638780367, + "grad_norm": 166.04469299316406, + "learning_rate": 5.7883775348591146e-06, + "loss": 16.6758, + "step": 252320 + }, + { + "epoch": 0.5097225645107205, + "grad_norm": 538.080322265625, + "learning_rate": 5.788032833559661e-06, + "loss": 25.5036, + "step": 252330 + }, + { + "epoch": 0.5097427651434043, + "grad_norm": 511.0445556640625, + "learning_rate": 5.787688128419433e-06, + "loss": 19.0578, + "step": 252340 + }, + { + "epoch": 0.5097629657760882, + "grad_norm": 162.63348388671875, + "learning_rate": 5.787343419440108e-06, + "loss": 10.8651, + "step": 252350 + }, + { + "epoch": 0.509783166408772, + "grad_norm": 405.1139831542969, + "learning_rate": 5.786998706623365e-06, + "loss": 24.8399, + "step": 252360 + }, + { + "epoch": 0.5098033670414558, + "grad_norm": 380.32696533203125, + "learning_rate": 5.786653989970889e-06, + "loss": 11.9572, + "step": 252370 + }, + { + "epoch": 0.5098235676741395, + "grad_norm": 551.205078125, + "learning_rate": 5.786309269484355e-06, + "loss": 19.7167, + "step": 252380 + }, + { + "epoch": 0.5098437683068233, + "grad_norm": 614.0591430664062, + "learning_rate": 5.785964545165446e-06, + "loss": 13.8383, + "step": 252390 + }, + { + 
"epoch": 0.5098639689395071, + "grad_norm": 268.666748046875, + "learning_rate": 5.78561981701584e-06, + "loss": 14.0354, + "step": 252400 + }, + { + "epoch": 0.509884169572191, + "grad_norm": 324.810546875, + "learning_rate": 5.785275085037218e-06, + "loss": 23.4903, + "step": 252410 + }, + { + "epoch": 0.5099043702048748, + "grad_norm": 612.3726806640625, + "learning_rate": 5.7849303492312605e-06, + "loss": 26.5904, + "step": 252420 + }, + { + "epoch": 0.5099245708375586, + "grad_norm": 768.1057739257812, + "learning_rate": 5.784585609599649e-06, + "loss": 21.4947, + "step": 252430 + }, + { + "epoch": 0.5099447714702424, + "grad_norm": 147.74560546875, + "learning_rate": 5.784240866144062e-06, + "loss": 10.7797, + "step": 252440 + }, + { + "epoch": 0.5099649721029262, + "grad_norm": 275.04443359375, + "learning_rate": 5.783896118866179e-06, + "loss": 17.4282, + "step": 252450 + }, + { + "epoch": 0.5099851727356101, + "grad_norm": 835.8368530273438, + "learning_rate": 5.783551367767683e-06, + "loss": 27.2984, + "step": 252460 + }, + { + "epoch": 0.5100053733682939, + "grad_norm": 524.8331909179688, + "learning_rate": 5.783206612850251e-06, + "loss": 17.2435, + "step": 252470 + }, + { + "epoch": 0.5100255740009777, + "grad_norm": 152.20242309570312, + "learning_rate": 5.782861854115567e-06, + "loss": 19.86, + "step": 252480 + }, + { + "epoch": 0.5100457746336615, + "grad_norm": 120.97262573242188, + "learning_rate": 5.782517091565308e-06, + "loss": 17.7812, + "step": 252490 + }, + { + "epoch": 0.5100659752663453, + "grad_norm": 725.4302368164062, + "learning_rate": 5.782172325201155e-06, + "loss": 24.184, + "step": 252500 + }, + { + "epoch": 0.5100861758990292, + "grad_norm": 344.2830810546875, + "learning_rate": 5.78182755502479e-06, + "loss": 12.7987, + "step": 252510 + }, + { + "epoch": 0.510106376531713, + "grad_norm": 522.1859130859375, + "learning_rate": 5.781482781037892e-06, + "loss": 14.7735, + "step": 252520 + }, + { + "epoch": 0.5101265771643968, + "grad_norm": 416.50677490234375, + "learning_rate": 5.781138003242141e-06, + "loss": 24.5774, + "step": 252530 + }, + { + "epoch": 0.5101467777970806, + "grad_norm": 499.1576843261719, + "learning_rate": 5.780793221639219e-06, + "loss": 17.9837, + "step": 252540 + }, + { + "epoch": 0.5101669784297644, + "grad_norm": 476.3972473144531, + "learning_rate": 5.780448436230805e-06, + "loss": 11.0461, + "step": 252550 + }, + { + "epoch": 0.5101871790624483, + "grad_norm": 1126.03125, + "learning_rate": 5.7801036470185815e-06, + "loss": 48.5763, + "step": 252560 + }, + { + "epoch": 0.5102073796951321, + "grad_norm": 354.3380126953125, + "learning_rate": 5.779758854004226e-06, + "loss": 21.4074, + "step": 252570 + }, + { + "epoch": 0.5102275803278159, + "grad_norm": 568.8021850585938, + "learning_rate": 5.77941405718942e-06, + "loss": 15.7251, + "step": 252580 + }, + { + "epoch": 0.5102477809604997, + "grad_norm": 391.58837890625, + "learning_rate": 5.779069256575846e-06, + "loss": 34.3455, + "step": 252590 + }, + { + "epoch": 0.5102679815931835, + "grad_norm": 257.9892883300781, + "learning_rate": 5.778724452165181e-06, + "loss": 15.345, + "step": 252600 + }, + { + "epoch": 0.5102881822258674, + "grad_norm": 639.9202880859375, + "learning_rate": 5.7783796439591085e-06, + "loss": 21.1505, + "step": 252610 + }, + { + "epoch": 0.5103083828585512, + "grad_norm": 66.07951354980469, + "learning_rate": 5.778034831959308e-06, + "loss": 21.2015, + "step": 252620 + }, + { + "epoch": 0.510328583491235, + "grad_norm": 434.8056640625, + "learning_rate": 
5.77769001616746e-06, + "loss": 34.4328, + "step": 252630 + }, + { + "epoch": 0.5103487841239187, + "grad_norm": 177.5913543701172, + "learning_rate": 5.777345196585247e-06, + "loss": 26.5298, + "step": 252640 + }, + { + "epoch": 0.5103689847566025, + "grad_norm": 686.2937622070312, + "learning_rate": 5.777000373214345e-06, + "loss": 18.0848, + "step": 252650 + }, + { + "epoch": 0.5103891853892863, + "grad_norm": 237.0764923095703, + "learning_rate": 5.776655546056439e-06, + "loss": 14.7615, + "step": 252660 + }, + { + "epoch": 0.5104093860219702, + "grad_norm": 405.82220458984375, + "learning_rate": 5.776310715113207e-06, + "loss": 25.7658, + "step": 252670 + }, + { + "epoch": 0.510429586654654, + "grad_norm": 750.3215942382812, + "learning_rate": 5.77596588038633e-06, + "loss": 26.7581, + "step": 252680 + }, + { + "epoch": 0.5104497872873378, + "grad_norm": 160.1605987548828, + "learning_rate": 5.775621041877491e-06, + "loss": 30.4518, + "step": 252690 + }, + { + "epoch": 0.5104699879200216, + "grad_norm": 259.1350402832031, + "learning_rate": 5.77527619958837e-06, + "loss": 13.9909, + "step": 252700 + }, + { + "epoch": 0.5104901885527054, + "grad_norm": 233.79591369628906, + "learning_rate": 5.774931353520645e-06, + "loss": 14.9685, + "step": 252710 + }, + { + "epoch": 0.5105103891853893, + "grad_norm": 95.31790161132812, + "learning_rate": 5.774586503676e-06, + "loss": 20.0699, + "step": 252720 + }, + { + "epoch": 0.5105305898180731, + "grad_norm": 421.1565246582031, + "learning_rate": 5.774241650056114e-06, + "loss": 28.0121, + "step": 252730 + }, + { + "epoch": 0.5105507904507569, + "grad_norm": 364.62060546875, + "learning_rate": 5.773896792662666e-06, + "loss": 21.6366, + "step": 252740 + }, + { + "epoch": 0.5105709910834407, + "grad_norm": 695.878173828125, + "learning_rate": 5.773551931497342e-06, + "loss": 20.6741, + "step": 252750 + }, + { + "epoch": 0.5105911917161245, + "grad_norm": 794.6965942382812, + "learning_rate": 5.773207066561817e-06, + "loss": 23.7065, + "step": 252760 + }, + { + "epoch": 0.5106113923488084, + "grad_norm": 835.0409545898438, + "learning_rate": 5.772862197857776e-06, + "loss": 25.0538, + "step": 252770 + }, + { + "epoch": 0.5106315929814922, + "grad_norm": 234.1491241455078, + "learning_rate": 5.772517325386898e-06, + "loss": 6.7746, + "step": 252780 + }, + { + "epoch": 0.510651793614176, + "grad_norm": 401.83001708984375, + "learning_rate": 5.772172449150865e-06, + "loss": 16.4155, + "step": 252790 + }, + { + "epoch": 0.5106719942468598, + "grad_norm": 479.2789306640625, + "learning_rate": 5.771827569151357e-06, + "loss": 18.3182, + "step": 252800 + }, + { + "epoch": 0.5106921948795436, + "grad_norm": 499.20068359375, + "learning_rate": 5.771482685390053e-06, + "loss": 18.7645, + "step": 252810 + }, + { + "epoch": 0.5107123955122275, + "grad_norm": 444.3695373535156, + "learning_rate": 5.7711377978686385e-06, + "loss": 23.063, + "step": 252820 + }, + { + "epoch": 0.5107325961449113, + "grad_norm": 402.5401916503906, + "learning_rate": 5.770792906588791e-06, + "loss": 24.9544, + "step": 252830 + }, + { + "epoch": 0.5107527967775951, + "grad_norm": 206.49508666992188, + "learning_rate": 5.770448011552192e-06, + "loss": 13.1938, + "step": 252840 + }, + { + "epoch": 0.5107729974102789, + "grad_norm": 178.63812255859375, + "learning_rate": 5.770103112760523e-06, + "loss": 19.3979, + "step": 252850 + }, + { + "epoch": 0.5107931980429627, + "grad_norm": 398.7472229003906, + "learning_rate": 5.769758210215466e-06, + "loss": 14.4969, + "step": 252860 + }, + { 
+ "epoch": 0.5108133986756466, + "grad_norm": 195.69021606445312, + "learning_rate": 5.7694133039186986e-06, + "loss": 19.1735, + "step": 252870 + }, + { + "epoch": 0.5108335993083304, + "grad_norm": 182.81033325195312, + "learning_rate": 5.7690683938719065e-06, + "loss": 18.1955, + "step": 252880 + }, + { + "epoch": 0.5108537999410141, + "grad_norm": 49.81098175048828, + "learning_rate": 5.7687234800767666e-06, + "loss": 9.6713, + "step": 252890 + }, + { + "epoch": 0.5108740005736979, + "grad_norm": 829.9020385742188, + "learning_rate": 5.768378562534962e-06, + "loss": 13.2165, + "step": 252900 + }, + { + "epoch": 0.5108942012063817, + "grad_norm": 767.9072875976562, + "learning_rate": 5.768033641248174e-06, + "loss": 31.3949, + "step": 252910 + }, + { + "epoch": 0.5109144018390656, + "grad_norm": 1046.8443603515625, + "learning_rate": 5.767688716218083e-06, + "loss": 15.0424, + "step": 252920 + }, + { + "epoch": 0.5109346024717494, + "grad_norm": 657.3392333984375, + "learning_rate": 5.76734378744637e-06, + "loss": 18.826, + "step": 252930 + }, + { + "epoch": 0.5109548031044332, + "grad_norm": 34.600929260253906, + "learning_rate": 5.766998854934716e-06, + "loss": 21.9367, + "step": 252940 + }, + { + "epoch": 0.510975003737117, + "grad_norm": 368.00738525390625, + "learning_rate": 5.766653918684803e-06, + "loss": 20.0687, + "step": 252950 + }, + { + "epoch": 0.5109952043698008, + "grad_norm": 178.34298706054688, + "learning_rate": 5.766308978698313e-06, + "loss": 14.0907, + "step": 252960 + }, + { + "epoch": 0.5110154050024847, + "grad_norm": 337.9991455078125, + "learning_rate": 5.765964034976924e-06, + "loss": 21.5121, + "step": 252970 + }, + { + "epoch": 0.5110356056351685, + "grad_norm": 248.5225372314453, + "learning_rate": 5.765619087522322e-06, + "loss": 14.3779, + "step": 252980 + }, + { + "epoch": 0.5110558062678523, + "grad_norm": 1093.2027587890625, + "learning_rate": 5.765274136336183e-06, + "loss": 24.5636, + "step": 252990 + }, + { + "epoch": 0.5110760069005361, + "grad_norm": 521.7022094726562, + "learning_rate": 5.764929181420191e-06, + "loss": 14.9611, + "step": 253000 + }, + { + "epoch": 0.5110962075332199, + "grad_norm": 223.78298950195312, + "learning_rate": 5.7645842227760274e-06, + "loss": 24.0574, + "step": 253010 + }, + { + "epoch": 0.5111164081659038, + "grad_norm": 751.35107421875, + "learning_rate": 5.764239260405373e-06, + "loss": 27.4194, + "step": 253020 + }, + { + "epoch": 0.5111366087985876, + "grad_norm": 38.23499298095703, + "learning_rate": 5.763894294309909e-06, + "loss": 12.9641, + "step": 253030 + }, + { + "epoch": 0.5111568094312714, + "grad_norm": 295.8575439453125, + "learning_rate": 5.763549324491317e-06, + "loss": 19.4094, + "step": 253040 + }, + { + "epoch": 0.5111770100639552, + "grad_norm": 838.633544921875, + "learning_rate": 5.763204350951278e-06, + "loss": 14.7185, + "step": 253050 + }, + { + "epoch": 0.511197210696639, + "grad_norm": 187.49452209472656, + "learning_rate": 5.762859373691473e-06, + "loss": 21.8893, + "step": 253060 + }, + { + "epoch": 0.5112174113293229, + "grad_norm": 103.93560028076172, + "learning_rate": 5.7625143927135854e-06, + "loss": 16.6399, + "step": 253070 + }, + { + "epoch": 0.5112376119620067, + "grad_norm": 755.8477783203125, + "learning_rate": 5.762169408019293e-06, + "loss": 31.7384, + "step": 253080 + }, + { + "epoch": 0.5112578125946905, + "grad_norm": 473.8900146484375, + "learning_rate": 5.761824419610282e-06, + "loss": 10.6826, + "step": 253090 + }, + { + "epoch": 0.5112780132273743, + "grad_norm": 
146.130126953125, + "learning_rate": 5.761479427488229e-06, + "loss": 13.3441, + "step": 253100 + }, + { + "epoch": 0.5112982138600581, + "grad_norm": 576.4959716796875, + "learning_rate": 5.761134431654819e-06, + "loss": 27.9051, + "step": 253110 + }, + { + "epoch": 0.511318414492742, + "grad_norm": 323.2041015625, + "learning_rate": 5.760789432111731e-06, + "loss": 19.4123, + "step": 253120 + }, + { + "epoch": 0.5113386151254258, + "grad_norm": 352.3463134765625, + "learning_rate": 5.760444428860648e-06, + "loss": 20.8338, + "step": 253130 + }, + { + "epoch": 0.5113588157581096, + "grad_norm": 374.3561706542969, + "learning_rate": 5.760099421903253e-06, + "loss": 23.0967, + "step": 253140 + }, + { + "epoch": 0.5113790163907933, + "grad_norm": 342.5068664550781, + "learning_rate": 5.7597544112412225e-06, + "loss": 14.8995, + "step": 253150 + }, + { + "epoch": 0.5113992170234771, + "grad_norm": 0.0, + "learning_rate": 5.759409396876242e-06, + "loss": 14.4945, + "step": 253160 + }, + { + "epoch": 0.511419417656161, + "grad_norm": 573.2758178710938, + "learning_rate": 5.759064378809993e-06, + "loss": 22.1909, + "step": 253170 + }, + { + "epoch": 0.5114396182888448, + "grad_norm": 178.08120727539062, + "learning_rate": 5.758719357044157e-06, + "loss": 25.4603, + "step": 253180 + }, + { + "epoch": 0.5114598189215286, + "grad_norm": 398.0236511230469, + "learning_rate": 5.758374331580412e-06, + "loss": 21.4598, + "step": 253190 + }, + { + "epoch": 0.5114800195542124, + "grad_norm": 313.60833740234375, + "learning_rate": 5.7580293024204455e-06, + "loss": 10.8735, + "step": 253200 + }, + { + "epoch": 0.5115002201868962, + "grad_norm": 372.1860656738281, + "learning_rate": 5.7576842695659344e-06, + "loss": 9.1743, + "step": 253210 + }, + { + "epoch": 0.51152042081958, + "grad_norm": 416.085693359375, + "learning_rate": 5.757339233018563e-06, + "loss": 23.3086, + "step": 253220 + }, + { + "epoch": 0.5115406214522639, + "grad_norm": 387.45050048828125, + "learning_rate": 5.756994192780011e-06, + "loss": 19.3354, + "step": 253230 + }, + { + "epoch": 0.5115608220849477, + "grad_norm": 243.62307739257812, + "learning_rate": 5.756649148851962e-06, + "loss": 24.3164, + "step": 253240 + }, + { + "epoch": 0.5115810227176315, + "grad_norm": 541.02392578125, + "learning_rate": 5.7563041012360975e-06, + "loss": 23.0536, + "step": 253250 + }, + { + "epoch": 0.5116012233503153, + "grad_norm": 83.83334350585938, + "learning_rate": 5.7559590499340965e-06, + "loss": 22.3992, + "step": 253260 + }, + { + "epoch": 0.5116214239829991, + "grad_norm": 95.6950454711914, + "learning_rate": 5.7556139949476445e-06, + "loss": 14.3515, + "step": 253270 + }, + { + "epoch": 0.511641624615683, + "grad_norm": 318.7217712402344, + "learning_rate": 5.755268936278421e-06, + "loss": 30.4365, + "step": 253280 + }, + { + "epoch": 0.5116618252483668, + "grad_norm": 155.037841796875, + "learning_rate": 5.754923873928108e-06, + "loss": 22.2358, + "step": 253290 + }, + { + "epoch": 0.5116820258810506, + "grad_norm": 143.9044647216797, + "learning_rate": 5.7545788078983875e-06, + "loss": 16.5542, + "step": 253300 + }, + { + "epoch": 0.5117022265137344, + "grad_norm": 230.00341796875, + "learning_rate": 5.754233738190942e-06, + "loss": 26.4446, + "step": 253310 + }, + { + "epoch": 0.5117224271464182, + "grad_norm": 146.52598571777344, + "learning_rate": 5.753888664807452e-06, + "loss": 24.9038, + "step": 253320 + }, + { + "epoch": 0.5117426277791021, + "grad_norm": 275.0461730957031, + "learning_rate": 5.753543587749601e-06, + "loss": 
29.9561, + "step": 253330 + }, + { + "epoch": 0.5117628284117859, + "grad_norm": 716.48974609375, + "learning_rate": 5.753198507019068e-06, + "loss": 44.3443, + "step": 253340 + }, + { + "epoch": 0.5117830290444697, + "grad_norm": 387.1690979003906, + "learning_rate": 5.752853422617539e-06, + "loss": 15.3674, + "step": 253350 + }, + { + "epoch": 0.5118032296771535, + "grad_norm": 387.6558837890625, + "learning_rate": 5.752508334546695e-06, + "loss": 16.1463, + "step": 253360 + }, + { + "epoch": 0.5118234303098373, + "grad_norm": 194.35902404785156, + "learning_rate": 5.7521632428082135e-06, + "loss": 13.0076, + "step": 253370 + }, + { + "epoch": 0.5118436309425212, + "grad_norm": 469.7440185546875, + "learning_rate": 5.75181814740378e-06, + "loss": 35.026, + "step": 253380 + }, + { + "epoch": 0.511863831575205, + "grad_norm": 282.68035888671875, + "learning_rate": 5.751473048335078e-06, + "loss": 13.1574, + "step": 253390 + }, + { + "epoch": 0.5118840322078888, + "grad_norm": 79.40476989746094, + "learning_rate": 5.751127945603786e-06, + "loss": 4.7272, + "step": 253400 + }, + { + "epoch": 0.5119042328405725, + "grad_norm": 745.161376953125, + "learning_rate": 5.750782839211588e-06, + "loss": 19.8504, + "step": 253410 + }, + { + "epoch": 0.5119244334732563, + "grad_norm": 469.4684753417969, + "learning_rate": 5.750437729160165e-06, + "loss": 15.7141, + "step": 253420 + }, + { + "epoch": 0.5119446341059402, + "grad_norm": 262.9376220703125, + "learning_rate": 5.7500926154512e-06, + "loss": 9.5011, + "step": 253430 + }, + { + "epoch": 0.511964834738624, + "grad_norm": 455.0301208496094, + "learning_rate": 5.749747498086374e-06, + "loss": 12.2369, + "step": 253440 + }, + { + "epoch": 0.5119850353713078, + "grad_norm": 1525.0181884765625, + "learning_rate": 5.7494023770673705e-06, + "loss": 22.2308, + "step": 253450 + }, + { + "epoch": 0.5120052360039916, + "grad_norm": 812.306640625, + "learning_rate": 5.74905725239587e-06, + "loss": 21.5862, + "step": 253460 + }, + { + "epoch": 0.5120254366366754, + "grad_norm": 190.3341064453125, + "learning_rate": 5.748712124073556e-06, + "loss": 12.51, + "step": 253470 + }, + { + "epoch": 0.5120456372693593, + "grad_norm": 274.8276062011719, + "learning_rate": 5.74836699210211e-06, + "loss": 18.3088, + "step": 253480 + }, + { + "epoch": 0.5120658379020431, + "grad_norm": 347.3299865722656, + "learning_rate": 5.748021856483212e-06, + "loss": 13.7744, + "step": 253490 + }, + { + "epoch": 0.5120860385347269, + "grad_norm": 299.65985107421875, + "learning_rate": 5.747676717218549e-06, + "loss": 15.5967, + "step": 253500 + }, + { + "epoch": 0.5121062391674107, + "grad_norm": 403.5169677734375, + "learning_rate": 5.747331574309798e-06, + "loss": 18.5492, + "step": 253510 + }, + { + "epoch": 0.5121264398000945, + "grad_norm": 1063.2816162109375, + "learning_rate": 5.746986427758645e-06, + "loss": 29.8813, + "step": 253520 + }, + { + "epoch": 0.5121466404327784, + "grad_norm": 6.9237847328186035, + "learning_rate": 5.74664127756677e-06, + "loss": 24.8315, + "step": 253530 + }, + { + "epoch": 0.5121668410654622, + "grad_norm": 442.2906494140625, + "learning_rate": 5.746296123735857e-06, + "loss": 34.304, + "step": 253540 + }, + { + "epoch": 0.512187041698146, + "grad_norm": 857.4563598632812, + "learning_rate": 5.745950966267586e-06, + "loss": 16.9816, + "step": 253550 + }, + { + "epoch": 0.5122072423308298, + "grad_norm": 299.3878173828125, + "learning_rate": 5.745605805163641e-06, + "loss": 19.3526, + "step": 253560 + }, + { + "epoch": 0.5122274429635136, + 
"grad_norm": 259.07257080078125, + "learning_rate": 5.745260640425704e-06, + "loss": 14.5288, + "step": 253570 + }, + { + "epoch": 0.5122476435961975, + "grad_norm": 36.66910171508789, + "learning_rate": 5.744915472055457e-06, + "loss": 14.4111, + "step": 253580 + }, + { + "epoch": 0.5122678442288813, + "grad_norm": 726.9290161132812, + "learning_rate": 5.744570300054583e-06, + "loss": 28.6098, + "step": 253590 + }, + { + "epoch": 0.5122880448615651, + "grad_norm": 689.5191650390625, + "learning_rate": 5.744225124424762e-06, + "loss": 39.731, + "step": 253600 + }, + { + "epoch": 0.5123082454942489, + "grad_norm": 485.571533203125, + "learning_rate": 5.743879945167678e-06, + "loss": 25.2861, + "step": 253610 + }, + { + "epoch": 0.5123284461269327, + "grad_norm": 499.5935363769531, + "learning_rate": 5.7435347622850146e-06, + "loss": 20.1543, + "step": 253620 + }, + { + "epoch": 0.5123486467596166, + "grad_norm": 41.875450134277344, + "learning_rate": 5.743189575778452e-06, + "loss": 11.8493, + "step": 253630 + }, + { + "epoch": 0.5123688473923004, + "grad_norm": 240.07386779785156, + "learning_rate": 5.742844385649674e-06, + "loss": 19.8759, + "step": 253640 + }, + { + "epoch": 0.5123890480249842, + "grad_norm": 681.5988159179688, + "learning_rate": 5.742499191900364e-06, + "loss": 21.1939, + "step": 253650 + }, + { + "epoch": 0.5124092486576679, + "grad_norm": 275.4928894042969, + "learning_rate": 5.7421539945322006e-06, + "loss": 16.5793, + "step": 253660 + }, + { + "epoch": 0.5124294492903517, + "grad_norm": 194.4099884033203, + "learning_rate": 5.7418087935468706e-06, + "loss": 16.2816, + "step": 253670 + }, + { + "epoch": 0.5124496499230355, + "grad_norm": 355.28955078125, + "learning_rate": 5.741463588946053e-06, + "loss": 12.8818, + "step": 253680 + }, + { + "epoch": 0.5124698505557194, + "grad_norm": 100.03331756591797, + "learning_rate": 5.741118380731432e-06, + "loss": 18.4986, + "step": 253690 + }, + { + "epoch": 0.5124900511884032, + "grad_norm": 741.4827270507812, + "learning_rate": 5.740773168904691e-06, + "loss": 16.5638, + "step": 253700 + }, + { + "epoch": 0.512510251821087, + "grad_norm": 311.86761474609375, + "learning_rate": 5.74042795346751e-06, + "loss": 15.7162, + "step": 253710 + }, + { + "epoch": 0.5125304524537708, + "grad_norm": 492.0791931152344, + "learning_rate": 5.740082734421574e-06, + "loss": 19.1631, + "step": 253720 + }, + { + "epoch": 0.5125506530864546, + "grad_norm": 187.37074279785156, + "learning_rate": 5.7397375117685635e-06, + "loss": 13.953, + "step": 253730 + }, + { + "epoch": 0.5125708537191385, + "grad_norm": 188.18902587890625, + "learning_rate": 5.739392285510162e-06, + "loss": 29.3611, + "step": 253740 + }, + { + "epoch": 0.5125910543518223, + "grad_norm": 133.94537353515625, + "learning_rate": 5.7390470556480545e-06, + "loss": 21.0016, + "step": 253750 + }, + { + "epoch": 0.5126112549845061, + "grad_norm": 392.44189453125, + "learning_rate": 5.7387018221839195e-06, + "loss": 15.3782, + "step": 253760 + }, + { + "epoch": 0.5126314556171899, + "grad_norm": 450.6122131347656, + "learning_rate": 5.738356585119441e-06, + "loss": 15.1867, + "step": 253770 + }, + { + "epoch": 0.5126516562498737, + "grad_norm": 307.326416015625, + "learning_rate": 5.738011344456302e-06, + "loss": 20.9072, + "step": 253780 + }, + { + "epoch": 0.5126718568825576, + "grad_norm": 113.2793197631836, + "learning_rate": 5.737666100196188e-06, + "loss": 9.1972, + "step": 253790 + }, + { + "epoch": 0.5126920575152414, + "grad_norm": 57.68220901489258, + "learning_rate": 
5.737320852340776e-06, + "loss": 19.2187, + "step": 253800 + }, + { + "epoch": 0.5127122581479252, + "grad_norm": 539.6422729492188, + "learning_rate": 5.736975600891752e-06, + "loss": 25.7557, + "step": 253810 + }, + { + "epoch": 0.512732458780609, + "grad_norm": 309.9414978027344, + "learning_rate": 5.7366303458507986e-06, + "loss": 18.8251, + "step": 253820 + }, + { + "epoch": 0.5127526594132928, + "grad_norm": 106.2264404296875, + "learning_rate": 5.736285087219599e-06, + "loss": 15.3974, + "step": 253830 + }, + { + "epoch": 0.5127728600459767, + "grad_norm": 1031.8822021484375, + "learning_rate": 5.7359398249998335e-06, + "loss": 18.8748, + "step": 253840 + }, + { + "epoch": 0.5127930606786605, + "grad_norm": 19.903709411621094, + "learning_rate": 5.735594559193187e-06, + "loss": 32.3088, + "step": 253850 + }, + { + "epoch": 0.5128132613113443, + "grad_norm": 494.54180908203125, + "learning_rate": 5.735249289801343e-06, + "loss": 21.3223, + "step": 253860 + }, + { + "epoch": 0.5128334619440281, + "grad_norm": 357.4226379394531, + "learning_rate": 5.734904016825982e-06, + "loss": 13.4894, + "step": 253870 + }, + { + "epoch": 0.5128536625767119, + "grad_norm": 520.6104125976562, + "learning_rate": 5.73455874026879e-06, + "loss": 23.4609, + "step": 253880 + }, + { + "epoch": 0.5128738632093958, + "grad_norm": 293.92706298828125, + "learning_rate": 5.7342134601314445e-06, + "loss": 18.8432, + "step": 253890 + }, + { + "epoch": 0.5128940638420796, + "grad_norm": 383.59796142578125, + "learning_rate": 5.733868176415633e-06, + "loss": 28.6413, + "step": 253900 + }, + { + "epoch": 0.5129142644747634, + "grad_norm": 313.1064758300781, + "learning_rate": 5.733522889123038e-06, + "loss": 14.5493, + "step": 253910 + }, + { + "epoch": 0.5129344651074471, + "grad_norm": 289.10418701171875, + "learning_rate": 5.733177598255341e-06, + "loss": 36.0446, + "step": 253920 + }, + { + "epoch": 0.5129546657401309, + "grad_norm": 383.3257141113281, + "learning_rate": 5.732832303814225e-06, + "loss": 14.9592, + "step": 253930 + }, + { + "epoch": 0.5129748663728148, + "grad_norm": 341.5558166503906, + "learning_rate": 5.7324870058013736e-06, + "loss": 23.8471, + "step": 253940 + }, + { + "epoch": 0.5129950670054986, + "grad_norm": 283.3092346191406, + "learning_rate": 5.732141704218469e-06, + "loss": 25.5903, + "step": 253950 + }, + { + "epoch": 0.5130152676381824, + "grad_norm": 407.0079040527344, + "learning_rate": 5.731796399067194e-06, + "loss": 17.412, + "step": 253960 + }, + { + "epoch": 0.5130354682708662, + "grad_norm": 158.69200134277344, + "learning_rate": 5.731451090349234e-06, + "loss": 22.5824, + "step": 253970 + }, + { + "epoch": 0.51305566890355, + "grad_norm": 172.71466064453125, + "learning_rate": 5.731105778066268e-06, + "loss": 20.4837, + "step": 253980 + }, + { + "epoch": 0.5130758695362339, + "grad_norm": 0.0, + "learning_rate": 5.730760462219983e-06, + "loss": 13.3426, + "step": 253990 + }, + { + "epoch": 0.5130960701689177, + "grad_norm": 682.0313110351562, + "learning_rate": 5.730415142812059e-06, + "loss": 22.8439, + "step": 254000 + }, + { + "epoch": 0.5131162708016015, + "grad_norm": 618.0068969726562, + "learning_rate": 5.73006981984418e-06, + "loss": 21.3711, + "step": 254010 + }, + { + "epoch": 0.5131364714342853, + "grad_norm": 84.22988891601562, + "learning_rate": 5.7297244933180306e-06, + "loss": 12.3084, + "step": 254020 + }, + { + "epoch": 0.5131566720669691, + "grad_norm": 698.24951171875, + "learning_rate": 5.72937916323529e-06, + "loss": 18.7768, + "step": 254030 + }, + { 
+ "epoch": 0.513176872699653, + "grad_norm": 535.3914184570312, + "learning_rate": 5.729033829597646e-06, + "loss": 22.168, + "step": 254040 + }, + { + "epoch": 0.5131970733323368, + "grad_norm": 288.82470703125, + "learning_rate": 5.728688492406778e-06, + "loss": 10.6456, + "step": 254050 + }, + { + "epoch": 0.5132172739650206, + "grad_norm": 397.6792297363281, + "learning_rate": 5.728343151664371e-06, + "loss": 11.5455, + "step": 254060 + }, + { + "epoch": 0.5132374745977044, + "grad_norm": 196.13861083984375, + "learning_rate": 5.727997807372109e-06, + "loss": 24.8786, + "step": 254070 + }, + { + "epoch": 0.5132576752303882, + "grad_norm": 317.0704040527344, + "learning_rate": 5.727652459531674e-06, + "loss": 39.4602, + "step": 254080 + }, + { + "epoch": 0.513277875863072, + "grad_norm": 300.1142883300781, + "learning_rate": 5.727307108144749e-06, + "loss": 16.143, + "step": 254090 + }, + { + "epoch": 0.5132980764957559, + "grad_norm": 767.6620483398438, + "learning_rate": 5.726961753213016e-06, + "loss": 23.4947, + "step": 254100 + }, + { + "epoch": 0.5133182771284397, + "grad_norm": 219.3327178955078, + "learning_rate": 5.726616394738161e-06, + "loss": 16.8684, + "step": 254110 + }, + { + "epoch": 0.5133384777611235, + "grad_norm": 303.88433837890625, + "learning_rate": 5.726271032721864e-06, + "loss": 11.7734, + "step": 254120 + }, + { + "epoch": 0.5133586783938073, + "grad_norm": 467.8100280761719, + "learning_rate": 5.725925667165812e-06, + "loss": 14.3304, + "step": 254130 + }, + { + "epoch": 0.5133788790264912, + "grad_norm": 532.1683349609375, + "learning_rate": 5.725580298071685e-06, + "loss": 16.8608, + "step": 254140 + }, + { + "epoch": 0.513399079659175, + "grad_norm": 19.614660263061523, + "learning_rate": 5.725234925441169e-06, + "loss": 21.0036, + "step": 254150 + }, + { + "epoch": 0.5134192802918588, + "grad_norm": 589.5601196289062, + "learning_rate": 5.724889549275945e-06, + "loss": 21.3069, + "step": 254160 + }, + { + "epoch": 0.5134394809245425, + "grad_norm": 658.7628784179688, + "learning_rate": 5.724544169577697e-06, + "loss": 20.0233, + "step": 254170 + }, + { + "epoch": 0.5134596815572263, + "grad_norm": 92.69924926757812, + "learning_rate": 5.72419878634811e-06, + "loss": 17.4411, + "step": 254180 + }, + { + "epoch": 0.5134798821899101, + "grad_norm": 798.9140014648438, + "learning_rate": 5.7238533995888645e-06, + "loss": 23.9981, + "step": 254190 + }, + { + "epoch": 0.513500082822594, + "grad_norm": 697.9685668945312, + "learning_rate": 5.723508009301646e-06, + "loss": 24.8597, + "step": 254200 + }, + { + "epoch": 0.5135202834552778, + "grad_norm": 155.75975036621094, + "learning_rate": 5.723162615488137e-06, + "loss": 16.6985, + "step": 254210 + }, + { + "epoch": 0.5135404840879616, + "grad_norm": 227.90110778808594, + "learning_rate": 5.722817218150021e-06, + "loss": 14.4569, + "step": 254220 + }, + { + "epoch": 0.5135606847206454, + "grad_norm": 166.38514709472656, + "learning_rate": 5.722471817288982e-06, + "loss": 11.5888, + "step": 254230 + }, + { + "epoch": 0.5135808853533292, + "grad_norm": 557.6221923828125, + "learning_rate": 5.722126412906703e-06, + "loss": 19.4762, + "step": 254240 + }, + { + "epoch": 0.5136010859860131, + "grad_norm": 299.7841491699219, + "learning_rate": 5.721781005004866e-06, + "loss": 23.5099, + "step": 254250 + }, + { + "epoch": 0.5136212866186969, + "grad_norm": 454.24481201171875, + "learning_rate": 5.721435593585158e-06, + "loss": 22.7358, + "step": 254260 + }, + { + "epoch": 0.5136414872513807, + "grad_norm": 
813.3714599609375, + "learning_rate": 5.72109017864926e-06, + "loss": 23.3991, + "step": 254270 + }, + { + "epoch": 0.5136616878840645, + "grad_norm": 79.51116943359375, + "learning_rate": 5.720744760198855e-06, + "loss": 25.27, + "step": 254280 + }, + { + "epoch": 0.5136818885167483, + "grad_norm": 695.857177734375, + "learning_rate": 5.720399338235628e-06, + "loss": 23.4554, + "step": 254290 + }, + { + "epoch": 0.5137020891494322, + "grad_norm": 651.18359375, + "learning_rate": 5.720053912761261e-06, + "loss": 27.6443, + "step": 254300 + }, + { + "epoch": 0.513722289782116, + "grad_norm": 587.54345703125, + "learning_rate": 5.719708483777441e-06, + "loss": 17.0722, + "step": 254310 + }, + { + "epoch": 0.5137424904147998, + "grad_norm": 564.2921752929688, + "learning_rate": 5.719363051285847e-06, + "loss": 27.6548, + "step": 254320 + }, + { + "epoch": 0.5137626910474836, + "grad_norm": 407.90386962890625, + "learning_rate": 5.719017615288165e-06, + "loss": 29.3676, + "step": 254330 + }, + { + "epoch": 0.5137828916801674, + "grad_norm": 158.3983612060547, + "learning_rate": 5.718672175786078e-06, + "loss": 11.9976, + "step": 254340 + }, + { + "epoch": 0.5138030923128513, + "grad_norm": 33.34981155395508, + "learning_rate": 5.718326732781271e-06, + "loss": 17.7713, + "step": 254350 + }, + { + "epoch": 0.5138232929455351, + "grad_norm": 328.87640380859375, + "learning_rate": 5.7179812862754265e-06, + "loss": 18.6988, + "step": 254360 + }, + { + "epoch": 0.5138434935782189, + "grad_norm": 370.8194580078125, + "learning_rate": 5.717635836270228e-06, + "loss": 20.8561, + "step": 254370 + }, + { + "epoch": 0.5138636942109027, + "grad_norm": 5.150504112243652, + "learning_rate": 5.71729038276736e-06, + "loss": 18.7786, + "step": 254380 + }, + { + "epoch": 0.5138838948435865, + "grad_norm": 172.74937438964844, + "learning_rate": 5.716944925768505e-06, + "loss": 16.3021, + "step": 254390 + }, + { + "epoch": 0.5139040954762704, + "grad_norm": 236.12741088867188, + "learning_rate": 5.716599465275347e-06, + "loss": 32.3623, + "step": 254400 + }, + { + "epoch": 0.5139242961089542, + "grad_norm": 445.7738037109375, + "learning_rate": 5.716254001289571e-06, + "loss": 22.8093, + "step": 254410 + }, + { + "epoch": 0.513944496741638, + "grad_norm": 161.113525390625, + "learning_rate": 5.7159085338128595e-06, + "loss": 9.8593, + "step": 254420 + }, + { + "epoch": 0.5139646973743217, + "grad_norm": 244.25999450683594, + "learning_rate": 5.7155630628468974e-06, + "loss": 15.116, + "step": 254430 + }, + { + "epoch": 0.5139848980070055, + "grad_norm": 507.08868408203125, + "learning_rate": 5.715217588393367e-06, + "loss": 27.5761, + "step": 254440 + }, + { + "epoch": 0.5140050986396894, + "grad_norm": 587.4240112304688, + "learning_rate": 5.714872110453952e-06, + "loss": 25.517, + "step": 254450 + }, + { + "epoch": 0.5140252992723732, + "grad_norm": 156.8381805419922, + "learning_rate": 5.714526629030338e-06, + "loss": 8.3755, + "step": 254460 + }, + { + "epoch": 0.514045499905057, + "grad_norm": 501.0858154296875, + "learning_rate": 5.714181144124209e-06, + "loss": 12.8682, + "step": 254470 + }, + { + "epoch": 0.5140657005377408, + "grad_norm": 896.33056640625, + "learning_rate": 5.7138356557372444e-06, + "loss": 22.6873, + "step": 254480 + }, + { + "epoch": 0.5140859011704246, + "grad_norm": 536.6281127929688, + "learning_rate": 5.713490163871135e-06, + "loss": 27.9533, + "step": 254490 + }, + { + "epoch": 0.5141061018031085, + "grad_norm": 204.81321716308594, + "learning_rate": 5.7131446685275595e-06, + 
"loss": 35.4812, + "step": 254500 + }, + { + "epoch": 0.5141263024357923, + "grad_norm": 371.892822265625, + "learning_rate": 5.712799169708203e-06, + "loss": 18.543, + "step": 254510 + }, + { + "epoch": 0.5141465030684761, + "grad_norm": 446.13824462890625, + "learning_rate": 5.71245366741475e-06, + "loss": 20.1133, + "step": 254520 + }, + { + "epoch": 0.5141667037011599, + "grad_norm": 416.196533203125, + "learning_rate": 5.712108161648885e-06, + "loss": 37.1317, + "step": 254530 + }, + { + "epoch": 0.5141869043338437, + "grad_norm": 200.76622009277344, + "learning_rate": 5.7117626524122905e-06, + "loss": 16.0216, + "step": 254540 + }, + { + "epoch": 0.5142071049665276, + "grad_norm": 258.2995300292969, + "learning_rate": 5.711417139706651e-06, + "loss": 14.974, + "step": 254550 + }, + { + "epoch": 0.5142273055992114, + "grad_norm": 272.2379455566406, + "learning_rate": 5.711071623533651e-06, + "loss": 25.0506, + "step": 254560 + }, + { + "epoch": 0.5142475062318952, + "grad_norm": 580.2442626953125, + "learning_rate": 5.710726103894974e-06, + "loss": 17.8842, + "step": 254570 + }, + { + "epoch": 0.514267706864579, + "grad_norm": 308.1773376464844, + "learning_rate": 5.710380580792305e-06, + "loss": 11.4507, + "step": 254580 + }, + { + "epoch": 0.5142879074972628, + "grad_norm": 1430.5892333984375, + "learning_rate": 5.710035054227326e-06, + "loss": 28.2847, + "step": 254590 + }, + { + "epoch": 0.5143081081299467, + "grad_norm": 448.6597900390625, + "learning_rate": 5.709689524201723e-06, + "loss": 10.425, + "step": 254600 + }, + { + "epoch": 0.5143283087626305, + "grad_norm": 723.5948486328125, + "learning_rate": 5.709343990717179e-06, + "loss": 14.4196, + "step": 254610 + }, + { + "epoch": 0.5143485093953143, + "grad_norm": 1252.2452392578125, + "learning_rate": 5.708998453775378e-06, + "loss": 35.5113, + "step": 254620 + }, + { + "epoch": 0.5143687100279981, + "grad_norm": 598.4287109375, + "learning_rate": 5.708652913378005e-06, + "loss": 18.4721, + "step": 254630 + }, + { + "epoch": 0.5143889106606819, + "grad_norm": 101.59346008300781, + "learning_rate": 5.7083073695267435e-06, + "loss": 15.5913, + "step": 254640 + }, + { + "epoch": 0.5144091112933658, + "grad_norm": 389.8990173339844, + "learning_rate": 5.707961822223279e-06, + "loss": 18.3069, + "step": 254650 + }, + { + "epoch": 0.5144293119260496, + "grad_norm": 328.4405517578125, + "learning_rate": 5.707616271469293e-06, + "loss": 31.6081, + "step": 254660 + }, + { + "epoch": 0.5144495125587334, + "grad_norm": 305.9642639160156, + "learning_rate": 5.707270717266471e-06, + "loss": 27.7813, + "step": 254670 + }, + { + "epoch": 0.5144697131914172, + "grad_norm": 509.5992126464844, + "learning_rate": 5.7069251596164975e-06, + "loss": 23.0576, + "step": 254680 + }, + { + "epoch": 0.5144899138241009, + "grad_norm": 334.6846008300781, + "learning_rate": 5.706579598521058e-06, + "loss": 11.9694, + "step": 254690 + }, + { + "epoch": 0.5145101144567847, + "grad_norm": 301.08782958984375, + "learning_rate": 5.706234033981835e-06, + "loss": 18.8511, + "step": 254700 + }, + { + "epoch": 0.5145303150894686, + "grad_norm": 750.560302734375, + "learning_rate": 5.705888466000511e-06, + "loss": 27.2115, + "step": 254710 + }, + { + "epoch": 0.5145505157221524, + "grad_norm": 107.52172088623047, + "learning_rate": 5.705542894578773e-06, + "loss": 23.0592, + "step": 254720 + }, + { + "epoch": 0.5145707163548362, + "grad_norm": 131.147216796875, + "learning_rate": 5.705197319718304e-06, + "loss": 42.1324, + "step": 254730 + }, + { + "epoch": 
0.51459091698752, + "grad_norm": 138.3438262939453, + "learning_rate": 5.704851741420792e-06, + "loss": 14.9189, + "step": 254740 + }, + { + "epoch": 0.5146111176202038, + "grad_norm": 681.9591064453125, + "learning_rate": 5.704506159687914e-06, + "loss": 29.1809, + "step": 254750 + }, + { + "epoch": 0.5146313182528877, + "grad_norm": 560.5260620117188, + "learning_rate": 5.7041605745213605e-06, + "loss": 41.0378, + "step": 254760 + }, + { + "epoch": 0.5146515188855715, + "grad_norm": 606.7002563476562, + "learning_rate": 5.703814985922813e-06, + "loss": 19.3052, + "step": 254770 + }, + { + "epoch": 0.5146717195182553, + "grad_norm": 437.7874755859375, + "learning_rate": 5.703469393893957e-06, + "loss": 18.2557, + "step": 254780 + }, + { + "epoch": 0.5146919201509391, + "grad_norm": 73.53416442871094, + "learning_rate": 5.7031237984364776e-06, + "loss": 17.7876, + "step": 254790 + }, + { + "epoch": 0.5147121207836229, + "grad_norm": 738.1157836914062, + "learning_rate": 5.702778199552055e-06, + "loss": 31.4386, + "step": 254800 + }, + { + "epoch": 0.5147323214163068, + "grad_norm": 198.610107421875, + "learning_rate": 5.7024325972423795e-06, + "loss": 14.7151, + "step": 254810 + }, + { + "epoch": 0.5147525220489906, + "grad_norm": 172.3389434814453, + "learning_rate": 5.702086991509133e-06, + "loss": 15.3419, + "step": 254820 + }, + { + "epoch": 0.5147727226816744, + "grad_norm": 172.26531982421875, + "learning_rate": 5.701741382353998e-06, + "loss": 14.9749, + "step": 254830 + }, + { + "epoch": 0.5147929233143582, + "grad_norm": 178.2786407470703, + "learning_rate": 5.70139576977866e-06, + "loss": 17.5487, + "step": 254840 + }, + { + "epoch": 0.514813123947042, + "grad_norm": 287.3568115234375, + "learning_rate": 5.701050153784806e-06, + "loss": 11.0949, + "step": 254850 + }, + { + "epoch": 0.5148333245797259, + "grad_norm": 710.2113647460938, + "learning_rate": 5.7007045343741176e-06, + "loss": 31.9813, + "step": 254860 + }, + { + "epoch": 0.5148535252124097, + "grad_norm": 251.64366149902344, + "learning_rate": 5.70035891154828e-06, + "loss": 27.172, + "step": 254870 + }, + { + "epoch": 0.5148737258450935, + "grad_norm": 385.65277099609375, + "learning_rate": 5.700013285308979e-06, + "loss": 36.05, + "step": 254880 + }, + { + "epoch": 0.5148939264777773, + "grad_norm": 385.455322265625, + "learning_rate": 5.699667655657898e-06, + "loss": 17.7484, + "step": 254890 + }, + { + "epoch": 0.5149141271104611, + "grad_norm": 1162.52734375, + "learning_rate": 5.6993220225967214e-06, + "loss": 33.4321, + "step": 254900 + }, + { + "epoch": 0.514934327743145, + "grad_norm": 420.1855163574219, + "learning_rate": 5.698976386127133e-06, + "loss": 17.1253, + "step": 254910 + }, + { + "epoch": 0.5149545283758288, + "grad_norm": 322.5575256347656, + "learning_rate": 5.69863074625082e-06, + "loss": 10.0489, + "step": 254920 + }, + { + "epoch": 0.5149747290085126, + "grad_norm": 43.054256439208984, + "learning_rate": 5.6982851029694645e-06, + "loss": 19.1232, + "step": 254930 + }, + { + "epoch": 0.5149949296411963, + "grad_norm": 0.0, + "learning_rate": 5.697939456284753e-06, + "loss": 14.9083, + "step": 254940 + }, + { + "epoch": 0.5150151302738801, + "grad_norm": 12.182699203491211, + "learning_rate": 5.697593806198369e-06, + "loss": 22.7151, + "step": 254950 + }, + { + "epoch": 0.515035330906564, + "grad_norm": 2079.461669921875, + "learning_rate": 5.697248152711997e-06, + "loss": 34.4558, + "step": 254960 + }, + { + "epoch": 0.5150555315392478, + "grad_norm": 351.67364501953125, + "learning_rate": 
5.696902495827323e-06, + "loss": 21.6426, + "step": 254970 + }, + { + "epoch": 0.5150757321719316, + "grad_norm": 834.896240234375, + "learning_rate": 5.69655683554603e-06, + "loss": 34.1588, + "step": 254980 + }, + { + "epoch": 0.5150959328046154, + "grad_norm": 592.2312622070312, + "learning_rate": 5.6962111718698035e-06, + "loss": 13.6797, + "step": 254990 + }, + { + "epoch": 0.5151161334372992, + "grad_norm": 207.23638916015625, + "learning_rate": 5.695865504800328e-06, + "loss": 16.1975, + "step": 255000 + }, + { + "epoch": 0.515136334069983, + "grad_norm": 103.85392761230469, + "learning_rate": 5.695519834339288e-06, + "loss": 22.8707, + "step": 255010 + }, + { + "epoch": 0.5151565347026669, + "grad_norm": 413.5932922363281, + "learning_rate": 5.695174160488369e-06, + "loss": 15.2407, + "step": 255020 + }, + { + "epoch": 0.5151767353353507, + "grad_norm": 397.1807861328125, + "learning_rate": 5.694828483249257e-06, + "loss": 20.2696, + "step": 255030 + }, + { + "epoch": 0.5151969359680345, + "grad_norm": 159.94862365722656, + "learning_rate": 5.694482802623634e-06, + "loss": 10.7502, + "step": 255040 + }, + { + "epoch": 0.5152171366007183, + "grad_norm": 633.3711547851562, + "learning_rate": 5.694137118613185e-06, + "loss": 21.5194, + "step": 255050 + }, + { + "epoch": 0.5152373372334021, + "grad_norm": 14.009786605834961, + "learning_rate": 5.693791431219599e-06, + "loss": 13.4572, + "step": 255060 + }, + { + "epoch": 0.515257537866086, + "grad_norm": 236.19485473632812, + "learning_rate": 5.693445740444554e-06, + "loss": 24.037, + "step": 255070 + }, + { + "epoch": 0.5152777384987698, + "grad_norm": 209.95321655273438, + "learning_rate": 5.693100046289741e-06, + "loss": 25.8267, + "step": 255080 + }, + { + "epoch": 0.5152979391314536, + "grad_norm": 211.92384338378906, + "learning_rate": 5.692754348756841e-06, + "loss": 10.0126, + "step": 255090 + }, + { + "epoch": 0.5153181397641374, + "grad_norm": 791.6719360351562, + "learning_rate": 5.692408647847542e-06, + "loss": 30.305, + "step": 255100 + }, + { + "epoch": 0.5153383403968212, + "grad_norm": 304.9013671875, + "learning_rate": 5.692062943563525e-06, + "loss": 17.5825, + "step": 255110 + }, + { + "epoch": 0.5153585410295051, + "grad_norm": 293.64532470703125, + "learning_rate": 5.691717235906479e-06, + "loss": 18.0099, + "step": 255120 + }, + { + "epoch": 0.5153787416621889, + "grad_norm": 357.52215576171875, + "learning_rate": 5.691371524878087e-06, + "loss": 16.9815, + "step": 255130 + }, + { + "epoch": 0.5153989422948727, + "grad_norm": 311.4905090332031, + "learning_rate": 5.6910258104800335e-06, + "loss": 13.2764, + "step": 255140 + }, + { + "epoch": 0.5154191429275565, + "grad_norm": 135.3758544921875, + "learning_rate": 5.690680092714004e-06, + "loss": 14.3897, + "step": 255150 + }, + { + "epoch": 0.5154393435602403, + "grad_norm": 468.88287353515625, + "learning_rate": 5.690334371581683e-06, + "loss": 23.5022, + "step": 255160 + }, + { + "epoch": 0.5154595441929242, + "grad_norm": 467.7669372558594, + "learning_rate": 5.689988647084756e-06, + "loss": 30.0204, + "step": 255170 + }, + { + "epoch": 0.515479744825608, + "grad_norm": 1561.510009765625, + "learning_rate": 5.6896429192249085e-06, + "loss": 23.8157, + "step": 255180 + }, + { + "epoch": 0.5154999454582918, + "grad_norm": 206.41876220703125, + "learning_rate": 5.689297188003826e-06, + "loss": 16.2573, + "step": 255190 + }, + { + "epoch": 0.5155201460909755, + "grad_norm": 300.7821044921875, + "learning_rate": 5.68895145342319e-06, + "loss": 16.7788, + "step": 
255200 + }, + { + "epoch": 0.5155403467236593, + "grad_norm": 885.2864379882812, + "learning_rate": 5.688605715484691e-06, + "loss": 13.9834, + "step": 255210 + }, + { + "epoch": 0.5155605473563432, + "grad_norm": 438.3362731933594, + "learning_rate": 5.68825997419001e-06, + "loss": 12.7685, + "step": 255220 + }, + { + "epoch": 0.515580747989027, + "grad_norm": 236.73117065429688, + "learning_rate": 5.687914229540833e-06, + "loss": 19.0074, + "step": 255230 + }, + { + "epoch": 0.5156009486217108, + "grad_norm": 315.9634094238281, + "learning_rate": 5.6875684815388475e-06, + "loss": 25.4186, + "step": 255240 + }, + { + "epoch": 0.5156211492543946, + "grad_norm": 386.8504943847656, + "learning_rate": 5.687222730185733e-06, + "loss": 19.1703, + "step": 255250 + }, + { + "epoch": 0.5156413498870784, + "grad_norm": 126.34518432617188, + "learning_rate": 5.686876975483182e-06, + "loss": 16.9827, + "step": 255260 + }, + { + "epoch": 0.5156615505197623, + "grad_norm": 511.374267578125, + "learning_rate": 5.686531217432873e-06, + "loss": 30.4377, + "step": 255270 + }, + { + "epoch": 0.5156817511524461, + "grad_norm": 253.56813049316406, + "learning_rate": 5.686185456036496e-06, + "loss": 10.2572, + "step": 255280 + }, + { + "epoch": 0.5157019517851299, + "grad_norm": 184.04869079589844, + "learning_rate": 5.685839691295734e-06, + "loss": 14.3178, + "step": 255290 + }, + { + "epoch": 0.5157221524178137, + "grad_norm": 416.50103759765625, + "learning_rate": 5.685493923212273e-06, + "loss": 26.5885, + "step": 255300 + }, + { + "epoch": 0.5157423530504975, + "grad_norm": 392.1858215332031, + "learning_rate": 5.685148151787796e-06, + "loss": 11.0449, + "step": 255310 + }, + { + "epoch": 0.5157625536831814, + "grad_norm": 315.25482177734375, + "learning_rate": 5.684802377023991e-06, + "loss": 18.649, + "step": 255320 + }, + { + "epoch": 0.5157827543158652, + "grad_norm": 436.5318603515625, + "learning_rate": 5.684456598922542e-06, + "loss": 20.3775, + "step": 255330 + }, + { + "epoch": 0.515802954948549, + "grad_norm": 343.0895690917969, + "learning_rate": 5.684110817485135e-06, + "loss": 19.2569, + "step": 255340 + }, + { + "epoch": 0.5158231555812328, + "grad_norm": 271.3983459472656, + "learning_rate": 5.683765032713455e-06, + "loss": 17.9499, + "step": 255350 + }, + { + "epoch": 0.5158433562139166, + "grad_norm": 192.4423065185547, + "learning_rate": 5.683419244609185e-06, + "loss": 18.4982, + "step": 255360 + }, + { + "epoch": 0.5158635568466005, + "grad_norm": 201.5713348388672, + "learning_rate": 5.683073453174016e-06, + "loss": 18.923, + "step": 255370 + }, + { + "epoch": 0.5158837574792843, + "grad_norm": 523.779541015625, + "learning_rate": 5.682727658409628e-06, + "loss": 10.2711, + "step": 255380 + }, + { + "epoch": 0.5159039581119681, + "grad_norm": 304.9748840332031, + "learning_rate": 5.682381860317708e-06, + "loss": 15.609, + "step": 255390 + }, + { + "epoch": 0.5159241587446519, + "grad_norm": 253.09251403808594, + "learning_rate": 5.682036058899942e-06, + "loss": 22.8758, + "step": 255400 + }, + { + "epoch": 0.5159443593773357, + "grad_norm": 494.39862060546875, + "learning_rate": 5.681690254158015e-06, + "loss": 20.438, + "step": 255410 + }, + { + "epoch": 0.5159645600100196, + "grad_norm": 597.162353515625, + "learning_rate": 5.681344446093613e-06, + "loss": 33.4464, + "step": 255420 + }, + { + "epoch": 0.5159847606427034, + "grad_norm": 52.22596740722656, + "learning_rate": 5.680998634708419e-06, + "loss": 23.1126, + "step": 255430 + }, + { + "epoch": 0.5160049612753872, + 
"grad_norm": 234.77084350585938, + "learning_rate": 5.6806528200041226e-06, + "loss": 16.2024, + "step": 255440 + }, + { + "epoch": 0.5160251619080709, + "grad_norm": 235.65931701660156, + "learning_rate": 5.680307001982405e-06, + "loss": 7.5566, + "step": 255450 + }, + { + "epoch": 0.5160453625407547, + "grad_norm": 200.08096313476562, + "learning_rate": 5.679961180644954e-06, + "loss": 16.4467, + "step": 255460 + }, + { + "epoch": 0.5160655631734385, + "grad_norm": 535.2581787109375, + "learning_rate": 5.679615355993455e-06, + "loss": 19.9863, + "step": 255470 + }, + { + "epoch": 0.5160857638061224, + "grad_norm": 243.5124053955078, + "learning_rate": 5.679269528029593e-06, + "loss": 23.1248, + "step": 255480 + }, + { + "epoch": 0.5161059644388062, + "grad_norm": 613.87939453125, + "learning_rate": 5.678923696755054e-06, + "loss": 19.6434, + "step": 255490 + }, + { + "epoch": 0.51612616507149, + "grad_norm": 233.92079162597656, + "learning_rate": 5.678577862171523e-06, + "loss": 10.7584, + "step": 255500 + }, + { + "epoch": 0.5161463657041738, + "grad_norm": 133.3798065185547, + "learning_rate": 5.678232024280687e-06, + "loss": 27.0341, + "step": 255510 + }, + { + "epoch": 0.5161665663368576, + "grad_norm": 252.52610778808594, + "learning_rate": 5.677886183084227e-06, + "loss": 16.9974, + "step": 255520 + }, + { + "epoch": 0.5161867669695415, + "grad_norm": 406.6451416015625, + "learning_rate": 5.677540338583836e-06, + "loss": 15.9542, + "step": 255530 + }, + { + "epoch": 0.5162069676022253, + "grad_norm": 301.5099182128906, + "learning_rate": 5.677194490781192e-06, + "loss": 21.4673, + "step": 255540 + }, + { + "epoch": 0.5162271682349091, + "grad_norm": 574.8076171875, + "learning_rate": 5.676848639677987e-06, + "loss": 29.9033, + "step": 255550 + }, + { + "epoch": 0.5162473688675929, + "grad_norm": 0.0, + "learning_rate": 5.6765027852759015e-06, + "loss": 18.6978, + "step": 255560 + }, + { + "epoch": 0.5162675695002767, + "grad_norm": 9.07258415222168, + "learning_rate": 5.6761569275766246e-06, + "loss": 11.8524, + "step": 255570 + }, + { + "epoch": 0.5162877701329606, + "grad_norm": 361.3944396972656, + "learning_rate": 5.675811066581842e-06, + "loss": 19.791, + "step": 255580 + }, + { + "epoch": 0.5163079707656444, + "grad_norm": 601.8685913085938, + "learning_rate": 5.675465202293238e-06, + "loss": 31.5645, + "step": 255590 + }, + { + "epoch": 0.5163281713983282, + "grad_norm": 343.0535888671875, + "learning_rate": 5.675119334712496e-06, + "loss": 13.8398, + "step": 255600 + }, + { + "epoch": 0.516348372031012, + "grad_norm": 982.5330810546875, + "learning_rate": 5.674773463841306e-06, + "loss": 23.0152, + "step": 255610 + }, + { + "epoch": 0.5163685726636958, + "grad_norm": 190.50270080566406, + "learning_rate": 5.674427589681353e-06, + "loss": 30.673, + "step": 255620 + }, + { + "epoch": 0.5163887732963797, + "grad_norm": 3.095463275909424, + "learning_rate": 5.674081712234319e-06, + "loss": 23.7618, + "step": 255630 + }, + { + "epoch": 0.5164089739290635, + "grad_norm": 694.7069091796875, + "learning_rate": 5.6737358315018954e-06, + "loss": 13.362, + "step": 255640 + }, + { + "epoch": 0.5164291745617473, + "grad_norm": 39.145450592041016, + "learning_rate": 5.673389947485763e-06, + "loss": 24.3825, + "step": 255650 + }, + { + "epoch": 0.5164493751944311, + "grad_norm": 281.1506042480469, + "learning_rate": 5.673044060187612e-06, + "loss": 13.3317, + "step": 255660 + }, + { + "epoch": 0.516469575827115, + "grad_norm": 504.1978454589844, + "learning_rate": 5.672698169609125e-06, + 
"loss": 18.3809, + "step": 255670 + }, + { + "epoch": 0.5164897764597988, + "grad_norm": 1009.9043579101562, + "learning_rate": 5.672352275751986e-06, + "loss": 21.8472, + "step": 255680 + }, + { + "epoch": 0.5165099770924826, + "grad_norm": 262.5923767089844, + "learning_rate": 5.672006378617887e-06, + "loss": 13.7169, + "step": 255690 + }, + { + "epoch": 0.5165301777251664, + "grad_norm": 264.2748718261719, + "learning_rate": 5.671660478208508e-06, + "loss": 15.8185, + "step": 255700 + }, + { + "epoch": 0.5165503783578501, + "grad_norm": 118.52084350585938, + "learning_rate": 5.671314574525539e-06, + "loss": 35.465, + "step": 255710 + }, + { + "epoch": 0.5165705789905339, + "grad_norm": 80.22938537597656, + "learning_rate": 5.670968667570663e-06, + "loss": 24.3935, + "step": 255720 + }, + { + "epoch": 0.5165907796232178, + "grad_norm": 654.9724731445312, + "learning_rate": 5.670622757345567e-06, + "loss": 39.5951, + "step": 255730 + }, + { + "epoch": 0.5166109802559016, + "grad_norm": 1020.5020141601562, + "learning_rate": 5.670276843851939e-06, + "loss": 21.9963, + "step": 255740 + }, + { + "epoch": 0.5166311808885854, + "grad_norm": 456.8036804199219, + "learning_rate": 5.6699309270914615e-06, + "loss": 14.9634, + "step": 255750 + }, + { + "epoch": 0.5166513815212692, + "grad_norm": 238.6225128173828, + "learning_rate": 5.669585007065822e-06, + "loss": 33.4033, + "step": 255760 + }, + { + "epoch": 0.516671582153953, + "grad_norm": 328.130615234375, + "learning_rate": 5.669239083776705e-06, + "loss": 13.8304, + "step": 255770 + }, + { + "epoch": 0.5166917827866369, + "grad_norm": 489.61151123046875, + "learning_rate": 5.6688931572258e-06, + "loss": 45.9713, + "step": 255780 + }, + { + "epoch": 0.5167119834193207, + "grad_norm": 192.0399169921875, + "learning_rate": 5.66854722741479e-06, + "loss": 13.0071, + "step": 255790 + }, + { + "epoch": 0.5167321840520045, + "grad_norm": 159.27377319335938, + "learning_rate": 5.668201294345363e-06, + "loss": 10.714, + "step": 255800 + }, + { + "epoch": 0.5167523846846883, + "grad_norm": 150.52395629882812, + "learning_rate": 5.667855358019203e-06, + "loss": 34.7655, + "step": 255810 + }, + { + "epoch": 0.5167725853173721, + "grad_norm": 546.414306640625, + "learning_rate": 5.667509418437996e-06, + "loss": 18.7668, + "step": 255820 + }, + { + "epoch": 0.516792785950056, + "grad_norm": 1164.419677734375, + "learning_rate": 5.66716347560343e-06, + "loss": 19.0089, + "step": 255830 + }, + { + "epoch": 0.5168129865827398, + "grad_norm": 578.8226928710938, + "learning_rate": 5.66681752951719e-06, + "loss": 11.6692, + "step": 255840 + }, + { + "epoch": 0.5168331872154236, + "grad_norm": 773.8395385742188, + "learning_rate": 5.666471580180963e-06, + "loss": 20.3244, + "step": 255850 + }, + { + "epoch": 0.5168533878481074, + "grad_norm": 224.7852020263672, + "learning_rate": 5.666125627596433e-06, + "loss": 26.6597, + "step": 255860 + }, + { + "epoch": 0.5168735884807912, + "grad_norm": 71.54790496826172, + "learning_rate": 5.665779671765289e-06, + "loss": 14.9398, + "step": 255870 + }, + { + "epoch": 0.516893789113475, + "grad_norm": 738.0344848632812, + "learning_rate": 5.665433712689214e-06, + "loss": 24.7984, + "step": 255880 + }, + { + "epoch": 0.5169139897461589, + "grad_norm": 74.12384033203125, + "learning_rate": 5.665087750369898e-06, + "loss": 14.5951, + "step": 255890 + }, + { + "epoch": 0.5169341903788427, + "grad_norm": 331.020263671875, + "learning_rate": 5.6647417848090225e-06, + "loss": 18.2839, + "step": 255900 + }, + { + "epoch": 
0.5169543910115265, + "grad_norm": 277.72784423828125, + "learning_rate": 5.664395816008277e-06, + "loss": 17.7138, + "step": 255910 + }, + { + "epoch": 0.5169745916442103, + "grad_norm": 527.2340087890625, + "learning_rate": 5.664049843969348e-06, + "loss": 14.3661, + "step": 255920 + }, + { + "epoch": 0.5169947922768942, + "grad_norm": 615.870849609375, + "learning_rate": 5.66370386869392e-06, + "loss": 28.072, + "step": 255930 + }, + { + "epoch": 0.517014992909578, + "grad_norm": 428.3338317871094, + "learning_rate": 5.663357890183679e-06, + "loss": 18.4579, + "step": 255940 + }, + { + "epoch": 0.5170351935422618, + "grad_norm": 350.446533203125, + "learning_rate": 5.6630119084403125e-06, + "loss": 32.2803, + "step": 255950 + }, + { + "epoch": 0.5170553941749455, + "grad_norm": 884.24853515625, + "learning_rate": 5.662665923465508e-06, + "loss": 39.4761, + "step": 255960 + }, + { + "epoch": 0.5170755948076293, + "grad_norm": 3960.321044921875, + "learning_rate": 5.662319935260947e-06, + "loss": 31.6966, + "step": 255970 + }, + { + "epoch": 0.5170957954403131, + "grad_norm": 258.8409729003906, + "learning_rate": 5.661973943828321e-06, + "loss": 19.2896, + "step": 255980 + }, + { + "epoch": 0.517115996072997, + "grad_norm": 458.2825927734375, + "learning_rate": 5.661627949169315e-06, + "loss": 37.7257, + "step": 255990 + }, + { + "epoch": 0.5171361967056808, + "grad_norm": 535.2298583984375, + "learning_rate": 5.661281951285613e-06, + "loss": 11.7949, + "step": 256000 + }, + { + "epoch": 0.5171563973383646, + "grad_norm": 327.1224670410156, + "learning_rate": 5.660935950178904e-06, + "loss": 23.6848, + "step": 256010 + }, + { + "epoch": 0.5171765979710484, + "grad_norm": 125.35787963867188, + "learning_rate": 5.660589945850872e-06, + "loss": 11.1664, + "step": 256020 + }, + { + "epoch": 0.5171967986037322, + "grad_norm": 385.37615966796875, + "learning_rate": 5.660243938303206e-06, + "loss": 20.3847, + "step": 256030 + }, + { + "epoch": 0.5172169992364161, + "grad_norm": 309.416259765625, + "learning_rate": 5.659897927537591e-06, + "loss": 15.0541, + "step": 256040 + }, + { + "epoch": 0.5172371998690999, + "grad_norm": 349.4208679199219, + "learning_rate": 5.659551913555713e-06, + "loss": 15.7538, + "step": 256050 + }, + { + "epoch": 0.5172574005017837, + "grad_norm": 311.5920104980469, + "learning_rate": 5.659205896359259e-06, + "loss": 18.7777, + "step": 256060 + }, + { + "epoch": 0.5172776011344675, + "grad_norm": 543.2024536132812, + "learning_rate": 5.658859875949916e-06, + "loss": 22.1736, + "step": 256070 + }, + { + "epoch": 0.5172978017671513, + "grad_norm": 397.7626953125, + "learning_rate": 5.65851385232937e-06, + "loss": 24.9859, + "step": 256080 + }, + { + "epoch": 0.5173180023998352, + "grad_norm": 277.24700927734375, + "learning_rate": 5.658167825499306e-06, + "loss": 15.9078, + "step": 256090 + }, + { + "epoch": 0.517338203032519, + "grad_norm": 455.8639221191406, + "learning_rate": 5.657821795461413e-06, + "loss": 21.7814, + "step": 256100 + }, + { + "epoch": 0.5173584036652028, + "grad_norm": 209.8695068359375, + "learning_rate": 5.657475762217376e-06, + "loss": 21.7525, + "step": 256110 + }, + { + "epoch": 0.5173786042978866, + "grad_norm": 203.76258850097656, + "learning_rate": 5.657129725768883e-06, + "loss": 18.5085, + "step": 256120 + }, + { + "epoch": 0.5173988049305704, + "grad_norm": 13.486207008361816, + "learning_rate": 5.656783686117617e-06, + "loss": 10.8061, + "step": 256130 + }, + { + "epoch": 0.5174190055632543, + "grad_norm": 496.200927734375, + 
"learning_rate": 5.656437643265269e-06, + "loss": 27.5956, + "step": 256140 + }, + { + "epoch": 0.5174392061959381, + "grad_norm": 521.7025756835938, + "learning_rate": 5.656091597213523e-06, + "loss": 20.9313, + "step": 256150 + }, + { + "epoch": 0.5174594068286219, + "grad_norm": 312.62640380859375, + "learning_rate": 5.655745547964067e-06, + "loss": 13.854, + "step": 256160 + }, + { + "epoch": 0.5174796074613057, + "grad_norm": 339.16986083984375, + "learning_rate": 5.6553994955185846e-06, + "loss": 54.9214, + "step": 256170 + }, + { + "epoch": 0.5174998080939895, + "grad_norm": 616.1905517578125, + "learning_rate": 5.655053439878766e-06, + "loss": 28.8726, + "step": 256180 + }, + { + "epoch": 0.5175200087266734, + "grad_norm": 497.0537414550781, + "learning_rate": 5.654707381046296e-06, + "loss": 18.8176, + "step": 256190 + }, + { + "epoch": 0.5175402093593572, + "grad_norm": 904.5796508789062, + "learning_rate": 5.654361319022862e-06, + "loss": 18.2005, + "step": 256200 + }, + { + "epoch": 0.517560409992041, + "grad_norm": 452.16131591796875, + "learning_rate": 5.65401525381015e-06, + "loss": 32.7201, + "step": 256210 + }, + { + "epoch": 0.5175806106247247, + "grad_norm": 398.46832275390625, + "learning_rate": 5.653669185409847e-06, + "loss": 10.5908, + "step": 256220 + }, + { + "epoch": 0.5176008112574085, + "grad_norm": 341.22216796875, + "learning_rate": 5.653323113823639e-06, + "loss": 18.9073, + "step": 256230 + }, + { + "epoch": 0.5176210118900924, + "grad_norm": 2.6971940994262695, + "learning_rate": 5.652977039053213e-06, + "loss": 23.6222, + "step": 256240 + }, + { + "epoch": 0.5176412125227762, + "grad_norm": 532.8876342773438, + "learning_rate": 5.65263096110026e-06, + "loss": 16.4939, + "step": 256250 + }, + { + "epoch": 0.51766141315546, + "grad_norm": 805.7850952148438, + "learning_rate": 5.652284879966459e-06, + "loss": 17.0429, + "step": 256260 + }, + { + "epoch": 0.5176816137881438, + "grad_norm": 589.1388549804688, + "learning_rate": 5.651938795653501e-06, + "loss": 18.2808, + "step": 256270 + }, + { + "epoch": 0.5177018144208276, + "grad_norm": 146.6456756591797, + "learning_rate": 5.651592708163074e-06, + "loss": 10.7991, + "step": 256280 + }, + { + "epoch": 0.5177220150535115, + "grad_norm": 174.4547576904297, + "learning_rate": 5.651246617496861e-06, + "loss": 17.1768, + "step": 256290 + }, + { + "epoch": 0.5177422156861953, + "grad_norm": 425.0965270996094, + "learning_rate": 5.650900523656553e-06, + "loss": 13.5376, + "step": 256300 + }, + { + "epoch": 0.5177624163188791, + "grad_norm": 299.1045227050781, + "learning_rate": 5.6505544266438325e-06, + "loss": 18.0883, + "step": 256310 + }, + { + "epoch": 0.5177826169515629, + "grad_norm": 0.0, + "learning_rate": 5.650208326460392e-06, + "loss": 20.7979, + "step": 256320 + }, + { + "epoch": 0.5178028175842467, + "grad_norm": 253.4033966064453, + "learning_rate": 5.649862223107913e-06, + "loss": 27.3647, + "step": 256330 + }, + { + "epoch": 0.5178230182169306, + "grad_norm": 298.28375244140625, + "learning_rate": 5.6495161165880826e-06, + "loss": 12.8182, + "step": 256340 + }, + { + "epoch": 0.5178432188496144, + "grad_norm": 451.9802551269531, + "learning_rate": 5.649170006902592e-06, + "loss": 26.2502, + "step": 256350 + }, + { + "epoch": 0.5178634194822982, + "grad_norm": 655.0781860351562, + "learning_rate": 5.6488238940531256e-06, + "loss": 16.0616, + "step": 256360 + }, + { + "epoch": 0.517883620114982, + "grad_norm": 150.7916259765625, + "learning_rate": 5.648477778041369e-06, + "loss": 17.0442, + "step": 
256370 + }, + { + "epoch": 0.5179038207476658, + "grad_norm": 264.4485168457031, + "learning_rate": 5.6481316588690105e-06, + "loss": 24.9096, + "step": 256380 + }, + { + "epoch": 0.5179240213803497, + "grad_norm": 297.2204895019531, + "learning_rate": 5.647785536537737e-06, + "loss": 14.4927, + "step": 256390 + }, + { + "epoch": 0.5179442220130335, + "grad_norm": 497.6724548339844, + "learning_rate": 5.647439411049235e-06, + "loss": 20.867, + "step": 256400 + }, + { + "epoch": 0.5179644226457173, + "grad_norm": 389.5704650878906, + "learning_rate": 5.647093282405194e-06, + "loss": 29.3018, + "step": 256410 + }, + { + "epoch": 0.5179846232784011, + "grad_norm": 0.0, + "learning_rate": 5.646747150607297e-06, + "loss": 15.364, + "step": 256420 + }, + { + "epoch": 0.5180048239110849, + "grad_norm": 279.0824890136719, + "learning_rate": 5.646401015657232e-06, + "loss": 14.5491, + "step": 256430 + }, + { + "epoch": 0.5180250245437688, + "grad_norm": 250.67369079589844, + "learning_rate": 5.646054877556688e-06, + "loss": 17.5029, + "step": 256440 + }, + { + "epoch": 0.5180452251764526, + "grad_norm": 55.99262237548828, + "learning_rate": 5.6457087363073505e-06, + "loss": 20.637, + "step": 256450 + }, + { + "epoch": 0.5180654258091364, + "grad_norm": 783.3364868164062, + "learning_rate": 5.645362591910908e-06, + "loss": 21.0268, + "step": 256460 + }, + { + "epoch": 0.5180856264418202, + "grad_norm": 461.7561950683594, + "learning_rate": 5.645016444369045e-06, + "loss": 25.593, + "step": 256470 + }, + { + "epoch": 0.5181058270745039, + "grad_norm": 461.1914978027344, + "learning_rate": 5.644670293683451e-06, + "loss": 12.1642, + "step": 256480 + }, + { + "epoch": 0.5181260277071877, + "grad_norm": 238.78421020507812, + "learning_rate": 5.6443241398558115e-06, + "loss": 18.0042, + "step": 256490 + }, + { + "epoch": 0.5181462283398716, + "grad_norm": 340.0740051269531, + "learning_rate": 5.643977982887815e-06, + "loss": 20.3984, + "step": 256500 + }, + { + "epoch": 0.5181664289725554, + "grad_norm": 660.90087890625, + "learning_rate": 5.643631822781147e-06, + "loss": 19.8156, + "step": 256510 + }, + { + "epoch": 0.5181866296052392, + "grad_norm": 591.1057739257812, + "learning_rate": 5.643285659537496e-06, + "loss": 33.449, + "step": 256520 + }, + { + "epoch": 0.518206830237923, + "grad_norm": 96.46746063232422, + "learning_rate": 5.64293949315855e-06, + "loss": 70.5526, + "step": 256530 + }, + { + "epoch": 0.5182270308706068, + "grad_norm": 258.6355285644531, + "learning_rate": 5.642593323645993e-06, + "loss": 13.3898, + "step": 256540 + }, + { + "epoch": 0.5182472315032907, + "grad_norm": 210.60028076171875, + "learning_rate": 5.642247151001515e-06, + "loss": 17.1673, + "step": 256550 + }, + { + "epoch": 0.5182674321359745, + "grad_norm": 407.2357177734375, + "learning_rate": 5.6419009752268015e-06, + "loss": 14.4717, + "step": 256560 + }, + { + "epoch": 0.5182876327686583, + "grad_norm": 554.6689453125, + "learning_rate": 5.641554796323543e-06, + "loss": 16.5293, + "step": 256570 + }, + { + "epoch": 0.5183078334013421, + "grad_norm": 238.739501953125, + "learning_rate": 5.641208614293421e-06, + "loss": 9.929, + "step": 256580 + }, + { + "epoch": 0.5183280340340259, + "grad_norm": 374.9056091308594, + "learning_rate": 5.640862429138128e-06, + "loss": 28.3125, + "step": 256590 + }, + { + "epoch": 0.5183482346667098, + "grad_norm": 441.8193664550781, + "learning_rate": 5.640516240859348e-06, + "loss": 24.4329, + "step": 256600 + }, + { + "epoch": 0.5183684352993936, + "grad_norm": 
262.81146240234375, + "learning_rate": 5.64017004945877e-06, + "loss": 26.0223, + "step": 256610 + }, + { + "epoch": 0.5183886359320774, + "grad_norm": 374.5070495605469, + "learning_rate": 5.639823854938082e-06, + "loss": 24.7278, + "step": 256620 + }, + { + "epoch": 0.5184088365647612, + "grad_norm": 157.4990234375, + "learning_rate": 5.639477657298968e-06, + "loss": 9.6328, + "step": 256630 + }, + { + "epoch": 0.518429037197445, + "grad_norm": 170.0662078857422, + "learning_rate": 5.639131456543119e-06, + "loss": 15.2021, + "step": 256640 + }, + { + "epoch": 0.5184492378301289, + "grad_norm": 92.25371551513672, + "learning_rate": 5.63878525267222e-06, + "loss": 14.6738, + "step": 256650 + }, + { + "epoch": 0.5184694384628127, + "grad_norm": 490.31964111328125, + "learning_rate": 5.63843904568796e-06, + "loss": 10.2476, + "step": 256660 + }, + { + "epoch": 0.5184896390954965, + "grad_norm": 584.7594604492188, + "learning_rate": 5.638092835592024e-06, + "loss": 20.0524, + "step": 256670 + }, + { + "epoch": 0.5185098397281803, + "grad_norm": 375.0758361816406, + "learning_rate": 5.637746622386102e-06, + "loss": 19.8257, + "step": 256680 + }, + { + "epoch": 0.5185300403608641, + "grad_norm": 136.49530029296875, + "learning_rate": 5.637400406071881e-06, + "loss": 14.1598, + "step": 256690 + }, + { + "epoch": 0.518550240993548, + "grad_norm": 609.0701293945312, + "learning_rate": 5.6370541866510476e-06, + "loss": 20.9267, + "step": 256700 + }, + { + "epoch": 0.5185704416262318, + "grad_norm": 675.3168334960938, + "learning_rate": 5.6367079641252874e-06, + "loss": 20.4015, + "step": 256710 + }, + { + "epoch": 0.5185906422589156, + "grad_norm": 60.84113693237305, + "learning_rate": 5.636361738496291e-06, + "loss": 9.445, + "step": 256720 + }, + { + "epoch": 0.5186108428915993, + "grad_norm": 318.3699645996094, + "learning_rate": 5.636015509765747e-06, + "loss": 18.0882, + "step": 256730 + }, + { + "epoch": 0.5186310435242831, + "grad_norm": 500.5754089355469, + "learning_rate": 5.6356692779353365e-06, + "loss": 21.8586, + "step": 256740 + }, + { + "epoch": 0.518651244156967, + "grad_norm": 176.42054748535156, + "learning_rate": 5.635323043006753e-06, + "loss": 13.9599, + "step": 256750 + }, + { + "epoch": 0.5186714447896508, + "grad_norm": 24.978870391845703, + "learning_rate": 5.634976804981682e-06, + "loss": 27.3057, + "step": 256760 + }, + { + "epoch": 0.5186916454223346, + "grad_norm": 2007.6275634765625, + "learning_rate": 5.634630563861811e-06, + "loss": 40.0491, + "step": 256770 + }, + { + "epoch": 0.5187118460550184, + "grad_norm": 627.5809326171875, + "learning_rate": 5.634284319648827e-06, + "loss": 43.1722, + "step": 256780 + }, + { + "epoch": 0.5187320466877022, + "grad_norm": 529.4752197265625, + "learning_rate": 5.633938072344419e-06, + "loss": 34.4752, + "step": 256790 + }, + { + "epoch": 0.518752247320386, + "grad_norm": 513.8336791992188, + "learning_rate": 5.633591821950274e-06, + "loss": 27.9681, + "step": 256800 + }, + { + "epoch": 0.5187724479530699, + "grad_norm": 1000.4723510742188, + "learning_rate": 5.633245568468079e-06, + "loss": 41.3979, + "step": 256810 + }, + { + "epoch": 0.5187926485857537, + "grad_norm": 423.302978515625, + "learning_rate": 5.6328993118995215e-06, + "loss": 27.1045, + "step": 256820 + }, + { + "epoch": 0.5188128492184375, + "grad_norm": 366.0521240234375, + "learning_rate": 5.632553052246289e-06, + "loss": 11.7155, + "step": 256830 + }, + { + "epoch": 0.5188330498511213, + "grad_norm": 580.5076293945312, + "learning_rate": 5.6322067895100705e-06, 
+ "loss": 14.904, + "step": 256840 + }, + { + "epoch": 0.5188532504838052, + "grad_norm": 498.6604919433594, + "learning_rate": 5.631860523692553e-06, + "loss": 16.6257, + "step": 256850 + }, + { + "epoch": 0.518873451116489, + "grad_norm": 164.28042602539062, + "learning_rate": 5.631514254795424e-06, + "loss": 17.0511, + "step": 256860 + }, + { + "epoch": 0.5188936517491728, + "grad_norm": 600.585693359375, + "learning_rate": 5.6311679828203706e-06, + "loss": 22.245, + "step": 256870 + }, + { + "epoch": 0.5189138523818566, + "grad_norm": 828.4389038085938, + "learning_rate": 5.630821707769081e-06, + "loss": 16.5592, + "step": 256880 + }, + { + "epoch": 0.5189340530145404, + "grad_norm": 26.64689064025879, + "learning_rate": 5.630475429643244e-06, + "loss": 21.9752, + "step": 256890 + }, + { + "epoch": 0.5189542536472243, + "grad_norm": 557.02734375, + "learning_rate": 5.630129148444543e-06, + "loss": 10.7585, + "step": 256900 + }, + { + "epoch": 0.5189744542799081, + "grad_norm": 202.89645385742188, + "learning_rate": 5.629782864174672e-06, + "loss": 7.845, + "step": 256910 + }, + { + "epoch": 0.5189946549125919, + "grad_norm": 397.0294494628906, + "learning_rate": 5.629436576835315e-06, + "loss": 18.1145, + "step": 256920 + }, + { + "epoch": 0.5190148555452757, + "grad_norm": 602.2669677734375, + "learning_rate": 5.6290902864281605e-06, + "loss": 28.0946, + "step": 256930 + }, + { + "epoch": 0.5190350561779595, + "grad_norm": 387.1973571777344, + "learning_rate": 5.628743992954896e-06, + "loss": 17.2151, + "step": 256940 + }, + { + "epoch": 0.5190552568106434, + "grad_norm": 123.08863830566406, + "learning_rate": 5.62839769641721e-06, + "loss": 22.2356, + "step": 256950 + }, + { + "epoch": 0.5190754574433272, + "grad_norm": 201.61253356933594, + "learning_rate": 5.6280513968167895e-06, + "loss": 23.8974, + "step": 256960 + }, + { + "epoch": 0.519095658076011, + "grad_norm": 435.9090270996094, + "learning_rate": 5.627705094155322e-06, + "loss": 21.9942, + "step": 256970 + }, + { + "epoch": 0.5191158587086948, + "grad_norm": 895.2130126953125, + "learning_rate": 5.627358788434497e-06, + "loss": 24.3738, + "step": 256980 + }, + { + "epoch": 0.5191360593413785, + "grad_norm": 663.9862670898438, + "learning_rate": 5.627012479656001e-06, + "loss": 33.8452, + "step": 256990 + }, + { + "epoch": 0.5191562599740623, + "grad_norm": 660.0105590820312, + "learning_rate": 5.626666167821522e-06, + "loss": 35.4031, + "step": 257000 + }, + { + "epoch": 0.5191764606067462, + "grad_norm": 354.1170959472656, + "learning_rate": 5.626319852932748e-06, + "loss": 47.348, + "step": 257010 + }, + { + "epoch": 0.51919666123943, + "grad_norm": 278.7491760253906, + "learning_rate": 5.625973534991368e-06, + "loss": 22.948, + "step": 257020 + }, + { + "epoch": 0.5192168618721138, + "grad_norm": 220.43814086914062, + "learning_rate": 5.625627213999067e-06, + "loss": 16.3012, + "step": 257030 + }, + { + "epoch": 0.5192370625047976, + "grad_norm": 676.5736694335938, + "learning_rate": 5.6252808899575375e-06, + "loss": 12.6801, + "step": 257040 + }, + { + "epoch": 0.5192572631374814, + "grad_norm": 198.6671142578125, + "learning_rate": 5.624934562868463e-06, + "loss": 19.5471, + "step": 257050 + }, + { + "epoch": 0.5192774637701653, + "grad_norm": 668.9926147460938, + "learning_rate": 5.624588232733533e-06, + "loss": 18.102, + "step": 257060 + }, + { + "epoch": 0.5192976644028491, + "grad_norm": 286.4676818847656, + "learning_rate": 5.624241899554437e-06, + "loss": 18.2227, + "step": 257070 + }, + { + "epoch": 
0.5193178650355329, + "grad_norm": 109.82481384277344, + "learning_rate": 5.62389556333286e-06, + "loss": 22.1474, + "step": 257080 + }, + { + "epoch": 0.5193380656682167, + "grad_norm": 107.15418243408203, + "learning_rate": 5.623549224070494e-06, + "loss": 21.2177, + "step": 257090 + }, + { + "epoch": 0.5193582663009005, + "grad_norm": 214.92942810058594, + "learning_rate": 5.623202881769023e-06, + "loss": 13.6254, + "step": 257100 + }, + { + "epoch": 0.5193784669335844, + "grad_norm": 227.6421356201172, + "learning_rate": 5.622856536430137e-06, + "loss": 11.4191, + "step": 257110 + }, + { + "epoch": 0.5193986675662682, + "grad_norm": 725.64453125, + "learning_rate": 5.622510188055523e-06, + "loss": 27.8473, + "step": 257120 + }, + { + "epoch": 0.519418868198952, + "grad_norm": 737.1107177734375, + "learning_rate": 5.622163836646871e-06, + "loss": 38.0646, + "step": 257130 + }, + { + "epoch": 0.5194390688316358, + "grad_norm": 222.0409698486328, + "learning_rate": 5.621817482205868e-06, + "loss": 34.0105, + "step": 257140 + }, + { + "epoch": 0.5194592694643196, + "grad_norm": 802.1209716796875, + "learning_rate": 5.6214711247342015e-06, + "loss": 18.7768, + "step": 257150 + }, + { + "epoch": 0.5194794700970035, + "grad_norm": 191.45761108398438, + "learning_rate": 5.621124764233561e-06, + "loss": 29.7747, + "step": 257160 + }, + { + "epoch": 0.5194996707296873, + "grad_norm": 81.80042266845703, + "learning_rate": 5.620778400705632e-06, + "loss": 18.1627, + "step": 257170 + }, + { + "epoch": 0.5195198713623711, + "grad_norm": 382.63897705078125, + "learning_rate": 5.620432034152107e-06, + "loss": 8.3434, + "step": 257180 + }, + { + "epoch": 0.5195400719950549, + "grad_norm": 583.813720703125, + "learning_rate": 5.620085664574668e-06, + "loss": 20.5988, + "step": 257190 + }, + { + "epoch": 0.5195602726277387, + "grad_norm": 727.8671264648438, + "learning_rate": 5.6197392919750095e-06, + "loss": 23.7689, + "step": 257200 + }, + { + "epoch": 0.5195804732604226, + "grad_norm": 570.191162109375, + "learning_rate": 5.619392916354815e-06, + "loss": 23.2146, + "step": 257210 + }, + { + "epoch": 0.5196006738931064, + "grad_norm": 453.07550048828125, + "learning_rate": 5.619046537715776e-06, + "loss": 19.9638, + "step": 257220 + }, + { + "epoch": 0.5196208745257902, + "grad_norm": 162.64599609375, + "learning_rate": 5.61870015605958e-06, + "loss": 25.7242, + "step": 257230 + }, + { + "epoch": 0.5196410751584739, + "grad_norm": 451.34259033203125, + "learning_rate": 5.618353771387912e-06, + "loss": 47.1894, + "step": 257240 + }, + { + "epoch": 0.5196612757911577, + "grad_norm": 460.22637939453125, + "learning_rate": 5.618007383702464e-06, + "loss": 18.1238, + "step": 257250 + }, + { + "epoch": 0.5196814764238415, + "grad_norm": 284.9989318847656, + "learning_rate": 5.617660993004923e-06, + "loss": 22.8709, + "step": 257260 + }, + { + "epoch": 0.5197016770565254, + "grad_norm": 375.36834716796875, + "learning_rate": 5.617314599296977e-06, + "loss": 9.8315, + "step": 257270 + }, + { + "epoch": 0.5197218776892092, + "grad_norm": 505.88555908203125, + "learning_rate": 5.616968202580315e-06, + "loss": 12.7147, + "step": 257280 + }, + { + "epoch": 0.519742078321893, + "grad_norm": 341.1786804199219, + "learning_rate": 5.6166218028566246e-06, + "loss": 19.2148, + "step": 257290 + }, + { + "epoch": 0.5197622789545768, + "grad_norm": 212.43516540527344, + "learning_rate": 5.616275400127594e-06, + "loss": 11.8919, + "step": 257300 + }, + { + "epoch": 0.5197824795872606, + "grad_norm": 330.664306640625, + 
"learning_rate": 5.615928994394913e-06, + "loss": 17.9222, + "step": 257310 + }, + { + "epoch": 0.5198026802199445, + "grad_norm": 320.563232421875, + "learning_rate": 5.615582585660266e-06, + "loss": 22.1219, + "step": 257320 + }, + { + "epoch": 0.5198228808526283, + "grad_norm": 721.0665893554688, + "learning_rate": 5.615236173925347e-06, + "loss": 30.3213, + "step": 257330 + }, + { + "epoch": 0.5198430814853121, + "grad_norm": 286.6113586425781, + "learning_rate": 5.61488975919184e-06, + "loss": 21.4331, + "step": 257340 + }, + { + "epoch": 0.5198632821179959, + "grad_norm": 349.9152526855469, + "learning_rate": 5.6145433414614345e-06, + "loss": 17.5125, + "step": 257350 + }, + { + "epoch": 0.5198834827506797, + "grad_norm": 214.19204711914062, + "learning_rate": 5.614196920735822e-06, + "loss": 14.9003, + "step": 257360 + }, + { + "epoch": 0.5199036833833636, + "grad_norm": 443.8629150390625, + "learning_rate": 5.613850497016687e-06, + "loss": 26.6429, + "step": 257370 + }, + { + "epoch": 0.5199238840160474, + "grad_norm": 262.8409423828125, + "learning_rate": 5.613504070305717e-06, + "loss": 12.8079, + "step": 257380 + }, + { + "epoch": 0.5199440846487312, + "grad_norm": 423.7279357910156, + "learning_rate": 5.613157640604605e-06, + "loss": 34.1807, + "step": 257390 + }, + { + "epoch": 0.519964285281415, + "grad_norm": 560.2799072265625, + "learning_rate": 5.612811207915034e-06, + "loss": 20.5849, + "step": 257400 + }, + { + "epoch": 0.5199844859140988, + "grad_norm": 783.8386840820312, + "learning_rate": 5.6124647722386996e-06, + "loss": 32.4919, + "step": 257410 + }, + { + "epoch": 0.5200046865467827, + "grad_norm": 269.1947326660156, + "learning_rate": 5.612118333577283e-06, + "loss": 27.8402, + "step": 257420 + }, + { + "epoch": 0.5200248871794665, + "grad_norm": 212.03346252441406, + "learning_rate": 5.611771891932477e-06, + "loss": 12.0659, + "step": 257430 + }, + { + "epoch": 0.5200450878121503, + "grad_norm": 248.64772033691406, + "learning_rate": 5.611425447305969e-06, + "loss": 22.6025, + "step": 257440 + }, + { + "epoch": 0.5200652884448341, + "grad_norm": 114.89862823486328, + "learning_rate": 5.611078999699448e-06, + "loss": 15.9903, + "step": 257450 + }, + { + "epoch": 0.520085489077518, + "grad_norm": 453.12615966796875, + "learning_rate": 5.6107325491146024e-06, + "loss": 25.1079, + "step": 257460 + }, + { + "epoch": 0.5201056897102018, + "grad_norm": 623.2423706054688, + "learning_rate": 5.61038609555312e-06, + "loss": 25.0207, + "step": 257470 + }, + { + "epoch": 0.5201258903428856, + "grad_norm": 266.9727478027344, + "learning_rate": 5.610039639016689e-06, + "loss": 18.0565, + "step": 257480 + }, + { + "epoch": 0.5201460909755694, + "grad_norm": 0.0, + "learning_rate": 5.609693179506999e-06, + "loss": 21.2829, + "step": 257490 + }, + { + "epoch": 0.5201662916082531, + "grad_norm": 68.14176940917969, + "learning_rate": 5.609346717025738e-06, + "loss": 34.4678, + "step": 257500 + }, + { + "epoch": 0.5201864922409369, + "grad_norm": 354.24346923828125, + "learning_rate": 5.609000251574596e-06, + "loss": 15.353, + "step": 257510 + }, + { + "epoch": 0.5202066928736208, + "grad_norm": 228.03439331054688, + "learning_rate": 5.60865378315526e-06, + "loss": 22.66, + "step": 257520 + }, + { + "epoch": 0.5202268935063046, + "grad_norm": 407.65802001953125, + "learning_rate": 5.6083073117694186e-06, + "loss": 16.2957, + "step": 257530 + }, + { + "epoch": 0.5202470941389884, + "grad_norm": 619.2686767578125, + "learning_rate": 5.607960837418763e-06, + "loss": 17.9244, + "step": 
257540 + }, + { + "epoch": 0.5202672947716722, + "grad_norm": 528.3983764648438, + "learning_rate": 5.6076143601049795e-06, + "loss": 24.1809, + "step": 257550 + }, + { + "epoch": 0.520287495404356, + "grad_norm": 326.4344177246094, + "learning_rate": 5.607267879829757e-06, + "loss": 15.3803, + "step": 257560 + }, + { + "epoch": 0.5203076960370399, + "grad_norm": 7.018338680267334, + "learning_rate": 5.606921396594785e-06, + "loss": 22.5545, + "step": 257570 + }, + { + "epoch": 0.5203278966697237, + "grad_norm": 53.98643112182617, + "learning_rate": 5.60657491040175e-06, + "loss": 47.3502, + "step": 257580 + }, + { + "epoch": 0.5203480973024075, + "grad_norm": 408.90850830078125, + "learning_rate": 5.606228421252344e-06, + "loss": 22.2837, + "step": 257590 + }, + { + "epoch": 0.5203682979350913, + "grad_norm": 629.2796630859375, + "learning_rate": 5.605881929148254e-06, + "loss": 19.7562, + "step": 257600 + }, + { + "epoch": 0.5203884985677751, + "grad_norm": 234.08175659179688, + "learning_rate": 5.605535434091168e-06, + "loss": 16.3395, + "step": 257610 + }, + { + "epoch": 0.520408699200459, + "grad_norm": 240.43798828125, + "learning_rate": 5.605188936082776e-06, + "loss": 13.7598, + "step": 257620 + }, + { + "epoch": 0.5204288998331428, + "grad_norm": 423.3980712890625, + "learning_rate": 5.604842435124769e-06, + "loss": 20.0048, + "step": 257630 + }, + { + "epoch": 0.5204491004658266, + "grad_norm": 493.5419616699219, + "learning_rate": 5.604495931218831e-06, + "loss": 16.8068, + "step": 257640 + }, + { + "epoch": 0.5204693010985104, + "grad_norm": 586.6019287109375, + "learning_rate": 5.604149424366653e-06, + "loss": 25.5807, + "step": 257650 + }, + { + "epoch": 0.5204895017311942, + "grad_norm": 421.7281494140625, + "learning_rate": 5.603802914569924e-06, + "loss": 13.6986, + "step": 257660 + }, + { + "epoch": 0.5205097023638781, + "grad_norm": 160.74330139160156, + "learning_rate": 5.603456401830333e-06, + "loss": 21.5895, + "step": 257670 + }, + { + "epoch": 0.5205299029965619, + "grad_norm": 458.26141357421875, + "learning_rate": 5.60310988614957e-06, + "loss": 19.0367, + "step": 257680 + }, + { + "epoch": 0.5205501036292457, + "grad_norm": 532.3012084960938, + "learning_rate": 5.60276336752932e-06, + "loss": 21.3122, + "step": 257690 + }, + { + "epoch": 0.5205703042619295, + "grad_norm": 474.4313049316406, + "learning_rate": 5.6024168459712765e-06, + "loss": 13.374, + "step": 257700 + }, + { + "epoch": 0.5205905048946133, + "grad_norm": 104.30779266357422, + "learning_rate": 5.602070321477126e-06, + "loss": 22.9658, + "step": 257710 + }, + { + "epoch": 0.5206107055272972, + "grad_norm": 296.1563720703125, + "learning_rate": 5.601723794048558e-06, + "loss": 18.1674, + "step": 257720 + }, + { + "epoch": 0.520630906159981, + "grad_norm": 171.5693817138672, + "learning_rate": 5.601377263687262e-06, + "loss": 22.7732, + "step": 257730 + }, + { + "epoch": 0.5206511067926648, + "grad_norm": 214.3036651611328, + "learning_rate": 5.601030730394923e-06, + "loss": 29.8944, + "step": 257740 + }, + { + "epoch": 0.5206713074253486, + "grad_norm": 532.0447998046875, + "learning_rate": 5.600684194173236e-06, + "loss": 12.5308, + "step": 257750 + }, + { + "epoch": 0.5206915080580323, + "grad_norm": 768.5252075195312, + "learning_rate": 5.600337655023887e-06, + "loss": 39.9223, + "step": 257760 + }, + { + "epoch": 0.5207117086907161, + "grad_norm": 529.4053344726562, + "learning_rate": 5.599991112948564e-06, + "loss": 15.0716, + "step": 257770 + }, + { + "epoch": 0.5207319093234, + "grad_norm": 
635.5668334960938, + "learning_rate": 5.5996445679489566e-06, + "loss": 20.5109, + "step": 257780 + }, + { + "epoch": 0.5207521099560838, + "grad_norm": 27.236709594726562, + "learning_rate": 5.599298020026757e-06, + "loss": 24.8813, + "step": 257790 + }, + { + "epoch": 0.5207723105887676, + "grad_norm": 298.06182861328125, + "learning_rate": 5.598951469183649e-06, + "loss": 23.0239, + "step": 257800 + }, + { + "epoch": 0.5207925112214514, + "grad_norm": 210.44068908691406, + "learning_rate": 5.598604915421324e-06, + "loss": 12.4049, + "step": 257810 + }, + { + "epoch": 0.5208127118541352, + "grad_norm": 376.73248291015625, + "learning_rate": 5.598258358741472e-06, + "loss": 13.9753, + "step": 257820 + }, + { + "epoch": 0.5208329124868191, + "grad_norm": 513.9283447265625, + "learning_rate": 5.597911799145781e-06, + "loss": 19.6141, + "step": 257830 + }, + { + "epoch": 0.5208531131195029, + "grad_norm": 832.7177734375, + "learning_rate": 5.597565236635942e-06, + "loss": 30.1937, + "step": 257840 + }, + { + "epoch": 0.5208733137521867, + "grad_norm": 361.26300048828125, + "learning_rate": 5.59721867121364e-06, + "loss": 21.9525, + "step": 257850 + }, + { + "epoch": 0.5208935143848705, + "grad_norm": 328.2436828613281, + "learning_rate": 5.596872102880568e-06, + "loss": 18.5472, + "step": 257860 + }, + { + "epoch": 0.5209137150175543, + "grad_norm": 127.17230987548828, + "learning_rate": 5.596525531638415e-06, + "loss": 15.9013, + "step": 257870 + }, + { + "epoch": 0.5209339156502382, + "grad_norm": 309.1257019042969, + "learning_rate": 5.596178957488867e-06, + "loss": 28.6147, + "step": 257880 + }, + { + "epoch": 0.520954116282922, + "grad_norm": 60.98568344116211, + "learning_rate": 5.595832380433616e-06, + "loss": 9.7927, + "step": 257890 + }, + { + "epoch": 0.5209743169156058, + "grad_norm": 233.31597900390625, + "learning_rate": 5.59548580047435e-06, + "loss": 19.158, + "step": 257900 + }, + { + "epoch": 0.5209945175482896, + "grad_norm": 370.2422790527344, + "learning_rate": 5.595139217612758e-06, + "loss": 26.8181, + "step": 257910 + }, + { + "epoch": 0.5210147181809734, + "grad_norm": 410.9178771972656, + "learning_rate": 5.59479263185053e-06, + "loss": 16.782, + "step": 257920 + }, + { + "epoch": 0.5210349188136573, + "grad_norm": 408.39849853515625, + "learning_rate": 5.594446043189355e-06, + "loss": 15.3456, + "step": 257930 + }, + { + "epoch": 0.5210551194463411, + "grad_norm": 505.7781066894531, + "learning_rate": 5.594099451630921e-06, + "loss": 16.3654, + "step": 257940 + }, + { + "epoch": 0.5210753200790249, + "grad_norm": 962.6716918945312, + "learning_rate": 5.593752857176921e-06, + "loss": 21.7308, + "step": 257950 + }, + { + "epoch": 0.5210955207117087, + "grad_norm": 455.1202697753906, + "learning_rate": 5.593406259829038e-06, + "loss": 14.5062, + "step": 257960 + }, + { + "epoch": 0.5211157213443925, + "grad_norm": 300.18402099609375, + "learning_rate": 5.593059659588968e-06, + "loss": 19.0189, + "step": 257970 + }, + { + "epoch": 0.5211359219770764, + "grad_norm": 220.6542205810547, + "learning_rate": 5.592713056458395e-06, + "loss": 18.6487, + "step": 257980 + }, + { + "epoch": 0.5211561226097602, + "grad_norm": 323.6950378417969, + "learning_rate": 5.592366450439012e-06, + "loss": 25.2693, + "step": 257990 + }, + { + "epoch": 0.521176323242444, + "grad_norm": 262.5322570800781, + "learning_rate": 5.592019841532507e-06, + "loss": 13.7469, + "step": 258000 + }, + { + "epoch": 0.5211965238751277, + "grad_norm": 505.59637451171875, + "learning_rate": 
5.591673229740566e-06, + "loss": 23.9838, + "step": 258010 + }, + { + "epoch": 0.5212167245078115, + "grad_norm": 244.86148071289062, + "learning_rate": 5.591326615064885e-06, + "loss": 17.9718, + "step": 258020 + }, + { + "epoch": 0.5212369251404954, + "grad_norm": 1824.36767578125, + "learning_rate": 5.590979997507146e-06, + "loss": 37.4517, + "step": 258030 + }, + { + "epoch": 0.5212571257731792, + "grad_norm": 532.5745239257812, + "learning_rate": 5.590633377069046e-06, + "loss": 17.079, + "step": 258040 + }, + { + "epoch": 0.521277326405863, + "grad_norm": 624.6132202148438, + "learning_rate": 5.590286753752269e-06, + "loss": 18.441, + "step": 258050 + }, + { + "epoch": 0.5212975270385468, + "grad_norm": 184.2586212158203, + "learning_rate": 5.5899401275585064e-06, + "loss": 17.4711, + "step": 258060 + }, + { + "epoch": 0.5213177276712306, + "grad_norm": 7.946119785308838, + "learning_rate": 5.5895934984894476e-06, + "loss": 18.3772, + "step": 258070 + }, + { + "epoch": 0.5213379283039145, + "grad_norm": 750.5645141601562, + "learning_rate": 5.58924686654678e-06, + "loss": 22.892, + "step": 258080 + }, + { + "epoch": 0.5213581289365983, + "grad_norm": 154.44918823242188, + "learning_rate": 5.588900231732196e-06, + "loss": 16.0328, + "step": 258090 + }, + { + "epoch": 0.5213783295692821, + "grad_norm": 418.48748779296875, + "learning_rate": 5.588553594047382e-06, + "loss": 19.5818, + "step": 258100 + }, + { + "epoch": 0.5213985302019659, + "grad_norm": 445.0689392089844, + "learning_rate": 5.5882069534940305e-06, + "loss": 22.2834, + "step": 258110 + }, + { + "epoch": 0.5214187308346497, + "grad_norm": 94.60790252685547, + "learning_rate": 5.58786031007383e-06, + "loss": 11.85, + "step": 258120 + }, + { + "epoch": 0.5214389314673336, + "grad_norm": 231.06190490722656, + "learning_rate": 5.5875136637884695e-06, + "loss": 13.5067, + "step": 258130 + }, + { + "epoch": 0.5214591321000174, + "grad_norm": 347.8343200683594, + "learning_rate": 5.587167014639638e-06, + "loss": 9.9655, + "step": 258140 + }, + { + "epoch": 0.5214793327327012, + "grad_norm": 226.5662078857422, + "learning_rate": 5.5868203626290266e-06, + "loss": 23.8623, + "step": 258150 + }, + { + "epoch": 0.521499533365385, + "grad_norm": 258.6246337890625, + "learning_rate": 5.586473707758322e-06, + "loss": 20.4882, + "step": 258160 + }, + { + "epoch": 0.5215197339980688, + "grad_norm": 378.86688232421875, + "learning_rate": 5.586127050029218e-06, + "loss": 20.8964, + "step": 258170 + }, + { + "epoch": 0.5215399346307527, + "grad_norm": 51.07099533081055, + "learning_rate": 5.585780389443401e-06, + "loss": 27.5344, + "step": 258180 + }, + { + "epoch": 0.5215601352634365, + "grad_norm": 400.8778076171875, + "learning_rate": 5.58543372600256e-06, + "loss": 15.4608, + "step": 258190 + }, + { + "epoch": 0.5215803358961203, + "grad_norm": 472.527099609375, + "learning_rate": 5.585087059708389e-06, + "loss": 24.3184, + "step": 258200 + }, + { + "epoch": 0.5216005365288041, + "grad_norm": 138.24754333496094, + "learning_rate": 5.584740390562572e-06, + "loss": 7.7216, + "step": 258210 + }, + { + "epoch": 0.5216207371614879, + "grad_norm": 337.6424560546875, + "learning_rate": 5.584393718566802e-06, + "loss": 21.9638, + "step": 258220 + }, + { + "epoch": 0.5216409377941718, + "grad_norm": 606.82763671875, + "learning_rate": 5.584047043722768e-06, + "loss": 36.9224, + "step": 258230 + }, + { + "epoch": 0.5216611384268556, + "grad_norm": 577.6422729492188, + "learning_rate": 5.5837003660321596e-06, + "loss": 22.4856, + "step": 258240 + 
}, + { + "epoch": 0.5216813390595394, + "grad_norm": 38.453495025634766, + "learning_rate": 5.5833536854966665e-06, + "loss": 9.996, + "step": 258250 + }, + { + "epoch": 0.5217015396922232, + "grad_norm": 347.631103515625, + "learning_rate": 5.5830070021179785e-06, + "loss": 18.9333, + "step": 258260 + }, + { + "epoch": 0.5217217403249069, + "grad_norm": 794.9881591796875, + "learning_rate": 5.582660315897785e-06, + "loss": 32.6643, + "step": 258270 + }, + { + "epoch": 0.5217419409575907, + "grad_norm": 156.62416076660156, + "learning_rate": 5.582313626837776e-06, + "loss": 26.8639, + "step": 258280 + }, + { + "epoch": 0.5217621415902746, + "grad_norm": 418.0146789550781, + "learning_rate": 5.58196693493964e-06, + "loss": 18.6138, + "step": 258290 + }, + { + "epoch": 0.5217823422229584, + "grad_norm": 332.8797607421875, + "learning_rate": 5.581620240205068e-06, + "loss": 21.2172, + "step": 258300 + }, + { + "epoch": 0.5218025428556422, + "grad_norm": 186.88479614257812, + "learning_rate": 5.58127354263575e-06, + "loss": 23.2312, + "step": 258310 + }, + { + "epoch": 0.521822743488326, + "grad_norm": 295.6640319824219, + "learning_rate": 5.580926842233375e-06, + "loss": 14.8955, + "step": 258320 + }, + { + "epoch": 0.5218429441210098, + "grad_norm": 284.3543395996094, + "learning_rate": 5.580580138999633e-06, + "loss": 25.2832, + "step": 258330 + }, + { + "epoch": 0.5218631447536937, + "grad_norm": 323.582275390625, + "learning_rate": 5.580233432936215e-06, + "loss": 18.7778, + "step": 258340 + }, + { + "epoch": 0.5218833453863775, + "grad_norm": 8.183572769165039, + "learning_rate": 5.5798867240448075e-06, + "loss": 13.8316, + "step": 258350 + }, + { + "epoch": 0.5219035460190613, + "grad_norm": 312.5469970703125, + "learning_rate": 5.579540012327103e-06, + "loss": 15.7613, + "step": 258360 + }, + { + "epoch": 0.5219237466517451, + "grad_norm": 963.30712890625, + "learning_rate": 5.579193297784792e-06, + "loss": 15.6505, + "step": 258370 + }, + { + "epoch": 0.5219439472844289, + "grad_norm": 3.0414092540740967, + "learning_rate": 5.578846580419562e-06, + "loss": 15.5307, + "step": 258380 + }, + { + "epoch": 0.5219641479171128, + "grad_norm": 202.86471557617188, + "learning_rate": 5.578499860233104e-06, + "loss": 23.9264, + "step": 258390 + }, + { + "epoch": 0.5219843485497966, + "grad_norm": 354.16278076171875, + "learning_rate": 5.578153137227109e-06, + "loss": 10.0166, + "step": 258400 + }, + { + "epoch": 0.5220045491824804, + "grad_norm": 265.00823974609375, + "learning_rate": 5.577806411403265e-06, + "loss": 5.5652, + "step": 258410 + }, + { + "epoch": 0.5220247498151642, + "grad_norm": 117.40001678466797, + "learning_rate": 5.577459682763262e-06, + "loss": 16.3928, + "step": 258420 + }, + { + "epoch": 0.522044950447848, + "grad_norm": 643.47607421875, + "learning_rate": 5.577112951308792e-06, + "loss": 29.5794, + "step": 258430 + }, + { + "epoch": 0.5220651510805319, + "grad_norm": 251.23094177246094, + "learning_rate": 5.576766217041541e-06, + "loss": 15.4696, + "step": 258440 + }, + { + "epoch": 0.5220853517132157, + "grad_norm": 486.6929626464844, + "learning_rate": 5.576419479963204e-06, + "loss": 24.7433, + "step": 258450 + }, + { + "epoch": 0.5221055523458995, + "grad_norm": 480.2086486816406, + "learning_rate": 5.576072740075467e-06, + "loss": 14.7398, + "step": 258460 + }, + { + "epoch": 0.5221257529785833, + "grad_norm": 373.065673828125, + "learning_rate": 5.575725997380023e-06, + "loss": 25.3467, + "step": 258470 + }, + { + "epoch": 0.5221459536112671, + "grad_norm": 
414.2657165527344, + "learning_rate": 5.575379251878558e-06, + "loss": 23.7654, + "step": 258480 + }, + { + "epoch": 0.522166154243951, + "grad_norm": 460.9728088378906, + "learning_rate": 5.575032503572765e-06, + "loss": 23.8758, + "step": 258490 + }, + { + "epoch": 0.5221863548766348, + "grad_norm": 9.743806838989258, + "learning_rate": 5.5746857524643335e-06, + "loss": 9.0604, + "step": 258500 + }, + { + "epoch": 0.5222065555093186, + "grad_norm": 460.9274597167969, + "learning_rate": 5.5743389985549535e-06, + "loss": 20.8826, + "step": 258510 + }, + { + "epoch": 0.5222267561420023, + "grad_norm": 192.0853729248047, + "learning_rate": 5.573992241846315e-06, + "loss": 10.7161, + "step": 258520 + }, + { + "epoch": 0.5222469567746861, + "grad_norm": 405.860595703125, + "learning_rate": 5.573645482340107e-06, + "loss": 17.9584, + "step": 258530 + }, + { + "epoch": 0.52226715740737, + "grad_norm": 528.8161010742188, + "learning_rate": 5.573298720038022e-06, + "loss": 12.7832, + "step": 258540 + }, + { + "epoch": 0.5222873580400538, + "grad_norm": 217.5623779296875, + "learning_rate": 5.572951954941748e-06, + "loss": 18.7989, + "step": 258550 + }, + { + "epoch": 0.5223075586727376, + "grad_norm": 474.1935119628906, + "learning_rate": 5.572605187052975e-06, + "loss": 11.0172, + "step": 258560 + }, + { + "epoch": 0.5223277593054214, + "grad_norm": 152.40489196777344, + "learning_rate": 5.572258416373394e-06, + "loss": 12.0545, + "step": 258570 + }, + { + "epoch": 0.5223479599381052, + "grad_norm": 699.2028198242188, + "learning_rate": 5.571911642904696e-06, + "loss": 21.6052, + "step": 258580 + }, + { + "epoch": 0.522368160570789, + "grad_norm": 181.78952026367188, + "learning_rate": 5.571564866648569e-06, + "loss": 12.0304, + "step": 258590 + }, + { + "epoch": 0.5223883612034729, + "grad_norm": 129.2386016845703, + "learning_rate": 5.5712180876067045e-06, + "loss": 15.395, + "step": 258600 + }, + { + "epoch": 0.5224085618361567, + "grad_norm": 459.8892822265625, + "learning_rate": 5.570871305780793e-06, + "loss": 18.8547, + "step": 258610 + }, + { + "epoch": 0.5224287624688405, + "grad_norm": 175.90707397460938, + "learning_rate": 5.570524521172523e-06, + "loss": 13.9561, + "step": 258620 + }, + { + "epoch": 0.5224489631015243, + "grad_norm": 388.4878234863281, + "learning_rate": 5.570177733783586e-06, + "loss": 19.4359, + "step": 258630 + }, + { + "epoch": 0.5224691637342082, + "grad_norm": 270.1606140136719, + "learning_rate": 5.56983094361567e-06, + "loss": 9.4464, + "step": 258640 + }, + { + "epoch": 0.522489364366892, + "grad_norm": 446.36431884765625, + "learning_rate": 5.56948415067047e-06, + "loss": 34.3768, + "step": 258650 + }, + { + "epoch": 0.5225095649995758, + "grad_norm": 142.0673828125, + "learning_rate": 5.569137354949672e-06, + "loss": 23.0147, + "step": 258660 + }, + { + "epoch": 0.5225297656322596, + "grad_norm": 418.1278076171875, + "learning_rate": 5.568790556454967e-06, + "loss": 15.567, + "step": 258670 + }, + { + "epoch": 0.5225499662649434, + "grad_norm": 229.39422607421875, + "learning_rate": 5.568443755188048e-06, + "loss": 4.9313, + "step": 258680 + }, + { + "epoch": 0.5225701668976273, + "grad_norm": 677.0360717773438, + "learning_rate": 5.568096951150601e-06, + "loss": 17.7067, + "step": 258690 + }, + { + "epoch": 0.5225903675303111, + "grad_norm": 255.9961700439453, + "learning_rate": 5.567750144344318e-06, + "loss": 15.2524, + "step": 258700 + }, + { + "epoch": 0.5226105681629949, + "grad_norm": 224.96157836914062, + "learning_rate": 5.567403334770891e-06, + 
"loss": 12.1423, + "step": 258710 + }, + { + "epoch": 0.5226307687956787, + "grad_norm": 129.64767456054688, + "learning_rate": 5.567056522432008e-06, + "loss": 17.2798, + "step": 258720 + }, + { + "epoch": 0.5226509694283625, + "grad_norm": 889.9985961914062, + "learning_rate": 5.5667097073293605e-06, + "loss": 34.7892, + "step": 258730 + }, + { + "epoch": 0.5226711700610464, + "grad_norm": 9.282400131225586, + "learning_rate": 5.56636288946464e-06, + "loss": 24.7925, + "step": 258740 + }, + { + "epoch": 0.5226913706937302, + "grad_norm": 600.868896484375, + "learning_rate": 5.566016068839535e-06, + "loss": 19.0472, + "step": 258750 + }, + { + "epoch": 0.522711571326414, + "grad_norm": 686.1902465820312, + "learning_rate": 5.565669245455735e-06, + "loss": 15.5807, + "step": 258760 + }, + { + "epoch": 0.5227317719590978, + "grad_norm": 508.9956359863281, + "learning_rate": 5.565322419314933e-06, + "loss": 23.0285, + "step": 258770 + }, + { + "epoch": 0.5227519725917815, + "grad_norm": 360.8724060058594, + "learning_rate": 5.564975590418816e-06, + "loss": 36.6206, + "step": 258780 + }, + { + "epoch": 0.5227721732244653, + "grad_norm": 632.7630004882812, + "learning_rate": 5.564628758769079e-06, + "loss": 30.5118, + "step": 258790 + }, + { + "epoch": 0.5227923738571492, + "grad_norm": 651.6338500976562, + "learning_rate": 5.5642819243674085e-06, + "loss": 20.9828, + "step": 258800 + }, + { + "epoch": 0.522812574489833, + "grad_norm": 803.3969116210938, + "learning_rate": 5.563935087215497e-06, + "loss": 20.3904, + "step": 258810 + }, + { + "epoch": 0.5228327751225168, + "grad_norm": 385.5091247558594, + "learning_rate": 5.563588247315035e-06, + "loss": 23.2029, + "step": 258820 + }, + { + "epoch": 0.5228529757552006, + "grad_norm": 12.902904510498047, + "learning_rate": 5.563241404667711e-06, + "loss": 12.9647, + "step": 258830 + }, + { + "epoch": 0.5228731763878844, + "grad_norm": 383.08953857421875, + "learning_rate": 5.562894559275216e-06, + "loss": 23.1276, + "step": 258840 + }, + { + "epoch": 0.5228933770205683, + "grad_norm": 623.5396118164062, + "learning_rate": 5.562547711139243e-06, + "loss": 16.249, + "step": 258850 + }, + { + "epoch": 0.5229135776532521, + "grad_norm": 159.24526977539062, + "learning_rate": 5.562200860261481e-06, + "loss": 14.0956, + "step": 258860 + }, + { + "epoch": 0.5229337782859359, + "grad_norm": 216.1454620361328, + "learning_rate": 5.5618540066436174e-06, + "loss": 7.0407, + "step": 258870 + }, + { + "epoch": 0.5229539789186197, + "grad_norm": 645.8809204101562, + "learning_rate": 5.561507150287347e-06, + "loss": 24.7977, + "step": 258880 + }, + { + "epoch": 0.5229741795513035, + "grad_norm": 380.7661437988281, + "learning_rate": 5.56116029119436e-06, + "loss": 30.2449, + "step": 258890 + }, + { + "epoch": 0.5229943801839874, + "grad_norm": 387.68853759765625, + "learning_rate": 5.560813429366345e-06, + "loss": 19.7908, + "step": 258900 + }, + { + "epoch": 0.5230145808166712, + "grad_norm": 1197.5845947265625, + "learning_rate": 5.560466564804993e-06, + "loss": 20.6971, + "step": 258910 + }, + { + "epoch": 0.523034781449355, + "grad_norm": 276.0072021484375, + "learning_rate": 5.560119697511995e-06, + "loss": 10.2198, + "step": 258920 + }, + { + "epoch": 0.5230549820820388, + "grad_norm": 137.66383361816406, + "learning_rate": 5.559772827489042e-06, + "loss": 24.7138, + "step": 258930 + }, + { + "epoch": 0.5230751827147226, + "grad_norm": 4.750400543212891, + "learning_rate": 5.559425954737824e-06, + "loss": 13.2065, + "step": 258940 + }, + { + "epoch": 
0.5230953833474065, + "grad_norm": 300.6987609863281, + "learning_rate": 5.559079079260032e-06, + "loss": 15.55, + "step": 258950 + }, + { + "epoch": 0.5231155839800903, + "grad_norm": 561.4017333984375, + "learning_rate": 5.558732201057355e-06, + "loss": 14.0307, + "step": 258960 + }, + { + "epoch": 0.5231357846127741, + "grad_norm": 286.33770751953125, + "learning_rate": 5.558385320131487e-06, + "loss": 18.4139, + "step": 258970 + }, + { + "epoch": 0.5231559852454579, + "grad_norm": 183.73011779785156, + "learning_rate": 5.558038436484116e-06, + "loss": 19.1513, + "step": 258980 + }, + { + "epoch": 0.5231761858781417, + "grad_norm": 427.0572509765625, + "learning_rate": 5.5576915501169314e-06, + "loss": 17.268, + "step": 258990 + }, + { + "epoch": 0.5231963865108256, + "grad_norm": 202.74530029296875, + "learning_rate": 5.557344661031628e-06, + "loss": 9.5992, + "step": 259000 + }, + { + "epoch": 0.5232165871435094, + "grad_norm": 387.3416748046875, + "learning_rate": 5.556997769229893e-06, + "loss": 25.6398, + "step": 259010 + }, + { + "epoch": 0.5232367877761932, + "grad_norm": 444.55316162109375, + "learning_rate": 5.556650874713421e-06, + "loss": 39.484, + "step": 259020 + }, + { + "epoch": 0.5232569884088769, + "grad_norm": 277.5933532714844, + "learning_rate": 5.556303977483898e-06, + "loss": 15.9852, + "step": 259030 + }, + { + "epoch": 0.5232771890415607, + "grad_norm": 36.20888900756836, + "learning_rate": 5.555957077543016e-06, + "loss": 19.1169, + "step": 259040 + }, + { + "epoch": 0.5232973896742446, + "grad_norm": 122.30401611328125, + "learning_rate": 5.555610174892468e-06, + "loss": 20.7532, + "step": 259050 + }, + { + "epoch": 0.5233175903069284, + "grad_norm": 87.84234619140625, + "learning_rate": 5.555263269533945e-06, + "loss": 20.0165, + "step": 259060 + }, + { + "epoch": 0.5233377909396122, + "grad_norm": 177.54856872558594, + "learning_rate": 5.554916361469133e-06, + "loss": 14.0952, + "step": 259070 + }, + { + "epoch": 0.523357991572296, + "grad_norm": 609.1646728515625, + "learning_rate": 5.554569450699727e-06, + "loss": 13.2931, + "step": 259080 + }, + { + "epoch": 0.5233781922049798, + "grad_norm": 630.5967407226562, + "learning_rate": 5.554222537227417e-06, + "loss": 27.3751, + "step": 259090 + }, + { + "epoch": 0.5233983928376637, + "grad_norm": 144.98471069335938, + "learning_rate": 5.553875621053893e-06, + "loss": 25.0189, + "step": 259100 + }, + { + "epoch": 0.5234185934703475, + "grad_norm": 248.9592742919922, + "learning_rate": 5.553528702180848e-06, + "loss": 10.8189, + "step": 259110 + }, + { + "epoch": 0.5234387941030313, + "grad_norm": 407.3052673339844, + "learning_rate": 5.55318178060997e-06, + "loss": 19.5606, + "step": 259120 + }, + { + "epoch": 0.5234589947357151, + "grad_norm": 116.8155746459961, + "learning_rate": 5.5528348563429524e-06, + "loss": 21.4337, + "step": 259130 + }, + { + "epoch": 0.5234791953683989, + "grad_norm": 73.57362365722656, + "learning_rate": 5.552487929381484e-06, + "loss": 13.3468, + "step": 259140 + }, + { + "epoch": 0.5234993960010828, + "grad_norm": 380.897216796875, + "learning_rate": 5.552140999727256e-06, + "loss": 16.9576, + "step": 259150 + }, + { + "epoch": 0.5235195966337666, + "grad_norm": 63.59773635864258, + "learning_rate": 5.551794067381959e-06, + "loss": 28.4819, + "step": 259160 + }, + { + "epoch": 0.5235397972664504, + "grad_norm": 531.4129638671875, + "learning_rate": 5.551447132347286e-06, + "loss": 18.2065, + "step": 259170 + }, + { + "epoch": 0.5235599978991342, + "grad_norm": 217.3151092529297, + 
"learning_rate": 5.551100194624925e-06, + "loss": 21.4243, + "step": 259180 + }, + { + "epoch": 0.523580198531818, + "grad_norm": 118.2372055053711, + "learning_rate": 5.5507532542165706e-06, + "loss": 22.6855, + "step": 259190 + }, + { + "epoch": 0.5236003991645019, + "grad_norm": 262.572021484375, + "learning_rate": 5.5504063111239116e-06, + "loss": 15.9268, + "step": 259200 + }, + { + "epoch": 0.5236205997971857, + "grad_norm": 266.8731384277344, + "learning_rate": 5.550059365348638e-06, + "loss": 11.6768, + "step": 259210 + }, + { + "epoch": 0.5236408004298695, + "grad_norm": 476.862060546875, + "learning_rate": 5.549712416892442e-06, + "loss": 9.7514, + "step": 259220 + }, + { + "epoch": 0.5236610010625533, + "grad_norm": 726.2845458984375, + "learning_rate": 5.549365465757013e-06, + "loss": 34.2101, + "step": 259230 + }, + { + "epoch": 0.5236812016952371, + "grad_norm": 332.2506103515625, + "learning_rate": 5.549018511944046e-06, + "loss": 21.7106, + "step": 259240 + }, + { + "epoch": 0.523701402327921, + "grad_norm": 215.2087860107422, + "learning_rate": 5.548671555455226e-06, + "loss": 48.7787, + "step": 259250 + }, + { + "epoch": 0.5237216029606048, + "grad_norm": 285.1471862792969, + "learning_rate": 5.548324596292251e-06, + "loss": 33.917, + "step": 259260 + }, + { + "epoch": 0.5237418035932886, + "grad_norm": 445.18170166015625, + "learning_rate": 5.547977634456806e-06, + "loss": 28.387, + "step": 259270 + }, + { + "epoch": 0.5237620042259724, + "grad_norm": 357.4407653808594, + "learning_rate": 5.547630669950585e-06, + "loss": 21.1768, + "step": 259280 + }, + { + "epoch": 0.5237822048586561, + "grad_norm": 551.136474609375, + "learning_rate": 5.547283702775279e-06, + "loss": 20.7401, + "step": 259290 + }, + { + "epoch": 0.5238024054913399, + "grad_norm": 64.77119445800781, + "learning_rate": 5.546936732932578e-06, + "loss": 8.634, + "step": 259300 + }, + { + "epoch": 0.5238226061240238, + "grad_norm": 549.214111328125, + "learning_rate": 5.546589760424175e-06, + "loss": 24.5534, + "step": 259310 + }, + { + "epoch": 0.5238428067567076, + "grad_norm": 286.0890808105469, + "learning_rate": 5.5462427852517585e-06, + "loss": 26.9847, + "step": 259320 + }, + { + "epoch": 0.5238630073893914, + "grad_norm": 508.03515625, + "learning_rate": 5.545895807417021e-06, + "loss": 13.2195, + "step": 259330 + }, + { + "epoch": 0.5238832080220752, + "grad_norm": 533.0213012695312, + "learning_rate": 5.545548826921653e-06, + "loss": 18.3663, + "step": 259340 + }, + { + "epoch": 0.523903408654759, + "grad_norm": 323.7482604980469, + "learning_rate": 5.545201843767348e-06, + "loss": 19.4811, + "step": 259350 + }, + { + "epoch": 0.5239236092874429, + "grad_norm": 187.0774383544922, + "learning_rate": 5.544854857955795e-06, + "loss": 17.8492, + "step": 259360 + }, + { + "epoch": 0.5239438099201267, + "grad_norm": 166.46250915527344, + "learning_rate": 5.544507869488684e-06, + "loss": 14.3456, + "step": 259370 + }, + { + "epoch": 0.5239640105528105, + "grad_norm": 171.3353729248047, + "learning_rate": 5.544160878367709e-06, + "loss": 25.5687, + "step": 259380 + }, + { + "epoch": 0.5239842111854943, + "grad_norm": 277.5767517089844, + "learning_rate": 5.543813884594559e-06, + "loss": 15.5179, + "step": 259390 + }, + { + "epoch": 0.5240044118181781, + "grad_norm": 541.5955200195312, + "learning_rate": 5.543466888170927e-06, + "loss": 30.6137, + "step": 259400 + }, + { + "epoch": 0.524024612450862, + "grad_norm": 83.60375213623047, + "learning_rate": 5.5431198890985014e-06, + "loss": 5.8933, + "step": 
259410 + }, + { + "epoch": 0.5240448130835458, + "grad_norm": 878.072509765625, + "learning_rate": 5.542772887378978e-06, + "loss": 21.106, + "step": 259420 + }, + { + "epoch": 0.5240650137162296, + "grad_norm": 395.443359375, + "learning_rate": 5.5424258830140434e-06, + "loss": 16.6393, + "step": 259430 + }, + { + "epoch": 0.5240852143489134, + "grad_norm": 267.90826416015625, + "learning_rate": 5.542078876005391e-06, + "loss": 51.0356, + "step": 259440 + }, + { + "epoch": 0.5241054149815972, + "grad_norm": 134.7003936767578, + "learning_rate": 5.541731866354713e-06, + "loss": 11.3166, + "step": 259450 + }, + { + "epoch": 0.5241256156142811, + "grad_norm": 288.36785888671875, + "learning_rate": 5.5413848540637e-06, + "loss": 20.2249, + "step": 259460 + }, + { + "epoch": 0.5241458162469649, + "grad_norm": 441.0600280761719, + "learning_rate": 5.541037839134041e-06, + "loss": 24.9278, + "step": 259470 + }, + { + "epoch": 0.5241660168796487, + "grad_norm": 201.4093475341797, + "learning_rate": 5.5406908215674306e-06, + "loss": 14.75, + "step": 259480 + }, + { + "epoch": 0.5241862175123325, + "grad_norm": 244.89747619628906, + "learning_rate": 5.5403438013655575e-06, + "loss": 17.7876, + "step": 259490 + }, + { + "epoch": 0.5242064181450163, + "grad_norm": 192.8192901611328, + "learning_rate": 5.539996778530114e-06, + "loss": 24.9327, + "step": 259500 + }, + { + "epoch": 0.5242266187777002, + "grad_norm": 958.9153442382812, + "learning_rate": 5.539649753062795e-06, + "loss": 23.6456, + "step": 259510 + }, + { + "epoch": 0.524246819410384, + "grad_norm": 782.2985229492188, + "learning_rate": 5.5393027249652844e-06, + "loss": 29.5384, + "step": 259520 + }, + { + "epoch": 0.5242670200430678, + "grad_norm": 467.1959533691406, + "learning_rate": 5.5389556942392794e-06, + "loss": 24.0411, + "step": 259530 + }, + { + "epoch": 0.5242872206757516, + "grad_norm": 102.59488677978516, + "learning_rate": 5.538608660886471e-06, + "loss": 14.9539, + "step": 259540 + }, + { + "epoch": 0.5243074213084353, + "grad_norm": 43.54492950439453, + "learning_rate": 5.5382616249085476e-06, + "loss": 23.5561, + "step": 259550 + }, + { + "epoch": 0.5243276219411191, + "grad_norm": 133.77264404296875, + "learning_rate": 5.537914586307204e-06, + "loss": 21.1789, + "step": 259560 + }, + { + "epoch": 0.524347822573803, + "grad_norm": 668.717041015625, + "learning_rate": 5.537567545084127e-06, + "loss": 23.9103, + "step": 259570 + }, + { + "epoch": 0.5243680232064868, + "grad_norm": 197.05282592773438, + "learning_rate": 5.537220501241014e-06, + "loss": 12.083, + "step": 259580 + }, + { + "epoch": 0.5243882238391706, + "grad_norm": 219.2777862548828, + "learning_rate": 5.536873454779552e-06, + "loss": 20.6043, + "step": 259590 + }, + { + "epoch": 0.5244084244718544, + "grad_norm": 630.7650756835938, + "learning_rate": 5.536526405701433e-06, + "loss": 14.3581, + "step": 259600 + }, + { + "epoch": 0.5244286251045382, + "grad_norm": 358.7485046386719, + "learning_rate": 5.536179354008351e-06, + "loss": 7.4995, + "step": 259610 + }, + { + "epoch": 0.5244488257372221, + "grad_norm": 27.89251708984375, + "learning_rate": 5.5358322997019955e-06, + "loss": 10.0689, + "step": 259620 + }, + { + "epoch": 0.5244690263699059, + "grad_norm": 708.9737548828125, + "learning_rate": 5.535485242784059e-06, + "loss": 8.1251, + "step": 259630 + }, + { + "epoch": 0.5244892270025897, + "grad_norm": 477.3925476074219, + "learning_rate": 5.5351381832562316e-06, + "loss": 30.8436, + "step": 259640 + }, + { + "epoch": 0.5245094276352735, + 
"grad_norm": 79.54764556884766, + "learning_rate": 5.534791121120205e-06, + "loss": 34.1224, + "step": 259650 + }, + { + "epoch": 0.5245296282679573, + "grad_norm": 58.21992492675781, + "learning_rate": 5.534444056377671e-06, + "loss": 34.8295, + "step": 259660 + }, + { + "epoch": 0.5245498289006412, + "grad_norm": 249.1851806640625, + "learning_rate": 5.534096989030324e-06, + "loss": 20.0038, + "step": 259670 + }, + { + "epoch": 0.524570029533325, + "grad_norm": 430.1670227050781, + "learning_rate": 5.53374991907985e-06, + "loss": 20.2447, + "step": 259680 + }, + { + "epoch": 0.5245902301660088, + "grad_norm": 979.83203125, + "learning_rate": 5.533402846527947e-06, + "loss": 24.442, + "step": 259690 + }, + { + "epoch": 0.5246104307986926, + "grad_norm": 292.3692321777344, + "learning_rate": 5.5330557713763e-06, + "loss": 10.8437, + "step": 259700 + }, + { + "epoch": 0.5246306314313764, + "grad_norm": 372.9408874511719, + "learning_rate": 5.532708693626605e-06, + "loss": 19.4395, + "step": 259710 + }, + { + "epoch": 0.5246508320640603, + "grad_norm": 218.0306396484375, + "learning_rate": 5.5323616132805536e-06, + "loss": 14.077, + "step": 259720 + }, + { + "epoch": 0.5246710326967441, + "grad_norm": 14.43504810333252, + "learning_rate": 5.532014530339834e-06, + "loss": 35.8548, + "step": 259730 + }, + { + "epoch": 0.5246912333294279, + "grad_norm": 429.4441223144531, + "learning_rate": 5.531667444806142e-06, + "loss": 17.55, + "step": 259740 + }, + { + "epoch": 0.5247114339621117, + "grad_norm": 744.641845703125, + "learning_rate": 5.5313203566811666e-06, + "loss": 11.5287, + "step": 259750 + }, + { + "epoch": 0.5247316345947955, + "grad_norm": 42.671661376953125, + "learning_rate": 5.5309732659666e-06, + "loss": 13.2541, + "step": 259760 + }, + { + "epoch": 0.5247518352274794, + "grad_norm": 256.8294677734375, + "learning_rate": 5.530626172664135e-06, + "loss": 17.0344, + "step": 259770 + }, + { + "epoch": 0.5247720358601632, + "grad_norm": 190.9846649169922, + "learning_rate": 5.530279076775461e-06, + "loss": 22.5209, + "step": 259780 + }, + { + "epoch": 0.524792236492847, + "grad_norm": 273.3391418457031, + "learning_rate": 5.529931978302272e-06, + "loss": 21.7716, + "step": 259790 + }, + { + "epoch": 0.5248124371255307, + "grad_norm": 238.27056884765625, + "learning_rate": 5.52958487724626e-06, + "loss": 22.222, + "step": 259800 + }, + { + "epoch": 0.5248326377582145, + "grad_norm": 543.8564453125, + "learning_rate": 5.529237773609114e-06, + "loss": 20.6765, + "step": 259810 + }, + { + "epoch": 0.5248528383908984, + "grad_norm": 168.21966552734375, + "learning_rate": 5.528890667392527e-06, + "loss": 48.8529, + "step": 259820 + }, + { + "epoch": 0.5248730390235822, + "grad_norm": 560.0949096679688, + "learning_rate": 5.528543558598193e-06, + "loss": 15.7487, + "step": 259830 + }, + { + "epoch": 0.524893239656266, + "grad_norm": 247.71060180664062, + "learning_rate": 5.528196447227798e-06, + "loss": 17.572, + "step": 259840 + }, + { + "epoch": 0.5249134402889498, + "grad_norm": 71.76203918457031, + "learning_rate": 5.527849333283042e-06, + "loss": 24.7157, + "step": 259850 + }, + { + "epoch": 0.5249336409216336, + "grad_norm": 907.2033081054688, + "learning_rate": 5.527502216765609e-06, + "loss": 17.4662, + "step": 259860 + }, + { + "epoch": 0.5249538415543175, + "grad_norm": 84.12830352783203, + "learning_rate": 5.527155097677196e-06, + "loss": 19.072, + "step": 259870 + }, + { + "epoch": 0.5249740421870013, + "grad_norm": 338.2107849121094, + "learning_rate": 5.526807976019492e-06, + 
"loss": 19.2565, + "step": 259880 + }, + { + "epoch": 0.5249942428196851, + "grad_norm": 615.3682250976562, + "learning_rate": 5.526460851794191e-06, + "loss": 21.7621, + "step": 259890 + }, + { + "epoch": 0.5250144434523689, + "grad_norm": 32.63579559326172, + "learning_rate": 5.526113725002984e-06, + "loss": 11.3412, + "step": 259900 + }, + { + "epoch": 0.5250346440850527, + "grad_norm": 265.86065673828125, + "learning_rate": 5.525766595647561e-06, + "loss": 15.7407, + "step": 259910 + }, + { + "epoch": 0.5250548447177366, + "grad_norm": 41.338722229003906, + "learning_rate": 5.525419463729615e-06, + "loss": 15.5702, + "step": 259920 + }, + { + "epoch": 0.5250750453504204, + "grad_norm": 880.5347290039062, + "learning_rate": 5.525072329250839e-06, + "loss": 11.2259, + "step": 259930 + }, + { + "epoch": 0.5250952459831042, + "grad_norm": 801.6827392578125, + "learning_rate": 5.524725192212924e-06, + "loss": 27.9179, + "step": 259940 + }, + { + "epoch": 0.525115446615788, + "grad_norm": 504.3584289550781, + "learning_rate": 5.524378052617563e-06, + "loss": 18.1123, + "step": 259950 + }, + { + "epoch": 0.5251356472484718, + "grad_norm": 2.5646278858184814, + "learning_rate": 5.524030910466447e-06, + "loss": 19.4226, + "step": 259960 + }, + { + "epoch": 0.5251558478811557, + "grad_norm": 334.9571838378906, + "learning_rate": 5.523683765761266e-06, + "loss": 19.0106, + "step": 259970 + }, + { + "epoch": 0.5251760485138395, + "grad_norm": 253.3269805908203, + "learning_rate": 5.523336618503715e-06, + "loss": 22.0745, + "step": 259980 + }, + { + "epoch": 0.5251962491465233, + "grad_norm": 360.09344482421875, + "learning_rate": 5.522989468695487e-06, + "loss": 28.4839, + "step": 259990 + }, + { + "epoch": 0.5252164497792071, + "grad_norm": 324.1053771972656, + "learning_rate": 5.522642316338268e-06, + "loss": 19.7486, + "step": 260000 + }, + { + "epoch": 0.5252366504118909, + "grad_norm": 876.195556640625, + "learning_rate": 5.5222951614337564e-06, + "loss": 26.3472, + "step": 260010 + }, + { + "epoch": 0.5252568510445748, + "grad_norm": 337.7935791015625, + "learning_rate": 5.521948003983639e-06, + "loss": 12.4376, + "step": 260020 + }, + { + "epoch": 0.5252770516772586, + "grad_norm": 852.6124267578125, + "learning_rate": 5.521600843989613e-06, + "loss": 22.2194, + "step": 260030 + }, + { + "epoch": 0.5252972523099424, + "grad_norm": 80.01764678955078, + "learning_rate": 5.521253681453366e-06, + "loss": 20.5192, + "step": 260040 + }, + { + "epoch": 0.5253174529426262, + "grad_norm": 462.2069091796875, + "learning_rate": 5.520906516376592e-06, + "loss": 10.2419, + "step": 260050 + }, + { + "epoch": 0.5253376535753099, + "grad_norm": 166.80796813964844, + "learning_rate": 5.520559348760984e-06, + "loss": 25.0185, + "step": 260060 + }, + { + "epoch": 0.5253578542079937, + "grad_norm": 49.324462890625, + "learning_rate": 5.520212178608231e-06, + "loss": 7.3639, + "step": 260070 + }, + { + "epoch": 0.5253780548406776, + "grad_norm": 80.81123352050781, + "learning_rate": 5.519865005920029e-06, + "loss": 11.0352, + "step": 260080 + }, + { + "epoch": 0.5253982554733614, + "grad_norm": 341.9402770996094, + "learning_rate": 5.519517830698067e-06, + "loss": 13.5035, + "step": 260090 + }, + { + "epoch": 0.5254184561060452, + "grad_norm": 532.9110107421875, + "learning_rate": 5.519170652944037e-06, + "loss": 36.718, + "step": 260100 + }, + { + "epoch": 0.525438656738729, + "grad_norm": 636.3525390625, + "learning_rate": 5.518823472659634e-06, + "loss": 20.4944, + "step": 260110 + }, + { + "epoch": 
0.5254588573714128, + "grad_norm": 248.55691528320312, + "learning_rate": 5.518476289846548e-06, + "loss": 15.9055, + "step": 260120 + }, + { + "epoch": 0.5254790580040967, + "grad_norm": 428.7301025390625, + "learning_rate": 5.518129104506471e-06, + "loss": 18.6148, + "step": 260130 + }, + { + "epoch": 0.5254992586367805, + "grad_norm": 366.0169372558594, + "learning_rate": 5.5177819166410955e-06, + "loss": 16.0438, + "step": 260140 + }, + { + "epoch": 0.5255194592694643, + "grad_norm": 411.673583984375, + "learning_rate": 5.517434726252113e-06, + "loss": 11.1201, + "step": 260150 + }, + { + "epoch": 0.5255396599021481, + "grad_norm": 130.8328094482422, + "learning_rate": 5.5170875333412176e-06, + "loss": 38.1429, + "step": 260160 + }, + { + "epoch": 0.525559860534832, + "grad_norm": 114.20699310302734, + "learning_rate": 5.516740337910101e-06, + "loss": 8.5824, + "step": 260170 + }, + { + "epoch": 0.5255800611675158, + "grad_norm": 404.4974060058594, + "learning_rate": 5.516393139960452e-06, + "loss": 20.9042, + "step": 260180 + }, + { + "epoch": 0.5256002618001996, + "grad_norm": 707.0994262695312, + "learning_rate": 5.516045939493968e-06, + "loss": 25.061, + "step": 260190 + }, + { + "epoch": 0.5256204624328834, + "grad_norm": 922.6636352539062, + "learning_rate": 5.515698736512337e-06, + "loss": 31.5711, + "step": 260200 + }, + { + "epoch": 0.5256406630655672, + "grad_norm": 298.8223876953125, + "learning_rate": 5.515351531017254e-06, + "loss": 23.908, + "step": 260210 + }, + { + "epoch": 0.525660863698251, + "grad_norm": 406.9949645996094, + "learning_rate": 5.51500432301041e-06, + "loss": 34.995, + "step": 260220 + }, + { + "epoch": 0.5256810643309349, + "grad_norm": 162.56687927246094, + "learning_rate": 5.514657112493497e-06, + "loss": 19.2722, + "step": 260230 + }, + { + "epoch": 0.5257012649636187, + "grad_norm": 178.6824951171875, + "learning_rate": 5.514309899468209e-06, + "loss": 25.4962, + "step": 260240 + }, + { + "epoch": 0.5257214655963025, + "grad_norm": 518.660400390625, + "learning_rate": 5.513962683936235e-06, + "loss": 17.0572, + "step": 260250 + }, + { + "epoch": 0.5257416662289863, + "grad_norm": 370.029296875, + "learning_rate": 5.51361546589927e-06, + "loss": 26.6064, + "step": 260260 + }, + { + "epoch": 0.5257618668616701, + "grad_norm": 435.8376770019531, + "learning_rate": 5.513268245359005e-06, + "loss": 15.1549, + "step": 260270 + }, + { + "epoch": 0.525782067494354, + "grad_norm": 313.71844482421875, + "learning_rate": 5.512921022317135e-06, + "loss": 31.2322, + "step": 260280 + }, + { + "epoch": 0.5258022681270378, + "grad_norm": 409.15087890625, + "learning_rate": 5.512573796775347e-06, + "loss": 23.1105, + "step": 260290 + }, + { + "epoch": 0.5258224687597216, + "grad_norm": 168.183837890625, + "learning_rate": 5.512226568735338e-06, + "loss": 22.3686, + "step": 260300 + }, + { + "epoch": 0.5258426693924053, + "grad_norm": 538.6890869140625, + "learning_rate": 5.5118793381987985e-06, + "loss": 22.7936, + "step": 260310 + }, + { + "epoch": 0.5258628700250891, + "grad_norm": 240.4720916748047, + "learning_rate": 5.511532105167422e-06, + "loss": 12.3813, + "step": 260320 + }, + { + "epoch": 0.525883070657773, + "grad_norm": 488.4220275878906, + "learning_rate": 5.5111848696429005e-06, + "loss": 6.9798, + "step": 260330 + }, + { + "epoch": 0.5259032712904568, + "grad_norm": 302.3664855957031, + "learning_rate": 5.510837631626923e-06, + "loss": 14.1341, + "step": 260340 + }, + { + "epoch": 0.5259234719231406, + "grad_norm": 546.3446655273438, + 
"learning_rate": 5.510490391121188e-06, + "loss": 18.4073, + "step": 260350 + }, + { + "epoch": 0.5259436725558244, + "grad_norm": 316.3846740722656, + "learning_rate": 5.510143148127384e-06, + "loss": 13.9157, + "step": 260360 + }, + { + "epoch": 0.5259638731885082, + "grad_norm": 372.5844421386719, + "learning_rate": 5.509795902647203e-06, + "loss": 21.7814, + "step": 260370 + }, + { + "epoch": 0.525984073821192, + "grad_norm": 738.1446533203125, + "learning_rate": 5.509448654682339e-06, + "loss": 26.4201, + "step": 260380 + }, + { + "epoch": 0.5260042744538759, + "grad_norm": 143.0221710205078, + "learning_rate": 5.509101404234485e-06, + "loss": 11.4723, + "step": 260390 + }, + { + "epoch": 0.5260244750865597, + "grad_norm": 206.6165008544922, + "learning_rate": 5.508754151305332e-06, + "loss": 23.3635, + "step": 260400 + }, + { + "epoch": 0.5260446757192435, + "grad_norm": 785.65087890625, + "learning_rate": 5.508406895896573e-06, + "loss": 21.6582, + "step": 260410 + }, + { + "epoch": 0.5260648763519273, + "grad_norm": 122.45549011230469, + "learning_rate": 5.5080596380099e-06, + "loss": 14.9553, + "step": 260420 + }, + { + "epoch": 0.5260850769846112, + "grad_norm": 499.3188781738281, + "learning_rate": 5.507712377647006e-06, + "loss": 11.4883, + "step": 260430 + }, + { + "epoch": 0.526105277617295, + "grad_norm": 263.1232604980469, + "learning_rate": 5.507365114809585e-06, + "loss": 27.1334, + "step": 260440 + }, + { + "epoch": 0.5261254782499788, + "grad_norm": 584.4727172851562, + "learning_rate": 5.507017849499326e-06, + "loss": 14.8529, + "step": 260450 + }, + { + "epoch": 0.5261456788826626, + "grad_norm": 383.7892761230469, + "learning_rate": 5.506670581717925e-06, + "loss": 18.6287, + "step": 260460 + }, + { + "epoch": 0.5261658795153464, + "grad_norm": 479.3489685058594, + "learning_rate": 5.506323311467071e-06, + "loss": 6.2042, + "step": 260470 + }, + { + "epoch": 0.5261860801480303, + "grad_norm": 282.7895812988281, + "learning_rate": 5.5059760387484595e-06, + "loss": 13.2479, + "step": 260480 + }, + { + "epoch": 0.5262062807807141, + "grad_norm": 495.27392578125, + "learning_rate": 5.505628763563783e-06, + "loss": 23.3407, + "step": 260490 + }, + { + "epoch": 0.5262264814133979, + "grad_norm": 222.0509033203125, + "learning_rate": 5.505281485914732e-06, + "loss": 27.4989, + "step": 260500 + }, + { + "epoch": 0.5262466820460817, + "grad_norm": 963.912353515625, + "learning_rate": 5.504934205803002e-06, + "loss": 25.8154, + "step": 260510 + }, + { + "epoch": 0.5262668826787655, + "grad_norm": 527.7839965820312, + "learning_rate": 5.504586923230283e-06, + "loss": 13.4463, + "step": 260520 + }, + { + "epoch": 0.5262870833114494, + "grad_norm": 256.66802978515625, + "learning_rate": 5.504239638198267e-06, + "loss": 21.2495, + "step": 260530 + }, + { + "epoch": 0.5263072839441332, + "grad_norm": 241.9513397216797, + "learning_rate": 5.503892350708651e-06, + "loss": 17.1914, + "step": 260540 + }, + { + "epoch": 0.526327484576817, + "grad_norm": 326.9222106933594, + "learning_rate": 5.503545060763123e-06, + "loss": 34.5534, + "step": 260550 + }, + { + "epoch": 0.5263476852095008, + "grad_norm": 357.99700927734375, + "learning_rate": 5.503197768363378e-06, + "loss": 14.2177, + "step": 260560 + }, + { + "epoch": 0.5263678858421845, + "grad_norm": 246.52980041503906, + "learning_rate": 5.502850473511108e-06, + "loss": 12.4622, + "step": 260570 + }, + { + "epoch": 0.5263880864748683, + "grad_norm": 480.41668701171875, + "learning_rate": 5.502503176208006e-06, + "loss": 25.3514, + 
"step": 260580 + }, + { + "epoch": 0.5264082871075522, + "grad_norm": 269.68865966796875, + "learning_rate": 5.502155876455764e-06, + "loss": 25.3879, + "step": 260590 + }, + { + "epoch": 0.526428487740236, + "grad_norm": 408.2362060546875, + "learning_rate": 5.5018085742560745e-06, + "loss": 16.3543, + "step": 260600 + }, + { + "epoch": 0.5264486883729198, + "grad_norm": 249.12245178222656, + "learning_rate": 5.501461269610632e-06, + "loss": 13.5725, + "step": 260610 + }, + { + "epoch": 0.5264688890056036, + "grad_norm": 438.4444580078125, + "learning_rate": 5.501113962521129e-06, + "loss": 20.8684, + "step": 260620 + }, + { + "epoch": 0.5264890896382874, + "grad_norm": 0.0, + "learning_rate": 5.5007666529892545e-06, + "loss": 24.0455, + "step": 260630 + }, + { + "epoch": 0.5265092902709713, + "grad_norm": 277.3448181152344, + "learning_rate": 5.500419341016707e-06, + "loss": 14.3256, + "step": 260640 + }, + { + "epoch": 0.5265294909036551, + "grad_norm": 425.6750793457031, + "learning_rate": 5.500072026605175e-06, + "loss": 21.7922, + "step": 260650 + }, + { + "epoch": 0.5265496915363389, + "grad_norm": 398.42236328125, + "learning_rate": 5.499724709756352e-06, + "loss": 12.7472, + "step": 260660 + }, + { + "epoch": 0.5265698921690227, + "grad_norm": 651.5570068359375, + "learning_rate": 5.499377390471933e-06, + "loss": 27.2713, + "step": 260670 + }, + { + "epoch": 0.5265900928017065, + "grad_norm": 364.0670166015625, + "learning_rate": 5.4990300687536065e-06, + "loss": 12.4383, + "step": 260680 + }, + { + "epoch": 0.5266102934343904, + "grad_norm": 425.282470703125, + "learning_rate": 5.498682744603071e-06, + "loss": 13.0307, + "step": 260690 + }, + { + "epoch": 0.5266304940670742, + "grad_norm": 311.4023132324219, + "learning_rate": 5.498335418022015e-06, + "loss": 20.9397, + "step": 260700 + }, + { + "epoch": 0.526650694699758, + "grad_norm": 159.4119415283203, + "learning_rate": 5.497988089012132e-06, + "loss": 15.087, + "step": 260710 + }, + { + "epoch": 0.5266708953324418, + "grad_norm": 601.800048828125, + "learning_rate": 5.497640757575116e-06, + "loss": 26.5291, + "step": 260720 + }, + { + "epoch": 0.5266910959651256, + "grad_norm": 203.097900390625, + "learning_rate": 5.497293423712661e-06, + "loss": 24.5891, + "step": 260730 + }, + { + "epoch": 0.5267112965978095, + "grad_norm": 162.74554443359375, + "learning_rate": 5.4969460874264555e-06, + "loss": 8.3716, + "step": 260740 + }, + { + "epoch": 0.5267314972304933, + "grad_norm": 526.2352294921875, + "learning_rate": 5.496598748718196e-06, + "loss": 14.3457, + "step": 260750 + }, + { + "epoch": 0.5267516978631771, + "grad_norm": 764.6087036132812, + "learning_rate": 5.4962514075895746e-06, + "loss": 20.3772, + "step": 260760 + }, + { + "epoch": 0.5267718984958609, + "grad_norm": 541.0398559570312, + "learning_rate": 5.4959040640422836e-06, + "loss": 22.6953, + "step": 260770 + }, + { + "epoch": 0.5267920991285447, + "grad_norm": 414.8677978515625, + "learning_rate": 5.495556718078017e-06, + "loss": 25.4641, + "step": 260780 + }, + { + "epoch": 0.5268122997612286, + "grad_norm": 350.560791015625, + "learning_rate": 5.495209369698466e-06, + "loss": 16.316, + "step": 260790 + }, + { + "epoch": 0.5268325003939124, + "grad_norm": 316.64569091796875, + "learning_rate": 5.4948620189053255e-06, + "loss": 17.0801, + "step": 260800 + }, + { + "epoch": 0.5268527010265962, + "grad_norm": 488.3839111328125, + "learning_rate": 5.494514665700288e-06, + "loss": 12.9622, + "step": 260810 + }, + { + "epoch": 0.52687290165928, + "grad_norm": 
972.8881225585938, + "learning_rate": 5.494167310085045e-06, + "loss": 28.7659, + "step": 260820 + }, + { + "epoch": 0.5268931022919637, + "grad_norm": 395.9073181152344, + "learning_rate": 5.49381995206129e-06, + "loss": 56.1014, + "step": 260830 + }, + { + "epoch": 0.5269133029246476, + "grad_norm": 301.20037841796875, + "learning_rate": 5.493472591630717e-06, + "loss": 16.386, + "step": 260840 + }, + { + "epoch": 0.5269335035573314, + "grad_norm": 263.2437744140625, + "learning_rate": 5.49312522879502e-06, + "loss": 24.0984, + "step": 260850 + }, + { + "epoch": 0.5269537041900152, + "grad_norm": 208.31399536132812, + "learning_rate": 5.492777863555889e-06, + "loss": 20.7022, + "step": 260860 + }, + { + "epoch": 0.526973904822699, + "grad_norm": 1261.545654296875, + "learning_rate": 5.492430495915018e-06, + "loss": 18.0993, + "step": 260870 + }, + { + "epoch": 0.5269941054553828, + "grad_norm": 612.3999633789062, + "learning_rate": 5.4920831258741016e-06, + "loss": 18.8504, + "step": 260880 + }, + { + "epoch": 0.5270143060880667, + "grad_norm": 126.61519622802734, + "learning_rate": 5.491735753434832e-06, + "loss": 18.0525, + "step": 260890 + }, + { + "epoch": 0.5270345067207505, + "grad_norm": 538.5910034179688, + "learning_rate": 5.491388378598899e-06, + "loss": 14.6739, + "step": 260900 + }, + { + "epoch": 0.5270547073534343, + "grad_norm": 222.38272094726562, + "learning_rate": 5.4910410013680015e-06, + "loss": 27.0445, + "step": 260910 + }, + { + "epoch": 0.5270749079861181, + "grad_norm": 426.7323303222656, + "learning_rate": 5.490693621743829e-06, + "loss": 22.8191, + "step": 260920 + }, + { + "epoch": 0.5270951086188019, + "grad_norm": 346.5004577636719, + "learning_rate": 5.490346239728076e-06, + "loss": 29.5733, + "step": 260930 + }, + { + "epoch": 0.5271153092514858, + "grad_norm": 237.61834716796875, + "learning_rate": 5.489998855322435e-06, + "loss": 15.4577, + "step": 260940 + }, + { + "epoch": 0.5271355098841696, + "grad_norm": 492.9292907714844, + "learning_rate": 5.489651468528596e-06, + "loss": 17.7796, + "step": 260950 + }, + { + "epoch": 0.5271557105168534, + "grad_norm": 382.273681640625, + "learning_rate": 5.489304079348259e-06, + "loss": 17.7072, + "step": 260960 + }, + { + "epoch": 0.5271759111495372, + "grad_norm": 1783.85107421875, + "learning_rate": 5.488956687783111e-06, + "loss": 35.1095, + "step": 260970 + }, + { + "epoch": 0.527196111782221, + "grad_norm": 1896.2325439453125, + "learning_rate": 5.4886092938348475e-06, + "loss": 25.3209, + "step": 260980 + }, + { + "epoch": 0.5272163124149049, + "grad_norm": 309.3804931640625, + "learning_rate": 5.488261897505163e-06, + "loss": 12.4335, + "step": 260990 + }, + { + "epoch": 0.5272365130475887, + "grad_norm": 187.5780487060547, + "learning_rate": 5.487914498795748e-06, + "loss": 22.6779, + "step": 261000 + }, + { + "epoch": 0.5272567136802725, + "grad_norm": 206.28851318359375, + "learning_rate": 5.487567097708298e-06, + "loss": 8.0016, + "step": 261010 + }, + { + "epoch": 0.5272769143129563, + "grad_norm": 516.361572265625, + "learning_rate": 5.487219694244505e-06, + "loss": 25.4137, + "step": 261020 + }, + { + "epoch": 0.5272971149456401, + "grad_norm": 166.94139099121094, + "learning_rate": 5.48687228840606e-06, + "loss": 21.2136, + "step": 261030 + }, + { + "epoch": 0.527317315578324, + "grad_norm": 1025.854736328125, + "learning_rate": 5.48652488019466e-06, + "loss": 45.8827, + "step": 261040 + }, + { + "epoch": 0.5273375162110078, + "grad_norm": 244.59588623046875, + "learning_rate": 
5.486177469611999e-06, + "loss": 21.5262, + "step": 261050 + }, + { + "epoch": 0.5273577168436916, + "grad_norm": 22.894620895385742, + "learning_rate": 5.485830056659763e-06, + "loss": 19.5141, + "step": 261060 + }, + { + "epoch": 0.5273779174763754, + "grad_norm": 74.53453826904297, + "learning_rate": 5.4854826413396546e-06, + "loss": 8.8525, + "step": 261070 + }, + { + "epoch": 0.5273981181090591, + "grad_norm": 1197.507080078125, + "learning_rate": 5.485135223653362e-06, + "loss": 25.0771, + "step": 261080 + }, + { + "epoch": 0.5274183187417429, + "grad_norm": 365.26715087890625, + "learning_rate": 5.484787803602577e-06, + "loss": 18.7501, + "step": 261090 + }, + { + "epoch": 0.5274385193744268, + "grad_norm": 415.0979309082031, + "learning_rate": 5.484440381188997e-06, + "loss": 22.1903, + "step": 261100 + }, + { + "epoch": 0.5274587200071106, + "grad_norm": 46.778907775878906, + "learning_rate": 5.484092956414312e-06, + "loss": 16.4155, + "step": 261110 + }, + { + "epoch": 0.5274789206397944, + "grad_norm": 499.7535400390625, + "learning_rate": 5.483745529280219e-06, + "loss": 18.5447, + "step": 261120 + }, + { + "epoch": 0.5274991212724782, + "grad_norm": 426.1343994140625, + "learning_rate": 5.4833980997884054e-06, + "loss": 12.4086, + "step": 261130 + }, + { + "epoch": 0.527519321905162, + "grad_norm": 275.7474060058594, + "learning_rate": 5.483050667940571e-06, + "loss": 15.0397, + "step": 261140 + }, + { + "epoch": 0.5275395225378459, + "grad_norm": 797.1040649414062, + "learning_rate": 5.482703233738405e-06, + "loss": 31.8337, + "step": 261150 + }, + { + "epoch": 0.5275597231705297, + "grad_norm": 308.7997131347656, + "learning_rate": 5.482355797183602e-06, + "loss": 37.3068, + "step": 261160 + }, + { + "epoch": 0.5275799238032135, + "grad_norm": 614.94189453125, + "learning_rate": 5.482008358277855e-06, + "loss": 22.907, + "step": 261170 + }, + { + "epoch": 0.5276001244358973, + "grad_norm": 459.3724060058594, + "learning_rate": 5.48166091702286e-06, + "loss": 18.8272, + "step": 261180 + }, + { + "epoch": 0.5276203250685811, + "grad_norm": 352.71551513671875, + "learning_rate": 5.481313473420306e-06, + "loss": 28.507, + "step": 261190 + }, + { + "epoch": 0.527640525701265, + "grad_norm": 658.2681274414062, + "learning_rate": 5.480966027471889e-06, + "loss": 19.0675, + "step": 261200 + }, + { + "epoch": 0.5276607263339488, + "grad_norm": 430.00823974609375, + "learning_rate": 5.480618579179301e-06, + "loss": 29.2889, + "step": 261210 + }, + { + "epoch": 0.5276809269666326, + "grad_norm": 448.3904113769531, + "learning_rate": 5.4802711285442375e-06, + "loss": 17.5373, + "step": 261220 + }, + { + "epoch": 0.5277011275993164, + "grad_norm": 440.13897705078125, + "learning_rate": 5.4799236755683916e-06, + "loss": 28.9601, + "step": 261230 + }, + { + "epoch": 0.5277213282320002, + "grad_norm": 285.37274169921875, + "learning_rate": 5.479576220253453e-06, + "loss": 20.8815, + "step": 261240 + }, + { + "epoch": 0.5277415288646841, + "grad_norm": 361.54547119140625, + "learning_rate": 5.4792287626011206e-06, + "loss": 25.0595, + "step": 261250 + }, + { + "epoch": 0.5277617294973679, + "grad_norm": 581.8213500976562, + "learning_rate": 5.478881302613085e-06, + "loss": 13.6178, + "step": 261260 + }, + { + "epoch": 0.5277819301300517, + "grad_norm": 423.68377685546875, + "learning_rate": 5.478533840291039e-06, + "loss": 24.4431, + "step": 261270 + }, + { + "epoch": 0.5278021307627355, + "grad_norm": 391.1540222167969, + "learning_rate": 5.478186375636678e-06, + "loss": 23.6289, + "step": 
261280 + }, + { + "epoch": 0.5278223313954193, + "grad_norm": 696.769287109375, + "learning_rate": 5.477838908651694e-06, + "loss": 26.5889, + "step": 261290 + }, + { + "epoch": 0.5278425320281032, + "grad_norm": 509.2430725097656, + "learning_rate": 5.477491439337782e-06, + "loss": 17.4735, + "step": 261300 + }, + { + "epoch": 0.527862732660787, + "grad_norm": 263.2110290527344, + "learning_rate": 5.477143967696634e-06, + "loss": 12.5045, + "step": 261310 + }, + { + "epoch": 0.5278829332934708, + "grad_norm": 552.67626953125, + "learning_rate": 5.476796493729943e-06, + "loss": 25.14, + "step": 261320 + }, + { + "epoch": 0.5279031339261546, + "grad_norm": 209.8045196533203, + "learning_rate": 5.476449017439406e-06, + "loss": 25.4521, + "step": 261330 + }, + { + "epoch": 0.5279233345588383, + "grad_norm": 316.1282653808594, + "learning_rate": 5.476101538826714e-06, + "loss": 18.1899, + "step": 261340 + }, + { + "epoch": 0.5279435351915222, + "grad_norm": 735.6625366210938, + "learning_rate": 5.4757540578935595e-06, + "loss": 17.8253, + "step": 261350 + }, + { + "epoch": 0.527963735824206, + "grad_norm": 435.79742431640625, + "learning_rate": 5.475406574641637e-06, + "loss": 32.5203, + "step": 261360 + }, + { + "epoch": 0.5279839364568898, + "grad_norm": 236.00254821777344, + "learning_rate": 5.475059089072642e-06, + "loss": 25.0577, + "step": 261370 + }, + { + "epoch": 0.5280041370895736, + "grad_norm": 498.2556457519531, + "learning_rate": 5.474711601188266e-06, + "loss": 20.142, + "step": 261380 + }, + { + "epoch": 0.5280243377222574, + "grad_norm": 131.3717803955078, + "learning_rate": 5.4743641109902045e-06, + "loss": 16.5378, + "step": 261390 + }, + { + "epoch": 0.5280445383549413, + "grad_norm": 601.6570434570312, + "learning_rate": 5.474016618480147e-06, + "loss": 12.4973, + "step": 261400 + }, + { + "epoch": 0.5280647389876251, + "grad_norm": 404.3240051269531, + "learning_rate": 5.473669123659793e-06, + "loss": 27.5379, + "step": 261410 + }, + { + "epoch": 0.5280849396203089, + "grad_norm": 400.5625, + "learning_rate": 5.4733216265308305e-06, + "loss": 8.8741, + "step": 261420 + }, + { + "epoch": 0.5281051402529927, + "grad_norm": 200.19439697265625, + "learning_rate": 5.472974127094957e-06, + "loss": 21.7069, + "step": 261430 + }, + { + "epoch": 0.5281253408856765, + "grad_norm": 34.03194808959961, + "learning_rate": 5.472626625353865e-06, + "loss": 25.374, + "step": 261440 + }, + { + "epoch": 0.5281455415183604, + "grad_norm": 265.70245361328125, + "learning_rate": 5.472279121309248e-06, + "loss": 13.8763, + "step": 261450 + }, + { + "epoch": 0.5281657421510442, + "grad_norm": 331.0539245605469, + "learning_rate": 5.471931614962802e-06, + "loss": 17.9479, + "step": 261460 + }, + { + "epoch": 0.528185942783728, + "grad_norm": 254.81101989746094, + "learning_rate": 5.471584106316216e-06, + "loss": 12.9582, + "step": 261470 + }, + { + "epoch": 0.5282061434164118, + "grad_norm": 425.1376647949219, + "learning_rate": 5.471236595371187e-06, + "loss": 17.6356, + "step": 261480 + }, + { + "epoch": 0.5282263440490956, + "grad_norm": 202.1089324951172, + "learning_rate": 5.470889082129407e-06, + "loss": 27.5508, + "step": 261490 + }, + { + "epoch": 0.5282465446817795, + "grad_norm": 197.38787841796875, + "learning_rate": 5.470541566592573e-06, + "loss": 11.6215, + "step": 261500 + }, + { + "epoch": 0.5282667453144633, + "grad_norm": 307.8194580078125, + "learning_rate": 5.470194048762374e-06, + "loss": 11.7671, + "step": 261510 + }, + { + "epoch": 0.5282869459471471, + "grad_norm": 
186.99232482910156, + "learning_rate": 5.469846528640508e-06, + "loss": 7.1621, + "step": 261520 + }, + { + "epoch": 0.5283071465798309, + "grad_norm": 593.8322143554688, + "learning_rate": 5.469499006228666e-06, + "loss": 16.4775, + "step": 261530 + }, + { + "epoch": 0.5283273472125147, + "grad_norm": 398.6197814941406, + "learning_rate": 5.469151481528543e-06, + "loss": 17.6969, + "step": 261540 + }, + { + "epoch": 0.5283475478451986, + "grad_norm": 964.5626220703125, + "learning_rate": 5.468803954541834e-06, + "loss": 36.3001, + "step": 261550 + }, + { + "epoch": 0.5283677484778824, + "grad_norm": 590.163330078125, + "learning_rate": 5.468456425270229e-06, + "loss": 36.6744, + "step": 261560 + }, + { + "epoch": 0.5283879491105662, + "grad_norm": 377.348388671875, + "learning_rate": 5.468108893715426e-06, + "loss": 20.7374, + "step": 261570 + }, + { + "epoch": 0.52840814974325, + "grad_norm": 678.54541015625, + "learning_rate": 5.467761359879116e-06, + "loss": 18.3416, + "step": 261580 + }, + { + "epoch": 0.5284283503759337, + "grad_norm": 401.6737365722656, + "learning_rate": 5.467413823762994e-06, + "loss": 16.5677, + "step": 261590 + }, + { + "epoch": 0.5284485510086175, + "grad_norm": 260.65948486328125, + "learning_rate": 5.467066285368754e-06, + "loss": 28.8671, + "step": 261600 + }, + { + "epoch": 0.5284687516413014, + "grad_norm": 184.55581665039062, + "learning_rate": 5.466718744698089e-06, + "loss": 12.4206, + "step": 261610 + }, + { + "epoch": 0.5284889522739852, + "grad_norm": 484.190185546875, + "learning_rate": 5.4663712017526946e-06, + "loss": 21.1198, + "step": 261620 + }, + { + "epoch": 0.528509152906669, + "grad_norm": 331.16400146484375, + "learning_rate": 5.466023656534263e-06, + "loss": 30.1253, + "step": 261630 + }, + { + "epoch": 0.5285293535393528, + "grad_norm": 388.0387268066406, + "learning_rate": 5.4656761090444875e-06, + "loss": 11.3414, + "step": 261640 + }, + { + "epoch": 0.5285495541720366, + "grad_norm": 256.4833679199219, + "learning_rate": 5.465328559285064e-06, + "loss": 16.1876, + "step": 261650 + }, + { + "epoch": 0.5285697548047205, + "grad_norm": 202.82069396972656, + "learning_rate": 5.464981007257686e-06, + "loss": 13.6507, + "step": 261660 + }, + { + "epoch": 0.5285899554374043, + "grad_norm": 369.3669738769531, + "learning_rate": 5.4646334529640454e-06, + "loss": 21.4978, + "step": 261670 + }, + { + "epoch": 0.5286101560700881, + "grad_norm": 597.6946411132812, + "learning_rate": 5.46428589640584e-06, + "loss": 47.2797, + "step": 261680 + }, + { + "epoch": 0.5286303567027719, + "grad_norm": 237.5253448486328, + "learning_rate": 5.46393833758476e-06, + "loss": 22.3166, + "step": 261690 + }, + { + "epoch": 0.5286505573354557, + "grad_norm": 517.5831909179688, + "learning_rate": 5.463590776502501e-06, + "loss": 10.3808, + "step": 261700 + }, + { + "epoch": 0.5286707579681396, + "grad_norm": 714.017333984375, + "learning_rate": 5.463243213160758e-06, + "loss": 24.9592, + "step": 261710 + }, + { + "epoch": 0.5286909586008234, + "grad_norm": 165.38265991210938, + "learning_rate": 5.462895647561222e-06, + "loss": 11.7884, + "step": 261720 + }, + { + "epoch": 0.5287111592335072, + "grad_norm": 323.59521484375, + "learning_rate": 5.46254807970559e-06, + "loss": 20.3679, + "step": 261730 + }, + { + "epoch": 0.528731359866191, + "grad_norm": 516.4874267578125, + "learning_rate": 5.462200509595553e-06, + "loss": 14.5833, + "step": 261740 + }, + { + "epoch": 0.5287515604988748, + "grad_norm": 424.514892578125, + "learning_rate": 5.461852937232809e-06, + 
"loss": 22.5686, + "step": 261750 + }, + { + "epoch": 0.5287717611315587, + "grad_norm": 448.6566467285156, + "learning_rate": 5.461505362619048e-06, + "loss": 14.5051, + "step": 261760 + }, + { + "epoch": 0.5287919617642425, + "grad_norm": 690.8274536132812, + "learning_rate": 5.4611577857559676e-06, + "loss": 24.0594, + "step": 261770 + }, + { + "epoch": 0.5288121623969263, + "grad_norm": 214.0674591064453, + "learning_rate": 5.460810206645258e-06, + "loss": 9.9378, + "step": 261780 + }, + { + "epoch": 0.5288323630296101, + "grad_norm": 643.51513671875, + "learning_rate": 5.460462625288617e-06, + "loss": 25.3458, + "step": 261790 + }, + { + "epoch": 0.5288525636622939, + "grad_norm": 1006.0845336914062, + "learning_rate": 5.460115041687737e-06, + "loss": 22.4337, + "step": 261800 + }, + { + "epoch": 0.5288727642949778, + "grad_norm": 6.621699333190918, + "learning_rate": 5.4597674558443114e-06, + "loss": 27.015, + "step": 261810 + }, + { + "epoch": 0.5288929649276616, + "grad_norm": 489.2602844238281, + "learning_rate": 5.459419867760034e-06, + "loss": 21.4121, + "step": 261820 + }, + { + "epoch": 0.5289131655603454, + "grad_norm": 0.0008042035042308271, + "learning_rate": 5.4590722774366015e-06, + "loss": 21.7596, + "step": 261830 + }, + { + "epoch": 0.5289333661930292, + "grad_norm": 517.4910888671875, + "learning_rate": 5.458724684875707e-06, + "loss": 23.4646, + "step": 261840 + }, + { + "epoch": 0.5289535668257129, + "grad_norm": 464.22216796875, + "learning_rate": 5.4583770900790415e-06, + "loss": 20.7891, + "step": 261850 + }, + { + "epoch": 0.5289737674583967, + "grad_norm": 373.5762023925781, + "learning_rate": 5.458029493048303e-06, + "loss": 12.5113, + "step": 261860 + }, + { + "epoch": 0.5289939680910806, + "grad_norm": 287.6373596191406, + "learning_rate": 5.457681893785185e-06, + "loss": 15.555, + "step": 261870 + }, + { + "epoch": 0.5290141687237644, + "grad_norm": 267.95806884765625, + "learning_rate": 5.45733429229138e-06, + "loss": 12.9552, + "step": 261880 + }, + { + "epoch": 0.5290343693564482, + "grad_norm": 231.57301330566406, + "learning_rate": 5.456986688568584e-06, + "loss": 23.6357, + "step": 261890 + }, + { + "epoch": 0.529054569989132, + "grad_norm": 631.8321533203125, + "learning_rate": 5.456639082618489e-06, + "loss": 15.179, + "step": 261900 + }, + { + "epoch": 0.5290747706218158, + "grad_norm": 250.34884643554688, + "learning_rate": 5.456291474442792e-06, + "loss": 27.8501, + "step": 261910 + }, + { + "epoch": 0.5290949712544997, + "grad_norm": 242.3703155517578, + "learning_rate": 5.455943864043185e-06, + "loss": 14.8319, + "step": 261920 + }, + { + "epoch": 0.5291151718871835, + "grad_norm": 227.57235717773438, + "learning_rate": 5.4555962514213624e-06, + "loss": 33.865, + "step": 261930 + }, + { + "epoch": 0.5291353725198673, + "grad_norm": 166.92816162109375, + "learning_rate": 5.4552486365790196e-06, + "loss": 13.2772, + "step": 261940 + }, + { + "epoch": 0.5291555731525511, + "grad_norm": 155.92852783203125, + "learning_rate": 5.454901019517851e-06, + "loss": 14.4723, + "step": 261950 + }, + { + "epoch": 0.529175773785235, + "grad_norm": 267.2330017089844, + "learning_rate": 5.454553400239548e-06, + "loss": 9.4315, + "step": 261960 + }, + { + "epoch": 0.5291959744179188, + "grad_norm": 326.3690490722656, + "learning_rate": 5.454205778745808e-06, + "loss": 11.2432, + "step": 261970 + }, + { + "epoch": 0.5292161750506026, + "grad_norm": 183.72789001464844, + "learning_rate": 5.453858155038324e-06, + "loss": 15.6313, + "step": 261980 + }, + { + "epoch": 
0.5292363756832864, + "grad_norm": 476.95166015625, + "learning_rate": 5.45351052911879e-06, + "loss": 23.1629, + "step": 261990 + }, + { + "epoch": 0.5292565763159702, + "grad_norm": 203.19126892089844, + "learning_rate": 5.453162900988902e-06, + "loss": 23.9593, + "step": 262000 + }, + { + "epoch": 0.529276776948654, + "grad_norm": 998.18310546875, + "learning_rate": 5.452815270650351e-06, + "loss": 27.4498, + "step": 262010 + }, + { + "epoch": 0.5292969775813379, + "grad_norm": 193.37799072265625, + "learning_rate": 5.452467638104834e-06, + "loss": 21.1561, + "step": 262020 + }, + { + "epoch": 0.5293171782140217, + "grad_norm": 324.4336853027344, + "learning_rate": 5.452120003354046e-06, + "loss": 23.6304, + "step": 262030 + }, + { + "epoch": 0.5293373788467055, + "grad_norm": 529.0345458984375, + "learning_rate": 5.451772366399678e-06, + "loss": 31.1582, + "step": 262040 + }, + { + "epoch": 0.5293575794793893, + "grad_norm": 330.73876953125, + "learning_rate": 5.451424727243428e-06, + "loss": 19.3234, + "step": 262050 + }, + { + "epoch": 0.5293777801120731, + "grad_norm": 322.2069091796875, + "learning_rate": 5.451077085886987e-06, + "loss": 28.1488, + "step": 262060 + }, + { + "epoch": 0.529397980744757, + "grad_norm": 409.0740051269531, + "learning_rate": 5.450729442332052e-06, + "loss": 24.9972, + "step": 262070 + }, + { + "epoch": 0.5294181813774408, + "grad_norm": 347.320068359375, + "learning_rate": 5.450381796580317e-06, + "loss": 17.1953, + "step": 262080 + }, + { + "epoch": 0.5294383820101246, + "grad_norm": 373.0260925292969, + "learning_rate": 5.450034148633474e-06, + "loss": 19.227, + "step": 262090 + }, + { + "epoch": 0.5294585826428083, + "grad_norm": 339.3687438964844, + "learning_rate": 5.449686498493219e-06, + "loss": 17.1814, + "step": 262100 + }, + { + "epoch": 0.5294787832754921, + "grad_norm": 316.0090637207031, + "learning_rate": 5.449338846161248e-06, + "loss": 14.4609, + "step": 262110 + }, + { + "epoch": 0.529498983908176, + "grad_norm": 207.0604705810547, + "learning_rate": 5.448991191639254e-06, + "loss": 13.3553, + "step": 262120 + }, + { + "epoch": 0.5295191845408598, + "grad_norm": 262.06134033203125, + "learning_rate": 5.448643534928931e-06, + "loss": 27.3413, + "step": 262130 + }, + { + "epoch": 0.5295393851735436, + "grad_norm": 409.28448486328125, + "learning_rate": 5.448295876031974e-06, + "loss": 26.6529, + "step": 262140 + }, + { + "epoch": 0.5295595858062274, + "grad_norm": 401.1116638183594, + "learning_rate": 5.447948214950078e-06, + "loss": 12.4856, + "step": 262150 + }, + { + "epoch": 0.5295797864389112, + "grad_norm": 510.2715759277344, + "learning_rate": 5.447600551684936e-06, + "loss": 21.6855, + "step": 262160 + }, + { + "epoch": 0.5295999870715951, + "grad_norm": 93.42176818847656, + "learning_rate": 5.4472528862382415e-06, + "loss": 28.3929, + "step": 262170 + }, + { + "epoch": 0.5296201877042789, + "grad_norm": 619.77001953125, + "learning_rate": 5.446905218611694e-06, + "loss": 19.0133, + "step": 262180 + }, + { + "epoch": 0.5296403883369627, + "grad_norm": 194.4291534423828, + "learning_rate": 5.4465575488069795e-06, + "loss": 22.0499, + "step": 262190 + }, + { + "epoch": 0.5296605889696465, + "grad_norm": 246.71034240722656, + "learning_rate": 5.446209876825803e-06, + "loss": 10.5823, + "step": 262200 + }, + { + "epoch": 0.5296807896023303, + "grad_norm": 1150.928955078125, + "learning_rate": 5.445862202669851e-06, + "loss": 25.7024, + "step": 262210 + }, + { + "epoch": 0.5297009902350142, + "grad_norm": 501.6942443847656, + 
"learning_rate": 5.445514526340822e-06, + "loss": 14.3524, + "step": 262220 + }, + { + "epoch": 0.529721190867698, + "grad_norm": 231.6753387451172, + "learning_rate": 5.445166847840409e-06, + "loss": 32.6382, + "step": 262230 + }, + { + "epoch": 0.5297413915003818, + "grad_norm": 387.2635192871094, + "learning_rate": 5.444819167170306e-06, + "loss": 33.5837, + "step": 262240 + }, + { + "epoch": 0.5297615921330656, + "grad_norm": 553.984619140625, + "learning_rate": 5.4444714843322085e-06, + "loss": 20.1767, + "step": 262250 + }, + { + "epoch": 0.5297817927657494, + "grad_norm": 254.49600219726562, + "learning_rate": 5.444123799327811e-06, + "loss": 20.5725, + "step": 262260 + }, + { + "epoch": 0.5298019933984333, + "grad_norm": 629.6550903320312, + "learning_rate": 5.443776112158808e-06, + "loss": 22.7081, + "step": 262270 + }, + { + "epoch": 0.5298221940311171, + "grad_norm": 13.064043998718262, + "learning_rate": 5.443428422826893e-06, + "loss": 27.7504, + "step": 262280 + }, + { + "epoch": 0.5298423946638009, + "grad_norm": 439.01837158203125, + "learning_rate": 5.443080731333764e-06, + "loss": 24.9362, + "step": 262290 + }, + { + "epoch": 0.5298625952964847, + "grad_norm": 493.9464111328125, + "learning_rate": 5.442733037681112e-06, + "loss": 14.7247, + "step": 262300 + }, + { + "epoch": 0.5298827959291685, + "grad_norm": 275.8974914550781, + "learning_rate": 5.442385341870633e-06, + "loss": 23.168, + "step": 262310 + }, + { + "epoch": 0.5299029965618524, + "grad_norm": 209.71031188964844, + "learning_rate": 5.442037643904022e-06, + "loss": 8.5229, + "step": 262320 + }, + { + "epoch": 0.5299231971945362, + "grad_norm": 402.67315673828125, + "learning_rate": 5.4416899437829705e-06, + "loss": 19.1335, + "step": 262330 + }, + { + "epoch": 0.52994339782722, + "grad_norm": 658.8362426757812, + "learning_rate": 5.441342241509179e-06, + "loss": 34.262, + "step": 262340 + }, + { + "epoch": 0.5299635984599038, + "grad_norm": 238.89524841308594, + "learning_rate": 5.440994537084337e-06, + "loss": 17.8649, + "step": 262350 + }, + { + "epoch": 0.5299837990925875, + "grad_norm": 655.3569946289062, + "learning_rate": 5.440646830510142e-06, + "loss": 19.8445, + "step": 262360 + }, + { + "epoch": 0.5300039997252713, + "grad_norm": 324.5028991699219, + "learning_rate": 5.440299121788289e-06, + "loss": 12.9395, + "step": 262370 + }, + { + "epoch": 0.5300242003579552, + "grad_norm": 69.92562866210938, + "learning_rate": 5.439951410920469e-06, + "loss": 18.4332, + "step": 262380 + }, + { + "epoch": 0.530044400990639, + "grad_norm": 413.9827575683594, + "learning_rate": 5.439603697908381e-06, + "loss": 38.8664, + "step": 262390 + }, + { + "epoch": 0.5300646016233228, + "grad_norm": 273.46185302734375, + "learning_rate": 5.439255982753717e-06, + "loss": 11.9836, + "step": 262400 + }, + { + "epoch": 0.5300848022560066, + "grad_norm": 353.564208984375, + "learning_rate": 5.438908265458172e-06, + "loss": 25.7608, + "step": 262410 + }, + { + "epoch": 0.5301050028886904, + "grad_norm": 431.2625427246094, + "learning_rate": 5.438560546023442e-06, + "loss": 17.5037, + "step": 262420 + }, + { + "epoch": 0.5301252035213743, + "grad_norm": 985.5978393554688, + "learning_rate": 5.438212824451221e-06, + "loss": 26.1163, + "step": 262430 + }, + { + "epoch": 0.5301454041540581, + "grad_norm": 507.8580017089844, + "learning_rate": 5.437865100743205e-06, + "loss": 31.0964, + "step": 262440 + }, + { + "epoch": 0.5301656047867419, + "grad_norm": 264.5030517578125, + "learning_rate": 5.437517374901087e-06, + "loss": 28.6044, 
+ "step": 262450 + }, + { + "epoch": 0.5301858054194257, + "grad_norm": 303.4159851074219, + "learning_rate": 5.437169646926561e-06, + "loss": 21.8809, + "step": 262460 + }, + { + "epoch": 0.5302060060521095, + "grad_norm": 243.92320251464844, + "learning_rate": 5.436821916821325e-06, + "loss": 18.0218, + "step": 262470 + }, + { + "epoch": 0.5302262066847934, + "grad_norm": 497.0818176269531, + "learning_rate": 5.436474184587071e-06, + "loss": 24.2977, + "step": 262480 + }, + { + "epoch": 0.5302464073174772, + "grad_norm": 230.01681518554688, + "learning_rate": 5.436126450225495e-06, + "loss": 13.5655, + "step": 262490 + }, + { + "epoch": 0.530266607950161, + "grad_norm": 855.636962890625, + "learning_rate": 5.435778713738292e-06, + "loss": 18.769, + "step": 262500 + }, + { + "epoch": 0.5302868085828448, + "grad_norm": 65.8647689819336, + "learning_rate": 5.435430975127155e-06, + "loss": 11.7633, + "step": 262510 + }, + { + "epoch": 0.5303070092155286, + "grad_norm": 525.977294921875, + "learning_rate": 5.435083234393782e-06, + "loss": 10.8237, + "step": 262520 + }, + { + "epoch": 0.5303272098482125, + "grad_norm": 0.0, + "learning_rate": 5.434735491539866e-06, + "loss": 19.7727, + "step": 262530 + }, + { + "epoch": 0.5303474104808963, + "grad_norm": 242.5271453857422, + "learning_rate": 5.4343877465671e-06, + "loss": 19.3758, + "step": 262540 + }, + { + "epoch": 0.5303676111135801, + "grad_norm": 199.46829223632812, + "learning_rate": 5.434039999477182e-06, + "loss": 22.1765, + "step": 262550 + }, + { + "epoch": 0.5303878117462639, + "grad_norm": 486.7574462890625, + "learning_rate": 5.433692250271806e-06, + "loss": 25.3172, + "step": 262560 + }, + { + "epoch": 0.5304080123789477, + "grad_norm": 491.2746887207031, + "learning_rate": 5.433344498952666e-06, + "loss": 17.1765, + "step": 262570 + }, + { + "epoch": 0.5304282130116316, + "grad_norm": 347.827392578125, + "learning_rate": 5.432996745521458e-06, + "loss": 13.0126, + "step": 262580 + }, + { + "epoch": 0.5304484136443154, + "grad_norm": 342.2633972167969, + "learning_rate": 5.4326489899798765e-06, + "loss": 26.3243, + "step": 262590 + }, + { + "epoch": 0.5304686142769992, + "grad_norm": 442.13446044921875, + "learning_rate": 5.432301232329615e-06, + "loss": 18.282, + "step": 262600 + }, + { + "epoch": 0.530488814909683, + "grad_norm": 99.82096099853516, + "learning_rate": 5.431953472572372e-06, + "loss": 16.625, + "step": 262610 + }, + { + "epoch": 0.5305090155423667, + "grad_norm": 167.40602111816406, + "learning_rate": 5.431605710709838e-06, + "loss": 23.094, + "step": 262620 + }, + { + "epoch": 0.5305292161750506, + "grad_norm": 457.5943603515625, + "learning_rate": 5.431257946743711e-06, + "loss": 15.3913, + "step": 262630 + }, + { + "epoch": 0.5305494168077344, + "grad_norm": 643.6044311523438, + "learning_rate": 5.430910180675685e-06, + "loss": 16.8985, + "step": 262640 + }, + { + "epoch": 0.5305696174404182, + "grad_norm": 727.259521484375, + "learning_rate": 5.430562412507454e-06, + "loss": 21.613, + "step": 262650 + }, + { + "epoch": 0.530589818073102, + "grad_norm": 164.78611755371094, + "learning_rate": 5.430214642240716e-06, + "loss": 17.1354, + "step": 262660 + }, + { + "epoch": 0.5306100187057858, + "grad_norm": 858.7236328125, + "learning_rate": 5.429866869877163e-06, + "loss": 18.4539, + "step": 262670 + }, + { + "epoch": 0.5306302193384697, + "grad_norm": 365.9755859375, + "learning_rate": 5.429519095418492e-06, + "loss": 12.9498, + "step": 262680 + }, + { + "epoch": 0.5306504199711535, + "grad_norm": 
76.51131439208984, + "learning_rate": 5.429171318866395e-06, + "loss": 31.2644, + "step": 262690 + }, + { + "epoch": 0.5306706206038373, + "grad_norm": 505.0215148925781, + "learning_rate": 5.42882354022257e-06, + "loss": 16.3202, + "step": 262700 + }, + { + "epoch": 0.5306908212365211, + "grad_norm": 508.2402038574219, + "learning_rate": 5.428475759488711e-06, + "loss": 29.6478, + "step": 262710 + }, + { + "epoch": 0.5307110218692049, + "grad_norm": 233.74713134765625, + "learning_rate": 5.428127976666513e-06, + "loss": 12.7844, + "step": 262720 + }, + { + "epoch": 0.5307312225018888, + "grad_norm": 324.79144287109375, + "learning_rate": 5.4277801917576724e-06, + "loss": 22.4773, + "step": 262730 + }, + { + "epoch": 0.5307514231345726, + "grad_norm": 248.5419921875, + "learning_rate": 5.427432404763882e-06, + "loss": 17.6601, + "step": 262740 + }, + { + "epoch": 0.5307716237672564, + "grad_norm": 476.61553955078125, + "learning_rate": 5.4270846156868386e-06, + "loss": 25.1193, + "step": 262750 + }, + { + "epoch": 0.5307918243999402, + "grad_norm": 211.3850860595703, + "learning_rate": 5.426736824528236e-06, + "loss": 15.8085, + "step": 262760 + }, + { + "epoch": 0.530812025032624, + "grad_norm": 348.0032653808594, + "learning_rate": 5.426389031289771e-06, + "loss": 21.6775, + "step": 262770 + }, + { + "epoch": 0.5308322256653079, + "grad_norm": 217.60696411132812, + "learning_rate": 5.426041235973134e-06, + "loss": 10.3254, + "step": 262780 + }, + { + "epoch": 0.5308524262979917, + "grad_norm": 178.12786865234375, + "learning_rate": 5.4256934385800275e-06, + "loss": 20.2644, + "step": 262790 + }, + { + "epoch": 0.5308726269306755, + "grad_norm": 539.5770263671875, + "learning_rate": 5.425345639112141e-06, + "loss": 15.3722, + "step": 262800 + }, + { + "epoch": 0.5308928275633593, + "grad_norm": 329.5556335449219, + "learning_rate": 5.424997837571172e-06, + "loss": 19.8378, + "step": 262810 + }, + { + "epoch": 0.5309130281960431, + "grad_norm": 478.9924621582031, + "learning_rate": 5.4246500339588144e-06, + "loss": 16.7398, + "step": 262820 + }, + { + "epoch": 0.530933228828727, + "grad_norm": 320.1447448730469, + "learning_rate": 5.4243022282767645e-06, + "loss": 15.0234, + "step": 262830 + }, + { + "epoch": 0.5309534294614108, + "grad_norm": 396.3629150390625, + "learning_rate": 5.4239544205267185e-06, + "loss": 26.5332, + "step": 262840 + }, + { + "epoch": 0.5309736300940946, + "grad_norm": 552.6524047851562, + "learning_rate": 5.423606610710368e-06, + "loss": 24.3753, + "step": 262850 + }, + { + "epoch": 0.5309938307267784, + "grad_norm": 164.86257934570312, + "learning_rate": 5.4232587988294105e-06, + "loss": 21.6404, + "step": 262860 + }, + { + "epoch": 0.5310140313594621, + "grad_norm": 496.59881591796875, + "learning_rate": 5.422910984885542e-06, + "loss": 26.8352, + "step": 262870 + }, + { + "epoch": 0.5310342319921459, + "grad_norm": 390.3055114746094, + "learning_rate": 5.422563168880456e-06, + "loss": 21.1653, + "step": 262880 + }, + { + "epoch": 0.5310544326248298, + "grad_norm": 309.1153564453125, + "learning_rate": 5.422215350815848e-06, + "loss": 16.7842, + "step": 262890 + }, + { + "epoch": 0.5310746332575136, + "grad_norm": 361.0472106933594, + "learning_rate": 5.4218675306934145e-06, + "loss": 16.1422, + "step": 262900 + }, + { + "epoch": 0.5310948338901974, + "grad_norm": 293.56005859375, + "learning_rate": 5.42151970851485e-06, + "loss": 25.3251, + "step": 262910 + }, + { + "epoch": 0.5311150345228812, + "grad_norm": 444.24945068359375, + "learning_rate": 
5.4211718842818485e-06, + "loss": 10.4506, + "step": 262920 + }, + { + "epoch": 0.531135235155565, + "grad_norm": 623.7088623046875, + "learning_rate": 5.420824057996107e-06, + "loss": 13.4804, + "step": 262930 + }, + { + "epoch": 0.5311554357882489, + "grad_norm": 291.3700866699219, + "learning_rate": 5.420476229659319e-06, + "loss": 19.6258, + "step": 262940 + }, + { + "epoch": 0.5311756364209327, + "grad_norm": 892.9168090820312, + "learning_rate": 5.420128399273183e-06, + "loss": 25.5505, + "step": 262950 + }, + { + "epoch": 0.5311958370536165, + "grad_norm": 189.8000946044922, + "learning_rate": 5.419780566839389e-06, + "loss": 16.807, + "step": 262960 + }, + { + "epoch": 0.5312160376863003, + "grad_norm": 361.9689025878906, + "learning_rate": 5.419432732359637e-06, + "loss": 11.4353, + "step": 262970 + }, + { + "epoch": 0.5312362383189841, + "grad_norm": 395.75323486328125, + "learning_rate": 5.419084895835621e-06, + "loss": 14.5804, + "step": 262980 + }, + { + "epoch": 0.531256438951668, + "grad_norm": 632.162841796875, + "learning_rate": 5.418737057269037e-06, + "loss": 27.1474, + "step": 262990 + }, + { + "epoch": 0.5312766395843518, + "grad_norm": 764.3357543945312, + "learning_rate": 5.41838921666158e-06, + "loss": 24.8534, + "step": 263000 + }, + { + "epoch": 0.5312968402170356, + "grad_norm": 414.3321838378906, + "learning_rate": 5.418041374014942e-06, + "loss": 23.2771, + "step": 263010 + }, + { + "epoch": 0.5313170408497194, + "grad_norm": 606.6253051757812, + "learning_rate": 5.417693529330822e-06, + "loss": 19.7729, + "step": 263020 + }, + { + "epoch": 0.5313372414824032, + "grad_norm": 281.4236755371094, + "learning_rate": 5.417345682610914e-06, + "loss": 19.4735, + "step": 263030 + }, + { + "epoch": 0.5313574421150871, + "grad_norm": 360.08984375, + "learning_rate": 5.416997833856914e-06, + "loss": 30.3656, + "step": 263040 + }, + { + "epoch": 0.5313776427477709, + "grad_norm": 422.86907958984375, + "learning_rate": 5.416649983070518e-06, + "loss": 13.8087, + "step": 263050 + }, + { + "epoch": 0.5313978433804547, + "grad_norm": 122.8550033569336, + "learning_rate": 5.4163021302534204e-06, + "loss": 29.5725, + "step": 263060 + }, + { + "epoch": 0.5314180440131385, + "grad_norm": 382.35406494140625, + "learning_rate": 5.415954275407316e-06, + "loss": 15.5037, + "step": 263070 + }, + { + "epoch": 0.5314382446458223, + "grad_norm": 160.0564727783203, + "learning_rate": 5.415606418533901e-06, + "loss": 21.0315, + "step": 263080 + }, + { + "epoch": 0.5314584452785062, + "grad_norm": 151.24624633789062, + "learning_rate": 5.4152585596348704e-06, + "loss": 25.3234, + "step": 263090 + }, + { + "epoch": 0.53147864591119, + "grad_norm": 480.6485595703125, + "learning_rate": 5.41491069871192e-06, + "loss": 10.9988, + "step": 263100 + }, + { + "epoch": 0.5314988465438738, + "grad_norm": 193.90916442871094, + "learning_rate": 5.414562835766747e-06, + "loss": 30.0569, + "step": 263110 + }, + { + "epoch": 0.5315190471765576, + "grad_norm": 253.57791137695312, + "learning_rate": 5.414214970801041e-06, + "loss": 24.2024, + "step": 263120 + }, + { + "epoch": 0.5315392478092413, + "grad_norm": 846.8713989257812, + "learning_rate": 5.413867103816506e-06, + "loss": 21.1907, + "step": 263130 + }, + { + "epoch": 0.5315594484419252, + "grad_norm": 164.007568359375, + "learning_rate": 5.413519234814831e-06, + "loss": 26.9846, + "step": 263140 + }, + { + "epoch": 0.531579649074609, + "grad_norm": 461.7021179199219, + "learning_rate": 5.413171363797713e-06, + "loss": 12.8953, + "step": 263150 + }, 
+ { + "epoch": 0.5315998497072928, + "grad_norm": 934.0023193359375, + "learning_rate": 5.412823490766849e-06, + "loss": 25.0997, + "step": 263160 + }, + { + "epoch": 0.5316200503399766, + "grad_norm": 208.72476196289062, + "learning_rate": 5.412475615723931e-06, + "loss": 17.7574, + "step": 263170 + }, + { + "epoch": 0.5316402509726604, + "grad_norm": 161.92298889160156, + "learning_rate": 5.41212773867066e-06, + "loss": 30.3754, + "step": 263180 + }, + { + "epoch": 0.5316604516053443, + "grad_norm": 382.603759765625, + "learning_rate": 5.4117798596087265e-06, + "loss": 29.2349, + "step": 263190 + }, + { + "epoch": 0.5316806522380281, + "grad_norm": 222.22035217285156, + "learning_rate": 5.411431978539829e-06, + "loss": 23.2413, + "step": 263200 + }, + { + "epoch": 0.5317008528707119, + "grad_norm": 378.6025695800781, + "learning_rate": 5.411084095465661e-06, + "loss": 25.6467, + "step": 263210 + }, + { + "epoch": 0.5317210535033957, + "grad_norm": 756.3626098632812, + "learning_rate": 5.41073621038792e-06, + "loss": 21.5958, + "step": 263220 + }, + { + "epoch": 0.5317412541360795, + "grad_norm": 386.68011474609375, + "learning_rate": 5.410388323308299e-06, + "loss": 22.2969, + "step": 263230 + }, + { + "epoch": 0.5317614547687634, + "grad_norm": 219.22743225097656, + "learning_rate": 5.410040434228496e-06, + "loss": 10.8826, + "step": 263240 + }, + { + "epoch": 0.5317816554014472, + "grad_norm": 13.215092658996582, + "learning_rate": 5.409692543150206e-06, + "loss": 16.3198, + "step": 263250 + }, + { + "epoch": 0.531801856034131, + "grad_norm": 567.9088134765625, + "learning_rate": 5.409344650075123e-06, + "loss": 25.1716, + "step": 263260 + }, + { + "epoch": 0.5318220566668148, + "grad_norm": 360.5703125, + "learning_rate": 5.4089967550049445e-06, + "loss": 12.8228, + "step": 263270 + }, + { + "epoch": 0.5318422572994986, + "grad_norm": 446.908935546875, + "learning_rate": 5.408648857941365e-06, + "loss": 17.5523, + "step": 263280 + }, + { + "epoch": 0.5318624579321825, + "grad_norm": 444.71173095703125, + "learning_rate": 5.408300958886083e-06, + "loss": 21.1386, + "step": 263290 + }, + { + "epoch": 0.5318826585648663, + "grad_norm": 288.6525573730469, + "learning_rate": 5.4079530578407895e-06, + "loss": 13.9982, + "step": 263300 + }, + { + "epoch": 0.5319028591975501, + "grad_norm": 433.6891174316406, + "learning_rate": 5.407605154807182e-06, + "loss": 11.7726, + "step": 263310 + }, + { + "epoch": 0.5319230598302339, + "grad_norm": 534.1845092773438, + "learning_rate": 5.4072572497869556e-06, + "loss": 15.9662, + "step": 263320 + }, + { + "epoch": 0.5319432604629177, + "grad_norm": 350.7384948730469, + "learning_rate": 5.406909342781809e-06, + "loss": 25.5752, + "step": 263330 + }, + { + "epoch": 0.5319634610956016, + "grad_norm": 232.4453125, + "learning_rate": 5.406561433793435e-06, + "loss": 28.6806, + "step": 263340 + }, + { + "epoch": 0.5319836617282854, + "grad_norm": 550.2820434570312, + "learning_rate": 5.406213522823529e-06, + "loss": 15.2013, + "step": 263350 + }, + { + "epoch": 0.5320038623609692, + "grad_norm": 417.4783630371094, + "learning_rate": 5.4058656098737885e-06, + "loss": 34.7693, + "step": 263360 + }, + { + "epoch": 0.532024062993653, + "grad_norm": 53.68265914916992, + "learning_rate": 5.405517694945907e-06, + "loss": 26.0981, + "step": 263370 + }, + { + "epoch": 0.5320442636263367, + "grad_norm": 616.639404296875, + "learning_rate": 5.405169778041583e-06, + "loss": 11.6597, + "step": 263380 + }, + { + "epoch": 0.5320644642590205, + "grad_norm": 
471.22039794921875, + "learning_rate": 5.404821859162509e-06, + "loss": 19.9855, + "step": 263390 + }, + { + "epoch": 0.5320846648917044, + "grad_norm": 598.6522827148438, + "learning_rate": 5.404473938310384e-06, + "loss": 18.3524, + "step": 263400 + }, + { + "epoch": 0.5321048655243882, + "grad_norm": 20.47283363342285, + "learning_rate": 5.404126015486901e-06, + "loss": 20.8549, + "step": 263410 + }, + { + "epoch": 0.532125066157072, + "grad_norm": 283.7949523925781, + "learning_rate": 5.403778090693758e-06, + "loss": 21.7683, + "step": 263420 + }, + { + "epoch": 0.5321452667897558, + "grad_norm": 192.6545867919922, + "learning_rate": 5.403430163932648e-06, + "loss": 32.1624, + "step": 263430 + }, + { + "epoch": 0.5321654674224396, + "grad_norm": 189.44528198242188, + "learning_rate": 5.403082235205269e-06, + "loss": 14.1211, + "step": 263440 + }, + { + "epoch": 0.5321856680551235, + "grad_norm": 684.8155517578125, + "learning_rate": 5.402734304513316e-06, + "loss": 22.6836, + "step": 263450 + }, + { + "epoch": 0.5322058686878073, + "grad_norm": 173.18606567382812, + "learning_rate": 5.402386371858486e-06, + "loss": 22.5028, + "step": 263460 + }, + { + "epoch": 0.5322260693204911, + "grad_norm": 170.42735290527344, + "learning_rate": 5.402038437242471e-06, + "loss": 21.4767, + "step": 263470 + }, + { + "epoch": 0.5322462699531749, + "grad_norm": 345.851318359375, + "learning_rate": 5.401690500666972e-06, + "loss": 8.1845, + "step": 263480 + }, + { + "epoch": 0.5322664705858587, + "grad_norm": 36.30461502075195, + "learning_rate": 5.401342562133682e-06, + "loss": 17.9214, + "step": 263490 + }, + { + "epoch": 0.5322866712185426, + "grad_norm": 216.7414093017578, + "learning_rate": 5.400994621644294e-06, + "loss": 9.2869, + "step": 263500 + }, + { + "epoch": 0.5323068718512264, + "grad_norm": 907.1314086914062, + "learning_rate": 5.4006466792005105e-06, + "loss": 21.1397, + "step": 263510 + }, + { + "epoch": 0.5323270724839102, + "grad_norm": 406.8268737792969, + "learning_rate": 5.400298734804023e-06, + "loss": 20.9543, + "step": 263520 + }, + { + "epoch": 0.532347273116594, + "grad_norm": 215.2248077392578, + "learning_rate": 5.399950788456526e-06, + "loss": 28.9349, + "step": 263530 + }, + { + "epoch": 0.5323674737492778, + "grad_norm": 479.4017639160156, + "learning_rate": 5.39960284015972e-06, + "loss": 21.6751, + "step": 263540 + }, + { + "epoch": 0.5323876743819617, + "grad_norm": 192.6239776611328, + "learning_rate": 5.399254889915296e-06, + "loss": 24.9698, + "step": 263550 + }, + { + "epoch": 0.5324078750146455, + "grad_norm": 377.7315368652344, + "learning_rate": 5.398906937724954e-06, + "loss": 15.3041, + "step": 263560 + }, + { + "epoch": 0.5324280756473293, + "grad_norm": 252.1598358154297, + "learning_rate": 5.398558983590385e-06, + "loss": 21.2734, + "step": 263570 + }, + { + "epoch": 0.5324482762800131, + "grad_norm": 87.29096984863281, + "learning_rate": 5.398211027513291e-06, + "loss": 27.8237, + "step": 263580 + }, + { + "epoch": 0.5324684769126969, + "grad_norm": 158.6113739013672, + "learning_rate": 5.397863069495364e-06, + "loss": 19.541, + "step": 263590 + }, + { + "epoch": 0.5324886775453808, + "grad_norm": 502.4717712402344, + "learning_rate": 5.3975151095383e-06, + "loss": 35.3605, + "step": 263600 + }, + { + "epoch": 0.5325088781780646, + "grad_norm": 255.55706787109375, + "learning_rate": 5.397167147643796e-06, + "loss": 14.7615, + "step": 263610 + }, + { + "epoch": 0.5325290788107484, + "grad_norm": 555.1712036132812, + "learning_rate": 5.396819183813547e-06, 
+ "loss": 21.8285, + "step": 263620 + }, + { + "epoch": 0.5325492794434322, + "grad_norm": 501.9200744628906, + "learning_rate": 5.396471218049249e-06, + "loss": 9.4004, + "step": 263630 + }, + { + "epoch": 0.5325694800761159, + "grad_norm": 322.0678405761719, + "learning_rate": 5.3961232503526e-06, + "loss": 16.2422, + "step": 263640 + }, + { + "epoch": 0.5325896807087998, + "grad_norm": 29.05496597290039, + "learning_rate": 5.3957752807252925e-06, + "loss": 12.9038, + "step": 263650 + }, + { + "epoch": 0.5326098813414836, + "grad_norm": 376.70440673828125, + "learning_rate": 5.3954273091690245e-06, + "loss": 18.4567, + "step": 263660 + }, + { + "epoch": 0.5326300819741674, + "grad_norm": 471.65032958984375, + "learning_rate": 5.395079335685494e-06, + "loss": 18.5832, + "step": 263670 + }, + { + "epoch": 0.5326502826068512, + "grad_norm": 666.2213745117188, + "learning_rate": 5.394731360276393e-06, + "loss": 23.8404, + "step": 263680 + }, + { + "epoch": 0.532670483239535, + "grad_norm": 320.22027587890625, + "learning_rate": 5.394383382943419e-06, + "loss": 12.4747, + "step": 263690 + }, + { + "epoch": 0.5326906838722189, + "grad_norm": 524.0413208007812, + "learning_rate": 5.394035403688268e-06, + "loss": 19.0076, + "step": 263700 + }, + { + "epoch": 0.5327108845049027, + "grad_norm": 131.5553436279297, + "learning_rate": 5.393687422512637e-06, + "loss": 16.9744, + "step": 263710 + }, + { + "epoch": 0.5327310851375865, + "grad_norm": 190.25286865234375, + "learning_rate": 5.393339439418222e-06, + "loss": 25.3414, + "step": 263720 + }, + { + "epoch": 0.5327512857702703, + "grad_norm": 732.912353515625, + "learning_rate": 5.392991454406716e-06, + "loss": 16.244, + "step": 263730 + }, + { + "epoch": 0.5327714864029541, + "grad_norm": 690.7264404296875, + "learning_rate": 5.39264346747982e-06, + "loss": 20.082, + "step": 263740 + }, + { + "epoch": 0.532791687035638, + "grad_norm": 474.1722412109375, + "learning_rate": 5.392295478639226e-06, + "loss": 28.8244, + "step": 263750 + }, + { + "epoch": 0.5328118876683218, + "grad_norm": 269.3618469238281, + "learning_rate": 5.391947487886631e-06, + "loss": 15.4086, + "step": 263760 + }, + { + "epoch": 0.5328320883010056, + "grad_norm": 628.8046875, + "learning_rate": 5.391599495223732e-06, + "loss": 19.8448, + "step": 263770 + }, + { + "epoch": 0.5328522889336894, + "grad_norm": 449.0356750488281, + "learning_rate": 5.391251500652224e-06, + "loss": 16.8209, + "step": 263780 + }, + { + "epoch": 0.5328724895663732, + "grad_norm": 577.2578735351562, + "learning_rate": 5.390903504173805e-06, + "loss": 10.4767, + "step": 263790 + }, + { + "epoch": 0.532892690199057, + "grad_norm": 622.21875, + "learning_rate": 5.390555505790168e-06, + "loss": 20.1476, + "step": 263800 + }, + { + "epoch": 0.5329128908317409, + "grad_norm": 592.1292114257812, + "learning_rate": 5.390207505503012e-06, + "loss": 14.5329, + "step": 263810 + }, + { + "epoch": 0.5329330914644247, + "grad_norm": 421.0754699707031, + "learning_rate": 5.389859503314031e-06, + "loss": 10.4641, + "step": 263820 + }, + { + "epoch": 0.5329532920971085, + "grad_norm": 457.61810302734375, + "learning_rate": 5.389511499224925e-06, + "loss": 26.411, + "step": 263830 + }, + { + "epoch": 0.5329734927297923, + "grad_norm": 511.234619140625, + "learning_rate": 5.389163493237382e-06, + "loss": 14.4797, + "step": 263840 + }, + { + "epoch": 0.5329936933624762, + "grad_norm": 1096.11767578125, + "learning_rate": 5.388815485353109e-06, + "loss": 38.672, + "step": 263850 + }, + { + "epoch": 0.53301389399516, + 
"grad_norm": 571.6307983398438, + "learning_rate": 5.388467475573792e-06, + "loss": 25.8154, + "step": 263860 + }, + { + "epoch": 0.5330340946278438, + "grad_norm": 466.66357421875, + "learning_rate": 5.388119463901134e-06, + "loss": 17.001, + "step": 263870 + }, + { + "epoch": 0.5330542952605276, + "grad_norm": 517.427978515625, + "learning_rate": 5.3877714503368285e-06, + "loss": 11.6967, + "step": 263880 + }, + { + "epoch": 0.5330744958932114, + "grad_norm": 994.4615478515625, + "learning_rate": 5.387423434882571e-06, + "loss": 30.1973, + "step": 263890 + }, + { + "epoch": 0.5330946965258951, + "grad_norm": 829.72314453125, + "learning_rate": 5.3870754175400595e-06, + "loss": 28.2975, + "step": 263900 + }, + { + "epoch": 0.533114897158579, + "grad_norm": 346.5272521972656, + "learning_rate": 5.386727398310989e-06, + "loss": 26.1818, + "step": 263910 + }, + { + "epoch": 0.5331350977912628, + "grad_norm": 585.033203125, + "learning_rate": 5.386379377197056e-06, + "loss": 18.1059, + "step": 263920 + }, + { + "epoch": 0.5331552984239466, + "grad_norm": 358.1483154296875, + "learning_rate": 5.386031354199956e-06, + "loss": 19.7494, + "step": 263930 + }, + { + "epoch": 0.5331754990566304, + "grad_norm": 246.29762268066406, + "learning_rate": 5.385683329321387e-06, + "loss": 23.5754, + "step": 263940 + }, + { + "epoch": 0.5331956996893142, + "grad_norm": 184.22647094726562, + "learning_rate": 5.385335302563046e-06, + "loss": 20.0688, + "step": 263950 + }, + { + "epoch": 0.5332159003219981, + "grad_norm": 810.5045776367188, + "learning_rate": 5.384987273926625e-06, + "loss": 13.1073, + "step": 263960 + }, + { + "epoch": 0.5332361009546819, + "grad_norm": 677.8699951171875, + "learning_rate": 5.384639243413824e-06, + "loss": 30.2363, + "step": 263970 + }, + { + "epoch": 0.5332563015873657, + "grad_norm": 516.2007446289062, + "learning_rate": 5.384291211026337e-06, + "loss": 17.3284, + "step": 263980 + }, + { + "epoch": 0.5332765022200495, + "grad_norm": 1023.1954345703125, + "learning_rate": 5.383943176765862e-06, + "loss": 18.264, + "step": 263990 + }, + { + "epoch": 0.5332967028527333, + "grad_norm": 2461.611083984375, + "learning_rate": 5.383595140634093e-06, + "loss": 28.0451, + "step": 264000 + }, + { + "epoch": 0.5333169034854172, + "grad_norm": 215.49591064453125, + "learning_rate": 5.383247102632731e-06, + "loss": 16.7719, + "step": 264010 + }, + { + "epoch": 0.533337104118101, + "grad_norm": 626.0396118164062, + "learning_rate": 5.3828990627634655e-06, + "loss": 27.2502, + "step": 264020 + }, + { + "epoch": 0.5333573047507848, + "grad_norm": 960.50927734375, + "learning_rate": 5.382551021027999e-06, + "loss": 17.597, + "step": 264030 + }, + { + "epoch": 0.5333775053834686, + "grad_norm": 663.5912475585938, + "learning_rate": 5.382202977428025e-06, + "loss": 22.6198, + "step": 264040 + }, + { + "epoch": 0.5333977060161524, + "grad_norm": 67.04728698730469, + "learning_rate": 5.381854931965238e-06, + "loss": 18.9294, + "step": 264050 + }, + { + "epoch": 0.5334179066488363, + "grad_norm": 393.4915771484375, + "learning_rate": 5.381506884641339e-06, + "loss": 13.7701, + "step": 264060 + }, + { + "epoch": 0.5334381072815201, + "grad_norm": 151.0877227783203, + "learning_rate": 5.381158835458019e-06, + "loss": 18.0856, + "step": 264070 + }, + { + "epoch": 0.5334583079142039, + "grad_norm": 547.76318359375, + "learning_rate": 5.380810784416979e-06, + "loss": 16.9888, + "step": 264080 + }, + { + "epoch": 0.5334785085468877, + "grad_norm": 132.0612335205078, + "learning_rate": 
5.380462731519912e-06, + "loss": 10.7681, + "step": 264090 + }, + { + "epoch": 0.5334987091795715, + "grad_norm": 51.655364990234375, + "learning_rate": 5.380114676768516e-06, + "loss": 14.4731, + "step": 264100 + }, + { + "epoch": 0.5335189098122554, + "grad_norm": 601.3800048828125, + "learning_rate": 5.379766620164488e-06, + "loss": 15.6159, + "step": 264110 + }, + { + "epoch": 0.5335391104449392, + "grad_norm": 146.5826416015625, + "learning_rate": 5.379418561709524e-06, + "loss": 22.4184, + "step": 264120 + }, + { + "epoch": 0.533559311077623, + "grad_norm": 796.7298583984375, + "learning_rate": 5.37907050140532e-06, + "loss": 17.6309, + "step": 264130 + }, + { + "epoch": 0.5335795117103068, + "grad_norm": 187.71275329589844, + "learning_rate": 5.378722439253571e-06, + "loss": 15.502, + "step": 264140 + }, + { + "epoch": 0.5335997123429905, + "grad_norm": 731.462646484375, + "learning_rate": 5.378374375255977e-06, + "loss": 25.3865, + "step": 264150 + }, + { + "epoch": 0.5336199129756743, + "grad_norm": 299.503662109375, + "learning_rate": 5.378026309414229e-06, + "loss": 22.9609, + "step": 264160 + }, + { + "epoch": 0.5336401136083582, + "grad_norm": 481.3091125488281, + "learning_rate": 5.377678241730029e-06, + "loss": 29.2946, + "step": 264170 + }, + { + "epoch": 0.533660314241042, + "grad_norm": 677.627685546875, + "learning_rate": 5.377330172205068e-06, + "loss": 22.5868, + "step": 264180 + }, + { + "epoch": 0.5336805148737258, + "grad_norm": 163.53543090820312, + "learning_rate": 5.37698210084105e-06, + "loss": 16.8451, + "step": 264190 + }, + { + "epoch": 0.5337007155064096, + "grad_norm": 795.53125, + "learning_rate": 5.376634027639664e-06, + "loss": 39.883, + "step": 264200 + }, + { + "epoch": 0.5337209161390934, + "grad_norm": 380.3072204589844, + "learning_rate": 5.37628595260261e-06, + "loss": 25.8065, + "step": 264210 + }, + { + "epoch": 0.5337411167717773, + "grad_norm": 0.0, + "learning_rate": 5.375937875731585e-06, + "loss": 11.8602, + "step": 264220 + }, + { + "epoch": 0.5337613174044611, + "grad_norm": 466.2163391113281, + "learning_rate": 5.375589797028282e-06, + "loss": 6.8212, + "step": 264230 + }, + { + "epoch": 0.5337815180371449, + "grad_norm": 132.32325744628906, + "learning_rate": 5.375241716494403e-06, + "loss": 22.9862, + "step": 264240 + }, + { + "epoch": 0.5338017186698287, + "grad_norm": 620.8463134765625, + "learning_rate": 5.3748936341316395e-06, + "loss": 14.3129, + "step": 264250 + }, + { + "epoch": 0.5338219193025125, + "grad_norm": 738.5233154296875, + "learning_rate": 5.37454554994169e-06, + "loss": 26.2885, + "step": 264260 + }, + { + "epoch": 0.5338421199351964, + "grad_norm": 753.781494140625, + "learning_rate": 5.374197463926251e-06, + "loss": 21.0322, + "step": 264270 + }, + { + "epoch": 0.5338623205678802, + "grad_norm": 271.4347839355469, + "learning_rate": 5.37384937608702e-06, + "loss": 14.2221, + "step": 264280 + }, + { + "epoch": 0.533882521200564, + "grad_norm": 267.5508117675781, + "learning_rate": 5.373501286425691e-06, + "loss": 17.9659, + "step": 264290 + }, + { + "epoch": 0.5339027218332478, + "grad_norm": 663.1692504882812, + "learning_rate": 5.373153194943962e-06, + "loss": 24.0293, + "step": 264300 + }, + { + "epoch": 0.5339229224659316, + "grad_norm": 359.5267333984375, + "learning_rate": 5.37280510164353e-06, + "loss": 32.0395, + "step": 264310 + }, + { + "epoch": 0.5339431230986155, + "grad_norm": 239.31358337402344, + "learning_rate": 5.37245700652609e-06, + "loss": 16.0137, + "step": 264320 + }, + { + "epoch": 
0.5339633237312993, + "grad_norm": 512.493408203125, + "learning_rate": 5.372108909593343e-06, + "loss": 23.8659, + "step": 264330 + }, + { + "epoch": 0.5339835243639831, + "grad_norm": 398.17938232421875, + "learning_rate": 5.371760810846979e-06, + "loss": 11.4735, + "step": 264340 + }, + { + "epoch": 0.5340037249966669, + "grad_norm": 352.3731384277344, + "learning_rate": 5.371412710288701e-06, + "loss": 18.8082, + "step": 264350 + }, + { + "epoch": 0.5340239256293507, + "grad_norm": 93.62528228759766, + "learning_rate": 5.3710646079202e-06, + "loss": 14.97, + "step": 264360 + }, + { + "epoch": 0.5340441262620346, + "grad_norm": 362.8443603515625, + "learning_rate": 5.370716503743175e-06, + "loss": 35.2391, + "step": 264370 + }, + { + "epoch": 0.5340643268947184, + "grad_norm": 905.8558349609375, + "learning_rate": 5.370368397759324e-06, + "loss": 44.9503, + "step": 264380 + }, + { + "epoch": 0.5340845275274022, + "grad_norm": 132.63365173339844, + "learning_rate": 5.370020289970341e-06, + "loss": 32.7998, + "step": 264390 + }, + { + "epoch": 0.534104728160086, + "grad_norm": 348.46185302734375, + "learning_rate": 5.3696721803779265e-06, + "loss": 25.7999, + "step": 264400 + }, + { + "epoch": 0.5341249287927697, + "grad_norm": 138.4418487548828, + "learning_rate": 5.369324068983772e-06, + "loss": 14.2852, + "step": 264410 + }, + { + "epoch": 0.5341451294254536, + "grad_norm": 157.70175170898438, + "learning_rate": 5.368975955789577e-06, + "loss": 18.0189, + "step": 264420 + }, + { + "epoch": 0.5341653300581374, + "grad_norm": 368.96221923828125, + "learning_rate": 5.368627840797039e-06, + "loss": 17.9959, + "step": 264430 + }, + { + "epoch": 0.5341855306908212, + "grad_norm": 186.53025817871094, + "learning_rate": 5.368279724007854e-06, + "loss": 18.3524, + "step": 264440 + }, + { + "epoch": 0.534205731323505, + "grad_norm": 403.6632385253906, + "learning_rate": 5.3679316054237165e-06, + "loss": 42.2809, + "step": 264450 + }, + { + "epoch": 0.5342259319561888, + "grad_norm": 3.657899856567383, + "learning_rate": 5.367583485046327e-06, + "loss": 29.1559, + "step": 264460 + }, + { + "epoch": 0.5342461325888727, + "grad_norm": 186.83163452148438, + "learning_rate": 5.367235362877379e-06, + "loss": 9.2051, + "step": 264470 + }, + { + "epoch": 0.5342663332215565, + "grad_norm": 41.42416000366211, + "learning_rate": 5.366887238918571e-06, + "loss": 15.1823, + "step": 264480 + }, + { + "epoch": 0.5342865338542403, + "grad_norm": 75.92926788330078, + "learning_rate": 5.3665391131716e-06, + "loss": 18.3596, + "step": 264490 + }, + { + "epoch": 0.5343067344869241, + "grad_norm": 676.78662109375, + "learning_rate": 5.366190985638159e-06, + "loss": 47.9756, + "step": 264500 + }, + { + "epoch": 0.5343269351196079, + "grad_norm": 94.7362060546875, + "learning_rate": 5.36584285631995e-06, + "loss": 10.0836, + "step": 264510 + }, + { + "epoch": 0.5343471357522918, + "grad_norm": 174.0113525390625, + "learning_rate": 5.365494725218667e-06, + "loss": 16.7035, + "step": 264520 + }, + { + "epoch": 0.5343673363849756, + "grad_norm": 222.8553924560547, + "learning_rate": 5.3651465923360045e-06, + "loss": 34.4077, + "step": 264530 + }, + { + "epoch": 0.5343875370176594, + "grad_norm": 285.4593200683594, + "learning_rate": 5.3647984576736645e-06, + "loss": 17.6323, + "step": 264540 + }, + { + "epoch": 0.5344077376503432, + "grad_norm": 414.40814208984375, + "learning_rate": 5.3644503212333395e-06, + "loss": 14.8091, + "step": 264550 + }, + { + "epoch": 0.534427938283027, + "grad_norm": 125.51368713378906, + 
"learning_rate": 5.36410218301673e-06, + "loss": 23.6634, + "step": 264560 + }, + { + "epoch": 0.5344481389157109, + "grad_norm": 270.2915344238281, + "learning_rate": 5.363754043025528e-06, + "loss": 18.8809, + "step": 264570 + }, + { + "epoch": 0.5344683395483947, + "grad_norm": 235.06610107421875, + "learning_rate": 5.3634059012614345e-06, + "loss": 7.6422, + "step": 264580 + }, + { + "epoch": 0.5344885401810785, + "grad_norm": 2.9614596366882324, + "learning_rate": 5.363057757726145e-06, + "loss": 12.5189, + "step": 264590 + }, + { + "epoch": 0.5345087408137623, + "grad_norm": 199.17440795898438, + "learning_rate": 5.362709612421355e-06, + "loss": 22.2044, + "step": 264600 + }, + { + "epoch": 0.5345289414464461, + "grad_norm": 224.73255920410156, + "learning_rate": 5.362361465348762e-06, + "loss": 22.9273, + "step": 264610 + }, + { + "epoch": 0.53454914207913, + "grad_norm": 516.4451293945312, + "learning_rate": 5.3620133165100656e-06, + "loss": 25.1774, + "step": 264620 + }, + { + "epoch": 0.5345693427118138, + "grad_norm": 359.0810241699219, + "learning_rate": 5.3616651659069576e-06, + "loss": 27.5257, + "step": 264630 + }, + { + "epoch": 0.5345895433444976, + "grad_norm": 6.806414604187012, + "learning_rate": 5.3613170135411384e-06, + "loss": 13.3821, + "step": 264640 + }, + { + "epoch": 0.5346097439771814, + "grad_norm": 545.328125, + "learning_rate": 5.360968859414305e-06, + "loss": 30.1279, + "step": 264650 + }, + { + "epoch": 0.5346299446098651, + "grad_norm": 198.06240844726562, + "learning_rate": 5.36062070352815e-06, + "loss": 16.2758, + "step": 264660 + }, + { + "epoch": 0.534650145242549, + "grad_norm": 197.97659301757812, + "learning_rate": 5.360272545884376e-06, + "loss": 17.6192, + "step": 264670 + }, + { + "epoch": 0.5346703458752328, + "grad_norm": 284.88006591796875, + "learning_rate": 5.359924386484676e-06, + "loss": 32.0421, + "step": 264680 + }, + { + "epoch": 0.5346905465079166, + "grad_norm": 350.51611328125, + "learning_rate": 5.35957622533075e-06, + "loss": 26.1766, + "step": 264690 + }, + { + "epoch": 0.5347107471406004, + "grad_norm": 242.34136962890625, + "learning_rate": 5.359228062424292e-06, + "loss": 13.5947, + "step": 264700 + }, + { + "epoch": 0.5347309477732842, + "grad_norm": 189.86558532714844, + "learning_rate": 5.358879897767e-06, + "loss": 14.2171, + "step": 264710 + }, + { + "epoch": 0.534751148405968, + "grad_norm": 526.793701171875, + "learning_rate": 5.358531731360571e-06, + "loss": 10.9611, + "step": 264720 + }, + { + "epoch": 0.5347713490386519, + "grad_norm": 241.5596466064453, + "learning_rate": 5.358183563206703e-06, + "loss": 17.8949, + "step": 264730 + }, + { + "epoch": 0.5347915496713357, + "grad_norm": 396.1840515136719, + "learning_rate": 5.357835393307089e-06, + "loss": 14.8254, + "step": 264740 + }, + { + "epoch": 0.5348117503040195, + "grad_norm": 981.4369506835938, + "learning_rate": 5.35748722166343e-06, + "loss": 38.9434, + "step": 264750 + }, + { + "epoch": 0.5348319509367033, + "grad_norm": 362.9976806640625, + "learning_rate": 5.357139048277422e-06, + "loss": 13.3401, + "step": 264760 + }, + { + "epoch": 0.5348521515693871, + "grad_norm": 249.92623901367188, + "learning_rate": 5.356790873150761e-06, + "loss": 14.2971, + "step": 264770 + }, + { + "epoch": 0.534872352202071, + "grad_norm": 123.45661163330078, + "learning_rate": 5.356442696285146e-06, + "loss": 8.8225, + "step": 264780 + }, + { + "epoch": 0.5348925528347548, + "grad_norm": 342.8233337402344, + "learning_rate": 5.3560945176822695e-06, + "loss": 25.6532, + 
"step": 264790 + }, + { + "epoch": 0.5349127534674386, + "grad_norm": 267.54119873046875, + "learning_rate": 5.355746337343835e-06, + "loss": 12.2637, + "step": 264800 + }, + { + "epoch": 0.5349329541001224, + "grad_norm": 144.90191650390625, + "learning_rate": 5.355398155271535e-06, + "loss": 11.5702, + "step": 264810 + }, + { + "epoch": 0.5349531547328062, + "grad_norm": 128.60110473632812, + "learning_rate": 5.355049971467066e-06, + "loss": 10.0458, + "step": 264820 + }, + { + "epoch": 0.5349733553654901, + "grad_norm": 270.35791015625, + "learning_rate": 5.354701785932129e-06, + "loss": 17.4736, + "step": 264830 + }, + { + "epoch": 0.5349935559981739, + "grad_norm": 367.4037780761719, + "learning_rate": 5.354353598668416e-06, + "loss": 24.0634, + "step": 264840 + }, + { + "epoch": 0.5350137566308577, + "grad_norm": 223.6572723388672, + "learning_rate": 5.354005409677628e-06, + "loss": 33.9743, + "step": 264850 + }, + { + "epoch": 0.5350339572635415, + "grad_norm": 302.42987060546875, + "learning_rate": 5.35365721896146e-06, + "loss": 14.558, + "step": 264860 + }, + { + "epoch": 0.5350541578962253, + "grad_norm": 418.0154113769531, + "learning_rate": 5.353309026521609e-06, + "loss": 22.6505, + "step": 264870 + }, + { + "epoch": 0.5350743585289092, + "grad_norm": 835.4775390625, + "learning_rate": 5.3529608323597735e-06, + "loss": 41.2067, + "step": 264880 + }, + { + "epoch": 0.535094559161593, + "grad_norm": 608.3998413085938, + "learning_rate": 5.352612636477651e-06, + "loss": 27.9021, + "step": 264890 + }, + { + "epoch": 0.5351147597942768, + "grad_norm": 211.52325439453125, + "learning_rate": 5.352264438876935e-06, + "loss": 29.2275, + "step": 264900 + }, + { + "epoch": 0.5351349604269606, + "grad_norm": 251.80059814453125, + "learning_rate": 5.351916239559326e-06, + "loss": 5.9152, + "step": 264910 + }, + { + "epoch": 0.5351551610596443, + "grad_norm": 632.3524169921875, + "learning_rate": 5.35156803852652e-06, + "loss": 26.1784, + "step": 264920 + }, + { + "epoch": 0.5351753616923282, + "grad_norm": 812.4949340820312, + "learning_rate": 5.351219835780213e-06, + "loss": 17.2401, + "step": 264930 + }, + { + "epoch": 0.535195562325012, + "grad_norm": 150.9505615234375, + "learning_rate": 5.3508716313221054e-06, + "loss": 12.239, + "step": 264940 + }, + { + "epoch": 0.5352157629576958, + "grad_norm": 980.598388671875, + "learning_rate": 5.3505234251538885e-06, + "loss": 28.4981, + "step": 264950 + }, + { + "epoch": 0.5352359635903796, + "grad_norm": 192.49562072753906, + "learning_rate": 5.3501752172772655e-06, + "loss": 10.0218, + "step": 264960 + }, + { + "epoch": 0.5352561642230634, + "grad_norm": 170.17161560058594, + "learning_rate": 5.34982700769393e-06, + "loss": 8.1467, + "step": 264970 + }, + { + "epoch": 0.5352763648557473, + "grad_norm": 513.2670288085938, + "learning_rate": 5.3494787964055805e-06, + "loss": 27.5369, + "step": 264980 + }, + { + "epoch": 0.5352965654884311, + "grad_norm": 569.3101806640625, + "learning_rate": 5.349130583413915e-06, + "loss": 11.8051, + "step": 264990 + }, + { + "epoch": 0.5353167661211149, + "grad_norm": 567.8678588867188, + "learning_rate": 5.348782368720627e-06, + "loss": 14.2739, + "step": 265000 + }, + { + "epoch": 0.5353369667537987, + "grad_norm": 437.3683166503906, + "learning_rate": 5.348434152327418e-06, + "loss": 15.0816, + "step": 265010 + }, + { + "epoch": 0.5353571673864825, + "grad_norm": 764.6904296875, + "learning_rate": 5.348085934235981e-06, + "loss": 40.8282, + "step": 265020 + }, + { + "epoch": 0.5353773680191664, + 
"grad_norm": 804.3911743164062, + "learning_rate": 5.347737714448017e-06, + "loss": 24.8363, + "step": 265030 + }, + { + "epoch": 0.5353975686518502, + "grad_norm": 68.34645080566406, + "learning_rate": 5.347389492965221e-06, + "loss": 9.1033, + "step": 265040 + }, + { + "epoch": 0.535417769284534, + "grad_norm": 336.69256591796875, + "learning_rate": 5.347041269789293e-06, + "loss": 22.3642, + "step": 265050 + }, + { + "epoch": 0.5354379699172178, + "grad_norm": 390.03448486328125, + "learning_rate": 5.346693044921925e-06, + "loss": 16.6374, + "step": 265060 + }, + { + "epoch": 0.5354581705499016, + "grad_norm": 650.5511474609375, + "learning_rate": 5.3463448183648185e-06, + "loss": 13.1788, + "step": 265070 + }, + { + "epoch": 0.5354783711825855, + "grad_norm": 311.6661071777344, + "learning_rate": 5.345996590119668e-06, + "loss": 22.0306, + "step": 265080 + }, + { + "epoch": 0.5354985718152693, + "grad_norm": 418.624267578125, + "learning_rate": 5.345648360188173e-06, + "loss": 20.1837, + "step": 265090 + }, + { + "epoch": 0.5355187724479531, + "grad_norm": 565.4595336914062, + "learning_rate": 5.345300128572031e-06, + "loss": 19.1615, + "step": 265100 + }, + { + "epoch": 0.5355389730806369, + "grad_norm": 297.052978515625, + "learning_rate": 5.344951895272935e-06, + "loss": 21.3977, + "step": 265110 + }, + { + "epoch": 0.5355591737133207, + "grad_norm": 360.16717529296875, + "learning_rate": 5.344603660292588e-06, + "loss": 11.1924, + "step": 265120 + }, + { + "epoch": 0.5355793743460046, + "grad_norm": 843.7276611328125, + "learning_rate": 5.344255423632684e-06, + "loss": 28.0311, + "step": 265130 + }, + { + "epoch": 0.5355995749786884, + "grad_norm": 568.5252685546875, + "learning_rate": 5.34390718529492e-06, + "loss": 26.0348, + "step": 265140 + }, + { + "epoch": 0.5356197756113722, + "grad_norm": 220.79872131347656, + "learning_rate": 5.343558945280994e-06, + "loss": 8.2561, + "step": 265150 + }, + { + "epoch": 0.535639976244056, + "grad_norm": 349.0790710449219, + "learning_rate": 5.343210703592604e-06, + "loss": 19.9893, + "step": 265160 + }, + { + "epoch": 0.5356601768767397, + "grad_norm": 677.7298583984375, + "learning_rate": 5.342862460231448e-06, + "loss": 18.7261, + "step": 265170 + }, + { + "epoch": 0.5356803775094235, + "grad_norm": 477.18865966796875, + "learning_rate": 5.34251421519922e-06, + "loss": 33.3211, + "step": 265180 + }, + { + "epoch": 0.5357005781421074, + "grad_norm": 416.8053283691406, + "learning_rate": 5.3421659684976205e-06, + "loss": 17.2127, + "step": 265190 + }, + { + "epoch": 0.5357207787747912, + "grad_norm": 132.3507843017578, + "learning_rate": 5.341817720128344e-06, + "loss": 15.1434, + "step": 265200 + }, + { + "epoch": 0.535740979407475, + "grad_norm": 1010.3002319335938, + "learning_rate": 5.341469470093091e-06, + "loss": 21.6635, + "step": 265210 + }, + { + "epoch": 0.5357611800401588, + "grad_norm": 529.0772094726562, + "learning_rate": 5.341121218393555e-06, + "loss": 13.5647, + "step": 265220 + }, + { + "epoch": 0.5357813806728426, + "grad_norm": 189.75848388671875, + "learning_rate": 5.340772965031439e-06, + "loss": 11.9191, + "step": 265230 + }, + { + "epoch": 0.5358015813055265, + "grad_norm": 48.30891036987305, + "learning_rate": 5.340424710008434e-06, + "loss": 19.925, + "step": 265240 + }, + { + "epoch": 0.5358217819382103, + "grad_norm": 179.47984313964844, + "learning_rate": 5.3400764533262415e-06, + "loss": 26.0865, + "step": 265250 + }, + { + "epoch": 0.5358419825708941, + "grad_norm": 579.6912841796875, + "learning_rate": 
5.339728194986559e-06, + "loss": 17.8074, + "step": 265260 + }, + { + "epoch": 0.5358621832035779, + "grad_norm": 443.2523498535156, + "learning_rate": 5.339379934991079e-06, + "loss": 24.461, + "step": 265270 + }, + { + "epoch": 0.5358823838362617, + "grad_norm": 655.2186279296875, + "learning_rate": 5.339031673341505e-06, + "loss": 20.1031, + "step": 265280 + }, + { + "epoch": 0.5359025844689456, + "grad_norm": 189.1371307373047, + "learning_rate": 5.338683410039529e-06, + "loss": 23.1255, + "step": 265290 + }, + { + "epoch": 0.5359227851016294, + "grad_norm": 243.77195739746094, + "learning_rate": 5.338335145086855e-06, + "loss": 17.1901, + "step": 265300 + }, + { + "epoch": 0.5359429857343132, + "grad_norm": 265.5787353515625, + "learning_rate": 5.337986878485174e-06, + "loss": 23.4474, + "step": 265310 + }, + { + "epoch": 0.535963186366997, + "grad_norm": 517.0961303710938, + "learning_rate": 5.337638610236186e-06, + "loss": 13.078, + "step": 265320 + }, + { + "epoch": 0.5359833869996808, + "grad_norm": 487.6114501953125, + "learning_rate": 5.3372903403415896e-06, + "loss": 12.1002, + "step": 265330 + }, + { + "epoch": 0.5360035876323647, + "grad_norm": 82.22420501708984, + "learning_rate": 5.336942068803081e-06, + "loss": 12.6738, + "step": 265340 + }, + { + "epoch": 0.5360237882650485, + "grad_norm": 509.4190673828125, + "learning_rate": 5.336593795622357e-06, + "loss": 14.7771, + "step": 265350 + }, + { + "epoch": 0.5360439888977323, + "grad_norm": 313.20086669921875, + "learning_rate": 5.336245520801115e-06, + "loss": 12.552, + "step": 265360 + }, + { + "epoch": 0.5360641895304161, + "grad_norm": 1148.6875, + "learning_rate": 5.335897244341054e-06, + "loss": 34.6896, + "step": 265370 + }, + { + "epoch": 0.5360843901630999, + "grad_norm": 690.5263671875, + "learning_rate": 5.33554896624387e-06, + "loss": 18.4221, + "step": 265380 + }, + { + "epoch": 0.5361045907957838, + "grad_norm": 207.52447509765625, + "learning_rate": 5.335200686511262e-06, + "loss": 10.7602, + "step": 265390 + }, + { + "epoch": 0.5361247914284676, + "grad_norm": 132.81065368652344, + "learning_rate": 5.334852405144926e-06, + "loss": 13.416, + "step": 265400 + }, + { + "epoch": 0.5361449920611514, + "grad_norm": 482.5021667480469, + "learning_rate": 5.3345041221465586e-06, + "loss": 16.366, + "step": 265410 + }, + { + "epoch": 0.5361651926938352, + "grad_norm": 504.22662353515625, + "learning_rate": 5.33415583751786e-06, + "loss": 27.1793, + "step": 265420 + }, + { + "epoch": 0.5361853933265189, + "grad_norm": 259.0811462402344, + "learning_rate": 5.333807551260526e-06, + "loss": 21.8486, + "step": 265430 + }, + { + "epoch": 0.5362055939592028, + "grad_norm": 437.5867919921875, + "learning_rate": 5.333459263376256e-06, + "loss": 11.8019, + "step": 265440 + }, + { + "epoch": 0.5362257945918866, + "grad_norm": 261.80950927734375, + "learning_rate": 5.333110973866743e-06, + "loss": 22.4439, + "step": 265450 + }, + { + "epoch": 0.5362459952245704, + "grad_norm": 339.9731750488281, + "learning_rate": 5.3327626827336906e-06, + "loss": 18.0988, + "step": 265460 + }, + { + "epoch": 0.5362661958572542, + "grad_norm": 6.128870487213135, + "learning_rate": 5.332414389978792e-06, + "loss": 13.528, + "step": 265470 + }, + { + "epoch": 0.536286396489938, + "grad_norm": 386.41497802734375, + "learning_rate": 5.332066095603745e-06, + "loss": 26.8685, + "step": 265480 + }, + { + "epoch": 0.5363065971226219, + "grad_norm": 173.89610290527344, + "learning_rate": 5.33171779961025e-06, + "loss": 12.9385, + "step": 265490 + }, + { + 
"epoch": 0.5363267977553057, + "grad_norm": 117.55145263671875, + "learning_rate": 5.3313695020000026e-06, + "loss": 34.7294, + "step": 265500 + }, + { + "epoch": 0.5363469983879895, + "grad_norm": 658.5884399414062, + "learning_rate": 5.331021202774699e-06, + "loss": 16.3639, + "step": 265510 + }, + { + "epoch": 0.5363671990206733, + "grad_norm": 597.7453002929688, + "learning_rate": 5.330672901936038e-06, + "loss": 17.0038, + "step": 265520 + }, + { + "epoch": 0.5363873996533571, + "grad_norm": 363.2785339355469, + "learning_rate": 5.330324599485718e-06, + "loss": 16.9144, + "step": 265530 + }, + { + "epoch": 0.536407600286041, + "grad_norm": 480.3471984863281, + "learning_rate": 5.329976295425437e-06, + "loss": 7.5248, + "step": 265540 + }, + { + "epoch": 0.5364278009187248, + "grad_norm": 242.0186767578125, + "learning_rate": 5.32962798975689e-06, + "loss": 9.2614, + "step": 265550 + }, + { + "epoch": 0.5364480015514086, + "grad_norm": 327.3664855957031, + "learning_rate": 5.329279682481776e-06, + "loss": 11.1113, + "step": 265560 + }, + { + "epoch": 0.5364682021840924, + "grad_norm": 95.38156127929688, + "learning_rate": 5.328931373601794e-06, + "loss": 15.4326, + "step": 265570 + }, + { + "epoch": 0.5364884028167762, + "grad_norm": 186.45181274414062, + "learning_rate": 5.328583063118641e-06, + "loss": 21.2168, + "step": 265580 + }, + { + "epoch": 0.53650860344946, + "grad_norm": 360.18255615234375, + "learning_rate": 5.328234751034011e-06, + "loss": 18.9164, + "step": 265590 + }, + { + "epoch": 0.5365288040821439, + "grad_norm": 646.5363159179688, + "learning_rate": 5.327886437349609e-06, + "loss": 19.7649, + "step": 265600 + }, + { + "epoch": 0.5365490047148277, + "grad_norm": 1159.917236328125, + "learning_rate": 5.327538122067124e-06, + "loss": 19.1672, + "step": 265610 + }, + { + "epoch": 0.5365692053475115, + "grad_norm": 368.328369140625, + "learning_rate": 5.327189805188261e-06, + "loss": 20.3391, + "step": 265620 + }, + { + "epoch": 0.5365894059801953, + "grad_norm": 289.69073486328125, + "learning_rate": 5.326841486714713e-06, + "loss": 21.6539, + "step": 265630 + }, + { + "epoch": 0.5366096066128792, + "grad_norm": 823.12158203125, + "learning_rate": 5.326493166648179e-06, + "loss": 27.7374, + "step": 265640 + }, + { + "epoch": 0.536629807245563, + "grad_norm": 100.33020782470703, + "learning_rate": 5.326144844990357e-06, + "loss": 26.8109, + "step": 265650 + }, + { + "epoch": 0.5366500078782468, + "grad_norm": 919.996337890625, + "learning_rate": 5.3257965217429465e-06, + "loss": 14.6398, + "step": 265660 + }, + { + "epoch": 0.5366702085109306, + "grad_norm": 547.6209716796875, + "learning_rate": 5.325448196907642e-06, + "loss": 20.7509, + "step": 265670 + }, + { + "epoch": 0.5366904091436144, + "grad_norm": 66.95198822021484, + "learning_rate": 5.3250998704861425e-06, + "loss": 17.0755, + "step": 265680 + }, + { + "epoch": 0.5367106097762981, + "grad_norm": 407.3754577636719, + "learning_rate": 5.324751542480144e-06, + "loss": 23.6862, + "step": 265690 + }, + { + "epoch": 0.536730810408982, + "grad_norm": 219.86447143554688, + "learning_rate": 5.3244032128913485e-06, + "loss": 23.5132, + "step": 265700 + }, + { + "epoch": 0.5367510110416658, + "grad_norm": 139.1474151611328, + "learning_rate": 5.32405488172145e-06, + "loss": 24.8371, + "step": 265710 + }, + { + "epoch": 0.5367712116743496, + "grad_norm": 184.0261993408203, + "learning_rate": 5.3237065489721465e-06, + "loss": 19.828, + "step": 265720 + }, + { + "epoch": 0.5367914123070334, + "grad_norm": 397.114013671875, 
+ "learning_rate": 5.3233582146451375e-06, + "loss": 36.484, + "step": 265730 + }, + { + "epoch": 0.5368116129397172, + "grad_norm": 935.712890625, + "learning_rate": 5.323009878742119e-06, + "loss": 18.6447, + "step": 265740 + }, + { + "epoch": 0.5368318135724011, + "grad_norm": 311.7372741699219, + "learning_rate": 5.322661541264791e-06, + "loss": 31.576, + "step": 265750 + }, + { + "epoch": 0.5368520142050849, + "grad_norm": 748.21142578125, + "learning_rate": 5.322313202214848e-06, + "loss": 28.1338, + "step": 265760 + }, + { + "epoch": 0.5368722148377687, + "grad_norm": 348.9870300292969, + "learning_rate": 5.32196486159399e-06, + "loss": 12.2605, + "step": 265770 + }, + { + "epoch": 0.5368924154704525, + "grad_norm": 197.72886657714844, + "learning_rate": 5.321616519403916e-06, + "loss": 20.8514, + "step": 265780 + }, + { + "epoch": 0.5369126161031363, + "grad_norm": 5.432241439819336, + "learning_rate": 5.3212681756463205e-06, + "loss": 22.8081, + "step": 265790 + }, + { + "epoch": 0.5369328167358202, + "grad_norm": 96.3584213256836, + "learning_rate": 5.320919830322903e-06, + "loss": 13.6813, + "step": 265800 + }, + { + "epoch": 0.536953017368504, + "grad_norm": 199.02757263183594, + "learning_rate": 5.320571483435362e-06, + "loss": 17.0379, + "step": 265810 + }, + { + "epoch": 0.5369732180011878, + "grad_norm": 302.49151611328125, + "learning_rate": 5.320223134985393e-06, + "loss": 17.1833, + "step": 265820 + }, + { + "epoch": 0.5369934186338716, + "grad_norm": 193.92271423339844, + "learning_rate": 5.319874784974696e-06, + "loss": 14.019, + "step": 265830 + }, + { + "epoch": 0.5370136192665554, + "grad_norm": 475.6900329589844, + "learning_rate": 5.319526433404969e-06, + "loss": 20.0408, + "step": 265840 + }, + { + "epoch": 0.5370338198992393, + "grad_norm": 348.04736328125, + "learning_rate": 5.319178080277908e-06, + "loss": 15.6812, + "step": 265850 + }, + { + "epoch": 0.5370540205319231, + "grad_norm": 248.185302734375, + "learning_rate": 5.318829725595212e-06, + "loss": 6.4697, + "step": 265860 + }, + { + "epoch": 0.5370742211646069, + "grad_norm": 264.96893310546875, + "learning_rate": 5.318481369358579e-06, + "loss": 10.0228, + "step": 265870 + }, + { + "epoch": 0.5370944217972907, + "grad_norm": 385.7796630859375, + "learning_rate": 5.318133011569704e-06, + "loss": 22.5801, + "step": 265880 + }, + { + "epoch": 0.5371146224299745, + "grad_norm": 699.6243286132812, + "learning_rate": 5.31778465223029e-06, + "loss": 26.2126, + "step": 265890 + }, + { + "epoch": 0.5371348230626584, + "grad_norm": 434.1804504394531, + "learning_rate": 5.317436291342031e-06, + "loss": 23.8011, + "step": 265900 + }, + { + "epoch": 0.5371550236953422, + "grad_norm": 483.005615234375, + "learning_rate": 5.3170879289066265e-06, + "loss": 23.9942, + "step": 265910 + }, + { + "epoch": 0.537175224328026, + "grad_norm": 495.5999450683594, + "learning_rate": 5.316739564925773e-06, + "loss": 16.2944, + "step": 265920 + }, + { + "epoch": 0.5371954249607098, + "grad_norm": 310.24444580078125, + "learning_rate": 5.3163911994011705e-06, + "loss": 45.5477, + "step": 265930 + }, + { + "epoch": 0.5372156255933935, + "grad_norm": 141.1289520263672, + "learning_rate": 5.316042832334516e-06, + "loss": 11.9408, + "step": 265940 + }, + { + "epoch": 0.5372358262260774, + "grad_norm": 497.3875427246094, + "learning_rate": 5.315694463727506e-06, + "loss": 17.9419, + "step": 265950 + }, + { + "epoch": 0.5372560268587612, + "grad_norm": 814.2578735351562, + "learning_rate": 5.3153460935818405e-06, + "loss": 22.0574, + 
"step": 265960 + }, + { + "epoch": 0.537276227491445, + "grad_norm": 87.76453399658203, + "learning_rate": 5.314997721899214e-06, + "loss": 19.425, + "step": 265970 + }, + { + "epoch": 0.5372964281241288, + "grad_norm": 379.69281005859375, + "learning_rate": 5.3146493486813285e-06, + "loss": 35.2839, + "step": 265980 + }, + { + "epoch": 0.5373166287568126, + "grad_norm": 507.013671875, + "learning_rate": 5.31430097392988e-06, + "loss": 24.9944, + "step": 265990 + }, + { + "epoch": 0.5373368293894965, + "grad_norm": 222.95751953125, + "learning_rate": 5.3139525976465675e-06, + "loss": 21.1369, + "step": 266000 + }, + { + "epoch": 0.5373570300221803, + "grad_norm": 295.1164245605469, + "learning_rate": 5.313604219833087e-06, + "loss": 18.3363, + "step": 266010 + }, + { + "epoch": 0.5373772306548641, + "grad_norm": 584.6785278320312, + "learning_rate": 5.313255840491138e-06, + "loss": 24.1707, + "step": 266020 + }, + { + "epoch": 0.5373974312875479, + "grad_norm": 1128.349365234375, + "learning_rate": 5.312907459622418e-06, + "loss": 39.631, + "step": 266030 + }, + { + "epoch": 0.5374176319202317, + "grad_norm": 30.17184829711914, + "learning_rate": 5.3125590772286255e-06, + "loss": 23.6821, + "step": 266040 + }, + { + "epoch": 0.5374378325529156, + "grad_norm": 455.755615234375, + "learning_rate": 5.312210693311458e-06, + "loss": 28.8897, + "step": 266050 + }, + { + "epoch": 0.5374580331855994, + "grad_norm": 327.5693664550781, + "learning_rate": 5.311862307872611e-06, + "loss": 30.3681, + "step": 266060 + }, + { + "epoch": 0.5374782338182832, + "grad_norm": 410.9100646972656, + "learning_rate": 5.311513920913789e-06, + "loss": 15.8005, + "step": 266070 + }, + { + "epoch": 0.537498434450967, + "grad_norm": 311.3714599609375, + "learning_rate": 5.311165532436683e-06, + "loss": 28.2955, + "step": 266080 + }, + { + "epoch": 0.5375186350836508, + "grad_norm": 184.61801147460938, + "learning_rate": 5.310817142442995e-06, + "loss": 9.4785, + "step": 266090 + }, + { + "epoch": 0.5375388357163347, + "grad_norm": 302.0754699707031, + "learning_rate": 5.310468750934421e-06, + "loss": 12.7787, + "step": 266100 + }, + { + "epoch": 0.5375590363490185, + "grad_norm": 229.22055053710938, + "learning_rate": 5.310120357912661e-06, + "loss": 16.4678, + "step": 266110 + }, + { + "epoch": 0.5375792369817023, + "grad_norm": 55.7581787109375, + "learning_rate": 5.309771963379412e-06, + "loss": 19.0601, + "step": 266120 + }, + { + "epoch": 0.5375994376143861, + "grad_norm": 637.02734375, + "learning_rate": 5.309423567336371e-06, + "loss": 21.7023, + "step": 266130 + }, + { + "epoch": 0.5376196382470699, + "grad_norm": 1429.7181396484375, + "learning_rate": 5.309075169785238e-06, + "loss": 47.7239, + "step": 266140 + }, + { + "epoch": 0.5376398388797538, + "grad_norm": 313.2904968261719, + "learning_rate": 5.30872677072771e-06, + "loss": 12.5589, + "step": 266150 + }, + { + "epoch": 0.5376600395124376, + "grad_norm": 704.8517456054688, + "learning_rate": 5.308378370165486e-06, + "loss": 36.1345, + "step": 266160 + }, + { + "epoch": 0.5376802401451214, + "grad_norm": 615.5545043945312, + "learning_rate": 5.308029968100261e-06, + "loss": 17.5777, + "step": 266170 + }, + { + "epoch": 0.5377004407778052, + "grad_norm": 398.2856140136719, + "learning_rate": 5.307681564533736e-06, + "loss": 30.5398, + "step": 266180 + }, + { + "epoch": 0.537720641410489, + "grad_norm": 374.3896179199219, + "learning_rate": 5.307333159467609e-06, + "loss": 14.7331, + "step": 266190 + }, + { + "epoch": 0.5377408420431727, + "grad_norm": 
242.48318481445312, + "learning_rate": 5.306984752903578e-06, + "loss": 21.0312, + "step": 266200 + }, + { + "epoch": 0.5377610426758566, + "grad_norm": 848.95849609375, + "learning_rate": 5.3066363448433414e-06, + "loss": 15.3074, + "step": 266210 + }, + { + "epoch": 0.5377812433085404, + "grad_norm": 334.0947265625, + "learning_rate": 5.306287935288593e-06, + "loss": 31.8886, + "step": 266220 + }, + { + "epoch": 0.5378014439412242, + "grad_norm": 628.0471801757812, + "learning_rate": 5.305939524241037e-06, + "loss": 22.4649, + "step": 266230 + }, + { + "epoch": 0.537821644573908, + "grad_norm": 388.18621826171875, + "learning_rate": 5.305591111702368e-06, + "loss": 35.5274, + "step": 266240 + }, + { + "epoch": 0.5378418452065918, + "grad_norm": 321.6109619140625, + "learning_rate": 5.3052426976742855e-06, + "loss": 14.3846, + "step": 266250 + }, + { + "epoch": 0.5378620458392757, + "grad_norm": 4.319701671600342, + "learning_rate": 5.304894282158486e-06, + "loss": 13.5861, + "step": 266260 + }, + { + "epoch": 0.5378822464719595, + "grad_norm": 72.18042755126953, + "learning_rate": 5.304545865156669e-06, + "loss": 22.5068, + "step": 266270 + }, + { + "epoch": 0.5379024471046433, + "grad_norm": 151.73434448242188, + "learning_rate": 5.3041974466705335e-06, + "loss": 24.1643, + "step": 266280 + }, + { + "epoch": 0.5379226477373271, + "grad_norm": 489.5429992675781, + "learning_rate": 5.303849026701776e-06, + "loss": 15.1565, + "step": 266290 + }, + { + "epoch": 0.5379428483700109, + "grad_norm": 852.3153076171875, + "learning_rate": 5.303500605252095e-06, + "loss": 18.5286, + "step": 266300 + }, + { + "epoch": 0.5379630490026948, + "grad_norm": 391.2786865234375, + "learning_rate": 5.303152182323189e-06, + "loss": 14.128, + "step": 266310 + }, + { + "epoch": 0.5379832496353786, + "grad_norm": 1297.095947265625, + "learning_rate": 5.302803757916757e-06, + "loss": 22.1098, + "step": 266320 + }, + { + "epoch": 0.5380034502680624, + "grad_norm": 126.69831085205078, + "learning_rate": 5.302455332034494e-06, + "loss": 20.9619, + "step": 266330 + }, + { + "epoch": 0.5380236509007462, + "grad_norm": 473.3225402832031, + "learning_rate": 5.3021069046781025e-06, + "loss": 18.761, + "step": 266340 + }, + { + "epoch": 0.53804385153343, + "grad_norm": 582.2522583007812, + "learning_rate": 5.301758475849278e-06, + "loss": 10.9951, + "step": 266350 + }, + { + "epoch": 0.5380640521661139, + "grad_norm": 348.973876953125, + "learning_rate": 5.301410045549719e-06, + "loss": 22.3628, + "step": 266360 + }, + { + "epoch": 0.5380842527987977, + "grad_norm": 660.4752807617188, + "learning_rate": 5.301061613781123e-06, + "loss": 17.2147, + "step": 266370 + }, + { + "epoch": 0.5381044534314815, + "grad_norm": 382.2339782714844, + "learning_rate": 5.300713180545189e-06, + "loss": 17.9901, + "step": 266380 + }, + { + "epoch": 0.5381246540641653, + "grad_norm": 213.0685577392578, + "learning_rate": 5.300364745843618e-06, + "loss": 20.0158, + "step": 266390 + }, + { + "epoch": 0.5381448546968491, + "grad_norm": 420.6485290527344, + "learning_rate": 5.300016309678104e-06, + "loss": 24.067, + "step": 266400 + }, + { + "epoch": 0.538165055329533, + "grad_norm": 139.35597229003906, + "learning_rate": 5.299667872050348e-06, + "loss": 45.067, + "step": 266410 + }, + { + "epoch": 0.5381852559622168, + "grad_norm": 586.6095581054688, + "learning_rate": 5.299319432962046e-06, + "loss": 22.0159, + "step": 266420 + }, + { + "epoch": 0.5382054565949006, + "grad_norm": 281.5572814941406, + "learning_rate": 5.298970992414897e-06, + 
"loss": 17.244, + "step": 266430 + }, + { + "epoch": 0.5382256572275844, + "grad_norm": 395.41571044921875, + "learning_rate": 5.2986225504106e-06, + "loss": 12.9337, + "step": 266440 + }, + { + "epoch": 0.5382458578602681, + "grad_norm": 293.61102294921875, + "learning_rate": 5.298274106950855e-06, + "loss": 14.9702, + "step": 266450 + }, + { + "epoch": 0.538266058492952, + "grad_norm": 419.4900817871094, + "learning_rate": 5.297925662037356e-06, + "loss": 6.5225, + "step": 266460 + }, + { + "epoch": 0.5382862591256358, + "grad_norm": 711.2694091796875, + "learning_rate": 5.297577215671803e-06, + "loss": 17.3594, + "step": 266470 + }, + { + "epoch": 0.5383064597583196, + "grad_norm": 271.6884460449219, + "learning_rate": 5.297228767855898e-06, + "loss": 15.5889, + "step": 266480 + }, + { + "epoch": 0.5383266603910034, + "grad_norm": 25.23544692993164, + "learning_rate": 5.296880318591331e-06, + "loss": 32.5112, + "step": 266490 + }, + { + "epoch": 0.5383468610236872, + "grad_norm": 230.8773193359375, + "learning_rate": 5.296531867879809e-06, + "loss": 11.6164, + "step": 266500 + }, + { + "epoch": 0.538367061656371, + "grad_norm": 308.32122802734375, + "learning_rate": 5.296183415723024e-06, + "loss": 21.5435, + "step": 266510 + }, + { + "epoch": 0.5383872622890549, + "grad_norm": 521.9244384765625, + "learning_rate": 5.29583496212268e-06, + "loss": 19.3713, + "step": 266520 + }, + { + "epoch": 0.5384074629217387, + "grad_norm": 428.1097717285156, + "learning_rate": 5.2954865070804705e-06, + "loss": 11.197, + "step": 266530 + }, + { + "epoch": 0.5384276635544225, + "grad_norm": 313.6487731933594, + "learning_rate": 5.295138050598097e-06, + "loss": 17.1205, + "step": 266540 + }, + { + "epoch": 0.5384478641871063, + "grad_norm": 500.1407470703125, + "learning_rate": 5.294789592677255e-06, + "loss": 19.4713, + "step": 266550 + }, + { + "epoch": 0.5384680648197901, + "grad_norm": 61.38530349731445, + "learning_rate": 5.2944411333196445e-06, + "loss": 12.1264, + "step": 266560 + }, + { + "epoch": 0.538488265452474, + "grad_norm": 93.75505065917969, + "learning_rate": 5.294092672526963e-06, + "loss": 18.3813, + "step": 266570 + }, + { + "epoch": 0.5385084660851578, + "grad_norm": 318.5310363769531, + "learning_rate": 5.293744210300911e-06, + "loss": 13.4829, + "step": 266580 + }, + { + "epoch": 0.5385286667178416, + "grad_norm": 590.1112060546875, + "learning_rate": 5.293395746643184e-06, + "loss": 17.2738, + "step": 266590 + }, + { + "epoch": 0.5385488673505254, + "grad_norm": 513.4954223632812, + "learning_rate": 5.293047281555482e-06, + "loss": 32.3895, + "step": 266600 + }, + { + "epoch": 0.5385690679832092, + "grad_norm": 461.12060546875, + "learning_rate": 5.292698815039505e-06, + "loss": 12.3369, + "step": 266610 + }, + { + "epoch": 0.5385892686158931, + "grad_norm": 617.8843994140625, + "learning_rate": 5.292350347096949e-06, + "loss": 15.8319, + "step": 266620 + }, + { + "epoch": 0.5386094692485769, + "grad_norm": 57.038421630859375, + "learning_rate": 5.29200187772951e-06, + "loss": 13.7819, + "step": 266630 + }, + { + "epoch": 0.5386296698812607, + "grad_norm": 592.28466796875, + "learning_rate": 5.291653406938892e-06, + "loss": 24.1666, + "step": 266640 + }, + { + "epoch": 0.5386498705139445, + "grad_norm": 262.87371826171875, + "learning_rate": 5.291304934726789e-06, + "loss": 18.2219, + "step": 266650 + }, + { + "epoch": 0.5386700711466283, + "grad_norm": 327.9078369140625, + "learning_rate": 5.290956461094903e-06, + "loss": 25.113, + "step": 266660 + }, + { + "epoch": 
0.5386902717793122, + "grad_norm": 344.01123046875, + "learning_rate": 5.290607986044928e-06, + "loss": 43.9331, + "step": 266670 + }, + { + "epoch": 0.538710472411996, + "grad_norm": 587.4368896484375, + "learning_rate": 5.290259509578567e-06, + "loss": 19.13, + "step": 266680 + }, + { + "epoch": 0.5387306730446798, + "grad_norm": 274.8553466796875, + "learning_rate": 5.289911031697515e-06, + "loss": 13.857, + "step": 266690 + }, + { + "epoch": 0.5387508736773636, + "grad_norm": 161.65403747558594, + "learning_rate": 5.289562552403472e-06, + "loss": 15.1486, + "step": 266700 + }, + { + "epoch": 0.5387710743100473, + "grad_norm": 561.6729736328125, + "learning_rate": 5.289214071698138e-06, + "loss": 13.5816, + "step": 266710 + }, + { + "epoch": 0.5387912749427312, + "grad_norm": 635.9703369140625, + "learning_rate": 5.2888655895832075e-06, + "loss": 27.2917, + "step": 266720 + }, + { + "epoch": 0.538811475575415, + "grad_norm": 506.6813659667969, + "learning_rate": 5.288517106060383e-06, + "loss": 13.8341, + "step": 266730 + }, + { + "epoch": 0.5388316762080988, + "grad_norm": 993.2285766601562, + "learning_rate": 5.288168621131359e-06, + "loss": 21.6998, + "step": 266740 + }, + { + "epoch": 0.5388518768407826, + "grad_norm": 94.2969741821289, + "learning_rate": 5.287820134797837e-06, + "loss": 14.8746, + "step": 266750 + }, + { + "epoch": 0.5388720774734664, + "grad_norm": 326.3339538574219, + "learning_rate": 5.287471647061515e-06, + "loss": 18.561, + "step": 266760 + }, + { + "epoch": 0.5388922781061503, + "grad_norm": 454.34283447265625, + "learning_rate": 5.2871231579240916e-06, + "loss": 14.6379, + "step": 266770 + }, + { + "epoch": 0.5389124787388341, + "grad_norm": 159.68142700195312, + "learning_rate": 5.286774667387262e-06, + "loss": 18.4233, + "step": 266780 + }, + { + "epoch": 0.5389326793715179, + "grad_norm": 461.82659912109375, + "learning_rate": 5.28642617545273e-06, + "loss": 12.2659, + "step": 266790 + }, + { + "epoch": 0.5389528800042017, + "grad_norm": 282.8290710449219, + "learning_rate": 5.2860776821221915e-06, + "loss": 9.7265, + "step": 266800 + }, + { + "epoch": 0.5389730806368855, + "grad_norm": 1727.9700927734375, + "learning_rate": 5.285729187397344e-06, + "loss": 27.368, + "step": 266810 + }, + { + "epoch": 0.5389932812695694, + "grad_norm": 12628.2509765625, + "learning_rate": 5.285380691279889e-06, + "loss": 27.0251, + "step": 266820 + }, + { + "epoch": 0.5390134819022532, + "grad_norm": 397.4987487792969, + "learning_rate": 5.2850321937715195e-06, + "loss": 8.3365, + "step": 266830 + }, + { + "epoch": 0.539033682534937, + "grad_norm": 37.92121505737305, + "learning_rate": 5.284683694873941e-06, + "loss": 16.2793, + "step": 266840 + }, + { + "epoch": 0.5390538831676208, + "grad_norm": 811.298095703125, + "learning_rate": 5.284335194588848e-06, + "loss": 32.7642, + "step": 266850 + }, + { + "epoch": 0.5390740838003046, + "grad_norm": 731.6546020507812, + "learning_rate": 5.28398669291794e-06, + "loss": 26.945, + "step": 266860 + }, + { + "epoch": 0.5390942844329885, + "grad_norm": 415.3759460449219, + "learning_rate": 5.283638189862914e-06, + "loss": 21.0374, + "step": 266870 + }, + { + "epoch": 0.5391144850656723, + "grad_norm": 83.38080596923828, + "learning_rate": 5.28328968542547e-06, + "loss": 17.559, + "step": 266880 + }, + { + "epoch": 0.5391346856983561, + "grad_norm": 6.075273036956787, + "learning_rate": 5.2829411796073085e-06, + "loss": 16.2683, + "step": 266890 + }, + { + "epoch": 0.5391548863310399, + "grad_norm": 128.59246826171875, + 
"learning_rate": 5.282592672410124e-06, + "loss": 12.9341, + "step": 266900 + }, + { + "epoch": 0.5391750869637237, + "grad_norm": 199.89334106445312, + "learning_rate": 5.282244163835617e-06, + "loss": 18.6665, + "step": 266910 + }, + { + "epoch": 0.5391952875964076, + "grad_norm": 18.035985946655273, + "learning_rate": 5.281895653885486e-06, + "loss": 36.1795, + "step": 266920 + }, + { + "epoch": 0.5392154882290914, + "grad_norm": 919.3248901367188, + "learning_rate": 5.2815471425614315e-06, + "loss": 24.2861, + "step": 266930 + }, + { + "epoch": 0.5392356888617752, + "grad_norm": 145.3681182861328, + "learning_rate": 5.281198629865149e-06, + "loss": 23.8972, + "step": 266940 + }, + { + "epoch": 0.539255889494459, + "grad_norm": 48.641456604003906, + "learning_rate": 5.28085011579834e-06, + "loss": 26.0535, + "step": 266950 + }, + { + "epoch": 0.5392760901271428, + "grad_norm": 470.5023498535156, + "learning_rate": 5.2805016003627e-06, + "loss": 19.4136, + "step": 266960 + }, + { + "epoch": 0.5392962907598265, + "grad_norm": 451.8315734863281, + "learning_rate": 5.2801530835599295e-06, + "loss": 18.7958, + "step": 266970 + }, + { + "epoch": 0.5393164913925104, + "grad_norm": 201.1184844970703, + "learning_rate": 5.2798045653917275e-06, + "loss": 8.757, + "step": 266980 + }, + { + "epoch": 0.5393366920251942, + "grad_norm": 233.8871307373047, + "learning_rate": 5.27945604585979e-06, + "loss": 18.3737, + "step": 266990 + }, + { + "epoch": 0.539356892657878, + "grad_norm": 332.1441955566406, + "learning_rate": 5.27910752496582e-06, + "loss": 14.6293, + "step": 267000 + }, + { + "epoch": 0.5393770932905618, + "grad_norm": 530.1821899414062, + "learning_rate": 5.278759002711513e-06, + "loss": 26.4881, + "step": 267010 + }, + { + "epoch": 0.5393972939232456, + "grad_norm": 262.960693359375, + "learning_rate": 5.278410479098568e-06, + "loss": 29.0102, + "step": 267020 + }, + { + "epoch": 0.5394174945559295, + "grad_norm": 546.9447021484375, + "learning_rate": 5.278061954128684e-06, + "loss": 24.2532, + "step": 267030 + }, + { + "epoch": 0.5394376951886133, + "grad_norm": 283.64508056640625, + "learning_rate": 5.27771342780356e-06, + "loss": 20.5688, + "step": 267040 + }, + { + "epoch": 0.5394578958212971, + "grad_norm": 308.6816101074219, + "learning_rate": 5.2773649001248965e-06, + "loss": 19.5089, + "step": 267050 + }, + { + "epoch": 0.5394780964539809, + "grad_norm": 499.01544189453125, + "learning_rate": 5.277016371094388e-06, + "loss": 13.3796, + "step": 267060 + }, + { + "epoch": 0.5394982970866647, + "grad_norm": 1139.388427734375, + "learning_rate": 5.276667840713735e-06, + "loss": 19.0707, + "step": 267070 + }, + { + "epoch": 0.5395184977193486, + "grad_norm": 236.742431640625, + "learning_rate": 5.276319308984637e-06, + "loss": 39.8516, + "step": 267080 + }, + { + "epoch": 0.5395386983520324, + "grad_norm": 527.2527465820312, + "learning_rate": 5.275970775908793e-06, + "loss": 16.3725, + "step": 267090 + }, + { + "epoch": 0.5395588989847162, + "grad_norm": 308.12030029296875, + "learning_rate": 5.275622241487899e-06, + "loss": 17.6134, + "step": 267100 + }, + { + "epoch": 0.5395790996174, + "grad_norm": 303.1805725097656, + "learning_rate": 5.275273705723657e-06, + "loss": 7.6083, + "step": 267110 + }, + { + "epoch": 0.5395993002500838, + "grad_norm": 1047.5565185546875, + "learning_rate": 5.274925168617763e-06, + "loss": 31.721, + "step": 267120 + }, + { + "epoch": 0.5396195008827677, + "grad_norm": 816.3365478515625, + "learning_rate": 5.274576630171919e-06, + "loss": 25.0034, + 
"step": 267130 + }, + { + "epoch": 0.5396397015154515, + "grad_norm": 320.99114990234375, + "learning_rate": 5.274228090387821e-06, + "loss": 30.0734, + "step": 267140 + }, + { + "epoch": 0.5396599021481353, + "grad_norm": 137.37452697753906, + "learning_rate": 5.273879549267168e-06, + "loss": 24.587, + "step": 267150 + }, + { + "epoch": 0.5396801027808191, + "grad_norm": 1203.2591552734375, + "learning_rate": 5.2735310068116605e-06, + "loss": 33.0592, + "step": 267160 + }, + { + "epoch": 0.539700303413503, + "grad_norm": 397.1142578125, + "learning_rate": 5.2731824630229955e-06, + "loss": 26.8125, + "step": 267170 + }, + { + "epoch": 0.5397205040461868, + "grad_norm": 462.3182067871094, + "learning_rate": 5.272833917902872e-06, + "loss": 22.8828, + "step": 267180 + }, + { + "epoch": 0.5397407046788706, + "grad_norm": 593.2162475585938, + "learning_rate": 5.27248537145299e-06, + "loss": 18.0968, + "step": 267190 + }, + { + "epoch": 0.5397609053115544, + "grad_norm": 804.9915161132812, + "learning_rate": 5.272136823675046e-06, + "loss": 33.5634, + "step": 267200 + }, + { + "epoch": 0.5397811059442382, + "grad_norm": 453.7289123535156, + "learning_rate": 5.271788274570741e-06, + "loss": 11.2804, + "step": 267210 + }, + { + "epoch": 0.5398013065769219, + "grad_norm": 412.3597717285156, + "learning_rate": 5.2714397241417736e-06, + "loss": 14.8988, + "step": 267220 + }, + { + "epoch": 0.5398215072096058, + "grad_norm": 485.5625915527344, + "learning_rate": 5.271091172389841e-06, + "loss": 23.9658, + "step": 267230 + }, + { + "epoch": 0.5398417078422896, + "grad_norm": 442.7439270019531, + "learning_rate": 5.270742619316642e-06, + "loss": 26.0454, + "step": 267240 + }, + { + "epoch": 0.5398619084749734, + "grad_norm": 297.7681579589844, + "learning_rate": 5.270394064923878e-06, + "loss": 17.0849, + "step": 267250 + }, + { + "epoch": 0.5398821091076572, + "grad_norm": 182.657958984375, + "learning_rate": 5.270045509213244e-06, + "loss": 17.654, + "step": 267260 + }, + { + "epoch": 0.539902309740341, + "grad_norm": 176.88636779785156, + "learning_rate": 5.2696969521864435e-06, + "loss": 22.5983, + "step": 267270 + }, + { + "epoch": 0.5399225103730249, + "grad_norm": 457.3097839355469, + "learning_rate": 5.2693483938451705e-06, + "loss": 12.3397, + "step": 267280 + }, + { + "epoch": 0.5399427110057087, + "grad_norm": 396.48687744140625, + "learning_rate": 5.268999834191128e-06, + "loss": 18.101, + "step": 267290 + }, + { + "epoch": 0.5399629116383925, + "grad_norm": 133.23741149902344, + "learning_rate": 5.268651273226011e-06, + "loss": 16.3219, + "step": 267300 + }, + { + "epoch": 0.5399831122710763, + "grad_norm": 331.26904296875, + "learning_rate": 5.268302710951522e-06, + "loss": 16.586, + "step": 267310 + }, + { + "epoch": 0.5400033129037601, + "grad_norm": 266.48883056640625, + "learning_rate": 5.267954147369359e-06, + "loss": 44.3651, + "step": 267320 + }, + { + "epoch": 0.540023513536444, + "grad_norm": 179.49365234375, + "learning_rate": 5.267605582481216e-06, + "loss": 36.2442, + "step": 267330 + }, + { + "epoch": 0.5400437141691278, + "grad_norm": 267.8640441894531, + "learning_rate": 5.2672570162887996e-06, + "loss": 33.6776, + "step": 267340 + }, + { + "epoch": 0.5400639148018116, + "grad_norm": 326.7994079589844, + "learning_rate": 5.2669084487938025e-06, + "loss": 16.1556, + "step": 267350 + }, + { + "epoch": 0.5400841154344954, + "grad_norm": 774.1517333984375, + "learning_rate": 5.266559879997928e-06, + "loss": 28.1933, + "step": 267360 + }, + { + "epoch": 0.5401043160671792, + 
"grad_norm": 384.9989318847656, + "learning_rate": 5.266211309902871e-06, + "loss": 11.1729, + "step": 267370 + }, + { + "epoch": 0.540124516699863, + "grad_norm": 370.0032653808594, + "learning_rate": 5.265862738510335e-06, + "loss": 24.5614, + "step": 267380 + }, + { + "epoch": 0.5401447173325469, + "grad_norm": 230.2034454345703, + "learning_rate": 5.265514165822014e-06, + "loss": 18.2394, + "step": 267390 + }, + { + "epoch": 0.5401649179652307, + "grad_norm": 593.0401000976562, + "learning_rate": 5.26516559183961e-06, + "loss": 22.137, + "step": 267400 + }, + { + "epoch": 0.5401851185979145, + "grad_norm": 378.4549560546875, + "learning_rate": 5.26481701656482e-06, + "loss": 14.3919, + "step": 267410 + }, + { + "epoch": 0.5402053192305983, + "grad_norm": 608.0015869140625, + "learning_rate": 5.264468439999345e-06, + "loss": 14.4138, + "step": 267420 + }, + { + "epoch": 0.5402255198632822, + "grad_norm": 188.7982940673828, + "learning_rate": 5.2641198621448845e-06, + "loss": 24.5316, + "step": 267430 + }, + { + "epoch": 0.540245720495966, + "grad_norm": 646.892822265625, + "learning_rate": 5.263771283003133e-06, + "loss": 30.7849, + "step": 267440 + }, + { + "epoch": 0.5402659211286498, + "grad_norm": 83.54496765136719, + "learning_rate": 5.263422702575793e-06, + "loss": 17.6118, + "step": 267450 + }, + { + "epoch": 0.5402861217613336, + "grad_norm": 192.852783203125, + "learning_rate": 5.263074120864564e-06, + "loss": 15.532, + "step": 267460 + }, + { + "epoch": 0.5403063223940174, + "grad_norm": 295.820556640625, + "learning_rate": 5.2627255378711414e-06, + "loss": 16.4569, + "step": 267470 + }, + { + "epoch": 0.5403265230267011, + "grad_norm": 378.82110595703125, + "learning_rate": 5.262376953597228e-06, + "loss": 11.037, + "step": 267480 + }, + { + "epoch": 0.540346723659385, + "grad_norm": 229.91859436035156, + "learning_rate": 5.26202836804452e-06, + "loss": 10.3388, + "step": 267490 + }, + { + "epoch": 0.5403669242920688, + "grad_norm": 309.54864501953125, + "learning_rate": 5.2616797812147205e-06, + "loss": 26.4148, + "step": 267500 + }, + { + "epoch": 0.5403871249247526, + "grad_norm": 227.2258758544922, + "learning_rate": 5.261331193109524e-06, + "loss": 14.9826, + "step": 267510 + }, + { + "epoch": 0.5404073255574364, + "grad_norm": 405.8484191894531, + "learning_rate": 5.260982603730629e-06, + "loss": 32.9173, + "step": 267520 + }, + { + "epoch": 0.5404275261901202, + "grad_norm": 359.85107421875, + "learning_rate": 5.260634013079738e-06, + "loss": 29.7109, + "step": 267530 + }, + { + "epoch": 0.5404477268228041, + "grad_norm": 280.190673828125, + "learning_rate": 5.260285421158548e-06, + "loss": 13.0215, + "step": 267540 + }, + { + "epoch": 0.5404679274554879, + "grad_norm": 438.93182373046875, + "learning_rate": 5.259936827968758e-06, + "loss": 15.9954, + "step": 267550 + }, + { + "epoch": 0.5404881280881717, + "grad_norm": 421.66094970703125, + "learning_rate": 5.25958823351207e-06, + "loss": 14.2632, + "step": 267560 + }, + { + "epoch": 0.5405083287208555, + "grad_norm": 798.4306640625, + "learning_rate": 5.259239637790178e-06, + "loss": 26.1816, + "step": 267570 + }, + { + "epoch": 0.5405285293535393, + "grad_norm": 48.887691497802734, + "learning_rate": 5.258891040804783e-06, + "loss": 15.2979, + "step": 267580 + }, + { + "epoch": 0.5405487299862232, + "grad_norm": 1093.5631103515625, + "learning_rate": 5.258542442557586e-06, + "loss": 40.7765, + "step": 267590 + }, + { + "epoch": 0.540568930618907, + "grad_norm": 53.55421447753906, + "learning_rate": 
5.258193843050283e-06, + "loss": 23.1957, + "step": 267600 + }, + { + "epoch": 0.5405891312515908, + "grad_norm": 135.53494262695312, + "learning_rate": 5.257845242284576e-06, + "loss": 20.9141, + "step": 267610 + }, + { + "epoch": 0.5406093318842746, + "grad_norm": 1050.6634521484375, + "learning_rate": 5.2574966402621615e-06, + "loss": 30.1993, + "step": 267620 + }, + { + "epoch": 0.5406295325169584, + "grad_norm": 712.6954345703125, + "learning_rate": 5.25714803698474e-06, + "loss": 21.2461, + "step": 267630 + }, + { + "epoch": 0.5406497331496423, + "grad_norm": 475.6708679199219, + "learning_rate": 5.25679943245401e-06, + "loss": 19.1392, + "step": 267640 + }, + { + "epoch": 0.5406699337823261, + "grad_norm": 401.18487548828125, + "learning_rate": 5.256450826671671e-06, + "loss": 18.4018, + "step": 267650 + }, + { + "epoch": 0.5406901344150099, + "grad_norm": 669.2423095703125, + "learning_rate": 5.256102219639423e-06, + "loss": 23.2288, + "step": 267660 + }, + { + "epoch": 0.5407103350476937, + "grad_norm": 739.7628784179688, + "learning_rate": 5.2557536113589625e-06, + "loss": 18.7592, + "step": 267670 + }, + { + "epoch": 0.5407305356803775, + "grad_norm": 198.78236389160156, + "learning_rate": 5.25540500183199e-06, + "loss": 22.221, + "step": 267680 + }, + { + "epoch": 0.5407507363130614, + "grad_norm": 156.10227966308594, + "learning_rate": 5.2550563910602035e-06, + "loss": 12.436, + "step": 267690 + }, + { + "epoch": 0.5407709369457452, + "grad_norm": 215.703369140625, + "learning_rate": 5.254707779045305e-06, + "loss": 18.5177, + "step": 267700 + }, + { + "epoch": 0.540791137578429, + "grad_norm": 460.5707702636719, + "learning_rate": 5.25435916578899e-06, + "loss": 21.5672, + "step": 267710 + }, + { + "epoch": 0.5408113382111128, + "grad_norm": 178.6480255126953, + "learning_rate": 5.254010551292961e-06, + "loss": 25.5017, + "step": 267720 + }, + { + "epoch": 0.5408315388437965, + "grad_norm": 196.70570373535156, + "learning_rate": 5.253661935558914e-06, + "loss": 15.2379, + "step": 267730 + }, + { + "epoch": 0.5408517394764804, + "grad_norm": 361.7843017578125, + "learning_rate": 5.25331331858855e-06, + "loss": 11.7066, + "step": 267740 + }, + { + "epoch": 0.5408719401091642, + "grad_norm": 279.7679748535156, + "learning_rate": 5.252964700383567e-06, + "loss": 13.664, + "step": 267750 + }, + { + "epoch": 0.540892140741848, + "grad_norm": 196.13653564453125, + "learning_rate": 5.252616080945665e-06, + "loss": 14.2319, + "step": 267760 + }, + { + "epoch": 0.5409123413745318, + "grad_norm": 273.5278015136719, + "learning_rate": 5.252267460276544e-06, + "loss": 33.3677, + "step": 267770 + }, + { + "epoch": 0.5409325420072156, + "grad_norm": 220.14305114746094, + "learning_rate": 5.2519188383779e-06, + "loss": 11.0212, + "step": 267780 + }, + { + "epoch": 0.5409527426398995, + "grad_norm": 286.4742431640625, + "learning_rate": 5.251570215251436e-06, + "loss": 22.6878, + "step": 267790 + }, + { + "epoch": 0.5409729432725833, + "grad_norm": 8.339970588684082, + "learning_rate": 5.251221590898848e-06, + "loss": 22.2532, + "step": 267800 + }, + { + "epoch": 0.5409931439052671, + "grad_norm": 316.2557067871094, + "learning_rate": 5.250872965321837e-06, + "loss": 18.4021, + "step": 267810 + }, + { + "epoch": 0.5410133445379509, + "grad_norm": 287.31121826171875, + "learning_rate": 5.250524338522102e-06, + "loss": 13.7786, + "step": 267820 + }, + { + "epoch": 0.5410335451706347, + "grad_norm": 74.9324722290039, + "learning_rate": 5.250175710501342e-06, + "loss": 9.5435, + "step": 267830 + 
}, + { + "epoch": 0.5410537458033186, + "grad_norm": 336.6729736328125, + "learning_rate": 5.249827081261255e-06, + "loss": 15.7134, + "step": 267840 + }, + { + "epoch": 0.5410739464360024, + "grad_norm": 230.1107635498047, + "learning_rate": 5.249478450803541e-06, + "loss": 19.7327, + "step": 267850 + }, + { + "epoch": 0.5410941470686862, + "grad_norm": 105.64854431152344, + "learning_rate": 5.2491298191298986e-06, + "loss": 22.1532, + "step": 267860 + }, + { + "epoch": 0.54111434770137, + "grad_norm": 268.0977783203125, + "learning_rate": 5.248781186242029e-06, + "loss": 12.7907, + "step": 267870 + }, + { + "epoch": 0.5411345483340538, + "grad_norm": 396.26824951171875, + "learning_rate": 5.2484325521416315e-06, + "loss": 12.5745, + "step": 267880 + }, + { + "epoch": 0.5411547489667377, + "grad_norm": 264.67877197265625, + "learning_rate": 5.2480839168304e-06, + "loss": 13.2185, + "step": 267890 + }, + { + "epoch": 0.5411749495994215, + "grad_norm": 726.4971313476562, + "learning_rate": 5.247735280310041e-06, + "loss": 14.9952, + "step": 267900 + }, + { + "epoch": 0.5411951502321053, + "grad_norm": 745.1004638671875, + "learning_rate": 5.247386642582248e-06, + "loss": 19.5732, + "step": 267910 + }, + { + "epoch": 0.5412153508647891, + "grad_norm": 595.3374633789062, + "learning_rate": 5.2470380036487245e-06, + "loss": 16.2793, + "step": 267920 + }, + { + "epoch": 0.5412355514974729, + "grad_norm": 361.1235046386719, + "learning_rate": 5.246689363511167e-06, + "loss": 25.6698, + "step": 267930 + }, + { + "epoch": 0.5412557521301568, + "grad_norm": 419.58831787109375, + "learning_rate": 5.2463407221712745e-06, + "loss": 19.84, + "step": 267940 + }, + { + "epoch": 0.5412759527628406, + "grad_norm": 489.6854553222656, + "learning_rate": 5.245992079630748e-06, + "loss": 18.5201, + "step": 267950 + }, + { + "epoch": 0.5412961533955244, + "grad_norm": 530.859130859375, + "learning_rate": 5.2456434358912865e-06, + "loss": 26.3349, + "step": 267960 + }, + { + "epoch": 0.5413163540282082, + "grad_norm": 123.85330200195312, + "learning_rate": 5.245294790954587e-06, + "loss": 16.3806, + "step": 267970 + }, + { + "epoch": 0.541336554660892, + "grad_norm": 94.74688720703125, + "learning_rate": 5.244946144822351e-06, + "loss": 10.811, + "step": 267980 + }, + { + "epoch": 0.5413567552935757, + "grad_norm": 700.8999633789062, + "learning_rate": 5.24459749749628e-06, + "loss": 18.7394, + "step": 267990 + }, + { + "epoch": 0.5413769559262596, + "grad_norm": 251.6261444091797, + "learning_rate": 5.244248848978067e-06, + "loss": 11.436, + "step": 268000 + }, + { + "epoch": 0.5413971565589434, + "grad_norm": 291.4370422363281, + "learning_rate": 5.243900199269416e-06, + "loss": 17.3246, + "step": 268010 + }, + { + "epoch": 0.5414173571916272, + "grad_norm": 607.9581909179688, + "learning_rate": 5.2435515483720246e-06, + "loss": 22.6848, + "step": 268020 + }, + { + "epoch": 0.541437557824311, + "grad_norm": 525.0802612304688, + "learning_rate": 5.243202896287593e-06, + "loss": 22.731, + "step": 268030 + }, + { + "epoch": 0.5414577584569948, + "grad_norm": 36.72077941894531, + "learning_rate": 5.242854243017821e-06, + "loss": 14.782, + "step": 268040 + }, + { + "epoch": 0.5414779590896787, + "grad_norm": 349.0224914550781, + "learning_rate": 5.242505588564404e-06, + "loss": 7.2446, + "step": 268050 + }, + { + "epoch": 0.5414981597223625, + "grad_norm": 381.7718200683594, + "learning_rate": 5.2421569329290465e-06, + "loss": 8.3921, + "step": 268060 + }, + { + "epoch": 0.5415183603550463, + "grad_norm": 
159.91815185546875, + "learning_rate": 5.241808276113445e-06, + "loss": 22.856, + "step": 268070 + }, + { + "epoch": 0.5415385609877301, + "grad_norm": 399.8495178222656, + "learning_rate": 5.241459618119299e-06, + "loss": 27.867, + "step": 268080 + }, + { + "epoch": 0.5415587616204139, + "grad_norm": 467.8490905761719, + "learning_rate": 5.241110958948307e-06, + "loss": 15.6234, + "step": 268090 + }, + { + "epoch": 0.5415789622530978, + "grad_norm": 747.2193603515625, + "learning_rate": 5.240762298602171e-06, + "loss": 16.2672, + "step": 268100 + }, + { + "epoch": 0.5415991628857816, + "grad_norm": 251.5902099609375, + "learning_rate": 5.240413637082588e-06, + "loss": 23.726, + "step": 268110 + }, + { + "epoch": 0.5416193635184654, + "grad_norm": 357.5133056640625, + "learning_rate": 5.240064974391259e-06, + "loss": 22.1624, + "step": 268120 + }, + { + "epoch": 0.5416395641511492, + "grad_norm": 354.6746826171875, + "learning_rate": 5.239716310529882e-06, + "loss": 18.5596, + "step": 268130 + }, + { + "epoch": 0.541659764783833, + "grad_norm": 55.200035095214844, + "learning_rate": 5.2393676455001565e-06, + "loss": 31.4963, + "step": 268140 + }, + { + "epoch": 0.5416799654165169, + "grad_norm": 192.11386108398438, + "learning_rate": 5.239018979303784e-06, + "loss": 33.9058, + "step": 268150 + }, + { + "epoch": 0.5417001660492007, + "grad_norm": 1041.11865234375, + "learning_rate": 5.238670311942459e-06, + "loss": 17.9892, + "step": 268160 + }, + { + "epoch": 0.5417203666818845, + "grad_norm": 320.5815734863281, + "learning_rate": 5.2383216434178856e-06, + "loss": 11.3272, + "step": 268170 + }, + { + "epoch": 0.5417405673145683, + "grad_norm": 223.17514038085938, + "learning_rate": 5.237972973731761e-06, + "loss": 34.2814, + "step": 268180 + }, + { + "epoch": 0.5417607679472521, + "grad_norm": 267.6535949707031, + "learning_rate": 5.237624302885785e-06, + "loss": 32.6544, + "step": 268190 + }, + { + "epoch": 0.541780968579936, + "grad_norm": 145.8458251953125, + "learning_rate": 5.237275630881658e-06, + "loss": 13.8566, + "step": 268200 + }, + { + "epoch": 0.5418011692126198, + "grad_norm": 121.94860076904297, + "learning_rate": 5.236926957721075e-06, + "loss": 20.9379, + "step": 268210 + }, + { + "epoch": 0.5418213698453036, + "grad_norm": 485.9193420410156, + "learning_rate": 5.236578283405742e-06, + "loss": 16.1732, + "step": 268220 + }, + { + "epoch": 0.5418415704779874, + "grad_norm": 321.4118957519531, + "learning_rate": 5.236229607937354e-06, + "loss": 18.9713, + "step": 268230 + }, + { + "epoch": 0.5418617711106711, + "grad_norm": 611.24609375, + "learning_rate": 5.235880931317612e-06, + "loss": 29.5335, + "step": 268240 + }, + { + "epoch": 0.541881971743355, + "grad_norm": 239.5470733642578, + "learning_rate": 5.235532253548213e-06, + "loss": 20.4106, + "step": 268250 + }, + { + "epoch": 0.5419021723760388, + "grad_norm": 388.25921630859375, + "learning_rate": 5.235183574630861e-06, + "loss": 15.0666, + "step": 268260 + }, + { + "epoch": 0.5419223730087226, + "grad_norm": 419.8719482421875, + "learning_rate": 5.234834894567252e-06, + "loss": 32.829, + "step": 268270 + }, + { + "epoch": 0.5419425736414064, + "grad_norm": 182.15264892578125, + "learning_rate": 5.2344862133590855e-06, + "loss": 14.6446, + "step": 268280 + }, + { + "epoch": 0.5419627742740902, + "grad_norm": 431.7532653808594, + "learning_rate": 5.234137531008062e-06, + "loss": 13.4429, + "step": 268290 + }, + { + "epoch": 0.541982974906774, + "grad_norm": 26.060903549194336, + "learning_rate": 5.233788847515882e-06, 
+ "loss": 29.4722, + "step": 268300 + }, + { + "epoch": 0.5420031755394579, + "grad_norm": 404.68316650390625, + "learning_rate": 5.233440162884241e-06, + "loss": 24.2382, + "step": 268310 + }, + { + "epoch": 0.5420233761721417, + "grad_norm": 320.2318420410156, + "learning_rate": 5.233091477114842e-06, + "loss": 27.1324, + "step": 268320 + }, + { + "epoch": 0.5420435768048255, + "grad_norm": 369.7477722167969, + "learning_rate": 5.232742790209384e-06, + "loss": 16.1336, + "step": 268330 + }, + { + "epoch": 0.5420637774375093, + "grad_norm": 546.682861328125, + "learning_rate": 5.232394102169566e-06, + "loss": 29.3535, + "step": 268340 + }, + { + "epoch": 0.5420839780701932, + "grad_norm": 316.20721435546875, + "learning_rate": 5.2320454129970866e-06, + "loss": 15.8043, + "step": 268350 + }, + { + "epoch": 0.542104178702877, + "grad_norm": 466.38140869140625, + "learning_rate": 5.2316967226936454e-06, + "loss": 19.0468, + "step": 268360 + }, + { + "epoch": 0.5421243793355608, + "grad_norm": 338.31561279296875, + "learning_rate": 5.231348031260943e-06, + "loss": 13.6088, + "step": 268370 + }, + { + "epoch": 0.5421445799682446, + "grad_norm": 194.09835815429688, + "learning_rate": 5.2309993387006795e-06, + "loss": 21.1841, + "step": 268380 + }, + { + "epoch": 0.5421647806009284, + "grad_norm": 562.6314697265625, + "learning_rate": 5.230650645014551e-06, + "loss": 16.7676, + "step": 268390 + }, + { + "epoch": 0.5421849812336123, + "grad_norm": 541.9793090820312, + "learning_rate": 5.230301950204261e-06, + "loss": 23.2916, + "step": 268400 + }, + { + "epoch": 0.5422051818662961, + "grad_norm": 569.9012451171875, + "learning_rate": 5.229953254271507e-06, + "loss": 25.0741, + "step": 268410 + }, + { + "epoch": 0.5422253824989799, + "grad_norm": 446.9424133300781, + "learning_rate": 5.229604557217988e-06, + "loss": 12.487, + "step": 268420 + }, + { + "epoch": 0.5422455831316637, + "grad_norm": 567.8118286132812, + "learning_rate": 5.229255859045405e-06, + "loss": 16.18, + "step": 268430 + }, + { + "epoch": 0.5422657837643475, + "grad_norm": 532.9462280273438, + "learning_rate": 5.228907159755457e-06, + "loss": 23.2328, + "step": 268440 + }, + { + "epoch": 0.5422859843970314, + "grad_norm": 241.0742950439453, + "learning_rate": 5.228558459349844e-06, + "loss": 17.8243, + "step": 268450 + }, + { + "epoch": 0.5423061850297152, + "grad_norm": 225.08526611328125, + "learning_rate": 5.2282097578302624e-06, + "loss": 21.8836, + "step": 268460 + }, + { + "epoch": 0.542326385662399, + "grad_norm": 14.749529838562012, + "learning_rate": 5.227861055198415e-06, + "loss": 15.4618, + "step": 268470 + }, + { + "epoch": 0.5423465862950828, + "grad_norm": 668.3131103515625, + "learning_rate": 5.227512351456001e-06, + "loss": 14.1384, + "step": 268480 + }, + { + "epoch": 0.5423667869277666, + "grad_norm": 226.07180786132812, + "learning_rate": 5.227163646604721e-06, + "loss": 19.7496, + "step": 268490 + }, + { + "epoch": 0.5423869875604503, + "grad_norm": 608.514892578125, + "learning_rate": 5.226814940646268e-06, + "loss": 21.0938, + "step": 268500 + }, + { + "epoch": 0.5424071881931342, + "grad_norm": 132.5753631591797, + "learning_rate": 5.226466233582351e-06, + "loss": 14.3611, + "step": 268510 + }, + { + "epoch": 0.542427388825818, + "grad_norm": 197.1700439453125, + "learning_rate": 5.226117525414663e-06, + "loss": 27.8339, + "step": 268520 + }, + { + "epoch": 0.5424475894585018, + "grad_norm": 395.619873046875, + "learning_rate": 5.225768816144907e-06, + "loss": 12.5512, + "step": 268530 + }, + { + "epoch": 
0.5424677900911856, + "grad_norm": 291.5622253417969, + "learning_rate": 5.225420105774781e-06, + "loss": 18.111, + "step": 268540 + }, + { + "epoch": 0.5424879907238694, + "grad_norm": 297.4586486816406, + "learning_rate": 5.2250713943059826e-06, + "loss": 27.0939, + "step": 268550 + }, + { + "epoch": 0.5425081913565533, + "grad_norm": 330.82550048828125, + "learning_rate": 5.224722681740217e-06, + "loss": 21.5205, + "step": 268560 + }, + { + "epoch": 0.5425283919892371, + "grad_norm": 186.67616271972656, + "learning_rate": 5.224373968079177e-06, + "loss": 11.1447, + "step": 268570 + }, + { + "epoch": 0.5425485926219209, + "grad_norm": 267.33770751953125, + "learning_rate": 5.224025253324567e-06, + "loss": 17.7968, + "step": 268580 + }, + { + "epoch": 0.5425687932546047, + "grad_norm": 28.95694923400879, + "learning_rate": 5.223676537478085e-06, + "loss": 14.0016, + "step": 268590 + }, + { + "epoch": 0.5425889938872885, + "grad_norm": 443.6894836425781, + "learning_rate": 5.223327820541432e-06, + "loss": 13.041, + "step": 268600 + }, + { + "epoch": 0.5426091945199724, + "grad_norm": 370.8968505859375, + "learning_rate": 5.222979102516304e-06, + "loss": 12.6683, + "step": 268610 + }, + { + "epoch": 0.5426293951526562, + "grad_norm": 374.7540588378906, + "learning_rate": 5.2226303834044036e-06, + "loss": 11.2769, + "step": 268620 + }, + { + "epoch": 0.54264959578534, + "grad_norm": 565.6763916015625, + "learning_rate": 5.22228166320743e-06, + "loss": 16.972, + "step": 268630 + }, + { + "epoch": 0.5426697964180238, + "grad_norm": 792.1552124023438, + "learning_rate": 5.2219329419270825e-06, + "loss": 7.2791, + "step": 268640 + }, + { + "epoch": 0.5426899970507076, + "grad_norm": 504.08355712890625, + "learning_rate": 5.221584219565061e-06, + "loss": 20.0575, + "step": 268650 + }, + { + "epoch": 0.5427101976833915, + "grad_norm": 303.4017639160156, + "learning_rate": 5.221235496123064e-06, + "loss": 21.0303, + "step": 268660 + }, + { + "epoch": 0.5427303983160753, + "grad_norm": 837.9013671875, + "learning_rate": 5.220886771602793e-06, + "loss": 10.3367, + "step": 268670 + }, + { + "epoch": 0.5427505989487591, + "grad_norm": 13.7284517288208, + "learning_rate": 5.2205380460059466e-06, + "loss": 14.3829, + "step": 268680 + }, + { + "epoch": 0.5427707995814429, + "grad_norm": 620.3934326171875, + "learning_rate": 5.2201893193342234e-06, + "loss": 16.6033, + "step": 268690 + }, + { + "epoch": 0.5427910002141267, + "grad_norm": 307.4211120605469, + "learning_rate": 5.219840591589325e-06, + "loss": 15.6857, + "step": 268700 + }, + { + "epoch": 0.5428112008468106, + "grad_norm": 263.71844482421875, + "learning_rate": 5.21949186277295e-06, + "loss": 17.0162, + "step": 268710 + }, + { + "epoch": 0.5428314014794944, + "grad_norm": 777.85791015625, + "learning_rate": 5.219143132886799e-06, + "loss": 31.5894, + "step": 268720 + }, + { + "epoch": 0.5428516021121782, + "grad_norm": 820.7977294921875, + "learning_rate": 5.218794401932571e-06, + "loss": 22.0244, + "step": 268730 + }, + { + "epoch": 0.542871802744862, + "grad_norm": 204.06298828125, + "learning_rate": 5.218445669911964e-06, + "loss": 11.2491, + "step": 268740 + }, + { + "epoch": 0.5428920033775458, + "grad_norm": 285.7690124511719, + "learning_rate": 5.218096936826681e-06, + "loss": 28.146, + "step": 268750 + }, + { + "epoch": 0.5429122040102295, + "grad_norm": 364.461181640625, + "learning_rate": 5.21774820267842e-06, + "loss": 18.1274, + "step": 268760 + }, + { + "epoch": 0.5429324046429134, + "grad_norm": 359.6499938964844, + 
"learning_rate": 5.2173994674688786e-06, + "loss": 13.214, + "step": 268770 + }, + { + "epoch": 0.5429526052755972, + "grad_norm": 197.79251098632812, + "learning_rate": 5.2170507311997605e-06, + "loss": 27.0702, + "step": 268780 + }, + { + "epoch": 0.542972805908281, + "grad_norm": 292.5074768066406, + "learning_rate": 5.216701993872763e-06, + "loss": 26.8686, + "step": 268790 + }, + { + "epoch": 0.5429930065409648, + "grad_norm": 40.50156021118164, + "learning_rate": 5.216353255489586e-06, + "loss": 16.3967, + "step": 268800 + }, + { + "epoch": 0.5430132071736486, + "grad_norm": 1.1966899633407593, + "learning_rate": 5.21600451605193e-06, + "loss": 8.233, + "step": 268810 + }, + { + "epoch": 0.5430334078063325, + "grad_norm": 549.1659545898438, + "learning_rate": 5.215655775561493e-06, + "loss": 19.3528, + "step": 268820 + }, + { + "epoch": 0.5430536084390163, + "grad_norm": 483.72149658203125, + "learning_rate": 5.215307034019977e-06, + "loss": 14.8691, + "step": 268830 + }, + { + "epoch": 0.5430738090717001, + "grad_norm": 265.9928283691406, + "learning_rate": 5.214958291429079e-06, + "loss": 17.7766, + "step": 268840 + }, + { + "epoch": 0.5430940097043839, + "grad_norm": 193.4778289794922, + "learning_rate": 5.214609547790504e-06, + "loss": 22.8305, + "step": 268850 + }, + { + "epoch": 0.5431142103370677, + "grad_norm": 284.76556396484375, + "learning_rate": 5.214260803105945e-06, + "loss": 25.5298, + "step": 268860 + }, + { + "epoch": 0.5431344109697516, + "grad_norm": 406.9671936035156, + "learning_rate": 5.213912057377105e-06, + "loss": 19.4018, + "step": 268870 + }, + { + "epoch": 0.5431546116024354, + "grad_norm": 384.6708679199219, + "learning_rate": 5.213563310605686e-06, + "loss": 10.0793, + "step": 268880 + }, + { + "epoch": 0.5431748122351192, + "grad_norm": 449.76385498046875, + "learning_rate": 5.213214562793383e-06, + "loss": 24.3866, + "step": 268890 + }, + { + "epoch": 0.543195012867803, + "grad_norm": 897.1937255859375, + "learning_rate": 5.212865813941899e-06, + "loss": 26.1392, + "step": 268900 + }, + { + "epoch": 0.5432152135004868, + "grad_norm": 268.6221923828125, + "learning_rate": 5.2125170640529325e-06, + "loss": 17.073, + "step": 268910 + }, + { + "epoch": 0.5432354141331707, + "grad_norm": 9.538450241088867, + "learning_rate": 5.212168313128183e-06, + "loss": 14.7929, + "step": 268920 + }, + { + "epoch": 0.5432556147658545, + "grad_norm": 189.12136840820312, + "learning_rate": 5.2118195611693515e-06, + "loss": 23.0357, + "step": 268930 + }, + { + "epoch": 0.5432758153985383, + "grad_norm": 558.6755981445312, + "learning_rate": 5.211470808178137e-06, + "loss": 17.7658, + "step": 268940 + }, + { + "epoch": 0.5432960160312221, + "grad_norm": 303.1871032714844, + "learning_rate": 5.21112205415624e-06, + "loss": 12.9415, + "step": 268950 + }, + { + "epoch": 0.543316216663906, + "grad_norm": 241.51431274414062, + "learning_rate": 5.210773299105358e-06, + "loss": 18.8631, + "step": 268960 + }, + { + "epoch": 0.5433364172965898, + "grad_norm": 963.1439208984375, + "learning_rate": 5.210424543027195e-06, + "loss": 28.2425, + "step": 268970 + }, + { + "epoch": 0.5433566179292736, + "grad_norm": 170.69017028808594, + "learning_rate": 5.210075785923446e-06, + "loss": 13.1179, + "step": 268980 + }, + { + "epoch": 0.5433768185619574, + "grad_norm": 865.9637451171875, + "learning_rate": 5.209727027795816e-06, + "loss": 24.2814, + "step": 268990 + }, + { + "epoch": 0.5433970191946412, + "grad_norm": 271.4307861328125, + "learning_rate": 5.209378268645998e-06, + "loss": 
31.9874, + "step": 269000 + }, + { + "epoch": 0.5434172198273249, + "grad_norm": 139.47418212890625, + "learning_rate": 5.209029508475699e-06, + "loss": 17.2439, + "step": 269010 + }, + { + "epoch": 0.5434374204600088, + "grad_norm": 471.4844665527344, + "learning_rate": 5.208680747286614e-06, + "loss": 16.2661, + "step": 269020 + }, + { + "epoch": 0.5434576210926926, + "grad_norm": 422.0306701660156, + "learning_rate": 5.2083319850804445e-06, + "loss": 17.746, + "step": 269030 + }, + { + "epoch": 0.5434778217253764, + "grad_norm": 225.77320861816406, + "learning_rate": 5.20798322185889e-06, + "loss": 18.9828, + "step": 269040 + }, + { + "epoch": 0.5434980223580602, + "grad_norm": 206.78524780273438, + "learning_rate": 5.207634457623652e-06, + "loss": 15.9055, + "step": 269050 + }, + { + "epoch": 0.543518222990744, + "grad_norm": 135.02735900878906, + "learning_rate": 5.207285692376427e-06, + "loss": 14.0669, + "step": 269060 + }, + { + "epoch": 0.5435384236234279, + "grad_norm": 956.4881591796875, + "learning_rate": 5.206936926118917e-06, + "loss": 38.2816, + "step": 269070 + }, + { + "epoch": 0.5435586242561117, + "grad_norm": 196.66917419433594, + "learning_rate": 5.206588158852822e-06, + "loss": 11.8373, + "step": 269080 + }, + { + "epoch": 0.5435788248887955, + "grad_norm": 289.5069274902344, + "learning_rate": 5.206239390579842e-06, + "loss": 8.7243, + "step": 269090 + }, + { + "epoch": 0.5435990255214793, + "grad_norm": 179.99478149414062, + "learning_rate": 5.205890621301676e-06, + "loss": 11.5368, + "step": 269100 + }, + { + "epoch": 0.5436192261541631, + "grad_norm": 248.57186889648438, + "learning_rate": 5.205541851020022e-06, + "loss": 11.7105, + "step": 269110 + }, + { + "epoch": 0.543639426786847, + "grad_norm": 318.1573486328125, + "learning_rate": 5.205193079736584e-06, + "loss": 20.7421, + "step": 269120 + }, + { + "epoch": 0.5436596274195308, + "grad_norm": 1006.7952270507812, + "learning_rate": 5.204844307453059e-06, + "loss": 25.0289, + "step": 269130 + }, + { + "epoch": 0.5436798280522146, + "grad_norm": 522.5182495117188, + "learning_rate": 5.204495534171148e-06, + "loss": 16.6759, + "step": 269140 + }, + { + "epoch": 0.5437000286848984, + "grad_norm": 107.71540832519531, + "learning_rate": 5.204146759892551e-06, + "loss": 23.3373, + "step": 269150 + }, + { + "epoch": 0.5437202293175822, + "grad_norm": 685.9620971679688, + "learning_rate": 5.2037979846189655e-06, + "loss": 12.0589, + "step": 269160 + }, + { + "epoch": 0.5437404299502661, + "grad_norm": 2979.21728515625, + "learning_rate": 5.203449208352096e-06, + "loss": 28.0118, + "step": 269170 + }, + { + "epoch": 0.5437606305829499, + "grad_norm": 626.6129150390625, + "learning_rate": 5.203100431093638e-06, + "loss": 23.7391, + "step": 269180 + }, + { + "epoch": 0.5437808312156337, + "grad_norm": 168.13368225097656, + "learning_rate": 5.202751652845294e-06, + "loss": 20.9722, + "step": 269190 + }, + { + "epoch": 0.5438010318483175, + "grad_norm": 336.0380554199219, + "learning_rate": 5.202402873608763e-06, + "loss": 38.7637, + "step": 269200 + }, + { + "epoch": 0.5438212324810013, + "grad_norm": 485.0022888183594, + "learning_rate": 5.2020540933857455e-06, + "loss": 27.5945, + "step": 269210 + }, + { + "epoch": 0.5438414331136852, + "grad_norm": 364.8822021484375, + "learning_rate": 5.201705312177939e-06, + "loss": 12.1223, + "step": 269220 + }, + { + "epoch": 0.543861633746369, + "grad_norm": 606.0297241210938, + "learning_rate": 5.2013565299870475e-06, + "loss": 18.6942, + "step": 269230 + }, + { + "epoch": 
0.5438818343790528, + "grad_norm": 76.48331451416016, + "learning_rate": 5.201007746814767e-06, + "loss": 16.9994, + "step": 269240 + }, + { + "epoch": 0.5439020350117366, + "grad_norm": 617.9075317382812, + "learning_rate": 5.200658962662799e-06, + "loss": 9.8852, + "step": 269250 + }, + { + "epoch": 0.5439222356444204, + "grad_norm": 290.7354736328125, + "learning_rate": 5.2003101775328455e-06, + "loss": 22.7082, + "step": 269260 + }, + { + "epoch": 0.5439424362771041, + "grad_norm": 322.8319396972656, + "learning_rate": 5.199961391426601e-06, + "loss": 22.3802, + "step": 269270 + }, + { + "epoch": 0.543962636909788, + "grad_norm": 250.70684814453125, + "learning_rate": 5.199612604345773e-06, + "loss": 12.3198, + "step": 269280 + }, + { + "epoch": 0.5439828375424718, + "grad_norm": 176.34652709960938, + "learning_rate": 5.199263816292055e-06, + "loss": 15.4487, + "step": 269290 + }, + { + "epoch": 0.5440030381751556, + "grad_norm": 1224.541015625, + "learning_rate": 5.19891502726715e-06, + "loss": 32.8499, + "step": 269300 + }, + { + "epoch": 0.5440232388078394, + "grad_norm": 406.5760498046875, + "learning_rate": 5.198566237272757e-06, + "loss": 17.3707, + "step": 269310 + }, + { + "epoch": 0.5440434394405232, + "grad_norm": 221.11219787597656, + "learning_rate": 5.198217446310576e-06, + "loss": 10.185, + "step": 269320 + }, + { + "epoch": 0.5440636400732071, + "grad_norm": 830.6646118164062, + "learning_rate": 5.197868654382307e-06, + "loss": 34.4302, + "step": 269330 + }, + { + "epoch": 0.5440838407058909, + "grad_norm": 111.73726654052734, + "learning_rate": 5.197519861489652e-06, + "loss": 17.7608, + "step": 269340 + }, + { + "epoch": 0.5441040413385747, + "grad_norm": 180.2215576171875, + "learning_rate": 5.197171067634307e-06, + "loss": 18.8368, + "step": 269350 + }, + { + "epoch": 0.5441242419712585, + "grad_norm": 1184.9173583984375, + "learning_rate": 5.196822272817975e-06, + "loss": 36.22, + "step": 269360 + }, + { + "epoch": 0.5441444426039423, + "grad_norm": 233.44515991210938, + "learning_rate": 5.196473477042355e-06, + "loss": 24.8944, + "step": 269370 + }, + { + "epoch": 0.5441646432366262, + "grad_norm": 394.85345458984375, + "learning_rate": 5.196124680309148e-06, + "loss": 18.5167, + "step": 269380 + }, + { + "epoch": 0.54418484386931, + "grad_norm": 300.61065673828125, + "learning_rate": 5.1957758826200525e-06, + "loss": 31.084, + "step": 269390 + }, + { + "epoch": 0.5442050445019938, + "grad_norm": 208.00262451171875, + "learning_rate": 5.195427083976768e-06, + "loss": 12.3589, + "step": 269400 + }, + { + "epoch": 0.5442252451346776, + "grad_norm": 1135.732666015625, + "learning_rate": 5.195078284380996e-06, + "loss": 37.1302, + "step": 269410 + }, + { + "epoch": 0.5442454457673614, + "grad_norm": 279.19293212890625, + "learning_rate": 5.194729483834438e-06, + "loss": 23.9411, + "step": 269420 + }, + { + "epoch": 0.5442656464000453, + "grad_norm": 68.35921478271484, + "learning_rate": 5.19438068233879e-06, + "loss": 20.2188, + "step": 269430 + }, + { + "epoch": 0.5442858470327291, + "grad_norm": 842.069091796875, + "learning_rate": 5.194031879895756e-06, + "loss": 19.3563, + "step": 269440 + }, + { + "epoch": 0.5443060476654129, + "grad_norm": 885.3028564453125, + "learning_rate": 5.193683076507031e-06, + "loss": 20.23, + "step": 269450 + }, + { + "epoch": 0.5443262482980967, + "grad_norm": 429.1953125, + "learning_rate": 5.193334272174321e-06, + "loss": 29.8051, + "step": 269460 + }, + { + "epoch": 0.5443464489307805, + "grad_norm": 422.0041809082031, + 
"learning_rate": 5.192985466899323e-06, + "loss": 23.2527, + "step": 269470 + }, + { + "epoch": 0.5443666495634644, + "grad_norm": 176.17156982421875, + "learning_rate": 5.1926366606837365e-06, + "loss": 23.1212, + "step": 269480 + }, + { + "epoch": 0.5443868501961482, + "grad_norm": 359.56170654296875, + "learning_rate": 5.192287853529263e-06, + "loss": 21.667, + "step": 269490 + }, + { + "epoch": 0.544407050828832, + "grad_norm": 124.85405731201172, + "learning_rate": 5.1919390454376e-06, + "loss": 16.2355, + "step": 269500 + }, + { + "epoch": 0.5444272514615158, + "grad_norm": 139.08444213867188, + "learning_rate": 5.191590236410451e-06, + "loss": 21.6483, + "step": 269510 + }, + { + "epoch": 0.5444474520941995, + "grad_norm": 316.3241882324219, + "learning_rate": 5.191241426449513e-06, + "loss": 18.8722, + "step": 269520 + }, + { + "epoch": 0.5444676527268834, + "grad_norm": 696.6727905273438, + "learning_rate": 5.1908926155564885e-06, + "loss": 30.8763, + "step": 269530 + }, + { + "epoch": 0.5444878533595672, + "grad_norm": 32.10171127319336, + "learning_rate": 5.190543803733077e-06, + "loss": 16.6814, + "step": 269540 + }, + { + "epoch": 0.544508053992251, + "grad_norm": 391.8594055175781, + "learning_rate": 5.190194990980979e-06, + "loss": 14.8044, + "step": 269550 + }, + { + "epoch": 0.5445282546249348, + "grad_norm": 607.99755859375, + "learning_rate": 5.189846177301892e-06, + "loss": 27.7045, + "step": 269560 + }, + { + "epoch": 0.5445484552576186, + "grad_norm": 142.82421875, + "learning_rate": 5.189497362697518e-06, + "loss": 15.7861, + "step": 269570 + }, + { + "epoch": 0.5445686558903025, + "grad_norm": 652.6636352539062, + "learning_rate": 5.189148547169558e-06, + "loss": 24.9412, + "step": 269580 + }, + { + "epoch": 0.5445888565229863, + "grad_norm": 799.7808227539062, + "learning_rate": 5.188799730719708e-06, + "loss": 32.4974, + "step": 269590 + }, + { + "epoch": 0.5446090571556701, + "grad_norm": 546.4911499023438, + "learning_rate": 5.188450913349674e-06, + "loss": 20.361, + "step": 269600 + }, + { + "epoch": 0.5446292577883539, + "grad_norm": 428.3880615234375, + "learning_rate": 5.188102095061151e-06, + "loss": 22.8714, + "step": 269610 + }, + { + "epoch": 0.5446494584210377, + "grad_norm": 172.1936798095703, + "learning_rate": 5.187753275855843e-06, + "loss": 22.8166, + "step": 269620 + }, + { + "epoch": 0.5446696590537216, + "grad_norm": 164.95391845703125, + "learning_rate": 5.187404455735448e-06, + "loss": 25.7966, + "step": 269630 + }, + { + "epoch": 0.5446898596864054, + "grad_norm": 339.9266357421875, + "learning_rate": 5.187055634701664e-06, + "loss": 19.976, + "step": 269640 + }, + { + "epoch": 0.5447100603190892, + "grad_norm": 284.510986328125, + "learning_rate": 5.186706812756197e-06, + "loss": 20.0402, + "step": 269650 + }, + { + "epoch": 0.544730260951773, + "grad_norm": 332.6054382324219, + "learning_rate": 5.1863579899007424e-06, + "loss": 15.5388, + "step": 269660 + }, + { + "epoch": 0.5447504615844568, + "grad_norm": 222.80596923828125, + "learning_rate": 5.186009166137e-06, + "loss": 14.0953, + "step": 269670 + }, + { + "epoch": 0.5447706622171407, + "grad_norm": 301.080078125, + "learning_rate": 5.185660341466673e-06, + "loss": 14.633, + "step": 269680 + }, + { + "epoch": 0.5447908628498245, + "grad_norm": 150.59100341796875, + "learning_rate": 5.1853115158914595e-06, + "loss": 9.0634, + "step": 269690 + }, + { + "epoch": 0.5448110634825083, + "grad_norm": 7.729396820068359, + "learning_rate": 5.18496268941306e-06, + "loss": 11.2065, + "step": 
269700 + }, + { + "epoch": 0.5448312641151921, + "grad_norm": 382.56707763671875, + "learning_rate": 5.1846138620331766e-06, + "loss": 11.5221, + "step": 269710 + }, + { + "epoch": 0.5448514647478759, + "grad_norm": 199.4328155517578, + "learning_rate": 5.184265033753506e-06, + "loss": 35.3915, + "step": 269720 + }, + { + "epoch": 0.5448716653805598, + "grad_norm": 404.68121337890625, + "learning_rate": 5.18391620457575e-06, + "loss": 26.1202, + "step": 269730 + }, + { + "epoch": 0.5448918660132436, + "grad_norm": 141.96356201171875, + "learning_rate": 5.183567374501608e-06, + "loss": 20.8223, + "step": 269740 + }, + { + "epoch": 0.5449120666459274, + "grad_norm": 345.89385986328125, + "learning_rate": 5.183218543532782e-06, + "loss": 9.901, + "step": 269750 + }, + { + "epoch": 0.5449322672786112, + "grad_norm": 273.4797058105469, + "learning_rate": 5.182869711670971e-06, + "loss": 17.5253, + "step": 269760 + }, + { + "epoch": 0.544952467911295, + "grad_norm": 746.3378295898438, + "learning_rate": 5.182520878917874e-06, + "loss": 24.2267, + "step": 269770 + }, + { + "epoch": 0.5449726685439787, + "grad_norm": 607.471435546875, + "learning_rate": 5.1821720452751945e-06, + "loss": 28.7323, + "step": 269780 + }, + { + "epoch": 0.5449928691766626, + "grad_norm": 164.28695678710938, + "learning_rate": 5.181823210744629e-06, + "loss": 25.6031, + "step": 269790 + }, + { + "epoch": 0.5450130698093464, + "grad_norm": 321.724609375, + "learning_rate": 5.18147437532788e-06, + "loss": 15.5116, + "step": 269800 + }, + { + "epoch": 0.5450332704420302, + "grad_norm": 240.0693359375, + "learning_rate": 5.181125539026646e-06, + "loss": 14.679, + "step": 269810 + }, + { + "epoch": 0.545053471074714, + "grad_norm": 276.6699523925781, + "learning_rate": 5.180776701842629e-06, + "loss": 6.2979, + "step": 269820 + }, + { + "epoch": 0.5450736717073978, + "grad_norm": 476.1277770996094, + "learning_rate": 5.180427863777528e-06, + "loss": 21.9404, + "step": 269830 + }, + { + "epoch": 0.5450938723400817, + "grad_norm": 945.3489990234375, + "learning_rate": 5.180079024833043e-06, + "loss": 16.7016, + "step": 269840 + }, + { + "epoch": 0.5451140729727655, + "grad_norm": 468.953125, + "learning_rate": 5.179730185010875e-06, + "loss": 23.8215, + "step": 269850 + }, + { + "epoch": 0.5451342736054493, + "grad_norm": 0.0, + "learning_rate": 5.179381344312724e-06, + "loss": 16.315, + "step": 269860 + }, + { + "epoch": 0.5451544742381331, + "grad_norm": 127.08235931396484, + "learning_rate": 5.179032502740291e-06, + "loss": 20.8154, + "step": 269870 + }, + { + "epoch": 0.5451746748708169, + "grad_norm": 334.3542175292969, + "learning_rate": 5.178683660295273e-06, + "loss": 24.2088, + "step": 269880 + }, + { + "epoch": 0.5451948755035008, + "grad_norm": 500.1344909667969, + "learning_rate": 5.178334816979374e-06, + "loss": 20.7751, + "step": 269890 + }, + { + "epoch": 0.5452150761361846, + "grad_norm": 343.8796691894531, + "learning_rate": 5.177985972794293e-06, + "loss": 20.277, + "step": 269900 + }, + { + "epoch": 0.5452352767688684, + "grad_norm": 111.69133758544922, + "learning_rate": 5.177637127741729e-06, + "loss": 21.7036, + "step": 269910 + }, + { + "epoch": 0.5452554774015522, + "grad_norm": 1137.2344970703125, + "learning_rate": 5.177288281823385e-06, + "loss": 34.2687, + "step": 269920 + }, + { + "epoch": 0.545275678034236, + "grad_norm": 1319.4700927734375, + "learning_rate": 5.176939435040958e-06, + "loss": 34.8743, + "step": 269930 + }, + { + "epoch": 0.5452958786669199, + "grad_norm": 269.3182067871094, + 
"learning_rate": 5.17659058739615e-06, + "loss": 24.8758, + "step": 269940 + }, + { + "epoch": 0.5453160792996037, + "grad_norm": 627.1061401367188, + "learning_rate": 5.17624173889066e-06, + "loss": 19.9231, + "step": 269950 + }, + { + "epoch": 0.5453362799322875, + "grad_norm": 83.94268035888672, + "learning_rate": 5.175892889526189e-06, + "loss": 13.5603, + "step": 269960 + }, + { + "epoch": 0.5453564805649713, + "grad_norm": 182.0495147705078, + "learning_rate": 5.175544039304439e-06, + "loss": 18.6815, + "step": 269970 + }, + { + "epoch": 0.5453766811976551, + "grad_norm": 352.86236572265625, + "learning_rate": 5.175195188227108e-06, + "loss": 29.8947, + "step": 269980 + }, + { + "epoch": 0.545396881830339, + "grad_norm": 341.9924011230469, + "learning_rate": 5.174846336295897e-06, + "loss": 22.6534, + "step": 269990 + }, + { + "epoch": 0.5454170824630228, + "grad_norm": 563.7020874023438, + "learning_rate": 5.174497483512506e-06, + "loss": 15.0685, + "step": 270000 + }, + { + "epoch": 0.5454372830957066, + "grad_norm": 606.5368041992188, + "learning_rate": 5.174148629878635e-06, + "loss": 31.138, + "step": 270010 + }, + { + "epoch": 0.5454574837283904, + "grad_norm": 295.58697509765625, + "learning_rate": 5.1737997753959846e-06, + "loss": 30.6939, + "step": 270020 + }, + { + "epoch": 0.5454776843610742, + "grad_norm": 554.0674438476562, + "learning_rate": 5.173450920066256e-06, + "loss": 38.5939, + "step": 270030 + }, + { + "epoch": 0.545497884993758, + "grad_norm": 462.5925598144531, + "learning_rate": 5.173102063891148e-06, + "loss": 14.4654, + "step": 270040 + }, + { + "epoch": 0.5455180856264418, + "grad_norm": 452.27740478515625, + "learning_rate": 5.172753206872363e-06, + "loss": 24.4104, + "step": 270050 + }, + { + "epoch": 0.5455382862591256, + "grad_norm": 256.5585632324219, + "learning_rate": 5.172404349011599e-06, + "loss": 21.2853, + "step": 270060 + }, + { + "epoch": 0.5455584868918094, + "grad_norm": 289.0415344238281, + "learning_rate": 5.172055490310555e-06, + "loss": 22.3098, + "step": 270070 + }, + { + "epoch": 0.5455786875244932, + "grad_norm": 336.8205871582031, + "learning_rate": 5.171706630770935e-06, + "loss": 14.1378, + "step": 270080 + }, + { + "epoch": 0.545598888157177, + "grad_norm": 336.18194580078125, + "learning_rate": 5.171357770394439e-06, + "loss": 14.374, + "step": 270090 + }, + { + "epoch": 0.5456190887898609, + "grad_norm": 344.4145812988281, + "learning_rate": 5.171008909182765e-06, + "loss": 19.6497, + "step": 270100 + }, + { + "epoch": 0.5456392894225447, + "grad_norm": 236.5043182373047, + "learning_rate": 5.170660047137613e-06, + "loss": 28.6461, + "step": 270110 + }, + { + "epoch": 0.5456594900552285, + "grad_norm": 1888.2957763671875, + "learning_rate": 5.1703111842606864e-06, + "loss": 15.333, + "step": 270120 + }, + { + "epoch": 0.5456796906879123, + "grad_norm": 143.0895233154297, + "learning_rate": 5.169962320553683e-06, + "loss": 17.241, + "step": 270130 + }, + { + "epoch": 0.5456998913205962, + "grad_norm": 0.0, + "learning_rate": 5.1696134560183045e-06, + "loss": 12.0756, + "step": 270140 + }, + { + "epoch": 0.54572009195328, + "grad_norm": 269.81671142578125, + "learning_rate": 5.169264590656249e-06, + "loss": 17.0617, + "step": 270150 + }, + { + "epoch": 0.5457402925859638, + "grad_norm": 704.2405395507812, + "learning_rate": 5.16891572446922e-06, + "loss": 23.0752, + "step": 270160 + }, + { + "epoch": 0.5457604932186476, + "grad_norm": 98.35591125488281, + "learning_rate": 5.168566857458917e-06, + "loss": 11.9061, + "step": 270170 
+ }, + { + "epoch": 0.5457806938513314, + "grad_norm": 455.7306213378906, + "learning_rate": 5.168217989627037e-06, + "loss": 16.8541, + "step": 270180 + }, + { + "epoch": 0.5458008944840153, + "grad_norm": 530.9266357421875, + "learning_rate": 5.1678691209752855e-06, + "loss": 16.3598, + "step": 270190 + }, + { + "epoch": 0.5458210951166991, + "grad_norm": 88.47686767578125, + "learning_rate": 5.167520251505358e-06, + "loss": 8.2374, + "step": 270200 + }, + { + "epoch": 0.5458412957493829, + "grad_norm": 567.403076171875, + "learning_rate": 5.1671713812189585e-06, + "loss": 14.3183, + "step": 270210 + }, + { + "epoch": 0.5458614963820667, + "grad_norm": 599.0480346679688, + "learning_rate": 5.166822510117785e-06, + "loss": 28.233, + "step": 270220 + }, + { + "epoch": 0.5458816970147505, + "grad_norm": 457.58868408203125, + "learning_rate": 5.166473638203539e-06, + "loss": 21.6233, + "step": 270230 + }, + { + "epoch": 0.5459018976474344, + "grad_norm": 813.5020751953125, + "learning_rate": 5.166124765477923e-06, + "loss": 33.3464, + "step": 270240 + }, + { + "epoch": 0.5459220982801182, + "grad_norm": 288.6170959472656, + "learning_rate": 5.165775891942631e-06, + "loss": 24.956, + "step": 270250 + }, + { + "epoch": 0.545942298912802, + "grad_norm": 282.595703125, + "learning_rate": 5.165427017599371e-06, + "loss": 25.055, + "step": 270260 + }, + { + "epoch": 0.5459624995454858, + "grad_norm": 388.81134033203125, + "learning_rate": 5.1650781424498385e-06, + "loss": 20.2176, + "step": 270270 + }, + { + "epoch": 0.5459827001781696, + "grad_norm": 460.6811218261719, + "learning_rate": 5.164729266495735e-06, + "loss": 55.1902, + "step": 270280 + }, + { + "epoch": 0.5460029008108533, + "grad_norm": 466.9342956542969, + "learning_rate": 5.16438038973876e-06, + "loss": 37.4311, + "step": 270290 + }, + { + "epoch": 0.5460231014435372, + "grad_norm": 1034.878662109375, + "learning_rate": 5.164031512180616e-06, + "loss": 23.7974, + "step": 270300 + }, + { + "epoch": 0.546043302076221, + "grad_norm": 187.3511199951172, + "learning_rate": 5.163682633823003e-06, + "loss": 14.4381, + "step": 270310 + }, + { + "epoch": 0.5460635027089048, + "grad_norm": 494.9722900390625, + "learning_rate": 5.16333375466762e-06, + "loss": 22.032, + "step": 270320 + }, + { + "epoch": 0.5460837033415886, + "grad_norm": 14.278849601745605, + "learning_rate": 5.162984874716168e-06, + "loss": 26.7451, + "step": 270330 + }, + { + "epoch": 0.5461039039742724, + "grad_norm": 902.59521484375, + "learning_rate": 5.162635993970347e-06, + "loss": 17.8312, + "step": 270340 + }, + { + "epoch": 0.5461241046069563, + "grad_norm": 305.71771240234375, + "learning_rate": 5.162287112431858e-06, + "loss": 12.0388, + "step": 270350 + }, + { + "epoch": 0.5461443052396401, + "grad_norm": 253.86412048339844, + "learning_rate": 5.1619382301024025e-06, + "loss": 16.5757, + "step": 270360 + }, + { + "epoch": 0.5461645058723239, + "grad_norm": 260.5821228027344, + "learning_rate": 5.16158934698368e-06, + "loss": 15.3401, + "step": 270370 + }, + { + "epoch": 0.5461847065050077, + "grad_norm": 229.546875, + "learning_rate": 5.161240463077387e-06, + "loss": 25.9571, + "step": 270380 + }, + { + "epoch": 0.5462049071376915, + "grad_norm": 382.9422607421875, + "learning_rate": 5.160891578385232e-06, + "loss": 13.2796, + "step": 270390 + }, + { + "epoch": 0.5462251077703754, + "grad_norm": 537.4158325195312, + "learning_rate": 5.160542692908909e-06, + "loss": 20.2076, + "step": 270400 + }, + { + "epoch": 0.5462453084030592, + "grad_norm": 308.6058349609375, 
+ "learning_rate": 5.16019380665012e-06, + "loss": 8.9912, + "step": 270410 + }, + { + "epoch": 0.546265509035743, + "grad_norm": 621.6725463867188, + "learning_rate": 5.159844919610566e-06, + "loss": 25.2461, + "step": 270420 + }, + { + "epoch": 0.5462857096684268, + "grad_norm": 643.1211547851562, + "learning_rate": 5.159496031791947e-06, + "loss": 37.0699, + "step": 270430 + }, + { + "epoch": 0.5463059103011106, + "grad_norm": 235.88206481933594, + "learning_rate": 5.159147143195965e-06, + "loss": 14.9954, + "step": 270440 + }, + { + "epoch": 0.5463261109337945, + "grad_norm": 227.0337371826172, + "learning_rate": 5.158798253824319e-06, + "loss": 13.3651, + "step": 270450 + }, + { + "epoch": 0.5463463115664783, + "grad_norm": 0.0, + "learning_rate": 5.158449363678708e-06, + "loss": 25.9175, + "step": 270460 + }, + { + "epoch": 0.5463665121991621, + "grad_norm": 153.15687561035156, + "learning_rate": 5.1581004727608345e-06, + "loss": 6.5073, + "step": 270470 + }, + { + "epoch": 0.5463867128318459, + "grad_norm": 160.54690551757812, + "learning_rate": 5.1577515810724e-06, + "loss": 18.2212, + "step": 270480 + }, + { + "epoch": 0.5464069134645297, + "grad_norm": 191.25054931640625, + "learning_rate": 5.1574026886151005e-06, + "loss": 29.0367, + "step": 270490 + }, + { + "epoch": 0.5464271140972136, + "grad_norm": 418.3304748535156, + "learning_rate": 5.157053795390642e-06, + "loss": 22.2465, + "step": 270500 + }, + { + "epoch": 0.5464473147298974, + "grad_norm": 260.75054931640625, + "learning_rate": 5.156704901400722e-06, + "loss": 23.1032, + "step": 270510 + }, + { + "epoch": 0.5464675153625812, + "grad_norm": 302.0773620605469, + "learning_rate": 5.156356006647041e-06, + "loss": 10.0188, + "step": 270520 + }, + { + "epoch": 0.546487715995265, + "grad_norm": 492.5730895996094, + "learning_rate": 5.156007111131301e-06, + "loss": 12.8026, + "step": 270530 + }, + { + "epoch": 0.5465079166279488, + "grad_norm": 380.69659423828125, + "learning_rate": 5.155658214855197e-06, + "loss": 11.8795, + "step": 270540 + }, + { + "epoch": 0.5465281172606326, + "grad_norm": 1181.2581787109375, + "learning_rate": 5.155309317820438e-06, + "loss": 15.8361, + "step": 270550 + }, + { + "epoch": 0.5465483178933164, + "grad_norm": 564.9685668945312, + "learning_rate": 5.154960420028718e-06, + "loss": 17.8807, + "step": 270560 + }, + { + "epoch": 0.5465685185260002, + "grad_norm": 392.8375549316406, + "learning_rate": 5.154611521481742e-06, + "loss": 16.7477, + "step": 270570 + }, + { + "epoch": 0.546588719158684, + "grad_norm": 645.0961303710938, + "learning_rate": 5.154262622181205e-06, + "loss": 25.7439, + "step": 270580 + }, + { + "epoch": 0.5466089197913678, + "grad_norm": 239.34262084960938, + "learning_rate": 5.153913722128813e-06, + "loss": 36.698, + "step": 270590 + }, + { + "epoch": 0.5466291204240517, + "grad_norm": 254.29592895507812, + "learning_rate": 5.153564821326265e-06, + "loss": 13.964, + "step": 270600 + }, + { + "epoch": 0.5466493210567355, + "grad_norm": 125.31671142578125, + "learning_rate": 5.153215919775259e-06, + "loss": 12.2003, + "step": 270610 + }, + { + "epoch": 0.5466695216894193, + "grad_norm": 204.45960998535156, + "learning_rate": 5.1528670174774965e-06, + "loss": 20.8455, + "step": 270620 + }, + { + "epoch": 0.5466897223221031, + "grad_norm": 798.116943359375, + "learning_rate": 5.15251811443468e-06, + "loss": 24.2493, + "step": 270630 + }, + { + "epoch": 0.5467099229547869, + "grad_norm": 1514.6075439453125, + "learning_rate": 5.152169210648509e-06, + "loss": 20.5565, + 
"step": 270640 + }, + { + "epoch": 0.5467301235874708, + "grad_norm": 501.8855285644531, + "learning_rate": 5.151820306120682e-06, + "loss": 14.8503, + "step": 270650 + }, + { + "epoch": 0.5467503242201546, + "grad_norm": 257.0301513671875, + "learning_rate": 5.151471400852903e-06, + "loss": 43.7242, + "step": 270660 + }, + { + "epoch": 0.5467705248528384, + "grad_norm": 514.08349609375, + "learning_rate": 5.15112249484687e-06, + "loss": 17.7152, + "step": 270670 + }, + { + "epoch": 0.5467907254855222, + "grad_norm": 753.3203125, + "learning_rate": 5.150773588104284e-06, + "loss": 24.4477, + "step": 270680 + }, + { + "epoch": 0.546810926118206, + "grad_norm": 124.55496978759766, + "learning_rate": 5.150424680626846e-06, + "loss": 23.9681, + "step": 270690 + }, + { + "epoch": 0.5468311267508899, + "grad_norm": 154.38717651367188, + "learning_rate": 5.150075772416256e-06, + "loss": 21.9183, + "step": 270700 + }, + { + "epoch": 0.5468513273835737, + "grad_norm": 116.17897033691406, + "learning_rate": 5.149726863474217e-06, + "loss": 10.05, + "step": 270710 + }, + { + "epoch": 0.5468715280162575, + "grad_norm": 372.535888671875, + "learning_rate": 5.149377953802426e-06, + "loss": 27.1142, + "step": 270720 + }, + { + "epoch": 0.5468917286489413, + "grad_norm": 397.5634765625, + "learning_rate": 5.149029043402584e-06, + "loss": 14.8103, + "step": 270730 + }, + { + "epoch": 0.5469119292816251, + "grad_norm": 167.58265686035156, + "learning_rate": 5.1486801322763935e-06, + "loss": 9.2214, + "step": 270740 + }, + { + "epoch": 0.546932129914309, + "grad_norm": 442.3593444824219, + "learning_rate": 5.148331220425554e-06, + "loss": 24.5757, + "step": 270750 + }, + { + "epoch": 0.5469523305469928, + "grad_norm": 278.6236267089844, + "learning_rate": 5.147982307851766e-06, + "loss": 23.799, + "step": 270760 + }, + { + "epoch": 0.5469725311796766, + "grad_norm": 266.60107421875, + "learning_rate": 5.147633394556731e-06, + "loss": 19.6006, + "step": 270770 + }, + { + "epoch": 0.5469927318123604, + "grad_norm": 565.9557495117188, + "learning_rate": 5.147284480542149e-06, + "loss": 27.3513, + "step": 270780 + }, + { + "epoch": 0.5470129324450442, + "grad_norm": 403.841064453125, + "learning_rate": 5.1469355658097186e-06, + "loss": 36.6437, + "step": 270790 + }, + { + "epoch": 0.5470331330777279, + "grad_norm": 257.75042724609375, + "learning_rate": 5.146586650361143e-06, + "loss": 11.5962, + "step": 270800 + }, + { + "epoch": 0.5470533337104118, + "grad_norm": 434.3193054199219, + "learning_rate": 5.146237734198121e-06, + "loss": 24.7431, + "step": 270810 + }, + { + "epoch": 0.5470735343430956, + "grad_norm": 113.63842010498047, + "learning_rate": 5.145888817322355e-06, + "loss": 5.8752, + "step": 270820 + }, + { + "epoch": 0.5470937349757794, + "grad_norm": 305.0410461425781, + "learning_rate": 5.145539899735543e-06, + "loss": 14.7208, + "step": 270830 + }, + { + "epoch": 0.5471139356084632, + "grad_norm": 384.3434143066406, + "learning_rate": 5.1451909814393895e-06, + "loss": 10.4676, + "step": 270840 + }, + { + "epoch": 0.547134136241147, + "grad_norm": 13.002974510192871, + "learning_rate": 5.144842062435591e-06, + "loss": 21.7042, + "step": 270850 + }, + { + "epoch": 0.5471543368738309, + "grad_norm": 137.16197204589844, + "learning_rate": 5.144493142725851e-06, + "loss": 21.789, + "step": 270860 + }, + { + "epoch": 0.5471745375065147, + "grad_norm": 8.841368675231934, + "learning_rate": 5.144144222311868e-06, + "loss": 10.6037, + "step": 270870 + }, + { + "epoch": 0.5471947381391985, + "grad_norm": 
352.5816345214844, + "learning_rate": 5.143795301195343e-06, + "loss": 17.2547, + "step": 270880 + }, + { + "epoch": 0.5472149387718823, + "grad_norm": 1420.1578369140625, + "learning_rate": 5.1434463793779795e-06, + "loss": 13.2471, + "step": 270890 + }, + { + "epoch": 0.5472351394045661, + "grad_norm": 334.9108581542969, + "learning_rate": 5.143097456861474e-06, + "loss": 39.9207, + "step": 270900 + }, + { + "epoch": 0.54725534003725, + "grad_norm": 1152.33251953125, + "learning_rate": 5.14274853364753e-06, + "loss": 35.0431, + "step": 270910 + }, + { + "epoch": 0.5472755406699338, + "grad_norm": 319.8374938964844, + "learning_rate": 5.142399609737846e-06, + "loss": 13.7766, + "step": 270920 + }, + { + "epoch": 0.5472957413026176, + "grad_norm": 570.3477172851562, + "learning_rate": 5.142050685134124e-06, + "loss": 9.5086, + "step": 270930 + }, + { + "epoch": 0.5473159419353014, + "grad_norm": 504.3333740234375, + "learning_rate": 5.141701759838065e-06, + "loss": 31.4781, + "step": 270940 + }, + { + "epoch": 0.5473361425679852, + "grad_norm": 715.6351928710938, + "learning_rate": 5.141352833851367e-06, + "loss": 22.7826, + "step": 270950 + }, + { + "epoch": 0.5473563432006691, + "grad_norm": 93.32371520996094, + "learning_rate": 5.141003907175733e-06, + "loss": 14.7414, + "step": 270960 + }, + { + "epoch": 0.5473765438333529, + "grad_norm": 496.8685607910156, + "learning_rate": 5.140654979812864e-06, + "loss": 37.5867, + "step": 270970 + }, + { + "epoch": 0.5473967444660367, + "grad_norm": 646.5079956054688, + "learning_rate": 5.140306051764459e-06, + "loss": 16.9507, + "step": 270980 + }, + { + "epoch": 0.5474169450987205, + "grad_norm": 478.1387023925781, + "learning_rate": 5.139957123032217e-06, + "loss": 22.7514, + "step": 270990 + }, + { + "epoch": 0.5474371457314043, + "grad_norm": 152.84176635742188, + "learning_rate": 5.139608193617846e-06, + "loss": 13.7706, + "step": 271000 + }, + { + "epoch": 0.5474573463640882, + "grad_norm": 361.98858642578125, + "learning_rate": 5.139259263523038e-06, + "loss": 12.1614, + "step": 271010 + }, + { + "epoch": 0.547477546996772, + "grad_norm": 924.8574829101562, + "learning_rate": 5.138910332749499e-06, + "loss": 17.7725, + "step": 271020 + }, + { + "epoch": 0.5474977476294558, + "grad_norm": 611.1624145507812, + "learning_rate": 5.138561401298926e-06, + "loss": 23.9968, + "step": 271030 + }, + { + "epoch": 0.5475179482621396, + "grad_norm": 111.28363800048828, + "learning_rate": 5.138212469173022e-06, + "loss": 23.7445, + "step": 271040 + }, + { + "epoch": 0.5475381488948234, + "grad_norm": 335.9480285644531, + "learning_rate": 5.1378635363734884e-06, + "loss": 11.0445, + "step": 271050 + }, + { + "epoch": 0.5475583495275071, + "grad_norm": 195.91561889648438, + "learning_rate": 5.137514602902024e-06, + "loss": 17.046, + "step": 271060 + }, + { + "epoch": 0.547578550160191, + "grad_norm": 205.09426879882812, + "learning_rate": 5.13716566876033e-06, + "loss": 27.2901, + "step": 271070 + }, + { + "epoch": 0.5475987507928748, + "grad_norm": 89.74513244628906, + "learning_rate": 5.136816733950108e-06, + "loss": 20.4734, + "step": 271080 + }, + { + "epoch": 0.5476189514255586, + "grad_norm": 491.3321838378906, + "learning_rate": 5.136467798473057e-06, + "loss": 11.6179, + "step": 271090 + }, + { + "epoch": 0.5476391520582424, + "grad_norm": 633.1895751953125, + "learning_rate": 5.136118862330876e-06, + "loss": 34.9297, + "step": 271100 + }, + { + "epoch": 0.5476593526909262, + "grad_norm": 216.28421020507812, + "learning_rate": 
5.135769925525272e-06, + "loss": 16.7076, + "step": 271110 + }, + { + "epoch": 0.5476795533236101, + "grad_norm": 574.0361328125, + "learning_rate": 5.135420988057941e-06, + "loss": 15.4687, + "step": 271120 + }, + { + "epoch": 0.5476997539562939, + "grad_norm": 326.55908203125, + "learning_rate": 5.135072049930584e-06, + "loss": 20.5921, + "step": 271130 + }, + { + "epoch": 0.5477199545889777, + "grad_norm": 225.47398376464844, + "learning_rate": 5.1347231111449034e-06, + "loss": 17.2656, + "step": 271140 + }, + { + "epoch": 0.5477401552216615, + "grad_norm": 268.95947265625, + "learning_rate": 5.134374171702596e-06, + "loss": 20.3391, + "step": 271150 + }, + { + "epoch": 0.5477603558543453, + "grad_norm": 530.3201904296875, + "learning_rate": 5.1340252316053686e-06, + "loss": 15.5422, + "step": 271160 + }, + { + "epoch": 0.5477805564870292, + "grad_norm": 383.7457580566406, + "learning_rate": 5.133676290854915e-06, + "loss": 24.9986, + "step": 271170 + }, + { + "epoch": 0.547800757119713, + "grad_norm": 243.0459442138672, + "learning_rate": 5.133327349452941e-06, + "loss": 16.5114, + "step": 271180 + }, + { + "epoch": 0.5478209577523968, + "grad_norm": 416.4145812988281, + "learning_rate": 5.1329784074011454e-06, + "loss": 22.5556, + "step": 271190 + }, + { + "epoch": 0.5478411583850806, + "grad_norm": 550.4816284179688, + "learning_rate": 5.13262946470123e-06, + "loss": 23.3909, + "step": 271200 + }, + { + "epoch": 0.5478613590177644, + "grad_norm": 329.26409912109375, + "learning_rate": 5.132280521354896e-06, + "loss": 45.6847, + "step": 271210 + }, + { + "epoch": 0.5478815596504483, + "grad_norm": 51.97325134277344, + "learning_rate": 5.13193157736384e-06, + "loss": 8.4784, + "step": 271220 + }, + { + "epoch": 0.5479017602831321, + "grad_norm": 607.20849609375, + "learning_rate": 5.131582632729766e-06, + "loss": 17.7073, + "step": 271230 + }, + { + "epoch": 0.5479219609158159, + "grad_norm": 425.3113098144531, + "learning_rate": 5.131233687454375e-06, + "loss": 20.6337, + "step": 271240 + }, + { + "epoch": 0.5479421615484997, + "grad_norm": 841.8544921875, + "learning_rate": 5.130884741539367e-06, + "loss": 25.0148, + "step": 271250 + }, + { + "epoch": 0.5479623621811835, + "grad_norm": 463.42962646484375, + "learning_rate": 5.130535794986441e-06, + "loss": 16.8616, + "step": 271260 + }, + { + "epoch": 0.5479825628138674, + "grad_norm": 357.28094482421875, + "learning_rate": 5.130186847797302e-06, + "loss": 22.4319, + "step": 271270 + }, + { + "epoch": 0.5480027634465512, + "grad_norm": 64.43353271484375, + "learning_rate": 5.1298378999736465e-06, + "loss": 10.2836, + "step": 271280 + }, + { + "epoch": 0.548022964079235, + "grad_norm": 245.54632568359375, + "learning_rate": 5.129488951517176e-06, + "loss": 14.4729, + "step": 271290 + }, + { + "epoch": 0.5480431647119188, + "grad_norm": 460.65496826171875, + "learning_rate": 5.1291400024295946e-06, + "loss": 15.8487, + "step": 271300 + }, + { + "epoch": 0.5480633653446025, + "grad_norm": 472.4771423339844, + "learning_rate": 5.128791052712597e-06, + "loss": 28.0166, + "step": 271310 + }, + { + "epoch": 0.5480835659772864, + "grad_norm": 399.5858154296875, + "learning_rate": 5.128442102367891e-06, + "loss": 20.5258, + "step": 271320 + }, + { + "epoch": 0.5481037666099702, + "grad_norm": 112.1563491821289, + "learning_rate": 5.128093151397172e-06, + "loss": 11.7805, + "step": 271330 + }, + { + "epoch": 0.548123967242654, + "grad_norm": 25.99222183227539, + "learning_rate": 5.127744199802143e-06, + "loss": 17.4676, + "step": 271340 + }, + 
{ + "epoch": 0.5481441678753378, + "grad_norm": 237.99264526367188, + "learning_rate": 5.127395247584503e-06, + "loss": 14.9504, + "step": 271350 + }, + { + "epoch": 0.5481643685080216, + "grad_norm": 364.6976013183594, + "learning_rate": 5.127046294745955e-06, + "loss": 13.0167, + "step": 271360 + }, + { + "epoch": 0.5481845691407055, + "grad_norm": 189.3727264404297, + "learning_rate": 5.126697341288198e-06, + "loss": 16.8966, + "step": 271370 + }, + { + "epoch": 0.5482047697733893, + "grad_norm": 285.57855224609375, + "learning_rate": 5.126348387212935e-06, + "loss": 20.2623, + "step": 271380 + }, + { + "epoch": 0.5482249704060731, + "grad_norm": 0.8439492583274841, + "learning_rate": 5.125999432521864e-06, + "loss": 12.5823, + "step": 271390 + }, + { + "epoch": 0.5482451710387569, + "grad_norm": 262.3326416015625, + "learning_rate": 5.1256504772166885e-06, + "loss": 17.7855, + "step": 271400 + }, + { + "epoch": 0.5482653716714407, + "grad_norm": 997.421875, + "learning_rate": 5.125301521299107e-06, + "loss": 27.1073, + "step": 271410 + }, + { + "epoch": 0.5482855723041246, + "grad_norm": 450.3413391113281, + "learning_rate": 5.12495256477082e-06, + "loss": 16.3724, + "step": 271420 + }, + { + "epoch": 0.5483057729368084, + "grad_norm": 209.2963409423828, + "learning_rate": 5.124603607633532e-06, + "loss": 26.7913, + "step": 271430 + }, + { + "epoch": 0.5483259735694922, + "grad_norm": 647.6463012695312, + "learning_rate": 5.124254649888938e-06, + "loss": 22.9064, + "step": 271440 + }, + { + "epoch": 0.548346174202176, + "grad_norm": 273.53656005859375, + "learning_rate": 5.123905691538744e-06, + "loss": 20.3962, + "step": 271450 + }, + { + "epoch": 0.5483663748348598, + "grad_norm": 306.15765380859375, + "learning_rate": 5.123556732584648e-06, + "loss": 7.9716, + "step": 271460 + }, + { + "epoch": 0.5483865754675437, + "grad_norm": 337.2430419921875, + "learning_rate": 5.1232077730283515e-06, + "loss": 32.7838, + "step": 271470 + }, + { + "epoch": 0.5484067761002275, + "grad_norm": 330.0291748046875, + "learning_rate": 5.122858812871555e-06, + "loss": 38.0643, + "step": 271480 + }, + { + "epoch": 0.5484269767329113, + "grad_norm": 496.5209655761719, + "learning_rate": 5.122509852115959e-06, + "loss": 19.4325, + "step": 271490 + }, + { + "epoch": 0.5484471773655951, + "grad_norm": 211.59396362304688, + "learning_rate": 5.1221608907632665e-06, + "loss": 10.1448, + "step": 271500 + }, + { + "epoch": 0.5484673779982789, + "grad_norm": 427.5717468261719, + "learning_rate": 5.121811928815176e-06, + "loss": 21.4249, + "step": 271510 + }, + { + "epoch": 0.5484875786309628, + "grad_norm": 429.03070068359375, + "learning_rate": 5.121462966273388e-06, + "loss": 10.5538, + "step": 271520 + }, + { + "epoch": 0.5485077792636466, + "grad_norm": 0.0, + "learning_rate": 5.121114003139605e-06, + "loss": 29.7853, + "step": 271530 + }, + { + "epoch": 0.5485279798963304, + "grad_norm": 381.1743469238281, + "learning_rate": 5.120765039415528e-06, + "loss": 24.2726, + "step": 271540 + }, + { + "epoch": 0.5485481805290142, + "grad_norm": 305.34710693359375, + "learning_rate": 5.120416075102855e-06, + "loss": 13.1915, + "step": 271550 + }, + { + "epoch": 0.548568381161698, + "grad_norm": 125.90432739257812, + "learning_rate": 5.120067110203289e-06, + "loss": 27.9554, + "step": 271560 + }, + { + "epoch": 0.5485885817943817, + "grad_norm": 97.01104736328125, + "learning_rate": 5.119718144718532e-06, + "loss": 20.3684, + "step": 271570 + }, + { + "epoch": 0.5486087824270656, + "grad_norm": 538.3457641601562, + 
"learning_rate": 5.119369178650282e-06, + "loss": 29.191, + "step": 271580 + }, + { + "epoch": 0.5486289830597494, + "grad_norm": 475.2385559082031, + "learning_rate": 5.119020212000242e-06, + "loss": 31.225, + "step": 271590 + }, + { + "epoch": 0.5486491836924332, + "grad_norm": 417.1386413574219, + "learning_rate": 5.118671244770111e-06, + "loss": 21.5984, + "step": 271600 + }, + { + "epoch": 0.548669384325117, + "grad_norm": 390.43096923828125, + "learning_rate": 5.118322276961591e-06, + "loss": 10.422, + "step": 271610 + }, + { + "epoch": 0.5486895849578008, + "grad_norm": 91.09558868408203, + "learning_rate": 5.117973308576383e-06, + "loss": 39.9538, + "step": 271620 + }, + { + "epoch": 0.5487097855904847, + "grad_norm": 272.7152099609375, + "learning_rate": 5.117624339616186e-06, + "loss": 18.8097, + "step": 271630 + }, + { + "epoch": 0.5487299862231685, + "grad_norm": 36.199092864990234, + "learning_rate": 5.117275370082704e-06, + "loss": 19.1957, + "step": 271640 + }, + { + "epoch": 0.5487501868558523, + "grad_norm": 124.73023223876953, + "learning_rate": 5.116926399977634e-06, + "loss": 17.8846, + "step": 271650 + }, + { + "epoch": 0.5487703874885361, + "grad_norm": 499.0604553222656, + "learning_rate": 5.116577429302682e-06, + "loss": 8.9388, + "step": 271660 + }, + { + "epoch": 0.54879058812122, + "grad_norm": 598.5208129882812, + "learning_rate": 5.116228458059544e-06, + "loss": 27.804, + "step": 271670 + }, + { + "epoch": 0.5488107887539038, + "grad_norm": 224.67706298828125, + "learning_rate": 5.115879486249922e-06, + "loss": 12.2586, + "step": 271680 + }, + { + "epoch": 0.5488309893865876, + "grad_norm": 588.0548095703125, + "learning_rate": 5.115530513875517e-06, + "loss": 13.8001, + "step": 271690 + }, + { + "epoch": 0.5488511900192714, + "grad_norm": 401.2864685058594, + "learning_rate": 5.115181540938032e-06, + "loss": 16.7872, + "step": 271700 + }, + { + "epoch": 0.5488713906519552, + "grad_norm": 765.8482666015625, + "learning_rate": 5.114832567439164e-06, + "loss": 27.1772, + "step": 271710 + }, + { + "epoch": 0.548891591284639, + "grad_norm": 650.2271728515625, + "learning_rate": 5.114483593380619e-06, + "loss": 27.4372, + "step": 271720 + }, + { + "epoch": 0.5489117919173229, + "grad_norm": 141.1337890625, + "learning_rate": 5.114134618764093e-06, + "loss": 15.951, + "step": 271730 + }, + { + "epoch": 0.5489319925500067, + "grad_norm": 351.6592712402344, + "learning_rate": 5.113785643591289e-06, + "loss": 15.2401, + "step": 271740 + }, + { + "epoch": 0.5489521931826905, + "grad_norm": 182.3080596923828, + "learning_rate": 5.113436667863908e-06, + "loss": 25.1616, + "step": 271750 + }, + { + "epoch": 0.5489723938153743, + "grad_norm": 205.26272583007812, + "learning_rate": 5.1130876915836495e-06, + "loss": 6.5299, + "step": 271760 + }, + { + "epoch": 0.5489925944480581, + "grad_norm": 0.0, + "learning_rate": 5.112738714752216e-06, + "loss": 18.4389, + "step": 271770 + }, + { + "epoch": 0.549012795080742, + "grad_norm": 237.01702880859375, + "learning_rate": 5.112389737371307e-06, + "loss": 16.0729, + "step": 271780 + }, + { + "epoch": 0.5490329957134258, + "grad_norm": 600.59033203125, + "learning_rate": 5.112040759442624e-06, + "loss": 18.2496, + "step": 271790 + }, + { + "epoch": 0.5490531963461096, + "grad_norm": 106.196044921875, + "learning_rate": 5.111691780967869e-06, + "loss": 9.5061, + "step": 271800 + }, + { + "epoch": 0.5490733969787934, + "grad_norm": 149.70947265625, + "learning_rate": 5.111342801948741e-06, + "loss": 11.8095, + "step": 271810 + }, + { + 
"epoch": 0.5490935976114772, + "grad_norm": 280.45989990234375, + "learning_rate": 5.110993822386943e-06, + "loss": 34.7241, + "step": 271820 + }, + { + "epoch": 0.549113798244161, + "grad_norm": 474.2350769042969, + "learning_rate": 5.110644842284173e-06, + "loss": 20.4751, + "step": 271830 + }, + { + "epoch": 0.5491339988768448, + "grad_norm": 517.3894653320312, + "learning_rate": 5.110295861642134e-06, + "loss": 17.7995, + "step": 271840 + }, + { + "epoch": 0.5491541995095286, + "grad_norm": 361.8367919921875, + "learning_rate": 5.109946880462526e-06, + "loss": 11.9078, + "step": 271850 + }, + { + "epoch": 0.5491744001422124, + "grad_norm": 56.79434585571289, + "learning_rate": 5.109597898747052e-06, + "loss": 19.3067, + "step": 271860 + }, + { + "epoch": 0.5491946007748962, + "grad_norm": 246.1206512451172, + "learning_rate": 5.109248916497408e-06, + "loss": 26.395, + "step": 271870 + }, + { + "epoch": 0.54921480140758, + "grad_norm": 264.8724365234375, + "learning_rate": 5.1088999337153015e-06, + "loss": 11.9957, + "step": 271880 + }, + { + "epoch": 0.5492350020402639, + "grad_norm": 530.26123046875, + "learning_rate": 5.108550950402427e-06, + "loss": 14.8299, + "step": 271890 + }, + { + "epoch": 0.5492552026729477, + "grad_norm": 167.43325805664062, + "learning_rate": 5.1082019665604895e-06, + "loss": 17.903, + "step": 271900 + }, + { + "epoch": 0.5492754033056315, + "grad_norm": 412.4073791503906, + "learning_rate": 5.1078529821911895e-06, + "loss": 29.7963, + "step": 271910 + }, + { + "epoch": 0.5492956039383153, + "grad_norm": 336.5175476074219, + "learning_rate": 5.107503997296225e-06, + "loss": 27.0349, + "step": 271920 + }, + { + "epoch": 0.5493158045709992, + "grad_norm": 312.0638427734375, + "learning_rate": 5.107155011877302e-06, + "loss": 19.7038, + "step": 271930 + }, + { + "epoch": 0.549336005203683, + "grad_norm": 752.3568725585938, + "learning_rate": 5.1068060259361155e-06, + "loss": 15.015, + "step": 271940 + }, + { + "epoch": 0.5493562058363668, + "grad_norm": 413.9532470703125, + "learning_rate": 5.1064570394743705e-06, + "loss": 19.5474, + "step": 271950 + }, + { + "epoch": 0.5493764064690506, + "grad_norm": 341.1914367675781, + "learning_rate": 5.106108052493768e-06, + "loss": 14.9049, + "step": 271960 + }, + { + "epoch": 0.5493966071017344, + "grad_norm": 18.22888946533203, + "learning_rate": 5.105759064996007e-06, + "loss": 11.7169, + "step": 271970 + }, + { + "epoch": 0.5494168077344183, + "grad_norm": 1162.8382568359375, + "learning_rate": 5.105410076982789e-06, + "loss": 33.184, + "step": 271980 + }, + { + "epoch": 0.5494370083671021, + "grad_norm": 1.4117162227630615, + "learning_rate": 5.105061088455815e-06, + "loss": 28.6601, + "step": 271990 + }, + { + "epoch": 0.5494572089997859, + "grad_norm": 412.9360046386719, + "learning_rate": 5.1047120994167855e-06, + "loss": 14.7263, + "step": 272000 + }, + { + "epoch": 0.5494774096324697, + "grad_norm": 227.7907257080078, + "learning_rate": 5.104363109867403e-06, + "loss": 16.2703, + "step": 272010 + }, + { + "epoch": 0.5494976102651535, + "grad_norm": 109.08214569091797, + "learning_rate": 5.104014119809366e-06, + "loss": 14.6951, + "step": 272020 + }, + { + "epoch": 0.5495178108978374, + "grad_norm": 335.3134765625, + "learning_rate": 5.1036651292443774e-06, + "loss": 23.7036, + "step": 272030 + }, + { + "epoch": 0.5495380115305212, + "grad_norm": 922.094970703125, + "learning_rate": 5.103316138174139e-06, + "loss": 11.4573, + "step": 272040 + }, + { + "epoch": 0.549558212163205, + "grad_norm": 224.463134765625, 
+ "learning_rate": 5.102967146600348e-06, + "loss": 18.6878, + "step": 272050 + }, + { + "epoch": 0.5495784127958888, + "grad_norm": 398.591796875, + "learning_rate": 5.102618154524709e-06, + "loss": 24.6213, + "step": 272060 + }, + { + "epoch": 0.5495986134285726, + "grad_norm": 433.3297424316406, + "learning_rate": 5.1022691619489205e-06, + "loss": 14.8695, + "step": 272070 + }, + { + "epoch": 0.5496188140612563, + "grad_norm": 447.2281494140625, + "learning_rate": 5.101920168874686e-06, + "loss": 23.7672, + "step": 272080 + }, + { + "epoch": 0.5496390146939402, + "grad_norm": 301.21612548828125, + "learning_rate": 5.101571175303704e-06, + "loss": 22.6396, + "step": 272090 + }, + { + "epoch": 0.549659215326624, + "grad_norm": 260.4903259277344, + "learning_rate": 5.101222181237676e-06, + "loss": 23.6789, + "step": 272100 + }, + { + "epoch": 0.5496794159593078, + "grad_norm": 305.85595703125, + "learning_rate": 5.1008731866783045e-06, + "loss": 21.5858, + "step": 272110 + }, + { + "epoch": 0.5496996165919916, + "grad_norm": 766.408935546875, + "learning_rate": 5.100524191627289e-06, + "loss": 18.3037, + "step": 272120 + }, + { + "epoch": 0.5497198172246754, + "grad_norm": 489.1642150878906, + "learning_rate": 5.10017519608633e-06, + "loss": 19.8543, + "step": 272130 + }, + { + "epoch": 0.5497400178573593, + "grad_norm": 459.5745544433594, + "learning_rate": 5.09982620005713e-06, + "loss": 14.109, + "step": 272140 + }, + { + "epoch": 0.5497602184900431, + "grad_norm": 449.01348876953125, + "learning_rate": 5.09947720354139e-06, + "loss": 16.2363, + "step": 272150 + }, + { + "epoch": 0.5497804191227269, + "grad_norm": 355.3873291015625, + "learning_rate": 5.0991282065408086e-06, + "loss": 31.8173, + "step": 272160 + }, + { + "epoch": 0.5498006197554107, + "grad_norm": 507.5166320800781, + "learning_rate": 5.098779209057089e-06, + "loss": 17.5008, + "step": 272170 + }, + { + "epoch": 0.5498208203880945, + "grad_norm": 612.6504516601562, + "learning_rate": 5.098430211091931e-06, + "loss": 27.119, + "step": 272180 + }, + { + "epoch": 0.5498410210207784, + "grad_norm": 195.0165557861328, + "learning_rate": 5.098081212647036e-06, + "loss": 11.7921, + "step": 272190 + }, + { + "epoch": 0.5498612216534622, + "grad_norm": 9.930233001708984, + "learning_rate": 5.097732213724107e-06, + "loss": 13.5304, + "step": 272200 + }, + { + "epoch": 0.549881422286146, + "grad_norm": 826.8731689453125, + "learning_rate": 5.0973832143248405e-06, + "loss": 21.6225, + "step": 272210 + }, + { + "epoch": 0.5499016229188298, + "grad_norm": 240.11874389648438, + "learning_rate": 5.097034214450941e-06, + "loss": 18.5178, + "step": 272220 + }, + { + "epoch": 0.5499218235515136, + "grad_norm": 293.3193054199219, + "learning_rate": 5.096685214104109e-06, + "loss": 28.7017, + "step": 272230 + }, + { + "epoch": 0.5499420241841975, + "grad_norm": 376.0853271484375, + "learning_rate": 5.096336213286044e-06, + "loss": 35.3474, + "step": 272240 + }, + { + "epoch": 0.5499622248168813, + "grad_norm": 399.8958435058594, + "learning_rate": 5.09598721199845e-06, + "loss": 15.9976, + "step": 272250 + }, + { + "epoch": 0.5499824254495651, + "grad_norm": 211.1976318359375, + "learning_rate": 5.095638210243023e-06, + "loss": 10.4221, + "step": 272260 + }, + { + "epoch": 0.5500026260822489, + "grad_norm": 426.4907531738281, + "learning_rate": 5.09528920802147e-06, + "loss": 17.1838, + "step": 272270 + }, + { + "epoch": 0.5500228267149327, + "grad_norm": 147.65066528320312, + "learning_rate": 5.094940205335487e-06, + "loss": 11.8268, + 
"step": 272280 + }, + { + "epoch": 0.5500430273476166, + "grad_norm": 263.3006896972656, + "learning_rate": 5.094591202186777e-06, + "loss": 28.0874, + "step": 272290 + }, + { + "epoch": 0.5500632279803004, + "grad_norm": 771.3316040039062, + "learning_rate": 5.0942421985770415e-06, + "loss": 12.1778, + "step": 272300 + }, + { + "epoch": 0.5500834286129842, + "grad_norm": 194.57643127441406, + "learning_rate": 5.093893194507982e-06, + "loss": 25.5668, + "step": 272310 + }, + { + "epoch": 0.550103629245668, + "grad_norm": 8.136611938476562, + "learning_rate": 5.093544189981297e-06, + "loss": 13.8155, + "step": 272320 + }, + { + "epoch": 0.5501238298783518, + "grad_norm": 263.6270446777344, + "learning_rate": 5.093195184998689e-06, + "loss": 14.6481, + "step": 272330 + }, + { + "epoch": 0.5501440305110356, + "grad_norm": 264.63226318359375, + "learning_rate": 5.092846179561859e-06, + "loss": 12.843, + "step": 272340 + }, + { + "epoch": 0.5501642311437194, + "grad_norm": 507.2126770019531, + "learning_rate": 5.092497173672508e-06, + "loss": 19.0892, + "step": 272350 + }, + { + "epoch": 0.5501844317764032, + "grad_norm": 703.0762329101562, + "learning_rate": 5.092148167332338e-06, + "loss": 27.9065, + "step": 272360 + }, + { + "epoch": 0.550204632409087, + "grad_norm": 303.1719055175781, + "learning_rate": 5.091799160543047e-06, + "loss": 18.8407, + "step": 272370 + }, + { + "epoch": 0.5502248330417708, + "grad_norm": 279.81689453125, + "learning_rate": 5.09145015330634e-06, + "loss": 18.0741, + "step": 272380 + }, + { + "epoch": 0.5502450336744547, + "grad_norm": 334.1409912109375, + "learning_rate": 5.091101145623916e-06, + "loss": 18.1141, + "step": 272390 + }, + { + "epoch": 0.5502652343071385, + "grad_norm": 299.89886474609375, + "learning_rate": 5.090752137497474e-06, + "loss": 21.4631, + "step": 272400 + }, + { + "epoch": 0.5502854349398223, + "grad_norm": 216.7633056640625, + "learning_rate": 5.090403128928719e-06, + "loss": 12.8604, + "step": 272410 + }, + { + "epoch": 0.5503056355725061, + "grad_norm": 591.6594848632812, + "learning_rate": 5.090054119919349e-06, + "loss": 35.0272, + "step": 272420 + }, + { + "epoch": 0.5503258362051899, + "grad_norm": 1284.27978515625, + "learning_rate": 5.089705110471068e-06, + "loss": 20.0657, + "step": 272430 + }, + { + "epoch": 0.5503460368378738, + "grad_norm": 298.3394775390625, + "learning_rate": 5.089356100585574e-06, + "loss": 9.1207, + "step": 272440 + }, + { + "epoch": 0.5503662374705576, + "grad_norm": 102.35218811035156, + "learning_rate": 5.089007090264568e-06, + "loss": 12.6346, + "step": 272450 + }, + { + "epoch": 0.5503864381032414, + "grad_norm": 466.7763366699219, + "learning_rate": 5.088658079509754e-06, + "loss": 18.0275, + "step": 272460 + }, + { + "epoch": 0.5504066387359252, + "grad_norm": 719.829833984375, + "learning_rate": 5.08830906832283e-06, + "loss": 21.3638, + "step": 272470 + }, + { + "epoch": 0.550426839368609, + "grad_norm": 798.9398193359375, + "learning_rate": 5.087960056705499e-06, + "loss": 35.3024, + "step": 272480 + }, + { + "epoch": 0.5504470400012929, + "grad_norm": 353.4775695800781, + "learning_rate": 5.087611044659462e-06, + "loss": 14.3152, + "step": 272490 + }, + { + "epoch": 0.5504672406339767, + "grad_norm": 469.1107177734375, + "learning_rate": 5.087262032186418e-06, + "loss": 21.2768, + "step": 272500 + }, + { + "epoch": 0.5504874412666605, + "grad_norm": 538.6112060546875, + "learning_rate": 5.08691301928807e-06, + "loss": 13.7717, + "step": 272510 + }, + { + "epoch": 0.5505076418993443, + 
"grad_norm": 96.44287872314453, + "learning_rate": 5.08656400596612e-06, + "loss": 21.8078, + "step": 272520 + }, + { + "epoch": 0.5505278425320281, + "grad_norm": 480.8155517578125, + "learning_rate": 5.086214992222265e-06, + "loss": 28.9269, + "step": 272530 + }, + { + "epoch": 0.550548043164712, + "grad_norm": 1.9340639114379883, + "learning_rate": 5.08586597805821e-06, + "loss": 6.4889, + "step": 272540 + }, + { + "epoch": 0.5505682437973958, + "grad_norm": 289.463623046875, + "learning_rate": 5.085516963475653e-06, + "loss": 25.3641, + "step": 272550 + }, + { + "epoch": 0.5505884444300796, + "grad_norm": 544.4713134765625, + "learning_rate": 5.0851679484762996e-06, + "loss": 16.4804, + "step": 272560 + }, + { + "epoch": 0.5506086450627634, + "grad_norm": 432.995849609375, + "learning_rate": 5.084818933061846e-06, + "loss": 21.1848, + "step": 272570 + }, + { + "epoch": 0.5506288456954472, + "grad_norm": 239.32839965820312, + "learning_rate": 5.084469917233995e-06, + "loss": 18.6366, + "step": 272580 + }, + { + "epoch": 0.5506490463281309, + "grad_norm": 328.63525390625, + "learning_rate": 5.084120900994449e-06, + "loss": 19.7848, + "step": 272590 + }, + { + "epoch": 0.5506692469608148, + "grad_norm": 429.8453674316406, + "learning_rate": 5.083771884344908e-06, + "loss": 35.6967, + "step": 272600 + }, + { + "epoch": 0.5506894475934986, + "grad_norm": 256.73309326171875, + "learning_rate": 5.0834228672870725e-06, + "loss": 20.9563, + "step": 272610 + }, + { + "epoch": 0.5507096482261824, + "grad_norm": 314.0640869140625, + "learning_rate": 5.083073849822643e-06, + "loss": 36.8181, + "step": 272620 + }, + { + "epoch": 0.5507298488588662, + "grad_norm": 316.7877197265625, + "learning_rate": 5.0827248319533225e-06, + "loss": 32.7444, + "step": 272630 + }, + { + "epoch": 0.55075004949155, + "grad_norm": 16.720890045166016, + "learning_rate": 5.082375813680811e-06, + "loss": 11.1595, + "step": 272640 + }, + { + "epoch": 0.5507702501242339, + "grad_norm": 28.173297882080078, + "learning_rate": 5.0820267950068115e-06, + "loss": 15.0024, + "step": 272650 + }, + { + "epoch": 0.5507904507569177, + "grad_norm": 62.392906188964844, + "learning_rate": 5.0816777759330215e-06, + "loss": 14.6509, + "step": 272660 + }, + { + "epoch": 0.5508106513896015, + "grad_norm": 285.71002197265625, + "learning_rate": 5.081328756461145e-06, + "loss": 14.8924, + "step": 272670 + }, + { + "epoch": 0.5508308520222853, + "grad_norm": 183.81626892089844, + "learning_rate": 5.08097973659288e-06, + "loss": 20.9146, + "step": 272680 + }, + { + "epoch": 0.5508510526549691, + "grad_norm": 0.0, + "learning_rate": 5.0806307163299305e-06, + "loss": 10.7393, + "step": 272690 + }, + { + "epoch": 0.550871253287653, + "grad_norm": 542.828125, + "learning_rate": 5.080281695673999e-06, + "loss": 16.5757, + "step": 272700 + }, + { + "epoch": 0.5508914539203368, + "grad_norm": 549.6185302734375, + "learning_rate": 5.079932674626781e-06, + "loss": 10.9822, + "step": 272710 + }, + { + "epoch": 0.5509116545530206, + "grad_norm": 942.9983520507812, + "learning_rate": 5.079583653189982e-06, + "loss": 14.7968, + "step": 272720 + }, + { + "epoch": 0.5509318551857044, + "grad_norm": 0.18729591369628906, + "learning_rate": 5.079234631365303e-06, + "loss": 15.978, + "step": 272730 + }, + { + "epoch": 0.5509520558183882, + "grad_norm": 160.91488647460938, + "learning_rate": 5.0788856091544425e-06, + "loss": 17.7453, + "step": 272740 + }, + { + "epoch": 0.5509722564510721, + "grad_norm": 187.86961364746094, + "learning_rate": 5.078536586559104e-06, 
+ "loss": 12.1295, + "step": 272750 + }, + { + "epoch": 0.5509924570837559, + "grad_norm": 461.9878234863281, + "learning_rate": 5.078187563580988e-06, + "loss": 26.6588, + "step": 272760 + }, + { + "epoch": 0.5510126577164397, + "grad_norm": 771.72705078125, + "learning_rate": 5.077838540221794e-06, + "loss": 28.8553, + "step": 272770 + }, + { + "epoch": 0.5510328583491235, + "grad_norm": 229.1396026611328, + "learning_rate": 5.077489516483225e-06, + "loss": 56.1018, + "step": 272780 + }, + { + "epoch": 0.5510530589818073, + "grad_norm": 297.3424072265625, + "learning_rate": 5.077140492366982e-06, + "loss": 16.4137, + "step": 272790 + }, + { + "epoch": 0.5510732596144912, + "grad_norm": 705.1866455078125, + "learning_rate": 5.0767914678747655e-06, + "loss": 24.9397, + "step": 272800 + }, + { + "epoch": 0.551093460247175, + "grad_norm": 580.1013793945312, + "learning_rate": 5.076442443008277e-06, + "loss": 16.4608, + "step": 272810 + }, + { + "epoch": 0.5511136608798588, + "grad_norm": 289.314453125, + "learning_rate": 5.076093417769216e-06, + "loss": 15.8894, + "step": 272820 + }, + { + "epoch": 0.5511338615125426, + "grad_norm": 588.11474609375, + "learning_rate": 5.075744392159285e-06, + "loss": 16.0408, + "step": 272830 + }, + { + "epoch": 0.5511540621452264, + "grad_norm": 664.2628784179688, + "learning_rate": 5.075395366180186e-06, + "loss": 22.328, + "step": 272840 + }, + { + "epoch": 0.5511742627779102, + "grad_norm": 30.495628356933594, + "learning_rate": 5.0750463398336195e-06, + "loss": 46.8985, + "step": 272850 + }, + { + "epoch": 0.551194463410594, + "grad_norm": 509.7676086425781, + "learning_rate": 5.074697313121286e-06, + "loss": 26.6984, + "step": 272860 + }, + { + "epoch": 0.5512146640432778, + "grad_norm": 735.7946166992188, + "learning_rate": 5.074348286044884e-06, + "loss": 33.8265, + "step": 272870 + }, + { + "epoch": 0.5512348646759616, + "grad_norm": 172.11744689941406, + "learning_rate": 5.073999258606121e-06, + "loss": 9.6947, + "step": 272880 + }, + { + "epoch": 0.5512550653086454, + "grad_norm": 338.67047119140625, + "learning_rate": 5.073650230806693e-06, + "loss": 19.2771, + "step": 272890 + }, + { + "epoch": 0.5512752659413293, + "grad_norm": 427.8309326171875, + "learning_rate": 5.073301202648304e-06, + "loss": 16.5368, + "step": 272900 + }, + { + "epoch": 0.5512954665740131, + "grad_norm": 640.8612670898438, + "learning_rate": 5.072952174132653e-06, + "loss": 21.4062, + "step": 272910 + }, + { + "epoch": 0.5513156672066969, + "grad_norm": 132.24171447753906, + "learning_rate": 5.072603145261442e-06, + "loss": 11.2991, + "step": 272920 + }, + { + "epoch": 0.5513358678393807, + "grad_norm": 178.75592041015625, + "learning_rate": 5.072254116036372e-06, + "loss": 11.4653, + "step": 272930 + }, + { + "epoch": 0.5513560684720645, + "grad_norm": 96.27967071533203, + "learning_rate": 5.071905086459145e-06, + "loss": 25.1724, + "step": 272940 + }, + { + "epoch": 0.5513762691047484, + "grad_norm": 99.01346588134766, + "learning_rate": 5.0715560565314595e-06, + "loss": 21.9778, + "step": 272950 + }, + { + "epoch": 0.5513964697374322, + "grad_norm": 502.8134765625, + "learning_rate": 5.071207026255019e-06, + "loss": 12.7288, + "step": 272960 + }, + { + "epoch": 0.551416670370116, + "grad_norm": 272.5439453125, + "learning_rate": 5.070857995631526e-06, + "loss": 7.4142, + "step": 272970 + }, + { + "epoch": 0.5514368710027998, + "grad_norm": 695.0064086914062, + "learning_rate": 5.070508964662677e-06, + "loss": 38.9627, + "step": 272980 + }, + { + "epoch": 
0.5514570716354836, + "grad_norm": 1581.58984375, + "learning_rate": 5.070159933350178e-06, + "loss": 15.3666, + "step": 272990 + }, + { + "epoch": 0.5514772722681675, + "grad_norm": 410.3154296875, + "learning_rate": 5.069810901695727e-06, + "loss": 26.9396, + "step": 273000 + }, + { + "epoch": 0.5514974729008513, + "grad_norm": 3.71242356300354, + "learning_rate": 5.0694618697010265e-06, + "loss": 19.1724, + "step": 273010 + }, + { + "epoch": 0.5515176735335351, + "grad_norm": 614.3184814453125, + "learning_rate": 5.069112837367777e-06, + "loss": 17.708, + "step": 273020 + }, + { + "epoch": 0.5515378741662189, + "grad_norm": 496.86505126953125, + "learning_rate": 5.068763804697679e-06, + "loss": 29.1489, + "step": 273030 + }, + { + "epoch": 0.5515580747989027, + "grad_norm": 333.1636657714844, + "learning_rate": 5.068414771692437e-06, + "loss": 18.1164, + "step": 273040 + }, + { + "epoch": 0.5515782754315866, + "grad_norm": 1166.0439453125, + "learning_rate": 5.068065738353748e-06, + "loss": 34.7047, + "step": 273050 + }, + { + "epoch": 0.5515984760642704, + "grad_norm": 240.95263671875, + "learning_rate": 5.067716704683315e-06, + "loss": 14.7328, + "step": 273060 + }, + { + "epoch": 0.5516186766969542, + "grad_norm": 405.3726501464844, + "learning_rate": 5.067367670682839e-06, + "loss": 21.5972, + "step": 273070 + }, + { + "epoch": 0.551638877329638, + "grad_norm": 255.45797729492188, + "learning_rate": 5.067018636354021e-06, + "loss": 11.9275, + "step": 273080 + }, + { + "epoch": 0.5516590779623218, + "grad_norm": 273.974365234375, + "learning_rate": 5.066669601698562e-06, + "loss": 13.6709, + "step": 273090 + }, + { + "epoch": 0.5516792785950057, + "grad_norm": 1166.1033935546875, + "learning_rate": 5.066320566718165e-06, + "loss": 25.2127, + "step": 273100 + }, + { + "epoch": 0.5516994792276894, + "grad_norm": 423.2397155761719, + "learning_rate": 5.065971531414528e-06, + "loss": 19.6114, + "step": 273110 + }, + { + "epoch": 0.5517196798603732, + "grad_norm": 349.9311828613281, + "learning_rate": 5.0656224957893545e-06, + "loss": 19.3992, + "step": 273120 + }, + { + "epoch": 0.551739880493057, + "grad_norm": 718.978759765625, + "learning_rate": 5.065273459844345e-06, + "loss": 15.1803, + "step": 273130 + }, + { + "epoch": 0.5517600811257408, + "grad_norm": 319.8184509277344, + "learning_rate": 5.0649244235812e-06, + "loss": 18.1024, + "step": 273140 + }, + { + "epoch": 0.5517802817584246, + "grad_norm": 0.0, + "learning_rate": 5.0645753870016224e-06, + "loss": 16.6912, + "step": 273150 + }, + { + "epoch": 0.5518004823911085, + "grad_norm": 1063.666015625, + "learning_rate": 5.0642263501073096e-06, + "loss": 24.3302, + "step": 273160 + }, + { + "epoch": 0.5518206830237923, + "grad_norm": 451.7352600097656, + "learning_rate": 5.063877312899969e-06, + "loss": 23.0781, + "step": 273170 + }, + { + "epoch": 0.5518408836564761, + "grad_norm": 255.57518005371094, + "learning_rate": 5.063528275381295e-06, + "loss": 15.9749, + "step": 273180 + }, + { + "epoch": 0.5518610842891599, + "grad_norm": 34.647613525390625, + "learning_rate": 5.063179237552993e-06, + "loss": 16.2164, + "step": 273190 + }, + { + "epoch": 0.5518812849218437, + "grad_norm": 861.7901000976562, + "learning_rate": 5.062830199416764e-06, + "loss": 24.4547, + "step": 273200 + }, + { + "epoch": 0.5519014855545276, + "grad_norm": 538.9898681640625, + "learning_rate": 5.062481160974308e-06, + "loss": 15.3167, + "step": 273210 + }, + { + "epoch": 0.5519216861872114, + "grad_norm": 351.23944091796875, + "learning_rate": 
5.0621321222273255e-06, + "loss": 32.0002, + "step": 273220 + }, + { + "epoch": 0.5519418868198952, + "grad_norm": 411.3854675292969, + "learning_rate": 5.061783083177519e-06, + "loss": 12.7911, + "step": 273230 + }, + { + "epoch": 0.551962087452579, + "grad_norm": 592.5401611328125, + "learning_rate": 5.061434043826588e-06, + "loss": 19.5976, + "step": 273240 + }, + { + "epoch": 0.5519822880852628, + "grad_norm": 609.3246459960938, + "learning_rate": 5.061085004176236e-06, + "loss": 19.3405, + "step": 273250 + }, + { + "epoch": 0.5520024887179467, + "grad_norm": 154.5178985595703, + "learning_rate": 5.060735964228164e-06, + "loss": 10.1593, + "step": 273260 + }, + { + "epoch": 0.5520226893506305, + "grad_norm": 155.55648803710938, + "learning_rate": 5.060386923984071e-06, + "loss": 13.301, + "step": 273270 + }, + { + "epoch": 0.5520428899833143, + "grad_norm": 1033.5396728515625, + "learning_rate": 5.06003788344566e-06, + "loss": 33.5247, + "step": 273280 + }, + { + "epoch": 0.5520630906159981, + "grad_norm": 265.4301452636719, + "learning_rate": 5.0596888426146325e-06, + "loss": 13.5706, + "step": 273290 + }, + { + "epoch": 0.5520832912486819, + "grad_norm": 465.9914855957031, + "learning_rate": 5.059339801492687e-06, + "loss": 24.8596, + "step": 273300 + }, + { + "epoch": 0.5521034918813658, + "grad_norm": 377.5484924316406, + "learning_rate": 5.058990760081528e-06, + "loss": 20.5792, + "step": 273310 + }, + { + "epoch": 0.5521236925140496, + "grad_norm": 484.415283203125, + "learning_rate": 5.058641718382853e-06, + "loss": 32.0516, + "step": 273320 + }, + { + "epoch": 0.5521438931467334, + "grad_norm": 103.9054183959961, + "learning_rate": 5.058292676398368e-06, + "loss": 12.6073, + "step": 273330 + }, + { + "epoch": 0.5521640937794172, + "grad_norm": 226.50808715820312, + "learning_rate": 5.0579436341297705e-06, + "loss": 14.3633, + "step": 273340 + }, + { + "epoch": 0.552184294412101, + "grad_norm": 573.47412109375, + "learning_rate": 5.0575945915787616e-06, + "loss": 18.9658, + "step": 273350 + }, + { + "epoch": 0.5522044950447847, + "grad_norm": 885.1104125976562, + "learning_rate": 5.057245548747045e-06, + "loss": 26.1932, + "step": 273360 + }, + { + "epoch": 0.5522246956774686, + "grad_norm": 221.2861785888672, + "learning_rate": 5.056896505636322e-06, + "loss": 15.6163, + "step": 273370 + }, + { + "epoch": 0.5522448963101524, + "grad_norm": 258.8567199707031, + "learning_rate": 5.05654746224829e-06, + "loss": 25.2411, + "step": 273380 + }, + { + "epoch": 0.5522650969428362, + "grad_norm": 690.5338745117188, + "learning_rate": 5.056198418584653e-06, + "loss": 28.2111, + "step": 273390 + }, + { + "epoch": 0.55228529757552, + "grad_norm": 625.1259155273438, + "learning_rate": 5.055849374647112e-06, + "loss": 23.1402, + "step": 273400 + }, + { + "epoch": 0.5523054982082038, + "grad_norm": 353.44970703125, + "learning_rate": 5.0555003304373674e-06, + "loss": 26.7578, + "step": 273410 + }, + { + "epoch": 0.5523256988408877, + "grad_norm": 520.411865234375, + "learning_rate": 5.0551512859571215e-06, + "loss": 13.3652, + "step": 273420 + }, + { + "epoch": 0.5523458994735715, + "grad_norm": 476.323486328125, + "learning_rate": 5.054802241208075e-06, + "loss": 38.6315, + "step": 273430 + }, + { + "epoch": 0.5523661001062553, + "grad_norm": 390.4078674316406, + "learning_rate": 5.054453196191929e-06, + "loss": 13.9013, + "step": 273440 + }, + { + "epoch": 0.5523863007389391, + "grad_norm": 366.37664794921875, + "learning_rate": 5.054104150910384e-06, + "loss": 17.8417, + "step": 273450 + 
}, + { + "epoch": 0.552406501371623, + "grad_norm": 337.0237731933594, + "learning_rate": 5.053755105365142e-06, + "loss": 24.9817, + "step": 273460 + }, + { + "epoch": 0.5524267020043068, + "grad_norm": 534.3334350585938, + "learning_rate": 5.053406059557906e-06, + "loss": 21.2075, + "step": 273470 + }, + { + "epoch": 0.5524469026369906, + "grad_norm": 381.9807434082031, + "learning_rate": 5.0530570134903725e-06, + "loss": 20.7716, + "step": 273480 + }, + { + "epoch": 0.5524671032696744, + "grad_norm": 287.4529724121094, + "learning_rate": 5.052707967164248e-06, + "loss": 13.157, + "step": 273490 + }, + { + "epoch": 0.5524873039023582, + "grad_norm": 414.7626647949219, + "learning_rate": 5.05235892058123e-06, + "loss": 26.3657, + "step": 273500 + }, + { + "epoch": 0.552507504535042, + "grad_norm": 446.5314636230469, + "learning_rate": 5.052009873743021e-06, + "loss": 17.5753, + "step": 273510 + }, + { + "epoch": 0.5525277051677259, + "grad_norm": 54.65743637084961, + "learning_rate": 5.051660826651322e-06, + "loss": 24.5452, + "step": 273520 + }, + { + "epoch": 0.5525479058004097, + "grad_norm": 271.3385925292969, + "learning_rate": 5.0513117793078345e-06, + "loss": 17.0786, + "step": 273530 + }, + { + "epoch": 0.5525681064330935, + "grad_norm": 163.66925048828125, + "learning_rate": 5.0509627317142606e-06, + "loss": 14.266, + "step": 273540 + }, + { + "epoch": 0.5525883070657773, + "grad_norm": 467.8886413574219, + "learning_rate": 5.050613683872299e-06, + "loss": 21.9826, + "step": 273550 + }, + { + "epoch": 0.5526085076984611, + "grad_norm": 150.468505859375, + "learning_rate": 5.050264635783654e-06, + "loss": 11.6281, + "step": 273560 + }, + { + "epoch": 0.552628708331145, + "grad_norm": 307.3399658203125, + "learning_rate": 5.049915587450024e-06, + "loss": 18.0145, + "step": 273570 + }, + { + "epoch": 0.5526489089638288, + "grad_norm": 926.0619506835938, + "learning_rate": 5.049566538873113e-06, + "loss": 27.2354, + "step": 273580 + }, + { + "epoch": 0.5526691095965126, + "grad_norm": 92.09886932373047, + "learning_rate": 5.049217490054619e-06, + "loss": 16.0031, + "step": 273590 + }, + { + "epoch": 0.5526893102291964, + "grad_norm": 300.13995361328125, + "learning_rate": 5.048868440996246e-06, + "loss": 20.6436, + "step": 273600 + }, + { + "epoch": 0.5527095108618802, + "grad_norm": 410.94781494140625, + "learning_rate": 5.048519391699693e-06, + "loss": 20.6964, + "step": 273610 + }, + { + "epoch": 0.552729711494564, + "grad_norm": 257.89715576171875, + "learning_rate": 5.048170342166664e-06, + "loss": 16.4832, + "step": 273620 + }, + { + "epoch": 0.5527499121272478, + "grad_norm": 194.0541534423828, + "learning_rate": 5.0478212923988576e-06, + "loss": 13.7152, + "step": 273630 + }, + { + "epoch": 0.5527701127599316, + "grad_norm": 584.8863525390625, + "learning_rate": 5.047472242397976e-06, + "loss": 16.2681, + "step": 273640 + }, + { + "epoch": 0.5527903133926154, + "grad_norm": 461.9921875, + "learning_rate": 5.047123192165722e-06, + "loss": 10.395, + "step": 273650 + }, + { + "epoch": 0.5528105140252992, + "grad_norm": 571.4125366210938, + "learning_rate": 5.046774141703793e-06, + "loss": 25.3463, + "step": 273660 + }, + { + "epoch": 0.5528307146579831, + "grad_norm": 6.0866923332214355, + "learning_rate": 5.046425091013893e-06, + "loss": 13.3849, + "step": 273670 + }, + { + "epoch": 0.5528509152906669, + "grad_norm": 218.32177734375, + "learning_rate": 5.046076040097722e-06, + "loss": 27.7339, + "step": 273680 + }, + { + "epoch": 0.5528711159233507, + "grad_norm": 
613.5769653320312, + "learning_rate": 5.045726988956984e-06, + "loss": 17.5133, + "step": 273690 + }, + { + "epoch": 0.5528913165560345, + "grad_norm": 693.943603515625, + "learning_rate": 5.045377937593376e-06, + "loss": 26.6492, + "step": 273700 + }, + { + "epoch": 0.5529115171887183, + "grad_norm": 213.2667999267578, + "learning_rate": 5.045028886008605e-06, + "loss": 10.0386, + "step": 273710 + }, + { + "epoch": 0.5529317178214022, + "grad_norm": 581.2164916992188, + "learning_rate": 5.044679834204366e-06, + "loss": 27.7785, + "step": 273720 + }, + { + "epoch": 0.552951918454086, + "grad_norm": 290.66510009765625, + "learning_rate": 5.044330782182363e-06, + "loss": 13.7657, + "step": 273730 + }, + { + "epoch": 0.5529721190867698, + "grad_norm": 529.0045166015625, + "learning_rate": 5.043981729944298e-06, + "loss": 24.8721, + "step": 273740 + }, + { + "epoch": 0.5529923197194536, + "grad_norm": 259.0770263671875, + "learning_rate": 5.04363267749187e-06, + "loss": 13.4246, + "step": 273750 + }, + { + "epoch": 0.5530125203521374, + "grad_norm": 870.479736328125, + "learning_rate": 5.043283624826783e-06, + "loss": 21.902, + "step": 273760 + }, + { + "epoch": 0.5530327209848213, + "grad_norm": 241.07386779785156, + "learning_rate": 5.042934571950735e-06, + "loss": 31.6515, + "step": 273770 + }, + { + "epoch": 0.5530529216175051, + "grad_norm": 159.55506896972656, + "learning_rate": 5.042585518865431e-06, + "loss": 15.1274, + "step": 273780 + }, + { + "epoch": 0.5530731222501889, + "grad_norm": 244.0486297607422, + "learning_rate": 5.04223646557257e-06, + "loss": 10.7543, + "step": 273790 + }, + { + "epoch": 0.5530933228828727, + "grad_norm": 417.79833984375, + "learning_rate": 5.041887412073853e-06, + "loss": 22.1971, + "step": 273800 + }, + { + "epoch": 0.5531135235155565, + "grad_norm": 55.917091369628906, + "learning_rate": 5.041538358370983e-06, + "loss": 17.2982, + "step": 273810 + }, + { + "epoch": 0.5531337241482404, + "grad_norm": 294.291259765625, + "learning_rate": 5.04118930446566e-06, + "loss": 29.3356, + "step": 273820 + }, + { + "epoch": 0.5531539247809242, + "grad_norm": 225.97354125976562, + "learning_rate": 5.040840250359584e-06, + "loss": 22.751, + "step": 273830 + }, + { + "epoch": 0.553174125413608, + "grad_norm": 387.2437744140625, + "learning_rate": 5.0404911960544575e-06, + "loss": 8.9484, + "step": 273840 + }, + { + "epoch": 0.5531943260462918, + "grad_norm": 531.6309814453125, + "learning_rate": 5.040142141551982e-06, + "loss": 15.1858, + "step": 273850 + }, + { + "epoch": 0.5532145266789756, + "grad_norm": 321.93206787109375, + "learning_rate": 5.039793086853859e-06, + "loss": 14.421, + "step": 273860 + }, + { + "epoch": 0.5532347273116593, + "grad_norm": 742.7291259765625, + "learning_rate": 5.039444031961791e-06, + "loss": 21.1888, + "step": 273870 + }, + { + "epoch": 0.5532549279443432, + "grad_norm": 496.53289794921875, + "learning_rate": 5.0390949768774755e-06, + "loss": 20.8615, + "step": 273880 + }, + { + "epoch": 0.553275128577027, + "grad_norm": 305.88897705078125, + "learning_rate": 5.038745921602617e-06, + "loss": 28.2547, + "step": 273890 + }, + { + "epoch": 0.5532953292097108, + "grad_norm": 366.7085266113281, + "learning_rate": 5.038396866138915e-06, + "loss": 20.9414, + "step": 273900 + }, + { + "epoch": 0.5533155298423946, + "grad_norm": 681.21435546875, + "learning_rate": 5.03804781048807e-06, + "loss": 16.4326, + "step": 273910 + }, + { + "epoch": 0.5533357304750784, + "grad_norm": 278.194580078125, + "learning_rate": 5.037698754651786e-06, + 
"loss": 26.1326, + "step": 273920 + }, + { + "epoch": 0.5533559311077623, + "grad_norm": 89.72135925292969, + "learning_rate": 5.037349698631762e-06, + "loss": 22.8615, + "step": 273930 + }, + { + "epoch": 0.5533761317404461, + "grad_norm": 130.1783447265625, + "learning_rate": 5.037000642429701e-06, + "loss": 14.1529, + "step": 273940 + }, + { + "epoch": 0.5533963323731299, + "grad_norm": 12.04598617553711, + "learning_rate": 5.036651586047303e-06, + "loss": 18.4062, + "step": 273950 + }, + { + "epoch": 0.5534165330058137, + "grad_norm": 390.1788330078125, + "learning_rate": 5.03630252948627e-06, + "loss": 11.0696, + "step": 273960 + }, + { + "epoch": 0.5534367336384975, + "grad_norm": 823.8593139648438, + "learning_rate": 5.035953472748304e-06, + "loss": 24.8164, + "step": 273970 + }, + { + "epoch": 0.5534569342711814, + "grad_norm": 702.7938842773438, + "learning_rate": 5.035604415835102e-06, + "loss": 27.215, + "step": 273980 + }, + { + "epoch": 0.5534771349038652, + "grad_norm": 1129.494873046875, + "learning_rate": 5.035255358748371e-06, + "loss": 27.6904, + "step": 273990 + }, + { + "epoch": 0.553497335536549, + "grad_norm": 90.69853973388672, + "learning_rate": 5.034906301489808e-06, + "loss": 24.6617, + "step": 274000 + }, + { + "epoch": 0.5535175361692328, + "grad_norm": 323.79534912109375, + "learning_rate": 5.034557244061117e-06, + "loss": 26.2408, + "step": 274010 + }, + { + "epoch": 0.5535377368019166, + "grad_norm": 139.48471069335938, + "learning_rate": 5.034208186463998e-06, + "loss": 16.7441, + "step": 274020 + }, + { + "epoch": 0.5535579374346005, + "grad_norm": 331.9467468261719, + "learning_rate": 5.0338591287001525e-06, + "loss": 15.302, + "step": 274030 + }, + { + "epoch": 0.5535781380672843, + "grad_norm": 451.3541564941406, + "learning_rate": 5.03351007077128e-06, + "loss": 18.568, + "step": 274040 + }, + { + "epoch": 0.5535983386999681, + "grad_norm": 647.1574096679688, + "learning_rate": 5.033161012679087e-06, + "loss": 16.6301, + "step": 274050 + }, + { + "epoch": 0.5536185393326519, + "grad_norm": 532.1934204101562, + "learning_rate": 5.032811954425268e-06, + "loss": 18.1645, + "step": 274060 + }, + { + "epoch": 0.5536387399653357, + "grad_norm": 246.70433044433594, + "learning_rate": 5.0324628960115296e-06, + "loss": 9.0928, + "step": 274070 + }, + { + "epoch": 0.5536589405980196, + "grad_norm": 290.0281982421875, + "learning_rate": 5.032113837439571e-06, + "loss": 11.8767, + "step": 274080 + }, + { + "epoch": 0.5536791412307034, + "grad_norm": 175.98838806152344, + "learning_rate": 5.0317647787110915e-06, + "loss": 24.4881, + "step": 274090 + }, + { + "epoch": 0.5536993418633872, + "grad_norm": 110.80126953125, + "learning_rate": 5.031415719827796e-06, + "loss": 21.7746, + "step": 274100 + }, + { + "epoch": 0.553719542496071, + "grad_norm": 564.2908935546875, + "learning_rate": 5.031066660791383e-06, + "loss": 21.3942, + "step": 274110 + }, + { + "epoch": 0.5537397431287548, + "grad_norm": 557.5830688476562, + "learning_rate": 5.030717601603556e-06, + "loss": 15.1739, + "step": 274120 + }, + { + "epoch": 0.5537599437614386, + "grad_norm": 420.3285827636719, + "learning_rate": 5.030368542266013e-06, + "loss": 22.6166, + "step": 274130 + }, + { + "epoch": 0.5537801443941224, + "grad_norm": 441.185302734375, + "learning_rate": 5.030019482780459e-06, + "loss": 17.6675, + "step": 274140 + }, + { + "epoch": 0.5538003450268062, + "grad_norm": 785.4033203125, + "learning_rate": 5.029670423148595e-06, + "loss": 22.174, + "step": 274150 + }, + { + "epoch": 
0.55382054565949, + "grad_norm": 628.7343139648438, + "learning_rate": 5.029321363372119e-06, + "loss": 12.8308, + "step": 274160 + }, + { + "epoch": 0.5538407462921738, + "grad_norm": 450.0870056152344, + "learning_rate": 5.0289723034527345e-06, + "loss": 14.8841, + "step": 274170 + }, + { + "epoch": 0.5538609469248577, + "grad_norm": 740.5001831054688, + "learning_rate": 5.028623243392143e-06, + "loss": 18.4416, + "step": 274180 + }, + { + "epoch": 0.5538811475575415, + "grad_norm": 549.3732299804688, + "learning_rate": 5.028274183192046e-06, + "loss": 43.1109, + "step": 274190 + }, + { + "epoch": 0.5539013481902253, + "grad_norm": 375.171630859375, + "learning_rate": 5.027925122854141e-06, + "loss": 14.3481, + "step": 274200 + }, + { + "epoch": 0.5539215488229091, + "grad_norm": 410.18707275390625, + "learning_rate": 5.027576062380136e-06, + "loss": 20.8517, + "step": 274210 + }, + { + "epoch": 0.5539417494555929, + "grad_norm": 440.8715515136719, + "learning_rate": 5.027227001771727e-06, + "loss": 7.7379, + "step": 274220 + }, + { + "epoch": 0.5539619500882768, + "grad_norm": 636.6222534179688, + "learning_rate": 5.0268779410306164e-06, + "loss": 30.4021, + "step": 274230 + }, + { + "epoch": 0.5539821507209606, + "grad_norm": 162.743896484375, + "learning_rate": 5.026528880158508e-06, + "loss": 20.3886, + "step": 274240 + }, + { + "epoch": 0.5540023513536444, + "grad_norm": 97.67532348632812, + "learning_rate": 5.0261798191570975e-06, + "loss": 25.955, + "step": 274250 + }, + { + "epoch": 0.5540225519863282, + "grad_norm": 479.6761779785156, + "learning_rate": 5.025830758028093e-06, + "loss": 21.1345, + "step": 274260 + }, + { + "epoch": 0.554042752619012, + "grad_norm": 252.00277709960938, + "learning_rate": 5.025481696773191e-06, + "loss": 11.5059, + "step": 274270 + }, + { + "epoch": 0.5540629532516959, + "grad_norm": 524.0341796875, + "learning_rate": 5.025132635394095e-06, + "loss": 19.7833, + "step": 274280 + }, + { + "epoch": 0.5540831538843797, + "grad_norm": 1358.397705078125, + "learning_rate": 5.024783573892505e-06, + "loss": 31.1891, + "step": 274290 + }, + { + "epoch": 0.5541033545170635, + "grad_norm": 768.6133422851562, + "learning_rate": 5.024434512270123e-06, + "loss": 14.8123, + "step": 274300 + }, + { + "epoch": 0.5541235551497473, + "grad_norm": 195.52130126953125, + "learning_rate": 5.0240854505286505e-06, + "loss": 9.175, + "step": 274310 + }, + { + "epoch": 0.5541437557824311, + "grad_norm": 318.7700500488281, + "learning_rate": 5.023736388669789e-06, + "loss": 17.8853, + "step": 274320 + }, + { + "epoch": 0.554163956415115, + "grad_norm": 462.0083312988281, + "learning_rate": 5.023387326695238e-06, + "loss": 10.3964, + "step": 274330 + }, + { + "epoch": 0.5541841570477988, + "grad_norm": 183.35916137695312, + "learning_rate": 5.0230382646067e-06, + "loss": 14.3003, + "step": 274340 + }, + { + "epoch": 0.5542043576804826, + "grad_norm": 666.4971923828125, + "learning_rate": 5.0226892024058785e-06, + "loss": 30.8109, + "step": 274350 + }, + { + "epoch": 0.5542245583131664, + "grad_norm": 497.9892883300781, + "learning_rate": 5.022340140094469e-06, + "loss": 19.2232, + "step": 274360 + }, + { + "epoch": 0.5542447589458502, + "grad_norm": 148.6495361328125, + "learning_rate": 5.021991077674179e-06, + "loss": 11.1378, + "step": 274370 + }, + { + "epoch": 0.5542649595785339, + "grad_norm": 497.6520080566406, + "learning_rate": 5.021642015146705e-06, + "loss": 25.8033, + "step": 274380 + }, + { + "epoch": 0.5542851602112178, + "grad_norm": 372.19580078125, + 
"learning_rate": 5.021292952513752e-06, + "loss": 22.642, + "step": 274390 + }, + { + "epoch": 0.5543053608439016, + "grad_norm": 480.487548828125, + "learning_rate": 5.0209438897770205e-06, + "loss": 19.5264, + "step": 274400 + }, + { + "epoch": 0.5543255614765854, + "grad_norm": 492.4881896972656, + "learning_rate": 5.020594826938209e-06, + "loss": 31.9544, + "step": 274410 + }, + { + "epoch": 0.5543457621092692, + "grad_norm": 452.8685302734375, + "learning_rate": 5.020245763999024e-06, + "loss": 25.9853, + "step": 274420 + }, + { + "epoch": 0.554365962741953, + "grad_norm": 177.11929321289062, + "learning_rate": 5.01989670096116e-06, + "loss": 25.5444, + "step": 274430 + }, + { + "epoch": 0.5543861633746369, + "grad_norm": 328.0756530761719, + "learning_rate": 5.019547637826323e-06, + "loss": 23.5047, + "step": 274440 + }, + { + "epoch": 0.5544063640073207, + "grad_norm": 124.34886169433594, + "learning_rate": 5.019198574596213e-06, + "loss": 13.553, + "step": 274450 + }, + { + "epoch": 0.5544265646400045, + "grad_norm": 7.3490891456604, + "learning_rate": 5.018849511272532e-06, + "loss": 29.7773, + "step": 274460 + }, + { + "epoch": 0.5544467652726883, + "grad_norm": 428.7235107421875, + "learning_rate": 5.018500447856981e-06, + "loss": 21.9885, + "step": 274470 + }, + { + "epoch": 0.5544669659053721, + "grad_norm": 249.6378936767578, + "learning_rate": 5.0181513843512615e-06, + "loss": 10.3065, + "step": 274480 + }, + { + "epoch": 0.554487166538056, + "grad_norm": 142.67721557617188, + "learning_rate": 5.017802320757073e-06, + "loss": 27.0052, + "step": 274490 + }, + { + "epoch": 0.5545073671707398, + "grad_norm": 707.7395629882812, + "learning_rate": 5.0174532570761194e-06, + "loss": 23.4318, + "step": 274500 + }, + { + "epoch": 0.5545275678034236, + "grad_norm": 369.94677734375, + "learning_rate": 5.0171041933101e-06, + "loss": 32.0048, + "step": 274510 + }, + { + "epoch": 0.5545477684361074, + "grad_norm": 453.3552551269531, + "learning_rate": 5.016755129460717e-06, + "loss": 26.6822, + "step": 274520 + }, + { + "epoch": 0.5545679690687912, + "grad_norm": 341.4087219238281, + "learning_rate": 5.016406065529672e-06, + "loss": 23.0827, + "step": 274530 + }, + { + "epoch": 0.5545881697014751, + "grad_norm": 361.6666259765625, + "learning_rate": 5.016057001518664e-06, + "loss": 19.9923, + "step": 274540 + }, + { + "epoch": 0.5546083703341589, + "grad_norm": 421.46746826171875, + "learning_rate": 5.015707937429398e-06, + "loss": 16.2056, + "step": 274550 + }, + { + "epoch": 0.5546285709668427, + "grad_norm": 103.21510314941406, + "learning_rate": 5.0153588732635734e-06, + "loss": 34.3507, + "step": 274560 + }, + { + "epoch": 0.5546487715995265, + "grad_norm": 150.9375457763672, + "learning_rate": 5.015009809022891e-06, + "loss": 31.226, + "step": 274570 + }, + { + "epoch": 0.5546689722322103, + "grad_norm": 227.6450653076172, + "learning_rate": 5.014660744709053e-06, + "loss": 16.8979, + "step": 274580 + }, + { + "epoch": 0.5546891728648942, + "grad_norm": 578.1546630859375, + "learning_rate": 5.014311680323759e-06, + "loss": 26.6818, + "step": 274590 + }, + { + "epoch": 0.554709373497578, + "grad_norm": 266.2040100097656, + "learning_rate": 5.013962615868714e-06, + "loss": 24.6263, + "step": 274600 + }, + { + "epoch": 0.5547295741302618, + "grad_norm": 110.05225372314453, + "learning_rate": 5.013613551345614e-06, + "loss": 8.7034, + "step": 274610 + }, + { + "epoch": 0.5547497747629456, + "grad_norm": 286.9217834472656, + "learning_rate": 5.013264486756165e-06, + "loss": 16.5623, + 
"step": 274620 + }, + { + "epoch": 0.5547699753956294, + "grad_norm": 160.9605255126953, + "learning_rate": 5.012915422102066e-06, + "loss": 12.3764, + "step": 274630 + }, + { + "epoch": 0.5547901760283132, + "grad_norm": 672.0164794921875, + "learning_rate": 5.0125663573850204e-06, + "loss": 17.3915, + "step": 274640 + }, + { + "epoch": 0.554810376660997, + "grad_norm": 245.0459747314453, + "learning_rate": 5.012217292606726e-06, + "loss": 22.9495, + "step": 274650 + }, + { + "epoch": 0.5548305772936808, + "grad_norm": 501.7677001953125, + "learning_rate": 5.011868227768886e-06, + "loss": 19.9008, + "step": 274660 + }, + { + "epoch": 0.5548507779263646, + "grad_norm": 456.3265686035156, + "learning_rate": 5.011519162873202e-06, + "loss": 18.3497, + "step": 274670 + }, + { + "epoch": 0.5548709785590484, + "grad_norm": 305.5513610839844, + "learning_rate": 5.011170097921375e-06, + "loss": 10.6182, + "step": 274680 + }, + { + "epoch": 0.5548911791917323, + "grad_norm": 485.4612731933594, + "learning_rate": 5.010821032915108e-06, + "loss": 15.1355, + "step": 274690 + }, + { + "epoch": 0.5549113798244161, + "grad_norm": 632.5755615234375, + "learning_rate": 5.010471967856096e-06, + "loss": 18.6826, + "step": 274700 + }, + { + "epoch": 0.5549315804570999, + "grad_norm": 838.0838012695312, + "learning_rate": 5.01012290274605e-06, + "loss": 17.3757, + "step": 274710 + }, + { + "epoch": 0.5549517810897837, + "grad_norm": 577.13720703125, + "learning_rate": 5.009773837586663e-06, + "loss": 11.8671, + "step": 274720 + }, + { + "epoch": 0.5549719817224675, + "grad_norm": 638.5238647460938, + "learning_rate": 5.0094247723796405e-06, + "loss": 17.7085, + "step": 274730 + }, + { + "epoch": 0.5549921823551514, + "grad_norm": 226.6175079345703, + "learning_rate": 5.009075707126684e-06, + "loss": 17.44, + "step": 274740 + }, + { + "epoch": 0.5550123829878352, + "grad_norm": 471.9786071777344, + "learning_rate": 5.008726641829492e-06, + "loss": 32.2502, + "step": 274750 + }, + { + "epoch": 0.555032583620519, + "grad_norm": 15.962459564208984, + "learning_rate": 5.008377576489769e-06, + "loss": 12.847, + "step": 274760 + }, + { + "epoch": 0.5550527842532028, + "grad_norm": 634.9119873046875, + "learning_rate": 5.008028511109213e-06, + "loss": 14.3255, + "step": 274770 + }, + { + "epoch": 0.5550729848858866, + "grad_norm": 528.1943359375, + "learning_rate": 5.007679445689527e-06, + "loss": 9.3628, + "step": 274780 + }, + { + "epoch": 0.5550931855185705, + "grad_norm": 354.98638916015625, + "learning_rate": 5.007330380232414e-06, + "loss": 22.5814, + "step": 274790 + }, + { + "epoch": 0.5551133861512543, + "grad_norm": 649.9990844726562, + "learning_rate": 5.006981314739573e-06, + "loss": 12.6906, + "step": 274800 + }, + { + "epoch": 0.5551335867839381, + "grad_norm": 425.6797790527344, + "learning_rate": 5.0066322492127036e-06, + "loss": 38.1626, + "step": 274810 + }, + { + "epoch": 0.5551537874166219, + "grad_norm": 430.2084655761719, + "learning_rate": 5.006283183653513e-06, + "loss": 19.2571, + "step": 274820 + }, + { + "epoch": 0.5551739880493057, + "grad_norm": 637.6443481445312, + "learning_rate": 5.005934118063697e-06, + "loss": 28.1729, + "step": 274830 + }, + { + "epoch": 0.5551941886819896, + "grad_norm": 421.87628173828125, + "learning_rate": 5.005585052444959e-06, + "loss": 17.778, + "step": 274840 + }, + { + "epoch": 0.5552143893146734, + "grad_norm": 79.63214111328125, + "learning_rate": 5.005235986799001e-06, + "loss": 27.9943, + "step": 274850 + }, + { + "epoch": 0.5552345899473572, + 
"grad_norm": 305.18768310546875, + "learning_rate": 5.004886921127521e-06, + "loss": 22.6701, + "step": 274860 + }, + { + "epoch": 0.555254790580041, + "grad_norm": 535.249755859375, + "learning_rate": 5.0045378554322256e-06, + "loss": 27.5421, + "step": 274870 + }, + { + "epoch": 0.5552749912127248, + "grad_norm": 519.9200439453125, + "learning_rate": 5.004188789714811e-06, + "loss": 15.89, + "step": 274880 + }, + { + "epoch": 0.5552951918454087, + "grad_norm": 445.910888671875, + "learning_rate": 5.003839723976982e-06, + "loss": 14.5666, + "step": 274890 + }, + { + "epoch": 0.5553153924780924, + "grad_norm": 36.32236862182617, + "learning_rate": 5.003490658220438e-06, + "loss": 20.8325, + "step": 274900 + }, + { + "epoch": 0.5553355931107762, + "grad_norm": 99.9104995727539, + "learning_rate": 5.0031415924468816e-06, + "loss": 11.8601, + "step": 274910 + }, + { + "epoch": 0.55535579374346, + "grad_norm": 668.4713134765625, + "learning_rate": 5.002792526658015e-06, + "loss": 22.1278, + "step": 274920 + }, + { + "epoch": 0.5553759943761438, + "grad_norm": 766.052978515625, + "learning_rate": 5.002443460855535e-06, + "loss": 36.3304, + "step": 274930 + }, + { + "epoch": 0.5553961950088276, + "grad_norm": 734.5594482421875, + "learning_rate": 5.002094395041147e-06, + "loss": 12.9675, + "step": 274940 + }, + { + "epoch": 0.5554163956415115, + "grad_norm": 328.8623962402344, + "learning_rate": 5.001745329216551e-06, + "loss": 12.0246, + "step": 274950 + }, + { + "epoch": 0.5554365962741953, + "grad_norm": 1398.352783203125, + "learning_rate": 5.00139626338345e-06, + "loss": 25.3666, + "step": 274960 + }, + { + "epoch": 0.5554567969068791, + "grad_norm": 183.2097930908203, + "learning_rate": 5.00104719754354e-06, + "loss": 34.8507, + "step": 274970 + }, + { + "epoch": 0.5554769975395629, + "grad_norm": 325.44854736328125, + "learning_rate": 5.000698131698531e-06, + "loss": 26.734, + "step": 274980 + }, + { + "epoch": 0.5554971981722467, + "grad_norm": 1094.00341796875, + "learning_rate": 5.000349065850117e-06, + "loss": 30.343, + "step": 274990 + }, + { + "epoch": 0.5555173988049306, + "grad_norm": 314.4059753417969, + "learning_rate": 5e-06, + "loss": 26.2702, + "step": 275000 + }, + { + "epoch": 0.5555375994376144, + "grad_norm": 176.5780792236328, + "learning_rate": 4.999650934149885e-06, + "loss": 6.8526, + "step": 275010 + }, + { + "epoch": 0.5555578000702982, + "grad_norm": 613.3285522460938, + "learning_rate": 4.999301868301472e-06, + "loss": 20.9954, + "step": 275020 + }, + { + "epoch": 0.555578000702982, + "grad_norm": 305.3586730957031, + "learning_rate": 4.9989528024564606e-06, + "loss": 15.1626, + "step": 275030 + }, + { + "epoch": 0.5555982013356658, + "grad_norm": 89.95189666748047, + "learning_rate": 4.998603736616552e-06, + "loss": 21.0642, + "step": 275040 + }, + { + "epoch": 0.5556184019683497, + "grad_norm": 638.3482055664062, + "learning_rate": 4.9982546707834514e-06, + "loss": 12.8903, + "step": 275050 + }, + { + "epoch": 0.5556386026010335, + "grad_norm": 201.15760803222656, + "learning_rate": 4.9979056049588545e-06, + "loss": 16.1875, + "step": 275060 + }, + { + "epoch": 0.5556588032337173, + "grad_norm": 589.5166625976562, + "learning_rate": 4.997556539144467e-06, + "loss": 19.4037, + "step": 275070 + }, + { + "epoch": 0.5556790038664011, + "grad_norm": 234.26467895507812, + "learning_rate": 4.9972074733419875e-06, + "loss": 18.1829, + "step": 275080 + }, + { + "epoch": 0.5556992044990849, + "grad_norm": 190.3571014404297, + "learning_rate": 4.996858407553119e-06, + 
"loss": 14.9323, + "step": 275090 + }, + { + "epoch": 0.5557194051317688, + "grad_norm": 422.9874267578125, + "learning_rate": 4.996509341779563e-06, + "loss": 29.7398, + "step": 275100 + }, + { + "epoch": 0.5557396057644526, + "grad_norm": 604.4100341796875, + "learning_rate": 4.996160276023018e-06, + "loss": 43.8679, + "step": 275110 + }, + { + "epoch": 0.5557598063971364, + "grad_norm": 86.82774353027344, + "learning_rate": 4.99581121028519e-06, + "loss": 22.9737, + "step": 275120 + }, + { + "epoch": 0.5557800070298202, + "grad_norm": 414.9309387207031, + "learning_rate": 4.995462144567776e-06, + "loss": 15.1246, + "step": 275130 + }, + { + "epoch": 0.555800207662504, + "grad_norm": 579.406005859375, + "learning_rate": 4.9951130788724796e-06, + "loss": 13.6963, + "step": 275140 + }, + { + "epoch": 0.5558204082951878, + "grad_norm": 509.14105224609375, + "learning_rate": 4.994764013201002e-06, + "loss": 10.8033, + "step": 275150 + }, + { + "epoch": 0.5558406089278716, + "grad_norm": 303.1459655761719, + "learning_rate": 4.994414947555043e-06, + "loss": 26.2788, + "step": 275160 + }, + { + "epoch": 0.5558608095605554, + "grad_norm": 543.0546264648438, + "learning_rate": 4.994065881936305e-06, + "loss": 30.235, + "step": 275170 + }, + { + "epoch": 0.5558810101932392, + "grad_norm": 205.7716064453125, + "learning_rate": 4.99371681634649e-06, + "loss": 22.751, + "step": 275180 + }, + { + "epoch": 0.555901210825923, + "grad_norm": 149.52926635742188, + "learning_rate": 4.993367750787297e-06, + "loss": 14.7283, + "step": 275190 + }, + { + "epoch": 0.5559214114586069, + "grad_norm": 280.9228210449219, + "learning_rate": 4.993018685260428e-06, + "loss": 16.4838, + "step": 275200 + }, + { + "epoch": 0.5559416120912907, + "grad_norm": 195.95802307128906, + "learning_rate": 4.992669619767589e-06, + "loss": 11.5435, + "step": 275210 + }, + { + "epoch": 0.5559618127239745, + "grad_norm": 6.11033296585083, + "learning_rate": 4.992320554310474e-06, + "loss": 47.3396, + "step": 275220 + }, + { + "epoch": 0.5559820133566583, + "grad_norm": 446.1274719238281, + "learning_rate": 4.991971488890789e-06, + "loss": 31.3487, + "step": 275230 + }, + { + "epoch": 0.5560022139893421, + "grad_norm": 178.6339569091797, + "learning_rate": 4.991622423510233e-06, + "loss": 31.0977, + "step": 275240 + }, + { + "epoch": 0.556022414622026, + "grad_norm": 376.4140930175781, + "learning_rate": 4.9912733581705095e-06, + "loss": 8.8311, + "step": 275250 + }, + { + "epoch": 0.5560426152547098, + "grad_norm": 331.22418212890625, + "learning_rate": 4.9909242928733185e-06, + "loss": 25.8522, + "step": 275260 + }, + { + "epoch": 0.5560628158873936, + "grad_norm": 475.052001953125, + "learning_rate": 4.990575227620359e-06, + "loss": 29.3616, + "step": 275270 + }, + { + "epoch": 0.5560830165200774, + "grad_norm": 559.9166870117188, + "learning_rate": 4.990226162413338e-06, + "loss": 30.6506, + "step": 275280 + }, + { + "epoch": 0.5561032171527612, + "grad_norm": 911.1737060546875, + "learning_rate": 4.989877097253952e-06, + "loss": 18.0968, + "step": 275290 + }, + { + "epoch": 0.556123417785445, + "grad_norm": 772.8670043945312, + "learning_rate": 4.9895280321439036e-06, + "loss": 9.2231, + "step": 275300 + }, + { + "epoch": 0.5561436184181289, + "grad_norm": 642.33984375, + "learning_rate": 4.989178967084896e-06, + "loss": 19.8402, + "step": 275310 + }, + { + "epoch": 0.5561638190508127, + "grad_norm": 7.4433393478393555, + "learning_rate": 4.9888299020786265e-06, + "loss": 19.86, + "step": 275320 + }, + { + "epoch": 
0.5561840196834965, + "grad_norm": 671.8960571289062, + "learning_rate": 4.9884808371268e-06, + "loss": 32.5698, + "step": 275330 + }, + { + "epoch": 0.5562042203161803, + "grad_norm": 198.35899353027344, + "learning_rate": 4.9881317722311165e-06, + "loss": 5.8491, + "step": 275340 + }, + { + "epoch": 0.5562244209488642, + "grad_norm": 410.0425720214844, + "learning_rate": 4.987782707393276e-06, + "loss": 13.0426, + "step": 275350 + }, + { + "epoch": 0.556244621581548, + "grad_norm": 564.2764892578125, + "learning_rate": 4.987433642614981e-06, + "loss": 20.9403, + "step": 275360 + }, + { + "epoch": 0.5562648222142318, + "grad_norm": 2.9762794971466064, + "learning_rate": 4.987084577897936e-06, + "loss": 14.003, + "step": 275370 + }, + { + "epoch": 0.5562850228469156, + "grad_norm": 101.92252349853516, + "learning_rate": 4.986735513243836e-06, + "loss": 8.4082, + "step": 275380 + }, + { + "epoch": 0.5563052234795994, + "grad_norm": 227.08737182617188, + "learning_rate": 4.9863864486543865e-06, + "loss": 24.8647, + "step": 275390 + }, + { + "epoch": 0.5563254241122833, + "grad_norm": 573.6603393554688, + "learning_rate": 4.986037384131288e-06, + "loss": 12.8151, + "step": 275400 + }, + { + "epoch": 0.556345624744967, + "grad_norm": 730.9862060546875, + "learning_rate": 4.985688319676242e-06, + "loss": 21.6598, + "step": 275410 + }, + { + "epoch": 0.5563658253776508, + "grad_norm": 222.87841796875, + "learning_rate": 4.985339255290948e-06, + "loss": 9.5083, + "step": 275420 + }, + { + "epoch": 0.5563860260103346, + "grad_norm": 336.2375183105469, + "learning_rate": 4.98499019097711e-06, + "loss": 12.9924, + "step": 275430 + }, + { + "epoch": 0.5564062266430184, + "grad_norm": 220.37144470214844, + "learning_rate": 4.984641126736428e-06, + "loss": 15.1812, + "step": 275440 + }, + { + "epoch": 0.5564264272757022, + "grad_norm": 594.5784301757812, + "learning_rate": 4.984292062570603e-06, + "loss": 33.5213, + "step": 275450 + }, + { + "epoch": 0.5564466279083861, + "grad_norm": 480.54833984375, + "learning_rate": 4.983942998481336e-06, + "loss": 16.8189, + "step": 275460 + }, + { + "epoch": 0.5564668285410699, + "grad_norm": 331.3450012207031, + "learning_rate": 4.9835939344703305e-06, + "loss": 23.6895, + "step": 275470 + }, + { + "epoch": 0.5564870291737537, + "grad_norm": 141.87034606933594, + "learning_rate": 4.983244870539284e-06, + "loss": 20.68, + "step": 275480 + }, + { + "epoch": 0.5565072298064375, + "grad_norm": 6.501047134399414, + "learning_rate": 4.9828958066899e-06, + "loss": 7.0658, + "step": 275490 + }, + { + "epoch": 0.5565274304391213, + "grad_norm": 368.3595275878906, + "learning_rate": 4.982546742923883e-06, + "loss": 13.4994, + "step": 275500 + }, + { + "epoch": 0.5565476310718052, + "grad_norm": 343.9040222167969, + "learning_rate": 4.9821976792429274e-06, + "loss": 11.4451, + "step": 275510 + }, + { + "epoch": 0.556567831704489, + "grad_norm": 151.44241333007812, + "learning_rate": 4.981848615648739e-06, + "loss": 22.9241, + "step": 275520 + }, + { + "epoch": 0.5565880323371728, + "grad_norm": 292.4028015136719, + "learning_rate": 4.9814995521430195e-06, + "loss": 20.1823, + "step": 275530 + }, + { + "epoch": 0.5566082329698566, + "grad_norm": 405.0984802246094, + "learning_rate": 4.981150488727469e-06, + "loss": 11.9487, + "step": 275540 + }, + { + "epoch": 0.5566284336025404, + "grad_norm": 322.7618103027344, + "learning_rate": 4.980801425403788e-06, + "loss": 14.5687, + "step": 275550 + }, + { + "epoch": 0.5566486342352243, + "grad_norm": 59.802757263183594, + 
"learning_rate": 4.980452362173676e-06, + "loss": 18.1742, + "step": 275560 + }, + { + "epoch": 0.5566688348679081, + "grad_norm": 389.64208984375, + "learning_rate": 4.980103299038842e-06, + "loss": 12.1644, + "step": 275570 + }, + { + "epoch": 0.5566890355005919, + "grad_norm": 1275.4359130859375, + "learning_rate": 4.979754236000978e-06, + "loss": 24.2242, + "step": 275580 + }, + { + "epoch": 0.5567092361332757, + "grad_norm": 934.800537109375, + "learning_rate": 4.979405173061791e-06, + "loss": 14.1845, + "step": 275590 + }, + { + "epoch": 0.5567294367659595, + "grad_norm": 309.9809265136719, + "learning_rate": 4.979056110222982e-06, + "loss": 19.1958, + "step": 275600 + }, + { + "epoch": 0.5567496373986434, + "grad_norm": 421.5162048339844, + "learning_rate": 4.978707047486249e-06, + "loss": 21.8696, + "step": 275610 + }, + { + "epoch": 0.5567698380313272, + "grad_norm": 500.7240905761719, + "learning_rate": 4.978357984853296e-06, + "loss": 15.3565, + "step": 275620 + }, + { + "epoch": 0.556790038664011, + "grad_norm": 199.2356414794922, + "learning_rate": 4.9780089223258235e-06, + "loss": 18.4397, + "step": 275630 + }, + { + "epoch": 0.5568102392966948, + "grad_norm": 0.5291028022766113, + "learning_rate": 4.977659859905532e-06, + "loss": 26.18, + "step": 275640 + }, + { + "epoch": 0.5568304399293786, + "grad_norm": 240.7754669189453, + "learning_rate": 4.977310797594124e-06, + "loss": 11.6249, + "step": 275650 + }, + { + "epoch": 0.5568506405620623, + "grad_norm": 207.90576171875, + "learning_rate": 4.9769617353933025e-06, + "loss": 14.2674, + "step": 275660 + }, + { + "epoch": 0.5568708411947462, + "grad_norm": 35.586524963378906, + "learning_rate": 4.976612673304764e-06, + "loss": 19.3714, + "step": 275670 + }, + { + "epoch": 0.55689104182743, + "grad_norm": 288.2771301269531, + "learning_rate": 4.976263611330213e-06, + "loss": 16.8436, + "step": 275680 + }, + { + "epoch": 0.5569112424601138, + "grad_norm": 541.0880126953125, + "learning_rate": 4.97591454947135e-06, + "loss": 9.6839, + "step": 275690 + }, + { + "epoch": 0.5569314430927976, + "grad_norm": 368.6847229003906, + "learning_rate": 4.975565487729879e-06, + "loss": 13.0142, + "step": 275700 + }, + { + "epoch": 0.5569516437254814, + "grad_norm": 535.9522705078125, + "learning_rate": 4.9752164261074964e-06, + "loss": 34.5483, + "step": 275710 + }, + { + "epoch": 0.5569718443581653, + "grad_norm": 0.9300443530082703, + "learning_rate": 4.974867364605906e-06, + "loss": 14.6588, + "step": 275720 + }, + { + "epoch": 0.5569920449908491, + "grad_norm": 629.6343994140625, + "learning_rate": 4.97451830322681e-06, + "loss": 27.1088, + "step": 275730 + }, + { + "epoch": 0.5570122456235329, + "grad_norm": 686.4780883789062, + "learning_rate": 4.974169241971908e-06, + "loss": 21.9865, + "step": 275740 + }, + { + "epoch": 0.5570324462562167, + "grad_norm": 3.323115825653076, + "learning_rate": 4.9738201808429025e-06, + "loss": 12.1139, + "step": 275750 + }, + { + "epoch": 0.5570526468889005, + "grad_norm": 518.5536499023438, + "learning_rate": 4.973471119841495e-06, + "loss": 25.2883, + "step": 275760 + }, + { + "epoch": 0.5570728475215844, + "grad_norm": 698.150634765625, + "learning_rate": 4.973122058969384e-06, + "loss": 22.5602, + "step": 275770 + }, + { + "epoch": 0.5570930481542682, + "grad_norm": 344.7100524902344, + "learning_rate": 4.972772998228274e-06, + "loss": 15.8895, + "step": 275780 + }, + { + "epoch": 0.557113248786952, + "grad_norm": 664.1873779296875, + "learning_rate": 4.972423937619866e-06, + "loss": 22.1436, + 
"step": 275790 + }, + { + "epoch": 0.5571334494196358, + "grad_norm": 299.5687561035156, + "learning_rate": 4.9720748771458595e-06, + "loss": 12.1201, + "step": 275800 + }, + { + "epoch": 0.5571536500523196, + "grad_norm": 193.34686279296875, + "learning_rate": 4.971725816807956e-06, + "loss": 19.3396, + "step": 275810 + }, + { + "epoch": 0.5571738506850035, + "grad_norm": 323.8533935546875, + "learning_rate": 4.97137675660786e-06, + "loss": 20.3575, + "step": 275820 + }, + { + "epoch": 0.5571940513176873, + "grad_norm": 595.7634887695312, + "learning_rate": 4.971027696547266e-06, + "loss": 29.8513, + "step": 275830 + }, + { + "epoch": 0.5572142519503711, + "grad_norm": 399.06573486328125, + "learning_rate": 4.970678636627882e-06, + "loss": 24.1719, + "step": 275840 + }, + { + "epoch": 0.5572344525830549, + "grad_norm": 719.6656494140625, + "learning_rate": 4.970329576851406e-06, + "loss": 15.5013, + "step": 275850 + }, + { + "epoch": 0.5572546532157387, + "grad_norm": 171.09657287597656, + "learning_rate": 4.969980517219542e-06, + "loss": 10.368, + "step": 275860 + }, + { + "epoch": 0.5572748538484226, + "grad_norm": 934.2931518554688, + "learning_rate": 4.969631457733988e-06, + "loss": 27.2345, + "step": 275870 + }, + { + "epoch": 0.5572950544811064, + "grad_norm": 237.9071807861328, + "learning_rate": 4.969282398396445e-06, + "loss": 13.1042, + "step": 275880 + }, + { + "epoch": 0.5573152551137902, + "grad_norm": 238.2025146484375, + "learning_rate": 4.96893333920862e-06, + "loss": 17.233, + "step": 275890 + }, + { + "epoch": 0.557335455746474, + "grad_norm": 214.6839141845703, + "learning_rate": 4.968584280172206e-06, + "loss": 18.8641, + "step": 275900 + }, + { + "epoch": 0.5573556563791578, + "grad_norm": 265.20068359375, + "learning_rate": 4.968235221288909e-06, + "loss": 15.945, + "step": 275910 + }, + { + "epoch": 0.5573758570118416, + "grad_norm": 274.2738952636719, + "learning_rate": 4.967886162560432e-06, + "loss": 30.9454, + "step": 275920 + }, + { + "epoch": 0.5573960576445254, + "grad_norm": 226.4269561767578, + "learning_rate": 4.967537103988472e-06, + "loss": 33.7239, + "step": 275930 + }, + { + "epoch": 0.5574162582772092, + "grad_norm": 726.7615966796875, + "learning_rate": 4.967188045574733e-06, + "loss": 22.764, + "step": 275940 + }, + { + "epoch": 0.557436458909893, + "grad_norm": 358.86590576171875, + "learning_rate": 4.966838987320916e-06, + "loss": 19.9171, + "step": 275950 + }, + { + "epoch": 0.5574566595425768, + "grad_norm": 449.6456298828125, + "learning_rate": 4.966489929228721e-06, + "loss": 29.3903, + "step": 275960 + }, + { + "epoch": 0.5574768601752607, + "grad_norm": 632.8236694335938, + "learning_rate": 4.966140871299849e-06, + "loss": 17.1162, + "step": 275970 + }, + { + "epoch": 0.5574970608079445, + "grad_norm": 301.8818054199219, + "learning_rate": 4.965791813536004e-06, + "loss": 21.3535, + "step": 275980 + }, + { + "epoch": 0.5575172614406283, + "grad_norm": 653.9390869140625, + "learning_rate": 4.9654427559388845e-06, + "loss": 14.3596, + "step": 275990 + }, + { + "epoch": 0.5575374620733121, + "grad_norm": 340.1526794433594, + "learning_rate": 4.965093698510192e-06, + "loss": 13.6781, + "step": 276000 + }, + { + "epoch": 0.5575576627059959, + "grad_norm": 508.2944030761719, + "learning_rate": 4.96474464125163e-06, + "loss": 13.1891, + "step": 276010 + }, + { + "epoch": 0.5575778633386798, + "grad_norm": 469.45013427734375, + "learning_rate": 4.964395584164899e-06, + "loss": 17.2308, + "step": 276020 + }, + { + "epoch": 0.5575980639713636, + 
"grad_norm": 320.333251953125, + "learning_rate": 4.964046527251698e-06, + "loss": 34.6382, + "step": 276030 + }, + { + "epoch": 0.5576182646040474, + "grad_norm": 855.4724731445312, + "learning_rate": 4.9636974705137305e-06, + "loss": 19.0854, + "step": 276040 + }, + { + "epoch": 0.5576384652367312, + "grad_norm": 845.752685546875, + "learning_rate": 4.9633484139526975e-06, + "loss": 17.8679, + "step": 276050 + }, + { + "epoch": 0.557658665869415, + "grad_norm": 450.273193359375, + "learning_rate": 4.9629993575702995e-06, + "loss": 31.8959, + "step": 276060 + }, + { + "epoch": 0.5576788665020989, + "grad_norm": 152.7506103515625, + "learning_rate": 4.962650301368238e-06, + "loss": 11.4191, + "step": 276070 + }, + { + "epoch": 0.5576990671347827, + "grad_norm": 248.6024169921875, + "learning_rate": 4.962301245348215e-06, + "loss": 17.2046, + "step": 276080 + }, + { + "epoch": 0.5577192677674665, + "grad_norm": 265.6996154785156, + "learning_rate": 4.961952189511932e-06, + "loss": 17.0613, + "step": 276090 + }, + { + "epoch": 0.5577394684001503, + "grad_norm": 490.7113037109375, + "learning_rate": 4.961603133861086e-06, + "loss": 20.1365, + "step": 276100 + }, + { + "epoch": 0.5577596690328341, + "grad_norm": 58.4522590637207, + "learning_rate": 4.961254078397386e-06, + "loss": 19.8307, + "step": 276110 + }, + { + "epoch": 0.557779869665518, + "grad_norm": 89.830078125, + "learning_rate": 4.960905023122526e-06, + "loss": 17.4171, + "step": 276120 + }, + { + "epoch": 0.5578000702982018, + "grad_norm": 278.5671691894531, + "learning_rate": 4.9605559680382104e-06, + "loss": 17.8109, + "step": 276130 + }, + { + "epoch": 0.5578202709308856, + "grad_norm": 647.3381958007812, + "learning_rate": 4.960206913146141e-06, + "loss": 18.2097, + "step": 276140 + }, + { + "epoch": 0.5578404715635694, + "grad_norm": 448.3829345703125, + "learning_rate": 4.9598578584480186e-06, + "loss": 7.8238, + "step": 276150 + }, + { + "epoch": 0.5578606721962532, + "grad_norm": 86.89173889160156, + "learning_rate": 4.959508803945543e-06, + "loss": 11.8763, + "step": 276160 + }, + { + "epoch": 0.5578808728289371, + "grad_norm": 96.09246063232422, + "learning_rate": 4.9591597496404165e-06, + "loss": 9.3863, + "step": 276170 + }, + { + "epoch": 0.5579010734616208, + "grad_norm": 163.861572265625, + "learning_rate": 4.958810695534343e-06, + "loss": 23.1588, + "step": 276180 + }, + { + "epoch": 0.5579212740943046, + "grad_norm": 256.9462585449219, + "learning_rate": 4.958461641629018e-06, + "loss": 16.4793, + "step": 276190 + }, + { + "epoch": 0.5579414747269884, + "grad_norm": 177.65811157226562, + "learning_rate": 4.9581125879261476e-06, + "loss": 11.4484, + "step": 276200 + }, + { + "epoch": 0.5579616753596722, + "grad_norm": 376.4410705566406, + "learning_rate": 4.957763534427431e-06, + "loss": 21.685, + "step": 276210 + }, + { + "epoch": 0.557981875992356, + "grad_norm": 385.52325439453125, + "learning_rate": 4.9574144811345695e-06, + "loss": 10.8762, + "step": 276220 + }, + { + "epoch": 0.5580020766250399, + "grad_norm": 700.00830078125, + "learning_rate": 4.957065428049265e-06, + "loss": 15.0065, + "step": 276230 + }, + { + "epoch": 0.5580222772577237, + "grad_norm": 247.28700256347656, + "learning_rate": 4.956716375173219e-06, + "loss": 27.583, + "step": 276240 + }, + { + "epoch": 0.5580424778904075, + "grad_norm": 265.1885681152344, + "learning_rate": 4.956367322508131e-06, + "loss": 11.7517, + "step": 276250 + }, + { + "epoch": 0.5580626785230913, + "grad_norm": 252.61227416992188, + "learning_rate": 
4.956018270055703e-06, + "loss": 23.381, + "step": 276260 + }, + { + "epoch": 0.5580828791557751, + "grad_norm": 0.002575975377112627, + "learning_rate": 4.9556692178176395e-06, + "loss": 16.0502, + "step": 276270 + }, + { + "epoch": 0.558103079788459, + "grad_norm": 363.0325927734375, + "learning_rate": 4.955320165795636e-06, + "loss": 33.9509, + "step": 276280 + }, + { + "epoch": 0.5581232804211428, + "grad_norm": 158.7438507080078, + "learning_rate": 4.954971113991397e-06, + "loss": 14.7082, + "step": 276290 + }, + { + "epoch": 0.5581434810538266, + "grad_norm": 11.163829803466797, + "learning_rate": 4.954622062406623e-06, + "loss": 23.315, + "step": 276300 + }, + { + "epoch": 0.5581636816865104, + "grad_norm": 406.20037841796875, + "learning_rate": 4.954273011043018e-06, + "loss": 24.7558, + "step": 276310 + }, + { + "epoch": 0.5581838823191942, + "grad_norm": 180.91114807128906, + "learning_rate": 4.9539239599022784e-06, + "loss": 19.4117, + "step": 276320 + }, + { + "epoch": 0.5582040829518781, + "grad_norm": 461.32830810546875, + "learning_rate": 4.953574908986108e-06, + "loss": 21.8708, + "step": 276330 + }, + { + "epoch": 0.5582242835845619, + "grad_norm": 499.2826843261719, + "learning_rate": 4.95322585829621e-06, + "loss": 24.8518, + "step": 276340 + }, + { + "epoch": 0.5582444842172457, + "grad_norm": 267.67010498046875, + "learning_rate": 4.952876807834281e-06, + "loss": 27.8653, + "step": 276350 + }, + { + "epoch": 0.5582646848499295, + "grad_norm": 232.80372619628906, + "learning_rate": 4.952527757602025e-06, + "loss": 18.1488, + "step": 276360 + }, + { + "epoch": 0.5582848854826133, + "grad_norm": 518.7470703125, + "learning_rate": 4.952178707601144e-06, + "loss": 55.594, + "step": 276370 + }, + { + "epoch": 0.5583050861152972, + "grad_norm": 667.466552734375, + "learning_rate": 4.951829657833337e-06, + "loss": 21.389, + "step": 276380 + }, + { + "epoch": 0.558325286747981, + "grad_norm": 367.1182556152344, + "learning_rate": 4.951480608300308e-06, + "loss": 13.6146, + "step": 276390 + }, + { + "epoch": 0.5583454873806648, + "grad_norm": 34.47004699707031, + "learning_rate": 4.951131559003756e-06, + "loss": 15.0139, + "step": 276400 + }, + { + "epoch": 0.5583656880133486, + "grad_norm": 748.808837890625, + "learning_rate": 4.950782509945383e-06, + "loss": 20.2013, + "step": 276410 + }, + { + "epoch": 0.5583858886460324, + "grad_norm": 411.05963134765625, + "learning_rate": 4.950433461126888e-06, + "loss": 17.0833, + "step": 276420 + }, + { + "epoch": 0.5584060892787162, + "grad_norm": 304.01220703125, + "learning_rate": 4.950084412549978e-06, + "loss": 13.3847, + "step": 276430 + }, + { + "epoch": 0.5584262899114, + "grad_norm": 4.686367034912109, + "learning_rate": 4.949735364216348e-06, + "loss": 14.4116, + "step": 276440 + }, + { + "epoch": 0.5584464905440838, + "grad_norm": 303.236083984375, + "learning_rate": 4.9493863161277016e-06, + "loss": 13.1932, + "step": 276450 + }, + { + "epoch": 0.5584666911767676, + "grad_norm": 244.71414184570312, + "learning_rate": 4.949037268285741e-06, + "loss": 35.4463, + "step": 276460 + }, + { + "epoch": 0.5584868918094514, + "grad_norm": 418.2349548339844, + "learning_rate": 4.948688220692167e-06, + "loss": 25.0785, + "step": 276470 + }, + { + "epoch": 0.5585070924421353, + "grad_norm": 287.80889892578125, + "learning_rate": 4.94833917334868e-06, + "loss": 13.6992, + "step": 276480 + }, + { + "epoch": 0.5585272930748191, + "grad_norm": 324.98583984375, + "learning_rate": 4.9479901262569795e-06, + "loss": 45.5813, + "step": 276490 + }, 
+ { + "epoch": 0.5585474937075029, + "grad_norm": 103.70023345947266, + "learning_rate": 4.9476410794187726e-06, + "loss": 18.5455, + "step": 276500 + }, + { + "epoch": 0.5585676943401867, + "grad_norm": 165.24594116210938, + "learning_rate": 4.947292032835754e-06, + "loss": 18.9912, + "step": 276510 + }, + { + "epoch": 0.5585878949728705, + "grad_norm": 549.1682739257812, + "learning_rate": 4.946942986509628e-06, + "loss": 20.0053, + "step": 276520 + }, + { + "epoch": 0.5586080956055544, + "grad_norm": 333.8079833984375, + "learning_rate": 4.946593940442097e-06, + "loss": 17.1418, + "step": 276530 + }, + { + "epoch": 0.5586282962382382, + "grad_norm": 62.44570541381836, + "learning_rate": 4.9462448946348594e-06, + "loss": 12.6096, + "step": 276540 + }, + { + "epoch": 0.558648496870922, + "grad_norm": 1.0580861568450928, + "learning_rate": 4.945895849089618e-06, + "loss": 23.9356, + "step": 276550 + }, + { + "epoch": 0.5586686975036058, + "grad_norm": 361.0372314453125, + "learning_rate": 4.945546803808074e-06, + "loss": 18.6434, + "step": 276560 + }, + { + "epoch": 0.5586888981362896, + "grad_norm": 661.345947265625, + "learning_rate": 4.945197758791928e-06, + "loss": 23.0392, + "step": 276570 + }, + { + "epoch": 0.5587090987689735, + "grad_norm": 276.2427978515625, + "learning_rate": 4.944848714042879e-06, + "loss": 18.4101, + "step": 276580 + }, + { + "epoch": 0.5587292994016573, + "grad_norm": 280.89178466796875, + "learning_rate": 4.9444996695626325e-06, + "loss": 9.9693, + "step": 276590 + }, + { + "epoch": 0.5587495000343411, + "grad_norm": 143.61856079101562, + "learning_rate": 4.94415062535289e-06, + "loss": 18.7723, + "step": 276600 + }, + { + "epoch": 0.5587697006670249, + "grad_norm": 1065.444091796875, + "learning_rate": 4.943801581415348e-06, + "loss": 14.0857, + "step": 276610 + }, + { + "epoch": 0.5587899012997087, + "grad_norm": 334.22528076171875, + "learning_rate": 4.9434525377517115e-06, + "loss": 28.4729, + "step": 276620 + }, + { + "epoch": 0.5588101019323926, + "grad_norm": 42.48712921142578, + "learning_rate": 4.9431034943636816e-06, + "loss": 25.1483, + "step": 276630 + }, + { + "epoch": 0.5588303025650764, + "grad_norm": 203.1193084716797, + "learning_rate": 4.942754451252957e-06, + "loss": 21.2341, + "step": 276640 + }, + { + "epoch": 0.5588505031977602, + "grad_norm": 11.171894073486328, + "learning_rate": 4.942405408421238e-06, + "loss": 11.2092, + "step": 276650 + }, + { + "epoch": 0.558870703830444, + "grad_norm": 395.5190124511719, + "learning_rate": 4.942056365870231e-06, + "loss": 8.646, + "step": 276660 + }, + { + "epoch": 0.5588909044631278, + "grad_norm": 689.1996459960938, + "learning_rate": 4.941707323601633e-06, + "loss": 24.7655, + "step": 276670 + }, + { + "epoch": 0.5589111050958117, + "grad_norm": 23.772083282470703, + "learning_rate": 4.941358281617148e-06, + "loss": 23.8002, + "step": 276680 + }, + { + "epoch": 0.5589313057284954, + "grad_norm": 328.78912353515625, + "learning_rate": 4.941009239918474e-06, + "loss": 25.579, + "step": 276690 + }, + { + "epoch": 0.5589515063611792, + "grad_norm": 803.1546630859375, + "learning_rate": 4.940660198507315e-06, + "loss": 18.9472, + "step": 276700 + }, + { + "epoch": 0.558971706993863, + "grad_norm": 106.0699234008789, + "learning_rate": 4.940311157385369e-06, + "loss": 16.5392, + "step": 276710 + }, + { + "epoch": 0.5589919076265468, + "grad_norm": 462.4819030761719, + "learning_rate": 4.939962116554343e-06, + "loss": 20.0266, + "step": 276720 + }, + { + "epoch": 0.5590121082592306, + "grad_norm": 
595.452880859375, + "learning_rate": 4.93961307601593e-06, + "loss": 31.1329, + "step": 276730 + }, + { + "epoch": 0.5590323088919145, + "grad_norm": 165.49835205078125, + "learning_rate": 4.939264035771837e-06, + "loss": 12.3925, + "step": 276740 + }, + { + "epoch": 0.5590525095245983, + "grad_norm": 550.9774169921875, + "learning_rate": 4.938914995823764e-06, + "loss": 22.1677, + "step": 276750 + }, + { + "epoch": 0.5590727101572821, + "grad_norm": 835.67333984375, + "learning_rate": 4.938565956173413e-06, + "loss": 23.3062, + "step": 276760 + }, + { + "epoch": 0.5590929107899659, + "grad_norm": 139.3406219482422, + "learning_rate": 4.938216916822483e-06, + "loss": 15.5264, + "step": 276770 + }, + { + "epoch": 0.5591131114226497, + "grad_norm": 130.74205017089844, + "learning_rate": 4.937867877772675e-06, + "loss": 15.202, + "step": 276780 + }, + { + "epoch": 0.5591333120553336, + "grad_norm": 568.3181762695312, + "learning_rate": 4.937518839025695e-06, + "loss": 14.5231, + "step": 276790 + }, + { + "epoch": 0.5591535126880174, + "grad_norm": 90.9675064086914, + "learning_rate": 4.937169800583237e-06, + "loss": 12.635, + "step": 276800 + }, + { + "epoch": 0.5591737133207012, + "grad_norm": 604.9907836914062, + "learning_rate": 4.936820762447007e-06, + "loss": 18.2224, + "step": 276810 + }, + { + "epoch": 0.559193913953385, + "grad_norm": 195.16026306152344, + "learning_rate": 4.936471724618706e-06, + "loss": 12.2022, + "step": 276820 + }, + { + "epoch": 0.5592141145860688, + "grad_norm": 270.56549072265625, + "learning_rate": 4.936122687100034e-06, + "loss": 21.3985, + "step": 276830 + }, + { + "epoch": 0.5592343152187527, + "grad_norm": 221.6443634033203, + "learning_rate": 4.93577364989269e-06, + "loss": 25.6469, + "step": 276840 + }, + { + "epoch": 0.5592545158514365, + "grad_norm": 395.1712951660156, + "learning_rate": 4.93542461299838e-06, + "loss": 13.1065, + "step": 276850 + }, + { + "epoch": 0.5592747164841203, + "grad_norm": 390.8291320800781, + "learning_rate": 4.935075576418802e-06, + "loss": 15.2034, + "step": 276860 + }, + { + "epoch": 0.5592949171168041, + "grad_norm": 516.2682495117188, + "learning_rate": 4.934726540155656e-06, + "loss": 27.2103, + "step": 276870 + }, + { + "epoch": 0.5593151177494879, + "grad_norm": 246.35308837890625, + "learning_rate": 4.934377504210648e-06, + "loss": 24.024, + "step": 276880 + }, + { + "epoch": 0.5593353183821718, + "grad_norm": 823.3765869140625, + "learning_rate": 4.934028468585473e-06, + "loss": 15.8606, + "step": 276890 + }, + { + "epoch": 0.5593555190148556, + "grad_norm": 365.7604064941406, + "learning_rate": 4.933679433281837e-06, + "loss": 17.7518, + "step": 276900 + }, + { + "epoch": 0.5593757196475394, + "grad_norm": 2235.00390625, + "learning_rate": 4.933330398301438e-06, + "loss": 32.2481, + "step": 276910 + }, + { + "epoch": 0.5593959202802232, + "grad_norm": 180.20101928710938, + "learning_rate": 4.932981363645981e-06, + "loss": 26.0842, + "step": 276920 + }, + { + "epoch": 0.559416120912907, + "grad_norm": 416.2943420410156, + "learning_rate": 4.932632329317162e-06, + "loss": 18.6779, + "step": 276930 + }, + { + "epoch": 0.5594363215455908, + "grad_norm": 325.4767150878906, + "learning_rate": 4.9322832953166856e-06, + "loss": 18.2107, + "step": 276940 + }, + { + "epoch": 0.5594565221782746, + "grad_norm": 626.8479614257812, + "learning_rate": 4.931934261646255e-06, + "loss": 24.3436, + "step": 276950 + }, + { + "epoch": 0.5594767228109584, + "grad_norm": 3.538747549057007, + "learning_rate": 4.931585228307564e-06, + 
"loss": 17.349, + "step": 276960 + }, + { + "epoch": 0.5594969234436422, + "grad_norm": 71.02916717529297, + "learning_rate": 4.931236195302321e-06, + "loss": 20.873, + "step": 276970 + }, + { + "epoch": 0.559517124076326, + "grad_norm": 1171.236328125, + "learning_rate": 4.930887162632225e-06, + "loss": 20.8846, + "step": 276980 + }, + { + "epoch": 0.5595373247090099, + "grad_norm": 430.44439697265625, + "learning_rate": 4.930538130298975e-06, + "loss": 31.7646, + "step": 276990 + }, + { + "epoch": 0.5595575253416937, + "grad_norm": 498.4757995605469, + "learning_rate": 4.9301890983042744e-06, + "loss": 25.1662, + "step": 277000 + }, + { + "epoch": 0.5595777259743775, + "grad_norm": 421.6228332519531, + "learning_rate": 4.929840066649824e-06, + "loss": 20.3946, + "step": 277010 + }, + { + "epoch": 0.5595979266070613, + "grad_norm": 22.71751594543457, + "learning_rate": 4.929491035337325e-06, + "loss": 19.0553, + "step": 277020 + }, + { + "epoch": 0.5596181272397451, + "grad_norm": 301.5302429199219, + "learning_rate": 4.929142004368475e-06, + "loss": 15.6127, + "step": 277030 + }, + { + "epoch": 0.559638327872429, + "grad_norm": 460.103759765625, + "learning_rate": 4.928792973744983e-06, + "loss": 39.9705, + "step": 277040 + }, + { + "epoch": 0.5596585285051128, + "grad_norm": 742.2890014648438, + "learning_rate": 4.928443943468541e-06, + "loss": 16.4028, + "step": 277050 + }, + { + "epoch": 0.5596787291377966, + "grad_norm": 540.8504638671875, + "learning_rate": 4.928094913540857e-06, + "loss": 11.9522, + "step": 277060 + }, + { + "epoch": 0.5596989297704804, + "grad_norm": 491.7070617675781, + "learning_rate": 4.927745883963629e-06, + "loss": 17.2544, + "step": 277070 + }, + { + "epoch": 0.5597191304031642, + "grad_norm": 322.5917663574219, + "learning_rate": 4.92739685473856e-06, + "loss": 20.6291, + "step": 277080 + }, + { + "epoch": 0.559739331035848, + "grad_norm": 276.1647033691406, + "learning_rate": 4.927047825867349e-06, + "loss": 18.5095, + "step": 277090 + }, + { + "epoch": 0.5597595316685319, + "grad_norm": 3.7427709102630615, + "learning_rate": 4.926698797351697e-06, + "loss": 17.7034, + "step": 277100 + }, + { + "epoch": 0.5597797323012157, + "grad_norm": 286.2646789550781, + "learning_rate": 4.926349769193308e-06, + "loss": 11.4961, + "step": 277110 + }, + { + "epoch": 0.5597999329338995, + "grad_norm": 735.1262817382812, + "learning_rate": 4.92600074139388e-06, + "loss": 16.7976, + "step": 277120 + }, + { + "epoch": 0.5598201335665833, + "grad_norm": 621.32177734375, + "learning_rate": 4.925651713955115e-06, + "loss": 26.1611, + "step": 277130 + }, + { + "epoch": 0.5598403341992672, + "grad_norm": 364.3940734863281, + "learning_rate": 4.925302686878717e-06, + "loss": 25.4851, + "step": 277140 + }, + { + "epoch": 0.559860534831951, + "grad_norm": 224.42904663085938, + "learning_rate": 4.924953660166383e-06, + "loss": 23.8722, + "step": 277150 + }, + { + "epoch": 0.5598807354646348, + "grad_norm": 360.2662048339844, + "learning_rate": 4.924604633819815e-06, + "loss": 16.7067, + "step": 277160 + }, + { + "epoch": 0.5599009360973186, + "grad_norm": 286.9324951171875, + "learning_rate": 4.924255607840717e-06, + "loss": 21.0849, + "step": 277170 + }, + { + "epoch": 0.5599211367300024, + "grad_norm": 224.43296813964844, + "learning_rate": 4.923906582230786e-06, + "loss": 21.4258, + "step": 277180 + }, + { + "epoch": 0.5599413373626863, + "grad_norm": 276.378173828125, + "learning_rate": 4.923557556991724e-06, + "loss": 11.3986, + "step": 277190 + }, + { + "epoch": 
0.55996153799537, + "grad_norm": 205.25302124023438, + "learning_rate": 4.923208532125235e-06, + "loss": 23.9816, + "step": 277200 + }, + { + "epoch": 0.5599817386280538, + "grad_norm": 420.8791809082031, + "learning_rate": 4.9228595076330196e-06, + "loss": 10.7947, + "step": 277210 + }, + { + "epoch": 0.5600019392607376, + "grad_norm": 702.0499877929688, + "learning_rate": 4.9225104835167755e-06, + "loss": 12.4634, + "step": 277220 + }, + { + "epoch": 0.5600221398934214, + "grad_norm": 382.42437744140625, + "learning_rate": 4.9221614597782066e-06, + "loss": 18.7814, + "step": 277230 + }, + { + "epoch": 0.5600423405261052, + "grad_norm": 213.75582885742188, + "learning_rate": 4.921812436419014e-06, + "loss": 21.6486, + "step": 277240 + }, + { + "epoch": 0.5600625411587891, + "grad_norm": 0.0, + "learning_rate": 4.921463413440898e-06, + "loss": 25.8586, + "step": 277250 + }, + { + "epoch": 0.5600827417914729, + "grad_norm": 214.06646728515625, + "learning_rate": 4.9211143908455575e-06, + "loss": 22.5008, + "step": 277260 + }, + { + "epoch": 0.5601029424241567, + "grad_norm": 183.02252197265625, + "learning_rate": 4.920765368634699e-06, + "loss": 10.4723, + "step": 277270 + }, + { + "epoch": 0.5601231430568405, + "grad_norm": 505.80792236328125, + "learning_rate": 4.920416346810019e-06, + "loss": 7.7827, + "step": 277280 + }, + { + "epoch": 0.5601433436895243, + "grad_norm": 811.605712890625, + "learning_rate": 4.920067325373219e-06, + "loss": 33.468, + "step": 277290 + }, + { + "epoch": 0.5601635443222082, + "grad_norm": 243.12509155273438, + "learning_rate": 4.9197183043260035e-06, + "loss": 18.1433, + "step": 277300 + }, + { + "epoch": 0.560183744954892, + "grad_norm": 90.96365356445312, + "learning_rate": 4.91936928367007e-06, + "loss": 21.1801, + "step": 277310 + }, + { + "epoch": 0.5602039455875758, + "grad_norm": 127.96955871582031, + "learning_rate": 4.919020263407121e-06, + "loss": 16.3724, + "step": 277320 + }, + { + "epoch": 0.5602241462202596, + "grad_norm": 180.5815887451172, + "learning_rate": 4.918671243538859e-06, + "loss": 12.0421, + "step": 277330 + }, + { + "epoch": 0.5602443468529434, + "grad_norm": 673.2286987304688, + "learning_rate": 4.91832222406698e-06, + "loss": 24.3858, + "step": 277340 + }, + { + "epoch": 0.5602645474856273, + "grad_norm": 712.7254028320312, + "learning_rate": 4.91797320499319e-06, + "loss": 16.976, + "step": 277350 + }, + { + "epoch": 0.5602847481183111, + "grad_norm": 106.54489135742188, + "learning_rate": 4.9176241863191895e-06, + "loss": 20.2608, + "step": 277360 + }, + { + "epoch": 0.5603049487509949, + "grad_norm": 595.0263671875, + "learning_rate": 4.917275168046678e-06, + "loss": 26.9587, + "step": 277370 + }, + { + "epoch": 0.5603251493836787, + "grad_norm": 177.71038818359375, + "learning_rate": 4.916926150177358e-06, + "loss": 12.8446, + "step": 277380 + }, + { + "epoch": 0.5603453500163625, + "grad_norm": 499.8451232910156, + "learning_rate": 4.916577132712929e-06, + "loss": 20.6954, + "step": 277390 + }, + { + "epoch": 0.5603655506490464, + "grad_norm": 453.36749267578125, + "learning_rate": 4.9162281156550945e-06, + "loss": 27.0135, + "step": 277400 + }, + { + "epoch": 0.5603857512817302, + "grad_norm": 66.6358871459961, + "learning_rate": 4.915879099005552e-06, + "loss": 15.8422, + "step": 277410 + }, + { + "epoch": 0.560405951914414, + "grad_norm": 292.084716796875, + "learning_rate": 4.915530082766005e-06, + "loss": 14.657, + "step": 277420 + }, + { + "epoch": 0.5604261525470978, + "grad_norm": 387.52532958984375, + 
"learning_rate": 4.915181066938156e-06, + "loss": 20.5576, + "step": 277430 + }, + { + "epoch": 0.5604463531797816, + "grad_norm": 121.14635467529297, + "learning_rate": 4.914832051523702e-06, + "loss": 11.5561, + "step": 277440 + }, + { + "epoch": 0.5604665538124654, + "grad_norm": 164.2471160888672, + "learning_rate": 4.9144830365243464e-06, + "loss": 20.0493, + "step": 277450 + }, + { + "epoch": 0.5604867544451492, + "grad_norm": 688.5679931640625, + "learning_rate": 4.914134021941792e-06, + "loss": 21.2318, + "step": 277460 + }, + { + "epoch": 0.560506955077833, + "grad_norm": 433.1025695800781, + "learning_rate": 4.913785007777737e-06, + "loss": 13.295, + "step": 277470 + }, + { + "epoch": 0.5605271557105168, + "grad_norm": 65.97478485107422, + "learning_rate": 4.9134359940338815e-06, + "loss": 22.3415, + "step": 277480 + }, + { + "epoch": 0.5605473563432006, + "grad_norm": 436.15618896484375, + "learning_rate": 4.913086980711932e-06, + "loss": 31.5212, + "step": 277490 + }, + { + "epoch": 0.5605675569758845, + "grad_norm": 299.2603454589844, + "learning_rate": 4.9127379678135825e-06, + "loss": 19.6428, + "step": 277500 + }, + { + "epoch": 0.5605877576085683, + "grad_norm": 734.9041748046875, + "learning_rate": 4.91238895534054e-06, + "loss": 9.5273, + "step": 277510 + }, + { + "epoch": 0.5606079582412521, + "grad_norm": 139.4139862060547, + "learning_rate": 4.912039943294502e-06, + "loss": 15.6401, + "step": 277520 + }, + { + "epoch": 0.5606281588739359, + "grad_norm": 113.31924438476562, + "learning_rate": 4.911690931677172e-06, + "loss": 14.8608, + "step": 277530 + }, + { + "epoch": 0.5606483595066197, + "grad_norm": 70.83113861083984, + "learning_rate": 4.911341920490248e-06, + "loss": 21.0946, + "step": 277540 + }, + { + "epoch": 0.5606685601393036, + "grad_norm": 169.0532989501953, + "learning_rate": 4.910992909735432e-06, + "loss": 30.2231, + "step": 277550 + }, + { + "epoch": 0.5606887607719874, + "grad_norm": 214.66018676757812, + "learning_rate": 4.910643899414429e-06, + "loss": 13.8178, + "step": 277560 + }, + { + "epoch": 0.5607089614046712, + "grad_norm": 409.4646301269531, + "learning_rate": 4.910294889528934e-06, + "loss": 14.5599, + "step": 277570 + }, + { + "epoch": 0.560729162037355, + "grad_norm": 110.76598358154297, + "learning_rate": 4.909945880080651e-06, + "loss": 13.367, + "step": 277580 + }, + { + "epoch": 0.5607493626700388, + "grad_norm": 1096.98486328125, + "learning_rate": 4.909596871071283e-06, + "loss": 18.3253, + "step": 277590 + }, + { + "epoch": 0.5607695633027227, + "grad_norm": 201.20767211914062, + "learning_rate": 4.9092478625025266e-06, + "loss": 7.0634, + "step": 277600 + }, + { + "epoch": 0.5607897639354065, + "grad_norm": 285.1210632324219, + "learning_rate": 4.908898854376086e-06, + "loss": 13.3978, + "step": 277610 + }, + { + "epoch": 0.5608099645680903, + "grad_norm": 126.02528381347656, + "learning_rate": 4.908549846693662e-06, + "loss": 8.4953, + "step": 277620 + }, + { + "epoch": 0.5608301652007741, + "grad_norm": 615.0642700195312, + "learning_rate": 4.908200839456955e-06, + "loss": 12.3349, + "step": 277630 + }, + { + "epoch": 0.5608503658334579, + "grad_norm": 328.4018859863281, + "learning_rate": 4.907851832667663e-06, + "loss": 13.5078, + "step": 277640 + }, + { + "epoch": 0.5608705664661418, + "grad_norm": 209.73092651367188, + "learning_rate": 4.9075028263274925e-06, + "loss": 10.9013, + "step": 277650 + }, + { + "epoch": 0.5608907670988256, + "grad_norm": 306.17877197265625, + "learning_rate": 4.907153820438142e-06, + "loss": 
12.0092, + "step": 277660 + }, + { + "epoch": 0.5609109677315094, + "grad_norm": 981.385009765625, + "learning_rate": 4.9068048150013124e-06, + "loss": 24.4804, + "step": 277670 + }, + { + "epoch": 0.5609311683641932, + "grad_norm": 285.9813537597656, + "learning_rate": 4.906455810018705e-06, + "loss": 14.1561, + "step": 277680 + }, + { + "epoch": 0.560951368996877, + "grad_norm": 416.0923767089844, + "learning_rate": 4.906106805492021e-06, + "loss": 22.5247, + "step": 277690 + }, + { + "epoch": 0.5609715696295609, + "grad_norm": 894.4718627929688, + "learning_rate": 4.90575780142296e-06, + "loss": 22.7162, + "step": 277700 + }, + { + "epoch": 0.5609917702622446, + "grad_norm": 330.90423583984375, + "learning_rate": 4.905408797813223e-06, + "loss": 27.7434, + "step": 277710 + }, + { + "epoch": 0.5610119708949284, + "grad_norm": 1.8141493797302246, + "learning_rate": 4.9050597946645155e-06, + "loss": 24.424, + "step": 277720 + }, + { + "epoch": 0.5610321715276122, + "grad_norm": 0.0, + "learning_rate": 4.904710791978532e-06, + "loss": 15.6168, + "step": 277730 + }, + { + "epoch": 0.561052372160296, + "grad_norm": 156.7672119140625, + "learning_rate": 4.9043617897569775e-06, + "loss": 17.4672, + "step": 277740 + }, + { + "epoch": 0.5610725727929798, + "grad_norm": 321.03082275390625, + "learning_rate": 4.904012788001553e-06, + "loss": 17.8866, + "step": 277750 + }, + { + "epoch": 0.5610927734256637, + "grad_norm": 1058.6097412109375, + "learning_rate": 4.903663786713957e-06, + "loss": 29.5709, + "step": 277760 + }, + { + "epoch": 0.5611129740583475, + "grad_norm": 305.7610778808594, + "learning_rate": 4.903314785895893e-06, + "loss": 27.1025, + "step": 277770 + }, + { + "epoch": 0.5611331746910313, + "grad_norm": 905.561767578125, + "learning_rate": 4.902965785549061e-06, + "loss": 24.3868, + "step": 277780 + }, + { + "epoch": 0.5611533753237151, + "grad_norm": 126.15748596191406, + "learning_rate": 4.902616785675162e-06, + "loss": 22.0782, + "step": 277790 + }, + { + "epoch": 0.5611735759563989, + "grad_norm": 414.2947082519531, + "learning_rate": 4.902267786275895e-06, + "loss": 30.3595, + "step": 277800 + }, + { + "epoch": 0.5611937765890828, + "grad_norm": 775.2684326171875, + "learning_rate": 4.901918787352965e-06, + "loss": 21.3826, + "step": 277810 + }, + { + "epoch": 0.5612139772217666, + "grad_norm": 314.74493408203125, + "learning_rate": 4.901569788908071e-06, + "loss": 15.4112, + "step": 277820 + }, + { + "epoch": 0.5612341778544504, + "grad_norm": 272.0699768066406, + "learning_rate": 4.901220790942913e-06, + "loss": 24.3775, + "step": 277830 + }, + { + "epoch": 0.5612543784871342, + "grad_norm": 484.6834411621094, + "learning_rate": 4.900871793459193e-06, + "loss": 25.3064, + "step": 277840 + }, + { + "epoch": 0.561274579119818, + "grad_norm": 310.4993591308594, + "learning_rate": 4.900522796458613e-06, + "loss": 12.8083, + "step": 277850 + }, + { + "epoch": 0.5612947797525019, + "grad_norm": 277.1334228515625, + "learning_rate": 4.900173799942873e-06, + "loss": 25.7227, + "step": 277860 + }, + { + "epoch": 0.5613149803851857, + "grad_norm": 506.7071533203125, + "learning_rate": 4.89982480391367e-06, + "loss": 14.2547, + "step": 277870 + }, + { + "epoch": 0.5613351810178695, + "grad_norm": 345.21087646484375, + "learning_rate": 4.899475808372714e-06, + "loss": 17.3943, + "step": 277880 + }, + { + "epoch": 0.5613553816505533, + "grad_norm": 498.5858459472656, + "learning_rate": 4.899126813321697e-06, + "loss": 21.8743, + "step": 277890 + }, + { + "epoch": 0.5613755822832371, + 
"grad_norm": 39.76124954223633, + "learning_rate": 4.898777818762325e-06, + "loss": 17.1321, + "step": 277900 + }, + { + "epoch": 0.561395782915921, + "grad_norm": 258.9658508300781, + "learning_rate": 4.898428824696298e-06, + "loss": 26.9842, + "step": 277910 + }, + { + "epoch": 0.5614159835486048, + "grad_norm": 106.4733657836914, + "learning_rate": 4.898079831125316e-06, + "loss": 16.924, + "step": 277920 + }, + { + "epoch": 0.5614361841812886, + "grad_norm": 326.1387023925781, + "learning_rate": 4.897730838051081e-06, + "loss": 28.6345, + "step": 277930 + }, + { + "epoch": 0.5614563848139724, + "grad_norm": 521.5694580078125, + "learning_rate": 4.897381845475294e-06, + "loss": 26.2371, + "step": 277940 + }, + { + "epoch": 0.5614765854466562, + "grad_norm": 156.76898193359375, + "learning_rate": 4.897032853399653e-06, + "loss": 20.5586, + "step": 277950 + }, + { + "epoch": 0.5614967860793401, + "grad_norm": 0.0, + "learning_rate": 4.896683861825863e-06, + "loss": 12.6179, + "step": 277960 + }, + { + "epoch": 0.5615169867120238, + "grad_norm": 380.5687561035156, + "learning_rate": 4.896334870755623e-06, + "loss": 20.4136, + "step": 277970 + }, + { + "epoch": 0.5615371873447076, + "grad_norm": 407.02862548828125, + "learning_rate": 4.895985880190636e-06, + "loss": 13.2282, + "step": 277980 + }, + { + "epoch": 0.5615573879773914, + "grad_norm": 336.0501708984375, + "learning_rate": 4.895636890132599e-06, + "loss": 21.0171, + "step": 277990 + }, + { + "epoch": 0.5615775886100752, + "grad_norm": 1520.871826171875, + "learning_rate": 4.895287900583216e-06, + "loss": 35.0917, + "step": 278000 + }, + { + "epoch": 0.561597789242759, + "grad_norm": 282.1485900878906, + "learning_rate": 4.894938911544188e-06, + "loss": 22.7634, + "step": 278010 + }, + { + "epoch": 0.5616179898754429, + "grad_norm": 99.08549499511719, + "learning_rate": 4.894589923017212e-06, + "loss": 16.0685, + "step": 278020 + }, + { + "epoch": 0.5616381905081267, + "grad_norm": 393.707763671875, + "learning_rate": 4.894240935003994e-06, + "loss": 15.8697, + "step": 278030 + }, + { + "epoch": 0.5616583911408105, + "grad_norm": 433.3358459472656, + "learning_rate": 4.893891947506234e-06, + "loss": 15.7811, + "step": 278040 + }, + { + "epoch": 0.5616785917734943, + "grad_norm": 338.52593994140625, + "learning_rate": 4.89354296052563e-06, + "loss": 24.2855, + "step": 278050 + }, + { + "epoch": 0.5616987924061781, + "grad_norm": 190.8038330078125, + "learning_rate": 4.893193974063885e-06, + "loss": 22.1841, + "step": 278060 + }, + { + "epoch": 0.561718993038862, + "grad_norm": 635.9793701171875, + "learning_rate": 4.892844988122701e-06, + "loss": 37.8201, + "step": 278070 + }, + { + "epoch": 0.5617391936715458, + "grad_norm": 258.3083801269531, + "learning_rate": 4.892496002703777e-06, + "loss": 19.9726, + "step": 278080 + }, + { + "epoch": 0.5617593943042296, + "grad_norm": 263.74127197265625, + "learning_rate": 4.892147017808812e-06, + "loss": 37.1322, + "step": 278090 + }, + { + "epoch": 0.5617795949369134, + "grad_norm": 375.145751953125, + "learning_rate": 4.891798033439511e-06, + "loss": 16.6032, + "step": 278100 + }, + { + "epoch": 0.5617997955695972, + "grad_norm": 161.28228759765625, + "learning_rate": 4.891449049597574e-06, + "loss": 18.2481, + "step": 278110 + }, + { + "epoch": 0.5618199962022811, + "grad_norm": 18.511276245117188, + "learning_rate": 4.891100066284701e-06, + "loss": 12.4458, + "step": 278120 + }, + { + "epoch": 0.5618401968349649, + "grad_norm": 229.6265411376953, + "learning_rate": 
4.8907510835025924e-06, + "loss": 17.1244, + "step": 278130 + }, + { + "epoch": 0.5618603974676487, + "grad_norm": 318.438232421875, + "learning_rate": 4.890402101252951e-06, + "loss": 18.0329, + "step": 278140 + }, + { + "epoch": 0.5618805981003325, + "grad_norm": 344.5833740234375, + "learning_rate": 4.890053119537475e-06, + "loss": 13.6882, + "step": 278150 + }, + { + "epoch": 0.5619007987330163, + "grad_norm": 100.07310485839844, + "learning_rate": 4.889704138357867e-06, + "loss": 25.8497, + "step": 278160 + }, + { + "epoch": 0.5619209993657002, + "grad_norm": 2.145982265472412, + "learning_rate": 4.889355157715829e-06, + "loss": 24.2324, + "step": 278170 + }, + { + "epoch": 0.561941199998384, + "grad_norm": 0.07046766579151154, + "learning_rate": 4.889006177613059e-06, + "loss": 23.4975, + "step": 278180 + }, + { + "epoch": 0.5619614006310678, + "grad_norm": 305.05572509765625, + "learning_rate": 4.888657198051259e-06, + "loss": 23.9864, + "step": 278190 + }, + { + "epoch": 0.5619816012637516, + "grad_norm": 247.42340087890625, + "learning_rate": 4.888308219032133e-06, + "loss": 19.8828, + "step": 278200 + }, + { + "epoch": 0.5620018018964354, + "grad_norm": 354.2450256347656, + "learning_rate": 4.8879592405573765e-06, + "loss": 22.2688, + "step": 278210 + }, + { + "epoch": 0.5620220025291192, + "grad_norm": 305.7784423828125, + "learning_rate": 4.887610262628694e-06, + "loss": 15.7137, + "step": 278220 + }, + { + "epoch": 0.562042203161803, + "grad_norm": 240.77565002441406, + "learning_rate": 4.887261285247787e-06, + "loss": 10.2753, + "step": 278230 + }, + { + "epoch": 0.5620624037944868, + "grad_norm": 394.3060607910156, + "learning_rate": 4.886912308416353e-06, + "loss": 43.582, + "step": 278240 + }, + { + "epoch": 0.5620826044271706, + "grad_norm": 282.2279052734375, + "learning_rate": 4.886563332136093e-06, + "loss": 15.0963, + "step": 278250 + }, + { + "epoch": 0.5621028050598544, + "grad_norm": 500.75189208984375, + "learning_rate": 4.886214356408712e-06, + "loss": 12.8432, + "step": 278260 + }, + { + "epoch": 0.5621230056925383, + "grad_norm": 333.8148193359375, + "learning_rate": 4.885865381235909e-06, + "loss": 13.4417, + "step": 278270 + }, + { + "epoch": 0.5621432063252221, + "grad_norm": 383.7184143066406, + "learning_rate": 4.885516406619383e-06, + "loss": 33.8934, + "step": 278280 + }, + { + "epoch": 0.5621634069579059, + "grad_norm": 510.25048828125, + "learning_rate": 4.885167432560836e-06, + "loss": 16.0545, + "step": 278290 + }, + { + "epoch": 0.5621836075905897, + "grad_norm": 259.1756591796875, + "learning_rate": 4.88481845906197e-06, + "loss": 28.3703, + "step": 278300 + }, + { + "epoch": 0.5622038082232735, + "grad_norm": 1500.63330078125, + "learning_rate": 4.884469486124484e-06, + "loss": 29.4633, + "step": 278310 + }, + { + "epoch": 0.5622240088559574, + "grad_norm": 85.11979675292969, + "learning_rate": 4.884120513750079e-06, + "loss": 18.508, + "step": 278320 + }, + { + "epoch": 0.5622442094886412, + "grad_norm": 594.4029541015625, + "learning_rate": 4.8837715419404596e-06, + "loss": 25.4804, + "step": 278330 + }, + { + "epoch": 0.562264410121325, + "grad_norm": 234.77423095703125, + "learning_rate": 4.88342257069732e-06, + "loss": 12.2237, + "step": 278340 + }, + { + "epoch": 0.5622846107540088, + "grad_norm": 266.2846984863281, + "learning_rate": 4.883073600022366e-06, + "loss": 17.3074, + "step": 278350 + }, + { + "epoch": 0.5623048113866926, + "grad_norm": 210.6982879638672, + "learning_rate": 4.882724629917298e-06, + "loss": 20.9359, + "step": 278360 
+ }, + { + "epoch": 0.5623250120193765, + "grad_norm": 568.3202514648438, + "learning_rate": 4.8823756603838155e-06, + "loss": 16.2269, + "step": 278370 + }, + { + "epoch": 0.5623452126520603, + "grad_norm": 443.9132080078125, + "learning_rate": 4.882026691423619e-06, + "loss": 28.8183, + "step": 278380 + }, + { + "epoch": 0.5623654132847441, + "grad_norm": 409.15472412109375, + "learning_rate": 4.881677723038411e-06, + "loss": 19.5304, + "step": 278390 + }, + { + "epoch": 0.5623856139174279, + "grad_norm": 562.7970581054688, + "learning_rate": 4.881328755229892e-06, + "loss": 31.7977, + "step": 278400 + }, + { + "epoch": 0.5624058145501117, + "grad_norm": 449.59893798828125, + "learning_rate": 4.88097978799976e-06, + "loss": 18.6794, + "step": 278410 + }, + { + "epoch": 0.5624260151827956, + "grad_norm": 170.77944946289062, + "learning_rate": 4.880630821349718e-06, + "loss": 19.7776, + "step": 278420 + }, + { + "epoch": 0.5624462158154794, + "grad_norm": 317.1785888671875, + "learning_rate": 4.8802818552814695e-06, + "loss": 15.5262, + "step": 278430 + }, + { + "epoch": 0.5624664164481632, + "grad_norm": 501.2215270996094, + "learning_rate": 4.879932889796711e-06, + "loss": 17.5528, + "step": 278440 + }, + { + "epoch": 0.562486617080847, + "grad_norm": 905.4297485351562, + "learning_rate": 4.879583924897146e-06, + "loss": 24.2411, + "step": 278450 + }, + { + "epoch": 0.5625068177135308, + "grad_norm": 734.2091674804688, + "learning_rate": 4.879234960584474e-06, + "loss": 24.3813, + "step": 278460 + }, + { + "epoch": 0.5625270183462147, + "grad_norm": 492.3178405761719, + "learning_rate": 4.878885996860396e-06, + "loss": 20.8955, + "step": 278470 + }, + { + "epoch": 0.5625472189788984, + "grad_norm": 371.9866027832031, + "learning_rate": 4.878537033726612e-06, + "loss": 17.3136, + "step": 278480 + }, + { + "epoch": 0.5625674196115822, + "grad_norm": 158.58082580566406, + "learning_rate": 4.878188071184827e-06, + "loss": 23.8322, + "step": 278490 + }, + { + "epoch": 0.562587620244266, + "grad_norm": 367.6337890625, + "learning_rate": 4.877839109236735e-06, + "loss": 21.9475, + "step": 278500 + }, + { + "epoch": 0.5626078208769498, + "grad_norm": 480.400146484375, + "learning_rate": 4.877490147884042e-06, + "loss": 16.6314, + "step": 278510 + }, + { + "epoch": 0.5626280215096336, + "grad_norm": 152.57212829589844, + "learning_rate": 4.8771411871284465e-06, + "loss": 32.986, + "step": 278520 + }, + { + "epoch": 0.5626482221423175, + "grad_norm": 217.05926513671875, + "learning_rate": 4.87679222697165e-06, + "loss": 14.5209, + "step": 278530 + }, + { + "epoch": 0.5626684227750013, + "grad_norm": 402.0018615722656, + "learning_rate": 4.8764432674153536e-06, + "loss": 21.1349, + "step": 278540 + }, + { + "epoch": 0.5626886234076851, + "grad_norm": 287.79718017578125, + "learning_rate": 4.8760943084612585e-06, + "loss": 10.3715, + "step": 278550 + }, + { + "epoch": 0.5627088240403689, + "grad_norm": 391.60687255859375, + "learning_rate": 4.875745350111064e-06, + "loss": 16.3361, + "step": 278560 + }, + { + "epoch": 0.5627290246730527, + "grad_norm": 976.3838500976562, + "learning_rate": 4.87539639236647e-06, + "loss": 24.7614, + "step": 278570 + }, + { + "epoch": 0.5627492253057366, + "grad_norm": 523.6411743164062, + "learning_rate": 4.87504743522918e-06, + "loss": 26.72, + "step": 278580 + }, + { + "epoch": 0.5627694259384204, + "grad_norm": 453.2245178222656, + "learning_rate": 4.874698478700895e-06, + "loss": 20.8388, + "step": 278590 + }, + { + "epoch": 0.5627896265711042, + "grad_norm": 
410.42626953125, + "learning_rate": 4.874349522783313e-06, + "loss": 18.1273, + "step": 278600 + }, + { + "epoch": 0.562809827203788, + "grad_norm": 468.30401611328125, + "learning_rate": 4.874000567478137e-06, + "loss": 19.8073, + "step": 278610 + }, + { + "epoch": 0.5628300278364718, + "grad_norm": 615.7197265625, + "learning_rate": 4.873651612787067e-06, + "loss": 24.4914, + "step": 278620 + }, + { + "epoch": 0.5628502284691557, + "grad_norm": 515.7312622070312, + "learning_rate": 4.873302658711803e-06, + "loss": 14.5848, + "step": 278630 + }, + { + "epoch": 0.5628704291018395, + "grad_norm": 380.0971374511719, + "learning_rate": 4.872953705254045e-06, + "loss": 12.4876, + "step": 278640 + }, + { + "epoch": 0.5628906297345233, + "grad_norm": 178.8086700439453, + "learning_rate": 4.8726047524154985e-06, + "loss": 18.3994, + "step": 278650 + }, + { + "epoch": 0.5629108303672071, + "grad_norm": 145.70703125, + "learning_rate": 4.872255800197859e-06, + "loss": 21.4966, + "step": 278660 + }, + { + "epoch": 0.562931030999891, + "grad_norm": 119.83448028564453, + "learning_rate": 4.87190684860283e-06, + "loss": 17.6204, + "step": 278670 + }, + { + "epoch": 0.5629512316325748, + "grad_norm": 247.00778198242188, + "learning_rate": 4.871557897632111e-06, + "loss": 27.0825, + "step": 278680 + }, + { + "epoch": 0.5629714322652586, + "grad_norm": 358.8352355957031, + "learning_rate": 4.871208947287404e-06, + "loss": 37.2915, + "step": 278690 + }, + { + "epoch": 0.5629916328979424, + "grad_norm": 531.917724609375, + "learning_rate": 4.870859997570407e-06, + "loss": 18.6128, + "step": 278700 + }, + { + "epoch": 0.5630118335306262, + "grad_norm": 585.2196044921875, + "learning_rate": 4.870511048482824e-06, + "loss": 36.4132, + "step": 278710 + }, + { + "epoch": 0.56303203416331, + "grad_norm": 78.5827865600586, + "learning_rate": 4.870162100026355e-06, + "loss": 15.3775, + "step": 278720 + }, + { + "epoch": 0.5630522347959938, + "grad_norm": 515.3905029296875, + "learning_rate": 4.8698131522027e-06, + "loss": 18.907, + "step": 278730 + }, + { + "epoch": 0.5630724354286776, + "grad_norm": 257.6161804199219, + "learning_rate": 4.8694642050135595e-06, + "loss": 21.4215, + "step": 278740 + }, + { + "epoch": 0.5630926360613614, + "grad_norm": 313.5485534667969, + "learning_rate": 4.869115258460636e-06, + "loss": 17.9498, + "step": 278750 + }, + { + "epoch": 0.5631128366940452, + "grad_norm": 545.9793701171875, + "learning_rate": 4.868766312545627e-06, + "loss": 31.018, + "step": 278760 + }, + { + "epoch": 0.563133037326729, + "grad_norm": 31.352577209472656, + "learning_rate": 4.868417367270234e-06, + "loss": 24.2385, + "step": 278770 + }, + { + "epoch": 0.5631532379594129, + "grad_norm": 362.8998107910156, + "learning_rate": 4.8680684226361624e-06, + "loss": 27.9904, + "step": 278780 + }, + { + "epoch": 0.5631734385920967, + "grad_norm": 70.08490753173828, + "learning_rate": 4.867719478645106e-06, + "loss": 16.635, + "step": 278790 + }, + { + "epoch": 0.5631936392247805, + "grad_norm": 120.2506103515625, + "learning_rate": 4.86737053529877e-06, + "loss": 15.7078, + "step": 278800 + }, + { + "epoch": 0.5632138398574643, + "grad_norm": 279.32183837890625, + "learning_rate": 4.867021592598855e-06, + "loss": 12.1191, + "step": 278810 + }, + { + "epoch": 0.5632340404901481, + "grad_norm": 367.0658264160156, + "learning_rate": 4.86667265054706e-06, + "loss": 20.4642, + "step": 278820 + }, + { + "epoch": 0.563254241122832, + "grad_norm": 447.9247131347656, + "learning_rate": 4.8663237091450856e-06, + "loss": 
23.8312, + "step": 278830 + }, + { + "epoch": 0.5632744417555158, + "grad_norm": 526.6144409179688, + "learning_rate": 4.865974768394635e-06, + "loss": 27.8641, + "step": 278840 + }, + { + "epoch": 0.5632946423881996, + "grad_norm": 343.0456848144531, + "learning_rate": 4.865625828297405e-06, + "loss": 13.2943, + "step": 278850 + }, + { + "epoch": 0.5633148430208834, + "grad_norm": 204.62393188476562, + "learning_rate": 4.865276888855098e-06, + "loss": 23.9033, + "step": 278860 + }, + { + "epoch": 0.5633350436535672, + "grad_norm": 225.6749725341797, + "learning_rate": 4.864927950069417e-06, + "loss": 22.7242, + "step": 278870 + }, + { + "epoch": 0.5633552442862511, + "grad_norm": 289.04107666015625, + "learning_rate": 4.864579011942061e-06, + "loss": 13.1412, + "step": 278880 + }, + { + "epoch": 0.5633754449189349, + "grad_norm": 335.34326171875, + "learning_rate": 4.8642300744747285e-06, + "loss": 21.6426, + "step": 278890 + }, + { + "epoch": 0.5633956455516187, + "grad_norm": 569.2777709960938, + "learning_rate": 4.863881137669123e-06, + "loss": 18.1175, + "step": 278900 + }, + { + "epoch": 0.5634158461843025, + "grad_norm": 293.57550048828125, + "learning_rate": 4.8635322015269455e-06, + "loss": 19.5456, + "step": 278910 + }, + { + "epoch": 0.5634360468169863, + "grad_norm": 422.96697998046875, + "learning_rate": 4.863183266049895e-06, + "loss": 19.1662, + "step": 278920 + }, + { + "epoch": 0.5634562474496702, + "grad_norm": 603.3621215820312, + "learning_rate": 4.86283433123967e-06, + "loss": 22.3891, + "step": 278930 + }, + { + "epoch": 0.563476448082354, + "grad_norm": 676.2031860351562, + "learning_rate": 4.862485397097979e-06, + "loss": 14.0476, + "step": 278940 + }, + { + "epoch": 0.5634966487150378, + "grad_norm": 577.7756958007812, + "learning_rate": 4.862136463626512e-06, + "loss": 11.8324, + "step": 278950 + }, + { + "epoch": 0.5635168493477216, + "grad_norm": 428.1227111816406, + "learning_rate": 4.861787530826979e-06, + "loss": 17.5698, + "step": 278960 + }, + { + "epoch": 0.5635370499804054, + "grad_norm": 298.8414001464844, + "learning_rate": 4.861438598701076e-06, + "loss": 26.2628, + "step": 278970 + }, + { + "epoch": 0.5635572506130893, + "grad_norm": 201.48023986816406, + "learning_rate": 4.861089667250504e-06, + "loss": 18.7578, + "step": 278980 + }, + { + "epoch": 0.563577451245773, + "grad_norm": 233.4277801513672, + "learning_rate": 4.860740736476963e-06, + "loss": 21.6329, + "step": 278990 + }, + { + "epoch": 0.5635976518784568, + "grad_norm": 428.1015930175781, + "learning_rate": 4.860391806382157e-06, + "loss": 13.8313, + "step": 279000 + }, + { + "epoch": 0.5636178525111406, + "grad_norm": 135.4143829345703, + "learning_rate": 4.860042876967784e-06, + "loss": 13.5498, + "step": 279010 + }, + { + "epoch": 0.5636380531438244, + "grad_norm": 326.22918701171875, + "learning_rate": 4.859693948235542e-06, + "loss": 15.7906, + "step": 279020 + }, + { + "epoch": 0.5636582537765082, + "grad_norm": 168.1468048095703, + "learning_rate": 4.859345020187137e-06, + "loss": 14.452, + "step": 279030 + }, + { + "epoch": 0.5636784544091921, + "grad_norm": 822.0842895507812, + "learning_rate": 4.858996092824268e-06, + "loss": 16.4693, + "step": 279040 + }, + { + "epoch": 0.5636986550418759, + "grad_norm": 126.21709442138672, + "learning_rate": 4.8586471661486345e-06, + "loss": 19.1546, + "step": 279050 + }, + { + "epoch": 0.5637188556745597, + "grad_norm": 154.8107147216797, + "learning_rate": 4.8582982401619376e-06, + "loss": 11.1282, + "step": 279060 + }, + { + "epoch": 
0.5637390563072435, + "grad_norm": 609.3305053710938, + "learning_rate": 4.857949314865878e-06, + "loss": 20.3392, + "step": 279070 + }, + { + "epoch": 0.5637592569399273, + "grad_norm": 270.771240234375, + "learning_rate": 4.857600390262156e-06, + "loss": 27.7497, + "step": 279080 + }, + { + "epoch": 0.5637794575726112, + "grad_norm": 537.5477905273438, + "learning_rate": 4.857251466352471e-06, + "loss": 17.6501, + "step": 279090 + }, + { + "epoch": 0.563799658205295, + "grad_norm": 469.29638671875, + "learning_rate": 4.856902543138528e-06, + "loss": 19.0969, + "step": 279100 + }, + { + "epoch": 0.5638198588379788, + "grad_norm": 727.9530639648438, + "learning_rate": 4.856553620622021e-06, + "loss": 21.7398, + "step": 279110 + }, + { + "epoch": 0.5638400594706626, + "grad_norm": 252.12364196777344, + "learning_rate": 4.856204698804656e-06, + "loss": 17.4326, + "step": 279120 + }, + { + "epoch": 0.5638602601033464, + "grad_norm": 537.7473754882812, + "learning_rate": 4.855855777688133e-06, + "loss": 13.5666, + "step": 279130 + }, + { + "epoch": 0.5638804607360303, + "grad_norm": 379.7203674316406, + "learning_rate": 4.85550685727415e-06, + "loss": 26.9287, + "step": 279140 + }, + { + "epoch": 0.5639006613687141, + "grad_norm": 372.1243896484375, + "learning_rate": 4.85515793756441e-06, + "loss": 12.531, + "step": 279150 + }, + { + "epoch": 0.5639208620013979, + "grad_norm": 740.4132690429688, + "learning_rate": 4.854809018560611e-06, + "loss": 28.5175, + "step": 279160 + }, + { + "epoch": 0.5639410626340817, + "grad_norm": 252.4897003173828, + "learning_rate": 4.8544601002644585e-06, + "loss": 19.9668, + "step": 279170 + }, + { + "epoch": 0.5639612632667655, + "grad_norm": 380.94866943359375, + "learning_rate": 4.854111182677646e-06, + "loss": 18.2015, + "step": 279180 + }, + { + "epoch": 0.5639814638994494, + "grad_norm": 175.6231689453125, + "learning_rate": 4.85376226580188e-06, + "loss": 14.1915, + "step": 279190 + }, + { + "epoch": 0.5640016645321332, + "grad_norm": 573.9922485351562, + "learning_rate": 4.853413349638859e-06, + "loss": 18.1083, + "step": 279200 + }, + { + "epoch": 0.564021865164817, + "grad_norm": 517.6943969726562, + "learning_rate": 4.853064434190283e-06, + "loss": 21.1547, + "step": 279210 + }, + { + "epoch": 0.5640420657975008, + "grad_norm": 392.81610107421875, + "learning_rate": 4.852715519457854e-06, + "loss": 26.4975, + "step": 279220 + }, + { + "epoch": 0.5640622664301846, + "grad_norm": 33.941307067871094, + "learning_rate": 4.852366605443271e-06, + "loss": 13.3732, + "step": 279230 + }, + { + "epoch": 0.5640824670628685, + "grad_norm": 250.2884979248047, + "learning_rate": 4.8520176921482355e-06, + "loss": 16.7073, + "step": 279240 + }, + { + "epoch": 0.5641026676955522, + "grad_norm": 1069.0157470703125, + "learning_rate": 4.851668779574446e-06, + "loss": 18.5817, + "step": 279250 + }, + { + "epoch": 0.564122868328236, + "grad_norm": 283.9867248535156, + "learning_rate": 4.851319867723607e-06, + "loss": 21.8473, + "step": 279260 + }, + { + "epoch": 0.5641430689609198, + "grad_norm": 201.53427124023438, + "learning_rate": 4.8509709565974165e-06, + "loss": 21.4699, + "step": 279270 + }, + { + "epoch": 0.5641632695936036, + "grad_norm": 250.0788116455078, + "learning_rate": 4.850622046197576e-06, + "loss": 25.5267, + "step": 279280 + }, + { + "epoch": 0.5641834702262875, + "grad_norm": 391.1429443359375, + "learning_rate": 4.8502731365257855e-06, + "loss": 22.5211, + "step": 279290 + }, + { + "epoch": 0.5642036708589713, + "grad_norm": 571.8225708007812, + 
"learning_rate": 4.8499242275837444e-06, + "loss": 20.4421, + "step": 279300 + }, + { + "epoch": 0.5642238714916551, + "grad_norm": 620.9213256835938, + "learning_rate": 4.8495753193731545e-06, + "loss": 37.9211, + "step": 279310 + }, + { + "epoch": 0.5642440721243389, + "grad_norm": 709.5044555664062, + "learning_rate": 4.8492264118957165e-06, + "loss": 34.7678, + "step": 279320 + }, + { + "epoch": 0.5642642727570227, + "grad_norm": 393.5628967285156, + "learning_rate": 4.848877505153131e-06, + "loss": 29.5212, + "step": 279330 + }, + { + "epoch": 0.5642844733897066, + "grad_norm": 674.4771728515625, + "learning_rate": 4.848528599147098e-06, + "loss": 24.2638, + "step": 279340 + }, + { + "epoch": 0.5643046740223904, + "grad_norm": 303.4005126953125, + "learning_rate": 4.848179693879319e-06, + "loss": 23.9594, + "step": 279350 + }, + { + "epoch": 0.5643248746550742, + "grad_norm": 322.4669494628906, + "learning_rate": 4.8478307893514934e-06, + "loss": 18.0247, + "step": 279360 + }, + { + "epoch": 0.564345075287758, + "grad_norm": 294.3116455078125, + "learning_rate": 4.847481885565322e-06, + "loss": 12.8049, + "step": 279370 + }, + { + "epoch": 0.5643652759204418, + "grad_norm": 332.1798400878906, + "learning_rate": 4.8471329825225034e-06, + "loss": 16.6716, + "step": 279380 + }, + { + "epoch": 0.5643854765531257, + "grad_norm": 308.9256896972656, + "learning_rate": 4.846784080224744e-06, + "loss": 16.0941, + "step": 279390 + }, + { + "epoch": 0.5644056771858095, + "grad_norm": 822.2352294921875, + "learning_rate": 4.846435178673737e-06, + "loss": 27.9769, + "step": 279400 + }, + { + "epoch": 0.5644258778184933, + "grad_norm": 526.4793090820312, + "learning_rate": 4.846086277871188e-06, + "loss": 23.6909, + "step": 279410 + }, + { + "epoch": 0.5644460784511771, + "grad_norm": 481.90557861328125, + "learning_rate": 4.845737377818796e-06, + "loss": 18.722, + "step": 279420 + }, + { + "epoch": 0.5644662790838609, + "grad_norm": 430.7373352050781, + "learning_rate": 4.845388478518261e-06, + "loss": 43.4882, + "step": 279430 + }, + { + "epoch": 0.5644864797165448, + "grad_norm": 389.912841796875, + "learning_rate": 4.845039579971283e-06, + "loss": 35.5991, + "step": 279440 + }, + { + "epoch": 0.5645066803492286, + "grad_norm": 473.7503967285156, + "learning_rate": 4.8446906821795645e-06, + "loss": 21.2011, + "step": 279450 + }, + { + "epoch": 0.5645268809819124, + "grad_norm": 570.1041870117188, + "learning_rate": 4.8443417851448035e-06, + "loss": 11.7235, + "step": 279460 + }, + { + "epoch": 0.5645470816145962, + "grad_norm": 639.3972778320312, + "learning_rate": 4.843992888868702e-06, + "loss": 17.078, + "step": 279470 + }, + { + "epoch": 0.56456728224728, + "grad_norm": 812.7796630859375, + "learning_rate": 4.84364399335296e-06, + "loss": 33.3612, + "step": 279480 + }, + { + "epoch": 0.5645874828799639, + "grad_norm": 702.8096313476562, + "learning_rate": 4.84329509859928e-06, + "loss": 21.8659, + "step": 279490 + }, + { + "epoch": 0.5646076835126476, + "grad_norm": 184.54876708984375, + "learning_rate": 4.842946204609359e-06, + "loss": 18.5785, + "step": 279500 + }, + { + "epoch": 0.5646278841453314, + "grad_norm": 437.0586853027344, + "learning_rate": 4.8425973113848995e-06, + "loss": 34.0539, + "step": 279510 + }, + { + "epoch": 0.5646480847780152, + "grad_norm": 413.0774230957031, + "learning_rate": 4.842248418927603e-06, + "loss": 37.4086, + "step": 279520 + }, + { + "epoch": 0.564668285410699, + "grad_norm": 742.9720458984375, + "learning_rate": 4.841899527239166e-06, + "loss": 
29.4849, + "step": 279530 + }, + { + "epoch": 0.5646884860433828, + "grad_norm": 554.9951171875, + "learning_rate": 4.841550636321292e-06, + "loss": 20.8777, + "step": 279540 + }, + { + "epoch": 0.5647086866760667, + "grad_norm": 443.943359375, + "learning_rate": 4.8412017461756845e-06, + "loss": 33.688, + "step": 279550 + }, + { + "epoch": 0.5647288873087505, + "grad_norm": 480.1344909667969, + "learning_rate": 4.8408528568040365e-06, + "loss": 21.1558, + "step": 279560 + }, + { + "epoch": 0.5647490879414343, + "grad_norm": 206.64242553710938, + "learning_rate": 4.840503968208054e-06, + "loss": 12.8486, + "step": 279570 + }, + { + "epoch": 0.5647692885741181, + "grad_norm": 297.1890563964844, + "learning_rate": 4.840155080389436e-06, + "loss": 17.4767, + "step": 279580 + }, + { + "epoch": 0.5647894892068019, + "grad_norm": 501.24017333984375, + "learning_rate": 4.839806193349882e-06, + "loss": 17.387, + "step": 279590 + }, + { + "epoch": 0.5648096898394858, + "grad_norm": 59.65628433227539, + "learning_rate": 4.839457307091093e-06, + "loss": 26.266, + "step": 279600 + }, + { + "epoch": 0.5648298904721696, + "grad_norm": 78.96497344970703, + "learning_rate": 4.839108421614771e-06, + "loss": 10.7241, + "step": 279610 + }, + { + "epoch": 0.5648500911048534, + "grad_norm": 249.42201232910156, + "learning_rate": 4.838759536922614e-06, + "loss": 35.6987, + "step": 279620 + }, + { + "epoch": 0.5648702917375372, + "grad_norm": 475.94378662109375, + "learning_rate": 4.838410653016322e-06, + "loss": 19.9952, + "step": 279630 + }, + { + "epoch": 0.564890492370221, + "grad_norm": 576.3741455078125, + "learning_rate": 4.838061769897598e-06, + "loss": 40.5024, + "step": 279640 + }, + { + "epoch": 0.5649106930029049, + "grad_norm": 413.41949462890625, + "learning_rate": 4.837712887568143e-06, + "loss": 24.0064, + "step": 279650 + }, + { + "epoch": 0.5649308936355887, + "grad_norm": 250.33079528808594, + "learning_rate": 4.837364006029654e-06, + "loss": 13.1016, + "step": 279660 + }, + { + "epoch": 0.5649510942682725, + "grad_norm": 585.5135498046875, + "learning_rate": 4.837015125283833e-06, + "loss": 29.7956, + "step": 279670 + }, + { + "epoch": 0.5649712949009563, + "grad_norm": 368.90618896484375, + "learning_rate": 4.8366662453323826e-06, + "loss": 24.4503, + "step": 279680 + }, + { + "epoch": 0.5649914955336401, + "grad_norm": 1304.8232421875, + "learning_rate": 4.836317366176999e-06, + "loss": 24.6257, + "step": 279690 + }, + { + "epoch": 0.565011696166324, + "grad_norm": 768.2684936523438, + "learning_rate": 4.835968487819384e-06, + "loss": 20.2988, + "step": 279700 + }, + { + "epoch": 0.5650318967990078, + "grad_norm": 307.398681640625, + "learning_rate": 4.835619610261242e-06, + "loss": 28.9655, + "step": 279710 + }, + { + "epoch": 0.5650520974316916, + "grad_norm": 331.45989990234375, + "learning_rate": 4.835270733504267e-06, + "loss": 15.687, + "step": 279720 + }, + { + "epoch": 0.5650722980643754, + "grad_norm": 525.4197387695312, + "learning_rate": 4.834921857550163e-06, + "loss": 18.0182, + "step": 279730 + }, + { + "epoch": 0.5650924986970592, + "grad_norm": 243.6503143310547, + "learning_rate": 4.834572982400631e-06, + "loss": 14.2919, + "step": 279740 + }, + { + "epoch": 0.5651126993297431, + "grad_norm": 314.42059326171875, + "learning_rate": 4.8342241080573696e-06, + "loss": 19.056, + "step": 279750 + }, + { + "epoch": 0.5651328999624268, + "grad_norm": 404.8274841308594, + "learning_rate": 4.83387523452208e-06, + "loss": 26.8217, + "step": 279760 + }, + { + "epoch": 
0.5651531005951106, + "grad_norm": 466.4205322265625, + "learning_rate": 4.833526361796461e-06, + "loss": 21.2137, + "step": 279770 + }, + { + "epoch": 0.5651733012277944, + "grad_norm": 380.7063293457031, + "learning_rate": 4.833177489882217e-06, + "loss": 23.7955, + "step": 279780 + }, + { + "epoch": 0.5651935018604782, + "grad_norm": 0.0, + "learning_rate": 4.832828618781042e-06, + "loss": 13.6185, + "step": 279790 + }, + { + "epoch": 0.565213702493162, + "grad_norm": 358.6909484863281, + "learning_rate": 4.832479748494643e-06, + "loss": 25.1073, + "step": 279800 + }, + { + "epoch": 0.5652339031258459, + "grad_norm": 370.5196838378906, + "learning_rate": 4.832130879024717e-06, + "loss": 24.2912, + "step": 279810 + }, + { + "epoch": 0.5652541037585297, + "grad_norm": 373.6588439941406, + "learning_rate": 4.831782010372964e-06, + "loss": 10.702, + "step": 279820 + }, + { + "epoch": 0.5652743043912135, + "grad_norm": 731.8858032226562, + "learning_rate": 4.831433142541086e-06, + "loss": 20.3273, + "step": 279830 + }, + { + "epoch": 0.5652945050238973, + "grad_norm": 3.847933053970337, + "learning_rate": 4.831084275530782e-06, + "loss": 20.619, + "step": 279840 + }, + { + "epoch": 0.5653147056565812, + "grad_norm": 339.99896240234375, + "learning_rate": 4.830735409343752e-06, + "loss": 22.4393, + "step": 279850 + }, + { + "epoch": 0.565334906289265, + "grad_norm": 606.836181640625, + "learning_rate": 4.830386543981696e-06, + "loss": 26.6426, + "step": 279860 + }, + { + "epoch": 0.5653551069219488, + "grad_norm": 641.5547485351562, + "learning_rate": 4.83003767944632e-06, + "loss": 13.8716, + "step": 279870 + }, + { + "epoch": 0.5653753075546326, + "grad_norm": 105.29841613769531, + "learning_rate": 4.829688815739315e-06, + "loss": 12.9752, + "step": 279880 + }, + { + "epoch": 0.5653955081873164, + "grad_norm": 622.8025512695312, + "learning_rate": 4.829339952862388e-06, + "loss": 19.8063, + "step": 279890 + }, + { + "epoch": 0.5654157088200003, + "grad_norm": 310.1514587402344, + "learning_rate": 4.828991090817238e-06, + "loss": 18.3451, + "step": 279900 + }, + { + "epoch": 0.5654359094526841, + "grad_norm": 161.7609405517578, + "learning_rate": 4.828642229605564e-06, + "loss": 24.5835, + "step": 279910 + }, + { + "epoch": 0.5654561100853679, + "grad_norm": 311.6127624511719, + "learning_rate": 4.8282933692290665e-06, + "loss": 17.4492, + "step": 279920 + }, + { + "epoch": 0.5654763107180517, + "grad_norm": 33.28404235839844, + "learning_rate": 4.827944509689445e-06, + "loss": 9.8755, + "step": 279930 + }, + { + "epoch": 0.5654965113507355, + "grad_norm": 298.7069396972656, + "learning_rate": 4.827595650988404e-06, + "loss": 27.2258, + "step": 279940 + }, + { + "epoch": 0.5655167119834194, + "grad_norm": 593.8300170898438, + "learning_rate": 4.827246793127639e-06, + "loss": 12.0075, + "step": 279950 + }, + { + "epoch": 0.5655369126161032, + "grad_norm": 344.9347839355469, + "learning_rate": 4.826897936108853e-06, + "loss": 16.5854, + "step": 279960 + }, + { + "epoch": 0.565557113248787, + "grad_norm": 676.9986572265625, + "learning_rate": 4.8265490799337455e-06, + "loss": 21.8863, + "step": 279970 + }, + { + "epoch": 0.5655773138814708, + "grad_norm": 195.52239990234375, + "learning_rate": 4.826200224604017e-06, + "loss": 7.0834, + "step": 279980 + }, + { + "epoch": 0.5655975145141546, + "grad_norm": 499.9918212890625, + "learning_rate": 4.8258513701213665e-06, + "loss": 10.8261, + "step": 279990 + }, + { + "epoch": 0.5656177151468385, + "grad_norm": 863.3130493164062, + "learning_rate": 
4.825502516487497e-06, + "loss": 17.5498, + "step": 280000 + }, + { + "epoch": 0.5656379157795222, + "grad_norm": 390.77508544921875, + "learning_rate": 4.825153663704104e-06, + "loss": 22.8388, + "step": 280010 + }, + { + "epoch": 0.565658116412206, + "grad_norm": 553.9664916992188, + "learning_rate": 4.824804811772893e-06, + "loss": 26.1581, + "step": 280020 + }, + { + "epoch": 0.5656783170448898, + "grad_norm": 293.2061767578125, + "learning_rate": 4.824455960695563e-06, + "loss": 24.5048, + "step": 280030 + }, + { + "epoch": 0.5656985176775736, + "grad_norm": 359.7368469238281, + "learning_rate": 4.8241071104738115e-06, + "loss": 22.6865, + "step": 280040 + }, + { + "epoch": 0.5657187183102574, + "grad_norm": 460.0821533203125, + "learning_rate": 4.823758261109341e-06, + "loss": 10.5031, + "step": 280050 + }, + { + "epoch": 0.5657389189429413, + "grad_norm": 309.7868347167969, + "learning_rate": 4.8234094126038524e-06, + "loss": 17.6452, + "step": 280060 + }, + { + "epoch": 0.5657591195756251, + "grad_norm": 226.4683074951172, + "learning_rate": 4.823060564959045e-06, + "loss": 19.3593, + "step": 280070 + }, + { + "epoch": 0.5657793202083089, + "grad_norm": 187.0656280517578, + "learning_rate": 4.8227117181766165e-06, + "loss": 14.3262, + "step": 280080 + }, + { + "epoch": 0.5657995208409927, + "grad_norm": 770.1342163085938, + "learning_rate": 4.82236287225827e-06, + "loss": 28.2982, + "step": 280090 + }, + { + "epoch": 0.5658197214736765, + "grad_norm": 872.9797973632812, + "learning_rate": 4.822014027205708e-06, + "loss": 18.9361, + "step": 280100 + }, + { + "epoch": 0.5658399221063604, + "grad_norm": 205.1911163330078, + "learning_rate": 4.8216651830206265e-06, + "loss": 21.1727, + "step": 280110 + }, + { + "epoch": 0.5658601227390442, + "grad_norm": 499.60150146484375, + "learning_rate": 4.821316339704727e-06, + "loss": 20.7421, + "step": 280120 + }, + { + "epoch": 0.565880323371728, + "grad_norm": 568.2881469726562, + "learning_rate": 4.8209674972597116e-06, + "loss": 29.0031, + "step": 280130 + }, + { + "epoch": 0.5659005240044118, + "grad_norm": 166.3893280029297, + "learning_rate": 4.820618655687277e-06, + "loss": 22.0419, + "step": 280140 + }, + { + "epoch": 0.5659207246370956, + "grad_norm": 367.2247314453125, + "learning_rate": 4.820269814989125e-06, + "loss": 19.7562, + "step": 280150 + }, + { + "epoch": 0.5659409252697795, + "grad_norm": 209.6517791748047, + "learning_rate": 4.819920975166959e-06, + "loss": 25.7543, + "step": 280160 + }, + { + "epoch": 0.5659611259024633, + "grad_norm": 76.78960418701172, + "learning_rate": 4.8195721362224725e-06, + "loss": 17.7162, + "step": 280170 + }, + { + "epoch": 0.5659813265351471, + "grad_norm": 594.5751342773438, + "learning_rate": 4.819223298157372e-06, + "loss": 22.5247, + "step": 280180 + }, + { + "epoch": 0.5660015271678309, + "grad_norm": 412.4549255371094, + "learning_rate": 4.818874460973356e-06, + "loss": 22.4813, + "step": 280190 + }, + { + "epoch": 0.5660217278005147, + "grad_norm": 313.1060791015625, + "learning_rate": 4.818525624672122e-06, + "loss": 19.6513, + "step": 280200 + }, + { + "epoch": 0.5660419284331986, + "grad_norm": 293.21575927734375, + "learning_rate": 4.818176789255372e-06, + "loss": 10.3151, + "step": 280210 + }, + { + "epoch": 0.5660621290658824, + "grad_norm": 129.5390167236328, + "learning_rate": 4.8178279547248055e-06, + "loss": 17.854, + "step": 280220 + }, + { + "epoch": 0.5660823296985662, + "grad_norm": 384.1737060546875, + "learning_rate": 4.817479121082127e-06, + "loss": 31.3266, + "step": 
280230 + }, + { + "epoch": 0.56610253033125, + "grad_norm": 148.68045043945312, + "learning_rate": 4.81713028832903e-06, + "loss": 15.6833, + "step": 280240 + }, + { + "epoch": 0.5661227309639338, + "grad_norm": 267.49066162109375, + "learning_rate": 4.816781456467218e-06, + "loss": 14.2361, + "step": 280250 + }, + { + "epoch": 0.5661429315966177, + "grad_norm": 537.7645263671875, + "learning_rate": 4.816432625498394e-06, + "loss": 20.0404, + "step": 280260 + }, + { + "epoch": 0.5661631322293014, + "grad_norm": 0.0, + "learning_rate": 4.816083795424252e-06, + "loss": 47.0537, + "step": 280270 + }, + { + "epoch": 0.5661833328619852, + "grad_norm": 413.2496643066406, + "learning_rate": 4.815734966246496e-06, + "loss": 26.7662, + "step": 280280 + }, + { + "epoch": 0.566203533494669, + "grad_norm": 724.4093017578125, + "learning_rate": 4.815386137966827e-06, + "loss": 28.9771, + "step": 280290 + }, + { + "epoch": 0.5662237341273528, + "grad_norm": 181.0941619873047, + "learning_rate": 4.815037310586941e-06, + "loss": 22.6037, + "step": 280300 + }, + { + "epoch": 0.5662439347600366, + "grad_norm": 541.9688720703125, + "learning_rate": 4.8146884841085405e-06, + "loss": 21.288, + "step": 280310 + }, + { + "epoch": 0.5662641353927205, + "grad_norm": 0.7081328630447388, + "learning_rate": 4.8143396585333295e-06, + "loss": 14.5067, + "step": 280320 + }, + { + "epoch": 0.5662843360254043, + "grad_norm": 337.2891845703125, + "learning_rate": 4.813990833863001e-06, + "loss": 26.5572, + "step": 280330 + }, + { + "epoch": 0.5663045366580881, + "grad_norm": 287.2080993652344, + "learning_rate": 4.81364201009926e-06, + "loss": 30.8338, + "step": 280340 + }, + { + "epoch": 0.5663247372907719, + "grad_norm": 611.9329223632812, + "learning_rate": 4.813293187243806e-06, + "loss": 16.5224, + "step": 280350 + }, + { + "epoch": 0.5663449379234557, + "grad_norm": 452.2556457519531, + "learning_rate": 4.812944365298337e-06, + "loss": 14.7404, + "step": 280360 + }, + { + "epoch": 0.5663651385561396, + "grad_norm": 48.49687576293945, + "learning_rate": 4.812595544264554e-06, + "loss": 22.9897, + "step": 280370 + }, + { + "epoch": 0.5663853391888234, + "grad_norm": 390.6477355957031, + "learning_rate": 4.812246724144158e-06, + "loss": 27.3596, + "step": 280380 + }, + { + "epoch": 0.5664055398215072, + "grad_norm": 498.2398681640625, + "learning_rate": 4.811897904938851e-06, + "loss": 32.31, + "step": 280390 + }, + { + "epoch": 0.566425740454191, + "grad_norm": 742.808837890625, + "learning_rate": 4.811549086650327e-06, + "loss": 19.7711, + "step": 280400 + }, + { + "epoch": 0.5664459410868748, + "grad_norm": 235.90054321289062, + "learning_rate": 4.811200269280292e-06, + "loss": 12.1947, + "step": 280410 + }, + { + "epoch": 0.5664661417195587, + "grad_norm": 596.4269409179688, + "learning_rate": 4.810851452830445e-06, + "loss": 15.5925, + "step": 280420 + }, + { + "epoch": 0.5664863423522425, + "grad_norm": 466.2594909667969, + "learning_rate": 4.810502637302484e-06, + "loss": 18.6616, + "step": 280430 + }, + { + "epoch": 0.5665065429849263, + "grad_norm": 294.703369140625, + "learning_rate": 4.81015382269811e-06, + "loss": 17.7578, + "step": 280440 + }, + { + "epoch": 0.5665267436176101, + "grad_norm": 164.2449951171875, + "learning_rate": 4.809805009019024e-06, + "loss": 9.3957, + "step": 280450 + }, + { + "epoch": 0.566546944250294, + "grad_norm": 396.9536437988281, + "learning_rate": 4.809456196266925e-06, + "loss": 30.4809, + "step": 280460 + }, + { + "epoch": 0.5665671448829778, + "grad_norm": 332.344970703125, 
+ "learning_rate": 4.809107384443511e-06, + "loss": 14.1402, + "step": 280470 + }, + { + "epoch": 0.5665873455156616, + "grad_norm": 486.4132385253906, + "learning_rate": 4.808758573550488e-06, + "loss": 16.9073, + "step": 280480 + }, + { + "epoch": 0.5666075461483454, + "grad_norm": 511.4957275390625, + "learning_rate": 4.8084097635895505e-06, + "loss": 18.6391, + "step": 280490 + }, + { + "epoch": 0.5666277467810292, + "grad_norm": 71.69532012939453, + "learning_rate": 4.8080609545624004e-06, + "loss": 14.7074, + "step": 280500 + }, + { + "epoch": 0.566647947413713, + "grad_norm": 246.12234497070312, + "learning_rate": 4.80771214647074e-06, + "loss": 28.4342, + "step": 280510 + }, + { + "epoch": 0.5666681480463968, + "grad_norm": 143.98562622070312, + "learning_rate": 4.807363339316265e-06, + "loss": 19.2224, + "step": 280520 + }, + { + "epoch": 0.5666883486790806, + "grad_norm": 533.887939453125, + "learning_rate": 4.807014533100679e-06, + "loss": 22.2298, + "step": 280530 + }, + { + "epoch": 0.5667085493117644, + "grad_norm": 434.1194152832031, + "learning_rate": 4.806665727825679e-06, + "loss": 15.4137, + "step": 280540 + }, + { + "epoch": 0.5667287499444482, + "grad_norm": 296.2685241699219, + "learning_rate": 4.80631692349297e-06, + "loss": 17.5248, + "step": 280550 + }, + { + "epoch": 0.566748950577132, + "grad_norm": 915.760498046875, + "learning_rate": 4.805968120104246e-06, + "loss": 21.6545, + "step": 280560 + }, + { + "epoch": 0.5667691512098159, + "grad_norm": 649.8720092773438, + "learning_rate": 4.805619317661211e-06, + "loss": 15.8103, + "step": 280570 + }, + { + "epoch": 0.5667893518424997, + "grad_norm": 542.402587890625, + "learning_rate": 4.805270516165564e-06, + "loss": 18.8665, + "step": 280580 + }, + { + "epoch": 0.5668095524751835, + "grad_norm": 480.7325744628906, + "learning_rate": 4.8049217156190045e-06, + "loss": 20.6515, + "step": 280590 + }, + { + "epoch": 0.5668297531078673, + "grad_norm": 130.50900268554688, + "learning_rate": 4.8045729160232326e-06, + "loss": 34.2715, + "step": 280600 + }, + { + "epoch": 0.5668499537405511, + "grad_norm": 19.832996368408203, + "learning_rate": 4.80422411737995e-06, + "loss": 13.7181, + "step": 280610 + }, + { + "epoch": 0.566870154373235, + "grad_norm": 164.57254028320312, + "learning_rate": 4.8038753196908534e-06, + "loss": 13.625, + "step": 280620 + }, + { + "epoch": 0.5668903550059188, + "grad_norm": 194.63401794433594, + "learning_rate": 4.803526522957645e-06, + "loss": 11.2362, + "step": 280630 + }, + { + "epoch": 0.5669105556386026, + "grad_norm": 37.46737289428711, + "learning_rate": 4.803177727182026e-06, + "loss": 13.6499, + "step": 280640 + }, + { + "epoch": 0.5669307562712864, + "grad_norm": 197.3994140625, + "learning_rate": 4.802828932365694e-06, + "loss": 27.7821, + "step": 280650 + }, + { + "epoch": 0.5669509569039702, + "grad_norm": 2664.021728515625, + "learning_rate": 4.80248013851035e-06, + "loss": 20.306, + "step": 280660 + }, + { + "epoch": 0.5669711575366541, + "grad_norm": 307.2100524902344, + "learning_rate": 4.802131345617694e-06, + "loss": 17.2772, + "step": 280670 + }, + { + "epoch": 0.5669913581693379, + "grad_norm": 358.0711975097656, + "learning_rate": 4.801782553689426e-06, + "loss": 25.9146, + "step": 280680 + }, + { + "epoch": 0.5670115588020217, + "grad_norm": 716.7332153320312, + "learning_rate": 4.801433762727244e-06, + "loss": 48.5615, + "step": 280690 + }, + { + "epoch": 0.5670317594347055, + "grad_norm": 282.84881591796875, + "learning_rate": 4.801084972732851e-06, + "loss": 14.6979, 
+ "step": 280700 + }, + { + "epoch": 0.5670519600673893, + "grad_norm": 430.576904296875, + "learning_rate": 4.800736183707946e-06, + "loss": 14.4485, + "step": 280710 + }, + { + "epoch": 0.5670721607000732, + "grad_norm": 459.9219665527344, + "learning_rate": 4.800387395654229e-06, + "loss": 30.1554, + "step": 280720 + }, + { + "epoch": 0.567092361332757, + "grad_norm": 556.7384643554688, + "learning_rate": 4.800038608573398e-06, + "loss": 13.7043, + "step": 280730 + }, + { + "epoch": 0.5671125619654408, + "grad_norm": 298.2059326171875, + "learning_rate": 4.799689822467157e-06, + "loss": 27.1181, + "step": 280740 + }, + { + "epoch": 0.5671327625981246, + "grad_norm": 600.5845947265625, + "learning_rate": 4.799341037337203e-06, + "loss": 38.7656, + "step": 280750 + }, + { + "epoch": 0.5671529632308084, + "grad_norm": 496.095947265625, + "learning_rate": 4.798992253185233e-06, + "loss": 16.733, + "step": 280760 + }, + { + "epoch": 0.5671731638634923, + "grad_norm": 310.8692321777344, + "learning_rate": 4.798643470012956e-06, + "loss": 9.6334, + "step": 280770 + }, + { + "epoch": 0.567193364496176, + "grad_norm": 613.90771484375, + "learning_rate": 4.798294687822062e-06, + "loss": 19.7655, + "step": 280780 + }, + { + "epoch": 0.5672135651288598, + "grad_norm": 235.32797241210938, + "learning_rate": 4.797945906614256e-06, + "loss": 35.1255, + "step": 280790 + }, + { + "epoch": 0.5672337657615436, + "grad_norm": 342.994384765625, + "learning_rate": 4.797597126391238e-06, + "loss": 15.036, + "step": 280800 + }, + { + "epoch": 0.5672539663942274, + "grad_norm": 156.72019958496094, + "learning_rate": 4.797248347154707e-06, + "loss": 27.67, + "step": 280810 + }, + { + "epoch": 0.5672741670269112, + "grad_norm": 219.47348022460938, + "learning_rate": 4.796899568906363e-06, + "loss": 13.3814, + "step": 280820 + }, + { + "epoch": 0.5672943676595951, + "grad_norm": 582.8139038085938, + "learning_rate": 4.796550791647905e-06, + "loss": 24.4508, + "step": 280830 + }, + { + "epoch": 0.5673145682922789, + "grad_norm": 143.56533813476562, + "learning_rate": 4.796202015381035e-06, + "loss": 20.3075, + "step": 280840 + }, + { + "epoch": 0.5673347689249627, + "grad_norm": 216.61070251464844, + "learning_rate": 4.79585324010745e-06, + "loss": 9.2895, + "step": 280850 + }, + { + "epoch": 0.5673549695576465, + "grad_norm": 441.1408386230469, + "learning_rate": 4.795504465828853e-06, + "loss": 24.3529, + "step": 280860 + }, + { + "epoch": 0.5673751701903303, + "grad_norm": 434.54815673828125, + "learning_rate": 4.795155692546942e-06, + "loss": 21.3057, + "step": 280870 + }, + { + "epoch": 0.5673953708230142, + "grad_norm": 436.3892517089844, + "learning_rate": 4.794806920263417e-06, + "loss": 29.8058, + "step": 280880 + }, + { + "epoch": 0.567415571455698, + "grad_norm": 492.61260986328125, + "learning_rate": 4.794458148979979e-06, + "loss": 23.2764, + "step": 280890 + }, + { + "epoch": 0.5674357720883818, + "grad_norm": 469.8958740234375, + "learning_rate": 4.794109378698327e-06, + "loss": 15.7702, + "step": 280900 + }, + { + "epoch": 0.5674559727210656, + "grad_norm": 716.3184814453125, + "learning_rate": 4.793760609420161e-06, + "loss": 25.6189, + "step": 280910 + }, + { + "epoch": 0.5674761733537494, + "grad_norm": 133.90565490722656, + "learning_rate": 4.7934118411471785e-06, + "loss": 25.8989, + "step": 280920 + }, + { + "epoch": 0.5674963739864333, + "grad_norm": 1263.489501953125, + "learning_rate": 4.7930630738810855e-06, + "loss": 41.5443, + "step": 280930 + }, + { + "epoch": 0.5675165746191171, + 
"grad_norm": 505.8876037597656, + "learning_rate": 4.792714307623574e-06, + "loss": 29.7477, + "step": 280940 + }, + { + "epoch": 0.5675367752518009, + "grad_norm": 155.53802490234375, + "learning_rate": 4.79236554237635e-06, + "loss": 15.5484, + "step": 280950 + }, + { + "epoch": 0.5675569758844847, + "grad_norm": 138.1282958984375, + "learning_rate": 4.792016778141111e-06, + "loss": 18.8946, + "step": 280960 + }, + { + "epoch": 0.5675771765171685, + "grad_norm": 397.5463562011719, + "learning_rate": 4.791668014919557e-06, + "loss": 21.5662, + "step": 280970 + }, + { + "epoch": 0.5675973771498524, + "grad_norm": 464.1717529296875, + "learning_rate": 4.791319252713388e-06, + "loss": 16.8356, + "step": 280980 + }, + { + "epoch": 0.5676175777825362, + "grad_norm": 216.24668884277344, + "learning_rate": 4.790970491524302e-06, + "loss": 15.9672, + "step": 280990 + }, + { + "epoch": 0.56763777841522, + "grad_norm": 5.927821636199951, + "learning_rate": 4.7906217313540035e-06, + "loss": 8.7603, + "step": 281000 + }, + { + "epoch": 0.5676579790479038, + "grad_norm": 86.56169891357422, + "learning_rate": 4.790272972204186e-06, + "loss": 15.9588, + "step": 281010 + }, + { + "epoch": 0.5676781796805876, + "grad_norm": 398.86572265625, + "learning_rate": 4.789924214076554e-06, + "loss": 28.5181, + "step": 281020 + }, + { + "epoch": 0.5676983803132715, + "grad_norm": 143.67971801757812, + "learning_rate": 4.789575456972807e-06, + "loss": 14.2371, + "step": 281030 + }, + { + "epoch": 0.5677185809459552, + "grad_norm": 327.8136291503906, + "learning_rate": 4.789226700894643e-06, + "loss": 16.7905, + "step": 281040 + }, + { + "epoch": 0.567738781578639, + "grad_norm": 320.13824462890625, + "learning_rate": 4.788877945843762e-06, + "loss": 15.8231, + "step": 281050 + }, + { + "epoch": 0.5677589822113228, + "grad_norm": 341.2049560546875, + "learning_rate": 4.7885291918218656e-06, + "loss": 15.138, + "step": 281060 + }, + { + "epoch": 0.5677791828440066, + "grad_norm": 213.2560272216797, + "learning_rate": 4.788180438830651e-06, + "loss": 26.9224, + "step": 281070 + }, + { + "epoch": 0.5677993834766905, + "grad_norm": 1243.47119140625, + "learning_rate": 4.787831686871818e-06, + "loss": 16.1275, + "step": 281080 + }, + { + "epoch": 0.5678195841093743, + "grad_norm": 551.6944580078125, + "learning_rate": 4.787482935947071e-06, + "loss": 18.4467, + "step": 281090 + }, + { + "epoch": 0.5678397847420581, + "grad_norm": 283.828369140625, + "learning_rate": 4.787134186058103e-06, + "loss": 24.1669, + "step": 281100 + }, + { + "epoch": 0.5678599853747419, + "grad_norm": 422.85626220703125, + "learning_rate": 4.7867854372066185e-06, + "loss": 22.0561, + "step": 281110 + }, + { + "epoch": 0.5678801860074257, + "grad_norm": 400.2016906738281, + "learning_rate": 4.786436689394317e-06, + "loss": 14.1001, + "step": 281120 + }, + { + "epoch": 0.5679003866401096, + "grad_norm": 658.47705078125, + "learning_rate": 4.7860879426228965e-06, + "loss": 22.3099, + "step": 281130 + }, + { + "epoch": 0.5679205872727934, + "grad_norm": 725.5134887695312, + "learning_rate": 4.785739196894056e-06, + "loss": 18.1255, + "step": 281140 + }, + { + "epoch": 0.5679407879054772, + "grad_norm": 195.5977783203125, + "learning_rate": 4.785390452209497e-06, + "loss": 19.2654, + "step": 281150 + }, + { + "epoch": 0.567960988538161, + "grad_norm": 75.21509552001953, + "learning_rate": 4.785041708570921e-06, + "loss": 15.7326, + "step": 281160 + }, + { + "epoch": 0.5679811891708448, + "grad_norm": 1675.8681640625, + "learning_rate": 
4.784692965980024e-06, + "loss": 21.5999, + "step": 281170 + }, + { + "epoch": 0.5680013898035287, + "grad_norm": 433.5064392089844, + "learning_rate": 4.784344224438508e-06, + "loss": 16.2804, + "step": 281180 + }, + { + "epoch": 0.5680215904362125, + "grad_norm": 276.25213623046875, + "learning_rate": 4.783995483948072e-06, + "loss": 13.0544, + "step": 281190 + }, + { + "epoch": 0.5680417910688963, + "grad_norm": 760.7417602539062, + "learning_rate": 4.783646744510416e-06, + "loss": 18.5853, + "step": 281200 + }, + { + "epoch": 0.5680619917015801, + "grad_norm": 549.2852172851562, + "learning_rate": 4.783298006127238e-06, + "loss": 20.926, + "step": 281210 + }, + { + "epoch": 0.5680821923342639, + "grad_norm": 409.4819641113281, + "learning_rate": 4.782949268800242e-06, + "loss": 21.3589, + "step": 281220 + }, + { + "epoch": 0.5681023929669478, + "grad_norm": 357.6761169433594, + "learning_rate": 4.782600532531123e-06, + "loss": 16.9343, + "step": 281230 + }, + { + "epoch": 0.5681225935996316, + "grad_norm": 330.9826354980469, + "learning_rate": 4.7822517973215814e-06, + "loss": 14.5164, + "step": 281240 + }, + { + "epoch": 0.5681427942323154, + "grad_norm": 616.1460571289062, + "learning_rate": 4.781903063173321e-06, + "loss": 15.6828, + "step": 281250 + }, + { + "epoch": 0.5681629948649992, + "grad_norm": 366.4429016113281, + "learning_rate": 4.7815543300880374e-06, + "loss": 11.1462, + "step": 281260 + }, + { + "epoch": 0.568183195497683, + "grad_norm": 842.9706420898438, + "learning_rate": 4.781205598067431e-06, + "loss": 20.5096, + "step": 281270 + }, + { + "epoch": 0.5682033961303669, + "grad_norm": 202.7994384765625, + "learning_rate": 4.780856867113202e-06, + "loss": 18.9489, + "step": 281280 + }, + { + "epoch": 0.5682235967630506, + "grad_norm": 269.7091979980469, + "learning_rate": 4.780508137227052e-06, + "loss": 28.9723, + "step": 281290 + }, + { + "epoch": 0.5682437973957344, + "grad_norm": 323.88671875, + "learning_rate": 4.780159408410677e-06, + "loss": 21.4513, + "step": 281300 + }, + { + "epoch": 0.5682639980284182, + "grad_norm": 199.1850128173828, + "learning_rate": 4.7798106806657765e-06, + "loss": 12.8795, + "step": 281310 + }, + { + "epoch": 0.568284198661102, + "grad_norm": 8.439977645874023, + "learning_rate": 4.779461953994055e-06, + "loss": 14.439, + "step": 281320 + }, + { + "epoch": 0.5683043992937858, + "grad_norm": 114.3587875366211, + "learning_rate": 4.779113228397208e-06, + "loss": 17.9183, + "step": 281330 + }, + { + "epoch": 0.5683245999264697, + "grad_norm": 220.39505004882812, + "learning_rate": 4.778764503876937e-06, + "loss": 19.0702, + "step": 281340 + }, + { + "epoch": 0.5683448005591535, + "grad_norm": 538.3729248046875, + "learning_rate": 4.778415780434941e-06, + "loss": 49.2227, + "step": 281350 + }, + { + "epoch": 0.5683650011918373, + "grad_norm": 142.07130432128906, + "learning_rate": 4.778067058072919e-06, + "loss": 18.075, + "step": 281360 + }, + { + "epoch": 0.5683852018245211, + "grad_norm": 186.54107666015625, + "learning_rate": 4.777718336792571e-06, + "loss": 17.1097, + "step": 281370 + }, + { + "epoch": 0.568405402457205, + "grad_norm": 175.35775756835938, + "learning_rate": 4.777369616595599e-06, + "loss": 12.8044, + "step": 281380 + }, + { + "epoch": 0.5684256030898888, + "grad_norm": 773.0756225585938, + "learning_rate": 4.777020897483697e-06, + "loss": 19.8385, + "step": 281390 + }, + { + "epoch": 0.5684458037225726, + "grad_norm": 546.4215698242188, + "learning_rate": 4.7766721794585704e-06, + "loss": 12.1978, + "step": 281400 + 
}, + { + "epoch": 0.5684660043552564, + "grad_norm": 453.91693115234375, + "learning_rate": 4.776323462521916e-06, + "loss": 22.5422, + "step": 281410 + }, + { + "epoch": 0.5684862049879402, + "grad_norm": 571.7416381835938, + "learning_rate": 4.775974746675434e-06, + "loss": 17.7625, + "step": 281420 + }, + { + "epoch": 0.568506405620624, + "grad_norm": 144.67437744140625, + "learning_rate": 4.7756260319208245e-06, + "loss": 18.1891, + "step": 281430 + }, + { + "epoch": 0.5685266062533079, + "grad_norm": 463.4716796875, + "learning_rate": 4.775277318259784e-06, + "loss": 10.107, + "step": 281440 + }, + { + "epoch": 0.5685468068859917, + "grad_norm": 156.15028381347656, + "learning_rate": 4.774928605694018e-06, + "loss": 16.9089, + "step": 281450 + }, + { + "epoch": 0.5685670075186755, + "grad_norm": 113.29301452636719, + "learning_rate": 4.77457989422522e-06, + "loss": 19.2779, + "step": 281460 + }, + { + "epoch": 0.5685872081513593, + "grad_norm": 875.8801879882812, + "learning_rate": 4.774231183855093e-06, + "loss": 17.1479, + "step": 281470 + }, + { + "epoch": 0.5686074087840431, + "grad_norm": 354.21478271484375, + "learning_rate": 4.773882474585338e-06, + "loss": 13.1078, + "step": 281480 + }, + { + "epoch": 0.568627609416727, + "grad_norm": 400.21356201171875, + "learning_rate": 4.77353376641765e-06, + "loss": 20.4264, + "step": 281490 + }, + { + "epoch": 0.5686478100494108, + "grad_norm": 648.59423828125, + "learning_rate": 4.7731850593537316e-06, + "loss": 15.6605, + "step": 281500 + }, + { + "epoch": 0.5686680106820946, + "grad_norm": 213.05715942382812, + "learning_rate": 4.772836353395283e-06, + "loss": 8.2071, + "step": 281510 + }, + { + "epoch": 0.5686882113147784, + "grad_norm": 500.738525390625, + "learning_rate": 4.772487648544e-06, + "loss": 20.6662, + "step": 281520 + }, + { + "epoch": 0.5687084119474622, + "grad_norm": 439.78668212890625, + "learning_rate": 4.772138944801585e-06, + "loss": 19.7219, + "step": 281530 + }, + { + "epoch": 0.5687286125801461, + "grad_norm": 164.27357482910156, + "learning_rate": 4.77179024216974e-06, + "loss": 15.299, + "step": 281540 + }, + { + "epoch": 0.5687488132128298, + "grad_norm": 575.8212280273438, + "learning_rate": 4.771441540650158e-06, + "loss": 31.0574, + "step": 281550 + }, + { + "epoch": 0.5687690138455136, + "grad_norm": 17.1769962310791, + "learning_rate": 4.771092840244544e-06, + "loss": 8.2188, + "step": 281560 + }, + { + "epoch": 0.5687892144781974, + "grad_norm": 346.69927978515625, + "learning_rate": 4.770744140954596e-06, + "loss": 18.1759, + "step": 281570 + }, + { + "epoch": 0.5688094151108812, + "grad_norm": 0.0, + "learning_rate": 4.7703954427820125e-06, + "loss": 22.863, + "step": 281580 + }, + { + "epoch": 0.568829615743565, + "grad_norm": 687.7879638671875, + "learning_rate": 4.770046745728494e-06, + "loss": 17.0569, + "step": 281590 + }, + { + "epoch": 0.5688498163762489, + "grad_norm": 317.03082275390625, + "learning_rate": 4.769698049795739e-06, + "loss": 23.1847, + "step": 281600 + }, + { + "epoch": 0.5688700170089327, + "grad_norm": 509.2730712890625, + "learning_rate": 4.76934935498545e-06, + "loss": 24.0008, + "step": 281610 + }, + { + "epoch": 0.5688902176416165, + "grad_norm": 474.55657958984375, + "learning_rate": 4.769000661299322e-06, + "loss": 31.9968, + "step": 281620 + }, + { + "epoch": 0.5689104182743003, + "grad_norm": 48.3017463684082, + "learning_rate": 4.768651968739057e-06, + "loss": 12.2753, + "step": 281630 + }, + { + "epoch": 0.5689306189069842, + "grad_norm": 276.9859924316406, + 
"learning_rate": 4.768303277306356e-06, + "loss": 23.6071, + "step": 281640 + }, + { + "epoch": 0.568950819539668, + "grad_norm": 175.224609375, + "learning_rate": 4.767954587002915e-06, + "loss": 22.392, + "step": 281650 + }, + { + "epoch": 0.5689710201723518, + "grad_norm": 144.96896362304688, + "learning_rate": 4.767605897830436e-06, + "loss": 19.9227, + "step": 281660 + }, + { + "epoch": 0.5689912208050356, + "grad_norm": 155.78317260742188, + "learning_rate": 4.767257209790618e-06, + "loss": 21.4134, + "step": 281670 + }, + { + "epoch": 0.5690114214377194, + "grad_norm": 313.7227478027344, + "learning_rate": 4.76690852288516e-06, + "loss": 14.5007, + "step": 281680 + }, + { + "epoch": 0.5690316220704033, + "grad_norm": 100.6632308959961, + "learning_rate": 4.76655983711576e-06, + "loss": 19.0345, + "step": 281690 + }, + { + "epoch": 0.5690518227030871, + "grad_norm": 377.2840270996094, + "learning_rate": 4.766211152484122e-06, + "loss": 16.0773, + "step": 281700 + }, + { + "epoch": 0.5690720233357709, + "grad_norm": 110.20166015625, + "learning_rate": 4.765862468991939e-06, + "loss": 19.0861, + "step": 281710 + }, + { + "epoch": 0.5690922239684547, + "grad_norm": 404.0787658691406, + "learning_rate": 4.765513786640915e-06, + "loss": 27.0663, + "step": 281720 + }, + { + "epoch": 0.5691124246011385, + "grad_norm": 369.5503845214844, + "learning_rate": 4.765165105432749e-06, + "loss": 22.2442, + "step": 281730 + }, + { + "epoch": 0.5691326252338224, + "grad_norm": 85.97356414794922, + "learning_rate": 4.764816425369141e-06, + "loss": 17.9469, + "step": 281740 + }, + { + "epoch": 0.5691528258665062, + "grad_norm": 0.6718572974205017, + "learning_rate": 4.7644677464517874e-06, + "loss": 16.9593, + "step": 281750 + }, + { + "epoch": 0.56917302649919, + "grad_norm": 539.9227905273438, + "learning_rate": 4.764119068682389e-06, + "loss": 34.1517, + "step": 281760 + }, + { + "epoch": 0.5691932271318738, + "grad_norm": 237.00498962402344, + "learning_rate": 4.7637703920626484e-06, + "loss": 30.3294, + "step": 281770 + }, + { + "epoch": 0.5692134277645576, + "grad_norm": 412.45269775390625, + "learning_rate": 4.76342171659426e-06, + "loss": 20.29, + "step": 281780 + }, + { + "epoch": 0.5692336283972415, + "grad_norm": 191.51370239257812, + "learning_rate": 4.763073042278925e-06, + "loss": 12.3758, + "step": 281790 + }, + { + "epoch": 0.5692538290299252, + "grad_norm": 286.8011169433594, + "learning_rate": 4.762724369118346e-06, + "loss": 16.6205, + "step": 281800 + }, + { + "epoch": 0.569274029662609, + "grad_norm": 269.1073303222656, + "learning_rate": 4.762375697114217e-06, + "loss": 19.3239, + "step": 281810 + }, + { + "epoch": 0.5692942302952928, + "grad_norm": 294.68017578125, + "learning_rate": 4.762027026268241e-06, + "loss": 36.4113, + "step": 281820 + }, + { + "epoch": 0.5693144309279766, + "grad_norm": 270.39190673828125, + "learning_rate": 4.761678356582117e-06, + "loss": 30.8563, + "step": 281830 + }, + { + "epoch": 0.5693346315606604, + "grad_norm": 402.2464599609375, + "learning_rate": 4.761329688057543e-06, + "loss": 53.8486, + "step": 281840 + }, + { + "epoch": 0.5693548321933443, + "grad_norm": 305.43365478515625, + "learning_rate": 4.760981020696218e-06, + "loss": 18.5423, + "step": 281850 + }, + { + "epoch": 0.5693750328260281, + "grad_norm": 868.0171508789062, + "learning_rate": 4.760632354499846e-06, + "loss": 34.0763, + "step": 281860 + }, + { + "epoch": 0.5693952334587119, + "grad_norm": 412.5047607421875, + "learning_rate": 4.760283689470119e-06, + "loss": 22.9232, + 
"step": 281870 + }, + { + "epoch": 0.5694154340913957, + "grad_norm": 247.03213500976562, + "learning_rate": 4.759935025608742e-06, + "loss": 15.8073, + "step": 281880 + }, + { + "epoch": 0.5694356347240795, + "grad_norm": 423.51239013671875, + "learning_rate": 4.7595863629174125e-06, + "loss": 18.0722, + "step": 281890 + }, + { + "epoch": 0.5694558353567634, + "grad_norm": 303.29095458984375, + "learning_rate": 4.759237701397831e-06, + "loss": 20.4888, + "step": 281900 + }, + { + "epoch": 0.5694760359894472, + "grad_norm": 536.0755004882812, + "learning_rate": 4.758889041051694e-06, + "loss": 25.8428, + "step": 281910 + }, + { + "epoch": 0.569496236622131, + "grad_norm": 746.490478515625, + "learning_rate": 4.758540381880702e-06, + "loss": 18.181, + "step": 281920 + }, + { + "epoch": 0.5695164372548148, + "grad_norm": 3515.61279296875, + "learning_rate": 4.7581917238865565e-06, + "loss": 54.3617, + "step": 281930 + }, + { + "epoch": 0.5695366378874986, + "grad_norm": 229.06857299804688, + "learning_rate": 4.757843067070955e-06, + "loss": 28.6551, + "step": 281940 + }, + { + "epoch": 0.5695568385201825, + "grad_norm": 844.1669921875, + "learning_rate": 4.757494411435597e-06, + "loss": 45.6698, + "step": 281950 + }, + { + "epoch": 0.5695770391528663, + "grad_norm": 357.9671325683594, + "learning_rate": 4.757145756982182e-06, + "loss": 14.9378, + "step": 281960 + }, + { + "epoch": 0.5695972397855501, + "grad_norm": 726.5892333984375, + "learning_rate": 4.756797103712409e-06, + "loss": 17.4079, + "step": 281970 + }, + { + "epoch": 0.5696174404182339, + "grad_norm": 243.23487854003906, + "learning_rate": 4.756448451627977e-06, + "loss": 14.5127, + "step": 281980 + }, + { + "epoch": 0.5696376410509177, + "grad_norm": 648.876708984375, + "learning_rate": 4.7560998007305865e-06, + "loss": 9.6891, + "step": 281990 + }, + { + "epoch": 0.5696578416836016, + "grad_norm": 268.9858093261719, + "learning_rate": 4.755751151021934e-06, + "loss": 30.9187, + "step": 282000 + }, + { + "epoch": 0.5696780423162854, + "grad_norm": 164.58192443847656, + "learning_rate": 4.755402502503722e-06, + "loss": 19.8471, + "step": 282010 + }, + { + "epoch": 0.5696982429489692, + "grad_norm": 496.41510009765625, + "learning_rate": 4.7550538551776495e-06, + "loss": 17.9169, + "step": 282020 + }, + { + "epoch": 0.569718443581653, + "grad_norm": 682.2836303710938, + "learning_rate": 4.754705209045414e-06, + "loss": 27.1735, + "step": 282030 + }, + { + "epoch": 0.5697386442143368, + "grad_norm": 565.455078125, + "learning_rate": 4.754356564108715e-06, + "loss": 25.299, + "step": 282040 + }, + { + "epoch": 0.5697588448470207, + "grad_norm": 469.9396057128906, + "learning_rate": 4.754007920369252e-06, + "loss": 21.7952, + "step": 282050 + }, + { + "epoch": 0.5697790454797044, + "grad_norm": 777.315185546875, + "learning_rate": 4.753659277828727e-06, + "loss": 28.935, + "step": 282060 + }, + { + "epoch": 0.5697992461123882, + "grad_norm": 533.95068359375, + "learning_rate": 4.7533106364888345e-06, + "loss": 13.9896, + "step": 282070 + }, + { + "epoch": 0.569819446745072, + "grad_norm": 189.7462158203125, + "learning_rate": 4.752961996351276e-06, + "loss": 10.9761, + "step": 282080 + }, + { + "epoch": 0.5698396473777558, + "grad_norm": 422.6891784667969, + "learning_rate": 4.7526133574177525e-06, + "loss": 20.301, + "step": 282090 + }, + { + "epoch": 0.5698598480104397, + "grad_norm": 877.2344970703125, + "learning_rate": 4.752264719689961e-06, + "loss": 28.6998, + "step": 282100 + }, + { + "epoch": 0.5698800486431235, + 
"grad_norm": 604.5916137695312, + "learning_rate": 4.7519160831696e-06, + "loss": 25.9339, + "step": 282110 + }, + { + "epoch": 0.5699002492758073, + "grad_norm": 416.35302734375, + "learning_rate": 4.751567447858372e-06, + "loss": 19.9025, + "step": 282120 + }, + { + "epoch": 0.5699204499084911, + "grad_norm": 733.4224243164062, + "learning_rate": 4.751218813757972e-06, + "loss": 28.5293, + "step": 282130 + }, + { + "epoch": 0.5699406505411749, + "grad_norm": 273.4183654785156, + "learning_rate": 4.7508701808701006e-06, + "loss": 21.8609, + "step": 282140 + }, + { + "epoch": 0.5699608511738588, + "grad_norm": 388.1825866699219, + "learning_rate": 4.7505215491964615e-06, + "loss": 9.5101, + "step": 282150 + }, + { + "epoch": 0.5699810518065426, + "grad_norm": 242.81985473632812, + "learning_rate": 4.750172918738747e-06, + "loss": 24.2439, + "step": 282160 + }, + { + "epoch": 0.5700012524392264, + "grad_norm": 210.24472045898438, + "learning_rate": 4.7498242894986595e-06, + "loss": 17.8979, + "step": 282170 + }, + { + "epoch": 0.5700214530719102, + "grad_norm": 287.41278076171875, + "learning_rate": 4.7494756614779e-06, + "loss": 39.2889, + "step": 282180 + }, + { + "epoch": 0.570041653704594, + "grad_norm": 527.2347412109375, + "learning_rate": 4.749127034678165e-06, + "loss": 18.7753, + "step": 282190 + }, + { + "epoch": 0.5700618543372779, + "grad_norm": 317.0581970214844, + "learning_rate": 4.748778409101153e-06, + "loss": 18.5149, + "step": 282200 + }, + { + "epoch": 0.5700820549699617, + "grad_norm": 689.727783203125, + "learning_rate": 4.748429784748564e-06, + "loss": 22.1526, + "step": 282210 + }, + { + "epoch": 0.5701022556026455, + "grad_norm": 229.11322021484375, + "learning_rate": 4.748081161622101e-06, + "loss": 10.3614, + "step": 282220 + }, + { + "epoch": 0.5701224562353293, + "grad_norm": 315.3077392578125, + "learning_rate": 4.7477325397234575e-06, + "loss": 16.2357, + "step": 282230 + }, + { + "epoch": 0.5701426568680131, + "grad_norm": 1760.7344970703125, + "learning_rate": 4.747383919054335e-06, + "loss": 33.4112, + "step": 282240 + }, + { + "epoch": 0.570162857500697, + "grad_norm": 266.5099792480469, + "learning_rate": 4.747035299616434e-06, + "loss": 15.6812, + "step": 282250 + }, + { + "epoch": 0.5701830581333808, + "grad_norm": 192.255615234375, + "learning_rate": 4.746686681411451e-06, + "loss": 19.3875, + "step": 282260 + }, + { + "epoch": 0.5702032587660646, + "grad_norm": 709.2426147460938, + "learning_rate": 4.746338064441087e-06, + "loss": 37.0994, + "step": 282270 + }, + { + "epoch": 0.5702234593987484, + "grad_norm": 465.7525634765625, + "learning_rate": 4.745989448707042e-06, + "loss": 22.1241, + "step": 282280 + }, + { + "epoch": 0.5702436600314322, + "grad_norm": 225.25108337402344, + "learning_rate": 4.745640834211012e-06, + "loss": 11.6983, + "step": 282290 + }, + { + "epoch": 0.570263860664116, + "grad_norm": 559.8445434570312, + "learning_rate": 4.745292220954696e-06, + "loss": 24.5407, + "step": 282300 + }, + { + "epoch": 0.5702840612967999, + "grad_norm": 53.93816375732422, + "learning_rate": 4.744943608939799e-06, + "loss": 20.1797, + "step": 282310 + }, + { + "epoch": 0.5703042619294836, + "grad_norm": 362.0687561035156, + "learning_rate": 4.744594998168012e-06, + "loss": 16.7693, + "step": 282320 + }, + { + "epoch": 0.5703244625621674, + "grad_norm": 396.4376525878906, + "learning_rate": 4.744246388641039e-06, + "loss": 17.0657, + "step": 282330 + }, + { + "epoch": 0.5703446631948512, + "grad_norm": 371.0090026855469, + "learning_rate": 
4.743897780360578e-06, + "loss": 11.6621, + "step": 282340 + }, + { + "epoch": 0.570364863827535, + "grad_norm": 321.3207702636719, + "learning_rate": 4.74354917332833e-06, + "loss": 19.1614, + "step": 282350 + }, + { + "epoch": 0.5703850644602189, + "grad_norm": 195.78021240234375, + "learning_rate": 4.7432005675459905e-06, + "loss": 28.7699, + "step": 282360 + }, + { + "epoch": 0.5704052650929027, + "grad_norm": 183.8314971923828, + "learning_rate": 4.74285196301526e-06, + "loss": 29.7161, + "step": 282370 + }, + { + "epoch": 0.5704254657255865, + "grad_norm": 764.7041015625, + "learning_rate": 4.742503359737841e-06, + "loss": 17.1614, + "step": 282380 + }, + { + "epoch": 0.5704456663582703, + "grad_norm": 172.70347595214844, + "learning_rate": 4.742154757715425e-06, + "loss": 20.2722, + "step": 282390 + }, + { + "epoch": 0.5704658669909541, + "grad_norm": 276.43487548828125, + "learning_rate": 4.741806156949718e-06, + "loss": 12.9694, + "step": 282400 + }, + { + "epoch": 0.570486067623638, + "grad_norm": 163.83872985839844, + "learning_rate": 4.741457557442416e-06, + "loss": 18.2952, + "step": 282410 + }, + { + "epoch": 0.5705062682563218, + "grad_norm": 459.07470703125, + "learning_rate": 4.7411089591952184e-06, + "loss": 38.4178, + "step": 282420 + }, + { + "epoch": 0.5705264688890056, + "grad_norm": 609.1342163085938, + "learning_rate": 4.740760362209824e-06, + "loss": 25.564, + "step": 282430 + }, + { + "epoch": 0.5705466695216894, + "grad_norm": 382.3327331542969, + "learning_rate": 4.740411766487933e-06, + "loss": 16.1625, + "step": 282440 + }, + { + "epoch": 0.5705668701543732, + "grad_norm": 377.52508544921875, + "learning_rate": 4.740063172031243e-06, + "loss": 37.4605, + "step": 282450 + }, + { + "epoch": 0.5705870707870571, + "grad_norm": 220.13027954101562, + "learning_rate": 4.7397145788414525e-06, + "loss": 20.6798, + "step": 282460 + }, + { + "epoch": 0.5706072714197409, + "grad_norm": 680.699951171875, + "learning_rate": 4.739365986920265e-06, + "loss": 16.0136, + "step": 282470 + }, + { + "epoch": 0.5706274720524247, + "grad_norm": 482.6812438964844, + "learning_rate": 4.7390173962693724e-06, + "loss": 21.8054, + "step": 282480 + }, + { + "epoch": 0.5706476726851085, + "grad_norm": 221.7867889404297, + "learning_rate": 4.738668806890479e-06, + "loss": 9.6261, + "step": 282490 + }, + { + "epoch": 0.5706678733177923, + "grad_norm": 340.18707275390625, + "learning_rate": 4.738320218785281e-06, + "loss": 20.3762, + "step": 282500 + }, + { + "epoch": 0.5706880739504762, + "grad_norm": 853.8125610351562, + "learning_rate": 4.737971631955481e-06, + "loss": 23.8096, + "step": 282510 + }, + { + "epoch": 0.57070827458316, + "grad_norm": 961.3689575195312, + "learning_rate": 4.737623046402774e-06, + "loss": 17.6552, + "step": 282520 + }, + { + "epoch": 0.5707284752158438, + "grad_norm": 388.4830017089844, + "learning_rate": 4.737274462128858e-06, + "loss": 26.1711, + "step": 282530 + }, + { + "epoch": 0.5707486758485276, + "grad_norm": 307.6900939941406, + "learning_rate": 4.736925879135439e-06, + "loss": 20.3948, + "step": 282540 + }, + { + "epoch": 0.5707688764812114, + "grad_norm": 420.995361328125, + "learning_rate": 4.7365772974242075e-06, + "loss": 17.5211, + "step": 282550 + }, + { + "epoch": 0.5707890771138953, + "grad_norm": 434.5019836425781, + "learning_rate": 4.736228716996868e-06, + "loss": 8.8662, + "step": 282560 + }, + { + "epoch": 0.570809277746579, + "grad_norm": 259.2095947265625, + "learning_rate": 4.735880137855118e-06, + "loss": 18.553, + "step": 282570 + }, 
+ { + "epoch": 0.5708294783792628, + "grad_norm": 473.3949279785156, + "learning_rate": 4.735531560000656e-06, + "loss": 29.2533, + "step": 282580 + }, + { + "epoch": 0.5708496790119466, + "grad_norm": 432.2046203613281, + "learning_rate": 4.735182983435181e-06, + "loss": 36.4894, + "step": 282590 + }, + { + "epoch": 0.5708698796446304, + "grad_norm": 220.06646728515625, + "learning_rate": 4.734834408160393e-06, + "loss": 27.4091, + "step": 282600 + }, + { + "epoch": 0.5708900802773142, + "grad_norm": 65.85558319091797, + "learning_rate": 4.734485834177987e-06, + "loss": 15.7926, + "step": 282610 + }, + { + "epoch": 0.5709102809099981, + "grad_norm": 346.2181091308594, + "learning_rate": 4.734137261489667e-06, + "loss": 14.9316, + "step": 282620 + }, + { + "epoch": 0.5709304815426819, + "grad_norm": 592.6093139648438, + "learning_rate": 4.73378869009713e-06, + "loss": 27.6009, + "step": 282630 + }, + { + "epoch": 0.5709506821753657, + "grad_norm": 948.87451171875, + "learning_rate": 4.7334401200020745e-06, + "loss": 24.6394, + "step": 282640 + }, + { + "epoch": 0.5709708828080495, + "grad_norm": 336.6229248046875, + "learning_rate": 4.733091551206198e-06, + "loss": 11.5142, + "step": 282650 + }, + { + "epoch": 0.5709910834407333, + "grad_norm": 371.5719909667969, + "learning_rate": 4.732742983711202e-06, + "loss": 23.0044, + "step": 282660 + }, + { + "epoch": 0.5710112840734172, + "grad_norm": 586.419189453125, + "learning_rate": 4.732394417518785e-06, + "loss": 22.4501, + "step": 282670 + }, + { + "epoch": 0.571031484706101, + "grad_norm": 234.36026000976562, + "learning_rate": 4.732045852630644e-06, + "loss": 18.4327, + "step": 282680 + }, + { + "epoch": 0.5710516853387848, + "grad_norm": 165.2674102783203, + "learning_rate": 4.731697289048479e-06, + "loss": 19.9626, + "step": 282690 + }, + { + "epoch": 0.5710718859714686, + "grad_norm": 162.3771514892578, + "learning_rate": 4.73134872677399e-06, + "loss": 18.7244, + "step": 282700 + }, + { + "epoch": 0.5710920866041524, + "grad_norm": 232.7933349609375, + "learning_rate": 4.731000165808874e-06, + "loss": 13.7459, + "step": 282710 + }, + { + "epoch": 0.5711122872368363, + "grad_norm": 664.8682250976562, + "learning_rate": 4.73065160615483e-06, + "loss": 14.6749, + "step": 282720 + }, + { + "epoch": 0.5711324878695201, + "grad_norm": 262.6925964355469, + "learning_rate": 4.730303047813559e-06, + "loss": 25.8547, + "step": 282730 + }, + { + "epoch": 0.5711526885022039, + "grad_norm": 0.0, + "learning_rate": 4.7299544907867576e-06, + "loss": 16.9246, + "step": 282740 + }, + { + "epoch": 0.5711728891348877, + "grad_norm": 468.6184997558594, + "learning_rate": 4.729605935076123e-06, + "loss": 10.2398, + "step": 282750 + }, + { + "epoch": 0.5711930897675715, + "grad_norm": 613.7074584960938, + "learning_rate": 4.7292573806833605e-06, + "loss": 32.9369, + "step": 282760 + }, + { + "epoch": 0.5712132904002554, + "grad_norm": 359.0036315917969, + "learning_rate": 4.7289088276101616e-06, + "loss": 18.4079, + "step": 282770 + }, + { + "epoch": 0.5712334910329392, + "grad_norm": 253.33212280273438, + "learning_rate": 4.728560275858228e-06, + "loss": 19.6505, + "step": 282780 + }, + { + "epoch": 0.571253691665623, + "grad_norm": 139.0804443359375, + "learning_rate": 4.72821172542926e-06, + "loss": 10.7074, + "step": 282790 + }, + { + "epoch": 0.5712738922983068, + "grad_norm": 402.0624084472656, + "learning_rate": 4.727863176324955e-06, + "loss": 14.0952, + "step": 282800 + }, + { + "epoch": 0.5712940929309906, + "grad_norm": 665.03759765625, + 
"learning_rate": 4.7275146285470115e-06, + "loss": 25.4496, + "step": 282810 + }, + { + "epoch": 0.5713142935636745, + "grad_norm": 224.83116149902344, + "learning_rate": 4.727166082097127e-06, + "loss": 15.2676, + "step": 282820 + }, + { + "epoch": 0.5713344941963582, + "grad_norm": 315.6308288574219, + "learning_rate": 4.726817536977006e-06, + "loss": 31.0143, + "step": 282830 + }, + { + "epoch": 0.571354694829042, + "grad_norm": 403.99566650390625, + "learning_rate": 4.72646899318834e-06, + "loss": 24.1681, + "step": 282840 + }, + { + "epoch": 0.5713748954617258, + "grad_norm": 239.3922576904297, + "learning_rate": 4.7261204507328315e-06, + "loss": 21.9767, + "step": 282850 + }, + { + "epoch": 0.5713950960944096, + "grad_norm": 193.40237426757812, + "learning_rate": 4.72577190961218e-06, + "loss": 13.7396, + "step": 282860 + }, + { + "epoch": 0.5714152967270935, + "grad_norm": 274.01324462890625, + "learning_rate": 4.725423369828082e-06, + "loss": 21.2507, + "step": 282870 + }, + { + "epoch": 0.5714354973597773, + "grad_norm": 77.75462341308594, + "learning_rate": 4.725074831382237e-06, + "loss": 11.1334, + "step": 282880 + }, + { + "epoch": 0.5714556979924611, + "grad_norm": 210.43345642089844, + "learning_rate": 4.724726294276345e-06, + "loss": 34.2454, + "step": 282890 + }, + { + "epoch": 0.5714758986251449, + "grad_norm": 884.965576171875, + "learning_rate": 4.7243777585121034e-06, + "loss": 36.7404, + "step": 282900 + }, + { + "epoch": 0.5714960992578287, + "grad_norm": 313.2972717285156, + "learning_rate": 4.724029224091209e-06, + "loss": 25.3529, + "step": 282910 + }, + { + "epoch": 0.5715162998905126, + "grad_norm": 246.2036590576172, + "learning_rate": 4.723680691015366e-06, + "loss": 19.2682, + "step": 282920 + }, + { + "epoch": 0.5715365005231964, + "grad_norm": 337.3580322265625, + "learning_rate": 4.723332159286267e-06, + "loss": 13.7533, + "step": 282930 + }, + { + "epoch": 0.5715567011558802, + "grad_norm": 198.6982879638672, + "learning_rate": 4.722983628905614e-06, + "loss": 13.4708, + "step": 282940 + }, + { + "epoch": 0.571576901788564, + "grad_norm": 232.9688262939453, + "learning_rate": 4.722635099875106e-06, + "loss": 13.4651, + "step": 282950 + }, + { + "epoch": 0.5715971024212478, + "grad_norm": 689.5285034179688, + "learning_rate": 4.722286572196441e-06, + "loss": 25.4789, + "step": 282960 + }, + { + "epoch": 0.5716173030539317, + "grad_norm": 894.1478881835938, + "learning_rate": 4.721938045871317e-06, + "loss": 10.2879, + "step": 282970 + }, + { + "epoch": 0.5716375036866155, + "grad_norm": 856.7816162109375, + "learning_rate": 4.721589520901433e-06, + "loss": 27.1754, + "step": 282980 + }, + { + "epoch": 0.5716577043192993, + "grad_norm": 550.2301025390625, + "learning_rate": 4.7212409972884894e-06, + "loss": 16.1218, + "step": 282990 + }, + { + "epoch": 0.5716779049519831, + "grad_norm": 111.42938232421875, + "learning_rate": 4.720892475034181e-06, + "loss": 9.3968, + "step": 283000 + }, + { + "epoch": 0.5716981055846669, + "grad_norm": 697.5478515625, + "learning_rate": 4.72054395414021e-06, + "loss": 23.3174, + "step": 283010 + }, + { + "epoch": 0.5717183062173508, + "grad_norm": 221.56004333496094, + "learning_rate": 4.720195434608275e-06, + "loss": 13.4679, + "step": 283020 + }, + { + "epoch": 0.5717385068500346, + "grad_norm": 376.4030456542969, + "learning_rate": 4.719846916440072e-06, + "loss": 15.4297, + "step": 283030 + }, + { + "epoch": 0.5717587074827184, + "grad_norm": 407.6830749511719, + "learning_rate": 4.719498399637302e-06, + "loss": 
13.9991, + "step": 283040 + }, + { + "epoch": 0.5717789081154022, + "grad_norm": 186.31411743164062, + "learning_rate": 4.719149884201664e-06, + "loss": 28.5582, + "step": 283050 + }, + { + "epoch": 0.571799108748086, + "grad_norm": 179.6485595703125, + "learning_rate": 4.718801370134853e-06, + "loss": 11.7174, + "step": 283060 + }, + { + "epoch": 0.5718193093807699, + "grad_norm": 168.99423217773438, + "learning_rate": 4.718452857438569e-06, + "loss": 11.1836, + "step": 283070 + }, + { + "epoch": 0.5718395100134536, + "grad_norm": 640.58447265625, + "learning_rate": 4.7181043461145155e-06, + "loss": 27.7388, + "step": 283080 + }, + { + "epoch": 0.5718597106461374, + "grad_norm": 445.20245361328125, + "learning_rate": 4.717755836164384e-06, + "loss": 18.8104, + "step": 283090 + }, + { + "epoch": 0.5718799112788212, + "grad_norm": 398.60247802734375, + "learning_rate": 4.717407327589878e-06, + "loss": 15.9926, + "step": 283100 + }, + { + "epoch": 0.571900111911505, + "grad_norm": 410.79620361328125, + "learning_rate": 4.717058820392694e-06, + "loss": 27.9072, + "step": 283110 + }, + { + "epoch": 0.5719203125441888, + "grad_norm": 378.79998779296875, + "learning_rate": 4.7167103145745315e-06, + "loss": 10.7875, + "step": 283120 + }, + { + "epoch": 0.5719405131768727, + "grad_norm": 306.0965881347656, + "learning_rate": 4.716361810137088e-06, + "loss": 11.5585, + "step": 283130 + }, + { + "epoch": 0.5719607138095565, + "grad_norm": 703.5479125976562, + "learning_rate": 4.716013307082061e-06, + "loss": 20.6412, + "step": 283140 + }, + { + "epoch": 0.5719809144422403, + "grad_norm": 440.7583923339844, + "learning_rate": 4.715664805411155e-06, + "loss": 14.8568, + "step": 283150 + }, + { + "epoch": 0.5720011150749241, + "grad_norm": 126.62924194335938, + "learning_rate": 4.715316305126059e-06, + "loss": 25.2764, + "step": 283160 + }, + { + "epoch": 0.572021315707608, + "grad_norm": 433.77154541015625, + "learning_rate": 4.71496780622848e-06, + "loss": 14.7368, + "step": 283170 + }, + { + "epoch": 0.5720415163402918, + "grad_norm": 335.3080139160156, + "learning_rate": 4.714619308720113e-06, + "loss": 15.2308, + "step": 283180 + }, + { + "epoch": 0.5720617169729756, + "grad_norm": 654.994873046875, + "learning_rate": 4.714270812602657e-06, + "loss": 20.2485, + "step": 283190 + }, + { + "epoch": 0.5720819176056594, + "grad_norm": 383.84527587890625, + "learning_rate": 4.71392231787781e-06, + "loss": 23.0565, + "step": 283200 + }, + { + "epoch": 0.5721021182383432, + "grad_norm": 302.2643127441406, + "learning_rate": 4.713573824547271e-06, + "loss": 15.9064, + "step": 283210 + }, + { + "epoch": 0.572122318871027, + "grad_norm": 136.98361206054688, + "learning_rate": 4.7132253326127394e-06, + "loss": 31.1406, + "step": 283220 + }, + { + "epoch": 0.5721425195037109, + "grad_norm": 314.59521484375, + "learning_rate": 4.71287684207591e-06, + "loss": 13.0349, + "step": 283230 + }, + { + "epoch": 0.5721627201363947, + "grad_norm": 258.1735534667969, + "learning_rate": 4.712528352938487e-06, + "loss": 17.8948, + "step": 283240 + }, + { + "epoch": 0.5721829207690785, + "grad_norm": 230.92201232910156, + "learning_rate": 4.712179865202164e-06, + "loss": 19.5708, + "step": 283250 + }, + { + "epoch": 0.5722031214017623, + "grad_norm": 1058.202392578125, + "learning_rate": 4.711831378868643e-06, + "loss": 34.6557, + "step": 283260 + }, + { + "epoch": 0.5722233220344461, + "grad_norm": 283.6180725097656, + "learning_rate": 4.71148289393962e-06, + "loss": 24.73, + "step": 283270 + }, + { + "epoch": 
0.57224352266713, + "grad_norm": 104.200927734375, + "learning_rate": 4.711134410416794e-06, + "loss": 13.046, + "step": 283280 + }, + { + "epoch": 0.5722637232998138, + "grad_norm": 419.1370544433594, + "learning_rate": 4.7107859283018635e-06, + "loss": 21.7297, + "step": 283290 + }, + { + "epoch": 0.5722839239324976, + "grad_norm": 206.23863220214844, + "learning_rate": 4.710437447596528e-06, + "loss": 26.745, + "step": 283300 + }, + { + "epoch": 0.5723041245651814, + "grad_norm": 258.88409423828125, + "learning_rate": 4.710088968302486e-06, + "loss": 18.8854, + "step": 283310 + }, + { + "epoch": 0.5723243251978652, + "grad_norm": 283.68267822265625, + "learning_rate": 4.709740490421435e-06, + "loss": 11.6669, + "step": 283320 + }, + { + "epoch": 0.5723445258305491, + "grad_norm": 254.8634490966797, + "learning_rate": 4.709392013955073e-06, + "loss": 32.1802, + "step": 283330 + }, + { + "epoch": 0.5723647264632328, + "grad_norm": 380.2320861816406, + "learning_rate": 4.7090435389051e-06, + "loss": 16.0333, + "step": 283340 + }, + { + "epoch": 0.5723849270959166, + "grad_norm": 524.1536254882812, + "learning_rate": 4.708695065273213e-06, + "loss": 19.2232, + "step": 283350 + }, + { + "epoch": 0.5724051277286004, + "grad_norm": 1058.6983642578125, + "learning_rate": 4.708346593061109e-06, + "loss": 21.6366, + "step": 283360 + }, + { + "epoch": 0.5724253283612842, + "grad_norm": 252.1746826171875, + "learning_rate": 4.707998122270493e-06, + "loss": 17.1793, + "step": 283370 + }, + { + "epoch": 0.5724455289939681, + "grad_norm": 162.8192596435547, + "learning_rate": 4.707649652903054e-06, + "loss": 20.0504, + "step": 283380 + }, + { + "epoch": 0.5724657296266519, + "grad_norm": 43.742671966552734, + "learning_rate": 4.707301184960496e-06, + "loss": 25.1147, + "step": 283390 + }, + { + "epoch": 0.5724859302593357, + "grad_norm": 3088.93408203125, + "learning_rate": 4.706952718444518e-06, + "loss": 30.3981, + "step": 283400 + }, + { + "epoch": 0.5725061308920195, + "grad_norm": 237.17019653320312, + "learning_rate": 4.706604253356817e-06, + "loss": 18.2808, + "step": 283410 + }, + { + "epoch": 0.5725263315247033, + "grad_norm": 874.8134155273438, + "learning_rate": 4.70625578969909e-06, + "loss": 29.7182, + "step": 283420 + }, + { + "epoch": 0.5725465321573872, + "grad_norm": 278.7037353515625, + "learning_rate": 4.705907327473036e-06, + "loss": 28.5297, + "step": 283430 + }, + { + "epoch": 0.572566732790071, + "grad_norm": 385.6778564453125, + "learning_rate": 4.705558866680357e-06, + "loss": 50.21, + "step": 283440 + }, + { + "epoch": 0.5725869334227548, + "grad_norm": 407.6443786621094, + "learning_rate": 4.705210407322746e-06, + "loss": 27.3507, + "step": 283450 + }, + { + "epoch": 0.5726071340554386, + "grad_norm": 931.3094482421875, + "learning_rate": 4.704861949401904e-06, + "loss": 23.4207, + "step": 283460 + }, + { + "epoch": 0.5726273346881224, + "grad_norm": 274.1264343261719, + "learning_rate": 4.70451349291953e-06, + "loss": 25.8156, + "step": 283470 + }, + { + "epoch": 0.5726475353208063, + "grad_norm": 49.78269958496094, + "learning_rate": 4.704165037877321e-06, + "loss": 10.8104, + "step": 283480 + }, + { + "epoch": 0.5726677359534901, + "grad_norm": 151.04638671875, + "learning_rate": 4.703816584276975e-06, + "loss": 16.2919, + "step": 283490 + }, + { + "epoch": 0.5726879365861739, + "grad_norm": 215.10507202148438, + "learning_rate": 4.703468132120193e-06, + "loss": 28.8384, + "step": 283500 + }, + { + "epoch": 0.5727081372188577, + "grad_norm": 266.309814453125, + 
"learning_rate": 4.70311968140867e-06, + "loss": 22.1145, + "step": 283510 + }, + { + "epoch": 0.5727283378515415, + "grad_norm": 493.8695983886719, + "learning_rate": 4.702771232144104e-06, + "loss": 25.2268, + "step": 283520 + }, + { + "epoch": 0.5727485384842254, + "grad_norm": 559.64794921875, + "learning_rate": 4.702422784328199e-06, + "loss": 18.1718, + "step": 283530 + }, + { + "epoch": 0.5727687391169092, + "grad_norm": 842.6926879882812, + "learning_rate": 4.702074337962645e-06, + "loss": 29.7846, + "step": 283540 + }, + { + "epoch": 0.572788939749593, + "grad_norm": 142.5811767578125, + "learning_rate": 4.7017258930491474e-06, + "loss": 18.9788, + "step": 283550 + }, + { + "epoch": 0.5728091403822768, + "grad_norm": 325.9900207519531, + "learning_rate": 4.7013774495894e-06, + "loss": 27.6884, + "step": 283560 + }, + { + "epoch": 0.5728293410149606, + "grad_norm": 693.3881225585938, + "learning_rate": 4.7010290075851035e-06, + "loss": 19.1565, + "step": 283570 + }, + { + "epoch": 0.5728495416476445, + "grad_norm": 379.7054443359375, + "learning_rate": 4.700680567037956e-06, + "loss": 16.461, + "step": 283580 + }, + { + "epoch": 0.5728697422803282, + "grad_norm": 783.3622436523438, + "learning_rate": 4.7003321279496526e-06, + "loss": 14.4221, + "step": 283590 + }, + { + "epoch": 0.572889942913012, + "grad_norm": 840.8717651367188, + "learning_rate": 4.699983690321898e-06, + "loss": 30.6254, + "step": 283600 + }, + { + "epoch": 0.5729101435456958, + "grad_norm": 136.46791076660156, + "learning_rate": 4.699635254156383e-06, + "loss": 14.5199, + "step": 283610 + }, + { + "epoch": 0.5729303441783796, + "grad_norm": 459.1830139160156, + "learning_rate": 4.69928681945481e-06, + "loss": 24.2669, + "step": 283620 + }, + { + "epoch": 0.5729505448110634, + "grad_norm": 317.6564025878906, + "learning_rate": 4.6989383862188785e-06, + "loss": 25.7679, + "step": 283630 + }, + { + "epoch": 0.5729707454437473, + "grad_norm": 109.60838317871094, + "learning_rate": 4.6985899544502835e-06, + "loss": 14.57, + "step": 283640 + }, + { + "epoch": 0.5729909460764311, + "grad_norm": 291.0938720703125, + "learning_rate": 4.698241524150724e-06, + "loss": 26.9732, + "step": 283650 + }, + { + "epoch": 0.5730111467091149, + "grad_norm": 954.2431640625, + "learning_rate": 4.6978930953219e-06, + "loss": 27.9935, + "step": 283660 + }, + { + "epoch": 0.5730313473417987, + "grad_norm": 355.62835693359375, + "learning_rate": 4.697544667965507e-06, + "loss": 24.8484, + "step": 283670 + }, + { + "epoch": 0.5730515479744825, + "grad_norm": 367.4694519042969, + "learning_rate": 4.697196242083245e-06, + "loss": 23.9496, + "step": 283680 + }, + { + "epoch": 0.5730717486071664, + "grad_norm": 160.22972106933594, + "learning_rate": 4.696847817676814e-06, + "loss": 10.2788, + "step": 283690 + }, + { + "epoch": 0.5730919492398502, + "grad_norm": 408.6793518066406, + "learning_rate": 4.696499394747906e-06, + "loss": 14.0879, + "step": 283700 + }, + { + "epoch": 0.573112149872534, + "grad_norm": 317.40509033203125, + "learning_rate": 4.696150973298225e-06, + "loss": 17.0443, + "step": 283710 + }, + { + "epoch": 0.5731323505052178, + "grad_norm": 310.83990478515625, + "learning_rate": 4.695802553329467e-06, + "loss": 24.7235, + "step": 283720 + }, + { + "epoch": 0.5731525511379016, + "grad_norm": 422.622802734375, + "learning_rate": 4.695454134843332e-06, + "loss": 11.7616, + "step": 283730 + }, + { + "epoch": 0.5731727517705855, + "grad_norm": 276.022216796875, + "learning_rate": 4.695105717841516e-06, + "loss": 25.9616, + 
"step": 283740 + }, + { + "epoch": 0.5731929524032693, + "grad_norm": 247.97230529785156, + "learning_rate": 4.694757302325715e-06, + "loss": 17.5643, + "step": 283750 + }, + { + "epoch": 0.5732131530359531, + "grad_norm": 1246.0626220703125, + "learning_rate": 4.694408888297635e-06, + "loss": 12.6615, + "step": 283760 + }, + { + "epoch": 0.5732333536686369, + "grad_norm": 1299.63720703125, + "learning_rate": 4.694060475758964e-06, + "loss": 31.702, + "step": 283770 + }, + { + "epoch": 0.5732535543013207, + "grad_norm": 8.856138229370117, + "learning_rate": 4.693712064711408e-06, + "loss": 11.0963, + "step": 283780 + }, + { + "epoch": 0.5732737549340046, + "grad_norm": 234.0096893310547, + "learning_rate": 4.693363655156662e-06, + "loss": 13.3781, + "step": 283790 + }, + { + "epoch": 0.5732939555666884, + "grad_norm": 263.8318176269531, + "learning_rate": 4.693015247096423e-06, + "loss": 7.937, + "step": 283800 + }, + { + "epoch": 0.5733141561993722, + "grad_norm": 318.6791076660156, + "learning_rate": 4.6926668405323915e-06, + "loss": 17.5293, + "step": 283810 + }, + { + "epoch": 0.573334356832056, + "grad_norm": 537.685302734375, + "learning_rate": 4.692318435466265e-06, + "loss": 23.3625, + "step": 283820 + }, + { + "epoch": 0.5733545574647398, + "grad_norm": 351.2520751953125, + "learning_rate": 4.691970031899741e-06, + "loss": 19.2957, + "step": 283830 + }, + { + "epoch": 0.5733747580974237, + "grad_norm": 209.622802734375, + "learning_rate": 4.691621629834516e-06, + "loss": 9.8297, + "step": 283840 + }, + { + "epoch": 0.5733949587301074, + "grad_norm": 186.2348175048828, + "learning_rate": 4.691273229272291e-06, + "loss": 18.9344, + "step": 283850 + }, + { + "epoch": 0.5734151593627912, + "grad_norm": 190.50189208984375, + "learning_rate": 4.6909248302147634e-06, + "loss": 21.8252, + "step": 283860 + }, + { + "epoch": 0.573435359995475, + "grad_norm": 211.6445770263672, + "learning_rate": 4.69057643266363e-06, + "loss": 13.5997, + "step": 283870 + }, + { + "epoch": 0.5734555606281588, + "grad_norm": 669.6118774414062, + "learning_rate": 4.69022803662059e-06, + "loss": 32.8732, + "step": 283880 + }, + { + "epoch": 0.5734757612608427, + "grad_norm": 353.7624206542969, + "learning_rate": 4.689879642087341e-06, + "loss": 16.5534, + "step": 283890 + }, + { + "epoch": 0.5734959618935265, + "grad_norm": 102.54301452636719, + "learning_rate": 4.689531249065581e-06, + "loss": 18.7183, + "step": 283900 + }, + { + "epoch": 0.5735161625262103, + "grad_norm": 212.87945556640625, + "learning_rate": 4.689182857557006e-06, + "loss": 15.7912, + "step": 283910 + }, + { + "epoch": 0.5735363631588941, + "grad_norm": 412.1329040527344, + "learning_rate": 4.688834467563318e-06, + "loss": 21.5479, + "step": 283920 + }, + { + "epoch": 0.5735565637915779, + "grad_norm": 547.8233642578125, + "learning_rate": 4.688486079086213e-06, + "loss": 14.5297, + "step": 283930 + }, + { + "epoch": 0.5735767644242618, + "grad_norm": 75.20598602294922, + "learning_rate": 4.688137692127389e-06, + "loss": 15.2088, + "step": 283940 + }, + { + "epoch": 0.5735969650569456, + "grad_norm": 256.3761291503906, + "learning_rate": 4.687789306688544e-06, + "loss": 20.6092, + "step": 283950 + }, + { + "epoch": 0.5736171656896294, + "grad_norm": 375.7027893066406, + "learning_rate": 4.687440922771376e-06, + "loss": 11.2912, + "step": 283960 + }, + { + "epoch": 0.5736373663223132, + "grad_norm": 363.12255859375, + "learning_rate": 4.687092540377583e-06, + "loss": 22.0384, + "step": 283970 + }, + { + "epoch": 0.573657566954997, + 
"grad_norm": 455.80316162109375, + "learning_rate": 4.686744159508864e-06, + "loss": 10.5865, + "step": 283980 + }, + { + "epoch": 0.5736777675876809, + "grad_norm": 338.5058288574219, + "learning_rate": 4.686395780166914e-06, + "loss": 9.4401, + "step": 283990 + }, + { + "epoch": 0.5736979682203647, + "grad_norm": 506.3396911621094, + "learning_rate": 4.686047402353433e-06, + "loss": 20.7148, + "step": 284000 + }, + { + "epoch": 0.5737181688530485, + "grad_norm": 266.2140808105469, + "learning_rate": 4.68569902607012e-06, + "loss": 10.6317, + "step": 284010 + }, + { + "epoch": 0.5737383694857323, + "grad_norm": 436.3377380371094, + "learning_rate": 4.685350651318672e-06, + "loss": 26.013, + "step": 284020 + }, + { + "epoch": 0.5737585701184161, + "grad_norm": 99.87852478027344, + "learning_rate": 4.685002278100787e-06, + "loss": 22.1473, + "step": 284030 + }, + { + "epoch": 0.5737787707511, + "grad_norm": 399.6739196777344, + "learning_rate": 4.68465390641816e-06, + "loss": 20.2335, + "step": 284040 + }, + { + "epoch": 0.5737989713837838, + "grad_norm": 523.8590698242188, + "learning_rate": 4.684305536272496e-06, + "loss": 18.9442, + "step": 284050 + }, + { + "epoch": 0.5738191720164676, + "grad_norm": 330.4162902832031, + "learning_rate": 4.683957167665485e-06, + "loss": 32.5483, + "step": 284060 + }, + { + "epoch": 0.5738393726491514, + "grad_norm": 979.9481201171875, + "learning_rate": 4.6836088005988295e-06, + "loss": 20.9171, + "step": 284070 + }, + { + "epoch": 0.5738595732818352, + "grad_norm": 64.82814025878906, + "learning_rate": 4.6832604350742275e-06, + "loss": 22.665, + "step": 284080 + }, + { + "epoch": 0.573879773914519, + "grad_norm": 995.423828125, + "learning_rate": 4.682912071093374e-06, + "loss": 33.3612, + "step": 284090 + }, + { + "epoch": 0.5738999745472029, + "grad_norm": 253.1041259765625, + "learning_rate": 4.68256370865797e-06, + "loss": 21.2609, + "step": 284100 + }, + { + "epoch": 0.5739201751798866, + "grad_norm": 514.6492309570312, + "learning_rate": 4.682215347769712e-06, + "loss": 26.7161, + "step": 284110 + }, + { + "epoch": 0.5739403758125704, + "grad_norm": 598.346923828125, + "learning_rate": 4.681866988430297e-06, + "loss": 18.6298, + "step": 284120 + }, + { + "epoch": 0.5739605764452542, + "grad_norm": 174.0736083984375, + "learning_rate": 4.6815186306414225e-06, + "loss": 12.6335, + "step": 284130 + }, + { + "epoch": 0.573980777077938, + "grad_norm": 762.2974853515625, + "learning_rate": 4.681170274404791e-06, + "loss": 22.1227, + "step": 284140 + }, + { + "epoch": 0.5740009777106219, + "grad_norm": 496.7584228515625, + "learning_rate": 4.680821919722094e-06, + "loss": 13.5093, + "step": 284150 + }, + { + "epoch": 0.5740211783433057, + "grad_norm": 402.07781982421875, + "learning_rate": 4.6804735665950325e-06, + "loss": 24.4496, + "step": 284160 + }, + { + "epoch": 0.5740413789759895, + "grad_norm": 425.4755554199219, + "learning_rate": 4.680125215025304e-06, + "loss": 16.626, + "step": 284170 + }, + { + "epoch": 0.5740615796086733, + "grad_norm": 625.1243286132812, + "learning_rate": 4.679776865014609e-06, + "loss": 40.6312, + "step": 284180 + }, + { + "epoch": 0.5740817802413571, + "grad_norm": 578.5704345703125, + "learning_rate": 4.67942851656464e-06, + "loss": 31.0527, + "step": 284190 + }, + { + "epoch": 0.574101980874041, + "grad_norm": 300.3113708496094, + "learning_rate": 4.679080169677097e-06, + "loss": 8.6096, + "step": 284200 + }, + { + "epoch": 0.5741221815067248, + "grad_norm": 106.82528686523438, + "learning_rate": 
4.678731824353682e-06, + "loss": 25.4715, + "step": 284210 + }, + { + "epoch": 0.5741423821394086, + "grad_norm": 465.44378662109375, + "learning_rate": 4.678383480596085e-06, + "loss": 17.6224, + "step": 284220 + }, + { + "epoch": 0.5741625827720924, + "grad_norm": 702.7250366210938, + "learning_rate": 4.678035138406009e-06, + "loss": 46.881, + "step": 284230 + }, + { + "epoch": 0.5741827834047762, + "grad_norm": 499.0555114746094, + "learning_rate": 4.677686797785153e-06, + "loss": 23.8855, + "step": 284240 + }, + { + "epoch": 0.5742029840374601, + "grad_norm": 381.1941833496094, + "learning_rate": 4.677338458735211e-06, + "loss": 40.2674, + "step": 284250 + }, + { + "epoch": 0.5742231846701439, + "grad_norm": 339.23687744140625, + "learning_rate": 4.676990121257882e-06, + "loss": 18.1712, + "step": 284260 + }, + { + "epoch": 0.5742433853028277, + "grad_norm": 263.4681091308594, + "learning_rate": 4.676641785354865e-06, + "loss": 15.9104, + "step": 284270 + }, + { + "epoch": 0.5742635859355115, + "grad_norm": 641.75732421875, + "learning_rate": 4.676293451027855e-06, + "loss": 16.1626, + "step": 284280 + }, + { + "epoch": 0.5742837865681953, + "grad_norm": 232.75022888183594, + "learning_rate": 4.675945118278552e-06, + "loss": 22.6068, + "step": 284290 + }, + { + "epoch": 0.5743039872008792, + "grad_norm": 133.041259765625, + "learning_rate": 4.675596787108652e-06, + "loss": 15.6812, + "step": 284300 + }, + { + "epoch": 0.574324187833563, + "grad_norm": 406.1040954589844, + "learning_rate": 4.675248457519857e-06, + "loss": 28.2724, + "step": 284310 + }, + { + "epoch": 0.5743443884662468, + "grad_norm": 268.32012939453125, + "learning_rate": 4.67490012951386e-06, + "loss": 16.0275, + "step": 284320 + }, + { + "epoch": 0.5743645890989306, + "grad_norm": 570.830810546875, + "learning_rate": 4.6745518030923595e-06, + "loss": 16.8502, + "step": 284330 + }, + { + "epoch": 0.5743847897316144, + "grad_norm": 233.2513427734375, + "learning_rate": 4.674203478257055e-06, + "loss": 20.7955, + "step": 284340 + }, + { + "epoch": 0.5744049903642983, + "grad_norm": 257.6399230957031, + "learning_rate": 4.673855155009644e-06, + "loss": 19.863, + "step": 284350 + }, + { + "epoch": 0.574425190996982, + "grad_norm": 264.8829345703125, + "learning_rate": 4.673506833351821e-06, + "loss": 23.7191, + "step": 284360 + }, + { + "epoch": 0.5744453916296658, + "grad_norm": 985.7921752929688, + "learning_rate": 4.673158513285289e-06, + "loss": 32.1464, + "step": 284370 + }, + { + "epoch": 0.5744655922623496, + "grad_norm": 295.1639709472656, + "learning_rate": 4.67281019481174e-06, + "loss": 20.751, + "step": 284380 + }, + { + "epoch": 0.5744857928950334, + "grad_norm": 330.4938659667969, + "learning_rate": 4.672461877932877e-06, + "loss": 14.6958, + "step": 284390 + }, + { + "epoch": 0.5745059935277173, + "grad_norm": 92.31692504882812, + "learning_rate": 4.672113562650394e-06, + "loss": 15.6347, + "step": 284400 + }, + { + "epoch": 0.5745261941604011, + "grad_norm": 381.076904296875, + "learning_rate": 4.6717652489659894e-06, + "loss": 22.0171, + "step": 284410 + }, + { + "epoch": 0.5745463947930849, + "grad_norm": 922.604248046875, + "learning_rate": 4.671416936881361e-06, + "loss": 20.3584, + "step": 284420 + }, + { + "epoch": 0.5745665954257687, + "grad_norm": 203.6077117919922, + "learning_rate": 4.671068626398208e-06, + "loss": 30.4045, + "step": 284430 + }, + { + "epoch": 0.5745867960584525, + "grad_norm": 357.0528564453125, + "learning_rate": 4.670720317518226e-06, + "loss": 7.9743, + "step": 284440 + }, + 
{ + "epoch": 0.5746069966911364, + "grad_norm": 5.510529041290283, + "learning_rate": 4.670372010243111e-06, + "loss": 27.2087, + "step": 284450 + }, + { + "epoch": 0.5746271973238202, + "grad_norm": 436.4128112792969, + "learning_rate": 4.670023704574564e-06, + "loss": 12.2783, + "step": 284460 + }, + { + "epoch": 0.574647397956504, + "grad_norm": 172.4686737060547, + "learning_rate": 4.669675400514283e-06, + "loss": 25.0014, + "step": 284470 + }, + { + "epoch": 0.5746675985891878, + "grad_norm": 24.989734649658203, + "learning_rate": 4.669327098063963e-06, + "loss": 19.5955, + "step": 284480 + }, + { + "epoch": 0.5746877992218716, + "grad_norm": 398.2413024902344, + "learning_rate": 4.668978797225303e-06, + "loss": 22.0749, + "step": 284490 + }, + { + "epoch": 0.5747079998545555, + "grad_norm": 725.6436157226562, + "learning_rate": 4.668630498000001e-06, + "loss": 41.6987, + "step": 284500 + }, + { + "epoch": 0.5747282004872393, + "grad_norm": 56.92924880981445, + "learning_rate": 4.668282200389752e-06, + "loss": 16.7594, + "step": 284510 + }, + { + "epoch": 0.5747484011199231, + "grad_norm": 60.30400848388672, + "learning_rate": 4.667933904396255e-06, + "loss": 20.4393, + "step": 284520 + }, + { + "epoch": 0.5747686017526069, + "grad_norm": 287.49615478515625, + "learning_rate": 4.667585610021211e-06, + "loss": 15.5899, + "step": 284530 + }, + { + "epoch": 0.5747888023852907, + "grad_norm": 302.23297119140625, + "learning_rate": 4.667237317266311e-06, + "loss": 23.3494, + "step": 284540 + }, + { + "epoch": 0.5748090030179746, + "grad_norm": 1191.14111328125, + "learning_rate": 4.666889026133257e-06, + "loss": 34.3193, + "step": 284550 + }, + { + "epoch": 0.5748292036506584, + "grad_norm": 481.72100830078125, + "learning_rate": 4.666540736623746e-06, + "loss": 20.3894, + "step": 284560 + }, + { + "epoch": 0.5748494042833422, + "grad_norm": 23486.73046875, + "learning_rate": 4.666192448739475e-06, + "loss": 34.2088, + "step": 284570 + }, + { + "epoch": 0.574869604916026, + "grad_norm": 408.9329528808594, + "learning_rate": 4.665844162482141e-06, + "loss": 10.6384, + "step": 284580 + }, + { + "epoch": 0.5748898055487098, + "grad_norm": 731.5371704101562, + "learning_rate": 4.665495877853443e-06, + "loss": 27.1174, + "step": 284590 + }, + { + "epoch": 0.5749100061813937, + "grad_norm": 747.6769409179688, + "learning_rate": 4.6651475948550765e-06, + "loss": 25.1141, + "step": 284600 + }, + { + "epoch": 0.5749302068140775, + "grad_norm": 386.1933898925781, + "learning_rate": 4.66479931348874e-06, + "loss": 16.0697, + "step": 284610 + }, + { + "epoch": 0.5749504074467612, + "grad_norm": 645.2978515625, + "learning_rate": 4.664451033756131e-06, + "loss": 11.5531, + "step": 284620 + }, + { + "epoch": 0.574970608079445, + "grad_norm": 1509.620361328125, + "learning_rate": 4.664102755658948e-06, + "loss": 22.9517, + "step": 284630 + }, + { + "epoch": 0.5749908087121288, + "grad_norm": 352.56793212890625, + "learning_rate": 4.663754479198887e-06, + "loss": 11.9502, + "step": 284640 + }, + { + "epoch": 0.5750110093448126, + "grad_norm": 480.3440246582031, + "learning_rate": 4.663406204377645e-06, + "loss": 17.9299, + "step": 284650 + }, + { + "epoch": 0.5750312099774965, + "grad_norm": 376.5037841796875, + "learning_rate": 4.663057931196922e-06, + "loss": 27.2972, + "step": 284660 + }, + { + "epoch": 0.5750514106101803, + "grad_norm": 502.1021423339844, + "learning_rate": 4.662709659658411e-06, + "loss": 20.9237, + "step": 284670 + }, + { + "epoch": 0.5750716112428641, + "grad_norm": 
236.69732666015625, + "learning_rate": 4.6623613897638146e-06, + "loss": 32.1217, + "step": 284680 + }, + { + "epoch": 0.5750918118755479, + "grad_norm": 2.2966716289520264, + "learning_rate": 4.6620131215148275e-06, + "loss": 13.1908, + "step": 284690 + }, + { + "epoch": 0.5751120125082317, + "grad_norm": 326.837158203125, + "learning_rate": 4.661664854913147e-06, + "loss": 14.4557, + "step": 284700 + }, + { + "epoch": 0.5751322131409156, + "grad_norm": 546.8901977539062, + "learning_rate": 4.661316589960471e-06, + "loss": 10.5499, + "step": 284710 + }, + { + "epoch": 0.5751524137735994, + "grad_norm": 292.4002990722656, + "learning_rate": 4.660968326658497e-06, + "loss": 16.1762, + "step": 284720 + }, + { + "epoch": 0.5751726144062832, + "grad_norm": 18.51688003540039, + "learning_rate": 4.660620065008923e-06, + "loss": 18.8795, + "step": 284730 + }, + { + "epoch": 0.575192815038967, + "grad_norm": 591.8967895507812, + "learning_rate": 4.6602718050134435e-06, + "loss": 22.2219, + "step": 284740 + }, + { + "epoch": 0.5752130156716508, + "grad_norm": 146.34230041503906, + "learning_rate": 4.659923546673761e-06, + "loss": 38.448, + "step": 284750 + }, + { + "epoch": 0.5752332163043347, + "grad_norm": 65.420166015625, + "learning_rate": 4.659575289991567e-06, + "loss": 21.2529, + "step": 284760 + }, + { + "epoch": 0.5752534169370185, + "grad_norm": 187.43064880371094, + "learning_rate": 4.659227034968563e-06, + "loss": 31.3825, + "step": 284770 + }, + { + "epoch": 0.5752736175697023, + "grad_norm": 240.3877410888672, + "learning_rate": 4.658878781606445e-06, + "loss": 12.2182, + "step": 284780 + }, + { + "epoch": 0.5752938182023861, + "grad_norm": 404.1460266113281, + "learning_rate": 4.658530529906911e-06, + "loss": 26.9092, + "step": 284790 + }, + { + "epoch": 0.5753140188350699, + "grad_norm": 446.2254638671875, + "learning_rate": 4.658182279871657e-06, + "loss": 13.1347, + "step": 284800 + }, + { + "epoch": 0.5753342194677538, + "grad_norm": 307.27301025390625, + "learning_rate": 4.65783403150238e-06, + "loss": 18.2271, + "step": 284810 + }, + { + "epoch": 0.5753544201004376, + "grad_norm": 97.33065032958984, + "learning_rate": 4.657485784800782e-06, + "loss": 14.324, + "step": 284820 + }, + { + "epoch": 0.5753746207331214, + "grad_norm": 219.03695678710938, + "learning_rate": 4.657137539768553e-06, + "loss": 24.3588, + "step": 284830 + }, + { + "epoch": 0.5753948213658052, + "grad_norm": 416.7481384277344, + "learning_rate": 4.656789296407396e-06, + "loss": 14.1049, + "step": 284840 + }, + { + "epoch": 0.575415021998489, + "grad_norm": 98.80854797363281, + "learning_rate": 4.656441054719007e-06, + "loss": 16.7309, + "step": 284850 + }, + { + "epoch": 0.5754352226311729, + "grad_norm": 305.9570007324219, + "learning_rate": 4.656092814705082e-06, + "loss": 23.5086, + "step": 284860 + }, + { + "epoch": 0.5754554232638566, + "grad_norm": 123.20638275146484, + "learning_rate": 4.655744576367318e-06, + "loss": 10.871, + "step": 284870 + }, + { + "epoch": 0.5754756238965404, + "grad_norm": 298.4538879394531, + "learning_rate": 4.655396339707414e-06, + "loss": 8.1463, + "step": 284880 + }, + { + "epoch": 0.5754958245292242, + "grad_norm": 255.8907470703125, + "learning_rate": 4.655048104727066e-06, + "loss": 5.6172, + "step": 284890 + }, + { + "epoch": 0.575516025161908, + "grad_norm": 600.5504150390625, + "learning_rate": 4.654699871427972e-06, + "loss": 34.4467, + "step": 284900 + }, + { + "epoch": 0.5755362257945918, + "grad_norm": 317.0341491699219, + "learning_rate": 4.654351639811828e-06, 
+ "loss": 17.3721, + "step": 284910 + }, + { + "epoch": 0.5755564264272757, + "grad_norm": 424.71966552734375, + "learning_rate": 4.654003409880333e-06, + "loss": 17.8341, + "step": 284920 + }, + { + "epoch": 0.5755766270599595, + "grad_norm": 28.263832092285156, + "learning_rate": 4.653655181635184e-06, + "loss": 18.1012, + "step": 284930 + }, + { + "epoch": 0.5755968276926433, + "grad_norm": 538.7200317382812, + "learning_rate": 4.653306955078077e-06, + "loss": 19.7967, + "step": 284940 + }, + { + "epoch": 0.5756170283253271, + "grad_norm": 451.8140869140625, + "learning_rate": 4.652958730210711e-06, + "loss": 11.1338, + "step": 284950 + }, + { + "epoch": 0.575637228958011, + "grad_norm": 193.7926025390625, + "learning_rate": 4.65261050703478e-06, + "loss": 11.4366, + "step": 284960 + }, + { + "epoch": 0.5756574295906948, + "grad_norm": 140.07032775878906, + "learning_rate": 4.652262285551983e-06, + "loss": 22.7315, + "step": 284970 + }, + { + "epoch": 0.5756776302233786, + "grad_norm": 301.08514404296875, + "learning_rate": 4.651914065764021e-06, + "loss": 14.3331, + "step": 284980 + }, + { + "epoch": 0.5756978308560624, + "grad_norm": 198.33978271484375, + "learning_rate": 4.651565847672584e-06, + "loss": 12.0796, + "step": 284990 + }, + { + "epoch": 0.5757180314887462, + "grad_norm": 460.6780700683594, + "learning_rate": 4.651217631279374e-06, + "loss": 20.4087, + "step": 285000 + }, + { + "epoch": 0.57573823212143, + "grad_norm": 490.967041015625, + "learning_rate": 4.650869416586088e-06, + "loss": 13.374, + "step": 285010 + }, + { + "epoch": 0.5757584327541139, + "grad_norm": 320.7071228027344, + "learning_rate": 4.650521203594421e-06, + "loss": 9.587, + "step": 285020 + }, + { + "epoch": 0.5757786333867977, + "grad_norm": 60.833412170410156, + "learning_rate": 4.6501729923060705e-06, + "loss": 19.382, + "step": 285030 + }, + { + "epoch": 0.5757988340194815, + "grad_norm": 319.47808837890625, + "learning_rate": 4.649824782722737e-06, + "loss": 12.8303, + "step": 285040 + }, + { + "epoch": 0.5758190346521653, + "grad_norm": 768.2241821289062, + "learning_rate": 4.649476574846113e-06, + "loss": 17.567, + "step": 285050 + }, + { + "epoch": 0.5758392352848491, + "grad_norm": 686.4849853515625, + "learning_rate": 4.649128368677896e-06, + "loss": 28.091, + "step": 285060 + }, + { + "epoch": 0.575859435917533, + "grad_norm": 17.46307945251465, + "learning_rate": 4.648780164219787e-06, + "loss": 31.0867, + "step": 285070 + }, + { + "epoch": 0.5758796365502168, + "grad_norm": 828.7020263671875, + "learning_rate": 4.648431961473482e-06, + "loss": 17.7138, + "step": 285080 + }, + { + "epoch": 0.5758998371829006, + "grad_norm": 192.98765563964844, + "learning_rate": 4.648083760440676e-06, + "loss": 9.0035, + "step": 285090 + }, + { + "epoch": 0.5759200378155844, + "grad_norm": 580.1175537109375, + "learning_rate": 4.6477355611230655e-06, + "loss": 20.5532, + "step": 285100 + }, + { + "epoch": 0.5759402384482682, + "grad_norm": 309.5498962402344, + "learning_rate": 4.6473873635223514e-06, + "loss": 27.1804, + "step": 285110 + }, + { + "epoch": 0.5759604390809521, + "grad_norm": 6.215914726257324, + "learning_rate": 4.647039167640227e-06, + "loss": 22.8904, + "step": 285120 + }, + { + "epoch": 0.5759806397136358, + "grad_norm": 251.83738708496094, + "learning_rate": 4.646690973478391e-06, + "loss": 18.3718, + "step": 285130 + }, + { + "epoch": 0.5760008403463196, + "grad_norm": 225.23223876953125, + "learning_rate": 4.6463427810385425e-06, + "loss": 13.746, + "step": 285140 + }, + { + "epoch": 
0.5760210409790034, + "grad_norm": 242.44158935546875, + "learning_rate": 4.645994590322373e-06, + "loss": 31.7429, + "step": 285150 + }, + { + "epoch": 0.5760412416116872, + "grad_norm": 86.13539123535156, + "learning_rate": 4.645646401331585e-06, + "loss": 10.4489, + "step": 285160 + }, + { + "epoch": 0.5760614422443711, + "grad_norm": 189.4027557373047, + "learning_rate": 4.6452982140678735e-06, + "loss": 16.5795, + "step": 285170 + }, + { + "epoch": 0.5760816428770549, + "grad_norm": 614.1434326171875, + "learning_rate": 4.644950028532935e-06, + "loss": 30.6802, + "step": 285180 + }, + { + "epoch": 0.5761018435097387, + "grad_norm": 612.4061889648438, + "learning_rate": 4.644601844728467e-06, + "loss": 22.207, + "step": 285190 + }, + { + "epoch": 0.5761220441424225, + "grad_norm": 458.0311584472656, + "learning_rate": 4.644253662656167e-06, + "loss": 24.1358, + "step": 285200 + }, + { + "epoch": 0.5761422447751063, + "grad_norm": 276.1941223144531, + "learning_rate": 4.643905482317731e-06, + "loss": 12.9366, + "step": 285210 + }, + { + "epoch": 0.5761624454077902, + "grad_norm": 282.3110656738281, + "learning_rate": 4.643557303714855e-06, + "loss": 17.2801, + "step": 285220 + }, + { + "epoch": 0.576182646040474, + "grad_norm": 489.2680358886719, + "learning_rate": 4.643209126849239e-06, + "loss": 19.5753, + "step": 285230 + }, + { + "epoch": 0.5762028466731578, + "grad_norm": 351.1014709472656, + "learning_rate": 4.64286095172258e-06, + "loss": 22.7033, + "step": 285240 + }, + { + "epoch": 0.5762230473058416, + "grad_norm": 1463.812255859375, + "learning_rate": 4.642512778336571e-06, + "loss": 31.2228, + "step": 285250 + }, + { + "epoch": 0.5762432479385254, + "grad_norm": 221.68878173828125, + "learning_rate": 4.642164606692912e-06, + "loss": 17.4044, + "step": 285260 + }, + { + "epoch": 0.5762634485712093, + "grad_norm": 183.8222198486328, + "learning_rate": 4.641816436793301e-06, + "loss": 13.5053, + "step": 285270 + }, + { + "epoch": 0.5762836492038931, + "grad_norm": 571.3402709960938, + "learning_rate": 4.64146826863943e-06, + "loss": 32.581, + "step": 285280 + }, + { + "epoch": 0.5763038498365769, + "grad_norm": 667.9365844726562, + "learning_rate": 4.641120102233001e-06, + "loss": 18.8078, + "step": 285290 + }, + { + "epoch": 0.5763240504692607, + "grad_norm": 479.35076904296875, + "learning_rate": 4.6407719375757095e-06, + "loss": 16.2684, + "step": 285300 + }, + { + "epoch": 0.5763442511019445, + "grad_norm": 166.7450408935547, + "learning_rate": 4.6404237746692514e-06, + "loss": 20.5289, + "step": 285310 + }, + { + "epoch": 0.5763644517346284, + "grad_norm": 466.5275573730469, + "learning_rate": 4.640075613515324e-06, + "loss": 12.6233, + "step": 285320 + }, + { + "epoch": 0.5763846523673122, + "grad_norm": 399.5809631347656, + "learning_rate": 4.639727454115626e-06, + "loss": 13.901, + "step": 285330 + }, + { + "epoch": 0.576404852999996, + "grad_norm": 781.8657836914062, + "learning_rate": 4.639379296471851e-06, + "loss": 27.9794, + "step": 285340 + }, + { + "epoch": 0.5764250536326798, + "grad_norm": 514.377197265625, + "learning_rate": 4.639031140585697e-06, + "loss": 12.9701, + "step": 285350 + }, + { + "epoch": 0.5764452542653636, + "grad_norm": 309.2298889160156, + "learning_rate": 4.638682986458862e-06, + "loss": 23.8775, + "step": 285360 + }, + { + "epoch": 0.5764654548980475, + "grad_norm": 578.560302734375, + "learning_rate": 4.638334834093044e-06, + "loss": 24.1145, + "step": 285370 + }, + { + "epoch": 0.5764856555307313, + "grad_norm": 559.6069946289062, + 
"learning_rate": 4.637986683489937e-06, + "loss": 18.4905, + "step": 285380 + }, + { + "epoch": 0.576505856163415, + "grad_norm": 204.30908203125, + "learning_rate": 4.637638534651238e-06, + "loss": 16.427, + "step": 285390 + }, + { + "epoch": 0.5765260567960988, + "grad_norm": 588.1658325195312, + "learning_rate": 4.637290387578647e-06, + "loss": 32.3938, + "step": 285400 + }, + { + "epoch": 0.5765462574287826, + "grad_norm": 392.609619140625, + "learning_rate": 4.636942242273857e-06, + "loss": 22.5501, + "step": 285410 + }, + { + "epoch": 0.5765664580614664, + "grad_norm": 246.3218536376953, + "learning_rate": 4.6365940987385655e-06, + "loss": 26.3835, + "step": 285420 + }, + { + "epoch": 0.5765866586941503, + "grad_norm": 23.963226318359375, + "learning_rate": 4.636245956974474e-06, + "loss": 11.035, + "step": 285430 + }, + { + "epoch": 0.5766068593268341, + "grad_norm": 346.2037658691406, + "learning_rate": 4.635897816983272e-06, + "loss": 22.7347, + "step": 285440 + }, + { + "epoch": 0.5766270599595179, + "grad_norm": 189.39111328125, + "learning_rate": 4.635549678766661e-06, + "loss": 11.7879, + "step": 285450 + }, + { + "epoch": 0.5766472605922017, + "grad_norm": 18.474414825439453, + "learning_rate": 4.635201542326337e-06, + "loss": 18.6868, + "step": 285460 + }, + { + "epoch": 0.5766674612248855, + "grad_norm": 460.7426452636719, + "learning_rate": 4.634853407663996e-06, + "loss": 22.0784, + "step": 285470 + }, + { + "epoch": 0.5766876618575694, + "grad_norm": 597.2728881835938, + "learning_rate": 4.634505274781336e-06, + "loss": 11.4606, + "step": 285480 + }, + { + "epoch": 0.5767078624902532, + "grad_norm": 94.71599578857422, + "learning_rate": 4.634157143680053e-06, + "loss": 8.5866, + "step": 285490 + }, + { + "epoch": 0.576728063122937, + "grad_norm": 1044.5379638671875, + "learning_rate": 4.6338090143618435e-06, + "loss": 24.0246, + "step": 285500 + }, + { + "epoch": 0.5767482637556208, + "grad_norm": 116.58110809326172, + "learning_rate": 4.633460886828402e-06, + "loss": 9.3799, + "step": 285510 + }, + { + "epoch": 0.5767684643883046, + "grad_norm": 587.9881591796875, + "learning_rate": 4.63311276108143e-06, + "loss": 22.7472, + "step": 285520 + }, + { + "epoch": 0.5767886650209885, + "grad_norm": 17.695222854614258, + "learning_rate": 4.632764637122622e-06, + "loss": 18.6941, + "step": 285530 + }, + { + "epoch": 0.5768088656536723, + "grad_norm": 28.241188049316406, + "learning_rate": 4.632416514953675e-06, + "loss": 22.2359, + "step": 285540 + }, + { + "epoch": 0.5768290662863561, + "grad_norm": 439.5326232910156, + "learning_rate": 4.6320683945762835e-06, + "loss": 20.7384, + "step": 285550 + }, + { + "epoch": 0.5768492669190399, + "grad_norm": 828.4425048828125, + "learning_rate": 4.631720275992148e-06, + "loss": 25.1309, + "step": 285560 + }, + { + "epoch": 0.5768694675517237, + "grad_norm": 534.795654296875, + "learning_rate": 4.631372159202962e-06, + "loss": 24.1091, + "step": 285570 + }, + { + "epoch": 0.5768896681844076, + "grad_norm": 459.3776550292969, + "learning_rate": 4.631024044210422e-06, + "loss": 26.7064, + "step": 285580 + }, + { + "epoch": 0.5769098688170914, + "grad_norm": 611.1201782226562, + "learning_rate": 4.6306759310162304e-06, + "loss": 12.0516, + "step": 285590 + }, + { + "epoch": 0.5769300694497752, + "grad_norm": 586.6641235351562, + "learning_rate": 4.630327819622076e-06, + "loss": 40.366, + "step": 285600 + }, + { + "epoch": 0.576950270082459, + "grad_norm": 601.6295166015625, + "learning_rate": 4.62997971002966e-06, + "loss": 24.1361, + 
"step": 285610 + }, + { + "epoch": 0.5769704707151428, + "grad_norm": 614.7229614257812, + "learning_rate": 4.629631602240678e-06, + "loss": 29.7239, + "step": 285620 + }, + { + "epoch": 0.5769906713478267, + "grad_norm": 255.7998504638672, + "learning_rate": 4.6292834962568265e-06, + "loss": 27.0257, + "step": 285630 + }, + { + "epoch": 0.5770108719805104, + "grad_norm": 165.79042053222656, + "learning_rate": 4.628935392079802e-06, + "loss": 13.3021, + "step": 285640 + }, + { + "epoch": 0.5770310726131942, + "grad_norm": 214.94522094726562, + "learning_rate": 4.628587289711303e-06, + "loss": 18.4219, + "step": 285650 + }, + { + "epoch": 0.577051273245878, + "grad_norm": 175.484375, + "learning_rate": 4.628239189153023e-06, + "loss": 13.9871, + "step": 285660 + }, + { + "epoch": 0.5770714738785618, + "grad_norm": 3.3779449462890625, + "learning_rate": 4.627891090406659e-06, + "loss": 4.6265, + "step": 285670 + }, + { + "epoch": 0.5770916745112457, + "grad_norm": 274.63739013671875, + "learning_rate": 4.627542993473909e-06, + "loss": 23.1313, + "step": 285680 + }, + { + "epoch": 0.5771118751439295, + "grad_norm": 465.6402587890625, + "learning_rate": 4.6271948983564715e-06, + "loss": 35.0637, + "step": 285690 + }, + { + "epoch": 0.5771320757766133, + "grad_norm": 532.4801635742188, + "learning_rate": 4.6268468050560394e-06, + "loss": 16.0424, + "step": 285700 + }, + { + "epoch": 0.5771522764092971, + "grad_norm": 193.82769775390625, + "learning_rate": 4.626498713574311e-06, + "loss": 18.8277, + "step": 285710 + }, + { + "epoch": 0.5771724770419809, + "grad_norm": 428.383056640625, + "learning_rate": 4.626150623912983e-06, + "loss": 24.673, + "step": 285720 + }, + { + "epoch": 0.5771926776746648, + "grad_norm": 482.5599060058594, + "learning_rate": 4.625802536073751e-06, + "loss": 17.7467, + "step": 285730 + }, + { + "epoch": 0.5772128783073486, + "grad_norm": 221.01895141601562, + "learning_rate": 4.625454450058311e-06, + "loss": 24.9587, + "step": 285740 + }, + { + "epoch": 0.5772330789400324, + "grad_norm": 355.8588562011719, + "learning_rate": 4.625106365868363e-06, + "loss": 23.7542, + "step": 285750 + }, + { + "epoch": 0.5772532795727162, + "grad_norm": 243.97454833984375, + "learning_rate": 4.624758283505599e-06, + "loss": 22.4432, + "step": 285760 + }, + { + "epoch": 0.5772734802054, + "grad_norm": 331.9516296386719, + "learning_rate": 4.624410202971718e-06, + "loss": 14.9428, + "step": 285770 + }, + { + "epoch": 0.5772936808380839, + "grad_norm": 206.70016479492188, + "learning_rate": 4.624062124268418e-06, + "loss": 23.7267, + "step": 285780 + }, + { + "epoch": 0.5773138814707677, + "grad_norm": 22.560590744018555, + "learning_rate": 4.6237140473973916e-06, + "loss": 21.4815, + "step": 285790 + }, + { + "epoch": 0.5773340821034515, + "grad_norm": 655.5744018554688, + "learning_rate": 4.6233659723603374e-06, + "loss": 31.4316, + "step": 285800 + }, + { + "epoch": 0.5773542827361353, + "grad_norm": 320.5361328125, + "learning_rate": 4.623017899158953e-06, + "loss": 17.9883, + "step": 285810 + }, + { + "epoch": 0.5773744833688191, + "grad_norm": 694.0796508789062, + "learning_rate": 4.6226698277949325e-06, + "loss": 27.7262, + "step": 285820 + }, + { + "epoch": 0.577394684001503, + "grad_norm": 558.8837280273438, + "learning_rate": 4.622321758269972e-06, + "loss": 14.9198, + "step": 285830 + }, + { + "epoch": 0.5774148846341868, + "grad_norm": 376.5775451660156, + "learning_rate": 4.621973690585772e-06, + "loss": 19.4645, + "step": 285840 + }, + { + "epoch": 0.5774350852668706, + 
"grad_norm": 444.7864685058594, + "learning_rate": 4.621625624744026e-06, + "loss": 21.8486, + "step": 285850 + }, + { + "epoch": 0.5774552858995544, + "grad_norm": 542.1842041015625, + "learning_rate": 4.62127756074643e-06, + "loss": 20.4572, + "step": 285860 + }, + { + "epoch": 0.5774754865322382, + "grad_norm": 693.6360473632812, + "learning_rate": 4.620929498594682e-06, + "loss": 18.3903, + "step": 285870 + }, + { + "epoch": 0.5774956871649221, + "grad_norm": 454.9926452636719, + "learning_rate": 4.620581438290478e-06, + "loss": 18.1475, + "step": 285880 + }, + { + "epoch": 0.5775158877976059, + "grad_norm": 4.223319053649902, + "learning_rate": 4.620233379835513e-06, + "loss": 14.8415, + "step": 285890 + }, + { + "epoch": 0.5775360884302896, + "grad_norm": 841.5548706054688, + "learning_rate": 4.619885323231484e-06, + "loss": 26.8061, + "step": 285900 + }, + { + "epoch": 0.5775562890629734, + "grad_norm": 322.03594970703125, + "learning_rate": 4.6195372684800895e-06, + "loss": 13.2405, + "step": 285910 + }, + { + "epoch": 0.5775764896956572, + "grad_norm": 643.8477783203125, + "learning_rate": 4.619189215583023e-06, + "loss": 31.1178, + "step": 285920 + }, + { + "epoch": 0.577596690328341, + "grad_norm": 263.5108642578125, + "learning_rate": 4.618841164541982e-06, + "loss": 19.3314, + "step": 285930 + }, + { + "epoch": 0.5776168909610249, + "grad_norm": 479.68951416015625, + "learning_rate": 4.618493115358665e-06, + "loss": 13.9931, + "step": 285940 + }, + { + "epoch": 0.5776370915937087, + "grad_norm": 262.92724609375, + "learning_rate": 4.618145068034764e-06, + "loss": 10.7394, + "step": 285950 + }, + { + "epoch": 0.5776572922263925, + "grad_norm": 26.95539665222168, + "learning_rate": 4.617797022571977e-06, + "loss": 14.5507, + "step": 285960 + }, + { + "epoch": 0.5776774928590763, + "grad_norm": 127.87303924560547, + "learning_rate": 4.617448978972002e-06, + "loss": 13.9312, + "step": 285970 + }, + { + "epoch": 0.5776976934917601, + "grad_norm": 687.3388061523438, + "learning_rate": 4.617100937236535e-06, + "loss": 18.8998, + "step": 285980 + }, + { + "epoch": 0.577717894124444, + "grad_norm": 141.27928161621094, + "learning_rate": 4.616752897367271e-06, + "loss": 25.3384, + "step": 285990 + }, + { + "epoch": 0.5777380947571278, + "grad_norm": 382.4380798339844, + "learning_rate": 4.6164048593659076e-06, + "loss": 16.0115, + "step": 286000 + }, + { + "epoch": 0.5777582953898116, + "grad_norm": 662.46435546875, + "learning_rate": 4.6160568232341406e-06, + "loss": 16.4892, + "step": 286010 + }, + { + "epoch": 0.5777784960224954, + "grad_norm": 140.6461639404297, + "learning_rate": 4.615708788973664e-06, + "loss": 17.4467, + "step": 286020 + }, + { + "epoch": 0.5777986966551792, + "grad_norm": 255.77389526367188, + "learning_rate": 4.615360756586177e-06, + "loss": 33.3595, + "step": 286030 + }, + { + "epoch": 0.5778188972878631, + "grad_norm": 159.06700134277344, + "learning_rate": 4.615012726073376e-06, + "loss": 15.472, + "step": 286040 + }, + { + "epoch": 0.5778390979205469, + "grad_norm": 284.42010498046875, + "learning_rate": 4.614664697436956e-06, + "loss": 14.2701, + "step": 286050 + }, + { + "epoch": 0.5778592985532307, + "grad_norm": 103.40221405029297, + "learning_rate": 4.614316670678612e-06, + "loss": 20.7587, + "step": 286060 + }, + { + "epoch": 0.5778794991859145, + "grad_norm": 351.7590637207031, + "learning_rate": 4.6139686458000445e-06, + "loss": 15.2206, + "step": 286070 + }, + { + "epoch": 0.5778996998185983, + "grad_norm": 218.58926391601562, + "learning_rate": 
4.613620622802945e-06, + "loss": 11.3273, + "step": 286080 + }, + { + "epoch": 0.5779199004512822, + "grad_norm": 235.08702087402344, + "learning_rate": 4.6132726016890126e-06, + "loss": 10.5108, + "step": 286090 + }, + { + "epoch": 0.577940101083966, + "grad_norm": 94.96534729003906, + "learning_rate": 4.612924582459943e-06, + "loss": 30.6132, + "step": 286100 + }, + { + "epoch": 0.5779603017166498, + "grad_norm": 678.427978515625, + "learning_rate": 4.612576565117431e-06, + "loss": 18.7635, + "step": 286110 + }, + { + "epoch": 0.5779805023493336, + "grad_norm": 223.06146240234375, + "learning_rate": 4.612228549663173e-06, + "loss": 24.9446, + "step": 286120 + }, + { + "epoch": 0.5780007029820174, + "grad_norm": 698.78271484375, + "learning_rate": 4.611880536098867e-06, + "loss": 21.3133, + "step": 286130 + }, + { + "epoch": 0.5780209036147013, + "grad_norm": 59.89653778076172, + "learning_rate": 4.61153252442621e-06, + "loss": 11.58, + "step": 286140 + }, + { + "epoch": 0.578041104247385, + "grad_norm": 148.0355682373047, + "learning_rate": 4.611184514646894e-06, + "loss": 21.3908, + "step": 286150 + }, + { + "epoch": 0.5780613048800688, + "grad_norm": 400.5846862792969, + "learning_rate": 4.610836506762618e-06, + "loss": 13.8255, + "step": 286160 + }, + { + "epoch": 0.5780815055127526, + "grad_norm": 523.7686767578125, + "learning_rate": 4.610488500775078e-06, + "loss": 17.843, + "step": 286170 + }, + { + "epoch": 0.5781017061454364, + "grad_norm": 54.488616943359375, + "learning_rate": 4.61014049668597e-06, + "loss": 17.2355, + "step": 286180 + }, + { + "epoch": 0.5781219067781203, + "grad_norm": 592.7142944335938, + "learning_rate": 4.6097924944969885e-06, + "loss": 38.8723, + "step": 286190 + }, + { + "epoch": 0.5781421074108041, + "grad_norm": 207.0294952392578, + "learning_rate": 4.609444494209834e-06, + "loss": 22.1777, + "step": 286200 + }, + { + "epoch": 0.5781623080434879, + "grad_norm": 550.2642211914062, + "learning_rate": 4.609096495826196e-06, + "loss": 16.6159, + "step": 286210 + }, + { + "epoch": 0.5781825086761717, + "grad_norm": 147.8919219970703, + "learning_rate": 4.608748499347777e-06, + "loss": 13.4256, + "step": 286220 + }, + { + "epoch": 0.5782027093088555, + "grad_norm": 397.0547180175781, + "learning_rate": 4.60840050477627e-06, + "loss": 9.2584, + "step": 286230 + }, + { + "epoch": 0.5782229099415394, + "grad_norm": 258.3592224121094, + "learning_rate": 4.608052512113371e-06, + "loss": 20.8878, + "step": 286240 + }, + { + "epoch": 0.5782431105742232, + "grad_norm": 446.7528991699219, + "learning_rate": 4.6077045213607765e-06, + "loss": 11.6692, + "step": 286250 + }, + { + "epoch": 0.578263311206907, + "grad_norm": 441.7554626464844, + "learning_rate": 4.607356532520183e-06, + "loss": 22.9813, + "step": 286260 + }, + { + "epoch": 0.5782835118395908, + "grad_norm": 88.1390380859375, + "learning_rate": 4.607008545593286e-06, + "loss": 20.0238, + "step": 286270 + }, + { + "epoch": 0.5783037124722746, + "grad_norm": 168.27078247070312, + "learning_rate": 4.606660560581779e-06, + "loss": 16.4193, + "step": 286280 + }, + { + "epoch": 0.5783239131049585, + "grad_norm": 100.44886779785156, + "learning_rate": 4.606312577487364e-06, + "loss": 21.3463, + "step": 286290 + }, + { + "epoch": 0.5783441137376423, + "grad_norm": 1153.3206787109375, + "learning_rate": 4.605964596311733e-06, + "loss": 20.3138, + "step": 286300 + }, + { + "epoch": 0.5783643143703261, + "grad_norm": 283.0211486816406, + "learning_rate": 4.605616617056583e-06, + "loss": 14.5252, + "step": 286310 + }, 
+ { + "epoch": 0.5783845150030099, + "grad_norm": 268.0848693847656, + "learning_rate": 4.6052686397236084e-06, + "loss": 13.064, + "step": 286320 + }, + { + "epoch": 0.5784047156356937, + "grad_norm": 270.8878479003906, + "learning_rate": 4.6049206643145086e-06, + "loss": 11.1924, + "step": 286330 + }, + { + "epoch": 0.5784249162683776, + "grad_norm": 821.0260620117188, + "learning_rate": 4.604572690830976e-06, + "loss": 17.9596, + "step": 286340 + }, + { + "epoch": 0.5784451169010614, + "grad_norm": 460.11639404296875, + "learning_rate": 4.604224719274708e-06, + "loss": 23.2671, + "step": 286350 + }, + { + "epoch": 0.5784653175337452, + "grad_norm": 198.3529510498047, + "learning_rate": 4.603876749647404e-06, + "loss": 71.356, + "step": 286360 + }, + { + "epoch": 0.578485518166429, + "grad_norm": 3.7742393016815186, + "learning_rate": 4.6035287819507515e-06, + "loss": 26.5519, + "step": 286370 + }, + { + "epoch": 0.5785057187991128, + "grad_norm": 253.92433166503906, + "learning_rate": 4.603180816186454e-06, + "loss": 14.5308, + "step": 286380 + }, + { + "epoch": 0.5785259194317967, + "grad_norm": 309.96221923828125, + "learning_rate": 4.6028328523562065e-06, + "loss": 16.8102, + "step": 286390 + }, + { + "epoch": 0.5785461200644805, + "grad_norm": 395.74951171875, + "learning_rate": 4.602484890461702e-06, + "loss": 15.503, + "step": 286400 + }, + { + "epoch": 0.5785663206971642, + "grad_norm": 298.9871826171875, + "learning_rate": 4.602136930504638e-06, + "loss": 25.5999, + "step": 286410 + }, + { + "epoch": 0.578586521329848, + "grad_norm": 534.5841674804688, + "learning_rate": 4.601788972486709e-06, + "loss": 21.1018, + "step": 286420 + }, + { + "epoch": 0.5786067219625318, + "grad_norm": 840.9351806640625, + "learning_rate": 4.601441016409616e-06, + "loss": 10.8312, + "step": 286430 + }, + { + "epoch": 0.5786269225952156, + "grad_norm": 394.81683349609375, + "learning_rate": 4.601093062275048e-06, + "loss": 10.7939, + "step": 286440 + }, + { + "epoch": 0.5786471232278995, + "grad_norm": 309.9267578125, + "learning_rate": 4.600745110084704e-06, + "loss": 23.2984, + "step": 286450 + }, + { + "epoch": 0.5786673238605833, + "grad_norm": 318.5095520019531, + "learning_rate": 4.6003971598402825e-06, + "loss": 25.5717, + "step": 286460 + }, + { + "epoch": 0.5786875244932671, + "grad_norm": 342.0323486328125, + "learning_rate": 4.600049211543475e-06, + "loss": 31.5346, + "step": 286470 + }, + { + "epoch": 0.5787077251259509, + "grad_norm": 1021.0853881835938, + "learning_rate": 4.599701265195979e-06, + "loss": 25.0337, + "step": 286480 + }, + { + "epoch": 0.5787279257586347, + "grad_norm": 0.24047352373600006, + "learning_rate": 4.599353320799492e-06, + "loss": 20.3372, + "step": 286490 + }, + { + "epoch": 0.5787481263913186, + "grad_norm": 63.22924041748047, + "learning_rate": 4.5990053783557066e-06, + "loss": 24.6879, + "step": 286500 + }, + { + "epoch": 0.5787683270240024, + "grad_norm": 89.17341613769531, + "learning_rate": 4.598657437866319e-06, + "loss": 17.7072, + "step": 286510 + }, + { + "epoch": 0.5787885276566862, + "grad_norm": 154.88485717773438, + "learning_rate": 4.59830949933303e-06, + "loss": 15.3738, + "step": 286520 + }, + { + "epoch": 0.57880872828937, + "grad_norm": 178.2100372314453, + "learning_rate": 4.5979615627575295e-06, + "loss": 21.1074, + "step": 286530 + }, + { + "epoch": 0.5788289289220538, + "grad_norm": 273.7059326171875, + "learning_rate": 4.597613628141516e-06, + "loss": 23.362, + "step": 286540 + }, + { + "epoch": 0.5788491295547377, + "grad_norm": 
335.8993835449219, + "learning_rate": 4.597265695486685e-06, + "loss": 25.632, + "step": 286550 + }, + { + "epoch": 0.5788693301874215, + "grad_norm": 106.1709976196289, + "learning_rate": 4.5969177647947325e-06, + "loss": 19.213, + "step": 286560 + }, + { + "epoch": 0.5788895308201053, + "grad_norm": 277.79095458984375, + "learning_rate": 4.596569836067353e-06, + "loss": 16.6344, + "step": 286570 + }, + { + "epoch": 0.5789097314527891, + "grad_norm": 273.9773254394531, + "learning_rate": 4.596221909306243e-06, + "loss": 17.7803, + "step": 286580 + }, + { + "epoch": 0.5789299320854729, + "grad_norm": 536.6856079101562, + "learning_rate": 4.5958739845131e-06, + "loss": 19.9283, + "step": 286590 + }, + { + "epoch": 0.5789501327181568, + "grad_norm": 151.8507537841797, + "learning_rate": 4.595526061689617e-06, + "loss": 20.9777, + "step": 286600 + }, + { + "epoch": 0.5789703333508406, + "grad_norm": 438.26495361328125, + "learning_rate": 4.5951781408374915e-06, + "loss": 31.4876, + "step": 286610 + }, + { + "epoch": 0.5789905339835244, + "grad_norm": 323.227294921875, + "learning_rate": 4.594830221958419e-06, + "loss": 26.477, + "step": 286620 + }, + { + "epoch": 0.5790107346162082, + "grad_norm": 35.54415512084961, + "learning_rate": 4.594482305054094e-06, + "loss": 15.607, + "step": 286630 + }, + { + "epoch": 0.579030935248892, + "grad_norm": 283.94781494140625, + "learning_rate": 4.594134390126213e-06, + "loss": 20.376, + "step": 286640 + }, + { + "epoch": 0.5790511358815759, + "grad_norm": 536.4644775390625, + "learning_rate": 4.593786477176473e-06, + "loss": 15.0391, + "step": 286650 + }, + { + "epoch": 0.5790713365142596, + "grad_norm": 164.15357971191406, + "learning_rate": 4.593438566206567e-06, + "loss": 20.2518, + "step": 286660 + }, + { + "epoch": 0.5790915371469434, + "grad_norm": 388.2125244140625, + "learning_rate": 4.593090657218192e-06, + "loss": 20.0896, + "step": 286670 + }, + { + "epoch": 0.5791117377796272, + "grad_norm": 200.10169982910156, + "learning_rate": 4.592742750213045e-06, + "loss": 30.201, + "step": 286680 + }, + { + "epoch": 0.579131938412311, + "grad_norm": 227.21434020996094, + "learning_rate": 4.59239484519282e-06, + "loss": 14.6815, + "step": 286690 + }, + { + "epoch": 0.5791521390449949, + "grad_norm": 13.020706176757812, + "learning_rate": 4.592046942159213e-06, + "loss": 15.0803, + "step": 286700 + }, + { + "epoch": 0.5791723396776787, + "grad_norm": 376.193115234375, + "learning_rate": 4.591699041113921e-06, + "loss": 19.5756, + "step": 286710 + }, + { + "epoch": 0.5791925403103625, + "grad_norm": 580.8711547851562, + "learning_rate": 4.5913511420586365e-06, + "loss": 33.743, + "step": 286720 + }, + { + "epoch": 0.5792127409430463, + "grad_norm": 304.92724609375, + "learning_rate": 4.591003244995056e-06, + "loss": 20.8417, + "step": 286730 + }, + { + "epoch": 0.5792329415757301, + "grad_norm": 239.2734375, + "learning_rate": 4.5906553499248775e-06, + "loss": 25.4103, + "step": 286740 + }, + { + "epoch": 0.579253142208414, + "grad_norm": 142.53900146484375, + "learning_rate": 4.590307456849797e-06, + "loss": 12.3575, + "step": 286750 + }, + { + "epoch": 0.5792733428410978, + "grad_norm": 168.9327392578125, + "learning_rate": 4.589959565771505e-06, + "loss": 15.8229, + "step": 286760 + }, + { + "epoch": 0.5792935434737816, + "grad_norm": 490.1933898925781, + "learning_rate": 4.589611676691702e-06, + "loss": 8.6005, + "step": 286770 + }, + { + "epoch": 0.5793137441064654, + "grad_norm": 188.1273651123047, + "learning_rate": 4.589263789612083e-06, + "loss": 
32.1084, + "step": 286780 + }, + { + "epoch": 0.5793339447391492, + "grad_norm": 163.9735870361328, + "learning_rate": 4.588915904534341e-06, + "loss": 8.0317, + "step": 286790 + }, + { + "epoch": 0.579354145371833, + "grad_norm": 319.4701843261719, + "learning_rate": 4.588568021460172e-06, + "loss": 18.7378, + "step": 286800 + }, + { + "epoch": 0.5793743460045169, + "grad_norm": 392.4018859863281, + "learning_rate": 4.588220140391276e-06, + "loss": 33.772, + "step": 286810 + }, + { + "epoch": 0.5793945466372007, + "grad_norm": 518.6945190429688, + "learning_rate": 4.5878722613293415e-06, + "loss": 21.3204, + "step": 286820 + }, + { + "epoch": 0.5794147472698845, + "grad_norm": 408.5188293457031, + "learning_rate": 4.5875243842760684e-06, + "loss": 14.1288, + "step": 286830 + }, + { + "epoch": 0.5794349479025683, + "grad_norm": 463.9983215332031, + "learning_rate": 4.5871765092331536e-06, + "loss": 26.961, + "step": 286840 + }, + { + "epoch": 0.5794551485352522, + "grad_norm": 567.8639526367188, + "learning_rate": 4.586828636202288e-06, + "loss": 18.6884, + "step": 286850 + }, + { + "epoch": 0.579475349167936, + "grad_norm": 175.72988891601562, + "learning_rate": 4.58648076518517e-06, + "loss": 21.585, + "step": 286860 + }, + { + "epoch": 0.5794955498006198, + "grad_norm": 314.5470275878906, + "learning_rate": 4.586132896183494e-06, + "loss": 13.5188, + "step": 286870 + }, + { + "epoch": 0.5795157504333036, + "grad_norm": 173.61602783203125, + "learning_rate": 4.5857850291989596e-06, + "loss": 21.5312, + "step": 286880 + }, + { + "epoch": 0.5795359510659874, + "grad_norm": 800.304931640625, + "learning_rate": 4.585437164233255e-06, + "loss": 217.2249, + "step": 286890 + }, + { + "epoch": 0.5795561516986713, + "grad_norm": 161.47119140625, + "learning_rate": 4.5850893012880806e-06, + "loss": 11.6351, + "step": 286900 + }, + { + "epoch": 0.5795763523313551, + "grad_norm": 493.5769348144531, + "learning_rate": 4.584741440365131e-06, + "loss": 22.1845, + "step": 286910 + }, + { + "epoch": 0.5795965529640388, + "grad_norm": 124.64498901367188, + "learning_rate": 4.5843935814661e-06, + "loss": 23.2055, + "step": 286920 + }, + { + "epoch": 0.5796167535967226, + "grad_norm": 223.63775634765625, + "learning_rate": 4.584045724592686e-06, + "loss": 10.2217, + "step": 286930 + }, + { + "epoch": 0.5796369542294064, + "grad_norm": 1177.7420654296875, + "learning_rate": 4.583697869746582e-06, + "loss": 30.2963, + "step": 286940 + }, + { + "epoch": 0.5796571548620902, + "grad_norm": 45.36520004272461, + "learning_rate": 4.583350016929484e-06, + "loss": 16.0916, + "step": 286950 + }, + { + "epoch": 0.5796773554947741, + "grad_norm": 557.9664306640625, + "learning_rate": 4.583002166143086e-06, + "loss": 12.6396, + "step": 286960 + }, + { + "epoch": 0.5796975561274579, + "grad_norm": 278.6199951171875, + "learning_rate": 4.582654317389088e-06, + "loss": 7.6749, + "step": 286970 + }, + { + "epoch": 0.5797177567601417, + "grad_norm": 443.8434143066406, + "learning_rate": 4.58230647066918e-06, + "loss": 19.511, + "step": 286980 + }, + { + "epoch": 0.5797379573928255, + "grad_norm": 422.25677490234375, + "learning_rate": 4.581958625985059e-06, + "loss": 17.3441, + "step": 286990 + }, + { + "epoch": 0.5797581580255093, + "grad_norm": 204.35519409179688, + "learning_rate": 4.581610783338424e-06, + "loss": 22.0155, + "step": 287000 + }, + { + "epoch": 0.5797783586581932, + "grad_norm": 589.8109741210938, + "learning_rate": 4.581262942730965e-06, + "loss": 13.1869, + "step": 287010 + }, + { + "epoch": 
0.579798559290877, + "grad_norm": 406.0547180175781, + "learning_rate": 4.58091510416438e-06, + "loss": 15.0325, + "step": 287020 + }, + { + "epoch": 0.5798187599235608, + "grad_norm": 63.33355712890625, + "learning_rate": 4.580567267640363e-06, + "loss": 18.0086, + "step": 287030 + }, + { + "epoch": 0.5798389605562446, + "grad_norm": 195.82664489746094, + "learning_rate": 4.580219433160613e-06, + "loss": 50.8753, + "step": 287040 + }, + { + "epoch": 0.5798591611889284, + "grad_norm": 33.13059997558594, + "learning_rate": 4.579871600726819e-06, + "loss": 26.4659, + "step": 287050 + }, + { + "epoch": 0.5798793618216123, + "grad_norm": 666.5596923828125, + "learning_rate": 4.579523770340681e-06, + "loss": 18.8621, + "step": 287060 + }, + { + "epoch": 0.5798995624542961, + "grad_norm": 581.1525268554688, + "learning_rate": 4.579175942003895e-06, + "loss": 21.7768, + "step": 287070 + }, + { + "epoch": 0.5799197630869799, + "grad_norm": 287.9134521484375, + "learning_rate": 4.578828115718153e-06, + "loss": 11.986, + "step": 287080 + }, + { + "epoch": 0.5799399637196637, + "grad_norm": 562.1785278320312, + "learning_rate": 4.578480291485153e-06, + "loss": 24.1375, + "step": 287090 + }, + { + "epoch": 0.5799601643523475, + "grad_norm": 201.13356018066406, + "learning_rate": 4.578132469306588e-06, + "loss": 17.8536, + "step": 287100 + }, + { + "epoch": 0.5799803649850314, + "grad_norm": 63.46379089355469, + "learning_rate": 4.5777846491841536e-06, + "loss": 21.7501, + "step": 287110 + }, + { + "epoch": 0.5800005656177152, + "grad_norm": 523.4032592773438, + "learning_rate": 4.577436831119545e-06, + "loss": 17.5555, + "step": 287120 + }, + { + "epoch": 0.580020766250399, + "grad_norm": 2.2471923828125, + "learning_rate": 4.577089015114461e-06, + "loss": 14.3536, + "step": 287130 + }, + { + "epoch": 0.5800409668830828, + "grad_norm": 1016.3574829101562, + "learning_rate": 4.57674120117059e-06, + "loss": 17.8618, + "step": 287140 + }, + { + "epoch": 0.5800611675157666, + "grad_norm": 332.4660339355469, + "learning_rate": 4.576393389289633e-06, + "loss": 12.7511, + "step": 287150 + }, + { + "epoch": 0.5800813681484505, + "grad_norm": 174.55853271484375, + "learning_rate": 4.576045579473284e-06, + "loss": 22.5814, + "step": 287160 + }, + { + "epoch": 0.5801015687811343, + "grad_norm": 28.36602020263672, + "learning_rate": 4.575697771723236e-06, + "loss": 10.2393, + "step": 287170 + }, + { + "epoch": 0.580121769413818, + "grad_norm": 220.09515380859375, + "learning_rate": 4.575349966041187e-06, + "loss": 11.1455, + "step": 287180 + }, + { + "epoch": 0.5801419700465018, + "grad_norm": 174.09661865234375, + "learning_rate": 4.5750021624288285e-06, + "loss": 6.7785, + "step": 287190 + }, + { + "epoch": 0.5801621706791856, + "grad_norm": 443.2445373535156, + "learning_rate": 4.57465436088786e-06, + "loss": 23.8112, + "step": 287200 + }, + { + "epoch": 0.5801823713118694, + "grad_norm": 245.59463500976562, + "learning_rate": 4.574306561419974e-06, + "loss": 18.2209, + "step": 287210 + }, + { + "epoch": 0.5802025719445533, + "grad_norm": 409.2039794921875, + "learning_rate": 4.573958764026866e-06, + "loss": 10.2005, + "step": 287220 + }, + { + "epoch": 0.5802227725772371, + "grad_norm": 924.8493041992188, + "learning_rate": 4.573610968710233e-06, + "loss": 19.8324, + "step": 287230 + }, + { + "epoch": 0.5802429732099209, + "grad_norm": 490.92413330078125, + "learning_rate": 4.573263175471766e-06, + "loss": 16.8571, + "step": 287240 + }, + { + "epoch": 0.5802631738426047, + "grad_norm": 110.32278442382812, + 
"learning_rate": 4.572915384313163e-06, + "loss": 9.8044, + "step": 287250 + }, + { + "epoch": 0.5802833744752885, + "grad_norm": 151.6855926513672, + "learning_rate": 4.57256759523612e-06, + "loss": 22.8395, + "step": 287260 + }, + { + "epoch": 0.5803035751079724, + "grad_norm": 229.5697784423828, + "learning_rate": 4.572219808242328e-06, + "loss": 20.2441, + "step": 287270 + }, + { + "epoch": 0.5803237757406562, + "grad_norm": 383.2900390625, + "learning_rate": 4.571872023333487e-06, + "loss": 20.9785, + "step": 287280 + }, + { + "epoch": 0.58034397637334, + "grad_norm": 570.6531982421875, + "learning_rate": 4.57152424051129e-06, + "loss": 22.2628, + "step": 287290 + }, + { + "epoch": 0.5803641770060238, + "grad_norm": 89.79441833496094, + "learning_rate": 4.571176459777431e-06, + "loss": 37.244, + "step": 287300 + }, + { + "epoch": 0.5803843776387076, + "grad_norm": 169.4888153076172, + "learning_rate": 4.570828681133606e-06, + "loss": 13.9649, + "step": 287310 + }, + { + "epoch": 0.5804045782713915, + "grad_norm": 468.6986389160156, + "learning_rate": 4.570480904581511e-06, + "loss": 27.3959, + "step": 287320 + }, + { + "epoch": 0.5804247789040753, + "grad_norm": 386.35137939453125, + "learning_rate": 4.5701331301228395e-06, + "loss": 23.3206, + "step": 287330 + }, + { + "epoch": 0.5804449795367591, + "grad_norm": 387.7290344238281, + "learning_rate": 4.5697853577592846e-06, + "loss": 18.9909, + "step": 287340 + }, + { + "epoch": 0.5804651801694429, + "grad_norm": 516.852294921875, + "learning_rate": 4.569437587492545e-06, + "loss": 12.2166, + "step": 287350 + }, + { + "epoch": 0.5804853808021267, + "grad_norm": 359.33599853515625, + "learning_rate": 4.569089819324317e-06, + "loss": 9.5905, + "step": 287360 + }, + { + "epoch": 0.5805055814348106, + "grad_norm": 562.2782592773438, + "learning_rate": 4.56874205325629e-06, + "loss": 17.7842, + "step": 287370 + }, + { + "epoch": 0.5805257820674944, + "grad_norm": 362.6568908691406, + "learning_rate": 4.568394289290163e-06, + "loss": 21.974, + "step": 287380 + }, + { + "epoch": 0.5805459827001782, + "grad_norm": 225.65406799316406, + "learning_rate": 4.5680465274276306e-06, + "loss": 17.6372, + "step": 287390 + }, + { + "epoch": 0.580566183332862, + "grad_norm": 454.0522766113281, + "learning_rate": 4.5676987676703865e-06, + "loss": 18.4502, + "step": 287400 + }, + { + "epoch": 0.5805863839655458, + "grad_norm": 207.7526397705078, + "learning_rate": 4.567351010020124e-06, + "loss": 14.4471, + "step": 287410 + }, + { + "epoch": 0.5806065845982297, + "grad_norm": 397.4646301269531, + "learning_rate": 4.567003254478545e-06, + "loss": 19.5973, + "step": 287420 + }, + { + "epoch": 0.5806267852309134, + "grad_norm": 484.93377685546875, + "learning_rate": 4.566655501047335e-06, + "loss": 22.8856, + "step": 287430 + }, + { + "epoch": 0.5806469858635972, + "grad_norm": 415.3368225097656, + "learning_rate": 4.566307749728195e-06, + "loss": 20.3493, + "step": 287440 + }, + { + "epoch": 0.580667186496281, + "grad_norm": 691.5158081054688, + "learning_rate": 4.56596000052282e-06, + "loss": 20.2267, + "step": 287450 + }, + { + "epoch": 0.5806873871289648, + "grad_norm": 280.9125671386719, + "learning_rate": 4.565612253432902e-06, + "loss": 15.8731, + "step": 287460 + }, + { + "epoch": 0.5807075877616487, + "grad_norm": 175.01219177246094, + "learning_rate": 4.565264508460137e-06, + "loss": 10.2099, + "step": 287470 + }, + { + "epoch": 0.5807277883943325, + "grad_norm": 394.6832275390625, + "learning_rate": 4.564916765606218e-06, + "loss": 15.352, + 
"step": 287480 + }, + { + "epoch": 0.5807479890270163, + "grad_norm": 269.8906555175781, + "learning_rate": 4.564569024872846e-06, + "loss": 17.4512, + "step": 287490 + }, + { + "epoch": 0.5807681896597001, + "grad_norm": 192.99142456054688, + "learning_rate": 4.564221286261709e-06, + "loss": 7.287, + "step": 287500 + }, + { + "epoch": 0.5807883902923839, + "grad_norm": 382.4043273925781, + "learning_rate": 4.563873549774506e-06, + "loss": 20.0747, + "step": 287510 + }, + { + "epoch": 0.5808085909250678, + "grad_norm": 133.63954162597656, + "learning_rate": 4.56352581541293e-06, + "loss": 27.828, + "step": 287520 + }, + { + "epoch": 0.5808287915577516, + "grad_norm": 407.80230712890625, + "learning_rate": 4.5631780831786765e-06, + "loss": 22.2352, + "step": 287530 + }, + { + "epoch": 0.5808489921904354, + "grad_norm": 592.959228515625, + "learning_rate": 4.5628303530734395e-06, + "loss": 22.7711, + "step": 287540 + }, + { + "epoch": 0.5808691928231192, + "grad_norm": 159.25987243652344, + "learning_rate": 4.5624826250989156e-06, + "loss": 15.9744, + "step": 287550 + }, + { + "epoch": 0.580889393455803, + "grad_norm": 18.627595901489258, + "learning_rate": 4.562134899256797e-06, + "loss": 28.6998, + "step": 287560 + }, + { + "epoch": 0.5809095940884869, + "grad_norm": 481.6170959472656, + "learning_rate": 4.561787175548779e-06, + "loss": 16.4343, + "step": 287570 + }, + { + "epoch": 0.5809297947211707, + "grad_norm": 274.38763427734375, + "learning_rate": 4.56143945397656e-06, + "loss": 14.786, + "step": 287580 + }, + { + "epoch": 0.5809499953538545, + "grad_norm": 249.79217529296875, + "learning_rate": 4.5610917345418285e-06, + "loss": 12.4017, + "step": 287590 + }, + { + "epoch": 0.5809701959865383, + "grad_norm": 113.56183624267578, + "learning_rate": 4.560744017246284e-06, + "loss": 17.7107, + "step": 287600 + }, + { + "epoch": 0.5809903966192221, + "grad_norm": 106.08311462402344, + "learning_rate": 4.560396302091622e-06, + "loss": 24.6664, + "step": 287610 + }, + { + "epoch": 0.581010597251906, + "grad_norm": 107.45292663574219, + "learning_rate": 4.5600485890795325e-06, + "loss": 11.5848, + "step": 287620 + }, + { + "epoch": 0.5810307978845898, + "grad_norm": 274.9022216796875, + "learning_rate": 4.559700878211714e-06, + "loss": 14.5965, + "step": 287630 + }, + { + "epoch": 0.5810509985172736, + "grad_norm": 259.85986328125, + "learning_rate": 4.5593531694898576e-06, + "loss": 21.7642, + "step": 287640 + }, + { + "epoch": 0.5810711991499574, + "grad_norm": 212.4558563232422, + "learning_rate": 4.559005462915665e-06, + "loss": 22.2325, + "step": 287650 + }, + { + "epoch": 0.5810913997826412, + "grad_norm": 446.8583068847656, + "learning_rate": 4.558657758490822e-06, + "loss": 14.455, + "step": 287660 + }, + { + "epoch": 0.5811116004153251, + "grad_norm": 626.045654296875, + "learning_rate": 4.5583100562170294e-06, + "loss": 12.9183, + "step": 287670 + }, + { + "epoch": 0.5811318010480089, + "grad_norm": 509.44427490234375, + "learning_rate": 4.55796235609598e-06, + "loss": 11.8783, + "step": 287680 + }, + { + "epoch": 0.5811520016806926, + "grad_norm": 395.339599609375, + "learning_rate": 4.557614658129369e-06, + "loss": 9.2559, + "step": 287690 + }, + { + "epoch": 0.5811722023133764, + "grad_norm": 326.5661926269531, + "learning_rate": 4.557266962318889e-06, + "loss": 24.1077, + "step": 287700 + }, + { + "epoch": 0.5811924029460602, + "grad_norm": 43.446617126464844, + "learning_rate": 4.556919268666238e-06, + "loss": 19.6863, + "step": 287710 + }, + { + "epoch": 0.581212603578744, + 
"grad_norm": 296.4700927734375, + "learning_rate": 4.5565715771731075e-06, + "loss": 31.4844, + "step": 287720 + }, + { + "epoch": 0.5812328042114279, + "grad_norm": 282.0885009765625, + "learning_rate": 4.556223887841192e-06, + "loss": 21.5665, + "step": 287730 + }, + { + "epoch": 0.5812530048441117, + "grad_norm": 263.99066162109375, + "learning_rate": 4.555876200672192e-06, + "loss": 18.8875, + "step": 287740 + }, + { + "epoch": 0.5812732054767955, + "grad_norm": 2.8462512493133545, + "learning_rate": 4.555528515667793e-06, + "loss": 12.3651, + "step": 287750 + }, + { + "epoch": 0.5812934061094793, + "grad_norm": 164.52963256835938, + "learning_rate": 4.555180832829695e-06, + "loss": 19.4629, + "step": 287760 + }, + { + "epoch": 0.5813136067421631, + "grad_norm": 493.3425598144531, + "learning_rate": 4.554833152159594e-06, + "loss": 14.9375, + "step": 287770 + }, + { + "epoch": 0.581333807374847, + "grad_norm": 169.95790100097656, + "learning_rate": 4.55448547365918e-06, + "loss": 10.8374, + "step": 287780 + }, + { + "epoch": 0.5813540080075308, + "grad_norm": 167.28880310058594, + "learning_rate": 4.5541377973301505e-06, + "loss": 19.0744, + "step": 287790 + }, + { + "epoch": 0.5813742086402146, + "grad_norm": 216.5238494873047, + "learning_rate": 4.553790123174198e-06, + "loss": 22.8251, + "step": 287800 + }, + { + "epoch": 0.5813944092728984, + "grad_norm": 540.6886596679688, + "learning_rate": 4.553442451193021e-06, + "loss": 17.8551, + "step": 287810 + }, + { + "epoch": 0.5814146099055822, + "grad_norm": 124.38551330566406, + "learning_rate": 4.553094781388309e-06, + "loss": 18.4553, + "step": 287820 + }, + { + "epoch": 0.5814348105382661, + "grad_norm": 391.7790222167969, + "learning_rate": 4.552747113761759e-06, + "loss": 10.7357, + "step": 287830 + }, + { + "epoch": 0.5814550111709499, + "grad_norm": 499.1982727050781, + "learning_rate": 4.552399448315067e-06, + "loss": 22.711, + "step": 287840 + }, + { + "epoch": 0.5814752118036337, + "grad_norm": 557.7574462890625, + "learning_rate": 4.552051785049925e-06, + "loss": 22.8761, + "step": 287850 + }, + { + "epoch": 0.5814954124363175, + "grad_norm": 20.266773223876953, + "learning_rate": 4.551704123968027e-06, + "loss": 12.2276, + "step": 287860 + }, + { + "epoch": 0.5815156130690013, + "grad_norm": 0.0, + "learning_rate": 4.55135646507107e-06, + "loss": 23.068, + "step": 287870 + }, + { + "epoch": 0.5815358137016852, + "grad_norm": 70.69975280761719, + "learning_rate": 4.551008808360747e-06, + "loss": 26.6899, + "step": 287880 + }, + { + "epoch": 0.581556014334369, + "grad_norm": 253.72727966308594, + "learning_rate": 4.550661153838752e-06, + "loss": 19.0773, + "step": 287890 + }, + { + "epoch": 0.5815762149670528, + "grad_norm": 669.1590576171875, + "learning_rate": 4.5503135015067815e-06, + "loss": 19.9383, + "step": 287900 + }, + { + "epoch": 0.5815964155997366, + "grad_norm": 689.8755493164062, + "learning_rate": 4.549965851366528e-06, + "loss": 18.6397, + "step": 287910 + }, + { + "epoch": 0.5816166162324204, + "grad_norm": 631.7351684570312, + "learning_rate": 4.549618203419684e-06, + "loss": 14.6648, + "step": 287920 + }, + { + "epoch": 0.5816368168651043, + "grad_norm": 458.5159606933594, + "learning_rate": 4.549270557667949e-06, + "loss": 19.0287, + "step": 287930 + }, + { + "epoch": 0.581657017497788, + "grad_norm": 1231.04248046875, + "learning_rate": 4.548922914113014e-06, + "loss": 43.0836, + "step": 287940 + }, + { + "epoch": 0.5816772181304718, + "grad_norm": 546.4039306640625, + "learning_rate": 
4.548575272756573e-06, + "loss": 13.7789, + "step": 287950 + }, + { + "epoch": 0.5816974187631556, + "grad_norm": 531.2095947265625, + "learning_rate": 4.548227633600322e-06, + "loss": 17.3354, + "step": 287960 + }, + { + "epoch": 0.5817176193958394, + "grad_norm": 574.9794921875, + "learning_rate": 4.547879996645956e-06, + "loss": 18.906, + "step": 287970 + }, + { + "epoch": 0.5817378200285233, + "grad_norm": 114.48976135253906, + "learning_rate": 4.5475323618951665e-06, + "loss": 20.1184, + "step": 287980 + }, + { + "epoch": 0.5817580206612071, + "grad_norm": 164.88328552246094, + "learning_rate": 4.5471847293496495e-06, + "loss": 23.6874, + "step": 287990 + }, + { + "epoch": 0.5817782212938909, + "grad_norm": 20.963428497314453, + "learning_rate": 4.546837099011101e-06, + "loss": 12.7755, + "step": 288000 + }, + { + "epoch": 0.5817984219265747, + "grad_norm": 276.94244384765625, + "learning_rate": 4.546489470881211e-06, + "loss": 16.2343, + "step": 288010 + }, + { + "epoch": 0.5818186225592585, + "grad_norm": 284.3495788574219, + "learning_rate": 4.5461418449616765e-06, + "loss": 19.776, + "step": 288020 + }, + { + "epoch": 0.5818388231919424, + "grad_norm": 25.324033737182617, + "learning_rate": 4.5457942212541944e-06, + "loss": 27.6506, + "step": 288030 + }, + { + "epoch": 0.5818590238246262, + "grad_norm": 87.18405151367188, + "learning_rate": 4.545446599760453e-06, + "loss": 16.6801, + "step": 288040 + }, + { + "epoch": 0.58187922445731, + "grad_norm": 823.1707763671875, + "learning_rate": 4.545098980482151e-06, + "loss": 19.9332, + "step": 288050 + }, + { + "epoch": 0.5818994250899938, + "grad_norm": 916.0725708007812, + "learning_rate": 4.544751363420981e-06, + "loss": 27.003, + "step": 288060 + }, + { + "epoch": 0.5819196257226776, + "grad_norm": 371.8467102050781, + "learning_rate": 4.544403748578638e-06, + "loss": 24.9305, + "step": 288070 + }, + { + "epoch": 0.5819398263553615, + "grad_norm": 406.9341125488281, + "learning_rate": 4.544056135956816e-06, + "loss": 19.5344, + "step": 288080 + }, + { + "epoch": 0.5819600269880453, + "grad_norm": 1053.5054931640625, + "learning_rate": 4.543708525557208e-06, + "loss": 48.0804, + "step": 288090 + }, + { + "epoch": 0.5819802276207291, + "grad_norm": 192.28451538085938, + "learning_rate": 4.543360917381512e-06, + "loss": 12.5592, + "step": 288100 + }, + { + "epoch": 0.5820004282534129, + "grad_norm": 68.4184799194336, + "learning_rate": 4.543013311431417e-06, + "loss": 12.5787, + "step": 288110 + }, + { + "epoch": 0.5820206288860967, + "grad_norm": 327.2054443359375, + "learning_rate": 4.54266570770862e-06, + "loss": 16.7678, + "step": 288120 + }, + { + "epoch": 0.5820408295187806, + "grad_norm": 734.5392456054688, + "learning_rate": 4.542318106214817e-06, + "loss": 23.1994, + "step": 288130 + }, + { + "epoch": 0.5820610301514644, + "grad_norm": 542.3958129882812, + "learning_rate": 4.541970506951698e-06, + "loss": 22.6746, + "step": 288140 + }, + { + "epoch": 0.5820812307841482, + "grad_norm": 24.169677734375, + "learning_rate": 4.541622909920959e-06, + "loss": 14.9233, + "step": 288150 + }, + { + "epoch": 0.582101431416832, + "grad_norm": 4.579701900482178, + "learning_rate": 4.541275315124296e-06, + "loss": 11.0592, + "step": 288160 + }, + { + "epoch": 0.5821216320495158, + "grad_norm": 453.038818359375, + "learning_rate": 4.5409277225634e-06, + "loss": 13.6834, + "step": 288170 + }, + { + "epoch": 0.5821418326821997, + "grad_norm": 152.3386688232422, + "learning_rate": 4.540580132239966e-06, + "loss": 20.8467, + "step": 288180 + }, 
+ { + "epoch": 0.5821620333148835, + "grad_norm": 571.427001953125, + "learning_rate": 4.540232544155692e-06, + "loss": 27.3594, + "step": 288190 + }, + { + "epoch": 0.5821822339475672, + "grad_norm": 588.6444091796875, + "learning_rate": 4.539884958312265e-06, + "loss": 22.54, + "step": 288200 + }, + { + "epoch": 0.582202434580251, + "grad_norm": 333.3024597167969, + "learning_rate": 4.539537374711384e-06, + "loss": 19.5617, + "step": 288210 + }, + { + "epoch": 0.5822226352129348, + "grad_norm": 237.90054321289062, + "learning_rate": 4.5391897933547436e-06, + "loss": 22.8537, + "step": 288220 + }, + { + "epoch": 0.5822428358456186, + "grad_norm": 604.9053344726562, + "learning_rate": 4.538842214244035e-06, + "loss": 22.4265, + "step": 288230 + }, + { + "epoch": 0.5822630364783025, + "grad_norm": 503.3699035644531, + "learning_rate": 4.538494637380953e-06, + "loss": 22.6053, + "step": 288240 + }, + { + "epoch": 0.5822832371109863, + "grad_norm": 430.1613464355469, + "learning_rate": 4.538147062767191e-06, + "loss": 26.2458, + "step": 288250 + }, + { + "epoch": 0.5823034377436701, + "grad_norm": 5.554739475250244, + "learning_rate": 4.5377994904044485e-06, + "loss": 21.6245, + "step": 288260 + }, + { + "epoch": 0.5823236383763539, + "grad_norm": 442.7223815917969, + "learning_rate": 4.537451920294411e-06, + "loss": 24.7883, + "step": 288270 + }, + { + "epoch": 0.5823438390090377, + "grad_norm": 566.8401489257812, + "learning_rate": 4.537104352438779e-06, + "loss": 17.7039, + "step": 288280 + }, + { + "epoch": 0.5823640396417216, + "grad_norm": 266.1036071777344, + "learning_rate": 4.5367567868392445e-06, + "loss": 13.9091, + "step": 288290 + }, + { + "epoch": 0.5823842402744054, + "grad_norm": 478.8221740722656, + "learning_rate": 4.5364092234975e-06, + "loss": 13.9773, + "step": 288300 + }, + { + "epoch": 0.5824044409070892, + "grad_norm": 693.6759033203125, + "learning_rate": 4.536061662415241e-06, + "loss": 46.1397, + "step": 288310 + }, + { + "epoch": 0.582424641539773, + "grad_norm": 367.02276611328125, + "learning_rate": 4.535714103594162e-06, + "loss": 14.7058, + "step": 288320 + }, + { + "epoch": 0.5824448421724568, + "grad_norm": 294.8173828125, + "learning_rate": 4.535366547035955e-06, + "loss": 7.4283, + "step": 288330 + }, + { + "epoch": 0.5824650428051407, + "grad_norm": 1459.068603515625, + "learning_rate": 4.535018992742315e-06, + "loss": 33.764, + "step": 288340 + }, + { + "epoch": 0.5824852434378245, + "grad_norm": 231.80996704101562, + "learning_rate": 4.534671440714939e-06, + "loss": 22.8392, + "step": 288350 + }, + { + "epoch": 0.5825054440705083, + "grad_norm": 917.2021484375, + "learning_rate": 4.534323890955514e-06, + "loss": 27.3177, + "step": 288360 + }, + { + "epoch": 0.5825256447031921, + "grad_norm": 411.9390869140625, + "learning_rate": 4.533976343465739e-06, + "loss": 21.1837, + "step": 288370 + }, + { + "epoch": 0.582545845335876, + "grad_norm": 304.41455078125, + "learning_rate": 4.533628798247308e-06, + "loss": 17.4472, + "step": 288380 + }, + { + "epoch": 0.5825660459685598, + "grad_norm": 485.7433166503906, + "learning_rate": 4.533281255301913e-06, + "loss": 28.0418, + "step": 288390 + }, + { + "epoch": 0.5825862466012436, + "grad_norm": 526.515625, + "learning_rate": 4.532933714631248e-06, + "loss": 14.768, + "step": 288400 + }, + { + "epoch": 0.5826064472339274, + "grad_norm": 335.7763366699219, + "learning_rate": 4.532586176237007e-06, + "loss": 15.7195, + "step": 288410 + }, + { + "epoch": 0.5826266478666112, + "grad_norm": 452.3872375488281, + 
"learning_rate": 4.532238640120887e-06, + "loss": 23.946, + "step": 288420 + }, + { + "epoch": 0.582646848499295, + "grad_norm": 144.83799743652344, + "learning_rate": 4.531891106284576e-06, + "loss": 17.1629, + "step": 288430 + }, + { + "epoch": 0.5826670491319789, + "grad_norm": 40.88713836669922, + "learning_rate": 4.531543574729772e-06, + "loss": 32.2347, + "step": 288440 + }, + { + "epoch": 0.5826872497646627, + "grad_norm": 157.28497314453125, + "learning_rate": 4.5311960454581685e-06, + "loss": 22.038, + "step": 288450 + }, + { + "epoch": 0.5827074503973464, + "grad_norm": 490.5456237792969, + "learning_rate": 4.5308485184714585e-06, + "loss": 16.3475, + "step": 288460 + }, + { + "epoch": 0.5827276510300302, + "grad_norm": 391.0112609863281, + "learning_rate": 4.530500993771335e-06, + "loss": 15.6698, + "step": 288470 + }, + { + "epoch": 0.582747851662714, + "grad_norm": 42.42919921875, + "learning_rate": 4.530153471359495e-06, + "loss": 14.5029, + "step": 288480 + }, + { + "epoch": 0.5827680522953979, + "grad_norm": 241.38282775878906, + "learning_rate": 4.529805951237628e-06, + "loss": 14.4093, + "step": 288490 + }, + { + "epoch": 0.5827882529280817, + "grad_norm": 162.1904754638672, + "learning_rate": 4.529458433407429e-06, + "loss": 15.7063, + "step": 288500 + }, + { + "epoch": 0.5828084535607655, + "grad_norm": 615.3388061523438, + "learning_rate": 4.529110917870594e-06, + "loss": 27.1512, + "step": 288510 + }, + { + "epoch": 0.5828286541934493, + "grad_norm": 329.695068359375, + "learning_rate": 4.528763404628815e-06, + "loss": 37.7019, + "step": 288520 + }, + { + "epoch": 0.5828488548261331, + "grad_norm": 411.4642639160156, + "learning_rate": 4.528415893683785e-06, + "loss": 17.7521, + "step": 288530 + }, + { + "epoch": 0.582869055458817, + "grad_norm": 225.48716735839844, + "learning_rate": 4.5280683850372e-06, + "loss": 8.8208, + "step": 288540 + }, + { + "epoch": 0.5828892560915008, + "grad_norm": 522.101806640625, + "learning_rate": 4.527720878690752e-06, + "loss": 26.4603, + "step": 288550 + }, + { + "epoch": 0.5829094567241846, + "grad_norm": 274.3569030761719, + "learning_rate": 4.527373374646136e-06, + "loss": 21.8242, + "step": 288560 + }, + { + "epoch": 0.5829296573568684, + "grad_norm": 226.85150146484375, + "learning_rate": 4.527025872905043e-06, + "loss": 11.6747, + "step": 288570 + }, + { + "epoch": 0.5829498579895522, + "grad_norm": 649.5722045898438, + "learning_rate": 4.52667837346917e-06, + "loss": 20.3602, + "step": 288580 + }, + { + "epoch": 0.582970058622236, + "grad_norm": 627.9529418945312, + "learning_rate": 4.526330876340209e-06, + "loss": 20.5397, + "step": 288590 + }, + { + "epoch": 0.5829902592549199, + "grad_norm": 178.1346893310547, + "learning_rate": 4.525983381519853e-06, + "loss": 14.0624, + "step": 288600 + }, + { + "epoch": 0.5830104598876037, + "grad_norm": 721.8845825195312, + "learning_rate": 4.525635889009798e-06, + "loss": 26.133, + "step": 288610 + }, + { + "epoch": 0.5830306605202875, + "grad_norm": 629.3155517578125, + "learning_rate": 4.5252883988117356e-06, + "loss": 21.6051, + "step": 288620 + }, + { + "epoch": 0.5830508611529713, + "grad_norm": 549.1826171875, + "learning_rate": 4.524940910927359e-06, + "loss": 26.5076, + "step": 288630 + }, + { + "epoch": 0.5830710617856552, + "grad_norm": 245.66929626464844, + "learning_rate": 4.524593425358364e-06, + "loss": 11.9352, + "step": 288640 + }, + { + "epoch": 0.583091262418339, + "grad_norm": 463.8624267578125, + "learning_rate": 4.524245942106442e-06, + "loss": 21.8307, + "step": 
288650 + }, + { + "epoch": 0.5831114630510228, + "grad_norm": 347.1028747558594, + "learning_rate": 4.523898461173288e-06, + "loss": 6.4501, + "step": 288660 + }, + { + "epoch": 0.5831316636837066, + "grad_norm": 693.39306640625, + "learning_rate": 4.5235509825605965e-06, + "loss": 21.0688, + "step": 288670 + }, + { + "epoch": 0.5831518643163904, + "grad_norm": 626.076904296875, + "learning_rate": 4.523203506270058e-06, + "loss": 17.7032, + "step": 288680 + }, + { + "epoch": 0.5831720649490743, + "grad_norm": 303.54803466796875, + "learning_rate": 4.5228560323033675e-06, + "loss": 18.9164, + "step": 288690 + }, + { + "epoch": 0.5831922655817581, + "grad_norm": 255.87606811523438, + "learning_rate": 4.522508560662219e-06, + "loss": 19.003, + "step": 288700 + }, + { + "epoch": 0.5832124662144418, + "grad_norm": 217.08895874023438, + "learning_rate": 4.522161091348308e-06, + "loss": 18.2368, + "step": 288710 + }, + { + "epoch": 0.5832326668471256, + "grad_norm": 529.8825073242188, + "learning_rate": 4.521813624363323e-06, + "loss": 25.8109, + "step": 288720 + }, + { + "epoch": 0.5832528674798094, + "grad_norm": 520.8623657226562, + "learning_rate": 4.521466159708962e-06, + "loss": 21.7238, + "step": 288730 + }, + { + "epoch": 0.5832730681124932, + "grad_norm": 29.747051239013672, + "learning_rate": 4.521118697386917e-06, + "loss": 11.2049, + "step": 288740 + }, + { + "epoch": 0.5832932687451771, + "grad_norm": 346.5938720703125, + "learning_rate": 4.52077123739888e-06, + "loss": 22.0653, + "step": 288750 + }, + { + "epoch": 0.5833134693778609, + "grad_norm": 179.41348266601562, + "learning_rate": 4.520423779746547e-06, + "loss": 19.6645, + "step": 288760 + }, + { + "epoch": 0.5833336700105447, + "grad_norm": 683.7769165039062, + "learning_rate": 4.520076324431612e-06, + "loss": 26.248, + "step": 288770 + }, + { + "epoch": 0.5833538706432285, + "grad_norm": 198.7277374267578, + "learning_rate": 4.519728871455764e-06, + "loss": 17.4634, + "step": 288780 + }, + { + "epoch": 0.5833740712759123, + "grad_norm": 60.829261779785156, + "learning_rate": 4.519381420820699e-06, + "loss": 25.8267, + "step": 288790 + }, + { + "epoch": 0.5833942719085962, + "grad_norm": 98.24775695800781, + "learning_rate": 4.519033972528114e-06, + "loss": 21.8371, + "step": 288800 + }, + { + "epoch": 0.58341447254128, + "grad_norm": 700.4127197265625, + "learning_rate": 4.518686526579695e-06, + "loss": 14.5168, + "step": 288810 + }, + { + "epoch": 0.5834346731739638, + "grad_norm": 361.0285339355469, + "learning_rate": 4.518339082977142e-06, + "loss": 23.4959, + "step": 288820 + }, + { + "epoch": 0.5834548738066476, + "grad_norm": 611.7372436523438, + "learning_rate": 4.517991641722146e-06, + "loss": 12.2447, + "step": 288830 + }, + { + "epoch": 0.5834750744393314, + "grad_norm": 477.4300842285156, + "learning_rate": 4.517644202816399e-06, + "loss": 20.9765, + "step": 288840 + }, + { + "epoch": 0.5834952750720153, + "grad_norm": 469.504638671875, + "learning_rate": 4.517296766261596e-06, + "loss": 14.6185, + "step": 288850 + }, + { + "epoch": 0.5835154757046991, + "grad_norm": 377.9756164550781, + "learning_rate": 4.516949332059429e-06, + "loss": 9.1162, + "step": 288860 + }, + { + "epoch": 0.5835356763373829, + "grad_norm": 477.607421875, + "learning_rate": 4.516601900211595e-06, + "loss": 27.8192, + "step": 288870 + }, + { + "epoch": 0.5835558769700667, + "grad_norm": 136.77139282226562, + "learning_rate": 4.516254470719783e-06, + "loss": 15.3713, + "step": 288880 + }, + { + "epoch": 0.5835760776027505, + "grad_norm": 
562.5999755859375, + "learning_rate": 4.515907043585688e-06, + "loss": 16.258, + "step": 288890 + }, + { + "epoch": 0.5835962782354344, + "grad_norm": 3.4296677112579346, + "learning_rate": 4.5155596188110055e-06, + "loss": 19.372, + "step": 288900 + }, + { + "epoch": 0.5836164788681182, + "grad_norm": 393.48583984375, + "learning_rate": 4.515212196397424e-06, + "loss": 13.7755, + "step": 288910 + }, + { + "epoch": 0.583636679500802, + "grad_norm": 251.51904296875, + "learning_rate": 4.5148647763466405e-06, + "loss": 21.0673, + "step": 288920 + }, + { + "epoch": 0.5836568801334858, + "grad_norm": 1052.9322509765625, + "learning_rate": 4.514517358660347e-06, + "loss": 16.6626, + "step": 288930 + }, + { + "epoch": 0.5836770807661696, + "grad_norm": 484.47119140625, + "learning_rate": 4.514169943340238e-06, + "loss": 19.6845, + "step": 288940 + }, + { + "epoch": 0.5836972813988535, + "grad_norm": 55.82490921020508, + "learning_rate": 4.513822530388004e-06, + "loss": 25.6817, + "step": 288950 + }, + { + "epoch": 0.5837174820315373, + "grad_norm": 392.26385498046875, + "learning_rate": 4.513475119805342e-06, + "loss": 16.0411, + "step": 288960 + }, + { + "epoch": 0.583737682664221, + "grad_norm": 221.4032745361328, + "learning_rate": 4.513127711593941e-06, + "loss": 9.7543, + "step": 288970 + }, + { + "epoch": 0.5837578832969048, + "grad_norm": 289.62725830078125, + "learning_rate": 4.512780305755498e-06, + "loss": 17.1563, + "step": 288980 + }, + { + "epoch": 0.5837780839295886, + "grad_norm": 84.89144897460938, + "learning_rate": 4.512432902291703e-06, + "loss": 30.9123, + "step": 288990 + }, + { + "epoch": 0.5837982845622725, + "grad_norm": 87.54905700683594, + "learning_rate": 4.512085501204254e-06, + "loss": 14.9372, + "step": 289000 + }, + { + "epoch": 0.5838184851949563, + "grad_norm": 118.93350982666016, + "learning_rate": 4.511738102494839e-06, + "loss": 13.1498, + "step": 289010 + }, + { + "epoch": 0.5838386858276401, + "grad_norm": 316.2390441894531, + "learning_rate": 4.5113907061651524e-06, + "loss": 22.6011, + "step": 289020 + }, + { + "epoch": 0.5838588864603239, + "grad_norm": 306.5341796875, + "learning_rate": 4.511043312216891e-06, + "loss": 26.2065, + "step": 289030 + }, + { + "epoch": 0.5838790870930077, + "grad_norm": 165.6309356689453, + "learning_rate": 4.510695920651742e-06, + "loss": 27.4762, + "step": 289040 + }, + { + "epoch": 0.5838992877256916, + "grad_norm": 160.45919799804688, + "learning_rate": 4.510348531471403e-06, + "loss": 16.4532, + "step": 289050 + }, + { + "epoch": 0.5839194883583754, + "grad_norm": 64.6205062866211, + "learning_rate": 4.510001144677568e-06, + "loss": 9.6832, + "step": 289060 + }, + { + "epoch": 0.5839396889910592, + "grad_norm": 452.0450439453125, + "learning_rate": 4.509653760271926e-06, + "loss": 12.1438, + "step": 289070 + }, + { + "epoch": 0.583959889623743, + "grad_norm": 19.967851638793945, + "learning_rate": 4.509306378256172e-06, + "loss": 13.3504, + "step": 289080 + }, + { + "epoch": 0.5839800902564268, + "grad_norm": 254.1773223876953, + "learning_rate": 4.508958998632e-06, + "loss": 23.1505, + "step": 289090 + }, + { + "epoch": 0.5840002908891107, + "grad_norm": 863.7036743164062, + "learning_rate": 4.508611621401102e-06, + "loss": 23.4529, + "step": 289100 + }, + { + "epoch": 0.5840204915217945, + "grad_norm": 303.6961669921875, + "learning_rate": 4.50826424656517e-06, + "loss": 15.552, + "step": 289110 + }, + { + "epoch": 0.5840406921544783, + "grad_norm": 358.7690734863281, + "learning_rate": 4.507916874125902e-06, + "loss": 
22.2946, + "step": 289120 + }, + { + "epoch": 0.5840608927871621, + "grad_norm": 330.37896728515625, + "learning_rate": 4.507569504084983e-06, + "loss": 28.6702, + "step": 289130 + }, + { + "epoch": 0.5840810934198459, + "grad_norm": 5.9064483642578125, + "learning_rate": 4.5072221364441126e-06, + "loss": 18.6341, + "step": 289140 + }, + { + "epoch": 0.5841012940525298, + "grad_norm": 214.5717315673828, + "learning_rate": 4.506874771204981e-06, + "loss": 15.4846, + "step": 289150 + }, + { + "epoch": 0.5841214946852136, + "grad_norm": 230.72467041015625, + "learning_rate": 4.506527408369285e-06, + "loss": 17.9754, + "step": 289160 + }, + { + "epoch": 0.5841416953178974, + "grad_norm": 510.48687744140625, + "learning_rate": 4.506180047938711e-06, + "loss": 37.1512, + "step": 289170 + }, + { + "epoch": 0.5841618959505812, + "grad_norm": 689.6371459960938, + "learning_rate": 4.505832689914956e-06, + "loss": 22.063, + "step": 289180 + }, + { + "epoch": 0.584182096583265, + "grad_norm": 243.24209594726562, + "learning_rate": 4.505485334299714e-06, + "loss": 12.5306, + "step": 289190 + }, + { + "epoch": 0.5842022972159489, + "grad_norm": 547.1610717773438, + "learning_rate": 4.505137981094675e-06, + "loss": 8.9335, + "step": 289200 + }, + { + "epoch": 0.5842224978486327, + "grad_norm": 285.6500549316406, + "learning_rate": 4.504790630301535e-06, + "loss": 19.7256, + "step": 289210 + }, + { + "epoch": 0.5842426984813164, + "grad_norm": 699.3668823242188, + "learning_rate": 4.504443281921985e-06, + "loss": 16.4456, + "step": 289220 + }, + { + "epoch": 0.5842628991140002, + "grad_norm": 481.00897216796875, + "learning_rate": 4.504095935957718e-06, + "loss": 18.3304, + "step": 289230 + }, + { + "epoch": 0.584283099746684, + "grad_norm": 563.0565185546875, + "learning_rate": 4.503748592410427e-06, + "loss": 18.334, + "step": 289240 + }, + { + "epoch": 0.5843033003793678, + "grad_norm": 357.5160827636719, + "learning_rate": 4.5034012512818065e-06, + "loss": 22.5118, + "step": 289250 + }, + { + "epoch": 0.5843235010120517, + "grad_norm": 362.5657958984375, + "learning_rate": 4.503053912573545e-06, + "loss": 10.575, + "step": 289260 + }, + { + "epoch": 0.5843437016447355, + "grad_norm": 168.14981079101562, + "learning_rate": 4.502706576287341e-06, + "loss": 16.7396, + "step": 289270 + }, + { + "epoch": 0.5843639022774193, + "grad_norm": 244.01336669921875, + "learning_rate": 4.502359242424885e-06, + "loss": 25.2524, + "step": 289280 + }, + { + "epoch": 0.5843841029101031, + "grad_norm": 512.7816772460938, + "learning_rate": 4.502011910987869e-06, + "loss": 16.5723, + "step": 289290 + }, + { + "epoch": 0.5844043035427869, + "grad_norm": 598.6182861328125, + "learning_rate": 4.5016645819779865e-06, + "loss": 27.588, + "step": 289300 + }, + { + "epoch": 0.5844245041754708, + "grad_norm": 686.1741943359375, + "learning_rate": 4.501317255396931e-06, + "loss": 14.1701, + "step": 289310 + }, + { + "epoch": 0.5844447048081546, + "grad_norm": 635.1903076171875, + "learning_rate": 4.500969931246394e-06, + "loss": 24.9359, + "step": 289320 + }, + { + "epoch": 0.5844649054408384, + "grad_norm": 277.31646728515625, + "learning_rate": 4.500622609528068e-06, + "loss": 13.0281, + "step": 289330 + }, + { + "epoch": 0.5844851060735222, + "grad_norm": 461.67437744140625, + "learning_rate": 4.500275290243648e-06, + "loss": 14.4205, + "step": 289340 + }, + { + "epoch": 0.584505306706206, + "grad_norm": 1250.0423583984375, + "learning_rate": 4.499927973394826e-06, + "loss": 15.3964, + "step": 289350 + }, + { + "epoch": 
0.5845255073388899, + "grad_norm": 604.5083618164062, + "learning_rate": 4.499580658983294e-06, + "loss": 20.9392, + "step": 289360 + }, + { + "epoch": 0.5845457079715737, + "grad_norm": 0.01670226640999317, + "learning_rate": 4.4992333470107455e-06, + "loss": 31.5604, + "step": 289370 + }, + { + "epoch": 0.5845659086042575, + "grad_norm": 291.5645446777344, + "learning_rate": 4.498886037478874e-06, + "loss": 16.1465, + "step": 289380 + }, + { + "epoch": 0.5845861092369413, + "grad_norm": 126.17826080322266, + "learning_rate": 4.498538730389369e-06, + "loss": 13.0477, + "step": 289390 + }, + { + "epoch": 0.5846063098696251, + "grad_norm": 501.94659423828125, + "learning_rate": 4.4981914257439254e-06, + "loss": 24.4841, + "step": 289400 + }, + { + "epoch": 0.584626510502309, + "grad_norm": 9.489340782165527, + "learning_rate": 4.497844123544239e-06, + "loss": 14.4267, + "step": 289410 + }, + { + "epoch": 0.5846467111349928, + "grad_norm": 153.04071044921875, + "learning_rate": 4.497496823791996e-06, + "loss": 28.7787, + "step": 289420 + }, + { + "epoch": 0.5846669117676766, + "grad_norm": 164.69967651367188, + "learning_rate": 4.497149526488893e-06, + "loss": 16.5493, + "step": 289430 + }, + { + "epoch": 0.5846871124003604, + "grad_norm": 139.81759643554688, + "learning_rate": 4.496802231636624e-06, + "loss": 13.9025, + "step": 289440 + }, + { + "epoch": 0.5847073130330442, + "grad_norm": 48.29279327392578, + "learning_rate": 4.496454939236879e-06, + "loss": 12.715, + "step": 289450 + }, + { + "epoch": 0.5847275136657281, + "grad_norm": 1061.7513427734375, + "learning_rate": 4.496107649291351e-06, + "loss": 24.7893, + "step": 289460 + }, + { + "epoch": 0.5847477142984119, + "grad_norm": 402.9743347167969, + "learning_rate": 4.495760361801732e-06, + "loss": 25.3875, + "step": 289470 + }, + { + "epoch": 0.5847679149310956, + "grad_norm": 202.3970184326172, + "learning_rate": 4.49541307676972e-06, + "loss": 19.6393, + "step": 289480 + }, + { + "epoch": 0.5847881155637794, + "grad_norm": 492.6229553222656, + "learning_rate": 4.495065794196999e-06, + "loss": 21.0966, + "step": 289490 + }, + { + "epoch": 0.5848083161964632, + "grad_norm": 111.33905029296875, + "learning_rate": 4.494718514085269e-06, + "loss": 18.6436, + "step": 289500 + }, + { + "epoch": 0.584828516829147, + "grad_norm": 302.810546875, + "learning_rate": 4.494371236436219e-06, + "loss": 18.3843, + "step": 289510 + }, + { + "epoch": 0.5848487174618309, + "grad_norm": 494.04827880859375, + "learning_rate": 4.494023961251542e-06, + "loss": 16.7611, + "step": 289520 + }, + { + "epoch": 0.5848689180945147, + "grad_norm": 309.8783874511719, + "learning_rate": 4.49367668853293e-06, + "loss": 20.4386, + "step": 289530 + }, + { + "epoch": 0.5848891187271985, + "grad_norm": 378.0588073730469, + "learning_rate": 4.4933294182820785e-06, + "loss": 20.1889, + "step": 289540 + }, + { + "epoch": 0.5849093193598823, + "grad_norm": 633.7971801757812, + "learning_rate": 4.492982150500677e-06, + "loss": 17.3576, + "step": 289550 + }, + { + "epoch": 0.5849295199925661, + "grad_norm": 259.9631652832031, + "learning_rate": 4.492634885190417e-06, + "loss": 21.1418, + "step": 289560 + }, + { + "epoch": 0.58494972062525, + "grad_norm": 342.33038330078125, + "learning_rate": 4.492287622352996e-06, + "loss": 15.3021, + "step": 289570 + }, + { + "epoch": 0.5849699212579338, + "grad_norm": 365.4823913574219, + "learning_rate": 4.491940361990101e-06, + "loss": 9.4205, + "step": 289580 + }, + { + "epoch": 0.5849901218906176, + "grad_norm": 412.4847412109375, + 
"learning_rate": 4.4915931041034285e-06, + "loss": 16.2116, + "step": 289590 + }, + { + "epoch": 0.5850103225233014, + "grad_norm": 399.02093505859375, + "learning_rate": 4.491245848694669e-06, + "loss": 24.1776, + "step": 289600 + }, + { + "epoch": 0.5850305231559852, + "grad_norm": 664.1265869140625, + "learning_rate": 4.490898595765517e-06, + "loss": 27.871, + "step": 289610 + }, + { + "epoch": 0.5850507237886691, + "grad_norm": 686.6106567382812, + "learning_rate": 4.490551345317662e-06, + "loss": 13.0017, + "step": 289620 + }, + { + "epoch": 0.5850709244213529, + "grad_norm": 591.8042602539062, + "learning_rate": 4.4902040973527974e-06, + "loss": 14.9969, + "step": 289630 + }, + { + "epoch": 0.5850911250540367, + "grad_norm": 510.093994140625, + "learning_rate": 4.489856851872619e-06, + "loss": 32.5532, + "step": 289640 + }, + { + "epoch": 0.5851113256867205, + "grad_norm": 156.0966796875, + "learning_rate": 4.489509608878813e-06, + "loss": 12.641, + "step": 289650 + }, + { + "epoch": 0.5851315263194043, + "grad_norm": 688.0595092773438, + "learning_rate": 4.4891623683730765e-06, + "loss": 21.0297, + "step": 289660 + }, + { + "epoch": 0.5851517269520882, + "grad_norm": 425.06640625, + "learning_rate": 4.488815130357103e-06, + "loss": 34.7271, + "step": 289670 + }, + { + "epoch": 0.585171927584772, + "grad_norm": 227.1132049560547, + "learning_rate": 4.48846789483258e-06, + "loss": 19.1965, + "step": 289680 + }, + { + "epoch": 0.5851921282174558, + "grad_norm": 185.37522888183594, + "learning_rate": 4.488120661801202e-06, + "loss": 13.6779, + "step": 289690 + }, + { + "epoch": 0.5852123288501396, + "grad_norm": 764.2107543945312, + "learning_rate": 4.487773431264664e-06, + "loss": 19.2322, + "step": 289700 + }, + { + "epoch": 0.5852325294828234, + "grad_norm": 267.73468017578125, + "learning_rate": 4.487426203224655e-06, + "loss": 14.0345, + "step": 289710 + }, + { + "epoch": 0.5852527301155073, + "grad_norm": 557.2090454101562, + "learning_rate": 4.487078977682867e-06, + "loss": 16.3052, + "step": 289720 + }, + { + "epoch": 0.585272930748191, + "grad_norm": 371.2828674316406, + "learning_rate": 4.486731754640997e-06, + "loss": 17.3501, + "step": 289730 + }, + { + "epoch": 0.5852931313808748, + "grad_norm": 417.99871826171875, + "learning_rate": 4.486384534100732e-06, + "loss": 27.2229, + "step": 289740 + }, + { + "epoch": 0.5853133320135586, + "grad_norm": 197.7482147216797, + "learning_rate": 4.4860373160637665e-06, + "loss": 18.6305, + "step": 289750 + }, + { + "epoch": 0.5853335326462424, + "grad_norm": 634.205078125, + "learning_rate": 4.485690100531793e-06, + "loss": 38.9959, + "step": 289760 + }, + { + "epoch": 0.5853537332789263, + "grad_norm": 21.159589767456055, + "learning_rate": 4.485342887506505e-06, + "loss": 7.3589, + "step": 289770 + }, + { + "epoch": 0.5853739339116101, + "grad_norm": 416.6664123535156, + "learning_rate": 4.484995676989592e-06, + "loss": 26.0226, + "step": 289780 + }, + { + "epoch": 0.5853941345442939, + "grad_norm": 187.21580505371094, + "learning_rate": 4.4846484689827465e-06, + "loss": 22.066, + "step": 289790 + }, + { + "epoch": 0.5854143351769777, + "grad_norm": 403.3133544921875, + "learning_rate": 4.484301263487664e-06, + "loss": 39.342, + "step": 289800 + }, + { + "epoch": 0.5854345358096615, + "grad_norm": 192.5809783935547, + "learning_rate": 4.483954060506033e-06, + "loss": 24.9163, + "step": 289810 + }, + { + "epoch": 0.5854547364423454, + "grad_norm": 529.3194580078125, + "learning_rate": 4.4836068600395484e-06, + "loss": 11.6182, + 
"step": 289820 + }, + { + "epoch": 0.5854749370750292, + "grad_norm": 221.81625366210938, + "learning_rate": 4.483259662089902e-06, + "loss": 19.4294, + "step": 289830 + }, + { + "epoch": 0.585495137707713, + "grad_norm": 710.7893676757812, + "learning_rate": 4.482912466658784e-06, + "loss": 17.4546, + "step": 289840 + }, + { + "epoch": 0.5855153383403968, + "grad_norm": 870.0003662109375, + "learning_rate": 4.482565273747888e-06, + "loss": 15.259, + "step": 289850 + }, + { + "epoch": 0.5855355389730806, + "grad_norm": 112.72933959960938, + "learning_rate": 4.482218083358907e-06, + "loss": 31.1874, + "step": 289860 + }, + { + "epoch": 0.5855557396057645, + "grad_norm": 295.20269775390625, + "learning_rate": 4.481870895493531e-06, + "loss": 17.5055, + "step": 289870 + }, + { + "epoch": 0.5855759402384483, + "grad_norm": 875.2280883789062, + "learning_rate": 4.481523710153454e-06, + "loss": 27.2396, + "step": 289880 + }, + { + "epoch": 0.5855961408711321, + "grad_norm": 524.6953125, + "learning_rate": 4.481176527340368e-06, + "loss": 23.6339, + "step": 289890 + }, + { + "epoch": 0.5856163415038159, + "grad_norm": 267.17681884765625, + "learning_rate": 4.4808293470559645e-06, + "loss": 12.8049, + "step": 289900 + }, + { + "epoch": 0.5856365421364997, + "grad_norm": 495.7286682128906, + "learning_rate": 4.480482169301935e-06, + "loss": 19.0436, + "step": 289910 + }, + { + "epoch": 0.5856567427691836, + "grad_norm": 181.76971435546875, + "learning_rate": 4.480134994079973e-06, + "loss": 12.6426, + "step": 289920 + }, + { + "epoch": 0.5856769434018674, + "grad_norm": 967.9786987304688, + "learning_rate": 4.479787821391771e-06, + "loss": 21.7154, + "step": 289930 + }, + { + "epoch": 0.5856971440345512, + "grad_norm": 180.23681640625, + "learning_rate": 4.4794406512390175e-06, + "loss": 14.4039, + "step": 289940 + }, + { + "epoch": 0.585717344667235, + "grad_norm": 91.26536560058594, + "learning_rate": 4.479093483623409e-06, + "loss": 16.9906, + "step": 289950 + }, + { + "epoch": 0.5857375452999188, + "grad_norm": 179.8864288330078, + "learning_rate": 4.478746318546636e-06, + "loss": 14.3518, + "step": 289960 + }, + { + "epoch": 0.5857577459326027, + "grad_norm": 1979.79150390625, + "learning_rate": 4.478399156010389e-06, + "loss": 26.318, + "step": 289970 + }, + { + "epoch": 0.5857779465652865, + "grad_norm": 107.35133361816406, + "learning_rate": 4.478051996016362e-06, + "loss": 8.2717, + "step": 289980 + }, + { + "epoch": 0.5857981471979702, + "grad_norm": 308.7791748046875, + "learning_rate": 4.477704838566246e-06, + "loss": 14.4244, + "step": 289990 + }, + { + "epoch": 0.585818347830654, + "grad_norm": 445.58343505859375, + "learning_rate": 4.477357683661734e-06, + "loss": 26.5464, + "step": 290000 + }, + { + "epoch": 0.5858385484633378, + "grad_norm": 211.74684143066406, + "learning_rate": 4.477010531304515e-06, + "loss": 21.5916, + "step": 290010 + }, + { + "epoch": 0.5858587490960216, + "grad_norm": 361.0027770996094, + "learning_rate": 4.476663381496287e-06, + "loss": 20.5326, + "step": 290020 + }, + { + "epoch": 0.5858789497287055, + "grad_norm": 46.908023834228516, + "learning_rate": 4.476316234238735e-06, + "loss": 13.4577, + "step": 290030 + }, + { + "epoch": 0.5858991503613893, + "grad_norm": 577.0363159179688, + "learning_rate": 4.4759690895335545e-06, + "loss": 19.1926, + "step": 290040 + }, + { + "epoch": 0.5859193509940731, + "grad_norm": 274.8571472167969, + "learning_rate": 4.475621947382438e-06, + "loss": 15.9146, + "step": 290050 + }, + { + "epoch": 0.5859395516267569, + 
"grad_norm": 319.69970703125, + "learning_rate": 4.475274807787077e-06, + "loss": 18.9863, + "step": 290060 + }, + { + "epoch": 0.5859597522594407, + "grad_norm": 284.3815002441406, + "learning_rate": 4.474927670749162e-06, + "loss": 23.6112, + "step": 290070 + }, + { + "epoch": 0.5859799528921246, + "grad_norm": 355.4840087890625, + "learning_rate": 4.474580536270385e-06, + "loss": 24.4852, + "step": 290080 + }, + { + "epoch": 0.5860001535248084, + "grad_norm": 139.454833984375, + "learning_rate": 4.474233404352442e-06, + "loss": 15.0361, + "step": 290090 + }, + { + "epoch": 0.5860203541574922, + "grad_norm": 235.73052978515625, + "learning_rate": 4.473886274997018e-06, + "loss": 20.6806, + "step": 290100 + }, + { + "epoch": 0.586040554790176, + "grad_norm": 212.15199279785156, + "learning_rate": 4.47353914820581e-06, + "loss": 15.8834, + "step": 290110 + }, + { + "epoch": 0.5860607554228598, + "grad_norm": 295.6542053222656, + "learning_rate": 4.473192023980509e-06, + "loss": 32.0004, + "step": 290120 + }, + { + "epoch": 0.5860809560555437, + "grad_norm": 457.3767395019531, + "learning_rate": 4.472844902322805e-06, + "loss": 15.9072, + "step": 290130 + }, + { + "epoch": 0.5861011566882275, + "grad_norm": 84.76927947998047, + "learning_rate": 4.472497783234392e-06, + "loss": 11.4173, + "step": 290140 + }, + { + "epoch": 0.5861213573209113, + "grad_norm": 74.38208770751953, + "learning_rate": 4.472150666716961e-06, + "loss": 51.078, + "step": 290150 + }, + { + "epoch": 0.5861415579535951, + "grad_norm": 386.08428955078125, + "learning_rate": 4.471803552772203e-06, + "loss": 18.1884, + "step": 290160 + }, + { + "epoch": 0.586161758586279, + "grad_norm": 429.62066650390625, + "learning_rate": 4.471456441401809e-06, + "loss": 26.5948, + "step": 290170 + }, + { + "epoch": 0.5861819592189628, + "grad_norm": 357.4833679199219, + "learning_rate": 4.471109332607475e-06, + "loss": 22.8235, + "step": 290180 + }, + { + "epoch": 0.5862021598516466, + "grad_norm": 412.8485412597656, + "learning_rate": 4.4707622263908875e-06, + "loss": 10.7813, + "step": 290190 + }, + { + "epoch": 0.5862223604843304, + "grad_norm": 303.6448974609375, + "learning_rate": 4.470415122753742e-06, + "loss": 26.0047, + "step": 290200 + }, + { + "epoch": 0.5862425611170142, + "grad_norm": 502.8114013671875, + "learning_rate": 4.470068021697728e-06, + "loss": 17.8887, + "step": 290210 + }, + { + "epoch": 0.586262761749698, + "grad_norm": 531.0615844726562, + "learning_rate": 4.4697209232245395e-06, + "loss": 23.2769, + "step": 290220 + }, + { + "epoch": 0.5862829623823819, + "grad_norm": 398.9246826171875, + "learning_rate": 4.469373827335866e-06, + "loss": 21.4093, + "step": 290230 + }, + { + "epoch": 0.5863031630150657, + "grad_norm": 65.7341537475586, + "learning_rate": 4.4690267340334e-06, + "loss": 22.8751, + "step": 290240 + }, + { + "epoch": 0.5863233636477494, + "grad_norm": 404.8388671875, + "learning_rate": 4.468679643318836e-06, + "loss": 25.5584, + "step": 290250 + }, + { + "epoch": 0.5863435642804332, + "grad_norm": 1449.1175537109375, + "learning_rate": 4.468332555193859e-06, + "loss": 28.4466, + "step": 290260 + }, + { + "epoch": 0.586363764913117, + "grad_norm": 658.9000854492188, + "learning_rate": 4.467985469660166e-06, + "loss": 26.6578, + "step": 290270 + }, + { + "epoch": 0.5863839655458009, + "grad_norm": 603.8819580078125, + "learning_rate": 4.467638386719448e-06, + "loss": 23.7443, + "step": 290280 + }, + { + "epoch": 0.5864041661784847, + "grad_norm": 437.9130859375, + "learning_rate": 
4.467291306373396e-06, + "loss": 16.2615, + "step": 290290 + }, + { + "epoch": 0.5864243668111685, + "grad_norm": 521.2614135742188, + "learning_rate": 4.466944228623701e-06, + "loss": 33.7539, + "step": 290300 + }, + { + "epoch": 0.5864445674438523, + "grad_norm": 266.4351806640625, + "learning_rate": 4.466597153472056e-06, + "loss": 26.3424, + "step": 290310 + }, + { + "epoch": 0.5864647680765361, + "grad_norm": 18.054241180419922, + "learning_rate": 4.4662500809201515e-06, + "loss": 17.4335, + "step": 290320 + }, + { + "epoch": 0.58648496870922, + "grad_norm": 486.8254699707031, + "learning_rate": 4.465903010969677e-06, + "loss": 29.748, + "step": 290330 + }, + { + "epoch": 0.5865051693419038, + "grad_norm": 391.8296813964844, + "learning_rate": 4.46555594362233e-06, + "loss": 22.5543, + "step": 290340 + }, + { + "epoch": 0.5865253699745876, + "grad_norm": 401.99688720703125, + "learning_rate": 4.4652088788797965e-06, + "loss": 26.4605, + "step": 290350 + }, + { + "epoch": 0.5865455706072714, + "grad_norm": 324.543212890625, + "learning_rate": 4.46486181674377e-06, + "loss": 14.6501, + "step": 290360 + }, + { + "epoch": 0.5865657712399552, + "grad_norm": 355.5074157714844, + "learning_rate": 4.464514757215943e-06, + "loss": 12.5329, + "step": 290370 + }, + { + "epoch": 0.5865859718726391, + "grad_norm": 328.8937072753906, + "learning_rate": 4.464167700298006e-06, + "loss": 17.8105, + "step": 290380 + }, + { + "epoch": 0.5866061725053229, + "grad_norm": 86.53602600097656, + "learning_rate": 4.463820645991651e-06, + "loss": 17.9668, + "step": 290390 + }, + { + "epoch": 0.5866263731380067, + "grad_norm": 529.2399291992188, + "learning_rate": 4.463473594298567e-06, + "loss": 29.0053, + "step": 290400 + }, + { + "epoch": 0.5866465737706905, + "grad_norm": 433.610107421875, + "learning_rate": 4.463126545220451e-06, + "loss": 14.7375, + "step": 290410 + }, + { + "epoch": 0.5866667744033743, + "grad_norm": 68.43505096435547, + "learning_rate": 4.462779498758988e-06, + "loss": 28.9777, + "step": 290420 + }, + { + "epoch": 0.5866869750360582, + "grad_norm": 524.79833984375, + "learning_rate": 4.462432454915873e-06, + "loss": 7.0757, + "step": 290430 + }, + { + "epoch": 0.586707175668742, + "grad_norm": 273.8827209472656, + "learning_rate": 4.4620854136928e-06, + "loss": 13.9228, + "step": 290440 + }, + { + "epoch": 0.5867273763014258, + "grad_norm": 232.83982849121094, + "learning_rate": 4.461738375091454e-06, + "loss": 26.8315, + "step": 290450 + }, + { + "epoch": 0.5867475769341096, + "grad_norm": 306.6068420410156, + "learning_rate": 4.461391339113531e-06, + "loss": 12.5558, + "step": 290460 + }, + { + "epoch": 0.5867677775667934, + "grad_norm": 587.6408081054688, + "learning_rate": 4.461044305760722e-06, + "loss": 29.6375, + "step": 290470 + }, + { + "epoch": 0.5867879781994773, + "grad_norm": 544.8992309570312, + "learning_rate": 4.460697275034717e-06, + "loss": 24.7492, + "step": 290480 + }, + { + "epoch": 0.5868081788321611, + "grad_norm": 377.0224609375, + "learning_rate": 4.460350246937207e-06, + "loss": 22.4234, + "step": 290490 + }, + { + "epoch": 0.5868283794648448, + "grad_norm": 430.548583984375, + "learning_rate": 4.460003221469886e-06, + "loss": 18.8128, + "step": 290500 + }, + { + "epoch": 0.5868485800975286, + "grad_norm": 229.59872436523438, + "learning_rate": 4.459656198634444e-06, + "loss": 17.5641, + "step": 290510 + }, + { + "epoch": 0.5868687807302124, + "grad_norm": 103.71757507324219, + "learning_rate": 4.459309178432571e-06, + "loss": 20.7203, + "step": 290520 + }, + { 
+ "epoch": 0.5868889813628962, + "grad_norm": 616.0745239257812, + "learning_rate": 4.458962160865961e-06, + "loss": 54.3132, + "step": 290530 + }, + { + "epoch": 0.5869091819955801, + "grad_norm": 600.0247192382812, + "learning_rate": 4.458615145936303e-06, + "loss": 17.3017, + "step": 290540 + }, + { + "epoch": 0.5869293826282639, + "grad_norm": 744.3265991210938, + "learning_rate": 4.458268133645289e-06, + "loss": 11.4757, + "step": 290550 + }, + { + "epoch": 0.5869495832609477, + "grad_norm": 552.5853271484375, + "learning_rate": 4.457921123994609e-06, + "loss": 16.1665, + "step": 290560 + }, + { + "epoch": 0.5869697838936315, + "grad_norm": 561.9412231445312, + "learning_rate": 4.457574116985958e-06, + "loss": 22.5984, + "step": 290570 + }, + { + "epoch": 0.5869899845263153, + "grad_norm": 507.02099609375, + "learning_rate": 4.457227112621024e-06, + "loss": 16.5404, + "step": 290580 + }, + { + "epoch": 0.5870101851589992, + "grad_norm": 151.58840942382812, + "learning_rate": 4.456880110901499e-06, + "loss": 15.3226, + "step": 290590 + }, + { + "epoch": 0.587030385791683, + "grad_norm": 186.79566955566406, + "learning_rate": 4.456533111829076e-06, + "loss": 25.297, + "step": 290600 + }, + { + "epoch": 0.5870505864243668, + "grad_norm": 442.0946350097656, + "learning_rate": 4.456186115405443e-06, + "loss": 15.7763, + "step": 290610 + }, + { + "epoch": 0.5870707870570506, + "grad_norm": 608.88330078125, + "learning_rate": 4.455839121632292e-06, + "loss": 25.6646, + "step": 290620 + }, + { + "epoch": 0.5870909876897344, + "grad_norm": 107.75144958496094, + "learning_rate": 4.455492130511318e-06, + "loss": 14.3466, + "step": 290630 + }, + { + "epoch": 0.5871111883224183, + "grad_norm": 391.6009826660156, + "learning_rate": 4.455145142044207e-06, + "loss": 19.8789, + "step": 290640 + }, + { + "epoch": 0.5871313889551021, + "grad_norm": 410.3229675292969, + "learning_rate": 4.4547981562326535e-06, + "loss": 14.6501, + "step": 290650 + }, + { + "epoch": 0.5871515895877859, + "grad_norm": 876.847412109375, + "learning_rate": 4.454451173078347e-06, + "loss": 18.3208, + "step": 290660 + }, + { + "epoch": 0.5871717902204697, + "grad_norm": 202.0698699951172, + "learning_rate": 4.454104192582981e-06, + "loss": 20.384, + "step": 290670 + }, + { + "epoch": 0.5871919908531535, + "grad_norm": 489.487060546875, + "learning_rate": 4.453757214748243e-06, + "loss": 33.0625, + "step": 290680 + }, + { + "epoch": 0.5872121914858374, + "grad_norm": 452.5086975097656, + "learning_rate": 4.453410239575826e-06, + "loss": 24.4016, + "step": 290690 + }, + { + "epoch": 0.5872323921185212, + "grad_norm": 161.07870483398438, + "learning_rate": 4.453063267067424e-06, + "loss": 13.2982, + "step": 290700 + }, + { + "epoch": 0.587252592751205, + "grad_norm": 187.7038116455078, + "learning_rate": 4.452716297224722e-06, + "loss": 18.8365, + "step": 290710 + }, + { + "epoch": 0.5872727933838888, + "grad_norm": 412.16259765625, + "learning_rate": 4.452369330049415e-06, + "loss": 20.687, + "step": 290720 + }, + { + "epoch": 0.5872929940165726, + "grad_norm": 195.6519317626953, + "learning_rate": 4.452022365543195e-06, + "loss": 12.3374, + "step": 290730 + }, + { + "epoch": 0.5873131946492565, + "grad_norm": 726.9053955078125, + "learning_rate": 4.451675403707751e-06, + "loss": 20.9327, + "step": 290740 + }, + { + "epoch": 0.5873333952819403, + "grad_norm": 136.5218505859375, + "learning_rate": 4.451328444544774e-06, + "loss": 28.0344, + "step": 290750 + }, + { + "epoch": 0.587353595914624, + "grad_norm": 264.0720520019531, + 
"learning_rate": 4.450981488055957e-06, + "loss": 10.633, + "step": 290760 + }, + { + "epoch": 0.5873737965473078, + "grad_norm": 170.5759735107422, + "learning_rate": 4.450634534242989e-06, + "loss": 15.1688, + "step": 290770 + }, + { + "epoch": 0.5873939971799916, + "grad_norm": 142.72483825683594, + "learning_rate": 4.4502875831075596e-06, + "loss": 15.1651, + "step": 290780 + }, + { + "epoch": 0.5874141978126755, + "grad_norm": 121.17604064941406, + "learning_rate": 4.449940634651365e-06, + "loss": 20.2002, + "step": 290790 + }, + { + "epoch": 0.5874343984453593, + "grad_norm": 239.14881896972656, + "learning_rate": 4.44959368887609e-06, + "loss": 21.0542, + "step": 290800 + }, + { + "epoch": 0.5874545990780431, + "grad_norm": 786.7036743164062, + "learning_rate": 4.44924674578343e-06, + "loss": 18.8186, + "step": 290810 + }, + { + "epoch": 0.5874747997107269, + "grad_norm": 751.5238037109375, + "learning_rate": 4.4488998053750746e-06, + "loss": 29.3956, + "step": 290820 + }, + { + "epoch": 0.5874950003434107, + "grad_norm": 50.34823226928711, + "learning_rate": 4.448552867652715e-06, + "loss": 15.6865, + "step": 290830 + }, + { + "epoch": 0.5875152009760946, + "grad_norm": 153.85646057128906, + "learning_rate": 4.448205932618042e-06, + "loss": 11.5163, + "step": 290840 + }, + { + "epoch": 0.5875354016087784, + "grad_norm": 656.3759155273438, + "learning_rate": 4.447859000272744e-06, + "loss": 18.2736, + "step": 290850 + }, + { + "epoch": 0.5875556022414622, + "grad_norm": 509.01837158203125, + "learning_rate": 4.447512070618519e-06, + "loss": 25.3698, + "step": 290860 + }, + { + "epoch": 0.587575802874146, + "grad_norm": 397.4849548339844, + "learning_rate": 4.447165143657049e-06, + "loss": 15.6323, + "step": 290870 + }, + { + "epoch": 0.5875960035068298, + "grad_norm": 450.42822265625, + "learning_rate": 4.44681821939003e-06, + "loss": 21.6473, + "step": 290880 + }, + { + "epoch": 0.5876162041395137, + "grad_norm": 460.9064025878906, + "learning_rate": 4.446471297819154e-06, + "loss": 18.155, + "step": 290890 + }, + { + "epoch": 0.5876364047721975, + "grad_norm": 444.8457946777344, + "learning_rate": 4.446124378946108e-06, + "loss": 15.3499, + "step": 290900 + }, + { + "epoch": 0.5876566054048813, + "grad_norm": 137.37344360351562, + "learning_rate": 4.4457774627725835e-06, + "loss": 9.8823, + "step": 290910 + }, + { + "epoch": 0.5876768060375651, + "grad_norm": 1177.26171875, + "learning_rate": 4.4454305493002744e-06, + "loss": 17.1093, + "step": 290920 + }, + { + "epoch": 0.5876970066702489, + "grad_norm": 467.1009216308594, + "learning_rate": 4.44508363853087e-06, + "loss": 17.3269, + "step": 290930 + }, + { + "epoch": 0.5877172073029328, + "grad_norm": 14.7145414352417, + "learning_rate": 4.444736730466057e-06, + "loss": 14.337, + "step": 290940 + }, + { + "epoch": 0.5877374079356166, + "grad_norm": 623.8619995117188, + "learning_rate": 4.444389825107534e-06, + "loss": 28.5236, + "step": 290950 + }, + { + "epoch": 0.5877576085683004, + "grad_norm": 848.1859130859375, + "learning_rate": 4.444042922456985e-06, + "loss": 22.265, + "step": 290960 + }, + { + "epoch": 0.5877778092009842, + "grad_norm": 206.10719299316406, + "learning_rate": 4.4436960225161045e-06, + "loss": 28.2873, + "step": 290970 + }, + { + "epoch": 0.587798009833668, + "grad_norm": 235.267578125, + "learning_rate": 4.443349125286581e-06, + "loss": 21.8742, + "step": 290980 + }, + { + "epoch": 0.5878182104663519, + "grad_norm": 253.72393798828125, + "learning_rate": 4.443002230770108e-06, + "loss": 14.245, + "step": 
290990 + }, + { + "epoch": 0.5878384110990357, + "grad_norm": 59.76152420043945, + "learning_rate": 4.442655338968373e-06, + "loss": 22.391, + "step": 291000 + }, + { + "epoch": 0.5878586117317194, + "grad_norm": 252.1258087158203, + "learning_rate": 4.4423084498830685e-06, + "loss": 31.2413, + "step": 291010 + }, + { + "epoch": 0.5878788123644032, + "grad_norm": 429.10491943359375, + "learning_rate": 4.4419615635158875e-06, + "loss": 18.1277, + "step": 291020 + }, + { + "epoch": 0.587899012997087, + "grad_norm": 486.5559997558594, + "learning_rate": 4.441614679868514e-06, + "loss": 21.1466, + "step": 291030 + }, + { + "epoch": 0.5879192136297708, + "grad_norm": 324.23699951171875, + "learning_rate": 4.441267798942646e-06, + "loss": 13.5959, + "step": 291040 + }, + { + "epoch": 0.5879394142624547, + "grad_norm": 464.66375732421875, + "learning_rate": 4.44092092073997e-06, + "loss": 11.8075, + "step": 291050 + }, + { + "epoch": 0.5879596148951385, + "grad_norm": 18.357135772705078, + "learning_rate": 4.440574045262178e-06, + "loss": 14.8909, + "step": 291060 + }, + { + "epoch": 0.5879798155278223, + "grad_norm": 305.482666015625, + "learning_rate": 4.440227172510959e-06, + "loss": 19.1991, + "step": 291070 + }, + { + "epoch": 0.5880000161605061, + "grad_norm": 248.35919189453125, + "learning_rate": 4.439880302488007e-06, + "loss": 12.7908, + "step": 291080 + }, + { + "epoch": 0.5880202167931899, + "grad_norm": 480.1448059082031, + "learning_rate": 4.439533435195009e-06, + "loss": 10.5695, + "step": 291090 + }, + { + "epoch": 0.5880404174258738, + "grad_norm": 265.6449279785156, + "learning_rate": 4.439186570633656e-06, + "loss": 9.9222, + "step": 291100 + }, + { + "epoch": 0.5880606180585576, + "grad_norm": 328.6371154785156, + "learning_rate": 4.43883970880564e-06, + "loss": 19.673, + "step": 291110 + }, + { + "epoch": 0.5880808186912414, + "grad_norm": 1000.6197509765625, + "learning_rate": 4.4384928497126534e-06, + "loss": 31.4514, + "step": 291120 + }, + { + "epoch": 0.5881010193239252, + "grad_norm": 771.0360717773438, + "learning_rate": 4.438145993356383e-06, + "loss": 19.6446, + "step": 291130 + }, + { + "epoch": 0.588121219956609, + "grad_norm": 199.8487548828125, + "learning_rate": 4.437799139738521e-06, + "loss": 15.8613, + "step": 291140 + }, + { + "epoch": 0.5881414205892929, + "grad_norm": 322.72802734375, + "learning_rate": 4.437452288860759e-06, + "loss": 14.5427, + "step": 291150 + }, + { + "epoch": 0.5881616212219767, + "grad_norm": 19.49073600769043, + "learning_rate": 4.437105440724785e-06, + "loss": 10.3116, + "step": 291160 + }, + { + "epoch": 0.5881818218546605, + "grad_norm": 501.9320068359375, + "learning_rate": 4.43675859533229e-06, + "loss": 12.4192, + "step": 291170 + }, + { + "epoch": 0.5882020224873443, + "grad_norm": 114.68348693847656, + "learning_rate": 4.4364117526849674e-06, + "loss": 14.7263, + "step": 291180 + }, + { + "epoch": 0.5882222231200281, + "grad_norm": 254.6109161376953, + "learning_rate": 4.436064912784504e-06, + "loss": 26.2108, + "step": 291190 + }, + { + "epoch": 0.588242423752712, + "grad_norm": 255.48748779296875, + "learning_rate": 4.4357180756325915e-06, + "loss": 30.6562, + "step": 291200 + }, + { + "epoch": 0.5882626243853958, + "grad_norm": 339.65087890625, + "learning_rate": 4.435371241230923e-06, + "loss": 19.6818, + "step": 291210 + }, + { + "epoch": 0.5882828250180796, + "grad_norm": 574.1737670898438, + "learning_rate": 4.435024409581185e-06, + "loss": 10.0981, + "step": 291220 + }, + { + "epoch": 0.5883030256507634, + 
"grad_norm": 927.2265625, + "learning_rate": 4.434677580685069e-06, + "loss": 27.3333, + "step": 291230 + }, + { + "epoch": 0.5883232262834472, + "grad_norm": 205.8227081298828, + "learning_rate": 4.434330754544267e-06, + "loss": 9.6747, + "step": 291240 + }, + { + "epoch": 0.5883434269161311, + "grad_norm": 529.600341796875, + "learning_rate": 4.4339839311604675e-06, + "loss": 33.645, + "step": 291250 + }, + { + "epoch": 0.5883636275488149, + "grad_norm": 451.12298583984375, + "learning_rate": 4.433637110535361e-06, + "loss": 18.1124, + "step": 291260 + }, + { + "epoch": 0.5883838281814986, + "grad_norm": 38.88941955566406, + "learning_rate": 4.4332902926706395e-06, + "loss": 11.802, + "step": 291270 + }, + { + "epoch": 0.5884040288141824, + "grad_norm": 319.8966979980469, + "learning_rate": 4.432943477567993e-06, + "loss": 18.3529, + "step": 291280 + }, + { + "epoch": 0.5884242294468662, + "grad_norm": 122.94017028808594, + "learning_rate": 4.43259666522911e-06, + "loss": 27.3233, + "step": 291290 + }, + { + "epoch": 0.58844443007955, + "grad_norm": 464.04132080078125, + "learning_rate": 4.432249855655681e-06, + "loss": 17.2269, + "step": 291300 + }, + { + "epoch": 0.5884646307122339, + "grad_norm": 419.5864562988281, + "learning_rate": 4.431903048849402e-06, + "loss": 22.1468, + "step": 291310 + }, + { + "epoch": 0.5884848313449177, + "grad_norm": 610.0906982421875, + "learning_rate": 4.431556244811954e-06, + "loss": 24.5321, + "step": 291320 + }, + { + "epoch": 0.5885050319776015, + "grad_norm": 608.2277221679688, + "learning_rate": 4.431209443545033e-06, + "loss": 22.2584, + "step": 291330 + }, + { + "epoch": 0.5885252326102853, + "grad_norm": 363.5028076171875, + "learning_rate": 4.43086264505033e-06, + "loss": 18.7677, + "step": 291340 + }, + { + "epoch": 0.5885454332429692, + "grad_norm": 67.64398956298828, + "learning_rate": 4.430515849329532e-06, + "loss": 14.4721, + "step": 291350 + }, + { + "epoch": 0.588565633875653, + "grad_norm": 184.47952270507812, + "learning_rate": 4.43016905638433e-06, + "loss": 18.4786, + "step": 291360 + }, + { + "epoch": 0.5885858345083368, + "grad_norm": 323.0634460449219, + "learning_rate": 4.429822266216417e-06, + "loss": 12.0939, + "step": 291370 + }, + { + "epoch": 0.5886060351410206, + "grad_norm": 24.33133316040039, + "learning_rate": 4.42947547882748e-06, + "loss": 23.6053, + "step": 291380 + }, + { + "epoch": 0.5886262357737044, + "grad_norm": 321.9494323730469, + "learning_rate": 4.4291286942192085e-06, + "loss": 23.0632, + "step": 291390 + }, + { + "epoch": 0.5886464364063883, + "grad_norm": 142.2558135986328, + "learning_rate": 4.428781912393299e-06, + "loss": 17.5378, + "step": 291400 + }, + { + "epoch": 0.5886666370390721, + "grad_norm": 976.93359375, + "learning_rate": 4.4284351333514315e-06, + "loss": 20.9709, + "step": 291410 + }, + { + "epoch": 0.5886868376717559, + "grad_norm": 308.3500061035156, + "learning_rate": 4.428088357095306e-06, + "loss": 21.0857, + "step": 291420 + }, + { + "epoch": 0.5887070383044397, + "grad_norm": 209.01194763183594, + "learning_rate": 4.427741583626607e-06, + "loss": 19.3746, + "step": 291430 + }, + { + "epoch": 0.5887272389371235, + "grad_norm": 293.47027587890625, + "learning_rate": 4.4273948129470264e-06, + "loss": 26.9354, + "step": 291440 + }, + { + "epoch": 0.5887474395698074, + "grad_norm": 370.9302673339844, + "learning_rate": 4.427048045058254e-06, + "loss": 41.0488, + "step": 291450 + }, + { + "epoch": 0.5887676402024912, + "grad_norm": 210.16143798828125, + "learning_rate": 
4.426701279961978e-06, + "loss": 27.4176, + "step": 291460 + }, + { + "epoch": 0.588787840835175, + "grad_norm": 277.9696044921875, + "learning_rate": 4.426354517659894e-06, + "loss": 26.8509, + "step": 291470 + }, + { + "epoch": 0.5888080414678588, + "grad_norm": 1105.115234375, + "learning_rate": 4.426007758153686e-06, + "loss": 17.7384, + "step": 291480 + }, + { + "epoch": 0.5888282421005426, + "grad_norm": 39.77167510986328, + "learning_rate": 4.4256610014450465e-06, + "loss": 8.5918, + "step": 291490 + }, + { + "epoch": 0.5888484427332265, + "grad_norm": 216.14683532714844, + "learning_rate": 4.425314247535668e-06, + "loss": 27.5087, + "step": 291500 + }, + { + "epoch": 0.5888686433659103, + "grad_norm": 747.41552734375, + "learning_rate": 4.4249674964272365e-06, + "loss": 20.6558, + "step": 291510 + }, + { + "epoch": 0.5888888439985941, + "grad_norm": 413.9656982421875, + "learning_rate": 4.424620748121443e-06, + "loss": 9.3693, + "step": 291520 + }, + { + "epoch": 0.5889090446312778, + "grad_norm": 226.15438842773438, + "learning_rate": 4.42427400261998e-06, + "loss": 15.4582, + "step": 291530 + }, + { + "epoch": 0.5889292452639616, + "grad_norm": 177.61599731445312, + "learning_rate": 4.423927259924535e-06, + "loss": 13.591, + "step": 291540 + }, + { + "epoch": 0.5889494458966454, + "grad_norm": 375.0335693359375, + "learning_rate": 4.423580520036797e-06, + "loss": 16.3396, + "step": 291550 + }, + { + "epoch": 0.5889696465293293, + "grad_norm": 354.91650390625, + "learning_rate": 4.423233782958459e-06, + "loss": 20.1236, + "step": 291560 + }, + { + "epoch": 0.5889898471620131, + "grad_norm": 685.5487060546875, + "learning_rate": 4.42288704869121e-06, + "loss": 22.85, + "step": 291570 + }, + { + "epoch": 0.5890100477946969, + "grad_norm": 615.638671875, + "learning_rate": 4.422540317236739e-06, + "loss": 32.7813, + "step": 291580 + }, + { + "epoch": 0.5890302484273807, + "grad_norm": 283.7459411621094, + "learning_rate": 4.422193588596736e-06, + "loss": 16.7828, + "step": 291590 + }, + { + "epoch": 0.5890504490600645, + "grad_norm": 325.37493896484375, + "learning_rate": 4.4218468627728935e-06, + "loss": 19.9842, + "step": 291600 + }, + { + "epoch": 0.5890706496927484, + "grad_norm": 72.0322265625, + "learning_rate": 4.421500139766897e-06, + "loss": 14.3139, + "step": 291610 + }, + { + "epoch": 0.5890908503254322, + "grad_norm": 136.69898986816406, + "learning_rate": 4.4211534195804385e-06, + "loss": 13.7135, + "step": 291620 + }, + { + "epoch": 0.589111050958116, + "grad_norm": 398.2770080566406, + "learning_rate": 4.420806702215211e-06, + "loss": 16.5902, + "step": 291630 + }, + { + "epoch": 0.5891312515907998, + "grad_norm": 61.03514862060547, + "learning_rate": 4.4204599876728975e-06, + "loss": 38.1336, + "step": 291640 + }, + { + "epoch": 0.5891514522234836, + "grad_norm": 430.0029296875, + "learning_rate": 4.420113275955193e-06, + "loss": 23.032, + "step": 291650 + }, + { + "epoch": 0.5891716528561675, + "grad_norm": 668.921630859375, + "learning_rate": 4.419766567063788e-06, + "loss": 22.493, + "step": 291660 + }, + { + "epoch": 0.5891918534888513, + "grad_norm": 414.0310974121094, + "learning_rate": 4.419419861000369e-06, + "loss": 21.329, + "step": 291670 + }, + { + "epoch": 0.5892120541215351, + "grad_norm": 328.0341491699219, + "learning_rate": 4.419073157766626e-06, + "loss": 22.7492, + "step": 291680 + }, + { + "epoch": 0.5892322547542189, + "grad_norm": 271.34423828125, + "learning_rate": 4.418726457364252e-06, + "loss": 15.9894, + "step": 291690 + }, + { + "epoch": 
0.5892524553869027, + "grad_norm": 133.44625854492188, + "learning_rate": 4.418379759794934e-06, + "loss": 20.1859, + "step": 291700 + }, + { + "epoch": 0.5892726560195866, + "grad_norm": 729.990966796875, + "learning_rate": 4.418033065060361e-06, + "loss": 29.3806, + "step": 291710 + }, + { + "epoch": 0.5892928566522704, + "grad_norm": 140.03456115722656, + "learning_rate": 4.417686373162225e-06, + "loss": 12.5217, + "step": 291720 + }, + { + "epoch": 0.5893130572849542, + "grad_norm": 374.0782775878906, + "learning_rate": 4.417339684102217e-06, + "loss": 26.505, + "step": 291730 + }, + { + "epoch": 0.589333257917638, + "grad_norm": 452.8331604003906, + "learning_rate": 4.416992997882023e-06, + "loss": 16.9886, + "step": 291740 + }, + { + "epoch": 0.5893534585503218, + "grad_norm": 295.45562744140625, + "learning_rate": 4.416646314503334e-06, + "loss": 22.808, + "step": 291750 + }, + { + "epoch": 0.5893736591830057, + "grad_norm": 583.7633056640625, + "learning_rate": 4.416299633967842e-06, + "loss": 29.0666, + "step": 291760 + }, + { + "epoch": 0.5893938598156895, + "grad_norm": 690.9365234375, + "learning_rate": 4.415952956277234e-06, + "loss": 17.5846, + "step": 291770 + }, + { + "epoch": 0.5894140604483732, + "grad_norm": 227.87814331054688, + "learning_rate": 4.415606281433199e-06, + "loss": 32.6798, + "step": 291780 + }, + { + "epoch": 0.589434261081057, + "grad_norm": 232.24734497070312, + "learning_rate": 4.415259609437431e-06, + "loss": 14.3116, + "step": 291790 + }, + { + "epoch": 0.5894544617137408, + "grad_norm": 382.23486328125, + "learning_rate": 4.414912940291614e-06, + "loss": 22.9699, + "step": 291800 + }, + { + "epoch": 0.5894746623464246, + "grad_norm": 479.1570129394531, + "learning_rate": 4.414566273997441e-06, + "loss": 22.8322, + "step": 291810 + }, + { + "epoch": 0.5894948629791085, + "grad_norm": 260.9040832519531, + "learning_rate": 4.414219610556601e-06, + "loss": 16.0159, + "step": 291820 + }, + { + "epoch": 0.5895150636117923, + "grad_norm": 351.2622985839844, + "learning_rate": 4.413872949970785e-06, + "loss": 13.9373, + "step": 291830 + }, + { + "epoch": 0.5895352642444761, + "grad_norm": 338.8872985839844, + "learning_rate": 4.413526292241679e-06, + "loss": 16.4922, + "step": 291840 + }, + { + "epoch": 0.5895554648771599, + "grad_norm": 218.44630432128906, + "learning_rate": 4.413179637370977e-06, + "loss": 16.8497, + "step": 291850 + }, + { + "epoch": 0.5895756655098437, + "grad_norm": 327.5431823730469, + "learning_rate": 4.412832985360363e-06, + "loss": 26.4961, + "step": 291860 + }, + { + "epoch": 0.5895958661425276, + "grad_norm": 166.36444091796875, + "learning_rate": 4.412486336211531e-06, + "loss": 8.5061, + "step": 291870 + }, + { + "epoch": 0.5896160667752114, + "grad_norm": 440.8267517089844, + "learning_rate": 4.412139689926171e-06, + "loss": 28.1508, + "step": 291880 + }, + { + "epoch": 0.5896362674078952, + "grad_norm": 405.8255310058594, + "learning_rate": 4.41179304650597e-06, + "loss": 8.3289, + "step": 291890 + }, + { + "epoch": 0.589656468040579, + "grad_norm": 469.4025573730469, + "learning_rate": 4.4114464059526185e-06, + "loss": 13.9428, + "step": 291900 + }, + { + "epoch": 0.5896766686732628, + "grad_norm": 341.0938415527344, + "learning_rate": 4.4110997682678056e-06, + "loss": 11.3571, + "step": 291910 + }, + { + "epoch": 0.5896968693059467, + "grad_norm": 256.1963806152344, + "learning_rate": 4.410753133453222e-06, + "loss": 8.6076, + "step": 291920 + }, + { + "epoch": 0.5897170699386305, + "grad_norm": 234.28823852539062, + 
"learning_rate": 4.410406501510554e-06, + "loss": 34.8677, + "step": 291930 + }, + { + "epoch": 0.5897372705713143, + "grad_norm": 345.32879638671875, + "learning_rate": 4.410059872441494e-06, + "loss": 22.7719, + "step": 291940 + }, + { + "epoch": 0.5897574712039981, + "grad_norm": 347.39068603515625, + "learning_rate": 4.409713246247732e-06, + "loss": 20.8243, + "step": 291950 + }, + { + "epoch": 0.589777671836682, + "grad_norm": 381.36431884765625, + "learning_rate": 4.409366622930955e-06, + "loss": 18.0616, + "step": 291960 + }, + { + "epoch": 0.5897978724693658, + "grad_norm": 341.75372314453125, + "learning_rate": 4.409020002492854e-06, + "loss": 14.8532, + "step": 291970 + }, + { + "epoch": 0.5898180731020496, + "grad_norm": 1875.607177734375, + "learning_rate": 4.4086733849351174e-06, + "loss": 29.2385, + "step": 291980 + }, + { + "epoch": 0.5898382737347334, + "grad_norm": 302.335693359375, + "learning_rate": 4.408326770259435e-06, + "loss": 12.9311, + "step": 291990 + }, + { + "epoch": 0.5898584743674172, + "grad_norm": 287.7569274902344, + "learning_rate": 4.4079801584674955e-06, + "loss": 20.1383, + "step": 292000 + }, + { + "epoch": 0.589878675000101, + "grad_norm": 484.9185791015625, + "learning_rate": 4.407633549560991e-06, + "loss": 18.7982, + "step": 292010 + }, + { + "epoch": 0.5898988756327849, + "grad_norm": 401.7894287109375, + "learning_rate": 4.407286943541606e-06, + "loss": 14.3355, + "step": 292020 + }, + { + "epoch": 0.5899190762654687, + "grad_norm": 468.5331115722656, + "learning_rate": 4.406940340411034e-06, + "loss": 25.4369, + "step": 292030 + }, + { + "epoch": 0.5899392768981524, + "grad_norm": 231.7653350830078, + "learning_rate": 4.406593740170963e-06, + "loss": 12.8763, + "step": 292040 + }, + { + "epoch": 0.5899594775308362, + "grad_norm": 547.8223876953125, + "learning_rate": 4.406247142823082e-06, + "loss": 14.7608, + "step": 292050 + }, + { + "epoch": 0.58997967816352, + "grad_norm": 387.529541015625, + "learning_rate": 4.4059005483690805e-06, + "loss": 23.933, + "step": 292060 + }, + { + "epoch": 0.5899998787962039, + "grad_norm": 193.5162353515625, + "learning_rate": 4.405553956810646e-06, + "loss": 21.2242, + "step": 292070 + }, + { + "epoch": 0.5900200794288877, + "grad_norm": 779.8995361328125, + "learning_rate": 4.405207368149472e-06, + "loss": 23.684, + "step": 292080 + }, + { + "epoch": 0.5900402800615715, + "grad_norm": 418.9433288574219, + "learning_rate": 4.404860782387243e-06, + "loss": 32.1124, + "step": 292090 + }, + { + "epoch": 0.5900604806942553, + "grad_norm": 167.64556884765625, + "learning_rate": 4.404514199525651e-06, + "loss": 18.7874, + "step": 292100 + }, + { + "epoch": 0.5900806813269391, + "grad_norm": 239.8409423828125, + "learning_rate": 4.404167619566386e-06, + "loss": 18.2346, + "step": 292110 + }, + { + "epoch": 0.590100881959623, + "grad_norm": 13.00391674041748, + "learning_rate": 4.403821042511135e-06, + "loss": 16.5054, + "step": 292120 + }, + { + "epoch": 0.5901210825923068, + "grad_norm": 2.531355142593384, + "learning_rate": 4.403474468361587e-06, + "loss": 17.1285, + "step": 292130 + }, + { + "epoch": 0.5901412832249906, + "grad_norm": 568.1395874023438, + "learning_rate": 4.4031278971194335e-06, + "loss": 19.5606, + "step": 292140 + }, + { + "epoch": 0.5901614838576744, + "grad_norm": 248.3144989013672, + "learning_rate": 4.402781328786361e-06, + "loss": 10.4688, + "step": 292150 + }, + { + "epoch": 0.5901816844903582, + "grad_norm": 65.69276428222656, + "learning_rate": 4.402434763364059e-06, + "loss": 22.1509, 
+ "step": 292160 + }, + { + "epoch": 0.5902018851230421, + "grad_norm": 328.47430419921875, + "learning_rate": 4.4020882008542185e-06, + "loss": 16.2797, + "step": 292170 + }, + { + "epoch": 0.5902220857557259, + "grad_norm": 91.8713150024414, + "learning_rate": 4.401741641258529e-06, + "loss": 28.9528, + "step": 292180 + }, + { + "epoch": 0.5902422863884097, + "grad_norm": 318.346435546875, + "learning_rate": 4.401395084578677e-06, + "loss": 22.6256, + "step": 292190 + }, + { + "epoch": 0.5902624870210935, + "grad_norm": 142.5501251220703, + "learning_rate": 4.401048530816353e-06, + "loss": 14.0909, + "step": 292200 + }, + { + "epoch": 0.5902826876537773, + "grad_norm": 382.1142272949219, + "learning_rate": 4.4007019799732465e-06, + "loss": 20.4528, + "step": 292210 + }, + { + "epoch": 0.5903028882864612, + "grad_norm": 438.4935302734375, + "learning_rate": 4.400355432051044e-06, + "loss": 24.4151, + "step": 292220 + }, + { + "epoch": 0.590323088919145, + "grad_norm": 346.56231689453125, + "learning_rate": 4.400008887051437e-06, + "loss": 32.0562, + "step": 292230 + }, + { + "epoch": 0.5903432895518288, + "grad_norm": 263.6671447753906, + "learning_rate": 4.399662344976116e-06, + "loss": 12.4963, + "step": 292240 + }, + { + "epoch": 0.5903634901845126, + "grad_norm": 294.6814880371094, + "learning_rate": 4.399315805826765e-06, + "loss": 15.2193, + "step": 292250 + }, + { + "epoch": 0.5903836908171964, + "grad_norm": 688.6610717773438, + "learning_rate": 4.398969269605077e-06, + "loss": 15.6875, + "step": 292260 + }, + { + "epoch": 0.5904038914498803, + "grad_norm": 625.572021484375, + "learning_rate": 4.398622736312741e-06, + "loss": 37.7061, + "step": 292270 + }, + { + "epoch": 0.5904240920825641, + "grad_norm": 464.1097412109375, + "learning_rate": 4.398276205951443e-06, + "loss": 26.6318, + "step": 292280 + }, + { + "epoch": 0.5904442927152478, + "grad_norm": 963.5963134765625, + "learning_rate": 4.3979296785228744e-06, + "loss": 15.4111, + "step": 292290 + }, + { + "epoch": 0.5904644933479316, + "grad_norm": 259.0113830566406, + "learning_rate": 4.397583154028725e-06, + "loss": 15.5287, + "step": 292300 + }, + { + "epoch": 0.5904846939806154, + "grad_norm": 517.06005859375, + "learning_rate": 4.397236632470681e-06, + "loss": 22.2305, + "step": 292310 + }, + { + "epoch": 0.5905048946132992, + "grad_norm": 461.43206787109375, + "learning_rate": 4.3968901138504315e-06, + "loss": 14.02, + "step": 292320 + }, + { + "epoch": 0.5905250952459831, + "grad_norm": 482.19598388671875, + "learning_rate": 4.396543598169667e-06, + "loss": 12.3937, + "step": 292330 + }, + { + "epoch": 0.5905452958786669, + "grad_norm": 509.9044189453125, + "learning_rate": 4.3961970854300774e-06, + "loss": 22.6853, + "step": 292340 + }, + { + "epoch": 0.5905654965113507, + "grad_norm": 570.2136840820312, + "learning_rate": 4.395850575633348e-06, + "loss": 19.9869, + "step": 292350 + }, + { + "epoch": 0.5905856971440345, + "grad_norm": 501.50726318359375, + "learning_rate": 4.395504068781171e-06, + "loss": 26.7932, + "step": 292360 + }, + { + "epoch": 0.5906058977767183, + "grad_norm": 539.0986328125, + "learning_rate": 4.3951575648752346e-06, + "loss": 28.5295, + "step": 292370 + }, + { + "epoch": 0.5906260984094022, + "grad_norm": 197.9573211669922, + "learning_rate": 4.394811063917225e-06, + "loss": 24.7447, + "step": 292380 + }, + { + "epoch": 0.590646299042086, + "grad_norm": 216.0475616455078, + "learning_rate": 4.394464565908832e-06, + "loss": 15.1393, + "step": 292390 + }, + { + "epoch": 0.5906664996747698, + 
"grad_norm": 563.8461303710938, + "learning_rate": 4.394118070851749e-06, + "loss": 34.5846, + "step": 292400 + }, + { + "epoch": 0.5906867003074536, + "grad_norm": 1089.698486328125, + "learning_rate": 4.3937715787476576e-06, + "loss": 14.3999, + "step": 292410 + }, + { + "epoch": 0.5907069009401374, + "grad_norm": 134.1151580810547, + "learning_rate": 4.393425089598251e-06, + "loss": 17.3636, + "step": 292420 + }, + { + "epoch": 0.5907271015728213, + "grad_norm": 266.62481689453125, + "learning_rate": 4.393078603405218e-06, + "loss": 25.2376, + "step": 292430 + }, + { + "epoch": 0.5907473022055051, + "grad_norm": 279.544189453125, + "learning_rate": 4.392732120170245e-06, + "loss": 15.8477, + "step": 292440 + }, + { + "epoch": 0.5907675028381889, + "grad_norm": 489.8971252441406, + "learning_rate": 4.392385639895022e-06, + "loss": 18.6997, + "step": 292450 + }, + { + "epoch": 0.5907877034708727, + "grad_norm": 110.44843292236328, + "learning_rate": 4.392039162581239e-06, + "loss": 10.3079, + "step": 292460 + }, + { + "epoch": 0.5908079041035565, + "grad_norm": 130.7806396484375, + "learning_rate": 4.391692688230583e-06, + "loss": 17.9064, + "step": 292470 + }, + { + "epoch": 0.5908281047362404, + "grad_norm": 449.1365966796875, + "learning_rate": 4.391346216844741e-06, + "loss": 18.5721, + "step": 292480 + }, + { + "epoch": 0.5908483053689242, + "grad_norm": 174.7170867919922, + "learning_rate": 4.390999748425405e-06, + "loss": 17.5537, + "step": 292490 + }, + { + "epoch": 0.590868506001608, + "grad_norm": 488.6687927246094, + "learning_rate": 4.390653282974264e-06, + "loss": 15.9013, + "step": 292500 + }, + { + "epoch": 0.5908887066342918, + "grad_norm": 643.6644287109375, + "learning_rate": 4.390306820493003e-06, + "loss": 34.1992, + "step": 292510 + }, + { + "epoch": 0.5909089072669756, + "grad_norm": 484.48309326171875, + "learning_rate": 4.389960360983313e-06, + "loss": 26.0471, + "step": 292520 + }, + { + "epoch": 0.5909291078996595, + "grad_norm": 364.2028503417969, + "learning_rate": 4.3896139044468835e-06, + "loss": 10.2862, + "step": 292530 + }, + { + "epoch": 0.5909493085323433, + "grad_norm": 269.10894775390625, + "learning_rate": 4.389267450885399e-06, + "loss": 9.8183, + "step": 292540 + }, + { + "epoch": 0.590969509165027, + "grad_norm": 162.6874542236328, + "learning_rate": 4.388921000300553e-06, + "loss": 18.3012, + "step": 292550 + }, + { + "epoch": 0.5909897097977108, + "grad_norm": 451.3988342285156, + "learning_rate": 4.388574552694032e-06, + "loss": 23.3555, + "step": 292560 + }, + { + "epoch": 0.5910099104303946, + "grad_norm": 204.1512451171875, + "learning_rate": 4.3882281080675234e-06, + "loss": 12.2363, + "step": 292570 + }, + { + "epoch": 0.5910301110630785, + "grad_norm": 255.6993865966797, + "learning_rate": 4.387881666422718e-06, + "loss": 13.7673, + "step": 292580 + }, + { + "epoch": 0.5910503116957623, + "grad_norm": 717.266357421875, + "learning_rate": 4.387535227761303e-06, + "loss": 23.6473, + "step": 292590 + }, + { + "epoch": 0.5910705123284461, + "grad_norm": 508.82684326171875, + "learning_rate": 4.387188792084967e-06, + "loss": 27.529, + "step": 292600 + }, + { + "epoch": 0.5910907129611299, + "grad_norm": 211.74488830566406, + "learning_rate": 4.386842359395396e-06, + "loss": 15.0736, + "step": 292610 + }, + { + "epoch": 0.5911109135938137, + "grad_norm": 214.7193603515625, + "learning_rate": 4.3864959296942835e-06, + "loss": 27.5348, + "step": 292620 + }, + { + "epoch": 0.5911311142264976, + "grad_norm": 900.5426025390625, + "learning_rate": 
4.386149502983316e-06, + "loss": 29.8566, + "step": 292630 + }, + { + "epoch": 0.5911513148591814, + "grad_norm": 853.7367553710938, + "learning_rate": 4.38580307926418e-06, + "loss": 34.2588, + "step": 292640 + }, + { + "epoch": 0.5911715154918652, + "grad_norm": 826.5645751953125, + "learning_rate": 4.385456658538565e-06, + "loss": 28.2878, + "step": 292650 + }, + { + "epoch": 0.591191716124549, + "grad_norm": 290.5691833496094, + "learning_rate": 4.385110240808161e-06, + "loss": 19.5628, + "step": 292660 + }, + { + "epoch": 0.5912119167572328, + "grad_norm": 705.2667236328125, + "learning_rate": 4.384763826074655e-06, + "loss": 16.3966, + "step": 292670 + }, + { + "epoch": 0.5912321173899167, + "grad_norm": 506.11907958984375, + "learning_rate": 4.384417414339734e-06, + "loss": 13.8883, + "step": 292680 + }, + { + "epoch": 0.5912523180226005, + "grad_norm": 436.683837890625, + "learning_rate": 4.38407100560509e-06, + "loss": 15.7811, + "step": 292690 + }, + { + "epoch": 0.5912725186552843, + "grad_norm": 288.6622009277344, + "learning_rate": 4.383724599872407e-06, + "loss": 24.5138, + "step": 292700 + }, + { + "epoch": 0.5912927192879681, + "grad_norm": 307.366455078125, + "learning_rate": 4.383378197143376e-06, + "loss": 28.7145, + "step": 292710 + }, + { + "epoch": 0.5913129199206519, + "grad_norm": 119.44100189208984, + "learning_rate": 4.3830317974196864e-06, + "loss": 12.5694, + "step": 292720 + }, + { + "epoch": 0.5913331205533358, + "grad_norm": 551.1475219726562, + "learning_rate": 4.382685400703024e-06, + "loss": 16.1562, + "step": 292730 + }, + { + "epoch": 0.5913533211860196, + "grad_norm": 293.0324401855469, + "learning_rate": 4.382339006995078e-06, + "loss": 25.7829, + "step": 292740 + }, + { + "epoch": 0.5913735218187034, + "grad_norm": 155.7109832763672, + "learning_rate": 4.381992616297538e-06, + "loss": 24.7547, + "step": 292750 + }, + { + "epoch": 0.5913937224513872, + "grad_norm": 279.54583740234375, + "learning_rate": 4.38164622861209e-06, + "loss": 20.3726, + "step": 292760 + }, + { + "epoch": 0.591413923084071, + "grad_norm": 325.4188537597656, + "learning_rate": 4.381299843940421e-06, + "loss": 19.2596, + "step": 292770 + }, + { + "epoch": 0.5914341237167549, + "grad_norm": 233.92567443847656, + "learning_rate": 4.3809534622842245e-06, + "loss": 34.5578, + "step": 292780 + }, + { + "epoch": 0.5914543243494387, + "grad_norm": 247.04754638671875, + "learning_rate": 4.380607083645185e-06, + "loss": 19.3653, + "step": 292790 + }, + { + "epoch": 0.5914745249821224, + "grad_norm": 258.3706970214844, + "learning_rate": 4.380260708024991e-06, + "loss": 18.3587, + "step": 292800 + }, + { + "epoch": 0.5914947256148062, + "grad_norm": 388.7332458496094, + "learning_rate": 4.379914335425332e-06, + "loss": 17.6423, + "step": 292810 + }, + { + "epoch": 0.59151492624749, + "grad_norm": 640.5714721679688, + "learning_rate": 4.379567965847896e-06, + "loss": 15.7696, + "step": 292820 + }, + { + "epoch": 0.5915351268801738, + "grad_norm": 389.6690979003906, + "learning_rate": 4.379221599294369e-06, + "loss": 28.9675, + "step": 292830 + }, + { + "epoch": 0.5915553275128577, + "grad_norm": 354.93170166015625, + "learning_rate": 4.37887523576644e-06, + "loss": 21.2728, + "step": 292840 + }, + { + "epoch": 0.5915755281455415, + "grad_norm": 502.6838684082031, + "learning_rate": 4.378528875265801e-06, + "loss": 23.8819, + "step": 292850 + }, + { + "epoch": 0.5915957287782253, + "grad_norm": 413.7265930175781, + "learning_rate": 4.378182517794133e-06, + "loss": 12.2479, + "step": 292860 + 
}, + { + "epoch": 0.5916159294109091, + "grad_norm": 2.227445125579834, + "learning_rate": 4.3778361633531296e-06, + "loss": 15.8987, + "step": 292870 + }, + { + "epoch": 0.591636130043593, + "grad_norm": 563.5757446289062, + "learning_rate": 4.377489811944478e-06, + "loss": 27.6906, + "step": 292880 + }, + { + "epoch": 0.5916563306762768, + "grad_norm": 156.43028259277344, + "learning_rate": 4.377143463569865e-06, + "loss": 24.4495, + "step": 292890 + }, + { + "epoch": 0.5916765313089606, + "grad_norm": 467.7649230957031, + "learning_rate": 4.376797118230978e-06, + "loss": 17.7552, + "step": 292900 + }, + { + "epoch": 0.5916967319416444, + "grad_norm": 18.85779571533203, + "learning_rate": 4.37645077592951e-06, + "loss": 13.7122, + "step": 292910 + }, + { + "epoch": 0.5917169325743282, + "grad_norm": 344.03594970703125, + "learning_rate": 4.376104436667142e-06, + "loss": 17.1261, + "step": 292920 + }, + { + "epoch": 0.591737133207012, + "grad_norm": 419.68511962890625, + "learning_rate": 4.375758100445564e-06, + "loss": 23.6224, + "step": 292930 + }, + { + "epoch": 0.5917573338396959, + "grad_norm": 208.86512756347656, + "learning_rate": 4.375411767266468e-06, + "loss": 10.4842, + "step": 292940 + }, + { + "epoch": 0.5917775344723797, + "grad_norm": 637.0712890625, + "learning_rate": 4.375065437131539e-06, + "loss": 37.4901, + "step": 292950 + }, + { + "epoch": 0.5917977351050635, + "grad_norm": 584.8602905273438, + "learning_rate": 4.374719110042465e-06, + "loss": 24.5428, + "step": 292960 + }, + { + "epoch": 0.5918179357377473, + "grad_norm": 291.6265563964844, + "learning_rate": 4.374372786000934e-06, + "loss": 32.9707, + "step": 292970 + }, + { + "epoch": 0.5918381363704311, + "grad_norm": 3.442250967025757, + "learning_rate": 4.374026465008634e-06, + "loss": 47.2955, + "step": 292980 + }, + { + "epoch": 0.591858337003115, + "grad_norm": 542.1782836914062, + "learning_rate": 4.373680147067254e-06, + "loss": 22.1078, + "step": 292990 + }, + { + "epoch": 0.5918785376357988, + "grad_norm": 143.48858642578125, + "learning_rate": 4.373333832178478e-06, + "loss": 20.4554, + "step": 293000 + }, + { + "epoch": 0.5918987382684826, + "grad_norm": 401.704345703125, + "learning_rate": 4.372987520344002e-06, + "loss": 11.1024, + "step": 293010 + }, + { + "epoch": 0.5919189389011664, + "grad_norm": 41.04372024536133, + "learning_rate": 4.3726412115655046e-06, + "loss": 18.1149, + "step": 293020 + }, + { + "epoch": 0.5919391395338502, + "grad_norm": 371.2960510253906, + "learning_rate": 4.372294905844679e-06, + "loss": 27.4418, + "step": 293030 + }, + { + "epoch": 0.5919593401665341, + "grad_norm": 292.4776916503906, + "learning_rate": 4.371948603183213e-06, + "loss": 23.4645, + "step": 293040 + }, + { + "epoch": 0.5919795407992179, + "grad_norm": 787.6366577148438, + "learning_rate": 4.371602303582792e-06, + "loss": 18.1713, + "step": 293050 + }, + { + "epoch": 0.5919997414319016, + "grad_norm": 218.78627014160156, + "learning_rate": 4.3712560070451055e-06, + "loss": 10.6186, + "step": 293060 + }, + { + "epoch": 0.5920199420645854, + "grad_norm": 202.3785858154297, + "learning_rate": 4.3709097135718395e-06, + "loss": 16.0047, + "step": 293070 + }, + { + "epoch": 0.5920401426972692, + "grad_norm": 400.23284912109375, + "learning_rate": 4.370563423164687e-06, + "loss": 16.0984, + "step": 293080 + }, + { + "epoch": 0.592060343329953, + "grad_norm": 400.9034729003906, + "learning_rate": 4.370217135825329e-06, + "loss": 26.1754, + "step": 293090 + }, + { + "epoch": 0.5920805439626369, + "grad_norm": 
914.625, + "learning_rate": 4.369870851555457e-06, + "loss": 28.7863, + "step": 293100 + }, + { + "epoch": 0.5921007445953207, + "grad_norm": 603.1434936523438, + "learning_rate": 4.369524570356759e-06, + "loss": 25.2377, + "step": 293110 + }, + { + "epoch": 0.5921209452280045, + "grad_norm": 254.59913635253906, + "learning_rate": 4.369178292230921e-06, + "loss": 11.5563, + "step": 293120 + }, + { + "epoch": 0.5921411458606883, + "grad_norm": 118.04940795898438, + "learning_rate": 4.368832017179631e-06, + "loss": 24.2292, + "step": 293130 + }, + { + "epoch": 0.5921613464933722, + "grad_norm": 122.41696166992188, + "learning_rate": 4.368485745204579e-06, + "loss": 15.2559, + "step": 293140 + }, + { + "epoch": 0.592181547126056, + "grad_norm": 782.8267211914062, + "learning_rate": 4.3681394763074495e-06, + "loss": 33.4571, + "step": 293150 + }, + { + "epoch": 0.5922017477587398, + "grad_norm": 564.6712036132812, + "learning_rate": 4.36779321048993e-06, + "loss": 16.5709, + "step": 293160 + }, + { + "epoch": 0.5922219483914236, + "grad_norm": 537.654541015625, + "learning_rate": 4.367446947753712e-06, + "loss": 16.9303, + "step": 293170 + }, + { + "epoch": 0.5922421490241074, + "grad_norm": 601.1361694335938, + "learning_rate": 4.36710068810048e-06, + "loss": 22.0937, + "step": 293180 + }, + { + "epoch": 0.5922623496567913, + "grad_norm": 832.7262573242188, + "learning_rate": 4.366754431531923e-06, + "loss": 13.957, + "step": 293190 + }, + { + "epoch": 0.5922825502894751, + "grad_norm": 318.0546875, + "learning_rate": 4.366408178049728e-06, + "loss": 13.1061, + "step": 293200 + }, + { + "epoch": 0.5923027509221589, + "grad_norm": 454.3702087402344, + "learning_rate": 4.366061927655582e-06, + "loss": 21.6157, + "step": 293210 + }, + { + "epoch": 0.5923229515548427, + "grad_norm": 170.03672790527344, + "learning_rate": 4.3657156803511745e-06, + "loss": 17.3532, + "step": 293220 + }, + { + "epoch": 0.5923431521875265, + "grad_norm": 299.2825927734375, + "learning_rate": 4.3653694361381894e-06, + "loss": 12.3919, + "step": 293230 + }, + { + "epoch": 0.5923633528202104, + "grad_norm": 180.69033813476562, + "learning_rate": 4.365023195018319e-06, + "loss": 23.5594, + "step": 293240 + }, + { + "epoch": 0.5923835534528942, + "grad_norm": 471.05535888671875, + "learning_rate": 4.3646769569932475e-06, + "loss": 27.935, + "step": 293250 + }, + { + "epoch": 0.592403754085578, + "grad_norm": 272.734130859375, + "learning_rate": 4.364330722064664e-06, + "loss": 17.1042, + "step": 293260 + }, + { + "epoch": 0.5924239547182618, + "grad_norm": 185.55250549316406, + "learning_rate": 4.363984490234257e-06, + "loss": 19.1317, + "step": 293270 + }, + { + "epoch": 0.5924441553509456, + "grad_norm": 324.3248291015625, + "learning_rate": 4.36363826150371e-06, + "loss": 12.6496, + "step": 293280 + }, + { + "epoch": 0.5924643559836295, + "grad_norm": 113.91188049316406, + "learning_rate": 4.3632920358747125e-06, + "loss": 12.1835, + "step": 293290 + }, + { + "epoch": 0.5924845566163133, + "grad_norm": 245.42926025390625, + "learning_rate": 4.362945813348956e-06, + "loss": 22.5566, + "step": 293300 + }, + { + "epoch": 0.5925047572489971, + "grad_norm": 617.8198852539062, + "learning_rate": 4.36259959392812e-06, + "loss": 28.8473, + "step": 293310 + }, + { + "epoch": 0.5925249578816808, + "grad_norm": 411.3521423339844, + "learning_rate": 4.3622533776138985e-06, + "loss": 22.5069, + "step": 293320 + }, + { + "epoch": 0.5925451585143646, + "grad_norm": 479.8760070800781, + "learning_rate": 4.361907164407977e-06, + 
"loss": 27.9318, + "step": 293330 + }, + { + "epoch": 0.5925653591470484, + "grad_norm": 671.952392578125, + "learning_rate": 4.361560954312042e-06, + "loss": 12.5237, + "step": 293340 + }, + { + "epoch": 0.5925855597797323, + "grad_norm": 266.8735656738281, + "learning_rate": 4.361214747327781e-06, + "loss": 10.6885, + "step": 293350 + }, + { + "epoch": 0.5926057604124161, + "grad_norm": 1242.9803466796875, + "learning_rate": 4.360868543456883e-06, + "loss": 29.3166, + "step": 293360 + }, + { + "epoch": 0.5926259610450999, + "grad_norm": 262.1438903808594, + "learning_rate": 4.360522342701033e-06, + "loss": 22.7987, + "step": 293370 + }, + { + "epoch": 0.5926461616777837, + "grad_norm": 17.517539978027344, + "learning_rate": 4.360176145061919e-06, + "loss": 20.8275, + "step": 293380 + }, + { + "epoch": 0.5926663623104675, + "grad_norm": 324.86761474609375, + "learning_rate": 4.35982995054123e-06, + "loss": 15.8237, + "step": 293390 + }, + { + "epoch": 0.5926865629431514, + "grad_norm": 419.5167236328125, + "learning_rate": 4.359483759140654e-06, + "loss": 29.5502, + "step": 293400 + }, + { + "epoch": 0.5927067635758352, + "grad_norm": 268.809326171875, + "learning_rate": 4.359137570861874e-06, + "loss": 12.0043, + "step": 293410 + }, + { + "epoch": 0.592726964208519, + "grad_norm": 126.4826431274414, + "learning_rate": 4.35879138570658e-06, + "loss": 17.419, + "step": 293420 + }, + { + "epoch": 0.5927471648412028, + "grad_norm": 553.036865234375, + "learning_rate": 4.35844520367646e-06, + "loss": 22.4381, + "step": 293430 + }, + { + "epoch": 0.5927673654738866, + "grad_norm": 197.84173583984375, + "learning_rate": 4.358099024773199e-06, + "loss": 32.0205, + "step": 293440 + }, + { + "epoch": 0.5927875661065705, + "grad_norm": 254.42730712890625, + "learning_rate": 4.357752848998486e-06, + "loss": 17.9257, + "step": 293450 + }, + { + "epoch": 0.5928077667392543, + "grad_norm": 474.2779235839844, + "learning_rate": 4.357406676354009e-06, + "loss": 20.7048, + "step": 293460 + }, + { + "epoch": 0.5928279673719381, + "grad_norm": 229.4510955810547, + "learning_rate": 4.357060506841452e-06, + "loss": 14.3312, + "step": 293470 + }, + { + "epoch": 0.5928481680046219, + "grad_norm": 218.31007385253906, + "learning_rate": 4.356714340462505e-06, + "loss": 14.5799, + "step": 293480 + }, + { + "epoch": 0.5928683686373057, + "grad_norm": 120.61658477783203, + "learning_rate": 4.356368177218855e-06, + "loss": 20.3388, + "step": 293490 + }, + { + "epoch": 0.5928885692699896, + "grad_norm": 566.609619140625, + "learning_rate": 4.356022017112187e-06, + "loss": 15.9435, + "step": 293500 + }, + { + "epoch": 0.5929087699026734, + "grad_norm": 252.36489868164062, + "learning_rate": 4.35567586014419e-06, + "loss": 13.5621, + "step": 293510 + }, + { + "epoch": 0.5929289705353572, + "grad_norm": 575.0886840820312, + "learning_rate": 4.355329706316552e-06, + "loss": 27.5622, + "step": 293520 + }, + { + "epoch": 0.592949171168041, + "grad_norm": 577.52490234375, + "learning_rate": 4.354983555630957e-06, + "loss": 28.4473, + "step": 293530 + }, + { + "epoch": 0.5929693718007248, + "grad_norm": 69.46257019042969, + "learning_rate": 4.354637408089093e-06, + "loss": 13.2063, + "step": 293540 + }, + { + "epoch": 0.5929895724334087, + "grad_norm": 408.7625732421875, + "learning_rate": 4.35429126369265e-06, + "loss": 18.4322, + "step": 293550 + }, + { + "epoch": 0.5930097730660925, + "grad_norm": 450.35546875, + "learning_rate": 4.353945122443314e-06, + "loss": 22.1054, + "step": 293560 + }, + { + "epoch": 
0.5930299736987762, + "grad_norm": 386.18634033203125, + "learning_rate": 4.3535989843427695e-06, + "loss": 16.7074, + "step": 293570 + }, + { + "epoch": 0.59305017433146, + "grad_norm": 566.9544677734375, + "learning_rate": 4.3532528493927055e-06, + "loss": 21.7555, + "step": 293580 + }, + { + "epoch": 0.5930703749641438, + "grad_norm": 207.8059844970703, + "learning_rate": 4.352906717594809e-06, + "loss": 13.8164, + "step": 293590 + }, + { + "epoch": 0.5930905755968277, + "grad_norm": 539.943603515625, + "learning_rate": 4.352560588950766e-06, + "loss": 20.9404, + "step": 293600 + }, + { + "epoch": 0.5931107762295115, + "grad_norm": 396.5697937011719, + "learning_rate": 4.352214463462263e-06, + "loss": 23.2138, + "step": 293610 + }, + { + "epoch": 0.5931309768621953, + "grad_norm": 54.271324157714844, + "learning_rate": 4.351868341130992e-06, + "loss": 8.0321, + "step": 293620 + }, + { + "epoch": 0.5931511774948791, + "grad_norm": 297.89630126953125, + "learning_rate": 4.351522221958633e-06, + "loss": 15.4077, + "step": 293630 + }, + { + "epoch": 0.5931713781275629, + "grad_norm": 708.1715698242188, + "learning_rate": 4.351176105946876e-06, + "loss": 30.5469, + "step": 293640 + }, + { + "epoch": 0.5931915787602468, + "grad_norm": 676.7525634765625, + "learning_rate": 4.350829993097409e-06, + "loss": 27.1389, + "step": 293650 + }, + { + "epoch": 0.5932117793929306, + "grad_norm": 178.55630493164062, + "learning_rate": 4.350483883411918e-06, + "loss": 18.748, + "step": 293660 + }, + { + "epoch": 0.5932319800256144, + "grad_norm": 358.53997802734375, + "learning_rate": 4.350137776892089e-06, + "loss": 15.7355, + "step": 293670 + }, + { + "epoch": 0.5932521806582982, + "grad_norm": 176.16119384765625, + "learning_rate": 4.349791673539609e-06, + "loss": 24.2379, + "step": 293680 + }, + { + "epoch": 0.593272381290982, + "grad_norm": 330.54815673828125, + "learning_rate": 4.349445573356168e-06, + "loss": 5.8712, + "step": 293690 + }, + { + "epoch": 0.5932925819236659, + "grad_norm": 103.69247436523438, + "learning_rate": 4.349099476343448e-06, + "loss": 21.6638, + "step": 293700 + }, + { + "epoch": 0.5933127825563497, + "grad_norm": 0.0001358857552986592, + "learning_rate": 4.3487533825031395e-06, + "loss": 41.3696, + "step": 293710 + }, + { + "epoch": 0.5933329831890335, + "grad_norm": 274.2425842285156, + "learning_rate": 4.348407291836928e-06, + "loss": 22.3109, + "step": 293720 + }, + { + "epoch": 0.5933531838217173, + "grad_norm": 250.2849578857422, + "learning_rate": 4.3480612043465e-06, + "loss": 12.1256, + "step": 293730 + }, + { + "epoch": 0.5933733844544011, + "grad_norm": 43.5379753112793, + "learning_rate": 4.347715120033543e-06, + "loss": 27.7267, + "step": 293740 + }, + { + "epoch": 0.593393585087085, + "grad_norm": 77.8458480834961, + "learning_rate": 4.347369038899744e-06, + "loss": 10.1903, + "step": 293750 + }, + { + "epoch": 0.5934137857197688, + "grad_norm": 312.5565185546875, + "learning_rate": 4.3470229609467875e-06, + "loss": 13.7661, + "step": 293760 + }, + { + "epoch": 0.5934339863524526, + "grad_norm": 589.7279052734375, + "learning_rate": 4.346676886176361e-06, + "loss": 26.557, + "step": 293770 + }, + { + "epoch": 0.5934541869851364, + "grad_norm": 236.9444122314453, + "learning_rate": 4.346330814590156e-06, + "loss": 24.97, + "step": 293780 + }, + { + "epoch": 0.5934743876178202, + "grad_norm": 679.8486328125, + "learning_rate": 4.345984746189852e-06, + "loss": 26.8117, + "step": 293790 + }, + { + "epoch": 0.593494588250504, + "grad_norm": 509.91424560546875, + 
"learning_rate": 4.34563868097714e-06, + "loss": 21.3296, + "step": 293800 + }, + { + "epoch": 0.5935147888831879, + "grad_norm": 497.14593505859375, + "learning_rate": 4.3452926189537056e-06, + "loss": 22.1593, + "step": 293810 + }, + { + "epoch": 0.5935349895158717, + "grad_norm": 683.8970336914062, + "learning_rate": 4.344946560121236e-06, + "loss": 21.4249, + "step": 293820 + }, + { + "epoch": 0.5935551901485554, + "grad_norm": 367.6731262207031, + "learning_rate": 4.344600504481416e-06, + "loss": 17.2732, + "step": 293830 + }, + { + "epoch": 0.5935753907812392, + "grad_norm": 224.6978302001953, + "learning_rate": 4.344254452035934e-06, + "loss": 20.5409, + "step": 293840 + }, + { + "epoch": 0.593595591413923, + "grad_norm": 494.1619567871094, + "learning_rate": 4.343908402786478e-06, + "loss": 17.6511, + "step": 293850 + }, + { + "epoch": 0.5936157920466069, + "grad_norm": 355.9205627441406, + "learning_rate": 4.343562356734732e-06, + "loss": 11.0737, + "step": 293860 + }, + { + "epoch": 0.5936359926792907, + "grad_norm": 320.394287109375, + "learning_rate": 4.3432163138823826e-06, + "loss": 36.4259, + "step": 293870 + }, + { + "epoch": 0.5936561933119745, + "grad_norm": 359.1979675292969, + "learning_rate": 4.34287027423112e-06, + "loss": 17.2608, + "step": 293880 + }, + { + "epoch": 0.5936763939446583, + "grad_norm": 204.18450927734375, + "learning_rate": 4.342524237782625e-06, + "loss": 10.8793, + "step": 293890 + }, + { + "epoch": 0.5936965945773421, + "grad_norm": 608.7417602539062, + "learning_rate": 4.342178204538588e-06, + "loss": 15.9976, + "step": 293900 + }, + { + "epoch": 0.593716795210026, + "grad_norm": 302.6972351074219, + "learning_rate": 4.341832174500696e-06, + "loss": 15.7724, + "step": 293910 + }, + { + "epoch": 0.5937369958427098, + "grad_norm": 715.2201538085938, + "learning_rate": 4.341486147670631e-06, + "loss": 24.8976, + "step": 293920 + }, + { + "epoch": 0.5937571964753936, + "grad_norm": 836.394775390625, + "learning_rate": 4.341140124050085e-06, + "loss": 15.729, + "step": 293930 + }, + { + "epoch": 0.5937773971080774, + "grad_norm": 155.0250701904297, + "learning_rate": 4.340794103640743e-06, + "loss": 22.8357, + "step": 293940 + }, + { + "epoch": 0.5937975977407612, + "grad_norm": 191.35006713867188, + "learning_rate": 4.340448086444288e-06, + "loss": 13.4048, + "step": 293950 + }, + { + "epoch": 0.5938177983734451, + "grad_norm": 397.1959533691406, + "learning_rate": 4.340102072462411e-06, + "loss": 39.0802, + "step": 293960 + }, + { + "epoch": 0.5938379990061289, + "grad_norm": 601.370361328125, + "learning_rate": 4.339756061696796e-06, + "loss": 39.2011, + "step": 293970 + }, + { + "epoch": 0.5938581996388127, + "grad_norm": 276.6083984375, + "learning_rate": 4.33941005414913e-06, + "loss": 19.5583, + "step": 293980 + }, + { + "epoch": 0.5938784002714965, + "grad_norm": 400.5001525878906, + "learning_rate": 4.339064049821098e-06, + "loss": 15.7781, + "step": 293990 + }, + { + "epoch": 0.5938986009041803, + "grad_norm": 632.0048828125, + "learning_rate": 4.3387180487143875e-06, + "loss": 19.4148, + "step": 294000 + }, + { + "epoch": 0.5939188015368642, + "grad_norm": 75.03510284423828, + "learning_rate": 4.338372050830687e-06, + "loss": 12.7211, + "step": 294010 + }, + { + "epoch": 0.593939002169548, + "grad_norm": 412.6486511230469, + "learning_rate": 4.3380260561716795e-06, + "loss": 30.6878, + "step": 294020 + }, + { + "epoch": 0.5939592028022318, + "grad_norm": 323.1822204589844, + "learning_rate": 4.337680064739053e-06, + "loss": 13.3549, + 
"step": 294030 + }, + { + "epoch": 0.5939794034349156, + "grad_norm": 25.212068557739258, + "learning_rate": 4.337334076534495e-06, + "loss": 32.215, + "step": 294040 + }, + { + "epoch": 0.5939996040675994, + "grad_norm": 437.5789489746094, + "learning_rate": 4.336988091559688e-06, + "loss": 18.3768, + "step": 294050 + }, + { + "epoch": 0.5940198047002833, + "grad_norm": 297.6226806640625, + "learning_rate": 4.3366421098163215e-06, + "loss": 22.5547, + "step": 294060 + }, + { + "epoch": 0.5940400053329671, + "grad_norm": 350.1910705566406, + "learning_rate": 4.336296131306083e-06, + "loss": 14.6618, + "step": 294070 + }, + { + "epoch": 0.5940602059656508, + "grad_norm": 228.41531372070312, + "learning_rate": 4.335950156030653e-06, + "loss": 20.6949, + "step": 294080 + }, + { + "epoch": 0.5940804065983346, + "grad_norm": 513.9437255859375, + "learning_rate": 4.335604183991723e-06, + "loss": 13.8732, + "step": 294090 + }, + { + "epoch": 0.5941006072310184, + "grad_norm": 364.6128234863281, + "learning_rate": 4.335258215190979e-06, + "loss": 28.7425, + "step": 294100 + }, + { + "epoch": 0.5941208078637022, + "grad_norm": 495.7064514160156, + "learning_rate": 4.334912249630104e-06, + "loss": 18.0418, + "step": 294110 + }, + { + "epoch": 0.5941410084963861, + "grad_norm": 301.4275817871094, + "learning_rate": 4.334566287310787e-06, + "loss": 16.9719, + "step": 294120 + }, + { + "epoch": 0.5941612091290699, + "grad_norm": 692.076416015625, + "learning_rate": 4.334220328234711e-06, + "loss": 20.4319, + "step": 294130 + }, + { + "epoch": 0.5941814097617537, + "grad_norm": 390.8522644042969, + "learning_rate": 4.333874372403569e-06, + "loss": 17.7004, + "step": 294140 + }, + { + "epoch": 0.5942016103944375, + "grad_norm": 545.078369140625, + "learning_rate": 4.3335284198190385e-06, + "loss": 20.4591, + "step": 294150 + }, + { + "epoch": 0.5942218110271213, + "grad_norm": 110.72166442871094, + "learning_rate": 4.33318247048281e-06, + "loss": 18.9082, + "step": 294160 + }, + { + "epoch": 0.5942420116598052, + "grad_norm": 375.0422668457031, + "learning_rate": 4.332836524396571e-06, + "loss": 14.5648, + "step": 294170 + }, + { + "epoch": 0.594262212292489, + "grad_norm": 702.2772827148438, + "learning_rate": 4.332490581562005e-06, + "loss": 17.8529, + "step": 294180 + }, + { + "epoch": 0.5942824129251728, + "grad_norm": 118.50646209716797, + "learning_rate": 4.332144641980799e-06, + "loss": 14.6945, + "step": 294190 + }, + { + "epoch": 0.5943026135578566, + "grad_norm": 527.292236328125, + "learning_rate": 4.331798705654639e-06, + "loss": 35.8904, + "step": 294200 + }, + { + "epoch": 0.5943228141905404, + "grad_norm": 257.5843811035156, + "learning_rate": 4.331452772585212e-06, + "loss": 16.7288, + "step": 294210 + }, + { + "epoch": 0.5943430148232243, + "grad_norm": 155.10333251953125, + "learning_rate": 4.3311068427742e-06, + "loss": 28.4758, + "step": 294220 + }, + { + "epoch": 0.5943632154559081, + "grad_norm": 193.52496337890625, + "learning_rate": 4.330760916223297e-06, + "loss": 24.1052, + "step": 294230 + }, + { + "epoch": 0.5943834160885919, + "grad_norm": 369.0464172363281, + "learning_rate": 4.33041499293418e-06, + "loss": 13.5614, + "step": 294240 + }, + { + "epoch": 0.5944036167212757, + "grad_norm": 40.26343536376953, + "learning_rate": 4.33006907290854e-06, + "loss": 13.6466, + "step": 294250 + }, + { + "epoch": 0.5944238173539595, + "grad_norm": 196.87574768066406, + "learning_rate": 4.329723156148064e-06, + "loss": 19.1329, + "step": 294260 + }, + { + "epoch": 0.5944440179866434, + 
"grad_norm": 245.2478485107422, + "learning_rate": 4.3293772426544336e-06, + "loss": 12.3665, + "step": 294270 + }, + { + "epoch": 0.5944642186193272, + "grad_norm": 285.3468933105469, + "learning_rate": 4.329031332429338e-06, + "loss": 19.2312, + "step": 294280 + }, + { + "epoch": 0.594484419252011, + "grad_norm": 89.25577545166016, + "learning_rate": 4.328685425474462e-06, + "loss": 17.1383, + "step": 294290 + }, + { + "epoch": 0.5945046198846948, + "grad_norm": 230.0323486328125, + "learning_rate": 4.328339521791493e-06, + "loss": 28.7411, + "step": 294300 + }, + { + "epoch": 0.5945248205173786, + "grad_norm": 319.6100158691406, + "learning_rate": 4.327993621382115e-06, + "loss": 15.9985, + "step": 294310 + }, + { + "epoch": 0.5945450211500625, + "grad_norm": 63.36458969116211, + "learning_rate": 4.327647724248014e-06, + "loss": 18.7383, + "step": 294320 + }, + { + "epoch": 0.5945652217827463, + "grad_norm": 1.0024418830871582, + "learning_rate": 4.327301830390878e-06, + "loss": 9.2964, + "step": 294330 + }, + { + "epoch": 0.59458542241543, + "grad_norm": 668.4827880859375, + "learning_rate": 4.32695593981239e-06, + "loss": 28.4542, + "step": 294340 + }, + { + "epoch": 0.5946056230481138, + "grad_norm": 564.9884033203125, + "learning_rate": 4.326610052514238e-06, + "loss": 14.9649, + "step": 294350 + }, + { + "epoch": 0.5946258236807976, + "grad_norm": 358.91278076171875, + "learning_rate": 4.326264168498106e-06, + "loss": 14.0499, + "step": 294360 + }, + { + "epoch": 0.5946460243134815, + "grad_norm": 235.9729461669922, + "learning_rate": 4.325918287765682e-06, + "loss": 17.5944, + "step": 294370 + }, + { + "epoch": 0.5946662249461653, + "grad_norm": 16.25257682800293, + "learning_rate": 4.325572410318648e-06, + "loss": 18.7083, + "step": 294380 + }, + { + "epoch": 0.5946864255788491, + "grad_norm": 360.6956481933594, + "learning_rate": 4.325226536158696e-06, + "loss": 30.3617, + "step": 294390 + }, + { + "epoch": 0.5947066262115329, + "grad_norm": 248.47532653808594, + "learning_rate": 4.3248806652875045e-06, + "loss": 23.8472, + "step": 294400 + }, + { + "epoch": 0.5947268268442167, + "grad_norm": 6.759785175323486, + "learning_rate": 4.324534797706764e-06, + "loss": 12.691, + "step": 294410 + }, + { + "epoch": 0.5947470274769006, + "grad_norm": 445.4151916503906, + "learning_rate": 4.32418893341816e-06, + "loss": 15.8771, + "step": 294420 + }, + { + "epoch": 0.5947672281095844, + "grad_norm": 374.858154296875, + "learning_rate": 4.323843072423376e-06, + "loss": 22.9934, + "step": 294430 + }, + { + "epoch": 0.5947874287422682, + "grad_norm": 469.3614807128906, + "learning_rate": 4.323497214724099e-06, + "loss": 24.0278, + "step": 294440 + }, + { + "epoch": 0.594807629374952, + "grad_norm": 648.9302978515625, + "learning_rate": 4.323151360322014e-06, + "loss": 14.9563, + "step": 294450 + }, + { + "epoch": 0.5948278300076358, + "grad_norm": 381.16802978515625, + "learning_rate": 4.32280550921881e-06, + "loss": 11.6384, + "step": 294460 + }, + { + "epoch": 0.5948480306403197, + "grad_norm": 357.280517578125, + "learning_rate": 4.3224596614161666e-06, + "loss": 16.645, + "step": 294470 + }, + { + "epoch": 0.5948682312730035, + "grad_norm": 221.17393493652344, + "learning_rate": 4.322113816915774e-06, + "loss": 17.212, + "step": 294480 + }, + { + "epoch": 0.5948884319056873, + "grad_norm": 515.7291259765625, + "learning_rate": 4.321767975719317e-06, + "loss": 27.4941, + "step": 294490 + }, + { + "epoch": 0.5949086325383711, + "grad_norm": 434.7697448730469, + "learning_rate": 
4.321422137828479e-06, + "loss": 13.8194, + "step": 294500 + }, + { + "epoch": 0.5949288331710549, + "grad_norm": 470.9134521484375, + "learning_rate": 4.321076303244948e-06, + "loss": 10.7291, + "step": 294510 + }, + { + "epoch": 0.5949490338037388, + "grad_norm": 41.78329849243164, + "learning_rate": 4.320730471970409e-06, + "loss": 20.9479, + "step": 294520 + }, + { + "epoch": 0.5949692344364226, + "grad_norm": 477.39697265625, + "learning_rate": 4.320384644006546e-06, + "loss": 18.3437, + "step": 294530 + }, + { + "epoch": 0.5949894350691064, + "grad_norm": 253.376220703125, + "learning_rate": 4.320038819355047e-06, + "loss": 17.6595, + "step": 294540 + }, + { + "epoch": 0.5950096357017902, + "grad_norm": 38.193458557128906, + "learning_rate": 4.319692998017597e-06, + "loss": 19.5714, + "step": 294550 + }, + { + "epoch": 0.595029836334474, + "grad_norm": 192.7274627685547, + "learning_rate": 4.31934717999588e-06, + "loss": 13.3706, + "step": 294560 + }, + { + "epoch": 0.5950500369671579, + "grad_norm": 348.2456359863281, + "learning_rate": 4.319001365291582e-06, + "loss": 23.303, + "step": 294570 + }, + { + "epoch": 0.5950702375998417, + "grad_norm": 609.5001220703125, + "learning_rate": 4.31865555390639e-06, + "loss": 24.8376, + "step": 294580 + }, + { + "epoch": 0.5950904382325255, + "grad_norm": 475.0218200683594, + "learning_rate": 4.318309745841987e-06, + "loss": 20.352, + "step": 294590 + }, + { + "epoch": 0.5951106388652092, + "grad_norm": 442.212158203125, + "learning_rate": 4.317963941100059e-06, + "loss": 17.3036, + "step": 294600 + }, + { + "epoch": 0.595130839497893, + "grad_norm": 381.7318115234375, + "learning_rate": 4.3176181396822925e-06, + "loss": 19.4467, + "step": 294610 + }, + { + "epoch": 0.5951510401305768, + "grad_norm": 458.5332336425781, + "learning_rate": 4.317272341590373e-06, + "loss": 23.6071, + "step": 294620 + }, + { + "epoch": 0.5951712407632607, + "grad_norm": 537.0899047851562, + "learning_rate": 4.3169265468259855e-06, + "loss": 28.2252, + "step": 294630 + }, + { + "epoch": 0.5951914413959445, + "grad_norm": 19.255142211914062, + "learning_rate": 4.316580755390814e-06, + "loss": 8.8401, + "step": 294640 + }, + { + "epoch": 0.5952116420286283, + "grad_norm": 665.742431640625, + "learning_rate": 4.316234967286548e-06, + "loss": 13.0372, + "step": 294650 + }, + { + "epoch": 0.5952318426613121, + "grad_norm": 465.10662841796875, + "learning_rate": 4.315889182514867e-06, + "loss": 23.982, + "step": 294660 + }, + { + "epoch": 0.595252043293996, + "grad_norm": 176.72573852539062, + "learning_rate": 4.315543401077458e-06, + "loss": 14.8843, + "step": 294670 + }, + { + "epoch": 0.5952722439266798, + "grad_norm": 399.09808349609375, + "learning_rate": 4.315197622976011e-06, + "loss": 22.9115, + "step": 294680 + }, + { + "epoch": 0.5952924445593636, + "grad_norm": 55.9252815246582, + "learning_rate": 4.314851848212205e-06, + "loss": 26.8541, + "step": 294690 + }, + { + "epoch": 0.5953126451920474, + "grad_norm": 866.7681884765625, + "learning_rate": 4.314506076787729e-06, + "loss": 22.4973, + "step": 294700 + }, + { + "epoch": 0.5953328458247312, + "grad_norm": 427.6380615234375, + "learning_rate": 4.314160308704269e-06, + "loss": 16.7196, + "step": 294710 + }, + { + "epoch": 0.595353046457415, + "grad_norm": 166.1153564453125, + "learning_rate": 4.313814543963505e-06, + "loss": 16.7142, + "step": 294720 + }, + { + "epoch": 0.5953732470900989, + "grad_norm": 237.71177673339844, + "learning_rate": 4.313468782567128e-06, + "loss": 18.7256, + "step": 294730 + }, + 
{ + "epoch": 0.5953934477227827, + "grad_norm": 105.42239379882812, + "learning_rate": 4.313123024516819e-06, + "loss": 15.7055, + "step": 294740 + }, + { + "epoch": 0.5954136483554665, + "grad_norm": 139.54412841796875, + "learning_rate": 4.312777269814268e-06, + "loss": 19.2914, + "step": 294750 + }, + { + "epoch": 0.5954338489881503, + "grad_norm": 216.41641235351562, + "learning_rate": 4.312431518461154e-06, + "loss": 18.8749, + "step": 294760 + }, + { + "epoch": 0.5954540496208341, + "grad_norm": 298.35150146484375, + "learning_rate": 4.312085770459167e-06, + "loss": 14.0649, + "step": 294770 + }, + { + "epoch": 0.595474250253518, + "grad_norm": 1004.7127075195312, + "learning_rate": 4.311740025809992e-06, + "loss": 12.4464, + "step": 294780 + }, + { + "epoch": 0.5954944508862018, + "grad_norm": 178.72906494140625, + "learning_rate": 4.31139428451531e-06, + "loss": 33.6189, + "step": 294790 + }, + { + "epoch": 0.5955146515188856, + "grad_norm": 72.146484375, + "learning_rate": 4.31104854657681e-06, + "loss": 14.6239, + "step": 294800 + }, + { + "epoch": 0.5955348521515694, + "grad_norm": 362.326416015625, + "learning_rate": 4.310702811996177e-06, + "loss": 11.8445, + "step": 294810 + }, + { + "epoch": 0.5955550527842532, + "grad_norm": 178.36807250976562, + "learning_rate": 4.310357080775092e-06, + "loss": 28.5427, + "step": 294820 + }, + { + "epoch": 0.5955752534169371, + "grad_norm": 10.326326370239258, + "learning_rate": 4.3100113529152444e-06, + "loss": 22.2627, + "step": 294830 + }, + { + "epoch": 0.5955954540496209, + "grad_norm": 168.36253356933594, + "learning_rate": 4.30966562841832e-06, + "loss": 10.1222, + "step": 294840 + }, + { + "epoch": 0.5956156546823046, + "grad_norm": 315.1814270019531, + "learning_rate": 4.309319907285998e-06, + "loss": 14.5882, + "step": 294850 + }, + { + "epoch": 0.5956358553149884, + "grad_norm": 566.19287109375, + "learning_rate": 4.308974189519968e-06, + "loss": 19.206, + "step": 294860 + }, + { + "epoch": 0.5956560559476722, + "grad_norm": 843.1705932617188, + "learning_rate": 4.308628475121916e-06, + "loss": 25.139, + "step": 294870 + }, + { + "epoch": 0.5956762565803561, + "grad_norm": 272.6224060058594, + "learning_rate": 4.308282764093523e-06, + "loss": 25.7788, + "step": 294880 + }, + { + "epoch": 0.5956964572130399, + "grad_norm": 408.2192077636719, + "learning_rate": 4.307937056436476e-06, + "loss": 9.7835, + "step": 294890 + }, + { + "epoch": 0.5957166578457237, + "grad_norm": 103.95111083984375, + "learning_rate": 4.307591352152459e-06, + "loss": 24.1882, + "step": 294900 + }, + { + "epoch": 0.5957368584784075, + "grad_norm": 958.2185668945312, + "learning_rate": 4.307245651243161e-06, + "loss": 22.2643, + "step": 294910 + }, + { + "epoch": 0.5957570591110913, + "grad_norm": 357.21533203125, + "learning_rate": 4.30689995371026e-06, + "loss": 15.4138, + "step": 294920 + }, + { + "epoch": 0.5957772597437752, + "grad_norm": 567.7787475585938, + "learning_rate": 4.306554259555447e-06, + "loss": 25.7151, + "step": 294930 + }, + { + "epoch": 0.595797460376459, + "grad_norm": 434.95721435546875, + "learning_rate": 4.306208568780404e-06, + "loss": 17.9521, + "step": 294940 + }, + { + "epoch": 0.5958176610091428, + "grad_norm": 255.12109375, + "learning_rate": 4.3058628813868154e-06, + "loss": 14.506, + "step": 294950 + }, + { + "epoch": 0.5958378616418266, + "grad_norm": 244.69798278808594, + "learning_rate": 4.305517197376367e-06, + "loss": 8.4031, + "step": 294960 + }, + { + "epoch": 0.5958580622745104, + "grad_norm": 134.893310546875, + 
"learning_rate": 4.305171516750746e-06, + "loss": 32.4018, + "step": 294970 + }, + { + "epoch": 0.5958782629071943, + "grad_norm": 343.2240295410156, + "learning_rate": 4.3048258395116326e-06, + "loss": 11.0011, + "step": 294980 + }, + { + "epoch": 0.5958984635398781, + "grad_norm": 864.6599731445312, + "learning_rate": 4.304480165660712e-06, + "loss": 22.8115, + "step": 294990 + }, + { + "epoch": 0.5959186641725619, + "grad_norm": 333.3636474609375, + "learning_rate": 4.304134495199675e-06, + "loss": 17.9298, + "step": 295000 + }, + { + "epoch": 0.5959388648052457, + "grad_norm": 546.2763671875, + "learning_rate": 4.303788828130198e-06, + "loss": 21.2158, + "step": 295010 + }, + { + "epoch": 0.5959590654379295, + "grad_norm": 239.50503540039062, + "learning_rate": 4.303443164453971e-06, + "loss": 16.3914, + "step": 295020 + }, + { + "epoch": 0.5959792660706134, + "grad_norm": 202.0510711669922, + "learning_rate": 4.303097504172679e-06, + "loss": 16.3034, + "step": 295030 + }, + { + "epoch": 0.5959994667032972, + "grad_norm": 90.5951919555664, + "learning_rate": 4.302751847288005e-06, + "loss": 13.3175, + "step": 295040 + }, + { + "epoch": 0.596019667335981, + "grad_norm": 271.5483703613281, + "learning_rate": 4.302406193801632e-06, + "loss": 32.728, + "step": 295050 + }, + { + "epoch": 0.5960398679686648, + "grad_norm": 708.0242309570312, + "learning_rate": 4.302060543715247e-06, + "loss": 14.4512, + "step": 295060 + }, + { + "epoch": 0.5960600686013486, + "grad_norm": 195.20159912109375, + "learning_rate": 4.301714897030537e-06, + "loss": 16.2609, + "step": 295070 + }, + { + "epoch": 0.5960802692340325, + "grad_norm": 470.51141357421875, + "learning_rate": 4.3013692537491805e-06, + "loss": 12.1136, + "step": 295080 + }, + { + "epoch": 0.5961004698667163, + "grad_norm": 82.72560119628906, + "learning_rate": 4.3010236138728674e-06, + "loss": 28.6893, + "step": 295090 + }, + { + "epoch": 0.5961206704994001, + "grad_norm": 1.0823280811309814, + "learning_rate": 4.300677977403281e-06, + "loss": 9.5059, + "step": 295100 + }, + { + "epoch": 0.5961408711320838, + "grad_norm": 656.1585693359375, + "learning_rate": 4.3003323443421045e-06, + "loss": 27.7189, + "step": 295110 + }, + { + "epoch": 0.5961610717647676, + "grad_norm": 13.325555801391602, + "learning_rate": 4.299986714691022e-06, + "loss": 10.0998, + "step": 295120 + }, + { + "epoch": 0.5961812723974514, + "grad_norm": 448.62322998046875, + "learning_rate": 4.299641088451721e-06, + "loss": 16.4809, + "step": 295130 + }, + { + "epoch": 0.5962014730301353, + "grad_norm": 335.9667053222656, + "learning_rate": 4.299295465625884e-06, + "loss": 19.6562, + "step": 295140 + }, + { + "epoch": 0.5962216736628191, + "grad_norm": 517.5101928710938, + "learning_rate": 4.298949846215195e-06, + "loss": 27.424, + "step": 295150 + }, + { + "epoch": 0.5962418742955029, + "grad_norm": 513.454345703125, + "learning_rate": 4.298604230221341e-06, + "loss": 16.3277, + "step": 295160 + }, + { + "epoch": 0.5962620749281867, + "grad_norm": 480.7550964355469, + "learning_rate": 4.298258617646004e-06, + "loss": 24.3027, + "step": 295170 + }, + { + "epoch": 0.5962822755608705, + "grad_norm": 394.1954345703125, + "learning_rate": 4.29791300849087e-06, + "loss": 15.316, + "step": 295180 + }, + { + "epoch": 0.5963024761935544, + "grad_norm": 57.472801208496094, + "learning_rate": 4.297567402757621e-06, + "loss": 26.7314, + "step": 295190 + }, + { + "epoch": 0.5963226768262382, + "grad_norm": 47.60441207885742, + "learning_rate": 4.297221800447946e-06, + "loss": 16.3556, 
+ "step": 295200 + }, + { + "epoch": 0.596342877458922, + "grad_norm": 525.2633666992188, + "learning_rate": 4.296876201563524e-06, + "loss": 12.7697, + "step": 295210 + }, + { + "epoch": 0.5963630780916058, + "grad_norm": 275.6269226074219, + "learning_rate": 4.296530606106043e-06, + "loss": 16.9218, + "step": 295220 + }, + { + "epoch": 0.5963832787242896, + "grad_norm": 219.87693786621094, + "learning_rate": 4.296185014077188e-06, + "loss": 12.1012, + "step": 295230 + }, + { + "epoch": 0.5964034793569735, + "grad_norm": 270.0559387207031, + "learning_rate": 4.295839425478641e-06, + "loss": 16.1903, + "step": 295240 + }, + { + "epoch": 0.5964236799896573, + "grad_norm": 473.7545471191406, + "learning_rate": 4.295493840312087e-06, + "loss": 26.2634, + "step": 295250 + }, + { + "epoch": 0.5964438806223411, + "grad_norm": 516.518798828125, + "learning_rate": 4.295148258579211e-06, + "loss": 21.7435, + "step": 295260 + }, + { + "epoch": 0.5964640812550249, + "grad_norm": 380.75726318359375, + "learning_rate": 4.294802680281696e-06, + "loss": 12.6696, + "step": 295270 + }, + { + "epoch": 0.5964842818877087, + "grad_norm": 1.5298367738723755, + "learning_rate": 4.294457105421228e-06, + "loss": 15.1886, + "step": 295280 + }, + { + "epoch": 0.5965044825203926, + "grad_norm": 239.9437713623047, + "learning_rate": 4.294111533999492e-06, + "loss": 11.9446, + "step": 295290 + }, + { + "epoch": 0.5965246831530764, + "grad_norm": 701.978271484375, + "learning_rate": 4.293765966018167e-06, + "loss": 27.484, + "step": 295300 + }, + { + "epoch": 0.5965448837857602, + "grad_norm": 544.4490966796875, + "learning_rate": 4.293420401478943e-06, + "loss": 14.4572, + "step": 295310 + }, + { + "epoch": 0.596565084418444, + "grad_norm": 171.7335968017578, + "learning_rate": 4.293074840383504e-06, + "loss": 16.894, + "step": 295320 + }, + { + "epoch": 0.5965852850511278, + "grad_norm": 219.708251953125, + "learning_rate": 4.29272928273353e-06, + "loss": 17.1215, + "step": 295330 + }, + { + "epoch": 0.5966054856838117, + "grad_norm": 466.144775390625, + "learning_rate": 4.2923837285307085e-06, + "loss": 15.0668, + "step": 295340 + }, + { + "epoch": 0.5966256863164955, + "grad_norm": 44.40127182006836, + "learning_rate": 4.292038177776722e-06, + "loss": 24.7863, + "step": 295350 + }, + { + "epoch": 0.5966458869491792, + "grad_norm": 426.68804931640625, + "learning_rate": 4.291692630473258e-06, + "loss": 17.3891, + "step": 295360 + }, + { + "epoch": 0.596666087581863, + "grad_norm": 218.92205810546875, + "learning_rate": 4.291347086621996e-06, + "loss": 11.4738, + "step": 295370 + }, + { + "epoch": 0.5966862882145468, + "grad_norm": 366.910888671875, + "learning_rate": 4.2910015462246225e-06, + "loss": 14.6246, + "step": 295380 + }, + { + "epoch": 0.5967064888472307, + "grad_norm": 339.2355041503906, + "learning_rate": 4.290656009282823e-06, + "loss": 11.8092, + "step": 295390 + }, + { + "epoch": 0.5967266894799145, + "grad_norm": 243.0731964111328, + "learning_rate": 4.290310475798278e-06, + "loss": 19.5325, + "step": 295400 + }, + { + "epoch": 0.5967468901125983, + "grad_norm": 843.3153076171875, + "learning_rate": 4.289964945772675e-06, + "loss": 30.3153, + "step": 295410 + }, + { + "epoch": 0.5967670907452821, + "grad_norm": 384.42706298828125, + "learning_rate": 4.289619419207698e-06, + "loss": 40.5178, + "step": 295420 + }, + { + "epoch": 0.5967872913779659, + "grad_norm": 330.98248291015625, + "learning_rate": 4.289273896105027e-06, + "loss": 16.1977, + "step": 295430 + }, + { + "epoch": 0.5968074920106498, + 
"grad_norm": 389.6863708496094, + "learning_rate": 4.288928376466349e-06, + "loss": 11.3452, + "step": 295440 + }, + { + "epoch": 0.5968276926433336, + "grad_norm": 231.9348602294922, + "learning_rate": 4.288582860293351e-06, + "loss": 13.2791, + "step": 295450 + }, + { + "epoch": 0.5968478932760174, + "grad_norm": 305.238525390625, + "learning_rate": 4.288237347587711e-06, + "loss": 23.6382, + "step": 295460 + }, + { + "epoch": 0.5968680939087012, + "grad_norm": 509.225830078125, + "learning_rate": 4.287891838351117e-06, + "loss": 11.3871, + "step": 295470 + }, + { + "epoch": 0.596888294541385, + "grad_norm": 183.37425231933594, + "learning_rate": 4.2875463325852514e-06, + "loss": 15.3508, + "step": 295480 + }, + { + "epoch": 0.5969084951740689, + "grad_norm": 359.0205383300781, + "learning_rate": 4.287200830291799e-06, + "loss": 31.7106, + "step": 295490 + }, + { + "epoch": 0.5969286958067527, + "grad_norm": 648.9158935546875, + "learning_rate": 4.286855331472442e-06, + "loss": 22.7002, + "step": 295500 + }, + { + "epoch": 0.5969488964394365, + "grad_norm": 893.8304443359375, + "learning_rate": 4.286509836128866e-06, + "loss": 29.3073, + "step": 295510 + }, + { + "epoch": 0.5969690970721203, + "grad_norm": 126.27137756347656, + "learning_rate": 4.286164344262756e-06, + "loss": 18.4401, + "step": 295520 + }, + { + "epoch": 0.5969892977048041, + "grad_norm": 558.2559204101562, + "learning_rate": 4.285818855875793e-06, + "loss": 20.1282, + "step": 295530 + }, + { + "epoch": 0.597009498337488, + "grad_norm": 188.3037109375, + "learning_rate": 4.285473370969663e-06, + "loss": 24.9841, + "step": 295540 + }, + { + "epoch": 0.5970296989701718, + "grad_norm": 323.2847900390625, + "learning_rate": 4.285127889546049e-06, + "loss": 18.3854, + "step": 295550 + }, + { + "epoch": 0.5970498996028556, + "grad_norm": 452.3296203613281, + "learning_rate": 4.284782411606635e-06, + "loss": 16.278, + "step": 295560 + }, + { + "epoch": 0.5970701002355394, + "grad_norm": 438.90399169921875, + "learning_rate": 4.284436937153105e-06, + "loss": 14.4432, + "step": 295570 + }, + { + "epoch": 0.5970903008682232, + "grad_norm": 384.4964904785156, + "learning_rate": 4.284091466187142e-06, + "loss": 29.4804, + "step": 295580 + }, + { + "epoch": 0.597110501500907, + "grad_norm": 133.99334716796875, + "learning_rate": 4.283745998710431e-06, + "loss": 14.5344, + "step": 295590 + }, + { + "epoch": 0.5971307021335909, + "grad_norm": 545.9478149414062, + "learning_rate": 4.283400534724654e-06, + "loss": 17.9776, + "step": 295600 + }, + { + "epoch": 0.5971509027662747, + "grad_norm": 436.95098876953125, + "learning_rate": 4.283055074231498e-06, + "loss": 21.3602, + "step": 295610 + }, + { + "epoch": 0.5971711033989584, + "grad_norm": 215.1697235107422, + "learning_rate": 4.282709617232642e-06, + "loss": 17.1219, + "step": 295620 + }, + { + "epoch": 0.5971913040316422, + "grad_norm": 426.19122314453125, + "learning_rate": 4.282364163729773e-06, + "loss": 15.4137, + "step": 295630 + }, + { + "epoch": 0.597211504664326, + "grad_norm": 336.3559875488281, + "learning_rate": 4.282018713724576e-06, + "loss": 19.4231, + "step": 295640 + }, + { + "epoch": 0.5972317052970099, + "grad_norm": 368.91864013671875, + "learning_rate": 4.281673267218731e-06, + "loss": 20.4048, + "step": 295650 + }, + { + "epoch": 0.5972519059296937, + "grad_norm": 424.2174987792969, + "learning_rate": 4.281327824213923e-06, + "loss": 19.186, + "step": 295660 + }, + { + "epoch": 0.5972721065623775, + "grad_norm": 219.901611328125, + "learning_rate": 
4.280982384711835e-06, + "loss": 27.6856, + "step": 295670 + }, + { + "epoch": 0.5972923071950613, + "grad_norm": 342.3939514160156, + "learning_rate": 4.280636948714155e-06, + "loss": 20.1481, + "step": 295680 + }, + { + "epoch": 0.5973125078277451, + "grad_norm": 530.6146850585938, + "learning_rate": 4.280291516222561e-06, + "loss": 12.2937, + "step": 295690 + }, + { + "epoch": 0.597332708460429, + "grad_norm": 410.14599609375, + "learning_rate": 4.279946087238739e-06, + "loss": 15.0413, + "step": 295700 + }, + { + "epoch": 0.5973529090931128, + "grad_norm": 253.63958740234375, + "learning_rate": 4.279600661764374e-06, + "loss": 17.5502, + "step": 295710 + }, + { + "epoch": 0.5973731097257966, + "grad_norm": 572.5477294921875, + "learning_rate": 4.279255239801146e-06, + "loss": 43.3426, + "step": 295720 + }, + { + "epoch": 0.5973933103584804, + "grad_norm": 0.3940809965133667, + "learning_rate": 4.278909821350742e-06, + "loss": 11.7097, + "step": 295730 + }, + { + "epoch": 0.5974135109911642, + "grad_norm": 280.0107421875, + "learning_rate": 4.278564406414844e-06, + "loss": 18.1503, + "step": 295740 + }, + { + "epoch": 0.5974337116238481, + "grad_norm": 126.16145324707031, + "learning_rate": 4.278218994995135e-06, + "loss": 13.3549, + "step": 295750 + }, + { + "epoch": 0.5974539122565319, + "grad_norm": 1052.9522705078125, + "learning_rate": 4.277873587093298e-06, + "loss": 22.0248, + "step": 295760 + }, + { + "epoch": 0.5974741128892157, + "grad_norm": 211.38116455078125, + "learning_rate": 4.27752818271102e-06, + "loss": 11.4287, + "step": 295770 + }, + { + "epoch": 0.5974943135218995, + "grad_norm": 423.4015197753906, + "learning_rate": 4.27718278184998e-06, + "loss": 23.6106, + "step": 295780 + }, + { + "epoch": 0.5975145141545833, + "grad_norm": 307.5094299316406, + "learning_rate": 4.276837384511864e-06, + "loss": 13.9268, + "step": 295790 + }, + { + "epoch": 0.5975347147872672, + "grad_norm": 250.709716796875, + "learning_rate": 4.2764919906983545e-06, + "loss": 21.8706, + "step": 295800 + }, + { + "epoch": 0.597554915419951, + "grad_norm": 214.07861328125, + "learning_rate": 4.276146600411137e-06, + "loss": 16.992, + "step": 295810 + }, + { + "epoch": 0.5975751160526348, + "grad_norm": 915.2093505859375, + "learning_rate": 4.2758012136518925e-06, + "loss": 19.7232, + "step": 295820 + }, + { + "epoch": 0.5975953166853186, + "grad_norm": 499.217041015625, + "learning_rate": 4.275455830422303e-06, + "loss": 16.4675, + "step": 295830 + }, + { + "epoch": 0.5976155173180024, + "grad_norm": 702.2924194335938, + "learning_rate": 4.275110450724056e-06, + "loss": 15.909, + "step": 295840 + }, + { + "epoch": 0.5976357179506863, + "grad_norm": 562.8489990234375, + "learning_rate": 4.274765074558832e-06, + "loss": 22.8658, + "step": 295850 + }, + { + "epoch": 0.5976559185833701, + "grad_norm": 255.215087890625, + "learning_rate": 4.274419701928315e-06, + "loss": 13.3467, + "step": 295860 + }, + { + "epoch": 0.5976761192160538, + "grad_norm": 214.14102172851562, + "learning_rate": 4.27407433283419e-06, + "loss": 16.7613, + "step": 295870 + }, + { + "epoch": 0.5976963198487376, + "grad_norm": 669.8508911132812, + "learning_rate": 4.273728967278137e-06, + "loss": 15.156, + "step": 295880 + }, + { + "epoch": 0.5977165204814214, + "grad_norm": 414.38214111328125, + "learning_rate": 4.273383605261841e-06, + "loss": 15.0318, + "step": 295890 + }, + { + "epoch": 0.5977367211141053, + "grad_norm": 272.2400817871094, + "learning_rate": 4.273038246786986e-06, + "loss": 24.4473, + "step": 295900 + }, + { 
+ "epoch": 0.5977569217467891, + "grad_norm": 270.507568359375, + "learning_rate": 4.272692891855253e-06, + "loss": 24.971, + "step": 295910 + }, + { + "epoch": 0.5977771223794729, + "grad_norm": 531.7005004882812, + "learning_rate": 4.272347540468327e-06, + "loss": 28.0525, + "step": 295920 + }, + { + "epoch": 0.5977973230121567, + "grad_norm": 172.25665283203125, + "learning_rate": 4.272002192627892e-06, + "loss": 19.4454, + "step": 295930 + }, + { + "epoch": 0.5978175236448405, + "grad_norm": 162.74212646484375, + "learning_rate": 4.2716568483356295e-06, + "loss": 8.7676, + "step": 295940 + }, + { + "epoch": 0.5978377242775244, + "grad_norm": 199.9047088623047, + "learning_rate": 4.2713115075932225e-06, + "loss": 18.3432, + "step": 295950 + }, + { + "epoch": 0.5978579249102082, + "grad_norm": 138.35780334472656, + "learning_rate": 4.270966170402354e-06, + "loss": 20.9482, + "step": 295960 + }, + { + "epoch": 0.597878125542892, + "grad_norm": 848.1473388671875, + "learning_rate": 4.2706208367647115e-06, + "loss": 26.043, + "step": 295970 + }, + { + "epoch": 0.5978983261755758, + "grad_norm": 380.1227722167969, + "learning_rate": 4.270275506681971e-06, + "loss": 33.2933, + "step": 295980 + }, + { + "epoch": 0.5979185268082596, + "grad_norm": 455.56158447265625, + "learning_rate": 4.26993018015582e-06, + "loss": 25.1772, + "step": 295990 + }, + { + "epoch": 0.5979387274409435, + "grad_norm": 517.9584350585938, + "learning_rate": 4.269584857187942e-06, + "loss": 16.8953, + "step": 296000 + }, + { + "epoch": 0.5979589280736273, + "grad_norm": 146.59242248535156, + "learning_rate": 4.2692395377800185e-06, + "loss": 15.436, + "step": 296010 + }, + { + "epoch": 0.5979791287063111, + "grad_norm": 33.330013275146484, + "learning_rate": 4.268894221933733e-06, + "loss": 27.1096, + "step": 296020 + }, + { + "epoch": 0.5979993293389949, + "grad_norm": 71.1676025390625, + "learning_rate": 4.268548909650768e-06, + "loss": 26.74, + "step": 296030 + }, + { + "epoch": 0.5980195299716787, + "grad_norm": 183.0547332763672, + "learning_rate": 4.2682036009328065e-06, + "loss": 16.7322, + "step": 296040 + }, + { + "epoch": 0.5980397306043626, + "grad_norm": 302.4768371582031, + "learning_rate": 4.267858295781531e-06, + "loss": 18.9091, + "step": 296050 + }, + { + "epoch": 0.5980599312370464, + "grad_norm": 466.72283935546875, + "learning_rate": 4.267512994198629e-06, + "loss": 13.3001, + "step": 296060 + }, + { + "epoch": 0.5980801318697302, + "grad_norm": 458.6282653808594, + "learning_rate": 4.267167696185776e-06, + "loss": 23.9082, + "step": 296070 + }, + { + "epoch": 0.598100332502414, + "grad_norm": 208.31300354003906, + "learning_rate": 4.2668224017446595e-06, + "loss": 13.3059, + "step": 296080 + }, + { + "epoch": 0.5981205331350978, + "grad_norm": 283.8260498046875, + "learning_rate": 4.266477110876963e-06, + "loss": 23.312, + "step": 296090 + }, + { + "epoch": 0.5981407337677817, + "grad_norm": 240.76292419433594, + "learning_rate": 4.266131823584368e-06, + "loss": 28.9725, + "step": 296100 + }, + { + "epoch": 0.5981609344004655, + "grad_norm": 61.494651794433594, + "learning_rate": 4.265786539868556e-06, + "loss": 13.0544, + "step": 296110 + }, + { + "epoch": 0.5981811350331493, + "grad_norm": 152.66912841796875, + "learning_rate": 4.265441259731211e-06, + "loss": 25.169, + "step": 296120 + }, + { + "epoch": 0.598201335665833, + "grad_norm": 390.42437744140625, + "learning_rate": 4.26509598317402e-06, + "loss": 14.1549, + "step": 296130 + }, + { + "epoch": 0.5982215362985168, + "grad_norm": 
536.2457885742188, + "learning_rate": 4.2647507101986575e-06, + "loss": 15.1672, + "step": 296140 + }, + { + "epoch": 0.5982417369312006, + "grad_norm": 223.54296875, + "learning_rate": 4.264405440806813e-06, + "loss": 8.8315, + "step": 296150 + }, + { + "epoch": 0.5982619375638845, + "grad_norm": 168.26861572265625, + "learning_rate": 4.264060175000168e-06, + "loss": 15.8226, + "step": 296160 + }, + { + "epoch": 0.5982821381965683, + "grad_norm": 524.5977783203125, + "learning_rate": 4.263714912780403e-06, + "loss": 22.0013, + "step": 296170 + }, + { + "epoch": 0.5983023388292521, + "grad_norm": 292.9800109863281, + "learning_rate": 4.263369654149203e-06, + "loss": 28.8297, + "step": 296180 + }, + { + "epoch": 0.5983225394619359, + "grad_norm": 314.0179443359375, + "learning_rate": 4.263024399108251e-06, + "loss": 13.5808, + "step": 296190 + }, + { + "epoch": 0.5983427400946197, + "grad_norm": 1170.5003662109375, + "learning_rate": 4.262679147659227e-06, + "loss": 30.0497, + "step": 296200 + }, + { + "epoch": 0.5983629407273036, + "grad_norm": 240.40748596191406, + "learning_rate": 4.262333899803814e-06, + "loss": 24.7635, + "step": 296210 + }, + { + "epoch": 0.5983831413599874, + "grad_norm": 187.4853515625, + "learning_rate": 4.2619886555436995e-06, + "loss": 22.2111, + "step": 296220 + }, + { + "epoch": 0.5984033419926712, + "grad_norm": 603.9671630859375, + "learning_rate": 4.26164341488056e-06, + "loss": 12.4047, + "step": 296230 + }, + { + "epoch": 0.598423542625355, + "grad_norm": 636.5455322265625, + "learning_rate": 4.261298177816082e-06, + "loss": 21.9845, + "step": 296240 + }, + { + "epoch": 0.5984437432580388, + "grad_norm": 815.9725341796875, + "learning_rate": 4.260952944351947e-06, + "loss": 16.762, + "step": 296250 + }, + { + "epoch": 0.5984639438907227, + "grad_norm": 391.21893310546875, + "learning_rate": 4.260607714489839e-06, + "loss": 15.0348, + "step": 296260 + }, + { + "epoch": 0.5984841445234065, + "grad_norm": 636.109375, + "learning_rate": 4.260262488231438e-06, + "loss": 16.916, + "step": 296270 + }, + { + "epoch": 0.5985043451560903, + "grad_norm": 450.681884765625, + "learning_rate": 4.259917265578427e-06, + "loss": 22.7164, + "step": 296280 + }, + { + "epoch": 0.5985245457887741, + "grad_norm": 313.92584228515625, + "learning_rate": 4.259572046532493e-06, + "loss": 16.3863, + "step": 296290 + }, + { + "epoch": 0.5985447464214579, + "grad_norm": 300.09429931640625, + "learning_rate": 4.259226831095311e-06, + "loss": 38.5883, + "step": 296300 + }, + { + "epoch": 0.5985649470541418, + "grad_norm": 708.135498046875, + "learning_rate": 4.258881619268569e-06, + "loss": 26.8095, + "step": 296310 + }, + { + "epoch": 0.5985851476868256, + "grad_norm": 33.75373077392578, + "learning_rate": 4.258536411053949e-06, + "loss": 25.2684, + "step": 296320 + }, + { + "epoch": 0.5986053483195094, + "grad_norm": 500.82025146484375, + "learning_rate": 4.258191206453132e-06, + "loss": 29.452, + "step": 296330 + }, + { + "epoch": 0.5986255489521932, + "grad_norm": 106.94981384277344, + "learning_rate": 4.2578460054678e-06, + "loss": 25.7791, + "step": 296340 + }, + { + "epoch": 0.598645749584877, + "grad_norm": 538.3078002929688, + "learning_rate": 4.25750080809964e-06, + "loss": 23.9905, + "step": 296350 + }, + { + "epoch": 0.5986659502175609, + "grad_norm": 315.728515625, + "learning_rate": 4.2571556143503275e-06, + "loss": 11.6234, + "step": 296360 + }, + { + "epoch": 0.5986861508502447, + "grad_norm": 468.2890625, + "learning_rate": 4.256810424221548e-06, + "loss": 22.3584, + 
"step": 296370 + }, + { + "epoch": 0.5987063514829285, + "grad_norm": 641.2987060546875, + "learning_rate": 4.256465237714989e-06, + "loss": 29.7856, + "step": 296380 + }, + { + "epoch": 0.5987265521156122, + "grad_norm": 332.5379333496094, + "learning_rate": 4.2561200548323224e-06, + "loss": 11.4542, + "step": 296390 + }, + { + "epoch": 0.598746752748296, + "grad_norm": 576.611083984375, + "learning_rate": 4.255774875575239e-06, + "loss": 43.6743, + "step": 296400 + }, + { + "epoch": 0.5987669533809798, + "grad_norm": 110.72086334228516, + "learning_rate": 4.2554296999454194e-06, + "loss": 19.4639, + "step": 296410 + }, + { + "epoch": 0.5987871540136637, + "grad_norm": 120.25221252441406, + "learning_rate": 4.2550845279445455e-06, + "loss": 10.3498, + "step": 296420 + }, + { + "epoch": 0.5988073546463475, + "grad_norm": 310.85357666015625, + "learning_rate": 4.254739359574298e-06, + "loss": 12.2019, + "step": 296430 + }, + { + "epoch": 0.5988275552790313, + "grad_norm": 131.8038330078125, + "learning_rate": 4.25439419483636e-06, + "loss": 13.7566, + "step": 296440 + }, + { + "epoch": 0.5988477559117151, + "grad_norm": 153.33343505859375, + "learning_rate": 4.2540490337324156e-06, + "loss": 17.411, + "step": 296450 + }, + { + "epoch": 0.598867956544399, + "grad_norm": 706.9985961914062, + "learning_rate": 4.253703876264144e-06, + "loss": 36.8503, + "step": 296460 + }, + { + "epoch": 0.5988881571770828, + "grad_norm": 45.09660720825195, + "learning_rate": 4.253358722433231e-06, + "loss": 19.5842, + "step": 296470 + }, + { + "epoch": 0.5989083578097666, + "grad_norm": 87.81842803955078, + "learning_rate": 4.253013572241356e-06, + "loss": 23.6001, + "step": 296480 + }, + { + "epoch": 0.5989285584424504, + "grad_norm": 404.8854064941406, + "learning_rate": 4.252668425690203e-06, + "loss": 35.4945, + "step": 296490 + }, + { + "epoch": 0.5989487590751342, + "grad_norm": 411.6787414550781, + "learning_rate": 4.2523232827814534e-06, + "loss": 14.2964, + "step": 296500 + }, + { + "epoch": 0.598968959707818, + "grad_norm": 830.9055786132812, + "learning_rate": 4.251978143516789e-06, + "loss": 29.1576, + "step": 296510 + }, + { + "epoch": 0.5989891603405019, + "grad_norm": 81.86772155761719, + "learning_rate": 4.251633007897891e-06, + "loss": 15.3215, + "step": 296520 + }, + { + "epoch": 0.5990093609731857, + "grad_norm": 503.2459716796875, + "learning_rate": 4.251287875926445e-06, + "loss": 17.9395, + "step": 296530 + }, + { + "epoch": 0.5990295616058695, + "grad_norm": 279.11480712890625, + "learning_rate": 4.250942747604131e-06, + "loss": 23.3277, + "step": 296540 + }, + { + "epoch": 0.5990497622385533, + "grad_norm": 188.3449249267578, + "learning_rate": 4.250597622932631e-06, + "loss": 17.9754, + "step": 296550 + }, + { + "epoch": 0.5990699628712371, + "grad_norm": 211.36280822753906, + "learning_rate": 4.250252501913627e-06, + "loss": 18.1663, + "step": 296560 + }, + { + "epoch": 0.599090163503921, + "grad_norm": 249.63092041015625, + "learning_rate": 4.249907384548801e-06, + "loss": 15.9893, + "step": 296570 + }, + { + "epoch": 0.5991103641366048, + "grad_norm": 161.63848876953125, + "learning_rate": 4.249562270839837e-06, + "loss": 24.6142, + "step": 296580 + }, + { + "epoch": 0.5991305647692886, + "grad_norm": 1131.3251953125, + "learning_rate": 4.249217160788413e-06, + "loss": 25.2706, + "step": 296590 + }, + { + "epoch": 0.5991507654019724, + "grad_norm": 171.084228515625, + "learning_rate": 4.248872054396215e-06, + "loss": 15.0584, + "step": 296600 + }, + { + "epoch": 0.5991709660346562, 
+ "grad_norm": 323.4442443847656, + "learning_rate": 4.248526951664924e-06, + "loss": 11.89, + "step": 296610 + }, + { + "epoch": 0.5991911666673401, + "grad_norm": 329.6981506347656, + "learning_rate": 4.248181852596221e-06, + "loss": 20.4941, + "step": 296620 + }, + { + "epoch": 0.5992113673000239, + "grad_norm": 318.2145690917969, + "learning_rate": 4.247836757191787e-06, + "loss": 27.5163, + "step": 296630 + }, + { + "epoch": 0.5992315679327076, + "grad_norm": 196.76779174804688, + "learning_rate": 4.2474916654533085e-06, + "loss": 11.5402, + "step": 296640 + }, + { + "epoch": 0.5992517685653914, + "grad_norm": 235.93809509277344, + "learning_rate": 4.247146577382462e-06, + "loss": 14.8903, + "step": 296650 + }, + { + "epoch": 0.5992719691980752, + "grad_norm": 127.6539077758789, + "learning_rate": 4.246801492980931e-06, + "loss": 17.8151, + "step": 296660 + }, + { + "epoch": 0.5992921698307591, + "grad_norm": 345.1177978515625, + "learning_rate": 4.246456412250401e-06, + "loss": 9.2841, + "step": 296670 + }, + { + "epoch": 0.5993123704634429, + "grad_norm": 241.55496215820312, + "learning_rate": 4.246111335192548e-06, + "loss": 14.0622, + "step": 296680 + }, + { + "epoch": 0.5993325710961267, + "grad_norm": 325.2892150878906, + "learning_rate": 4.245766261809059e-06, + "loss": 21.6404, + "step": 296690 + }, + { + "epoch": 0.5993527717288105, + "grad_norm": 291.8177185058594, + "learning_rate": 4.245421192101613e-06, + "loss": 20.2458, + "step": 296700 + }, + { + "epoch": 0.5993729723614943, + "grad_norm": 349.75384521484375, + "learning_rate": 4.245076126071894e-06, + "loss": 16.9885, + "step": 296710 + }, + { + "epoch": 0.5993931729941782, + "grad_norm": 4.573607444763184, + "learning_rate": 4.244731063721581e-06, + "loss": 19.9236, + "step": 296720 + }, + { + "epoch": 0.599413373626862, + "grad_norm": 216.05674743652344, + "learning_rate": 4.244386005052356e-06, + "loss": 15.9433, + "step": 296730 + }, + { + "epoch": 0.5994335742595458, + "grad_norm": 305.31494140625, + "learning_rate": 4.244040950065905e-06, + "loss": 19.6417, + "step": 296740 + }, + { + "epoch": 0.5994537748922296, + "grad_norm": 313.0830078125, + "learning_rate": 4.243695898763904e-06, + "loss": 16.5436, + "step": 296750 + }, + { + "epoch": 0.5994739755249134, + "grad_norm": 445.0265197753906, + "learning_rate": 4.243350851148039e-06, + "loss": 16.1247, + "step": 296760 + }, + { + "epoch": 0.5994941761575973, + "grad_norm": 342.6043701171875, + "learning_rate": 4.24300580721999e-06, + "loss": 33.9881, + "step": 296770 + }, + { + "epoch": 0.5995143767902811, + "grad_norm": 153.574951171875, + "learning_rate": 4.242660766981439e-06, + "loss": 11.1193, + "step": 296780 + }, + { + "epoch": 0.5995345774229649, + "grad_norm": 244.50338745117188, + "learning_rate": 4.242315730434066e-06, + "loss": 14.952, + "step": 296790 + }, + { + "epoch": 0.5995547780556487, + "grad_norm": 485.69671630859375, + "learning_rate": 4.241970697579557e-06, + "loss": 29.0701, + "step": 296800 + }, + { + "epoch": 0.5995749786883325, + "grad_norm": 143.21177673339844, + "learning_rate": 4.2416256684195885e-06, + "loss": 22.8393, + "step": 296810 + }, + { + "epoch": 0.5995951793210164, + "grad_norm": 553.4241333007812, + "learning_rate": 4.241280642955845e-06, + "loss": 30.5847, + "step": 296820 + }, + { + "epoch": 0.5996153799537002, + "grad_norm": 447.57470703125, + "learning_rate": 4.24093562119001e-06, + "loss": 10.8338, + "step": 296830 + }, + { + "epoch": 0.599635580586384, + "grad_norm": 306.1315002441406, + "learning_rate": 
4.240590603123759e-06, + "loss": 30.0672, + "step": 296840 + }, + { + "epoch": 0.5996557812190678, + "grad_norm": 371.797607421875, + "learning_rate": 4.240245588758778e-06, + "loss": 22.4331, + "step": 296850 + }, + { + "epoch": 0.5996759818517516, + "grad_norm": 228.33164978027344, + "learning_rate": 4.23990057809675e-06, + "loss": 20.1725, + "step": 296860 + }, + { + "epoch": 0.5996961824844355, + "grad_norm": 332.0324401855469, + "learning_rate": 4.239555571139353e-06, + "loss": 15.8661, + "step": 296870 + }, + { + "epoch": 0.5997163831171193, + "grad_norm": 328.9191589355469, + "learning_rate": 4.23921056788827e-06, + "loss": 11.8912, + "step": 296880 + }, + { + "epoch": 0.5997365837498031, + "grad_norm": 709.0545654296875, + "learning_rate": 4.238865568345182e-06, + "loss": 24.6709, + "step": 296890 + }, + { + "epoch": 0.5997567843824868, + "grad_norm": 398.2712707519531, + "learning_rate": 4.238520572511773e-06, + "loss": 17.7421, + "step": 296900 + }, + { + "epoch": 0.5997769850151706, + "grad_norm": 472.1728820800781, + "learning_rate": 4.238175580389719e-06, + "loss": 10.7249, + "step": 296910 + }, + { + "epoch": 0.5997971856478544, + "grad_norm": 359.57073974609375, + "learning_rate": 4.2378305919807075e-06, + "loss": 28.1431, + "step": 296920 + }, + { + "epoch": 0.5998173862805383, + "grad_norm": 801.3305053710938, + "learning_rate": 4.237485607286417e-06, + "loss": 25.6771, + "step": 296930 + }, + { + "epoch": 0.5998375869132221, + "grad_norm": 300.6571044921875, + "learning_rate": 4.237140626308528e-06, + "loss": 13.8791, + "step": 296940 + }, + { + "epoch": 0.5998577875459059, + "grad_norm": 122.9664077758789, + "learning_rate": 4.2367956490487235e-06, + "loss": 15.0534, + "step": 296950 + }, + { + "epoch": 0.5998779881785897, + "grad_norm": 481.1971740722656, + "learning_rate": 4.2364506755086856e-06, + "loss": 21.6385, + "step": 296960 + }, + { + "epoch": 0.5998981888112735, + "grad_norm": 484.2899169921875, + "learning_rate": 4.236105705690094e-06, + "loss": 21.4996, + "step": 296970 + }, + { + "epoch": 0.5999183894439574, + "grad_norm": 476.460693359375, + "learning_rate": 4.2357607395946275e-06, + "loss": 29.8308, + "step": 296980 + }, + { + "epoch": 0.5999385900766412, + "grad_norm": 158.4099884033203, + "learning_rate": 4.235415777223976e-06, + "loss": 19.2788, + "step": 296990 + }, + { + "epoch": 0.599958790709325, + "grad_norm": 468.2158203125, + "learning_rate": 4.23507081857981e-06, + "loss": 14.2631, + "step": 297000 + }, + { + "epoch": 0.5999789913420088, + "grad_norm": 345.7831726074219, + "learning_rate": 4.234725863663819e-06, + "loss": 16.6418, + "step": 297010 + }, + { + "epoch": 0.5999991919746926, + "grad_norm": 166.0795135498047, + "learning_rate": 4.23438091247768e-06, + "loss": 12.6064, + "step": 297020 + }, + { + "epoch": 0.6000193926073765, + "grad_norm": 749.620361328125, + "learning_rate": 4.234035965023077e-06, + "loss": 42.4881, + "step": 297030 + }, + { + "epoch": 0.6000395932400603, + "grad_norm": 724.5010986328125, + "learning_rate": 4.233691021301689e-06, + "loss": 25.3071, + "step": 297040 + }, + { + "epoch": 0.6000597938727441, + "grad_norm": 126.72706604003906, + "learning_rate": 4.233346081315197e-06, + "loss": 35.7059, + "step": 297050 + }, + { + "epoch": 0.6000799945054279, + "grad_norm": 453.6716613769531, + "learning_rate": 4.233001145065286e-06, + "loss": 15.4326, + "step": 297060 + }, + { + "epoch": 0.6001001951381117, + "grad_norm": 97.83983612060547, + "learning_rate": 4.232656212553631e-06, + "loss": 14.1084, + "step": 297070 + 
}, + { + "epoch": 0.6001203957707956, + "grad_norm": 549.7896118164062, + "learning_rate": 4.232311283781918e-06, + "loss": 15.4117, + "step": 297080 + }, + { + "epoch": 0.6001405964034794, + "grad_norm": 498.0633850097656, + "learning_rate": 4.231966358751828e-06, + "loss": 15.6065, + "step": 297090 + }, + { + "epoch": 0.6001607970361632, + "grad_norm": 610.4193725585938, + "learning_rate": 4.23162143746504e-06, + "loss": 22.8777, + "step": 297100 + }, + { + "epoch": 0.600180997668847, + "grad_norm": 411.71893310546875, + "learning_rate": 4.231276519923235e-06, + "loss": 13.5345, + "step": 297110 + }, + { + "epoch": 0.6002011983015308, + "grad_norm": 678.7944946289062, + "learning_rate": 4.230931606128096e-06, + "loss": 40.3877, + "step": 297120 + }, + { + "epoch": 0.6002213989342147, + "grad_norm": 550.8128662109375, + "learning_rate": 4.230586696081303e-06, + "loss": 14.962, + "step": 297130 + }, + { + "epoch": 0.6002415995668985, + "grad_norm": 138.277587890625, + "learning_rate": 4.230241789784535e-06, + "loss": 10.8564, + "step": 297140 + }, + { + "epoch": 0.6002618001995822, + "grad_norm": 884.471923828125, + "learning_rate": 4.2298968872394784e-06, + "loss": 15.1153, + "step": 297150 + }, + { + "epoch": 0.600282000832266, + "grad_norm": 203.5852813720703, + "learning_rate": 4.229551988447809e-06, + "loss": 26.2936, + "step": 297160 + }, + { + "epoch": 0.6003022014649498, + "grad_norm": 141.3539276123047, + "learning_rate": 4.22920709341121e-06, + "loss": 25.7338, + "step": 297170 + }, + { + "epoch": 0.6003224020976337, + "grad_norm": 209.94851684570312, + "learning_rate": 4.228862202131362e-06, + "loss": 14.39, + "step": 297180 + }, + { + "epoch": 0.6003426027303175, + "grad_norm": 109.16659545898438, + "learning_rate": 4.228517314609948e-06, + "loss": 32.7086, + "step": 297190 + }, + { + "epoch": 0.6003628033630013, + "grad_norm": 225.36117553710938, + "learning_rate": 4.228172430848645e-06, + "loss": 12.7105, + "step": 297200 + }, + { + "epoch": 0.6003830039956851, + "grad_norm": 664.6941528320312, + "learning_rate": 4.227827550849136e-06, + "loss": 27.9936, + "step": 297210 + }, + { + "epoch": 0.6004032046283689, + "grad_norm": 0.0, + "learning_rate": 4.227482674613103e-06, + "loss": 21.371, + "step": 297220 + }, + { + "epoch": 0.6004234052610528, + "grad_norm": 189.36280822753906, + "learning_rate": 4.227137802142225e-06, + "loss": 12.8405, + "step": 297230 + }, + { + "epoch": 0.6004436058937366, + "grad_norm": 371.6158447265625, + "learning_rate": 4.226792933438183e-06, + "loss": 33.8094, + "step": 297240 + }, + { + "epoch": 0.6004638065264204, + "grad_norm": 482.85467529296875, + "learning_rate": 4.226448068502661e-06, + "loss": 8.6523, + "step": 297250 + }, + { + "epoch": 0.6004840071591042, + "grad_norm": 684.0409545898438, + "learning_rate": 4.2261032073373355e-06, + "loss": 22.2693, + "step": 297260 + }, + { + "epoch": 0.600504207791788, + "grad_norm": 752.8082275390625, + "learning_rate": 4.225758349943888e-06, + "loss": 18.2599, + "step": 297270 + }, + { + "epoch": 0.6005244084244719, + "grad_norm": 200.68338012695312, + "learning_rate": 4.225413496324003e-06, + "loss": 17.5361, + "step": 297280 + }, + { + "epoch": 0.6005446090571557, + "grad_norm": 182.53616333007812, + "learning_rate": 4.225068646479356e-06, + "loss": 18.5988, + "step": 297290 + }, + { + "epoch": 0.6005648096898395, + "grad_norm": 300.6029357910156, + "learning_rate": 4.224723800411631e-06, + "loss": 19.0446, + "step": 297300 + }, + { + "epoch": 0.6005850103225233, + "grad_norm": 486.53961181640625, + 
"learning_rate": 4.22437895812251e-06, + "loss": 11.3072, + "step": 297310 + }, + { + "epoch": 0.6006052109552071, + "grad_norm": 631.1341552734375, + "learning_rate": 4.224034119613671e-06, + "loss": 13.3442, + "step": 297320 + }, + { + "epoch": 0.600625411587891, + "grad_norm": 324.3525085449219, + "learning_rate": 4.223689284886795e-06, + "loss": 24.3036, + "step": 297330 + }, + { + "epoch": 0.6006456122205748, + "grad_norm": 456.65167236328125, + "learning_rate": 4.223344453943562e-06, + "loss": 18.9171, + "step": 297340 + }, + { + "epoch": 0.6006658128532586, + "grad_norm": 425.72894287109375, + "learning_rate": 4.222999626785658e-06, + "loss": 17.0448, + "step": 297350 + }, + { + "epoch": 0.6006860134859424, + "grad_norm": 679.47021484375, + "learning_rate": 4.2226548034147555e-06, + "loss": 22.0326, + "step": 297360 + }, + { + "epoch": 0.6007062141186262, + "grad_norm": 224.88563537597656, + "learning_rate": 4.222309983832541e-06, + "loss": 28.2879, + "step": 297370 + }, + { + "epoch": 0.6007264147513101, + "grad_norm": 302.3341369628906, + "learning_rate": 4.221965168040693e-06, + "loss": 24.6421, + "step": 297380 + }, + { + "epoch": 0.6007466153839939, + "grad_norm": 285.5158386230469, + "learning_rate": 4.221620356040892e-06, + "loss": 27.8586, + "step": 297390 + }, + { + "epoch": 0.6007668160166777, + "grad_norm": 308.74993896484375, + "learning_rate": 4.22127554783482e-06, + "loss": 31.893, + "step": 297400 + }, + { + "epoch": 0.6007870166493614, + "grad_norm": 157.2686004638672, + "learning_rate": 4.220930743424157e-06, + "loss": 19.8243, + "step": 297410 + }, + { + "epoch": 0.6008072172820452, + "grad_norm": 605.8692626953125, + "learning_rate": 4.220585942810582e-06, + "loss": 12.8327, + "step": 297420 + }, + { + "epoch": 0.600827417914729, + "grad_norm": 792.844482421875, + "learning_rate": 4.220241145995775e-06, + "loss": 39.2107, + "step": 297430 + }, + { + "epoch": 0.6008476185474129, + "grad_norm": 881.8159790039062, + "learning_rate": 4.219896352981422e-06, + "loss": 27.649, + "step": 297440 + }, + { + "epoch": 0.6008678191800967, + "grad_norm": 38.221126556396484, + "learning_rate": 4.219551563769196e-06, + "loss": 27.9305, + "step": 297450 + }, + { + "epoch": 0.6008880198127805, + "grad_norm": 546.9678344726562, + "learning_rate": 4.219206778360782e-06, + "loss": 13.5159, + "step": 297460 + }, + { + "epoch": 0.6009082204454643, + "grad_norm": 151.08399963378906, + "learning_rate": 4.218861996757859e-06, + "loss": 13.8155, + "step": 297470 + }, + { + "epoch": 0.6009284210781481, + "grad_norm": 413.4466552734375, + "learning_rate": 4.218517218962111e-06, + "loss": 11.1458, + "step": 297480 + }, + { + "epoch": 0.600948621710832, + "grad_norm": 448.5552978515625, + "learning_rate": 4.218172444975212e-06, + "loss": 10.4878, + "step": 297490 + }, + { + "epoch": 0.6009688223435158, + "grad_norm": 304.6234436035156, + "learning_rate": 4.217827674798845e-06, + "loss": 10.4263, + "step": 297500 + }, + { + "epoch": 0.6009890229761996, + "grad_norm": 158.3286895751953, + "learning_rate": 4.217482908434695e-06, + "loss": 14.0449, + "step": 297510 + }, + { + "epoch": 0.6010092236088834, + "grad_norm": 685.6396484375, + "learning_rate": 4.217138145884435e-06, + "loss": 21.0802, + "step": 297520 + }, + { + "epoch": 0.6010294242415672, + "grad_norm": 237.3669891357422, + "learning_rate": 4.216793387149749e-06, + "loss": 25.1187, + "step": 297530 + }, + { + "epoch": 0.6010496248742511, + "grad_norm": 378.9090576171875, + "learning_rate": 4.216448632232319e-06, + "loss": 31.9589, + 
"step": 297540 + }, + { + "epoch": 0.6010698255069349, + "grad_norm": 248.90325927734375, + "learning_rate": 4.216103881133822e-06, + "loss": 28.8022, + "step": 297550 + }, + { + "epoch": 0.6010900261396187, + "grad_norm": 61.23601150512695, + "learning_rate": 4.21575913385594e-06, + "loss": 6.3718, + "step": 297560 + }, + { + "epoch": 0.6011102267723025, + "grad_norm": 502.94525146484375, + "learning_rate": 4.215414390400353e-06, + "loss": 30.2917, + "step": 297570 + }, + { + "epoch": 0.6011304274049863, + "grad_norm": 912.2431030273438, + "learning_rate": 4.21506965076874e-06, + "loss": 35.4973, + "step": 297580 + }, + { + "epoch": 0.6011506280376702, + "grad_norm": 344.84832763671875, + "learning_rate": 4.2147249149627826e-06, + "loss": 17.9056, + "step": 297590 + }, + { + "epoch": 0.601170828670354, + "grad_norm": 599.3446044921875, + "learning_rate": 4.2143801829841635e-06, + "loss": 10.8853, + "step": 297600 + }, + { + "epoch": 0.6011910293030378, + "grad_norm": 50.71405029296875, + "learning_rate": 4.214035454834556e-06, + "loss": 19.0953, + "step": 297610 + }, + { + "epoch": 0.6012112299357216, + "grad_norm": 504.15814208984375, + "learning_rate": 4.213690730515646e-06, + "loss": 19.7619, + "step": 297620 + }, + { + "epoch": 0.6012314305684054, + "grad_norm": 246.1601104736328, + "learning_rate": 4.213346010029112e-06, + "loss": 16.6431, + "step": 297630 + }, + { + "epoch": 0.6012516312010893, + "grad_norm": 141.6668701171875, + "learning_rate": 4.213001293376635e-06, + "loss": 17.8635, + "step": 297640 + }, + { + "epoch": 0.6012718318337731, + "grad_norm": 399.06060791015625, + "learning_rate": 4.212656580559894e-06, + "loss": 10.4143, + "step": 297650 + }, + { + "epoch": 0.6012920324664568, + "grad_norm": 491.92266845703125, + "learning_rate": 4.212311871580568e-06, + "loss": 13.642, + "step": 297660 + }, + { + "epoch": 0.6013122330991406, + "grad_norm": 146.76007080078125, + "learning_rate": 4.2119671664403404e-06, + "loss": 10.5023, + "step": 297670 + }, + { + "epoch": 0.6013324337318244, + "grad_norm": 369.0421447753906, + "learning_rate": 4.211622465140887e-06, + "loss": 24.4898, + "step": 297680 + }, + { + "epoch": 0.6013526343645083, + "grad_norm": 372.4095458984375, + "learning_rate": 4.211277767683891e-06, + "loss": 18.4748, + "step": 297690 + }, + { + "epoch": 0.6013728349971921, + "grad_norm": 219.39378356933594, + "learning_rate": 4.210933074071033e-06, + "loss": 14.4395, + "step": 297700 + }, + { + "epoch": 0.6013930356298759, + "grad_norm": 593.753173828125, + "learning_rate": 4.21058838430399e-06, + "loss": 20.9442, + "step": 297710 + }, + { + "epoch": 0.6014132362625597, + "grad_norm": 545.1883544921875, + "learning_rate": 4.2102436983844435e-06, + "loss": 39.2257, + "step": 297720 + }, + { + "epoch": 0.6014334368952435, + "grad_norm": 990.1265258789062, + "learning_rate": 4.209899016314075e-06, + "loss": 16.6062, + "step": 297730 + }, + { + "epoch": 0.6014536375279274, + "grad_norm": 182.2779083251953, + "learning_rate": 4.209554338094561e-06, + "loss": 11.2611, + "step": 297740 + }, + { + "epoch": 0.6014738381606112, + "grad_norm": 59.02173614501953, + "learning_rate": 4.209209663727583e-06, + "loss": 15.6947, + "step": 297750 + }, + { + "epoch": 0.601494038793295, + "grad_norm": 220.6677703857422, + "learning_rate": 4.208864993214821e-06, + "loss": 27.8063, + "step": 297760 + }, + { + "epoch": 0.6015142394259788, + "grad_norm": 209.71670532226562, + "learning_rate": 4.208520326557957e-06, + "loss": 16.7089, + "step": 297770 + }, + { + "epoch": 
0.6015344400586626, + "grad_norm": 97.02810668945312, + "learning_rate": 4.208175663758668e-06, + "loss": 30.089, + "step": 297780 + }, + { + "epoch": 0.6015546406913465, + "grad_norm": 367.11175537109375, + "learning_rate": 4.2078310048186345e-06, + "loss": 33.2478, + "step": 297790 + }, + { + "epoch": 0.6015748413240303, + "grad_norm": 248.53724670410156, + "learning_rate": 4.207486349739538e-06, + "loss": 9.8839, + "step": 297800 + }, + { + "epoch": 0.6015950419567141, + "grad_norm": 820.9579467773438, + "learning_rate": 4.207141698523055e-06, + "loss": 35.9561, + "step": 297810 + }, + { + "epoch": 0.6016152425893979, + "grad_norm": 587.4654541015625, + "learning_rate": 4.206797051170867e-06, + "loss": 33.4931, + "step": 297820 + }, + { + "epoch": 0.6016354432220817, + "grad_norm": 437.03118896484375, + "learning_rate": 4.206452407684656e-06, + "loss": 18.3676, + "step": 297830 + }, + { + "epoch": 0.6016556438547656, + "grad_norm": 533.9827880859375, + "learning_rate": 4.206107768066099e-06, + "loss": 13.6058, + "step": 297840 + }, + { + "epoch": 0.6016758444874494, + "grad_norm": 610.5231323242188, + "learning_rate": 4.205763132316875e-06, + "loss": 16.0694, + "step": 297850 + }, + { + "epoch": 0.6016960451201332, + "grad_norm": 584.7250366210938, + "learning_rate": 4.2054185004386675e-06, + "loss": 16.5326, + "step": 297860 + }, + { + "epoch": 0.601716245752817, + "grad_norm": 449.36236572265625, + "learning_rate": 4.205073872433152e-06, + "loss": 13.229, + "step": 297870 + }, + { + "epoch": 0.6017364463855008, + "grad_norm": 428.4736328125, + "learning_rate": 4.2047292483020096e-06, + "loss": 17.5223, + "step": 297880 + }, + { + "epoch": 0.6017566470181847, + "grad_norm": 396.2667541503906, + "learning_rate": 4.204384628046924e-06, + "loss": 18.1104, + "step": 297890 + }, + { + "epoch": 0.6017768476508685, + "grad_norm": 70.28057098388672, + "learning_rate": 4.204040011669567e-06, + "loss": 31.339, + "step": 297900 + }, + { + "epoch": 0.6017970482835523, + "grad_norm": 286.1497802734375, + "learning_rate": 4.203695399171624e-06, + "loss": 13.5954, + "step": 297910 + }, + { + "epoch": 0.601817248916236, + "grad_norm": 109.39277648925781, + "learning_rate": 4.203350790554773e-06, + "loss": 9.4843, + "step": 297920 + }, + { + "epoch": 0.6018374495489198, + "grad_norm": 245.7013397216797, + "learning_rate": 4.203006185820695e-06, + "loss": 18.6695, + "step": 297930 + }, + { + "epoch": 0.6018576501816036, + "grad_norm": 682.17333984375, + "learning_rate": 4.2026615849710665e-06, + "loss": 23.312, + "step": 297940 + }, + { + "epoch": 0.6018778508142875, + "grad_norm": 366.9371032714844, + "learning_rate": 4.202316988007568e-06, + "loss": 19.1287, + "step": 297950 + }, + { + "epoch": 0.6018980514469713, + "grad_norm": 275.8540344238281, + "learning_rate": 4.201972394931883e-06, + "loss": 12.0746, + "step": 297960 + }, + { + "epoch": 0.6019182520796551, + "grad_norm": 194.72300720214844, + "learning_rate": 4.201627805745684e-06, + "loss": 32.8855, + "step": 297970 + }, + { + "epoch": 0.6019384527123389, + "grad_norm": 152.77197265625, + "learning_rate": 4.201283220450656e-06, + "loss": 9.2756, + "step": 297980 + }, + { + "epoch": 0.6019586533450227, + "grad_norm": 302.232421875, + "learning_rate": 4.200938639048477e-06, + "loss": 14.8609, + "step": 297990 + }, + { + "epoch": 0.6019788539777066, + "grad_norm": 592.5577392578125, + "learning_rate": 4.200594061540827e-06, + "loss": 24.2384, + "step": 298000 + }, + { + "epoch": 0.6019990546103904, + "grad_norm": 370.72088623046875, + 
"learning_rate": 4.200249487929383e-06, + "loss": 47.6909, + "step": 298010 + }, + { + "epoch": 0.6020192552430742, + "grad_norm": 194.29681396484375, + "learning_rate": 4.199904918215827e-06, + "loss": 13.4187, + "step": 298020 + }, + { + "epoch": 0.602039455875758, + "grad_norm": 595.0006103515625, + "learning_rate": 4.199560352401836e-06, + "loss": 44.0719, + "step": 298030 + }, + { + "epoch": 0.6020596565084418, + "grad_norm": 587.3641967773438, + "learning_rate": 4.199215790489091e-06, + "loss": 20.1468, + "step": 298040 + }, + { + "epoch": 0.6020798571411257, + "grad_norm": 578.7800903320312, + "learning_rate": 4.198871232479274e-06, + "loss": 19.0328, + "step": 298050 + }, + { + "epoch": 0.6021000577738095, + "grad_norm": 613.0132446289062, + "learning_rate": 4.1985266783740575e-06, + "loss": 18.4533, + "step": 298060 + }, + { + "epoch": 0.6021202584064933, + "grad_norm": 1191.32080078125, + "learning_rate": 4.198182128175126e-06, + "loss": 21.9817, + "step": 298070 + }, + { + "epoch": 0.6021404590391771, + "grad_norm": 521.42822265625, + "learning_rate": 4.197837581884158e-06, + "loss": 16.8588, + "step": 298080 + }, + { + "epoch": 0.6021606596718609, + "grad_norm": 275.6913757324219, + "learning_rate": 4.1974930395028325e-06, + "loss": 29.4446, + "step": 298090 + }, + { + "epoch": 0.6021808603045448, + "grad_norm": 191.11502075195312, + "learning_rate": 4.197148501032829e-06, + "loss": 19.7182, + "step": 298100 + }, + { + "epoch": 0.6022010609372286, + "grad_norm": 681.275390625, + "learning_rate": 4.1968039664758245e-06, + "loss": 13.4476, + "step": 298110 + }, + { + "epoch": 0.6022212615699124, + "grad_norm": 168.35659790039062, + "learning_rate": 4.196459435833503e-06, + "loss": 18.5558, + "step": 298120 + }, + { + "epoch": 0.6022414622025962, + "grad_norm": 318.42889404296875, + "learning_rate": 4.196114909107538e-06, + "loss": 18.9843, + "step": 298130 + }, + { + "epoch": 0.60226166283528, + "grad_norm": 521.52734375, + "learning_rate": 4.195770386299612e-06, + "loss": 12.5705, + "step": 298140 + }, + { + "epoch": 0.6022818634679639, + "grad_norm": 285.8468017578125, + "learning_rate": 4.195425867411404e-06, + "loss": 16.2484, + "step": 298150 + }, + { + "epoch": 0.6023020641006477, + "grad_norm": 379.369873046875, + "learning_rate": 4.195081352444593e-06, + "loss": 26.881, + "step": 298160 + }, + { + "epoch": 0.6023222647333315, + "grad_norm": 205.96356201171875, + "learning_rate": 4.194736841400858e-06, + "loss": 8.2632, + "step": 298170 + }, + { + "epoch": 0.6023424653660152, + "grad_norm": 289.69073486328125, + "learning_rate": 4.1943923342818785e-06, + "loss": 12.5926, + "step": 298180 + }, + { + "epoch": 0.602362665998699, + "grad_norm": 302.1565856933594, + "learning_rate": 4.194047831089332e-06, + "loss": 8.5044, + "step": 298190 + }, + { + "epoch": 0.6023828666313829, + "grad_norm": 553.7530517578125, + "learning_rate": 4.193703331824898e-06, + "loss": 12.2299, + "step": 298200 + }, + { + "epoch": 0.6024030672640667, + "grad_norm": 278.378662109375, + "learning_rate": 4.193358836490258e-06, + "loss": 11.1498, + "step": 298210 + }, + { + "epoch": 0.6024232678967505, + "grad_norm": 736.283935546875, + "learning_rate": 4.193014345087088e-06, + "loss": 21.8522, + "step": 298220 + }, + { + "epoch": 0.6024434685294343, + "grad_norm": 343.1639709472656, + "learning_rate": 4.192669857617068e-06, + "loss": 17.3157, + "step": 298230 + }, + { + "epoch": 0.6024636691621181, + "grad_norm": 306.9372863769531, + "learning_rate": 4.192325374081877e-06, + "loss": 12.2171, + "step": 
298240 + }, + { + "epoch": 0.602483869794802, + "grad_norm": 14.186240196228027, + "learning_rate": 4.191980894483195e-06, + "loss": 15.5916, + "step": 298250 + }, + { + "epoch": 0.6025040704274858, + "grad_norm": 23.902244567871094, + "learning_rate": 4.1916364188227e-06, + "loss": 16.8782, + "step": 298260 + }, + { + "epoch": 0.6025242710601696, + "grad_norm": 214.9687957763672, + "learning_rate": 4.19129194710207e-06, + "loss": 18.2315, + "step": 298270 + }, + { + "epoch": 0.6025444716928534, + "grad_norm": 435.8495788574219, + "learning_rate": 4.190947479322988e-06, + "loss": 13.3487, + "step": 298280 + }, + { + "epoch": 0.6025646723255372, + "grad_norm": 468.54241943359375, + "learning_rate": 4.190603015487126e-06, + "loss": 20.7279, + "step": 298290 + }, + { + "epoch": 0.602584872958221, + "grad_norm": 577.8019409179688, + "learning_rate": 4.190258555596168e-06, + "loss": 17.5265, + "step": 298300 + }, + { + "epoch": 0.6026050735909049, + "grad_norm": 131.77757263183594, + "learning_rate": 4.1899140996517934e-06, + "loss": 13.6653, + "step": 298310 + }, + { + "epoch": 0.6026252742235887, + "grad_norm": 420.06695556640625, + "learning_rate": 4.189569647655677e-06, + "loss": 24.7182, + "step": 298320 + }, + { + "epoch": 0.6026454748562725, + "grad_norm": 417.7607116699219, + "learning_rate": 4.189225199609501e-06, + "loss": 15.1371, + "step": 298330 + }, + { + "epoch": 0.6026656754889563, + "grad_norm": 610.1613159179688, + "learning_rate": 4.188880755514944e-06, + "loss": 21.7652, + "step": 298340 + }, + { + "epoch": 0.6026858761216402, + "grad_norm": 87.60273742675781, + "learning_rate": 4.1885363153736825e-06, + "loss": 23.8042, + "step": 298350 + }, + { + "epoch": 0.602706076754324, + "grad_norm": 183.4608154296875, + "learning_rate": 4.188191879187395e-06, + "loss": 18.7179, + "step": 298360 + }, + { + "epoch": 0.6027262773870078, + "grad_norm": 497.125732421875, + "learning_rate": 4.187847446957763e-06, + "loss": 19.7117, + "step": 298370 + }, + { + "epoch": 0.6027464780196916, + "grad_norm": 105.0024642944336, + "learning_rate": 4.187503018686466e-06, + "loss": 8.4717, + "step": 298380 + }, + { + "epoch": 0.6027666786523754, + "grad_norm": 603.5338134765625, + "learning_rate": 4.1871585943751795e-06, + "loss": 30.7272, + "step": 298390 + }, + { + "epoch": 0.6027868792850593, + "grad_norm": 190.3978271484375, + "learning_rate": 4.186814174025582e-06, + "loss": 15.1717, + "step": 298400 + }, + { + "epoch": 0.6028070799177431, + "grad_norm": 429.34259033203125, + "learning_rate": 4.186469757639356e-06, + "loss": 26.493, + "step": 298410 + }, + { + "epoch": 0.6028272805504269, + "grad_norm": 620.0953979492188, + "learning_rate": 4.186125345218177e-06, + "loss": 18.7135, + "step": 298420 + }, + { + "epoch": 0.6028474811831106, + "grad_norm": 671.0869750976562, + "learning_rate": 4.185780936763722e-06, + "loss": 25.2038, + "step": 298430 + }, + { + "epoch": 0.6028676818157944, + "grad_norm": 81.80601501464844, + "learning_rate": 4.185436532277675e-06, + "loss": 14.779, + "step": 298440 + }, + { + "epoch": 0.6028878824484782, + "grad_norm": 399.3650817871094, + "learning_rate": 4.18509213176171e-06, + "loss": 24.9025, + "step": 298450 + }, + { + "epoch": 0.6029080830811621, + "grad_norm": 385.64703369140625, + "learning_rate": 4.184747735217507e-06, + "loss": 16.8063, + "step": 298460 + }, + { + "epoch": 0.6029282837138459, + "grad_norm": 385.7299499511719, + "learning_rate": 4.184403342646746e-06, + "loss": 29.951, + "step": 298470 + }, + { + "epoch": 0.6029484843465297, + 
"grad_norm": 178.00515747070312, + "learning_rate": 4.1840589540511035e-06, + "loss": 14.1718, + "step": 298480 + }, + { + "epoch": 0.6029686849792135, + "grad_norm": 132.05557250976562, + "learning_rate": 4.183714569432259e-06, + "loss": 16.2912, + "step": 298490 + }, + { + "epoch": 0.6029888856118973, + "grad_norm": 234.59046936035156, + "learning_rate": 4.183370188791891e-06, + "loss": 23.0456, + "step": 298500 + }, + { + "epoch": 0.6030090862445812, + "grad_norm": 333.8605041503906, + "learning_rate": 4.183025812131674e-06, + "loss": 16.6333, + "step": 298510 + }, + { + "epoch": 0.603029286877265, + "grad_norm": 121.52058410644531, + "learning_rate": 4.182681439453294e-06, + "loss": 16.7034, + "step": 298520 + }, + { + "epoch": 0.6030494875099488, + "grad_norm": 572.5895385742188, + "learning_rate": 4.182337070758425e-06, + "loss": 17.9095, + "step": 298530 + }, + { + "epoch": 0.6030696881426326, + "grad_norm": 560.9683227539062, + "learning_rate": 4.1819927060487454e-06, + "loss": 15.1039, + "step": 298540 + }, + { + "epoch": 0.6030898887753164, + "grad_norm": 448.8406677246094, + "learning_rate": 4.181648345325934e-06, + "loss": 16.6443, + "step": 298550 + }, + { + "epoch": 0.6031100894080003, + "grad_norm": 165.1465301513672, + "learning_rate": 4.181303988591669e-06, + "loss": 10.6893, + "step": 298560 + }, + { + "epoch": 0.6031302900406841, + "grad_norm": 545.265380859375, + "learning_rate": 4.1809596358476315e-06, + "loss": 25.455, + "step": 298570 + }, + { + "epoch": 0.6031504906733679, + "grad_norm": 677.195068359375, + "learning_rate": 4.180615287095494e-06, + "loss": 21.0746, + "step": 298580 + }, + { + "epoch": 0.6031706913060517, + "grad_norm": 21.15522003173828, + "learning_rate": 4.180270942336939e-06, + "loss": 19.539, + "step": 298590 + }, + { + "epoch": 0.6031908919387355, + "grad_norm": 495.523193359375, + "learning_rate": 4.179926601573645e-06, + "loss": 27.5947, + "step": 298600 + }, + { + "epoch": 0.6032110925714194, + "grad_norm": 173.38722229003906, + "learning_rate": 4.179582264807289e-06, + "loss": 14.384, + "step": 298610 + }, + { + "epoch": 0.6032312932041032, + "grad_norm": 204.8512420654297, + "learning_rate": 4.17923793203955e-06, + "loss": 12.818, + "step": 298620 + }, + { + "epoch": 0.603251493836787, + "grad_norm": 488.9126281738281, + "learning_rate": 4.1788936032721065e-06, + "loss": 23.2934, + "step": 298630 + }, + { + "epoch": 0.6032716944694708, + "grad_norm": 260.3359069824219, + "learning_rate": 4.178549278506634e-06, + "loss": 15.384, + "step": 298640 + }, + { + "epoch": 0.6032918951021546, + "grad_norm": 439.2271423339844, + "learning_rate": 4.178204957744812e-06, + "loss": 14.0319, + "step": 298650 + }, + { + "epoch": 0.6033120957348385, + "grad_norm": 283.25811767578125, + "learning_rate": 4.177860640988323e-06, + "loss": 19.0398, + "step": 298660 + }, + { + "epoch": 0.6033322963675223, + "grad_norm": 196.1883544921875, + "learning_rate": 4.177516328238838e-06, + "loss": 22.395, + "step": 298670 + }, + { + "epoch": 0.6033524970002061, + "grad_norm": 476.9971618652344, + "learning_rate": 4.17717201949804e-06, + "loss": 17.1944, + "step": 298680 + }, + { + "epoch": 0.6033726976328898, + "grad_norm": 39.04746627807617, + "learning_rate": 4.176827714767606e-06, + "loss": 10.2527, + "step": 298690 + }, + { + "epoch": 0.6033928982655736, + "grad_norm": 464.3876037597656, + "learning_rate": 4.176483414049214e-06, + "loss": 12.7073, + "step": 298700 + }, + { + "epoch": 0.6034130988982574, + "grad_norm": 224.52981567382812, + "learning_rate": 
4.176139117344542e-06, + "loss": 12.2768, + "step": 298710 + }, + { + "epoch": 0.6034332995309413, + "grad_norm": 412.624755859375, + "learning_rate": 4.175794824655266e-06, + "loss": 17.816, + "step": 298720 + }, + { + "epoch": 0.6034535001636251, + "grad_norm": 424.7938537597656, + "learning_rate": 4.17545053598307e-06, + "loss": 26.5277, + "step": 298730 + }, + { + "epoch": 0.6034737007963089, + "grad_norm": 142.6999053955078, + "learning_rate": 4.1751062513296245e-06, + "loss": 17.8631, + "step": 298740 + }, + { + "epoch": 0.6034939014289927, + "grad_norm": 331.29315185546875, + "learning_rate": 4.174761970696612e-06, + "loss": 16.1455, + "step": 298750 + }, + { + "epoch": 0.6035141020616765, + "grad_norm": 639.9597778320312, + "learning_rate": 4.174417694085711e-06, + "loss": 27.9107, + "step": 298760 + }, + { + "epoch": 0.6035343026943604, + "grad_norm": 94.59380340576172, + "learning_rate": 4.174073421498597e-06, + "loss": 20.1087, + "step": 298770 + }, + { + "epoch": 0.6035545033270442, + "grad_norm": 157.8151092529297, + "learning_rate": 4.173729152936948e-06, + "loss": 23.2745, + "step": 298780 + }, + { + "epoch": 0.603574703959728, + "grad_norm": 1348.0771484375, + "learning_rate": 4.173384888402446e-06, + "loss": 19.5732, + "step": 298790 + }, + { + "epoch": 0.6035949045924118, + "grad_norm": 0.0, + "learning_rate": 4.173040627896762e-06, + "loss": 31.22, + "step": 298800 + }, + { + "epoch": 0.6036151052250956, + "grad_norm": 1027.85107421875, + "learning_rate": 4.172696371421579e-06, + "loss": 18.6806, + "step": 298810 + }, + { + "epoch": 0.6036353058577795, + "grad_norm": 69.05319213867188, + "learning_rate": 4.172352118978573e-06, + "loss": 39.396, + "step": 298820 + }, + { + "epoch": 0.6036555064904633, + "grad_norm": 891.92529296875, + "learning_rate": 4.172007870569425e-06, + "loss": 17.773, + "step": 298830 + }, + { + "epoch": 0.6036757071231471, + "grad_norm": 165.69923400878906, + "learning_rate": 4.171663626195808e-06, + "loss": 14.5471, + "step": 298840 + }, + { + "epoch": 0.6036959077558309, + "grad_norm": 274.5810241699219, + "learning_rate": 4.171319385859402e-06, + "loss": 17.4796, + "step": 298850 + }, + { + "epoch": 0.6037161083885147, + "grad_norm": 415.4864196777344, + "learning_rate": 4.170975149561886e-06, + "loss": 11.9298, + "step": 298860 + }, + { + "epoch": 0.6037363090211986, + "grad_norm": 492.5143127441406, + "learning_rate": 4.170630917304935e-06, + "loss": 27.1149, + "step": 298870 + }, + { + "epoch": 0.6037565096538824, + "grad_norm": 274.8476257324219, + "learning_rate": 4.1702866890902285e-06, + "loss": 26.0232, + "step": 298880 + }, + { + "epoch": 0.6037767102865662, + "grad_norm": 183.34765625, + "learning_rate": 4.169942464919446e-06, + "loss": 20.6724, + "step": 298890 + }, + { + "epoch": 0.60379691091925, + "grad_norm": 9.085321426391602, + "learning_rate": 4.169598244794261e-06, + "loss": 17.5541, + "step": 298900 + }, + { + "epoch": 0.6038171115519338, + "grad_norm": 534.8471069335938, + "learning_rate": 4.169254028716355e-06, + "loss": 28.0896, + "step": 298910 + }, + { + "epoch": 0.6038373121846177, + "grad_norm": 283.2433166503906, + "learning_rate": 4.1689098166874046e-06, + "loss": 19.5865, + "step": 298920 + }, + { + "epoch": 0.6038575128173015, + "grad_norm": 195.63902282714844, + "learning_rate": 4.168565608709085e-06, + "loss": 15.0253, + "step": 298930 + }, + { + "epoch": 0.6038777134499852, + "grad_norm": 481.405029296875, + "learning_rate": 4.168221404783076e-06, + "loss": 19.6114, + "step": 298940 + }, + { + "epoch": 
0.603897914082669, + "grad_norm": 316.28387451171875, + "learning_rate": 4.167877204911057e-06, + "loss": 19.1945, + "step": 298950 + }, + { + "epoch": 0.6039181147153528, + "grad_norm": 426.0697326660156, + "learning_rate": 4.167533009094702e-06, + "loss": 18.7576, + "step": 298960 + }, + { + "epoch": 0.6039383153480367, + "grad_norm": 268.5688781738281, + "learning_rate": 4.167188817335689e-06, + "loss": 19.8746, + "step": 298970 + }, + { + "epoch": 0.6039585159807205, + "grad_norm": 392.6434631347656, + "learning_rate": 4.166844629635698e-06, + "loss": 15.2853, + "step": 298980 + }, + { + "epoch": 0.6039787166134043, + "grad_norm": 92.74348449707031, + "learning_rate": 4.166500445996407e-06, + "loss": 24.6408, + "step": 298990 + }, + { + "epoch": 0.6039989172460881, + "grad_norm": 365.57110595703125, + "learning_rate": 4.166156266419489e-06, + "loss": 23.8099, + "step": 299000 + }, + { + "epoch": 0.6040191178787719, + "grad_norm": 356.58251953125, + "learning_rate": 4.1658120909066255e-06, + "loss": 14.063, + "step": 299010 + }, + { + "epoch": 0.6040393185114558, + "grad_norm": 485.7627868652344, + "learning_rate": 4.165467919459493e-06, + "loss": 14.8582, + "step": 299020 + }, + { + "epoch": 0.6040595191441396, + "grad_norm": 400.6348571777344, + "learning_rate": 4.165123752079768e-06, + "loss": 27.36, + "step": 299030 + }, + { + "epoch": 0.6040797197768234, + "grad_norm": 230.67910766601562, + "learning_rate": 4.1647795887691275e-06, + "loss": 13.5759, + "step": 299040 + }, + { + "epoch": 0.6040999204095072, + "grad_norm": 331.7479248046875, + "learning_rate": 4.164435429529253e-06, + "loss": 20.1367, + "step": 299050 + }, + { + "epoch": 0.604120121042191, + "grad_norm": 319.7065124511719, + "learning_rate": 4.164091274361815e-06, + "loss": 15.6679, + "step": 299060 + }, + { + "epoch": 0.6041403216748749, + "grad_norm": 517.3455810546875, + "learning_rate": 4.163747123268497e-06, + "loss": 15.0055, + "step": 299070 + }, + { + "epoch": 0.6041605223075587, + "grad_norm": 414.9480285644531, + "learning_rate": 4.1634029762509755e-06, + "loss": 23.7813, + "step": 299080 + }, + { + "epoch": 0.6041807229402425, + "grad_norm": 1325.8746337890625, + "learning_rate": 4.163058833310925e-06, + "loss": 8.0147, + "step": 299090 + }, + { + "epoch": 0.6042009235729263, + "grad_norm": 637.223876953125, + "learning_rate": 4.162714694450023e-06, + "loss": 31.1577, + "step": 299100 + }, + { + "epoch": 0.6042211242056101, + "grad_norm": 202.93833923339844, + "learning_rate": 4.16237055966995e-06, + "loss": 20.886, + "step": 299110 + }, + { + "epoch": 0.604241324838294, + "grad_norm": 792.5867919921875, + "learning_rate": 4.16202642897238e-06, + "loss": 19.1535, + "step": 299120 + }, + { + "epoch": 0.6042615254709778, + "grad_norm": 255.61788940429688, + "learning_rate": 4.161682302358991e-06, + "loss": 18.4287, + "step": 299130 + }, + { + "epoch": 0.6042817261036616, + "grad_norm": 424.9412536621094, + "learning_rate": 4.161338179831461e-06, + "loss": 36.7544, + "step": 299140 + }, + { + "epoch": 0.6043019267363454, + "grad_norm": 569.3251342773438, + "learning_rate": 4.160994061391469e-06, + "loss": 28.6545, + "step": 299150 + }, + { + "epoch": 0.6043221273690292, + "grad_norm": 347.0662536621094, + "learning_rate": 4.1606499470406885e-06, + "loss": 41.8323, + "step": 299160 + }, + { + "epoch": 0.6043423280017131, + "grad_norm": 330.9170227050781, + "learning_rate": 4.1603058367807986e-06, + "loss": 20.5058, + "step": 299170 + }, + { + "epoch": 0.6043625286343969, + "grad_norm": 449.6436767578125, + 
"learning_rate": 4.159961730613478e-06, + "loss": 19.3311, + "step": 299180 + }, + { + "epoch": 0.6043827292670807, + "grad_norm": 142.1448516845703, + "learning_rate": 4.1596176285403985e-06, + "loss": 22.8406, + "step": 299190 + }, + { + "epoch": 0.6044029298997644, + "grad_norm": 119.17735290527344, + "learning_rate": 4.159273530563243e-06, + "loss": 9.936, + "step": 299200 + }, + { + "epoch": 0.6044231305324482, + "grad_norm": 185.63804626464844, + "learning_rate": 4.158929436683687e-06, + "loss": 11.7484, + "step": 299210 + }, + { + "epoch": 0.604443331165132, + "grad_norm": 191.6988983154297, + "learning_rate": 4.158585346903405e-06, + "loss": 16.4373, + "step": 299220 + }, + { + "epoch": 0.6044635317978159, + "grad_norm": 463.1911926269531, + "learning_rate": 4.1582412612240765e-06, + "loss": 21.7914, + "step": 299230 + }, + { + "epoch": 0.6044837324304997, + "grad_norm": 354.4056091308594, + "learning_rate": 4.157897179647379e-06, + "loss": 15.9639, + "step": 299240 + }, + { + "epoch": 0.6045039330631835, + "grad_norm": 233.71731567382812, + "learning_rate": 4.157553102174988e-06, + "loss": 18.4421, + "step": 299250 + }, + { + "epoch": 0.6045241336958673, + "grad_norm": 202.58102416992188, + "learning_rate": 4.15720902880858e-06, + "loss": 31.7332, + "step": 299260 + }, + { + "epoch": 0.6045443343285511, + "grad_norm": 390.31072998046875, + "learning_rate": 4.156864959549833e-06, + "loss": 24.7472, + "step": 299270 + }, + { + "epoch": 0.604564534961235, + "grad_norm": 398.3007507324219, + "learning_rate": 4.156520894400426e-06, + "loss": 22.008, + "step": 299280 + }, + { + "epoch": 0.6045847355939188, + "grad_norm": 316.69415283203125, + "learning_rate": 4.156176833362032e-06, + "loss": 16.2221, + "step": 299290 + }, + { + "epoch": 0.6046049362266026, + "grad_norm": 421.1678466796875, + "learning_rate": 4.155832776436331e-06, + "loss": 31.4256, + "step": 299300 + }, + { + "epoch": 0.6046251368592864, + "grad_norm": 73.86972045898438, + "learning_rate": 4.155488723624999e-06, + "loss": 11.5173, + "step": 299310 + }, + { + "epoch": 0.6046453374919702, + "grad_norm": 490.62542724609375, + "learning_rate": 4.1551446749297104e-06, + "loss": 24.8878, + "step": 299320 + }, + { + "epoch": 0.6046655381246541, + "grad_norm": 498.3382873535156, + "learning_rate": 4.154800630352145e-06, + "loss": 13.6941, + "step": 299330 + }, + { + "epoch": 0.6046857387573379, + "grad_norm": 313.3454284667969, + "learning_rate": 4.154456589893981e-06, + "loss": 14.2694, + "step": 299340 + }, + { + "epoch": 0.6047059393900217, + "grad_norm": 293.8607177734375, + "learning_rate": 4.15411255355689e-06, + "loss": 19.203, + "step": 299350 + }, + { + "epoch": 0.6047261400227055, + "grad_norm": 226.7985076904297, + "learning_rate": 4.153768521342552e-06, + "loss": 8.4586, + "step": 299360 + }, + { + "epoch": 0.6047463406553893, + "grad_norm": 611.42236328125, + "learning_rate": 4.153424493252646e-06, + "loss": 27.3687, + "step": 299370 + }, + { + "epoch": 0.6047665412880732, + "grad_norm": 197.8191375732422, + "learning_rate": 4.153080469288845e-06, + "loss": 11.2582, + "step": 299380 + }, + { + "epoch": 0.604786741920757, + "grad_norm": 402.68408203125, + "learning_rate": 4.152736449452827e-06, + "loss": 31.7333, + "step": 299390 + }, + { + "epoch": 0.6048069425534408, + "grad_norm": 491.1822509765625, + "learning_rate": 4.15239243374627e-06, + "loss": 20.3953, + "step": 299400 + }, + { + "epoch": 0.6048271431861246, + "grad_norm": 734.6697387695312, + "learning_rate": 4.152048422170848e-06, + "loss": 30.9655, + 
"step": 299410 + }, + { + "epoch": 0.6048473438188084, + "grad_norm": 941.27099609375, + "learning_rate": 4.151704414728238e-06, + "loss": 24.8178, + "step": 299420 + }, + { + "epoch": 0.6048675444514923, + "grad_norm": 299.1211853027344, + "learning_rate": 4.151360411420119e-06, + "loss": 14.6758, + "step": 299430 + }, + { + "epoch": 0.6048877450841761, + "grad_norm": 801.124755859375, + "learning_rate": 4.1510164122481675e-06, + "loss": 17.4863, + "step": 299440 + }, + { + "epoch": 0.6049079457168599, + "grad_norm": 428.1326904296875, + "learning_rate": 4.150672417214058e-06, + "loss": 9.6424, + "step": 299450 + }, + { + "epoch": 0.6049281463495436, + "grad_norm": 351.8671569824219, + "learning_rate": 4.150328426319469e-06, + "loss": 19.342, + "step": 299460 + }, + { + "epoch": 0.6049483469822274, + "grad_norm": 244.36212158203125, + "learning_rate": 4.149984439566076e-06, + "loss": 31.1823, + "step": 299470 + }, + { + "epoch": 0.6049685476149113, + "grad_norm": 230.03631591796875, + "learning_rate": 4.149640456955555e-06, + "loss": 19.8825, + "step": 299480 + }, + { + "epoch": 0.6049887482475951, + "grad_norm": 563.4520263671875, + "learning_rate": 4.149296478489583e-06, + "loss": 35.0656, + "step": 299490 + }, + { + "epoch": 0.6050089488802789, + "grad_norm": 448.1905517578125, + "learning_rate": 4.148952504169839e-06, + "loss": 15.7536, + "step": 299500 + }, + { + "epoch": 0.6050291495129627, + "grad_norm": 621.169189453125, + "learning_rate": 4.1486085339979944e-06, + "loss": 14.6395, + "step": 299510 + }, + { + "epoch": 0.6050493501456465, + "grad_norm": 390.1469421386719, + "learning_rate": 4.148264567975729e-06, + "loss": 32.5824, + "step": 299520 + }, + { + "epoch": 0.6050695507783304, + "grad_norm": 420.9831848144531, + "learning_rate": 4.1479206061047205e-06, + "loss": 25.5124, + "step": 299530 + }, + { + "epoch": 0.6050897514110142, + "grad_norm": 512.182861328125, + "learning_rate": 4.147576648386643e-06, + "loss": 20.1751, + "step": 299540 + }, + { + "epoch": 0.605109952043698, + "grad_norm": 197.9337921142578, + "learning_rate": 4.147232694823173e-06, + "loss": 9.2306, + "step": 299550 + }, + { + "epoch": 0.6051301526763818, + "grad_norm": 3.1143038272857666, + "learning_rate": 4.146888745415988e-06, + "loss": 26.7723, + "step": 299560 + }, + { + "epoch": 0.6051503533090656, + "grad_norm": 160.60667419433594, + "learning_rate": 4.146544800166764e-06, + "loss": 16.7458, + "step": 299570 + }, + { + "epoch": 0.6051705539417495, + "grad_norm": 113.62411499023438, + "learning_rate": 4.146200859077175e-06, + "loss": 23.438, + "step": 299580 + }, + { + "epoch": 0.6051907545744333, + "grad_norm": 434.8067626953125, + "learning_rate": 4.1458569221489006e-06, + "loss": 10.0016, + "step": 299590 + }, + { + "epoch": 0.6052109552071171, + "grad_norm": 410.1781921386719, + "learning_rate": 4.145512989383618e-06, + "loss": 17.4798, + "step": 299600 + }, + { + "epoch": 0.6052311558398009, + "grad_norm": 497.3607482910156, + "learning_rate": 4.1451690607829995e-06, + "loss": 15.7014, + "step": 299610 + }, + { + "epoch": 0.6052513564724847, + "grad_norm": 271.93951416015625, + "learning_rate": 4.144825136348724e-06, + "loss": 20.2383, + "step": 299620 + }, + { + "epoch": 0.6052715571051686, + "grad_norm": 153.39515686035156, + "learning_rate": 4.144481216082467e-06, + "loss": 16.2669, + "step": 299630 + }, + { + "epoch": 0.6052917577378524, + "grad_norm": 278.66412353515625, + "learning_rate": 4.1441372999859046e-06, + "loss": 19.849, + "step": 299640 + }, + { + "epoch": 0.6053119583705362, 
+ "grad_norm": 165.04925537109375, + "learning_rate": 4.143793388060712e-06, + "loss": 18.7088, + "step": 299650 + }, + { + "epoch": 0.60533215900322, + "grad_norm": 371.5042419433594, + "learning_rate": 4.143449480308569e-06, + "loss": 22.5886, + "step": 299660 + }, + { + "epoch": 0.6053523596359038, + "grad_norm": 273.3786315917969, + "learning_rate": 4.143105576731147e-06, + "loss": 15.8427, + "step": 299670 + }, + { + "epoch": 0.6053725602685877, + "grad_norm": 96.92143249511719, + "learning_rate": 4.1427616773301245e-06, + "loss": 19.4343, + "step": 299680 + }, + { + "epoch": 0.6053927609012715, + "grad_norm": 211.26661682128906, + "learning_rate": 4.14241778210718e-06, + "loss": 25.5301, + "step": 299690 + }, + { + "epoch": 0.6054129615339553, + "grad_norm": 747.14404296875, + "learning_rate": 4.142073891063986e-06, + "loss": 23.3289, + "step": 299700 + }, + { + "epoch": 0.605433162166639, + "grad_norm": 220.15956115722656, + "learning_rate": 4.1417300042022195e-06, + "loss": 13.643, + "step": 299710 + }, + { + "epoch": 0.6054533627993228, + "grad_norm": 275.4222106933594, + "learning_rate": 4.141386121523558e-06, + "loss": 26.8061, + "step": 299720 + }, + { + "epoch": 0.6054735634320066, + "grad_norm": 687.5581665039062, + "learning_rate": 4.141042243029675e-06, + "loss": 19.9321, + "step": 299730 + }, + { + "epoch": 0.6054937640646905, + "grad_norm": 196.90611267089844, + "learning_rate": 4.140698368722247e-06, + "loss": 22.7837, + "step": 299740 + }, + { + "epoch": 0.6055139646973743, + "grad_norm": 672.401123046875, + "learning_rate": 4.140354498602952e-06, + "loss": 26.9086, + "step": 299750 + }, + { + "epoch": 0.6055341653300581, + "grad_norm": 46.130191802978516, + "learning_rate": 4.140010632673466e-06, + "loss": 21.9361, + "step": 299760 + }, + { + "epoch": 0.6055543659627419, + "grad_norm": 678.9490966796875, + "learning_rate": 4.139666770935463e-06, + "loss": 22.1638, + "step": 299770 + }, + { + "epoch": 0.6055745665954257, + "grad_norm": 540.2193603515625, + "learning_rate": 4.13932291339062e-06, + "loss": 26.2758, + "step": 299780 + }, + { + "epoch": 0.6055947672281096, + "grad_norm": 526.988525390625, + "learning_rate": 4.138979060040613e-06, + "loss": 25.5432, + "step": 299790 + }, + { + "epoch": 0.6056149678607934, + "grad_norm": 159.88980102539062, + "learning_rate": 4.138635210887117e-06, + "loss": 11.3931, + "step": 299800 + }, + { + "epoch": 0.6056351684934772, + "grad_norm": 529.3505249023438, + "learning_rate": 4.138291365931808e-06, + "loss": 30.4891, + "step": 299810 + }, + { + "epoch": 0.605655369126161, + "grad_norm": 522.1632690429688, + "learning_rate": 4.137947525176364e-06, + "loss": 16.009, + "step": 299820 + }, + { + "epoch": 0.6056755697588448, + "grad_norm": 67.28885650634766, + "learning_rate": 4.137603688622458e-06, + "loss": 21.4409, + "step": 299830 + }, + { + "epoch": 0.6056957703915287, + "grad_norm": 70.01111602783203, + "learning_rate": 4.137259856271767e-06, + "loss": 16.8961, + "step": 299840 + }, + { + "epoch": 0.6057159710242125, + "grad_norm": 880.1499633789062, + "learning_rate": 4.1369160281259685e-06, + "loss": 34.1453, + "step": 299850 + }, + { + "epoch": 0.6057361716568963, + "grad_norm": 56.148624420166016, + "learning_rate": 4.136572204186735e-06, + "loss": 12.5547, + "step": 299860 + }, + { + "epoch": 0.6057563722895801, + "grad_norm": 260.4873352050781, + "learning_rate": 4.136228384455743e-06, + "loss": 14.8398, + "step": 299870 + }, + { + "epoch": 0.605776572922264, + "grad_norm": 284.2904968261719, + "learning_rate": 
4.13588456893467e-06, + "loss": 21.4895, + "step": 299880 + }, + { + "epoch": 0.6057967735549478, + "grad_norm": 559.5950317382812, + "learning_rate": 4.1355407576251925e-06, + "loss": 13.033, + "step": 299890 + }, + { + "epoch": 0.6058169741876316, + "grad_norm": 21.05386734008789, + "learning_rate": 4.135196950528982e-06, + "loss": 10.6381, + "step": 299900 + }, + { + "epoch": 0.6058371748203154, + "grad_norm": 172.6096954345703, + "learning_rate": 4.134853147647718e-06, + "loss": 19.6288, + "step": 299910 + }, + { + "epoch": 0.6058573754529992, + "grad_norm": 258.5855407714844, + "learning_rate": 4.134509348983075e-06, + "loss": 27.6375, + "step": 299920 + }, + { + "epoch": 0.605877576085683, + "grad_norm": 717.6494140625, + "learning_rate": 4.134165554536728e-06, + "loss": 14.4708, + "step": 299930 + }, + { + "epoch": 0.6058977767183669, + "grad_norm": 37.4779052734375, + "learning_rate": 4.133821764310352e-06, + "loss": 26.5522, + "step": 299940 + }, + { + "epoch": 0.6059179773510507, + "grad_norm": 358.26361083984375, + "learning_rate": 4.133477978305626e-06, + "loss": 29.5468, + "step": 299950 + }, + { + "epoch": 0.6059381779837345, + "grad_norm": 264.0389709472656, + "learning_rate": 4.133134196524221e-06, + "loss": 19.7583, + "step": 299960 + }, + { + "epoch": 0.6059583786164182, + "grad_norm": 196.28353881835938, + "learning_rate": 4.132790418967816e-06, + "loss": 26.5197, + "step": 299970 + }, + { + "epoch": 0.605978579249102, + "grad_norm": 252.9100799560547, + "learning_rate": 4.132446645638086e-06, + "loss": 17.9467, + "step": 299980 + }, + { + "epoch": 0.6059987798817859, + "grad_norm": 443.7536926269531, + "learning_rate": 4.132102876536705e-06, + "loss": 26.3148, + "step": 299990 + }, + { + "epoch": 0.6060189805144697, + "grad_norm": 156.29220581054688, + "learning_rate": 4.131759111665349e-06, + "loss": 16.4471, + "step": 300000 + }, + { + "epoch": 0.6060391811471535, + "grad_norm": 441.18731689453125, + "learning_rate": 4.131415351025695e-06, + "loss": 11.3066, + "step": 300010 + }, + { + "epoch": 0.6060593817798373, + "grad_norm": 0.0, + "learning_rate": 4.131071594619416e-06, + "loss": 34.9451, + "step": 300020 + }, + { + "epoch": 0.6060795824125211, + "grad_norm": 213.2185516357422, + "learning_rate": 4.130727842448187e-06, + "loss": 11.2, + "step": 300030 + }, + { + "epoch": 0.606099783045205, + "grad_norm": 287.30926513671875, + "learning_rate": 4.130384094513688e-06, + "loss": 10.5321, + "step": 300040 + }, + { + "epoch": 0.6061199836778888, + "grad_norm": 448.23309326171875, + "learning_rate": 4.13004035081759e-06, + "loss": 26.159, + "step": 300050 + }, + { + "epoch": 0.6061401843105726, + "grad_norm": 222.05453491210938, + "learning_rate": 4.1296966113615705e-06, + "loss": 17.9621, + "step": 300060 + }, + { + "epoch": 0.6061603849432564, + "grad_norm": 302.32293701171875, + "learning_rate": 4.129352876147304e-06, + "loss": 28.4212, + "step": 300070 + }, + { + "epoch": 0.6061805855759402, + "grad_norm": 216.21511840820312, + "learning_rate": 4.129009145176467e-06, + "loss": 23.9892, + "step": 300080 + }, + { + "epoch": 0.606200786208624, + "grad_norm": 409.4975891113281, + "learning_rate": 4.128665418450732e-06, + "loss": 17.4646, + "step": 300090 + }, + { + "epoch": 0.6062209868413079, + "grad_norm": 228.02105712890625, + "learning_rate": 4.128321695971775e-06, + "loss": 11.725, + "step": 300100 + }, + { + "epoch": 0.6062411874739917, + "grad_norm": 521.8179931640625, + "learning_rate": 4.127977977741277e-06, + "loss": 24.6463, + "step": 300110 + }, + { + 
"epoch": 0.6062613881066755, + "grad_norm": 2.4430065155029297, + "learning_rate": 4.127634263760904e-06, + "loss": 26.2461, + "step": 300120 + }, + { + "epoch": 0.6062815887393593, + "grad_norm": 184.41311645507812, + "learning_rate": 4.127290554032337e-06, + "loss": 13.7439, + "step": 300130 + }, + { + "epoch": 0.6063017893720432, + "grad_norm": 23.768491744995117, + "learning_rate": 4.126946848557252e-06, + "loss": 23.512, + "step": 300140 + }, + { + "epoch": 0.606321990004727, + "grad_norm": 416.1184387207031, + "learning_rate": 4.12660314733732e-06, + "loss": 18.3558, + "step": 300150 + }, + { + "epoch": 0.6063421906374108, + "grad_norm": 82.60257720947266, + "learning_rate": 4.126259450374219e-06, + "loss": 11.7744, + "step": 300160 + }, + { + "epoch": 0.6063623912700946, + "grad_norm": 648.251708984375, + "learning_rate": 4.125915757669624e-06, + "loss": 19.0724, + "step": 300170 + }, + { + "epoch": 0.6063825919027784, + "grad_norm": 531.5507202148438, + "learning_rate": 4.1255720692252084e-06, + "loss": 26.0003, + "step": 300180 + }, + { + "epoch": 0.6064027925354623, + "grad_norm": 433.8568420410156, + "learning_rate": 4.125228385042648e-06, + "loss": 13.7875, + "step": 300190 + }, + { + "epoch": 0.6064229931681461, + "grad_norm": 348.8671569824219, + "learning_rate": 4.124884705123619e-06, + "loss": 25.9525, + "step": 300200 + }, + { + "epoch": 0.6064431938008299, + "grad_norm": 191.41236877441406, + "learning_rate": 4.124541029469798e-06, + "loss": 22.0683, + "step": 300210 + }, + { + "epoch": 0.6064633944335136, + "grad_norm": 129.49757385253906, + "learning_rate": 4.124197358082855e-06, + "loss": 17.101, + "step": 300220 + }, + { + "epoch": 0.6064835950661974, + "grad_norm": 284.73553466796875, + "learning_rate": 4.12385369096447e-06, + "loss": 17.0187, + "step": 300230 + }, + { + "epoch": 0.6065037956988812, + "grad_norm": 421.3599853515625, + "learning_rate": 4.123510028116315e-06, + "loss": 26.3037, + "step": 300240 + }, + { + "epoch": 0.6065239963315651, + "grad_norm": 9.619393348693848, + "learning_rate": 4.123166369540066e-06, + "loss": 17.2235, + "step": 300250 + }, + { + "epoch": 0.6065441969642489, + "grad_norm": 608.0734252929688, + "learning_rate": 4.1228227152373955e-06, + "loss": 17.3087, + "step": 300260 + }, + { + "epoch": 0.6065643975969327, + "grad_norm": 68.26480102539062, + "learning_rate": 4.122479065209984e-06, + "loss": 19.6844, + "step": 300270 + }, + { + "epoch": 0.6065845982296165, + "grad_norm": 513.013916015625, + "learning_rate": 4.122135419459501e-06, + "loss": 23.8992, + "step": 300280 + }, + { + "epoch": 0.6066047988623003, + "grad_norm": 524.5921630859375, + "learning_rate": 4.121791777987624e-06, + "loss": 18.7422, + "step": 300290 + }, + { + "epoch": 0.6066249994949842, + "grad_norm": 588.4210205078125, + "learning_rate": 4.121448140796029e-06, + "loss": 20.0128, + "step": 300300 + }, + { + "epoch": 0.606645200127668, + "grad_norm": 734.3225708007812, + "learning_rate": 4.121104507886387e-06, + "loss": 27.1061, + "step": 300310 + }, + { + "epoch": 0.6066654007603518, + "grad_norm": 429.5791931152344, + "learning_rate": 4.120760879260375e-06, + "loss": 18.7656, + "step": 300320 + }, + { + "epoch": 0.6066856013930356, + "grad_norm": 28.860681533813477, + "learning_rate": 4.120417254919668e-06, + "loss": 18.3603, + "step": 300330 + }, + { + "epoch": 0.6067058020257194, + "grad_norm": 295.4386901855469, + "learning_rate": 4.120073634865943e-06, + "loss": 14.0962, + "step": 300340 + }, + { + "epoch": 0.6067260026584033, + "grad_norm": 
130.5166473388672, + "learning_rate": 4.119730019100869e-06, + "loss": 21.8594, + "step": 300350 + }, + { + "epoch": 0.6067462032910871, + "grad_norm": 533.1133422851562, + "learning_rate": 4.119386407626126e-06, + "loss": 30.9608, + "step": 300360 + }, + { + "epoch": 0.6067664039237709, + "grad_norm": 233.6865692138672, + "learning_rate": 4.119042800443387e-06, + "loss": 11.7632, + "step": 300370 + }, + { + "epoch": 0.6067866045564547, + "grad_norm": 276.0942687988281, + "learning_rate": 4.118699197554327e-06, + "loss": 16.6963, + "step": 300380 + }, + { + "epoch": 0.6068068051891385, + "grad_norm": 222.0702362060547, + "learning_rate": 4.118355598960619e-06, + "loss": 15.4077, + "step": 300390 + }, + { + "epoch": 0.6068270058218224, + "grad_norm": 728.0061645507812, + "learning_rate": 4.118012004663939e-06, + "loss": 16.6156, + "step": 300400 + }, + { + "epoch": 0.6068472064545062, + "grad_norm": 492.2958068847656, + "learning_rate": 4.117668414665962e-06, + "loss": 26.7079, + "step": 300410 + }, + { + "epoch": 0.60686740708719, + "grad_norm": 1085.5723876953125, + "learning_rate": 4.117324828968361e-06, + "loss": 20.2057, + "step": 300420 + }, + { + "epoch": 0.6068876077198738, + "grad_norm": 354.3676452636719, + "learning_rate": 4.116981247572814e-06, + "loss": 15.1552, + "step": 300430 + }, + { + "epoch": 0.6069078083525576, + "grad_norm": 410.5220642089844, + "learning_rate": 4.11663767048099e-06, + "loss": 30.9106, + "step": 300440 + }, + { + "epoch": 0.6069280089852415, + "grad_norm": 82.55916595458984, + "learning_rate": 4.1162940976945695e-06, + "loss": 11.6088, + "step": 300450 + }, + { + "epoch": 0.6069482096179253, + "grad_norm": 266.9975280761719, + "learning_rate": 4.115950529215225e-06, + "loss": 21.6371, + "step": 300460 + }, + { + "epoch": 0.6069684102506091, + "grad_norm": 1182.3514404296875, + "learning_rate": 4.115606965044628e-06, + "loss": 26.6251, + "step": 300470 + }, + { + "epoch": 0.6069886108832928, + "grad_norm": 586.1812133789062, + "learning_rate": 4.115263405184456e-06, + "loss": 22.5297, + "step": 300480 + }, + { + "epoch": 0.6070088115159766, + "grad_norm": 467.3453369140625, + "learning_rate": 4.114919849636383e-06, + "loss": 17.5214, + "step": 300490 + }, + { + "epoch": 0.6070290121486605, + "grad_norm": 1092.0445556640625, + "learning_rate": 4.114576298402085e-06, + "loss": 19.347, + "step": 300500 + }, + { + "epoch": 0.6070492127813443, + "grad_norm": 200.702880859375, + "learning_rate": 4.1142327514832326e-06, + "loss": 24.5764, + "step": 300510 + }, + { + "epoch": 0.6070694134140281, + "grad_norm": 320.72991943359375, + "learning_rate": 4.1138892088815025e-06, + "loss": 22.3576, + "step": 300520 + }, + { + "epoch": 0.6070896140467119, + "grad_norm": 583.939453125, + "learning_rate": 4.113545670598571e-06, + "loss": 24.4126, + "step": 300530 + }, + { + "epoch": 0.6071098146793957, + "grad_norm": 105.73104095458984, + "learning_rate": 4.113202136636108e-06, + "loss": 12.8853, + "step": 300540 + }, + { + "epoch": 0.6071300153120796, + "grad_norm": 10.637992858886719, + "learning_rate": 4.11285860699579e-06, + "loss": 15.2372, + "step": 300550 + }, + { + "epoch": 0.6071502159447634, + "grad_norm": 460.02093505859375, + "learning_rate": 4.112515081679295e-06, + "loss": 11.4211, + "step": 300560 + }, + { + "epoch": 0.6071704165774472, + "grad_norm": 180.24130249023438, + "learning_rate": 4.112171560688289e-06, + "loss": 16.7634, + "step": 300570 + }, + { + "epoch": 0.607190617210131, + "grad_norm": 420.7903137207031, + "learning_rate": 
4.111828044024454e-06, + "loss": 31.8523, + "step": 300580 + }, + { + "epoch": 0.6072108178428148, + "grad_norm": 109.20729064941406, + "learning_rate": 4.111484531689462e-06, + "loss": 12.5776, + "step": 300590 + }, + { + "epoch": 0.6072310184754987, + "grad_norm": 521.873046875, + "learning_rate": 4.111141023684986e-06, + "loss": 16.0646, + "step": 300600 + }, + { + "epoch": 0.6072512191081825, + "grad_norm": 529.4212036132812, + "learning_rate": 4.1107975200126996e-06, + "loss": 18.8753, + "step": 300610 + }, + { + "epoch": 0.6072714197408663, + "grad_norm": 377.3888854980469, + "learning_rate": 4.11045402067428e-06, + "loss": 21.1732, + "step": 300620 + }, + { + "epoch": 0.6072916203735501, + "grad_norm": 289.85052490234375, + "learning_rate": 4.110110525671399e-06, + "loss": 19.4854, + "step": 300630 + }, + { + "epoch": 0.6073118210062339, + "grad_norm": 321.3138427734375, + "learning_rate": 4.109767035005729e-06, + "loss": 17.224, + "step": 300640 + }, + { + "epoch": 0.6073320216389178, + "grad_norm": 292.6101379394531, + "learning_rate": 4.109423548678949e-06, + "loss": 11.0724, + "step": 300650 + }, + { + "epoch": 0.6073522222716016, + "grad_norm": 283.8573913574219, + "learning_rate": 4.109080066692731e-06, + "loss": 15.4067, + "step": 300660 + }, + { + "epoch": 0.6073724229042854, + "grad_norm": 511.39447021484375, + "learning_rate": 4.108736589048748e-06, + "loss": 27.6468, + "step": 300670 + }, + { + "epoch": 0.6073926235369692, + "grad_norm": 84.58218383789062, + "learning_rate": 4.108393115748675e-06, + "loss": 16.2186, + "step": 300680 + }, + { + "epoch": 0.607412824169653, + "grad_norm": 302.5014953613281, + "learning_rate": 4.108049646794186e-06, + "loss": 20.6337, + "step": 300690 + }, + { + "epoch": 0.6074330248023369, + "grad_norm": 190.1282501220703, + "learning_rate": 4.107706182186954e-06, + "loss": 18.9557, + "step": 300700 + }, + { + "epoch": 0.6074532254350207, + "grad_norm": 402.6483154296875, + "learning_rate": 4.107362721928653e-06, + "loss": 18.9309, + "step": 300710 + }, + { + "epoch": 0.6074734260677045, + "grad_norm": 293.894775390625, + "learning_rate": 4.107019266020961e-06, + "loss": 13.3887, + "step": 300720 + }, + { + "epoch": 0.6074936267003882, + "grad_norm": 395.2062072753906, + "learning_rate": 4.106675814465545e-06, + "loss": 15.7355, + "step": 300730 + }, + { + "epoch": 0.607513827333072, + "grad_norm": 451.3974609375, + "learning_rate": 4.106332367264085e-06, + "loss": 12.7407, + "step": 300740 + }, + { + "epoch": 0.6075340279657558, + "grad_norm": 483.8083801269531, + "learning_rate": 4.105988924418252e-06, + "loss": 18.352, + "step": 300750 + }, + { + "epoch": 0.6075542285984397, + "grad_norm": 446.7733154296875, + "learning_rate": 4.105645485929721e-06, + "loss": 10.8636, + "step": 300760 + }, + { + "epoch": 0.6075744292311235, + "grad_norm": 17.746076583862305, + "learning_rate": 4.105302051800166e-06, + "loss": 20.9803, + "step": 300770 + }, + { + "epoch": 0.6075946298638073, + "grad_norm": 59.37981414794922, + "learning_rate": 4.1049586220312594e-06, + "loss": 46.9566, + "step": 300780 + }, + { + "epoch": 0.6076148304964911, + "grad_norm": 398.4682312011719, + "learning_rate": 4.104615196624676e-06, + "loss": 18.145, + "step": 300790 + }, + { + "epoch": 0.6076350311291749, + "grad_norm": 346.58404541015625, + "learning_rate": 4.104271775582089e-06, + "loss": 24.9918, + "step": 300800 + }, + { + "epoch": 0.6076552317618588, + "grad_norm": 340.0547180175781, + "learning_rate": 4.103928358905173e-06, + "loss": 13.4387, + "step": 300810 + }, + 
{ + "epoch": 0.6076754323945426, + "grad_norm": 552.000732421875, + "learning_rate": 4.1035849465956024e-06, + "loss": 23.7551, + "step": 300820 + }, + { + "epoch": 0.6076956330272264, + "grad_norm": 355.2682800292969, + "learning_rate": 4.103241538655049e-06, + "loss": 14.823, + "step": 300830 + }, + { + "epoch": 0.6077158336599102, + "grad_norm": 347.156005859375, + "learning_rate": 4.1028981350851885e-06, + "loss": 8.7515, + "step": 300840 + }, + { + "epoch": 0.607736034292594, + "grad_norm": 338.92706298828125, + "learning_rate": 4.102554735887694e-06, + "loss": 20.7175, + "step": 300850 + }, + { + "epoch": 0.6077562349252779, + "grad_norm": 262.94232177734375, + "learning_rate": 4.102211341064237e-06, + "loss": 16.7035, + "step": 300860 + }, + { + "epoch": 0.6077764355579617, + "grad_norm": 54.84391784667969, + "learning_rate": 4.101867950616493e-06, + "loss": 14.0018, + "step": 300870 + }, + { + "epoch": 0.6077966361906455, + "grad_norm": 39.956153869628906, + "learning_rate": 4.101524564546139e-06, + "loss": 20.8418, + "step": 300880 + }, + { + "epoch": 0.6078168368233293, + "grad_norm": 400.2420349121094, + "learning_rate": 4.101181182854841e-06, + "loss": 13.893, + "step": 300890 + }, + { + "epoch": 0.6078370374560131, + "grad_norm": 701.3252563476562, + "learning_rate": 4.100837805544279e-06, + "loss": 26.9054, + "step": 300900 + }, + { + "epoch": 0.607857238088697, + "grad_norm": 66.36013793945312, + "learning_rate": 4.100494432616126e-06, + "loss": 13.9931, + "step": 300910 + }, + { + "epoch": 0.6078774387213808, + "grad_norm": 552.8189697265625, + "learning_rate": 4.1001510640720525e-06, + "loss": 35.3448, + "step": 300920 + }, + { + "epoch": 0.6078976393540646, + "grad_norm": 425.0431823730469, + "learning_rate": 4.099807699913733e-06, + "loss": 23.3374, + "step": 300930 + }, + { + "epoch": 0.6079178399867484, + "grad_norm": 142.89114379882812, + "learning_rate": 4.09946434014284e-06, + "loss": 17.9236, + "step": 300940 + }, + { + "epoch": 0.6079380406194322, + "grad_norm": 329.4931640625, + "learning_rate": 4.099120984761053e-06, + "loss": 21.6973, + "step": 300950 + }, + { + "epoch": 0.6079582412521161, + "grad_norm": 145.770263671875, + "learning_rate": 4.098777633770038e-06, + "loss": 8.8998, + "step": 300960 + }, + { + "epoch": 0.6079784418847999, + "grad_norm": 125.50902557373047, + "learning_rate": 4.0984342871714725e-06, + "loss": 28.4501, + "step": 300970 + }, + { + "epoch": 0.6079986425174837, + "grad_norm": 1056.3038330078125, + "learning_rate": 4.0980909449670295e-06, + "loss": 32.9664, + "step": 300980 + }, + { + "epoch": 0.6080188431501674, + "grad_norm": 501.4185791015625, + "learning_rate": 4.09774760715838e-06, + "loss": 18.9642, + "step": 300990 + }, + { + "epoch": 0.6080390437828512, + "grad_norm": 400.7528991699219, + "learning_rate": 4.0974042737472005e-06, + "loss": 13.6935, + "step": 301000 + }, + { + "epoch": 0.608059244415535, + "grad_norm": 769.8255615234375, + "learning_rate": 4.0970609447351635e-06, + "loss": 22.393, + "step": 301010 + }, + { + "epoch": 0.6080794450482189, + "grad_norm": 406.9213562011719, + "learning_rate": 4.096717620123941e-06, + "loss": 27.2523, + "step": 301020 + }, + { + "epoch": 0.6080996456809027, + "grad_norm": 329.3095397949219, + "learning_rate": 4.096374299915207e-06, + "loss": 16.3404, + "step": 301030 + }, + { + "epoch": 0.6081198463135865, + "grad_norm": 603.5701904296875, + "learning_rate": 4.096030984110638e-06, + "loss": 15.9213, + "step": 301040 + }, + { + "epoch": 0.6081400469462703, + "grad_norm": 
262.6082763671875, + "learning_rate": 4.0956876727119e-06, + "loss": 23.9107, + "step": 301050 + }, + { + "epoch": 0.6081602475789541, + "grad_norm": 231.5294952392578, + "learning_rate": 4.095344365720673e-06, + "loss": 18.7248, + "step": 301060 + }, + { + "epoch": 0.608180448211638, + "grad_norm": 33.1486930847168, + "learning_rate": 4.095001063138629e-06, + "loss": 10.5146, + "step": 301070 + }, + { + "epoch": 0.6082006488443218, + "grad_norm": 385.7687683105469, + "learning_rate": 4.0946577649674375e-06, + "loss": 19.0383, + "step": 301080 + }, + { + "epoch": 0.6082208494770056, + "grad_norm": 578.4713745117188, + "learning_rate": 4.094314471208775e-06, + "loss": 17.3994, + "step": 301090 + }, + { + "epoch": 0.6082410501096894, + "grad_norm": 613.3488159179688, + "learning_rate": 4.093971181864313e-06, + "loss": 20.3948, + "step": 301100 + }, + { + "epoch": 0.6082612507423732, + "grad_norm": 88.45452117919922, + "learning_rate": 4.093627896935727e-06, + "loss": 12.3156, + "step": 301110 + }, + { + "epoch": 0.6082814513750571, + "grad_norm": 139.64923095703125, + "learning_rate": 4.093284616424688e-06, + "loss": 16.1912, + "step": 301120 + }, + { + "epoch": 0.6083016520077409, + "grad_norm": 367.98553466796875, + "learning_rate": 4.092941340332871e-06, + "loss": 22.5281, + "step": 301130 + }, + { + "epoch": 0.6083218526404247, + "grad_norm": 462.8436279296875, + "learning_rate": 4.092598068661948e-06, + "loss": 24.1809, + "step": 301140 + }, + { + "epoch": 0.6083420532731085, + "grad_norm": 382.7926025390625, + "learning_rate": 4.092254801413591e-06, + "loss": 14.3907, + "step": 301150 + }, + { + "epoch": 0.6083622539057923, + "grad_norm": 198.9886932373047, + "learning_rate": 4.091911538589474e-06, + "loss": 16.9348, + "step": 301160 + }, + { + "epoch": 0.6083824545384762, + "grad_norm": 504.3564453125, + "learning_rate": 4.091568280191271e-06, + "loss": 12.4013, + "step": 301170 + }, + { + "epoch": 0.60840265517116, + "grad_norm": 413.9426574707031, + "learning_rate": 4.091225026220652e-06, + "loss": 20.0764, + "step": 301180 + }, + { + "epoch": 0.6084228558038438, + "grad_norm": 357.5829162597656, + "learning_rate": 4.090881776679293e-06, + "loss": 10.3754, + "step": 301190 + }, + { + "epoch": 0.6084430564365276, + "grad_norm": 1410.547607421875, + "learning_rate": 4.090538531568867e-06, + "loss": 35.9018, + "step": 301200 + }, + { + "epoch": 0.6084632570692114, + "grad_norm": 63.061458587646484, + "learning_rate": 4.090195290891045e-06, + "loss": 13.956, + "step": 301210 + }, + { + "epoch": 0.6084834577018953, + "grad_norm": 487.25469970703125, + "learning_rate": 4.0898520546475e-06, + "loss": 32.4872, + "step": 301220 + }, + { + "epoch": 0.6085036583345791, + "grad_norm": 333.2745361328125, + "learning_rate": 4.089508822839907e-06, + "loss": 16.9703, + "step": 301230 + }, + { + "epoch": 0.6085238589672629, + "grad_norm": 230.5712127685547, + "learning_rate": 4.089165595469937e-06, + "loss": 13.3602, + "step": 301240 + }, + { + "epoch": 0.6085440595999466, + "grad_norm": 283.78399658203125, + "learning_rate": 4.088822372539263e-06, + "loss": 21.2786, + "step": 301250 + }, + { + "epoch": 0.6085642602326304, + "grad_norm": 206.6420440673828, + "learning_rate": 4.0884791540495585e-06, + "loss": 12.7147, + "step": 301260 + }, + { + "epoch": 0.6085844608653143, + "grad_norm": 735.728271484375, + "learning_rate": 4.0881359400024964e-06, + "loss": 31.9729, + "step": 301270 + }, + { + "epoch": 0.6086046614979981, + "grad_norm": 251.7333526611328, + "learning_rate": 4.087792730399749e-06, + 
"loss": 15.1294, + "step": 301280 + }, + { + "epoch": 0.6086248621306819, + "grad_norm": 673.1650390625, + "learning_rate": 4.087449525242989e-06, + "loss": 18.04, + "step": 301290 + }, + { + "epoch": 0.6086450627633657, + "grad_norm": 457.9283447265625, + "learning_rate": 4.087106324533891e-06, + "loss": 16.8589, + "step": 301300 + }, + { + "epoch": 0.6086652633960495, + "grad_norm": 679.8662719726562, + "learning_rate": 4.086763128274124e-06, + "loss": 15.5843, + "step": 301310 + }, + { + "epoch": 0.6086854640287334, + "grad_norm": 296.8544921875, + "learning_rate": 4.086419936465362e-06, + "loss": 15.3972, + "step": 301320 + }, + { + "epoch": 0.6087056646614172, + "grad_norm": 234.82400512695312, + "learning_rate": 4.0860767491092825e-06, + "loss": 19.8179, + "step": 301330 + }, + { + "epoch": 0.608725865294101, + "grad_norm": 485.77532958984375, + "learning_rate": 4.08573356620755e-06, + "loss": 17.0995, + "step": 301340 + }, + { + "epoch": 0.6087460659267848, + "grad_norm": 162.58717346191406, + "learning_rate": 4.0853903877618425e-06, + "loss": 17.0327, + "step": 301350 + }, + { + "epoch": 0.6087662665594686, + "grad_norm": 101.74900817871094, + "learning_rate": 4.085047213773831e-06, + "loss": 10.7914, + "step": 301360 + }, + { + "epoch": 0.6087864671921525, + "grad_norm": 405.9892272949219, + "learning_rate": 4.08470404424519e-06, + "loss": 26.3598, + "step": 301370 + }, + { + "epoch": 0.6088066678248363, + "grad_norm": 504.0001525878906, + "learning_rate": 4.084360879177588e-06, + "loss": 14.5078, + "step": 301380 + }, + { + "epoch": 0.6088268684575201, + "grad_norm": 297.1695251464844, + "learning_rate": 4.0840177185727005e-06, + "loss": 23.0532, + "step": 301390 + }, + { + "epoch": 0.6088470690902039, + "grad_norm": 337.6854553222656, + "learning_rate": 4.083674562432203e-06, + "loss": 25.5926, + "step": 301400 + }, + { + "epoch": 0.6088672697228877, + "grad_norm": 258.78253173828125, + "learning_rate": 4.0833314107577605e-06, + "loss": 7.1162, + "step": 301410 + }, + { + "epoch": 0.6088874703555716, + "grad_norm": 299.7767333984375, + "learning_rate": 4.08298826355105e-06, + "loss": 22.6414, + "step": 301420 + }, + { + "epoch": 0.6089076709882554, + "grad_norm": 147.05128479003906, + "learning_rate": 4.082645120813746e-06, + "loss": 16.316, + "step": 301430 + }, + { + "epoch": 0.6089278716209392, + "grad_norm": 709.7376708984375, + "learning_rate": 4.082301982547517e-06, + "loss": 13.4846, + "step": 301440 + }, + { + "epoch": 0.608948072253623, + "grad_norm": 324.18280029296875, + "learning_rate": 4.081958848754035e-06, + "loss": 8.5537, + "step": 301450 + }, + { + "epoch": 0.6089682728863068, + "grad_norm": 267.200927734375, + "learning_rate": 4.081615719434978e-06, + "loss": 16.7705, + "step": 301460 + }, + { + "epoch": 0.6089884735189907, + "grad_norm": 24.94508934020996, + "learning_rate": 4.081272594592011e-06, + "loss": 14.3104, + "step": 301470 + }, + { + "epoch": 0.6090086741516745, + "grad_norm": 2.499239921569824, + "learning_rate": 4.080929474226811e-06, + "loss": 15.5801, + "step": 301480 + }, + { + "epoch": 0.6090288747843583, + "grad_norm": 652.1822509765625, + "learning_rate": 4.080586358341051e-06, + "loss": 19.2077, + "step": 301490 + }, + { + "epoch": 0.609049075417042, + "grad_norm": 338.8230285644531, + "learning_rate": 4.0802432469364e-06, + "loss": 6.6709, + "step": 301500 + }, + { + "epoch": 0.6090692760497258, + "grad_norm": 340.7795715332031, + "learning_rate": 4.0799001400145315e-06, + "loss": 20.0169, + "step": 301510 + }, + { + "epoch": 
0.6090894766824096, + "grad_norm": 232.5472869873047, + "learning_rate": 4.07955703757712e-06, + "loss": 9.472, + "step": 301520 + }, + { + "epoch": 0.6091096773150935, + "grad_norm": 235.46383666992188, + "learning_rate": 4.079213939625834e-06, + "loss": 28.0348, + "step": 301530 + }, + { + "epoch": 0.6091298779477773, + "grad_norm": 275.21759033203125, + "learning_rate": 4.078870846162349e-06, + "loss": 12.917, + "step": 301540 + }, + { + "epoch": 0.6091500785804611, + "grad_norm": 352.4283752441406, + "learning_rate": 4.078527757188333e-06, + "loss": 26.7873, + "step": 301550 + }, + { + "epoch": 0.6091702792131449, + "grad_norm": 514.5021362304688, + "learning_rate": 4.078184672705465e-06, + "loss": 25.0382, + "step": 301560 + }, + { + "epoch": 0.6091904798458287, + "grad_norm": 575.0938110351562, + "learning_rate": 4.077841592715409e-06, + "loss": 12.1316, + "step": 301570 + }, + { + "epoch": 0.6092106804785126, + "grad_norm": 349.18182373046875, + "learning_rate": 4.077498517219844e-06, + "loss": 17.5067, + "step": 301580 + }, + { + "epoch": 0.6092308811111964, + "grad_norm": 136.44674682617188, + "learning_rate": 4.0771554462204395e-06, + "loss": 27.4129, + "step": 301590 + }, + { + "epoch": 0.6092510817438802, + "grad_norm": 2124.14599609375, + "learning_rate": 4.0768123797188665e-06, + "loss": 25.9766, + "step": 301600 + }, + { + "epoch": 0.609271282376564, + "grad_norm": 524.9080200195312, + "learning_rate": 4.076469317716798e-06, + "loss": 16.3862, + "step": 301610 + }, + { + "epoch": 0.6092914830092478, + "grad_norm": 674.9454345703125, + "learning_rate": 4.076126260215906e-06, + "loss": 21.6937, + "step": 301620 + }, + { + "epoch": 0.6093116836419317, + "grad_norm": 213.24224853515625, + "learning_rate": 4.0757832072178626e-06, + "loss": 7.0525, + "step": 301630 + }, + { + "epoch": 0.6093318842746155, + "grad_norm": 69.69252014160156, + "learning_rate": 4.075440158724339e-06, + "loss": 16.8704, + "step": 301640 + }, + { + "epoch": 0.6093520849072993, + "grad_norm": 332.0189514160156, + "learning_rate": 4.075097114737011e-06, + "loss": 15.391, + "step": 301650 + }, + { + "epoch": 0.6093722855399831, + "grad_norm": 916.816650390625, + "learning_rate": 4.074754075257543e-06, + "loss": 23.7619, + "step": 301660 + }, + { + "epoch": 0.609392486172667, + "grad_norm": 1299.2314453125, + "learning_rate": 4.074411040287614e-06, + "loss": 22.8498, + "step": 301670 + }, + { + "epoch": 0.6094126868053508, + "grad_norm": 494.7002258300781, + "learning_rate": 4.074068009828894e-06, + "loss": 23.4934, + "step": 301680 + }, + { + "epoch": 0.6094328874380346, + "grad_norm": 860.8915405273438, + "learning_rate": 4.073724983883053e-06, + "loss": 22.2737, + "step": 301690 + }, + { + "epoch": 0.6094530880707184, + "grad_norm": 55.833587646484375, + "learning_rate": 4.073381962451764e-06, + "loss": 11.6702, + "step": 301700 + }, + { + "epoch": 0.6094732887034022, + "grad_norm": 487.8169860839844, + "learning_rate": 4.073038945536698e-06, + "loss": 14.0798, + "step": 301710 + }, + { + "epoch": 0.609493489336086, + "grad_norm": 296.30450439453125, + "learning_rate": 4.07269593313953e-06, + "loss": 14.3807, + "step": 301720 + }, + { + "epoch": 0.6095136899687699, + "grad_norm": 198.1383819580078, + "learning_rate": 4.0723529252619276e-06, + "loss": 14.6119, + "step": 301730 + }, + { + "epoch": 0.6095338906014537, + "grad_norm": 911.2178955078125, + "learning_rate": 4.0720099219055655e-06, + "loss": 12.2603, + "step": 301740 + }, + { + "epoch": 0.6095540912341375, + "grad_norm": 492.0929260253906, + 
"learning_rate": 4.0716669230721154e-06, + "loss": 18.7033, + "step": 301750 + }, + { + "epoch": 0.6095742918668212, + "grad_norm": 137.6938018798828, + "learning_rate": 4.071323928763247e-06, + "loss": 10.1898, + "step": 301760 + }, + { + "epoch": 0.609594492499505, + "grad_norm": 84.15969848632812, + "learning_rate": 4.070980938980633e-06, + "loss": 20.4293, + "step": 301770 + }, + { + "epoch": 0.6096146931321889, + "grad_norm": 538.3965454101562, + "learning_rate": 4.070637953725946e-06, + "loss": 20.5844, + "step": 301780 + }, + { + "epoch": 0.6096348937648727, + "grad_norm": 504.7793884277344, + "learning_rate": 4.0702949730008565e-06, + "loss": 30.9394, + "step": 301790 + }, + { + "epoch": 0.6096550943975565, + "grad_norm": 293.6222229003906, + "learning_rate": 4.069951996807034e-06, + "loss": 15.7182, + "step": 301800 + }, + { + "epoch": 0.6096752950302403, + "grad_norm": 395.0409851074219, + "learning_rate": 4.069609025146156e-06, + "loss": 18.8731, + "step": 301810 + }, + { + "epoch": 0.6096954956629241, + "grad_norm": 337.16558837890625, + "learning_rate": 4.0692660580198905e-06, + "loss": 23.5865, + "step": 301820 + }, + { + "epoch": 0.609715696295608, + "grad_norm": 289.2772216796875, + "learning_rate": 4.068923095429909e-06, + "loss": 16.9094, + "step": 301830 + }, + { + "epoch": 0.6097358969282918, + "grad_norm": 0.0, + "learning_rate": 4.068580137377882e-06, + "loss": 16.1499, + "step": 301840 + }, + { + "epoch": 0.6097560975609756, + "grad_norm": 395.0556640625, + "learning_rate": 4.068237183865485e-06, + "loss": 17.7865, + "step": 301850 + }, + { + "epoch": 0.6097762981936594, + "grad_norm": 392.32684326171875, + "learning_rate": 4.067894234894384e-06, + "loss": 32.0226, + "step": 301860 + }, + { + "epoch": 0.6097964988263432, + "grad_norm": 276.8617248535156, + "learning_rate": 4.067551290466255e-06, + "loss": 29.1514, + "step": 301870 + }, + { + "epoch": 0.6098166994590271, + "grad_norm": 499.0665588378906, + "learning_rate": 4.067208350582769e-06, + "loss": 14.1026, + "step": 301880 + }, + { + "epoch": 0.6098369000917109, + "grad_norm": 710.1145629882812, + "learning_rate": 4.066865415245594e-06, + "loss": 29.4181, + "step": 301890 + }, + { + "epoch": 0.6098571007243947, + "grad_norm": 307.7794189453125, + "learning_rate": 4.066522484456406e-06, + "loss": 13.1219, + "step": 301900 + }, + { + "epoch": 0.6098773013570785, + "grad_norm": 589.836181640625, + "learning_rate": 4.066179558216874e-06, + "loss": 21.7507, + "step": 301910 + }, + { + "epoch": 0.6098975019897623, + "grad_norm": 137.0048828125, + "learning_rate": 4.0658366365286684e-06, + "loss": 22.991, + "step": 301920 + }, + { + "epoch": 0.6099177026224462, + "grad_norm": 503.49127197265625, + "learning_rate": 4.06549371939346e-06, + "loss": 11.3635, + "step": 301930 + }, + { + "epoch": 0.60993790325513, + "grad_norm": 541.7255859375, + "learning_rate": 4.0651508068129264e-06, + "loss": 17.8539, + "step": 301940 + }, + { + "epoch": 0.6099581038878138, + "grad_norm": 244.3643035888672, + "learning_rate": 4.064807898788731e-06, + "loss": 23.5271, + "step": 301950 + }, + { + "epoch": 0.6099783045204976, + "grad_norm": 189.54734802246094, + "learning_rate": 4.064464995322549e-06, + "loss": 15.1665, + "step": 301960 + }, + { + "epoch": 0.6099985051531814, + "grad_norm": 291.32281494140625, + "learning_rate": 4.064122096416053e-06, + "loss": 24.556, + "step": 301970 + }, + { + "epoch": 0.6100187057858653, + "grad_norm": 221.17601013183594, + "learning_rate": 4.063779202070911e-06, + "loss": 18.5762, + "step": 301980 + 
}, + { + "epoch": 0.6100389064185491, + "grad_norm": 510.4002990722656, + "learning_rate": 4.0634363122887945e-06, + "loss": 20.7079, + "step": 301990 + }, + { + "epoch": 0.6100591070512329, + "grad_norm": 287.87322998046875, + "learning_rate": 4.063093427071376e-06, + "loss": 16.008, + "step": 302000 + }, + { + "epoch": 0.6100793076839166, + "grad_norm": 572.6348876953125, + "learning_rate": 4.06275054642033e-06, + "loss": 21.2857, + "step": 302010 + }, + { + "epoch": 0.6100995083166004, + "grad_norm": 485.3209228515625, + "learning_rate": 4.06240767033732e-06, + "loss": 14.0891, + "step": 302020 + }, + { + "epoch": 0.6101197089492842, + "grad_norm": 688.4270629882812, + "learning_rate": 4.0620647988240225e-06, + "loss": 22.3187, + "step": 302030 + }, + { + "epoch": 0.6101399095819681, + "grad_norm": 452.1419372558594, + "learning_rate": 4.061721931882109e-06, + "loss": 16.7207, + "step": 302040 + }, + { + "epoch": 0.6101601102146519, + "grad_norm": 0.0, + "learning_rate": 4.061379069513248e-06, + "loss": 19.5551, + "step": 302050 + }, + { + "epoch": 0.6101803108473357, + "grad_norm": 138.41639709472656, + "learning_rate": 4.0610362117191106e-06, + "loss": 17.4711, + "step": 302060 + }, + { + "epoch": 0.6102005114800195, + "grad_norm": 408.5327453613281, + "learning_rate": 4.0606933585013704e-06, + "loss": 19.5146, + "step": 302070 + }, + { + "epoch": 0.6102207121127033, + "grad_norm": 504.6182861328125, + "learning_rate": 4.060350509861696e-06, + "loss": 15.9573, + "step": 302080 + }, + { + "epoch": 0.6102409127453872, + "grad_norm": 39.33086013793945, + "learning_rate": 4.0600076658017585e-06, + "loss": 29.3718, + "step": 302090 + }, + { + "epoch": 0.610261113378071, + "grad_norm": 667.924560546875, + "learning_rate": 4.0596648263232315e-06, + "loss": 11.2847, + "step": 302100 + }, + { + "epoch": 0.6102813140107548, + "grad_norm": 562.177734375, + "learning_rate": 4.059321991427782e-06, + "loss": 13.6215, + "step": 302110 + }, + { + "epoch": 0.6103015146434386, + "grad_norm": 124.45332336425781, + "learning_rate": 4.058979161117084e-06, + "loss": 12.5601, + "step": 302120 + }, + { + "epoch": 0.6103217152761224, + "grad_norm": 294.8182373046875, + "learning_rate": 4.058636335392809e-06, + "loss": 21.3163, + "step": 302130 + }, + { + "epoch": 0.6103419159088063, + "grad_norm": 695.095947265625, + "learning_rate": 4.0582935142566245e-06, + "loss": 22.359, + "step": 302140 + }, + { + "epoch": 0.6103621165414901, + "grad_norm": 1088.446044921875, + "learning_rate": 4.057950697710203e-06, + "loss": 19.9529, + "step": 302150 + }, + { + "epoch": 0.6103823171741739, + "grad_norm": 591.505859375, + "learning_rate": 4.057607885755215e-06, + "loss": 26.2497, + "step": 302160 + }, + { + "epoch": 0.6104025178068577, + "grad_norm": 757.7903442382812, + "learning_rate": 4.057265078393335e-06, + "loss": 18.1819, + "step": 302170 + }, + { + "epoch": 0.6104227184395415, + "grad_norm": 473.31011962890625, + "learning_rate": 4.056922275626227e-06, + "loss": 27.9173, + "step": 302180 + }, + { + "epoch": 0.6104429190722254, + "grad_norm": 497.7650451660156, + "learning_rate": 4.056579477455567e-06, + "loss": 24.6485, + "step": 302190 + }, + { + "epoch": 0.6104631197049092, + "grad_norm": 115.89996337890625, + "learning_rate": 4.0562366838830255e-06, + "loss": 11.2858, + "step": 302200 + }, + { + "epoch": 0.610483320337593, + "grad_norm": 630.05029296875, + "learning_rate": 4.05589389491027e-06, + "loss": 17.092, + "step": 302210 + }, + { + "epoch": 0.6105035209702768, + "grad_norm": 26.682065963745117, + 
"learning_rate": 4.0555511105389735e-06, + "loss": 15.3019, + "step": 302220 + }, + { + "epoch": 0.6105237216029606, + "grad_norm": 286.85235595703125, + "learning_rate": 4.055208330770808e-06, + "loss": 16.3573, + "step": 302230 + }, + { + "epoch": 0.6105439222356445, + "grad_norm": 315.05401611328125, + "learning_rate": 4.054865555607441e-06, + "loss": 20.918, + "step": 302240 + }, + { + "epoch": 0.6105641228683283, + "grad_norm": 283.9547119140625, + "learning_rate": 4.054522785050543e-06, + "loss": 28.8137, + "step": 302250 + }, + { + "epoch": 0.6105843235010121, + "grad_norm": 698.4987182617188, + "learning_rate": 4.05418001910179e-06, + "loss": 9.4429, + "step": 302260 + }, + { + "epoch": 0.6106045241336958, + "grad_norm": 179.47476196289062, + "learning_rate": 4.053837257762846e-06, + "loss": 15.6023, + "step": 302270 + }, + { + "epoch": 0.6106247247663796, + "grad_norm": 462.5314025878906, + "learning_rate": 4.053494501035385e-06, + "loss": 23.7735, + "step": 302280 + }, + { + "epoch": 0.6106449253990635, + "grad_norm": 890.564697265625, + "learning_rate": 4.053151748921078e-06, + "loss": 16.0309, + "step": 302290 + }, + { + "epoch": 0.6106651260317473, + "grad_norm": 143.55178833007812, + "learning_rate": 4.052809001421595e-06, + "loss": 30.7759, + "step": 302300 + }, + { + "epoch": 0.6106853266644311, + "grad_norm": 368.9315185546875, + "learning_rate": 4.0524662585386045e-06, + "loss": 19.7443, + "step": 302310 + }, + { + "epoch": 0.6107055272971149, + "grad_norm": 427.68548583984375, + "learning_rate": 4.0521235202737775e-06, + "loss": 21.6405, + "step": 302320 + }, + { + "epoch": 0.6107257279297987, + "grad_norm": 155.68882751464844, + "learning_rate": 4.051780786628789e-06, + "loss": 10.4967, + "step": 302330 + }, + { + "epoch": 0.6107459285624826, + "grad_norm": 407.12408447265625, + "learning_rate": 4.0514380576053035e-06, + "loss": 23.8343, + "step": 302340 + }, + { + "epoch": 0.6107661291951664, + "grad_norm": 1344.190673828125, + "learning_rate": 4.051095333204994e-06, + "loss": 18.8498, + "step": 302350 + }, + { + "epoch": 0.6107863298278502, + "grad_norm": 346.4909362792969, + "learning_rate": 4.0507526134295314e-06, + "loss": 23.4103, + "step": 302360 + }, + { + "epoch": 0.610806530460534, + "grad_norm": 316.17620849609375, + "learning_rate": 4.050409898280585e-06, + "loss": 9.4559, + "step": 302370 + }, + { + "epoch": 0.6108267310932178, + "grad_norm": 574.623046875, + "learning_rate": 4.050067187759826e-06, + "loss": 17.2412, + "step": 302380 + }, + { + "epoch": 0.6108469317259017, + "grad_norm": 708.8782348632812, + "learning_rate": 4.049724481868924e-06, + "loss": 23.2733, + "step": 302390 + }, + { + "epoch": 0.6108671323585855, + "grad_norm": 695.9046020507812, + "learning_rate": 4.0493817806095504e-06, + "loss": 24.038, + "step": 302400 + }, + { + "epoch": 0.6108873329912693, + "grad_norm": 128.50205993652344, + "learning_rate": 4.049039083983372e-06, + "loss": 20.3051, + "step": 302410 + }, + { + "epoch": 0.6109075336239531, + "grad_norm": 255.7003631591797, + "learning_rate": 4.048696391992065e-06, + "loss": 24.7297, + "step": 302420 + }, + { + "epoch": 0.6109277342566369, + "grad_norm": 461.51251220703125, + "learning_rate": 4.048353704637295e-06, + "loss": 11.599, + "step": 302430 + }, + { + "epoch": 0.6109479348893208, + "grad_norm": 117.34037017822266, + "learning_rate": 4.048011021920733e-06, + "loss": 18.9199, + "step": 302440 + }, + { + "epoch": 0.6109681355220046, + "grad_norm": 323.08642578125, + "learning_rate": 4.047668343844051e-06, + "loss": 
12.1964, + "step": 302450 + }, + { + "epoch": 0.6109883361546884, + "grad_norm": 204.79774475097656, + "learning_rate": 4.047325670408918e-06, + "loss": 15.2674, + "step": 302460 + }, + { + "epoch": 0.6110085367873722, + "grad_norm": 46.07692337036133, + "learning_rate": 4.046983001617004e-06, + "loss": 18.4622, + "step": 302470 + }, + { + "epoch": 0.611028737420056, + "grad_norm": 294.8068542480469, + "learning_rate": 4.0466403374699775e-06, + "loss": 41.0519, + "step": 302480 + }, + { + "epoch": 0.6110489380527399, + "grad_norm": 298.0314025878906, + "learning_rate": 4.046297677969513e-06, + "loss": 17.706, + "step": 302490 + }, + { + "epoch": 0.6110691386854237, + "grad_norm": 676.768310546875, + "learning_rate": 4.045955023117276e-06, + "loss": 21.6609, + "step": 302500 + }, + { + "epoch": 0.6110893393181075, + "grad_norm": 455.82550048828125, + "learning_rate": 4.045612372914939e-06, + "loss": 19.1227, + "step": 302510 + }, + { + "epoch": 0.6111095399507913, + "grad_norm": 204.07211303710938, + "learning_rate": 4.045269727364173e-06, + "loss": 11.8948, + "step": 302520 + }, + { + "epoch": 0.611129740583475, + "grad_norm": 638.8299560546875, + "learning_rate": 4.044927086466646e-06, + "loss": 16.8545, + "step": 302530 + }, + { + "epoch": 0.6111499412161588, + "grad_norm": 543.645263671875, + "learning_rate": 4.044584450224026e-06, + "loss": 15.1937, + "step": 302540 + }, + { + "epoch": 0.6111701418488427, + "grad_norm": 8.59873104095459, + "learning_rate": 4.0442418186379895e-06, + "loss": 10.9402, + "step": 302550 + }, + { + "epoch": 0.6111903424815265, + "grad_norm": 274.8184814453125, + "learning_rate": 4.043899191710199e-06, + "loss": 19.9204, + "step": 302560 + }, + { + "epoch": 0.6112105431142103, + "grad_norm": 982.5318603515625, + "learning_rate": 4.043556569442329e-06, + "loss": 22.9097, + "step": 302570 + }, + { + "epoch": 0.6112307437468941, + "grad_norm": 172.61416625976562, + "learning_rate": 4.0432139518360495e-06, + "loss": 16.2588, + "step": 302580 + }, + { + "epoch": 0.6112509443795779, + "grad_norm": 596.6406860351562, + "learning_rate": 4.0428713388930276e-06, + "loss": 18.488, + "step": 302590 + }, + { + "epoch": 0.6112711450122618, + "grad_norm": 794.8352661132812, + "learning_rate": 4.042528730614935e-06, + "loss": 30.5124, + "step": 302600 + }, + { + "epoch": 0.6112913456449456, + "grad_norm": 319.686767578125, + "learning_rate": 4.042186127003441e-06, + "loss": 19.7382, + "step": 302610 + }, + { + "epoch": 0.6113115462776294, + "grad_norm": 648.2125244140625, + "learning_rate": 4.0418435280602185e-06, + "loss": 13.3559, + "step": 302620 + }, + { + "epoch": 0.6113317469103132, + "grad_norm": 453.7065734863281, + "learning_rate": 4.04150093378693e-06, + "loss": 28.3858, + "step": 302630 + }, + { + "epoch": 0.611351947542997, + "grad_norm": 233.4899139404297, + "learning_rate": 4.041158344185252e-06, + "loss": 20.3837, + "step": 302640 + }, + { + "epoch": 0.6113721481756809, + "grad_norm": 516.4542846679688, + "learning_rate": 4.040815759256852e-06, + "loss": 31.8236, + "step": 302650 + }, + { + "epoch": 0.6113923488083647, + "grad_norm": 262.2412414550781, + "learning_rate": 4.0404731790034e-06, + "loss": 35.1186, + "step": 302660 + }, + { + "epoch": 0.6114125494410485, + "grad_norm": 518.053955078125, + "learning_rate": 4.040130603426565e-06, + "loss": 29.4273, + "step": 302670 + }, + { + "epoch": 0.6114327500737323, + "grad_norm": 485.6481018066406, + "learning_rate": 4.039788032528017e-06, + "loss": 9.7625, + "step": 302680 + }, + { + "epoch": 
0.6114529507064161, + "grad_norm": 330.59075927734375, + "learning_rate": 4.039445466309426e-06, + "loss": 21.0793, + "step": 302690 + }, + { + "epoch": 0.6114731513391, + "grad_norm": 378.89361572265625, + "learning_rate": 4.039102904772459e-06, + "loss": 10.9552, + "step": 302700 + }, + { + "epoch": 0.6114933519717838, + "grad_norm": 561.077392578125, + "learning_rate": 4.0387603479187915e-06, + "loss": 28.2956, + "step": 302710 + }, + { + "epoch": 0.6115135526044676, + "grad_norm": 745.5084838867188, + "learning_rate": 4.038417795750086e-06, + "loss": 17.587, + "step": 302720 + }, + { + "epoch": 0.6115337532371514, + "grad_norm": 403.7786865234375, + "learning_rate": 4.038075248268018e-06, + "loss": 19.6259, + "step": 302730 + }, + { + "epoch": 0.6115539538698352, + "grad_norm": 409.99603271484375, + "learning_rate": 4.0377327054742544e-06, + "loss": 20.5259, + "step": 302740 + }, + { + "epoch": 0.6115741545025191, + "grad_norm": 316.7395324707031, + "learning_rate": 4.037390167370464e-06, + "loss": 15.7151, + "step": 302750 + }, + { + "epoch": 0.6115943551352029, + "grad_norm": 449.1668395996094, + "learning_rate": 4.037047633958317e-06, + "loss": 17.8964, + "step": 302760 + }, + { + "epoch": 0.6116145557678867, + "grad_norm": 250.11282348632812, + "learning_rate": 4.0367051052394825e-06, + "loss": 17.4639, + "step": 302770 + }, + { + "epoch": 0.6116347564005704, + "grad_norm": 535.4613037109375, + "learning_rate": 4.036362581215633e-06, + "loss": 18.9022, + "step": 302780 + }, + { + "epoch": 0.6116549570332542, + "grad_norm": 19.670238494873047, + "learning_rate": 4.036020061888432e-06, + "loss": 18.115, + "step": 302790 + }, + { + "epoch": 0.611675157665938, + "grad_norm": 480.7803039550781, + "learning_rate": 4.035677547259555e-06, + "loss": 18.1596, + "step": 302800 + }, + { + "epoch": 0.6116953582986219, + "grad_norm": 227.60894775390625, + "learning_rate": 4.035335037330668e-06, + "loss": 28.7043, + "step": 302810 + }, + { + "epoch": 0.6117155589313057, + "grad_norm": 541.886962890625, + "learning_rate": 4.034992532103441e-06, + "loss": 20.1744, + "step": 302820 + }, + { + "epoch": 0.6117357595639895, + "grad_norm": 239.96633911132812, + "learning_rate": 4.034650031579543e-06, + "loss": 14.6629, + "step": 302830 + }, + { + "epoch": 0.6117559601966733, + "grad_norm": 655.57470703125, + "learning_rate": 4.0343075357606445e-06, + "loss": 18.103, + "step": 302840 + }, + { + "epoch": 0.6117761608293572, + "grad_norm": 320.12969970703125, + "learning_rate": 4.0339650446484135e-06, + "loss": 21.4581, + "step": 302850 + }, + { + "epoch": 0.611796361462041, + "grad_norm": 46.63835144042969, + "learning_rate": 4.033622558244519e-06, + "loss": 11.3173, + "step": 302860 + }, + { + "epoch": 0.6118165620947248, + "grad_norm": 1075.1484375, + "learning_rate": 4.0332800765506325e-06, + "loss": 11.4977, + "step": 302870 + }, + { + "epoch": 0.6118367627274086, + "grad_norm": 473.89617919921875, + "learning_rate": 4.03293759956842e-06, + "loss": 15.609, + "step": 302880 + }, + { + "epoch": 0.6118569633600924, + "grad_norm": 121.10155487060547, + "learning_rate": 4.032595127299552e-06, + "loss": 22.2685, + "step": 302890 + }, + { + "epoch": 0.6118771639927763, + "grad_norm": 155.80386352539062, + "learning_rate": 4.032252659745699e-06, + "loss": 15.1171, + "step": 302900 + }, + { + "epoch": 0.6118973646254601, + "grad_norm": 72.47950744628906, + "learning_rate": 4.03191019690853e-06, + "loss": 12.6798, + "step": 302910 + }, + { + "epoch": 0.6119175652581439, + "grad_norm": 267.1290283203125, + 
"learning_rate": 4.031567738789713e-06, + "loss": 20.5022, + "step": 302920 + }, + { + "epoch": 0.6119377658908277, + "grad_norm": 473.3138427734375, + "learning_rate": 4.031225285390915e-06, + "loss": 15.3429, + "step": 302930 + }, + { + "epoch": 0.6119579665235115, + "grad_norm": 401.8825378417969, + "learning_rate": 4.0308828367138106e-06, + "loss": 8.9975, + "step": 302940 + }, + { + "epoch": 0.6119781671561954, + "grad_norm": 543.8609619140625, + "learning_rate": 4.030540392760064e-06, + "loss": 31.137, + "step": 302950 + }, + { + "epoch": 0.6119983677888792, + "grad_norm": 147.74777221679688, + "learning_rate": 4.030197953531346e-06, + "loss": 17.4207, + "step": 302960 + }, + { + "epoch": 0.612018568421563, + "grad_norm": 444.2214660644531, + "learning_rate": 4.029855519029326e-06, + "loss": 26.9827, + "step": 302970 + }, + { + "epoch": 0.6120387690542468, + "grad_norm": 561.8872680664062, + "learning_rate": 4.029513089255673e-06, + "loss": 15.0142, + "step": 302980 + }, + { + "epoch": 0.6120589696869306, + "grad_norm": 523.2400512695312, + "learning_rate": 4.0291706642120545e-06, + "loss": 12.7622, + "step": 302990 + }, + { + "epoch": 0.6120791703196145, + "grad_norm": 34.56816864013672, + "learning_rate": 4.028828243900141e-06, + "loss": 13.7786, + "step": 303000 + }, + { + "epoch": 0.6120993709522983, + "grad_norm": 695.3958129882812, + "learning_rate": 4.028485828321601e-06, + "loss": 19.7008, + "step": 303010 + }, + { + "epoch": 0.6121195715849821, + "grad_norm": 662.0638427734375, + "learning_rate": 4.028143417478102e-06, + "loss": 18.3338, + "step": 303020 + }, + { + "epoch": 0.6121397722176659, + "grad_norm": 19.25673484802246, + "learning_rate": 4.0278010113713165e-06, + "loss": 16.5985, + "step": 303030 + }, + { + "epoch": 0.6121599728503496, + "grad_norm": 665.7344360351562, + "learning_rate": 4.027458610002908e-06, + "loss": 15.9306, + "step": 303040 + }, + { + "epoch": 0.6121801734830334, + "grad_norm": 688.4962158203125, + "learning_rate": 4.02711621337455e-06, + "loss": 35.8098, + "step": 303050 + }, + { + "epoch": 0.6122003741157173, + "grad_norm": 160.4550323486328, + "learning_rate": 4.0267738214879095e-06, + "loss": 17.0107, + "step": 303060 + }, + { + "epoch": 0.6122205747484011, + "grad_norm": 70.0464096069336, + "learning_rate": 4.026431434344656e-06, + "loss": 18.876, + "step": 303070 + }, + { + "epoch": 0.6122407753810849, + "grad_norm": 385.9349670410156, + "learning_rate": 4.0260890519464565e-06, + "loss": 19.8726, + "step": 303080 + }, + { + "epoch": 0.6122609760137687, + "grad_norm": 198.9353790283203, + "learning_rate": 4.02574667429498e-06, + "loss": 14.8484, + "step": 303090 + }, + { + "epoch": 0.6122811766464525, + "grad_norm": 312.6994934082031, + "learning_rate": 4.025404301391898e-06, + "loss": 13.5631, + "step": 303100 + }, + { + "epoch": 0.6123013772791364, + "grad_norm": 167.55181884765625, + "learning_rate": 4.0250619332388765e-06, + "loss": 8.2391, + "step": 303110 + }, + { + "epoch": 0.6123215779118202, + "grad_norm": 446.42742919921875, + "learning_rate": 4.024719569837584e-06, + "loss": 13.5969, + "step": 303120 + }, + { + "epoch": 0.612341778544504, + "grad_norm": 112.34558868408203, + "learning_rate": 4.024377211189693e-06, + "loss": 13.0806, + "step": 303130 + }, + { + "epoch": 0.6123619791771878, + "grad_norm": 133.58258056640625, + "learning_rate": 4.024034857296866e-06, + "loss": 18.9411, + "step": 303140 + }, + { + "epoch": 0.6123821798098716, + "grad_norm": 633.3317260742188, + "learning_rate": 4.023692508160776e-06, + "loss": 
19.7699, + "step": 303150 + }, + { + "epoch": 0.6124023804425555, + "grad_norm": 499.4707946777344, + "learning_rate": 4.0233501637830905e-06, + "loss": 15.0687, + "step": 303160 + }, + { + "epoch": 0.6124225810752393, + "grad_norm": 623.6322021484375, + "learning_rate": 4.023007824165476e-06, + "loss": 25.112, + "step": 303170 + }, + { + "epoch": 0.6124427817079231, + "grad_norm": 69.01964569091797, + "learning_rate": 4.022665489309604e-06, + "loss": 25.0718, + "step": 303180 + }, + { + "epoch": 0.6124629823406069, + "grad_norm": 588.364501953125, + "learning_rate": 4.022323159217144e-06, + "loss": 13.9179, + "step": 303190 + }, + { + "epoch": 0.6124831829732907, + "grad_norm": 367.04351806640625, + "learning_rate": 4.02198083388976e-06, + "loss": 19.4609, + "step": 303200 + }, + { + "epoch": 0.6125033836059746, + "grad_norm": 573.3627319335938, + "learning_rate": 4.021638513329123e-06, + "loss": 16.3759, + "step": 303210 + }, + { + "epoch": 0.6125235842386584, + "grad_norm": 401.361328125, + "learning_rate": 4.0212961975369e-06, + "loss": 22.8179, + "step": 303220 + }, + { + "epoch": 0.6125437848713422, + "grad_norm": 376.02496337890625, + "learning_rate": 4.020953886514764e-06, + "loss": 16.6355, + "step": 303230 + }, + { + "epoch": 0.612563985504026, + "grad_norm": 86.31979370117188, + "learning_rate": 4.020611580264377e-06, + "loss": 29.3063, + "step": 303240 + }, + { + "epoch": 0.6125841861367098, + "grad_norm": 227.11631774902344, + "learning_rate": 4.020269278787411e-06, + "loss": 15.518, + "step": 303250 + }, + { + "epoch": 0.6126043867693937, + "grad_norm": 182.04917907714844, + "learning_rate": 4.019926982085536e-06, + "loss": 11.5511, + "step": 303260 + }, + { + "epoch": 0.6126245874020775, + "grad_norm": 373.6564636230469, + "learning_rate": 4.019584690160416e-06, + "loss": 14.9567, + "step": 303270 + }, + { + "epoch": 0.6126447880347613, + "grad_norm": 124.20471954345703, + "learning_rate": 4.019242403013721e-06, + "loss": 21.3783, + "step": 303280 + }, + { + "epoch": 0.612664988667445, + "grad_norm": 322.8251037597656, + "learning_rate": 4.0189001206471215e-06, + "loss": 27.5457, + "step": 303290 + }, + { + "epoch": 0.6126851893001288, + "grad_norm": 661.531982421875, + "learning_rate": 4.018557843062282e-06, + "loss": 24.307, + "step": 303300 + }, + { + "epoch": 0.6127053899328126, + "grad_norm": 225.1516571044922, + "learning_rate": 4.018215570260872e-06, + "loss": 22.9267, + "step": 303310 + }, + { + "epoch": 0.6127255905654965, + "grad_norm": 188.3852996826172, + "learning_rate": 4.017873302244563e-06, + "loss": 19.5008, + "step": 303320 + }, + { + "epoch": 0.6127457911981803, + "grad_norm": 1034.5069580078125, + "learning_rate": 4.017531039015017e-06, + "loss": 20.9308, + "step": 303330 + }, + { + "epoch": 0.6127659918308641, + "grad_norm": 228.44070434570312, + "learning_rate": 4.017188780573907e-06, + "loss": 16.3561, + "step": 303340 + }, + { + "epoch": 0.6127861924635479, + "grad_norm": 220.6223602294922, + "learning_rate": 4.016846526922901e-06, + "loss": 18.9106, + "step": 303350 + }, + { + "epoch": 0.6128063930962317, + "grad_norm": 465.0409240722656, + "learning_rate": 4.016504278063664e-06, + "loss": 11.076, + "step": 303360 + }, + { + "epoch": 0.6128265937289156, + "grad_norm": 175.07225036621094, + "learning_rate": 4.016162033997867e-06, + "loss": 13.1767, + "step": 303370 + }, + { + "epoch": 0.6128467943615994, + "grad_norm": 375.5794372558594, + "learning_rate": 4.0158197947271746e-06, + "loss": 10.0069, + "step": 303380 + }, + { + "epoch": 
0.6128669949942832, + "grad_norm": 803.9150390625, + "learning_rate": 4.015477560253261e-06, + "loss": 21.1764, + "step": 303390 + }, + { + "epoch": 0.612887195626967, + "grad_norm": 204.79698181152344, + "learning_rate": 4.015135330577787e-06, + "loss": 13.4105, + "step": 303400 + }, + { + "epoch": 0.6129073962596508, + "grad_norm": 984.7935791015625, + "learning_rate": 4.014793105702425e-06, + "loss": 23.9986, + "step": 303410 + }, + { + "epoch": 0.6129275968923347, + "grad_norm": 1003.0968627929688, + "learning_rate": 4.014450885628843e-06, + "loss": 26.145, + "step": 303420 + }, + { + "epoch": 0.6129477975250185, + "grad_norm": 239.93844604492188, + "learning_rate": 4.014108670358707e-06, + "loss": 12.8953, + "step": 303430 + }, + { + "epoch": 0.6129679981577023, + "grad_norm": 448.0235595703125, + "learning_rate": 4.013766459893686e-06, + "loss": 15.014, + "step": 303440 + }, + { + "epoch": 0.6129881987903861, + "grad_norm": 345.88165283203125, + "learning_rate": 4.0134242542354486e-06, + "loss": 11.6867, + "step": 303450 + }, + { + "epoch": 0.61300839942307, + "grad_norm": 535.87548828125, + "learning_rate": 4.013082053385661e-06, + "loss": 20.6952, + "step": 303460 + }, + { + "epoch": 0.6130286000557538, + "grad_norm": 1100.1561279296875, + "learning_rate": 4.01273985734599e-06, + "loss": 35.3558, + "step": 303470 + }, + { + "epoch": 0.6130488006884376, + "grad_norm": 496.0742492675781, + "learning_rate": 4.012397666118108e-06, + "loss": 31.4369, + "step": 303480 + }, + { + "epoch": 0.6130690013211214, + "grad_norm": 101.76065063476562, + "learning_rate": 4.012055479703678e-06, + "loss": 13.0555, + "step": 303490 + }, + { + "epoch": 0.6130892019538052, + "grad_norm": 87.12376403808594, + "learning_rate": 4.0117132981043695e-06, + "loss": 16.1753, + "step": 303500 + }, + { + "epoch": 0.613109402586489, + "grad_norm": 330.2441711425781, + "learning_rate": 4.011371121321851e-06, + "loss": 36.0391, + "step": 303510 + }, + { + "epoch": 0.6131296032191729, + "grad_norm": 332.4491882324219, + "learning_rate": 4.011028949357791e-06, + "loss": 17.1912, + "step": 303520 + }, + { + "epoch": 0.6131498038518567, + "grad_norm": 324.7025146484375, + "learning_rate": 4.010686782213855e-06, + "loss": 13.4496, + "step": 303530 + }, + { + "epoch": 0.6131700044845405, + "grad_norm": 522.0205688476562, + "learning_rate": 4.01034461989171e-06, + "loss": 23.7555, + "step": 303540 + }, + { + "epoch": 0.6131902051172242, + "grad_norm": 150.48866271972656, + "learning_rate": 4.01000246239303e-06, + "loss": 22.1188, + "step": 303550 + }, + { + "epoch": 0.613210405749908, + "grad_norm": 96.71208190917969, + "learning_rate": 4.009660309719473e-06, + "loss": 7.4786, + "step": 303560 + }, + { + "epoch": 0.6132306063825919, + "grad_norm": 51.67375946044922, + "learning_rate": 4.009318161872714e-06, + "loss": 6.5284, + "step": 303570 + }, + { + "epoch": 0.6132508070152757, + "grad_norm": 144.28921508789062, + "learning_rate": 4.008976018854418e-06, + "loss": 15.5809, + "step": 303580 + }, + { + "epoch": 0.6132710076479595, + "grad_norm": 659.6348266601562, + "learning_rate": 4.0086338806662525e-06, + "loss": 20.033, + "step": 303590 + }, + { + "epoch": 0.6132912082806433, + "grad_norm": 925.919921875, + "learning_rate": 4.0082917473098845e-06, + "loss": 28.6464, + "step": 303600 + }, + { + "epoch": 0.6133114089133271, + "grad_norm": 655.435302734375, + "learning_rate": 4.007949618786984e-06, + "loss": 15.9405, + "step": 303610 + }, + { + "epoch": 0.613331609546011, + "grad_norm": 416.43170166015625, + 
"learning_rate": 4.007607495099215e-06, + "loss": 23.06, + "step": 303620 + }, + { + "epoch": 0.6133518101786948, + "grad_norm": 147.84814453125, + "learning_rate": 4.007265376248246e-06, + "loss": 22.9756, + "step": 303630 + }, + { + "epoch": 0.6133720108113786, + "grad_norm": 256.745361328125, + "learning_rate": 4.0069232622357475e-06, + "loss": 23.1978, + "step": 303640 + }, + { + "epoch": 0.6133922114440624, + "grad_norm": 379.4791259765625, + "learning_rate": 4.006581153063383e-06, + "loss": 25.9361, + "step": 303650 + }, + { + "epoch": 0.6134124120767462, + "grad_norm": 322.6779479980469, + "learning_rate": 4.006239048732822e-06, + "loss": 27.4228, + "step": 303660 + }, + { + "epoch": 0.6134326127094301, + "grad_norm": 1167.3402099609375, + "learning_rate": 4.005896949245731e-06, + "loss": 17.2036, + "step": 303670 + }, + { + "epoch": 0.6134528133421139, + "grad_norm": 494.796630859375, + "learning_rate": 4.005554854603779e-06, + "loss": 8.5401, + "step": 303680 + }, + { + "epoch": 0.6134730139747977, + "grad_norm": 279.4873962402344, + "learning_rate": 4.0052127648086305e-06, + "loss": 29.294, + "step": 303690 + }, + { + "epoch": 0.6134932146074815, + "grad_norm": 65.90160369873047, + "learning_rate": 4.004870679861953e-06, + "loss": 10.5197, + "step": 303700 + }, + { + "epoch": 0.6135134152401653, + "grad_norm": 313.8829040527344, + "learning_rate": 4.004528599765419e-06, + "loss": 20.6517, + "step": 303710 + }, + { + "epoch": 0.6135336158728492, + "grad_norm": 222.48297119140625, + "learning_rate": 4.004186524520689e-06, + "loss": 27.6633, + "step": 303720 + }, + { + "epoch": 0.613553816505533, + "grad_norm": 238.88255310058594, + "learning_rate": 4.003844454129434e-06, + "loss": 15.0304, + "step": 303730 + }, + { + "epoch": 0.6135740171382168, + "grad_norm": 487.00274658203125, + "learning_rate": 4.003502388593321e-06, + "loss": 18.3681, + "step": 303740 + }, + { + "epoch": 0.6135942177709006, + "grad_norm": 429.69561767578125, + "learning_rate": 4.003160327914015e-06, + "loss": 22.5376, + "step": 303750 + }, + { + "epoch": 0.6136144184035844, + "grad_norm": 675.3248901367188, + "learning_rate": 4.002818272093185e-06, + "loss": 40.3278, + "step": 303760 + }, + { + "epoch": 0.6136346190362683, + "grad_norm": 607.4163818359375, + "learning_rate": 4.002476221132499e-06, + "loss": 21.3432, + "step": 303770 + }, + { + "epoch": 0.6136548196689521, + "grad_norm": 403.9083557128906, + "learning_rate": 4.002134175033621e-06, + "loss": 31.5958, + "step": 303780 + }, + { + "epoch": 0.6136750203016359, + "grad_norm": 455.50909423828125, + "learning_rate": 4.001792133798221e-06, + "loss": 18.6742, + "step": 303790 + }, + { + "epoch": 0.6136952209343196, + "grad_norm": 652.5449829101562, + "learning_rate": 4.001450097427965e-06, + "loss": 15.4099, + "step": 303800 + }, + { + "epoch": 0.6137154215670034, + "grad_norm": 347.6587829589844, + "learning_rate": 4.001108065924521e-06, + "loss": 25.1859, + "step": 303810 + }, + { + "epoch": 0.6137356221996872, + "grad_norm": 527.2731323242188, + "learning_rate": 4.000766039289554e-06, + "loss": 23.9697, + "step": 303820 + }, + { + "epoch": 0.6137558228323711, + "grad_norm": 234.25405883789062, + "learning_rate": 4.000424017524732e-06, + "loss": 20.098, + "step": 303830 + }, + { + "epoch": 0.6137760234650549, + "grad_norm": 370.8182678222656, + "learning_rate": 4.000082000631724e-06, + "loss": 16.4596, + "step": 303840 + }, + { + "epoch": 0.6137962240977387, + "grad_norm": 269.0312805175781, + "learning_rate": 3.999739988612192e-06, + "loss": 8.1935, + 
"step": 303850 + }, + { + "epoch": 0.6138164247304225, + "grad_norm": 119.41081237792969, + "learning_rate": 3.999397981467808e-06, + "loss": 10.811, + "step": 303860 + }, + { + "epoch": 0.6138366253631063, + "grad_norm": 600.5228271484375, + "learning_rate": 3.999055979200238e-06, + "loss": 20.6857, + "step": 303870 + }, + { + "epoch": 0.6138568259957902, + "grad_norm": 313.8599548339844, + "learning_rate": 3.998713981811145e-06, + "loss": 21.2647, + "step": 303880 + }, + { + "epoch": 0.613877026628474, + "grad_norm": 486.4746398925781, + "learning_rate": 3.9983719893022e-06, + "loss": 20.1357, + "step": 303890 + }, + { + "epoch": 0.6138972272611578, + "grad_norm": 305.7726135253906, + "learning_rate": 3.9980300016750696e-06, + "loss": 13.1338, + "step": 303900 + }, + { + "epoch": 0.6139174278938416, + "grad_norm": 425.5389404296875, + "learning_rate": 3.997688018931418e-06, + "loss": 16.3044, + "step": 303910 + }, + { + "epoch": 0.6139376285265254, + "grad_norm": 0.0, + "learning_rate": 3.997346041072912e-06, + "loss": 25.3658, + "step": 303920 + }, + { + "epoch": 0.6139578291592093, + "grad_norm": 366.0242614746094, + "learning_rate": 3.997004068101224e-06, + "loss": 13.5478, + "step": 303930 + }, + { + "epoch": 0.6139780297918931, + "grad_norm": 76.50985717773438, + "learning_rate": 3.9966621000180125e-06, + "loss": 22.9871, + "step": 303940 + }, + { + "epoch": 0.6139982304245769, + "grad_norm": 428.3826599121094, + "learning_rate": 3.9963201368249495e-06, + "loss": 23.8295, + "step": 303950 + }, + { + "epoch": 0.6140184310572607, + "grad_norm": 525.0429077148438, + "learning_rate": 3.9959781785237e-06, + "loss": 21.652, + "step": 303960 + }, + { + "epoch": 0.6140386316899445, + "grad_norm": 276.49359130859375, + "learning_rate": 3.995636225115933e-06, + "loss": 18.2424, + "step": 303970 + }, + { + "epoch": 0.6140588323226284, + "grad_norm": 287.13702392578125, + "learning_rate": 3.995294276603312e-06, + "loss": 21.179, + "step": 303980 + }, + { + "epoch": 0.6140790329553122, + "grad_norm": 619.6387939453125, + "learning_rate": 3.9949523329875025e-06, + "loss": 22.7562, + "step": 303990 + }, + { + "epoch": 0.614099233587996, + "grad_norm": 584.1336059570312, + "learning_rate": 3.994610394270178e-06, + "loss": 20.0137, + "step": 304000 + }, + { + "epoch": 0.6141194342206798, + "grad_norm": 1.6148418188095093, + "learning_rate": 3.994268460452997e-06, + "loss": 19.0874, + "step": 304010 + }, + { + "epoch": 0.6141396348533636, + "grad_norm": 380.2392272949219, + "learning_rate": 3.993926531537631e-06, + "loss": 11.8434, + "step": 304020 + }, + { + "epoch": 0.6141598354860475, + "grad_norm": 501.98760986328125, + "learning_rate": 3.993584607525745e-06, + "loss": 23.473, + "step": 304030 + }, + { + "epoch": 0.6141800361187313, + "grad_norm": 296.1498107910156, + "learning_rate": 3.993242688419006e-06, + "loss": 21.8972, + "step": 304040 + }, + { + "epoch": 0.6142002367514151, + "grad_norm": 742.5661010742188, + "learning_rate": 3.992900774219078e-06, + "loss": 23.2279, + "step": 304050 + }, + { + "epoch": 0.6142204373840988, + "grad_norm": 823.869140625, + "learning_rate": 3.992558864927633e-06, + "loss": 25.0372, + "step": 304060 + }, + { + "epoch": 0.6142406380167826, + "grad_norm": 145.0618896484375, + "learning_rate": 3.9922169605463305e-06, + "loss": 11.2473, + "step": 304070 + }, + { + "epoch": 0.6142608386494665, + "grad_norm": 239.45742797851562, + "learning_rate": 3.991875061076841e-06, + "loss": 18.8624, + "step": 304080 + }, + { + "epoch": 0.6142810392821503, + "grad_norm": 
312.9098815917969, + "learning_rate": 3.991533166520832e-06, + "loss": 17.5972, + "step": 304090 + }, + { + "epoch": 0.6143012399148341, + "grad_norm": 787.7771606445312, + "learning_rate": 3.991191276879966e-06, + "loss": 29.638, + "step": 304100 + }, + { + "epoch": 0.6143214405475179, + "grad_norm": 487.5486755371094, + "learning_rate": 3.990849392155912e-06, + "loss": 15.2809, + "step": 304110 + }, + { + "epoch": 0.6143416411802017, + "grad_norm": 830.9132080078125, + "learning_rate": 3.990507512350336e-06, + "loss": 33.5881, + "step": 304120 + }, + { + "epoch": 0.6143618418128856, + "grad_norm": 979.7211303710938, + "learning_rate": 3.990165637464904e-06, + "loss": 27.8697, + "step": 304130 + }, + { + "epoch": 0.6143820424455694, + "grad_norm": 261.5354309082031, + "learning_rate": 3.9898237675012815e-06, + "loss": 15.2131, + "step": 304140 + }, + { + "epoch": 0.6144022430782532, + "grad_norm": 354.2962341308594, + "learning_rate": 3.989481902461135e-06, + "loss": 30.4945, + "step": 304150 + }, + { + "epoch": 0.614422443710937, + "grad_norm": 229.3984375, + "learning_rate": 3.989140042346134e-06, + "loss": 19.7272, + "step": 304160 + }, + { + "epoch": 0.6144426443436208, + "grad_norm": 605.9855346679688, + "learning_rate": 3.988798187157939e-06, + "loss": 20.3877, + "step": 304170 + }, + { + "epoch": 0.6144628449763047, + "grad_norm": 405.9658508300781, + "learning_rate": 3.988456336898219e-06, + "loss": 13.6832, + "step": 304180 + }, + { + "epoch": 0.6144830456089885, + "grad_norm": 666.5990600585938, + "learning_rate": 3.988114491568642e-06, + "loss": 21.0847, + "step": 304190 + }, + { + "epoch": 0.6145032462416723, + "grad_norm": 371.8177185058594, + "learning_rate": 3.987772651170871e-06, + "loss": 43.3977, + "step": 304200 + }, + { + "epoch": 0.6145234468743561, + "grad_norm": 229.81198120117188, + "learning_rate": 3.9874308157065735e-06, + "loss": 23.0913, + "step": 304210 + }, + { + "epoch": 0.6145436475070399, + "grad_norm": 616.7988891601562, + "learning_rate": 3.987088985177417e-06, + "loss": 22.285, + "step": 304220 + }, + { + "epoch": 0.6145638481397238, + "grad_norm": 8.764073371887207, + "learning_rate": 3.986747159585063e-06, + "loss": 23.5479, + "step": 304230 + }, + { + "epoch": 0.6145840487724076, + "grad_norm": 497.9407653808594, + "learning_rate": 3.986405338931182e-06, + "loss": 26.8621, + "step": 304240 + }, + { + "epoch": 0.6146042494050914, + "grad_norm": 566.9827270507812, + "learning_rate": 3.986063523217439e-06, + "loss": 17.8532, + "step": 304250 + }, + { + "epoch": 0.6146244500377752, + "grad_norm": 63.36670684814453, + "learning_rate": 3.9857217124454985e-06, + "loss": 27.5186, + "step": 304260 + }, + { + "epoch": 0.614644650670459, + "grad_norm": 1081.5235595703125, + "learning_rate": 3.985379906617027e-06, + "loss": 28.2573, + "step": 304270 + }, + { + "epoch": 0.6146648513031429, + "grad_norm": 623.7537841796875, + "learning_rate": 3.985038105733691e-06, + "loss": 18.582, + "step": 304280 + }, + { + "epoch": 0.6146850519358267, + "grad_norm": 146.89149475097656, + "learning_rate": 3.984696309797157e-06, + "loss": 12.7521, + "step": 304290 + }, + { + "epoch": 0.6147052525685105, + "grad_norm": 223.62774658203125, + "learning_rate": 3.98435451880909e-06, + "loss": 35.098, + "step": 304300 + }, + { + "epoch": 0.6147254532011943, + "grad_norm": 286.808837890625, + "learning_rate": 3.984012732771154e-06, + "loss": 14.6493, + "step": 304310 + }, + { + "epoch": 0.614745653833878, + "grad_norm": 549.3030395507812, + "learning_rate": 3.98367095168502e-06, + 
"loss": 16.418, + "step": 304320 + }, + { + "epoch": 0.6147658544665618, + "grad_norm": 268.3217468261719, + "learning_rate": 3.983329175552348e-06, + "loss": 18.5564, + "step": 304330 + }, + { + "epoch": 0.6147860550992457, + "grad_norm": 547.75, + "learning_rate": 3.9829874043748064e-06, + "loss": 24.7735, + "step": 304340 + }, + { + "epoch": 0.6148062557319295, + "grad_norm": 301.2500915527344, + "learning_rate": 3.982645638154062e-06, + "loss": 22.7877, + "step": 304350 + }, + { + "epoch": 0.6148264563646133, + "grad_norm": 413.0594177246094, + "learning_rate": 3.982303876891778e-06, + "loss": 17.6814, + "step": 304360 + }, + { + "epoch": 0.6148466569972971, + "grad_norm": 201.77066040039062, + "learning_rate": 3.981962120589623e-06, + "loss": 20.4034, + "step": 304370 + }, + { + "epoch": 0.614866857629981, + "grad_norm": 174.31021118164062, + "learning_rate": 3.981620369249261e-06, + "loss": 11.626, + "step": 304380 + }, + { + "epoch": 0.6148870582626648, + "grad_norm": 620.9212646484375, + "learning_rate": 3.981278622872357e-06, + "loss": 14.6557, + "step": 304390 + }, + { + "epoch": 0.6149072588953486, + "grad_norm": 311.0087585449219, + "learning_rate": 3.980936881460576e-06, + "loss": 44.8629, + "step": 304400 + }, + { + "epoch": 0.6149274595280324, + "grad_norm": 224.42835998535156, + "learning_rate": 3.980595145015588e-06, + "loss": 15.2197, + "step": 304410 + }, + { + "epoch": 0.6149476601607162, + "grad_norm": 236.81829833984375, + "learning_rate": 3.9802534135390544e-06, + "loss": 8.8921, + "step": 304420 + }, + { + "epoch": 0.6149678607934, + "grad_norm": 615.7477416992188, + "learning_rate": 3.979911687032642e-06, + "loss": 17.2336, + "step": 304430 + }, + { + "epoch": 0.6149880614260839, + "grad_norm": 40.66111755371094, + "learning_rate": 3.979569965498016e-06, + "loss": 14.4898, + "step": 304440 + }, + { + "epoch": 0.6150082620587677, + "grad_norm": 454.35565185546875, + "learning_rate": 3.979228248936843e-06, + "loss": 46.3418, + "step": 304450 + }, + { + "epoch": 0.6150284626914515, + "grad_norm": 646.9774780273438, + "learning_rate": 3.978886537350786e-06, + "loss": 30.9838, + "step": 304460 + }, + { + "epoch": 0.6150486633241353, + "grad_norm": 284.7167663574219, + "learning_rate": 3.978544830741513e-06, + "loss": 16.6017, + "step": 304470 + }, + { + "epoch": 0.6150688639568191, + "grad_norm": 661.4013061523438, + "learning_rate": 3.9782031291106895e-06, + "loss": 18.4857, + "step": 304480 + }, + { + "epoch": 0.615089064589503, + "grad_norm": 55.05121612548828, + "learning_rate": 3.97786143245998e-06, + "loss": 23.2033, + "step": 304490 + }, + { + "epoch": 0.6151092652221868, + "grad_norm": 568.1244506835938, + "learning_rate": 3.977519740791049e-06, + "loss": 15.2688, + "step": 304500 + }, + { + "epoch": 0.6151294658548706, + "grad_norm": 469.814697265625, + "learning_rate": 3.977178054105564e-06, + "loss": 22.9027, + "step": 304510 + }, + { + "epoch": 0.6151496664875544, + "grad_norm": 908.1153564453125, + "learning_rate": 3.9768363724051875e-06, + "loss": 17.428, + "step": 304520 + }, + { + "epoch": 0.6151698671202382, + "grad_norm": 77.04640197753906, + "learning_rate": 3.976494695691586e-06, + "loss": 22.9595, + "step": 304530 + }, + { + "epoch": 0.6151900677529221, + "grad_norm": 361.153564453125, + "learning_rate": 3.976153023966428e-06, + "loss": 15.1676, + "step": 304540 + }, + { + "epoch": 0.6152102683856059, + "grad_norm": 409.250732421875, + "learning_rate": 3.9758113572313735e-06, + "loss": 16.0041, + "step": 304550 + }, + { + "epoch": 
0.6152304690182897, + "grad_norm": 555.212890625, + "learning_rate": 3.975469695488091e-06, + "loss": 29.4781, + "step": 304560 + }, + { + "epoch": 0.6152506696509734, + "grad_norm": 321.8433837890625, + "learning_rate": 3.975128038738245e-06, + "loss": 24.9564, + "step": 304570 + }, + { + "epoch": 0.6152708702836572, + "grad_norm": 48.59434509277344, + "learning_rate": 3.974786386983501e-06, + "loss": 19.8498, + "step": 304580 + }, + { + "epoch": 0.615291070916341, + "grad_norm": 245.10911560058594, + "learning_rate": 3.974444740225524e-06, + "loss": 18.3821, + "step": 304590 + }, + { + "epoch": 0.6153112715490249, + "grad_norm": 297.1552734375, + "learning_rate": 3.974103098465976e-06, + "loss": 15.4168, + "step": 304600 + }, + { + "epoch": 0.6153314721817087, + "grad_norm": 179.0855712890625, + "learning_rate": 3.97376146170653e-06, + "loss": 18.0489, + "step": 304610 + }, + { + "epoch": 0.6153516728143925, + "grad_norm": 794.9833374023438, + "learning_rate": 3.973419829948843e-06, + "loss": 21.6872, + "step": 304620 + }, + { + "epoch": 0.6153718734470763, + "grad_norm": 347.2474365234375, + "learning_rate": 3.973078203194584e-06, + "loss": 19.7647, + "step": 304630 + }, + { + "epoch": 0.6153920740797602, + "grad_norm": 254.1619110107422, + "learning_rate": 3.972736581445418e-06, + "loss": 13.7167, + "step": 304640 + }, + { + "epoch": 0.615412274712444, + "grad_norm": 251.6357879638672, + "learning_rate": 3.972394964703008e-06, + "loss": 23.3833, + "step": 304650 + }, + { + "epoch": 0.6154324753451278, + "grad_norm": 192.97264099121094, + "learning_rate": 3.97205335296902e-06, + "loss": 21.9039, + "step": 304660 + }, + { + "epoch": 0.6154526759778116, + "grad_norm": 363.0151672363281, + "learning_rate": 3.971711746245122e-06, + "loss": 19.9189, + "step": 304670 + }, + { + "epoch": 0.6154728766104954, + "grad_norm": 130.87635803222656, + "learning_rate": 3.971370144532973e-06, + "loss": 14.7775, + "step": 304680 + }, + { + "epoch": 0.6154930772431793, + "grad_norm": 377.2640075683594, + "learning_rate": 3.971028547834241e-06, + "loss": 14.1147, + "step": 304690 + }, + { + "epoch": 0.6155132778758631, + "grad_norm": 354.9339599609375, + "learning_rate": 3.970686956150595e-06, + "loss": 16.043, + "step": 304700 + }, + { + "epoch": 0.6155334785085469, + "grad_norm": 534.2333374023438, + "learning_rate": 3.970345369483693e-06, + "loss": 15.6232, + "step": 304710 + }, + { + "epoch": 0.6155536791412307, + "grad_norm": 553.8934936523438, + "learning_rate": 3.970003787835203e-06, + "loss": 25.1067, + "step": 304720 + }, + { + "epoch": 0.6155738797739145, + "grad_norm": 279.48504638671875, + "learning_rate": 3.969662211206789e-06, + "loss": 19.6881, + "step": 304730 + }, + { + "epoch": 0.6155940804065984, + "grad_norm": 542.3599243164062, + "learning_rate": 3.969320639600118e-06, + "loss": 21.9852, + "step": 304740 + }, + { + "epoch": 0.6156142810392822, + "grad_norm": 461.4300842285156, + "learning_rate": 3.968979073016853e-06, + "loss": 17.8222, + "step": 304750 + }, + { + "epoch": 0.615634481671966, + "grad_norm": 145.6568145751953, + "learning_rate": 3.968637511458657e-06, + "loss": 25.1161, + "step": 304760 + }, + { + "epoch": 0.6156546823046498, + "grad_norm": 176.57940673828125, + "learning_rate": 3.9682959549272e-06, + "loss": 16.715, + "step": 304770 + }, + { + "epoch": 0.6156748829373336, + "grad_norm": 222.05470275878906, + "learning_rate": 3.9679544034241406e-06, + "loss": 26.3055, + "step": 304780 + }, + { + "epoch": 0.6156950835700175, + "grad_norm": 26.114879608154297, + 
"learning_rate": 3.967612856951146e-06, + "loss": 11.9065, + "step": 304790 + }, + { + "epoch": 0.6157152842027013, + "grad_norm": 470.2635192871094, + "learning_rate": 3.967271315509884e-06, + "loss": 27.9257, + "step": 304800 + }, + { + "epoch": 0.6157354848353851, + "grad_norm": 0.0, + "learning_rate": 3.966929779102015e-06, + "loss": 27.4237, + "step": 304810 + }, + { + "epoch": 0.6157556854680689, + "grad_norm": 225.7501983642578, + "learning_rate": 3.9665882477292036e-06, + "loss": 18.1502, + "step": 304820 + }, + { + "epoch": 0.6157758861007526, + "grad_norm": 927.1912841796875, + "learning_rate": 3.966246721393118e-06, + "loss": 15.8755, + "step": 304830 + }, + { + "epoch": 0.6157960867334364, + "grad_norm": 717.72900390625, + "learning_rate": 3.965905200095419e-06, + "loss": 31.2173, + "step": 304840 + }, + { + "epoch": 0.6158162873661203, + "grad_norm": 178.7381591796875, + "learning_rate": 3.965563683837772e-06, + "loss": 14.679, + "step": 304850 + }, + { + "epoch": 0.6158364879988041, + "grad_norm": 286.8285827636719, + "learning_rate": 3.965222172621844e-06, + "loss": 22.5503, + "step": 304860 + }, + { + "epoch": 0.6158566886314879, + "grad_norm": 566.3438720703125, + "learning_rate": 3.964880666449296e-06, + "loss": 30.0114, + "step": 304870 + }, + { + "epoch": 0.6158768892641717, + "grad_norm": 261.7210388183594, + "learning_rate": 3.964539165321795e-06, + "loss": 26.8727, + "step": 304880 + }, + { + "epoch": 0.6158970898968555, + "grad_norm": 740.4841918945312, + "learning_rate": 3.964197669241004e-06, + "loss": 19.0218, + "step": 304890 + }, + { + "epoch": 0.6159172905295394, + "grad_norm": 70.38446807861328, + "learning_rate": 3.963856178208588e-06, + "loss": 8.1849, + "step": 304900 + }, + { + "epoch": 0.6159374911622232, + "grad_norm": 555.62255859375, + "learning_rate": 3.963514692226212e-06, + "loss": 15.2413, + "step": 304910 + }, + { + "epoch": 0.615957691794907, + "grad_norm": 241.1875, + "learning_rate": 3.963173211295538e-06, + "loss": 22.6484, + "step": 304920 + }, + { + "epoch": 0.6159778924275908, + "grad_norm": 395.1697692871094, + "learning_rate": 3.962831735418235e-06, + "loss": 22.5365, + "step": 304930 + }, + { + "epoch": 0.6159980930602746, + "grad_norm": 334.2985534667969, + "learning_rate": 3.962490264595961e-06, + "loss": 27.5217, + "step": 304940 + }, + { + "epoch": 0.6160182936929585, + "grad_norm": 288.04449462890625, + "learning_rate": 3.962148798830385e-06, + "loss": 10.9517, + "step": 304950 + }, + { + "epoch": 0.6160384943256423, + "grad_norm": 292.25628662109375, + "learning_rate": 3.9618073381231705e-06, + "loss": 32.295, + "step": 304960 + }, + { + "epoch": 0.6160586949583261, + "grad_norm": 226.55381774902344, + "learning_rate": 3.9614658824759815e-06, + "loss": 26.4119, + "step": 304970 + }, + { + "epoch": 0.6160788955910099, + "grad_norm": 145.2852325439453, + "learning_rate": 3.96112443189048e-06, + "loss": 23.9111, + "step": 304980 + }, + { + "epoch": 0.6160990962236937, + "grad_norm": 78.74932861328125, + "learning_rate": 3.960782986368334e-06, + "loss": 17.1106, + "step": 304990 + }, + { + "epoch": 0.6161192968563776, + "grad_norm": 62.719512939453125, + "learning_rate": 3.960441545911205e-06, + "loss": 21.7747, + "step": 305000 + }, + { + "epoch": 0.6161394974890614, + "grad_norm": 69.73066711425781, + "learning_rate": 3.960100110520756e-06, + "loss": 12.6648, + "step": 305010 + }, + { + "epoch": 0.6161596981217452, + "grad_norm": 481.134765625, + "learning_rate": 3.9597586801986544e-06, + "loss": 18.5707, + "step": 305020 + }, + { + 
"epoch": 0.616179898754429, + "grad_norm": 297.143798828125, + "learning_rate": 3.959417254946563e-06, + "loss": 20.0953, + "step": 305030 + }, + { + "epoch": 0.6162000993871128, + "grad_norm": 262.7975158691406, + "learning_rate": 3.9590758347661465e-06, + "loss": 10.907, + "step": 305040 + }, + { + "epoch": 0.6162203000197967, + "grad_norm": 55.615638732910156, + "learning_rate": 3.9587344196590665e-06, + "loss": 16.1214, + "step": 305050 + }, + { + "epoch": 0.6162405006524805, + "grad_norm": 562.6689453125, + "learning_rate": 3.95839300962699e-06, + "loss": 9.3046, + "step": 305060 + }, + { + "epoch": 0.6162607012851643, + "grad_norm": 822.115478515625, + "learning_rate": 3.958051604671579e-06, + "loss": 21.9632, + "step": 305070 + }, + { + "epoch": 0.616280901917848, + "grad_norm": 326.88604736328125, + "learning_rate": 3.957710204794497e-06, + "loss": 11.3189, + "step": 305080 + }, + { + "epoch": 0.6163011025505318, + "grad_norm": 299.18133544921875, + "learning_rate": 3.95736880999741e-06, + "loss": 17.1818, + "step": 305090 + }, + { + "epoch": 0.6163213031832157, + "grad_norm": 348.8399963378906, + "learning_rate": 3.957027420281981e-06, + "loss": 24.7105, + "step": 305100 + }, + { + "epoch": 0.6163415038158995, + "grad_norm": 324.0838317871094, + "learning_rate": 3.956686035649874e-06, + "loss": 19.6219, + "step": 305110 + }, + { + "epoch": 0.6163617044485833, + "grad_norm": 147.7954559326172, + "learning_rate": 3.956344656102754e-06, + "loss": 12.9203, + "step": 305120 + }, + { + "epoch": 0.6163819050812671, + "grad_norm": 707.2263793945312, + "learning_rate": 3.9560032816422825e-06, + "loss": 20.8565, + "step": 305130 + }, + { + "epoch": 0.6164021057139509, + "grad_norm": 122.77702331542969, + "learning_rate": 3.955661912270123e-06, + "loss": 19.8666, + "step": 305140 + }, + { + "epoch": 0.6164223063466348, + "grad_norm": 300.4569091796875, + "learning_rate": 3.955320547987943e-06, + "loss": 11.3689, + "step": 305150 + }, + { + "epoch": 0.6164425069793186, + "grad_norm": 995.3106079101562, + "learning_rate": 3.954979188797402e-06, + "loss": 38.2021, + "step": 305160 + }, + { + "epoch": 0.6164627076120024, + "grad_norm": 627.9874877929688, + "learning_rate": 3.954637834700166e-06, + "loss": 17.7492, + "step": 305170 + }, + { + "epoch": 0.6164829082446862, + "grad_norm": 489.7788391113281, + "learning_rate": 3.954296485697899e-06, + "loss": 20.5142, + "step": 305180 + }, + { + "epoch": 0.61650310887737, + "grad_norm": 258.4852294921875, + "learning_rate": 3.953955141792264e-06, + "loss": 12.8674, + "step": 305190 + }, + { + "epoch": 0.6165233095100539, + "grad_norm": 503.8763122558594, + "learning_rate": 3.9536138029849244e-06, + "loss": 22.3047, + "step": 305200 + }, + { + "epoch": 0.6165435101427377, + "grad_norm": 588.955322265625, + "learning_rate": 3.953272469277544e-06, + "loss": 24.0847, + "step": 305210 + }, + { + "epoch": 0.6165637107754215, + "grad_norm": 323.96630859375, + "learning_rate": 3.952931140671789e-06, + "loss": 15.9219, + "step": 305220 + }, + { + "epoch": 0.6165839114081053, + "grad_norm": 281.60577392578125, + "learning_rate": 3.9525898171693175e-06, + "loss": 32.4246, + "step": 305230 + }, + { + "epoch": 0.6166041120407891, + "grad_norm": 11.025744438171387, + "learning_rate": 3.952248498771797e-06, + "loss": 16.3389, + "step": 305240 + }, + { + "epoch": 0.616624312673473, + "grad_norm": 113.29571533203125, + "learning_rate": 3.951907185480892e-06, + "loss": 14.3495, + "step": 305250 + }, + { + "epoch": 0.6166445133061568, + "grad_norm": 148.98886108398438, 
+ "learning_rate": 3.9515658772982625e-06, + "loss": 19.239, + "step": 305260 + }, + { + "epoch": 0.6166647139388406, + "grad_norm": 600.8794555664062, + "learning_rate": 3.951224574225574e-06, + "loss": 22.2261, + "step": 305270 + }, + { + "epoch": 0.6166849145715244, + "grad_norm": 353.5679016113281, + "learning_rate": 3.950883276264491e-06, + "loss": 24.45, + "step": 305280 + }, + { + "epoch": 0.6167051152042082, + "grad_norm": 126.06834411621094, + "learning_rate": 3.950541983416675e-06, + "loss": 21.8497, + "step": 305290 + }, + { + "epoch": 0.616725315836892, + "grad_norm": 434.01812744140625, + "learning_rate": 3.950200695683788e-06, + "loss": 12.7697, + "step": 305300 + }, + { + "epoch": 0.6167455164695759, + "grad_norm": 572.202880859375, + "learning_rate": 3.9498594130674985e-06, + "loss": 13.4393, + "step": 305310 + }, + { + "epoch": 0.6167657171022597, + "grad_norm": 701.241455078125, + "learning_rate": 3.949518135569465e-06, + "loss": 25.3619, + "step": 305320 + }, + { + "epoch": 0.6167859177349435, + "grad_norm": 184.28607177734375, + "learning_rate": 3.949176863191353e-06, + "loss": 10.7238, + "step": 305330 + }, + { + "epoch": 0.6168061183676272, + "grad_norm": 10.19638442993164, + "learning_rate": 3.948835595934826e-06, + "loss": 10.2953, + "step": 305340 + }, + { + "epoch": 0.616826319000311, + "grad_norm": 28.1948184967041, + "learning_rate": 3.9484943338015465e-06, + "loss": 20.9164, + "step": 305350 + }, + { + "epoch": 0.6168465196329949, + "grad_norm": 194.9653778076172, + "learning_rate": 3.948153076793179e-06, + "loss": 25.6456, + "step": 305360 + }, + { + "epoch": 0.6168667202656787, + "grad_norm": 489.40155029296875, + "learning_rate": 3.947811824911383e-06, + "loss": 11.7695, + "step": 305370 + }, + { + "epoch": 0.6168869208983625, + "grad_norm": 212.13853454589844, + "learning_rate": 3.947470578157829e-06, + "loss": 12.6597, + "step": 305380 + }, + { + "epoch": 0.6169071215310463, + "grad_norm": 862.0413818359375, + "learning_rate": 3.9471293365341716e-06, + "loss": 17.0545, + "step": 305390 + }, + { + "epoch": 0.6169273221637301, + "grad_norm": 81.56665802001953, + "learning_rate": 3.94678810004208e-06, + "loss": 25.2166, + "step": 305400 + }, + { + "epoch": 0.616947522796414, + "grad_norm": 32.02766799926758, + "learning_rate": 3.946446868683216e-06, + "loss": 19.9479, + "step": 305410 + }, + { + "epoch": 0.6169677234290978, + "grad_norm": 316.7696838378906, + "learning_rate": 3.946105642459241e-06, + "loss": 13.6745, + "step": 305420 + }, + { + "epoch": 0.6169879240617816, + "grad_norm": 420.5115661621094, + "learning_rate": 3.9457644213718195e-06, + "loss": 16.9697, + "step": 305430 + }, + { + "epoch": 0.6170081246944654, + "grad_norm": 628.47412109375, + "learning_rate": 3.945423205422616e-06, + "loss": 15.0264, + "step": 305440 + }, + { + "epoch": 0.6170283253271492, + "grad_norm": 293.2240905761719, + "learning_rate": 3.94508199461329e-06, + "loss": 26.2494, + "step": 305450 + }, + { + "epoch": 0.6170485259598331, + "grad_norm": 660.800048828125, + "learning_rate": 3.9447407889455054e-06, + "loss": 30.5347, + "step": 305460 + }, + { + "epoch": 0.6170687265925169, + "grad_norm": 447.24627685546875, + "learning_rate": 3.944399588420928e-06, + "loss": 11.819, + "step": 305470 + }, + { + "epoch": 0.6170889272252007, + "grad_norm": 427.98876953125, + "learning_rate": 3.944058393041219e-06, + "loss": 16.9037, + "step": 305480 + }, + { + "epoch": 0.6171091278578845, + "grad_norm": 300.6212463378906, + "learning_rate": 3.943717202808041e-06, + "loss": 22.4586, + 
"step": 305490 + }, + { + "epoch": 0.6171293284905683, + "grad_norm": 499.1124267578125, + "learning_rate": 3.943376017723058e-06, + "loss": 33.0014, + "step": 305500 + }, + { + "epoch": 0.6171495291232522, + "grad_norm": 298.091064453125, + "learning_rate": 3.9430348377879315e-06, + "loss": 9.2643, + "step": 305510 + }, + { + "epoch": 0.617169729755936, + "grad_norm": 240.9927978515625, + "learning_rate": 3.942693663004324e-06, + "loss": 34.2811, + "step": 305520 + }, + { + "epoch": 0.6171899303886198, + "grad_norm": 204.7012176513672, + "learning_rate": 3.942352493373899e-06, + "loss": 11.6956, + "step": 305530 + }, + { + "epoch": 0.6172101310213036, + "grad_norm": 183.16407775878906, + "learning_rate": 3.9420113288983235e-06, + "loss": 20.7804, + "step": 305540 + }, + { + "epoch": 0.6172303316539874, + "grad_norm": 362.5018615722656, + "learning_rate": 3.941670169579252e-06, + "loss": 12.9724, + "step": 305550 + }, + { + "epoch": 0.6172505322866713, + "grad_norm": 191.02978515625, + "learning_rate": 3.9413290154183536e-06, + "loss": 22.3215, + "step": 305560 + }, + { + "epoch": 0.6172707329193551, + "grad_norm": 222.10240173339844, + "learning_rate": 3.94098786641729e-06, + "loss": 11.3918, + "step": 305570 + }, + { + "epoch": 0.6172909335520389, + "grad_norm": 40.461708068847656, + "learning_rate": 3.940646722577722e-06, + "loss": 5.4216, + "step": 305580 + }, + { + "epoch": 0.6173111341847227, + "grad_norm": 328.94140625, + "learning_rate": 3.940305583901314e-06, + "loss": 32.9386, + "step": 305590 + }, + { + "epoch": 0.6173313348174064, + "grad_norm": 19.103342056274414, + "learning_rate": 3.939964450389728e-06, + "loss": 12.3217, + "step": 305600 + }, + { + "epoch": 0.6173515354500902, + "grad_norm": 590.836181640625, + "learning_rate": 3.939623322044627e-06, + "loss": 33.5012, + "step": 305610 + }, + { + "epoch": 0.6173717360827741, + "grad_norm": 513.8269653320312, + "learning_rate": 3.9392821988676715e-06, + "loss": 12.7758, + "step": 305620 + }, + { + "epoch": 0.6173919367154579, + "grad_norm": 133.01303100585938, + "learning_rate": 3.938941080860527e-06, + "loss": 29.4992, + "step": 305630 + }, + { + "epoch": 0.6174121373481417, + "grad_norm": 326.3035583496094, + "learning_rate": 3.938599968024855e-06, + "loss": 12.9297, + "step": 305640 + }, + { + "epoch": 0.6174323379808255, + "grad_norm": 894.0367431640625, + "learning_rate": 3.938258860362319e-06, + "loss": 20.602, + "step": 305650 + }, + { + "epoch": 0.6174525386135093, + "grad_norm": 750.4785766601562, + "learning_rate": 3.937917757874579e-06, + "loss": 28.8761, + "step": 305660 + }, + { + "epoch": 0.6174727392461932, + "grad_norm": 487.0335998535156, + "learning_rate": 3.9375766605633005e-06, + "loss": 29.3503, + "step": 305670 + }, + { + "epoch": 0.617492939878877, + "grad_norm": 322.8643493652344, + "learning_rate": 3.937235568430143e-06, + "loss": 18.7492, + "step": 305680 + }, + { + "epoch": 0.6175131405115608, + "grad_norm": 54.897117614746094, + "learning_rate": 3.9368944814767704e-06, + "loss": 26.2001, + "step": 305690 + }, + { + "epoch": 0.6175333411442446, + "grad_norm": 934.5421142578125, + "learning_rate": 3.936553399704848e-06, + "loss": 18.8792, + "step": 305700 + }, + { + "epoch": 0.6175535417769284, + "grad_norm": 739.4932250976562, + "learning_rate": 3.936212323116032e-06, + "loss": 27.0148, + "step": 305710 + }, + { + "epoch": 0.6175737424096123, + "grad_norm": 747.6240234375, + "learning_rate": 3.935871251711989e-06, + "loss": 35.2148, + "step": 305720 + }, + { + "epoch": 0.6175939430422961, + 
"grad_norm": 114.89200592041016, + "learning_rate": 3.935530185494381e-06, + "loss": 6.3092, + "step": 305730 + }, + { + "epoch": 0.6176141436749799, + "grad_norm": 351.3406982421875, + "learning_rate": 3.93518912446487e-06, + "loss": 23.3945, + "step": 305740 + }, + { + "epoch": 0.6176343443076637, + "grad_norm": 286.9163513183594, + "learning_rate": 3.934848068625117e-06, + "loss": 23.4078, + "step": 305750 + }, + { + "epoch": 0.6176545449403475, + "grad_norm": 425.3580017089844, + "learning_rate": 3.934507017976788e-06, + "loss": 19.2041, + "step": 305760 + }, + { + "epoch": 0.6176747455730314, + "grad_norm": 288.7109680175781, + "learning_rate": 3.9341659725215395e-06, + "loss": 28.5857, + "step": 305770 + }, + { + "epoch": 0.6176949462057152, + "grad_norm": 673.9471435546875, + "learning_rate": 3.9338249322610375e-06, + "loss": 30.6302, + "step": 305780 + }, + { + "epoch": 0.617715146838399, + "grad_norm": 189.43878173828125, + "learning_rate": 3.933483897196944e-06, + "loss": 16.6219, + "step": 305790 + }, + { + "epoch": 0.6177353474710828, + "grad_norm": 242.22720336914062, + "learning_rate": 3.933142867330921e-06, + "loss": 17.8705, + "step": 305800 + }, + { + "epoch": 0.6177555481037666, + "grad_norm": 492.8101806640625, + "learning_rate": 3.932801842664629e-06, + "loss": 23.6165, + "step": 305810 + }, + { + "epoch": 0.6177757487364505, + "grad_norm": 320.36065673828125, + "learning_rate": 3.932460823199732e-06, + "loss": 18.9187, + "step": 305820 + }, + { + "epoch": 0.6177959493691343, + "grad_norm": 657.8652954101562, + "learning_rate": 3.932119808937892e-06, + "loss": 18.8226, + "step": 305830 + }, + { + "epoch": 0.6178161500018181, + "grad_norm": 594.8040771484375, + "learning_rate": 3.9317787998807695e-06, + "loss": 15.5412, + "step": 305840 + }, + { + "epoch": 0.6178363506345018, + "grad_norm": 173.3395538330078, + "learning_rate": 3.931437796030028e-06, + "loss": 18.2146, + "step": 305850 + }, + { + "epoch": 0.6178565512671856, + "grad_norm": 205.6853485107422, + "learning_rate": 3.93109679738733e-06, + "loss": 16.1719, + "step": 305860 + }, + { + "epoch": 0.6178767518998695, + "grad_norm": 234.95541381835938, + "learning_rate": 3.9307558039543355e-06, + "loss": 19.4078, + "step": 305870 + }, + { + "epoch": 0.6178969525325533, + "grad_norm": 233.0981903076172, + "learning_rate": 3.930414815732709e-06, + "loss": 17.0163, + "step": 305880 + }, + { + "epoch": 0.6179171531652371, + "grad_norm": 615.3446655273438, + "learning_rate": 3.93007383272411e-06, + "loss": 18.0681, + "step": 305890 + }, + { + "epoch": 0.6179373537979209, + "grad_norm": 184.77142333984375, + "learning_rate": 3.9297328549302e-06, + "loss": 18.3668, + "step": 305900 + }, + { + "epoch": 0.6179575544306047, + "grad_norm": 169.15184020996094, + "learning_rate": 3.929391882352643e-06, + "loss": 17.5263, + "step": 305910 + }, + { + "epoch": 0.6179777550632886, + "grad_norm": 324.7297668457031, + "learning_rate": 3.929050914993102e-06, + "loss": 21.9217, + "step": 305920 + }, + { + "epoch": 0.6179979556959724, + "grad_norm": 523.7531127929688, + "learning_rate": 3.928709952853235e-06, + "loss": 12.4756, + "step": 305930 + }, + { + "epoch": 0.6180181563286562, + "grad_norm": 439.7877197265625, + "learning_rate": 3.928368995934706e-06, + "loss": 25.2746, + "step": 305940 + }, + { + "epoch": 0.61803835696134, + "grad_norm": 74.69799041748047, + "learning_rate": 3.928028044239176e-06, + "loss": 14.5338, + "step": 305950 + }, + { + "epoch": 0.6180585575940238, + "grad_norm": 190.77581787109375, + "learning_rate": 
3.927687097768309e-06, + "loss": 14.2405, + "step": 305960 + }, + { + "epoch": 0.6180787582267077, + "grad_norm": 178.42745971679688, + "learning_rate": 3.927346156523764e-06, + "loss": 15.6832, + "step": 305970 + }, + { + "epoch": 0.6180989588593915, + "grad_norm": 258.56976318359375, + "learning_rate": 3.927005220507203e-06, + "loss": 16.2546, + "step": 305980 + }, + { + "epoch": 0.6181191594920753, + "grad_norm": 330.5308837890625, + "learning_rate": 3.926664289720291e-06, + "loss": 26.6828, + "step": 305990 + }, + { + "epoch": 0.6181393601247591, + "grad_norm": 504.2160339355469, + "learning_rate": 3.926323364164684e-06, + "loss": 33.5386, + "step": 306000 + }, + { + "epoch": 0.6181595607574429, + "grad_norm": 229.79554748535156, + "learning_rate": 3.925982443842048e-06, + "loss": 11.3198, + "step": 306010 + }, + { + "epoch": 0.6181797613901268, + "grad_norm": 323.853759765625, + "learning_rate": 3.925641528754045e-06, + "loss": 18.9552, + "step": 306020 + }, + { + "epoch": 0.6181999620228106, + "grad_norm": 537.7722778320312, + "learning_rate": 3.925300618902332e-06, + "loss": 16.2266, + "step": 306030 + }, + { + "epoch": 0.6182201626554944, + "grad_norm": 889.2670288085938, + "learning_rate": 3.924959714288575e-06, + "loss": 21.2791, + "step": 306040 + }, + { + "epoch": 0.6182403632881782, + "grad_norm": 188.69305419921875, + "learning_rate": 3.924618814914435e-06, + "loss": 2.5766, + "step": 306050 + }, + { + "epoch": 0.618260563920862, + "grad_norm": 116.12600708007812, + "learning_rate": 3.924277920781571e-06, + "loss": 14.2036, + "step": 306060 + }, + { + "epoch": 0.6182807645535459, + "grad_norm": 758.5924072265625, + "learning_rate": 3.9239370318916445e-06, + "loss": 23.0557, + "step": 306070 + }, + { + "epoch": 0.6183009651862297, + "grad_norm": 328.2210998535156, + "learning_rate": 3.92359614824632e-06, + "loss": 26.6263, + "step": 306080 + }, + { + "epoch": 0.6183211658189135, + "grad_norm": 452.1582946777344, + "learning_rate": 3.923255269847258e-06, + "loss": 23.0582, + "step": 306090 + }, + { + "epoch": 0.6183413664515973, + "grad_norm": 236.93359375, + "learning_rate": 3.922914396696118e-06, + "loss": 18.4523, + "step": 306100 + }, + { + "epoch": 0.618361567084281, + "grad_norm": 373.7156066894531, + "learning_rate": 3.9225735287945635e-06, + "loss": 19.818, + "step": 306110 + }, + { + "epoch": 0.6183817677169648, + "grad_norm": 716.6705322265625, + "learning_rate": 3.922232666144255e-06, + "loss": 26.8796, + "step": 306120 + }, + { + "epoch": 0.6184019683496487, + "grad_norm": 351.5494384765625, + "learning_rate": 3.921891808746853e-06, + "loss": 23.5783, + "step": 306130 + }, + { + "epoch": 0.6184221689823325, + "grad_norm": 891.4788818359375, + "learning_rate": 3.921550956604019e-06, + "loss": 30.9712, + "step": 306140 + }, + { + "epoch": 0.6184423696150163, + "grad_norm": 733.364990234375, + "learning_rate": 3.921210109717417e-06, + "loss": 22.7058, + "step": 306150 + }, + { + "epoch": 0.6184625702477001, + "grad_norm": 238.14309692382812, + "learning_rate": 3.920869268088704e-06, + "loss": 14.636, + "step": 306160 + }, + { + "epoch": 0.618482770880384, + "grad_norm": 396.8487854003906, + "learning_rate": 3.920528431719544e-06, + "loss": 19.4024, + "step": 306170 + }, + { + "epoch": 0.6185029715130678, + "grad_norm": 256.1111145019531, + "learning_rate": 3.9201876006115985e-06, + "loss": 16.7025, + "step": 306180 + }, + { + "epoch": 0.6185231721457516, + "grad_norm": 3.769822120666504, + "learning_rate": 3.9198467747665265e-06, + "loss": 39.128, + "step": 306190 + }, 
+ { + "epoch": 0.6185433727784354, + "grad_norm": 212.6315155029297, + "learning_rate": 3.91950595418599e-06, + "loss": 31.1895, + "step": 306200 + }, + { + "epoch": 0.6185635734111192, + "grad_norm": 152.0775909423828, + "learning_rate": 3.919165138871652e-06, + "loss": 16.7545, + "step": 306210 + }, + { + "epoch": 0.618583774043803, + "grad_norm": 682.8530883789062, + "learning_rate": 3.918824328825171e-06, + "loss": 26.2047, + "step": 306220 + }, + { + "epoch": 0.6186039746764869, + "grad_norm": 472.5846252441406, + "learning_rate": 3.918483524048208e-06, + "loss": 16.3899, + "step": 306230 + }, + { + "epoch": 0.6186241753091707, + "grad_norm": 108.487548828125, + "learning_rate": 3.9181427245424266e-06, + "loss": 33.4169, + "step": 306240 + }, + { + "epoch": 0.6186443759418545, + "grad_norm": 850.3353271484375, + "learning_rate": 3.917801930309486e-06, + "loss": 27.5641, + "step": 306250 + }, + { + "epoch": 0.6186645765745383, + "grad_norm": 224.6884002685547, + "learning_rate": 3.9174611413510474e-06, + "loss": 17.4463, + "step": 306260 + }, + { + "epoch": 0.6186847772072221, + "grad_norm": 484.1405334472656, + "learning_rate": 3.9171203576687725e-06, + "loss": 17.8375, + "step": 306270 + }, + { + "epoch": 0.618704977839906, + "grad_norm": 306.95953369140625, + "learning_rate": 3.916779579264322e-06, + "loss": 29.5048, + "step": 306280 + }, + { + "epoch": 0.6187251784725898, + "grad_norm": 196.38314819335938, + "learning_rate": 3.916438806139355e-06, + "loss": 13.314, + "step": 306290 + }, + { + "epoch": 0.6187453791052736, + "grad_norm": 2.9020464420318604, + "learning_rate": 3.9160980382955336e-06, + "loss": 30.9055, + "step": 306300 + }, + { + "epoch": 0.6187655797379574, + "grad_norm": 332.1005859375, + "learning_rate": 3.9157572757345215e-06, + "loss": 11.4602, + "step": 306310 + }, + { + "epoch": 0.6187857803706412, + "grad_norm": 113.29553985595703, + "learning_rate": 3.915416518457974e-06, + "loss": 17.0311, + "step": 306320 + }, + { + "epoch": 0.6188059810033251, + "grad_norm": 109.80133056640625, + "learning_rate": 3.915075766467556e-06, + "loss": 15.5748, + "step": 306330 + }, + { + "epoch": 0.6188261816360089, + "grad_norm": 497.6032409667969, + "learning_rate": 3.914735019764928e-06, + "loss": 21.3831, + "step": 306340 + }, + { + "epoch": 0.6188463822686927, + "grad_norm": 340.36737060546875, + "learning_rate": 3.914394278351749e-06, + "loss": 17.5105, + "step": 306350 + }, + { + "epoch": 0.6188665829013764, + "grad_norm": 211.54811096191406, + "learning_rate": 3.91405354222968e-06, + "loss": 23.4012, + "step": 306360 + }, + { + "epoch": 0.6188867835340602, + "grad_norm": 58.582115173339844, + "learning_rate": 3.913712811400384e-06, + "loss": 15.3924, + "step": 306370 + }, + { + "epoch": 0.6189069841667441, + "grad_norm": 320.6402893066406, + "learning_rate": 3.913372085865519e-06, + "loss": 8.7064, + "step": 306380 + }, + { + "epoch": 0.6189271847994279, + "grad_norm": 321.906005859375, + "learning_rate": 3.913031365626746e-06, + "loss": 17.4174, + "step": 306390 + }, + { + "epoch": 0.6189473854321117, + "grad_norm": 69.85985565185547, + "learning_rate": 3.912690650685726e-06, + "loss": 30.7, + "step": 306400 + }, + { + "epoch": 0.6189675860647955, + "grad_norm": 440.7568664550781, + "learning_rate": 3.912349941044122e-06, + "loss": 20.0458, + "step": 306410 + }, + { + "epoch": 0.6189877866974793, + "grad_norm": 377.60418701171875, + "learning_rate": 3.912009236703591e-06, + "loss": 16.6445, + "step": 306420 + }, + { + "epoch": 0.6190079873301632, + "grad_norm": 
434.11669921875, + "learning_rate": 3.911668537665796e-06, + "loss": 7.9538, + "step": 306430 + }, + { + "epoch": 0.619028187962847, + "grad_norm": 396.4697570800781, + "learning_rate": 3.9113278439323965e-06, + "loss": 23.5603, + "step": 306440 + }, + { + "epoch": 0.6190483885955308, + "grad_norm": 686.4473266601562, + "learning_rate": 3.9109871555050514e-06, + "loss": 22.799, + "step": 306450 + }, + { + "epoch": 0.6190685892282146, + "grad_norm": 188.32522583007812, + "learning_rate": 3.910646472385423e-06, + "loss": 21.7802, + "step": 306460 + }, + { + "epoch": 0.6190887898608984, + "grad_norm": 173.89407348632812, + "learning_rate": 3.910305794575174e-06, + "loss": 24.067, + "step": 306470 + }, + { + "epoch": 0.6191089904935823, + "grad_norm": 423.35076904296875, + "learning_rate": 3.90996512207596e-06, + "loss": 24.6759, + "step": 306480 + }, + { + "epoch": 0.6191291911262661, + "grad_norm": 258.6020812988281, + "learning_rate": 3.9096244548894445e-06, + "loss": 16.9252, + "step": 306490 + }, + { + "epoch": 0.6191493917589499, + "grad_norm": 638.9388427734375, + "learning_rate": 3.909283793017289e-06, + "loss": 29.1464, + "step": 306500 + }, + { + "epoch": 0.6191695923916337, + "grad_norm": 127.90264129638672, + "learning_rate": 3.90894313646115e-06, + "loss": 19.206, + "step": 306510 + }, + { + "epoch": 0.6191897930243175, + "grad_norm": 198.00038146972656, + "learning_rate": 3.908602485222688e-06, + "loss": 11.8646, + "step": 306520 + }, + { + "epoch": 0.6192099936570014, + "grad_norm": 430.843505859375, + "learning_rate": 3.908261839303568e-06, + "loss": 17.6792, + "step": 306530 + }, + { + "epoch": 0.6192301942896852, + "grad_norm": 493.9074401855469, + "learning_rate": 3.9079211987054475e-06, + "loss": 21.0978, + "step": 306540 + }, + { + "epoch": 0.619250394922369, + "grad_norm": 332.5508728027344, + "learning_rate": 3.907580563429985e-06, + "loss": 12.8158, + "step": 306550 + }, + { + "epoch": 0.6192705955550528, + "grad_norm": 138.62803649902344, + "learning_rate": 3.907239933478843e-06, + "loss": 24.9861, + "step": 306560 + }, + { + "epoch": 0.6192907961877366, + "grad_norm": 404.02410888671875, + "learning_rate": 3.906899308853682e-06, + "loss": 22.7066, + "step": 306570 + }, + { + "epoch": 0.6193109968204205, + "grad_norm": 316.9248962402344, + "learning_rate": 3.9065586895561605e-06, + "loss": 38.4835, + "step": 306580 + }, + { + "epoch": 0.6193311974531043, + "grad_norm": 875.6754760742188, + "learning_rate": 3.906218075587938e-06, + "loss": 21.3687, + "step": 306590 + }, + { + "epoch": 0.6193513980857881, + "grad_norm": 437.1156311035156, + "learning_rate": 3.905877466950679e-06, + "loss": 14.8848, + "step": 306600 + }, + { + "epoch": 0.6193715987184719, + "grad_norm": 74.68804931640625, + "learning_rate": 3.905536863646037e-06, + "loss": 12.2746, + "step": 306610 + }, + { + "epoch": 0.6193917993511556, + "grad_norm": 239.02340698242188, + "learning_rate": 3.905196265675677e-06, + "loss": 28.3326, + "step": 306620 + }, + { + "epoch": 0.6194119999838394, + "grad_norm": 157.01031494140625, + "learning_rate": 3.904855673041259e-06, + "loss": 12.1806, + "step": 306630 + }, + { + "epoch": 0.6194322006165233, + "grad_norm": 178.1280059814453, + "learning_rate": 3.90451508574444e-06, + "loss": 30.4363, + "step": 306640 + }, + { + "epoch": 0.6194524012492071, + "grad_norm": 613.2965698242188, + "learning_rate": 3.904174503786882e-06, + "loss": 23.6172, + "step": 306650 + }, + { + "epoch": 0.6194726018818909, + "grad_norm": 587.6942749023438, + "learning_rate": 
3.903833927170245e-06, + "loss": 17.304, + "step": 306660 + }, + { + "epoch": 0.6194928025145747, + "grad_norm": 779.6909790039062, + "learning_rate": 3.9034933558961885e-06, + "loss": 32.5964, + "step": 306670 + }, + { + "epoch": 0.6195130031472585, + "grad_norm": 370.8109436035156, + "learning_rate": 3.9031527899663705e-06, + "loss": 31.931, + "step": 306680 + }, + { + "epoch": 0.6195332037799424, + "grad_norm": 36.33906555175781, + "learning_rate": 3.9028122293824535e-06, + "loss": 13.0738, + "step": 306690 + }, + { + "epoch": 0.6195534044126262, + "grad_norm": 881.9010620117188, + "learning_rate": 3.902471674146099e-06, + "loss": 20.5898, + "step": 306700 + }, + { + "epoch": 0.61957360504531, + "grad_norm": 220.03152465820312, + "learning_rate": 3.902131124258962e-06, + "loss": 8.8976, + "step": 306710 + }, + { + "epoch": 0.6195938056779938, + "grad_norm": 357.095947265625, + "learning_rate": 3.901790579722706e-06, + "loss": 29.167, + "step": 306720 + }, + { + "epoch": 0.6196140063106776, + "grad_norm": 217.9332733154297, + "learning_rate": 3.90145004053899e-06, + "loss": 17.4131, + "step": 306730 + }, + { + "epoch": 0.6196342069433615, + "grad_norm": 160.70803833007812, + "learning_rate": 3.901109506709472e-06, + "loss": 19.6984, + "step": 306740 + }, + { + "epoch": 0.6196544075760453, + "grad_norm": 485.2276916503906, + "learning_rate": 3.900768978235812e-06, + "loss": 16.8355, + "step": 306750 + }, + { + "epoch": 0.6196746082087291, + "grad_norm": 225.5886688232422, + "learning_rate": 3.900428455119674e-06, + "loss": 10.9822, + "step": 306760 + }, + { + "epoch": 0.6196948088414129, + "grad_norm": 381.4754333496094, + "learning_rate": 3.900087937362711e-06, + "loss": 9.0045, + "step": 306770 + }, + { + "epoch": 0.6197150094740967, + "grad_norm": 563.8507690429688, + "learning_rate": 3.899747424966588e-06, + "loss": 18.8555, + "step": 306780 + }, + { + "epoch": 0.6197352101067806, + "grad_norm": 165.44664001464844, + "learning_rate": 3.899406917932962e-06, + "loss": 14.7919, + "step": 306790 + }, + { + "epoch": 0.6197554107394644, + "grad_norm": 352.6623840332031, + "learning_rate": 3.899066416263493e-06, + "loss": 14.8232, + "step": 306800 + }, + { + "epoch": 0.6197756113721482, + "grad_norm": 863.5376586914062, + "learning_rate": 3.898725919959841e-06, + "loss": 34.6785, + "step": 306810 + }, + { + "epoch": 0.619795812004832, + "grad_norm": 593.6881713867188, + "learning_rate": 3.898385429023666e-06, + "loss": 13.6415, + "step": 306820 + }, + { + "epoch": 0.6198160126375158, + "grad_norm": 652.6988525390625, + "learning_rate": 3.898044943456626e-06, + "loss": 30.2298, + "step": 306830 + }, + { + "epoch": 0.6198362132701997, + "grad_norm": 423.9455261230469, + "learning_rate": 3.89770446326038e-06, + "loss": 38.2633, + "step": 306840 + }, + { + "epoch": 0.6198564139028835, + "grad_norm": 132.27984619140625, + "learning_rate": 3.89736398843659e-06, + "loss": 22.5795, + "step": 306850 + }, + { + "epoch": 0.6198766145355673, + "grad_norm": 302.6119689941406, + "learning_rate": 3.897023518986915e-06, + "loss": 13.0181, + "step": 306860 + }, + { + "epoch": 0.619896815168251, + "grad_norm": 662.6363525390625, + "learning_rate": 3.896683054913013e-06, + "loss": 31.4033, + "step": 306870 + }, + { + "epoch": 0.6199170158009348, + "grad_norm": 678.5469970703125, + "learning_rate": 3.896342596216543e-06, + "loss": 24.0122, + "step": 306880 + }, + { + "epoch": 0.6199372164336187, + "grad_norm": 443.99639892578125, + "learning_rate": 3.896002142899167e-06, + "loss": 16.1085, + "step": 306890 + }, 
+ { + "epoch": 0.6199574170663025, + "grad_norm": 462.47393798828125, + "learning_rate": 3.895661694962542e-06, + "loss": 14.1491, + "step": 306900 + }, + { + "epoch": 0.6199776176989863, + "grad_norm": 354.35906982421875, + "learning_rate": 3.895321252408326e-06, + "loss": 39.664, + "step": 306910 + }, + { + "epoch": 0.6199978183316701, + "grad_norm": 326.7041320800781, + "learning_rate": 3.894980815238184e-06, + "loss": 11.0437, + "step": 306920 + }, + { + "epoch": 0.6200180189643539, + "grad_norm": 410.8968811035156, + "learning_rate": 3.894640383453769e-06, + "loss": 15.1732, + "step": 306930 + }, + { + "epoch": 0.6200382195970378, + "grad_norm": 319.5135192871094, + "learning_rate": 3.894299957056743e-06, + "loss": 26.3188, + "step": 306940 + }, + { + "epoch": 0.6200584202297216, + "grad_norm": 248.26473999023438, + "learning_rate": 3.8939595360487655e-06, + "loss": 15.6178, + "step": 306950 + }, + { + "epoch": 0.6200786208624054, + "grad_norm": 694.5548095703125, + "learning_rate": 3.893619120431494e-06, + "loss": 23.5753, + "step": 306960 + }, + { + "epoch": 0.6200988214950892, + "grad_norm": 651.9447631835938, + "learning_rate": 3.893278710206589e-06, + "loss": 19.1723, + "step": 306970 + }, + { + "epoch": 0.620119022127773, + "grad_norm": 243.91006469726562, + "learning_rate": 3.892938305375712e-06, + "loss": 20.8598, + "step": 306980 + }, + { + "epoch": 0.6201392227604569, + "grad_norm": 360.2294006347656, + "learning_rate": 3.892597905940516e-06, + "loss": 16.3503, + "step": 306990 + }, + { + "epoch": 0.6201594233931407, + "grad_norm": 479.9851989746094, + "learning_rate": 3.892257511902664e-06, + "loss": 17.4354, + "step": 307000 + }, + { + "epoch": 0.6201796240258245, + "grad_norm": 254.92938232421875, + "learning_rate": 3.891917123263815e-06, + "loss": 10.1215, + "step": 307010 + }, + { + "epoch": 0.6201998246585083, + "grad_norm": 357.59124755859375, + "learning_rate": 3.891576740025628e-06, + "loss": 37.9377, + "step": 307020 + }, + { + "epoch": 0.6202200252911921, + "grad_norm": 23.786584854125977, + "learning_rate": 3.891236362189761e-06, + "loss": 12.9558, + "step": 307030 + }, + { + "epoch": 0.620240225923876, + "grad_norm": 344.36334228515625, + "learning_rate": 3.890895989757874e-06, + "loss": 20.0871, + "step": 307040 + }, + { + "epoch": 0.6202604265565598, + "grad_norm": 98.81551361083984, + "learning_rate": 3.890555622731626e-06, + "loss": 11.1531, + "step": 307050 + }, + { + "epoch": 0.6202806271892436, + "grad_norm": 317.57373046875, + "learning_rate": 3.890215261112674e-06, + "loss": 11.9396, + "step": 307060 + }, + { + "epoch": 0.6203008278219274, + "grad_norm": 446.8298034667969, + "learning_rate": 3.889874904902678e-06, + "loss": 13.6021, + "step": 307070 + }, + { + "epoch": 0.6203210284546112, + "grad_norm": 1420.668701171875, + "learning_rate": 3.889534554103299e-06, + "loss": 28.161, + "step": 307080 + }, + { + "epoch": 0.620341229087295, + "grad_norm": 562.9849853515625, + "learning_rate": 3.889194208716192e-06, + "loss": 23.4622, + "step": 307090 + }, + { + "epoch": 0.6203614297199789, + "grad_norm": 553.1746215820312, + "learning_rate": 3.888853868743018e-06, + "loss": 18.4087, + "step": 307100 + }, + { + "epoch": 0.6203816303526627, + "grad_norm": 381.9627990722656, + "learning_rate": 3.888513534185438e-06, + "loss": 20.6817, + "step": 307110 + }, + { + "epoch": 0.6204018309853465, + "grad_norm": 84.61685943603516, + "learning_rate": 3.888173205045105e-06, + "loss": 17.944, + "step": 307120 + }, + { + "epoch": 0.6204220316180302, + "grad_norm": 
1.7519794702529907, + "learning_rate": 3.887832881323681e-06, + "loss": 14.0085, + "step": 307130 + }, + { + "epoch": 0.620442232250714, + "grad_norm": 775.5814819335938, + "learning_rate": 3.887492563022826e-06, + "loss": 18.5329, + "step": 307140 + }, + { + "epoch": 0.6204624328833979, + "grad_norm": 185.97178649902344, + "learning_rate": 3.887152250144197e-06, + "loss": 14.3087, + "step": 307150 + }, + { + "epoch": 0.6204826335160817, + "grad_norm": 565.8896484375, + "learning_rate": 3.886811942689453e-06, + "loss": 35.2329, + "step": 307160 + }, + { + "epoch": 0.6205028341487655, + "grad_norm": 564.121337890625, + "learning_rate": 3.8864716406602525e-06, + "loss": 16.3574, + "step": 307170 + }, + { + "epoch": 0.6205230347814493, + "grad_norm": 712.0159912109375, + "learning_rate": 3.886131344058255e-06, + "loss": 19.8137, + "step": 307180 + }, + { + "epoch": 0.6205432354141331, + "grad_norm": 13.809062957763672, + "learning_rate": 3.8857910528851175e-06, + "loss": 15.8435, + "step": 307190 + }, + { + "epoch": 0.620563436046817, + "grad_norm": 517.502685546875, + "learning_rate": 3.885450767142498e-06, + "loss": 21.0594, + "step": 307200 + }, + { + "epoch": 0.6205836366795008, + "grad_norm": 635.3877563476562, + "learning_rate": 3.8851104868320595e-06, + "loss": 31.6942, + "step": 307210 + }, + { + "epoch": 0.6206038373121846, + "grad_norm": 230.7758026123047, + "learning_rate": 3.884770211955454e-06, + "loss": 19.3524, + "step": 307220 + }, + { + "epoch": 0.6206240379448684, + "grad_norm": 414.101318359375, + "learning_rate": 3.884429942514345e-06, + "loss": 13.2517, + "step": 307230 + }, + { + "epoch": 0.6206442385775522, + "grad_norm": 230.80601501464844, + "learning_rate": 3.884089678510389e-06, + "loss": 15.3668, + "step": 307240 + }, + { + "epoch": 0.6206644392102361, + "grad_norm": 294.1860046386719, + "learning_rate": 3.883749419945244e-06, + "loss": 14.2085, + "step": 307250 + }, + { + "epoch": 0.6206846398429199, + "grad_norm": 375.5740051269531, + "learning_rate": 3.883409166820569e-06, + "loss": 23.9223, + "step": 307260 + }, + { + "epoch": 0.6207048404756037, + "grad_norm": 352.6200256347656, + "learning_rate": 3.883068919138023e-06, + "loss": 18.2123, + "step": 307270 + }, + { + "epoch": 0.6207250411082875, + "grad_norm": 19.69367027282715, + "learning_rate": 3.882728676899263e-06, + "loss": 24.5393, + "step": 307280 + }, + { + "epoch": 0.6207452417409713, + "grad_norm": 553.554931640625, + "learning_rate": 3.882388440105947e-06, + "loss": 21.5132, + "step": 307290 + }, + { + "epoch": 0.6207654423736552, + "grad_norm": 261.3301696777344, + "learning_rate": 3.882048208759735e-06, + "loss": 24.2588, + "step": 307300 + }, + { + "epoch": 0.620785643006339, + "grad_norm": 284.4010009765625, + "learning_rate": 3.8817079828622855e-06, + "loss": 21.4293, + "step": 307310 + }, + { + "epoch": 0.6208058436390228, + "grad_norm": 109.50505065917969, + "learning_rate": 3.881367762415255e-06, + "loss": 10.3847, + "step": 307320 + }, + { + "epoch": 0.6208260442717066, + "grad_norm": 1052.4429931640625, + "learning_rate": 3.881027547420302e-06, + "loss": 36.2355, + "step": 307330 + }, + { + "epoch": 0.6208462449043904, + "grad_norm": 629.2431640625, + "learning_rate": 3.880687337879086e-06, + "loss": 15.3413, + "step": 307340 + }, + { + "epoch": 0.6208664455370743, + "grad_norm": 250.07577514648438, + "learning_rate": 3.880347133793263e-06, + "loss": 19.1702, + "step": 307350 + }, + { + "epoch": 0.6208866461697581, + "grad_norm": 201.0216522216797, + "learning_rate": 3.880006935164491e-06, 
+ "loss": 11.6162, + "step": 307360 + }, + { + "epoch": 0.6209068468024419, + "grad_norm": 322.825439453125, + "learning_rate": 3.8796667419944335e-06, + "loss": 15.8074, + "step": 307370 + }, + { + "epoch": 0.6209270474351257, + "grad_norm": 298.6944274902344, + "learning_rate": 3.87932655428474e-06, + "loss": 22.8595, + "step": 307380 + }, + { + "epoch": 0.6209472480678094, + "grad_norm": 602.3025512695312, + "learning_rate": 3.878986372037074e-06, + "loss": 18.5704, + "step": 307390 + }, + { + "epoch": 0.6209674487004933, + "grad_norm": 183.25563049316406, + "learning_rate": 3.8786461952530955e-06, + "loss": 29.4538, + "step": 307400 + }, + { + "epoch": 0.6209876493331771, + "grad_norm": 150.2943115234375, + "learning_rate": 3.878306023934457e-06, + "loss": 20.3949, + "step": 307410 + }, + { + "epoch": 0.6210078499658609, + "grad_norm": 508.4177551269531, + "learning_rate": 3.877965858082818e-06, + "loss": 14.1169, + "step": 307420 + }, + { + "epoch": 0.6210280505985447, + "grad_norm": 229.1527099609375, + "learning_rate": 3.87762569769984e-06, + "loss": 16.7883, + "step": 307430 + }, + { + "epoch": 0.6210482512312285, + "grad_norm": 92.78300476074219, + "learning_rate": 3.877285542787176e-06, + "loss": 23.3407, + "step": 307440 + }, + { + "epoch": 0.6210684518639124, + "grad_norm": 571.0462646484375, + "learning_rate": 3.876945393346486e-06, + "loss": 12.3129, + "step": 307450 + }, + { + "epoch": 0.6210886524965962, + "grad_norm": 69.4331283569336, + "learning_rate": 3.8766052493794286e-06, + "loss": 13.6684, + "step": 307460 + }, + { + "epoch": 0.62110885312928, + "grad_norm": 178.68429565429688, + "learning_rate": 3.876265110887662e-06, + "loss": 14.7824, + "step": 307470 + }, + { + "epoch": 0.6211290537619638, + "grad_norm": 421.0312194824219, + "learning_rate": 3.875924977872842e-06, + "loss": 15.1686, + "step": 307480 + }, + { + "epoch": 0.6211492543946476, + "grad_norm": 0.0, + "learning_rate": 3.875584850336627e-06, + "loss": 26.5098, + "step": 307490 + }, + { + "epoch": 0.6211694550273315, + "grad_norm": 365.2734375, + "learning_rate": 3.875244728280676e-06, + "loss": 15.8003, + "step": 307500 + }, + { + "epoch": 0.6211896556600153, + "grad_norm": 729.2528076171875, + "learning_rate": 3.8749046117066455e-06, + "loss": 11.6154, + "step": 307510 + }, + { + "epoch": 0.6212098562926991, + "grad_norm": 625.0670166015625, + "learning_rate": 3.874564500616192e-06, + "loss": 15.0667, + "step": 307520 + }, + { + "epoch": 0.6212300569253829, + "grad_norm": 0.0, + "learning_rate": 3.874224395010977e-06, + "loss": 11.4449, + "step": 307530 + }, + { + "epoch": 0.6212502575580667, + "grad_norm": 430.8566589355469, + "learning_rate": 3.873884294892654e-06, + "loss": 19.7118, + "step": 307540 + }, + { + "epoch": 0.6212704581907506, + "grad_norm": 661.2942504882812, + "learning_rate": 3.873544200262882e-06, + "loss": 15.6916, + "step": 307550 + }, + { + "epoch": 0.6212906588234344, + "grad_norm": 253.31130981445312, + "learning_rate": 3.873204111123321e-06, + "loss": 15.5921, + "step": 307560 + }, + { + "epoch": 0.6213108594561182, + "grad_norm": 159.25498962402344, + "learning_rate": 3.872864027475626e-06, + "loss": 19.4433, + "step": 307570 + }, + { + "epoch": 0.621331060088802, + "grad_norm": 432.11834716796875, + "learning_rate": 3.872523949321454e-06, + "loss": 15.8293, + "step": 307580 + }, + { + "epoch": 0.6213512607214858, + "grad_norm": 53.555694580078125, + "learning_rate": 3.872183876662462e-06, + "loss": 26.4413, + "step": 307590 + }, + { + "epoch": 0.6213714613541697, + 
"grad_norm": 308.6258239746094, + "learning_rate": 3.871843809500313e-06, + "loss": 28.8189, + "step": 307600 + }, + { + "epoch": 0.6213916619868535, + "grad_norm": 20.56621551513672, + "learning_rate": 3.871503747836657e-06, + "loss": 10.9581, + "step": 307610 + }, + { + "epoch": 0.6214118626195373, + "grad_norm": 465.7938537597656, + "learning_rate": 3.8711636916731566e-06, + "loss": 17.9902, + "step": 307620 + }, + { + "epoch": 0.6214320632522211, + "grad_norm": 476.42034912109375, + "learning_rate": 3.870823641011467e-06, + "loss": 21.1891, + "step": 307630 + }, + { + "epoch": 0.6214522638849048, + "grad_norm": 242.0484161376953, + "learning_rate": 3.870483595853246e-06, + "loss": 27.705, + "step": 307640 + }, + { + "epoch": 0.6214724645175886, + "grad_norm": 685.3764038085938, + "learning_rate": 3.870143556200152e-06, + "loss": 27.2119, + "step": 307650 + }, + { + "epoch": 0.6214926651502725, + "grad_norm": 190.67459106445312, + "learning_rate": 3.8698035220538404e-06, + "loss": 12.5892, + "step": 307660 + }, + { + "epoch": 0.6215128657829563, + "grad_norm": 3.617690324783325, + "learning_rate": 3.869463493415969e-06, + "loss": 28.9364, + "step": 307670 + }, + { + "epoch": 0.6215330664156401, + "grad_norm": 232.81556701660156, + "learning_rate": 3.869123470288195e-06, + "loss": 20.8313, + "step": 307680 + }, + { + "epoch": 0.6215532670483239, + "grad_norm": 785.2691650390625, + "learning_rate": 3.868783452672177e-06, + "loss": 18.9252, + "step": 307690 + }, + { + "epoch": 0.6215734676810077, + "grad_norm": 150.1544647216797, + "learning_rate": 3.868443440569571e-06, + "loss": 22.7764, + "step": 307700 + }, + { + "epoch": 0.6215936683136916, + "grad_norm": 529.515380859375, + "learning_rate": 3.868103433982034e-06, + "loss": 16.2962, + "step": 307710 + }, + { + "epoch": 0.6216138689463754, + "grad_norm": 536.1862182617188, + "learning_rate": 3.867763432911225e-06, + "loss": 28.8676, + "step": 307720 + }, + { + "epoch": 0.6216340695790592, + "grad_norm": 254.0311737060547, + "learning_rate": 3.867423437358799e-06, + "loss": 16.9141, + "step": 307730 + }, + { + "epoch": 0.621654270211743, + "grad_norm": 218.4225616455078, + "learning_rate": 3.867083447326413e-06, + "loss": 19.5715, + "step": 307740 + }, + { + "epoch": 0.6216744708444268, + "grad_norm": 485.4092102050781, + "learning_rate": 3.866743462815724e-06, + "loss": 13.436, + "step": 307750 + }, + { + "epoch": 0.6216946714771107, + "grad_norm": 256.953857421875, + "learning_rate": 3.866403483828392e-06, + "loss": 11.0728, + "step": 307760 + }, + { + "epoch": 0.6217148721097945, + "grad_norm": 264.557373046875, + "learning_rate": 3.866063510366072e-06, + "loss": 17.1047, + "step": 307770 + }, + { + "epoch": 0.6217350727424783, + "grad_norm": 23.962257385253906, + "learning_rate": 3.86572354243042e-06, + "loss": 26.9807, + "step": 307780 + }, + { + "epoch": 0.6217552733751621, + "grad_norm": 92.05508422851562, + "learning_rate": 3.865383580023094e-06, + "loss": 24.2598, + "step": 307790 + }, + { + "epoch": 0.6217754740078459, + "grad_norm": 243.84164428710938, + "learning_rate": 3.865043623145751e-06, + "loss": 18.737, + "step": 307800 + }, + { + "epoch": 0.6217956746405298, + "grad_norm": 205.98390197753906, + "learning_rate": 3.864703671800047e-06, + "loss": 22.9812, + "step": 307810 + }, + { + "epoch": 0.6218158752732136, + "grad_norm": 365.3945617675781, + "learning_rate": 3.8643637259876415e-06, + "loss": 21.3828, + "step": 307820 + }, + { + "epoch": 0.6218360759058974, + "grad_norm": 270.4975891113281, + "learning_rate": 
3.864023785710187e-06, + "loss": 11.1547, + "step": 307830 + }, + { + "epoch": 0.6218562765385812, + "grad_norm": 793.963623046875, + "learning_rate": 3.863683850969343e-06, + "loss": 19.8202, + "step": 307840 + }, + { + "epoch": 0.621876477171265, + "grad_norm": 559.8876342773438, + "learning_rate": 3.863343921766769e-06, + "loss": 18.1483, + "step": 307850 + }, + { + "epoch": 0.6218966778039489, + "grad_norm": 616.4458618164062, + "learning_rate": 3.863003998104117e-06, + "loss": 17.1349, + "step": 307860 + }, + { + "epoch": 0.6219168784366327, + "grad_norm": 214.35891723632812, + "learning_rate": 3.862664079983045e-06, + "loss": 24.9051, + "step": 307870 + }, + { + "epoch": 0.6219370790693165, + "grad_norm": 561.56494140625, + "learning_rate": 3.862324167405212e-06, + "loss": 26.8446, + "step": 307880 + }, + { + "epoch": 0.6219572797020003, + "grad_norm": 651.3684692382812, + "learning_rate": 3.8619842603722715e-06, + "loss": 12.9932, + "step": 307890 + }, + { + "epoch": 0.621977480334684, + "grad_norm": 356.9894104003906, + "learning_rate": 3.86164435888588e-06, + "loss": 27.2443, + "step": 307900 + }, + { + "epoch": 0.6219976809673678, + "grad_norm": 55.86592483520508, + "learning_rate": 3.861304462947698e-06, + "loss": 10.004, + "step": 307910 + }, + { + "epoch": 0.6220178816000517, + "grad_norm": 355.6819152832031, + "learning_rate": 3.860964572559381e-06, + "loss": 14.478, + "step": 307920 + }, + { + "epoch": 0.6220380822327355, + "grad_norm": 8.241705894470215, + "learning_rate": 3.860624687722583e-06, + "loss": 15.1137, + "step": 307930 + }, + { + "epoch": 0.6220582828654193, + "grad_norm": 602.1299438476562, + "learning_rate": 3.860284808438962e-06, + "loss": 15.2554, + "step": 307940 + }, + { + "epoch": 0.6220784834981031, + "grad_norm": 548.4798583984375, + "learning_rate": 3.859944934710177e-06, + "loss": 23.1541, + "step": 307950 + }, + { + "epoch": 0.622098684130787, + "grad_norm": 29.896265029907227, + "learning_rate": 3.859605066537879e-06, + "loss": 19.2662, + "step": 307960 + }, + { + "epoch": 0.6221188847634708, + "grad_norm": 246.86521911621094, + "learning_rate": 3.859265203923728e-06, + "loss": 26.9573, + "step": 307970 + }, + { + "epoch": 0.6221390853961546, + "grad_norm": 296.05096435546875, + "learning_rate": 3.858925346869383e-06, + "loss": 32.5155, + "step": 307980 + }, + { + "epoch": 0.6221592860288384, + "grad_norm": 455.86322021484375, + "learning_rate": 3.858585495376494e-06, + "loss": 21.6094, + "step": 307990 + }, + { + "epoch": 0.6221794866615222, + "grad_norm": 224.6339111328125, + "learning_rate": 3.8582456494467214e-06, + "loss": 18.4335, + "step": 308000 + }, + { + "epoch": 0.622199687294206, + "grad_norm": 256.71368408203125, + "learning_rate": 3.857905809081723e-06, + "loss": 19.7549, + "step": 308010 + }, + { + "epoch": 0.6222198879268899, + "grad_norm": 132.54017639160156, + "learning_rate": 3.857565974283152e-06, + "loss": 20.27, + "step": 308020 + }, + { + "epoch": 0.6222400885595737, + "grad_norm": 639.8458251953125, + "learning_rate": 3.857226145052665e-06, + "loss": 18.2413, + "step": 308030 + }, + { + "epoch": 0.6222602891922575, + "grad_norm": 36.40522384643555, + "learning_rate": 3.856886321391919e-06, + "loss": 16.8783, + "step": 308040 + }, + { + "epoch": 0.6222804898249413, + "grad_norm": 183.32699584960938, + "learning_rate": 3.856546503302573e-06, + "loss": 12.8777, + "step": 308050 + }, + { + "epoch": 0.6223006904576251, + "grad_norm": 393.983642578125, + "learning_rate": 3.856206690786278e-06, + "loss": 12.4519, + "step": 308060 + 
}, + { + "epoch": 0.622320891090309, + "grad_norm": 348.84417724609375, + "learning_rate": 3.8558668838446935e-06, + "loss": 19.8369, + "step": 308070 + }, + { + "epoch": 0.6223410917229928, + "grad_norm": 527.478515625, + "learning_rate": 3.855527082479477e-06, + "loss": 17.1344, + "step": 308080 + }, + { + "epoch": 0.6223612923556766, + "grad_norm": 833.7142333984375, + "learning_rate": 3.85518728669228e-06, + "loss": 25.6628, + "step": 308090 + }, + { + "epoch": 0.6223814929883604, + "grad_norm": 902.67431640625, + "learning_rate": 3.854847496484762e-06, + "loss": 26.7043, + "step": 308100 + }, + { + "epoch": 0.6224016936210442, + "grad_norm": 354.50384521484375, + "learning_rate": 3.85450771185858e-06, + "loss": 17.5971, + "step": 308110 + }, + { + "epoch": 0.6224218942537281, + "grad_norm": 440.0574035644531, + "learning_rate": 3.854167932815387e-06, + "loss": 21.2498, + "step": 308120 + }, + { + "epoch": 0.6224420948864119, + "grad_norm": 260.6983337402344, + "learning_rate": 3.85382815935684e-06, + "loss": 15.5941, + "step": 308130 + }, + { + "epoch": 0.6224622955190957, + "grad_norm": 99.20164489746094, + "learning_rate": 3.853488391484599e-06, + "loss": 20.4455, + "step": 308140 + }, + { + "epoch": 0.6224824961517794, + "grad_norm": 410.4075012207031, + "learning_rate": 3.853148629200312e-06, + "loss": 18.4926, + "step": 308150 + }, + { + "epoch": 0.6225026967844632, + "grad_norm": 401.4082336425781, + "learning_rate": 3.852808872505642e-06, + "loss": 17.5811, + "step": 308160 + }, + { + "epoch": 0.6225228974171471, + "grad_norm": 375.050048828125, + "learning_rate": 3.8524691214022425e-06, + "loss": 26.6744, + "step": 308170 + }, + { + "epoch": 0.6225430980498309, + "grad_norm": 225.84666442871094, + "learning_rate": 3.8521293758917684e-06, + "loss": 10.9746, + "step": 308180 + }, + { + "epoch": 0.6225632986825147, + "grad_norm": 373.8145751953125, + "learning_rate": 3.851789635975877e-06, + "loss": 21.9365, + "step": 308190 + }, + { + "epoch": 0.6225834993151985, + "grad_norm": 219.49147033691406, + "learning_rate": 3.8514499016562216e-06, + "loss": 23.9357, + "step": 308200 + }, + { + "epoch": 0.6226036999478823, + "grad_norm": 391.24957275390625, + "learning_rate": 3.851110172934463e-06, + "loss": 15.7127, + "step": 308210 + }, + { + "epoch": 0.6226239005805662, + "grad_norm": 306.0335388183594, + "learning_rate": 3.850770449812252e-06, + "loss": 14.8573, + "step": 308220 + }, + { + "epoch": 0.62264410121325, + "grad_norm": 827.5209350585938, + "learning_rate": 3.850430732291248e-06, + "loss": 17.0767, + "step": 308230 + }, + { + "epoch": 0.6226643018459338, + "grad_norm": 101.06029510498047, + "learning_rate": 3.850091020373105e-06, + "loss": 16.6881, + "step": 308240 + }, + { + "epoch": 0.6226845024786176, + "grad_norm": 448.7660827636719, + "learning_rate": 3.849751314059479e-06, + "loss": 30.5649, + "step": 308250 + }, + { + "epoch": 0.6227047031113014, + "grad_norm": 690.1036987304688, + "learning_rate": 3.849411613352024e-06, + "loss": 17.4533, + "step": 308260 + }, + { + "epoch": 0.6227249037439853, + "grad_norm": 189.1039276123047, + "learning_rate": 3.8490719182524e-06, + "loss": 11.6213, + "step": 308270 + }, + { + "epoch": 0.6227451043766691, + "grad_norm": 436.6559143066406, + "learning_rate": 3.848732228762257e-06, + "loss": 14.4833, + "step": 308280 + }, + { + "epoch": 0.6227653050093529, + "grad_norm": 481.327392578125, + "learning_rate": 3.848392544883254e-06, + "loss": 40.662, + "step": 308290 + }, + { + "epoch": 0.6227855056420367, + "grad_norm": 
279.5513916015625, + "learning_rate": 3.8480528666170495e-06, + "loss": 17.9576, + "step": 308300 + }, + { + "epoch": 0.6228057062747205, + "grad_norm": 109.25330352783203, + "learning_rate": 3.847713193965291e-06, + "loss": 16.0483, + "step": 308310 + }, + { + "epoch": 0.6228259069074044, + "grad_norm": 411.5627136230469, + "learning_rate": 3.84737352692964e-06, + "loss": 15.8277, + "step": 308320 + }, + { + "epoch": 0.6228461075400882, + "grad_norm": 320.0434875488281, + "learning_rate": 3.847033865511752e-06, + "loss": 20.7567, + "step": 308330 + }, + { + "epoch": 0.622866308172772, + "grad_norm": 234.54380798339844, + "learning_rate": 3.84669420971328e-06, + "loss": 9.6087, + "step": 308340 + }, + { + "epoch": 0.6228865088054558, + "grad_norm": 503.6078796386719, + "learning_rate": 3.846354559535881e-06, + "loss": 29.8908, + "step": 308350 + }, + { + "epoch": 0.6229067094381396, + "grad_norm": 79.07080841064453, + "learning_rate": 3.846014914981209e-06, + "loss": 22.0454, + "step": 308360 + }, + { + "epoch": 0.6229269100708235, + "grad_norm": 222.7816925048828, + "learning_rate": 3.845675276050923e-06, + "loss": 34.9026, + "step": 308370 + }, + { + "epoch": 0.6229471107035073, + "grad_norm": 399.2646484375, + "learning_rate": 3.845335642746672e-06, + "loss": 13.7178, + "step": 308380 + }, + { + "epoch": 0.6229673113361911, + "grad_norm": 372.23077392578125, + "learning_rate": 3.8449960150701175e-06, + "loss": 13.8694, + "step": 308390 + }, + { + "epoch": 0.6229875119688749, + "grad_norm": 367.197509765625, + "learning_rate": 3.844656393022912e-06, + "loss": 11.3064, + "step": 308400 + }, + { + "epoch": 0.6230077126015586, + "grad_norm": 295.2317199707031, + "learning_rate": 3.84431677660671e-06, + "loss": 13.8095, + "step": 308410 + }, + { + "epoch": 0.6230279132342424, + "grad_norm": 685.9607543945312, + "learning_rate": 3.843977165823169e-06, + "loss": 21.6459, + "step": 308420 + }, + { + "epoch": 0.6230481138669263, + "grad_norm": 554.89794921875, + "learning_rate": 3.843637560673943e-06, + "loss": 13.4703, + "step": 308430 + }, + { + "epoch": 0.6230683144996101, + "grad_norm": 56.357337951660156, + "learning_rate": 3.843297961160686e-06, + "loss": 14.792, + "step": 308440 + }, + { + "epoch": 0.6230885151322939, + "grad_norm": 233.34291076660156, + "learning_rate": 3.842958367285056e-06, + "loss": 22.8048, + "step": 308450 + }, + { + "epoch": 0.6231087157649777, + "grad_norm": 790.2981567382812, + "learning_rate": 3.842618779048706e-06, + "loss": 31.7122, + "step": 308460 + }, + { + "epoch": 0.6231289163976615, + "grad_norm": 167.50323486328125, + "learning_rate": 3.842279196453292e-06, + "loss": 13.0795, + "step": 308470 + }, + { + "epoch": 0.6231491170303454, + "grad_norm": 189.74818420410156, + "learning_rate": 3.841939619500468e-06, + "loss": 29.6577, + "step": 308480 + }, + { + "epoch": 0.6231693176630292, + "grad_norm": 196.10964965820312, + "learning_rate": 3.841600048191891e-06, + "loss": 13.5237, + "step": 308490 + }, + { + "epoch": 0.623189518295713, + "grad_norm": 686.2725219726562, + "learning_rate": 3.841260482529215e-06, + "loss": 18.2192, + "step": 308500 + }, + { + "epoch": 0.6232097189283968, + "grad_norm": 2298.264404296875, + "learning_rate": 3.8409209225140925e-06, + "loss": 12.5312, + "step": 308510 + }, + { + "epoch": 0.6232299195610806, + "grad_norm": 201.07447814941406, + "learning_rate": 3.840581368148182e-06, + "loss": 25.0888, + "step": 308520 + }, + { + "epoch": 0.6232501201937645, + "grad_norm": 0.0, + "learning_rate": 3.840241819433139e-06, + "loss": 
18.6599, + "step": 308530 + }, + { + "epoch": 0.6232703208264483, + "grad_norm": 283.84600830078125, + "learning_rate": 3.839902276370615e-06, + "loss": 18.5898, + "step": 308540 + }, + { + "epoch": 0.6232905214591321, + "grad_norm": 341.5215759277344, + "learning_rate": 3.839562738962267e-06, + "loss": 16.1746, + "step": 308550 + }, + { + "epoch": 0.6233107220918159, + "grad_norm": 183.77569580078125, + "learning_rate": 3.83922320720975e-06, + "loss": 8.8788, + "step": 308560 + }, + { + "epoch": 0.6233309227244997, + "grad_norm": 322.90435791015625, + "learning_rate": 3.838883681114718e-06, + "loss": 19.1294, + "step": 308570 + }, + { + "epoch": 0.6233511233571836, + "grad_norm": 507.79547119140625, + "learning_rate": 3.838544160678824e-06, + "loss": 10.0477, + "step": 308580 + }, + { + "epoch": 0.6233713239898674, + "grad_norm": 461.77557373046875, + "learning_rate": 3.838204645903729e-06, + "loss": 34.7119, + "step": 308590 + }, + { + "epoch": 0.6233915246225512, + "grad_norm": 84.36308288574219, + "learning_rate": 3.83786513679108e-06, + "loss": 13.4838, + "step": 308600 + }, + { + "epoch": 0.623411725255235, + "grad_norm": 264.8419494628906, + "learning_rate": 3.837525633342537e-06, + "loss": 19.1045, + "step": 308610 + }, + { + "epoch": 0.6234319258879188, + "grad_norm": 530.059814453125, + "learning_rate": 3.837186135559754e-06, + "loss": 30.9889, + "step": 308620 + }, + { + "epoch": 0.6234521265206027, + "grad_norm": 306.8770751953125, + "learning_rate": 3.836846643444383e-06, + "loss": 20.6282, + "step": 308630 + }, + { + "epoch": 0.6234723271532865, + "grad_norm": 247.51690673828125, + "learning_rate": 3.836507156998081e-06, + "loss": 28.366, + "step": 308640 + }, + { + "epoch": 0.6234925277859703, + "grad_norm": 301.9272766113281, + "learning_rate": 3.836167676222501e-06, + "loss": 22.2943, + "step": 308650 + }, + { + "epoch": 0.6235127284186541, + "grad_norm": 432.4485168457031, + "learning_rate": 3.835828201119302e-06, + "loss": 19.5995, + "step": 308660 + }, + { + "epoch": 0.6235329290513378, + "grad_norm": 429.926025390625, + "learning_rate": 3.835488731690131e-06, + "loss": 34.4836, + "step": 308670 + }, + { + "epoch": 0.6235531296840217, + "grad_norm": 627.3590698242188, + "learning_rate": 3.835149267936649e-06, + "loss": 17.1499, + "step": 308680 + }, + { + "epoch": 0.6235733303167055, + "grad_norm": 121.15222930908203, + "learning_rate": 3.834809809860508e-06, + "loss": 21.2045, + "step": 308690 + }, + { + "epoch": 0.6235935309493893, + "grad_norm": 364.7321472167969, + "learning_rate": 3.834470357463362e-06, + "loss": 15.5802, + "step": 308700 + }, + { + "epoch": 0.6236137315820731, + "grad_norm": 205.34031677246094, + "learning_rate": 3.834130910746866e-06, + "loss": 16.2231, + "step": 308710 + }, + { + "epoch": 0.6236339322147569, + "grad_norm": 555.9500732421875, + "learning_rate": 3.833791469712676e-06, + "loss": 21.4223, + "step": 308720 + }, + { + "epoch": 0.6236541328474408, + "grad_norm": 374.8460388183594, + "learning_rate": 3.833452034362444e-06, + "loss": 29.5215, + "step": 308730 + }, + { + "epoch": 0.6236743334801246, + "grad_norm": 712.3816528320312, + "learning_rate": 3.833112604697824e-06, + "loss": 18.2273, + "step": 308740 + }, + { + "epoch": 0.6236945341128084, + "grad_norm": 773.9443969726562, + "learning_rate": 3.832773180720475e-06, + "loss": 17.7833, + "step": 308750 + }, + { + "epoch": 0.6237147347454922, + "grad_norm": 219.06417846679688, + "learning_rate": 3.832433762432044e-06, + "loss": 21.787, + "step": 308760 + }, + { + "epoch": 
0.623734935378176, + "grad_norm": 337.8908386230469, + "learning_rate": 3.832094349834191e-06, + "loss": 20.1779, + "step": 308770 + }, + { + "epoch": 0.6237551360108599, + "grad_norm": 743.0489501953125, + "learning_rate": 3.83175494292857e-06, + "loss": 17.6274, + "step": 308780 + }, + { + "epoch": 0.6237753366435437, + "grad_norm": 373.329833984375, + "learning_rate": 3.831415541716831e-06, + "loss": 14.9177, + "step": 308790 + }, + { + "epoch": 0.6237955372762275, + "grad_norm": 205.29086303710938, + "learning_rate": 3.831076146200633e-06, + "loss": 12.49, + "step": 308800 + }, + { + "epoch": 0.6238157379089113, + "grad_norm": 265.4069519042969, + "learning_rate": 3.830736756381626e-06, + "loss": 18.5815, + "step": 308810 + }, + { + "epoch": 0.6238359385415951, + "grad_norm": 422.3825378417969, + "learning_rate": 3.830397372261469e-06, + "loss": 15.7553, + "step": 308820 + }, + { + "epoch": 0.623856139174279, + "grad_norm": 391.0090637207031, + "learning_rate": 3.8300579938418105e-06, + "loss": 15.3095, + "step": 308830 + }, + { + "epoch": 0.6238763398069628, + "grad_norm": 315.3426513671875, + "learning_rate": 3.8297186211243085e-06, + "loss": 17.8949, + "step": 308840 + }, + { + "epoch": 0.6238965404396466, + "grad_norm": 0.0, + "learning_rate": 3.829379254110617e-06, + "loss": 28.5212, + "step": 308850 + }, + { + "epoch": 0.6239167410723304, + "grad_norm": 1928.339599609375, + "learning_rate": 3.829039892802388e-06, + "loss": 29.9941, + "step": 308860 + }, + { + "epoch": 0.6239369417050142, + "grad_norm": 159.69752502441406, + "learning_rate": 3.828700537201277e-06, + "loss": 6.9251, + "step": 308870 + }, + { + "epoch": 0.6239571423376981, + "grad_norm": 973.2032470703125, + "learning_rate": 3.828361187308938e-06, + "loss": 27.3199, + "step": 308880 + }, + { + "epoch": 0.6239773429703819, + "grad_norm": 399.58697509765625, + "learning_rate": 3.828021843127024e-06, + "loss": 21.6232, + "step": 308890 + }, + { + "epoch": 0.6239975436030657, + "grad_norm": 173.24356079101562, + "learning_rate": 3.827682504657187e-06, + "loss": 16.9302, + "step": 308900 + }, + { + "epoch": 0.6240177442357495, + "grad_norm": 319.8014221191406, + "learning_rate": 3.827343171901089e-06, + "loss": 10.0086, + "step": 308910 + }, + { + "epoch": 0.6240379448684332, + "grad_norm": 644.0597534179688, + "learning_rate": 3.827003844860373e-06, + "loss": 37.1336, + "step": 308920 + }, + { + "epoch": 0.624058145501117, + "grad_norm": 12.235321998596191, + "learning_rate": 3.8266645235367e-06, + "loss": 10.2737, + "step": 308930 + }, + { + "epoch": 0.6240783461338009, + "grad_norm": 857.0070190429688, + "learning_rate": 3.826325207931722e-06, + "loss": 33.6433, + "step": 308940 + }, + { + "epoch": 0.6240985467664847, + "grad_norm": 195.88980102539062, + "learning_rate": 3.825985898047091e-06, + "loss": 27.6858, + "step": 308950 + }, + { + "epoch": 0.6241187473991685, + "grad_norm": 135.80322265625, + "learning_rate": 3.8256465938844635e-06, + "loss": 25.9452, + "step": 308960 + }, + { + "epoch": 0.6241389480318523, + "grad_norm": 196.93142700195312, + "learning_rate": 3.8253072954454905e-06, + "loss": 16.756, + "step": 308970 + }, + { + "epoch": 0.6241591486645361, + "grad_norm": 171.48313903808594, + "learning_rate": 3.824968002731831e-06, + "loss": 23.1062, + "step": 308980 + }, + { + "epoch": 0.62417934929722, + "grad_norm": 144.01405334472656, + "learning_rate": 3.824628715745131e-06, + "loss": 19.0789, + "step": 308990 + }, + { + "epoch": 0.6241995499299038, + "grad_norm": 355.7646179199219, + "learning_rate": 
3.82428943448705e-06, + "loss": 15.1852, + "step": 309000 + }, + { + "epoch": 0.6242197505625876, + "grad_norm": 198.680419921875, + "learning_rate": 3.82395015895924e-06, + "loss": 23.6251, + "step": 309010 + }, + { + "epoch": 0.6242399511952714, + "grad_norm": 581.5150756835938, + "learning_rate": 3.823610889163354e-06, + "loss": 20.0148, + "step": 309020 + }, + { + "epoch": 0.6242601518279552, + "grad_norm": 468.5245666503906, + "learning_rate": 3.823271625101045e-06, + "loss": 14.7354, + "step": 309030 + }, + { + "epoch": 0.6242803524606391, + "grad_norm": 198.03665161132812, + "learning_rate": 3.822932366773969e-06, + "loss": 14.4478, + "step": 309040 + }, + { + "epoch": 0.6243005530933229, + "grad_norm": 324.575927734375, + "learning_rate": 3.822593114183777e-06, + "loss": 16.6424, + "step": 309050 + }, + { + "epoch": 0.6243207537260067, + "grad_norm": 225.9619903564453, + "learning_rate": 3.822253867332122e-06, + "loss": 22.4816, + "step": 309060 + }, + { + "epoch": 0.6243409543586905, + "grad_norm": 278.31353759765625, + "learning_rate": 3.821914626220661e-06, + "loss": 13.9285, + "step": 309070 + }, + { + "epoch": 0.6243611549913743, + "grad_norm": 112.58853912353516, + "learning_rate": 3.8215753908510435e-06, + "loss": 19.6135, + "step": 309080 + }, + { + "epoch": 0.6243813556240582, + "grad_norm": 440.0562744140625, + "learning_rate": 3.8212361612249255e-06, + "loss": 16.0825, + "step": 309090 + }, + { + "epoch": 0.624401556256742, + "grad_norm": 377.1228942871094, + "learning_rate": 3.820896937343959e-06, + "loss": 30.0899, + "step": 309100 + }, + { + "epoch": 0.6244217568894258, + "grad_norm": 407.56207275390625, + "learning_rate": 3.820557719209799e-06, + "loss": 14.0827, + "step": 309110 + }, + { + "epoch": 0.6244419575221096, + "grad_norm": 263.2430114746094, + "learning_rate": 3.820218506824096e-06, + "loss": 6.6251, + "step": 309120 + }, + { + "epoch": 0.6244621581547934, + "grad_norm": 319.5250549316406, + "learning_rate": 3.819879300188505e-06, + "loss": 19.9556, + "step": 309130 + }, + { + "epoch": 0.6244823587874773, + "grad_norm": 184.48289489746094, + "learning_rate": 3.8195400993046815e-06, + "loss": 16.9671, + "step": 309140 + }, + { + "epoch": 0.6245025594201611, + "grad_norm": 173.50814819335938, + "learning_rate": 3.819200904174274e-06, + "loss": 15.9102, + "step": 309150 + }, + { + "epoch": 0.6245227600528449, + "grad_norm": 686.6409912109375, + "learning_rate": 3.818861714798939e-06, + "loss": 37.288, + "step": 309160 + }, + { + "epoch": 0.6245429606855287, + "grad_norm": 157.87832641601562, + "learning_rate": 3.8185225311803295e-06, + "loss": 10.6197, + "step": 309170 + }, + { + "epoch": 0.6245631613182124, + "grad_norm": 201.24237060546875, + "learning_rate": 3.8181833533200965e-06, + "loss": 9.6761, + "step": 309180 + }, + { + "epoch": 0.6245833619508963, + "grad_norm": 208.93910217285156, + "learning_rate": 3.817844181219893e-06, + "loss": 42.3918, + "step": 309190 + }, + { + "epoch": 0.6246035625835801, + "grad_norm": 808.4969482421875, + "learning_rate": 3.817505014881378e-06, + "loss": 17.2931, + "step": 309200 + }, + { + "epoch": 0.6246237632162639, + "grad_norm": 677.9012451171875, + "learning_rate": 3.817165854306197e-06, + "loss": 10.4501, + "step": 309210 + }, + { + "epoch": 0.6246439638489477, + "grad_norm": 765.8230590820312, + "learning_rate": 3.816826699496006e-06, + "loss": 26.1429, + "step": 309220 + }, + { + "epoch": 0.6246641644816315, + "grad_norm": 210.09230041503906, + "learning_rate": 3.81648755045246e-06, + "loss": 18.1209, + "step": 
309230 + }, + { + "epoch": 0.6246843651143154, + "grad_norm": 301.4053649902344, + "learning_rate": 3.816148407177209e-06, + "loss": 17.2816, + "step": 309240 + }, + { + "epoch": 0.6247045657469992, + "grad_norm": 409.62109375, + "learning_rate": 3.815809269671908e-06, + "loss": 14.4398, + "step": 309250 + }, + { + "epoch": 0.624724766379683, + "grad_norm": 247.8682098388672, + "learning_rate": 3.8154701379382064e-06, + "loss": 21.5997, + "step": 309260 + }, + { + "epoch": 0.6247449670123668, + "grad_norm": 464.99627685546875, + "learning_rate": 3.815131011977763e-06, + "loss": 20.3094, + "step": 309270 + }, + { + "epoch": 0.6247651676450506, + "grad_norm": 595.7064819335938, + "learning_rate": 3.814791891792225e-06, + "loss": 24.8131, + "step": 309280 + }, + { + "epoch": 0.6247853682777345, + "grad_norm": 353.93017578125, + "learning_rate": 3.814452777383248e-06, + "loss": 17.6499, + "step": 309290 + }, + { + "epoch": 0.6248055689104183, + "grad_norm": 65.7148666381836, + "learning_rate": 3.814113668752486e-06, + "loss": 18.4265, + "step": 309300 + }, + { + "epoch": 0.6248257695431021, + "grad_norm": 394.8728942871094, + "learning_rate": 3.8137745659015884e-06, + "loss": 18.7722, + "step": 309310 + }, + { + "epoch": 0.6248459701757859, + "grad_norm": 6.185215473175049, + "learning_rate": 3.81343546883221e-06, + "loss": 16.4115, + "step": 309320 + }, + { + "epoch": 0.6248661708084697, + "grad_norm": 838.2324829101562, + "learning_rate": 3.8130963775460045e-06, + "loss": 22.9985, + "step": 309330 + }, + { + "epoch": 0.6248863714411536, + "grad_norm": 232.22332763671875, + "learning_rate": 3.812757292044622e-06, + "loss": 13.2857, + "step": 309340 + }, + { + "epoch": 0.6249065720738374, + "grad_norm": 90.10667419433594, + "learning_rate": 3.8124182123297153e-06, + "loss": 17.6612, + "step": 309350 + }, + { + "epoch": 0.6249267727065212, + "grad_norm": 521.38037109375, + "learning_rate": 3.8120791384029414e-06, + "loss": 18.6967, + "step": 309360 + }, + { + "epoch": 0.624946973339205, + "grad_norm": 755.168701171875, + "learning_rate": 3.811740070265947e-06, + "loss": 15.8787, + "step": 309370 + }, + { + "epoch": 0.6249671739718888, + "grad_norm": 355.4938659667969, + "learning_rate": 3.8114010079203877e-06, + "loss": 24.3521, + "step": 309380 + }, + { + "epoch": 0.6249873746045727, + "grad_norm": 1087.0955810546875, + "learning_rate": 3.8110619513679176e-06, + "loss": 15.2451, + "step": 309390 + }, + { + "epoch": 0.6250075752372565, + "grad_norm": 869.311767578125, + "learning_rate": 3.810722900610186e-06, + "loss": 25.5715, + "step": 309400 + }, + { + "epoch": 0.6250277758699403, + "grad_norm": 234.51182556152344, + "learning_rate": 3.8103838556488467e-06, + "loss": 18.2496, + "step": 309410 + }, + { + "epoch": 0.6250479765026241, + "grad_norm": 622.8326416015625, + "learning_rate": 3.810044816485551e-06, + "loss": 28.3993, + "step": 309420 + }, + { + "epoch": 0.6250681771353078, + "grad_norm": 56.3055419921875, + "learning_rate": 3.809705783121956e-06, + "loss": 25.9545, + "step": 309430 + }, + { + "epoch": 0.6250883777679916, + "grad_norm": 138.11941528320312, + "learning_rate": 3.8093667555597068e-06, + "loss": 20.2521, + "step": 309440 + }, + { + "epoch": 0.6251085784006755, + "grad_norm": 470.74615478515625, + "learning_rate": 3.809027733800461e-06, + "loss": 13.6337, + "step": 309450 + }, + { + "epoch": 0.6251287790333593, + "grad_norm": 838.5234985351562, + "learning_rate": 3.808688717845871e-06, + "loss": 19.7129, + "step": 309460 + }, + { + "epoch": 0.6251489796660431, + 
"grad_norm": 184.8392791748047, + "learning_rate": 3.8083497076975863e-06, + "loss": 16.6563, + "step": 309470 + }, + { + "epoch": 0.6251691802987269, + "grad_norm": 320.599365234375, + "learning_rate": 3.808010703357261e-06, + "loss": 24.2884, + "step": 309480 + }, + { + "epoch": 0.6251893809314107, + "grad_norm": 146.20480346679688, + "learning_rate": 3.8076717048265477e-06, + "loss": 14.4137, + "step": 309490 + }, + { + "epoch": 0.6252095815640946, + "grad_norm": 42.37262725830078, + "learning_rate": 3.8073327121070968e-06, + "loss": 31.002, + "step": 309500 + }, + { + "epoch": 0.6252297821967784, + "grad_norm": 226.96316528320312, + "learning_rate": 3.8069937252005606e-06, + "loss": 21.3917, + "step": 309510 + }, + { + "epoch": 0.6252499828294622, + "grad_norm": 541.5800170898438, + "learning_rate": 3.8066547441085956e-06, + "loss": 30.4584, + "step": 309520 + }, + { + "epoch": 0.625270183462146, + "grad_norm": 1380.770751953125, + "learning_rate": 3.806315768832847e-06, + "loss": 44.5262, + "step": 309530 + }, + { + "epoch": 0.6252903840948298, + "grad_norm": 390.832275390625, + "learning_rate": 3.805976799374972e-06, + "loss": 43.467, + "step": 309540 + }, + { + "epoch": 0.6253105847275137, + "grad_norm": 647.9263305664062, + "learning_rate": 3.8056378357366224e-06, + "loss": 20.2768, + "step": 309550 + }, + { + "epoch": 0.6253307853601975, + "grad_norm": 167.531005859375, + "learning_rate": 3.8052988779194478e-06, + "loss": 7.7091, + "step": 309560 + }, + { + "epoch": 0.6253509859928813, + "grad_norm": 307.0677490234375, + "learning_rate": 3.804959925925102e-06, + "loss": 16.0153, + "step": 309570 + }, + { + "epoch": 0.6253711866255651, + "grad_norm": 326.3149719238281, + "learning_rate": 3.8046209797552353e-06, + "loss": 10.6156, + "step": 309580 + }, + { + "epoch": 0.6253913872582489, + "grad_norm": 71.02802276611328, + "learning_rate": 3.804282039411504e-06, + "loss": 25.8399, + "step": 309590 + }, + { + "epoch": 0.6254115878909328, + "grad_norm": 35.13526153564453, + "learning_rate": 3.8039431048955537e-06, + "loss": 24.6193, + "step": 309600 + }, + { + "epoch": 0.6254317885236166, + "grad_norm": 987.6118774414062, + "learning_rate": 3.8036041762090416e-06, + "loss": 12.8252, + "step": 309610 + }, + { + "epoch": 0.6254519891563004, + "grad_norm": 401.3702087402344, + "learning_rate": 3.8032652533536173e-06, + "loss": 21.9011, + "step": 309620 + }, + { + "epoch": 0.6254721897889842, + "grad_norm": 359.6427917480469, + "learning_rate": 3.802926336330933e-06, + "loss": 22.5889, + "step": 309630 + }, + { + "epoch": 0.625492390421668, + "grad_norm": 8.788739204406738, + "learning_rate": 3.80258742514264e-06, + "loss": 9.3963, + "step": 309640 + }, + { + "epoch": 0.6255125910543519, + "grad_norm": 454.0259094238281, + "learning_rate": 3.8022485197903924e-06, + "loss": 15.8655, + "step": 309650 + }, + { + "epoch": 0.6255327916870357, + "grad_norm": 294.662109375, + "learning_rate": 3.801909620275839e-06, + "loss": 26.1775, + "step": 309660 + }, + { + "epoch": 0.6255529923197195, + "grad_norm": 452.68206787109375, + "learning_rate": 3.8015707266006307e-06, + "loss": 32.2405, + "step": 309670 + }, + { + "epoch": 0.6255731929524033, + "grad_norm": 565.408203125, + "learning_rate": 3.801231838766425e-06, + "loss": 21.2661, + "step": 309680 + }, + { + "epoch": 0.625593393585087, + "grad_norm": 261.2580871582031, + "learning_rate": 3.8008929567748676e-06, + "loss": 23.6187, + "step": 309690 + }, + { + "epoch": 0.6256135942177709, + "grad_norm": 815.6544799804688, + "learning_rate": 
3.8005540806276132e-06, + "loss": 24.3779, + "step": 309700 + }, + { + "epoch": 0.6256337948504547, + "grad_norm": 316.2920227050781, + "learning_rate": 3.800215210326312e-06, + "loss": 25.2519, + "step": 309710 + }, + { + "epoch": 0.6256539954831385, + "grad_norm": 341.334716796875, + "learning_rate": 3.7998763458726183e-06, + "loss": 20.4134, + "step": 309720 + }, + { + "epoch": 0.6256741961158223, + "grad_norm": 85.28587341308594, + "learning_rate": 3.79953748726818e-06, + "loss": 21.7302, + "step": 309730 + }, + { + "epoch": 0.6256943967485061, + "grad_norm": 273.3590393066406, + "learning_rate": 3.7991986345146503e-06, + "loss": 22.2922, + "step": 309740 + }, + { + "epoch": 0.62571459738119, + "grad_norm": 473.0509338378906, + "learning_rate": 3.798859787613682e-06, + "loss": 24.1152, + "step": 309750 + }, + { + "epoch": 0.6257347980138738, + "grad_norm": 535.754150390625, + "learning_rate": 3.7985209465669248e-06, + "loss": 28.4428, + "step": 309760 + }, + { + "epoch": 0.6257549986465576, + "grad_norm": 671.6353759765625, + "learning_rate": 3.7981821113760305e-06, + "loss": 15.1593, + "step": 309770 + }, + { + "epoch": 0.6257751992792414, + "grad_norm": 219.89952087402344, + "learning_rate": 3.797843282042652e-06, + "loss": 11.1584, + "step": 309780 + }, + { + "epoch": 0.6257953999119252, + "grad_norm": 437.75531005859375, + "learning_rate": 3.7975044585684382e-06, + "loss": 10.5002, + "step": 309790 + }, + { + "epoch": 0.625815600544609, + "grad_norm": 177.61224365234375, + "learning_rate": 3.797165640955041e-06, + "loss": 23.1008, + "step": 309800 + }, + { + "epoch": 0.6258358011772929, + "grad_norm": 311.1289978027344, + "learning_rate": 3.796826829204116e-06, + "loss": 22.5967, + "step": 309810 + }, + { + "epoch": 0.6258560018099767, + "grad_norm": 255.26515197753906, + "learning_rate": 3.796488023317308e-06, + "loss": 24.2938, + "step": 309820 + }, + { + "epoch": 0.6258762024426605, + "grad_norm": 243.3842315673828, + "learning_rate": 3.796149223296272e-06, + "loss": 11.8302, + "step": 309830 + }, + { + "epoch": 0.6258964030753443, + "grad_norm": 488.80859375, + "learning_rate": 3.79581042914266e-06, + "loss": 32.4117, + "step": 309840 + }, + { + "epoch": 0.6259166037080282, + "grad_norm": 558.3318481445312, + "learning_rate": 3.7954716408581206e-06, + "loss": 21.5674, + "step": 309850 + }, + { + "epoch": 0.625936804340712, + "grad_norm": 383.96905517578125, + "learning_rate": 3.7951328584443063e-06, + "loss": 16.1835, + "step": 309860 + }, + { + "epoch": 0.6259570049733958, + "grad_norm": 351.1549377441406, + "learning_rate": 3.7947940819028678e-06, + "loss": 16.7501, + "step": 309870 + }, + { + "epoch": 0.6259772056060796, + "grad_norm": 608.1884155273438, + "learning_rate": 3.79445531123546e-06, + "loss": 18.2358, + "step": 309880 + }, + { + "epoch": 0.6259974062387634, + "grad_norm": 593.22021484375, + "learning_rate": 3.794116546443727e-06, + "loss": 21.3049, + "step": 309890 + }, + { + "epoch": 0.6260176068714473, + "grad_norm": 402.9985046386719, + "learning_rate": 3.793777787529325e-06, + "loss": 18.023, + "step": 309900 + }, + { + "epoch": 0.6260378075041311, + "grad_norm": 287.7555236816406, + "learning_rate": 3.793439034493905e-06, + "loss": 28.1099, + "step": 309910 + }, + { + "epoch": 0.6260580081368149, + "grad_norm": 412.591796875, + "learning_rate": 3.7931002873391156e-06, + "loss": 16.8485, + "step": 309920 + }, + { + "epoch": 0.6260782087694987, + "grad_norm": 298.6705322265625, + "learning_rate": 3.792761546066609e-06, + "loss": 10.4336, + "step": 309930 + }, 
+ { + "epoch": 0.6260984094021824, + "grad_norm": 157.7567901611328, + "learning_rate": 3.792422810678037e-06, + "loss": 9.5392, + "step": 309940 + }, + { + "epoch": 0.6261186100348662, + "grad_norm": 10.429789543151855, + "learning_rate": 3.7920840811750485e-06, + "loss": 10.1944, + "step": 309950 + }, + { + "epoch": 0.6261388106675501, + "grad_norm": 173.67898559570312, + "learning_rate": 3.7917453575592956e-06, + "loss": 8.241, + "step": 309960 + }, + { + "epoch": 0.6261590113002339, + "grad_norm": 150.3766326904297, + "learning_rate": 3.7914066398324317e-06, + "loss": 15.8893, + "step": 309970 + }, + { + "epoch": 0.6261792119329177, + "grad_norm": 94.3939208984375, + "learning_rate": 3.7910679279961025e-06, + "loss": 18.6726, + "step": 309980 + }, + { + "epoch": 0.6261994125656015, + "grad_norm": 316.7868347167969, + "learning_rate": 3.790729222051962e-06, + "loss": 19.7568, + "step": 309990 + }, + { + "epoch": 0.6262196131982853, + "grad_norm": 248.9897003173828, + "learning_rate": 3.790390522001662e-06, + "loss": 27.3872, + "step": 310000 + }, + { + "epoch": 0.6262398138309692, + "grad_norm": 206.1515350341797, + "learning_rate": 3.790051827846851e-06, + "loss": 24.9463, + "step": 310010 + }, + { + "epoch": 0.626260014463653, + "grad_norm": 340.69610595703125, + "learning_rate": 3.789713139589181e-06, + "loss": 27.058, + "step": 310020 + }, + { + "epoch": 0.6262802150963368, + "grad_norm": 232.6924591064453, + "learning_rate": 3.789374457230301e-06, + "loss": 15.3988, + "step": 310030 + }, + { + "epoch": 0.6263004157290206, + "grad_norm": 541.9817504882812, + "learning_rate": 3.789035780771866e-06, + "loss": 36.8641, + "step": 310040 + }, + { + "epoch": 0.6263206163617044, + "grad_norm": 483.5481262207031, + "learning_rate": 3.7886971102155205e-06, + "loss": 48.6675, + "step": 310050 + }, + { + "epoch": 0.6263408169943883, + "grad_norm": 46.497520446777344, + "learning_rate": 3.78835844556292e-06, + "loss": 15.9449, + "step": 310060 + }, + { + "epoch": 0.6263610176270721, + "grad_norm": 92.47747039794922, + "learning_rate": 3.7880197868157143e-06, + "loss": 15.4105, + "step": 310070 + }, + { + "epoch": 0.6263812182597559, + "grad_norm": 259.61871337890625, + "learning_rate": 3.7876811339755522e-06, + "loss": 17.3801, + "step": 310080 + }, + { + "epoch": 0.6264014188924397, + "grad_norm": 244.34396362304688, + "learning_rate": 3.7873424870440845e-06, + "loss": 21.6106, + "step": 310090 + }, + { + "epoch": 0.6264216195251235, + "grad_norm": 634.0042114257812, + "learning_rate": 3.787003846022964e-06, + "loss": 32.435, + "step": 310100 + }, + { + "epoch": 0.6264418201578074, + "grad_norm": 480.94622802734375, + "learning_rate": 3.786665210913839e-06, + "loss": 15.1496, + "step": 310110 + }, + { + "epoch": 0.6264620207904912, + "grad_norm": 491.8776550292969, + "learning_rate": 3.786326581718359e-06, + "loss": 15.3441, + "step": 310120 + }, + { + "epoch": 0.626482221423175, + "grad_norm": 540.437255859375, + "learning_rate": 3.785987958438179e-06, + "loss": 16.8419, + "step": 310130 + }, + { + "epoch": 0.6265024220558588, + "grad_norm": 169.4524383544922, + "learning_rate": 3.785649341074944e-06, + "loss": 17.0484, + "step": 310140 + }, + { + "epoch": 0.6265226226885426, + "grad_norm": 538.591064453125, + "learning_rate": 3.785310729630307e-06, + "loss": 20.2068, + "step": 310150 + }, + { + "epoch": 0.6265428233212265, + "grad_norm": 571.2584838867188, + "learning_rate": 3.784972124105919e-06, + "loss": 18.1828, + "step": 310160 + }, + { + "epoch": 0.6265630239539103, + "grad_norm": 
177.67758178710938, + "learning_rate": 3.7846335245034304e-06, + "loss": 23.9734, + "step": 310170 + }, + { + "epoch": 0.6265832245865941, + "grad_norm": 356.0690002441406, + "learning_rate": 3.784294930824489e-06, + "loss": 16.6091, + "step": 310180 + }, + { + "epoch": 0.6266034252192779, + "grad_norm": 701.2662353515625, + "learning_rate": 3.783956343070746e-06, + "loss": 22.5459, + "step": 310190 + }, + { + "epoch": 0.6266236258519616, + "grad_norm": 488.3169250488281, + "learning_rate": 3.7836177612438557e-06, + "loss": 17.9133, + "step": 310200 + }, + { + "epoch": 0.6266438264846454, + "grad_norm": 568.4744262695312, + "learning_rate": 3.7832791853454616e-06, + "loss": 34.4656, + "step": 310210 + }, + { + "epoch": 0.6266640271173293, + "grad_norm": 627.6993408203125, + "learning_rate": 3.782940615377218e-06, + "loss": 17.7456, + "step": 310220 + }, + { + "epoch": 0.6266842277500131, + "grad_norm": 429.12939453125, + "learning_rate": 3.7826020513407753e-06, + "loss": 18.8567, + "step": 310230 + }, + { + "epoch": 0.6267044283826969, + "grad_norm": 638.3252563476562, + "learning_rate": 3.7822634932377814e-06, + "loss": 27.4842, + "step": 310240 + }, + { + "epoch": 0.6267246290153807, + "grad_norm": 44.70691680908203, + "learning_rate": 3.7819249410698877e-06, + "loss": 25.7978, + "step": 310250 + }, + { + "epoch": 0.6267448296480645, + "grad_norm": 253.63330078125, + "learning_rate": 3.7815863948387455e-06, + "loss": 12.2805, + "step": 310260 + }, + { + "epoch": 0.6267650302807484, + "grad_norm": 644.2283325195312, + "learning_rate": 3.7812478545460017e-06, + "loss": 24.6817, + "step": 310270 + }, + { + "epoch": 0.6267852309134322, + "grad_norm": 488.9724426269531, + "learning_rate": 3.7809093201933078e-06, + "loss": 33.4482, + "step": 310280 + }, + { + "epoch": 0.626805431546116, + "grad_norm": 310.28338623046875, + "learning_rate": 3.7805707917823165e-06, + "loss": 16.1545, + "step": 310290 + }, + { + "epoch": 0.6268256321787998, + "grad_norm": 294.4074401855469, + "learning_rate": 3.7802322693146726e-06, + "loss": 28.3809, + "step": 310300 + }, + { + "epoch": 0.6268458328114836, + "grad_norm": 351.2818603515625, + "learning_rate": 3.7798937527920294e-06, + "loss": 20.5131, + "step": 310310 + }, + { + "epoch": 0.6268660334441675, + "grad_norm": 283.7843322753906, + "learning_rate": 3.7795552422160364e-06, + "loss": 20.6313, + "step": 310320 + }, + { + "epoch": 0.6268862340768513, + "grad_norm": 283.344970703125, + "learning_rate": 3.779216737588344e-06, + "loss": 24.1549, + "step": 310330 + }, + { + "epoch": 0.6269064347095351, + "grad_norm": 543.825927734375, + "learning_rate": 3.7788782389105994e-06, + "loss": 13.9601, + "step": 310340 + }, + { + "epoch": 0.6269266353422189, + "grad_norm": 472.18084716796875, + "learning_rate": 3.778539746184454e-06, + "loss": 42.524, + "step": 310350 + }, + { + "epoch": 0.6269468359749027, + "grad_norm": 314.9637756347656, + "learning_rate": 3.77820125941156e-06, + "loss": 37.5033, + "step": 310360 + }, + { + "epoch": 0.6269670366075866, + "grad_norm": 219.22573852539062, + "learning_rate": 3.7778627785935627e-06, + "loss": 25.2901, + "step": 310370 + }, + { + "epoch": 0.6269872372402704, + "grad_norm": 186.105224609375, + "learning_rate": 3.777524303732115e-06, + "loss": 16.8399, + "step": 310380 + }, + { + "epoch": 0.6270074378729542, + "grad_norm": 413.4272155761719, + "learning_rate": 3.777185834828866e-06, + "loss": 24.5102, + "step": 310390 + }, + { + "epoch": 0.627027638505638, + "grad_norm": 345.4469299316406, + "learning_rate": 
3.776847371885464e-06, + "loss": 19.8398, + "step": 310400 + }, + { + "epoch": 0.6270478391383218, + "grad_norm": 323.447509765625, + "learning_rate": 3.77650891490356e-06, + "loss": 15.6293, + "step": 310410 + }, + { + "epoch": 0.6270680397710057, + "grad_norm": 263.461181640625, + "learning_rate": 3.776170463884804e-06, + "loss": 13.2407, + "step": 310420 + }, + { + "epoch": 0.6270882404036895, + "grad_norm": 169.77740478515625, + "learning_rate": 3.775832018830843e-06, + "loss": 22.9864, + "step": 310430 + }, + { + "epoch": 0.6271084410363733, + "grad_norm": 720.8308715820312, + "learning_rate": 3.7754935797433284e-06, + "loss": 23.985, + "step": 310440 + }, + { + "epoch": 0.6271286416690571, + "grad_norm": 538.3779296875, + "learning_rate": 3.7751551466239113e-06, + "loss": 9.9866, + "step": 310450 + }, + { + "epoch": 0.6271488423017408, + "grad_norm": 314.5245361328125, + "learning_rate": 3.774816719474238e-06, + "loss": 16.5886, + "step": 310460 + }, + { + "epoch": 0.6271690429344247, + "grad_norm": 285.7383117675781, + "learning_rate": 3.77447829829596e-06, + "loss": 25.6008, + "step": 310470 + }, + { + "epoch": 0.6271892435671085, + "grad_norm": 823.970947265625, + "learning_rate": 3.7741398830907256e-06, + "loss": 24.0385, + "step": 310480 + }, + { + "epoch": 0.6272094441997923, + "grad_norm": 648.63427734375, + "learning_rate": 3.7738014738601856e-06, + "loss": 21.385, + "step": 310490 + }, + { + "epoch": 0.6272296448324761, + "grad_norm": 459.6174011230469, + "learning_rate": 3.7734630706059873e-06, + "loss": 19.2144, + "step": 310500 + }, + { + "epoch": 0.6272498454651599, + "grad_norm": 404.1106872558594, + "learning_rate": 3.7731246733297816e-06, + "loss": 30.6253, + "step": 310510 + }, + { + "epoch": 0.6272700460978438, + "grad_norm": 747.3106689453125, + "learning_rate": 3.772786282033218e-06, + "loss": 30.2892, + "step": 310520 + }, + { + "epoch": 0.6272902467305276, + "grad_norm": 395.8547058105469, + "learning_rate": 3.7724478967179457e-06, + "loss": 22.2229, + "step": 310530 + }, + { + "epoch": 0.6273104473632114, + "grad_norm": 424.5818176269531, + "learning_rate": 3.7721095173856126e-06, + "loss": 23.7399, + "step": 310540 + }, + { + "epoch": 0.6273306479958952, + "grad_norm": 326.8860778808594, + "learning_rate": 3.7717711440378695e-06, + "loss": 19.5756, + "step": 310550 + }, + { + "epoch": 0.627350848628579, + "grad_norm": 653.8480224609375, + "learning_rate": 3.771432776676364e-06, + "loss": 23.9726, + "step": 310560 + }, + { + "epoch": 0.6273710492612629, + "grad_norm": 372.75592041015625, + "learning_rate": 3.771094415302745e-06, + "loss": 16.9206, + "step": 310570 + }, + { + "epoch": 0.6273912498939467, + "grad_norm": 274.6285705566406, + "learning_rate": 3.7707560599186664e-06, + "loss": 31.2049, + "step": 310580 + }, + { + "epoch": 0.6274114505266305, + "grad_norm": 58.87826156616211, + "learning_rate": 3.7704177105257707e-06, + "loss": 34.7307, + "step": 310590 + }, + { + "epoch": 0.6274316511593143, + "grad_norm": 473.7864990234375, + "learning_rate": 3.77007936712571e-06, + "loss": 32.2064, + "step": 310600 + }, + { + "epoch": 0.6274518517919981, + "grad_norm": 375.2981872558594, + "learning_rate": 3.769741029720134e-06, + "loss": 26.7564, + "step": 310610 + }, + { + "epoch": 0.627472052424682, + "grad_norm": 242.23406982421875, + "learning_rate": 3.769402698310692e-06, + "loss": 11.6546, + "step": 310620 + }, + { + "epoch": 0.6274922530573658, + "grad_norm": 734.3656005859375, + "learning_rate": 3.7690643728990306e-06, + "loss": 18.6892, + "step": 310630 
+ }, + { + "epoch": 0.6275124536900496, + "grad_norm": 1153.2177734375, + "learning_rate": 3.7687260534868e-06, + "loss": 27.5929, + "step": 310640 + }, + { + "epoch": 0.6275326543227334, + "grad_norm": 521.0066528320312, + "learning_rate": 3.7683877400756513e-06, + "loss": 13.9517, + "step": 310650 + }, + { + "epoch": 0.6275528549554172, + "grad_norm": 916.9699096679688, + "learning_rate": 3.768049432667229e-06, + "loss": 10.7277, + "step": 310660 + }, + { + "epoch": 0.6275730555881011, + "grad_norm": 442.46075439453125, + "learning_rate": 3.7677111312631848e-06, + "loss": 12.1446, + "step": 310670 + }, + { + "epoch": 0.6275932562207849, + "grad_norm": 144.5475311279297, + "learning_rate": 3.7673728358651683e-06, + "loss": 25.2282, + "step": 310680 + }, + { + "epoch": 0.6276134568534687, + "grad_norm": 227.0414581298828, + "learning_rate": 3.7670345464748266e-06, + "loss": 26.1985, + "step": 310690 + }, + { + "epoch": 0.6276336574861525, + "grad_norm": 123.23226928710938, + "learning_rate": 3.7666962630938084e-06, + "loss": 9.3128, + "step": 310700 + }, + { + "epoch": 0.6276538581188362, + "grad_norm": 381.5736999511719, + "learning_rate": 3.7663579857237642e-06, + "loss": 11.2019, + "step": 310710 + }, + { + "epoch": 0.62767405875152, + "grad_norm": 282.8387451171875, + "learning_rate": 3.7660197143663407e-06, + "loss": 23.1677, + "step": 310720 + }, + { + "epoch": 0.6276942593842039, + "grad_norm": 281.12042236328125, + "learning_rate": 3.7656814490231864e-06, + "loss": 30.3826, + "step": 310730 + }, + { + "epoch": 0.6277144600168877, + "grad_norm": 271.38616943359375, + "learning_rate": 3.765343189695954e-06, + "loss": 34.6207, + "step": 310740 + }, + { + "epoch": 0.6277346606495715, + "grad_norm": 295.36065673828125, + "learning_rate": 3.765004936386286e-06, + "loss": 12.8942, + "step": 310750 + }, + { + "epoch": 0.6277548612822553, + "grad_norm": 93.36185455322266, + "learning_rate": 3.764666689095835e-06, + "loss": 19.0268, + "step": 310760 + }, + { + "epoch": 0.6277750619149391, + "grad_norm": 547.2508544921875, + "learning_rate": 3.7643284478262494e-06, + "loss": 20.2266, + "step": 310770 + }, + { + "epoch": 0.627795262547623, + "grad_norm": 323.8477783203125, + "learning_rate": 3.7639902125791774e-06, + "loss": 25.2107, + "step": 310780 + }, + { + "epoch": 0.6278154631803068, + "grad_norm": 288.24127197265625, + "learning_rate": 3.7636519833562668e-06, + "loss": 28.4912, + "step": 310790 + }, + { + "epoch": 0.6278356638129906, + "grad_norm": 0.0, + "learning_rate": 3.7633137601591647e-06, + "loss": 36.0997, + "step": 310800 + }, + { + "epoch": 0.6278558644456744, + "grad_norm": 452.8955383300781, + "learning_rate": 3.762975542989525e-06, + "loss": 21.4517, + "step": 310810 + }, + { + "epoch": 0.6278760650783582, + "grad_norm": 303.65679931640625, + "learning_rate": 3.762637331848989e-06, + "loss": 20.1704, + "step": 310820 + }, + { + "epoch": 0.6278962657110421, + "grad_norm": 466.28228759765625, + "learning_rate": 3.76229912673921e-06, + "loss": 30.9657, + "step": 310830 + }, + { + "epoch": 0.6279164663437259, + "grad_norm": 189.0724639892578, + "learning_rate": 3.761960927661836e-06, + "loss": 8.2174, + "step": 310840 + }, + { + "epoch": 0.6279366669764097, + "grad_norm": 487.91607666015625, + "learning_rate": 3.761622734618513e-06, + "loss": 11.4559, + "step": 310850 + }, + { + "epoch": 0.6279568676090935, + "grad_norm": 331.7529296875, + "learning_rate": 3.7612845476108906e-06, + "loss": 13.7425, + "step": 310860 + }, + { + "epoch": 0.6279770682417773, + "grad_norm": 
489.21881103515625, + "learning_rate": 3.7609463666406175e-06, + "loss": 13.6078, + "step": 310870 + }, + { + "epoch": 0.6279972688744612, + "grad_norm": 186.98060607910156, + "learning_rate": 3.7606081917093416e-06, + "loss": 22.1829, + "step": 310880 + }, + { + "epoch": 0.628017469507145, + "grad_norm": 330.9054870605469, + "learning_rate": 3.7602700228187096e-06, + "loss": 16.8957, + "step": 310890 + }, + { + "epoch": 0.6280376701398288, + "grad_norm": 496.9715576171875, + "learning_rate": 3.759931859970374e-06, + "loss": 16.041, + "step": 310900 + }, + { + "epoch": 0.6280578707725126, + "grad_norm": 460.6424255371094, + "learning_rate": 3.7595937031659775e-06, + "loss": 12.1437, + "step": 310910 + }, + { + "epoch": 0.6280780714051964, + "grad_norm": 303.2657470703125, + "learning_rate": 3.7592555524071716e-06, + "loss": 8.7752, + "step": 310920 + }, + { + "epoch": 0.6280982720378803, + "grad_norm": 712.6080932617188, + "learning_rate": 3.7589174076956036e-06, + "loss": 27.3341, + "step": 310930 + }, + { + "epoch": 0.6281184726705641, + "grad_norm": 289.4288635253906, + "learning_rate": 3.7585792690329224e-06, + "loss": 18.1774, + "step": 310940 + }, + { + "epoch": 0.6281386733032479, + "grad_norm": 559.0425415039062, + "learning_rate": 3.758241136420775e-06, + "loss": 23.819, + "step": 310950 + }, + { + "epoch": 0.6281588739359317, + "grad_norm": 271.9239196777344, + "learning_rate": 3.7579030098608077e-06, + "loss": 12.9474, + "step": 310960 + }, + { + "epoch": 0.6281790745686154, + "grad_norm": 141.35003662109375, + "learning_rate": 3.7575648893546745e-06, + "loss": 13.6415, + "step": 310970 + }, + { + "epoch": 0.6281992752012993, + "grad_norm": 308.360595703125, + "learning_rate": 3.757226774904016e-06, + "loss": 20.1192, + "step": 310980 + }, + { + "epoch": 0.6282194758339831, + "grad_norm": 738.3095092773438, + "learning_rate": 3.7568886665104836e-06, + "loss": 16.2548, + "step": 310990 + }, + { + "epoch": 0.6282396764666669, + "grad_norm": 2.4224720001220703, + "learning_rate": 3.756550564175727e-06, + "loss": 14.6956, + "step": 311000 + }, + { + "epoch": 0.6282598770993507, + "grad_norm": 794.7053833007812, + "learning_rate": 3.756212467901391e-06, + "loss": 14.715, + "step": 311010 + }, + { + "epoch": 0.6282800777320345, + "grad_norm": 13.162860870361328, + "learning_rate": 3.755874377689125e-06, + "loss": 11.476, + "step": 311020 + }, + { + "epoch": 0.6283002783647184, + "grad_norm": 348.586669921875, + "learning_rate": 3.7555362935405766e-06, + "loss": 11.4744, + "step": 311030 + }, + { + "epoch": 0.6283204789974022, + "grad_norm": 177.64598083496094, + "learning_rate": 3.7551982154573928e-06, + "loss": 23.1496, + "step": 311040 + }, + { + "epoch": 0.628340679630086, + "grad_norm": 467.7887268066406, + "learning_rate": 3.75486014344122e-06, + "loss": 21.1767, + "step": 311050 + }, + { + "epoch": 0.6283608802627698, + "grad_norm": 406.47515869140625, + "learning_rate": 3.7545220774937115e-06, + "loss": 39.5656, + "step": 311060 + }, + { + "epoch": 0.6283810808954536, + "grad_norm": 168.41502380371094, + "learning_rate": 3.754184017616509e-06, + "loss": 23.2303, + "step": 311070 + }, + { + "epoch": 0.6284012815281375, + "grad_norm": 72.38956451416016, + "learning_rate": 3.7538459638112635e-06, + "loss": 26.4261, + "step": 311080 + }, + { + "epoch": 0.6284214821608213, + "grad_norm": 300.6415710449219, + "learning_rate": 3.7535079160796207e-06, + "loss": 19.0704, + "step": 311090 + }, + { + "epoch": 0.6284416827935051, + "grad_norm": 391.12310791015625, + "learning_rate": 
3.7531698744232307e-06, + "loss": 21.1738, + "step": 311100 + }, + { + "epoch": 0.6284618834261889, + "grad_norm": 82.55567169189453, + "learning_rate": 3.7528318388437375e-06, + "loss": 38.9412, + "step": 311110 + }, + { + "epoch": 0.6284820840588727, + "grad_norm": 283.8590393066406, + "learning_rate": 3.752493809342791e-06, + "loss": 20.1902, + "step": 311120 + }, + { + "epoch": 0.6285022846915566, + "grad_norm": 518.7247314453125, + "learning_rate": 3.7521557859220405e-06, + "loss": 20.6802, + "step": 311130 + }, + { + "epoch": 0.6285224853242404, + "grad_norm": 338.15576171875, + "learning_rate": 3.751817768583129e-06, + "loss": 11.9434, + "step": 311140 + }, + { + "epoch": 0.6285426859569242, + "grad_norm": 275.5709228515625, + "learning_rate": 3.7514797573277075e-06, + "loss": 19.4292, + "step": 311150 + }, + { + "epoch": 0.628562886589608, + "grad_norm": 585.8729858398438, + "learning_rate": 3.751141752157423e-06, + "loss": 20.0025, + "step": 311160 + }, + { + "epoch": 0.6285830872222918, + "grad_norm": 671.9908447265625, + "learning_rate": 3.7508037530739207e-06, + "loss": 23.6709, + "step": 311170 + }, + { + "epoch": 0.6286032878549757, + "grad_norm": 299.63189697265625, + "learning_rate": 3.7504657600788484e-06, + "loss": 19.1549, + "step": 311180 + }, + { + "epoch": 0.6286234884876595, + "grad_norm": 507.8825378417969, + "learning_rate": 3.750127773173858e-06, + "loss": 18.7713, + "step": 311190 + }, + { + "epoch": 0.6286436891203433, + "grad_norm": 126.44001770019531, + "learning_rate": 3.74978979236059e-06, + "loss": 16.1273, + "step": 311200 + }, + { + "epoch": 0.6286638897530271, + "grad_norm": 3399.062744140625, + "learning_rate": 3.7494518176406956e-06, + "loss": 28.3728, + "step": 311210 + }, + { + "epoch": 0.6286840903857108, + "grad_norm": 226.7476806640625, + "learning_rate": 3.7491138490158213e-06, + "loss": 12.0473, + "step": 311220 + }, + { + "epoch": 0.6287042910183946, + "grad_norm": 197.57363891601562, + "learning_rate": 3.7487758864876157e-06, + "loss": 15.778, + "step": 311230 + }, + { + "epoch": 0.6287244916510785, + "grad_norm": 279.3779296875, + "learning_rate": 3.7484379300577233e-06, + "loss": 16.9205, + "step": 311240 + }, + { + "epoch": 0.6287446922837623, + "grad_norm": 504.41650390625, + "learning_rate": 3.748099979727792e-06, + "loss": 22.9833, + "step": 311250 + }, + { + "epoch": 0.6287648929164461, + "grad_norm": 263.5953063964844, + "learning_rate": 3.7477620354994733e-06, + "loss": 16.6843, + "step": 311260 + }, + { + "epoch": 0.6287850935491299, + "grad_norm": 2.402722120285034, + "learning_rate": 3.7474240973744063e-06, + "loss": 8.7284, + "step": 311270 + }, + { + "epoch": 0.6288052941818137, + "grad_norm": 89.73186492919922, + "learning_rate": 3.7470861653542438e-06, + "loss": 12.8616, + "step": 311280 + }, + { + "epoch": 0.6288254948144976, + "grad_norm": 234.08595275878906, + "learning_rate": 3.746748239440633e-06, + "loss": 27.2208, + "step": 311290 + }, + { + "epoch": 0.6288456954471814, + "grad_norm": 146.21871948242188, + "learning_rate": 3.7464103196352176e-06, + "loss": 17.5699, + "step": 311300 + }, + { + "epoch": 0.6288658960798652, + "grad_norm": 1773.0771484375, + "learning_rate": 3.746072405939646e-06, + "loss": 50.9809, + "step": 311310 + }, + { + "epoch": 0.628886096712549, + "grad_norm": 215.26148986816406, + "learning_rate": 3.7457344983555666e-06, + "loss": 29.4295, + "step": 311320 + }, + { + "epoch": 0.6289062973452328, + "grad_norm": 429.1287536621094, + "learning_rate": 3.7453965968846244e-06, + "loss": 14.8721, + 
"step": 311330 + }, + { + "epoch": 0.6289264979779167, + "grad_norm": 365.86956787109375, + "learning_rate": 3.7450587015284655e-06, + "loss": 15.0175, + "step": 311340 + }, + { + "epoch": 0.6289466986106005, + "grad_norm": 155.48924255371094, + "learning_rate": 3.7447208122887425e-06, + "loss": 28.3511, + "step": 311350 + }, + { + "epoch": 0.6289668992432843, + "grad_norm": 1091.9840087890625, + "learning_rate": 3.744382929167094e-06, + "loss": 22.5819, + "step": 311360 + }, + { + "epoch": 0.6289870998759681, + "grad_norm": 469.363525390625, + "learning_rate": 3.744045052165172e-06, + "loss": 21.6345, + "step": 311370 + }, + { + "epoch": 0.629007300508652, + "grad_norm": 602.15234375, + "learning_rate": 3.7437071812846216e-06, + "loss": 11.4731, + "step": 311380 + }, + { + "epoch": 0.6290275011413358, + "grad_norm": 235.06373596191406, + "learning_rate": 3.7433693165270918e-06, + "loss": 15.778, + "step": 311390 + }, + { + "epoch": 0.6290477017740196, + "grad_norm": 307.0328674316406, + "learning_rate": 3.7430314578942263e-06, + "loss": 19.5603, + "step": 311400 + }, + { + "epoch": 0.6290679024067034, + "grad_norm": 518.789306640625, + "learning_rate": 3.7426936053876715e-06, + "loss": 24.6092, + "step": 311410 + }, + { + "epoch": 0.6290881030393872, + "grad_norm": 1032.965087890625, + "learning_rate": 3.74235575900908e-06, + "loss": 21.2157, + "step": 311420 + }, + { + "epoch": 0.629108303672071, + "grad_norm": 1.090482234954834, + "learning_rate": 3.742017918760089e-06, + "loss": 14.1625, + "step": 311430 + }, + { + "epoch": 0.6291285043047549, + "grad_norm": 374.23968505859375, + "learning_rate": 3.741680084642353e-06, + "loss": 13.4388, + "step": 311440 + }, + { + "epoch": 0.6291487049374387, + "grad_norm": 486.8422546386719, + "learning_rate": 3.7413422566575153e-06, + "loss": 17.2724, + "step": 311450 + }, + { + "epoch": 0.6291689055701225, + "grad_norm": 76.85228729248047, + "learning_rate": 3.741004434807223e-06, + "loss": 14.5166, + "step": 311460 + }, + { + "epoch": 0.6291891062028063, + "grad_norm": 596.9525756835938, + "learning_rate": 3.7406666190931213e-06, + "loss": 12.2197, + "step": 311470 + }, + { + "epoch": 0.62920930683549, + "grad_norm": 241.2703094482422, + "learning_rate": 3.740328809516859e-06, + "loss": 9.6603, + "step": 311480 + }, + { + "epoch": 0.6292295074681739, + "grad_norm": 472.1174011230469, + "learning_rate": 3.7399910060800806e-06, + "loss": 19.4802, + "step": 311490 + }, + { + "epoch": 0.6292497081008577, + "grad_norm": 348.6268615722656, + "learning_rate": 3.7396532087844318e-06, + "loss": 17.3046, + "step": 311500 + }, + { + "epoch": 0.6292699087335415, + "grad_norm": 490.8245849609375, + "learning_rate": 3.7393154176315637e-06, + "loss": 15.2983, + "step": 311510 + }, + { + "epoch": 0.6292901093662253, + "grad_norm": 168.55055236816406, + "learning_rate": 3.7389776326231163e-06, + "loss": 14.4658, + "step": 311520 + }, + { + "epoch": 0.6293103099989091, + "grad_norm": 235.340576171875, + "learning_rate": 3.73863985376074e-06, + "loss": 35.2457, + "step": 311530 + }, + { + "epoch": 0.629330510631593, + "grad_norm": 492.92547607421875, + "learning_rate": 3.73830208104608e-06, + "loss": 19.1454, + "step": 311540 + }, + { + "epoch": 0.6293507112642768, + "grad_norm": 261.28448486328125, + "learning_rate": 3.7379643144807835e-06, + "loss": 20.8435, + "step": 311550 + }, + { + "epoch": 0.6293709118969606, + "grad_norm": 269.05230712890625, + "learning_rate": 3.737626554066495e-06, + "loss": 27.2126, + "step": 311560 + }, + { + "epoch": 0.6293911125296444, 
+ "grad_norm": 1340.7391357421875, + "learning_rate": 3.7372887998048608e-06, + "loss": 58.1968, + "step": 311570 + }, + { + "epoch": 0.6294113131623282, + "grad_norm": 156.26901245117188, + "learning_rate": 3.7369510516975303e-06, + "loss": 15.1275, + "step": 311580 + }, + { + "epoch": 0.629431513795012, + "grad_norm": 232.62744140625, + "learning_rate": 3.736613309746145e-06, + "loss": 18.4961, + "step": 311590 + }, + { + "epoch": 0.6294517144276959, + "grad_norm": 362.4714660644531, + "learning_rate": 3.736275573952354e-06, + "loss": 25.5498, + "step": 311600 + }, + { + "epoch": 0.6294719150603797, + "grad_norm": 1.2732248306274414, + "learning_rate": 3.735937844317803e-06, + "loss": 10.7706, + "step": 311610 + }, + { + "epoch": 0.6294921156930635, + "grad_norm": 22.51714324951172, + "learning_rate": 3.735600120844137e-06, + "loss": 26.3688, + "step": 311620 + }, + { + "epoch": 0.6295123163257473, + "grad_norm": 1563.1842041015625, + "learning_rate": 3.735262403533002e-06, + "loss": 31.6529, + "step": 311630 + }, + { + "epoch": 0.6295325169584312, + "grad_norm": 315.1824645996094, + "learning_rate": 3.7349246923860465e-06, + "loss": 9.2269, + "step": 311640 + }, + { + "epoch": 0.629552717591115, + "grad_norm": 768.6835327148438, + "learning_rate": 3.7345869874049136e-06, + "loss": 17.4475, + "step": 311650 + }, + { + "epoch": 0.6295729182237988, + "grad_norm": 954.1617431640625, + "learning_rate": 3.734249288591249e-06, + "loss": 14.1233, + "step": 311660 + }, + { + "epoch": 0.6295931188564826, + "grad_norm": 656.4332885742188, + "learning_rate": 3.733911595946701e-06, + "loss": 17.6862, + "step": 311670 + }, + { + "epoch": 0.6296133194891664, + "grad_norm": 489.056884765625, + "learning_rate": 3.7335739094729153e-06, + "loss": 22.9326, + "step": 311680 + }, + { + "epoch": 0.6296335201218503, + "grad_norm": 345.76007080078125, + "learning_rate": 3.7332362291715353e-06, + "loss": 16.6467, + "step": 311690 + }, + { + "epoch": 0.6296537207545341, + "grad_norm": 861.7413330078125, + "learning_rate": 3.7328985550442086e-06, + "loss": 26.7971, + "step": 311700 + }, + { + "epoch": 0.6296739213872179, + "grad_norm": 249.65028381347656, + "learning_rate": 3.7325608870925817e-06, + "loss": 26.8184, + "step": 311710 + }, + { + "epoch": 0.6296941220199017, + "grad_norm": 374.0284423828125, + "learning_rate": 3.7322232253182984e-06, + "loss": 18.6817, + "step": 311720 + }, + { + "epoch": 0.6297143226525855, + "grad_norm": 8.936347961425781, + "learning_rate": 3.731885569723004e-06, + "loss": 16.0041, + "step": 311730 + }, + { + "epoch": 0.6297345232852692, + "grad_norm": 359.57806396484375, + "learning_rate": 3.7315479203083483e-06, + "loss": 7.8561, + "step": 311740 + }, + { + "epoch": 0.6297547239179531, + "grad_norm": 796.0985717773438, + "learning_rate": 3.7312102770759724e-06, + "loss": 17.5286, + "step": 311750 + }, + { + "epoch": 0.6297749245506369, + "grad_norm": 198.72044372558594, + "learning_rate": 3.7308726400275243e-06, + "loss": 14.124, + "step": 311760 + }, + { + "epoch": 0.6297951251833207, + "grad_norm": 567.6104125976562, + "learning_rate": 3.7305350091646496e-06, + "loss": 24.3485, + "step": 311770 + }, + { + "epoch": 0.6298153258160045, + "grad_norm": 550.8214721679688, + "learning_rate": 3.7301973844889922e-06, + "loss": 23.281, + "step": 311780 + }, + { + "epoch": 0.6298355264486883, + "grad_norm": 31.740415573120117, + "learning_rate": 3.729859766002198e-06, + "loss": 11.4073, + "step": 311790 + }, + { + "epoch": 0.6298557270813722, + "grad_norm": 7.452092170715332, + 
"learning_rate": 3.7295221537059162e-06, + "loss": 28.5895, + "step": 311800 + }, + { + "epoch": 0.629875927714056, + "grad_norm": 260.27752685546875, + "learning_rate": 3.729184547601786e-06, + "loss": 31.3245, + "step": 311810 + }, + { + "epoch": 0.6298961283467398, + "grad_norm": 464.4610595703125, + "learning_rate": 3.728846947691458e-06, + "loss": 23.1242, + "step": 311820 + }, + { + "epoch": 0.6299163289794236, + "grad_norm": 203.24380493164062, + "learning_rate": 3.7285093539765747e-06, + "loss": 13.3049, + "step": 311830 + }, + { + "epoch": 0.6299365296121074, + "grad_norm": 235.9980926513672, + "learning_rate": 3.728171766458785e-06, + "loss": 8.6781, + "step": 311840 + }, + { + "epoch": 0.6299567302447913, + "grad_norm": 376.7706604003906, + "learning_rate": 3.72783418513973e-06, + "loss": 26.8187, + "step": 311850 + }, + { + "epoch": 0.6299769308774751, + "grad_norm": 231.58253479003906, + "learning_rate": 3.727496610021055e-06, + "loss": 16.7101, + "step": 311860 + }, + { + "epoch": 0.6299971315101589, + "grad_norm": 457.01019287109375, + "learning_rate": 3.727159041104412e-06, + "loss": 10.4923, + "step": 311870 + }, + { + "epoch": 0.6300173321428427, + "grad_norm": 2049.08837890625, + "learning_rate": 3.7268214783914375e-06, + "loss": 22.617, + "step": 311880 + }, + { + "epoch": 0.6300375327755265, + "grad_norm": 251.09019470214844, + "learning_rate": 3.7264839218837817e-06, + "loss": 11.8721, + "step": 311890 + }, + { + "epoch": 0.6300577334082104, + "grad_norm": 245.5015411376953, + "learning_rate": 3.7261463715830902e-06, + "loss": 11.1776, + "step": 311900 + }, + { + "epoch": 0.6300779340408942, + "grad_norm": 606.9202270507812, + "learning_rate": 3.7258088274910054e-06, + "loss": 12.1486, + "step": 311910 + }, + { + "epoch": 0.630098134673578, + "grad_norm": 2910.516357421875, + "learning_rate": 3.725471289609174e-06, + "loss": 40.8246, + "step": 311920 + }, + { + "epoch": 0.6301183353062618, + "grad_norm": 465.2891540527344, + "learning_rate": 3.7251337579392415e-06, + "loss": 10.9321, + "step": 311930 + }, + { + "epoch": 0.6301385359389456, + "grad_norm": 212.12435913085938, + "learning_rate": 3.724796232482852e-06, + "loss": 26.7457, + "step": 311940 + }, + { + "epoch": 0.6301587365716295, + "grad_norm": 591.8861694335938, + "learning_rate": 3.7244587132416497e-06, + "loss": 25.5461, + "step": 311950 + }, + { + "epoch": 0.6301789372043133, + "grad_norm": 407.2118225097656, + "learning_rate": 3.7241212002172846e-06, + "loss": 10.734, + "step": 311960 + }, + { + "epoch": 0.6301991378369971, + "grad_norm": 366.46966552734375, + "learning_rate": 3.723783693411394e-06, + "loss": 18.4331, + "step": 311970 + }, + { + "epoch": 0.6302193384696809, + "grad_norm": 2.5028135776519775, + "learning_rate": 3.723446192825628e-06, + "loss": 9.702, + "step": 311980 + }, + { + "epoch": 0.6302395391023646, + "grad_norm": 440.0711364746094, + "learning_rate": 3.7231086984616312e-06, + "loss": 5.4914, + "step": 311990 + }, + { + "epoch": 0.6302597397350485, + "grad_norm": 218.40322875976562, + "learning_rate": 3.7227712103210485e-06, + "loss": 9.4796, + "step": 312000 + }, + { + "epoch": 0.6302799403677323, + "grad_norm": 123.79711151123047, + "learning_rate": 3.722433728405522e-06, + "loss": 11.0336, + "step": 312010 + }, + { + "epoch": 0.6303001410004161, + "grad_norm": 235.97381591796875, + "learning_rate": 3.7220962527166994e-06, + "loss": 9.7656, + "step": 312020 + }, + { + "epoch": 0.6303203416330999, + "grad_norm": 372.2127380371094, + "learning_rate": 3.7217587832562264e-06, + 
"loss": 23.8709, + "step": 312030 + }, + { + "epoch": 0.6303405422657837, + "grad_norm": 144.38809204101562, + "learning_rate": 3.7214213200257433e-06, + "loss": 17.704, + "step": 312040 + }, + { + "epoch": 0.6303607428984676, + "grad_norm": 559.6666870117188, + "learning_rate": 3.7210838630268986e-06, + "loss": 12.9917, + "step": 312050 + }, + { + "epoch": 0.6303809435311514, + "grad_norm": 380.5607604980469, + "learning_rate": 3.720746412261337e-06, + "loss": 12.0142, + "step": 312060 + }, + { + "epoch": 0.6304011441638352, + "grad_norm": 447.0756530761719, + "learning_rate": 3.7204089677307015e-06, + "loss": 41.8091, + "step": 312070 + }, + { + "epoch": 0.630421344796519, + "grad_norm": 12.453298568725586, + "learning_rate": 3.7200715294366376e-06, + "loss": 8.5864, + "step": 312080 + }, + { + "epoch": 0.6304415454292028, + "grad_norm": 476.9683837890625, + "learning_rate": 3.7197340973807905e-06, + "loss": 14.3071, + "step": 312090 + }, + { + "epoch": 0.6304617460618867, + "grad_norm": 312.4320068359375, + "learning_rate": 3.7193966715648026e-06, + "loss": 16.1088, + "step": 312100 + }, + { + "epoch": 0.6304819466945705, + "grad_norm": 666.4554443359375, + "learning_rate": 3.7190592519903198e-06, + "loss": 14.9029, + "step": 312110 + }, + { + "epoch": 0.6305021473272543, + "grad_norm": 502.0362854003906, + "learning_rate": 3.71872183865899e-06, + "loss": 23.3207, + "step": 312120 + }, + { + "epoch": 0.6305223479599381, + "grad_norm": 325.1219482421875, + "learning_rate": 3.7183844315724505e-06, + "loss": 15.0487, + "step": 312130 + }, + { + "epoch": 0.6305425485926219, + "grad_norm": 390.6490173339844, + "learning_rate": 3.718047030732352e-06, + "loss": 27.0646, + "step": 312140 + }, + { + "epoch": 0.6305627492253058, + "grad_norm": 776.7241821289062, + "learning_rate": 3.7177096361403362e-06, + "loss": 14.8082, + "step": 312150 + }, + { + "epoch": 0.6305829498579896, + "grad_norm": 1288.8662109375, + "learning_rate": 3.717372247798049e-06, + "loss": 19.7318, + "step": 312160 + }, + { + "epoch": 0.6306031504906734, + "grad_norm": 469.3023986816406, + "learning_rate": 3.717034865707133e-06, + "loss": 19.5128, + "step": 312170 + }, + { + "epoch": 0.6306233511233572, + "grad_norm": 461.573974609375, + "learning_rate": 3.7166974898692324e-06, + "loss": 22.3097, + "step": 312180 + }, + { + "epoch": 0.630643551756041, + "grad_norm": 1052.504150390625, + "learning_rate": 3.7163601202859963e-06, + "loss": 26.9412, + "step": 312190 + }, + { + "epoch": 0.6306637523887249, + "grad_norm": 786.5824584960938, + "learning_rate": 3.716022756959061e-06, + "loss": 26.1058, + "step": 312200 + }, + { + "epoch": 0.6306839530214087, + "grad_norm": 367.0870666503906, + "learning_rate": 3.715685399890078e-06, + "loss": 23.6425, + "step": 312210 + }, + { + "epoch": 0.6307041536540925, + "grad_norm": 503.5752868652344, + "learning_rate": 3.7153480490806883e-06, + "loss": 10.9421, + "step": 312220 + }, + { + "epoch": 0.6307243542867763, + "grad_norm": 342.5792236328125, + "learning_rate": 3.715010704532535e-06, + "loss": 14.9556, + "step": 312230 + }, + { + "epoch": 0.6307445549194601, + "grad_norm": 385.3841857910156, + "learning_rate": 3.7146733662472645e-06, + "loss": 24.8425, + "step": 312240 + }, + { + "epoch": 0.6307647555521438, + "grad_norm": 482.5807189941406, + "learning_rate": 3.7143360342265206e-06, + "loss": 34.6575, + "step": 312250 + }, + { + "epoch": 0.6307849561848277, + "grad_norm": 468.9497375488281, + "learning_rate": 3.7139987084719463e-06, + "loss": 15.1575, + "step": 312260 + }, + { + 
"epoch": 0.6308051568175115, + "grad_norm": 313.30010986328125, + "learning_rate": 3.7136613889851847e-06, + "loss": 14.4734, + "step": 312270 + }, + { + "epoch": 0.6308253574501953, + "grad_norm": 363.6047668457031, + "learning_rate": 3.7133240757678835e-06, + "loss": 17.5019, + "step": 312280 + }, + { + "epoch": 0.6308455580828791, + "grad_norm": 829.5293579101562, + "learning_rate": 3.7129867688216848e-06, + "loss": 43.489, + "step": 312290 + }, + { + "epoch": 0.6308657587155629, + "grad_norm": 271.2547302246094, + "learning_rate": 3.7126494681482317e-06, + "loss": 30.5318, + "step": 312300 + }, + { + "epoch": 0.6308859593482468, + "grad_norm": 435.1318664550781, + "learning_rate": 3.712312173749169e-06, + "loss": 27.9105, + "step": 312310 + }, + { + "epoch": 0.6309061599809306, + "grad_norm": 616.8460693359375, + "learning_rate": 3.7119748856261416e-06, + "loss": 17.2781, + "step": 312320 + }, + { + "epoch": 0.6309263606136144, + "grad_norm": 183.11244201660156, + "learning_rate": 3.7116376037807915e-06, + "loss": 15.1674, + "step": 312330 + }, + { + "epoch": 0.6309465612462982, + "grad_norm": 589.213623046875, + "learning_rate": 3.7113003282147625e-06, + "loss": 25.8361, + "step": 312340 + }, + { + "epoch": 0.630966761878982, + "grad_norm": 73.51373291015625, + "learning_rate": 3.7109630589297014e-06, + "loss": 18.547, + "step": 312350 + }, + { + "epoch": 0.6309869625116659, + "grad_norm": 488.0601501464844, + "learning_rate": 3.710625795927249e-06, + "loss": 18.0595, + "step": 312360 + }, + { + "epoch": 0.6310071631443497, + "grad_norm": 586.9658813476562, + "learning_rate": 3.7102885392090497e-06, + "loss": 15.2669, + "step": 312370 + }, + { + "epoch": 0.6310273637770335, + "grad_norm": 574.9307861328125, + "learning_rate": 3.709951288776749e-06, + "loss": 13.9294, + "step": 312380 + }, + { + "epoch": 0.6310475644097173, + "grad_norm": 412.48822021484375, + "learning_rate": 3.7096140446319884e-06, + "loss": 25.85, + "step": 312390 + }, + { + "epoch": 0.6310677650424011, + "grad_norm": 895.47119140625, + "learning_rate": 3.709276806776412e-06, + "loss": 35.0245, + "step": 312400 + }, + { + "epoch": 0.631087965675085, + "grad_norm": 314.1524963378906, + "learning_rate": 3.7089395752116653e-06, + "loss": 30.6317, + "step": 312410 + }, + { + "epoch": 0.6311081663077688, + "grad_norm": 392.1971435546875, + "learning_rate": 3.7086023499393887e-06, + "loss": 22.1168, + "step": 312420 + }, + { + "epoch": 0.6311283669404526, + "grad_norm": 38.94456100463867, + "learning_rate": 3.7082651309612283e-06, + "loss": 15.1257, + "step": 312430 + }, + { + "epoch": 0.6311485675731364, + "grad_norm": 572.1344604492188, + "learning_rate": 3.7079279182788263e-06, + "loss": 23.5438, + "step": 312440 + }, + { + "epoch": 0.6311687682058202, + "grad_norm": 118.326416015625, + "learning_rate": 3.707590711893829e-06, + "loss": 21.0824, + "step": 312450 + }, + { + "epoch": 0.6311889688385041, + "grad_norm": 340.10986328125, + "learning_rate": 3.707253511807877e-06, + "loss": 16.1145, + "step": 312460 + }, + { + "epoch": 0.6312091694711879, + "grad_norm": 157.62193298339844, + "learning_rate": 3.706916318022612e-06, + "loss": 11.8272, + "step": 312470 + }, + { + "epoch": 0.6312293701038717, + "grad_norm": 295.4103088378906, + "learning_rate": 3.7065791305396846e-06, + "loss": 25.9566, + "step": 312480 + }, + { + "epoch": 0.6312495707365555, + "grad_norm": 198.5532989501953, + "learning_rate": 3.70624194936073e-06, + "loss": 18.1501, + "step": 312490 + }, + { + "epoch": 0.6312697713692392, + "grad_norm": 
152.81765747070312, + "learning_rate": 3.705904774487396e-06, + "loss": 14.3211, + "step": 312500 + }, + { + "epoch": 0.631289972001923, + "grad_norm": 379.5767517089844, + "learning_rate": 3.7055676059213265e-06, + "loss": 14.4697, + "step": 312510 + }, + { + "epoch": 0.6313101726346069, + "grad_norm": 245.6889190673828, + "learning_rate": 3.705230443664163e-06, + "loss": 18.7405, + "step": 312520 + }, + { + "epoch": 0.6313303732672907, + "grad_norm": 36.4300537109375, + "learning_rate": 3.704893287717548e-06, + "loss": 28.9301, + "step": 312530 + }, + { + "epoch": 0.6313505738999745, + "grad_norm": 356.6920471191406, + "learning_rate": 3.7045561380831287e-06, + "loss": 25.2074, + "step": 312540 + }, + { + "epoch": 0.6313707745326583, + "grad_norm": 235.33872985839844, + "learning_rate": 3.704218994762543e-06, + "loss": 17.2906, + "step": 312550 + }, + { + "epoch": 0.6313909751653421, + "grad_norm": 102.42072296142578, + "learning_rate": 3.7038818577574363e-06, + "loss": 19.8175, + "step": 312560 + }, + { + "epoch": 0.631411175798026, + "grad_norm": 181.9062957763672, + "learning_rate": 3.7035447270694558e-06, + "loss": 10.4626, + "step": 312570 + }, + { + "epoch": 0.6314313764307098, + "grad_norm": 305.3853759765625, + "learning_rate": 3.7032076027002377e-06, + "loss": 19.9018, + "step": 312580 + }, + { + "epoch": 0.6314515770633936, + "grad_norm": 417.63153076171875, + "learning_rate": 3.7028704846514296e-06, + "loss": 32.7365, + "step": 312590 + }, + { + "epoch": 0.6314717776960774, + "grad_norm": 547.8931884765625, + "learning_rate": 3.7025333729246733e-06, + "loss": 21.1438, + "step": 312600 + }, + { + "epoch": 0.6314919783287612, + "grad_norm": 307.0222473144531, + "learning_rate": 3.7021962675216126e-06, + "loss": 15.7725, + "step": 312610 + }, + { + "epoch": 0.6315121789614451, + "grad_norm": 172.6763153076172, + "learning_rate": 3.70185916844389e-06, + "loss": 14.39, + "step": 312620 + }, + { + "epoch": 0.6315323795941289, + "grad_norm": 325.3825378417969, + "learning_rate": 3.701522075693146e-06, + "loss": 16.4441, + "step": 312630 + }, + { + "epoch": 0.6315525802268127, + "grad_norm": 444.0788879394531, + "learning_rate": 3.7011849892710293e-06, + "loss": 21.4759, + "step": 312640 + }, + { + "epoch": 0.6315727808594965, + "grad_norm": 470.6161804199219, + "learning_rate": 3.700847909179177e-06, + "loss": 49.357, + "step": 312650 + }, + { + "epoch": 0.6315929814921803, + "grad_norm": 320.4682922363281, + "learning_rate": 3.7005108354192356e-06, + "loss": 15.674, + "step": 312660 + }, + { + "epoch": 0.6316131821248642, + "grad_norm": 211.36276245117188, + "learning_rate": 3.7001737679928467e-06, + "loss": 10.0086, + "step": 312670 + }, + { + "epoch": 0.631633382757548, + "grad_norm": 726.2916259765625, + "learning_rate": 3.6998367069016527e-06, + "loss": 33.8859, + "step": 312680 + }, + { + "epoch": 0.6316535833902318, + "grad_norm": 28.509401321411133, + "learning_rate": 3.699499652147297e-06, + "loss": 21.0636, + "step": 312690 + }, + { + "epoch": 0.6316737840229156, + "grad_norm": 381.392822265625, + "learning_rate": 3.699162603731423e-06, + "loss": 11.1215, + "step": 312700 + }, + { + "epoch": 0.6316939846555994, + "grad_norm": 734.5407104492188, + "learning_rate": 3.6988255616556725e-06, + "loss": 13.3186, + "step": 312710 + }, + { + "epoch": 0.6317141852882833, + "grad_norm": 346.9736328125, + "learning_rate": 3.6984885259216866e-06, + "loss": 11.5427, + "step": 312720 + }, + { + "epoch": 0.6317343859209671, + "grad_norm": 270.8279113769531, + "learning_rate": 
3.698151496531111e-06, + "loss": 16.8361, + "step": 312730 + }, + { + "epoch": 0.6317545865536509, + "grad_norm": 396.7002258300781, + "learning_rate": 3.697814473485588e-06, + "loss": 13.7949, + "step": 312740 + }, + { + "epoch": 0.6317747871863347, + "grad_norm": 163.82882690429688, + "learning_rate": 3.6974774567867586e-06, + "loss": 16.538, + "step": 312750 + }, + { + "epoch": 0.6317949878190184, + "grad_norm": 465.40869140625, + "learning_rate": 3.6971404464362657e-06, + "loss": 31.0662, + "step": 312760 + }, + { + "epoch": 0.6318151884517023, + "grad_norm": 456.25384521484375, + "learning_rate": 3.6968034424357535e-06, + "loss": 24.0063, + "step": 312770 + }, + { + "epoch": 0.6318353890843861, + "grad_norm": 238.4464874267578, + "learning_rate": 3.6964664447868626e-06, + "loss": 20.4208, + "step": 312780 + }, + { + "epoch": 0.6318555897170699, + "grad_norm": 237.52659606933594, + "learning_rate": 3.696129453491235e-06, + "loss": 16.6057, + "step": 312790 + }, + { + "epoch": 0.6318757903497537, + "grad_norm": 865.7783813476562, + "learning_rate": 3.695792468550517e-06, + "loss": 18.1219, + "step": 312800 + }, + { + "epoch": 0.6318959909824375, + "grad_norm": 280.9550476074219, + "learning_rate": 3.6954554899663454e-06, + "loss": 14.5367, + "step": 312810 + }, + { + "epoch": 0.6319161916151214, + "grad_norm": 719.3519287109375, + "learning_rate": 3.6951185177403667e-06, + "loss": 16.1767, + "step": 312820 + }, + { + "epoch": 0.6319363922478052, + "grad_norm": 114.07866668701172, + "learning_rate": 3.6947815518742226e-06, + "loss": 17.0786, + "step": 312830 + }, + { + "epoch": 0.631956592880489, + "grad_norm": 723.0438232421875, + "learning_rate": 3.6944445923695542e-06, + "loss": 21.3457, + "step": 312840 + }, + { + "epoch": 0.6319767935131728, + "grad_norm": 181.6907958984375, + "learning_rate": 3.694107639228005e-06, + "loss": 12.0535, + "step": 312850 + }, + { + "epoch": 0.6319969941458566, + "grad_norm": 46.40886306762695, + "learning_rate": 3.6937706924512175e-06, + "loss": 13.2532, + "step": 312860 + }, + { + "epoch": 0.6320171947785405, + "grad_norm": 576.8805541992188, + "learning_rate": 3.6934337520408313e-06, + "loss": 11.5144, + "step": 312870 + }, + { + "epoch": 0.6320373954112243, + "grad_norm": 299.54864501953125, + "learning_rate": 3.6930968179984905e-06, + "loss": 23.6593, + "step": 312880 + }, + { + "epoch": 0.6320575960439081, + "grad_norm": 122.38074493408203, + "learning_rate": 3.6927598903258375e-06, + "loss": 14.0141, + "step": 312890 + }, + { + "epoch": 0.6320777966765919, + "grad_norm": 150.1681671142578, + "learning_rate": 3.6924229690245163e-06, + "loss": 11.8433, + "step": 312900 + }, + { + "epoch": 0.6320979973092757, + "grad_norm": 588.6844482421875, + "learning_rate": 3.6920860540961656e-06, + "loss": 18.118, + "step": 312910 + }, + { + "epoch": 0.6321181979419596, + "grad_norm": 159.3857421875, + "learning_rate": 3.6917491455424285e-06, + "loss": 14.1074, + "step": 312920 + }, + { + "epoch": 0.6321383985746434, + "grad_norm": 338.532958984375, + "learning_rate": 3.691412243364949e-06, + "loss": 22.701, + "step": 312930 + }, + { + "epoch": 0.6321585992073272, + "grad_norm": 129.49403381347656, + "learning_rate": 3.691075347565366e-06, + "loss": 16.3717, + "step": 312940 + }, + { + "epoch": 0.632178799840011, + "grad_norm": 233.03042602539062, + "learning_rate": 3.690738458145322e-06, + "loss": 38.7909, + "step": 312950 + }, + { + "epoch": 0.6321990004726948, + "grad_norm": 638.060546875, + "learning_rate": 3.6904015751064637e-06, + "loss": 25.8626, + 
"step": 312960 + }, + { + "epoch": 0.6322192011053787, + "grad_norm": 21.61378288269043, + "learning_rate": 3.690064698450425e-06, + "loss": 22.3261, + "step": 312970 + }, + { + "epoch": 0.6322394017380625, + "grad_norm": 412.9071350097656, + "learning_rate": 3.689727828178854e-06, + "loss": 9.898, + "step": 312980 + }, + { + "epoch": 0.6322596023707463, + "grad_norm": 496.47308349609375, + "learning_rate": 3.689390964293391e-06, + "loss": 9.9742, + "step": 312990 + }, + { + "epoch": 0.6322798030034301, + "grad_norm": 352.8420104980469, + "learning_rate": 3.6890541067956775e-06, + "loss": 11.8739, + "step": 313000 + }, + { + "epoch": 0.6323000036361138, + "grad_norm": 387.5815124511719, + "learning_rate": 3.6887172556873545e-06, + "loss": 17.3686, + "step": 313010 + }, + { + "epoch": 0.6323202042687976, + "grad_norm": 157.61289978027344, + "learning_rate": 3.688380410970066e-06, + "loss": 21.7196, + "step": 313020 + }, + { + "epoch": 0.6323404049014815, + "grad_norm": 476.6892395019531, + "learning_rate": 3.68804357264545e-06, + "loss": 22.1875, + "step": 313030 + }, + { + "epoch": 0.6323606055341653, + "grad_norm": 220.13218688964844, + "learning_rate": 3.6877067407151514e-06, + "loss": 19.917, + "step": 313040 + }, + { + "epoch": 0.6323808061668491, + "grad_norm": 779.32275390625, + "learning_rate": 3.6873699151808105e-06, + "loss": 17.107, + "step": 313050 + }, + { + "epoch": 0.6324010067995329, + "grad_norm": 373.5910949707031, + "learning_rate": 3.6870330960440713e-06, + "loss": 27.1193, + "step": 313060 + }, + { + "epoch": 0.6324212074322167, + "grad_norm": 211.95901489257812, + "learning_rate": 3.686696283306572e-06, + "loss": 10.6057, + "step": 313070 + }, + { + "epoch": 0.6324414080649006, + "grad_norm": 186.3962860107422, + "learning_rate": 3.686359476969957e-06, + "loss": 8.7685, + "step": 313080 + }, + { + "epoch": 0.6324616086975844, + "grad_norm": 844.3349609375, + "learning_rate": 3.6860226770358663e-06, + "loss": 25.5248, + "step": 313090 + }, + { + "epoch": 0.6324818093302682, + "grad_norm": 560.161376953125, + "learning_rate": 3.68568588350594e-06, + "loss": 13.9366, + "step": 313100 + }, + { + "epoch": 0.632502009962952, + "grad_norm": 159.23397827148438, + "learning_rate": 3.6853490963818224e-06, + "loss": 16.6956, + "step": 313110 + }, + { + "epoch": 0.6325222105956358, + "grad_norm": 828.654052734375, + "learning_rate": 3.6850123156651544e-06, + "loss": 23.5992, + "step": 313120 + }, + { + "epoch": 0.6325424112283197, + "grad_norm": 289.0027160644531, + "learning_rate": 3.6846755413575764e-06, + "loss": 17.886, + "step": 313130 + }, + { + "epoch": 0.6325626118610035, + "grad_norm": 553.8969116210938, + "learning_rate": 3.6843387734607304e-06, + "loss": 15.7221, + "step": 313140 + }, + { + "epoch": 0.6325828124936873, + "grad_norm": 135.4904022216797, + "learning_rate": 3.684002011976259e-06, + "loss": 10.7774, + "step": 313150 + }, + { + "epoch": 0.6326030131263711, + "grad_norm": 449.1959533691406, + "learning_rate": 3.6836652569057994e-06, + "loss": 21.7374, + "step": 313160 + }, + { + "epoch": 0.632623213759055, + "grad_norm": 357.0682373046875, + "learning_rate": 3.6833285082509962e-06, + "loss": 14.2281, + "step": 313170 + }, + { + "epoch": 0.6326434143917388, + "grad_norm": 202.2461700439453, + "learning_rate": 3.682991766013493e-06, + "loss": 15.7554, + "step": 313180 + }, + { + "epoch": 0.6326636150244226, + "grad_norm": 1196.7135009765625, + "learning_rate": 3.6826550301949248e-06, + "loss": 20.5433, + "step": 313190 + }, + { + "epoch": 0.6326838156571064, + 
"grad_norm": 240.29473876953125, + "learning_rate": 3.6823183007969375e-06, + "loss": 33.4275, + "step": 313200 + }, + { + "epoch": 0.6327040162897902, + "grad_norm": 432.63262939453125, + "learning_rate": 3.681981577821171e-06, + "loss": 15.9644, + "step": 313210 + }, + { + "epoch": 0.632724216922474, + "grad_norm": 469.47955322265625, + "learning_rate": 3.681644861269267e-06, + "loss": 13.7639, + "step": 313220 + }, + { + "epoch": 0.6327444175551579, + "grad_norm": 729.5411376953125, + "learning_rate": 3.681308151142866e-06, + "loss": 18.7102, + "step": 313230 + }, + { + "epoch": 0.6327646181878417, + "grad_norm": 130.8728485107422, + "learning_rate": 3.6809714474436075e-06, + "loss": 27.1396, + "step": 313240 + }, + { + "epoch": 0.6327848188205255, + "grad_norm": 280.59307861328125, + "learning_rate": 3.680634750173137e-06, + "loss": 15.3041, + "step": 313250 + }, + { + "epoch": 0.6328050194532093, + "grad_norm": 106.77202606201172, + "learning_rate": 3.6802980593330893e-06, + "loss": 23.9739, + "step": 313260 + }, + { + "epoch": 0.632825220085893, + "grad_norm": 443.0906066894531, + "learning_rate": 3.6799613749251105e-06, + "loss": 10.7108, + "step": 313270 + }, + { + "epoch": 0.6328454207185769, + "grad_norm": 91.66646575927734, + "learning_rate": 3.6796246969508408e-06, + "loss": 23.3665, + "step": 313280 + }, + { + "epoch": 0.6328656213512607, + "grad_norm": 612.2748413085938, + "learning_rate": 3.6792880254119195e-06, + "loss": 21.3941, + "step": 313290 + }, + { + "epoch": 0.6328858219839445, + "grad_norm": 499.7066650390625, + "learning_rate": 3.678951360309988e-06, + "loss": 19.2343, + "step": 313300 + }, + { + "epoch": 0.6329060226166283, + "grad_norm": 330.5376892089844, + "learning_rate": 3.678614701646688e-06, + "loss": 15.7366, + "step": 313310 + }, + { + "epoch": 0.6329262232493121, + "grad_norm": 242.13572692871094, + "learning_rate": 3.678278049423659e-06, + "loss": 18.0754, + "step": 313320 + }, + { + "epoch": 0.632946423881996, + "grad_norm": 445.71099853515625, + "learning_rate": 3.677941403642541e-06, + "loss": 22.9995, + "step": 313330 + }, + { + "epoch": 0.6329666245146798, + "grad_norm": 293.3953857421875, + "learning_rate": 3.6776047643049777e-06, + "loss": 15.1248, + "step": 313340 + }, + { + "epoch": 0.6329868251473636, + "grad_norm": 626.185302734375, + "learning_rate": 3.6772681314126097e-06, + "loss": 20.843, + "step": 313350 + }, + { + "epoch": 0.6330070257800474, + "grad_norm": 174.92031860351562, + "learning_rate": 3.676931504967075e-06, + "loss": 16.2623, + "step": 313360 + }, + { + "epoch": 0.6330272264127312, + "grad_norm": 381.0513610839844, + "learning_rate": 3.6765948849700155e-06, + "loss": 25.8901, + "step": 313370 + }, + { + "epoch": 0.6330474270454151, + "grad_norm": 316.2397766113281, + "learning_rate": 3.6762582714230733e-06, + "loss": 23.8412, + "step": 313380 + }, + { + "epoch": 0.6330676276780989, + "grad_norm": 478.4018249511719, + "learning_rate": 3.6759216643278865e-06, + "loss": 33.0218, + "step": 313390 + }, + { + "epoch": 0.6330878283107827, + "grad_norm": 296.0896301269531, + "learning_rate": 3.6755850636860956e-06, + "loss": 34.0951, + "step": 313400 + }, + { + "epoch": 0.6331080289434665, + "grad_norm": 359.9607238769531, + "learning_rate": 3.675248469499346e-06, + "loss": 47.9103, + "step": 313410 + }, + { + "epoch": 0.6331282295761503, + "grad_norm": 132.58859252929688, + "learning_rate": 3.674911881769272e-06, + "loss": 8.2303, + "step": 313420 + }, + { + "epoch": 0.6331484302088342, + "grad_norm": 313.1063232421875, + 
"learning_rate": 3.674575300497517e-06, + "loss": 21.5348, + "step": 313430 + }, + { + "epoch": 0.633168630841518, + "grad_norm": 2743.5791015625, + "learning_rate": 3.6742387256857224e-06, + "loss": 32.8768, + "step": 313440 + }, + { + "epoch": 0.6331888314742018, + "grad_norm": 144.70936584472656, + "learning_rate": 3.6739021573355273e-06, + "loss": 18.3357, + "step": 313450 + }, + { + "epoch": 0.6332090321068856, + "grad_norm": 155.98593139648438, + "learning_rate": 3.673565595448572e-06, + "loss": 15.6992, + "step": 313460 + }, + { + "epoch": 0.6332292327395694, + "grad_norm": 189.23052978515625, + "learning_rate": 3.673229040026497e-06, + "loss": 20.7769, + "step": 313470 + }, + { + "epoch": 0.6332494333722533, + "grad_norm": 324.03497314453125, + "learning_rate": 3.672892491070943e-06, + "loss": 25.4356, + "step": 313480 + }, + { + "epoch": 0.6332696340049371, + "grad_norm": 125.66798400878906, + "learning_rate": 3.672555948583548e-06, + "loss": 19.2222, + "step": 313490 + }, + { + "epoch": 0.6332898346376209, + "grad_norm": 839.9357299804688, + "learning_rate": 3.672219412565956e-06, + "loss": 32.5873, + "step": 313500 + }, + { + "epoch": 0.6333100352703047, + "grad_norm": 76.7645263671875, + "learning_rate": 3.671882883019806e-06, + "loss": 8.779, + "step": 313510 + }, + { + "epoch": 0.6333302359029885, + "grad_norm": 546.8753662109375, + "learning_rate": 3.6715463599467372e-06, + "loss": 23.4845, + "step": 313520 + }, + { + "epoch": 0.6333504365356722, + "grad_norm": 115.98934936523438, + "learning_rate": 3.67120984334839e-06, + "loss": 15.4719, + "step": 313530 + }, + { + "epoch": 0.6333706371683561, + "grad_norm": 333.93670654296875, + "learning_rate": 3.670873333226407e-06, + "loss": 16.8072, + "step": 313540 + }, + { + "epoch": 0.6333908378010399, + "grad_norm": 748.4852905273438, + "learning_rate": 3.670536829582424e-06, + "loss": 26.3143, + "step": 313550 + }, + { + "epoch": 0.6334110384337237, + "grad_norm": 1537.15185546875, + "learning_rate": 3.6702003324180823e-06, + "loss": 23.7573, + "step": 313560 + }, + { + "epoch": 0.6334312390664075, + "grad_norm": 238.7601776123047, + "learning_rate": 3.669863841735026e-06, + "loss": 11.5274, + "step": 313570 + }, + { + "epoch": 0.6334514396990913, + "grad_norm": 291.588134765625, + "learning_rate": 3.669527357534889e-06, + "loss": 24.5547, + "step": 313580 + }, + { + "epoch": 0.6334716403317752, + "grad_norm": 350.3074951171875, + "learning_rate": 3.6691908798193155e-06, + "loss": 9.7203, + "step": 313590 + }, + { + "epoch": 0.633491840964459, + "grad_norm": 444.2533264160156, + "learning_rate": 3.668854408589945e-06, + "loss": 21.6721, + "step": 313600 + }, + { + "epoch": 0.6335120415971428, + "grad_norm": 212.96951293945312, + "learning_rate": 3.668517943848416e-06, + "loss": 20.951, + "step": 313610 + }, + { + "epoch": 0.6335322422298266, + "grad_norm": 250.44496154785156, + "learning_rate": 3.6681814855963687e-06, + "loss": 15.8623, + "step": 313620 + }, + { + "epoch": 0.6335524428625104, + "grad_norm": 695.5618896484375, + "learning_rate": 3.6678450338354443e-06, + "loss": 25.7222, + "step": 313630 + }, + { + "epoch": 0.6335726434951943, + "grad_norm": 183.17965698242188, + "learning_rate": 3.667508588567281e-06, + "loss": 11.9885, + "step": 313640 + }, + { + "epoch": 0.6335928441278781, + "grad_norm": 7.7826128005981445, + "learning_rate": 3.6671721497935177e-06, + "loss": 28.6275, + "step": 313650 + }, + { + "epoch": 0.6336130447605619, + "grad_norm": 606.100830078125, + "learning_rate": 3.6668357175157974e-06, + "loss": 
18.2872, + "step": 313660 + }, + { + "epoch": 0.6336332453932457, + "grad_norm": 383.4219970703125, + "learning_rate": 3.666499291735759e-06, + "loss": 13.3002, + "step": 313670 + }, + { + "epoch": 0.6336534460259295, + "grad_norm": 191.7081756591797, + "learning_rate": 3.66616287245504e-06, + "loss": 14.0361, + "step": 313680 + }, + { + "epoch": 0.6336736466586134, + "grad_norm": 527.5343627929688, + "learning_rate": 3.6658264596752814e-06, + "loss": 21.7557, + "step": 313690 + }, + { + "epoch": 0.6336938472912972, + "grad_norm": 122.24197387695312, + "learning_rate": 3.6654900533981234e-06, + "loss": 34.4489, + "step": 313700 + }, + { + "epoch": 0.633714047923981, + "grad_norm": 390.60882568359375, + "learning_rate": 3.6651536536252047e-06, + "loss": 13.396, + "step": 313710 + }, + { + "epoch": 0.6337342485566648, + "grad_norm": 253.22512817382812, + "learning_rate": 3.664817260358164e-06, + "loss": 9.8017, + "step": 313720 + }, + { + "epoch": 0.6337544491893486, + "grad_norm": 384.0865173339844, + "learning_rate": 3.6644808735986437e-06, + "loss": 16.8831, + "step": 313730 + }, + { + "epoch": 0.6337746498220325, + "grad_norm": 350.7376403808594, + "learning_rate": 3.664144493348281e-06, + "loss": 10.132, + "step": 313740 + }, + { + "epoch": 0.6337948504547163, + "grad_norm": 428.6234436035156, + "learning_rate": 3.663808119608716e-06, + "loss": 18.2588, + "step": 313750 + }, + { + "epoch": 0.6338150510874001, + "grad_norm": 441.23016357421875, + "learning_rate": 3.663471752381589e-06, + "loss": 9.8741, + "step": 313760 + }, + { + "epoch": 0.6338352517200839, + "grad_norm": 530.990234375, + "learning_rate": 3.663135391668538e-06, + "loss": 31.5144, + "step": 313770 + }, + { + "epoch": 0.6338554523527676, + "grad_norm": 170.01712036132812, + "learning_rate": 3.662799037471201e-06, + "loss": 9.309, + "step": 313780 + }, + { + "epoch": 0.6338756529854515, + "grad_norm": 442.9435729980469, + "learning_rate": 3.6624626897912213e-06, + "loss": 12.849, + "step": 313790 + }, + { + "epoch": 0.6338958536181353, + "grad_norm": 328.24481201171875, + "learning_rate": 3.6621263486302373e-06, + "loss": 15.3321, + "step": 313800 + }, + { + "epoch": 0.6339160542508191, + "grad_norm": 589.2186889648438, + "learning_rate": 3.6617900139898854e-06, + "loss": 12.429, + "step": 313810 + }, + { + "epoch": 0.6339362548835029, + "grad_norm": 172.483642578125, + "learning_rate": 3.6614536858718074e-06, + "loss": 28.6527, + "step": 313820 + }, + { + "epoch": 0.6339564555161867, + "grad_norm": 405.9244079589844, + "learning_rate": 3.661117364277642e-06, + "loss": 18.1824, + "step": 313830 + }, + { + "epoch": 0.6339766561488706, + "grad_norm": 183.97055053710938, + "learning_rate": 3.6607810492090278e-06, + "loss": 9.8192, + "step": 313840 + }, + { + "epoch": 0.6339968567815544, + "grad_norm": 68.43452453613281, + "learning_rate": 3.6604447406676036e-06, + "loss": 13.4357, + "step": 313850 + }, + { + "epoch": 0.6340170574142382, + "grad_norm": 515.4369506835938, + "learning_rate": 3.6601084386550117e-06, + "loss": 16.5309, + "step": 313860 + }, + { + "epoch": 0.634037258046922, + "grad_norm": 319.5745849609375, + "learning_rate": 3.659772143172886e-06, + "loss": 24.3469, + "step": 313870 + }, + { + "epoch": 0.6340574586796058, + "grad_norm": 262.00408935546875, + "learning_rate": 3.659435854222869e-06, + "loss": 28.8416, + "step": 313880 + }, + { + "epoch": 0.6340776593122897, + "grad_norm": 344.2080383300781, + "learning_rate": 3.6590995718066003e-06, + "loss": 18.1119, + "step": 313890 + }, + { + "epoch": 
0.6340978599449735, + "grad_norm": 244.47593688964844, + "learning_rate": 3.6587632959257168e-06, + "loss": 17.9972, + "step": 313900 + }, + { + "epoch": 0.6341180605776573, + "grad_norm": 657.9804077148438, + "learning_rate": 3.658427026581858e-06, + "loss": 26.6822, + "step": 313910 + }, + { + "epoch": 0.6341382612103411, + "grad_norm": 283.5820617675781, + "learning_rate": 3.6580907637766642e-06, + "loss": 11.8351, + "step": 313920 + }, + { + "epoch": 0.6341584618430249, + "grad_norm": 154.59458923339844, + "learning_rate": 3.657754507511773e-06, + "loss": 19.1057, + "step": 313930 + }, + { + "epoch": 0.6341786624757088, + "grad_norm": 435.4707336425781, + "learning_rate": 3.657418257788822e-06, + "loss": 20.9711, + "step": 313940 + }, + { + "epoch": 0.6341988631083926, + "grad_norm": 15.950176239013672, + "learning_rate": 3.657082014609452e-06, + "loss": 10.2715, + "step": 313950 + }, + { + "epoch": 0.6342190637410764, + "grad_norm": 365.6403503417969, + "learning_rate": 3.656745777975303e-06, + "loss": 18.9155, + "step": 313960 + }, + { + "epoch": 0.6342392643737602, + "grad_norm": 159.98983764648438, + "learning_rate": 3.6564095478880114e-06, + "loss": 17.8937, + "step": 313970 + }, + { + "epoch": 0.634259465006444, + "grad_norm": 179.79183959960938, + "learning_rate": 3.656073324349216e-06, + "loss": 9.7839, + "step": 313980 + }, + { + "epoch": 0.6342796656391279, + "grad_norm": 36.0970344543457, + "learning_rate": 3.6557371073605574e-06, + "loss": 12.2711, + "step": 313990 + }, + { + "epoch": 0.6342998662718117, + "grad_norm": 266.9267272949219, + "learning_rate": 3.655400896923672e-06, + "loss": 29.3467, + "step": 314000 + }, + { + "epoch": 0.6343200669044955, + "grad_norm": 135.0736083984375, + "learning_rate": 3.655064693040199e-06, + "loss": 40.2592, + "step": 314010 + }, + { + "epoch": 0.6343402675371793, + "grad_norm": 228.64602661132812, + "learning_rate": 3.6547284957117805e-06, + "loss": 22.6136, + "step": 314020 + }, + { + "epoch": 0.6343604681698631, + "grad_norm": 495.4220886230469, + "learning_rate": 3.6543923049400487e-06, + "loss": 22.3554, + "step": 314030 + }, + { + "epoch": 0.6343806688025468, + "grad_norm": 297.6684875488281, + "learning_rate": 3.6540561207266475e-06, + "loss": 9.3782, + "step": 314040 + }, + { + "epoch": 0.6344008694352307, + "grad_norm": 150.00657653808594, + "learning_rate": 3.653719943073214e-06, + "loss": 17.1163, + "step": 314050 + }, + { + "epoch": 0.6344210700679145, + "grad_norm": 582.0066528320312, + "learning_rate": 3.653383771981385e-06, + "loss": 20.8307, + "step": 314060 + }, + { + "epoch": 0.6344412707005983, + "grad_norm": 614.0116577148438, + "learning_rate": 3.6530476074528005e-06, + "loss": 25.7003, + "step": 314070 + }, + { + "epoch": 0.6344614713332821, + "grad_norm": 305.2926330566406, + "learning_rate": 3.652711449489099e-06, + "loss": 23.6369, + "step": 314080 + }, + { + "epoch": 0.6344816719659659, + "grad_norm": 743.9189453125, + "learning_rate": 3.6523752980919183e-06, + "loss": 25.1215, + "step": 314090 + }, + { + "epoch": 0.6345018725986498, + "grad_norm": 246.1226348876953, + "learning_rate": 3.6520391532628953e-06, + "loss": 8.944, + "step": 314100 + }, + { + "epoch": 0.6345220732313336, + "grad_norm": 331.6296081542969, + "learning_rate": 3.6517030150036716e-06, + "loss": 18.8529, + "step": 314110 + }, + { + "epoch": 0.6345422738640174, + "grad_norm": 249.5379180908203, + "learning_rate": 3.6513668833158846e-06, + "loss": 18.1074, + "step": 314120 + }, + { + "epoch": 0.6345624744967012, + "grad_norm": 
305.54498291015625, + "learning_rate": 3.6510307582011706e-06, + "loss": 8.4351, + "step": 314130 + }, + { + "epoch": 0.634582675129385, + "grad_norm": 318.6712951660156, + "learning_rate": 3.650694639661169e-06, + "loss": 12.2587, + "step": 314140 + }, + { + "epoch": 0.6346028757620689, + "grad_norm": 1094.7587890625, + "learning_rate": 3.6503585276975196e-06, + "loss": 21.2978, + "step": 314150 + }, + { + "epoch": 0.6346230763947527, + "grad_norm": 384.2978515625, + "learning_rate": 3.6500224223118576e-06, + "loss": 17.935, + "step": 314160 + }, + { + "epoch": 0.6346432770274365, + "grad_norm": 781.41796875, + "learning_rate": 3.6496863235058223e-06, + "loss": 39.0964, + "step": 314170 + }, + { + "epoch": 0.6346634776601203, + "grad_norm": 237.30960083007812, + "learning_rate": 3.649350231281054e-06, + "loss": 10.8582, + "step": 314180 + }, + { + "epoch": 0.6346836782928041, + "grad_norm": 107.89735412597656, + "learning_rate": 3.6490141456391864e-06, + "loss": 19.7089, + "step": 314190 + }, + { + "epoch": 0.634703878925488, + "grad_norm": 555.4315185546875, + "learning_rate": 3.648678066581861e-06, + "loss": 36.4434, + "step": 314200 + }, + { + "epoch": 0.6347240795581718, + "grad_norm": 184.58253479003906, + "learning_rate": 3.6483419941107156e-06, + "loss": 15.3814, + "step": 314210 + }, + { + "epoch": 0.6347442801908556, + "grad_norm": 252.2710418701172, + "learning_rate": 3.6480059282273872e-06, + "loss": 11.9636, + "step": 314220 + }, + { + "epoch": 0.6347644808235394, + "grad_norm": 151.30789184570312, + "learning_rate": 3.647669868933513e-06, + "loss": 14.5175, + "step": 314230 + }, + { + "epoch": 0.6347846814562232, + "grad_norm": 313.2535705566406, + "learning_rate": 3.6473338162307314e-06, + "loss": 20.2478, + "step": 314240 + }, + { + "epoch": 0.6348048820889071, + "grad_norm": 66.79661560058594, + "learning_rate": 3.6469977701206833e-06, + "loss": 33.3599, + "step": 314250 + }, + { + "epoch": 0.6348250827215909, + "grad_norm": 209.22415161132812, + "learning_rate": 3.6466617306050014e-06, + "loss": 15.8479, + "step": 314260 + }, + { + "epoch": 0.6348452833542747, + "grad_norm": 349.41436767578125, + "learning_rate": 3.646325697685327e-06, + "loss": 19.6468, + "step": 314270 + }, + { + "epoch": 0.6348654839869585, + "grad_norm": 198.7777099609375, + "learning_rate": 3.645989671363297e-06, + "loss": 14.5404, + "step": 314280 + }, + { + "epoch": 0.6348856846196422, + "grad_norm": 153.7720947265625, + "learning_rate": 3.6456536516405494e-06, + "loss": 13.566, + "step": 314290 + }, + { + "epoch": 0.634905885252326, + "grad_norm": 359.6976013183594, + "learning_rate": 3.645317638518721e-06, + "loss": 14.9885, + "step": 314300 + }, + { + "epoch": 0.6349260858850099, + "grad_norm": 0.0, + "learning_rate": 3.6449816319994512e-06, + "loss": 11.6684, + "step": 314310 + }, + { + "epoch": 0.6349462865176937, + "grad_norm": 245.663330078125, + "learning_rate": 3.644645632084376e-06, + "loss": 22.7374, + "step": 314320 + }, + { + "epoch": 0.6349664871503775, + "grad_norm": 316.9748840332031, + "learning_rate": 3.644309638775132e-06, + "loss": 20.8261, + "step": 314330 + }, + { + "epoch": 0.6349866877830613, + "grad_norm": 382.9841613769531, + "learning_rate": 3.6439736520733606e-06, + "loss": 10.0096, + "step": 314340 + }, + { + "epoch": 0.6350068884157452, + "grad_norm": 476.1699523925781, + "learning_rate": 3.6436376719806965e-06, + "loss": 26.7912, + "step": 314350 + }, + { + "epoch": 0.635027089048429, + "grad_norm": 326.27081298828125, + "learning_rate": 3.6433016984987774e-06, + 
"loss": 22.6489, + "step": 314360 + }, + { + "epoch": 0.6350472896811128, + "grad_norm": 296.7281494140625, + "learning_rate": 3.642965731629242e-06, + "loss": 19.9827, + "step": 314370 + }, + { + "epoch": 0.6350674903137966, + "grad_norm": 498.3815612792969, + "learning_rate": 3.6426297713737268e-06, + "loss": 22.878, + "step": 314380 + }, + { + "epoch": 0.6350876909464804, + "grad_norm": 473.14617919921875, + "learning_rate": 3.6422938177338695e-06, + "loss": 13.6374, + "step": 314390 + }, + { + "epoch": 0.6351078915791643, + "grad_norm": 576.0936279296875, + "learning_rate": 3.6419578707113055e-06, + "loss": 19.8624, + "step": 314400 + }, + { + "epoch": 0.6351280922118481, + "grad_norm": 462.81964111328125, + "learning_rate": 3.6416219303076772e-06, + "loss": 13.8512, + "step": 314410 + }, + { + "epoch": 0.6351482928445319, + "grad_norm": 911.2876586914062, + "learning_rate": 3.6412859965246173e-06, + "loss": 25.2457, + "step": 314420 + }, + { + "epoch": 0.6351684934772157, + "grad_norm": 304.0901794433594, + "learning_rate": 3.640950069363765e-06, + "loss": 18.7084, + "step": 314430 + }, + { + "epoch": 0.6351886941098995, + "grad_norm": 731.5963745117188, + "learning_rate": 3.6406141488267575e-06, + "loss": 17.0662, + "step": 314440 + }, + { + "epoch": 0.6352088947425834, + "grad_norm": 271.031005859375, + "learning_rate": 3.640278234915232e-06, + "loss": 24.9593, + "step": 314450 + }, + { + "epoch": 0.6352290953752672, + "grad_norm": 142.2168731689453, + "learning_rate": 3.639942327630823e-06, + "loss": 27.8881, + "step": 314460 + }, + { + "epoch": 0.635249296007951, + "grad_norm": 344.64324951171875, + "learning_rate": 3.6396064269751747e-06, + "loss": 17.679, + "step": 314470 + }, + { + "epoch": 0.6352694966406348, + "grad_norm": 387.39263916015625, + "learning_rate": 3.6392705329499156e-06, + "loss": 17.0301, + "step": 314480 + }, + { + "epoch": 0.6352896972733186, + "grad_norm": 417.09710693359375, + "learning_rate": 3.638934645556688e-06, + "loss": 18.0962, + "step": 314490 + }, + { + "epoch": 0.6353098979060025, + "grad_norm": 494.6003112792969, + "learning_rate": 3.6385987647971287e-06, + "loss": 15.4787, + "step": 314500 + }, + { + "epoch": 0.6353300985386863, + "grad_norm": 480.74456787109375, + "learning_rate": 3.6382628906728735e-06, + "loss": 16.8352, + "step": 314510 + }, + { + "epoch": 0.6353502991713701, + "grad_norm": 347.3243713378906, + "learning_rate": 3.63792702318556e-06, + "loss": 22.1253, + "step": 314520 + }, + { + "epoch": 0.6353704998040539, + "grad_norm": 115.21092987060547, + "learning_rate": 3.6375911623368252e-06, + "loss": 13.5392, + "step": 314530 + }, + { + "epoch": 0.6353907004367377, + "grad_norm": 289.0970153808594, + "learning_rate": 3.637255308128305e-06, + "loss": 10.7989, + "step": 314540 + }, + { + "epoch": 0.6354109010694214, + "grad_norm": 370.20806884765625, + "learning_rate": 3.6369194605616364e-06, + "loss": 11.1195, + "step": 314550 + }, + { + "epoch": 0.6354311017021053, + "grad_norm": 217.1336212158203, + "learning_rate": 3.636583619638458e-06, + "loss": 38.4268, + "step": 314560 + }, + { + "epoch": 0.6354513023347891, + "grad_norm": 652.8502807617188, + "learning_rate": 3.6362477853604066e-06, + "loss": 24.1619, + "step": 314570 + }, + { + "epoch": 0.6354715029674729, + "grad_norm": 674.3751220703125, + "learning_rate": 3.635911957729117e-06, + "loss": 17.7115, + "step": 314580 + }, + { + "epoch": 0.6354917036001567, + "grad_norm": 299.0431213378906, + "learning_rate": 3.6355761367462274e-06, + "loss": 35.0862, + "step": 314590 + }, + 
{ + "epoch": 0.6355119042328405, + "grad_norm": 600.8801879882812, + "learning_rate": 3.635240322413375e-06, + "loss": 26.1539, + "step": 314600 + }, + { + "epoch": 0.6355321048655244, + "grad_norm": 536.4166870117188, + "learning_rate": 3.634904514732195e-06, + "loss": 21.8917, + "step": 314610 + }, + { + "epoch": 0.6355523054982082, + "grad_norm": 150.32162475585938, + "learning_rate": 3.634568713704323e-06, + "loss": 23.1291, + "step": 314620 + }, + { + "epoch": 0.635572506130892, + "grad_norm": 430.8785095214844, + "learning_rate": 3.634232919331401e-06, + "loss": 24.9513, + "step": 314630 + }, + { + "epoch": 0.6355927067635758, + "grad_norm": 410.3782653808594, + "learning_rate": 3.6338971316150593e-06, + "loss": 17.2716, + "step": 314640 + }, + { + "epoch": 0.6356129073962596, + "grad_norm": 171.90130615234375, + "learning_rate": 3.6335613505569386e-06, + "loss": 15.6909, + "step": 314650 + }, + { + "epoch": 0.6356331080289435, + "grad_norm": 330.6631164550781, + "learning_rate": 3.6332255761586745e-06, + "loss": 33.5184, + "step": 314660 + }, + { + "epoch": 0.6356533086616273, + "grad_norm": 41.728023529052734, + "learning_rate": 3.6328898084219023e-06, + "loss": 24.4267, + "step": 314670 + }, + { + "epoch": 0.6356735092943111, + "grad_norm": 580.1731567382812, + "learning_rate": 3.632554047348259e-06, + "loss": 20.6272, + "step": 314680 + }, + { + "epoch": 0.6356937099269949, + "grad_norm": 781.712890625, + "learning_rate": 3.6322182929393833e-06, + "loss": 13.5489, + "step": 314690 + }, + { + "epoch": 0.6357139105596787, + "grad_norm": 38.953609466552734, + "learning_rate": 3.6318825451969085e-06, + "loss": 11.9947, + "step": 314700 + }, + { + "epoch": 0.6357341111923626, + "grad_norm": 680.6676025390625, + "learning_rate": 3.631546804122471e-06, + "loss": 33.3907, + "step": 314710 + }, + { + "epoch": 0.6357543118250464, + "grad_norm": 690.2257080078125, + "learning_rate": 3.6312110697177095e-06, + "loss": 27.6252, + "step": 314720 + }, + { + "epoch": 0.6357745124577302, + "grad_norm": 198.4851837158203, + "learning_rate": 3.63087534198426e-06, + "loss": 17.0928, + "step": 314730 + }, + { + "epoch": 0.635794713090414, + "grad_norm": 180.5613555908203, + "learning_rate": 3.630539620923757e-06, + "loss": 16.4493, + "step": 314740 + }, + { + "epoch": 0.6358149137230978, + "grad_norm": 272.0371398925781, + "learning_rate": 3.630203906537838e-06, + "loss": 20.4483, + "step": 314750 + }, + { + "epoch": 0.6358351143557817, + "grad_norm": 234.10031127929688, + "learning_rate": 3.6298681988281405e-06, + "loss": 23.7162, + "step": 314760 + }, + { + "epoch": 0.6358553149884655, + "grad_norm": 139.9898681640625, + "learning_rate": 3.6295324977962976e-06, + "loss": 21.5939, + "step": 314770 + }, + { + "epoch": 0.6358755156211493, + "grad_norm": 27.606124877929688, + "learning_rate": 3.6291968034439463e-06, + "loss": 14.8901, + "step": 314780 + }, + { + "epoch": 0.6358957162538331, + "grad_norm": 286.2486267089844, + "learning_rate": 3.628861115772726e-06, + "loss": 18.0712, + "step": 314790 + }, + { + "epoch": 0.6359159168865169, + "grad_norm": 76.69282531738281, + "learning_rate": 3.628525434784268e-06, + "loss": 23.0269, + "step": 314800 + }, + { + "epoch": 0.6359361175192006, + "grad_norm": 8.483139991760254, + "learning_rate": 3.6281897604802113e-06, + "loss": 20.4712, + "step": 314810 + }, + { + "epoch": 0.6359563181518845, + "grad_norm": 345.8614807128906, + "learning_rate": 3.6278540928621927e-06, + "loss": 29.5564, + "step": 314820 + }, + { + "epoch": 0.6359765187845683, + "grad_norm": 
227.73464965820312, + "learning_rate": 3.6275184319318456e-06, + "loss": 9.1113, + "step": 314830 + }, + { + "epoch": 0.6359967194172521, + "grad_norm": 468.9640808105469, + "learning_rate": 3.627182777690807e-06, + "loss": 7.3737, + "step": 314840 + }, + { + "epoch": 0.6360169200499359, + "grad_norm": 416.2913513183594, + "learning_rate": 3.6268471301407127e-06, + "loss": 13.9147, + "step": 314850 + }, + { + "epoch": 0.6360371206826197, + "grad_norm": 251.77516174316406, + "learning_rate": 3.626511489283201e-06, + "loss": 18.7787, + "step": 314860 + }, + { + "epoch": 0.6360573213153036, + "grad_norm": 70.39207458496094, + "learning_rate": 3.6261758551199033e-06, + "loss": 26.701, + "step": 314870 + }, + { + "epoch": 0.6360775219479874, + "grad_norm": 114.36405944824219, + "learning_rate": 3.6258402276524585e-06, + "loss": 14.7027, + "step": 314880 + }, + { + "epoch": 0.6360977225806712, + "grad_norm": 881.9510498046875, + "learning_rate": 3.6255046068825035e-06, + "loss": 20.1666, + "step": 314890 + }, + { + "epoch": 0.636117923213355, + "grad_norm": 692.839599609375, + "learning_rate": 3.625168992811671e-06, + "loss": 15.1088, + "step": 314900 + }, + { + "epoch": 0.6361381238460388, + "grad_norm": 71.59687805175781, + "learning_rate": 3.6248333854415975e-06, + "loss": 13.9184, + "step": 314910 + }, + { + "epoch": 0.6361583244787227, + "grad_norm": 435.7094421386719, + "learning_rate": 3.624497784773921e-06, + "loss": 22.1059, + "step": 314920 + }, + { + "epoch": 0.6361785251114065, + "grad_norm": 15.940292358398438, + "learning_rate": 3.624162190810274e-06, + "loss": 12.5004, + "step": 314930 + }, + { + "epoch": 0.6361987257440903, + "grad_norm": 485.35101318359375, + "learning_rate": 3.623826603552293e-06, + "loss": 20.1395, + "step": 314940 + }, + { + "epoch": 0.6362189263767741, + "grad_norm": 307.1765441894531, + "learning_rate": 3.6234910230016173e-06, + "loss": 10.4658, + "step": 314950 + }, + { + "epoch": 0.636239127009458, + "grad_norm": 320.6990966796875, + "learning_rate": 3.6231554491598766e-06, + "loss": 17.335, + "step": 314960 + }, + { + "epoch": 0.6362593276421418, + "grad_norm": 1441.4237060546875, + "learning_rate": 3.622819882028709e-06, + "loss": 22.589, + "step": 314970 + }, + { + "epoch": 0.6362795282748256, + "grad_norm": 150.527099609375, + "learning_rate": 3.6224843216097526e-06, + "loss": 10.7138, + "step": 314980 + }, + { + "epoch": 0.6362997289075094, + "grad_norm": 171.6363983154297, + "learning_rate": 3.6221487679046384e-06, + "loss": 41.1594, + "step": 314990 + }, + { + "epoch": 0.6363199295401932, + "grad_norm": 53.00648880004883, + "learning_rate": 3.6218132209150047e-06, + "loss": 13.5146, + "step": 315000 + }, + { + "epoch": 0.636340130172877, + "grad_norm": 488.0824279785156, + "learning_rate": 3.621477680642486e-06, + "loss": 19.003, + "step": 315010 + }, + { + "epoch": 0.6363603308055609, + "grad_norm": 819.4788818359375, + "learning_rate": 3.6211421470887187e-06, + "loss": 30.1531, + "step": 315020 + }, + { + "epoch": 0.6363805314382447, + "grad_norm": 68.24623107910156, + "learning_rate": 3.620806620255336e-06, + "loss": 16.0678, + "step": 315030 + }, + { + "epoch": 0.6364007320709285, + "grad_norm": 411.6278381347656, + "learning_rate": 3.6204711001439754e-06, + "loss": 19.2273, + "step": 315040 + }, + { + "epoch": 0.6364209327036123, + "grad_norm": 56.952491760253906, + "learning_rate": 3.6201355867562725e-06, + "loss": 18.4849, + "step": 315050 + }, + { + "epoch": 0.636441133336296, + "grad_norm": 507.9949951171875, + "learning_rate": 
3.61980008009386e-06, + "loss": 15.0319, + "step": 315060 + }, + { + "epoch": 0.6364613339689799, + "grad_norm": 265.7088623046875, + "learning_rate": 3.6194645801583745e-06, + "loss": 18.3823, + "step": 315070 + }, + { + "epoch": 0.6364815346016637, + "grad_norm": 573.2406005859375, + "learning_rate": 3.6191290869514523e-06, + "loss": 17.9595, + "step": 315080 + }, + { + "epoch": 0.6365017352343475, + "grad_norm": 827.1715087890625, + "learning_rate": 3.6187936004747248e-06, + "loss": 16.9535, + "step": 315090 + }, + { + "epoch": 0.6365219358670313, + "grad_norm": 414.61083984375, + "learning_rate": 3.618458120729832e-06, + "loss": 8.8065, + "step": 315100 + }, + { + "epoch": 0.6365421364997151, + "grad_norm": 84.46508026123047, + "learning_rate": 3.6181226477184074e-06, + "loss": 20.7389, + "step": 315110 + }, + { + "epoch": 0.636562337132399, + "grad_norm": 683.1246948242188, + "learning_rate": 3.617787181442084e-06, + "loss": 19.4811, + "step": 315120 + }, + { + "epoch": 0.6365825377650828, + "grad_norm": 314.67730712890625, + "learning_rate": 3.6174517219024985e-06, + "loss": 34.0484, + "step": 315130 + }, + { + "epoch": 0.6366027383977666, + "grad_norm": 330.02545166015625, + "learning_rate": 3.617116269101286e-06, + "loss": 27.6005, + "step": 315140 + }, + { + "epoch": 0.6366229390304504, + "grad_norm": 82.23473358154297, + "learning_rate": 3.616780823040081e-06, + "loss": 8.8885, + "step": 315150 + }, + { + "epoch": 0.6366431396631342, + "grad_norm": 491.30096435546875, + "learning_rate": 3.616445383720517e-06, + "loss": 15.6178, + "step": 315160 + }, + { + "epoch": 0.6366633402958181, + "grad_norm": 504.1806640625, + "learning_rate": 3.616109951144231e-06, + "loss": 29.5404, + "step": 315170 + }, + { + "epoch": 0.6366835409285019, + "grad_norm": 309.6594543457031, + "learning_rate": 3.615774525312859e-06, + "loss": 11.1125, + "step": 315180 + }, + { + "epoch": 0.6367037415611857, + "grad_norm": 67.88604736328125, + "learning_rate": 3.6154391062280326e-06, + "loss": 27.2476, + "step": 315190 + }, + { + "epoch": 0.6367239421938695, + "grad_norm": 557.1927490234375, + "learning_rate": 3.6151036938913887e-06, + "loss": 25.8452, + "step": 315200 + }, + { + "epoch": 0.6367441428265533, + "grad_norm": 500.59161376953125, + "learning_rate": 3.614768288304562e-06, + "loss": 14.6013, + "step": 315210 + }, + { + "epoch": 0.6367643434592372, + "grad_norm": 248.150634765625, + "learning_rate": 3.6144328894691854e-06, + "loss": 33.7717, + "step": 315220 + }, + { + "epoch": 0.636784544091921, + "grad_norm": 124.32286071777344, + "learning_rate": 3.614097497386894e-06, + "loss": 17.7882, + "step": 315230 + }, + { + "epoch": 0.6368047447246048, + "grad_norm": 211.30921936035156, + "learning_rate": 3.613762112059327e-06, + "loss": 20.3821, + "step": 315240 + }, + { + "epoch": 0.6368249453572886, + "grad_norm": 201.91744995117188, + "learning_rate": 3.613426733488111e-06, + "loss": 13.8047, + "step": 315250 + }, + { + "epoch": 0.6368451459899724, + "grad_norm": 244.3423614501953, + "learning_rate": 3.613091361674887e-06, + "loss": 18.9554, + "step": 315260 + }, + { + "epoch": 0.6368653466226563, + "grad_norm": 173.76109313964844, + "learning_rate": 3.6127559966212885e-06, + "loss": 15.9367, + "step": 315270 + }, + { + "epoch": 0.6368855472553401, + "grad_norm": 622.0409545898438, + "learning_rate": 3.6124206383289474e-06, + "loss": 29.1095, + "step": 315280 + }, + { + "epoch": 0.6369057478880239, + "grad_norm": 198.35536193847656, + "learning_rate": 3.6120852867995003e-06, + "loss": 22.4082, + 
"step": 315290 + }, + { + "epoch": 0.6369259485207077, + "grad_norm": 682.5012817382812, + "learning_rate": 3.61174994203458e-06, + "loss": 17.6079, + "step": 315300 + }, + { + "epoch": 0.6369461491533915, + "grad_norm": 381.4442138671875, + "learning_rate": 3.611414604035825e-06, + "loss": 19.0631, + "step": 315310 + }, + { + "epoch": 0.6369663497860752, + "grad_norm": 244.13351440429688, + "learning_rate": 3.6110792728048636e-06, + "loss": 8.6647, + "step": 315320 + }, + { + "epoch": 0.6369865504187591, + "grad_norm": 184.3904266357422, + "learning_rate": 3.610743948343335e-06, + "loss": 10.3614, + "step": 315330 + }, + { + "epoch": 0.6370067510514429, + "grad_norm": 312.16729736328125, + "learning_rate": 3.610408630652873e-06, + "loss": 17.1388, + "step": 315340 + }, + { + "epoch": 0.6370269516841267, + "grad_norm": 79.96022033691406, + "learning_rate": 3.610073319735109e-06, + "loss": 14.7381, + "step": 315350 + }, + { + "epoch": 0.6370471523168105, + "grad_norm": 112.77809143066406, + "learning_rate": 3.6097380155916795e-06, + "loss": 17.2471, + "step": 315360 + }, + { + "epoch": 0.6370673529494943, + "grad_norm": 729.2623291015625, + "learning_rate": 3.609402718224219e-06, + "loss": 18.7645, + "step": 315370 + }, + { + "epoch": 0.6370875535821782, + "grad_norm": 424.6807556152344, + "learning_rate": 3.6090674276343608e-06, + "loss": 17.2392, + "step": 315380 + }, + { + "epoch": 0.637107754214862, + "grad_norm": 431.84405517578125, + "learning_rate": 3.608732143823737e-06, + "loss": 12.3354, + "step": 315390 + }, + { + "epoch": 0.6371279548475458, + "grad_norm": 1119.903076171875, + "learning_rate": 3.608396866793988e-06, + "loss": 14.1539, + "step": 315400 + }, + { + "epoch": 0.6371481554802296, + "grad_norm": 506.5306396484375, + "learning_rate": 3.60806159654674e-06, + "loss": 24.5863, + "step": 315410 + }, + { + "epoch": 0.6371683561129134, + "grad_norm": 62.214229583740234, + "learning_rate": 3.607726333083633e-06, + "loss": 17.4817, + "step": 315420 + }, + { + "epoch": 0.6371885567455973, + "grad_norm": 669.62548828125, + "learning_rate": 3.607391076406299e-06, + "loss": 27.4537, + "step": 315430 + }, + { + "epoch": 0.6372087573782811, + "grad_norm": 468.5104675292969, + "learning_rate": 3.607055826516372e-06, + "loss": 28.7736, + "step": 315440 + }, + { + "epoch": 0.6372289580109649, + "grad_norm": 238.2803497314453, + "learning_rate": 3.606720583415485e-06, + "loss": 35.1728, + "step": 315450 + }, + { + "epoch": 0.6372491586436487, + "grad_norm": 253.49220275878906, + "learning_rate": 3.6063853471052724e-06, + "loss": 27.1324, + "step": 315460 + }, + { + "epoch": 0.6372693592763325, + "grad_norm": 42.56626510620117, + "learning_rate": 3.606050117587372e-06, + "loss": 14.8215, + "step": 315470 + }, + { + "epoch": 0.6372895599090164, + "grad_norm": 472.00201416015625, + "learning_rate": 3.605714894863411e-06, + "loss": 15.7142, + "step": 315480 + }, + { + "epoch": 0.6373097605417002, + "grad_norm": 421.8933410644531, + "learning_rate": 3.605379678935027e-06, + "loss": 25.5709, + "step": 315490 + }, + { + "epoch": 0.637329961174384, + "grad_norm": 550.9469604492188, + "learning_rate": 3.6050444698038547e-06, + "loss": 11.3782, + "step": 315500 + }, + { + "epoch": 0.6373501618070678, + "grad_norm": 549.83056640625, + "learning_rate": 3.6047092674715257e-06, + "loss": 20.7691, + "step": 315510 + }, + { + "epoch": 0.6373703624397516, + "grad_norm": 247.39259338378906, + "learning_rate": 3.6043740719396736e-06, + "loss": 27.8713, + "step": 315520 + }, + { + "epoch": 
0.6373905630724355, + "grad_norm": 478.87542724609375, + "learning_rate": 3.604038883209935e-06, + "loss": 21.6742, + "step": 315530 + }, + { + "epoch": 0.6374107637051193, + "grad_norm": 282.81341552734375, + "learning_rate": 3.60370370128394e-06, + "loss": 25.0744, + "step": 315540 + }, + { + "epoch": 0.6374309643378031, + "grad_norm": 207.08831787109375, + "learning_rate": 3.603368526163323e-06, + "loss": 15.2893, + "step": 315550 + }, + { + "epoch": 0.6374511649704869, + "grad_norm": 375.98291015625, + "learning_rate": 3.6030333578497213e-06, + "loss": 14.9089, + "step": 315560 + }, + { + "epoch": 0.6374713656031706, + "grad_norm": 270.4388732910156, + "learning_rate": 3.602698196344763e-06, + "loss": 10.7293, + "step": 315570 + }, + { + "epoch": 0.6374915662358545, + "grad_norm": 320.18365478515625, + "learning_rate": 3.6023630416500843e-06, + "loss": 18.4042, + "step": 315580 + }, + { + "epoch": 0.6375117668685383, + "grad_norm": 268.7408752441406, + "learning_rate": 3.6020278937673202e-06, + "loss": 21.908, + "step": 315590 + }, + { + "epoch": 0.6375319675012221, + "grad_norm": 375.9366760253906, + "learning_rate": 3.6016927526981014e-06, + "loss": 10.5216, + "step": 315600 + }, + { + "epoch": 0.6375521681339059, + "grad_norm": 238.86119079589844, + "learning_rate": 3.601357618444063e-06, + "loss": 14.4453, + "step": 315610 + }, + { + "epoch": 0.6375723687665897, + "grad_norm": 149.0080108642578, + "learning_rate": 3.6010224910068363e-06, + "loss": 14.9466, + "step": 315620 + }, + { + "epoch": 0.6375925693992736, + "grad_norm": 772.7325439453125, + "learning_rate": 3.6006873703880595e-06, + "loss": 17.9658, + "step": 315630 + }, + { + "epoch": 0.6376127700319574, + "grad_norm": 257.7185974121094, + "learning_rate": 3.60035225658936e-06, + "loss": 20.0764, + "step": 315640 + }, + { + "epoch": 0.6376329706646412, + "grad_norm": 518.2299194335938, + "learning_rate": 3.600017149612375e-06, + "loss": 10.1733, + "step": 315650 + }, + { + "epoch": 0.637653171297325, + "grad_norm": 158.87094116210938, + "learning_rate": 3.599682049458737e-06, + "loss": 20.2825, + "step": 315660 + }, + { + "epoch": 0.6376733719300088, + "grad_norm": 332.2776184082031, + "learning_rate": 3.5993469561300785e-06, + "loss": 17.557, + "step": 315670 + }, + { + "epoch": 0.6376935725626927, + "grad_norm": 9.964910507202148, + "learning_rate": 3.599011869628033e-06, + "loss": 21.0277, + "step": 315680 + }, + { + "epoch": 0.6377137731953765, + "grad_norm": 357.9776611328125, + "learning_rate": 3.598676789954234e-06, + "loss": 15.1413, + "step": 315690 + }, + { + "epoch": 0.6377339738280603, + "grad_norm": 422.00701904296875, + "learning_rate": 3.598341717110313e-06, + "loss": 15.5945, + "step": 315700 + }, + { + "epoch": 0.6377541744607441, + "grad_norm": 451.0946350097656, + "learning_rate": 3.598006651097905e-06, + "loss": 13.4531, + "step": 315710 + }, + { + "epoch": 0.6377743750934279, + "grad_norm": 430.7082214355469, + "learning_rate": 3.5976715919186443e-06, + "loss": 12.7757, + "step": 315720 + }, + { + "epoch": 0.6377945757261118, + "grad_norm": 654.5879516601562, + "learning_rate": 3.5973365395741612e-06, + "loss": 55.4897, + "step": 315730 + }, + { + "epoch": 0.6378147763587956, + "grad_norm": 693.4959716796875, + "learning_rate": 3.597001494066089e-06, + "loss": 15.4373, + "step": 315740 + }, + { + "epoch": 0.6378349769914794, + "grad_norm": 596.0556030273438, + "learning_rate": 3.5966664553960622e-06, + "loss": 33.6988, + "step": 315750 + }, + { + "epoch": 0.6378551776241632, + "grad_norm": 
85.95985412597656, + "learning_rate": 3.596331423565712e-06, + "loss": 10.1279, + "step": 315760 + }, + { + "epoch": 0.637875378256847, + "grad_norm": 535.9702758789062, + "learning_rate": 3.595996398576672e-06, + "loss": 20.2559, + "step": 315770 + }, + { + "epoch": 0.6378955788895309, + "grad_norm": 607.033447265625, + "learning_rate": 3.5956613804305755e-06, + "loss": 14.3539, + "step": 315780 + }, + { + "epoch": 0.6379157795222147, + "grad_norm": 423.5137023925781, + "learning_rate": 3.5953263691290564e-06, + "loss": 19.5415, + "step": 315790 + }, + { + "epoch": 0.6379359801548985, + "grad_norm": 305.0976257324219, + "learning_rate": 3.5949913646737456e-06, + "loss": 17.0146, + "step": 315800 + }, + { + "epoch": 0.6379561807875823, + "grad_norm": 324.80523681640625, + "learning_rate": 3.594656367066276e-06, + "loss": 19.6394, + "step": 315810 + }, + { + "epoch": 0.6379763814202661, + "grad_norm": 582.4668579101562, + "learning_rate": 3.594321376308282e-06, + "loss": 27.4317, + "step": 315820 + }, + { + "epoch": 0.6379965820529498, + "grad_norm": 5703.4150390625, + "learning_rate": 3.5939863924013937e-06, + "loss": 38.9872, + "step": 315830 + }, + { + "epoch": 0.6380167826856337, + "grad_norm": 150.55264282226562, + "learning_rate": 3.593651415347244e-06, + "loss": 18.9136, + "step": 315840 + }, + { + "epoch": 0.6380369833183175, + "grad_norm": 552.1156005859375, + "learning_rate": 3.5933164451474708e-06, + "loss": 15.5248, + "step": 315850 + }, + { + "epoch": 0.6380571839510013, + "grad_norm": 335.07537841796875, + "learning_rate": 3.592981481803699e-06, + "loss": 18.5343, + "step": 315860 + }, + { + "epoch": 0.6380773845836851, + "grad_norm": 318.16680908203125, + "learning_rate": 3.5926465253175656e-06, + "loss": 24.1763, + "step": 315870 + }, + { + "epoch": 0.638097585216369, + "grad_norm": 275.1593322753906, + "learning_rate": 3.5923115756907033e-06, + "loss": 18.2714, + "step": 315880 + }, + { + "epoch": 0.6381177858490528, + "grad_norm": 214.022216796875, + "learning_rate": 3.591976632924743e-06, + "loss": 19.615, + "step": 315890 + }, + { + "epoch": 0.6381379864817366, + "grad_norm": 143.54090881347656, + "learning_rate": 3.5916416970213173e-06, + "loss": 29.774, + "step": 315900 + }, + { + "epoch": 0.6381581871144204, + "grad_norm": 275.9958190917969, + "learning_rate": 3.5913067679820592e-06, + "loss": 12.2171, + "step": 315910 + }, + { + "epoch": 0.6381783877471042, + "grad_norm": 398.24871826171875, + "learning_rate": 3.5909718458086033e-06, + "loss": 28.6376, + "step": 315920 + }, + { + "epoch": 0.638198588379788, + "grad_norm": 519.3574829101562, + "learning_rate": 3.5906369305025767e-06, + "loss": 13.1821, + "step": 315930 + }, + { + "epoch": 0.6382187890124719, + "grad_norm": 74.91946411132812, + "learning_rate": 3.590302022065616e-06, + "loss": 9.2701, + "step": 315940 + }, + { + "epoch": 0.6382389896451557, + "grad_norm": 314.8899841308594, + "learning_rate": 3.5899671204993535e-06, + "loss": 20.7149, + "step": 315950 + }, + { + "epoch": 0.6382591902778395, + "grad_norm": 198.80355834960938, + "learning_rate": 3.589632225805419e-06, + "loss": 34.3014, + "step": 315960 + }, + { + "epoch": 0.6382793909105233, + "grad_norm": 125.96674346923828, + "learning_rate": 3.589297337985446e-06, + "loss": 14.2554, + "step": 315970 + }, + { + "epoch": 0.6382995915432071, + "grad_norm": 0.0, + "learning_rate": 3.5889624570410675e-06, + "loss": 10.631, + "step": 315980 + }, + { + "epoch": 0.638319792175891, + "grad_norm": 372.5596008300781, + "learning_rate": 3.5886275829739144e-06, + 
"loss": 16.3936, + "step": 315990 + }, + { + "epoch": 0.6383399928085748, + "grad_norm": 350.9508361816406, + "learning_rate": 3.5882927157856175e-06, + "loss": 17.8655, + "step": 316000 + }, + { + "epoch": 0.6383601934412586, + "grad_norm": 185.98765563964844, + "learning_rate": 3.5879578554778137e-06, + "loss": 32.5148, + "step": 316010 + }, + { + "epoch": 0.6383803940739424, + "grad_norm": 164.63009643554688, + "learning_rate": 3.5876230020521298e-06, + "loss": 12.2725, + "step": 316020 + }, + { + "epoch": 0.6384005947066262, + "grad_norm": 289.0091552734375, + "learning_rate": 3.587288155510201e-06, + "loss": 38.6229, + "step": 316030 + }, + { + "epoch": 0.6384207953393101, + "grad_norm": 200.04074096679688, + "learning_rate": 3.5869533158536583e-06, + "loss": 31.8896, + "step": 316040 + }, + { + "epoch": 0.6384409959719939, + "grad_norm": 318.0293884277344, + "learning_rate": 3.586618483084134e-06, + "loss": 17.5421, + "step": 316050 + }, + { + "epoch": 0.6384611966046777, + "grad_norm": 653.26611328125, + "learning_rate": 3.586283657203259e-06, + "loss": 22.8768, + "step": 316060 + }, + { + "epoch": 0.6384813972373615, + "grad_norm": 291.316650390625, + "learning_rate": 3.5859488382126656e-06, + "loss": 14.7977, + "step": 316070 + }, + { + "epoch": 0.6385015978700452, + "grad_norm": 417.2750549316406, + "learning_rate": 3.585614026113989e-06, + "loss": 16.1932, + "step": 316080 + }, + { + "epoch": 0.638521798502729, + "grad_norm": 424.402587890625, + "learning_rate": 3.5852792209088543e-06, + "loss": 17.4103, + "step": 316090 + }, + { + "epoch": 0.6385419991354129, + "grad_norm": 281.0193786621094, + "learning_rate": 3.584944422598899e-06, + "loss": 16.3783, + "step": 316100 + }, + { + "epoch": 0.6385621997680967, + "grad_norm": 548.0072021484375, + "learning_rate": 3.5846096311857537e-06, + "loss": 16.5864, + "step": 316110 + }, + { + "epoch": 0.6385824004007805, + "grad_norm": 534.6842041015625, + "learning_rate": 3.584274846671048e-06, + "loss": 18.8777, + "step": 316120 + }, + { + "epoch": 0.6386026010334643, + "grad_norm": 341.5393981933594, + "learning_rate": 3.583940069056415e-06, + "loss": 21.7011, + "step": 316130 + }, + { + "epoch": 0.6386228016661482, + "grad_norm": 112.1914291381836, + "learning_rate": 3.5836052983434878e-06, + "loss": 29.0516, + "step": 316140 + }, + { + "epoch": 0.638643002298832, + "grad_norm": 13.720465660095215, + "learning_rate": 3.583270534533896e-06, + "loss": 21.6894, + "step": 316150 + }, + { + "epoch": 0.6386632029315158, + "grad_norm": 207.6351318359375, + "learning_rate": 3.5829357776292694e-06, + "loss": 16.9002, + "step": 316160 + }, + { + "epoch": 0.6386834035641996, + "grad_norm": 38.644596099853516, + "learning_rate": 3.582601027631246e-06, + "loss": 17.5596, + "step": 316170 + }, + { + "epoch": 0.6387036041968834, + "grad_norm": 460.9317626953125, + "learning_rate": 3.5822662845414502e-06, + "loss": 14.0099, + "step": 316180 + }, + { + "epoch": 0.6387238048295673, + "grad_norm": 157.43426513671875, + "learning_rate": 3.5819315483615175e-06, + "loss": 20.7033, + "step": 316190 + }, + { + "epoch": 0.6387440054622511, + "grad_norm": 424.1911926269531, + "learning_rate": 3.5815968190930793e-06, + "loss": 17.9277, + "step": 316200 + }, + { + "epoch": 0.6387642060949349, + "grad_norm": 155.28729248046875, + "learning_rate": 3.5812620967377653e-06, + "loss": 9.7971, + "step": 316210 + }, + { + "epoch": 0.6387844067276187, + "grad_norm": 581.9959716796875, + "learning_rate": 3.5809273812972078e-06, + "loss": 14.0527, + "step": 316220 + }, + { + 
"epoch": 0.6388046073603025, + "grad_norm": 108.32624053955078, + "learning_rate": 3.5805926727730367e-06, + "loss": 15.8633, + "step": 316230 + }, + { + "epoch": 0.6388248079929864, + "grad_norm": 359.7661437988281, + "learning_rate": 3.5802579711668883e-06, + "loss": 30.5839, + "step": 316240 + }, + { + "epoch": 0.6388450086256702, + "grad_norm": 307.0447998046875, + "learning_rate": 3.579923276480387e-06, + "loss": 35.9667, + "step": 316250 + }, + { + "epoch": 0.638865209258354, + "grad_norm": 421.31005859375, + "learning_rate": 3.5795885887151687e-06, + "loss": 17.2982, + "step": 316260 + }, + { + "epoch": 0.6388854098910378, + "grad_norm": 143.21043395996094, + "learning_rate": 3.5792539078728644e-06, + "loss": 11.963, + "step": 316270 + }, + { + "epoch": 0.6389056105237216, + "grad_norm": 504.7131042480469, + "learning_rate": 3.578919233955103e-06, + "loss": 26.5282, + "step": 316280 + }, + { + "epoch": 0.6389258111564055, + "grad_norm": 73.53141021728516, + "learning_rate": 3.5785845669635165e-06, + "loss": 24.9844, + "step": 316290 + }, + { + "epoch": 0.6389460117890893, + "grad_norm": 93.7257080078125, + "learning_rate": 3.5782499068997386e-06, + "loss": 13.139, + "step": 316300 + }, + { + "epoch": 0.6389662124217731, + "grad_norm": 151.9158935546875, + "learning_rate": 3.577915253765396e-06, + "loss": 46.7024, + "step": 316310 + }, + { + "epoch": 0.6389864130544569, + "grad_norm": 352.59100341796875, + "learning_rate": 3.5775806075621215e-06, + "loss": 20.6736, + "step": 316320 + }, + { + "epoch": 0.6390066136871407, + "grad_norm": 134.93736267089844, + "learning_rate": 3.5772459682915484e-06, + "loss": 17.2972, + "step": 316330 + }, + { + "epoch": 0.6390268143198244, + "grad_norm": 1136.881591796875, + "learning_rate": 3.5769113359553055e-06, + "loss": 28.1377, + "step": 316340 + }, + { + "epoch": 0.6390470149525083, + "grad_norm": 588.3436279296875, + "learning_rate": 3.5765767105550236e-06, + "loss": 19.0263, + "step": 316350 + }, + { + "epoch": 0.6390672155851921, + "grad_norm": 246.8373565673828, + "learning_rate": 3.576242092092334e-06, + "loss": 17.4476, + "step": 316360 + }, + { + "epoch": 0.6390874162178759, + "grad_norm": 352.61163330078125, + "learning_rate": 3.5759074805688694e-06, + "loss": 24.8118, + "step": 316370 + }, + { + "epoch": 0.6391076168505597, + "grad_norm": 385.70147705078125, + "learning_rate": 3.5755728759862573e-06, + "loss": 26.2128, + "step": 316380 + }, + { + "epoch": 0.6391278174832435, + "grad_norm": 468.3145446777344, + "learning_rate": 3.5752382783461297e-06, + "loss": 24.5198, + "step": 316390 + }, + { + "epoch": 0.6391480181159274, + "grad_norm": 68.4457015991211, + "learning_rate": 3.5749036876501196e-06, + "loss": 15.4026, + "step": 316400 + }, + { + "epoch": 0.6391682187486112, + "grad_norm": 310.00048828125, + "learning_rate": 3.5745691038998555e-06, + "loss": 37.8392, + "step": 316410 + }, + { + "epoch": 0.639188419381295, + "grad_norm": 363.8419494628906, + "learning_rate": 3.5742345270969688e-06, + "loss": 26.5229, + "step": 316420 + }, + { + "epoch": 0.6392086200139788, + "grad_norm": 380.4399719238281, + "learning_rate": 3.573899957243091e-06, + "loss": 15.0728, + "step": 316430 + }, + { + "epoch": 0.6392288206466626, + "grad_norm": 241.99363708496094, + "learning_rate": 3.573565394339851e-06, + "loss": 15.3496, + "step": 316440 + }, + { + "epoch": 0.6392490212793465, + "grad_norm": 458.9059753417969, + "learning_rate": 3.573230838388878e-06, + "loss": 22.49, + "step": 316450 + }, + { + "epoch": 0.6392692219120303, + "grad_norm": 
110.33128356933594, + "learning_rate": 3.572896289391809e-06, + "loss": 21.3822, + "step": 316460 + }, + { + "epoch": 0.6392894225447141, + "grad_norm": 110.6622314453125, + "learning_rate": 3.5725617473502673e-06, + "loss": 13.4087, + "step": 316470 + }, + { + "epoch": 0.6393096231773979, + "grad_norm": 388.13360595703125, + "learning_rate": 3.5722272122658874e-06, + "loss": 9.8195, + "step": 316480 + }, + { + "epoch": 0.6393298238100817, + "grad_norm": 321.4127197265625, + "learning_rate": 3.5718926841402993e-06, + "loss": 9.2979, + "step": 316490 + }, + { + "epoch": 0.6393500244427656, + "grad_norm": 517.9979858398438, + "learning_rate": 3.571558162975133e-06, + "loss": 16.5989, + "step": 316500 + }, + { + "epoch": 0.6393702250754494, + "grad_norm": 353.75634765625, + "learning_rate": 3.5712236487720185e-06, + "loss": 15.6718, + "step": 316510 + }, + { + "epoch": 0.6393904257081332, + "grad_norm": 127.14871978759766, + "learning_rate": 3.570889141532586e-06, + "loss": 14.981, + "step": 316520 + }, + { + "epoch": 0.639410626340817, + "grad_norm": 169.82476806640625, + "learning_rate": 3.570554641258469e-06, + "loss": 11.7585, + "step": 316530 + }, + { + "epoch": 0.6394308269735008, + "grad_norm": 224.20399475097656, + "learning_rate": 3.570220147951292e-06, + "loss": 16.3962, + "step": 316540 + }, + { + "epoch": 0.6394510276061847, + "grad_norm": 100.33985137939453, + "learning_rate": 3.569885661612691e-06, + "loss": 11.0874, + "step": 316550 + }, + { + "epoch": 0.6394712282388685, + "grad_norm": 530.1268310546875, + "learning_rate": 3.5695511822442934e-06, + "loss": 18.7147, + "step": 316560 + }, + { + "epoch": 0.6394914288715523, + "grad_norm": 151.81561279296875, + "learning_rate": 3.5692167098477292e-06, + "loss": 16.5754, + "step": 316570 + }, + { + "epoch": 0.6395116295042361, + "grad_norm": 276.953369140625, + "learning_rate": 3.5688822444246297e-06, + "loss": 16.6768, + "step": 316580 + }, + { + "epoch": 0.6395318301369199, + "grad_norm": 272.35028076171875, + "learning_rate": 3.5685477859766254e-06, + "loss": 6.2578, + "step": 316590 + }, + { + "epoch": 0.6395520307696037, + "grad_norm": 531.3369750976562, + "learning_rate": 3.568213334505345e-06, + "loss": 14.722, + "step": 316600 + }, + { + "epoch": 0.6395722314022875, + "grad_norm": 402.7672119140625, + "learning_rate": 3.567878890012417e-06, + "loss": 17.8026, + "step": 316610 + }, + { + "epoch": 0.6395924320349713, + "grad_norm": 181.8702392578125, + "learning_rate": 3.567544452499477e-06, + "loss": 30.1878, + "step": 316620 + }, + { + "epoch": 0.6396126326676551, + "grad_norm": 39.49435806274414, + "learning_rate": 3.5672100219681495e-06, + "loss": 18.6056, + "step": 316630 + }, + { + "epoch": 0.6396328333003389, + "grad_norm": 177.6253204345703, + "learning_rate": 3.5668755984200664e-06, + "loss": 30.4399, + "step": 316640 + }, + { + "epoch": 0.6396530339330228, + "grad_norm": 298.5770568847656, + "learning_rate": 3.5665411818568596e-06, + "loss": 17.0189, + "step": 316650 + }, + { + "epoch": 0.6396732345657066, + "grad_norm": 22.274227142333984, + "learning_rate": 3.5662067722801556e-06, + "loss": 9.9698, + "step": 316660 + }, + { + "epoch": 0.6396934351983904, + "grad_norm": 280.33935546875, + "learning_rate": 3.5658723696915864e-06, + "loss": 26.7317, + "step": 316670 + }, + { + "epoch": 0.6397136358310742, + "grad_norm": 452.43603515625, + "learning_rate": 3.5655379740927796e-06, + "loss": 11.8348, + "step": 316680 + }, + { + "epoch": 0.639733836463758, + "grad_norm": 514.5504150390625, + "learning_rate": 
3.5652035854853706e-06, + "loss": 17.5395, + "step": 316690 + }, + { + "epoch": 0.6397540370964419, + "grad_norm": 184.27651977539062, + "learning_rate": 3.564869203870982e-06, + "loss": 27.3692, + "step": 316700 + }, + { + "epoch": 0.6397742377291257, + "grad_norm": 185.65682983398438, + "learning_rate": 3.564534829251248e-06, + "loss": 13.7503, + "step": 316710 + }, + { + "epoch": 0.6397944383618095, + "grad_norm": 395.0429992675781, + "learning_rate": 3.564200461627798e-06, + "loss": 17.4748, + "step": 316720 + }, + { + "epoch": 0.6398146389944933, + "grad_norm": 604.4706420898438, + "learning_rate": 3.5638661010022604e-06, + "loss": 11.7635, + "step": 316730 + }, + { + "epoch": 0.6398348396271771, + "grad_norm": 352.2259826660156, + "learning_rate": 3.5635317473762642e-06, + "loss": 24.211, + "step": 316740 + }, + { + "epoch": 0.639855040259861, + "grad_norm": 209.53041076660156, + "learning_rate": 3.5631974007514414e-06, + "loss": 12.0263, + "step": 316750 + }, + { + "epoch": 0.6398752408925448, + "grad_norm": 357.42181396484375, + "learning_rate": 3.562863061129419e-06, + "loss": 15.5566, + "step": 316760 + }, + { + "epoch": 0.6398954415252286, + "grad_norm": 541.9507446289062, + "learning_rate": 3.562528728511827e-06, + "loss": 16.3403, + "step": 316770 + }, + { + "epoch": 0.6399156421579124, + "grad_norm": 752.5086669921875, + "learning_rate": 3.562194402900299e-06, + "loss": 22.2073, + "step": 316780 + }, + { + "epoch": 0.6399358427905962, + "grad_norm": 343.2121276855469, + "learning_rate": 3.561860084296458e-06, + "loss": 11.9575, + "step": 316790 + }, + { + "epoch": 0.63995604342328, + "grad_norm": 189.61074829101562, + "learning_rate": 3.561525772701937e-06, + "loss": 11.9739, + "step": 316800 + }, + { + "epoch": 0.6399762440559639, + "grad_norm": 220.5204315185547, + "learning_rate": 3.5611914681183647e-06, + "loss": 10.8192, + "step": 316810 + }, + { + "epoch": 0.6399964446886477, + "grad_norm": 346.1005554199219, + "learning_rate": 3.5608571705473725e-06, + "loss": 38.7872, + "step": 316820 + }, + { + "epoch": 0.6400166453213315, + "grad_norm": 56.42607116699219, + "learning_rate": 3.5605228799905865e-06, + "loss": 14.3211, + "step": 316830 + }, + { + "epoch": 0.6400368459540153, + "grad_norm": 864.3530883789062, + "learning_rate": 3.5601885964496364e-06, + "loss": 26.8919, + "step": 316840 + }, + { + "epoch": 0.640057046586699, + "grad_norm": 749.6741943359375, + "learning_rate": 3.559854319926156e-06, + "loss": 21.41, + "step": 316850 + }, + { + "epoch": 0.6400772472193829, + "grad_norm": 251.44070434570312, + "learning_rate": 3.559520050421767e-06, + "loss": 14.2742, + "step": 316860 + }, + { + "epoch": 0.6400974478520667, + "grad_norm": 424.0389709472656, + "learning_rate": 3.559185787938104e-06, + "loss": 27.6596, + "step": 316870 + }, + { + "epoch": 0.6401176484847505, + "grad_norm": 130.6412353515625, + "learning_rate": 3.558851532476796e-06, + "loss": 24.9391, + "step": 316880 + }, + { + "epoch": 0.6401378491174343, + "grad_norm": 1513.4830322265625, + "learning_rate": 3.5585172840394695e-06, + "loss": 18.4112, + "step": 316890 + }, + { + "epoch": 0.6401580497501181, + "grad_norm": 254.52268981933594, + "learning_rate": 3.5581830426277554e-06, + "loss": 27.6619, + "step": 316900 + }, + { + "epoch": 0.640178250382802, + "grad_norm": 243.79246520996094, + "learning_rate": 3.5578488082432828e-06, + "loss": 18.7862, + "step": 316910 + }, + { + "epoch": 0.6401984510154858, + "grad_norm": 346.9362487792969, + "learning_rate": 3.557514580887679e-06, + "loss": 9.7744, + 
"step": 316920 + }, + { + "epoch": 0.6402186516481696, + "grad_norm": 449.9012145996094, + "learning_rate": 3.5571803605625734e-06, + "loss": 27.1587, + "step": 316930 + }, + { + "epoch": 0.6402388522808534, + "grad_norm": 437.7616271972656, + "learning_rate": 3.556846147269598e-06, + "loss": 28.9926, + "step": 316940 + }, + { + "epoch": 0.6402590529135372, + "grad_norm": 733.4630126953125, + "learning_rate": 3.556511941010378e-06, + "loss": 21.667, + "step": 316950 + }, + { + "epoch": 0.6402792535462211, + "grad_norm": 65.58394622802734, + "learning_rate": 3.5561777417865438e-06, + "loss": 12.3725, + "step": 316960 + }, + { + "epoch": 0.6402994541789049, + "grad_norm": 613.214599609375, + "learning_rate": 3.5558435495997245e-06, + "loss": 27.3721, + "step": 316970 + }, + { + "epoch": 0.6403196548115887, + "grad_norm": 54.631591796875, + "learning_rate": 3.5555093644515496e-06, + "loss": 20.7772, + "step": 316980 + }, + { + "epoch": 0.6403398554442725, + "grad_norm": 275.8143615722656, + "learning_rate": 3.5551751863436458e-06, + "loss": 10.1682, + "step": 316990 + }, + { + "epoch": 0.6403600560769563, + "grad_norm": 332.1075134277344, + "learning_rate": 3.5548410152776414e-06, + "loss": 14.8546, + "step": 317000 + }, + { + "epoch": 0.6403802567096402, + "grad_norm": 378.8788757324219, + "learning_rate": 3.5545068512551695e-06, + "loss": 12.4129, + "step": 317010 + }, + { + "epoch": 0.640400457342324, + "grad_norm": 625.5926513671875, + "learning_rate": 3.5541726942778544e-06, + "loss": 14.9639, + "step": 317020 + }, + { + "epoch": 0.6404206579750078, + "grad_norm": 1099.2337646484375, + "learning_rate": 3.553838544347326e-06, + "loss": 31.2647, + "step": 317030 + }, + { + "epoch": 0.6404408586076916, + "grad_norm": 859.2303466796875, + "learning_rate": 3.5535044014652143e-06, + "loss": 22.844, + "step": 317040 + }, + { + "epoch": 0.6404610592403754, + "grad_norm": 304.50958251953125, + "learning_rate": 3.553170265633146e-06, + "loss": 16.3254, + "step": 317050 + }, + { + "epoch": 0.6404812598730593, + "grad_norm": 1029.9991455078125, + "learning_rate": 3.5528361368527503e-06, + "loss": 32.4318, + "step": 317060 + }, + { + "epoch": 0.6405014605057431, + "grad_norm": 398.1458740234375, + "learning_rate": 3.552502015125656e-06, + "loss": 14.9865, + "step": 317070 + }, + { + "epoch": 0.6405216611384269, + "grad_norm": 386.78741455078125, + "learning_rate": 3.5521679004534905e-06, + "loss": 35.0742, + "step": 317080 + }, + { + "epoch": 0.6405418617711107, + "grad_norm": 220.7437286376953, + "learning_rate": 3.551833792837883e-06, + "loss": 18.2597, + "step": 317090 + }, + { + "epoch": 0.6405620624037945, + "grad_norm": 330.812255859375, + "learning_rate": 3.5514996922804636e-06, + "loss": 20.4139, + "step": 317100 + }, + { + "epoch": 0.6405822630364782, + "grad_norm": 7.566112995147705, + "learning_rate": 3.5511655987828583e-06, + "loss": 19.3291, + "step": 317110 + }, + { + "epoch": 0.6406024636691621, + "grad_norm": 336.25860595703125, + "learning_rate": 3.550831512346695e-06, + "loss": 10.7505, + "step": 317120 + }, + { + "epoch": 0.6406226643018459, + "grad_norm": 455.1893005371094, + "learning_rate": 3.550497432973603e-06, + "loss": 11.5504, + "step": 317130 + }, + { + "epoch": 0.6406428649345297, + "grad_norm": 328.352783203125, + "learning_rate": 3.5501633606652143e-06, + "loss": 10.1881, + "step": 317140 + }, + { + "epoch": 0.6406630655672135, + "grad_norm": 340.2912902832031, + "learning_rate": 3.5498292954231497e-06, + "loss": 17.0818, + "step": 317150 + }, + { + "epoch": 
0.6406832661998973, + "grad_norm": 1005.4075317382812, + "learning_rate": 3.549495237249042e-06, + "loss": 36.5023, + "step": 317160 + }, + { + "epoch": 0.6407034668325812, + "grad_norm": 9.111109733581543, + "learning_rate": 3.5491611861445198e-06, + "loss": 11.3665, + "step": 317170 + }, + { + "epoch": 0.640723667465265, + "grad_norm": 2.9664359092712402, + "learning_rate": 3.5488271421112093e-06, + "loss": 17.2836, + "step": 317180 + }, + { + "epoch": 0.6407438680979488, + "grad_norm": 191.48548889160156, + "learning_rate": 3.5484931051507387e-06, + "loss": 11.0455, + "step": 317190 + }, + { + "epoch": 0.6407640687306326, + "grad_norm": 114.32453918457031, + "learning_rate": 3.548159075264738e-06, + "loss": 24.9184, + "step": 317200 + }, + { + "epoch": 0.6407842693633164, + "grad_norm": 285.5618591308594, + "learning_rate": 3.547825052454833e-06, + "loss": 14.7186, + "step": 317210 + }, + { + "epoch": 0.6408044699960003, + "grad_norm": 222.58468627929688, + "learning_rate": 3.5474910367226517e-06, + "loss": 12.1983, + "step": 317220 + }, + { + "epoch": 0.6408246706286841, + "grad_norm": 240.85787963867188, + "learning_rate": 3.5471570280698257e-06, + "loss": 17.1003, + "step": 317230 + }, + { + "epoch": 0.6408448712613679, + "grad_norm": 673.8321533203125, + "learning_rate": 3.5468230264979774e-06, + "loss": 18.5469, + "step": 317240 + }, + { + "epoch": 0.6408650718940517, + "grad_norm": 172.52186584472656, + "learning_rate": 3.5464890320087374e-06, + "loss": 14.3257, + "step": 317250 + }, + { + "epoch": 0.6408852725267355, + "grad_norm": 260.2057189941406, + "learning_rate": 3.5461550446037363e-06, + "loss": 26.1872, + "step": 317260 + }, + { + "epoch": 0.6409054731594194, + "grad_norm": 325.0608825683594, + "learning_rate": 3.545821064284597e-06, + "loss": 18.0519, + "step": 317270 + }, + { + "epoch": 0.6409256737921032, + "grad_norm": 111.28011322021484, + "learning_rate": 3.5454870910529494e-06, + "loss": 22.6814, + "step": 317280 + }, + { + "epoch": 0.640945874424787, + "grad_norm": 1047.8089599609375, + "learning_rate": 3.545153124910421e-06, + "loss": 27.8755, + "step": 317290 + }, + { + "epoch": 0.6409660750574708, + "grad_norm": 836.8259887695312, + "learning_rate": 3.5448191658586423e-06, + "loss": 16.5274, + "step": 317300 + }, + { + "epoch": 0.6409862756901546, + "grad_norm": 279.5603332519531, + "learning_rate": 3.5444852138992357e-06, + "loss": 5.7976, + "step": 317310 + }, + { + "epoch": 0.6410064763228385, + "grad_norm": 363.93927001953125, + "learning_rate": 3.544151269033832e-06, + "loss": 14.0555, + "step": 317320 + }, + { + "epoch": 0.6410266769555223, + "grad_norm": 260.6170349121094, + "learning_rate": 3.54381733126406e-06, + "loss": 12.1871, + "step": 317330 + }, + { + "epoch": 0.6410468775882061, + "grad_norm": 340.6725158691406, + "learning_rate": 3.5434834005915453e-06, + "loss": 41.2043, + "step": 317340 + }, + { + "epoch": 0.6410670782208899, + "grad_norm": 303.91064453125, + "learning_rate": 3.5431494770179154e-06, + "loss": 22.6507, + "step": 317350 + }, + { + "epoch": 0.6410872788535736, + "grad_norm": 644.1407470703125, + "learning_rate": 3.5428155605447988e-06, + "loss": 27.9938, + "step": 317360 + }, + { + "epoch": 0.6411074794862575, + "grad_norm": 138.9471435546875, + "learning_rate": 3.5424816511738213e-06, + "loss": 22.4929, + "step": 317370 + }, + { + "epoch": 0.6411276801189413, + "grad_norm": 172.72393798828125, + "learning_rate": 3.5421477489066115e-06, + "loss": 26.1033, + "step": 317380 + }, + { + "epoch": 0.6411478807516251, + "grad_norm": 
636.4935913085938, + "learning_rate": 3.541813853744799e-06, + "loss": 19.903, + "step": 317390 + }, + { + "epoch": 0.6411680813843089, + "grad_norm": 612.6116943359375, + "learning_rate": 3.5414799656900057e-06, + "loss": 20.3424, + "step": 317400 + }, + { + "epoch": 0.6411882820169927, + "grad_norm": 270.06976318359375, + "learning_rate": 3.541146084743864e-06, + "loss": 17.2903, + "step": 317410 + }, + { + "epoch": 0.6412084826496766, + "grad_norm": 270.260498046875, + "learning_rate": 3.540812210907999e-06, + "loss": 17.609, + "step": 317420 + }, + { + "epoch": 0.6412286832823604, + "grad_norm": 544.850830078125, + "learning_rate": 3.5404783441840383e-06, + "loss": 10.9086, + "step": 317430 + }, + { + "epoch": 0.6412488839150442, + "grad_norm": 265.4259033203125, + "learning_rate": 3.5401444845736092e-06, + "loss": 32.4151, + "step": 317440 + }, + { + "epoch": 0.641269084547728, + "grad_norm": 77.37215423583984, + "learning_rate": 3.539810632078338e-06, + "loss": 8.7036, + "step": 317450 + }, + { + "epoch": 0.6412892851804118, + "grad_norm": 321.9674072265625, + "learning_rate": 3.5394767866998555e-06, + "loss": 15.9489, + "step": 317460 + }, + { + "epoch": 0.6413094858130957, + "grad_norm": 332.39422607421875, + "learning_rate": 3.539142948439782e-06, + "loss": 10.9451, + "step": 317470 + }, + { + "epoch": 0.6413296864457795, + "grad_norm": 7.552122592926025, + "learning_rate": 3.538809117299751e-06, + "loss": 14.4336, + "step": 317480 + }, + { + "epoch": 0.6413498870784633, + "grad_norm": 172.63917541503906, + "learning_rate": 3.538475293281387e-06, + "loss": 15.9912, + "step": 317490 + }, + { + "epoch": 0.6413700877111471, + "grad_norm": 254.71444702148438, + "learning_rate": 3.538141476386317e-06, + "loss": 28.018, + "step": 317500 + }, + { + "epoch": 0.6413902883438309, + "grad_norm": 185.06678771972656, + "learning_rate": 3.5378076666161677e-06, + "loss": 24.2105, + "step": 317510 + }, + { + "epoch": 0.6414104889765148, + "grad_norm": 95.70032501220703, + "learning_rate": 3.537473863972568e-06, + "loss": 11.2273, + "step": 317520 + }, + { + "epoch": 0.6414306896091986, + "grad_norm": 920.5741577148438, + "learning_rate": 3.537140068457142e-06, + "loss": 24.7874, + "step": 317530 + }, + { + "epoch": 0.6414508902418824, + "grad_norm": 1017.8689575195312, + "learning_rate": 3.5368062800715163e-06, + "loss": 12.5289, + "step": 317540 + }, + { + "epoch": 0.6414710908745662, + "grad_norm": 587.6309204101562, + "learning_rate": 3.536472498817323e-06, + "loss": 20.4492, + "step": 317550 + }, + { + "epoch": 0.64149129150725, + "grad_norm": 298.1929931640625, + "learning_rate": 3.536138724696182e-06, + "loss": 18.8642, + "step": 317560 + }, + { + "epoch": 0.6415114921399339, + "grad_norm": 717.33056640625, + "learning_rate": 3.535804957709724e-06, + "loss": 20.3758, + "step": 317570 + }, + { + "epoch": 0.6415316927726177, + "grad_norm": 240.915771484375, + "learning_rate": 3.5354711978595757e-06, + "loss": 22.284, + "step": 317580 + }, + { + "epoch": 0.6415518934053015, + "grad_norm": 494.214111328125, + "learning_rate": 3.5351374451473643e-06, + "loss": 17.0445, + "step": 317590 + }, + { + "epoch": 0.6415720940379853, + "grad_norm": 544.8488159179688, + "learning_rate": 3.5348036995747135e-06, + "loss": 14.0251, + "step": 317600 + }, + { + "epoch": 0.6415922946706691, + "grad_norm": 244.27670288085938, + "learning_rate": 3.5344699611432515e-06, + "loss": 23.8881, + "step": 317610 + }, + { + "epoch": 0.6416124953033528, + "grad_norm": 0.0, + "learning_rate": 3.5341362298546077e-06, + 
"loss": 10.7942, + "step": 317620 + }, + { + "epoch": 0.6416326959360367, + "grad_norm": 710.4559936523438, + "learning_rate": 3.533802505710403e-06, + "loss": 22.6903, + "step": 317630 + }, + { + "epoch": 0.6416528965687205, + "grad_norm": 348.663818359375, + "learning_rate": 3.5334687887122687e-06, + "loss": 23.5508, + "step": 317640 + }, + { + "epoch": 0.6416730972014043, + "grad_norm": 430.3636779785156, + "learning_rate": 3.5331350788618303e-06, + "loss": 24.0841, + "step": 317650 + }, + { + "epoch": 0.6416932978340881, + "grad_norm": 450.00042724609375, + "learning_rate": 3.532801376160713e-06, + "loss": 17.1423, + "step": 317660 + }, + { + "epoch": 0.641713498466772, + "grad_norm": 459.4471740722656, + "learning_rate": 3.5324676806105428e-06, + "loss": 18.9422, + "step": 317670 + }, + { + "epoch": 0.6417336990994558, + "grad_norm": 651.5348510742188, + "learning_rate": 3.5321339922129493e-06, + "loss": 19.6253, + "step": 317680 + }, + { + "epoch": 0.6417538997321396, + "grad_norm": 201.7771759033203, + "learning_rate": 3.5318003109695544e-06, + "loss": 11.8734, + "step": 317690 + }, + { + "epoch": 0.6417741003648234, + "grad_norm": 113.5165023803711, + "learning_rate": 3.531466636881987e-06, + "loss": 24.3048, + "step": 317700 + }, + { + "epoch": 0.6417943009975072, + "grad_norm": 626.1658935546875, + "learning_rate": 3.531132969951875e-06, + "loss": 21.0166, + "step": 317710 + }, + { + "epoch": 0.641814501630191, + "grad_norm": 527.8425903320312, + "learning_rate": 3.5307993101808415e-06, + "loss": 23.8735, + "step": 317720 + }, + { + "epoch": 0.6418347022628749, + "grad_norm": 415.9170837402344, + "learning_rate": 3.5304656575705133e-06, + "loss": 21.1418, + "step": 317730 + }, + { + "epoch": 0.6418549028955587, + "grad_norm": 46.663726806640625, + "learning_rate": 3.530132012122518e-06, + "loss": 17.8677, + "step": 317740 + }, + { + "epoch": 0.6418751035282425, + "grad_norm": 438.56439208984375, + "learning_rate": 3.5297983738384813e-06, + "loss": 21.2878, + "step": 317750 + }, + { + "epoch": 0.6418953041609263, + "grad_norm": 356.69024658203125, + "learning_rate": 3.529464742720028e-06, + "loss": 16.4946, + "step": 317760 + }, + { + "epoch": 0.6419155047936101, + "grad_norm": 89.83252716064453, + "learning_rate": 3.5291311187687847e-06, + "loss": 21.94, + "step": 317770 + }, + { + "epoch": 0.641935705426294, + "grad_norm": 420.8074951171875, + "learning_rate": 3.5287975019863806e-06, + "loss": 17.3688, + "step": 317780 + }, + { + "epoch": 0.6419559060589778, + "grad_norm": 24.430877685546875, + "learning_rate": 3.5284638923744373e-06, + "loss": 11.2702, + "step": 317790 + }, + { + "epoch": 0.6419761066916616, + "grad_norm": 2.147768259048462, + "learning_rate": 3.5281302899345825e-06, + "loss": 24.0063, + "step": 317800 + }, + { + "epoch": 0.6419963073243454, + "grad_norm": 535.706787109375, + "learning_rate": 3.527796694668443e-06, + "loss": 16.868, + "step": 317810 + }, + { + "epoch": 0.6420165079570292, + "grad_norm": 460.1740417480469, + "learning_rate": 3.5274631065776433e-06, + "loss": 32.3511, + "step": 317820 + }, + { + "epoch": 0.6420367085897131, + "grad_norm": 386.732177734375, + "learning_rate": 3.527129525663808e-06, + "loss": 11.5894, + "step": 317830 + }, + { + "epoch": 0.6420569092223969, + "grad_norm": 189.55186462402344, + "learning_rate": 3.526795951928569e-06, + "loss": 15.4929, + "step": 317840 + }, + { + "epoch": 0.6420771098550807, + "grad_norm": 546.142822265625, + "learning_rate": 3.5264623853735435e-06, + "loss": 21.2189, + "step": 317850 + }, + { + 
"epoch": 0.6420973104877645, + "grad_norm": 237.81240844726562, + "learning_rate": 3.5261288260003635e-06, + "loss": 16.0755, + "step": 317860 + }, + { + "epoch": 0.6421175111204483, + "grad_norm": 119.85090637207031, + "learning_rate": 3.5257952738106528e-06, + "loss": 29.9819, + "step": 317870 + }, + { + "epoch": 0.6421377117531321, + "grad_norm": 2269.2265625, + "learning_rate": 3.525461728806038e-06, + "loss": 28.9295, + "step": 317880 + }, + { + "epoch": 0.6421579123858159, + "grad_norm": 533.5177001953125, + "learning_rate": 3.525128190988143e-06, + "loss": 11.0176, + "step": 317890 + }, + { + "epoch": 0.6421781130184997, + "grad_norm": 296.90008544921875, + "learning_rate": 3.524794660358593e-06, + "loss": 18.0153, + "step": 317900 + }, + { + "epoch": 0.6421983136511835, + "grad_norm": 170.51734924316406, + "learning_rate": 3.5244611369190184e-06, + "loss": 10.8991, + "step": 317910 + }, + { + "epoch": 0.6422185142838673, + "grad_norm": 29.92284393310547, + "learning_rate": 3.5241276206710374e-06, + "loss": 22.0229, + "step": 317920 + }, + { + "epoch": 0.6422387149165512, + "grad_norm": 337.820068359375, + "learning_rate": 3.5237941116162812e-06, + "loss": 14.0883, + "step": 317930 + }, + { + "epoch": 0.642258915549235, + "grad_norm": 317.5273132324219, + "learning_rate": 3.523460609756374e-06, + "loss": 15.1656, + "step": 317940 + }, + { + "epoch": 0.6422791161819188, + "grad_norm": 71.62299346923828, + "learning_rate": 3.5231271150929403e-06, + "loss": 14.2908, + "step": 317950 + }, + { + "epoch": 0.6422993168146026, + "grad_norm": 34.81813049316406, + "learning_rate": 3.5227936276276055e-06, + "loss": 8.2766, + "step": 317960 + }, + { + "epoch": 0.6423195174472864, + "grad_norm": 441.2760314941406, + "learning_rate": 3.522460147361996e-06, + "loss": 16.7027, + "step": 317970 + }, + { + "epoch": 0.6423397180799703, + "grad_norm": 452.09429931640625, + "learning_rate": 3.522126674297736e-06, + "loss": 24.5151, + "step": 317980 + }, + { + "epoch": 0.6423599187126541, + "grad_norm": 331.5494079589844, + "learning_rate": 3.5217932084364505e-06, + "loss": 14.5132, + "step": 317990 + }, + { + "epoch": 0.6423801193453379, + "grad_norm": 896.404541015625, + "learning_rate": 3.521459749779769e-06, + "loss": 18.3222, + "step": 318000 + }, + { + "epoch": 0.6424003199780217, + "grad_norm": 366.61956787109375, + "learning_rate": 3.5211262983293094e-06, + "loss": 17.7489, + "step": 318010 + }, + { + "epoch": 0.6424205206107055, + "grad_norm": 638.5956420898438, + "learning_rate": 3.520792854086702e-06, + "loss": 14.1063, + "step": 318020 + }, + { + "epoch": 0.6424407212433894, + "grad_norm": 483.6697692871094, + "learning_rate": 3.520459417053571e-06, + "loss": 14.0071, + "step": 318030 + }, + { + "epoch": 0.6424609218760732, + "grad_norm": 473.08154296875, + "learning_rate": 3.520125987231542e-06, + "loss": 21.0332, + "step": 318040 + }, + { + "epoch": 0.642481122508757, + "grad_norm": 607.0555419921875, + "learning_rate": 3.5197925646222387e-06, + "loss": 22.463, + "step": 318050 + }, + { + "epoch": 0.6425013231414408, + "grad_norm": 68.604736328125, + "learning_rate": 3.5194591492272863e-06, + "loss": 18.1951, + "step": 318060 + }, + { + "epoch": 0.6425215237741246, + "grad_norm": 1254.7269287109375, + "learning_rate": 3.519125741048313e-06, + "loss": 21.7519, + "step": 318070 + }, + { + "epoch": 0.6425417244068085, + "grad_norm": 354.39910888671875, + "learning_rate": 3.5187923400869384e-06, + "loss": 20.0988, + "step": 318080 + }, + { + "epoch": 0.6425619250394923, + "grad_norm": 
697.2653198242188, + "learning_rate": 3.5184589463447918e-06, + "loss": 16.5158, + "step": 318090 + }, + { + "epoch": 0.6425821256721761, + "grad_norm": 53.732444763183594, + "learning_rate": 3.5181255598234963e-06, + "loss": 21.9239, + "step": 318100 + }, + { + "epoch": 0.6426023263048599, + "grad_norm": 291.7712707519531, + "learning_rate": 3.5177921805246772e-06, + "loss": 20.0169, + "step": 318110 + }, + { + "epoch": 0.6426225269375437, + "grad_norm": 314.7015686035156, + "learning_rate": 3.5174588084499594e-06, + "loss": 16.4968, + "step": 318120 + }, + { + "epoch": 0.6426427275702274, + "grad_norm": 634.839111328125, + "learning_rate": 3.5171254436009684e-06, + "loss": 15.5266, + "step": 318130 + }, + { + "epoch": 0.6426629282029113, + "grad_norm": 398.2506408691406, + "learning_rate": 3.5167920859793263e-06, + "loss": 16.1789, + "step": 318140 + }, + { + "epoch": 0.6426831288355951, + "grad_norm": 501.5930480957031, + "learning_rate": 3.5164587355866593e-06, + "loss": 26.3209, + "step": 318150 + }, + { + "epoch": 0.6427033294682789, + "grad_norm": 763.8973999023438, + "learning_rate": 3.5161253924245955e-06, + "loss": 24.0423, + "step": 318160 + }, + { + "epoch": 0.6427235301009627, + "grad_norm": 507.8896789550781, + "learning_rate": 3.5157920564947535e-06, + "loss": 18.6836, + "step": 318170 + }, + { + "epoch": 0.6427437307336465, + "grad_norm": 478.6862487792969, + "learning_rate": 3.5154587277987618e-06, + "loss": 13.5403, + "step": 318180 + }, + { + "epoch": 0.6427639313663304, + "grad_norm": 221.6300506591797, + "learning_rate": 3.5151254063382445e-06, + "loss": 18.9185, + "step": 318190 + }, + { + "epoch": 0.6427841319990142, + "grad_norm": 795.54052734375, + "learning_rate": 3.5147920921148267e-06, + "loss": 12.6649, + "step": 318200 + }, + { + "epoch": 0.642804332631698, + "grad_norm": 219.04025268554688, + "learning_rate": 3.514458785130131e-06, + "loss": 31.2245, + "step": 318210 + }, + { + "epoch": 0.6428245332643818, + "grad_norm": 804.419677734375, + "learning_rate": 3.514125485385782e-06, + "loss": 18.6786, + "step": 318220 + }, + { + "epoch": 0.6428447338970656, + "grad_norm": 180.21267700195312, + "learning_rate": 3.5137921928834085e-06, + "loss": 20.5546, + "step": 318230 + }, + { + "epoch": 0.6428649345297495, + "grad_norm": 443.3706970214844, + "learning_rate": 3.5134589076246284e-06, + "loss": 31.4394, + "step": 318240 + }, + { + "epoch": 0.6428851351624333, + "grad_norm": 238.41075134277344, + "learning_rate": 3.5131256296110703e-06, + "loss": 13.2463, + "step": 318250 + }, + { + "epoch": 0.6429053357951171, + "grad_norm": 209.2345428466797, + "learning_rate": 3.512792358844359e-06, + "loss": 12.0016, + "step": 318260 + }, + { + "epoch": 0.6429255364278009, + "grad_norm": 417.237548828125, + "learning_rate": 3.5124590953261155e-06, + "loss": 7.2993, + "step": 318270 + }, + { + "epoch": 0.6429457370604847, + "grad_norm": 220.30819702148438, + "learning_rate": 3.5121258390579667e-06, + "loss": 25.4603, + "step": 318280 + }, + { + "epoch": 0.6429659376931686, + "grad_norm": 382.668212890625, + "learning_rate": 3.511792590041537e-06, + "loss": 19.8533, + "step": 318290 + }, + { + "epoch": 0.6429861383258524, + "grad_norm": 755.5155639648438, + "learning_rate": 3.511459348278448e-06, + "loss": 27.1435, + "step": 318300 + }, + { + "epoch": 0.6430063389585362, + "grad_norm": 353.08807373046875, + "learning_rate": 3.511126113770325e-06, + "loss": 16.8397, + "step": 318310 + }, + { + "epoch": 0.64302653959122, + "grad_norm": 201.4923553466797, + "learning_rate": 
3.510792886518795e-06, + "loss": 22.9137, + "step": 318320 + }, + { + "epoch": 0.6430467402239038, + "grad_norm": 553.3013916015625, + "learning_rate": 3.5104596665254786e-06, + "loss": 21.7877, + "step": 318330 + }, + { + "epoch": 0.6430669408565877, + "grad_norm": 88.64137268066406, + "learning_rate": 3.510126453792001e-06, + "loss": 11.063, + "step": 318340 + }, + { + "epoch": 0.6430871414892715, + "grad_norm": 314.2059326171875, + "learning_rate": 3.509793248319987e-06, + "loss": 15.1198, + "step": 318350 + }, + { + "epoch": 0.6431073421219553, + "grad_norm": 189.05133056640625, + "learning_rate": 3.509460050111061e-06, + "loss": 23.6499, + "step": 318360 + }, + { + "epoch": 0.6431275427546391, + "grad_norm": 161.51707458496094, + "learning_rate": 3.5091268591668427e-06, + "loss": 22.2294, + "step": 318370 + }, + { + "epoch": 0.643147743387323, + "grad_norm": 360.7963562011719, + "learning_rate": 3.5087936754889614e-06, + "loss": 12.5579, + "step": 318380 + }, + { + "epoch": 0.6431679440200067, + "grad_norm": 192.18182373046875, + "learning_rate": 3.5084604990790395e-06, + "loss": 21.6322, + "step": 318390 + }, + { + "epoch": 0.6431881446526905, + "grad_norm": 82.76537322998047, + "learning_rate": 3.508127329938699e-06, + "loss": 14.3149, + "step": 318400 + }, + { + "epoch": 0.6432083452853743, + "grad_norm": 241.1047821044922, + "learning_rate": 3.5077941680695653e-06, + "loss": 22.9015, + "step": 318410 + }, + { + "epoch": 0.6432285459180581, + "grad_norm": 192.6389617919922, + "learning_rate": 3.507461013473263e-06, + "loss": 14.514, + "step": 318420 + }, + { + "epoch": 0.6432487465507419, + "grad_norm": 626.179443359375, + "learning_rate": 3.507127866151413e-06, + "loss": 16.351, + "step": 318430 + }, + { + "epoch": 0.6432689471834258, + "grad_norm": 791.4384765625, + "learning_rate": 3.50679472610564e-06, + "loss": 27.5719, + "step": 318440 + }, + { + "epoch": 0.6432891478161096, + "grad_norm": 598.8052978515625, + "learning_rate": 3.5064615933375724e-06, + "loss": 14.8814, + "step": 318450 + }, + { + "epoch": 0.6433093484487934, + "grad_norm": 482.283935546875, + "learning_rate": 3.506128467848826e-06, + "loss": 13.2172, + "step": 318460 + }, + { + "epoch": 0.6433295490814772, + "grad_norm": 335.20953369140625, + "learning_rate": 3.505795349641029e-06, + "loss": 18.9089, + "step": 318470 + }, + { + "epoch": 0.643349749714161, + "grad_norm": 229.18142700195312, + "learning_rate": 3.5054622387158044e-06, + "loss": 38.285, + "step": 318480 + }, + { + "epoch": 0.6433699503468449, + "grad_norm": 599.0580444335938, + "learning_rate": 3.505129135074777e-06, + "loss": 20.717, + "step": 318490 + }, + { + "epoch": 0.6433901509795287, + "grad_norm": 186.5522918701172, + "learning_rate": 3.5047960387195673e-06, + "loss": 9.9613, + "step": 318500 + }, + { + "epoch": 0.6434103516122125, + "grad_norm": 494.20751953125, + "learning_rate": 3.5044629496517997e-06, + "loss": 26.5297, + "step": 318510 + }, + { + "epoch": 0.6434305522448963, + "grad_norm": 444.7451477050781, + "learning_rate": 3.5041298678731017e-06, + "loss": 32.5897, + "step": 318520 + }, + { + "epoch": 0.6434507528775801, + "grad_norm": 436.12908935546875, + "learning_rate": 3.503796793385089e-06, + "loss": 18.1273, + "step": 318530 + }, + { + "epoch": 0.643470953510264, + "grad_norm": 414.7536315917969, + "learning_rate": 3.503463726189391e-06, + "loss": 21.6207, + "step": 318540 + }, + { + "epoch": 0.6434911541429478, + "grad_norm": 160.6037139892578, + "learning_rate": 3.503130666287631e-06, + "loss": 10.8208, + "step": 318550 
+ }, + { + "epoch": 0.6435113547756316, + "grad_norm": 529.0670776367188, + "learning_rate": 3.502797613681429e-06, + "loss": 16.9283, + "step": 318560 + }, + { + "epoch": 0.6435315554083154, + "grad_norm": 207.29319763183594, + "learning_rate": 3.50246456837241e-06, + "loss": 18.6089, + "step": 318570 + }, + { + "epoch": 0.6435517560409992, + "grad_norm": 298.92926025390625, + "learning_rate": 3.5021315303621973e-06, + "loss": 11.2367, + "step": 318580 + }, + { + "epoch": 0.643571956673683, + "grad_norm": 394.2572937011719, + "learning_rate": 3.5017984996524134e-06, + "loss": 8.8136, + "step": 318590 + }, + { + "epoch": 0.6435921573063669, + "grad_norm": 236.72897338867188, + "learning_rate": 3.501465476244681e-06, + "loss": 16.2428, + "step": 318600 + }, + { + "epoch": 0.6436123579390507, + "grad_norm": 625.5629272460938, + "learning_rate": 3.501132460140627e-06, + "loss": 16.4657, + "step": 318610 + }, + { + "epoch": 0.6436325585717345, + "grad_norm": 346.51800537109375, + "learning_rate": 3.5007994513418687e-06, + "loss": 14.7584, + "step": 318620 + }, + { + "epoch": 0.6436527592044183, + "grad_norm": 295.20001220703125, + "learning_rate": 3.500466449850033e-06, + "loss": 19.2477, + "step": 318630 + }, + { + "epoch": 0.643672959837102, + "grad_norm": 506.7414245605469, + "learning_rate": 3.500133455666742e-06, + "loss": 16.469, + "step": 318640 + }, + { + "epoch": 0.6436931604697859, + "grad_norm": 559.2400512695312, + "learning_rate": 3.49980046879362e-06, + "loss": 33.4135, + "step": 318650 + }, + { + "epoch": 0.6437133611024697, + "grad_norm": 353.7267761230469, + "learning_rate": 3.4994674892322867e-06, + "loss": 24.5059, + "step": 318660 + }, + { + "epoch": 0.6437335617351535, + "grad_norm": 522.9329223632812, + "learning_rate": 3.4991345169843666e-06, + "loss": 32.1731, + "step": 318670 + }, + { + "epoch": 0.6437537623678373, + "grad_norm": 488.3183898925781, + "learning_rate": 3.4988015520514856e-06, + "loss": 20.1564, + "step": 318680 + }, + { + "epoch": 0.6437739630005211, + "grad_norm": 139.21360778808594, + "learning_rate": 3.4984685944352604e-06, + "loss": 19.5322, + "step": 318690 + }, + { + "epoch": 0.643794163633205, + "grad_norm": 479.5440368652344, + "learning_rate": 3.498135644137318e-06, + "loss": 43.3984, + "step": 318700 + }, + { + "epoch": 0.6438143642658888, + "grad_norm": 432.9505615234375, + "learning_rate": 3.4978027011592826e-06, + "loss": 25.1056, + "step": 318710 + }, + { + "epoch": 0.6438345648985726, + "grad_norm": 688.2528076171875, + "learning_rate": 3.4974697655027724e-06, + "loss": 14.1868, + "step": 318720 + }, + { + "epoch": 0.6438547655312564, + "grad_norm": 500.7982482910156, + "learning_rate": 3.4971368371694126e-06, + "loss": 31.8568, + "step": 318730 + }, + { + "epoch": 0.6438749661639402, + "grad_norm": 783.8529052734375, + "learning_rate": 3.496803916160827e-06, + "loss": 24.109, + "step": 318740 + }, + { + "epoch": 0.6438951667966241, + "grad_norm": 94.44432830810547, + "learning_rate": 3.4964710024786354e-06, + "loss": 10.444, + "step": 318750 + }, + { + "epoch": 0.6439153674293079, + "grad_norm": 167.6468963623047, + "learning_rate": 3.4961380961244605e-06, + "loss": 16.7094, + "step": 318760 + }, + { + "epoch": 0.6439355680619917, + "grad_norm": 152.32901000976562, + "learning_rate": 3.49580519709993e-06, + "loss": 23.922, + "step": 318770 + }, + { + "epoch": 0.6439557686946755, + "grad_norm": 65.15750122070312, + "learning_rate": 3.4954723054066593e-06, + "loss": 13.0673, + "step": 318780 + }, + { + "epoch": 0.6439759693273593, + 
"grad_norm": 154.20431518554688, + "learning_rate": 3.4951394210462746e-06, + "loss": 9.2022, + "step": 318790 + }, + { + "epoch": 0.6439961699600432, + "grad_norm": 574.928466796875, + "learning_rate": 3.4948065440203982e-06, + "loss": 30.3903, + "step": 318800 + }, + { + "epoch": 0.644016370592727, + "grad_norm": 173.9928741455078, + "learning_rate": 3.494473674330653e-06, + "loss": 12.7227, + "step": 318810 + }, + { + "epoch": 0.6440365712254108, + "grad_norm": 493.5740051269531, + "learning_rate": 3.49414081197866e-06, + "loss": 23.3376, + "step": 318820 + }, + { + "epoch": 0.6440567718580946, + "grad_norm": 10.109909057617188, + "learning_rate": 3.4938079569660398e-06, + "loss": 29.0626, + "step": 318830 + }, + { + "epoch": 0.6440769724907784, + "grad_norm": 125.75299072265625, + "learning_rate": 3.493475109294421e-06, + "loss": 14.8489, + "step": 318840 + }, + { + "epoch": 0.6440971731234623, + "grad_norm": 105.9942626953125, + "learning_rate": 3.4931422689654186e-06, + "loss": 20.9156, + "step": 318850 + }, + { + "epoch": 0.6441173737561461, + "grad_norm": 210.97653198242188, + "learning_rate": 3.492809435980659e-06, + "loss": 8.7984, + "step": 318860 + }, + { + "epoch": 0.6441375743888299, + "grad_norm": 297.12152099609375, + "learning_rate": 3.4924766103417648e-06, + "loss": 8.53, + "step": 318870 + }, + { + "epoch": 0.6441577750215137, + "grad_norm": 337.85479736328125, + "learning_rate": 3.492143792050355e-06, + "loss": 21.912, + "step": 318880 + }, + { + "epoch": 0.6441779756541975, + "grad_norm": 246.87548828125, + "learning_rate": 3.4918109811080535e-06, + "loss": 8.0384, + "step": 318890 + }, + { + "epoch": 0.6441981762868813, + "grad_norm": 320.1974182128906, + "learning_rate": 3.491478177516484e-06, + "loss": 11.805, + "step": 318900 + }, + { + "epoch": 0.6442183769195651, + "grad_norm": 217.13697814941406, + "learning_rate": 3.4911453812772658e-06, + "loss": 24.665, + "step": 318910 + }, + { + "epoch": 0.6442385775522489, + "grad_norm": 700.4960327148438, + "learning_rate": 3.4908125923920204e-06, + "loss": 20.8438, + "step": 318920 + }, + { + "epoch": 0.6442587781849327, + "grad_norm": 30.923364639282227, + "learning_rate": 3.490479810862373e-06, + "loss": 15.8241, + "step": 318930 + }, + { + "epoch": 0.6442789788176165, + "grad_norm": 1212.7904052734375, + "learning_rate": 3.490147036689945e-06, + "loss": 21.6489, + "step": 318940 + }, + { + "epoch": 0.6442991794503004, + "grad_norm": 391.1068115234375, + "learning_rate": 3.4898142698763555e-06, + "loss": 16.3236, + "step": 318950 + }, + { + "epoch": 0.6443193800829842, + "grad_norm": 338.1729431152344, + "learning_rate": 3.4894815104232283e-06, + "loss": 22.8707, + "step": 318960 + }, + { + "epoch": 0.644339580715668, + "grad_norm": 279.7657470703125, + "learning_rate": 3.489148758332186e-06, + "loss": 26.0585, + "step": 318970 + }, + { + "epoch": 0.6443597813483518, + "grad_norm": 447.3832702636719, + "learning_rate": 3.4888160136048488e-06, + "loss": 13.4344, + "step": 318980 + }, + { + "epoch": 0.6443799819810356, + "grad_norm": 33.17680740356445, + "learning_rate": 3.4884832762428376e-06, + "loss": 25.5586, + "step": 318990 + }, + { + "epoch": 0.6444001826137195, + "grad_norm": 232.6408233642578, + "learning_rate": 3.488150546247778e-06, + "loss": 13.2822, + "step": 319000 + }, + { + "epoch": 0.6444203832464033, + "grad_norm": 619.4563598632812, + "learning_rate": 3.4878178236212883e-06, + "loss": 29.3946, + "step": 319010 + }, + { + "epoch": 0.6444405838790871, + "grad_norm": 175.9522705078125, + "learning_rate": 
3.4874851083649906e-06, + "loss": 18.2922, + "step": 319020 + }, + { + "epoch": 0.6444607845117709, + "grad_norm": 27.794336318969727, + "learning_rate": 3.487152400480509e-06, + "loss": 22.8812, + "step": 319030 + }, + { + "epoch": 0.6444809851444547, + "grad_norm": 100.01497650146484, + "learning_rate": 3.4868196999694616e-06, + "loss": 11.4736, + "step": 319040 + }, + { + "epoch": 0.6445011857771386, + "grad_norm": 412.4709777832031, + "learning_rate": 3.486487006833471e-06, + "loss": 30.6363, + "step": 319050 + }, + { + "epoch": 0.6445213864098224, + "grad_norm": 359.1695251464844, + "learning_rate": 3.4861543210741607e-06, + "loss": 14.4729, + "step": 319060 + }, + { + "epoch": 0.6445415870425062, + "grad_norm": 520.4990234375, + "learning_rate": 3.485821642693148e-06, + "loss": 22.3197, + "step": 319070 + }, + { + "epoch": 0.64456178767519, + "grad_norm": 387.98980712890625, + "learning_rate": 3.4854889716920588e-06, + "loss": 14.8493, + "step": 319080 + }, + { + "epoch": 0.6445819883078738, + "grad_norm": 22.09465980529785, + "learning_rate": 3.485156308072512e-06, + "loss": 21.9085, + "step": 319090 + }, + { + "epoch": 0.6446021889405577, + "grad_norm": 94.3161849975586, + "learning_rate": 3.484823651836131e-06, + "loss": 13.9138, + "step": 319100 + }, + { + "epoch": 0.6446223895732415, + "grad_norm": 10.478737831115723, + "learning_rate": 3.484491002984535e-06, + "loss": 8.5433, + "step": 319110 + }, + { + "epoch": 0.6446425902059253, + "grad_norm": 185.5895233154297, + "learning_rate": 3.4841583615193444e-06, + "loss": 15.9812, + "step": 319120 + }, + { + "epoch": 0.6446627908386091, + "grad_norm": 866.3886108398438, + "learning_rate": 3.4838257274421853e-06, + "loss": 31.4951, + "step": 319130 + }, + { + "epoch": 0.6446829914712929, + "grad_norm": 319.8884582519531, + "learning_rate": 3.483493100754673e-06, + "loss": 15.8195, + "step": 319140 + }, + { + "epoch": 0.6447031921039766, + "grad_norm": 472.67138671875, + "learning_rate": 3.483160481458432e-06, + "loss": 19.566, + "step": 319150 + }, + { + "epoch": 0.6447233927366605, + "grad_norm": 158.82705688476562, + "learning_rate": 3.4828278695550845e-06, + "loss": 18.621, + "step": 319160 + }, + { + "epoch": 0.6447435933693443, + "grad_norm": 383.1800231933594, + "learning_rate": 3.4824952650462486e-06, + "loss": 11.5811, + "step": 319170 + }, + { + "epoch": 0.6447637940020281, + "grad_norm": 97.35182189941406, + "learning_rate": 3.4821626679335464e-06, + "loss": 10.1588, + "step": 319180 + }, + { + "epoch": 0.6447839946347119, + "grad_norm": 712.7573852539062, + "learning_rate": 3.4818300782186e-06, + "loss": 31.499, + "step": 319190 + }, + { + "epoch": 0.6448041952673957, + "grad_norm": 765.3334350585938, + "learning_rate": 3.4814974959030294e-06, + "loss": 33.6473, + "step": 319200 + }, + { + "epoch": 0.6448243959000796, + "grad_norm": 335.5063171386719, + "learning_rate": 3.4811649209884544e-06, + "loss": 12.8144, + "step": 319210 + }, + { + "epoch": 0.6448445965327634, + "grad_norm": 473.3268737792969, + "learning_rate": 3.480832353476501e-06, + "loss": 17.6606, + "step": 319220 + }, + { + "epoch": 0.6448647971654472, + "grad_norm": 186.41500854492188, + "learning_rate": 3.480499793368783e-06, + "loss": 11.0611, + "step": 319230 + }, + { + "epoch": 0.644884997798131, + "grad_norm": 407.2973327636719, + "learning_rate": 3.4801672406669253e-06, + "loss": 18.3399, + "step": 319240 + }, + { + "epoch": 0.6449051984308148, + "grad_norm": 515.4907836914062, + "learning_rate": 3.4798346953725487e-06, + "loss": 23.0567, + "step": 
319250 + }, + { + "epoch": 0.6449253990634987, + "grad_norm": 468.15130615234375, + "learning_rate": 3.4795021574872743e-06, + "loss": 24.6056, + "step": 319260 + }, + { + "epoch": 0.6449455996961825, + "grad_norm": 61.45684814453125, + "learning_rate": 3.479169627012721e-06, + "loss": 7.1199, + "step": 319270 + }, + { + "epoch": 0.6449658003288663, + "grad_norm": 203.83148193359375, + "learning_rate": 3.478837103950509e-06, + "loss": 18.5914, + "step": 319280 + }, + { + "epoch": 0.6449860009615501, + "grad_norm": 429.20367431640625, + "learning_rate": 3.4785045883022645e-06, + "loss": 16.3068, + "step": 319290 + }, + { + "epoch": 0.6450062015942339, + "grad_norm": 225.7417449951172, + "learning_rate": 3.4781720800696006e-06, + "loss": 17.3463, + "step": 319300 + }, + { + "epoch": 0.6450264022269178, + "grad_norm": 598.3883666992188, + "learning_rate": 3.477839579254142e-06, + "loss": 19.2953, + "step": 319310 + }, + { + "epoch": 0.6450466028596016, + "grad_norm": 245.51226806640625, + "learning_rate": 3.47750708585751e-06, + "loss": 14.2051, + "step": 319320 + }, + { + "epoch": 0.6450668034922854, + "grad_norm": 1205.3701171875, + "learning_rate": 3.4771745998813228e-06, + "loss": 29.5674, + "step": 319330 + }, + { + "epoch": 0.6450870041249692, + "grad_norm": 202.94825744628906, + "learning_rate": 3.4768421213272017e-06, + "loss": 13.627, + "step": 319340 + }, + { + "epoch": 0.645107204757653, + "grad_norm": 247.85951232910156, + "learning_rate": 3.476509650196769e-06, + "loss": 16.0709, + "step": 319350 + }, + { + "epoch": 0.6451274053903369, + "grad_norm": 202.9886016845703, + "learning_rate": 3.4761771864916415e-06, + "loss": 22.1857, + "step": 319360 + }, + { + "epoch": 0.6451476060230207, + "grad_norm": 436.1661376953125, + "learning_rate": 3.4758447302134414e-06, + "loss": 15.4678, + "step": 319370 + }, + { + "epoch": 0.6451678066557045, + "grad_norm": 538.0279541015625, + "learning_rate": 3.475512281363792e-06, + "loss": 14.7858, + "step": 319380 + }, + { + "epoch": 0.6451880072883883, + "grad_norm": 320.42462158203125, + "learning_rate": 3.4751798399443075e-06, + "loss": 28.9839, + "step": 319390 + }, + { + "epoch": 0.6452082079210721, + "grad_norm": 314.9214172363281, + "learning_rate": 3.474847405956613e-06, + "loss": 14.5805, + "step": 319400 + }, + { + "epoch": 0.6452284085537559, + "grad_norm": 377.7002868652344, + "learning_rate": 3.474514979402327e-06, + "loss": 27.1503, + "step": 319410 + }, + { + "epoch": 0.6452486091864397, + "grad_norm": 15.950569152832031, + "learning_rate": 3.4741825602830716e-06, + "loss": 11.5035, + "step": 319420 + }, + { + "epoch": 0.6452688098191235, + "grad_norm": 1371.911865234375, + "learning_rate": 3.4738501486004632e-06, + "loss": 26.0395, + "step": 319430 + }, + { + "epoch": 0.6452890104518073, + "grad_norm": 323.29290771484375, + "learning_rate": 3.4735177443561243e-06, + "loss": 15.6237, + "step": 319440 + }, + { + "epoch": 0.6453092110844911, + "grad_norm": 0.0, + "learning_rate": 3.4731853475516763e-06, + "loss": 17.3917, + "step": 319450 + }, + { + "epoch": 0.645329411717175, + "grad_norm": 186.2850799560547, + "learning_rate": 3.472852958188736e-06, + "loss": 19.1316, + "step": 319460 + }, + { + "epoch": 0.6453496123498588, + "grad_norm": 415.9254150390625, + "learning_rate": 3.4725205762689256e-06, + "loss": 30.8706, + "step": 319470 + }, + { + "epoch": 0.6453698129825426, + "grad_norm": 179.19468688964844, + "learning_rate": 3.472188201793866e-06, + "loss": 16.1167, + "step": 319480 + }, + { + "epoch": 0.6453900136152264, + 
"grad_norm": 485.9315490722656, + "learning_rate": 3.4718558347651742e-06, + "loss": 17.328, + "step": 319490 + }, + { + "epoch": 0.6454102142479102, + "grad_norm": 513.5218505859375, + "learning_rate": 3.471523475184472e-06, + "loss": 13.9099, + "step": 319500 + }, + { + "epoch": 0.645430414880594, + "grad_norm": 224.21621704101562, + "learning_rate": 3.47119112305338e-06, + "loss": 31.7493, + "step": 319510 + }, + { + "epoch": 0.6454506155132779, + "grad_norm": 300.30120849609375, + "learning_rate": 3.4708587783735164e-06, + "loss": 15.918, + "step": 319520 + }, + { + "epoch": 0.6454708161459617, + "grad_norm": 314.58984375, + "learning_rate": 3.4705264411465004e-06, + "loss": 17.2455, + "step": 319530 + }, + { + "epoch": 0.6454910167786455, + "grad_norm": 237.69674682617188, + "learning_rate": 3.470194111373954e-06, + "loss": 19.3716, + "step": 319540 + }, + { + "epoch": 0.6455112174113293, + "grad_norm": 427.9591979980469, + "learning_rate": 3.4698617890574972e-06, + "loss": 18.6955, + "step": 319550 + }, + { + "epoch": 0.6455314180440131, + "grad_norm": 522.3770751953125, + "learning_rate": 3.4695294741987474e-06, + "loss": 26.1161, + "step": 319560 + }, + { + "epoch": 0.645551618676697, + "grad_norm": 97.43962097167969, + "learning_rate": 3.4691971667993254e-06, + "loss": 20.7762, + "step": 319570 + }, + { + "epoch": 0.6455718193093808, + "grad_norm": 446.9584045410156, + "learning_rate": 3.468864866860851e-06, + "loss": 13.0959, + "step": 319580 + }, + { + "epoch": 0.6455920199420646, + "grad_norm": 2.0021135807037354, + "learning_rate": 3.468532574384943e-06, + "loss": 14.8358, + "step": 319590 + }, + { + "epoch": 0.6456122205747484, + "grad_norm": 835.078857421875, + "learning_rate": 3.4682002893732203e-06, + "loss": 26.5909, + "step": 319600 + }, + { + "epoch": 0.6456324212074322, + "grad_norm": 9.252861976623535, + "learning_rate": 3.467868011827306e-06, + "loss": 17.4459, + "step": 319610 + }, + { + "epoch": 0.6456526218401161, + "grad_norm": 277.0822448730469, + "learning_rate": 3.4675357417488163e-06, + "loss": 22.8973, + "step": 319620 + }, + { + "epoch": 0.6456728224727999, + "grad_norm": 386.9530944824219, + "learning_rate": 3.467203479139371e-06, + "loss": 12.6119, + "step": 319630 + }, + { + "epoch": 0.6456930231054837, + "grad_norm": 738.8014526367188, + "learning_rate": 3.4668712240005912e-06, + "loss": 36.349, + "step": 319640 + }, + { + "epoch": 0.6457132237381675, + "grad_norm": 318.0730285644531, + "learning_rate": 3.4665389763340945e-06, + "loss": 15.5838, + "step": 319650 + }, + { + "epoch": 0.6457334243708513, + "grad_norm": 244.52935791015625, + "learning_rate": 3.466206736141501e-06, + "loss": 17.9775, + "step": 319660 + }, + { + "epoch": 0.6457536250035351, + "grad_norm": 139.79449462890625, + "learning_rate": 3.46587450342443e-06, + "loss": 18.0357, + "step": 319670 + }, + { + "epoch": 0.6457738256362189, + "grad_norm": 785.7474975585938, + "learning_rate": 3.465542278184499e-06, + "loss": 24.5396, + "step": 319680 + }, + { + "epoch": 0.6457940262689027, + "grad_norm": 695.35595703125, + "learning_rate": 3.4652100604233296e-06, + "loss": 12.9501, + "step": 319690 + }, + { + "epoch": 0.6458142269015865, + "grad_norm": 684.6602172851562, + "learning_rate": 3.464877850142541e-06, + "loss": 14.3548, + "step": 319700 + }, + { + "epoch": 0.6458344275342703, + "grad_norm": 122.92704010009766, + "learning_rate": 3.464545647343751e-06, + "loss": 19.3015, + "step": 319710 + }, + { + "epoch": 0.6458546281669542, + "grad_norm": 579.4113159179688, + "learning_rate": 
3.4642134520285796e-06, + "loss": 16.8775, + "step": 319720 + }, + { + "epoch": 0.645874828799638, + "grad_norm": 333.3854064941406, + "learning_rate": 3.463881264198645e-06, + "loss": 12.5376, + "step": 319730 + }, + { + "epoch": 0.6458950294323218, + "grad_norm": 319.0225524902344, + "learning_rate": 3.4635490838555687e-06, + "loss": 20.1659, + "step": 319740 + }, + { + "epoch": 0.6459152300650056, + "grad_norm": 227.57228088378906, + "learning_rate": 3.463216911000965e-06, + "loss": 9.6832, + "step": 319750 + }, + { + "epoch": 0.6459354306976894, + "grad_norm": 404.5276184082031, + "learning_rate": 3.4628847456364567e-06, + "loss": 17.3493, + "step": 319760 + }, + { + "epoch": 0.6459556313303733, + "grad_norm": 174.25889587402344, + "learning_rate": 3.462552587763663e-06, + "loss": 26.6191, + "step": 319770 + }, + { + "epoch": 0.6459758319630571, + "grad_norm": 263.53826904296875, + "learning_rate": 3.4622204373842006e-06, + "loss": 14.4223, + "step": 319780 + }, + { + "epoch": 0.6459960325957409, + "grad_norm": 572.0341186523438, + "learning_rate": 3.461888294499689e-06, + "loss": 13.2673, + "step": 319790 + }, + { + "epoch": 0.6460162332284247, + "grad_norm": 282.9263916015625, + "learning_rate": 3.4615561591117486e-06, + "loss": 17.9608, + "step": 319800 + }, + { + "epoch": 0.6460364338611085, + "grad_norm": 522.0670166015625, + "learning_rate": 3.461224031221995e-06, + "loss": 31.4396, + "step": 319810 + }, + { + "epoch": 0.6460566344937924, + "grad_norm": 762.7666015625, + "learning_rate": 3.4608919108320488e-06, + "loss": 24.7711, + "step": 319820 + }, + { + "epoch": 0.6460768351264762, + "grad_norm": 581.8294067382812, + "learning_rate": 3.4605597979435313e-06, + "loss": 71.8205, + "step": 319830 + }, + { + "epoch": 0.64609703575916, + "grad_norm": 192.70274353027344, + "learning_rate": 3.460227692558056e-06, + "loss": 16.0339, + "step": 319840 + }, + { + "epoch": 0.6461172363918438, + "grad_norm": 493.2317810058594, + "learning_rate": 3.459895594677245e-06, + "loss": 11.9203, + "step": 319850 + }, + { + "epoch": 0.6461374370245276, + "grad_norm": 234.32284545898438, + "learning_rate": 3.459563504302716e-06, + "loss": 13.9755, + "step": 319860 + }, + { + "epoch": 0.6461576376572115, + "grad_norm": 185.91566467285156, + "learning_rate": 3.4592314214360888e-06, + "loss": 12.033, + "step": 319870 + }, + { + "epoch": 0.6461778382898953, + "grad_norm": 175.99267578125, + "learning_rate": 3.4588993460789795e-06, + "loss": 15.0761, + "step": 319880 + }, + { + "epoch": 0.6461980389225791, + "grad_norm": 634.8748168945312, + "learning_rate": 3.4585672782330072e-06, + "loss": 24.8269, + "step": 319890 + }, + { + "epoch": 0.6462182395552629, + "grad_norm": 182.73977661132812, + "learning_rate": 3.4582352178997937e-06, + "loss": 16.3021, + "step": 319900 + }, + { + "epoch": 0.6462384401879467, + "grad_norm": 194.32150268554688, + "learning_rate": 3.457903165080952e-06, + "loss": 22.7858, + "step": 319910 + }, + { + "epoch": 0.6462586408206304, + "grad_norm": 355.3138427734375, + "learning_rate": 3.457571119778104e-06, + "loss": 19.6149, + "step": 319920 + }, + { + "epoch": 0.6462788414533143, + "grad_norm": 333.3360900878906, + "learning_rate": 3.4572390819928686e-06, + "loss": 16.6614, + "step": 319930 + }, + { + "epoch": 0.6462990420859981, + "grad_norm": 88.16149139404297, + "learning_rate": 3.4569070517268616e-06, + "loss": 31.911, + "step": 319940 + }, + { + "epoch": 0.6463192427186819, + "grad_norm": 863.8450927734375, + "learning_rate": 3.4565750289817024e-06, + "loss": 15.8914, + 
"step": 319950 + }, + { + "epoch": 0.6463394433513657, + "grad_norm": 412.30096435546875, + "learning_rate": 3.4562430137590107e-06, + "loss": 21.9311, + "step": 319960 + }, + { + "epoch": 0.6463596439840495, + "grad_norm": 468.2781982421875, + "learning_rate": 3.4559110060604016e-06, + "loss": 11.973, + "step": 319970 + }, + { + "epoch": 0.6463798446167334, + "grad_norm": 226.75779724121094, + "learning_rate": 3.455579005887495e-06, + "loss": 25.6286, + "step": 319980 + }, + { + "epoch": 0.6464000452494172, + "grad_norm": 109.05878448486328, + "learning_rate": 3.455247013241909e-06, + "loss": 16.3357, + "step": 319990 + }, + { + "epoch": 0.646420245882101, + "grad_norm": 463.2908020019531, + "learning_rate": 3.4549150281252635e-06, + "loss": 14.0002, + "step": 320000 + }, + { + "epoch": 0.6464404465147848, + "grad_norm": 329.0850524902344, + "learning_rate": 3.454583050539173e-06, + "loss": 23.0141, + "step": 320010 + }, + { + "epoch": 0.6464606471474686, + "grad_norm": 121.69821166992188, + "learning_rate": 3.454251080485258e-06, + "loss": 14.8856, + "step": 320020 + }, + { + "epoch": 0.6464808477801525, + "grad_norm": 432.4167785644531, + "learning_rate": 3.4539191179651367e-06, + "loss": 24.9661, + "step": 320030 + }, + { + "epoch": 0.6465010484128363, + "grad_norm": 292.9191589355469, + "learning_rate": 3.4535871629804246e-06, + "loss": 18.8819, + "step": 320040 + }, + { + "epoch": 0.6465212490455201, + "grad_norm": 372.0992431640625, + "learning_rate": 3.4532552155327404e-06, + "loss": 18.7296, + "step": 320050 + }, + { + "epoch": 0.6465414496782039, + "grad_norm": 294.7443542480469, + "learning_rate": 3.4529232756237058e-06, + "loss": 18.6634, + "step": 320060 + }, + { + "epoch": 0.6465616503108877, + "grad_norm": 921.65869140625, + "learning_rate": 3.4525913432549326e-06, + "loss": 24.3927, + "step": 320070 + }, + { + "epoch": 0.6465818509435716, + "grad_norm": 125.10314178466797, + "learning_rate": 3.452259418428042e-06, + "loss": 10.5639, + "step": 320080 + }, + { + "epoch": 0.6466020515762554, + "grad_norm": 408.093994140625, + "learning_rate": 3.451927501144653e-06, + "loss": 14.3607, + "step": 320090 + }, + { + "epoch": 0.6466222522089392, + "grad_norm": 391.8179626464844, + "learning_rate": 3.4515955914063796e-06, + "loss": 16.8749, + "step": 320100 + }, + { + "epoch": 0.646642452841623, + "grad_norm": 272.0223083496094, + "learning_rate": 3.451263689214842e-06, + "loss": 13.1955, + "step": 320110 + }, + { + "epoch": 0.6466626534743068, + "grad_norm": 517.6617431640625, + "learning_rate": 3.4509317945716585e-06, + "loss": 14.1607, + "step": 320120 + }, + { + "epoch": 0.6466828541069907, + "grad_norm": 313.298095703125, + "learning_rate": 3.4505999074784447e-06, + "loss": 15.2244, + "step": 320130 + }, + { + "epoch": 0.6467030547396745, + "grad_norm": 261.96746826171875, + "learning_rate": 3.450268027936817e-06, + "loss": 14.7913, + "step": 320140 + }, + { + "epoch": 0.6467232553723583, + "grad_norm": 241.56088256835938, + "learning_rate": 3.4499361559483976e-06, + "loss": 19.1366, + "step": 320150 + }, + { + "epoch": 0.6467434560050421, + "grad_norm": 413.460205078125, + "learning_rate": 3.4496042915148008e-06, + "loss": 21.075, + "step": 320160 + }, + { + "epoch": 0.646763656637726, + "grad_norm": 276.2939758300781, + "learning_rate": 3.449272434637645e-06, + "loss": 11.5727, + "step": 320170 + }, + { + "epoch": 0.6467838572704097, + "grad_norm": 371.8274841308594, + "learning_rate": 3.4489405853185465e-06, + "loss": 32.8321, + "step": 320180 + }, + { + "epoch": 
0.6468040579030935, + "grad_norm": 8.812861442565918, + "learning_rate": 3.4486087435591243e-06, + "loss": 18.4471, + "step": 320190 + }, + { + "epoch": 0.6468242585357773, + "grad_norm": 26.10768699645996, + "learning_rate": 3.4482769093609945e-06, + "loss": 15.3442, + "step": 320200 + }, + { + "epoch": 0.6468444591684611, + "grad_norm": 118.00762939453125, + "learning_rate": 3.4479450827257733e-06, + "loss": 24.4206, + "step": 320210 + }, + { + "epoch": 0.6468646598011449, + "grad_norm": 348.4974365234375, + "learning_rate": 3.447613263655083e-06, + "loss": 26.2684, + "step": 320220 + }, + { + "epoch": 0.6468848604338288, + "grad_norm": 977.09130859375, + "learning_rate": 3.447281452150534e-06, + "loss": 21.9423, + "step": 320230 + }, + { + "epoch": 0.6469050610665126, + "grad_norm": 385.6293029785156, + "learning_rate": 3.4469496482137484e-06, + "loss": 20.0977, + "step": 320240 + }, + { + "epoch": 0.6469252616991964, + "grad_norm": 2031.3492431640625, + "learning_rate": 3.4466178518463424e-06, + "loss": 41.0484, + "step": 320250 + }, + { + "epoch": 0.6469454623318802, + "grad_norm": 375.87310791015625, + "learning_rate": 3.4462860630499316e-06, + "loss": 14.9024, + "step": 320260 + }, + { + "epoch": 0.646965662964564, + "grad_norm": 288.5296936035156, + "learning_rate": 3.445954281826134e-06, + "loss": 20.3958, + "step": 320270 + }, + { + "epoch": 0.6469858635972479, + "grad_norm": 542.4863891601562, + "learning_rate": 3.4456225081765683e-06, + "loss": 17.2139, + "step": 320280 + }, + { + "epoch": 0.6470060642299317, + "grad_norm": 680.419189453125, + "learning_rate": 3.445290742102848e-06, + "loss": 18.0196, + "step": 320290 + }, + { + "epoch": 0.6470262648626155, + "grad_norm": 213.6794891357422, + "learning_rate": 3.444958983606592e-06, + "loss": 13.3369, + "step": 320300 + }, + { + "epoch": 0.6470464654952993, + "grad_norm": 345.41864013671875, + "learning_rate": 3.444627232689418e-06, + "loss": 18.4047, + "step": 320310 + }, + { + "epoch": 0.6470666661279831, + "grad_norm": 244.41014099121094, + "learning_rate": 3.4442954893529436e-06, + "loss": 15.8739, + "step": 320320 + }, + { + "epoch": 0.647086866760667, + "grad_norm": 50.69621658325195, + "learning_rate": 3.4439637535987825e-06, + "loss": 25.0043, + "step": 320330 + }, + { + "epoch": 0.6471070673933508, + "grad_norm": 448.411376953125, + "learning_rate": 3.4436320254285537e-06, + "loss": 13.899, + "step": 320340 + }, + { + "epoch": 0.6471272680260346, + "grad_norm": 318.6735534667969, + "learning_rate": 3.4433003048438748e-06, + "loss": 23.9269, + "step": 320350 + }, + { + "epoch": 0.6471474686587184, + "grad_norm": 312.3635559082031, + "learning_rate": 3.442968591846359e-06, + "loss": 18.9191, + "step": 320360 + }, + { + "epoch": 0.6471676692914022, + "grad_norm": 75.13298034667969, + "learning_rate": 3.442636886437627e-06, + "loss": 29.475, + "step": 320370 + }, + { + "epoch": 0.6471878699240861, + "grad_norm": 607.415283203125, + "learning_rate": 3.4423051886192944e-06, + "loss": 25.1213, + "step": 320380 + }, + { + "epoch": 0.6472080705567699, + "grad_norm": 506.7091064453125, + "learning_rate": 3.4419734983929763e-06, + "loss": 23.628, + "step": 320390 + }, + { + "epoch": 0.6472282711894537, + "grad_norm": 535.0528564453125, + "learning_rate": 3.441641815760291e-06, + "loss": 11.2879, + "step": 320400 + }, + { + "epoch": 0.6472484718221375, + "grad_norm": 20.201257705688477, + "learning_rate": 3.4413101407228557e-06, + "loss": 12.2529, + "step": 320410 + }, + { + "epoch": 0.6472686724548213, + "grad_norm": 
380.4597473144531, + "learning_rate": 3.440978473282284e-06, + "loss": 8.8845, + "step": 320420 + }, + { + "epoch": 0.647288873087505, + "grad_norm": 15.600037574768066, + "learning_rate": 3.440646813440193e-06, + "loss": 16.2048, + "step": 320430 + }, + { + "epoch": 0.6473090737201889, + "grad_norm": 172.44436645507812, + "learning_rate": 3.4403151611982016e-06, + "loss": 26.6767, + "step": 320440 + }, + { + "epoch": 0.6473292743528727, + "grad_norm": 677.5240478515625, + "learning_rate": 3.4399835165579266e-06, + "loss": 17.4968, + "step": 320450 + }, + { + "epoch": 0.6473494749855565, + "grad_norm": 144.59420776367188, + "learning_rate": 3.439651879520981e-06, + "loss": 13.6249, + "step": 320460 + }, + { + "epoch": 0.6473696756182403, + "grad_norm": 104.53440856933594, + "learning_rate": 3.4393202500889827e-06, + "loss": 13.4822, + "step": 320470 + }, + { + "epoch": 0.6473898762509241, + "grad_norm": 220.8779296875, + "learning_rate": 3.43898862826355e-06, + "loss": 22.2717, + "step": 320480 + }, + { + "epoch": 0.647410076883608, + "grad_norm": 531.7034912109375, + "learning_rate": 3.438657014046296e-06, + "loss": 30.2536, + "step": 320490 + }, + { + "epoch": 0.6474302775162918, + "grad_norm": 219.97042846679688, + "learning_rate": 3.4383254074388373e-06, + "loss": 12.5737, + "step": 320500 + }, + { + "epoch": 0.6474504781489756, + "grad_norm": 433.6363525390625, + "learning_rate": 3.437993808442794e-06, + "loss": 21.8176, + "step": 320510 + }, + { + "epoch": 0.6474706787816594, + "grad_norm": 481.9315490722656, + "learning_rate": 3.437662217059776e-06, + "loss": 17.6614, + "step": 320520 + }, + { + "epoch": 0.6474908794143432, + "grad_norm": 570.3108520507812, + "learning_rate": 3.437330633291405e-06, + "loss": 13.3415, + "step": 320530 + }, + { + "epoch": 0.6475110800470271, + "grad_norm": 189.6717529296875, + "learning_rate": 3.436999057139295e-06, + "loss": 19.1386, + "step": 320540 + }, + { + "epoch": 0.6475312806797109, + "grad_norm": 802.9407958984375, + "learning_rate": 3.4366674886050618e-06, + "loss": 22.4112, + "step": 320550 + }, + { + "epoch": 0.6475514813123947, + "grad_norm": 400.6368713378906, + "learning_rate": 3.436335927690321e-06, + "loss": 16.2096, + "step": 320560 + }, + { + "epoch": 0.6475716819450785, + "grad_norm": 447.6169738769531, + "learning_rate": 3.4360043743966907e-06, + "loss": 8.7606, + "step": 320570 + }, + { + "epoch": 0.6475918825777623, + "grad_norm": 28.27263832092285, + "learning_rate": 3.4356728287257845e-06, + "loss": 11.5367, + "step": 320580 + }, + { + "epoch": 0.6476120832104462, + "grad_norm": 783.3766479492188, + "learning_rate": 3.4353412906792175e-06, + "loss": 26.0925, + "step": 320590 + }, + { + "epoch": 0.64763228384313, + "grad_norm": 555.4591064453125, + "learning_rate": 3.4350097602586085e-06, + "loss": 20.1014, + "step": 320600 + }, + { + "epoch": 0.6476524844758138, + "grad_norm": 247.10903930664062, + "learning_rate": 3.4346782374655743e-06, + "loss": 20.2857, + "step": 320610 + }, + { + "epoch": 0.6476726851084976, + "grad_norm": 633.5286254882812, + "learning_rate": 3.4343467223017256e-06, + "loss": 20.1002, + "step": 320620 + }, + { + "epoch": 0.6476928857411814, + "grad_norm": 0.0, + "learning_rate": 3.4340152147686824e-06, + "loss": 13.0941, + "step": 320630 + }, + { + "epoch": 0.6477130863738653, + "grad_norm": 333.5049743652344, + "learning_rate": 3.4336837148680595e-06, + "loss": 29.6691, + "step": 320640 + }, + { + "epoch": 0.6477332870065491, + "grad_norm": 0.0, + "learning_rate": 3.4333522226014715e-06, + "loss": 
30.9545, + "step": 320650 + }, + { + "epoch": 0.6477534876392329, + "grad_norm": 360.1062927246094, + "learning_rate": 3.433020737970534e-06, + "loss": 23.8065, + "step": 320660 + }, + { + "epoch": 0.6477736882719167, + "grad_norm": 1085.951416015625, + "learning_rate": 3.432689260976866e-06, + "loss": 20.8398, + "step": 320670 + }, + { + "epoch": 0.6477938889046005, + "grad_norm": 242.34632873535156, + "learning_rate": 3.4323577916220773e-06, + "loss": 27.6058, + "step": 320680 + }, + { + "epoch": 0.6478140895372843, + "grad_norm": 161.37716674804688, + "learning_rate": 3.4320263299077877e-06, + "loss": 34.7406, + "step": 320690 + }, + { + "epoch": 0.6478342901699681, + "grad_norm": 421.85528564453125, + "learning_rate": 3.4316948758356127e-06, + "loss": 16.1686, + "step": 320700 + }, + { + "epoch": 0.6478544908026519, + "grad_norm": 493.4578857421875, + "learning_rate": 3.431363429407166e-06, + "loss": 23.4922, + "step": 320710 + }, + { + "epoch": 0.6478746914353357, + "grad_norm": 622.5317993164062, + "learning_rate": 3.431031990624063e-06, + "loss": 24.4836, + "step": 320720 + }, + { + "epoch": 0.6478948920680195, + "grad_norm": 581.2822875976562, + "learning_rate": 3.4307005594879215e-06, + "loss": 13.4607, + "step": 320730 + }, + { + "epoch": 0.6479150927007034, + "grad_norm": 489.0071716308594, + "learning_rate": 3.4303691360003533e-06, + "loss": 19.3171, + "step": 320740 + }, + { + "epoch": 0.6479352933333872, + "grad_norm": 664.3818359375, + "learning_rate": 3.4300377201629753e-06, + "loss": 24.7402, + "step": 320750 + }, + { + "epoch": 0.647955493966071, + "grad_norm": 254.8978729248047, + "learning_rate": 3.4297063119774037e-06, + "loss": 18.9693, + "step": 320760 + }, + { + "epoch": 0.6479756945987548, + "grad_norm": 369.8358459472656, + "learning_rate": 3.4293749114452546e-06, + "loss": 29.1014, + "step": 320770 + }, + { + "epoch": 0.6479958952314386, + "grad_norm": 347.9951477050781, + "learning_rate": 3.4290435185681404e-06, + "loss": 30.3701, + "step": 320780 + }, + { + "epoch": 0.6480160958641225, + "grad_norm": 242.16900634765625, + "learning_rate": 3.428712133347677e-06, + "loss": 20.367, + "step": 320790 + }, + { + "epoch": 0.6480362964968063, + "grad_norm": 604.1107788085938, + "learning_rate": 3.4283807557854814e-06, + "loss": 24.7284, + "step": 320800 + }, + { + "epoch": 0.6480564971294901, + "grad_norm": 300.874755859375, + "learning_rate": 3.4280493858831665e-06, + "loss": 19.1906, + "step": 320810 + }, + { + "epoch": 0.6480766977621739, + "grad_norm": 453.137451171875, + "learning_rate": 3.4277180236423467e-06, + "loss": 17.5896, + "step": 320820 + }, + { + "epoch": 0.6480968983948577, + "grad_norm": 229.75108337402344, + "learning_rate": 3.427386669064643e-06, + "loss": 22.6297, + "step": 320830 + }, + { + "epoch": 0.6481170990275416, + "grad_norm": 161.3295440673828, + "learning_rate": 3.4270553221516618e-06, + "loss": 17.7979, + "step": 320840 + }, + { + "epoch": 0.6481372996602254, + "grad_norm": 355.29730224609375, + "learning_rate": 3.426723982905023e-06, + "loss": 29.1742, + "step": 320850 + }, + { + "epoch": 0.6481575002929092, + "grad_norm": 221.48448181152344, + "learning_rate": 3.4263926513263424e-06, + "loss": 11.8878, + "step": 320860 + }, + { + "epoch": 0.648177700925593, + "grad_norm": 40.39225387573242, + "learning_rate": 3.4260613274172316e-06, + "loss": 22.6435, + "step": 320870 + }, + { + "epoch": 0.6481979015582768, + "grad_norm": 672.4365844726562, + "learning_rate": 3.4257300111793073e-06, + "loss": 30.1296, + "step": 320880 + }, + { + 
"epoch": 0.6482181021909607, + "grad_norm": 651.0625610351562, + "learning_rate": 3.425398702614185e-06, + "loss": 23.6063, + "step": 320890 + }, + { + "epoch": 0.6482383028236445, + "grad_norm": 191.00192260742188, + "learning_rate": 3.4250674017234774e-06, + "loss": 16.1958, + "step": 320900 + }, + { + "epoch": 0.6482585034563283, + "grad_norm": 581.9676513671875, + "learning_rate": 3.4247361085087993e-06, + "loss": 12.2446, + "step": 320910 + }, + { + "epoch": 0.6482787040890121, + "grad_norm": 393.1851806640625, + "learning_rate": 3.4244048229717676e-06, + "loss": 21.6946, + "step": 320920 + }, + { + "epoch": 0.6482989047216959, + "grad_norm": 289.7122802734375, + "learning_rate": 3.4240735451139963e-06, + "loss": 23.8145, + "step": 320930 + }, + { + "epoch": 0.6483191053543798, + "grad_norm": 508.9690856933594, + "learning_rate": 3.4237422749370986e-06, + "loss": 23.4573, + "step": 320940 + }, + { + "epoch": 0.6483393059870635, + "grad_norm": 235.05738830566406, + "learning_rate": 3.4234110124426893e-06, + "loss": 19.8642, + "step": 320950 + }, + { + "epoch": 0.6483595066197473, + "grad_norm": 213.62127685546875, + "learning_rate": 3.4230797576323847e-06, + "loss": 26.8772, + "step": 320960 + }, + { + "epoch": 0.6483797072524311, + "grad_norm": 416.9248352050781, + "learning_rate": 3.422748510507798e-06, + "loss": 11.4521, + "step": 320970 + }, + { + "epoch": 0.6483999078851149, + "grad_norm": 244.31906127929688, + "learning_rate": 3.422417271070542e-06, + "loss": 29.9691, + "step": 320980 + }, + { + "epoch": 0.6484201085177987, + "grad_norm": 489.66864013671875, + "learning_rate": 3.4220860393222347e-06, + "loss": 15.2944, + "step": 320990 + }, + { + "epoch": 0.6484403091504826, + "grad_norm": 291.2265625, + "learning_rate": 3.4217548152644887e-06, + "loss": 13.3252, + "step": 321000 + }, + { + "epoch": 0.6484605097831664, + "grad_norm": 195.59718322753906, + "learning_rate": 3.4214235988989173e-06, + "loss": 20.0309, + "step": 321010 + }, + { + "epoch": 0.6484807104158502, + "grad_norm": 386.9002685546875, + "learning_rate": 3.421092390227137e-06, + "loss": 17.6624, + "step": 321020 + }, + { + "epoch": 0.648500911048534, + "grad_norm": 53.192901611328125, + "learning_rate": 3.42076118925076e-06, + "loss": 15.1449, + "step": 321030 + }, + { + "epoch": 0.6485211116812178, + "grad_norm": 804.015869140625, + "learning_rate": 3.4204299959714006e-06, + "loss": 17.1687, + "step": 321040 + }, + { + "epoch": 0.6485413123139017, + "grad_norm": 366.16265869140625, + "learning_rate": 3.4200988103906747e-06, + "loss": 12.5538, + "step": 321050 + }, + { + "epoch": 0.6485615129465855, + "grad_norm": 384.0155944824219, + "learning_rate": 3.4197676325101965e-06, + "loss": 19.0518, + "step": 321060 + }, + { + "epoch": 0.6485817135792693, + "grad_norm": 417.5281066894531, + "learning_rate": 3.419436462331578e-06, + "loss": 16.7324, + "step": 321070 + }, + { + "epoch": 0.6486019142119531, + "grad_norm": 81.71173858642578, + "learning_rate": 3.4191052998564344e-06, + "loss": 15.7786, + "step": 321080 + }, + { + "epoch": 0.6486221148446369, + "grad_norm": 453.1468200683594, + "learning_rate": 3.4187741450863816e-06, + "loss": 15.2719, + "step": 321090 + }, + { + "epoch": 0.6486423154773208, + "grad_norm": 78.19654846191406, + "learning_rate": 3.4184429980230305e-06, + "loss": 10.1305, + "step": 321100 + }, + { + "epoch": 0.6486625161100046, + "grad_norm": 656.2283325195312, + "learning_rate": 3.418111858667995e-06, + "loss": 27.1522, + "step": 321110 + }, + { + "epoch": 0.6486827167426884, + "grad_norm": 
535.2967529296875, + "learning_rate": 3.4177807270228942e-06, + "loss": 28.5655, + "step": 321120 + }, + { + "epoch": 0.6487029173753722, + "grad_norm": 273.2851867675781, + "learning_rate": 3.4174496030893346e-06, + "loss": 15.6931, + "step": 321130 + }, + { + "epoch": 0.648723118008056, + "grad_norm": 1340.5679931640625, + "learning_rate": 3.4171184868689345e-06, + "loss": 30.0804, + "step": 321140 + }, + { + "epoch": 0.6487433186407399, + "grad_norm": 475.08551025390625, + "learning_rate": 3.4167873783633087e-06, + "loss": 16.6618, + "step": 321150 + }, + { + "epoch": 0.6487635192734237, + "grad_norm": 51.13777542114258, + "learning_rate": 3.416456277574068e-06, + "loss": 18.7528, + "step": 321160 + }, + { + "epoch": 0.6487837199061075, + "grad_norm": 139.28160095214844, + "learning_rate": 3.4161251845028264e-06, + "loss": 21.9506, + "step": 321170 + }, + { + "epoch": 0.6488039205387913, + "grad_norm": 271.64013671875, + "learning_rate": 3.4157940991512007e-06, + "loss": 11.4651, + "step": 321180 + }, + { + "epoch": 0.6488241211714751, + "grad_norm": 424.78521728515625, + "learning_rate": 3.4154630215208005e-06, + "loss": 27.9061, + "step": 321190 + }, + { + "epoch": 0.6488443218041589, + "grad_norm": 200.58016967773438, + "learning_rate": 3.4151319516132414e-06, + "loss": 23.6057, + "step": 321200 + }, + { + "epoch": 0.6488645224368427, + "grad_norm": 382.99884033203125, + "learning_rate": 3.4148008894301378e-06, + "loss": 20.8119, + "step": 321210 + }, + { + "epoch": 0.6488847230695265, + "grad_norm": 342.0165100097656, + "learning_rate": 3.4144698349731025e-06, + "loss": 7.7871, + "step": 321220 + }, + { + "epoch": 0.6489049237022103, + "grad_norm": 445.2557678222656, + "learning_rate": 3.4141387882437483e-06, + "loss": 20.2608, + "step": 321230 + }, + { + "epoch": 0.6489251243348941, + "grad_norm": 120.84185791015625, + "learning_rate": 3.4138077492436896e-06, + "loss": 17.3855, + "step": 321240 + }, + { + "epoch": 0.648945324967578, + "grad_norm": 429.3396301269531, + "learning_rate": 3.4134767179745404e-06, + "loss": 19.8901, + "step": 321250 + }, + { + "epoch": 0.6489655256002618, + "grad_norm": 76.77388763427734, + "learning_rate": 3.4131456944379126e-06, + "loss": 27.5789, + "step": 321260 + }, + { + "epoch": 0.6489857262329456, + "grad_norm": 79.2790298461914, + "learning_rate": 3.412814678635419e-06, + "loss": 12.1772, + "step": 321270 + }, + { + "epoch": 0.6490059268656294, + "grad_norm": 287.5595397949219, + "learning_rate": 3.4124836705686765e-06, + "loss": 38.2287, + "step": 321280 + }, + { + "epoch": 0.6490261274983132, + "grad_norm": 418.327392578125, + "learning_rate": 3.4121526702392938e-06, + "loss": 19.3629, + "step": 321290 + }, + { + "epoch": 0.649046328130997, + "grad_norm": 362.4310607910156, + "learning_rate": 3.411821677648887e-06, + "loss": 15.4171, + "step": 321300 + }, + { + "epoch": 0.6490665287636809, + "grad_norm": 224.0290985107422, + "learning_rate": 3.4114906927990697e-06, + "loss": 20.4532, + "step": 321310 + }, + { + "epoch": 0.6490867293963647, + "grad_norm": 311.35284423828125, + "learning_rate": 3.4111597156914535e-06, + "loss": 18.5941, + "step": 321320 + }, + { + "epoch": 0.6491069300290485, + "grad_norm": 428.9659118652344, + "learning_rate": 3.4108287463276517e-06, + "loss": 28.53, + "step": 321330 + }, + { + "epoch": 0.6491271306617323, + "grad_norm": 213.68759155273438, + "learning_rate": 3.410497784709279e-06, + "loss": 14.7193, + "step": 321340 + }, + { + "epoch": 0.6491473312944162, + "grad_norm": 288.65863037109375, + "learning_rate": 
3.4101668308379465e-06, + "loss": 20.0432, + "step": 321350 + }, + { + "epoch": 0.6491675319271, + "grad_norm": 28.574405670166016, + "learning_rate": 3.409835884715267e-06, + "loss": 12.2773, + "step": 321360 + }, + { + "epoch": 0.6491877325597838, + "grad_norm": 7.877237796783447, + "learning_rate": 3.4095049463428553e-06, + "loss": 28.7102, + "step": 321370 + }, + { + "epoch": 0.6492079331924676, + "grad_norm": 253.9036407470703, + "learning_rate": 3.4091740157223253e-06, + "loss": 18.7362, + "step": 321380 + }, + { + "epoch": 0.6492281338251514, + "grad_norm": 448.0572814941406, + "learning_rate": 3.4088430928552863e-06, + "loss": 14.4309, + "step": 321390 + }, + { + "epoch": 0.6492483344578353, + "grad_norm": 458.5237731933594, + "learning_rate": 3.4085121777433532e-06, + "loss": 20.4872, + "step": 321400 + }, + { + "epoch": 0.6492685350905191, + "grad_norm": 361.4665832519531, + "learning_rate": 3.40818127038814e-06, + "loss": 12.8249, + "step": 321410 + }, + { + "epoch": 0.6492887357232029, + "grad_norm": 186.07550048828125, + "learning_rate": 3.407850370791257e-06, + "loss": 24.1596, + "step": 321420 + }, + { + "epoch": 0.6493089363558867, + "grad_norm": 131.7120819091797, + "learning_rate": 3.4075194789543174e-06, + "loss": 13.3453, + "step": 321430 + }, + { + "epoch": 0.6493291369885705, + "grad_norm": 232.39071655273438, + "learning_rate": 3.407188594878938e-06, + "loss": 10.0244, + "step": 321440 + }, + { + "epoch": 0.6493493376212544, + "grad_norm": 686.4042358398438, + "learning_rate": 3.4068577185667253e-06, + "loss": 30.2223, + "step": 321450 + }, + { + "epoch": 0.6493695382539381, + "grad_norm": 517.8428955078125, + "learning_rate": 3.406526850019295e-06, + "loss": 11.6241, + "step": 321460 + }, + { + "epoch": 0.6493897388866219, + "grad_norm": 28.20896339416504, + "learning_rate": 3.4061959892382613e-06, + "loss": 10.4526, + "step": 321470 + }, + { + "epoch": 0.6494099395193057, + "grad_norm": 149.054443359375, + "learning_rate": 3.4058651362252337e-06, + "loss": 32.4825, + "step": 321480 + }, + { + "epoch": 0.6494301401519895, + "grad_norm": 590.716796875, + "learning_rate": 3.4055342909818255e-06, + "loss": 18.6806, + "step": 321490 + }, + { + "epoch": 0.6494503407846733, + "grad_norm": 0.5362585186958313, + "learning_rate": 3.40520345350965e-06, + "loss": 20.8464, + "step": 321500 + }, + { + "epoch": 0.6494705414173572, + "grad_norm": 4.272317886352539, + "learning_rate": 3.4048726238103214e-06, + "loss": 14.8859, + "step": 321510 + }, + { + "epoch": 0.649490742050041, + "grad_norm": 98.34452819824219, + "learning_rate": 3.404541801885448e-06, + "loss": 26.1643, + "step": 321520 + }, + { + "epoch": 0.6495109426827248, + "grad_norm": 228.53408813476562, + "learning_rate": 3.4042109877366447e-06, + "loss": 18.4722, + "step": 321530 + }, + { + "epoch": 0.6495311433154086, + "grad_norm": 162.33099365234375, + "learning_rate": 3.403880181365525e-06, + "loss": 27.2294, + "step": 321540 + }, + { + "epoch": 0.6495513439480924, + "grad_norm": 280.77728271484375, + "learning_rate": 3.403549382773699e-06, + "loss": 33.4775, + "step": 321550 + }, + { + "epoch": 0.6495715445807763, + "grad_norm": 281.2795715332031, + "learning_rate": 3.4032185919627784e-06, + "loss": 11.3548, + "step": 321560 + }, + { + "epoch": 0.6495917452134601, + "grad_norm": 842.7446899414062, + "learning_rate": 3.4028878089343784e-06, + "loss": 26.5638, + "step": 321570 + }, + { + "epoch": 0.6496119458461439, + "grad_norm": 230.534423828125, + "learning_rate": 3.402557033690109e-06, + "loss": 41.9931, + 
"step": 321580 + }, + { + "epoch": 0.6496321464788277, + "grad_norm": 1114.5538330078125, + "learning_rate": 3.4022262662315813e-06, + "loss": 23.1025, + "step": 321590 + }, + { + "epoch": 0.6496523471115115, + "grad_norm": 321.23773193359375, + "learning_rate": 3.401895506560411e-06, + "loss": 31.6811, + "step": 321600 + }, + { + "epoch": 0.6496725477441954, + "grad_norm": 184.3525848388672, + "learning_rate": 3.401564754678207e-06, + "loss": 17.5569, + "step": 321610 + }, + { + "epoch": 0.6496927483768792, + "grad_norm": 119.01651000976562, + "learning_rate": 3.401234010586583e-06, + "loss": 12.5658, + "step": 321620 + }, + { + "epoch": 0.649712949009563, + "grad_norm": 1051.844970703125, + "learning_rate": 3.4009032742871515e-06, + "loss": 16.8424, + "step": 321630 + }, + { + "epoch": 0.6497331496422468, + "grad_norm": 417.7512512207031, + "learning_rate": 3.4005725457815225e-06, + "loss": 17.1054, + "step": 321640 + }, + { + "epoch": 0.6497533502749306, + "grad_norm": 336.3253479003906, + "learning_rate": 3.400241825071309e-06, + "loss": 15.4779, + "step": 321650 + }, + { + "epoch": 0.6497735509076145, + "grad_norm": 192.74798583984375, + "learning_rate": 3.3999111121581215e-06, + "loss": 27.2385, + "step": 321660 + }, + { + "epoch": 0.6497937515402983, + "grad_norm": 1039.3184814453125, + "learning_rate": 3.399580407043576e-06, + "loss": 30.0241, + "step": 321670 + }, + { + "epoch": 0.6498139521729821, + "grad_norm": 165.76678466796875, + "learning_rate": 3.3992497097292786e-06, + "loss": 21.279, + "step": 321680 + }, + { + "epoch": 0.6498341528056659, + "grad_norm": 524.4998168945312, + "learning_rate": 3.3989190202168452e-06, + "loss": 13.9386, + "step": 321690 + }, + { + "epoch": 0.6498543534383497, + "grad_norm": 302.688720703125, + "learning_rate": 3.3985883385078875e-06, + "loss": 15.2754, + "step": 321700 + }, + { + "epoch": 0.6498745540710335, + "grad_norm": 401.597412109375, + "learning_rate": 3.398257664604015e-06, + "loss": 15.3946, + "step": 321710 + }, + { + "epoch": 0.6498947547037173, + "grad_norm": 374.7026672363281, + "learning_rate": 3.3979269985068387e-06, + "loss": 10.6275, + "step": 321720 + }, + { + "epoch": 0.6499149553364011, + "grad_norm": 394.505615234375, + "learning_rate": 3.3975963402179756e-06, + "loss": 20.2509, + "step": 321730 + }, + { + "epoch": 0.6499351559690849, + "grad_norm": 201.50865173339844, + "learning_rate": 3.39726568973903e-06, + "loss": 29.1401, + "step": 321740 + }, + { + "epoch": 0.6499553566017687, + "grad_norm": 1979.6138916015625, + "learning_rate": 3.396935047071619e-06, + "loss": 28.8079, + "step": 321750 + }, + { + "epoch": 0.6499755572344526, + "grad_norm": 889.3950805664062, + "learning_rate": 3.3966044122173526e-06, + "loss": 24.7522, + "step": 321760 + }, + { + "epoch": 0.6499957578671364, + "grad_norm": 597.9691772460938, + "learning_rate": 3.3962737851778406e-06, + "loss": 5.9687, + "step": 321770 + }, + { + "epoch": 0.6500159584998202, + "grad_norm": 24.719833374023438, + "learning_rate": 3.3959431659546952e-06, + "loss": 13.8951, + "step": 321780 + }, + { + "epoch": 0.650036159132504, + "grad_norm": 447.026611328125, + "learning_rate": 3.39561255454953e-06, + "loss": 14.6619, + "step": 321790 + }, + { + "epoch": 0.6500563597651878, + "grad_norm": 456.4205017089844, + "learning_rate": 3.3952819509639534e-06, + "loss": 16.8525, + "step": 321800 + }, + { + "epoch": 0.6500765603978717, + "grad_norm": 207.09373474121094, + "learning_rate": 3.394951355199577e-06, + "loss": 33.0193, + "step": 321810 + }, + { + "epoch": 
0.6500967610305555, + "grad_norm": 286.6849670410156, + "learning_rate": 3.3946207672580144e-06, + "loss": 24.4564, + "step": 321820 + }, + { + "epoch": 0.6501169616632393, + "grad_norm": 282.37506103515625, + "learning_rate": 3.3942901871408763e-06, + "loss": 17.8353, + "step": 321830 + }, + { + "epoch": 0.6501371622959231, + "grad_norm": 366.56939697265625, + "learning_rate": 3.3939596148497717e-06, + "loss": 10.8935, + "step": 321840 + }, + { + "epoch": 0.6501573629286069, + "grad_norm": 162.059814453125, + "learning_rate": 3.3936290503863132e-06, + "loss": 19.6672, + "step": 321850 + }, + { + "epoch": 0.6501775635612908, + "grad_norm": 337.82073974609375, + "learning_rate": 3.393298493752113e-06, + "loss": 21.535, + "step": 321860 + }, + { + "epoch": 0.6501977641939746, + "grad_norm": 287.4216613769531, + "learning_rate": 3.392967944948781e-06, + "loss": 18.5025, + "step": 321870 + }, + { + "epoch": 0.6502179648266584, + "grad_norm": 700.3267822265625, + "learning_rate": 3.392637403977925e-06, + "loss": 14.0155, + "step": 321880 + }, + { + "epoch": 0.6502381654593422, + "grad_norm": 298.53656005859375, + "learning_rate": 3.3923068708411645e-06, + "loss": 22.7206, + "step": 321890 + }, + { + "epoch": 0.650258366092026, + "grad_norm": 648.0995483398438, + "learning_rate": 3.3919763455401016e-06, + "loss": 21.8986, + "step": 321900 + }, + { + "epoch": 0.6502785667247098, + "grad_norm": 331.8587646484375, + "learning_rate": 3.391645828076352e-06, + "loss": 18.3617, + "step": 321910 + }, + { + "epoch": 0.6502987673573937, + "grad_norm": 208.50125122070312, + "learning_rate": 3.391315318451527e-06, + "loss": 15.3929, + "step": 321920 + }, + { + "epoch": 0.6503189679900775, + "grad_norm": 666.7942504882812, + "learning_rate": 3.3909848166672343e-06, + "loss": 31.979, + "step": 321930 + }, + { + "epoch": 0.6503391686227613, + "grad_norm": 364.9177551269531, + "learning_rate": 3.3906543227250866e-06, + "loss": 14.7562, + "step": 321940 + }, + { + "epoch": 0.6503593692554451, + "grad_norm": 1887.9747314453125, + "learning_rate": 3.3903238366266956e-06, + "loss": 45.4767, + "step": 321950 + }, + { + "epoch": 0.650379569888129, + "grad_norm": 334.8154296875, + "learning_rate": 3.38999335837367e-06, + "loss": 14.8031, + "step": 321960 + }, + { + "epoch": 0.6503997705208127, + "grad_norm": 140.39060974121094, + "learning_rate": 3.389662887967621e-06, + "loss": 16.8329, + "step": 321970 + }, + { + "epoch": 0.6504199711534965, + "grad_norm": 400.18804931640625, + "learning_rate": 3.38933242541016e-06, + "loss": 22.3485, + "step": 321980 + }, + { + "epoch": 0.6504401717861803, + "grad_norm": 245.1291961669922, + "learning_rate": 3.3890019707028987e-06, + "loss": 19.8698, + "step": 321990 + }, + { + "epoch": 0.6504603724188641, + "grad_norm": 511.9997253417969, + "learning_rate": 3.3886715238474454e-06, + "loss": 15.2886, + "step": 322000 + }, + { + "epoch": 0.6504805730515479, + "grad_norm": 277.54144287109375, + "learning_rate": 3.388341084845411e-06, + "loss": 11.5422, + "step": 322010 + }, + { + "epoch": 0.6505007736842318, + "grad_norm": 180.7924346923828, + "learning_rate": 3.388010653698409e-06, + "loss": 10.1466, + "step": 322020 + }, + { + "epoch": 0.6505209743169156, + "grad_norm": 413.6370849609375, + "learning_rate": 3.3876802304080457e-06, + "loss": 22.5183, + "step": 322030 + }, + { + "epoch": 0.6505411749495994, + "grad_norm": 745.9916381835938, + "learning_rate": 3.3873498149759325e-06, + "loss": 13.8117, + "step": 322040 + }, + { + "epoch": 0.6505613755822832, + "grad_norm": 
215.02685546875, + "learning_rate": 3.3870194074036846e-06, + "loss": 15.0504, + "step": 322050 + }, + { + "epoch": 0.650581576214967, + "grad_norm": 453.3199462890625, + "learning_rate": 3.3866890076929036e-06, + "loss": 21.1833, + "step": 322060 + }, + { + "epoch": 0.6506017768476509, + "grad_norm": 102.20661926269531, + "learning_rate": 3.3863586158452074e-06, + "loss": 22.2214, + "step": 322070 + }, + { + "epoch": 0.6506219774803347, + "grad_norm": 655.1390380859375, + "learning_rate": 3.386028231862204e-06, + "loss": 30.9475, + "step": 322080 + }, + { + "epoch": 0.6506421781130185, + "grad_norm": 591.4625244140625, + "learning_rate": 3.385697855745502e-06, + "loss": 28.0162, + "step": 322090 + }, + { + "epoch": 0.6506623787457023, + "grad_norm": 214.79214477539062, + "learning_rate": 3.3853674874967134e-06, + "loss": 36.577, + "step": 322100 + }, + { + "epoch": 0.6506825793783861, + "grad_norm": 764.1506958007812, + "learning_rate": 3.3850371271174465e-06, + "loss": 29.5662, + "step": 322110 + }, + { + "epoch": 0.65070278001107, + "grad_norm": 311.96209716796875, + "learning_rate": 3.384706774609316e-06, + "loss": 20.8651, + "step": 322120 + }, + { + "epoch": 0.6507229806437538, + "grad_norm": 378.3182373046875, + "learning_rate": 3.3843764299739258e-06, + "loss": 21.0682, + "step": 322130 + }, + { + "epoch": 0.6507431812764376, + "grad_norm": 713.8678588867188, + "learning_rate": 3.3840460932128894e-06, + "loss": 13.7881, + "step": 322140 + }, + { + "epoch": 0.6507633819091214, + "grad_norm": 171.61329650878906, + "learning_rate": 3.3837157643278173e-06, + "loss": 30.0439, + "step": 322150 + }, + { + "epoch": 0.6507835825418052, + "grad_norm": 453.4654541015625, + "learning_rate": 3.3833854433203185e-06, + "loss": 12.3456, + "step": 322160 + }, + { + "epoch": 0.6508037831744891, + "grad_norm": 128.6647186279297, + "learning_rate": 3.3830551301920024e-06, + "loss": 23.3729, + "step": 322170 + }, + { + "epoch": 0.6508239838071729, + "grad_norm": 90.92256927490234, + "learning_rate": 3.38272482494448e-06, + "loss": 17.1569, + "step": 322180 + }, + { + "epoch": 0.6508441844398567, + "grad_norm": 277.4646301269531, + "learning_rate": 3.38239452757936e-06, + "loss": 16.6768, + "step": 322190 + }, + { + "epoch": 0.6508643850725405, + "grad_norm": 482.8577575683594, + "learning_rate": 3.3820642380982527e-06, + "loss": 35.2332, + "step": 322200 + }, + { + "epoch": 0.6508845857052243, + "grad_norm": 300.772705078125, + "learning_rate": 3.38173395650277e-06, + "loss": 12.6839, + "step": 322210 + }, + { + "epoch": 0.650904786337908, + "grad_norm": 114.0345687866211, + "learning_rate": 3.3814036827945173e-06, + "loss": 11.9252, + "step": 322220 + }, + { + "epoch": 0.6509249869705919, + "grad_norm": 613.072998046875, + "learning_rate": 3.3810734169751075e-06, + "loss": 24.3405, + "step": 322230 + }, + { + "epoch": 0.6509451876032757, + "grad_norm": 285.8229675292969, + "learning_rate": 3.3807431590461502e-06, + "loss": 13.8982, + "step": 322240 + }, + { + "epoch": 0.6509653882359595, + "grad_norm": 245.60195922851562, + "learning_rate": 3.3804129090092542e-06, + "loss": 26.4386, + "step": 322250 + }, + { + "epoch": 0.6509855888686433, + "grad_norm": 405.0401611328125, + "learning_rate": 3.3800826668660286e-06, + "loss": 17.3309, + "step": 322260 + }, + { + "epoch": 0.6510057895013271, + "grad_norm": 374.2948303222656, + "learning_rate": 3.3797524326180825e-06, + "loss": 19.3296, + "step": 322270 + }, + { + "epoch": 0.651025990134011, + "grad_norm": 510.6011657714844, + "learning_rate": 
3.379422206267029e-06, + "loss": 19.7713, + "step": 322280 + }, + { + "epoch": 0.6510461907666948, + "grad_norm": 803.0283813476562, + "learning_rate": 3.3790919878144737e-06, + "loss": 16.3765, + "step": 322290 + }, + { + "epoch": 0.6510663913993786, + "grad_norm": 216.36788940429688, + "learning_rate": 3.378761777262028e-06, + "loss": 23.0298, + "step": 322300 + }, + { + "epoch": 0.6510865920320624, + "grad_norm": 684.405517578125, + "learning_rate": 3.3784315746113017e-06, + "loss": 17.6894, + "step": 322310 + }, + { + "epoch": 0.6511067926647462, + "grad_norm": 306.9706726074219, + "learning_rate": 3.378101379863902e-06, + "loss": 18.5892, + "step": 322320 + }, + { + "epoch": 0.6511269932974301, + "grad_norm": 798.5531005859375, + "learning_rate": 3.377771193021439e-06, + "loss": 18.135, + "step": 322330 + }, + { + "epoch": 0.6511471939301139, + "grad_norm": 517.0232543945312, + "learning_rate": 3.377441014085524e-06, + "loss": 20.094, + "step": 322340 + }, + { + "epoch": 0.6511673945627977, + "grad_norm": 476.083984375, + "learning_rate": 3.3771108430577624e-06, + "loss": 13.3595, + "step": 322350 + }, + { + "epoch": 0.6511875951954815, + "grad_norm": 558.2871704101562, + "learning_rate": 3.376780679939767e-06, + "loss": 10.8419, + "step": 322360 + }, + { + "epoch": 0.6512077958281653, + "grad_norm": 72.24461364746094, + "learning_rate": 3.376450524733147e-06, + "loss": 21.8569, + "step": 322370 + }, + { + "epoch": 0.6512279964608492, + "grad_norm": 436.09051513671875, + "learning_rate": 3.3761203774395083e-06, + "loss": 18.2238, + "step": 322380 + }, + { + "epoch": 0.651248197093533, + "grad_norm": 480.0425109863281, + "learning_rate": 3.3757902380604624e-06, + "loss": 12.3229, + "step": 322390 + }, + { + "epoch": 0.6512683977262168, + "grad_norm": 175.87149047851562, + "learning_rate": 3.375460106597619e-06, + "loss": 20.0545, + "step": 322400 + }, + { + "epoch": 0.6512885983589006, + "grad_norm": 229.50296020507812, + "learning_rate": 3.375129983052585e-06, + "loss": 25.8444, + "step": 322410 + }, + { + "epoch": 0.6513087989915844, + "grad_norm": 39.605587005615234, + "learning_rate": 3.3747998674269693e-06, + "loss": 21.599, + "step": 322420 + }, + { + "epoch": 0.6513289996242683, + "grad_norm": 211.9730987548828, + "learning_rate": 3.374469759722383e-06, + "loss": 10.087, + "step": 322430 + }, + { + "epoch": 0.6513492002569521, + "grad_norm": 411.57080078125, + "learning_rate": 3.3741396599404353e-06, + "loss": 17.3464, + "step": 322440 + }, + { + "epoch": 0.6513694008896359, + "grad_norm": 195.10816955566406, + "learning_rate": 3.373809568082732e-06, + "loss": 19.8394, + "step": 322450 + }, + { + "epoch": 0.6513896015223197, + "grad_norm": 1152.62255859375, + "learning_rate": 3.3734794841508838e-06, + "loss": 16.5249, + "step": 322460 + }, + { + "epoch": 0.6514098021550035, + "grad_norm": 182.6773223876953, + "learning_rate": 3.3731494081465003e-06, + "loss": 10.9776, + "step": 322470 + }, + { + "epoch": 0.6514300027876873, + "grad_norm": 93.03421783447266, + "learning_rate": 3.3728193400711884e-06, + "loss": 12.1965, + "step": 322480 + }, + { + "epoch": 0.6514502034203711, + "grad_norm": 0.0, + "learning_rate": 3.3724892799265567e-06, + "loss": 13.6043, + "step": 322490 + }, + { + "epoch": 0.6514704040530549, + "grad_norm": 605.5923461914062, + "learning_rate": 3.372159227714218e-06, + "loss": 19.1769, + "step": 322500 + }, + { + "epoch": 0.6514906046857387, + "grad_norm": 113.98303985595703, + "learning_rate": 3.371829183435775e-06, + "loss": 22.1165, + "step": 322510 + }, + { 
+ "epoch": 0.6515108053184225, + "grad_norm": 316.26275634765625, + "learning_rate": 3.3714991470928393e-06, + "loss": 25.8412, + "step": 322520 + }, + { + "epoch": 0.6515310059511064, + "grad_norm": 166.5083770751953, + "learning_rate": 3.371169118687021e-06, + "loss": 20.1652, + "step": 322530 + }, + { + "epoch": 0.6515512065837902, + "grad_norm": 501.1871643066406, + "learning_rate": 3.370839098219926e-06, + "loss": 13.4596, + "step": 322540 + }, + { + "epoch": 0.651571407216474, + "grad_norm": 256.255126953125, + "learning_rate": 3.3705090856931626e-06, + "loss": 12.8425, + "step": 322550 + }, + { + "epoch": 0.6515916078491578, + "grad_norm": 269.9193420410156, + "learning_rate": 3.37017908110834e-06, + "loss": 13.7187, + "step": 322560 + }, + { + "epoch": 0.6516118084818416, + "grad_norm": 479.13092041015625, + "learning_rate": 3.3698490844670693e-06, + "loss": 12.8763, + "step": 322570 + }, + { + "epoch": 0.6516320091145255, + "grad_norm": 341.1509704589844, + "learning_rate": 3.3695190957709546e-06, + "loss": 17.577, + "step": 322580 + }, + { + "epoch": 0.6516522097472093, + "grad_norm": 374.6517028808594, + "learning_rate": 3.369189115021606e-06, + "loss": 17.6605, + "step": 322590 + }, + { + "epoch": 0.6516724103798931, + "grad_norm": 70.8856430053711, + "learning_rate": 3.3688591422206333e-06, + "loss": 29.0356, + "step": 322600 + }, + { + "epoch": 0.6516926110125769, + "grad_norm": 79.68965148925781, + "learning_rate": 3.3685291773696425e-06, + "loss": 18.5466, + "step": 322610 + }, + { + "epoch": 0.6517128116452607, + "grad_norm": 243.4609832763672, + "learning_rate": 3.3681992204702425e-06, + "loss": 20.2885, + "step": 322620 + }, + { + "epoch": 0.6517330122779446, + "grad_norm": 136.55223083496094, + "learning_rate": 3.3678692715240423e-06, + "loss": 15.1329, + "step": 322630 + }, + { + "epoch": 0.6517532129106284, + "grad_norm": 193.76109313964844, + "learning_rate": 3.3675393305326487e-06, + "loss": 17.1184, + "step": 322640 + }, + { + "epoch": 0.6517734135433122, + "grad_norm": 689.5020141601562, + "learning_rate": 3.367209397497669e-06, + "loss": 25.9347, + "step": 322650 + }, + { + "epoch": 0.651793614175996, + "grad_norm": 236.0913848876953, + "learning_rate": 3.3668794724207153e-06, + "loss": 19.0471, + "step": 322660 + }, + { + "epoch": 0.6518138148086798, + "grad_norm": 129.02688598632812, + "learning_rate": 3.3665495553033913e-06, + "loss": 8.0022, + "step": 322670 + }, + { + "epoch": 0.6518340154413637, + "grad_norm": 435.196044921875, + "learning_rate": 3.366219646147306e-06, + "loss": 15.066, + "step": 322680 + }, + { + "epoch": 0.6518542160740475, + "grad_norm": 61.95845413208008, + "learning_rate": 3.3658897449540707e-06, + "loss": 19.4422, + "step": 322690 + }, + { + "epoch": 0.6518744167067313, + "grad_norm": 81.69084167480469, + "learning_rate": 3.3655598517252886e-06, + "loss": 16.341, + "step": 322700 + }, + { + "epoch": 0.6518946173394151, + "grad_norm": 133.38026428222656, + "learning_rate": 3.36522996646257e-06, + "loss": 14.8244, + "step": 322710 + }, + { + "epoch": 0.6519148179720989, + "grad_norm": 155.47174072265625, + "learning_rate": 3.364900089167521e-06, + "loss": 25.1622, + "step": 322720 + }, + { + "epoch": 0.6519350186047828, + "grad_norm": 625.8029174804688, + "learning_rate": 3.364570219841753e-06, + "loss": 22.288, + "step": 322730 + }, + { + "epoch": 0.6519552192374665, + "grad_norm": 575.62158203125, + "learning_rate": 3.3642403584868694e-06, + "loss": 31.1175, + "step": 322740 + }, + { + "epoch": 0.6519754198701503, + "grad_norm": 
593.5516357421875, + "learning_rate": 3.3639105051044807e-06, + "loss": 17.7484, + "step": 322750 + }, + { + "epoch": 0.6519956205028341, + "grad_norm": 280.8475036621094, + "learning_rate": 3.363580659696194e-06, + "loss": 19.1936, + "step": 322760 + }, + { + "epoch": 0.6520158211355179, + "grad_norm": 88.53751373291016, + "learning_rate": 3.3632508222636163e-06, + "loss": 18.2626, + "step": 322770 + }, + { + "epoch": 0.6520360217682017, + "grad_norm": 435.2064514160156, + "learning_rate": 3.362920992808356e-06, + "loss": 14.0426, + "step": 322780 + }, + { + "epoch": 0.6520562224008856, + "grad_norm": 233.89483642578125, + "learning_rate": 3.3625911713320205e-06, + "loss": 17.1296, + "step": 322790 + }, + { + "epoch": 0.6520764230335694, + "grad_norm": 409.69061279296875, + "learning_rate": 3.3622613578362162e-06, + "loss": 14.7899, + "step": 322800 + }, + { + "epoch": 0.6520966236662532, + "grad_norm": 500.92132568359375, + "learning_rate": 3.3619315523225506e-06, + "loss": 29.0302, + "step": 322810 + }, + { + "epoch": 0.652116824298937, + "grad_norm": 292.7793273925781, + "learning_rate": 3.361601754792635e-06, + "loss": 19.0044, + "step": 322820 + }, + { + "epoch": 0.6521370249316208, + "grad_norm": 163.41346740722656, + "learning_rate": 3.3612719652480707e-06, + "loss": 19.6094, + "step": 322830 + }, + { + "epoch": 0.6521572255643047, + "grad_norm": 448.2548522949219, + "learning_rate": 3.3609421836904688e-06, + "loss": 19.8406, + "step": 322840 + }, + { + "epoch": 0.6521774261969885, + "grad_norm": 333.2283020019531, + "learning_rate": 3.360612410121438e-06, + "loss": 21.349, + "step": 322850 + }, + { + "epoch": 0.6521976268296723, + "grad_norm": 619.9572143554688, + "learning_rate": 3.3602826445425805e-06, + "loss": 29.5249, + "step": 322860 + }, + { + "epoch": 0.6522178274623561, + "grad_norm": 338.3478698730469, + "learning_rate": 3.3599528869555074e-06, + "loss": 17.5572, + "step": 322870 + }, + { + "epoch": 0.65223802809504, + "grad_norm": 203.17066955566406, + "learning_rate": 3.359623137361825e-06, + "loss": 23.9921, + "step": 322880 + }, + { + "epoch": 0.6522582287277238, + "grad_norm": 245.39356994628906, + "learning_rate": 3.3592933957631424e-06, + "loss": 38.2778, + "step": 322890 + }, + { + "epoch": 0.6522784293604076, + "grad_norm": 2.3841423988342285, + "learning_rate": 3.358963662161062e-06, + "loss": 11.8317, + "step": 322900 + }, + { + "epoch": 0.6522986299930914, + "grad_norm": 427.2539367675781, + "learning_rate": 3.358633936557195e-06, + "loss": 23.7271, + "step": 322910 + }, + { + "epoch": 0.6523188306257752, + "grad_norm": 364.3153381347656, + "learning_rate": 3.358304218953148e-06, + "loss": 25.2088, + "step": 322920 + }, + { + "epoch": 0.652339031258459, + "grad_norm": 465.3929748535156, + "learning_rate": 3.3579745093505256e-06, + "loss": 22.5208, + "step": 322930 + }, + { + "epoch": 0.6523592318911429, + "grad_norm": 314.6540222167969, + "learning_rate": 3.3576448077509373e-06, + "loss": 29.2729, + "step": 322940 + }, + { + "epoch": 0.6523794325238267, + "grad_norm": 447.7454833984375, + "learning_rate": 3.357315114155989e-06, + "loss": 22.0601, + "step": 322950 + }, + { + "epoch": 0.6523996331565105, + "grad_norm": 565.6834106445312, + "learning_rate": 3.356985428567287e-06, + "loss": 35.9726, + "step": 322960 + }, + { + "epoch": 0.6524198337891943, + "grad_norm": 918.8121948242188, + "learning_rate": 3.3566557509864374e-06, + "loss": 14.5047, + "step": 322970 + }, + { + "epoch": 0.6524400344218781, + "grad_norm": 226.97035217285156, + "learning_rate": 
3.3563260814150512e-06, + "loss": 20.9802, + "step": 322980 + }, + { + "epoch": 0.6524602350545619, + "grad_norm": 568.095703125, + "learning_rate": 3.3559964198547307e-06, + "loss": 22.3045, + "step": 322990 + }, + { + "epoch": 0.6524804356872457, + "grad_norm": 411.2491760253906, + "learning_rate": 3.355666766307084e-06, + "loss": 19.7654, + "step": 323000 + }, + { + "epoch": 0.6525006363199295, + "grad_norm": 423.4187316894531, + "learning_rate": 3.3553371207737183e-06, + "loss": 14.6672, + "step": 323010 + }, + { + "epoch": 0.6525208369526133, + "grad_norm": 330.9416809082031, + "learning_rate": 3.3550074832562417e-06, + "loss": 21.8089, + "step": 323020 + }, + { + "epoch": 0.6525410375852971, + "grad_norm": 660.3540649414062, + "learning_rate": 3.3546778537562563e-06, + "loss": 17.7518, + "step": 323030 + }, + { + "epoch": 0.652561238217981, + "grad_norm": 183.2447052001953, + "learning_rate": 3.354348232275373e-06, + "loss": 17.8972, + "step": 323040 + }, + { + "epoch": 0.6525814388506648, + "grad_norm": 329.89556884765625, + "learning_rate": 3.3540186188151976e-06, + "loss": 30.9821, + "step": 323050 + }, + { + "epoch": 0.6526016394833486, + "grad_norm": 222.6219940185547, + "learning_rate": 3.3536890133773346e-06, + "loss": 14.2322, + "step": 323060 + }, + { + "epoch": 0.6526218401160324, + "grad_norm": 327.657958984375, + "learning_rate": 3.353359415963392e-06, + "loss": 11.7364, + "step": 323070 + }, + { + "epoch": 0.6526420407487162, + "grad_norm": 53.36893844604492, + "learning_rate": 3.353029826574977e-06, + "loss": 19.6695, + "step": 323080 + }, + { + "epoch": 0.6526622413814, + "grad_norm": 521.5284423828125, + "learning_rate": 3.352700245213693e-06, + "loss": 19.8244, + "step": 323090 + }, + { + "epoch": 0.6526824420140839, + "grad_norm": 81.0189208984375, + "learning_rate": 3.352370671881148e-06, + "loss": 11.9868, + "step": 323100 + }, + { + "epoch": 0.6527026426467677, + "grad_norm": 275.5891418457031, + "learning_rate": 3.3520411065789513e-06, + "loss": 25.3043, + "step": 323110 + }, + { + "epoch": 0.6527228432794515, + "grad_norm": 423.15283203125, + "learning_rate": 3.3517115493087036e-06, + "loss": 21.4593, + "step": 323120 + }, + { + "epoch": 0.6527430439121353, + "grad_norm": 375.1765441894531, + "learning_rate": 3.3513820000720145e-06, + "loss": 25.8436, + "step": 323130 + }, + { + "epoch": 0.6527632445448192, + "grad_norm": 728.240234375, + "learning_rate": 3.3510524588704908e-06, + "loss": 23.8151, + "step": 323140 + }, + { + "epoch": 0.652783445177503, + "grad_norm": 278.7547912597656, + "learning_rate": 3.350722925705736e-06, + "loss": 10.8667, + "step": 323150 + }, + { + "epoch": 0.6528036458101868, + "grad_norm": 40.4306640625, + "learning_rate": 3.350393400579358e-06, + "loss": 24.5749, + "step": 323160 + }, + { + "epoch": 0.6528238464428706, + "grad_norm": 357.2164001464844, + "learning_rate": 3.350063883492962e-06, + "loss": 18.1901, + "step": 323170 + }, + { + "epoch": 0.6528440470755544, + "grad_norm": 864.6455078125, + "learning_rate": 3.349734374448157e-06, + "loss": 15.8515, + "step": 323180 + }, + { + "epoch": 0.6528642477082383, + "grad_norm": 870.7262573242188, + "learning_rate": 3.3494048734465433e-06, + "loss": 49.4953, + "step": 323190 + }, + { + "epoch": 0.6528844483409221, + "grad_norm": 385.034423828125, + "learning_rate": 3.3490753804897315e-06, + "loss": 15.4344, + "step": 323200 + }, + { + "epoch": 0.6529046489736059, + "grad_norm": 542.6068725585938, + "learning_rate": 3.3487458955793273e-06, + "loss": 18.1847, + "step": 323210 + }, + { 
+ "epoch": 0.6529248496062897, + "grad_norm": 256.6165466308594, + "learning_rate": 3.3484164187169334e-06, + "loss": 20.8445, + "step": 323220 + }, + { + "epoch": 0.6529450502389735, + "grad_norm": 306.36187744140625, + "learning_rate": 3.348086949904158e-06, + "loss": 20.4172, + "step": 323230 + }, + { + "epoch": 0.6529652508716574, + "grad_norm": 3.7985031604766846, + "learning_rate": 3.347757489142608e-06, + "loss": 20.9858, + "step": 323240 + }, + { + "epoch": 0.6529854515043411, + "grad_norm": 503.82421875, + "learning_rate": 3.347428036433886e-06, + "loss": 11.0117, + "step": 323250 + }, + { + "epoch": 0.6530056521370249, + "grad_norm": 296.73895263671875, + "learning_rate": 3.3470985917795983e-06, + "loss": 14.5541, + "step": 323260 + }, + { + "epoch": 0.6530258527697087, + "grad_norm": 330.89776611328125, + "learning_rate": 3.3467691551813547e-06, + "loss": 17.3829, + "step": 323270 + }, + { + "epoch": 0.6530460534023925, + "grad_norm": 199.73748779296875, + "learning_rate": 3.3464397266407543e-06, + "loss": 22.8537, + "step": 323280 + }, + { + "epoch": 0.6530662540350763, + "grad_norm": 461.0065002441406, + "learning_rate": 3.346110306159408e-06, + "loss": 18.9679, + "step": 323290 + }, + { + "epoch": 0.6530864546677602, + "grad_norm": 609.6250610351562, + "learning_rate": 3.34578089373892e-06, + "loss": 13.0132, + "step": 323300 + }, + { + "epoch": 0.653106655300444, + "grad_norm": 22.474637985229492, + "learning_rate": 3.3454514893808943e-06, + "loss": 22.382, + "step": 323310 + }, + { + "epoch": 0.6531268559331278, + "grad_norm": 1025.268798828125, + "learning_rate": 3.3451220930869377e-06, + "loss": 21.649, + "step": 323320 + }, + { + "epoch": 0.6531470565658116, + "grad_norm": 264.25006103515625, + "learning_rate": 3.3447927048586538e-06, + "loss": 16.3413, + "step": 323330 + }, + { + "epoch": 0.6531672571984954, + "grad_norm": 346.7159729003906, + "learning_rate": 3.3444633246976526e-06, + "loss": 11.7635, + "step": 323340 + }, + { + "epoch": 0.6531874578311793, + "grad_norm": 153.5816650390625, + "learning_rate": 3.344133952605534e-06, + "loss": 8.6615, + "step": 323350 + }, + { + "epoch": 0.6532076584638631, + "grad_norm": 622.320068359375, + "learning_rate": 3.3438045885839053e-06, + "loss": 16.3805, + "step": 323360 + }, + { + "epoch": 0.6532278590965469, + "grad_norm": 120.74202728271484, + "learning_rate": 3.3434752326343745e-06, + "loss": 19.6188, + "step": 323370 + }, + { + "epoch": 0.6532480597292307, + "grad_norm": 136.3270721435547, + "learning_rate": 3.343145884758543e-06, + "loss": 22.3427, + "step": 323380 + }, + { + "epoch": 0.6532682603619145, + "grad_norm": 701.3666381835938, + "learning_rate": 3.3428165449580174e-06, + "loss": 14.1135, + "step": 323390 + }, + { + "epoch": 0.6532884609945984, + "grad_norm": 411.7037353515625, + "learning_rate": 3.3424872132344044e-06, + "loss": 14.4849, + "step": 323400 + }, + { + "epoch": 0.6533086616272822, + "grad_norm": 182.9111328125, + "learning_rate": 3.3421578895893058e-06, + "loss": 18.9418, + "step": 323410 + }, + { + "epoch": 0.653328862259966, + "grad_norm": 206.97998046875, + "learning_rate": 3.3418285740243285e-06, + "loss": 15.5435, + "step": 323420 + }, + { + "epoch": 0.6533490628926498, + "grad_norm": 336.7099609375, + "learning_rate": 3.3414992665410806e-06, + "loss": 17.1012, + "step": 323430 + }, + { + "epoch": 0.6533692635253336, + "grad_norm": 3.9773671627044678, + "learning_rate": 3.34116996714116e-06, + "loss": 13.594, + "step": 323440 + }, + { + "epoch": 0.6533894641580175, + "grad_norm": 
426.3591613769531, + "learning_rate": 3.340840675826178e-06, + "loss": 26.4791, + "step": 323450 + }, + { + "epoch": 0.6534096647907013, + "grad_norm": 611.9093017578125, + "learning_rate": 3.3405113925977383e-06, + "loss": 23.0561, + "step": 323460 + }, + { + "epoch": 0.6534298654233851, + "grad_norm": 222.60874938964844, + "learning_rate": 3.340182117457443e-06, + "loss": 21.197, + "step": 323470 + }, + { + "epoch": 0.6534500660560689, + "grad_norm": 185.4382781982422, + "learning_rate": 3.3398528504068996e-06, + "loss": 20.9105, + "step": 323480 + }, + { + "epoch": 0.6534702666887527, + "grad_norm": 705.8347778320312, + "learning_rate": 3.3395235914477104e-06, + "loss": 33.4218, + "step": 323490 + }, + { + "epoch": 0.6534904673214365, + "grad_norm": 76.52244567871094, + "learning_rate": 3.339194340581485e-06, + "loss": 36.8078, + "step": 323500 + }, + { + "epoch": 0.6535106679541203, + "grad_norm": 77.32313537597656, + "learning_rate": 3.3388650978098218e-06, + "loss": 20.5604, + "step": 323510 + }, + { + "epoch": 0.6535308685868041, + "grad_norm": 272.7183837890625, + "learning_rate": 3.33853586313433e-06, + "loss": 12.8314, + "step": 323520 + }, + { + "epoch": 0.6535510692194879, + "grad_norm": 60.551856994628906, + "learning_rate": 3.3382066365566133e-06, + "loss": 18.617, + "step": 323530 + }, + { + "epoch": 0.6535712698521717, + "grad_norm": 396.419921875, + "learning_rate": 3.337877418078276e-06, + "loss": 29.6913, + "step": 323540 + }, + { + "epoch": 0.6535914704848556, + "grad_norm": 177.9261932373047, + "learning_rate": 3.3375482077009213e-06, + "loss": 26.6844, + "step": 323550 + }, + { + "epoch": 0.6536116711175394, + "grad_norm": 475.1094055175781, + "learning_rate": 3.3372190054261565e-06, + "loss": 27.4537, + "step": 323560 + }, + { + "epoch": 0.6536318717502232, + "grad_norm": 540.54248046875, + "learning_rate": 3.3368898112555843e-06, + "loss": 21.2653, + "step": 323570 + }, + { + "epoch": 0.653652072382907, + "grad_norm": 240.32376098632812, + "learning_rate": 3.336560625190808e-06, + "loss": 7.5049, + "step": 323580 + }, + { + "epoch": 0.6536722730155908, + "grad_norm": 78.92088317871094, + "learning_rate": 3.3362314472334353e-06, + "loss": 25.9188, + "step": 323590 + }, + { + "epoch": 0.6536924736482747, + "grad_norm": 289.92327880859375, + "learning_rate": 3.3359022773850673e-06, + "loss": 23.9437, + "step": 323600 + }, + { + "epoch": 0.6537126742809585, + "grad_norm": 0.13065364956855774, + "learning_rate": 3.3355731156473105e-06, + "loss": 16.9742, + "step": 323610 + }, + { + "epoch": 0.6537328749136423, + "grad_norm": 31.578344345092773, + "learning_rate": 3.335243962021768e-06, + "loss": 20.8097, + "step": 323620 + }, + { + "epoch": 0.6537530755463261, + "grad_norm": 306.77130126953125, + "learning_rate": 3.334914816510046e-06, + "loss": 17.3479, + "step": 323630 + }, + { + "epoch": 0.6537732761790099, + "grad_norm": 602.3389282226562, + "learning_rate": 3.3345856791137456e-06, + "loss": 24.9415, + "step": 323640 + }, + { + "epoch": 0.6537934768116938, + "grad_norm": 342.4835510253906, + "learning_rate": 3.334256549834472e-06, + "loss": 20.2902, + "step": 323650 + }, + { + "epoch": 0.6538136774443776, + "grad_norm": 416.8805236816406, + "learning_rate": 3.333927428673832e-06, + "loss": 19.6605, + "step": 323660 + }, + { + "epoch": 0.6538338780770614, + "grad_norm": 42.473670959472656, + "learning_rate": 3.333598315633426e-06, + "loss": 10.3506, + "step": 323670 + }, + { + "epoch": 0.6538540787097452, + "grad_norm": 390.04583740234375, + "learning_rate": 
3.3332692107148605e-06, + "loss": 14.6933, + "step": 323680 + }, + { + "epoch": 0.653874279342429, + "grad_norm": 136.37127685546875, + "learning_rate": 3.3329401139197393e-06, + "loss": 17.3831, + "step": 323690 + }, + { + "epoch": 0.6538944799751129, + "grad_norm": 290.8580322265625, + "learning_rate": 3.3326110252496652e-06, + "loss": 14.5204, + "step": 323700 + }, + { + "epoch": 0.6539146806077967, + "grad_norm": 127.44840240478516, + "learning_rate": 3.3322819447062417e-06, + "loss": 8.5128, + "step": 323710 + }, + { + "epoch": 0.6539348812404805, + "grad_norm": 486.5702819824219, + "learning_rate": 3.3319528722910767e-06, + "loss": 27.6796, + "step": 323720 + }, + { + "epoch": 0.6539550818731643, + "grad_norm": 593.7366333007812, + "learning_rate": 3.3316238080057674e-06, + "loss": 33.287, + "step": 323730 + }, + { + "epoch": 0.6539752825058481, + "grad_norm": 304.499267578125, + "learning_rate": 3.3312947518519228e-06, + "loss": 18.4255, + "step": 323740 + }, + { + "epoch": 0.653995483138532, + "grad_norm": 307.52728271484375, + "learning_rate": 3.330965703831146e-06, + "loss": 16.0626, + "step": 323750 + }, + { + "epoch": 0.6540156837712157, + "grad_norm": 694.5188598632812, + "learning_rate": 3.3306366639450394e-06, + "loss": 20.4025, + "step": 323760 + }, + { + "epoch": 0.6540358844038995, + "grad_norm": 380.5897216796875, + "learning_rate": 3.3303076321952066e-06, + "loss": 24.0266, + "step": 323770 + }, + { + "epoch": 0.6540560850365833, + "grad_norm": 583.7579345703125, + "learning_rate": 3.329978608583252e-06, + "loss": 18.3361, + "step": 323780 + }, + { + "epoch": 0.6540762856692671, + "grad_norm": 211.36448669433594, + "learning_rate": 3.329649593110781e-06, + "loss": 12.6569, + "step": 323790 + }, + { + "epoch": 0.6540964863019509, + "grad_norm": 454.0394592285156, + "learning_rate": 3.3293205857793924e-06, + "loss": 19.8624, + "step": 323800 + }, + { + "epoch": 0.6541166869346348, + "grad_norm": 340.27069091796875, + "learning_rate": 3.328991586590694e-06, + "loss": 15.6996, + "step": 323810 + }, + { + "epoch": 0.6541368875673186, + "grad_norm": 574.0722045898438, + "learning_rate": 3.328662595546289e-06, + "loss": 25.6051, + "step": 323820 + }, + { + "epoch": 0.6541570882000024, + "grad_norm": 614.04931640625, + "learning_rate": 3.3283336126477785e-06, + "loss": 15.3504, + "step": 323830 + }, + { + "epoch": 0.6541772888326862, + "grad_norm": 208.8754119873047, + "learning_rate": 3.3280046378967673e-06, + "loss": 17.9637, + "step": 323840 + }, + { + "epoch": 0.65419748946537, + "grad_norm": 288.2249755859375, + "learning_rate": 3.32767567129486e-06, + "loss": 18.1662, + "step": 323850 + }, + { + "epoch": 0.6542176900980539, + "grad_norm": 125.99740600585938, + "learning_rate": 3.3273467128436575e-06, + "loss": 10.6629, + "step": 323860 + }, + { + "epoch": 0.6542378907307377, + "grad_norm": 532.9415283203125, + "learning_rate": 3.3270177625447632e-06, + "loss": 24.2573, + "step": 323870 + }, + { + "epoch": 0.6542580913634215, + "grad_norm": 930.7927856445312, + "learning_rate": 3.326688820399784e-06, + "loss": 19.2765, + "step": 323880 + }, + { + "epoch": 0.6542782919961053, + "grad_norm": 153.9420166015625, + "learning_rate": 3.326359886410318e-06, + "loss": 13.2919, + "step": 323890 + }, + { + "epoch": 0.6542984926287891, + "grad_norm": 452.17352294921875, + "learning_rate": 3.3260309605779717e-06, + "loss": 21.4966, + "step": 323900 + }, + { + "epoch": 0.654318693261473, + "grad_norm": 314.76812744140625, + "learning_rate": 3.3257020429043485e-06, + "loss": 15.2828, + 
"step": 323910 + }, + { + "epoch": 0.6543388938941568, + "grad_norm": 285.5110778808594, + "learning_rate": 3.32537313339105e-06, + "loss": 18.2423, + "step": 323920 + }, + { + "epoch": 0.6543590945268406, + "grad_norm": 212.53077697753906, + "learning_rate": 3.325044232039679e-06, + "loss": 16.4085, + "step": 323930 + }, + { + "epoch": 0.6543792951595244, + "grad_norm": 321.1883544921875, + "learning_rate": 3.3247153388518387e-06, + "loss": 29.8498, + "step": 323940 + }, + { + "epoch": 0.6543994957922082, + "grad_norm": 602.4036254882812, + "learning_rate": 3.3243864538291358e-06, + "loss": 10.0415, + "step": 323950 + }, + { + "epoch": 0.6544196964248921, + "grad_norm": 497.0145568847656, + "learning_rate": 3.3240575769731662e-06, + "loss": 27.176, + "step": 323960 + }, + { + "epoch": 0.6544398970575759, + "grad_norm": 302.6004333496094, + "learning_rate": 3.3237287082855386e-06, + "loss": 19.8218, + "step": 323970 + }, + { + "epoch": 0.6544600976902597, + "grad_norm": 337.1525573730469, + "learning_rate": 3.3233998477678555e-06, + "loss": 31.7608, + "step": 323980 + }, + { + "epoch": 0.6544802983229435, + "grad_norm": 317.685791015625, + "learning_rate": 3.3230709954217156e-06, + "loss": 13.8257, + "step": 323990 + }, + { + "epoch": 0.6545004989556273, + "grad_norm": 255.78964233398438, + "learning_rate": 3.322742151248726e-06, + "loss": 26.6619, + "step": 324000 + }, + { + "epoch": 0.6545206995883112, + "grad_norm": 243.2002716064453, + "learning_rate": 3.3224133152504874e-06, + "loss": 32.395, + "step": 324010 + }, + { + "epoch": 0.6545409002209949, + "grad_norm": 399.0644836425781, + "learning_rate": 3.3220844874286017e-06, + "loss": 15.7088, + "step": 324020 + }, + { + "epoch": 0.6545611008536787, + "grad_norm": 641.73974609375, + "learning_rate": 3.321755667784673e-06, + "loss": 22.6925, + "step": 324030 + }, + { + "epoch": 0.6545813014863625, + "grad_norm": 224.15005493164062, + "learning_rate": 3.3214268563203056e-06, + "loss": 17.7867, + "step": 324040 + }, + { + "epoch": 0.6546015021190463, + "grad_norm": 0.0, + "learning_rate": 3.3210980530370974e-06, + "loss": 81.2715, + "step": 324050 + }, + { + "epoch": 0.6546217027517302, + "grad_norm": 333.16357421875, + "learning_rate": 3.3207692579366548e-06, + "loss": 9.333, + "step": 324060 + }, + { + "epoch": 0.654641903384414, + "grad_norm": 451.720703125, + "learning_rate": 3.3204404710205786e-06, + "loss": 16.7943, + "step": 324070 + }, + { + "epoch": 0.6546621040170978, + "grad_norm": 596.48681640625, + "learning_rate": 3.3201116922904737e-06, + "loss": 20.8438, + "step": 324080 + }, + { + "epoch": 0.6546823046497816, + "grad_norm": 379.9711608886719, + "learning_rate": 3.3197829217479396e-06, + "loss": 10.409, + "step": 324090 + }, + { + "epoch": 0.6547025052824654, + "grad_norm": 341.3067321777344, + "learning_rate": 3.319454159394578e-06, + "loss": 10.2977, + "step": 324100 + }, + { + "epoch": 0.6547227059151493, + "grad_norm": 177.4818878173828, + "learning_rate": 3.3191254052319967e-06, + "loss": 18.6199, + "step": 324110 + }, + { + "epoch": 0.6547429065478331, + "grad_norm": 611.534423828125, + "learning_rate": 3.3187966592617905e-06, + "loss": 17.9969, + "step": 324120 + }, + { + "epoch": 0.6547631071805169, + "grad_norm": 295.9464416503906, + "learning_rate": 3.318467921485567e-06, + "loss": 20.3785, + "step": 324130 + }, + { + "epoch": 0.6547833078132007, + "grad_norm": 15.180954933166504, + "learning_rate": 3.3181391919049277e-06, + "loss": 21.2955, + "step": 324140 + }, + { + "epoch": 0.6548035084458845, + "grad_norm": 
385.1485900878906, + "learning_rate": 3.317810470521473e-06, + "loss": 12.7336, + "step": 324150 + }, + { + "epoch": 0.6548237090785684, + "grad_norm": 234.79217529296875, + "learning_rate": 3.3174817573368068e-06, + "loss": 18.8312, + "step": 324160 + }, + { + "epoch": 0.6548439097112522, + "grad_norm": 401.0411682128906, + "learning_rate": 3.317153052352531e-06, + "loss": 17.0286, + "step": 324170 + }, + { + "epoch": 0.654864110343936, + "grad_norm": 83.79252624511719, + "learning_rate": 3.3168243555702455e-06, + "loss": 13.5033, + "step": 324180 + }, + { + "epoch": 0.6548843109766198, + "grad_norm": 42.736671447753906, + "learning_rate": 3.316495666991554e-06, + "loss": 11.2485, + "step": 324190 + }, + { + "epoch": 0.6549045116093036, + "grad_norm": 317.3556823730469, + "learning_rate": 3.31616698661806e-06, + "loss": 15.2511, + "step": 324200 + }, + { + "epoch": 0.6549247122419875, + "grad_norm": 476.33062744140625, + "learning_rate": 3.3158383144513618e-06, + "loss": 6.2523, + "step": 324210 + }, + { + "epoch": 0.6549449128746713, + "grad_norm": 429.07763671875, + "learning_rate": 3.315509650493065e-06, + "loss": 10.6325, + "step": 324220 + }, + { + "epoch": 0.6549651135073551, + "grad_norm": 419.2195739746094, + "learning_rate": 3.315180994744769e-06, + "loss": 13.1369, + "step": 324230 + }, + { + "epoch": 0.6549853141400389, + "grad_norm": 597.6172485351562, + "learning_rate": 3.3148523472080773e-06, + "loss": 14.6593, + "step": 324240 + }, + { + "epoch": 0.6550055147727227, + "grad_norm": 406.0660400390625, + "learning_rate": 3.3145237078845903e-06, + "loss": 18.3823, + "step": 324250 + }, + { + "epoch": 0.6550257154054066, + "grad_norm": 55.36674118041992, + "learning_rate": 3.3141950767759096e-06, + "loss": 13.1285, + "step": 324260 + }, + { + "epoch": 0.6550459160380903, + "grad_norm": 564.5735473632812, + "learning_rate": 3.3138664538836395e-06, + "loss": 27.3588, + "step": 324270 + }, + { + "epoch": 0.6550661166707741, + "grad_norm": 321.94818115234375, + "learning_rate": 3.3135378392093788e-06, + "loss": 12.9262, + "step": 324280 + }, + { + "epoch": 0.6550863173034579, + "grad_norm": 346.2619323730469, + "learning_rate": 3.3132092327547296e-06, + "loss": 18.7073, + "step": 324290 + }, + { + "epoch": 0.6551065179361417, + "grad_norm": 269.2696838378906, + "learning_rate": 3.312880634521295e-06, + "loss": 23.6498, + "step": 324300 + }, + { + "epoch": 0.6551267185688255, + "grad_norm": 29.812774658203125, + "learning_rate": 3.3125520445106753e-06, + "loss": 12.0455, + "step": 324310 + }, + { + "epoch": 0.6551469192015094, + "grad_norm": 529.48486328125, + "learning_rate": 3.312223462724472e-06, + "loss": 41.1307, + "step": 324320 + }, + { + "epoch": 0.6551671198341932, + "grad_norm": 530.9104614257812, + "learning_rate": 3.3118948891642875e-06, + "loss": 17.0012, + "step": 324330 + }, + { + "epoch": 0.655187320466877, + "grad_norm": 386.05133056640625, + "learning_rate": 3.311566323831721e-06, + "loss": 17.2066, + "step": 324340 + }, + { + "epoch": 0.6552075210995608, + "grad_norm": 282.726806640625, + "learning_rate": 3.3112377667283756e-06, + "loss": 18.5096, + "step": 324350 + }, + { + "epoch": 0.6552277217322446, + "grad_norm": 467.5544738769531, + "learning_rate": 3.3109092178558546e-06, + "loss": 30.0215, + "step": 324360 + }, + { + "epoch": 0.6552479223649285, + "grad_norm": 596.86865234375, + "learning_rate": 3.3105806772157556e-06, + "loss": 15.1171, + "step": 324370 + }, + { + "epoch": 0.6552681229976123, + "grad_norm": 376.2838134765625, + "learning_rate": 
3.310252144809682e-06, + "loss": 16.6493, + "step": 324380 + }, + { + "epoch": 0.6552883236302961, + "grad_norm": 406.9255676269531, + "learning_rate": 3.309923620639233e-06, + "loss": 16.4322, + "step": 324390 + }, + { + "epoch": 0.6553085242629799, + "grad_norm": 354.6062316894531, + "learning_rate": 3.3095951047060147e-06, + "loss": 20.3177, + "step": 324400 + }, + { + "epoch": 0.6553287248956637, + "grad_norm": 174.62908935546875, + "learning_rate": 3.309266597011621e-06, + "loss": 40.0574, + "step": 324410 + }, + { + "epoch": 0.6553489255283476, + "grad_norm": 480.7134094238281, + "learning_rate": 3.308938097557659e-06, + "loss": 24.4233, + "step": 324420 + }, + { + "epoch": 0.6553691261610314, + "grad_norm": 511.335693359375, + "learning_rate": 3.308609606345728e-06, + "loss": 40.798, + "step": 324430 + }, + { + "epoch": 0.6553893267937152, + "grad_norm": 374.6742858886719, + "learning_rate": 3.3082811233774277e-06, + "loss": 15.7719, + "step": 324440 + }, + { + "epoch": 0.655409527426399, + "grad_norm": 221.66168212890625, + "learning_rate": 3.30795264865436e-06, + "loss": 26.3338, + "step": 324450 + }, + { + "epoch": 0.6554297280590828, + "grad_norm": 399.8490295410156, + "learning_rate": 3.307624182178127e-06, + "loss": 17.0068, + "step": 324460 + }, + { + "epoch": 0.6554499286917667, + "grad_norm": 241.8368377685547, + "learning_rate": 3.3072957239503273e-06, + "loss": 23.1237, + "step": 324470 + }, + { + "epoch": 0.6554701293244505, + "grad_norm": 264.2138977050781, + "learning_rate": 3.3069672739725616e-06, + "loss": 32.1639, + "step": 324480 + }, + { + "epoch": 0.6554903299571343, + "grad_norm": 449.2158203125, + "learning_rate": 3.306638832246436e-06, + "loss": 11.5351, + "step": 324490 + }, + { + "epoch": 0.6555105305898181, + "grad_norm": 279.5223083496094, + "learning_rate": 3.3063103987735433e-06, + "loss": 11.1526, + "step": 324500 + }, + { + "epoch": 0.6555307312225019, + "grad_norm": 485.7489318847656, + "learning_rate": 3.30598197355549e-06, + "loss": 25.4161, + "step": 324510 + }, + { + "epoch": 0.6555509318551858, + "grad_norm": 450.6777648925781, + "learning_rate": 3.3056535565938764e-06, + "loss": 12.6259, + "step": 324520 + }, + { + "epoch": 0.6555711324878695, + "grad_norm": 97.11417388916016, + "learning_rate": 3.3053251478902996e-06, + "loss": 16.3099, + "step": 324530 + }, + { + "epoch": 0.6555913331205533, + "grad_norm": 154.92950439453125, + "learning_rate": 3.3049967474463634e-06, + "loss": 20.5589, + "step": 324540 + }, + { + "epoch": 0.6556115337532371, + "grad_norm": 337.1134948730469, + "learning_rate": 3.3046683552636665e-06, + "loss": 18.9115, + "step": 324550 + }, + { + "epoch": 0.6556317343859209, + "grad_norm": 358.4677734375, + "learning_rate": 3.304339971343813e-06, + "loss": 19.0148, + "step": 324560 + }, + { + "epoch": 0.6556519350186047, + "grad_norm": 680.4251708984375, + "learning_rate": 3.3040115956883984e-06, + "loss": 21.4263, + "step": 324570 + }, + { + "epoch": 0.6556721356512886, + "grad_norm": 955.8135375976562, + "learning_rate": 3.3036832282990263e-06, + "loss": 36.9836, + "step": 324580 + }, + { + "epoch": 0.6556923362839724, + "grad_norm": 286.5036315917969, + "learning_rate": 3.3033548691772976e-06, + "loss": 26.052, + "step": 324590 + }, + { + "epoch": 0.6557125369166562, + "grad_norm": 609.9530029296875, + "learning_rate": 3.30302651832481e-06, + "loss": 18.3765, + "step": 324600 + }, + { + "epoch": 0.65573273754934, + "grad_norm": 775.6493530273438, + "learning_rate": 3.302698175743165e-06, + "loss": 25.0592, + "step": 324610 
+ }, + { + "epoch": 0.6557529381820238, + "grad_norm": 314.5340576171875, + "learning_rate": 3.3023698414339656e-06, + "loss": 17.5775, + "step": 324620 + }, + { + "epoch": 0.6557731388147077, + "grad_norm": 423.2784118652344, + "learning_rate": 3.302041515398808e-06, + "loss": 15.2809, + "step": 324630 + }, + { + "epoch": 0.6557933394473915, + "grad_norm": 183.7259521484375, + "learning_rate": 3.3017131976392926e-06, + "loss": 13.2716, + "step": 324640 + }, + { + "epoch": 0.6558135400800753, + "grad_norm": 521.1258544921875, + "learning_rate": 3.3013848881570243e-06, + "loss": 37.3796, + "step": 324650 + }, + { + "epoch": 0.6558337407127591, + "grad_norm": 512.5210571289062, + "learning_rate": 3.3010565869535976e-06, + "loss": 27.3887, + "step": 324660 + }, + { + "epoch": 0.655853941345443, + "grad_norm": 401.6698303222656, + "learning_rate": 3.3007282940306155e-06, + "loss": 24.3654, + "step": 324670 + }, + { + "epoch": 0.6558741419781268, + "grad_norm": 525.7962036132812, + "learning_rate": 3.300400009389678e-06, + "loss": 31.9341, + "step": 324680 + }, + { + "epoch": 0.6558943426108106, + "grad_norm": 407.7822265625, + "learning_rate": 3.3000717330323857e-06, + "loss": 18.6044, + "step": 324690 + }, + { + "epoch": 0.6559145432434944, + "grad_norm": 880.857421875, + "learning_rate": 3.2997434649603368e-06, + "loss": 15.3205, + "step": 324700 + }, + { + "epoch": 0.6559347438761782, + "grad_norm": 359.1968994140625, + "learning_rate": 3.2994152051751305e-06, + "loss": 33.5997, + "step": 324710 + }, + { + "epoch": 0.655954944508862, + "grad_norm": 571.3761596679688, + "learning_rate": 3.299086953678371e-06, + "loss": 26.202, + "step": 324720 + }, + { + "epoch": 0.6559751451415459, + "grad_norm": 791.4124145507812, + "learning_rate": 3.298758710471653e-06, + "loss": 22.1367, + "step": 324730 + }, + { + "epoch": 0.6559953457742297, + "grad_norm": 372.0487976074219, + "learning_rate": 3.298430475556579e-06, + "loss": 9.4637, + "step": 324740 + }, + { + "epoch": 0.6560155464069135, + "grad_norm": 322.2548828125, + "learning_rate": 3.2981022489347503e-06, + "loss": 12.5813, + "step": 324750 + }, + { + "epoch": 0.6560357470395973, + "grad_norm": 410.0075988769531, + "learning_rate": 3.297774030607763e-06, + "loss": 11.1279, + "step": 324760 + }, + { + "epoch": 0.6560559476722811, + "grad_norm": 337.5087585449219, + "learning_rate": 3.2974458205772197e-06, + "loss": 10.0531, + "step": 324770 + }, + { + "epoch": 0.6560761483049649, + "grad_norm": 604.0032348632812, + "learning_rate": 3.2971176188447196e-06, + "loss": 15.9948, + "step": 324780 + }, + { + "epoch": 0.6560963489376487, + "grad_norm": 67.086181640625, + "learning_rate": 3.2967894254118605e-06, + "loss": 9.4685, + "step": 324790 + }, + { + "epoch": 0.6561165495703325, + "grad_norm": 699.3897094726562, + "learning_rate": 3.2964612402802422e-06, + "loss": 29.6587, + "step": 324800 + }, + { + "epoch": 0.6561367502030163, + "grad_norm": 178.92552185058594, + "learning_rate": 3.2961330634514676e-06, + "loss": 19.5481, + "step": 324810 + }, + { + "epoch": 0.6561569508357001, + "grad_norm": 621.947265625, + "learning_rate": 3.2958048949271314e-06, + "loss": 18.8215, + "step": 324820 + }, + { + "epoch": 0.656177151468384, + "grad_norm": 619.4153442382812, + "learning_rate": 3.2954767347088367e-06, + "loss": 18.5938, + "step": 324830 + }, + { + "epoch": 0.6561973521010678, + "grad_norm": 182.00775146484375, + "learning_rate": 3.295148582798181e-06, + "loss": 8.6175, + "step": 324840 + }, + { + "epoch": 0.6562175527337516, + "grad_norm": 
303.0915832519531, + "learning_rate": 3.2948204391967657e-06, + "loss": 16.7521, + "step": 324850 + }, + { + "epoch": 0.6562377533664354, + "grad_norm": 100.56477355957031, + "learning_rate": 3.294492303906188e-06, + "loss": 33.7697, + "step": 324860 + }, + { + "epoch": 0.6562579539991192, + "grad_norm": 1357.78466796875, + "learning_rate": 3.2941641769280464e-06, + "loss": 15.8703, + "step": 324870 + }, + { + "epoch": 0.6562781546318031, + "grad_norm": 186.18832397460938, + "learning_rate": 3.293836058263945e-06, + "loss": 20.8526, + "step": 324880 + }, + { + "epoch": 0.6562983552644869, + "grad_norm": 159.1144256591797, + "learning_rate": 3.293507947915477e-06, + "loss": 37.2876, + "step": 324890 + }, + { + "epoch": 0.6563185558971707, + "grad_norm": 444.4486083984375, + "learning_rate": 3.293179845884245e-06, + "loss": 30.4774, + "step": 324900 + }, + { + "epoch": 0.6563387565298545, + "grad_norm": 665.237060546875, + "learning_rate": 3.2928517521718483e-06, + "loss": 21.2222, + "step": 324910 + }, + { + "epoch": 0.6563589571625383, + "grad_norm": 696.8388671875, + "learning_rate": 3.2925236667798843e-06, + "loss": 29.906, + "step": 324920 + }, + { + "epoch": 0.6563791577952222, + "grad_norm": 123.77020263671875, + "learning_rate": 3.2921955897099534e-06, + "loss": 20.4031, + "step": 324930 + }, + { + "epoch": 0.656399358427906, + "grad_norm": 360.2903137207031, + "learning_rate": 3.2918675209636542e-06, + "loss": 18.8322, + "step": 324940 + }, + { + "epoch": 0.6564195590605898, + "grad_norm": 750.037353515625, + "learning_rate": 3.2915394605425836e-06, + "loss": 13.6613, + "step": 324950 + }, + { + "epoch": 0.6564397596932736, + "grad_norm": 382.3537292480469, + "learning_rate": 3.2912114084483437e-06, + "loss": 21.9366, + "step": 324960 + }, + { + "epoch": 0.6564599603259574, + "grad_norm": 273.78765869140625, + "learning_rate": 3.290883364682533e-06, + "loss": 14.4265, + "step": 324970 + }, + { + "epoch": 0.6564801609586413, + "grad_norm": 171.11929321289062, + "learning_rate": 3.2905553292467487e-06, + "loss": 14.784, + "step": 324980 + }, + { + "epoch": 0.6565003615913251, + "grad_norm": 404.71026611328125, + "learning_rate": 3.29022730214259e-06, + "loss": 18.4426, + "step": 324990 + }, + { + "epoch": 0.6565205622240089, + "grad_norm": 516.9315795898438, + "learning_rate": 3.289899283371657e-06, + "loss": 19.952, + "step": 325000 + }, + { + "epoch": 0.6565407628566927, + "grad_norm": 42.328216552734375, + "learning_rate": 3.2895712729355477e-06, + "loss": 22.5429, + "step": 325010 + }, + { + "epoch": 0.6565609634893765, + "grad_norm": 215.39959716796875, + "learning_rate": 3.2892432708358583e-06, + "loss": 16.8509, + "step": 325020 + }, + { + "epoch": 0.6565811641220604, + "grad_norm": 225.04336547851562, + "learning_rate": 3.288915277074192e-06, + "loss": 15.3697, + "step": 325030 + }, + { + "epoch": 0.6566013647547441, + "grad_norm": 241.2082977294922, + "learning_rate": 3.2885872916521445e-06, + "loss": 20.5328, + "step": 325040 + }, + { + "epoch": 0.6566215653874279, + "grad_norm": 419.43109130859375, + "learning_rate": 3.2882593145713148e-06, + "loss": 26.5507, + "step": 325050 + }, + { + "epoch": 0.6566417660201117, + "grad_norm": 740.9419555664062, + "learning_rate": 3.2879313458333017e-06, + "loss": 29.9435, + "step": 325060 + }, + { + "epoch": 0.6566619666527955, + "grad_norm": 574.1981811523438, + "learning_rate": 3.2876033854397037e-06, + "loss": 12.1584, + "step": 325070 + }, + { + "epoch": 0.6566821672854793, + "grad_norm": 196.2428436279297, + "learning_rate": 
3.287275433392119e-06, + "loss": 11.8524, + "step": 325080 + }, + { + "epoch": 0.6567023679181632, + "grad_norm": 529.787353515625, + "learning_rate": 3.286947489692145e-06, + "loss": 23.899, + "step": 325090 + }, + { + "epoch": 0.656722568550847, + "grad_norm": 380.5361022949219, + "learning_rate": 3.2866195543413843e-06, + "loss": 14.2219, + "step": 325100 + }, + { + "epoch": 0.6567427691835308, + "grad_norm": 505.2468566894531, + "learning_rate": 3.2862916273414284e-06, + "loss": 15.0427, + "step": 325110 + }, + { + "epoch": 0.6567629698162146, + "grad_norm": 572.7343139648438, + "learning_rate": 3.285963708693881e-06, + "loss": 23.6778, + "step": 325120 + }, + { + "epoch": 0.6567831704488984, + "grad_norm": 358.7130126953125, + "learning_rate": 3.2856357984003382e-06, + "loss": 16.2286, + "step": 325130 + }, + { + "epoch": 0.6568033710815823, + "grad_norm": 661.60302734375, + "learning_rate": 3.2853078964623995e-06, + "loss": 21.5851, + "step": 325140 + }, + { + "epoch": 0.6568235717142661, + "grad_norm": 169.0994873046875, + "learning_rate": 3.2849800028816613e-06, + "loss": 9.0037, + "step": 325150 + }, + { + "epoch": 0.6568437723469499, + "grad_norm": 667.0199584960938, + "learning_rate": 3.2846521176597217e-06, + "loss": 27.2752, + "step": 325160 + }, + { + "epoch": 0.6568639729796337, + "grad_norm": 1.3522008657455444, + "learning_rate": 3.2843242407981823e-06, + "loss": 19.3959, + "step": 325170 + }, + { + "epoch": 0.6568841736123175, + "grad_norm": 801.1975708007812, + "learning_rate": 3.2839963722986356e-06, + "loss": 19.4962, + "step": 325180 + }, + { + "epoch": 0.6569043742450014, + "grad_norm": 214.1361083984375, + "learning_rate": 3.283668512162684e-06, + "loss": 17.1053, + "step": 325190 + }, + { + "epoch": 0.6569245748776852, + "grad_norm": 346.63372802734375, + "learning_rate": 3.2833406603919243e-06, + "loss": 14.8104, + "step": 325200 + }, + { + "epoch": 0.656944775510369, + "grad_norm": 382.4580078125, + "learning_rate": 3.2830128169879535e-06, + "loss": 32.4047, + "step": 325210 + }, + { + "epoch": 0.6569649761430528, + "grad_norm": 421.2641296386719, + "learning_rate": 3.282684981952369e-06, + "loss": 12.9478, + "step": 325220 + }, + { + "epoch": 0.6569851767757366, + "grad_norm": 134.71836853027344, + "learning_rate": 3.2823571552867717e-06, + "loss": 18.223, + "step": 325230 + }, + { + "epoch": 0.6570053774084205, + "grad_norm": 559.95361328125, + "learning_rate": 3.282029336992756e-06, + "loss": 50.2567, + "step": 325240 + }, + { + "epoch": 0.6570255780411043, + "grad_norm": 733.6630859375, + "learning_rate": 3.28170152707192e-06, + "loss": 50.7487, + "step": 325250 + }, + { + "epoch": 0.6570457786737881, + "grad_norm": 0.0, + "learning_rate": 3.281373725525865e-06, + "loss": 17.5435, + "step": 325260 + }, + { + "epoch": 0.6570659793064719, + "grad_norm": 1.3321958780288696, + "learning_rate": 3.2810459323561826e-06, + "loss": 14.6807, + "step": 325270 + }, + { + "epoch": 0.6570861799391557, + "grad_norm": 292.2225036621094, + "learning_rate": 3.2807181475644755e-06, + "loss": 29.7416, + "step": 325280 + }, + { + "epoch": 0.6571063805718395, + "grad_norm": 566.6941528320312, + "learning_rate": 3.28039037115234e-06, + "loss": 23.5769, + "step": 325290 + }, + { + "epoch": 0.6571265812045233, + "grad_norm": 44.12610626220703, + "learning_rate": 3.280062603121373e-06, + "loss": 11.8867, + "step": 325300 + }, + { + "epoch": 0.6571467818372071, + "grad_norm": 145.26451110839844, + "learning_rate": 3.2797348434731725e-06, + "loss": 12.4323, + "step": 325310 + }, + { + 
"epoch": 0.6571669824698909, + "grad_norm": 1019.871826171875, + "learning_rate": 3.2794070922093347e-06, + "loss": 15.0018, + "step": 325320 + }, + { + "epoch": 0.6571871831025747, + "grad_norm": 80.88499450683594, + "learning_rate": 3.2790793493314605e-06, + "loss": 9.0907, + "step": 325330 + }, + { + "epoch": 0.6572073837352586, + "grad_norm": 760.2362060546875, + "learning_rate": 3.2787516148411417e-06, + "loss": 23.8629, + "step": 325340 + }, + { + "epoch": 0.6572275843679424, + "grad_norm": 560.3916625976562, + "learning_rate": 3.27842388873998e-06, + "loss": 21.0015, + "step": 325350 + }, + { + "epoch": 0.6572477850006262, + "grad_norm": 539.2189331054688, + "learning_rate": 3.2780961710295727e-06, + "loss": 21.1835, + "step": 325360 + }, + { + "epoch": 0.65726798563331, + "grad_norm": 1000.8939819335938, + "learning_rate": 3.2777684617115145e-06, + "loss": 33.7129, + "step": 325370 + }, + { + "epoch": 0.6572881862659938, + "grad_norm": 0.1469816267490387, + "learning_rate": 3.277440760787404e-06, + "loss": 10.3213, + "step": 325380 + }, + { + "epoch": 0.6573083868986777, + "grad_norm": 224.1309814453125, + "learning_rate": 3.277113068258839e-06, + "loss": 17.3751, + "step": 325390 + }, + { + "epoch": 0.6573285875313615, + "grad_norm": 827.6720581054688, + "learning_rate": 3.2767853841274154e-06, + "loss": 24.9989, + "step": 325400 + }, + { + "epoch": 0.6573487881640453, + "grad_norm": 564.5892333984375, + "learning_rate": 3.2764577083947303e-06, + "loss": 21.6816, + "step": 325410 + }, + { + "epoch": 0.6573689887967291, + "grad_norm": 362.488525390625, + "learning_rate": 3.2761300410623834e-06, + "loss": 33.0794, + "step": 325420 + }, + { + "epoch": 0.6573891894294129, + "grad_norm": 112.11978149414062, + "learning_rate": 3.2758023821319673e-06, + "loss": 29.824, + "step": 325430 + }, + { + "epoch": 0.6574093900620968, + "grad_norm": 395.3289794921875, + "learning_rate": 3.2754747316050815e-06, + "loss": 23.8979, + "step": 325440 + }, + { + "epoch": 0.6574295906947806, + "grad_norm": 251.3035888671875, + "learning_rate": 3.2751470894833236e-06, + "loss": 11.2302, + "step": 325450 + }, + { + "epoch": 0.6574497913274644, + "grad_norm": 250.07052612304688, + "learning_rate": 3.27481945576829e-06, + "loss": 20.3601, + "step": 325460 + }, + { + "epoch": 0.6574699919601482, + "grad_norm": 145.04916381835938, + "learning_rate": 3.2744918304615757e-06, + "loss": 20.5191, + "step": 325470 + }, + { + "epoch": 0.657490192592832, + "grad_norm": 444.8187255859375, + "learning_rate": 3.2741642135647787e-06, + "loss": 23.9869, + "step": 325480 + }, + { + "epoch": 0.6575103932255159, + "grad_norm": 397.5899353027344, + "learning_rate": 3.273836605079499e-06, + "loss": 19.1656, + "step": 325490 + }, + { + "epoch": 0.6575305938581997, + "grad_norm": 667.8988647460938, + "learning_rate": 3.273509005007327e-06, + "loss": 27.4597, + "step": 325500 + }, + { + "epoch": 0.6575507944908835, + "grad_norm": 472.8876647949219, + "learning_rate": 3.273181413349864e-06, + "loss": 17.3634, + "step": 325510 + }, + { + "epoch": 0.6575709951235673, + "grad_norm": 49.553226470947266, + "learning_rate": 3.2728538301087066e-06, + "loss": 19.2197, + "step": 325520 + }, + { + "epoch": 0.6575911957562511, + "grad_norm": 480.49407958984375, + "learning_rate": 3.2725262552854485e-06, + "loss": 21.1468, + "step": 325530 + }, + { + "epoch": 0.657611396388935, + "grad_norm": 215.25254821777344, + "learning_rate": 3.272198688881688e-06, + "loss": 14.1663, + "step": 325540 + }, + { + "epoch": 0.6576315970216187, + "grad_norm": 
177.4603729248047, + "learning_rate": 3.2718711308990226e-06, + "loss": 10.0919, + "step": 325550 + }, + { + "epoch": 0.6576517976543025, + "grad_norm": 821.0975341796875, + "learning_rate": 3.271543581339047e-06, + "loss": 23.5161, + "step": 325560 + }, + { + "epoch": 0.6576719982869863, + "grad_norm": 482.9058837890625, + "learning_rate": 3.271216040203357e-06, + "loss": 32.7778, + "step": 325570 + }, + { + "epoch": 0.6576921989196701, + "grad_norm": 13.38960075378418, + "learning_rate": 3.2708885074935515e-06, + "loss": 18.5887, + "step": 325580 + }, + { + "epoch": 0.6577123995523539, + "grad_norm": 249.48719787597656, + "learning_rate": 3.270560983211227e-06, + "loss": 14.3424, + "step": 325590 + }, + { + "epoch": 0.6577326001850378, + "grad_norm": 514.5307006835938, + "learning_rate": 3.2702334673579765e-06, + "loss": 12.4389, + "step": 325600 + }, + { + "epoch": 0.6577528008177216, + "grad_norm": 179.92340087890625, + "learning_rate": 3.2699059599353987e-06, + "loss": 20.0307, + "step": 325610 + }, + { + "epoch": 0.6577730014504054, + "grad_norm": 158.58670043945312, + "learning_rate": 3.2695784609450908e-06, + "loss": 22.2668, + "step": 325620 + }, + { + "epoch": 0.6577932020830892, + "grad_norm": 362.43304443359375, + "learning_rate": 3.2692509703886467e-06, + "loss": 23.5387, + "step": 325630 + }, + { + "epoch": 0.657813402715773, + "grad_norm": 437.4108581542969, + "learning_rate": 3.2689234882676622e-06, + "loss": 20.1188, + "step": 325640 + }, + { + "epoch": 0.6578336033484569, + "grad_norm": 8.09162712097168, + "learning_rate": 3.268596014583737e-06, + "loss": 25.9325, + "step": 325650 + }, + { + "epoch": 0.6578538039811407, + "grad_norm": 22.787580490112305, + "learning_rate": 3.2682685493384636e-06, + "loss": 12.1297, + "step": 325660 + }, + { + "epoch": 0.6578740046138245, + "grad_norm": 344.8905944824219, + "learning_rate": 3.2679410925334394e-06, + "loss": 10.7535, + "step": 325670 + }, + { + "epoch": 0.6578942052465083, + "grad_norm": 437.6022033691406, + "learning_rate": 3.267613644170261e-06, + "loss": 28.6233, + "step": 325680 + }, + { + "epoch": 0.6579144058791921, + "grad_norm": 89.2320556640625, + "learning_rate": 3.2672862042505227e-06, + "loss": 17.6414, + "step": 325690 + }, + { + "epoch": 0.657934606511876, + "grad_norm": 237.4945526123047, + "learning_rate": 3.26695877277582e-06, + "loss": 20.6416, + "step": 325700 + }, + { + "epoch": 0.6579548071445598, + "grad_norm": 148.95309448242188, + "learning_rate": 3.266631349747753e-06, + "loss": 13.3693, + "step": 325710 + }, + { + "epoch": 0.6579750077772436, + "grad_norm": 237.44046020507812, + "learning_rate": 3.266303935167912e-06, + "loss": 15.7174, + "step": 325720 + }, + { + "epoch": 0.6579952084099274, + "grad_norm": 681.7918090820312, + "learning_rate": 3.2659765290378963e-06, + "loss": 16.4314, + "step": 325730 + }, + { + "epoch": 0.6580154090426112, + "grad_norm": 135.437744140625, + "learning_rate": 3.265649131359301e-06, + "loss": 28.3991, + "step": 325740 + }, + { + "epoch": 0.6580356096752951, + "grad_norm": 420.930419921875, + "learning_rate": 3.2653217421337213e-06, + "loss": 19.6065, + "step": 325750 + }, + { + "epoch": 0.6580558103079789, + "grad_norm": 755.5789794921875, + "learning_rate": 3.264994361362753e-06, + "loss": 14.2181, + "step": 325760 + }, + { + "epoch": 0.6580760109406627, + "grad_norm": 41.67082595825195, + "learning_rate": 3.26466698904799e-06, + "loss": 12.7884, + "step": 325770 + }, + { + "epoch": 0.6580962115733465, + "grad_norm": 355.4094543457031, + "learning_rate": 
3.2643396251910338e-06, + "loss": 14.9274, + "step": 325780 + }, + { + "epoch": 0.6581164122060303, + "grad_norm": 464.47406005859375, + "learning_rate": 3.2640122697934716e-06, + "loss": 27.0586, + "step": 325790 + }, + { + "epoch": 0.6581366128387142, + "grad_norm": 322.71044921875, + "learning_rate": 3.263684922856905e-06, + "loss": 15.635, + "step": 325800 + }, + { + "epoch": 0.6581568134713979, + "grad_norm": 211.86778259277344, + "learning_rate": 3.2633575843829278e-06, + "loss": 11.8521, + "step": 325810 + }, + { + "epoch": 0.6581770141040817, + "grad_norm": 664.3461303710938, + "learning_rate": 3.2630302543731347e-06, + "loss": 26.4699, + "step": 325820 + }, + { + "epoch": 0.6581972147367655, + "grad_norm": 583.2703857421875, + "learning_rate": 3.262702932829121e-06, + "loss": 11.8234, + "step": 325830 + }, + { + "epoch": 0.6582174153694493, + "grad_norm": 1423.8182373046875, + "learning_rate": 3.262375619752484e-06, + "loss": 15.3818, + "step": 325840 + }, + { + "epoch": 0.6582376160021332, + "grad_norm": 562.4296875, + "learning_rate": 3.262048315144816e-06, + "loss": 18.7142, + "step": 325850 + }, + { + "epoch": 0.658257816634817, + "grad_norm": 51.41434097290039, + "learning_rate": 3.2617210190077132e-06, + "loss": 22.0297, + "step": 325860 + }, + { + "epoch": 0.6582780172675008, + "grad_norm": 282.89849853515625, + "learning_rate": 3.2613937313427735e-06, + "loss": 14.765, + "step": 325870 + }, + { + "epoch": 0.6582982179001846, + "grad_norm": 38.96006393432617, + "learning_rate": 3.2610664521515874e-06, + "loss": 21.5365, + "step": 325880 + }, + { + "epoch": 0.6583184185328684, + "grad_norm": 119.70616912841797, + "learning_rate": 3.2607391814357537e-06, + "loss": 15.3491, + "step": 325890 + }, + { + "epoch": 0.6583386191655523, + "grad_norm": 201.7650604248047, + "learning_rate": 3.260411919196866e-06, + "loss": 17.0382, + "step": 325900 + }, + { + "epoch": 0.6583588197982361, + "grad_norm": 386.3955383300781, + "learning_rate": 3.2600846654365202e-06, + "loss": 11.8478, + "step": 325910 + }, + { + "epoch": 0.6583790204309199, + "grad_norm": 216.8415985107422, + "learning_rate": 3.2597574201563104e-06, + "loss": 29.6346, + "step": 325920 + }, + { + "epoch": 0.6583992210636037, + "grad_norm": 551.940185546875, + "learning_rate": 3.2594301833578307e-06, + "loss": 20.8153, + "step": 325930 + }, + { + "epoch": 0.6584194216962875, + "grad_norm": 380.404052734375, + "learning_rate": 3.25910295504268e-06, + "loss": 14.7257, + "step": 325940 + }, + { + "epoch": 0.6584396223289714, + "grad_norm": 615.5536499023438, + "learning_rate": 3.258775735212447e-06, + "loss": 23.8163, + "step": 325950 + }, + { + "epoch": 0.6584598229616552, + "grad_norm": 664.9302978515625, + "learning_rate": 3.2584485238687318e-06, + "loss": 24.1192, + "step": 325960 + }, + { + "epoch": 0.658480023594339, + "grad_norm": 243.3323516845703, + "learning_rate": 3.258121321013128e-06, + "loss": 7.5279, + "step": 325970 + }, + { + "epoch": 0.6585002242270228, + "grad_norm": 567.731689453125, + "learning_rate": 3.257794126647228e-06, + "loss": 12.0984, + "step": 325980 + }, + { + "epoch": 0.6585204248597066, + "grad_norm": 119.06748962402344, + "learning_rate": 3.257466940772629e-06, + "loss": 17.4653, + "step": 325990 + }, + { + "epoch": 0.6585406254923905, + "grad_norm": 556.4931640625, + "learning_rate": 3.2571397633909252e-06, + "loss": 20.6214, + "step": 326000 + }, + { + "epoch": 0.6585608261250743, + "grad_norm": 45.02443313598633, + "learning_rate": 3.2568125945037098e-06, + "loss": 6.4558, + "step": 326010 
+ }, + { + "epoch": 0.6585810267577581, + "grad_norm": 813.0732421875, + "learning_rate": 3.256485434112578e-06, + "loss": 26.3669, + "step": 326020 + }, + { + "epoch": 0.6586012273904419, + "grad_norm": 670.9710693359375, + "learning_rate": 3.2561582822191273e-06, + "loss": 12.1677, + "step": 326030 + }, + { + "epoch": 0.6586214280231257, + "grad_norm": 584.9721069335938, + "learning_rate": 3.2558311388249465e-06, + "loss": 16.4993, + "step": 326040 + }, + { + "epoch": 0.6586416286558096, + "grad_norm": 294.2562255859375, + "learning_rate": 3.2555040039316344e-06, + "loss": 15.16, + "step": 326050 + }, + { + "epoch": 0.6586618292884933, + "grad_norm": 582.047607421875, + "learning_rate": 3.255176877540784e-06, + "loss": 21.793, + "step": 326060 + }, + { + "epoch": 0.6586820299211771, + "grad_norm": 369.1705322265625, + "learning_rate": 3.2548497596539907e-06, + "loss": 12.7556, + "step": 326070 + }, + { + "epoch": 0.6587022305538609, + "grad_norm": 794.9580688476562, + "learning_rate": 3.2545226502728477e-06, + "loss": 23.036, + "step": 326080 + }, + { + "epoch": 0.6587224311865447, + "grad_norm": 387.3789367675781, + "learning_rate": 3.254195549398948e-06, + "loss": 12.0966, + "step": 326090 + }, + { + "epoch": 0.6587426318192285, + "grad_norm": 436.01580810546875, + "learning_rate": 3.2538684570338908e-06, + "loss": 8.9334, + "step": 326100 + }, + { + "epoch": 0.6587628324519124, + "grad_norm": 239.38002014160156, + "learning_rate": 3.253541373179264e-06, + "loss": 13.6047, + "step": 326110 + }, + { + "epoch": 0.6587830330845962, + "grad_norm": 441.0944519042969, + "learning_rate": 3.2532142978366654e-06, + "loss": 40.3395, + "step": 326120 + }, + { + "epoch": 0.65880323371728, + "grad_norm": 282.71319580078125, + "learning_rate": 3.252887231007689e-06, + "loss": 24.0975, + "step": 326130 + }, + { + "epoch": 0.6588234343499638, + "grad_norm": 290.3285217285156, + "learning_rate": 3.2525601726939283e-06, + "loss": 22.3136, + "step": 326140 + }, + { + "epoch": 0.6588436349826476, + "grad_norm": 197.89186096191406, + "learning_rate": 3.2522331228969774e-06, + "loss": 20.9678, + "step": 326150 + }, + { + "epoch": 0.6588638356153315, + "grad_norm": 401.70330810546875, + "learning_rate": 3.2519060816184307e-06, + "loss": 18.9292, + "step": 326160 + }, + { + "epoch": 0.6588840362480153, + "grad_norm": 576.4989013671875, + "learning_rate": 3.251579048859881e-06, + "loss": 18.1169, + "step": 326170 + }, + { + "epoch": 0.6589042368806991, + "grad_norm": 324.4037170410156, + "learning_rate": 3.2512520246229217e-06, + "loss": 25.7619, + "step": 326180 + }, + { + "epoch": 0.6589244375133829, + "grad_norm": 756.5455932617188, + "learning_rate": 3.2509250089091494e-06, + "loss": 23.6764, + "step": 326190 + }, + { + "epoch": 0.6589446381460667, + "grad_norm": 53.63800048828125, + "learning_rate": 3.2505980017201564e-06, + "loss": 25.0827, + "step": 326200 + }, + { + "epoch": 0.6589648387787506, + "grad_norm": 427.29864501953125, + "learning_rate": 3.250271003057537e-06, + "loss": 21.0991, + "step": 326210 + }, + { + "epoch": 0.6589850394114344, + "grad_norm": 492.3072509765625, + "learning_rate": 3.249944012922883e-06, + "loss": 22.0644, + "step": 326220 + }, + { + "epoch": 0.6590052400441182, + "grad_norm": 1197.179931640625, + "learning_rate": 3.249617031317792e-06, + "loss": 24.7577, + "step": 326230 + }, + { + "epoch": 0.659025440676802, + "grad_norm": 388.35888671875, + "learning_rate": 3.2492900582438537e-06, + "loss": 29.4192, + "step": 326240 + }, + { + "epoch": 0.6590456413094858, + 
"grad_norm": 219.8641815185547, + "learning_rate": 3.248963093702663e-06, + "loss": 14.3934, + "step": 326250 + }, + { + "epoch": 0.6590658419421697, + "grad_norm": 675.7544555664062, + "learning_rate": 3.248636137695815e-06, + "loss": 10.9527, + "step": 326260 + }, + { + "epoch": 0.6590860425748535, + "grad_norm": 193.2775421142578, + "learning_rate": 3.2483091902249008e-06, + "loss": 16.3143, + "step": 326270 + }, + { + "epoch": 0.6591062432075373, + "grad_norm": 216.4387664794922, + "learning_rate": 3.247982251291516e-06, + "loss": 9.014, + "step": 326280 + }, + { + "epoch": 0.6591264438402211, + "grad_norm": 0.0, + "learning_rate": 3.247655320897254e-06, + "loss": 19.7007, + "step": 326290 + }, + { + "epoch": 0.6591466444729049, + "grad_norm": 233.0736846923828, + "learning_rate": 3.247328399043706e-06, + "loss": 15.0967, + "step": 326300 + }, + { + "epoch": 0.6591668451055888, + "grad_norm": 402.33111572265625, + "learning_rate": 3.2470014857324673e-06, + "loss": 27.7267, + "step": 326310 + }, + { + "epoch": 0.6591870457382725, + "grad_norm": 402.5802307128906, + "learning_rate": 3.2466745809651312e-06, + "loss": 12.5053, + "step": 326320 + }, + { + "epoch": 0.6592072463709563, + "grad_norm": 641.3466186523438, + "learning_rate": 3.2463476847432883e-06, + "loss": 19.0037, + "step": 326330 + }, + { + "epoch": 0.6592274470036401, + "grad_norm": 346.65838623046875, + "learning_rate": 3.2460207970685363e-06, + "loss": 10.3399, + "step": 326340 + }, + { + "epoch": 0.6592476476363239, + "grad_norm": 122.47904205322266, + "learning_rate": 3.245693917942465e-06, + "loss": 12.2482, + "step": 326350 + }, + { + "epoch": 0.6592678482690078, + "grad_norm": 138.75299072265625, + "learning_rate": 3.245367047366671e-06, + "loss": 27.0093, + "step": 326360 + }, + { + "epoch": 0.6592880489016916, + "grad_norm": 277.60235595703125, + "learning_rate": 3.2450401853427432e-06, + "loss": 18.9396, + "step": 326370 + }, + { + "epoch": 0.6593082495343754, + "grad_norm": 36.98881530761719, + "learning_rate": 3.2447133318722756e-06, + "loss": 23.1334, + "step": 326380 + }, + { + "epoch": 0.6593284501670592, + "grad_norm": 343.0567932128906, + "learning_rate": 3.2443864869568666e-06, + "loss": 25.0246, + "step": 326390 + }, + { + "epoch": 0.659348650799743, + "grad_norm": 147.71640014648438, + "learning_rate": 3.2440596505981005e-06, + "loss": 17.0263, + "step": 326400 + }, + { + "epoch": 0.6593688514324269, + "grad_norm": 25.792308807373047, + "learning_rate": 3.243732822797576e-06, + "loss": 11.4002, + "step": 326410 + }, + { + "epoch": 0.6593890520651107, + "grad_norm": 331.8695983886719, + "learning_rate": 3.243406003556886e-06, + "loss": 16.7536, + "step": 326420 + }, + { + "epoch": 0.6594092526977945, + "grad_norm": 281.3816833496094, + "learning_rate": 3.2430791928776217e-06, + "loss": 33.9329, + "step": 326430 + }, + { + "epoch": 0.6594294533304783, + "grad_norm": 276.3078308105469, + "learning_rate": 3.2427523907613755e-06, + "loss": 20.8404, + "step": 326440 + }, + { + "epoch": 0.6594496539631621, + "grad_norm": 43.945648193359375, + "learning_rate": 3.242425597209742e-06, + "loss": 21.5606, + "step": 326450 + }, + { + "epoch": 0.659469854595846, + "grad_norm": 536.5602416992188, + "learning_rate": 3.2420988122243123e-06, + "loss": 19.6857, + "step": 326460 + }, + { + "epoch": 0.6594900552285298, + "grad_norm": 199.89599609375, + "learning_rate": 3.2417720358066785e-06, + "loss": 25.8947, + "step": 326470 + }, + { + "epoch": 0.6595102558612136, + "grad_norm": 94.38127136230469, + "learning_rate": 
3.241445267958438e-06, + "loss": 23.4073, + "step": 326480 + }, + { + "epoch": 0.6595304564938974, + "grad_norm": 418.309326171875, + "learning_rate": 3.2411185086811763e-06, + "loss": 20.8045, + "step": 326490 + }, + { + "epoch": 0.6595506571265812, + "grad_norm": 296.802978515625, + "learning_rate": 3.2407917579764914e-06, + "loss": 16.1665, + "step": 326500 + }, + { + "epoch": 0.659570857759265, + "grad_norm": 273.4130859375, + "learning_rate": 3.2404650158459737e-06, + "loss": 29.4563, + "step": 326510 + }, + { + "epoch": 0.6595910583919489, + "grad_norm": 47.27882766723633, + "learning_rate": 3.240138282291217e-06, + "loss": 11.6505, + "step": 326520 + }, + { + "epoch": 0.6596112590246327, + "grad_norm": 216.8551483154297, + "learning_rate": 3.2398115573138123e-06, + "loss": 21.9539, + "step": 326530 + }, + { + "epoch": 0.6596314596573165, + "grad_norm": 372.9217834472656, + "learning_rate": 3.2394848409153514e-06, + "loss": 27.4066, + "step": 326540 + }, + { + "epoch": 0.6596516602900003, + "grad_norm": 488.9245300292969, + "learning_rate": 3.2391581330974307e-06, + "loss": 26.7345, + "step": 326550 + }, + { + "epoch": 0.6596718609226842, + "grad_norm": 277.7620849609375, + "learning_rate": 3.238831433861637e-06, + "loss": 15.3652, + "step": 326560 + }, + { + "epoch": 0.6596920615553679, + "grad_norm": 320.3008117675781, + "learning_rate": 3.2385047432095656e-06, + "loss": 15.7351, + "step": 326570 + }, + { + "epoch": 0.6597122621880517, + "grad_norm": 191.49447631835938, + "learning_rate": 3.23817806114281e-06, + "loss": 10.6893, + "step": 326580 + }, + { + "epoch": 0.6597324628207355, + "grad_norm": 363.738037109375, + "learning_rate": 3.23785138766296e-06, + "loss": 16.5293, + "step": 326590 + }, + { + "epoch": 0.6597526634534193, + "grad_norm": 444.9259338378906, + "learning_rate": 3.2375247227716077e-06, + "loss": 20.5872, + "step": 326600 + }, + { + "epoch": 0.6597728640861031, + "grad_norm": 256.9393310546875, + "learning_rate": 3.2371980664703486e-06, + "loss": 26.2523, + "step": 326610 + }, + { + "epoch": 0.659793064718787, + "grad_norm": 264.544921875, + "learning_rate": 3.2368714187607696e-06, + "loss": 14.5543, + "step": 326620 + }, + { + "epoch": 0.6598132653514708, + "grad_norm": 273.64703369140625, + "learning_rate": 3.236544779644466e-06, + "loss": 11.8041, + "step": 326630 + }, + { + "epoch": 0.6598334659841546, + "grad_norm": 354.6824645996094, + "learning_rate": 3.2362181491230295e-06, + "loss": 14.7286, + "step": 326640 + }, + { + "epoch": 0.6598536666168384, + "grad_norm": 52.0087776184082, + "learning_rate": 3.235891527198053e-06, + "loss": 14.8979, + "step": 326650 + }, + { + "epoch": 0.6598738672495222, + "grad_norm": 541.7445068359375, + "learning_rate": 3.235564913871126e-06, + "loss": 19.7645, + "step": 326660 + }, + { + "epoch": 0.6598940678822061, + "grad_norm": 415.505126953125, + "learning_rate": 3.235238309143842e-06, + "loss": 19.1636, + "step": 326670 + }, + { + "epoch": 0.6599142685148899, + "grad_norm": 420.2804260253906, + "learning_rate": 3.234911713017793e-06, + "loss": 22.9831, + "step": 326680 + }, + { + "epoch": 0.6599344691475737, + "grad_norm": 0.0, + "learning_rate": 3.2345851254945695e-06, + "loss": 7.5803, + "step": 326690 + }, + { + "epoch": 0.6599546697802575, + "grad_norm": 2.467928409576416, + "learning_rate": 3.2342585465757625e-06, + "loss": 4.4995, + "step": 326700 + }, + { + "epoch": 0.6599748704129413, + "grad_norm": 271.5982666015625, + "learning_rate": 3.2339319762629694e-06, + "loss": 20.3659, + "step": 326710 + }, + { + 
"epoch": 0.6599950710456252, + "grad_norm": 837.4721069335938, + "learning_rate": 3.2336054145577735e-06, + "loss": 15.789, + "step": 326720 + }, + { + "epoch": 0.660015271678309, + "grad_norm": 289.23284912109375, + "learning_rate": 3.233278861461772e-06, + "loss": 9.8312, + "step": 326730 + }, + { + "epoch": 0.6600354723109928, + "grad_norm": 925.2801513671875, + "learning_rate": 3.2329523169765566e-06, + "loss": 42.8915, + "step": 326740 + }, + { + "epoch": 0.6600556729436766, + "grad_norm": 245.6300506591797, + "learning_rate": 3.2326257811037154e-06, + "loss": 17.8565, + "step": 326750 + }, + { + "epoch": 0.6600758735763604, + "grad_norm": 81.37186431884766, + "learning_rate": 3.2322992538448418e-06, + "loss": 24.6701, + "step": 326760 + }, + { + "epoch": 0.6600960742090443, + "grad_norm": 364.2679443359375, + "learning_rate": 3.2319727352015286e-06, + "loss": 17.6833, + "step": 326770 + }, + { + "epoch": 0.6601162748417281, + "grad_norm": 765.7305297851562, + "learning_rate": 3.2316462251753646e-06, + "loss": 18.536, + "step": 326780 + }, + { + "epoch": 0.6601364754744119, + "grad_norm": 131.59066772460938, + "learning_rate": 3.2313197237679416e-06, + "loss": 25.45, + "step": 326790 + }, + { + "epoch": 0.6601566761070957, + "grad_norm": 483.0262756347656, + "learning_rate": 3.230993230980853e-06, + "loss": 18.5174, + "step": 326800 + }, + { + "epoch": 0.6601768767397795, + "grad_norm": 569.9622802734375, + "learning_rate": 3.2306667468156895e-06, + "loss": 31.4212, + "step": 326810 + }, + { + "epoch": 0.6601970773724634, + "grad_norm": 1281.259033203125, + "learning_rate": 3.2303402712740404e-06, + "loss": 35.0354, + "step": 326820 + }, + { + "epoch": 0.6602172780051471, + "grad_norm": 297.14898681640625, + "learning_rate": 3.2300138043574992e-06, + "loss": 14.748, + "step": 326830 + }, + { + "epoch": 0.6602374786378309, + "grad_norm": 359.8337707519531, + "learning_rate": 3.2296873460676557e-06, + "loss": 19.4653, + "step": 326840 + }, + { + "epoch": 0.6602576792705147, + "grad_norm": 7.154877185821533, + "learning_rate": 3.229360896406102e-06, + "loss": 11.0349, + "step": 326850 + }, + { + "epoch": 0.6602778799031985, + "grad_norm": 132.37347412109375, + "learning_rate": 3.229034455374426e-06, + "loss": 21.3548, + "step": 326860 + }, + { + "epoch": 0.6602980805358823, + "grad_norm": 246.46514892578125, + "learning_rate": 3.2287080229742253e-06, + "loss": 18.76, + "step": 326870 + }, + { + "epoch": 0.6603182811685662, + "grad_norm": 567.5435180664062, + "learning_rate": 3.228381599207083e-06, + "loss": 16.071, + "step": 326880 + }, + { + "epoch": 0.66033848180125, + "grad_norm": 587.0892333984375, + "learning_rate": 3.2280551840745953e-06, + "loss": 27.1945, + "step": 326890 + }, + { + "epoch": 0.6603586824339338, + "grad_norm": 369.40252685546875, + "learning_rate": 3.227728777578353e-06, + "loss": 9.7181, + "step": 326900 + }, + { + "epoch": 0.6603788830666176, + "grad_norm": 687.49853515625, + "learning_rate": 3.2274023797199446e-06, + "loss": 35.6283, + "step": 326910 + }, + { + "epoch": 0.6603990836993014, + "grad_norm": 322.2925109863281, + "learning_rate": 3.227075990500962e-06, + "loss": 11.0406, + "step": 326920 + }, + { + "epoch": 0.6604192843319853, + "grad_norm": 670.8181762695312, + "learning_rate": 3.226749609922997e-06, + "loss": 22.8369, + "step": 326930 + }, + { + "epoch": 0.6604394849646691, + "grad_norm": 28.338125228881836, + "learning_rate": 3.226423237987637e-06, + "loss": 8.6988, + "step": 326940 + }, + { + "epoch": 0.6604596855973529, + "grad_norm": 
735.6610107421875, + "learning_rate": 3.226096874696476e-06, + "loss": 22.8139, + "step": 326950 + }, + { + "epoch": 0.6604798862300367, + "grad_norm": 132.19671630859375, + "learning_rate": 3.2257705200511035e-06, + "loss": 8.7654, + "step": 326960 + }, + { + "epoch": 0.6605000868627205, + "grad_norm": 343.75146484375, + "learning_rate": 3.2254441740531124e-06, + "loss": 18.5073, + "step": 326970 + }, + { + "epoch": 0.6605202874954044, + "grad_norm": 335.85968017578125, + "learning_rate": 3.225117836704089e-06, + "loss": 9.4129, + "step": 326980 + }, + { + "epoch": 0.6605404881280882, + "grad_norm": 58.72859191894531, + "learning_rate": 3.224791508005627e-06, + "loss": 12.8179, + "step": 326990 + }, + { + "epoch": 0.660560688760772, + "grad_norm": 241.4238739013672, + "learning_rate": 3.224465187959316e-06, + "loss": 31.1767, + "step": 327000 + }, + { + "epoch": 0.6605808893934558, + "grad_norm": 271.1313781738281, + "learning_rate": 3.224138876566745e-06, + "loss": 7.9185, + "step": 327010 + }, + { + "epoch": 0.6606010900261396, + "grad_norm": 309.2290954589844, + "learning_rate": 3.2238125738295063e-06, + "loss": 11.6941, + "step": 327020 + }, + { + "epoch": 0.6606212906588235, + "grad_norm": 437.2591247558594, + "learning_rate": 3.2234862797491905e-06, + "loss": 15.7673, + "step": 327030 + }, + { + "epoch": 0.6606414912915073, + "grad_norm": 211.36199951171875, + "learning_rate": 3.2231599943273865e-06, + "loss": 13.6241, + "step": 327040 + }, + { + "epoch": 0.6606616919241911, + "grad_norm": 247.3621368408203, + "learning_rate": 3.2228337175656856e-06, + "loss": 33.3884, + "step": 327050 + }, + { + "epoch": 0.6606818925568749, + "grad_norm": 245.90342712402344, + "learning_rate": 3.222507449465678e-06, + "loss": 15.0339, + "step": 327060 + }, + { + "epoch": 0.6607020931895587, + "grad_norm": 576.1591186523438, + "learning_rate": 3.2221811900289524e-06, + "loss": 17.7405, + "step": 327070 + }, + { + "epoch": 0.6607222938222426, + "grad_norm": 268.83837890625, + "learning_rate": 3.221854939257099e-06, + "loss": 19.998, + "step": 327080 + }, + { + "epoch": 0.6607424944549263, + "grad_norm": 196.15208435058594, + "learning_rate": 3.2215286971517123e-06, + "loss": 16.0432, + "step": 327090 + }, + { + "epoch": 0.6607626950876101, + "grad_norm": 657.3632202148438, + "learning_rate": 3.2212024637143756e-06, + "loss": 17.1239, + "step": 327100 + }, + { + "epoch": 0.6607828957202939, + "grad_norm": 289.90020751953125, + "learning_rate": 3.220876238946684e-06, + "loss": 18.2402, + "step": 327110 + }, + { + "epoch": 0.6608030963529777, + "grad_norm": 92.29066467285156, + "learning_rate": 3.2205500228502257e-06, + "loss": 12.4329, + "step": 327120 + }, + { + "epoch": 0.6608232969856616, + "grad_norm": 157.49974060058594, + "learning_rate": 3.220223815426592e-06, + "loss": 16.8881, + "step": 327130 + }, + { + "epoch": 0.6608434976183454, + "grad_norm": 817.279296875, + "learning_rate": 3.21989761667737e-06, + "loss": 21.3352, + "step": 327140 + }, + { + "epoch": 0.6608636982510292, + "grad_norm": 80.90983581542969, + "learning_rate": 3.21957142660415e-06, + "loss": 19.8514, + "step": 327150 + }, + { + "epoch": 0.660883898883713, + "grad_norm": 14.334986686706543, + "learning_rate": 3.2192452452085265e-06, + "loss": 15.8893, + "step": 327160 + }, + { + "epoch": 0.6609040995163968, + "grad_norm": 234.1011199951172, + "learning_rate": 3.218919072492082e-06, + "loss": 24.0918, + "step": 327170 + }, + { + "epoch": 0.6609243001490807, + "grad_norm": 79.61859130859375, + "learning_rate": 
3.2185929084564115e-06, + "loss": 8.2091, + "step": 327180 + }, + { + "epoch": 0.6609445007817645, + "grad_norm": 300.50640869140625, + "learning_rate": 3.2182667531031044e-06, + "loss": 15.4628, + "step": 327190 + }, + { + "epoch": 0.6609647014144483, + "grad_norm": 274.9285888671875, + "learning_rate": 3.217940606433747e-06, + "loss": 17.9761, + "step": 327200 + }, + { + "epoch": 0.6609849020471321, + "grad_norm": 396.72149658203125, + "learning_rate": 3.2176144684499315e-06, + "loss": 19.7943, + "step": 327210 + }, + { + "epoch": 0.6610051026798159, + "grad_norm": 450.2877502441406, + "learning_rate": 3.2172883391532484e-06, + "loss": 12.6675, + "step": 327220 + }, + { + "epoch": 0.6610253033124998, + "grad_norm": 39.47370147705078, + "learning_rate": 3.216962218545284e-06, + "loss": 14.0956, + "step": 327230 + }, + { + "epoch": 0.6610455039451836, + "grad_norm": 720.3800659179688, + "learning_rate": 3.2166361066276287e-06, + "loss": 32.4461, + "step": 327240 + }, + { + "epoch": 0.6610657045778674, + "grad_norm": 390.7908935546875, + "learning_rate": 3.2163100034018735e-06, + "loss": 18.8091, + "step": 327250 + }, + { + "epoch": 0.6610859052105512, + "grad_norm": 0.0, + "learning_rate": 3.2159839088696088e-06, + "loss": 15.7301, + "step": 327260 + }, + { + "epoch": 0.661106105843235, + "grad_norm": 674.4672241210938, + "learning_rate": 3.21565782303242e-06, + "loss": 16.456, + "step": 327270 + }, + { + "epoch": 0.6611263064759189, + "grad_norm": 139.64060974121094, + "learning_rate": 3.2153317458918997e-06, + "loss": 24.8868, + "step": 327280 + }, + { + "epoch": 0.6611465071086027, + "grad_norm": 366.8919372558594, + "learning_rate": 3.2150056774496363e-06, + "loss": 17.7777, + "step": 327290 + }, + { + "epoch": 0.6611667077412865, + "grad_norm": 519.717041015625, + "learning_rate": 3.2146796177072183e-06, + "loss": 21.0956, + "step": 327300 + }, + { + "epoch": 0.6611869083739703, + "grad_norm": 39.76637649536133, + "learning_rate": 3.214353566666234e-06, + "loss": 16.4979, + "step": 327310 + }, + { + "epoch": 0.6612071090066541, + "grad_norm": 307.4339294433594, + "learning_rate": 3.2140275243282765e-06, + "loss": 17.9161, + "step": 327320 + }, + { + "epoch": 0.661227309639338, + "grad_norm": 88.10888671875, + "learning_rate": 3.2137014906949295e-06, + "loss": 17.8225, + "step": 327330 + }, + { + "epoch": 0.6612475102720217, + "grad_norm": 500.12506103515625, + "learning_rate": 3.2133754657677857e-06, + "loss": 10.8803, + "step": 327340 + }, + { + "epoch": 0.6612677109047055, + "grad_norm": 160.55833435058594, + "learning_rate": 3.2130494495484345e-06, + "loss": 22.7973, + "step": 327350 + }, + { + "epoch": 0.6612879115373893, + "grad_norm": 369.9012451171875, + "learning_rate": 3.2127234420384624e-06, + "loss": 20.0095, + "step": 327360 + }, + { + "epoch": 0.6613081121700731, + "grad_norm": 421.60223388671875, + "learning_rate": 3.212397443239459e-06, + "loss": 13.6308, + "step": 327370 + }, + { + "epoch": 0.661328312802757, + "grad_norm": 454.09698486328125, + "learning_rate": 3.212071453153015e-06, + "loss": 23.0256, + "step": 327380 + }, + { + "epoch": 0.6613485134354408, + "grad_norm": 433.3726501464844, + "learning_rate": 3.2117454717807174e-06, + "loss": 12.1194, + "step": 327390 + }, + { + "epoch": 0.6613687140681246, + "grad_norm": 49.15796661376953, + "learning_rate": 3.211419499124154e-06, + "loss": 10.5911, + "step": 327400 + }, + { + "epoch": 0.6613889147008084, + "grad_norm": 455.6471862792969, + "learning_rate": 3.2110935351849158e-06, + "loss": 16.7057, + "step": 327410 
+ }, + { + "epoch": 0.6614091153334922, + "grad_norm": 291.10888671875, + "learning_rate": 3.2107675799645923e-06, + "loss": 16.6149, + "step": 327420 + }, + { + "epoch": 0.661429315966176, + "grad_norm": 775.1611938476562, + "learning_rate": 3.210441633464769e-06, + "loss": 26.5504, + "step": 327430 + }, + { + "epoch": 0.6614495165988599, + "grad_norm": 403.2657470703125, + "learning_rate": 3.2101156956870367e-06, + "loss": 20.3346, + "step": 327440 + }, + { + "epoch": 0.6614697172315437, + "grad_norm": 134.37828063964844, + "learning_rate": 3.209789766632984e-06, + "loss": 13.6932, + "step": 327450 + }, + { + "epoch": 0.6614899178642275, + "grad_norm": 393.8540344238281, + "learning_rate": 3.209463846304198e-06, + "loss": 17.1115, + "step": 327460 + }, + { + "epoch": 0.6615101184969113, + "grad_norm": 214.12506103515625, + "learning_rate": 3.209137934702267e-06, + "loss": 16.2317, + "step": 327470 + }, + { + "epoch": 0.6615303191295951, + "grad_norm": 541.5082397460938, + "learning_rate": 3.2088120318287843e-06, + "loss": 12.0737, + "step": 327480 + }, + { + "epoch": 0.661550519762279, + "grad_norm": 148.84764099121094, + "learning_rate": 3.2084861376853304e-06, + "loss": 22.5428, + "step": 327490 + }, + { + "epoch": 0.6615707203949628, + "grad_norm": 470.9176025390625, + "learning_rate": 3.2081602522734987e-06, + "loss": 11.4023, + "step": 327500 + }, + { + "epoch": 0.6615909210276466, + "grad_norm": 234.14559936523438, + "learning_rate": 3.2078343755948783e-06, + "loss": 16.6708, + "step": 327510 + }, + { + "epoch": 0.6616111216603304, + "grad_norm": 0.9575088024139404, + "learning_rate": 3.2075085076510548e-06, + "loss": 18.4365, + "step": 327520 + }, + { + "epoch": 0.6616313222930142, + "grad_norm": 274.15380859375, + "learning_rate": 3.207182648443617e-06, + "loss": 12.2141, + "step": 327530 + }, + { + "epoch": 0.6616515229256981, + "grad_norm": 220.66366577148438, + "learning_rate": 3.206856797974155e-06, + "loss": 10.9902, + "step": 327540 + }, + { + "epoch": 0.6616717235583819, + "grad_norm": 4.854771137237549, + "learning_rate": 3.2065309562442536e-06, + "loss": 20.7919, + "step": 327550 + }, + { + "epoch": 0.6616919241910657, + "grad_norm": 294.95452880859375, + "learning_rate": 3.2062051232555024e-06, + "loss": 11.8438, + "step": 327560 + }, + { + "epoch": 0.6617121248237495, + "grad_norm": 395.31402587890625, + "learning_rate": 3.205879299009491e-06, + "loss": 15.7949, + "step": 327570 + }, + { + "epoch": 0.6617323254564333, + "grad_norm": 577.9132080078125, + "learning_rate": 3.2055534835078075e-06, + "loss": 23.7941, + "step": 327580 + }, + { + "epoch": 0.6617525260891172, + "grad_norm": 206.83401489257812, + "learning_rate": 3.205227676752037e-06, + "loss": 20.8896, + "step": 327590 + }, + { + "epoch": 0.6617727267218009, + "grad_norm": 240.10752868652344, + "learning_rate": 3.2049018787437693e-06, + "loss": 10.2279, + "step": 327600 + }, + { + "epoch": 0.6617929273544847, + "grad_norm": 512.9368896484375, + "learning_rate": 3.2045760894845932e-06, + "loss": 31.879, + "step": 327610 + }, + { + "epoch": 0.6618131279871685, + "grad_norm": 280.0784912109375, + "learning_rate": 3.2042503089760934e-06, + "loss": 28.417, + "step": 327620 + }, + { + "epoch": 0.6618333286198523, + "grad_norm": 113.35942840576172, + "learning_rate": 3.2039245372198613e-06, + "loss": 10.603, + "step": 327630 + }, + { + "epoch": 0.6618535292525362, + "grad_norm": 392.9140930175781, + "learning_rate": 3.203598774217484e-06, + "loss": 13.0894, + "step": 327640 + }, + { + "epoch": 0.66187372988522, + 
"grad_norm": 259.1398010253906, + "learning_rate": 3.2032730199705477e-06, + "loss": 11.6942, + "step": 327650 + }, + { + "epoch": 0.6618939305179038, + "grad_norm": 383.2666015625, + "learning_rate": 3.20294727448064e-06, + "loss": 36.4547, + "step": 327660 + }, + { + "epoch": 0.6619141311505876, + "grad_norm": 432.5340270996094, + "learning_rate": 3.2026215377493507e-06, + "loss": 24.846, + "step": 327670 + }, + { + "epoch": 0.6619343317832714, + "grad_norm": 304.7824401855469, + "learning_rate": 3.2022958097782646e-06, + "loss": 36.4396, + "step": 327680 + }, + { + "epoch": 0.6619545324159553, + "grad_norm": 1352.296875, + "learning_rate": 3.20197009056897e-06, + "loss": 20.8479, + "step": 327690 + }, + { + "epoch": 0.6619747330486391, + "grad_norm": 615.2682495117188, + "learning_rate": 3.201644380123056e-06, + "loss": 22.2662, + "step": 327700 + }, + { + "epoch": 0.6619949336813229, + "grad_norm": 319.700927734375, + "learning_rate": 3.201318678442111e-06, + "loss": 35.5316, + "step": 327710 + }, + { + "epoch": 0.6620151343140067, + "grad_norm": 402.1708984375, + "learning_rate": 3.2009929855277187e-06, + "loss": 37.3906, + "step": 327720 + }, + { + "epoch": 0.6620353349466905, + "grad_norm": 90.54914093017578, + "learning_rate": 3.200667301381468e-06, + "loss": 22.2098, + "step": 327730 + }, + { + "epoch": 0.6620555355793744, + "grad_norm": 589.4803466796875, + "learning_rate": 3.2003416260049493e-06, + "loss": 17.6694, + "step": 327740 + }, + { + "epoch": 0.6620757362120582, + "grad_norm": 640.0857543945312, + "learning_rate": 3.2000159593997447e-06, + "loss": 24.2283, + "step": 327750 + }, + { + "epoch": 0.662095936844742, + "grad_norm": 237.67442321777344, + "learning_rate": 3.1996903015674434e-06, + "loss": 11.5415, + "step": 327760 + }, + { + "epoch": 0.6621161374774258, + "grad_norm": 412.76593017578125, + "learning_rate": 3.1993646525096368e-06, + "loss": 13.7273, + "step": 327770 + }, + { + "epoch": 0.6621363381101096, + "grad_norm": 374.91485595703125, + "learning_rate": 3.1990390122279046e-06, + "loss": 13.3486, + "step": 327780 + }, + { + "epoch": 0.6621565387427935, + "grad_norm": 289.9313049316406, + "learning_rate": 3.198713380723839e-06, + "loss": 10.005, + "step": 327790 + }, + { + "epoch": 0.6621767393754773, + "grad_norm": 34.16901779174805, + "learning_rate": 3.1983877579990276e-06, + "loss": 20.7103, + "step": 327800 + }, + { + "epoch": 0.6621969400081611, + "grad_norm": 311.5425720214844, + "learning_rate": 3.198062144055054e-06, + "loss": 21.6194, + "step": 327810 + }, + { + "epoch": 0.6622171406408449, + "grad_norm": 463.7889099121094, + "learning_rate": 3.1977365388935076e-06, + "loss": 24.8787, + "step": 327820 + }, + { + "epoch": 0.6622373412735287, + "grad_norm": 37.17445755004883, + "learning_rate": 3.1974109425159754e-06, + "loss": 9.7137, + "step": 327830 + }, + { + "epoch": 0.6622575419062126, + "grad_norm": 588.4010620117188, + "learning_rate": 3.1970853549240425e-06, + "loss": 17.2024, + "step": 327840 + }, + { + "epoch": 0.6622777425388963, + "grad_norm": 439.13299560546875, + "learning_rate": 3.196759776119296e-06, + "loss": 33.6498, + "step": 327850 + }, + { + "epoch": 0.6622979431715801, + "grad_norm": 242.45326232910156, + "learning_rate": 3.1964342061033247e-06, + "loss": 13.121, + "step": 327860 + }, + { + "epoch": 0.6623181438042639, + "grad_norm": 679.048583984375, + "learning_rate": 3.1961086448777157e-06, + "loss": 21.9139, + "step": 327870 + }, + { + "epoch": 0.6623383444369477, + "grad_norm": 211.0595245361328, + "learning_rate": 
3.1957830924440524e-06, + "loss": 13.6705, + "step": 327880 + }, + { + "epoch": 0.6623585450696315, + "grad_norm": 153.071533203125, + "learning_rate": 3.195457548803925e-06, + "loss": 30.9075, + "step": 327890 + }, + { + "epoch": 0.6623787457023154, + "grad_norm": 534.3335571289062, + "learning_rate": 3.195132013958918e-06, + "loss": 14.811, + "step": 327900 + }, + { + "epoch": 0.6623989463349992, + "grad_norm": 635.1417846679688, + "learning_rate": 3.1948064879106187e-06, + "loss": 15.1571, + "step": 327910 + }, + { + "epoch": 0.662419146967683, + "grad_norm": 1015.3916625976562, + "learning_rate": 3.1944809706606123e-06, + "loss": 35.8303, + "step": 327920 + }, + { + "epoch": 0.6624393476003668, + "grad_norm": 43.98500061035156, + "learning_rate": 3.1941554622104897e-06, + "loss": 16.0305, + "step": 327930 + }, + { + "epoch": 0.6624595482330506, + "grad_norm": 554.8844604492188, + "learning_rate": 3.1938299625618313e-06, + "loss": 19.7504, + "step": 327940 + }, + { + "epoch": 0.6624797488657345, + "grad_norm": 184.5054168701172, + "learning_rate": 3.193504471716228e-06, + "loss": 8.8416, + "step": 327950 + }, + { + "epoch": 0.6624999494984183, + "grad_norm": 489.889892578125, + "learning_rate": 3.1931789896752654e-06, + "loss": 15.2668, + "step": 327960 + }, + { + "epoch": 0.6625201501311021, + "grad_norm": 953.59912109375, + "learning_rate": 3.192853516440528e-06, + "loss": 23.7423, + "step": 327970 + }, + { + "epoch": 0.6625403507637859, + "grad_norm": 707.7154541015625, + "learning_rate": 3.192528052013604e-06, + "loss": 13.3632, + "step": 327980 + }, + { + "epoch": 0.6625605513964697, + "grad_norm": 248.6231689453125, + "learning_rate": 3.1922025963960796e-06, + "loss": 21.4877, + "step": 327990 + }, + { + "epoch": 0.6625807520291536, + "grad_norm": 330.9288635253906, + "learning_rate": 3.1918771495895395e-06, + "loss": 29.9458, + "step": 328000 + }, + { + "epoch": 0.6626009526618374, + "grad_norm": 290.83905029296875, + "learning_rate": 3.1915517115955704e-06, + "loss": 16.653, + "step": 328010 + }, + { + "epoch": 0.6626211532945212, + "grad_norm": 234.9760284423828, + "learning_rate": 3.1912262824157592e-06, + "loss": 13.6941, + "step": 328020 + }, + { + "epoch": 0.662641353927205, + "grad_norm": 183.3705291748047, + "learning_rate": 3.1909008620516933e-06, + "loss": 15.5814, + "step": 328030 + }, + { + "epoch": 0.6626615545598888, + "grad_norm": 536.9821166992188, + "learning_rate": 3.190575450504956e-06, + "loss": 10.8537, + "step": 328040 + }, + { + "epoch": 0.6626817551925727, + "grad_norm": 404.4079895019531, + "learning_rate": 3.190250047777134e-06, + "loss": 28.2419, + "step": 328050 + }, + { + "epoch": 0.6627019558252565, + "grad_norm": 256.31536865234375, + "learning_rate": 3.1899246538698157e-06, + "loss": 12.2106, + "step": 328060 + }, + { + "epoch": 0.6627221564579403, + "grad_norm": 326.7830505371094, + "learning_rate": 3.1895992687845836e-06, + "loss": 11.1497, + "step": 328070 + }, + { + "epoch": 0.6627423570906241, + "grad_norm": 775.0346069335938, + "learning_rate": 3.1892738925230236e-06, + "loss": 17.4392, + "step": 328080 + }, + { + "epoch": 0.6627625577233079, + "grad_norm": 111.34048461914062, + "learning_rate": 3.188948525086727e-06, + "loss": 21.5712, + "step": 328090 + }, + { + "epoch": 0.6627827583559918, + "grad_norm": 248.71444702148438, + "learning_rate": 3.188623166477272e-06, + "loss": 17.674, + "step": 328100 + }, + { + "epoch": 0.6628029589886755, + "grad_norm": 463.4655456542969, + "learning_rate": 3.188297816696249e-06, + "loss": 26.3609, + 
"step": 328110 + }, + { + "epoch": 0.6628231596213593, + "grad_norm": 56.81084060668945, + "learning_rate": 3.187972475745244e-06, + "loss": 15.9719, + "step": 328120 + }, + { + "epoch": 0.6628433602540431, + "grad_norm": 163.7423095703125, + "learning_rate": 3.1876471436258407e-06, + "loss": 20.6289, + "step": 328130 + }, + { + "epoch": 0.6628635608867269, + "grad_norm": 1115.420654296875, + "learning_rate": 3.1873218203396246e-06, + "loss": 27.2812, + "step": 328140 + }, + { + "epoch": 0.6628837615194108, + "grad_norm": 171.6507568359375, + "learning_rate": 3.1869965058881836e-06, + "loss": 21.7811, + "step": 328150 + }, + { + "epoch": 0.6629039621520946, + "grad_norm": 386.964599609375, + "learning_rate": 3.1866712002731004e-06, + "loss": 18.4272, + "step": 328160 + }, + { + "epoch": 0.6629241627847784, + "grad_norm": 267.7464904785156, + "learning_rate": 3.186345903495961e-06, + "loss": 11.4173, + "step": 328170 + }, + { + "epoch": 0.6629443634174622, + "grad_norm": 207.80772399902344, + "learning_rate": 3.1860206155583527e-06, + "loss": 33.6552, + "step": 328180 + }, + { + "epoch": 0.662964564050146, + "grad_norm": 219.84579467773438, + "learning_rate": 3.185695336461861e-06, + "loss": 10.6895, + "step": 328190 + }, + { + "epoch": 0.6629847646828299, + "grad_norm": 350.85308837890625, + "learning_rate": 3.185370066208069e-06, + "loss": 6.2564, + "step": 328200 + }, + { + "epoch": 0.6630049653155137, + "grad_norm": 77.57797241210938, + "learning_rate": 3.185044804798564e-06, + "loss": 15.2196, + "step": 328210 + }, + { + "epoch": 0.6630251659481975, + "grad_norm": 708.864501953125, + "learning_rate": 3.1847195522349305e-06, + "loss": 27.5036, + "step": 328220 + }, + { + "epoch": 0.6630453665808813, + "grad_norm": 9.681346893310547, + "learning_rate": 3.1843943085187527e-06, + "loss": 16.3971, + "step": 328230 + }, + { + "epoch": 0.6630655672135651, + "grad_norm": 297.8474426269531, + "learning_rate": 3.1840690736516166e-06, + "loss": 9.6413, + "step": 328240 + }, + { + "epoch": 0.663085767846249, + "grad_norm": 424.9536437988281, + "learning_rate": 3.183743847635109e-06, + "loss": 17.2958, + "step": 328250 + }, + { + "epoch": 0.6631059684789328, + "grad_norm": 70.27574920654297, + "learning_rate": 3.1834186304708126e-06, + "loss": 17.2863, + "step": 328260 + }, + { + "epoch": 0.6631261691116166, + "grad_norm": 121.83522033691406, + "learning_rate": 3.183093422160314e-06, + "loss": 14.4273, + "step": 328270 + }, + { + "epoch": 0.6631463697443004, + "grad_norm": 342.9933776855469, + "learning_rate": 3.182768222705198e-06, + "loss": 12.4681, + "step": 328280 + }, + { + "epoch": 0.6631665703769842, + "grad_norm": 213.04074096679688, + "learning_rate": 3.182443032107049e-06, + "loss": 19.0667, + "step": 328290 + }, + { + "epoch": 0.663186771009668, + "grad_norm": 223.21646118164062, + "learning_rate": 3.1821178503674515e-06, + "loss": 11.2374, + "step": 328300 + }, + { + "epoch": 0.6632069716423519, + "grad_norm": 373.4936218261719, + "learning_rate": 3.1817926774879903e-06, + "loss": 28.1364, + "step": 328310 + }, + { + "epoch": 0.6632271722750357, + "grad_norm": 426.1025695800781, + "learning_rate": 3.1814675134702534e-06, + "loss": 19.7539, + "step": 328320 + }, + { + "epoch": 0.6632473729077195, + "grad_norm": 459.76727294921875, + "learning_rate": 3.181142358315822e-06, + "loss": 20.8025, + "step": 328330 + }, + { + "epoch": 0.6632675735404033, + "grad_norm": 893.7747192382812, + "learning_rate": 3.1808172120262824e-06, + "loss": 31.0751, + "step": 328340 + }, + { + "epoch": 
0.6632877741730872, + "grad_norm": 176.30690002441406, + "learning_rate": 3.1804920746032197e-06, + "loss": 23.8882, + "step": 328350 + }, + { + "epoch": 0.6633079748057709, + "grad_norm": 355.8739929199219, + "learning_rate": 3.1801669460482176e-06, + "loss": 33.8059, + "step": 328360 + }, + { + "epoch": 0.6633281754384547, + "grad_norm": 730.5296630859375, + "learning_rate": 3.1798418263628595e-06, + "loss": 22.4636, + "step": 328370 + }, + { + "epoch": 0.6633483760711385, + "grad_norm": 305.6308288574219, + "learning_rate": 3.179516715548735e-06, + "loss": 33.3801, + "step": 328380 + }, + { + "epoch": 0.6633685767038223, + "grad_norm": 306.0620422363281, + "learning_rate": 3.179191613607422e-06, + "loss": 10.5533, + "step": 328390 + }, + { + "epoch": 0.6633887773365061, + "grad_norm": 284.91986083984375, + "learning_rate": 3.178866520540509e-06, + "loss": 24.745, + "step": 328400 + }, + { + "epoch": 0.66340897796919, + "grad_norm": 65.59046173095703, + "learning_rate": 3.1785414363495808e-06, + "loss": 17.17, + "step": 328410 + }, + { + "epoch": 0.6634291786018738, + "grad_norm": 260.3446350097656, + "learning_rate": 3.17821636103622e-06, + "loss": 16.0564, + "step": 328420 + }, + { + "epoch": 0.6634493792345576, + "grad_norm": 270.76068115234375, + "learning_rate": 3.1778912946020114e-06, + "loss": 33.7841, + "step": 328430 + }, + { + "epoch": 0.6634695798672414, + "grad_norm": 580.8342895507812, + "learning_rate": 3.1775662370485406e-06, + "loss": 22.612, + "step": 328440 + }, + { + "epoch": 0.6634897804999252, + "grad_norm": 399.6605529785156, + "learning_rate": 3.17724118837739e-06, + "loss": 19.6052, + "step": 328450 + }, + { + "epoch": 0.6635099811326091, + "grad_norm": 189.4546661376953, + "learning_rate": 3.1769161485901445e-06, + "loss": 13.8748, + "step": 328460 + }, + { + "epoch": 0.6635301817652929, + "grad_norm": 482.87615966796875, + "learning_rate": 3.176591117688389e-06, + "loss": 14.321, + "step": 328470 + }, + { + "epoch": 0.6635503823979767, + "grad_norm": 473.2443542480469, + "learning_rate": 3.176266095673708e-06, + "loss": 10.8053, + "step": 328480 + }, + { + "epoch": 0.6635705830306605, + "grad_norm": 369.6527404785156, + "learning_rate": 3.175941082547684e-06, + "loss": 27.7805, + "step": 328490 + }, + { + "epoch": 0.6635907836633443, + "grad_norm": 400.4463806152344, + "learning_rate": 3.1756160783119015e-06, + "loss": 20.2652, + "step": 328500 + }, + { + "epoch": 0.6636109842960282, + "grad_norm": 126.27311706542969, + "learning_rate": 3.175291082967947e-06, + "loss": 28.937, + "step": 328510 + }, + { + "epoch": 0.663631184928712, + "grad_norm": 10.93095874786377, + "learning_rate": 3.1749660965174007e-06, + "loss": 15.2746, + "step": 328520 + }, + { + "epoch": 0.6636513855613958, + "grad_norm": 949.2610473632812, + "learning_rate": 3.1746411189618478e-06, + "loss": 15.0331, + "step": 328530 + }, + { + "epoch": 0.6636715861940796, + "grad_norm": 179.00579833984375, + "learning_rate": 3.174316150302875e-06, + "loss": 25.1306, + "step": 328540 + }, + { + "epoch": 0.6636917868267634, + "grad_norm": 293.65496826171875, + "learning_rate": 3.1739911905420617e-06, + "loss": 25.5459, + "step": 328550 + }, + { + "epoch": 0.6637119874594473, + "grad_norm": 533.2905883789062, + "learning_rate": 3.1736662396809936e-06, + "loss": 15.8173, + "step": 328560 + }, + { + "epoch": 0.6637321880921311, + "grad_norm": 880.5447387695312, + "learning_rate": 3.173341297721257e-06, + "loss": 12.0572, + "step": 328570 + }, + { + "epoch": 0.6637523887248149, + "grad_norm": 
255.260009765625, + "learning_rate": 3.1730163646644317e-06, + "loss": 19.1106, + "step": 328580 + }, + { + "epoch": 0.6637725893574987, + "grad_norm": 169.76583862304688, + "learning_rate": 3.1726914405121034e-06, + "loss": 7.0164, + "step": 328590 + }, + { + "epoch": 0.6637927899901825, + "grad_norm": 329.3076477050781, + "learning_rate": 3.1723665252658564e-06, + "loss": 12.133, + "step": 328600 + }, + { + "epoch": 0.6638129906228664, + "grad_norm": 1094.4727783203125, + "learning_rate": 3.172041618927272e-06, + "loss": 24.7534, + "step": 328610 + }, + { + "epoch": 0.6638331912555501, + "grad_norm": 408.4696044921875, + "learning_rate": 3.171716721497934e-06, + "loss": 23.1637, + "step": 328620 + }, + { + "epoch": 0.6638533918882339, + "grad_norm": 706.7051391601562, + "learning_rate": 3.171391832979428e-06, + "loss": 20.8366, + "step": 328630 + }, + { + "epoch": 0.6638735925209177, + "grad_norm": 414.6630554199219, + "learning_rate": 3.171066953373338e-06, + "loss": 16.9479, + "step": 328640 + }, + { + "epoch": 0.6638937931536015, + "grad_norm": 1162.2161865234375, + "learning_rate": 3.170742082681244e-06, + "loss": 21.0609, + "step": 328650 + }, + { + "epoch": 0.6639139937862854, + "grad_norm": 615.6329956054688, + "learning_rate": 3.1704172209047324e-06, + "loss": 10.0619, + "step": 328660 + }, + { + "epoch": 0.6639341944189692, + "grad_norm": 697.70703125, + "learning_rate": 3.1700923680453855e-06, + "loss": 19.9357, + "step": 328670 + }, + { + "epoch": 0.663954395051653, + "grad_norm": 610.2066040039062, + "learning_rate": 3.1697675241047852e-06, + "loss": 17.9655, + "step": 328680 + }, + { + "epoch": 0.6639745956843368, + "grad_norm": 363.06414794921875, + "learning_rate": 3.1694426890845155e-06, + "loss": 15.7865, + "step": 328690 + }, + { + "epoch": 0.6639947963170206, + "grad_norm": 415.978759765625, + "learning_rate": 3.169117862986163e-06, + "loss": 25.984, + "step": 328700 + }, + { + "epoch": 0.6640149969497045, + "grad_norm": 528.6858520507812, + "learning_rate": 3.168793045811305e-06, + "loss": 19.4374, + "step": 328710 + }, + { + "epoch": 0.6640351975823883, + "grad_norm": 336.0189514160156, + "learning_rate": 3.1684682375615283e-06, + "loss": 12.8563, + "step": 328720 + }, + { + "epoch": 0.6640553982150721, + "grad_norm": 494.2802734375, + "learning_rate": 3.168143438238417e-06, + "loss": 29.0306, + "step": 328730 + }, + { + "epoch": 0.6640755988477559, + "grad_norm": 487.3626403808594, + "learning_rate": 3.1678186478435508e-06, + "loss": 13.4801, + "step": 328740 + }, + { + "epoch": 0.6640957994804397, + "grad_norm": 750.1293334960938, + "learning_rate": 3.167493866378514e-06, + "loss": 11.5802, + "step": 328750 + }, + { + "epoch": 0.6641160001131236, + "grad_norm": 341.471435546875, + "learning_rate": 3.1671690938448895e-06, + "loss": 15.909, + "step": 328760 + }, + { + "epoch": 0.6641362007458074, + "grad_norm": 532.2777099609375, + "learning_rate": 3.166844330244263e-06, + "loss": 24.0081, + "step": 328770 + }, + { + "epoch": 0.6641564013784912, + "grad_norm": 184.27723693847656, + "learning_rate": 3.166519575578213e-06, + "loss": 15.1475, + "step": 328780 + }, + { + "epoch": 0.664176602011175, + "grad_norm": 480.2397766113281, + "learning_rate": 3.1661948298483243e-06, + "loss": 12.8562, + "step": 328790 + }, + { + "epoch": 0.6641968026438588, + "grad_norm": 394.6187438964844, + "learning_rate": 3.16587009305618e-06, + "loss": 20.0084, + "step": 328800 + }, + { + "epoch": 0.6642170032765427, + "grad_norm": 279.446533203125, + "learning_rate": 3.165545365203363e-06, 
+ "loss": 12.7865, + "step": 328810 + }, + { + "epoch": 0.6642372039092265, + "grad_norm": 415.18682861328125, + "learning_rate": 3.1652206462914542e-06, + "loss": 21.5043, + "step": 328820 + }, + { + "epoch": 0.6642574045419103, + "grad_norm": 271.2012634277344, + "learning_rate": 3.164895936322039e-06, + "loss": 19.6453, + "step": 328830 + }, + { + "epoch": 0.6642776051745941, + "grad_norm": 502.14935302734375, + "learning_rate": 3.1645712352966967e-06, + "loss": 21.6256, + "step": 328840 + }, + { + "epoch": 0.6642978058072779, + "grad_norm": 263.9889831542969, + "learning_rate": 3.164246543217011e-06, + "loss": 20.0408, + "step": 328850 + }, + { + "epoch": 0.6643180064399618, + "grad_norm": 60.78538131713867, + "learning_rate": 3.1639218600845673e-06, + "loss": 5.9968, + "step": 328860 + }, + { + "epoch": 0.6643382070726456, + "grad_norm": 582.1119384765625, + "learning_rate": 3.1635971859009444e-06, + "loss": 13.539, + "step": 328870 + }, + { + "epoch": 0.6643584077053293, + "grad_norm": 556.9094848632812, + "learning_rate": 3.1632725206677264e-06, + "loss": 12.4881, + "step": 328880 + }, + { + "epoch": 0.6643786083380131, + "grad_norm": 218.09471130371094, + "learning_rate": 3.1629478643864963e-06, + "loss": 15.9212, + "step": 328890 + }, + { + "epoch": 0.6643988089706969, + "grad_norm": 414.8309020996094, + "learning_rate": 3.1626232170588343e-06, + "loss": 22.3698, + "step": 328900 + }, + { + "epoch": 0.6644190096033807, + "grad_norm": 553.9437255859375, + "learning_rate": 3.1622985786863236e-06, + "loss": 29.8867, + "step": 328910 + }, + { + "epoch": 0.6644392102360646, + "grad_norm": 58.52224349975586, + "learning_rate": 3.1619739492705464e-06, + "loss": 9.5943, + "step": 328920 + }, + { + "epoch": 0.6644594108687484, + "grad_norm": 156.68765258789062, + "learning_rate": 3.1616493288130866e-06, + "loss": 9.0621, + "step": 328930 + }, + { + "epoch": 0.6644796115014322, + "grad_norm": 456.7655944824219, + "learning_rate": 3.1613247173155247e-06, + "loss": 17.5479, + "step": 328940 + }, + { + "epoch": 0.664499812134116, + "grad_norm": 321.54046630859375, + "learning_rate": 3.161000114779443e-06, + "loss": 12.8307, + "step": 328950 + }, + { + "epoch": 0.6645200127667998, + "grad_norm": 551.7155151367188, + "learning_rate": 3.1606755212064246e-06, + "loss": 16.9916, + "step": 328960 + }, + { + "epoch": 0.6645402133994837, + "grad_norm": 280.09429931640625, + "learning_rate": 3.1603509365980495e-06, + "loss": 8.5942, + "step": 328970 + }, + { + "epoch": 0.6645604140321675, + "grad_norm": 338.11456298828125, + "learning_rate": 3.1600263609559005e-06, + "loss": 10.2786, + "step": 328980 + }, + { + "epoch": 0.6645806146648513, + "grad_norm": 212.71983337402344, + "learning_rate": 3.159701794281561e-06, + "loss": 14.1564, + "step": 328990 + }, + { + "epoch": 0.6646008152975351, + "grad_norm": 646.0335693359375, + "learning_rate": 3.1593772365766107e-06, + "loss": 14.0678, + "step": 329000 + }, + { + "epoch": 0.6646210159302189, + "grad_norm": 187.28985595703125, + "learning_rate": 3.1590526878426326e-06, + "loss": 24.6466, + "step": 329010 + }, + { + "epoch": 0.6646412165629028, + "grad_norm": 370.43438720703125, + "learning_rate": 3.1587281480812093e-06, + "loss": 15.3338, + "step": 329020 + }, + { + "epoch": 0.6646614171955866, + "grad_norm": 360.96368408203125, + "learning_rate": 3.1584036172939213e-06, + "loss": 29.5299, + "step": 329030 + }, + { + "epoch": 0.6646816178282704, + "grad_norm": 200.5292205810547, + "learning_rate": 3.1580790954823505e-06, + "loss": 9.6293, + "step": 329040 
+ }, + { + "epoch": 0.6647018184609542, + "grad_norm": 532.2564697265625, + "learning_rate": 3.157754582648079e-06, + "loss": 21.1889, + "step": 329050 + }, + { + "epoch": 0.664722019093638, + "grad_norm": 564.5127563476562, + "learning_rate": 3.1574300787926883e-06, + "loss": 14.2916, + "step": 329060 + }, + { + "epoch": 0.6647422197263219, + "grad_norm": 379.1608581542969, + "learning_rate": 3.1571055839177583e-06, + "loss": 25.0758, + "step": 329070 + }, + { + "epoch": 0.6647624203590057, + "grad_norm": 337.03515625, + "learning_rate": 3.156781098024874e-06, + "loss": 14.498, + "step": 329080 + }, + { + "epoch": 0.6647826209916895, + "grad_norm": 65.75376892089844, + "learning_rate": 3.156456621115615e-06, + "loss": 15.2741, + "step": 329090 + }, + { + "epoch": 0.6648028216243733, + "grad_norm": 137.52867126464844, + "learning_rate": 3.1561321531915622e-06, + "loss": 17.7733, + "step": 329100 + }, + { + "epoch": 0.6648230222570571, + "grad_norm": 20.54436492919922, + "learning_rate": 3.155807694254298e-06, + "loss": 22.4343, + "step": 329110 + }, + { + "epoch": 0.664843222889741, + "grad_norm": 733.7474365234375, + "learning_rate": 3.155483244305404e-06, + "loss": 21.4831, + "step": 329120 + }, + { + "epoch": 0.6648634235224247, + "grad_norm": 144.43870544433594, + "learning_rate": 3.15515880334646e-06, + "loss": 25.4423, + "step": 329130 + }, + { + "epoch": 0.6648836241551085, + "grad_norm": 480.5118713378906, + "learning_rate": 3.1548343713790474e-06, + "loss": 20.144, + "step": 329140 + }, + { + "epoch": 0.6649038247877923, + "grad_norm": 49.870323181152344, + "learning_rate": 3.1545099484047514e-06, + "loss": 7.7324, + "step": 329150 + }, + { + "epoch": 0.6649240254204761, + "grad_norm": 91.82637023925781, + "learning_rate": 3.154185534425147e-06, + "loss": 19.8717, + "step": 329160 + }, + { + "epoch": 0.66494422605316, + "grad_norm": 1162.68798828125, + "learning_rate": 3.153861129441819e-06, + "loss": 24.6791, + "step": 329170 + }, + { + "epoch": 0.6649644266858438, + "grad_norm": 262.5423889160156, + "learning_rate": 3.1535367334563493e-06, + "loss": 11.2788, + "step": 329180 + }, + { + "epoch": 0.6649846273185276, + "grad_norm": 433.8036193847656, + "learning_rate": 3.153212346470317e-06, + "loss": 18.7376, + "step": 329190 + }, + { + "epoch": 0.6650048279512114, + "grad_norm": 267.2512512207031, + "learning_rate": 3.152887968485303e-06, + "loss": 15.4686, + "step": 329200 + }, + { + "epoch": 0.6650250285838952, + "grad_norm": 940.6142578125, + "learning_rate": 3.1525635995028884e-06, + "loss": 16.464, + "step": 329210 + }, + { + "epoch": 0.665045229216579, + "grad_norm": 304.4906005859375, + "learning_rate": 3.1522392395246584e-06, + "loss": 13.4658, + "step": 329220 + }, + { + "epoch": 0.6650654298492629, + "grad_norm": 397.871337890625, + "learning_rate": 3.151914888552186e-06, + "loss": 17.0634, + "step": 329230 + }, + { + "epoch": 0.6650856304819467, + "grad_norm": 334.5021667480469, + "learning_rate": 3.1515905465870576e-06, + "loss": 18.8055, + "step": 329240 + }, + { + "epoch": 0.6651058311146305, + "grad_norm": 124.223876953125, + "learning_rate": 3.151266213630854e-06, + "loss": 19.8596, + "step": 329250 + }, + { + "epoch": 0.6651260317473143, + "grad_norm": 611.5267333984375, + "learning_rate": 3.150941889685154e-06, + "loss": 24.7954, + "step": 329260 + }, + { + "epoch": 0.6651462323799981, + "grad_norm": 458.2458801269531, + "learning_rate": 3.1506175747515384e-06, + "loss": 18.2301, + "step": 329270 + }, + { + "epoch": 0.665166433012682, + "grad_norm": 
694.9378662109375, + "learning_rate": 3.1502932688315897e-06, + "loss": 22.8646, + "step": 329280 + }, + { + "epoch": 0.6651866336453658, + "grad_norm": 414.5040283203125, + "learning_rate": 3.1499689719268854e-06, + "loss": 20.0289, + "step": 329290 + }, + { + "epoch": 0.6652068342780496, + "grad_norm": 443.5479736328125, + "learning_rate": 3.149644684039008e-06, + "loss": 22.4756, + "step": 329300 + }, + { + "epoch": 0.6652270349107334, + "grad_norm": 325.9361877441406, + "learning_rate": 3.1493204051695407e-06, + "loss": 12.9403, + "step": 329310 + }, + { + "epoch": 0.6652472355434172, + "grad_norm": 743.5194091796875, + "learning_rate": 3.148996135320058e-06, + "loss": 34.9779, + "step": 329320 + }, + { + "epoch": 0.6652674361761011, + "grad_norm": 560.8590698242188, + "learning_rate": 3.148671874492145e-06, + "loss": 26.4579, + "step": 329330 + }, + { + "epoch": 0.6652876368087849, + "grad_norm": 336.4598693847656, + "learning_rate": 3.1483476226873822e-06, + "loss": 15.5427, + "step": 329340 + }, + { + "epoch": 0.6653078374414687, + "grad_norm": 311.89337158203125, + "learning_rate": 3.1480233799073467e-06, + "loss": 17.8434, + "step": 329350 + }, + { + "epoch": 0.6653280380741525, + "grad_norm": 153.28713989257812, + "learning_rate": 3.147699146153621e-06, + "loss": 11.3492, + "step": 329360 + }, + { + "epoch": 0.6653482387068363, + "grad_norm": 156.05831909179688, + "learning_rate": 3.147374921427784e-06, + "loss": 10.0524, + "step": 329370 + }, + { + "epoch": 0.6653684393395202, + "grad_norm": 197.22079467773438, + "learning_rate": 3.14705070573142e-06, + "loss": 11.2639, + "step": 329380 + }, + { + "epoch": 0.6653886399722039, + "grad_norm": 464.6989440917969, + "learning_rate": 3.146726499066103e-06, + "loss": 20.3292, + "step": 329390 + }, + { + "epoch": 0.6654088406048877, + "grad_norm": 846.1508178710938, + "learning_rate": 3.1464023014334164e-06, + "loss": 19.1258, + "step": 329400 + }, + { + "epoch": 0.6654290412375715, + "grad_norm": 165.53643798828125, + "learning_rate": 3.146078112834943e-06, + "loss": 18.3069, + "step": 329410 + }, + { + "epoch": 0.6654492418702553, + "grad_norm": 402.226806640625, + "learning_rate": 3.1457539332722577e-06, + "loss": 21.9273, + "step": 329420 + }, + { + "epoch": 0.6654694425029392, + "grad_norm": 496.2322692871094, + "learning_rate": 3.145429762746943e-06, + "loss": 24.0402, + "step": 329430 + }, + { + "epoch": 0.665489643135623, + "grad_norm": 171.00973510742188, + "learning_rate": 3.1451056012605796e-06, + "loss": 11.35, + "step": 329440 + }, + { + "epoch": 0.6655098437683068, + "grad_norm": 595.9259643554688, + "learning_rate": 3.144781448814746e-06, + "loss": 22.6688, + "step": 329450 + }, + { + "epoch": 0.6655300444009906, + "grad_norm": 568.1805419921875, + "learning_rate": 3.1444573054110216e-06, + "loss": 23.0682, + "step": 329460 + }, + { + "epoch": 0.6655502450336744, + "grad_norm": 398.0702819824219, + "learning_rate": 3.14413317105099e-06, + "loss": 36.9149, + "step": 329470 + }, + { + "epoch": 0.6655704456663583, + "grad_norm": 359.6332702636719, + "learning_rate": 3.1438090457362253e-06, + "loss": 22.6934, + "step": 329480 + }, + { + "epoch": 0.6655906462990421, + "grad_norm": 732.9029541015625, + "learning_rate": 3.1434849294683113e-06, + "loss": 15.2932, + "step": 329490 + }, + { + "epoch": 0.6656108469317259, + "grad_norm": 238.02476501464844, + "learning_rate": 3.1431608222488276e-06, + "loss": 19.7181, + "step": 329500 + }, + { + "epoch": 0.6656310475644097, + "grad_norm": 98.36227416992188, + "learning_rate": 
3.1428367240793513e-06, + "loss": 20.1967, + "step": 329510 + }, + { + "epoch": 0.6656512481970935, + "grad_norm": 402.2587585449219, + "learning_rate": 3.1425126349614636e-06, + "loss": 18.2152, + "step": 329520 + }, + { + "epoch": 0.6656714488297774, + "grad_norm": 712.5654296875, + "learning_rate": 3.1421885548967436e-06, + "loss": 22.9547, + "step": 329530 + }, + { + "epoch": 0.6656916494624612, + "grad_norm": 271.1004943847656, + "learning_rate": 3.141864483886774e-06, + "loss": 12.3942, + "step": 329540 + }, + { + "epoch": 0.665711850095145, + "grad_norm": 75.97625732421875, + "learning_rate": 3.1415404219331287e-06, + "loss": 19.4859, + "step": 329550 + }, + { + "epoch": 0.6657320507278288, + "grad_norm": 0.4789038896560669, + "learning_rate": 3.141216369037391e-06, + "loss": 12.5291, + "step": 329560 + }, + { + "epoch": 0.6657522513605126, + "grad_norm": 166.28794860839844, + "learning_rate": 3.14089232520114e-06, + "loss": 15.3067, + "step": 329570 + }, + { + "epoch": 0.6657724519931965, + "grad_norm": 132.44432067871094, + "learning_rate": 3.1405682904259534e-06, + "loss": 27.2135, + "step": 329580 + }, + { + "epoch": 0.6657926526258803, + "grad_norm": 301.5104675292969, + "learning_rate": 3.1402442647134115e-06, + "loss": 21.5171, + "step": 329590 + }, + { + "epoch": 0.6658128532585641, + "grad_norm": 234.42132568359375, + "learning_rate": 3.139920248065095e-06, + "loss": 21.408, + "step": 329600 + }, + { + "epoch": 0.6658330538912479, + "grad_norm": 349.06353759765625, + "learning_rate": 3.1395962404825787e-06, + "loss": 20.4292, + "step": 329610 + }, + { + "epoch": 0.6658532545239317, + "grad_norm": 97.02141571044922, + "learning_rate": 3.139272241967446e-06, + "loss": 11.4214, + "step": 329620 + }, + { + "epoch": 0.6658734551566156, + "grad_norm": 326.3496398925781, + "learning_rate": 3.1389482525212753e-06, + "loss": 16.2651, + "step": 329630 + }, + { + "epoch": 0.6658936557892993, + "grad_norm": 231.70472717285156, + "learning_rate": 3.138624272145645e-06, + "loss": 14.6593, + "step": 329640 + }, + { + "epoch": 0.6659138564219831, + "grad_norm": 5.015622615814209, + "learning_rate": 3.1383003008421336e-06, + "loss": 25.0485, + "step": 329650 + }, + { + "epoch": 0.6659340570546669, + "grad_norm": 598.1875, + "learning_rate": 3.137976338612322e-06, + "loss": 11.656, + "step": 329660 + }, + { + "epoch": 0.6659542576873507, + "grad_norm": 453.4358215332031, + "learning_rate": 3.1376523854577866e-06, + "loss": 18.5764, + "step": 329670 + }, + { + "epoch": 0.6659744583200345, + "grad_norm": 262.5253601074219, + "learning_rate": 3.1373284413801075e-06, + "loss": 22.8158, + "step": 329680 + }, + { + "epoch": 0.6659946589527184, + "grad_norm": 299.25579833984375, + "learning_rate": 3.137004506380864e-06, + "loss": 15.7987, + "step": 329690 + }, + { + "epoch": 0.6660148595854022, + "grad_norm": 766.4141235351562, + "learning_rate": 3.1366805804616353e-06, + "loss": 29.9571, + "step": 329700 + }, + { + "epoch": 0.666035060218086, + "grad_norm": 148.3333740234375, + "learning_rate": 3.1363566636239983e-06, + "loss": 32.8396, + "step": 329710 + }, + { + "epoch": 0.6660552608507698, + "grad_norm": 270.7835998535156, + "learning_rate": 3.1360327558695336e-06, + "loss": 10.3951, + "step": 329720 + }, + { + "epoch": 0.6660754614834536, + "grad_norm": 445.0180969238281, + "learning_rate": 3.1357088571998203e-06, + "loss": 12.224, + "step": 329730 + }, + { + "epoch": 0.6660956621161375, + "grad_norm": 342.1134948730469, + "learning_rate": 3.1353849676164344e-06, + "loss": 17.4202, + "step": 
329740 + }, + { + "epoch": 0.6661158627488213, + "grad_norm": 517.21435546875, + "learning_rate": 3.1350610871209553e-06, + "loss": 26.8176, + "step": 329750 + }, + { + "epoch": 0.6661360633815051, + "grad_norm": 182.5183563232422, + "learning_rate": 3.1347372157149647e-06, + "loss": 15.5527, + "step": 329760 + }, + { + "epoch": 0.6661562640141889, + "grad_norm": 373.9974365234375, + "learning_rate": 3.1344133534000364e-06, + "loss": 16.5057, + "step": 329770 + }, + { + "epoch": 0.6661764646468727, + "grad_norm": 532.1063842773438, + "learning_rate": 3.1340895001777518e-06, + "loss": 21.1695, + "step": 329780 + }, + { + "epoch": 0.6661966652795566, + "grad_norm": 397.7397155761719, + "learning_rate": 3.13376565604969e-06, + "loss": 33.9767, + "step": 329790 + }, + { + "epoch": 0.6662168659122404, + "grad_norm": 538.232421875, + "learning_rate": 3.1334418210174268e-06, + "loss": 19.266, + "step": 329800 + }, + { + "epoch": 0.6662370665449242, + "grad_norm": 487.9904479980469, + "learning_rate": 3.1331179950825415e-06, + "loss": 15.1283, + "step": 329810 + }, + { + "epoch": 0.666257267177608, + "grad_norm": 740.597412109375, + "learning_rate": 3.132794178246612e-06, + "loss": 23.8183, + "step": 329820 + }, + { + "epoch": 0.6662774678102918, + "grad_norm": 196.72503662109375, + "learning_rate": 3.1324703705112203e-06, + "loss": 14.3614, + "step": 329830 + }, + { + "epoch": 0.6662976684429757, + "grad_norm": 298.3857116699219, + "learning_rate": 3.1321465718779375e-06, + "loss": 16.3721, + "step": 329840 + }, + { + "epoch": 0.6663178690756595, + "grad_norm": 271.487548828125, + "learning_rate": 3.1318227823483483e-06, + "loss": 34.8018, + "step": 329850 + }, + { + "epoch": 0.6663380697083433, + "grad_norm": 263.2033996582031, + "learning_rate": 3.1314990019240283e-06, + "loss": 12.5965, + "step": 329860 + }, + { + "epoch": 0.6663582703410271, + "grad_norm": 636.3667602539062, + "learning_rate": 3.1311752306065547e-06, + "loss": 14.2167, + "step": 329870 + }, + { + "epoch": 0.666378470973711, + "grad_norm": 199.5261993408203, + "learning_rate": 3.1308514683975067e-06, + "loss": 28.0261, + "step": 329880 + }, + { + "epoch": 0.6663986716063948, + "grad_norm": 336.9678649902344, + "learning_rate": 3.1305277152984624e-06, + "loss": 19.1767, + "step": 329890 + }, + { + "epoch": 0.6664188722390785, + "grad_norm": 571.2289428710938, + "learning_rate": 3.130203971310999e-06, + "loss": 32.7854, + "step": 329900 + }, + { + "epoch": 0.6664390728717623, + "grad_norm": 508.0943603515625, + "learning_rate": 3.129880236436693e-06, + "loss": 19.3444, + "step": 329910 + }, + { + "epoch": 0.6664592735044461, + "grad_norm": 184.9727325439453, + "learning_rate": 3.1295565106771275e-06, + "loss": 17.249, + "step": 329920 + }, + { + "epoch": 0.6664794741371299, + "grad_norm": 423.1129150390625, + "learning_rate": 3.129232794033873e-06, + "loss": 17.9122, + "step": 329930 + }, + { + "epoch": 0.6664996747698138, + "grad_norm": 498.97930908203125, + "learning_rate": 3.1289090865085124e-06, + "loss": 16.3645, + "step": 329940 + }, + { + "epoch": 0.6665198754024976, + "grad_norm": 365.0946044921875, + "learning_rate": 3.128585388102623e-06, + "loss": 21.9605, + "step": 329950 + }, + { + "epoch": 0.6665400760351814, + "grad_norm": 587.8406982421875, + "learning_rate": 3.1282616988177806e-06, + "loss": 35.0797, + "step": 329960 + }, + { + "epoch": 0.6665602766678652, + "grad_norm": 378.7322082519531, + "learning_rate": 3.127938018655563e-06, + "loss": 24.5784, + "step": 329970 + }, + { + "epoch": 0.666580477300549, + 
"grad_norm": 382.29925537109375, + "learning_rate": 3.127614347617548e-06, + "loss": 9.5781, + "step": 329980 + }, + { + "epoch": 0.6666006779332329, + "grad_norm": 717.412109375, + "learning_rate": 3.1272906857053164e-06, + "loss": 22.2062, + "step": 329990 + }, + { + "epoch": 0.6666208785659167, + "grad_norm": 229.9202423095703, + "learning_rate": 3.12696703292044e-06, + "loss": 17.7849, + "step": 330000 + }, + { + "epoch": 0.6666410791986005, + "grad_norm": 2.28291916847229, + "learning_rate": 3.1266433892645e-06, + "loss": 12.6165, + "step": 330010 + }, + { + "epoch": 0.6666612798312843, + "grad_norm": 1385.194091796875, + "learning_rate": 3.126319754739074e-06, + "loss": 33.7563, + "step": 330020 + }, + { + "epoch": 0.6666814804639681, + "grad_norm": 512.7279663085938, + "learning_rate": 3.1259961293457373e-06, + "loss": 22.3427, + "step": 330030 + }, + { + "epoch": 0.666701681096652, + "grad_norm": 364.13262939453125, + "learning_rate": 3.1256725130860676e-06, + "loss": 15.5721, + "step": 330040 + }, + { + "epoch": 0.6667218817293358, + "grad_norm": 336.09173583984375, + "learning_rate": 3.1253489059616448e-06, + "loss": 12.7987, + "step": 330050 + }, + { + "epoch": 0.6667420823620196, + "grad_norm": 549.6753540039062, + "learning_rate": 3.1250253079740423e-06, + "loss": 13.6582, + "step": 330060 + }, + { + "epoch": 0.6667622829947034, + "grad_norm": 97.47869110107422, + "learning_rate": 3.124701719124838e-06, + "loss": 19.1391, + "step": 330070 + }, + { + "epoch": 0.6667824836273872, + "grad_norm": 358.6037292480469, + "learning_rate": 3.1243781394156138e-06, + "loss": 14.0605, + "step": 330080 + }, + { + "epoch": 0.666802684260071, + "grad_norm": 398.0151062011719, + "learning_rate": 3.1240545688479395e-06, + "loss": 22.144, + "step": 330090 + }, + { + "epoch": 0.6668228848927549, + "grad_norm": 106.11761474609375, + "learning_rate": 3.1237310074233964e-06, + "loss": 15.5378, + "step": 330100 + }, + { + "epoch": 0.6668430855254387, + "grad_norm": 579.0884399414062, + "learning_rate": 3.1234074551435624e-06, + "loss": 16.5559, + "step": 330110 + }, + { + "epoch": 0.6668632861581225, + "grad_norm": 381.1731262207031, + "learning_rate": 3.1230839120100114e-06, + "loss": 31.6331, + "step": 330120 + }, + { + "epoch": 0.6668834867908063, + "grad_norm": 507.9053039550781, + "learning_rate": 3.122760378024322e-06, + "loss": 17.4301, + "step": 330130 + }, + { + "epoch": 0.6669036874234902, + "grad_norm": 492.3564147949219, + "learning_rate": 3.12243685318807e-06, + "loss": 16.9787, + "step": 330140 + }, + { + "epoch": 0.666923888056174, + "grad_norm": 503.4457092285156, + "learning_rate": 3.1221133375028357e-06, + "loss": 17.3873, + "step": 330150 + }, + { + "epoch": 0.6669440886888577, + "grad_norm": 192.4401397705078, + "learning_rate": 3.1217898309701903e-06, + "loss": 40.3832, + "step": 330160 + }, + { + "epoch": 0.6669642893215415, + "grad_norm": 1312.8914794921875, + "learning_rate": 3.121466333591715e-06, + "loss": 20.2297, + "step": 330170 + }, + { + "epoch": 0.6669844899542253, + "grad_norm": 299.81024169921875, + "learning_rate": 3.1211428453689853e-06, + "loss": 20.149, + "step": 330180 + }, + { + "epoch": 0.6670046905869091, + "grad_norm": 115.20481872558594, + "learning_rate": 3.120819366303577e-06, + "loss": 32.3739, + "step": 330190 + }, + { + "epoch": 0.667024891219593, + "grad_norm": 260.2235412597656, + "learning_rate": 3.1204958963970666e-06, + "loss": 17.1079, + "step": 330200 + }, + { + "epoch": 0.6670450918522768, + "grad_norm": 383.4471740722656, + "learning_rate": 
3.1201724356510328e-06, + "loss": 18.2828, + "step": 330210 + }, + { + "epoch": 0.6670652924849606, + "grad_norm": 612.00830078125, + "learning_rate": 3.1198489840670494e-06, + "loss": 17.7996, + "step": 330220 + }, + { + "epoch": 0.6670854931176444, + "grad_norm": 296.54345703125, + "learning_rate": 3.119525541646693e-06, + "loss": 9.9124, + "step": 330230 + }, + { + "epoch": 0.6671056937503282, + "grad_norm": 484.1433410644531, + "learning_rate": 3.1192021083915424e-06, + "loss": 19.5186, + "step": 330240 + }, + { + "epoch": 0.6671258943830121, + "grad_norm": 278.9736633300781, + "learning_rate": 3.1188786843031727e-06, + "loss": 23.9681, + "step": 330250 + }, + { + "epoch": 0.6671460950156959, + "grad_norm": 433.03204345703125, + "learning_rate": 3.1185552693831595e-06, + "loss": 20.3783, + "step": 330260 + }, + { + "epoch": 0.6671662956483797, + "grad_norm": 453.2152099609375, + "learning_rate": 3.1182318636330798e-06, + "loss": 18.2361, + "step": 330270 + }, + { + "epoch": 0.6671864962810635, + "grad_norm": 39.3110466003418, + "learning_rate": 3.1179084670545114e-06, + "loss": 12.2447, + "step": 330280 + }, + { + "epoch": 0.6672066969137473, + "grad_norm": 106.91869354248047, + "learning_rate": 3.117585079649026e-06, + "loss": 17.7015, + "step": 330290 + }, + { + "epoch": 0.6672268975464312, + "grad_norm": 397.95843505859375, + "learning_rate": 3.117261701418204e-06, + "loss": 23.5151, + "step": 330300 + }, + { + "epoch": 0.667247098179115, + "grad_norm": 127.22858428955078, + "learning_rate": 3.1169383323636205e-06, + "loss": 6.6119, + "step": 330310 + }, + { + "epoch": 0.6672672988117988, + "grad_norm": 248.42555236816406, + "learning_rate": 3.116614972486851e-06, + "loss": 17.023, + "step": 330320 + }, + { + "epoch": 0.6672874994444826, + "grad_norm": 526.100341796875, + "learning_rate": 3.1162916217894713e-06, + "loss": 22.9472, + "step": 330330 + }, + { + "epoch": 0.6673077000771664, + "grad_norm": 299.6604919433594, + "learning_rate": 3.115968280273059e-06, + "loss": 26.6155, + "step": 330340 + }, + { + "epoch": 0.6673279007098503, + "grad_norm": 172.5897674560547, + "learning_rate": 3.1156449479391876e-06, + "loss": 18.7303, + "step": 330350 + }, + { + "epoch": 0.6673481013425341, + "grad_norm": 111.12664794921875, + "learning_rate": 3.115321624789433e-06, + "loss": 11.1953, + "step": 330360 + }, + { + "epoch": 0.6673683019752179, + "grad_norm": 416.7669372558594, + "learning_rate": 3.1149983108253754e-06, + "loss": 11.8979, + "step": 330370 + }, + { + "epoch": 0.6673885026079017, + "grad_norm": 191.52410888671875, + "learning_rate": 3.1146750060485847e-06, + "loss": 17.6313, + "step": 330380 + }, + { + "epoch": 0.6674087032405855, + "grad_norm": 23.746089935302734, + "learning_rate": 3.1143517104606404e-06, + "loss": 14.1874, + "step": 330390 + }, + { + "epoch": 0.6674289038732694, + "grad_norm": 284.149658203125, + "learning_rate": 3.114028424063118e-06, + "loss": 11.372, + "step": 330400 + }, + { + "epoch": 0.6674491045059531, + "grad_norm": 512.7546997070312, + "learning_rate": 3.1137051468575905e-06, + "loss": 13.7233, + "step": 330410 + }, + { + "epoch": 0.6674693051386369, + "grad_norm": 625.3117065429688, + "learning_rate": 3.113381878845636e-06, + "loss": 15.1766, + "step": 330420 + }, + { + "epoch": 0.6674895057713207, + "grad_norm": 174.7843780517578, + "learning_rate": 3.113058620028829e-06, + "loss": 18.5447, + "step": 330430 + }, + { + "epoch": 0.6675097064040045, + "grad_norm": 718.687255859375, + "learning_rate": 3.1127353704087477e-06, + "loss": 19.0231, + 
"step": 330440 + }, + { + "epoch": 0.6675299070366884, + "grad_norm": 50.9957389831543, + "learning_rate": 3.1124121299869627e-06, + "loss": 21.3549, + "step": 330450 + }, + { + "epoch": 0.6675501076693722, + "grad_norm": 693.8781127929688, + "learning_rate": 3.112088898765052e-06, + "loss": 22.8268, + "step": 330460 + }, + { + "epoch": 0.667570308302056, + "grad_norm": 347.0184631347656, + "learning_rate": 3.1117656767445936e-06, + "loss": 22.9062, + "step": 330470 + }, + { + "epoch": 0.6675905089347398, + "grad_norm": 299.36224365234375, + "learning_rate": 3.1114424639271584e-06, + "loss": 14.3362, + "step": 330480 + }, + { + "epoch": 0.6676107095674236, + "grad_norm": 12.645113945007324, + "learning_rate": 3.1111192603143235e-06, + "loss": 11.6925, + "step": 330490 + }, + { + "epoch": 0.6676309102001075, + "grad_norm": 472.9318542480469, + "learning_rate": 3.110796065907665e-06, + "loss": 21.9861, + "step": 330500 + }, + { + "epoch": 0.6676511108327913, + "grad_norm": 542.18212890625, + "learning_rate": 3.110472880708757e-06, + "loss": 17.1566, + "step": 330510 + }, + { + "epoch": 0.6676713114654751, + "grad_norm": 24.699552536010742, + "learning_rate": 3.110149704719174e-06, + "loss": 13.7014, + "step": 330520 + }, + { + "epoch": 0.6676915120981589, + "grad_norm": 77.46388244628906, + "learning_rate": 3.1098265379404953e-06, + "loss": 16.0435, + "step": 330530 + }, + { + "epoch": 0.6677117127308427, + "grad_norm": 839.5580444335938, + "learning_rate": 3.109503380374289e-06, + "loss": 21.5353, + "step": 330540 + }, + { + "epoch": 0.6677319133635266, + "grad_norm": 429.29266357421875, + "learning_rate": 3.109180232022135e-06, + "loss": 21.3935, + "step": 330550 + }, + { + "epoch": 0.6677521139962104, + "grad_norm": 262.7479553222656, + "learning_rate": 3.1088570928856087e-06, + "loss": 12.7966, + "step": 330560 + }, + { + "epoch": 0.6677723146288942, + "grad_norm": 150.66383361816406, + "learning_rate": 3.1085339629662826e-06, + "loss": 31.2936, + "step": 330570 + }, + { + "epoch": 0.667792515261578, + "grad_norm": 726.124755859375, + "learning_rate": 3.1082108422657328e-06, + "loss": 17.9742, + "step": 330580 + }, + { + "epoch": 0.6678127158942618, + "grad_norm": 610.7399291992188, + "learning_rate": 3.1078877307855325e-06, + "loss": 8.6843, + "step": 330590 + }, + { + "epoch": 0.6678329165269457, + "grad_norm": 124.41638946533203, + "learning_rate": 3.1075646285272608e-06, + "loss": 27.1409, + "step": 330600 + }, + { + "epoch": 0.6678531171596295, + "grad_norm": 326.7791442871094, + "learning_rate": 3.1072415354924866e-06, + "loss": 21.5234, + "step": 330610 + }, + { + "epoch": 0.6678733177923133, + "grad_norm": 294.0032958984375, + "learning_rate": 3.106918451682789e-06, + "loss": 18.8087, + "step": 330620 + }, + { + "epoch": 0.6678935184249971, + "grad_norm": 319.2794189453125, + "learning_rate": 3.1065953770997416e-06, + "loss": 21.2712, + "step": 330630 + }, + { + "epoch": 0.6679137190576809, + "grad_norm": 379.7613220214844, + "learning_rate": 3.106272311744918e-06, + "loss": 11.9827, + "step": 330640 + }, + { + "epoch": 0.6679339196903648, + "grad_norm": 259.8501892089844, + "learning_rate": 3.1059492556198933e-06, + "loss": 30.4546, + "step": 330650 + }, + { + "epoch": 0.6679541203230486, + "grad_norm": 118.22823333740234, + "learning_rate": 3.1056262087262432e-06, + "loss": 17.333, + "step": 330660 + }, + { + "epoch": 0.6679743209557323, + "grad_norm": 58.96928787231445, + "learning_rate": 3.105303171065541e-06, + "loss": 22.3445, + "step": 330670 + }, + { + "epoch": 
0.6679945215884161, + "grad_norm": 283.99456787109375, + "learning_rate": 3.10498014263936e-06, + "loss": 15.5069, + "step": 330680 + }, + { + "epoch": 0.6680147222210999, + "grad_norm": 157.41123962402344, + "learning_rate": 3.1046571234492782e-06, + "loss": 14.8355, + "step": 330690 + }, + { + "epoch": 0.6680349228537837, + "grad_norm": 30.65309715270996, + "learning_rate": 3.1043341134968653e-06, + "loss": 24.6806, + "step": 330700 + }, + { + "epoch": 0.6680551234864676, + "grad_norm": 165.40370178222656, + "learning_rate": 3.1040111127836994e-06, + "loss": 35.1521, + "step": 330710 + }, + { + "epoch": 0.6680753241191514, + "grad_norm": 443.48193359375, + "learning_rate": 3.1036881213113533e-06, + "loss": 20.7113, + "step": 330720 + }, + { + "epoch": 0.6680955247518352, + "grad_norm": 614.755859375, + "learning_rate": 3.1033651390814012e-06, + "loss": 13.9008, + "step": 330730 + }, + { + "epoch": 0.668115725384519, + "grad_norm": 424.2988586425781, + "learning_rate": 3.103042166095417e-06, + "loss": 19.4053, + "step": 330740 + }, + { + "epoch": 0.6681359260172028, + "grad_norm": 177.65480041503906, + "learning_rate": 3.102719202354974e-06, + "loss": 13.7094, + "step": 330750 + }, + { + "epoch": 0.6681561266498867, + "grad_norm": 1192.040283203125, + "learning_rate": 3.102396247861651e-06, + "loss": 30.5522, + "step": 330760 + }, + { + "epoch": 0.6681763272825705, + "grad_norm": 656.6998901367188, + "learning_rate": 3.102073302617015e-06, + "loss": 13.8427, + "step": 330770 + }, + { + "epoch": 0.6681965279152543, + "grad_norm": 4368.40283203125, + "learning_rate": 3.101750366622645e-06, + "loss": 25.8686, + "step": 330780 + }, + { + "epoch": 0.6682167285479381, + "grad_norm": 581.3051147460938, + "learning_rate": 3.101427439880115e-06, + "loss": 17.9275, + "step": 330790 + }, + { + "epoch": 0.6682369291806219, + "grad_norm": 462.6530456542969, + "learning_rate": 3.1011045223909954e-06, + "loss": 21.2542, + "step": 330800 + }, + { + "epoch": 0.6682571298133058, + "grad_norm": 409.5375061035156, + "learning_rate": 3.1007816141568625e-06, + "loss": 14.8786, + "step": 330810 + }, + { + "epoch": 0.6682773304459896, + "grad_norm": 633.002685546875, + "learning_rate": 3.1004587151792903e-06, + "loss": 27.8254, + "step": 330820 + }, + { + "epoch": 0.6682975310786734, + "grad_norm": 141.9521942138672, + "learning_rate": 3.1001358254598512e-06, + "loss": 11.3634, + "step": 330830 + }, + { + "epoch": 0.6683177317113572, + "grad_norm": 553.1273193359375, + "learning_rate": 3.0998129450001184e-06, + "loss": 14.1767, + "step": 330840 + }, + { + "epoch": 0.668337932344041, + "grad_norm": 347.21759033203125, + "learning_rate": 3.0994900738016693e-06, + "loss": 15.1919, + "step": 330850 + }, + { + "epoch": 0.6683581329767249, + "grad_norm": 453.7741394042969, + "learning_rate": 3.099167211866074e-06, + "loss": 13.2673, + "step": 330860 + }, + { + "epoch": 0.6683783336094087, + "grad_norm": 77.05499267578125, + "learning_rate": 3.098844359194907e-06, + "loss": 11.8338, + "step": 330870 + }, + { + "epoch": 0.6683985342420925, + "grad_norm": 314.4021911621094, + "learning_rate": 3.098521515789742e-06, + "loss": 13.9856, + "step": 330880 + }, + { + "epoch": 0.6684187348747763, + "grad_norm": 332.9153747558594, + "learning_rate": 3.0981986816521536e-06, + "loss": 21.6265, + "step": 330890 + }, + { + "epoch": 0.6684389355074601, + "grad_norm": 773.8599853515625, + "learning_rate": 3.097875856783713e-06, + "loss": 25.3767, + "step": 330900 + }, + { + "epoch": 0.668459136140144, + "grad_norm": 431.6402587890625, 
+ "learning_rate": 3.097553041185993e-06, + "loss": 16.8906, + "step": 330910 + }, + { + "epoch": 0.6684793367728277, + "grad_norm": 448.1880798339844, + "learning_rate": 3.097230234860572e-06, + "loss": 13.0612, + "step": 330920 + }, + { + "epoch": 0.6684995374055115, + "grad_norm": 48.33029556274414, + "learning_rate": 3.096907437809019e-06, + "loss": 16.4912, + "step": 330930 + }, + { + "epoch": 0.6685197380381953, + "grad_norm": 537.0992431640625, + "learning_rate": 3.096584650032908e-06, + "loss": 29.7366, + "step": 330940 + }, + { + "epoch": 0.6685399386708791, + "grad_norm": 524.2745971679688, + "learning_rate": 3.0962618715338135e-06, + "loss": 16.0425, + "step": 330950 + }, + { + "epoch": 0.668560139303563, + "grad_norm": 417.84759521484375, + "learning_rate": 3.095939102313307e-06, + "loss": 19.2592, + "step": 330960 + }, + { + "epoch": 0.6685803399362468, + "grad_norm": 320.2450866699219, + "learning_rate": 3.0956163423729624e-06, + "loss": 19.275, + "step": 330970 + }, + { + "epoch": 0.6686005405689306, + "grad_norm": 183.8946990966797, + "learning_rate": 3.0952935917143533e-06, + "loss": 17.7646, + "step": 330980 + }, + { + "epoch": 0.6686207412016144, + "grad_norm": 433.9994812011719, + "learning_rate": 3.0949708503390507e-06, + "loss": 14.775, + "step": 330990 + }, + { + "epoch": 0.6686409418342982, + "grad_norm": 441.6136474609375, + "learning_rate": 3.09464811824863e-06, + "loss": 11.5845, + "step": 331000 + }, + { + "epoch": 0.668661142466982, + "grad_norm": 179.2563018798828, + "learning_rate": 3.094325395444664e-06, + "loss": 10.134, + "step": 331010 + }, + { + "epoch": 0.6686813430996659, + "grad_norm": 332.4448547363281, + "learning_rate": 3.094002681928724e-06, + "loss": 18.0252, + "step": 331020 + }, + { + "epoch": 0.6687015437323497, + "grad_norm": 576.9011840820312, + "learning_rate": 3.093679977702384e-06, + "loss": 25.92, + "step": 331030 + }, + { + "epoch": 0.6687217443650335, + "grad_norm": 361.36859130859375, + "learning_rate": 3.0933572827672155e-06, + "loss": 22.8571, + "step": 331040 + }, + { + "epoch": 0.6687419449977173, + "grad_norm": 227.5767364501953, + "learning_rate": 3.093034597124795e-06, + "loss": 15.8042, + "step": 331050 + }, + { + "epoch": 0.6687621456304012, + "grad_norm": 70.67207336425781, + "learning_rate": 3.09271192077669e-06, + "loss": 12.0711, + "step": 331060 + }, + { + "epoch": 0.668782346263085, + "grad_norm": 440.76043701171875, + "learning_rate": 3.092389253724476e-06, + "loss": 10.1043, + "step": 331070 + }, + { + "epoch": 0.6688025468957688, + "grad_norm": 102.35005187988281, + "learning_rate": 3.092066595969727e-06, + "loss": 16.9162, + "step": 331080 + }, + { + "epoch": 0.6688227475284526, + "grad_norm": 297.5807800292969, + "learning_rate": 3.0917439475140133e-06, + "loss": 16.2803, + "step": 331090 + }, + { + "epoch": 0.6688429481611364, + "grad_norm": 481.68914794921875, + "learning_rate": 3.0914213083589086e-06, + "loss": 23.1711, + "step": 331100 + }, + { + "epoch": 0.6688631487938203, + "grad_norm": 483.2679138183594, + "learning_rate": 3.091098678505985e-06, + "loss": 12.9829, + "step": 331110 + }, + { + "epoch": 0.6688833494265041, + "grad_norm": 443.21087646484375, + "learning_rate": 3.0907760579568135e-06, + "loss": 27.5186, + "step": 331120 + }, + { + "epoch": 0.6689035500591879, + "grad_norm": 659.8768920898438, + "learning_rate": 3.0904534467129677e-06, + "loss": 12.9115, + "step": 331130 + }, + { + "epoch": 0.6689237506918717, + "grad_norm": 319.8518981933594, + "learning_rate": 3.0901308447760236e-06, + "loss": 
9.9042, + "step": 331140 + }, + { + "epoch": 0.6689439513245555, + "grad_norm": 123.06887817382812, + "learning_rate": 3.0898082521475463e-06, + "loss": 16.7963, + "step": 331150 + }, + { + "epoch": 0.6689641519572394, + "grad_norm": 1182.05517578125, + "learning_rate": 3.089485668829113e-06, + "loss": 31.6144, + "step": 331160 + }, + { + "epoch": 0.6689843525899232, + "grad_norm": 556.3887939453125, + "learning_rate": 3.089163094822296e-06, + "loss": 26.4783, + "step": 331170 + }, + { + "epoch": 0.6690045532226069, + "grad_norm": 955.7506103515625, + "learning_rate": 3.0888405301286662e-06, + "loss": 24.3341, + "step": 331180 + }, + { + "epoch": 0.6690247538552907, + "grad_norm": 48.68580627441406, + "learning_rate": 3.088517974749795e-06, + "loss": 7.7547, + "step": 331190 + }, + { + "epoch": 0.6690449544879745, + "grad_norm": 229.7146759033203, + "learning_rate": 3.088195428687254e-06, + "loss": 20.9266, + "step": 331200 + }, + { + "epoch": 0.6690651551206583, + "grad_norm": 264.4624328613281, + "learning_rate": 3.0878728919426203e-06, + "loss": 12.4706, + "step": 331210 + }, + { + "epoch": 0.6690853557533422, + "grad_norm": 216.6519012451172, + "learning_rate": 3.0875503645174586e-06, + "loss": 19.0743, + "step": 331220 + }, + { + "epoch": 0.669105556386026, + "grad_norm": 612.282470703125, + "learning_rate": 3.087227846413346e-06, + "loss": 18.8406, + "step": 331230 + }, + { + "epoch": 0.6691257570187098, + "grad_norm": 406.8621826171875, + "learning_rate": 3.0869053376318538e-06, + "loss": 20.439, + "step": 331240 + }, + { + "epoch": 0.6691459576513936, + "grad_norm": 436.4408874511719, + "learning_rate": 3.0865828381745515e-06, + "loss": 21.0851, + "step": 331250 + }, + { + "epoch": 0.6691661582840774, + "grad_norm": 467.5116882324219, + "learning_rate": 3.086260348043013e-06, + "loss": 20.9692, + "step": 331260 + }, + { + "epoch": 0.6691863589167613, + "grad_norm": 320.90179443359375, + "learning_rate": 3.08593786723881e-06, + "loss": 16.047, + "step": 331270 + }, + { + "epoch": 0.6692065595494451, + "grad_norm": 607.719970703125, + "learning_rate": 3.0856153957635127e-06, + "loss": 12.6264, + "step": 331280 + }, + { + "epoch": 0.6692267601821289, + "grad_norm": 124.06726837158203, + "learning_rate": 3.085292933618693e-06, + "loss": 8.4945, + "step": 331290 + }, + { + "epoch": 0.6692469608148127, + "grad_norm": 204.34312438964844, + "learning_rate": 3.0849704808059266e-06, + "loss": 22.4943, + "step": 331300 + }, + { + "epoch": 0.6692671614474965, + "grad_norm": 419.5154724121094, + "learning_rate": 3.0846480373267783e-06, + "loss": 43.9789, + "step": 331310 + }, + { + "epoch": 0.6692873620801804, + "grad_norm": 347.163818359375, + "learning_rate": 3.0843256031828245e-06, + "loss": 10.7923, + "step": 331320 + }, + { + "epoch": 0.6693075627128642, + "grad_norm": 793.1201171875, + "learning_rate": 3.0840031783756353e-06, + "loss": 26.9269, + "step": 331330 + }, + { + "epoch": 0.669327763345548, + "grad_norm": 370.4996337890625, + "learning_rate": 3.0836807629067828e-06, + "loss": 13.3176, + "step": 331340 + }, + { + "epoch": 0.6693479639782318, + "grad_norm": 635.1002197265625, + "learning_rate": 3.083358356777837e-06, + "loss": 22.2128, + "step": 331350 + }, + { + "epoch": 0.6693681646109156, + "grad_norm": 196.98524475097656, + "learning_rate": 3.083035959990369e-06, + "loss": 14.1499, + "step": 331360 + }, + { + "epoch": 0.6693883652435995, + "grad_norm": 283.0307922363281, + "learning_rate": 3.082713572545955e-06, + "loss": 16.3014, + "step": 331370 + }, + { + "epoch": 
0.6694085658762833, + "grad_norm": 211.803466796875, + "learning_rate": 3.0823911944461593e-06, + "loss": 15.4895, + "step": 331380 + }, + { + "epoch": 0.6694287665089671, + "grad_norm": 195.05145263671875, + "learning_rate": 3.082068825692557e-06, + "loss": 17.9763, + "step": 331390 + }, + { + "epoch": 0.6694489671416509, + "grad_norm": 380.41046142578125, + "learning_rate": 3.0817464662867192e-06, + "loss": 20.3055, + "step": 331400 + }, + { + "epoch": 0.6694691677743347, + "grad_norm": 318.0858459472656, + "learning_rate": 3.081424116230216e-06, + "loss": 30.3429, + "step": 331410 + }, + { + "epoch": 0.6694893684070186, + "grad_norm": 263.2286376953125, + "learning_rate": 3.0811017755246185e-06, + "loss": 14.8603, + "step": 331420 + }, + { + "epoch": 0.6695095690397023, + "grad_norm": 24.916217803955078, + "learning_rate": 3.0807794441715e-06, + "loss": 16.4783, + "step": 331430 + }, + { + "epoch": 0.6695297696723861, + "grad_norm": 263.9822998046875, + "learning_rate": 3.080457122172429e-06, + "loss": 12.776, + "step": 331440 + }, + { + "epoch": 0.6695499703050699, + "grad_norm": 38.8775634765625, + "learning_rate": 3.080134809528975e-06, + "loss": 22.0455, + "step": 331450 + }, + { + "epoch": 0.6695701709377537, + "grad_norm": 0.0, + "learning_rate": 3.0798125062427143e-06, + "loss": 17.708, + "step": 331460 + }, + { + "epoch": 0.6695903715704375, + "grad_norm": 308.3204345703125, + "learning_rate": 3.079490212315212e-06, + "loss": 24.2277, + "step": 331470 + }, + { + "epoch": 0.6696105722031214, + "grad_norm": 341.6029968261719, + "learning_rate": 3.0791679277480422e-06, + "loss": 11.5234, + "step": 331480 + }, + { + "epoch": 0.6696307728358052, + "grad_norm": 613.443359375, + "learning_rate": 3.078845652542775e-06, + "loss": 21.7781, + "step": 331490 + }, + { + "epoch": 0.669650973468489, + "grad_norm": 363.759521484375, + "learning_rate": 3.078523386700982e-06, + "loss": 23.5264, + "step": 331500 + }, + { + "epoch": 0.6696711741011728, + "grad_norm": 635.1989135742188, + "learning_rate": 3.0782011302242326e-06, + "loss": 16.1766, + "step": 331510 + }, + { + "epoch": 0.6696913747338566, + "grad_norm": 473.2365417480469, + "learning_rate": 3.077878883114096e-06, + "loss": 16.9013, + "step": 331520 + }, + { + "epoch": 0.6697115753665405, + "grad_norm": 412.7676696777344, + "learning_rate": 3.0775566453721475e-06, + "loss": 25.0393, + "step": 331530 + }, + { + "epoch": 0.6697317759992243, + "grad_norm": 669.5061645507812, + "learning_rate": 3.077234416999953e-06, + "loss": 12.7492, + "step": 331540 + }, + { + "epoch": 0.6697519766319081, + "grad_norm": 325.734130859375, + "learning_rate": 3.0769121979990845e-06, + "loss": 21.3628, + "step": 331550 + }, + { + "epoch": 0.6697721772645919, + "grad_norm": 888.1439819335938, + "learning_rate": 3.0765899883711148e-06, + "loss": 32.886, + "step": 331560 + }, + { + "epoch": 0.6697923778972757, + "grad_norm": 1865.0050048828125, + "learning_rate": 3.0762677881176102e-06, + "loss": 29.264, + "step": 331570 + }, + { + "epoch": 0.6698125785299596, + "grad_norm": 265.69354248046875, + "learning_rate": 3.075945597240143e-06, + "loss": 23.1557, + "step": 331580 + }, + { + "epoch": 0.6698327791626434, + "grad_norm": 323.1497497558594, + "learning_rate": 3.0756234157402854e-06, + "loss": 24.5348, + "step": 331590 + }, + { + "epoch": 0.6698529797953272, + "grad_norm": 156.175048828125, + "learning_rate": 3.0753012436196033e-06, + "loss": 31.3367, + "step": 331600 + }, + { + "epoch": 0.669873180428011, + "grad_norm": 362.2181396484375, + "learning_rate": 
3.074979080879671e-06, + "loss": 9.6042, + "step": 331610 + }, + { + "epoch": 0.6698933810606948, + "grad_norm": 336.6182556152344, + "learning_rate": 3.074656927522058e-06, + "loss": 27.9608, + "step": 331620 + }, + { + "epoch": 0.6699135816933787, + "grad_norm": 389.6551208496094, + "learning_rate": 3.0743347835483323e-06, + "loss": 30.4426, + "step": 331630 + }, + { + "epoch": 0.6699337823260625, + "grad_norm": 272.8407897949219, + "learning_rate": 3.074012648960065e-06, + "loss": 18.8419, + "step": 331640 + }, + { + "epoch": 0.6699539829587463, + "grad_norm": 282.4333190917969, + "learning_rate": 3.0736905237588275e-06, + "loss": 32.4853, + "step": 331650 + }, + { + "epoch": 0.6699741835914301, + "grad_norm": 279.2618103027344, + "learning_rate": 3.073368407946189e-06, + "loss": 15.6446, + "step": 331660 + }, + { + "epoch": 0.669994384224114, + "grad_norm": 47.570289611816406, + "learning_rate": 3.0730463015237177e-06, + "loss": 14.2794, + "step": 331670 + }, + { + "epoch": 0.6700145848567978, + "grad_norm": 164.76727294921875, + "learning_rate": 3.072724204492985e-06, + "loss": 10.9514, + "step": 331680 + }, + { + "epoch": 0.6700347854894815, + "grad_norm": 283.4651184082031, + "learning_rate": 3.072402116855563e-06, + "loss": 16.4175, + "step": 331690 + }, + { + "epoch": 0.6700549861221653, + "grad_norm": 248.5535125732422, + "learning_rate": 3.0720800386130176e-06, + "loss": 19.2702, + "step": 331700 + }, + { + "epoch": 0.6700751867548491, + "grad_norm": 86.50074768066406, + "learning_rate": 3.071757969766921e-06, + "loss": 18.4815, + "step": 331710 + }, + { + "epoch": 0.6700953873875329, + "grad_norm": 660.4617919921875, + "learning_rate": 3.0714359103188422e-06, + "loss": 14.7241, + "step": 331720 + }, + { + "epoch": 0.6701155880202168, + "grad_norm": 408.3411560058594, + "learning_rate": 3.0711138602703505e-06, + "loss": 16.9852, + "step": 331730 + }, + { + "epoch": 0.6701357886529006, + "grad_norm": 438.41009521484375, + "learning_rate": 3.0707918196230147e-06, + "loss": 11.7692, + "step": 331740 + }, + { + "epoch": 0.6701559892855844, + "grad_norm": 213.214599609375, + "learning_rate": 3.0704697883784083e-06, + "loss": 19.0089, + "step": 331750 + }, + { + "epoch": 0.6701761899182682, + "grad_norm": 576.30908203125, + "learning_rate": 3.0701477665380958e-06, + "loss": 17.0343, + "step": 331760 + }, + { + "epoch": 0.670196390550952, + "grad_norm": 226.357177734375, + "learning_rate": 3.069825754103649e-06, + "loss": 17.4609, + "step": 331770 + }, + { + "epoch": 0.6702165911836359, + "grad_norm": 279.99755859375, + "learning_rate": 3.0695037510766384e-06, + "loss": 24.8922, + "step": 331780 + }, + { + "epoch": 0.6702367918163197, + "grad_norm": 625.1746215820312, + "learning_rate": 3.069181757458633e-06, + "loss": 32.1062, + "step": 331790 + }, + { + "epoch": 0.6702569924490035, + "grad_norm": 982.3031005859375, + "learning_rate": 3.0688597732512004e-06, + "loss": 27.7022, + "step": 331800 + }, + { + "epoch": 0.6702771930816873, + "grad_norm": 530.5897216796875, + "learning_rate": 3.0685377984559104e-06, + "loss": 24.4026, + "step": 331810 + }, + { + "epoch": 0.6702973937143711, + "grad_norm": 169.71556091308594, + "learning_rate": 3.0682158330743363e-06, + "loss": 19.3734, + "step": 331820 + }, + { + "epoch": 0.670317594347055, + "grad_norm": 54.75619888305664, + "learning_rate": 3.0678938771080395e-06, + "loss": 25.9983, + "step": 331830 + }, + { + "epoch": 0.6703377949797388, + "grad_norm": 375.5094909667969, + "learning_rate": 3.067571930558596e-06, + "loss": 36.7743, + 
"step": 331840 + }, + { + "epoch": 0.6703579956124226, + "grad_norm": 181.7646942138672, + "learning_rate": 3.0672499934275725e-06, + "loss": 19.5657, + "step": 331850 + }, + { + "epoch": 0.6703781962451064, + "grad_norm": 11.570462226867676, + "learning_rate": 3.066928065716538e-06, + "loss": 16.3424, + "step": 331860 + }, + { + "epoch": 0.6703983968777902, + "grad_norm": 289.7773742675781, + "learning_rate": 3.066606147427061e-06, + "loss": 18.3625, + "step": 331870 + }, + { + "epoch": 0.6704185975104741, + "grad_norm": 357.0284423828125, + "learning_rate": 3.066284238560713e-06, + "loss": 13.1519, + "step": 331880 + }, + { + "epoch": 0.6704387981431579, + "grad_norm": 285.1661682128906, + "learning_rate": 3.0659623391190583e-06, + "loss": 20.7197, + "step": 331890 + }, + { + "epoch": 0.6704589987758417, + "grad_norm": 229.07870483398438, + "learning_rate": 3.0656404491036696e-06, + "loss": 14.8636, + "step": 331900 + }, + { + "epoch": 0.6704791994085255, + "grad_norm": 759.066650390625, + "learning_rate": 3.0653185685161168e-06, + "loss": 24.2076, + "step": 331910 + }, + { + "epoch": 0.6704994000412093, + "grad_norm": 167.57725524902344, + "learning_rate": 3.064996697357964e-06, + "loss": 13.3535, + "step": 331920 + }, + { + "epoch": 0.6705196006738932, + "grad_norm": 0.0030020507983863354, + "learning_rate": 3.0646748356307833e-06, + "loss": 23.629, + "step": 331930 + }, + { + "epoch": 0.670539801306577, + "grad_norm": 274.72418212890625, + "learning_rate": 3.0643529833361425e-06, + "loss": 9.825, + "step": 331940 + }, + { + "epoch": 0.6705600019392607, + "grad_norm": 442.8713073730469, + "learning_rate": 3.0640311404756116e-06, + "loss": 30.5478, + "step": 331950 + }, + { + "epoch": 0.6705802025719445, + "grad_norm": 575.1695556640625, + "learning_rate": 3.063709307050757e-06, + "loss": 8.4817, + "step": 331960 + }, + { + "epoch": 0.6706004032046283, + "grad_norm": 643.443359375, + "learning_rate": 3.063387483063148e-06, + "loss": 18.8981, + "step": 331970 + }, + { + "epoch": 0.6706206038373121, + "grad_norm": 670.76611328125, + "learning_rate": 3.063065668514357e-06, + "loss": 36.5922, + "step": 331980 + }, + { + "epoch": 0.670640804469996, + "grad_norm": 71.35189056396484, + "learning_rate": 3.0627438634059447e-06, + "loss": 19.2127, + "step": 331990 + }, + { + "epoch": 0.6706610051026798, + "grad_norm": 427.9266662597656, + "learning_rate": 3.0624220677394854e-06, + "loss": 16.5064, + "step": 332000 + }, + { + "epoch": 0.6706812057353636, + "grad_norm": 226.159912109375, + "learning_rate": 3.062100281516547e-06, + "loss": 24.8843, + "step": 332010 + }, + { + "epoch": 0.6707014063680474, + "grad_norm": 345.7786865234375, + "learning_rate": 3.0617785047386953e-06, + "loss": 13.7342, + "step": 332020 + }, + { + "epoch": 0.6707216070007312, + "grad_norm": 342.26702880859375, + "learning_rate": 3.0614567374075007e-06, + "loss": 11.6023, + "step": 332030 + }, + { + "epoch": 0.6707418076334151, + "grad_norm": 441.6045227050781, + "learning_rate": 3.0611349795245317e-06, + "loss": 26.8497, + "step": 332040 + }, + { + "epoch": 0.6707620082660989, + "grad_norm": 272.1783142089844, + "learning_rate": 3.060813231091354e-06, + "loss": 8.2914, + "step": 332050 + }, + { + "epoch": 0.6707822088987827, + "grad_norm": 0.0, + "learning_rate": 3.0604914921095373e-06, + "loss": 12.8405, + "step": 332060 + }, + { + "epoch": 0.6708024095314665, + "grad_norm": 621.2687377929688, + "learning_rate": 3.0601697625806527e-06, + "loss": 29.513, + "step": 332070 + }, + { + "epoch": 0.6708226101641503, + 
"grad_norm": 573.689208984375, + "learning_rate": 3.0598480425062626e-06, + "loss": 13.719, + "step": 332080 + }, + { + "epoch": 0.6708428107968342, + "grad_norm": 245.5880889892578, + "learning_rate": 3.0595263318879383e-06, + "loss": 23.5509, + "step": 332090 + }, + { + "epoch": 0.670863011429518, + "grad_norm": 203.6055145263672, + "learning_rate": 3.059204630727247e-06, + "loss": 18.965, + "step": 332100 + }, + { + "epoch": 0.6708832120622018, + "grad_norm": 151.3707275390625, + "learning_rate": 3.0588829390257592e-06, + "loss": 16.137, + "step": 332110 + }, + { + "epoch": 0.6709034126948856, + "grad_norm": 243.07208251953125, + "learning_rate": 3.0585612567850385e-06, + "loss": 15.4447, + "step": 332120 + }, + { + "epoch": 0.6709236133275694, + "grad_norm": 303.47564697265625, + "learning_rate": 3.0582395840066537e-06, + "loss": 11.0508, + "step": 332130 + }, + { + "epoch": 0.6709438139602533, + "grad_norm": 203.67666625976562, + "learning_rate": 3.0579179206921773e-06, + "loss": 25.1175, + "step": 332140 + }, + { + "epoch": 0.6709640145929371, + "grad_norm": 463.525146484375, + "learning_rate": 3.0575962668431704e-06, + "loss": 46.7824, + "step": 332150 + }, + { + "epoch": 0.6709842152256209, + "grad_norm": 507.78851318359375, + "learning_rate": 3.057274622461204e-06, + "loss": 16.6115, + "step": 332160 + }, + { + "epoch": 0.6710044158583047, + "grad_norm": 252.3871307373047, + "learning_rate": 3.0569529875478465e-06, + "loss": 10.5707, + "step": 332170 + }, + { + "epoch": 0.6710246164909885, + "grad_norm": 123.0634994506836, + "learning_rate": 3.0566313621046638e-06, + "loss": 18.2667, + "step": 332180 + }, + { + "epoch": 0.6710448171236724, + "grad_norm": 213.22824096679688, + "learning_rate": 3.0563097461332246e-06, + "loss": 11.4551, + "step": 332190 + }, + { + "epoch": 0.6710650177563561, + "grad_norm": 216.26004028320312, + "learning_rate": 3.0559881396350967e-06, + "loss": 35.535, + "step": 332200 + }, + { + "epoch": 0.6710852183890399, + "grad_norm": 514.7017211914062, + "learning_rate": 3.0556665426118456e-06, + "loss": 32.0472, + "step": 332210 + }, + { + "epoch": 0.6711054190217237, + "grad_norm": 693.4927978515625, + "learning_rate": 3.055344955065039e-06, + "loss": 19.8245, + "step": 332220 + }, + { + "epoch": 0.6711256196544075, + "grad_norm": 648.6301879882812, + "learning_rate": 3.0550233769962468e-06, + "loss": 16.481, + "step": 332230 + }, + { + "epoch": 0.6711458202870914, + "grad_norm": 439.83880615234375, + "learning_rate": 3.0547018084070344e-06, + "loss": 17.4814, + "step": 332240 + }, + { + "epoch": 0.6711660209197752, + "grad_norm": 284.37274169921875, + "learning_rate": 3.0543802492989693e-06, + "loss": 21.2819, + "step": 332250 + }, + { + "epoch": 0.671186221552459, + "grad_norm": 294.681884765625, + "learning_rate": 3.054058699673619e-06, + "loss": 8.6201, + "step": 332260 + }, + { + "epoch": 0.6712064221851428, + "grad_norm": 124.22969055175781, + "learning_rate": 3.0537371595325515e-06, + "loss": 13.3757, + "step": 332270 + }, + { + "epoch": 0.6712266228178266, + "grad_norm": 557.1804809570312, + "learning_rate": 3.0534156288773307e-06, + "loss": 13.7903, + "step": 332280 + }, + { + "epoch": 0.6712468234505105, + "grad_norm": 172.43777465820312, + "learning_rate": 3.0530941077095273e-06, + "loss": 26.861, + "step": 332290 + }, + { + "epoch": 0.6712670240831943, + "grad_norm": 580.1685180664062, + "learning_rate": 3.0527725960307083e-06, + "loss": 13.5221, + "step": 332300 + }, + { + "epoch": 0.6712872247158781, + "grad_norm": 460.0882873535156, + 
"learning_rate": 3.0524510938424377e-06, + "loss": 16.4842, + "step": 332310 + }, + { + "epoch": 0.6713074253485619, + "grad_norm": 408.4290466308594, + "learning_rate": 3.0521296011462852e-06, + "loss": 15.9181, + "step": 332320 + }, + { + "epoch": 0.6713276259812457, + "grad_norm": 355.74078369140625, + "learning_rate": 3.0518081179438173e-06, + "loss": 8.2587, + "step": 332330 + }, + { + "epoch": 0.6713478266139296, + "grad_norm": 231.6913299560547, + "learning_rate": 3.0514866442365998e-06, + "loss": 25.9216, + "step": 332340 + }, + { + "epoch": 0.6713680272466134, + "grad_norm": 223.23443603515625, + "learning_rate": 3.0511651800261987e-06, + "loss": 21.761, + "step": 332350 + }, + { + "epoch": 0.6713882278792972, + "grad_norm": 80.02017211914062, + "learning_rate": 3.0508437253141855e-06, + "loss": 12.4434, + "step": 332360 + }, + { + "epoch": 0.671408428511981, + "grad_norm": 394.69403076171875, + "learning_rate": 3.0505222801021193e-06, + "loss": 21.9772, + "step": 332370 + }, + { + "epoch": 0.6714286291446648, + "grad_norm": 557.94873046875, + "learning_rate": 3.0502008443915733e-06, + "loss": 17.7909, + "step": 332380 + }, + { + "epoch": 0.6714488297773487, + "grad_norm": 954.2001953125, + "learning_rate": 3.049879418184112e-06, + "loss": 23.7708, + "step": 332390 + }, + { + "epoch": 0.6714690304100325, + "grad_norm": 360.5732727050781, + "learning_rate": 3.049558001481302e-06, + "loss": 21.7766, + "step": 332400 + }, + { + "epoch": 0.6714892310427163, + "grad_norm": 448.75384521484375, + "learning_rate": 3.0492365942847097e-06, + "loss": 22.361, + "step": 332410 + }, + { + "epoch": 0.6715094316754001, + "grad_norm": 48.64217758178711, + "learning_rate": 3.0489151965958998e-06, + "loss": 18.3156, + "step": 332420 + }, + { + "epoch": 0.6715296323080839, + "grad_norm": 142.99681091308594, + "learning_rate": 3.0485938084164445e-06, + "loss": 29.1174, + "step": 332430 + }, + { + "epoch": 0.6715498329407678, + "grad_norm": 340.4880676269531, + "learning_rate": 3.048272429747903e-06, + "loss": 17.4341, + "step": 332440 + }, + { + "epoch": 0.6715700335734516, + "grad_norm": 271.4729309082031, + "learning_rate": 3.047951060591845e-06, + "loss": 18.4208, + "step": 332450 + }, + { + "epoch": 0.6715902342061353, + "grad_norm": 582.4696044921875, + "learning_rate": 3.047629700949839e-06, + "loss": 15.2522, + "step": 332460 + }, + { + "epoch": 0.6716104348388191, + "grad_norm": 477.7011413574219, + "learning_rate": 3.0473083508234474e-06, + "loss": 21.3859, + "step": 332470 + }, + { + "epoch": 0.6716306354715029, + "grad_norm": 357.6345520019531, + "learning_rate": 3.0469870102142387e-06, + "loss": 17.1324, + "step": 332480 + }, + { + "epoch": 0.6716508361041867, + "grad_norm": 267.4741516113281, + "learning_rate": 3.046665679123778e-06, + "loss": 17.3637, + "step": 332490 + }, + { + "epoch": 0.6716710367368706, + "grad_norm": 266.8260498046875, + "learning_rate": 3.0463443575536324e-06, + "loss": 21.8634, + "step": 332500 + }, + { + "epoch": 0.6716912373695544, + "grad_norm": 515.8812255859375, + "learning_rate": 3.046023045505366e-06, + "loss": 11.6234, + "step": 332510 + }, + { + "epoch": 0.6717114380022382, + "grad_norm": 665.266845703125, + "learning_rate": 3.045701742980549e-06, + "loss": 36.3828, + "step": 332520 + }, + { + "epoch": 0.671731638634922, + "grad_norm": 324.485107421875, + "learning_rate": 3.0453804499807416e-06, + "loss": 13.2951, + "step": 332530 + }, + { + "epoch": 0.6717518392676058, + "grad_norm": 406.4908447265625, + "learning_rate": 3.045059166507515e-06, + "loss": 
17.0992, + "step": 332540 + }, + { + "epoch": 0.6717720399002897, + "grad_norm": 182.7077178955078, + "learning_rate": 3.0447378925624316e-06, + "loss": 31.4674, + "step": 332550 + }, + { + "epoch": 0.6717922405329735, + "grad_norm": 235.09619140625, + "learning_rate": 3.04441662814706e-06, + "loss": 15.2149, + "step": 332560 + }, + { + "epoch": 0.6718124411656573, + "grad_norm": 128.1046905517578, + "learning_rate": 3.044095373262963e-06, + "loss": 20.1439, + "step": 332570 + }, + { + "epoch": 0.6718326417983411, + "grad_norm": 153.95370483398438, + "learning_rate": 3.043774127911707e-06, + "loss": 18.1651, + "step": 332580 + }, + { + "epoch": 0.6718528424310249, + "grad_norm": 123.7090835571289, + "learning_rate": 3.0434528920948626e-06, + "loss": 15.8325, + "step": 332590 + }, + { + "epoch": 0.6718730430637088, + "grad_norm": 126.16527557373047, + "learning_rate": 3.043131665813988e-06, + "loss": 14.3916, + "step": 332600 + }, + { + "epoch": 0.6718932436963926, + "grad_norm": 829.8492431640625, + "learning_rate": 3.0428104490706536e-06, + "loss": 18.0627, + "step": 332610 + }, + { + "epoch": 0.6719134443290764, + "grad_norm": 34.451019287109375, + "learning_rate": 3.0424892418664244e-06, + "loss": 9.6449, + "step": 332620 + }, + { + "epoch": 0.6719336449617602, + "grad_norm": 169.1112518310547, + "learning_rate": 3.0421680442028644e-06, + "loss": 13.193, + "step": 332630 + }, + { + "epoch": 0.671953845594444, + "grad_norm": 443.2011413574219, + "learning_rate": 3.0418468560815396e-06, + "loss": 16.4191, + "step": 332640 + }, + { + "epoch": 0.6719740462271279, + "grad_norm": 565.5675048828125, + "learning_rate": 3.041525677504017e-06, + "loss": 17.3625, + "step": 332650 + }, + { + "epoch": 0.6719942468598117, + "grad_norm": 370.6284484863281, + "learning_rate": 3.0412045084718597e-06, + "loss": 11.8785, + "step": 332660 + }, + { + "epoch": 0.6720144474924955, + "grad_norm": 648.5980834960938, + "learning_rate": 3.0408833489866326e-06, + "loss": 21.9276, + "step": 332670 + }, + { + "epoch": 0.6720346481251793, + "grad_norm": 477.17620849609375, + "learning_rate": 3.040562199049906e-06, + "loss": 26.8964, + "step": 332680 + }, + { + "epoch": 0.6720548487578631, + "grad_norm": 518.5248413085938, + "learning_rate": 3.040241058663238e-06, + "loss": 19.8161, + "step": 332690 + }, + { + "epoch": 0.672075049390547, + "grad_norm": 708.6114501953125, + "learning_rate": 3.0399199278281986e-06, + "loss": 22.0998, + "step": 332700 + }, + { + "epoch": 0.6720952500232307, + "grad_norm": 29.858028411865234, + "learning_rate": 3.039598806546352e-06, + "loss": 11.2511, + "step": 332710 + }, + { + "epoch": 0.6721154506559145, + "grad_norm": 3.5512747764587402, + "learning_rate": 3.039277694819264e-06, + "loss": 12.7374, + "step": 332720 + }, + { + "epoch": 0.6721356512885983, + "grad_norm": 539.164306640625, + "learning_rate": 3.0389565926484974e-06, + "loss": 16.9234, + "step": 332730 + }, + { + "epoch": 0.6721558519212821, + "grad_norm": 192.2627410888672, + "learning_rate": 3.038635500035618e-06, + "loss": 18.5794, + "step": 332740 + }, + { + "epoch": 0.672176052553966, + "grad_norm": 176.16976928710938, + "learning_rate": 3.0383144169821944e-06, + "loss": 27.3413, + "step": 332750 + }, + { + "epoch": 0.6721962531866498, + "grad_norm": 2305.068115234375, + "learning_rate": 3.0379933434897846e-06, + "loss": 79.7899, + "step": 332760 + }, + { + "epoch": 0.6722164538193336, + "grad_norm": 297.8510437011719, + "learning_rate": 3.0376722795599587e-06, + "loss": 12.3784, + "step": 332770 + }, + { + "epoch": 
0.6722366544520174, + "grad_norm": 238.76150512695312, + "learning_rate": 3.0373512251942817e-06, + "loss": 13.8894, + "step": 332780 + }, + { + "epoch": 0.6722568550847012, + "grad_norm": 1.7508667707443237, + "learning_rate": 3.0370301803943147e-06, + "loss": 18.9702, + "step": 332790 + }, + { + "epoch": 0.672277055717385, + "grad_norm": 530.020751953125, + "learning_rate": 3.0367091451616254e-06, + "loss": 13.963, + "step": 332800 + }, + { + "epoch": 0.6722972563500689, + "grad_norm": 418.89166259765625, + "learning_rate": 3.0363881194977784e-06, + "loss": 21.8154, + "step": 332810 + }, + { + "epoch": 0.6723174569827527, + "grad_norm": 42.768985748291016, + "learning_rate": 3.0360671034043365e-06, + "loss": 6.1037, + "step": 332820 + }, + { + "epoch": 0.6723376576154365, + "grad_norm": 186.217529296875, + "learning_rate": 3.035746096882864e-06, + "loss": 12.7598, + "step": 332830 + }, + { + "epoch": 0.6723578582481203, + "grad_norm": 259.81866455078125, + "learning_rate": 3.0354250999349277e-06, + "loss": 24.601, + "step": 332840 + }, + { + "epoch": 0.6723780588808042, + "grad_norm": 418.35357666015625, + "learning_rate": 3.035104112562093e-06, + "loss": 15.1968, + "step": 332850 + }, + { + "epoch": 0.672398259513488, + "grad_norm": 705.2666625976562, + "learning_rate": 3.034783134765921e-06, + "loss": 32.0045, + "step": 332860 + }, + { + "epoch": 0.6724184601461718, + "grad_norm": 841.7813110351562, + "learning_rate": 3.034462166547978e-06, + "loss": 24.4324, + "step": 332870 + }, + { + "epoch": 0.6724386607788556, + "grad_norm": 312.4965515136719, + "learning_rate": 3.0341412079098285e-06, + "loss": 20.5919, + "step": 332880 + }, + { + "epoch": 0.6724588614115394, + "grad_norm": 266.7486877441406, + "learning_rate": 3.033820258853035e-06, + "loss": 20.5033, + "step": 332890 + }, + { + "epoch": 0.6724790620442233, + "grad_norm": 155.8134002685547, + "learning_rate": 3.033499319379163e-06, + "loss": 19.8645, + "step": 332900 + }, + { + "epoch": 0.6724992626769071, + "grad_norm": 156.40814208984375, + "learning_rate": 3.033178389489779e-06, + "loss": 9.9884, + "step": 332910 + }, + { + "epoch": 0.6725194633095909, + "grad_norm": 522.968994140625, + "learning_rate": 3.0328574691864434e-06, + "loss": 19.5756, + "step": 332920 + }, + { + "epoch": 0.6725396639422747, + "grad_norm": 226.67446899414062, + "learning_rate": 3.0325365584707216e-06, + "loss": 15.3141, + "step": 332930 + }, + { + "epoch": 0.6725598645749585, + "grad_norm": 249.93190002441406, + "learning_rate": 3.03221565734418e-06, + "loss": 9.8185, + "step": 332940 + }, + { + "epoch": 0.6725800652076424, + "grad_norm": 376.44781494140625, + "learning_rate": 3.0318947658083787e-06, + "loss": 14.493, + "step": 332950 + }, + { + "epoch": 0.6726002658403262, + "grad_norm": 231.81051635742188, + "learning_rate": 3.031573883864882e-06, + "loss": 6.6049, + "step": 332960 + }, + { + "epoch": 0.6726204664730099, + "grad_norm": 0.0, + "learning_rate": 3.03125301151526e-06, + "loss": 14.4246, + "step": 332970 + }, + { + "epoch": 0.6726406671056937, + "grad_norm": 296.96624755859375, + "learning_rate": 3.0309321487610682e-06, + "loss": 19.5908, + "step": 332980 + }, + { + "epoch": 0.6726608677383775, + "grad_norm": 409.90753173828125, + "learning_rate": 3.030611295603876e-06, + "loss": 17.2561, + "step": 332990 + }, + { + "epoch": 0.6726810683710613, + "grad_norm": 477.5803527832031, + "learning_rate": 3.030290452045245e-06, + "loss": 19.0041, + "step": 333000 + }, + { + "epoch": 0.6727012690037452, + "grad_norm": 343.8766784667969, + 
"learning_rate": 3.02996961808674e-06, + "loss": 23.4326, + "step": 333010 + }, + { + "epoch": 0.672721469636429, + "grad_norm": 538.6311645507812, + "learning_rate": 3.0296487937299235e-06, + "loss": 12.0906, + "step": 333020 + }, + { + "epoch": 0.6727416702691128, + "grad_norm": 185.01937866210938, + "learning_rate": 3.0293279789763584e-06, + "loss": 17.3637, + "step": 333030 + }, + { + "epoch": 0.6727618709017966, + "grad_norm": 257.2732238769531, + "learning_rate": 3.0290071738276132e-06, + "loss": 22.591, + "step": 333040 + }, + { + "epoch": 0.6727820715344804, + "grad_norm": 710.9138793945312, + "learning_rate": 3.028686378285245e-06, + "loss": 21.7439, + "step": 333050 + }, + { + "epoch": 0.6728022721671643, + "grad_norm": 348.0620422363281, + "learning_rate": 3.0283655923508214e-06, + "loss": 24.9464, + "step": 333060 + }, + { + "epoch": 0.6728224727998481, + "grad_norm": 0.0, + "learning_rate": 3.0280448160259057e-06, + "loss": 24.0796, + "step": 333070 + }, + { + "epoch": 0.6728426734325319, + "grad_norm": 271.998291015625, + "learning_rate": 3.027724049312059e-06, + "loss": 17.1429, + "step": 333080 + }, + { + "epoch": 0.6728628740652157, + "grad_norm": 433.5883483886719, + "learning_rate": 3.0274032922108465e-06, + "loss": 16.8932, + "step": 333090 + }, + { + "epoch": 0.6728830746978995, + "grad_norm": 386.6865539550781, + "learning_rate": 3.0270825447238316e-06, + "loss": 19.7961, + "step": 333100 + }, + { + "epoch": 0.6729032753305834, + "grad_norm": 306.2874450683594, + "learning_rate": 3.0267618068525773e-06, + "loss": 16.5354, + "step": 333110 + }, + { + "epoch": 0.6729234759632672, + "grad_norm": 272.48138427734375, + "learning_rate": 3.0264410785986444e-06, + "loss": 25.4983, + "step": 333120 + }, + { + "epoch": 0.672943676595951, + "grad_norm": 491.2439270019531, + "learning_rate": 3.026120359963602e-06, + "loss": 13.5619, + "step": 333130 + }, + { + "epoch": 0.6729638772286348, + "grad_norm": 544.17138671875, + "learning_rate": 3.025799650949006e-06, + "loss": 13.9844, + "step": 333140 + }, + { + "epoch": 0.6729840778613186, + "grad_norm": 191.28065490722656, + "learning_rate": 3.025478951556424e-06, + "loss": 13.5854, + "step": 333150 + }, + { + "epoch": 0.6730042784940025, + "grad_norm": 503.2778625488281, + "learning_rate": 3.0251582617874187e-06, + "loss": 19.4419, + "step": 333160 + }, + { + "epoch": 0.6730244791266863, + "grad_norm": 342.5376281738281, + "learning_rate": 3.0248375816435537e-06, + "loss": 25.083, + "step": 333170 + }, + { + "epoch": 0.6730446797593701, + "grad_norm": 163.40673828125, + "learning_rate": 3.024516911126389e-06, + "loss": 13.9405, + "step": 333180 + }, + { + "epoch": 0.6730648803920539, + "grad_norm": 339.19110107421875, + "learning_rate": 3.024196250237489e-06, + "loss": 20.5884, + "step": 333190 + }, + { + "epoch": 0.6730850810247377, + "grad_norm": 216.12643432617188, + "learning_rate": 3.023875598978419e-06, + "loss": 19.2741, + "step": 333200 + }, + { + "epoch": 0.6731052816574216, + "grad_norm": 448.638671875, + "learning_rate": 3.0235549573507374e-06, + "loss": 29.0432, + "step": 333210 + }, + { + "epoch": 0.6731254822901054, + "grad_norm": 457.5461730957031, + "learning_rate": 3.0232343253560097e-06, + "loss": 14.686, + "step": 333220 + }, + { + "epoch": 0.6731456829227891, + "grad_norm": 240.27005004882812, + "learning_rate": 3.0229137029957993e-06, + "loss": 20.8447, + "step": 333230 + }, + { + "epoch": 0.6731658835554729, + "grad_norm": 180.43292236328125, + "learning_rate": 3.022593090271667e-06, + "loss": 13.5783, + 
"step": 333240 + }, + { + "epoch": 0.6731860841881567, + "grad_norm": 133.79052734375, + "learning_rate": 3.022272487185176e-06, + "loss": 12.6388, + "step": 333250 + }, + { + "epoch": 0.6732062848208406, + "grad_norm": 243.9981689453125, + "learning_rate": 3.02195189373789e-06, + "loss": 20.7873, + "step": 333260 + }, + { + "epoch": 0.6732264854535244, + "grad_norm": 375.3045959472656, + "learning_rate": 3.0216313099313694e-06, + "loss": 7.6527, + "step": 333270 + }, + { + "epoch": 0.6732466860862082, + "grad_norm": 211.34437561035156, + "learning_rate": 3.0213107357671767e-06, + "loss": 5.5753, + "step": 333280 + }, + { + "epoch": 0.673266886718892, + "grad_norm": 399.2186584472656, + "learning_rate": 3.020990171246879e-06, + "loss": 14.1786, + "step": 333290 + }, + { + "epoch": 0.6732870873515758, + "grad_norm": 150.73834228515625, + "learning_rate": 3.0206696163720317e-06, + "loss": 25.2453, + "step": 333300 + }, + { + "epoch": 0.6733072879842597, + "grad_norm": 225.5262908935547, + "learning_rate": 3.0203490711442018e-06, + "loss": 17.0998, + "step": 333310 + }, + { + "epoch": 0.6733274886169435, + "grad_norm": 422.083740234375, + "learning_rate": 3.0200285355649504e-06, + "loss": 17.2822, + "step": 333320 + }, + { + "epoch": 0.6733476892496273, + "grad_norm": 88.32089233398438, + "learning_rate": 3.019708009635841e-06, + "loss": 22.9718, + "step": 333330 + }, + { + "epoch": 0.6733678898823111, + "grad_norm": 281.3292236328125, + "learning_rate": 3.019387493358434e-06, + "loss": 13.1169, + "step": 333340 + }, + { + "epoch": 0.6733880905149949, + "grad_norm": 584.72900390625, + "learning_rate": 3.0190669867342902e-06, + "loss": 15.8181, + "step": 333350 + }, + { + "epoch": 0.6734082911476788, + "grad_norm": 479.5703125, + "learning_rate": 3.0187464897649774e-06, + "loss": 27.213, + "step": 333360 + }, + { + "epoch": 0.6734284917803626, + "grad_norm": 337.3212890625, + "learning_rate": 3.0184260024520508e-06, + "loss": 19.3892, + "step": 333370 + }, + { + "epoch": 0.6734486924130464, + "grad_norm": 227.15969848632812, + "learning_rate": 3.018105524797076e-06, + "loss": 26.0862, + "step": 333380 + }, + { + "epoch": 0.6734688930457302, + "grad_norm": 128.68341064453125, + "learning_rate": 3.017785056801615e-06, + "loss": 22.8761, + "step": 333390 + }, + { + "epoch": 0.673489093678414, + "grad_norm": 461.95404052734375, + "learning_rate": 3.0174645984672298e-06, + "loss": 10.7197, + "step": 333400 + }, + { + "epoch": 0.6735092943110979, + "grad_norm": 443.9961242675781, + "learning_rate": 3.0171441497954807e-06, + "loss": 9.9019, + "step": 333410 + }, + { + "epoch": 0.6735294949437817, + "grad_norm": 482.46185302734375, + "learning_rate": 3.0168237107879315e-06, + "loss": 21.0501, + "step": 333420 + }, + { + "epoch": 0.6735496955764655, + "grad_norm": 503.1119689941406, + "learning_rate": 3.0165032814461426e-06, + "loss": 30.3579, + "step": 333430 + }, + { + "epoch": 0.6735698962091493, + "grad_norm": 437.2008361816406, + "learning_rate": 3.016182861771675e-06, + "loss": 30.5888, + "step": 333440 + }, + { + "epoch": 0.6735900968418331, + "grad_norm": 368.2873229980469, + "learning_rate": 3.015862451766092e-06, + "loss": 20.0231, + "step": 333450 + }, + { + "epoch": 0.673610297474517, + "grad_norm": 457.2514343261719, + "learning_rate": 3.0155420514309563e-06, + "loss": 41.0843, + "step": 333460 + }, + { + "epoch": 0.6736304981072008, + "grad_norm": 254.58042907714844, + "learning_rate": 3.0152216607678262e-06, + "loss": 17.6813, + "step": 333470 + }, + { + "epoch": 0.6736506987398845, + 
"grad_norm": 565.47314453125, + "learning_rate": 3.0149012797782655e-06, + "loss": 25.8258, + "step": 333480 + }, + { + "epoch": 0.6736708993725683, + "grad_norm": 167.13243103027344, + "learning_rate": 3.0145809084638366e-06, + "loss": 23.5754, + "step": 333490 + }, + { + "epoch": 0.6736911000052521, + "grad_norm": 352.0194091796875, + "learning_rate": 3.0142605468260976e-06, + "loss": 35.8935, + "step": 333500 + }, + { + "epoch": 0.6737113006379359, + "grad_norm": 212.7182159423828, + "learning_rate": 3.0139401948666112e-06, + "loss": 13.6547, + "step": 333510 + }, + { + "epoch": 0.6737315012706198, + "grad_norm": 168.4481201171875, + "learning_rate": 3.013619852586942e-06, + "loss": 27.5947, + "step": 333520 + }, + { + "epoch": 0.6737517019033036, + "grad_norm": 175.2161102294922, + "learning_rate": 3.013299519988647e-06, + "loss": 15.3217, + "step": 333530 + }, + { + "epoch": 0.6737719025359874, + "grad_norm": 37.93092346191406, + "learning_rate": 3.0129791970732904e-06, + "loss": 16.1902, + "step": 333540 + }, + { + "epoch": 0.6737921031686712, + "grad_norm": 446.0578918457031, + "learning_rate": 3.012658883842432e-06, + "loss": 25.7368, + "step": 333550 + }, + { + "epoch": 0.673812303801355, + "grad_norm": 71.3383560180664, + "learning_rate": 3.0123385802976323e-06, + "loss": 14.8846, + "step": 333560 + }, + { + "epoch": 0.6738325044340389, + "grad_norm": 314.0921630859375, + "learning_rate": 3.0120182864404535e-06, + "loss": 14.2352, + "step": 333570 + }, + { + "epoch": 0.6738527050667227, + "grad_norm": 783.0932006835938, + "learning_rate": 3.0116980022724575e-06, + "loss": 14.542, + "step": 333580 + }, + { + "epoch": 0.6738729056994065, + "grad_norm": 690.4853515625, + "learning_rate": 3.011377727795202e-06, + "loss": 20.697, + "step": 333590 + }, + { + "epoch": 0.6738931063320903, + "grad_norm": 1.116074562072754, + "learning_rate": 3.011057463010252e-06, + "loss": 32.6198, + "step": 333600 + }, + { + "epoch": 0.6739133069647741, + "grad_norm": 68.26553344726562, + "learning_rate": 3.0107372079191656e-06, + "loss": 12.875, + "step": 333610 + }, + { + "epoch": 0.673933507597458, + "grad_norm": 531.0414428710938, + "learning_rate": 3.010416962523507e-06, + "loss": 18.5981, + "step": 333620 + }, + { + "epoch": 0.6739537082301418, + "grad_norm": 451.71649169921875, + "learning_rate": 3.0100967268248326e-06, + "loss": 11.6052, + "step": 333630 + }, + { + "epoch": 0.6739739088628256, + "grad_norm": 403.9212341308594, + "learning_rate": 3.009776500824706e-06, + "loss": 8.2194, + "step": 333640 + }, + { + "epoch": 0.6739941094955094, + "grad_norm": 181.1913299560547, + "learning_rate": 3.009456284524688e-06, + "loss": 14.8881, + "step": 333650 + }, + { + "epoch": 0.6740143101281932, + "grad_norm": 387.13323974609375, + "learning_rate": 3.0091360779263373e-06, + "loss": 28.0891, + "step": 333660 + }, + { + "epoch": 0.6740345107608771, + "grad_norm": 456.1517639160156, + "learning_rate": 3.0088158810312163e-06, + "loss": 15.3469, + "step": 333670 + }, + { + "epoch": 0.6740547113935609, + "grad_norm": 475.6103820800781, + "learning_rate": 3.0084956938408873e-06, + "loss": 16.1338, + "step": 333680 + }, + { + "epoch": 0.6740749120262447, + "grad_norm": 417.0465393066406, + "learning_rate": 3.0081755163569066e-06, + "loss": 21.4768, + "step": 333690 + }, + { + "epoch": 0.6740951126589285, + "grad_norm": 322.91546630859375, + "learning_rate": 3.007855348580837e-06, + "loss": 25.1519, + "step": 333700 + }, + { + "epoch": 0.6741153132916123, + "grad_norm": 215.58827209472656, + "learning_rate": 
3.0075351905142404e-06, + "loss": 25.6934, + "step": 333710 + }, + { + "epoch": 0.6741355139242962, + "grad_norm": 553.62060546875, + "learning_rate": 3.0072150421586737e-06, + "loss": 25.4559, + "step": 333720 + }, + { + "epoch": 0.67415571455698, + "grad_norm": 398.96392822265625, + "learning_rate": 3.0068949035156984e-06, + "loss": 18.9469, + "step": 333730 + }, + { + "epoch": 0.6741759151896637, + "grad_norm": 617.6716918945312, + "learning_rate": 3.006574774586879e-06, + "loss": 30.2767, + "step": 333740 + }, + { + "epoch": 0.6741961158223475, + "grad_norm": 322.0965881347656, + "learning_rate": 3.0062546553737692e-06, + "loss": 8.5848, + "step": 333750 + }, + { + "epoch": 0.6742163164550313, + "grad_norm": 267.24102783203125, + "learning_rate": 3.0059345458779337e-06, + "loss": 16.5096, + "step": 333760 + }, + { + "epoch": 0.6742365170877151, + "grad_norm": 366.47442626953125, + "learning_rate": 3.0056144461009313e-06, + "loss": 11.5499, + "step": 333770 + }, + { + "epoch": 0.674256717720399, + "grad_norm": 962.688232421875, + "learning_rate": 3.005294356044323e-06, + "loss": 21.765, + "step": 333780 + }, + { + "epoch": 0.6742769183530828, + "grad_norm": 19.26727294921875, + "learning_rate": 3.0049742757096666e-06, + "loss": 13.6877, + "step": 333790 + }, + { + "epoch": 0.6742971189857666, + "grad_norm": 64.33914184570312, + "learning_rate": 3.004654205098524e-06, + "loss": 18.1675, + "step": 333800 + }, + { + "epoch": 0.6743173196184504, + "grad_norm": 382.41302490234375, + "learning_rate": 3.004334144212456e-06, + "loss": 14.0915, + "step": 333810 + }, + { + "epoch": 0.6743375202511342, + "grad_norm": 450.1775207519531, + "learning_rate": 3.0040140930530198e-06, + "loss": 21.0209, + "step": 333820 + }, + { + "epoch": 0.6743577208838181, + "grad_norm": 7.644445419311523, + "learning_rate": 3.003694051621777e-06, + "loss": 14.2, + "step": 333830 + }, + { + "epoch": 0.6743779215165019, + "grad_norm": 235.771240234375, + "learning_rate": 3.003374019920289e-06, + "loss": 25.1988, + "step": 333840 + }, + { + "epoch": 0.6743981221491857, + "grad_norm": 372.7322998046875, + "learning_rate": 3.003053997950112e-06, + "loss": 11.7135, + "step": 333850 + }, + { + "epoch": 0.6744183227818695, + "grad_norm": 789.88330078125, + "learning_rate": 3.002733985712808e-06, + "loss": 17.725, + "step": 333860 + }, + { + "epoch": 0.6744385234145533, + "grad_norm": 373.9104309082031, + "learning_rate": 3.0024139832099374e-06, + "loss": 13.7187, + "step": 333870 + }, + { + "epoch": 0.6744587240472372, + "grad_norm": 294.066162109375, + "learning_rate": 3.002093990443058e-06, + "loss": 9.0585, + "step": 333880 + }, + { + "epoch": 0.674478924679921, + "grad_norm": 163.41845703125, + "learning_rate": 3.001774007413729e-06, + "loss": 19.8921, + "step": 333890 + }, + { + "epoch": 0.6744991253126048, + "grad_norm": 325.6176452636719, + "learning_rate": 3.001454034123512e-06, + "loss": 17.5673, + "step": 333900 + }, + { + "epoch": 0.6745193259452886, + "grad_norm": 257.6412048339844, + "learning_rate": 3.0011340705739665e-06, + "loss": 23.79, + "step": 333910 + }, + { + "epoch": 0.6745395265779724, + "grad_norm": 342.8741455078125, + "learning_rate": 3.0008141167666505e-06, + "loss": 17.5091, + "step": 333920 + }, + { + "epoch": 0.6745597272106563, + "grad_norm": 215.3223876953125, + "learning_rate": 3.0004941727031233e-06, + "loss": 26.7791, + "step": 333930 + }, + { + "epoch": 0.6745799278433401, + "grad_norm": 797.1070556640625, + "learning_rate": 3.0001742383849464e-06, + "loss": 29.5955, + "step": 333940 + }, 
+ { + "epoch": 0.6746001284760239, + "grad_norm": 149.93658447265625, + "learning_rate": 2.9998543138136773e-06, + "loss": 15.479, + "step": 333950 + }, + { + "epoch": 0.6746203291087077, + "grad_norm": 138.8621826171875, + "learning_rate": 2.9995343989908743e-06, + "loss": 19.0752, + "step": 333960 + }, + { + "epoch": 0.6746405297413915, + "grad_norm": 311.9769287109375, + "learning_rate": 2.9992144939181007e-06, + "loss": 13.6434, + "step": 333970 + }, + { + "epoch": 0.6746607303740754, + "grad_norm": 322.4996643066406, + "learning_rate": 2.99889459859691e-06, + "loss": 12.097, + "step": 333980 + }, + { + "epoch": 0.6746809310067591, + "grad_norm": 152.3280792236328, + "learning_rate": 2.9985747130288657e-06, + "loss": 20.3478, + "step": 333990 + }, + { + "epoch": 0.6747011316394429, + "grad_norm": 326.5318298339844, + "learning_rate": 2.9982548372155264e-06, + "loss": 20.6573, + "step": 334000 + }, + { + "epoch": 0.6747213322721267, + "grad_norm": 543.2077026367188, + "learning_rate": 2.9979349711584494e-06, + "loss": 18.7329, + "step": 334010 + }, + { + "epoch": 0.6747415329048105, + "grad_norm": 370.9176025390625, + "learning_rate": 2.9976151148591937e-06, + "loss": 30.0699, + "step": 334020 + }, + { + "epoch": 0.6747617335374944, + "grad_norm": 654.6058349609375, + "learning_rate": 2.9972952683193207e-06, + "loss": 33.1942, + "step": 334030 + }, + { + "epoch": 0.6747819341701782, + "grad_norm": 898.57958984375, + "learning_rate": 2.9969754315403865e-06, + "loss": 23.4372, + "step": 334040 + }, + { + "epoch": 0.674802134802862, + "grad_norm": 494.323486328125, + "learning_rate": 2.9966556045239504e-06, + "loss": 15.1364, + "step": 334050 + }, + { + "epoch": 0.6748223354355458, + "grad_norm": 587.9016723632812, + "learning_rate": 2.9963357872715727e-06, + "loss": 14.8658, + "step": 334060 + }, + { + "epoch": 0.6748425360682296, + "grad_norm": 479.2344665527344, + "learning_rate": 2.9960159797848123e-06, + "loss": 24.7396, + "step": 334070 + }, + { + "epoch": 0.6748627367009135, + "grad_norm": 62.46092224121094, + "learning_rate": 2.9956961820652265e-06, + "loss": 34.0494, + "step": 334080 + }, + { + "epoch": 0.6748829373335973, + "grad_norm": 451.027587890625, + "learning_rate": 2.995376394114374e-06, + "loss": 11.8718, + "step": 334090 + }, + { + "epoch": 0.6749031379662811, + "grad_norm": 221.69540405273438, + "learning_rate": 2.9950566159338146e-06, + "loss": 7.3101, + "step": 334100 + }, + { + "epoch": 0.6749233385989649, + "grad_norm": 158.60073852539062, + "learning_rate": 2.9947368475251048e-06, + "loss": 13.518, + "step": 334110 + }, + { + "epoch": 0.6749435392316487, + "grad_norm": 274.83953857421875, + "learning_rate": 2.9944170888898037e-06, + "loss": 8.8389, + "step": 334120 + }, + { + "epoch": 0.6749637398643326, + "grad_norm": 208.35015869140625, + "learning_rate": 2.994097340029474e-06, + "loss": 27.7086, + "step": 334130 + }, + { + "epoch": 0.6749839404970164, + "grad_norm": 759.3289184570312, + "learning_rate": 2.9937776009456675e-06, + "loss": 12.1259, + "step": 334140 + }, + { + "epoch": 0.6750041411297002, + "grad_norm": 466.6158447265625, + "learning_rate": 2.9934578716399465e-06, + "loss": 22.5321, + "step": 334150 + }, + { + "epoch": 0.675024341762384, + "grad_norm": 718.4124755859375, + "learning_rate": 2.9931381521138693e-06, + "loss": 19.5304, + "step": 334160 + }, + { + "epoch": 0.6750445423950678, + "grad_norm": 850.4971923828125, + "learning_rate": 2.9928184423689923e-06, + "loss": 29.6138, + "step": 334170 + }, + { + "epoch": 0.6750647430277517, + 
"grad_norm": 571.2877197265625, + "learning_rate": 2.992498742406875e-06, + "loss": 30.8835, + "step": 334180 + }, + { + "epoch": 0.6750849436604355, + "grad_norm": 585.3817749023438, + "learning_rate": 2.992179052229076e-06, + "loss": 15.052, + "step": 334190 + }, + { + "epoch": 0.6751051442931193, + "grad_norm": 5.335434913635254, + "learning_rate": 2.991859371837151e-06, + "loss": 15.5326, + "step": 334200 + }, + { + "epoch": 0.6751253449258031, + "grad_norm": 334.8423767089844, + "learning_rate": 2.991539701232661e-06, + "loss": 12.7112, + "step": 334210 + }, + { + "epoch": 0.6751455455584869, + "grad_norm": 74.74103546142578, + "learning_rate": 2.991220040417162e-06, + "loss": 26.1394, + "step": 334220 + }, + { + "epoch": 0.6751657461911708, + "grad_norm": 542.1575927734375, + "learning_rate": 2.990900389392215e-06, + "loss": 26.1725, + "step": 334230 + }, + { + "epoch": 0.6751859468238546, + "grad_norm": 204.94012451171875, + "learning_rate": 2.9905807481593746e-06, + "loss": 8.5823, + "step": 334240 + }, + { + "epoch": 0.6752061474565383, + "grad_norm": 363.3467102050781, + "learning_rate": 2.9902611167202e-06, + "loss": 29.9696, + "step": 334250 + }, + { + "epoch": 0.6752263480892221, + "grad_norm": 153.37847900390625, + "learning_rate": 2.9899414950762497e-06, + "loss": 15.0252, + "step": 334260 + }, + { + "epoch": 0.6752465487219059, + "grad_norm": 837.5203247070312, + "learning_rate": 2.9896218832290784e-06, + "loss": 22.4676, + "step": 334270 + }, + { + "epoch": 0.6752667493545897, + "grad_norm": 323.2222900390625, + "learning_rate": 2.9893022811802474e-06, + "loss": 14.4997, + "step": 334280 + }, + { + "epoch": 0.6752869499872736, + "grad_norm": 14.713966369628906, + "learning_rate": 2.9889826889313144e-06, + "loss": 13.3263, + "step": 334290 + }, + { + "epoch": 0.6753071506199574, + "grad_norm": 547.5470581054688, + "learning_rate": 2.9886631064838355e-06, + "loss": 16.369, + "step": 334300 + }, + { + "epoch": 0.6753273512526412, + "grad_norm": 391.14898681640625, + "learning_rate": 2.9883435338393674e-06, + "loss": 9.4427, + "step": 334310 + }, + { + "epoch": 0.675347551885325, + "grad_norm": 1.5440407991409302, + "learning_rate": 2.988023970999471e-06, + "loss": 10.4, + "step": 334320 + }, + { + "epoch": 0.6753677525180088, + "grad_norm": 823.4159545898438, + "learning_rate": 2.9877044179657e-06, + "loss": 23.137, + "step": 334330 + }, + { + "epoch": 0.6753879531506927, + "grad_norm": 299.2149963378906, + "learning_rate": 2.9873848747396135e-06, + "loss": 13.556, + "step": 334340 + }, + { + "epoch": 0.6754081537833765, + "grad_norm": 921.8958129882812, + "learning_rate": 2.9870653413227692e-06, + "loss": 28.5156, + "step": 334350 + }, + { + "epoch": 0.6754283544160603, + "grad_norm": 169.8895263671875, + "learning_rate": 2.986745817716725e-06, + "loss": 18.187, + "step": 334360 + }, + { + "epoch": 0.6754485550487441, + "grad_norm": 261.49896240234375, + "learning_rate": 2.9864263039230378e-06, + "loss": 15.3056, + "step": 334370 + }, + { + "epoch": 0.675468755681428, + "grad_norm": 322.1488037109375, + "learning_rate": 2.9861067999432634e-06, + "loss": 20.718, + "step": 334380 + }, + { + "epoch": 0.6754889563141118, + "grad_norm": 534.993408203125, + "learning_rate": 2.9857873057789623e-06, + "loss": 31.4703, + "step": 334390 + }, + { + "epoch": 0.6755091569467956, + "grad_norm": 580.8644409179688, + "learning_rate": 2.9854678214316875e-06, + "loss": 27.6865, + "step": 334400 + }, + { + "epoch": 0.6755293575794794, + "grad_norm": 266.0247802734375, + "learning_rate": 
2.9851483469029975e-06, + "loss": 24.7877, + "step": 334410 + }, + { + "epoch": 0.6755495582121632, + "grad_norm": 396.6748962402344, + "learning_rate": 2.984828882194453e-06, + "loss": 14.4112, + "step": 334420 + }, + { + "epoch": 0.675569758844847, + "grad_norm": 250.25758361816406, + "learning_rate": 2.984509427307606e-06, + "loss": 16.3577, + "step": 334430 + }, + { + "epoch": 0.6755899594775309, + "grad_norm": 340.82952880859375, + "learning_rate": 2.984189982244016e-06, + "loss": 27.7192, + "step": 334440 + }, + { + "epoch": 0.6756101601102147, + "grad_norm": 727.5569458007812, + "learning_rate": 2.9838705470052397e-06, + "loss": 19.2347, + "step": 334450 + }, + { + "epoch": 0.6756303607428985, + "grad_norm": 293.1268005371094, + "learning_rate": 2.983551121592834e-06, + "loss": 12.2812, + "step": 334460 + }, + { + "epoch": 0.6756505613755823, + "grad_norm": 374.130126953125, + "learning_rate": 2.983231706008355e-06, + "loss": 17.5481, + "step": 334470 + }, + { + "epoch": 0.6756707620082661, + "grad_norm": 0.0, + "learning_rate": 2.982912300253361e-06, + "loss": 29.4153, + "step": 334480 + }, + { + "epoch": 0.67569096264095, + "grad_norm": 302.3288879394531, + "learning_rate": 2.982592904329407e-06, + "loss": 9.5432, + "step": 334490 + }, + { + "epoch": 0.6757111632736337, + "grad_norm": 296.753173828125, + "learning_rate": 2.98227351823805e-06, + "loss": 15.1205, + "step": 334500 + }, + { + "epoch": 0.6757313639063175, + "grad_norm": 527.780029296875, + "learning_rate": 2.981954141980847e-06, + "loss": 28.0974, + "step": 334510 + }, + { + "epoch": 0.6757515645390013, + "grad_norm": 252.00889587402344, + "learning_rate": 2.981634775559357e-06, + "loss": 20.3891, + "step": 334520 + }, + { + "epoch": 0.6757717651716851, + "grad_norm": 396.3777160644531, + "learning_rate": 2.9813154189751327e-06, + "loss": 31.2829, + "step": 334530 + }, + { + "epoch": 0.675791965804369, + "grad_norm": 132.64138793945312, + "learning_rate": 2.980996072229732e-06, + "loss": 21.0928, + "step": 334540 + }, + { + "epoch": 0.6758121664370528, + "grad_norm": 207.51771545410156, + "learning_rate": 2.9806767353247127e-06, + "loss": 7.8165, + "step": 334550 + }, + { + "epoch": 0.6758323670697366, + "grad_norm": 536.8202514648438, + "learning_rate": 2.9803574082616294e-06, + "loss": 20.4465, + "step": 334560 + }, + { + "epoch": 0.6758525677024204, + "grad_norm": 299.7032775878906, + "learning_rate": 2.980038091042038e-06, + "loss": 14.682, + "step": 334570 + }, + { + "epoch": 0.6758727683351042, + "grad_norm": 499.4809875488281, + "learning_rate": 2.979718783667499e-06, + "loss": 21.5977, + "step": 334580 + }, + { + "epoch": 0.675892968967788, + "grad_norm": 2.10003924369812, + "learning_rate": 2.9793994861395625e-06, + "loss": 16.5296, + "step": 334590 + }, + { + "epoch": 0.6759131696004719, + "grad_norm": 505.8760681152344, + "learning_rate": 2.9790801984597885e-06, + "loss": 14.1168, + "step": 334600 + }, + { + "epoch": 0.6759333702331557, + "grad_norm": 477.48895263671875, + "learning_rate": 2.978760920629734e-06, + "loss": 24.5461, + "step": 334610 + }, + { + "epoch": 0.6759535708658395, + "grad_norm": 206.78285217285156, + "learning_rate": 2.9784416526509525e-06, + "loss": 14.6992, + "step": 334620 + }, + { + "epoch": 0.6759737714985233, + "grad_norm": 14.603195190429688, + "learning_rate": 2.978122394525001e-06, + "loss": 12.9751, + "step": 334630 + }, + { + "epoch": 0.6759939721312072, + "grad_norm": 582.718505859375, + "learning_rate": 2.977803146253437e-06, + "loss": 17.8561, + "step": 334640 + }, + { + 
"epoch": 0.676014172763891, + "grad_norm": 163.27503967285156, + "learning_rate": 2.977483907837814e-06, + "loss": 21.0278, + "step": 334650 + }, + { + "epoch": 0.6760343733965748, + "grad_norm": 166.07298278808594, + "learning_rate": 2.977164679279688e-06, + "loss": 10.4512, + "step": 334660 + }, + { + "epoch": 0.6760545740292586, + "grad_norm": 526.6989135742188, + "learning_rate": 2.9768454605806176e-06, + "loss": 13.9076, + "step": 334670 + }, + { + "epoch": 0.6760747746619424, + "grad_norm": 2.295748472213745, + "learning_rate": 2.976526251742158e-06, + "loss": 14.5048, + "step": 334680 + }, + { + "epoch": 0.6760949752946263, + "grad_norm": 531.06005859375, + "learning_rate": 2.9762070527658628e-06, + "loss": 27.5859, + "step": 334690 + }, + { + "epoch": 0.6761151759273101, + "grad_norm": 206.268798828125, + "learning_rate": 2.9758878636532884e-06, + "loss": 17.0999, + "step": 334700 + }, + { + "epoch": 0.6761353765599939, + "grad_norm": 431.9757385253906, + "learning_rate": 2.975568684405993e-06, + "loss": 22.6525, + "step": 334710 + }, + { + "epoch": 0.6761555771926777, + "grad_norm": 497.8822937011719, + "learning_rate": 2.9752495150255284e-06, + "loss": 13.6454, + "step": 334720 + }, + { + "epoch": 0.6761757778253615, + "grad_norm": 309.1185302734375, + "learning_rate": 2.9749303555134512e-06, + "loss": 34.7585, + "step": 334730 + }, + { + "epoch": 0.6761959784580454, + "grad_norm": 716.8196411132812, + "learning_rate": 2.9746112058713218e-06, + "loss": 20.6415, + "step": 334740 + }, + { + "epoch": 0.6762161790907292, + "grad_norm": 132.9484100341797, + "learning_rate": 2.974292066100688e-06, + "loss": 24.0512, + "step": 334750 + }, + { + "epoch": 0.6762363797234129, + "grad_norm": 89.66337585449219, + "learning_rate": 2.97397293620311e-06, + "loss": 17.3719, + "step": 334760 + }, + { + "epoch": 0.6762565803560967, + "grad_norm": 380.46734619140625, + "learning_rate": 2.9736538161801433e-06, + "loss": 20.5944, + "step": 334770 + }, + { + "epoch": 0.6762767809887805, + "grad_norm": 394.9644470214844, + "learning_rate": 2.9733347060333408e-06, + "loss": 17.5288, + "step": 334780 + }, + { + "epoch": 0.6762969816214643, + "grad_norm": 367.57666015625, + "learning_rate": 2.9730156057642595e-06, + "loss": 24.0068, + "step": 334790 + }, + { + "epoch": 0.6763171822541482, + "grad_norm": 576.5689086914062, + "learning_rate": 2.972696515374455e-06, + "loss": 17.9293, + "step": 334800 + }, + { + "epoch": 0.676337382886832, + "grad_norm": 913.5191040039062, + "learning_rate": 2.972377434865481e-06, + "loss": 19.3679, + "step": 334810 + }, + { + "epoch": 0.6763575835195158, + "grad_norm": 130.3188018798828, + "learning_rate": 2.972058364238892e-06, + "loss": 15.4976, + "step": 334820 + }, + { + "epoch": 0.6763777841521996, + "grad_norm": 254.9954071044922, + "learning_rate": 2.9717393034962468e-06, + "loss": 18.6514, + "step": 334830 + }, + { + "epoch": 0.6763979847848834, + "grad_norm": 431.535888671875, + "learning_rate": 2.9714202526390985e-06, + "loss": 12.6177, + "step": 334840 + }, + { + "epoch": 0.6764181854175673, + "grad_norm": 138.59254455566406, + "learning_rate": 2.9711012116690007e-06, + "loss": 15.4331, + "step": 334850 + }, + { + "epoch": 0.6764383860502511, + "grad_norm": 568.0218505859375, + "learning_rate": 2.97078218058751e-06, + "loss": 12.4271, + "step": 334860 + }, + { + "epoch": 0.6764585866829349, + "grad_norm": 759.718505859375, + "learning_rate": 2.9704631593961815e-06, + "loss": 23.042, + "step": 334870 + }, + { + "epoch": 0.6764787873156187, + "grad_norm": 
398.9481506347656, + "learning_rate": 2.9701441480965683e-06, + "loss": 23.598, + "step": 334880 + }, + { + "epoch": 0.6764989879483025, + "grad_norm": 500.3331298828125, + "learning_rate": 2.969825146690225e-06, + "loss": 41.1918, + "step": 334890 + }, + { + "epoch": 0.6765191885809864, + "grad_norm": 552.3027954101562, + "learning_rate": 2.969506155178711e-06, + "loss": 23.5349, + "step": 334900 + }, + { + "epoch": 0.6765393892136702, + "grad_norm": 531.1264038085938, + "learning_rate": 2.9691871735635753e-06, + "loss": 26.5298, + "step": 334910 + }, + { + "epoch": 0.676559589846354, + "grad_norm": 625.7227172851562, + "learning_rate": 2.9688682018463755e-06, + "loss": 21.6557, + "step": 334920 + }, + { + "epoch": 0.6765797904790378, + "grad_norm": 190.6686553955078, + "learning_rate": 2.968549240028667e-06, + "loss": 7.9868, + "step": 334930 + }, + { + "epoch": 0.6765999911117216, + "grad_norm": 94.68265533447266, + "learning_rate": 2.968230288112002e-06, + "loss": 12.1203, + "step": 334940 + }, + { + "epoch": 0.6766201917444055, + "grad_norm": 575.996826171875, + "learning_rate": 2.9679113460979347e-06, + "loss": 18.6427, + "step": 334950 + }, + { + "epoch": 0.6766403923770893, + "grad_norm": 317.9371643066406, + "learning_rate": 2.967592413988023e-06, + "loss": 25.3584, + "step": 334960 + }, + { + "epoch": 0.6766605930097731, + "grad_norm": 964.5819091796875, + "learning_rate": 2.9672734917838198e-06, + "loss": 28.1624, + "step": 334970 + }, + { + "epoch": 0.6766807936424569, + "grad_norm": 7.9333271980285645, + "learning_rate": 2.9669545794868777e-06, + "loss": 7.6235, + "step": 334980 + }, + { + "epoch": 0.6767009942751407, + "grad_norm": 258.66424560546875, + "learning_rate": 2.9666356770987524e-06, + "loss": 21.1746, + "step": 334990 + }, + { + "epoch": 0.6767211949078246, + "grad_norm": 128.112548828125, + "learning_rate": 2.966316784621e-06, + "loss": 27.1505, + "step": 335000 + }, + { + "epoch": 0.6767413955405084, + "grad_norm": 335.5589599609375, + "learning_rate": 2.965997902055171e-06, + "loss": 20.931, + "step": 335010 + }, + { + "epoch": 0.6767615961731921, + "grad_norm": 706.6641845703125, + "learning_rate": 2.9656790294028216e-06, + "loss": 12.962, + "step": 335020 + }, + { + "epoch": 0.6767817968058759, + "grad_norm": 462.24798583984375, + "learning_rate": 2.965360166665508e-06, + "loss": 11.251, + "step": 335030 + }, + { + "epoch": 0.6768019974385597, + "grad_norm": 355.06365966796875, + "learning_rate": 2.9650413138447797e-06, + "loss": 23.4497, + "step": 335040 + }, + { + "epoch": 0.6768221980712436, + "grad_norm": 175.37826538085938, + "learning_rate": 2.964722470942194e-06, + "loss": 16.3155, + "step": 335050 + }, + { + "epoch": 0.6768423987039274, + "grad_norm": 677.8414916992188, + "learning_rate": 2.964403637959305e-06, + "loss": 22.8367, + "step": 335060 + }, + { + "epoch": 0.6768625993366112, + "grad_norm": 404.0458068847656, + "learning_rate": 2.9640848148976655e-06, + "loss": 14.63, + "step": 335070 + }, + { + "epoch": 0.676882799969295, + "grad_norm": 377.67852783203125, + "learning_rate": 2.963766001758829e-06, + "loss": 13.5425, + "step": 335080 + }, + { + "epoch": 0.6769030006019788, + "grad_norm": 28.240121841430664, + "learning_rate": 2.96344719854435e-06, + "loss": 9.9352, + "step": 335090 + }, + { + "epoch": 0.6769232012346627, + "grad_norm": 477.0787658691406, + "learning_rate": 2.963128405255783e-06, + "loss": 26.4698, + "step": 335100 + }, + { + "epoch": 0.6769434018673465, + "grad_norm": 376.7519226074219, + "learning_rate": 
2.96280962189468e-06, + "loss": 15.0515, + "step": 335110 + }, + { + "epoch": 0.6769636025000303, + "grad_norm": 212.5742645263672, + "learning_rate": 2.962490848462596e-06, + "loss": 9.7001, + "step": 335120 + }, + { + "epoch": 0.6769838031327141, + "grad_norm": 58.66630172729492, + "learning_rate": 2.9621720849610857e-06, + "loss": 14.2607, + "step": 335130 + }, + { + "epoch": 0.6770040037653979, + "grad_norm": 307.52044677734375, + "learning_rate": 2.961853331391701e-06, + "loss": 40.5167, + "step": 335140 + }, + { + "epoch": 0.6770242043980818, + "grad_norm": 314.05023193359375, + "learning_rate": 2.9615345877559953e-06, + "loss": 11.2318, + "step": 335150 + }, + { + "epoch": 0.6770444050307656, + "grad_norm": 243.64166259765625, + "learning_rate": 2.9612158540555245e-06, + "loss": 18.5069, + "step": 335160 + }, + { + "epoch": 0.6770646056634494, + "grad_norm": 141.25399780273438, + "learning_rate": 2.9608971302918387e-06, + "loss": 14.9054, + "step": 335170 + }, + { + "epoch": 0.6770848062961332, + "grad_norm": 133.18682861328125, + "learning_rate": 2.9605784164664925e-06, + "loss": 14.4311, + "step": 335180 + }, + { + "epoch": 0.677105006928817, + "grad_norm": 839.1710815429688, + "learning_rate": 2.960259712581043e-06, + "loss": 26.1139, + "step": 335190 + }, + { + "epoch": 0.6771252075615009, + "grad_norm": 374.13555908203125, + "learning_rate": 2.9599410186370363e-06, + "loss": 27.7875, + "step": 335200 + }, + { + "epoch": 0.6771454081941847, + "grad_norm": 558.8908081054688, + "learning_rate": 2.959622334636031e-06, + "loss": 19.4503, + "step": 335210 + }, + { + "epoch": 0.6771656088268685, + "grad_norm": 369.95635986328125, + "learning_rate": 2.95930366057958e-06, + "loss": 22.0149, + "step": 335220 + }, + { + "epoch": 0.6771858094595523, + "grad_norm": 542.2275390625, + "learning_rate": 2.9589849964692352e-06, + "loss": 13.5066, + "step": 335230 + }, + { + "epoch": 0.6772060100922361, + "grad_norm": 284.5793762207031, + "learning_rate": 2.9586663423065487e-06, + "loss": 20.0974, + "step": 335240 + }, + { + "epoch": 0.67722621072492, + "grad_norm": 231.64053344726562, + "learning_rate": 2.9583476980930768e-06, + "loss": 13.341, + "step": 335250 + }, + { + "epoch": 0.6772464113576038, + "grad_norm": 559.0064086914062, + "learning_rate": 2.9580290638303692e-06, + "loss": 17.23, + "step": 335260 + }, + { + "epoch": 0.6772666119902875, + "grad_norm": 447.82196044921875, + "learning_rate": 2.9577104395199795e-06, + "loss": 10.8653, + "step": 335270 + }, + { + "epoch": 0.6772868126229713, + "grad_norm": 657.632080078125, + "learning_rate": 2.9573918251634627e-06, + "loss": 19.4192, + "step": 335280 + }, + { + "epoch": 0.6773070132556551, + "grad_norm": 514.7632446289062, + "learning_rate": 2.957073220762371e-06, + "loss": 18.8193, + "step": 335290 + }, + { + "epoch": 0.6773272138883389, + "grad_norm": 424.8777770996094, + "learning_rate": 2.9567546263182554e-06, + "loss": 18.3145, + "step": 335300 + }, + { + "epoch": 0.6773474145210228, + "grad_norm": 528.4025268554688, + "learning_rate": 2.9564360418326698e-06, + "loss": 12.5365, + "step": 335310 + }, + { + "epoch": 0.6773676151537066, + "grad_norm": 536.476318359375, + "learning_rate": 2.956117467307169e-06, + "loss": 17.0952, + "step": 335320 + }, + { + "epoch": 0.6773878157863904, + "grad_norm": 578.4048461914062, + "learning_rate": 2.955798902743302e-06, + "loss": 21.9617, + "step": 335330 + }, + { + "epoch": 0.6774080164190742, + "grad_norm": 379.8204650878906, + "learning_rate": 2.9554803481426223e-06, + "loss": 8.0091, + 
"step": 335340 + }, + { + "epoch": 0.677428217051758, + "grad_norm": 532.469482421875, + "learning_rate": 2.9551618035066863e-06, + "loss": 21.908, + "step": 335350 + }, + { + "epoch": 0.6774484176844419, + "grad_norm": 403.3153381347656, + "learning_rate": 2.954843268837041e-06, + "loss": 19.0119, + "step": 335360 + }, + { + "epoch": 0.6774686183171257, + "grad_norm": 391.7265930175781, + "learning_rate": 2.954524744135243e-06, + "loss": 25.1481, + "step": 335370 + }, + { + "epoch": 0.6774888189498095, + "grad_norm": 124.90596008300781, + "learning_rate": 2.954206229402844e-06, + "loss": 11.7571, + "step": 335380 + }, + { + "epoch": 0.6775090195824933, + "grad_norm": 302.9950866699219, + "learning_rate": 2.9538877246413943e-06, + "loss": 20.4238, + "step": 335390 + }, + { + "epoch": 0.6775292202151771, + "grad_norm": 154.38528442382812, + "learning_rate": 2.9535692298524477e-06, + "loss": 16.8624, + "step": 335400 + }, + { + "epoch": 0.677549420847861, + "grad_norm": 773.2611694335938, + "learning_rate": 2.953250745037556e-06, + "loss": 15.0836, + "step": 335410 + }, + { + "epoch": 0.6775696214805448, + "grad_norm": 301.2328186035156, + "learning_rate": 2.9529322701982744e-06, + "loss": 10.0662, + "step": 335420 + }, + { + "epoch": 0.6775898221132286, + "grad_norm": 367.5018005371094, + "learning_rate": 2.9526138053361496e-06, + "loss": 14.9167, + "step": 335430 + }, + { + "epoch": 0.6776100227459124, + "grad_norm": 108.8554916381836, + "learning_rate": 2.952295350452738e-06, + "loss": 27.2063, + "step": 335440 + }, + { + "epoch": 0.6776302233785962, + "grad_norm": 523.1973266601562, + "learning_rate": 2.9519769055495917e-06, + "loss": 38.7709, + "step": 335450 + }, + { + "epoch": 0.6776504240112801, + "grad_norm": 782.3070068359375, + "learning_rate": 2.9516584706282604e-06, + "loss": 27.4142, + "step": 335460 + }, + { + "epoch": 0.6776706246439639, + "grad_norm": 285.48138427734375, + "learning_rate": 2.9513400456902975e-06, + "loss": 27.1429, + "step": 335470 + }, + { + "epoch": 0.6776908252766477, + "grad_norm": 609.79541015625, + "learning_rate": 2.951021630737255e-06, + "loss": 25.0206, + "step": 335480 + }, + { + "epoch": 0.6777110259093315, + "grad_norm": 224.08694458007812, + "learning_rate": 2.950703225770684e-06, + "loss": 17.2162, + "step": 335490 + }, + { + "epoch": 0.6777312265420153, + "grad_norm": 264.4524230957031, + "learning_rate": 2.9503848307921363e-06, + "loss": 12.9259, + "step": 335500 + }, + { + "epoch": 0.6777514271746992, + "grad_norm": 192.50445556640625, + "learning_rate": 2.9500664458031656e-06, + "loss": 17.8412, + "step": 335510 + }, + { + "epoch": 0.677771627807383, + "grad_norm": 578.9146728515625, + "learning_rate": 2.949748070805322e-06, + "loss": 28.7587, + "step": 335520 + }, + { + "epoch": 0.6777918284400667, + "grad_norm": 208.0790557861328, + "learning_rate": 2.9494297058001575e-06, + "loss": 22.8426, + "step": 335530 + }, + { + "epoch": 0.6778120290727505, + "grad_norm": 276.21063232421875, + "learning_rate": 2.949111350789225e-06, + "loss": 30.0007, + "step": 335540 + }, + { + "epoch": 0.6778322297054343, + "grad_norm": 48.124542236328125, + "learning_rate": 2.948793005774074e-06, + "loss": 24.3713, + "step": 335550 + }, + { + "epoch": 0.6778524303381182, + "grad_norm": 964.6512451171875, + "learning_rate": 2.9484746707562573e-06, + "loss": 21.5953, + "step": 335560 + }, + { + "epoch": 0.677872630970802, + "grad_norm": 158.03167724609375, + "learning_rate": 2.9481563457373247e-06, + "loss": 9.1398, + "step": 335570 + }, + { + "epoch": 
0.6778928316034858, + "grad_norm": 239.17372131347656, + "learning_rate": 2.9478380307188316e-06, + "loss": 11.358, + "step": 335580 + }, + { + "epoch": 0.6779130322361696, + "grad_norm": 148.4192657470703, + "learning_rate": 2.947519725702326e-06, + "loss": 17.2626, + "step": 335590 + }, + { + "epoch": 0.6779332328688534, + "grad_norm": 20.930021286010742, + "learning_rate": 2.9472014306893605e-06, + "loss": 23.6575, + "step": 335600 + }, + { + "epoch": 0.6779534335015373, + "grad_norm": 138.44613647460938, + "learning_rate": 2.946883145681486e-06, + "loss": 17.9701, + "step": 335610 + }, + { + "epoch": 0.6779736341342211, + "grad_norm": 229.4267578125, + "learning_rate": 2.946564870680255e-06, + "loss": 26.3465, + "step": 335620 + }, + { + "epoch": 0.6779938347669049, + "grad_norm": 698.5621337890625, + "learning_rate": 2.946246605687215e-06, + "loss": 18.6027, + "step": 335630 + }, + { + "epoch": 0.6780140353995887, + "grad_norm": 289.07305908203125, + "learning_rate": 2.945928350703924e-06, + "loss": 19.505, + "step": 335640 + }, + { + "epoch": 0.6780342360322725, + "grad_norm": 122.36067962646484, + "learning_rate": 2.9456101057319266e-06, + "loss": 15.033, + "step": 335650 + }, + { + "epoch": 0.6780544366649564, + "grad_norm": 199.5050506591797, + "learning_rate": 2.945291870772776e-06, + "loss": 18.9662, + "step": 335660 + }, + { + "epoch": 0.6780746372976402, + "grad_norm": 394.22943115234375, + "learning_rate": 2.9449736458280253e-06, + "loss": 25.678, + "step": 335670 + }, + { + "epoch": 0.678094837930324, + "grad_norm": 533.0518188476562, + "learning_rate": 2.9446554308992227e-06, + "loss": 12.8919, + "step": 335680 + }, + { + "epoch": 0.6781150385630078, + "grad_norm": 298.85821533203125, + "learning_rate": 2.94433722598792e-06, + "loss": 23.3194, + "step": 335690 + }, + { + "epoch": 0.6781352391956916, + "grad_norm": 106.61241912841797, + "learning_rate": 2.94401903109567e-06, + "loss": 21.9341, + "step": 335700 + }, + { + "epoch": 0.6781554398283755, + "grad_norm": 303.8421630859375, + "learning_rate": 2.94370084622402e-06, + "loss": 18.5334, + "step": 335710 + }, + { + "epoch": 0.6781756404610593, + "grad_norm": 618.7250366210938, + "learning_rate": 2.943382671374523e-06, + "loss": 28.3782, + "step": 335720 + }, + { + "epoch": 0.6781958410937431, + "grad_norm": 443.4075012207031, + "learning_rate": 2.9430645065487296e-06, + "loss": 11.1509, + "step": 335730 + }, + { + "epoch": 0.6782160417264269, + "grad_norm": 48.277862548828125, + "learning_rate": 2.9427463517481913e-06, + "loss": 11.3507, + "step": 335740 + }, + { + "epoch": 0.6782362423591107, + "grad_norm": 186.58279418945312, + "learning_rate": 2.9424282069744564e-06, + "loss": 8.9796, + "step": 335750 + }, + { + "epoch": 0.6782564429917946, + "grad_norm": 268.7702331542969, + "learning_rate": 2.9421100722290774e-06, + "loss": 21.0618, + "step": 335760 + }, + { + "epoch": 0.6782766436244784, + "grad_norm": 154.38316345214844, + "learning_rate": 2.9417919475136053e-06, + "loss": 13.6416, + "step": 335770 + }, + { + "epoch": 0.6782968442571621, + "grad_norm": 215.0345916748047, + "learning_rate": 2.9414738328295884e-06, + "loss": 10.4715, + "step": 335780 + }, + { + "epoch": 0.6783170448898459, + "grad_norm": 514.3206176757812, + "learning_rate": 2.9411557281785772e-06, + "loss": 13.2922, + "step": 335790 + }, + { + "epoch": 0.6783372455225297, + "grad_norm": 338.4372253417969, + "learning_rate": 2.940837633562127e-06, + "loss": 19.6505, + "step": 335800 + }, + { + "epoch": 0.6783574461552135, + "grad_norm": 
795.0709228515625, + "learning_rate": 2.9405195489817805e-06, + "loss": 29.8077, + "step": 335810 + }, + { + "epoch": 0.6783776467878974, + "grad_norm": 428.6066589355469, + "learning_rate": 2.9402014744390937e-06, + "loss": 20.2856, + "step": 335820 + }, + { + "epoch": 0.6783978474205812, + "grad_norm": 1047.8885498046875, + "learning_rate": 2.9398834099356155e-06, + "loss": 21.924, + "step": 335830 + }, + { + "epoch": 0.678418048053265, + "grad_norm": 366.1895751953125, + "learning_rate": 2.9395653554728955e-06, + "loss": 22.1305, + "step": 335840 + }, + { + "epoch": 0.6784382486859488, + "grad_norm": 558.1474609375, + "learning_rate": 2.9392473110524834e-06, + "loss": 22.3471, + "step": 335850 + }, + { + "epoch": 0.6784584493186326, + "grad_norm": 399.8312683105469, + "learning_rate": 2.9389292766759313e-06, + "loss": 22.832, + "step": 335860 + }, + { + "epoch": 0.6784786499513165, + "grad_norm": 216.11851501464844, + "learning_rate": 2.9386112523447863e-06, + "loss": 10.0315, + "step": 335870 + }, + { + "epoch": 0.6784988505840003, + "grad_norm": 330.2546691894531, + "learning_rate": 2.9382932380606e-06, + "loss": 19.3024, + "step": 335880 + }, + { + "epoch": 0.6785190512166841, + "grad_norm": 329.8070373535156, + "learning_rate": 2.9379752338249223e-06, + "loss": 17.3057, + "step": 335890 + }, + { + "epoch": 0.6785392518493679, + "grad_norm": 4.235108852386475, + "learning_rate": 2.9376572396393047e-06, + "loss": 17.1998, + "step": 335900 + }, + { + "epoch": 0.6785594524820517, + "grad_norm": 467.51617431640625, + "learning_rate": 2.937339255505295e-06, + "loss": 7.4437, + "step": 335910 + }, + { + "epoch": 0.6785796531147356, + "grad_norm": 695.2574462890625, + "learning_rate": 2.9370212814244436e-06, + "loss": 15.0593, + "step": 335920 + }, + { + "epoch": 0.6785998537474194, + "grad_norm": 79.98819732666016, + "learning_rate": 2.9367033173983006e-06, + "loss": 21.2393, + "step": 335930 + }, + { + "epoch": 0.6786200543801032, + "grad_norm": 419.0413818359375, + "learning_rate": 2.9363853634284143e-06, + "loss": 13.0665, + "step": 335940 + }, + { + "epoch": 0.678640255012787, + "grad_norm": 0.0, + "learning_rate": 2.9360674195163354e-06, + "loss": 26.5614, + "step": 335950 + }, + { + "epoch": 0.6786604556454708, + "grad_norm": 512.8369140625, + "learning_rate": 2.935749485663616e-06, + "loss": 33.0561, + "step": 335960 + }, + { + "epoch": 0.6786806562781547, + "grad_norm": 297.6837158203125, + "learning_rate": 2.9354315618718005e-06, + "loss": 20.9158, + "step": 335970 + }, + { + "epoch": 0.6787008569108385, + "grad_norm": 0.3239041268825531, + "learning_rate": 2.9351136481424413e-06, + "loss": 6.8588, + "step": 335980 + }, + { + "epoch": 0.6787210575435223, + "grad_norm": 345.4874572753906, + "learning_rate": 2.93479574447709e-06, + "loss": 18.8582, + "step": 335990 + }, + { + "epoch": 0.6787412581762061, + "grad_norm": 958.6956176757812, + "learning_rate": 2.934477850877292e-06, + "loss": 33.1137, + "step": 336000 + }, + { + "epoch": 0.6787614588088899, + "grad_norm": 382.7071228027344, + "learning_rate": 2.9341599673445988e-06, + "loss": 25.8995, + "step": 336010 + }, + { + "epoch": 0.6787816594415738, + "grad_norm": 475.3750305175781, + "learning_rate": 2.933842093880558e-06, + "loss": 34.536, + "step": 336020 + }, + { + "epoch": 0.6788018600742576, + "grad_norm": 134.16317749023438, + "learning_rate": 2.9335242304867233e-06, + "loss": 18.3059, + "step": 336030 + }, + { + "epoch": 0.6788220607069413, + "grad_norm": 291.14495849609375, + "learning_rate": 2.933206377164638e-06, + 
"loss": 10.2518, + "step": 336040 + }, + { + "epoch": 0.6788422613396251, + "grad_norm": 263.8800048828125, + "learning_rate": 2.9328885339158554e-06, + "loss": 29.2639, + "step": 336050 + }, + { + "epoch": 0.6788624619723089, + "grad_norm": 145.86029052734375, + "learning_rate": 2.9325707007419235e-06, + "loss": 15.119, + "step": 336060 + }, + { + "epoch": 0.6788826626049927, + "grad_norm": 786.78369140625, + "learning_rate": 2.9322528776443917e-06, + "loss": 17.4758, + "step": 336070 + }, + { + "epoch": 0.6789028632376766, + "grad_norm": 581.380126953125, + "learning_rate": 2.9319350646248075e-06, + "loss": 13.891, + "step": 336080 + }, + { + "epoch": 0.6789230638703604, + "grad_norm": 144.01966857910156, + "learning_rate": 2.931617261684722e-06, + "loss": 14.0006, + "step": 336090 + }, + { + "epoch": 0.6789432645030442, + "grad_norm": 309.3218994140625, + "learning_rate": 2.931299468825682e-06, + "loss": 17.6286, + "step": 336100 + }, + { + "epoch": 0.678963465135728, + "grad_norm": 341.881591796875, + "learning_rate": 2.930981686049237e-06, + "loss": 30.1838, + "step": 336110 + }, + { + "epoch": 0.6789836657684118, + "grad_norm": 43.649391174316406, + "learning_rate": 2.9306639133569393e-06, + "loss": 24.5629, + "step": 336120 + }, + { + "epoch": 0.6790038664010957, + "grad_norm": 615.0656127929688, + "learning_rate": 2.930346150750332e-06, + "loss": 24.637, + "step": 336130 + }, + { + "epoch": 0.6790240670337795, + "grad_norm": 281.3387145996094, + "learning_rate": 2.930028398230966e-06, + "loss": 16.123, + "step": 336140 + }, + { + "epoch": 0.6790442676664633, + "grad_norm": 124.0038070678711, + "learning_rate": 2.929710655800393e-06, + "loss": 19.8334, + "step": 336150 + }, + { + "epoch": 0.6790644682991471, + "grad_norm": 583.1646118164062, + "learning_rate": 2.929392923460158e-06, + "loss": 18.6541, + "step": 336160 + }, + { + "epoch": 0.679084668931831, + "grad_norm": 407.7423095703125, + "learning_rate": 2.9290752012118105e-06, + "loss": 23.8656, + "step": 336170 + }, + { + "epoch": 0.6791048695645148, + "grad_norm": 374.0619812011719, + "learning_rate": 2.9287574890568982e-06, + "loss": 19.12, + "step": 336180 + }, + { + "epoch": 0.6791250701971986, + "grad_norm": 312.9776306152344, + "learning_rate": 2.928439786996973e-06, + "loss": 17.1294, + "step": 336190 + }, + { + "epoch": 0.6791452708298824, + "grad_norm": 617.8344116210938, + "learning_rate": 2.92812209503358e-06, + "loss": 41.6107, + "step": 336200 + }, + { + "epoch": 0.6791654714625662, + "grad_norm": 274.5924377441406, + "learning_rate": 2.927804413168268e-06, + "loss": 21.6986, + "step": 336210 + }, + { + "epoch": 0.67918567209525, + "grad_norm": 133.44154357910156, + "learning_rate": 2.9274867414025876e-06, + "loss": 12.2877, + "step": 336220 + }, + { + "epoch": 0.6792058727279339, + "grad_norm": 482.30206298828125, + "learning_rate": 2.927169079738084e-06, + "loss": 14.4421, + "step": 336230 + }, + { + "epoch": 0.6792260733606177, + "grad_norm": 666.83447265625, + "learning_rate": 2.9268514281763072e-06, + "loss": 29.109, + "step": 336240 + }, + { + "epoch": 0.6792462739933015, + "grad_norm": 437.9608459472656, + "learning_rate": 2.926533786718806e-06, + "loss": 20.7922, + "step": 336250 + }, + { + "epoch": 0.6792664746259853, + "grad_norm": 578.3386840820312, + "learning_rate": 2.926216155367126e-06, + "loss": 13.6233, + "step": 336260 + }, + { + "epoch": 0.6792866752586691, + "grad_norm": 294.40350341796875, + "learning_rate": 2.9258985341228174e-06, + "loss": 18.8183, + "step": 336270 + }, + { + "epoch": 
0.679306875891353, + "grad_norm": 184.0799560546875, + "learning_rate": 2.9255809229874287e-06, + "loss": 20.9594, + "step": 336280 + }, + { + "epoch": 0.6793270765240368, + "grad_norm": 829.5468139648438, + "learning_rate": 2.9252633219625073e-06, + "loss": 17.8298, + "step": 336290 + }, + { + "epoch": 0.6793472771567205, + "grad_norm": 905.2503051757812, + "learning_rate": 2.9249457310495994e-06, + "loss": 12.2561, + "step": 336300 + }, + { + "epoch": 0.6793674777894043, + "grad_norm": 915.447265625, + "learning_rate": 2.924628150250256e-06, + "loss": 21.5375, + "step": 336310 + }, + { + "epoch": 0.6793876784220881, + "grad_norm": 454.55279541015625, + "learning_rate": 2.924310579566022e-06, + "loss": 10.5602, + "step": 336320 + }, + { + "epoch": 0.679407879054772, + "grad_norm": 173.49160766601562, + "learning_rate": 2.9239930189984458e-06, + "loss": 14.85, + "step": 336330 + }, + { + "epoch": 0.6794280796874558, + "grad_norm": 319.1146240234375, + "learning_rate": 2.9236754685490764e-06, + "loss": 27.3328, + "step": 336340 + }, + { + "epoch": 0.6794482803201396, + "grad_norm": 361.9736328125, + "learning_rate": 2.9233579282194617e-06, + "loss": 18.1259, + "step": 336350 + }, + { + "epoch": 0.6794684809528234, + "grad_norm": 182.25999450683594, + "learning_rate": 2.9230403980111482e-06, + "loss": 16.7042, + "step": 336360 + }, + { + "epoch": 0.6794886815855072, + "grad_norm": 581.5782470703125, + "learning_rate": 2.922722877925683e-06, + "loss": 17.7779, + "step": 336370 + }, + { + "epoch": 0.6795088822181911, + "grad_norm": 244.02505493164062, + "learning_rate": 2.922405367964617e-06, + "loss": 19.2927, + "step": 336380 + }, + { + "epoch": 0.6795290828508749, + "grad_norm": 319.10882568359375, + "learning_rate": 2.9220878681294935e-06, + "loss": 12.2844, + "step": 336390 + }, + { + "epoch": 0.6795492834835587, + "grad_norm": 350.7702331542969, + "learning_rate": 2.921770378421861e-06, + "loss": 16.9566, + "step": 336400 + }, + { + "epoch": 0.6795694841162425, + "grad_norm": 351.8890686035156, + "learning_rate": 2.92145289884327e-06, + "loss": 22.1177, + "step": 336410 + }, + { + "epoch": 0.6795896847489263, + "grad_norm": 329.609619140625, + "learning_rate": 2.9211354293952632e-06, + "loss": 23.3981, + "step": 336420 + }, + { + "epoch": 0.6796098853816102, + "grad_norm": 623.2437744140625, + "learning_rate": 2.9208179700793905e-06, + "loss": 29.25, + "step": 336430 + }, + { + "epoch": 0.679630086014294, + "grad_norm": 357.9377136230469, + "learning_rate": 2.9205005208972e-06, + "loss": 12.7148, + "step": 336440 + }, + { + "epoch": 0.6796502866469778, + "grad_norm": 281.5408020019531, + "learning_rate": 2.920183081850237e-06, + "loss": 17.607, + "step": 336450 + }, + { + "epoch": 0.6796704872796616, + "grad_norm": 381.7162780761719, + "learning_rate": 2.919865652940049e-06, + "loss": 11.5911, + "step": 336460 + }, + { + "epoch": 0.6796906879123454, + "grad_norm": 508.3994445800781, + "learning_rate": 2.919548234168183e-06, + "loss": 15.7111, + "step": 336470 + }, + { + "epoch": 0.6797108885450293, + "grad_norm": 388.01800537109375, + "learning_rate": 2.9192308255361895e-06, + "loss": 16.966, + "step": 336480 + }, + { + "epoch": 0.6797310891777131, + "grad_norm": 59.19709396362305, + "learning_rate": 2.918913427045609e-06, + "loss": 18.7259, + "step": 336490 + }, + { + "epoch": 0.6797512898103969, + "grad_norm": 849.0178833007812, + "learning_rate": 2.918596038697995e-06, + "loss": 26.1783, + "step": 336500 + }, + { + "epoch": 0.6797714904430807, + "grad_norm": 523.110107421875, + 
"learning_rate": 2.918278660494891e-06, + "loss": 11.7323, + "step": 336510 + }, + { + "epoch": 0.6797916910757645, + "grad_norm": 440.6187438964844, + "learning_rate": 2.917961292437842e-06, + "loss": 19.7034, + "step": 336520 + }, + { + "epoch": 0.6798118917084484, + "grad_norm": 446.0595397949219, + "learning_rate": 2.917643934528398e-06, + "loss": 27.6702, + "step": 336530 + }, + { + "epoch": 0.6798320923411322, + "grad_norm": 439.68487548828125, + "learning_rate": 2.917326586768106e-06, + "loss": 20.364, + "step": 336540 + }, + { + "epoch": 0.6798522929738159, + "grad_norm": 367.6255187988281, + "learning_rate": 2.9170092491585122e-06, + "loss": 19.888, + "step": 336550 + }, + { + "epoch": 0.6798724936064997, + "grad_norm": 139.78103637695312, + "learning_rate": 2.91669192170116e-06, + "loss": 10.5209, + "step": 336560 + }, + { + "epoch": 0.6798926942391835, + "grad_norm": 337.6980285644531, + "learning_rate": 2.9163746043976014e-06, + "loss": 11.5866, + "step": 336570 + }, + { + "epoch": 0.6799128948718673, + "grad_norm": 354.5260314941406, + "learning_rate": 2.91605729724938e-06, + "loss": 18.4546, + "step": 336580 + }, + { + "epoch": 0.6799330955045512, + "grad_norm": 57.688995361328125, + "learning_rate": 2.9157400002580407e-06, + "loss": 17.4047, + "step": 336590 + }, + { + "epoch": 0.679953296137235, + "grad_norm": 344.17626953125, + "learning_rate": 2.915422713425134e-06, + "loss": 20.1476, + "step": 336600 + }, + { + "epoch": 0.6799734967699188, + "grad_norm": 268.698486328125, + "learning_rate": 2.9151054367522013e-06, + "loss": 13.0335, + "step": 336610 + }, + { + "epoch": 0.6799936974026026, + "grad_norm": 83.21357727050781, + "learning_rate": 2.914788170240795e-06, + "loss": 13.718, + "step": 336620 + }, + { + "epoch": 0.6800138980352864, + "grad_norm": 331.0199279785156, + "learning_rate": 2.9144709138924556e-06, + "loss": 17.2042, + "step": 336630 + }, + { + "epoch": 0.6800340986679703, + "grad_norm": 306.1650390625, + "learning_rate": 2.9141536677087346e-06, + "loss": 15.1955, + "step": 336640 + }, + { + "epoch": 0.6800542993006541, + "grad_norm": 406.2598876953125, + "learning_rate": 2.9138364316911747e-06, + "loss": 12.8523, + "step": 336650 + }, + { + "epoch": 0.6800744999333379, + "grad_norm": 230.9142608642578, + "learning_rate": 2.9135192058413212e-06, + "loss": 44.1466, + "step": 336660 + }, + { + "epoch": 0.6800947005660217, + "grad_norm": 258.8046569824219, + "learning_rate": 2.9132019901607246e-06, + "loss": 21.7546, + "step": 336670 + }, + { + "epoch": 0.6801149011987055, + "grad_norm": 289.5413818359375, + "learning_rate": 2.912884784650926e-06, + "loss": 26.8708, + "step": 336680 + }, + { + "epoch": 0.6801351018313894, + "grad_norm": 1108.12158203125, + "learning_rate": 2.9125675893134762e-06, + "loss": 31.604, + "step": 336690 + }, + { + "epoch": 0.6801553024640732, + "grad_norm": 339.9401550292969, + "learning_rate": 2.912250404149918e-06, + "loss": 33.6846, + "step": 336700 + }, + { + "epoch": 0.680175503096757, + "grad_norm": 388.1901550292969, + "learning_rate": 2.9119332291617974e-06, + "loss": 21.4534, + "step": 336710 + }, + { + "epoch": 0.6801957037294408, + "grad_norm": 454.15185546875, + "learning_rate": 2.911616064350662e-06, + "loss": 26.7128, + "step": 336720 + }, + { + "epoch": 0.6802159043621246, + "grad_norm": 63.31000900268555, + "learning_rate": 2.9112989097180567e-06, + "loss": 20.6256, + "step": 336730 + }, + { + "epoch": 0.6802361049948085, + "grad_norm": 1194.2406005859375, + "learning_rate": 2.9109817652655253e-06, + "loss": 18.5528, 
+ "step": 336740 + }, + { + "epoch": 0.6802563056274923, + "grad_norm": 487.3364562988281, + "learning_rate": 2.910664630994615e-06, + "loss": 20.9212, + "step": 336750 + }, + { + "epoch": 0.6802765062601761, + "grad_norm": 126.3383560180664, + "learning_rate": 2.9103475069068763e-06, + "loss": 10.531, + "step": 336760 + }, + { + "epoch": 0.6802967068928599, + "grad_norm": 56.48907470703125, + "learning_rate": 2.910030393003847e-06, + "loss": 19.3291, + "step": 336770 + }, + { + "epoch": 0.6803169075255437, + "grad_norm": 390.70635986328125, + "learning_rate": 2.9097132892870757e-06, + "loss": 13.2997, + "step": 336780 + }, + { + "epoch": 0.6803371081582276, + "grad_norm": 357.54412841796875, + "learning_rate": 2.9093961957581096e-06, + "loss": 17.5058, + "step": 336790 + }, + { + "epoch": 0.6803573087909114, + "grad_norm": 347.0444641113281, + "learning_rate": 2.9090791124184934e-06, + "loss": 10.6631, + "step": 336800 + }, + { + "epoch": 0.6803775094235951, + "grad_norm": 367.3725891113281, + "learning_rate": 2.9087620392697703e-06, + "loss": 9.7796, + "step": 336810 + }, + { + "epoch": 0.6803977100562789, + "grad_norm": 298.2248840332031, + "learning_rate": 2.908444976313487e-06, + "loss": 19.3133, + "step": 336820 + }, + { + "epoch": 0.6804179106889627, + "grad_norm": 271.5204772949219, + "learning_rate": 2.908127923551194e-06, + "loss": 12.7521, + "step": 336830 + }, + { + "epoch": 0.6804381113216466, + "grad_norm": 1156.487548828125, + "learning_rate": 2.9078108809844264e-06, + "loss": 20.649, + "step": 336840 + }, + { + "epoch": 0.6804583119543304, + "grad_norm": 220.82901000976562, + "learning_rate": 2.9074938486147357e-06, + "loss": 19.9492, + "step": 336850 + }, + { + "epoch": 0.6804785125870142, + "grad_norm": 248.13047790527344, + "learning_rate": 2.9071768264436685e-06, + "loss": 12.7572, + "step": 336860 + }, + { + "epoch": 0.680498713219698, + "grad_norm": 496.8698425292969, + "learning_rate": 2.9068598144727666e-06, + "loss": 19.8571, + "step": 336870 + }, + { + "epoch": 0.6805189138523818, + "grad_norm": 543.551025390625, + "learning_rate": 2.9065428127035743e-06, + "loss": 32.1801, + "step": 336880 + }, + { + "epoch": 0.6805391144850657, + "grad_norm": 411.4061279296875, + "learning_rate": 2.9062258211376414e-06, + "loss": 18.5994, + "step": 336890 + }, + { + "epoch": 0.6805593151177495, + "grad_norm": 420.7814636230469, + "learning_rate": 2.905908839776509e-06, + "loss": 33.5048, + "step": 336900 + }, + { + "epoch": 0.6805795157504333, + "grad_norm": 344.6456298828125, + "learning_rate": 2.9055918686217212e-06, + "loss": 12.5738, + "step": 336910 + }, + { + "epoch": 0.6805997163831171, + "grad_norm": 507.3303527832031, + "learning_rate": 2.9052749076748266e-06, + "loss": 34.5787, + "step": 336920 + }, + { + "epoch": 0.6806199170158009, + "grad_norm": 717.0743408203125, + "learning_rate": 2.904957956937366e-06, + "loss": 23.5352, + "step": 336930 + }, + { + "epoch": 0.6806401176484848, + "grad_norm": 809.09912109375, + "learning_rate": 2.9046410164108883e-06, + "loss": 15.4761, + "step": 336940 + }, + { + "epoch": 0.6806603182811686, + "grad_norm": 352.9372863769531, + "learning_rate": 2.9043240860969342e-06, + "loss": 20.2224, + "step": 336950 + }, + { + "epoch": 0.6806805189138524, + "grad_norm": 312.1246032714844, + "learning_rate": 2.904007165997052e-06, + "loss": 5.6943, + "step": 336960 + }, + { + "epoch": 0.6807007195465362, + "grad_norm": 636.2717895507812, + "learning_rate": 2.903690256112785e-06, + "loss": 16.9237, + "step": 336970 + }, + { + "epoch": 
0.68072092017922, + "grad_norm": 597.7210693359375, + "learning_rate": 2.9033733564456756e-06, + "loss": 13.5185, + "step": 336980 + }, + { + "epoch": 0.6807411208119039, + "grad_norm": 290.738037109375, + "learning_rate": 2.9030564669972717e-06, + "loss": 8.1657, + "step": 336990 + }, + { + "epoch": 0.6807613214445877, + "grad_norm": 300.6885986328125, + "learning_rate": 2.9027395877691143e-06, + "loss": 29.2064, + "step": 337000 + }, + { + "epoch": 0.6807815220772715, + "grad_norm": 471.7787170410156, + "learning_rate": 2.902422718762752e-06, + "loss": 21.3975, + "step": 337010 + }, + { + "epoch": 0.6808017227099553, + "grad_norm": 415.0528259277344, + "learning_rate": 2.902105859979727e-06, + "loss": 12.9827, + "step": 337020 + }, + { + "epoch": 0.6808219233426391, + "grad_norm": 203.2621612548828, + "learning_rate": 2.9017890114215814e-06, + "loss": 13.1972, + "step": 337030 + }, + { + "epoch": 0.680842123975323, + "grad_norm": 408.6101379394531, + "learning_rate": 2.9014721730898637e-06, + "loss": 20.3462, + "step": 337040 + }, + { + "epoch": 0.6808623246080068, + "grad_norm": 1091.6729736328125, + "learning_rate": 2.9011553449861163e-06, + "loss": 22.4188, + "step": 337050 + }, + { + "epoch": 0.6808825252406905, + "grad_norm": 163.2935028076172, + "learning_rate": 2.900838527111881e-06, + "loss": 22.3477, + "step": 337060 + }, + { + "epoch": 0.6809027258733743, + "grad_norm": 378.9201354980469, + "learning_rate": 2.900521719468704e-06, + "loss": 19.6525, + "step": 337070 + }, + { + "epoch": 0.6809229265060581, + "grad_norm": 116.05774688720703, + "learning_rate": 2.900204922058132e-06, + "loss": 13.6343, + "step": 337080 + }, + { + "epoch": 0.6809431271387419, + "grad_norm": 703.281982421875, + "learning_rate": 2.899888134881706e-06, + "loss": 21.3625, + "step": 337090 + }, + { + "epoch": 0.6809633277714258, + "grad_norm": 383.28643798828125, + "learning_rate": 2.899571357940969e-06, + "loss": 15.0211, + "step": 337100 + }, + { + "epoch": 0.6809835284041096, + "grad_norm": 258.8240051269531, + "learning_rate": 2.8992545912374683e-06, + "loss": 22.9595, + "step": 337110 + }, + { + "epoch": 0.6810037290367934, + "grad_norm": 443.8153381347656, + "learning_rate": 2.8989378347727453e-06, + "loss": 15.601, + "step": 337120 + }, + { + "epoch": 0.6810239296694772, + "grad_norm": 243.36021423339844, + "learning_rate": 2.8986210885483436e-06, + "loss": 10.958, + "step": 337130 + }, + { + "epoch": 0.681044130302161, + "grad_norm": 337.3719177246094, + "learning_rate": 2.8983043525658065e-06, + "loss": 26.0296, + "step": 337140 + }, + { + "epoch": 0.6810643309348449, + "grad_norm": 334.7502136230469, + "learning_rate": 2.897987626826682e-06, + "loss": 17.04, + "step": 337150 + }, + { + "epoch": 0.6810845315675287, + "grad_norm": 123.75239562988281, + "learning_rate": 2.8976709113325107e-06, + "loss": 19.8106, + "step": 337160 + }, + { + "epoch": 0.6811047322002125, + "grad_norm": 188.6690673828125, + "learning_rate": 2.8973542060848347e-06, + "loss": 11.9762, + "step": 337170 + }, + { + "epoch": 0.6811249328328963, + "grad_norm": 618.4735107421875, + "learning_rate": 2.897037511085201e-06, + "loss": 24.9842, + "step": 337180 + }, + { + "epoch": 0.6811451334655801, + "grad_norm": 151.5784149169922, + "learning_rate": 2.896720826335151e-06, + "loss": 14.0799, + "step": 337190 + }, + { + "epoch": 0.681165334098264, + "grad_norm": 710.8343505859375, + "learning_rate": 2.896404151836227e-06, + "loss": 23.4284, + "step": 337200 + }, + { + "epoch": 0.6811855347309478, + "grad_norm": 
135.69598388671875, + "learning_rate": 2.896087487589976e-06, + "loss": 21.608, + "step": 337210 + }, + { + "epoch": 0.6812057353636316, + "grad_norm": 280.37451171875, + "learning_rate": 2.8957708335979373e-06, + "loss": 17.083, + "step": 337220 + }, + { + "epoch": 0.6812259359963154, + "grad_norm": 575.2173461914062, + "learning_rate": 2.8954541898616585e-06, + "loss": 18.1872, + "step": 337230 + }, + { + "epoch": 0.6812461366289992, + "grad_norm": 486.860107421875, + "learning_rate": 2.895137556382679e-06, + "loss": 16.7434, + "step": 337240 + }, + { + "epoch": 0.6812663372616831, + "grad_norm": 495.7618103027344, + "learning_rate": 2.8948209331625454e-06, + "loss": 14.0619, + "step": 337250 + }, + { + "epoch": 0.6812865378943669, + "grad_norm": 402.1931457519531, + "learning_rate": 2.8945043202027987e-06, + "loss": 25.143, + "step": 337260 + }, + { + "epoch": 0.6813067385270507, + "grad_norm": 254.796875, + "learning_rate": 2.8941877175049815e-06, + "loss": 20.1072, + "step": 337270 + }, + { + "epoch": 0.6813269391597345, + "grad_norm": 354.0314025878906, + "learning_rate": 2.8938711250706397e-06, + "loss": 14.5098, + "step": 337280 + }, + { + "epoch": 0.6813471397924183, + "grad_norm": 353.9878234863281, + "learning_rate": 2.8935545429013123e-06, + "loss": 27.2135, + "step": 337290 + }, + { + "epoch": 0.6813673404251022, + "grad_norm": 344.0814208984375, + "learning_rate": 2.893237970998547e-06, + "loss": 11.1809, + "step": 337300 + }, + { + "epoch": 0.681387541057786, + "grad_norm": 241.19207763671875, + "learning_rate": 2.892921409363884e-06, + "loss": 23.2927, + "step": 337310 + }, + { + "epoch": 0.6814077416904697, + "grad_norm": 337.1117248535156, + "learning_rate": 2.8926048579988647e-06, + "loss": 18.7983, + "step": 337320 + }, + { + "epoch": 0.6814279423231535, + "grad_norm": 120.28765869140625, + "learning_rate": 2.8922883169050354e-06, + "loss": 15.756, + "step": 337330 + }, + { + "epoch": 0.6814481429558373, + "grad_norm": 196.0370635986328, + "learning_rate": 2.891971786083938e-06, + "loss": 19.3709, + "step": 337340 + }, + { + "epoch": 0.6814683435885212, + "grad_norm": 555.6265869140625, + "learning_rate": 2.8916552655371117e-06, + "loss": 25.1948, + "step": 337350 + }, + { + "epoch": 0.681488544221205, + "grad_norm": 597.2166748046875, + "learning_rate": 2.891338755266102e-06, + "loss": 19.4583, + "step": 337360 + }, + { + "epoch": 0.6815087448538888, + "grad_norm": 389.1532897949219, + "learning_rate": 2.8910222552724552e-06, + "loss": 16.0208, + "step": 337370 + }, + { + "epoch": 0.6815289454865726, + "grad_norm": 138.45277404785156, + "learning_rate": 2.8907057655577066e-06, + "loss": 9.7212, + "step": 337380 + }, + { + "epoch": 0.6815491461192564, + "grad_norm": 447.41461181640625, + "learning_rate": 2.8903892861234023e-06, + "loss": 16.6015, + "step": 337390 + }, + { + "epoch": 0.6815693467519403, + "grad_norm": 268.70367431640625, + "learning_rate": 2.8900728169710866e-06, + "loss": 7.0208, + "step": 337400 + }, + { + "epoch": 0.6815895473846241, + "grad_norm": 3082.040283203125, + "learning_rate": 2.8897563581023e-06, + "loss": 50.2535, + "step": 337410 + }, + { + "epoch": 0.6816097480173079, + "grad_norm": 273.8852844238281, + "learning_rate": 2.8894399095185833e-06, + "loss": 19.967, + "step": 337420 + }, + { + "epoch": 0.6816299486499917, + "grad_norm": 491.9859619140625, + "learning_rate": 2.8891234712214798e-06, + "loss": 40.1611, + "step": 337430 + }, + { + "epoch": 0.6816501492826755, + "grad_norm": 494.9411315917969, + "learning_rate": 
2.888807043212537e-06, + "loss": 17.7439, + "step": 337440 + }, + { + "epoch": 0.6816703499153594, + "grad_norm": 145.7028350830078, + "learning_rate": 2.888490625493289e-06, + "loss": 10.0653, + "step": 337450 + }, + { + "epoch": 0.6816905505480432, + "grad_norm": 113.2907943725586, + "learning_rate": 2.8881742180652813e-06, + "loss": 13.1933, + "step": 337460 + }, + { + "epoch": 0.681710751180727, + "grad_norm": 562.1471557617188, + "learning_rate": 2.8878578209300576e-06, + "loss": 20.0976, + "step": 337470 + }, + { + "epoch": 0.6817309518134108, + "grad_norm": 302.1080017089844, + "learning_rate": 2.8875414340891596e-06, + "loss": 17.5522, + "step": 337480 + }, + { + "epoch": 0.6817511524460946, + "grad_norm": 157.92369079589844, + "learning_rate": 2.887225057544126e-06, + "loss": 21.4645, + "step": 337490 + }, + { + "epoch": 0.6817713530787785, + "grad_norm": 583.1824340820312, + "learning_rate": 2.886908691296504e-06, + "loss": 9.4382, + "step": 337500 + }, + { + "epoch": 0.6817915537114623, + "grad_norm": 252.84439086914062, + "learning_rate": 2.886592335347832e-06, + "loss": 11.8952, + "step": 337510 + }, + { + "epoch": 0.6818117543441461, + "grad_norm": 287.48907470703125, + "learning_rate": 2.8862759896996507e-06, + "loss": 22.0179, + "step": 337520 + }, + { + "epoch": 0.6818319549768299, + "grad_norm": 356.02166748046875, + "learning_rate": 2.885959654353504e-06, + "loss": 12.4505, + "step": 337530 + }, + { + "epoch": 0.6818521556095137, + "grad_norm": 267.37646484375, + "learning_rate": 2.8856433293109355e-06, + "loss": 21.0376, + "step": 337540 + }, + { + "epoch": 0.6818723562421976, + "grad_norm": 398.0299987792969, + "learning_rate": 2.8853270145734846e-06, + "loss": 20.8949, + "step": 337550 + }, + { + "epoch": 0.6818925568748814, + "grad_norm": 0.0, + "learning_rate": 2.8850107101426916e-06, + "loss": 16.1071, + "step": 337560 + }, + { + "epoch": 0.6819127575075651, + "grad_norm": 374.1020202636719, + "learning_rate": 2.884694416020102e-06, + "loss": 23.6504, + "step": 337570 + }, + { + "epoch": 0.6819329581402489, + "grad_norm": 486.46051025390625, + "learning_rate": 2.884378132207255e-06, + "loss": 16.6881, + "step": 337580 + }, + { + "epoch": 0.6819531587729327, + "grad_norm": 347.15631103515625, + "learning_rate": 2.884061858705691e-06, + "loss": 12.7797, + "step": 337590 + }, + { + "epoch": 0.6819733594056165, + "grad_norm": 276.30218505859375, + "learning_rate": 2.8837455955169547e-06, + "loss": 15.8591, + "step": 337600 + }, + { + "epoch": 0.6819935600383004, + "grad_norm": 32.22821044921875, + "learning_rate": 2.883429342642583e-06, + "loss": 22.1051, + "step": 337610 + }, + { + "epoch": 0.6820137606709842, + "grad_norm": 624.7095336914062, + "learning_rate": 2.8831131000841227e-06, + "loss": 19.8468, + "step": 337620 + }, + { + "epoch": 0.682033961303668, + "grad_norm": 66.20378112792969, + "learning_rate": 2.882796867843112e-06, + "loss": 19.7956, + "step": 337630 + }, + { + "epoch": 0.6820541619363518, + "grad_norm": 572.2191772460938, + "learning_rate": 2.8824806459210907e-06, + "loss": 16.1606, + "step": 337640 + }, + { + "epoch": 0.6820743625690356, + "grad_norm": 552.029296875, + "learning_rate": 2.8821644343196042e-06, + "loss": 19.6395, + "step": 337650 + }, + { + "epoch": 0.6820945632017195, + "grad_norm": 209.0013427734375, + "learning_rate": 2.881848233040191e-06, + "loss": 27.2228, + "step": 337660 + }, + { + "epoch": 0.6821147638344033, + "grad_norm": 323.4571838378906, + "learning_rate": 2.8815320420843906e-06, + "loss": 16.9253, + "step": 337670 + 
}, + { + "epoch": 0.6821349644670871, + "grad_norm": 315.37335205078125, + "learning_rate": 2.8812158614537465e-06, + "loss": 18.8757, + "step": 337680 + }, + { + "epoch": 0.6821551650997709, + "grad_norm": 407.3314208984375, + "learning_rate": 2.8808996911498012e-06, + "loss": 9.1621, + "step": 337690 + }, + { + "epoch": 0.6821753657324547, + "grad_norm": 413.5022277832031, + "learning_rate": 2.8805835311740933e-06, + "loss": 15.6154, + "step": 337700 + }, + { + "epoch": 0.6821955663651386, + "grad_norm": 843.661376953125, + "learning_rate": 2.8802673815281622e-06, + "loss": 11.72, + "step": 337710 + }, + { + "epoch": 0.6822157669978224, + "grad_norm": 352.2972412109375, + "learning_rate": 2.8799512422135534e-06, + "loss": 16.2007, + "step": 337720 + }, + { + "epoch": 0.6822359676305062, + "grad_norm": 353.974609375, + "learning_rate": 2.8796351132318047e-06, + "loss": 13.8269, + "step": 337730 + }, + { + "epoch": 0.68225616826319, + "grad_norm": 717.4970703125, + "learning_rate": 2.879318994584456e-06, + "loss": 10.8968, + "step": 337740 + }, + { + "epoch": 0.6822763688958738, + "grad_norm": 193.4995574951172, + "learning_rate": 2.8790028862730487e-06, + "loss": 18.4877, + "step": 337750 + }, + { + "epoch": 0.6822965695285577, + "grad_norm": 472.2587585449219, + "learning_rate": 2.8786867882991276e-06, + "loss": 17.5904, + "step": 337760 + }, + { + "epoch": 0.6823167701612415, + "grad_norm": 1694.71240234375, + "learning_rate": 2.8783707006642266e-06, + "loss": 18.691, + "step": 337770 + }, + { + "epoch": 0.6823369707939253, + "grad_norm": 531.9982299804688, + "learning_rate": 2.878054623369889e-06, + "loss": 15.8425, + "step": 337780 + }, + { + "epoch": 0.6823571714266091, + "grad_norm": 739.2018432617188, + "learning_rate": 2.8777385564176585e-06, + "loss": 10.0849, + "step": 337790 + }, + { + "epoch": 0.6823773720592929, + "grad_norm": 247.94149780273438, + "learning_rate": 2.877422499809072e-06, + "loss": 20.2294, + "step": 337800 + }, + { + "epoch": 0.6823975726919768, + "grad_norm": 390.6719665527344, + "learning_rate": 2.8771064535456696e-06, + "loss": 20.9425, + "step": 337810 + }, + { + "epoch": 0.6824177733246606, + "grad_norm": 750.8737182617188, + "learning_rate": 2.876790417628994e-06, + "loss": 21.3811, + "step": 337820 + }, + { + "epoch": 0.6824379739573443, + "grad_norm": 2.4040181636810303, + "learning_rate": 2.8764743920605855e-06, + "loss": 17.6284, + "step": 337830 + }, + { + "epoch": 0.6824581745900281, + "grad_norm": 563.5733642578125, + "learning_rate": 2.87615837684198e-06, + "loss": 16.2708, + "step": 337840 + }, + { + "epoch": 0.6824783752227119, + "grad_norm": 203.45425415039062, + "learning_rate": 2.875842371974722e-06, + "loss": 11.3526, + "step": 337850 + }, + { + "epoch": 0.6824985758553958, + "grad_norm": 465.1794128417969, + "learning_rate": 2.875526377460352e-06, + "loss": 26.2164, + "step": 337860 + }, + { + "epoch": 0.6825187764880796, + "grad_norm": 666.3756103515625, + "learning_rate": 2.8752103933004095e-06, + "loss": 25.2052, + "step": 337870 + }, + { + "epoch": 0.6825389771207634, + "grad_norm": 49.69120788574219, + "learning_rate": 2.874894419496431e-06, + "loss": 14.9313, + "step": 337880 + }, + { + "epoch": 0.6825591777534472, + "grad_norm": 733.9598999023438, + "learning_rate": 2.874578456049961e-06, + "loss": 24.4403, + "step": 337890 + }, + { + "epoch": 0.682579378386131, + "grad_norm": 180.44061279296875, + "learning_rate": 2.874262502962537e-06, + "loss": 17.7423, + "step": 337900 + }, + { + "epoch": 0.6825995790188149, + "grad_norm": 
483.427001953125, + "learning_rate": 2.8739465602357014e-06, + "loss": 8.3198, + "step": 337910 + }, + { + "epoch": 0.6826197796514987, + "grad_norm": 579.8331909179688, + "learning_rate": 2.8736306278709923e-06, + "loss": 17.613, + "step": 337920 + }, + { + "epoch": 0.6826399802841825, + "grad_norm": 685.6690063476562, + "learning_rate": 2.8733147058699473e-06, + "loss": 17.5377, + "step": 337930 + }, + { + "epoch": 0.6826601809168663, + "grad_norm": 218.8058319091797, + "learning_rate": 2.872998794234111e-06, + "loss": 15.3337, + "step": 337940 + }, + { + "epoch": 0.6826803815495501, + "grad_norm": 597.5891723632812, + "learning_rate": 2.8726828929650206e-06, + "loss": 19.9905, + "step": 337950 + }, + { + "epoch": 0.682700582182234, + "grad_norm": 56.42866516113281, + "learning_rate": 2.8723670020642137e-06, + "loss": 23.3822, + "step": 337960 + }, + { + "epoch": 0.6827207828149178, + "grad_norm": 324.05364990234375, + "learning_rate": 2.8720511215332314e-06, + "loss": 14.599, + "step": 337970 + }, + { + "epoch": 0.6827409834476016, + "grad_norm": 641.1737060546875, + "learning_rate": 2.871735251373616e-06, + "loss": 18.2005, + "step": 337980 + }, + { + "epoch": 0.6827611840802854, + "grad_norm": 360.8026123046875, + "learning_rate": 2.871419391586906e-06, + "loss": 17.879, + "step": 337990 + }, + { + "epoch": 0.6827813847129692, + "grad_norm": 388.5684814453125, + "learning_rate": 2.871103542174637e-06, + "loss": 21.4161, + "step": 338000 + }, + { + "epoch": 0.682801585345653, + "grad_norm": 278.2432861328125, + "learning_rate": 2.8707877031383523e-06, + "loss": 19.7573, + "step": 338010 + }, + { + "epoch": 0.6828217859783369, + "grad_norm": 622.8558349609375, + "learning_rate": 2.870471874479591e-06, + "loss": 14.9239, + "step": 338020 + }, + { + "epoch": 0.6828419866110207, + "grad_norm": 3363.966796875, + "learning_rate": 2.870156056199889e-06, + "loss": 28.2797, + "step": 338030 + }, + { + "epoch": 0.6828621872437045, + "grad_norm": 472.21014404296875, + "learning_rate": 2.8698402483007885e-06, + "loss": 23.524, + "step": 338040 + }, + { + "epoch": 0.6828823878763883, + "grad_norm": 165.0049285888672, + "learning_rate": 2.8695244507838326e-06, + "loss": 15.7984, + "step": 338050 + }, + { + "epoch": 0.6829025885090722, + "grad_norm": 367.30523681640625, + "learning_rate": 2.869208663650551e-06, + "loss": 15.7757, + "step": 338060 + }, + { + "epoch": 0.682922789141756, + "grad_norm": 333.3113098144531, + "learning_rate": 2.868892886902489e-06, + "loss": 17.5221, + "step": 338070 + }, + { + "epoch": 0.6829429897744398, + "grad_norm": 265.7660217285156, + "learning_rate": 2.8685771205411862e-06, + "loss": 18.43, + "step": 338080 + }, + { + "epoch": 0.6829631904071235, + "grad_norm": 525.880859375, + "learning_rate": 2.86826136456818e-06, + "loss": 25.6477, + "step": 338090 + }, + { + "epoch": 0.6829833910398073, + "grad_norm": 595.0288696289062, + "learning_rate": 2.8679456189850076e-06, + "loss": 22.5259, + "step": 338100 + }, + { + "epoch": 0.6830035916724911, + "grad_norm": 438.26385498046875, + "learning_rate": 2.8676298837932116e-06, + "loss": 27.3373, + "step": 338110 + }, + { + "epoch": 0.683023792305175, + "grad_norm": 1112.549560546875, + "learning_rate": 2.8673141589943285e-06, + "loss": 17.6199, + "step": 338120 + }, + { + "epoch": 0.6830439929378588, + "grad_norm": 112.30738830566406, + "learning_rate": 2.866998444589896e-06, + "loss": 31.6832, + "step": 338130 + }, + { + "epoch": 0.6830641935705426, + "grad_norm": 30.14986228942871, + "learning_rate": 
2.8666827405814535e-06, + "loss": 8.3946, + "step": 338140 + }, + { + "epoch": 0.6830843942032264, + "grad_norm": 616.5403442382812, + "learning_rate": 2.8663670469705434e-06, + "loss": 19.5467, + "step": 338150 + }, + { + "epoch": 0.6831045948359102, + "grad_norm": 219.76979064941406, + "learning_rate": 2.8660513637587016e-06, + "loss": 15.6856, + "step": 338160 + }, + { + "epoch": 0.6831247954685941, + "grad_norm": 734.9285888671875, + "learning_rate": 2.8657356909474644e-06, + "loss": 16.7782, + "step": 338170 + }, + { + "epoch": 0.6831449961012779, + "grad_norm": 312.7208251953125, + "learning_rate": 2.865420028538375e-06, + "loss": 13.9156, + "step": 338180 + }, + { + "epoch": 0.6831651967339617, + "grad_norm": 221.04898071289062, + "learning_rate": 2.865104376532969e-06, + "loss": 21.6149, + "step": 338190 + }, + { + "epoch": 0.6831853973666455, + "grad_norm": 483.8321838378906, + "learning_rate": 2.864788734932783e-06, + "loss": 15.3848, + "step": 338200 + }, + { + "epoch": 0.6832055979993293, + "grad_norm": 142.70033264160156, + "learning_rate": 2.8644731037393605e-06, + "loss": 11.8688, + "step": 338210 + }, + { + "epoch": 0.6832257986320132, + "grad_norm": 1207.791015625, + "learning_rate": 2.864157482954234e-06, + "loss": 20.3958, + "step": 338220 + }, + { + "epoch": 0.683245999264697, + "grad_norm": 204.30404663085938, + "learning_rate": 2.863841872578948e-06, + "loss": 20.5889, + "step": 338230 + }, + { + "epoch": 0.6832661998973808, + "grad_norm": 316.9858093261719, + "learning_rate": 2.863526272615037e-06, + "loss": 20.7871, + "step": 338240 + }, + { + "epoch": 0.6832864005300646, + "grad_norm": 245.21316528320312, + "learning_rate": 2.863210683064038e-06, + "loss": 11.0048, + "step": 338250 + }, + { + "epoch": 0.6833066011627484, + "grad_norm": 50.83566665649414, + "learning_rate": 2.8628951039274933e-06, + "loss": 46.2538, + "step": 338260 + }, + { + "epoch": 0.6833268017954323, + "grad_norm": 620.8017578125, + "learning_rate": 2.8625795352069385e-06, + "loss": 27.8345, + "step": 338270 + }, + { + "epoch": 0.6833470024281161, + "grad_norm": 153.79302978515625, + "learning_rate": 2.8622639769039094e-06, + "loss": 18.5572, + "step": 338280 + }, + { + "epoch": 0.6833672030607999, + "grad_norm": 352.8744812011719, + "learning_rate": 2.861948429019947e-06, + "loss": 13.159, + "step": 338290 + }, + { + "epoch": 0.6833874036934837, + "grad_norm": 20.52808952331543, + "learning_rate": 2.8616328915565907e-06, + "loss": 8.4701, + "step": 338300 + }, + { + "epoch": 0.6834076043261675, + "grad_norm": 4.267901420593262, + "learning_rate": 2.861317364515376e-06, + "loss": 9.4055, + "step": 338310 + }, + { + "epoch": 0.6834278049588514, + "grad_norm": 82.28601837158203, + "learning_rate": 2.8610018478978387e-06, + "loss": 18.105, + "step": 338320 + }, + { + "epoch": 0.6834480055915352, + "grad_norm": 597.6419067382812, + "learning_rate": 2.8606863417055216e-06, + "loss": 13.2078, + "step": 338330 + }, + { + "epoch": 0.6834682062242189, + "grad_norm": 309.8203430175781, + "learning_rate": 2.8603708459399594e-06, + "loss": 14.3366, + "step": 338340 + }, + { + "epoch": 0.6834884068569027, + "grad_norm": 11.254910469055176, + "learning_rate": 2.8600553606026883e-06, + "loss": 21.2411, + "step": 338350 + }, + { + "epoch": 0.6835086074895865, + "grad_norm": 401.820068359375, + "learning_rate": 2.8597398856952473e-06, + "loss": 11.7122, + "step": 338360 + }, + { + "epoch": 0.6835288081222703, + "grad_norm": 119.74920654296875, + "learning_rate": 2.8594244212191796e-06, + "loss": 16.6536, + 
"step": 338370 + }, + { + "epoch": 0.6835490087549542, + "grad_norm": 805.9678955078125, + "learning_rate": 2.859108967176013e-06, + "loss": 24.8198, + "step": 338380 + }, + { + "epoch": 0.683569209387638, + "grad_norm": 453.3701477050781, + "learning_rate": 2.8587935235672897e-06, + "loss": 6.5504, + "step": 338390 + }, + { + "epoch": 0.6835894100203218, + "grad_norm": 621.1905517578125, + "learning_rate": 2.858478090394549e-06, + "loss": 20.8854, + "step": 338400 + }, + { + "epoch": 0.6836096106530056, + "grad_norm": 121.0240249633789, + "learning_rate": 2.8581626676593264e-06, + "loss": 16.6853, + "step": 338410 + }, + { + "epoch": 0.6836298112856894, + "grad_norm": 326.71929931640625, + "learning_rate": 2.8578472553631577e-06, + "loss": 32.0411, + "step": 338420 + }, + { + "epoch": 0.6836500119183733, + "grad_norm": 388.282958984375, + "learning_rate": 2.857531853507583e-06, + "loss": 19.2871, + "step": 338430 + }, + { + "epoch": 0.6836702125510571, + "grad_norm": 32314.8671875, + "learning_rate": 2.8572164620941385e-06, + "loss": 47.4179, + "step": 338440 + }, + { + "epoch": 0.6836904131837409, + "grad_norm": 489.69525146484375, + "learning_rate": 2.856901081124359e-06, + "loss": 17.1072, + "step": 338450 + }, + { + "epoch": 0.6837106138164247, + "grad_norm": 166.05245971679688, + "learning_rate": 2.856585710599784e-06, + "loss": 20.7168, + "step": 338460 + }, + { + "epoch": 0.6837308144491085, + "grad_norm": 85.07772064208984, + "learning_rate": 2.8562703505219513e-06, + "loss": 18.3139, + "step": 338470 + }, + { + "epoch": 0.6837510150817924, + "grad_norm": 665.2428588867188, + "learning_rate": 2.855955000892398e-06, + "loss": 48.6721, + "step": 338480 + }, + { + "epoch": 0.6837712157144762, + "grad_norm": 501.0298156738281, + "learning_rate": 2.8556396617126575e-06, + "loss": 12.1529, + "step": 338490 + }, + { + "epoch": 0.68379141634716, + "grad_norm": 435.16632080078125, + "learning_rate": 2.8553243329842715e-06, + "loss": 28.8214, + "step": 338500 + }, + { + "epoch": 0.6838116169798438, + "grad_norm": 436.9760437011719, + "learning_rate": 2.855009014708774e-06, + "loss": 17.9255, + "step": 338510 + }, + { + "epoch": 0.6838318176125276, + "grad_norm": 133.7801971435547, + "learning_rate": 2.8546937068877013e-06, + "loss": 27.8965, + "step": 338520 + }, + { + "epoch": 0.6838520182452115, + "grad_norm": 261.2592468261719, + "learning_rate": 2.8543784095225923e-06, + "loss": 14.6261, + "step": 338530 + }, + { + "epoch": 0.6838722188778953, + "grad_norm": 300.118408203125, + "learning_rate": 2.8540631226149813e-06, + "loss": 24.2721, + "step": 338540 + }, + { + "epoch": 0.6838924195105791, + "grad_norm": 1.5835719108581543, + "learning_rate": 2.853747846166408e-06, + "loss": 5.3972, + "step": 338550 + }, + { + "epoch": 0.6839126201432629, + "grad_norm": 583.8345947265625, + "learning_rate": 2.853432580178408e-06, + "loss": 16.2432, + "step": 338560 + }, + { + "epoch": 0.6839328207759467, + "grad_norm": 344.1213073730469, + "learning_rate": 2.8531173246525144e-06, + "loss": 22.5829, + "step": 338570 + }, + { + "epoch": 0.6839530214086306, + "grad_norm": 1210.4813232421875, + "learning_rate": 2.8528020795902672e-06, + "loss": 13.6372, + "step": 338580 + }, + { + "epoch": 0.6839732220413144, + "grad_norm": 337.1247253417969, + "learning_rate": 2.852486844993204e-06, + "loss": 23.2272, + "step": 338590 + }, + { + "epoch": 0.6839934226739981, + "grad_norm": 511.3916015625, + "learning_rate": 2.8521716208628597e-06, + "loss": 15.6157, + "step": 338600 + }, + { + "epoch": 
0.6840136233066819, + "grad_norm": 309.7074279785156, + "learning_rate": 2.851856407200768e-06, + "loss": 13.0667, + "step": 338610 + }, + { + "epoch": 0.6840338239393657, + "grad_norm": 821.8313598632812, + "learning_rate": 2.85154120400847e-06, + "loss": 21.2477, + "step": 338620 + }, + { + "epoch": 0.6840540245720496, + "grad_norm": 121.84247589111328, + "learning_rate": 2.8512260112874994e-06, + "loss": 16.0241, + "step": 338630 + }, + { + "epoch": 0.6840742252047334, + "grad_norm": 248.17120361328125, + "learning_rate": 2.8509108290393907e-06, + "loss": 21.9676, + "step": 338640 + }, + { + "epoch": 0.6840944258374172, + "grad_norm": 249.0458526611328, + "learning_rate": 2.850595657265682e-06, + "loss": 7.1093, + "step": 338650 + }, + { + "epoch": 0.684114626470101, + "grad_norm": 786.6953735351562, + "learning_rate": 2.850280495967913e-06, + "loss": 16.8887, + "step": 338660 + }, + { + "epoch": 0.6841348271027848, + "grad_norm": 357.6971130371094, + "learning_rate": 2.8499653451476126e-06, + "loss": 19.3543, + "step": 338670 + }, + { + "epoch": 0.6841550277354687, + "grad_norm": 285.8201599121094, + "learning_rate": 2.84965020480632e-06, + "loss": 17.6934, + "step": 338680 + }, + { + "epoch": 0.6841752283681525, + "grad_norm": 246.18898010253906, + "learning_rate": 2.8493350749455737e-06, + "loss": 41.294, + "step": 338690 + }, + { + "epoch": 0.6841954290008363, + "grad_norm": 84.6827392578125, + "learning_rate": 2.849019955566908e-06, + "loss": 25.6304, + "step": 338700 + }, + { + "epoch": 0.6842156296335201, + "grad_norm": 522.9943237304688, + "learning_rate": 2.848704846671856e-06, + "loss": 20.3795, + "step": 338710 + }, + { + "epoch": 0.6842358302662039, + "grad_norm": 348.3674011230469, + "learning_rate": 2.8483897482619566e-06, + "loss": 26.6569, + "step": 338720 + }, + { + "epoch": 0.6842560308988878, + "grad_norm": 92.71199035644531, + "learning_rate": 2.8480746603387453e-06, + "loss": 17.377, + "step": 338730 + }, + { + "epoch": 0.6842762315315716, + "grad_norm": 505.63134765625, + "learning_rate": 2.847759582903755e-06, + "loss": 17.9122, + "step": 338740 + }, + { + "epoch": 0.6842964321642554, + "grad_norm": 366.8626708984375, + "learning_rate": 2.8474445159585235e-06, + "loss": 20.0451, + "step": 338750 + }, + { + "epoch": 0.6843166327969392, + "grad_norm": 413.54095458984375, + "learning_rate": 2.8471294595045886e-06, + "loss": 11.1325, + "step": 338760 + }, + { + "epoch": 0.684336833429623, + "grad_norm": 396.3618469238281, + "learning_rate": 2.8468144135434827e-06, + "loss": 28.3938, + "step": 338770 + }, + { + "epoch": 0.6843570340623069, + "grad_norm": 243.66842651367188, + "learning_rate": 2.8464993780767414e-06, + "loss": 32.5644, + "step": 338780 + }, + { + "epoch": 0.6843772346949907, + "grad_norm": 327.5538635253906, + "learning_rate": 2.846184353105902e-06, + "loss": 29.411, + "step": 338790 + }, + { + "epoch": 0.6843974353276745, + "grad_norm": 263.5344543457031, + "learning_rate": 2.8458693386325e-06, + "loss": 11.6262, + "step": 338800 + }, + { + "epoch": 0.6844176359603583, + "grad_norm": 166.4638671875, + "learning_rate": 2.845554334658066e-06, + "loss": 21.8029, + "step": 338810 + }, + { + "epoch": 0.6844378365930421, + "grad_norm": 165.18563842773438, + "learning_rate": 2.8452393411841418e-06, + "loss": 36.7276, + "step": 338820 + }, + { + "epoch": 0.684458037225726, + "grad_norm": 85.10484313964844, + "learning_rate": 2.8449243582122577e-06, + "loss": 18.2525, + "step": 338830 + }, + { + "epoch": 0.6844782378584098, + "grad_norm": 367.1798400878906, + 
"learning_rate": 2.8446093857439527e-06, + "loss": 17.5455, + "step": 338840 + }, + { + "epoch": 0.6844984384910935, + "grad_norm": 522.3867797851562, + "learning_rate": 2.84429442378076e-06, + "loss": 16.7963, + "step": 338850 + }, + { + "epoch": 0.6845186391237773, + "grad_norm": 301.0872802734375, + "learning_rate": 2.843979472324213e-06, + "loss": 42.7276, + "step": 338860 + }, + { + "epoch": 0.6845388397564611, + "grad_norm": 269.10028076171875, + "learning_rate": 2.843664531375851e-06, + "loss": 8.065, + "step": 338870 + }, + { + "epoch": 0.684559040389145, + "grad_norm": 162.30120849609375, + "learning_rate": 2.8433496009372063e-06, + "loss": 11.763, + "step": 338880 + }, + { + "epoch": 0.6845792410218288, + "grad_norm": 270.188232421875, + "learning_rate": 2.843034681009812e-06, + "loss": 17.6547, + "step": 338890 + }, + { + "epoch": 0.6845994416545126, + "grad_norm": 168.6224365234375, + "learning_rate": 2.8427197715952047e-06, + "loss": 14.2912, + "step": 338900 + }, + { + "epoch": 0.6846196422871964, + "grad_norm": 388.95849609375, + "learning_rate": 2.8424048726949215e-06, + "loss": 15.9734, + "step": 338910 + }, + { + "epoch": 0.6846398429198802, + "grad_norm": 239.84727478027344, + "learning_rate": 2.842089984310496e-06, + "loss": 9.5158, + "step": 338920 + }, + { + "epoch": 0.684660043552564, + "grad_norm": 414.3646240234375, + "learning_rate": 2.8417751064434596e-06, + "loss": 23.4782, + "step": 338930 + }, + { + "epoch": 0.6846802441852479, + "grad_norm": 264.4274597167969, + "learning_rate": 2.8414602390953515e-06, + "loss": 13.3265, + "step": 338940 + }, + { + "epoch": 0.6847004448179317, + "grad_norm": 351.666015625, + "learning_rate": 2.841145382267705e-06, + "loss": 14.7839, + "step": 338950 + }, + { + "epoch": 0.6847206454506155, + "grad_norm": 805.705810546875, + "learning_rate": 2.840830535962052e-06, + "loss": 22.2572, + "step": 338960 + }, + { + "epoch": 0.6847408460832993, + "grad_norm": 145.11767578125, + "learning_rate": 2.840515700179929e-06, + "loss": 16.8044, + "step": 338970 + }, + { + "epoch": 0.6847610467159831, + "grad_norm": 343.5023498535156, + "learning_rate": 2.8402008749228737e-06, + "loss": 42.1589, + "step": 338980 + }, + { + "epoch": 0.684781247348667, + "grad_norm": 309.49102783203125, + "learning_rate": 2.8398860601924145e-06, + "loss": 9.1604, + "step": 338990 + }, + { + "epoch": 0.6848014479813508, + "grad_norm": 277.977783203125, + "learning_rate": 2.839571255990088e-06, + "loss": 9.6512, + "step": 339000 + }, + { + "epoch": 0.6848216486140346, + "grad_norm": 319.27227783203125, + "learning_rate": 2.8392564623174314e-06, + "loss": 21.7686, + "step": 339010 + }, + { + "epoch": 0.6848418492467184, + "grad_norm": 428.2383728027344, + "learning_rate": 2.838941679175977e-06, + "loss": 27.1311, + "step": 339020 + }, + { + "epoch": 0.6848620498794022, + "grad_norm": 497.61212158203125, + "learning_rate": 2.838626906567257e-06, + "loss": 34.5446, + "step": 339030 + }, + { + "epoch": 0.6848822505120861, + "grad_norm": 288.2490234375, + "learning_rate": 2.8383121444928063e-06, + "loss": 10.8883, + "step": 339040 + }, + { + "epoch": 0.6849024511447699, + "grad_norm": 619.1780395507812, + "learning_rate": 2.837997392954165e-06, + "loss": 16.7764, + "step": 339050 + }, + { + "epoch": 0.6849226517774537, + "grad_norm": 366.1655578613281, + "learning_rate": 2.8376826519528572e-06, + "loss": 13.2817, + "step": 339060 + }, + { + "epoch": 0.6849428524101375, + "grad_norm": 352.8001708984375, + "learning_rate": 2.8373679214904225e-06, + "loss": 29.2796, + 
"step": 339070 + }, + { + "epoch": 0.6849630530428213, + "grad_norm": 0.0, + "learning_rate": 2.837053201568396e-06, + "loss": 14.3356, + "step": 339080 + }, + { + "epoch": 0.6849832536755052, + "grad_norm": 440.4238586425781, + "learning_rate": 2.83673849218831e-06, + "loss": 18.8858, + "step": 339090 + }, + { + "epoch": 0.685003454308189, + "grad_norm": 380.555419921875, + "learning_rate": 2.8364237933516964e-06, + "loss": 12.4292, + "step": 339100 + }, + { + "epoch": 0.6850236549408727, + "grad_norm": 613.6964111328125, + "learning_rate": 2.836109105060093e-06, + "loss": 18.6043, + "step": 339110 + }, + { + "epoch": 0.6850438555735565, + "grad_norm": 151.81381225585938, + "learning_rate": 2.8357944273150304e-06, + "loss": 13.9199, + "step": 339120 + }, + { + "epoch": 0.6850640562062403, + "grad_norm": 622.8221435546875, + "learning_rate": 2.835479760118042e-06, + "loss": 17.62, + "step": 339130 + }, + { + "epoch": 0.6850842568389242, + "grad_norm": 392.4091491699219, + "learning_rate": 2.835165103470665e-06, + "loss": 13.5514, + "step": 339140 + }, + { + "epoch": 0.685104457471608, + "grad_norm": 155.42665100097656, + "learning_rate": 2.8348504573744283e-06, + "loss": 10.0394, + "step": 339150 + }, + { + "epoch": 0.6851246581042918, + "grad_norm": 434.490478515625, + "learning_rate": 2.83453582183087e-06, + "loss": 20.22, + "step": 339160 + }, + { + "epoch": 0.6851448587369756, + "grad_norm": 56.256961822509766, + "learning_rate": 2.834221196841521e-06, + "loss": 15.547, + "step": 339170 + }, + { + "epoch": 0.6851650593696594, + "grad_norm": 348.5422668457031, + "learning_rate": 2.8339065824079137e-06, + "loss": 13.5531, + "step": 339180 + }, + { + "epoch": 0.6851852600023433, + "grad_norm": 3.973308563232422, + "learning_rate": 2.8335919785315854e-06, + "loss": 13.6842, + "step": 339190 + }, + { + "epoch": 0.6852054606350271, + "grad_norm": 196.72682189941406, + "learning_rate": 2.8332773852140644e-06, + "loss": 12.7807, + "step": 339200 + }, + { + "epoch": 0.6852256612677109, + "grad_norm": 295.85382080078125, + "learning_rate": 2.832962802456889e-06, + "loss": 12.823, + "step": 339210 + }, + { + "epoch": 0.6852458619003947, + "grad_norm": 159.17343139648438, + "learning_rate": 2.8326482302615875e-06, + "loss": 18.5173, + "step": 339220 + }, + { + "epoch": 0.6852660625330785, + "grad_norm": 367.822021484375, + "learning_rate": 2.832333668629698e-06, + "loss": 14.9841, + "step": 339230 + }, + { + "epoch": 0.6852862631657624, + "grad_norm": 262.43560791015625, + "learning_rate": 2.8320191175627517e-06, + "loss": 14.2051, + "step": 339240 + }, + { + "epoch": 0.6853064637984462, + "grad_norm": 571.4912109375, + "learning_rate": 2.8317045770622784e-06, + "loss": 24.2336, + "step": 339250 + }, + { + "epoch": 0.68532666443113, + "grad_norm": 177.7554931640625, + "learning_rate": 2.831390047129815e-06, + "loss": 26.9193, + "step": 339260 + }, + { + "epoch": 0.6853468650638138, + "grad_norm": 156.73809814453125, + "learning_rate": 2.8310755277668966e-06, + "loss": 24.9445, + "step": 339270 + }, + { + "epoch": 0.6853670656964976, + "grad_norm": 141.30067443847656, + "learning_rate": 2.8307610189750496e-06, + "loss": 19.265, + "step": 339280 + }, + { + "epoch": 0.6853872663291815, + "grad_norm": 416.14892578125, + "learning_rate": 2.8304465207558103e-06, + "loss": 19.1132, + "step": 339290 + }, + { + "epoch": 0.6854074669618653, + "grad_norm": 470.0908203125, + "learning_rate": 2.830132033110713e-06, + "loss": 22.0419, + "step": 339300 + }, + { + "epoch": 0.6854276675945491, + "grad_norm": 
431.71142578125, + "learning_rate": 2.829817556041289e-06, + "loss": 20.6272, + "step": 339310 + }, + { + "epoch": 0.6854478682272329, + "grad_norm": 682.9428100585938, + "learning_rate": 2.82950308954907e-06, + "loss": 23.6525, + "step": 339320 + }, + { + "epoch": 0.6854680688599167, + "grad_norm": 514.5043334960938, + "learning_rate": 2.829188633635591e-06, + "loss": 14.5834, + "step": 339330 + }, + { + "epoch": 0.6854882694926006, + "grad_norm": 441.3464050292969, + "learning_rate": 2.828874188302383e-06, + "loss": 22.2951, + "step": 339340 + }, + { + "epoch": 0.6855084701252844, + "grad_norm": 321.1398620605469, + "learning_rate": 2.8285597535509775e-06, + "loss": 23.3013, + "step": 339350 + }, + { + "epoch": 0.6855286707579682, + "grad_norm": 122.99885559082031, + "learning_rate": 2.8282453293829083e-06, + "loss": 11.8054, + "step": 339360 + }, + { + "epoch": 0.6855488713906519, + "grad_norm": 1375.604736328125, + "learning_rate": 2.8279309157997093e-06, + "loss": 28.3806, + "step": 339370 + }, + { + "epoch": 0.6855690720233357, + "grad_norm": 684.9808349609375, + "learning_rate": 2.827616512802912e-06, + "loss": 13.7723, + "step": 339380 + }, + { + "epoch": 0.6855892726560195, + "grad_norm": 208.48329162597656, + "learning_rate": 2.8273021203940466e-06, + "loss": 22.7337, + "step": 339390 + }, + { + "epoch": 0.6856094732887034, + "grad_norm": 413.0373840332031, + "learning_rate": 2.826987738574649e-06, + "loss": 14.3434, + "step": 339400 + }, + { + "epoch": 0.6856296739213872, + "grad_norm": 276.425537109375, + "learning_rate": 2.8266733673462497e-06, + "loss": 15.7779, + "step": 339410 + }, + { + "epoch": 0.685649874554071, + "grad_norm": 8.154792785644531, + "learning_rate": 2.8263590067103785e-06, + "loss": 23.8365, + "step": 339420 + }, + { + "epoch": 0.6856700751867548, + "grad_norm": 189.44625854492188, + "learning_rate": 2.8260446566685723e-06, + "loss": 16.6812, + "step": 339430 + }, + { + "epoch": 0.6856902758194386, + "grad_norm": 614.949951171875, + "learning_rate": 2.825730317222358e-06, + "loss": 17.2176, + "step": 339440 + }, + { + "epoch": 0.6857104764521225, + "grad_norm": 486.2034606933594, + "learning_rate": 2.8254159883732735e-06, + "loss": 13.4057, + "step": 339450 + }, + { + "epoch": 0.6857306770848063, + "grad_norm": 469.6888122558594, + "learning_rate": 2.8251016701228475e-06, + "loss": 16.3905, + "step": 339460 + }, + { + "epoch": 0.6857508777174901, + "grad_norm": 247.13253784179688, + "learning_rate": 2.82478736247261e-06, + "loss": 17.5554, + "step": 339470 + }, + { + "epoch": 0.6857710783501739, + "grad_norm": 203.11962890625, + "learning_rate": 2.824473065424096e-06, + "loss": 26.9208, + "step": 339480 + }, + { + "epoch": 0.6857912789828577, + "grad_norm": 152.78378295898438, + "learning_rate": 2.824158778978838e-06, + "loss": 27.2649, + "step": 339490 + }, + { + "epoch": 0.6858114796155416, + "grad_norm": 208.7306671142578, + "learning_rate": 2.8238445031383634e-06, + "loss": 16.7673, + "step": 339500 + }, + { + "epoch": 0.6858316802482254, + "grad_norm": 234.26898193359375, + "learning_rate": 2.823530237904207e-06, + "loss": 24.3152, + "step": 339510 + }, + { + "epoch": 0.6858518808809092, + "grad_norm": 342.73193359375, + "learning_rate": 2.8232159832779018e-06, + "loss": 16.2689, + "step": 339520 + }, + { + "epoch": 0.685872081513593, + "grad_norm": 607.8458862304688, + "learning_rate": 2.8229017392609782e-06, + "loss": 18.2768, + "step": 339530 + }, + { + "epoch": 0.6858922821462768, + "grad_norm": 385.2176208496094, + "learning_rate": 
2.8225875058549656e-06, + "loss": 11.7633, + "step": 339540 + }, + { + "epoch": 0.6859124827789607, + "grad_norm": 220.71499633789062, + "learning_rate": 2.8222732830613995e-06, + "loss": 12.2941, + "step": 339550 + }, + { + "epoch": 0.6859326834116445, + "grad_norm": 531.9205322265625, + "learning_rate": 2.821959070881809e-06, + "loss": 12.5715, + "step": 339560 + }, + { + "epoch": 0.6859528840443283, + "grad_norm": 166.906005859375, + "learning_rate": 2.821644869317724e-06, + "loss": 13.1727, + "step": 339570 + }, + { + "epoch": 0.6859730846770121, + "grad_norm": 926.1018676757812, + "learning_rate": 2.8213306783706774e-06, + "loss": 24.542, + "step": 339580 + }, + { + "epoch": 0.6859932853096959, + "grad_norm": 295.1950988769531, + "learning_rate": 2.821016498042205e-06, + "loss": 15.5637, + "step": 339590 + }, + { + "epoch": 0.6860134859423798, + "grad_norm": 18.469051361083984, + "learning_rate": 2.8207023283338304e-06, + "loss": 21.2554, + "step": 339600 + }, + { + "epoch": 0.6860336865750636, + "grad_norm": 210.36146545410156, + "learning_rate": 2.820388169247088e-06, + "loss": 10.2452, + "step": 339610 + }, + { + "epoch": 0.6860538872077473, + "grad_norm": 347.700927734375, + "learning_rate": 2.820074020783511e-06, + "loss": 15.1436, + "step": 339620 + }, + { + "epoch": 0.6860740878404311, + "grad_norm": 155.03411865234375, + "learning_rate": 2.8197598829446294e-06, + "loss": 12.7612, + "step": 339630 + }, + { + "epoch": 0.6860942884731149, + "grad_norm": 138.85855102539062, + "learning_rate": 2.819445755731971e-06, + "loss": 11.6971, + "step": 339640 + }, + { + "epoch": 0.6861144891057988, + "grad_norm": 423.6790466308594, + "learning_rate": 2.8191316391470703e-06, + "loss": 15.4166, + "step": 339650 + }, + { + "epoch": 0.6861346897384826, + "grad_norm": 835.9815063476562, + "learning_rate": 2.8188175331914608e-06, + "loss": 30.2213, + "step": 339660 + }, + { + "epoch": 0.6861548903711664, + "grad_norm": 168.22265625, + "learning_rate": 2.8185034378666666e-06, + "loss": 37.2645, + "step": 339670 + }, + { + "epoch": 0.6861750910038502, + "grad_norm": 154.26011657714844, + "learning_rate": 2.818189353174221e-06, + "loss": 9.9011, + "step": 339680 + }, + { + "epoch": 0.686195291636534, + "grad_norm": 899.8419799804688, + "learning_rate": 2.8178752791156593e-06, + "loss": 15.8846, + "step": 339690 + }, + { + "epoch": 0.6862154922692179, + "grad_norm": 362.2852783203125, + "learning_rate": 2.8175612156925082e-06, + "loss": 21.4574, + "step": 339700 + }, + { + "epoch": 0.6862356929019017, + "grad_norm": 443.1937255859375, + "learning_rate": 2.817247162906297e-06, + "loss": 11.8531, + "step": 339710 + }, + { + "epoch": 0.6862558935345855, + "grad_norm": 359.1485290527344, + "learning_rate": 2.8169331207585603e-06, + "loss": 18.6241, + "step": 339720 + }, + { + "epoch": 0.6862760941672693, + "grad_norm": 391.692138671875, + "learning_rate": 2.816619089250827e-06, + "loss": 21.1239, + "step": 339730 + }, + { + "epoch": 0.6862962947999531, + "grad_norm": 635.1595458984375, + "learning_rate": 2.8163050683846256e-06, + "loss": 21.8796, + "step": 339740 + }, + { + "epoch": 0.686316495432637, + "grad_norm": 232.27285766601562, + "learning_rate": 2.8159910581614904e-06, + "loss": 19.166, + "step": 339750 + }, + { + "epoch": 0.6863366960653208, + "grad_norm": 766.3213500976562, + "learning_rate": 2.8156770585829475e-06, + "loss": 22.994, + "step": 339760 + }, + { + "epoch": 0.6863568966980046, + "grad_norm": 437.24273681640625, + "learning_rate": 2.815363069650532e-06, + "loss": 8.2184, + "step": 
339770 + }, + { + "epoch": 0.6863770973306884, + "grad_norm": 500.9024658203125, + "learning_rate": 2.8150490913657713e-06, + "loss": 24.9419, + "step": 339780 + }, + { + "epoch": 0.6863972979633722, + "grad_norm": 323.9875793457031, + "learning_rate": 2.8147351237301957e-06, + "loss": 18.5207, + "step": 339790 + }, + { + "epoch": 0.686417498596056, + "grad_norm": 620.3389892578125, + "learning_rate": 2.814421166745337e-06, + "loss": 15.5238, + "step": 339800 + }, + { + "epoch": 0.6864376992287399, + "grad_norm": 126.69654846191406, + "learning_rate": 2.814107220412723e-06, + "loss": 25.5543, + "step": 339810 + }, + { + "epoch": 0.6864578998614237, + "grad_norm": 902.9891967773438, + "learning_rate": 2.8137932847338866e-06, + "loss": 9.4116, + "step": 339820 + }, + { + "epoch": 0.6864781004941075, + "grad_norm": 408.55572509765625, + "learning_rate": 2.813479359710355e-06, + "loss": 22.7702, + "step": 339830 + }, + { + "epoch": 0.6864983011267913, + "grad_norm": 293.8913879394531, + "learning_rate": 2.813165445343662e-06, + "loss": 26.4214, + "step": 339840 + }, + { + "epoch": 0.6865185017594752, + "grad_norm": 692.0927734375, + "learning_rate": 2.8128515416353345e-06, + "loss": 23.5343, + "step": 339850 + }, + { + "epoch": 0.686538702392159, + "grad_norm": 1028.668701171875, + "learning_rate": 2.8125376485869023e-06, + "loss": 18.7257, + "step": 339860 + }, + { + "epoch": 0.6865589030248428, + "grad_norm": 216.99252319335938, + "learning_rate": 2.812223766199898e-06, + "loss": 18.569, + "step": 339870 + }, + { + "epoch": 0.6865791036575265, + "grad_norm": 318.1048889160156, + "learning_rate": 2.8119098944758494e-06, + "loss": 11.5931, + "step": 339880 + }, + { + "epoch": 0.6865993042902103, + "grad_norm": 607.7545776367188, + "learning_rate": 2.811596033416285e-06, + "loss": 15.6965, + "step": 339890 + }, + { + "epoch": 0.6866195049228941, + "grad_norm": 173.1877899169922, + "learning_rate": 2.811282183022736e-06, + "loss": 18.8509, + "step": 339900 + }, + { + "epoch": 0.686639705555578, + "grad_norm": 424.3025817871094, + "learning_rate": 2.8109683432967346e-06, + "loss": 36.455, + "step": 339910 + }, + { + "epoch": 0.6866599061882618, + "grad_norm": 353.19000244140625, + "learning_rate": 2.8106545142398073e-06, + "loss": 34.3502, + "step": 339920 + }, + { + "epoch": 0.6866801068209456, + "grad_norm": 115.36778259277344, + "learning_rate": 2.810340695853483e-06, + "loss": 14.4016, + "step": 339930 + }, + { + "epoch": 0.6867003074536294, + "grad_norm": 226.95738220214844, + "learning_rate": 2.810026888139294e-06, + "loss": 29.3139, + "step": 339940 + }, + { + "epoch": 0.6867205080863132, + "grad_norm": 276.2227783203125, + "learning_rate": 2.809713091098768e-06, + "loss": 29.8927, + "step": 339950 + }, + { + "epoch": 0.6867407087189971, + "grad_norm": 444.8854064941406, + "learning_rate": 2.8093993047334333e-06, + "loss": 15.124, + "step": 339960 + }, + { + "epoch": 0.6867609093516809, + "grad_norm": 85.50798034667969, + "learning_rate": 2.809085529044821e-06, + "loss": 24.085, + "step": 339970 + }, + { + "epoch": 0.6867811099843647, + "grad_norm": 788.4835815429688, + "learning_rate": 2.808771764034462e-06, + "loss": 13.8028, + "step": 339980 + }, + { + "epoch": 0.6868013106170485, + "grad_norm": 32.53948974609375, + "learning_rate": 2.8084580097038834e-06, + "loss": 18.2738, + "step": 339990 + }, + { + "epoch": 0.6868215112497323, + "grad_norm": 44.076515197753906, + "learning_rate": 2.8081442660546126e-06, + "loss": 15.7829, + "step": 340000 + }, + { + "epoch": 0.6868417118824162, + 
"grad_norm": 288.90655517578125, + "learning_rate": 2.8078305330881826e-06, + "loss": 24.8038, + "step": 340010 + }, + { + "epoch": 0.6868619125151, + "grad_norm": 221.90176391601562, + "learning_rate": 2.8075168108061213e-06, + "loss": 13.9836, + "step": 340020 + }, + { + "epoch": 0.6868821131477838, + "grad_norm": 377.74688720703125, + "learning_rate": 2.8072030992099552e-06, + "loss": 15.0077, + "step": 340030 + }, + { + "epoch": 0.6869023137804676, + "grad_norm": 825.9044799804688, + "learning_rate": 2.806889398301217e-06, + "loss": 29.1264, + "step": 340040 + }, + { + "epoch": 0.6869225144131514, + "grad_norm": 353.9937438964844, + "learning_rate": 2.8065757080814315e-06, + "loss": 14.6999, + "step": 340050 + }, + { + "epoch": 0.6869427150458353, + "grad_norm": 311.3169250488281, + "learning_rate": 2.8062620285521325e-06, + "loss": 16.7061, + "step": 340060 + }, + { + "epoch": 0.6869629156785191, + "grad_norm": 561.126220703125, + "learning_rate": 2.8059483597148457e-06, + "loss": 15.8589, + "step": 340070 + }, + { + "epoch": 0.6869831163112029, + "grad_norm": 450.1378173828125, + "learning_rate": 2.8056347015710987e-06, + "loss": 22.9202, + "step": 340080 + }, + { + "epoch": 0.6870033169438867, + "grad_norm": 408.6428527832031, + "learning_rate": 2.805321054122424e-06, + "loss": 24.6382, + "step": 340090 + }, + { + "epoch": 0.6870235175765705, + "grad_norm": 248.0974884033203, + "learning_rate": 2.805007417370347e-06, + "loss": 12.2664, + "step": 340100 + }, + { + "epoch": 0.6870437182092544, + "grad_norm": 262.83172607421875, + "learning_rate": 2.804693791316399e-06, + "loss": 18.6025, + "step": 340110 + }, + { + "epoch": 0.6870639188419382, + "grad_norm": 318.6762390136719, + "learning_rate": 2.8043801759621053e-06, + "loss": 17.1193, + "step": 340120 + }, + { + "epoch": 0.6870841194746219, + "grad_norm": 1668.1658935546875, + "learning_rate": 2.804066571308998e-06, + "loss": 18.7926, + "step": 340130 + }, + { + "epoch": 0.6871043201073057, + "grad_norm": 465.1991882324219, + "learning_rate": 2.8037529773586047e-06, + "loss": 18.1072, + "step": 340140 + }, + { + "epoch": 0.6871245207399895, + "grad_norm": 276.9002380371094, + "learning_rate": 2.8034393941124505e-06, + "loss": 10.82, + "step": 340150 + }, + { + "epoch": 0.6871447213726734, + "grad_norm": 17.017826080322266, + "learning_rate": 2.803125821572068e-06, + "loss": 20.1898, + "step": 340160 + }, + { + "epoch": 0.6871649220053572, + "grad_norm": 163.25289916992188, + "learning_rate": 2.802812259738984e-06, + "loss": 16.3359, + "step": 340170 + }, + { + "epoch": 0.687185122638041, + "grad_norm": 0.4149203896522522, + "learning_rate": 2.8024987086147247e-06, + "loss": 15.007, + "step": 340180 + }, + { + "epoch": 0.6872053232707248, + "grad_norm": 103.61934661865234, + "learning_rate": 2.8021851682008205e-06, + "loss": 21.0726, + "step": 340190 + }, + { + "epoch": 0.6872255239034086, + "grad_norm": 610.775146484375, + "learning_rate": 2.8018716384988034e-06, + "loss": 13.1409, + "step": 340200 + }, + { + "epoch": 0.6872457245360925, + "grad_norm": 489.849853515625, + "learning_rate": 2.8015581195101927e-06, + "loss": 25.1898, + "step": 340210 + }, + { + "epoch": 0.6872659251687763, + "grad_norm": 296.70062255859375, + "learning_rate": 2.801244611236521e-06, + "loss": 11.8347, + "step": 340220 + }, + { + "epoch": 0.6872861258014601, + "grad_norm": 115.51083374023438, + "learning_rate": 2.800931113679318e-06, + "loss": 14.6181, + "step": 340230 + }, + { + "epoch": 0.6873063264341439, + "grad_norm": 339.8750915527344, + 
"learning_rate": 2.8006176268401107e-06, + "loss": 14.6715, + "step": 340240 + }, + { + "epoch": 0.6873265270668277, + "grad_norm": 586.9760131835938, + "learning_rate": 2.800304150720424e-06, + "loss": 13.9651, + "step": 340250 + }, + { + "epoch": 0.6873467276995116, + "grad_norm": 333.0511779785156, + "learning_rate": 2.7999906853217885e-06, + "loss": 21.7675, + "step": 340260 + }, + { + "epoch": 0.6873669283321954, + "grad_norm": 248.5399169921875, + "learning_rate": 2.7996772306457354e-06, + "loss": 16.6357, + "step": 340270 + }, + { + "epoch": 0.6873871289648792, + "grad_norm": 473.8727111816406, + "learning_rate": 2.799363786693785e-06, + "loss": 33.6923, + "step": 340280 + }, + { + "epoch": 0.687407329597563, + "grad_norm": 327.8541259765625, + "learning_rate": 2.7990503534674684e-06, + "loss": 19.5511, + "step": 340290 + }, + { + "epoch": 0.6874275302302468, + "grad_norm": 413.4512939453125, + "learning_rate": 2.798736930968315e-06, + "loss": 19.8102, + "step": 340300 + }, + { + "epoch": 0.6874477308629307, + "grad_norm": 217.4813232421875, + "learning_rate": 2.798423519197851e-06, + "loss": 12.5471, + "step": 340310 + }, + { + "epoch": 0.6874679314956145, + "grad_norm": 8.071371078491211, + "learning_rate": 2.798110118157602e-06, + "loss": 17.3754, + "step": 340320 + }, + { + "epoch": 0.6874881321282983, + "grad_norm": 216.73049926757812, + "learning_rate": 2.797796727849099e-06, + "loss": 13.7105, + "step": 340330 + }, + { + "epoch": 0.6875083327609821, + "grad_norm": 2.946580648422241, + "learning_rate": 2.7974833482738674e-06, + "loss": 9.3621, + "step": 340340 + }, + { + "epoch": 0.6875285333936659, + "grad_norm": 351.96160888671875, + "learning_rate": 2.7971699794334332e-06, + "loss": 29.4176, + "step": 340350 + }, + { + "epoch": 0.6875487340263498, + "grad_norm": 365.6743469238281, + "learning_rate": 2.7968566213293276e-06, + "loss": 29.0831, + "step": 340360 + }, + { + "epoch": 0.6875689346590336, + "grad_norm": 264.7963562011719, + "learning_rate": 2.796543273963073e-06, + "loss": 14.8826, + "step": 340370 + }, + { + "epoch": 0.6875891352917174, + "grad_norm": 778.5166625976562, + "learning_rate": 2.796229937336202e-06, + "loss": 31.3981, + "step": 340380 + }, + { + "epoch": 0.6876093359244011, + "grad_norm": 146.21185302734375, + "learning_rate": 2.795916611450238e-06, + "loss": 13.2399, + "step": 340390 + }, + { + "epoch": 0.6876295365570849, + "grad_norm": 377.69317626953125, + "learning_rate": 2.795603296306708e-06, + "loss": 16.3829, + "step": 340400 + }, + { + "epoch": 0.6876497371897687, + "grad_norm": 90.84944915771484, + "learning_rate": 2.7952899919071417e-06, + "loss": 27.3867, + "step": 340410 + }, + { + "epoch": 0.6876699378224526, + "grad_norm": 327.8248291015625, + "learning_rate": 2.7949766982530624e-06, + "loss": 12.4236, + "step": 340420 + }, + { + "epoch": 0.6876901384551364, + "grad_norm": 388.8680114746094, + "learning_rate": 2.7946634153460016e-06, + "loss": 18.8251, + "step": 340430 + }, + { + "epoch": 0.6877103390878202, + "grad_norm": 348.58453369140625, + "learning_rate": 2.794350143187482e-06, + "loss": 26.4771, + "step": 340440 + }, + { + "epoch": 0.687730539720504, + "grad_norm": 604.9873657226562, + "learning_rate": 2.794036881779034e-06, + "loss": 21.413, + "step": 340450 + }, + { + "epoch": 0.6877507403531878, + "grad_norm": 299.6792907714844, + "learning_rate": 2.7937236311221827e-06, + "loss": 17.7782, + "step": 340460 + }, + { + "epoch": 0.6877709409858717, + "grad_norm": 643.5337524414062, + "learning_rate": 2.793410391218453e-06, + 
"loss": 21.1648, + "step": 340470 + }, + { + "epoch": 0.6877911416185555, + "grad_norm": 114.86630249023438, + "learning_rate": 2.7930971620693746e-06, + "loss": 13.6656, + "step": 340480 + }, + { + "epoch": 0.6878113422512393, + "grad_norm": 294.94219970703125, + "learning_rate": 2.792783943676474e-06, + "loss": 18.2662, + "step": 340490 + }, + { + "epoch": 0.6878315428839231, + "grad_norm": 344.4489440917969, + "learning_rate": 2.7924707360412743e-06, + "loss": 24.6059, + "step": 340500 + }, + { + "epoch": 0.6878517435166069, + "grad_norm": 1122.903076171875, + "learning_rate": 2.7921575391653048e-06, + "loss": 26.4641, + "step": 340510 + }, + { + "epoch": 0.6878719441492908, + "grad_norm": 7706.94482421875, + "learning_rate": 2.791844353050094e-06, + "loss": 41.3216, + "step": 340520 + }, + { + "epoch": 0.6878921447819746, + "grad_norm": 545.328369140625, + "learning_rate": 2.7915311776971655e-06, + "loss": 17.2415, + "step": 340530 + }, + { + "epoch": 0.6879123454146584, + "grad_norm": 413.46563720703125, + "learning_rate": 2.7912180131080434e-06, + "loss": 18.0958, + "step": 340540 + }, + { + "epoch": 0.6879325460473422, + "grad_norm": 567.9567260742188, + "learning_rate": 2.7909048592842602e-06, + "loss": 18.6867, + "step": 340550 + }, + { + "epoch": 0.687952746680026, + "grad_norm": 469.08050537109375, + "learning_rate": 2.7905917162273377e-06, + "loss": 12.8325, + "step": 340560 + }, + { + "epoch": 0.6879729473127099, + "grad_norm": 45.83987045288086, + "learning_rate": 2.790278583938802e-06, + "loss": 12.1036, + "step": 340570 + }, + { + "epoch": 0.6879931479453937, + "grad_norm": 58.32484817504883, + "learning_rate": 2.78996546242018e-06, + "loss": 13.5492, + "step": 340580 + }, + { + "epoch": 0.6880133485780775, + "grad_norm": 198.85121154785156, + "learning_rate": 2.7896523516730005e-06, + "loss": 10.6656, + "step": 340590 + }, + { + "epoch": 0.6880335492107613, + "grad_norm": 343.50848388671875, + "learning_rate": 2.7893392516987873e-06, + "loss": 11.052, + "step": 340600 + }, + { + "epoch": 0.6880537498434451, + "grad_norm": 551.748291015625, + "learning_rate": 2.7890261624990643e-06, + "loss": 9.662, + "step": 340610 + }, + { + "epoch": 0.688073950476129, + "grad_norm": 412.3675842285156, + "learning_rate": 2.788713084075362e-06, + "loss": 12.7007, + "step": 340620 + }, + { + "epoch": 0.6880941511088128, + "grad_norm": 630.4569091796875, + "learning_rate": 2.7884000164292034e-06, + "loss": 11.6983, + "step": 340630 + }, + { + "epoch": 0.6881143517414965, + "grad_norm": 284.4775085449219, + "learning_rate": 2.7880869595621134e-06, + "loss": 17.0826, + "step": 340640 + }, + { + "epoch": 0.6881345523741803, + "grad_norm": 269.10418701171875, + "learning_rate": 2.787773913475621e-06, + "loss": 9.4737, + "step": 340650 + }, + { + "epoch": 0.6881547530068641, + "grad_norm": 339.931640625, + "learning_rate": 2.7874608781712486e-06, + "loss": 37.7955, + "step": 340660 + }, + { + "epoch": 0.688174953639548, + "grad_norm": 777.3720092773438, + "learning_rate": 2.7871478536505253e-06, + "loss": 26.072, + "step": 340670 + }, + { + "epoch": 0.6881951542722318, + "grad_norm": 427.119384765625, + "learning_rate": 2.7868348399149747e-06, + "loss": 23.7469, + "step": 340680 + }, + { + "epoch": 0.6882153549049156, + "grad_norm": 266.82476806640625, + "learning_rate": 2.786521836966121e-06, + "loss": 19.1638, + "step": 340690 + }, + { + "epoch": 0.6882355555375994, + "grad_norm": 272.7963562011719, + "learning_rate": 2.7862088448054936e-06, + "loss": 21.1126, + "step": 340700 + }, + { + 
"epoch": 0.6882557561702832, + "grad_norm": 87.15125274658203, + "learning_rate": 2.7858958634346132e-06, + "loss": 17.4884, + "step": 340710 + }, + { + "epoch": 0.688275956802967, + "grad_norm": 209.98974609375, + "learning_rate": 2.78558289285501e-06, + "loss": 11.1753, + "step": 340720 + }, + { + "epoch": 0.6882961574356509, + "grad_norm": 1364.9686279296875, + "learning_rate": 2.7852699330682056e-06, + "loss": 17.0484, + "step": 340730 + }, + { + "epoch": 0.6883163580683347, + "grad_norm": 993.4461059570312, + "learning_rate": 2.7849569840757284e-06, + "loss": 44.8279, + "step": 340740 + }, + { + "epoch": 0.6883365587010185, + "grad_norm": 228.784912109375, + "learning_rate": 2.7846440458791024e-06, + "loss": 24.1108, + "step": 340750 + }, + { + "epoch": 0.6883567593337023, + "grad_norm": 422.5013122558594, + "learning_rate": 2.784331118479851e-06, + "loss": 15.2329, + "step": 340760 + }, + { + "epoch": 0.6883769599663861, + "grad_norm": 281.162109375, + "learning_rate": 2.7840182018795025e-06, + "loss": 10.155, + "step": 340770 + }, + { + "epoch": 0.68839716059907, + "grad_norm": 1008.8878784179688, + "learning_rate": 2.7837052960795807e-06, + "loss": 24.7039, + "step": 340780 + }, + { + "epoch": 0.6884173612317538, + "grad_norm": 345.2157287597656, + "learning_rate": 2.783392401081609e-06, + "loss": 20.5017, + "step": 340790 + }, + { + "epoch": 0.6884375618644376, + "grad_norm": 336.3854675292969, + "learning_rate": 2.7830795168871127e-06, + "loss": 14.436, + "step": 340800 + }, + { + "epoch": 0.6884577624971214, + "grad_norm": 198.10012817382812, + "learning_rate": 2.782766643497623e-06, + "loss": 12.2432, + "step": 340810 + }, + { + "epoch": 0.6884779631298052, + "grad_norm": 745.2477416992188, + "learning_rate": 2.7824537809146555e-06, + "loss": 20.2383, + "step": 340820 + }, + { + "epoch": 0.6884981637624891, + "grad_norm": 218.8932342529297, + "learning_rate": 2.7821409291397394e-06, + "loss": 16.3388, + "step": 340830 + }, + { + "epoch": 0.6885183643951729, + "grad_norm": 406.8684387207031, + "learning_rate": 2.7818280881744007e-06, + "loss": 22.412, + "step": 340840 + }, + { + "epoch": 0.6885385650278567, + "grad_norm": 445.8983459472656, + "learning_rate": 2.7815152580201637e-06, + "loss": 19.376, + "step": 340850 + }, + { + "epoch": 0.6885587656605405, + "grad_norm": 63.937171936035156, + "learning_rate": 2.7812024386785495e-06, + "loss": 15.8397, + "step": 340860 + }, + { + "epoch": 0.6885789662932243, + "grad_norm": 108.40321350097656, + "learning_rate": 2.7808896301510867e-06, + "loss": 15.4774, + "step": 340870 + }, + { + "epoch": 0.6885991669259082, + "grad_norm": 1771.914306640625, + "learning_rate": 2.7805768324393017e-06, + "loss": 26.9394, + "step": 340880 + }, + { + "epoch": 0.688619367558592, + "grad_norm": 24.736854553222656, + "learning_rate": 2.7802640455447123e-06, + "loss": 8.4835, + "step": 340890 + }, + { + "epoch": 0.6886395681912757, + "grad_norm": 332.3523254394531, + "learning_rate": 2.779951269468847e-06, + "loss": 13.3643, + "step": 340900 + }, + { + "epoch": 0.6886597688239595, + "grad_norm": 195.14511108398438, + "learning_rate": 2.779638504213231e-06, + "loss": 9.4787, + "step": 340910 + }, + { + "epoch": 0.6886799694566433, + "grad_norm": 4.786660194396973, + "learning_rate": 2.7793257497793892e-06, + "loss": 12.2524, + "step": 340920 + }, + { + "epoch": 0.6887001700893272, + "grad_norm": 322.34686279296875, + "learning_rate": 2.7790130061688416e-06, + "loss": 11.9802, + "step": 340930 + }, + { + "epoch": 0.688720370722011, + "grad_norm": 
355.6814880371094, + "learning_rate": 2.7787002733831166e-06, + "loss": 18.3249, + "step": 340940 + }, + { + "epoch": 0.6887405713546948, + "grad_norm": 230.53578186035156, + "learning_rate": 2.7783875514237373e-06, + "loss": 15.303, + "step": 340950 + }, + { + "epoch": 0.6887607719873786, + "grad_norm": 258.0885009765625, + "learning_rate": 2.7780748402922263e-06, + "loss": 9.6966, + "step": 340960 + }, + { + "epoch": 0.6887809726200624, + "grad_norm": 459.1393737792969, + "learning_rate": 2.77776213999011e-06, + "loss": 25.5466, + "step": 340970 + }, + { + "epoch": 0.6888011732527463, + "grad_norm": 9.57157039642334, + "learning_rate": 2.777449450518911e-06, + "loss": 29.0427, + "step": 340980 + }, + { + "epoch": 0.6888213738854301, + "grad_norm": 3.6269240379333496, + "learning_rate": 2.7771367718801546e-06, + "loss": 9.6223, + "step": 340990 + }, + { + "epoch": 0.6888415745181139, + "grad_norm": 962.04931640625, + "learning_rate": 2.776824104075364e-06, + "loss": 25.6872, + "step": 341000 + }, + { + "epoch": 0.6888617751507977, + "grad_norm": 0.0, + "learning_rate": 2.776511447106062e-06, + "loss": 3.9544, + "step": 341010 + }, + { + "epoch": 0.6888819757834815, + "grad_norm": 584.7926635742188, + "learning_rate": 2.7761988009737746e-06, + "loss": 19.5068, + "step": 341020 + }, + { + "epoch": 0.6889021764161654, + "grad_norm": 245.7073211669922, + "learning_rate": 2.7758861656800227e-06, + "loss": 11.2009, + "step": 341030 + }, + { + "epoch": 0.6889223770488492, + "grad_norm": 553.265380859375, + "learning_rate": 2.775573541226334e-06, + "loss": 13.5094, + "step": 341040 + }, + { + "epoch": 0.688942577681533, + "grad_norm": 148.35116577148438, + "learning_rate": 2.7752609276142282e-06, + "loss": 17.183, + "step": 341050 + }, + { + "epoch": 0.6889627783142168, + "grad_norm": 292.98052978515625, + "learning_rate": 2.7749483248452324e-06, + "loss": 13.9737, + "step": 341060 + }, + { + "epoch": 0.6889829789469006, + "grad_norm": 276.73663330078125, + "learning_rate": 2.7746357329208693e-06, + "loss": 8.6564, + "step": 341070 + }, + { + "epoch": 0.6890031795795845, + "grad_norm": 230.5380859375, + "learning_rate": 2.7743231518426594e-06, + "loss": 24.8348, + "step": 341080 + }, + { + "epoch": 0.6890233802122683, + "grad_norm": 314.87286376953125, + "learning_rate": 2.7740105816121306e-06, + "loss": 20.6187, + "step": 341090 + }, + { + "epoch": 0.6890435808449521, + "grad_norm": 252.77398681640625, + "learning_rate": 2.7736980222308042e-06, + "loss": 14.1285, + "step": 341100 + }, + { + "epoch": 0.6890637814776359, + "grad_norm": 1018.5267333984375, + "learning_rate": 2.773385473700201e-06, + "loss": 29.8728, + "step": 341110 + }, + { + "epoch": 0.6890839821103197, + "grad_norm": 221.62344360351562, + "learning_rate": 2.7730729360218478e-06, + "loss": 19.5299, + "step": 341120 + }, + { + "epoch": 0.6891041827430036, + "grad_norm": 212.8367919921875, + "learning_rate": 2.7727604091972687e-06, + "loss": 11.6628, + "step": 341130 + }, + { + "epoch": 0.6891243833756874, + "grad_norm": 75.01510620117188, + "learning_rate": 2.772447893227985e-06, + "loss": 22.7795, + "step": 341140 + }, + { + "epoch": 0.6891445840083712, + "grad_norm": 451.935791015625, + "learning_rate": 2.772135388115519e-06, + "loss": 15.9128, + "step": 341150 + }, + { + "epoch": 0.6891647846410549, + "grad_norm": 385.2341003417969, + "learning_rate": 2.7718228938613955e-06, + "loss": 16.1654, + "step": 341160 + }, + { + "epoch": 0.6891849852737387, + "grad_norm": 183.01031494140625, + "learning_rate": 2.7715104104671377e-06, + 
"loss": 20.1729, + "step": 341170 + }, + { + "epoch": 0.6892051859064225, + "grad_norm": 433.5887756347656, + "learning_rate": 2.7711979379342658e-06, + "loss": 21.0788, + "step": 341180 + }, + { + "epoch": 0.6892253865391064, + "grad_norm": 715.4939575195312, + "learning_rate": 2.7708854762643055e-06, + "loss": 36.1636, + "step": 341190 + }, + { + "epoch": 0.6892455871717902, + "grad_norm": 442.5892028808594, + "learning_rate": 2.7705730254587802e-06, + "loss": 37.1358, + "step": 341200 + }, + { + "epoch": 0.689265787804474, + "grad_norm": 147.4979248046875, + "learning_rate": 2.770260585519212e-06, + "loss": 26.1768, + "step": 341210 + }, + { + "epoch": 0.6892859884371578, + "grad_norm": 188.3673858642578, + "learning_rate": 2.769948156447121e-06, + "loss": 14.2528, + "step": 341220 + }, + { + "epoch": 0.6893061890698416, + "grad_norm": 719.9442138671875, + "learning_rate": 2.7696357382440344e-06, + "loss": 17.4133, + "step": 341230 + }, + { + "epoch": 0.6893263897025255, + "grad_norm": 217.36753845214844, + "learning_rate": 2.769323330911472e-06, + "loss": 18.39, + "step": 341240 + }, + { + "epoch": 0.6893465903352093, + "grad_norm": 645.3171997070312, + "learning_rate": 2.7690109344509563e-06, + "loss": 14.6812, + "step": 341250 + }, + { + "epoch": 0.6893667909678931, + "grad_norm": 514.885009765625, + "learning_rate": 2.768698548864012e-06, + "loss": 14.1372, + "step": 341260 + }, + { + "epoch": 0.6893869916005769, + "grad_norm": 0.0, + "learning_rate": 2.768386174152159e-06, + "loss": 15.9463, + "step": 341270 + }, + { + "epoch": 0.6894071922332607, + "grad_norm": 447.8780822753906, + "learning_rate": 2.7680738103169223e-06, + "loss": 24.9264, + "step": 341280 + }, + { + "epoch": 0.6894273928659446, + "grad_norm": 221.66900634765625, + "learning_rate": 2.7677614573598232e-06, + "loss": 19.5077, + "step": 341290 + }, + { + "epoch": 0.6894475934986284, + "grad_norm": 137.110595703125, + "learning_rate": 2.7674491152823825e-06, + "loss": 16.2112, + "step": 341300 + }, + { + "epoch": 0.6894677941313122, + "grad_norm": 224.9796142578125, + "learning_rate": 2.7671367840861256e-06, + "loss": 12.5398, + "step": 341310 + }, + { + "epoch": 0.689487994763996, + "grad_norm": 287.8158264160156, + "learning_rate": 2.766824463772572e-06, + "loss": 46.7692, + "step": 341320 + }, + { + "epoch": 0.6895081953966798, + "grad_norm": 142.9286346435547, + "learning_rate": 2.766512154343246e-06, + "loss": 18.3329, + "step": 341330 + }, + { + "epoch": 0.6895283960293637, + "grad_norm": 205.15731811523438, + "learning_rate": 2.766199855799667e-06, + "loss": 11.1582, + "step": 341340 + }, + { + "epoch": 0.6895485966620475, + "grad_norm": 496.43463134765625, + "learning_rate": 2.765887568143362e-06, + "loss": 19.4094, + "step": 341350 + }, + { + "epoch": 0.6895687972947313, + "grad_norm": 566.0675048828125, + "learning_rate": 2.7655752913758494e-06, + "loss": 22.1426, + "step": 341360 + }, + { + "epoch": 0.6895889979274151, + "grad_norm": 37.94282150268555, + "learning_rate": 2.76526302549865e-06, + "loss": 20.0147, + "step": 341370 + }, + { + "epoch": 0.689609198560099, + "grad_norm": 142.8148956298828, + "learning_rate": 2.7649507705132894e-06, + "loss": 12.5573, + "step": 341380 + }, + { + "epoch": 0.6896293991927828, + "grad_norm": 449.9120788574219, + "learning_rate": 2.764638526421287e-06, + "loss": 16.3284, + "step": 341390 + }, + { + "epoch": 0.6896495998254666, + "grad_norm": 310.89874267578125, + "learning_rate": 2.7643262932241642e-06, + "loss": 17.8502, + "step": 341400 + }, + { + "epoch": 
0.6896698004581503, + "grad_norm": 720.1802978515625, + "learning_rate": 2.7640140709234444e-06, + "loss": 21.1976, + "step": 341410 + }, + { + "epoch": 0.6896900010908341, + "grad_norm": 299.3088073730469, + "learning_rate": 2.763701859520652e-06, + "loss": 20.999, + "step": 341420 + }, + { + "epoch": 0.6897102017235179, + "grad_norm": 226.9088592529297, + "learning_rate": 2.7633896590173014e-06, + "loss": 12.0909, + "step": 341430 + }, + { + "epoch": 0.6897304023562018, + "grad_norm": 1.896621584892273, + "learning_rate": 2.763077469414919e-06, + "loss": 12.4256, + "step": 341440 + }, + { + "epoch": 0.6897506029888856, + "grad_norm": 113.39884948730469, + "learning_rate": 2.7627652907150272e-06, + "loss": 15.5896, + "step": 341450 + }, + { + "epoch": 0.6897708036215694, + "grad_norm": 564.82568359375, + "learning_rate": 2.7624531229191453e-06, + "loss": 15.1447, + "step": 341460 + }, + { + "epoch": 0.6897910042542532, + "grad_norm": 170.11587524414062, + "learning_rate": 2.7621409660287944e-06, + "loss": 17.5078, + "step": 341470 + }, + { + "epoch": 0.689811204886937, + "grad_norm": 656.7221069335938, + "learning_rate": 2.7618288200454966e-06, + "loss": 20.1535, + "step": 341480 + }, + { + "epoch": 0.6898314055196209, + "grad_norm": 617.5283203125, + "learning_rate": 2.7615166849707786e-06, + "loss": 17.732, + "step": 341490 + }, + { + "epoch": 0.6898516061523047, + "grad_norm": 623.5250244140625, + "learning_rate": 2.761204560806152e-06, + "loss": 28.6873, + "step": 341500 + }, + { + "epoch": 0.6898718067849885, + "grad_norm": 537.3228759765625, + "learning_rate": 2.760892447553143e-06, + "loss": 27.0241, + "step": 341510 + }, + { + "epoch": 0.6898920074176723, + "grad_norm": 262.24029541015625, + "learning_rate": 2.7605803452132753e-06, + "loss": 16.3141, + "step": 341520 + }, + { + "epoch": 0.6899122080503561, + "grad_norm": 124.42293548583984, + "learning_rate": 2.7602682537880663e-06, + "loss": 13.0789, + "step": 341530 + }, + { + "epoch": 0.68993240868304, + "grad_norm": 463.8037414550781, + "learning_rate": 2.7599561732790364e-06, + "loss": 16.2278, + "step": 341540 + }, + { + "epoch": 0.6899526093157238, + "grad_norm": 343.3017578125, + "learning_rate": 2.759644103687711e-06, + "loss": 20.3726, + "step": 341550 + }, + { + "epoch": 0.6899728099484076, + "grad_norm": 0.0, + "learning_rate": 2.759332045015608e-06, + "loss": 17.1994, + "step": 341560 + }, + { + "epoch": 0.6899930105810914, + "grad_norm": 389.5445556640625, + "learning_rate": 2.759019997264247e-06, + "loss": 17.1864, + "step": 341570 + }, + { + "epoch": 0.6900132112137752, + "grad_norm": 986.8057861328125, + "learning_rate": 2.758707960435153e-06, + "loss": 25.8143, + "step": 341580 + }, + { + "epoch": 0.690033411846459, + "grad_norm": 150.53372192382812, + "learning_rate": 2.7583959345298416e-06, + "loss": 23.3609, + "step": 341590 + }, + { + "epoch": 0.6900536124791429, + "grad_norm": 324.0406188964844, + "learning_rate": 2.7580839195498397e-06, + "loss": 18.046, + "step": 341600 + }, + { + "epoch": 0.6900738131118267, + "grad_norm": 449.5357666015625, + "learning_rate": 2.757771915496662e-06, + "loss": 28.5391, + "step": 341610 + }, + { + "epoch": 0.6900940137445105, + "grad_norm": 548.3353881835938, + "learning_rate": 2.7574599223718347e-06, + "loss": 14.7917, + "step": 341620 + }, + { + "epoch": 0.6901142143771943, + "grad_norm": 565.8510131835938, + "learning_rate": 2.7571479401768754e-06, + "loss": 27.5098, + "step": 341630 + }, + { + "epoch": 0.6901344150098782, + "grad_norm": 177.2288055419922, + 
"learning_rate": 2.7568359689133027e-06, + "loss": 16.4068, + "step": 341640 + }, + { + "epoch": 0.690154615642562, + "grad_norm": 750.8839721679688, + "learning_rate": 2.7565240085826423e-06, + "loss": 18.1508, + "step": 341650 + }, + { + "epoch": 0.6901748162752458, + "grad_norm": 427.1605224609375, + "learning_rate": 2.7562120591864093e-06, + "loss": 14.9471, + "step": 341660 + }, + { + "epoch": 0.6901950169079295, + "grad_norm": 225.49070739746094, + "learning_rate": 2.755900120726128e-06, + "loss": 12.9618, + "step": 341670 + }, + { + "epoch": 0.6902152175406133, + "grad_norm": 139.39871215820312, + "learning_rate": 2.7555881932033186e-06, + "loss": 19.017, + "step": 341680 + }, + { + "epoch": 0.6902354181732971, + "grad_norm": 244.59457397460938, + "learning_rate": 2.7552762766194975e-06, + "loss": 6.6486, + "step": 341690 + }, + { + "epoch": 0.690255618805981, + "grad_norm": 188.11622619628906, + "learning_rate": 2.75496437097619e-06, + "loss": 16.6627, + "step": 341700 + }, + { + "epoch": 0.6902758194386648, + "grad_norm": 298.0274658203125, + "learning_rate": 2.7546524762749126e-06, + "loss": 23.5935, + "step": 341710 + }, + { + "epoch": 0.6902960200713486, + "grad_norm": 817.8543090820312, + "learning_rate": 2.7543405925171855e-06, + "loss": 24.2122, + "step": 341720 + }, + { + "epoch": 0.6903162207040324, + "grad_norm": 497.2781677246094, + "learning_rate": 2.75402871970453e-06, + "loss": 22.2856, + "step": 341730 + }, + { + "epoch": 0.6903364213367162, + "grad_norm": 191.90689086914062, + "learning_rate": 2.7537168578384703e-06, + "loss": 18.6488, + "step": 341740 + }, + { + "epoch": 0.6903566219694001, + "grad_norm": 8.177273750305176, + "learning_rate": 2.753405006920518e-06, + "loss": 26.7123, + "step": 341750 + }, + { + "epoch": 0.6903768226020839, + "grad_norm": 293.4024353027344, + "learning_rate": 2.7530931669521975e-06, + "loss": 25.3256, + "step": 341760 + }, + { + "epoch": 0.6903970232347677, + "grad_norm": 153.3657989501953, + "learning_rate": 2.75278133793503e-06, + "loss": 13.5059, + "step": 341770 + }, + { + "epoch": 0.6904172238674515, + "grad_norm": 767.7716674804688, + "learning_rate": 2.752469519870534e-06, + "loss": 28.2242, + "step": 341780 + }, + { + "epoch": 0.6904374245001353, + "grad_norm": 385.3144836425781, + "learning_rate": 2.752157712760226e-06, + "loss": 8.7122, + "step": 341790 + }, + { + "epoch": 0.6904576251328192, + "grad_norm": 358.40167236328125, + "learning_rate": 2.75184591660563e-06, + "loss": 16.5276, + "step": 341800 + }, + { + "epoch": 0.690477825765503, + "grad_norm": 265.4300231933594, + "learning_rate": 2.7515341314082657e-06, + "loss": 23.2288, + "step": 341810 + }, + { + "epoch": 0.6904980263981868, + "grad_norm": 532.1017456054688, + "learning_rate": 2.7512223571696515e-06, + "loss": 21.0139, + "step": 341820 + }, + { + "epoch": 0.6905182270308706, + "grad_norm": 345.5724792480469, + "learning_rate": 2.750910593891305e-06, + "loss": 19.7294, + "step": 341830 + }, + { + "epoch": 0.6905384276635544, + "grad_norm": 136.15821838378906, + "learning_rate": 2.7505988415747486e-06, + "loss": 12.9156, + "step": 341840 + }, + { + "epoch": 0.6905586282962383, + "grad_norm": 995.5159912109375, + "learning_rate": 2.7502871002215016e-06, + "loss": 32.061, + "step": 341850 + }, + { + "epoch": 0.6905788289289221, + "grad_norm": 1319.545654296875, + "learning_rate": 2.74997536983308e-06, + "loss": 33.5073, + "step": 341860 + }, + { + "epoch": 0.6905990295616059, + "grad_norm": 20.535587310791016, + "learning_rate": 2.7496636504110077e-06, + 
"loss": 19.944, + "step": 341870 + }, + { + "epoch": 0.6906192301942897, + "grad_norm": 411.4344787597656, + "learning_rate": 2.7493519419567995e-06, + "loss": 14.5516, + "step": 341880 + }, + { + "epoch": 0.6906394308269735, + "grad_norm": 391.94793701171875, + "learning_rate": 2.749040244471979e-06, + "loss": 27.9656, + "step": 341890 + }, + { + "epoch": 0.6906596314596574, + "grad_norm": 394.89111328125, + "learning_rate": 2.7487285579580635e-06, + "loss": 28.7569, + "step": 341900 + }, + { + "epoch": 0.6906798320923412, + "grad_norm": 564.4580688476562, + "learning_rate": 2.7484168824165702e-06, + "loss": 28.1125, + "step": 341910 + }, + { + "epoch": 0.6907000327250249, + "grad_norm": 390.86895751953125, + "learning_rate": 2.748105217849022e-06, + "loss": 21.425, + "step": 341920 + }, + { + "epoch": 0.6907202333577087, + "grad_norm": 591.1107788085938, + "learning_rate": 2.747793564256933e-06, + "loss": 13.648, + "step": 341930 + }, + { + "epoch": 0.6907404339903925, + "grad_norm": 362.59063720703125, + "learning_rate": 2.747481921641828e-06, + "loss": 14.0846, + "step": 341940 + }, + { + "epoch": 0.6907606346230764, + "grad_norm": 149.49891662597656, + "learning_rate": 2.7471702900052204e-06, + "loss": 10.2265, + "step": 341950 + }, + { + "epoch": 0.6907808352557602, + "grad_norm": 184.87611389160156, + "learning_rate": 2.746858669348634e-06, + "loss": 13.4705, + "step": 341960 + }, + { + "epoch": 0.690801035888444, + "grad_norm": 118.86833953857422, + "learning_rate": 2.7465470596735843e-06, + "loss": 18.1832, + "step": 341970 + }, + { + "epoch": 0.6908212365211278, + "grad_norm": 5.063994884490967, + "learning_rate": 2.74623546098159e-06, + "loss": 14.1036, + "step": 341980 + }, + { + "epoch": 0.6908414371538116, + "grad_norm": 283.65765380859375, + "learning_rate": 2.745923873274172e-06, + "loss": 24.1931, + "step": 341990 + }, + { + "epoch": 0.6908616377864955, + "grad_norm": 286.74346923828125, + "learning_rate": 2.7456122965528475e-06, + "loss": 13.6474, + "step": 342000 + }, + { + "epoch": 0.6908818384191793, + "grad_norm": 207.9541778564453, + "learning_rate": 2.745300730819134e-06, + "loss": 18.4033, + "step": 342010 + }, + { + "epoch": 0.6909020390518631, + "grad_norm": 407.3016357421875, + "learning_rate": 2.7449891760745504e-06, + "loss": 20.6398, + "step": 342020 + }, + { + "epoch": 0.6909222396845469, + "grad_norm": 126.01081085205078, + "learning_rate": 2.744677632320621e-06, + "loss": 12.2805, + "step": 342030 + }, + { + "epoch": 0.6909424403172307, + "grad_norm": 422.3426208496094, + "learning_rate": 2.744366099558855e-06, + "loss": 22.3791, + "step": 342040 + }, + { + "epoch": 0.6909626409499146, + "grad_norm": 180.5189971923828, + "learning_rate": 2.7440545777907747e-06, + "loss": 22.0096, + "step": 342050 + }, + { + "epoch": 0.6909828415825984, + "grad_norm": 1454.185302734375, + "learning_rate": 2.743743067017901e-06, + "loss": 35.3015, + "step": 342060 + }, + { + "epoch": 0.6910030422152822, + "grad_norm": 355.3629455566406, + "learning_rate": 2.7434315672417493e-06, + "loss": 16.1543, + "step": 342070 + }, + { + "epoch": 0.691023242847966, + "grad_norm": 459.5791015625, + "learning_rate": 2.743120078463837e-06, + "loss": 14.744, + "step": 342080 + }, + { + "epoch": 0.6910434434806498, + "grad_norm": 371.49664306640625, + "learning_rate": 2.7428086006856843e-06, + "loss": 18.5724, + "step": 342090 + }, + { + "epoch": 0.6910636441133337, + "grad_norm": 1652.5516357421875, + "learning_rate": 2.742497133908812e-06, + "loss": 28.9154, + "step": 342100 + }, + { + 
"epoch": 0.6910838447460175, + "grad_norm": 390.2788391113281, + "learning_rate": 2.7421856781347313e-06, + "loss": 16.0531, + "step": 342110 + }, + { + "epoch": 0.6911040453787013, + "grad_norm": 583.36767578125, + "learning_rate": 2.7418742333649628e-06, + "loss": 11.0729, + "step": 342120 + }, + { + "epoch": 0.6911242460113851, + "grad_norm": 72.25563049316406, + "learning_rate": 2.7415627996010287e-06, + "loss": 18.3156, + "step": 342130 + }, + { + "epoch": 0.6911444466440689, + "grad_norm": 434.4340515136719, + "learning_rate": 2.741251376844443e-06, + "loss": 31.8693, + "step": 342140 + }, + { + "epoch": 0.6911646472767528, + "grad_norm": 231.30372619628906, + "learning_rate": 2.7409399650967217e-06, + "loss": 14.0856, + "step": 342150 + }, + { + "epoch": 0.6911848479094366, + "grad_norm": 382.9341125488281, + "learning_rate": 2.7406285643593875e-06, + "loss": 22.5025, + "step": 342160 + }, + { + "epoch": 0.6912050485421204, + "grad_norm": 463.0118103027344, + "learning_rate": 2.740317174633955e-06, + "loss": 11.0961, + "step": 342170 + }, + { + "epoch": 0.6912252491748041, + "grad_norm": 394.51226806640625, + "learning_rate": 2.7400057959219416e-06, + "loss": 16.2463, + "step": 342180 + }, + { + "epoch": 0.6912454498074879, + "grad_norm": 669.329833984375, + "learning_rate": 2.7396944282248672e-06, + "loss": 15.9909, + "step": 342190 + }, + { + "epoch": 0.6912656504401717, + "grad_norm": 488.08099365234375, + "learning_rate": 2.739383071544246e-06, + "loss": 16.1023, + "step": 342200 + }, + { + "epoch": 0.6912858510728556, + "grad_norm": 474.5091552734375, + "learning_rate": 2.7390717258816003e-06, + "loss": 32.0847, + "step": 342210 + }, + { + "epoch": 0.6913060517055394, + "grad_norm": 410.9049987792969, + "learning_rate": 2.738760391238442e-06, + "loss": 19.5689, + "step": 342220 + }, + { + "epoch": 0.6913262523382232, + "grad_norm": 40.76900863647461, + "learning_rate": 2.7384490676162932e-06, + "loss": 16.7012, + "step": 342230 + }, + { + "epoch": 0.691346452970907, + "grad_norm": 129.6337432861328, + "learning_rate": 2.73813775501667e-06, + "loss": 12.0863, + "step": 342240 + }, + { + "epoch": 0.6913666536035908, + "grad_norm": 58.73592758178711, + "learning_rate": 2.7378264534410865e-06, + "loss": 18.7431, + "step": 342250 + }, + { + "epoch": 0.6913868542362747, + "grad_norm": 42.233219146728516, + "learning_rate": 2.7375151628910645e-06, + "loss": 24.7578, + "step": 342260 + }, + { + "epoch": 0.6914070548689585, + "grad_norm": 504.1520080566406, + "learning_rate": 2.7372038833681176e-06, + "loss": 12.358, + "step": 342270 + }, + { + "epoch": 0.6914272555016423, + "grad_norm": 226.17491149902344, + "learning_rate": 2.7368926148737663e-06, + "loss": 17.2239, + "step": 342280 + }, + { + "epoch": 0.6914474561343261, + "grad_norm": 329.98089599609375, + "learning_rate": 2.736581357409526e-06, + "loss": 26.5297, + "step": 342290 + }, + { + "epoch": 0.6914676567670099, + "grad_norm": 76.80755615234375, + "learning_rate": 2.736270110976912e-06, + "loss": 9.6641, + "step": 342300 + }, + { + "epoch": 0.6914878573996938, + "grad_norm": 203.5906219482422, + "learning_rate": 2.7359588755774437e-06, + "loss": 23.3691, + "step": 342310 + }, + { + "epoch": 0.6915080580323776, + "grad_norm": 378.94305419921875, + "learning_rate": 2.7356476512126386e-06, + "loss": 20.2183, + "step": 342320 + }, + { + "epoch": 0.6915282586650614, + "grad_norm": 243.5066375732422, + "learning_rate": 2.7353364378840096e-06, + "loss": 13.5426, + "step": 342330 + }, + { + "epoch": 0.6915484592977452, + 
"grad_norm": 462.4680480957031, + "learning_rate": 2.735025235593076e-06, + "loss": 21.7532, + "step": 342340 + }, + { + "epoch": 0.691568659930429, + "grad_norm": 440.2567138671875, + "learning_rate": 2.734714044341359e-06, + "loss": 26.9437, + "step": 342350 + }, + { + "epoch": 0.6915888605631129, + "grad_norm": 340.2488098144531, + "learning_rate": 2.7344028641303667e-06, + "loss": 10.5369, + "step": 342360 + }, + { + "epoch": 0.6916090611957967, + "grad_norm": 292.6058349609375, + "learning_rate": 2.7340916949616204e-06, + "loss": 50.0589, + "step": 342370 + }, + { + "epoch": 0.6916292618284805, + "grad_norm": 706.38330078125, + "learning_rate": 2.733780536836638e-06, + "loss": 19.9434, + "step": 342380 + }, + { + "epoch": 0.6916494624611643, + "grad_norm": 332.52581787109375, + "learning_rate": 2.733469389756934e-06, + "loss": 16.805, + "step": 342390 + }, + { + "epoch": 0.6916696630938481, + "grad_norm": 335.47833251953125, + "learning_rate": 2.7331582537240243e-06, + "loss": 23.3698, + "step": 342400 + }, + { + "epoch": 0.691689863726532, + "grad_norm": 650.4447021484375, + "learning_rate": 2.7328471287394265e-06, + "loss": 23.3412, + "step": 342410 + }, + { + "epoch": 0.6917100643592158, + "grad_norm": 625.17431640625, + "learning_rate": 2.7325360148046598e-06, + "loss": 25.9586, + "step": 342420 + }, + { + "epoch": 0.6917302649918996, + "grad_norm": 406.36944580078125, + "learning_rate": 2.732224911921234e-06, + "loss": 8.3115, + "step": 342430 + }, + { + "epoch": 0.6917504656245833, + "grad_norm": 159.1592254638672, + "learning_rate": 2.731913820090669e-06, + "loss": 23.8566, + "step": 342440 + }, + { + "epoch": 0.6917706662572671, + "grad_norm": 349.4911804199219, + "learning_rate": 2.7316027393144827e-06, + "loss": 29.5121, + "step": 342450 + }, + { + "epoch": 0.691790866889951, + "grad_norm": 113.1489028930664, + "learning_rate": 2.73129166959419e-06, + "loss": 10.7803, + "step": 342460 + }, + { + "epoch": 0.6918110675226348, + "grad_norm": 626.6707763671875, + "learning_rate": 2.730980610931304e-06, + "loss": 14.4416, + "step": 342470 + }, + { + "epoch": 0.6918312681553186, + "grad_norm": 468.79327392578125, + "learning_rate": 2.7306695633273454e-06, + "loss": 8.2098, + "step": 342480 + }, + { + "epoch": 0.6918514687880024, + "grad_norm": 246.52345275878906, + "learning_rate": 2.7303585267838263e-06, + "loss": 17.3883, + "step": 342490 + }, + { + "epoch": 0.6918716694206862, + "grad_norm": 239.7262420654297, + "learning_rate": 2.7300475013022666e-06, + "loss": 19.8601, + "step": 342500 + }, + { + "epoch": 0.69189187005337, + "grad_norm": 391.1923522949219, + "learning_rate": 2.7297364868841803e-06, + "loss": 19.1219, + "step": 342510 + }, + { + "epoch": 0.6919120706860539, + "grad_norm": 628.7379760742188, + "learning_rate": 2.72942548353108e-06, + "loss": 15.5385, + "step": 342520 + }, + { + "epoch": 0.6919322713187377, + "grad_norm": 211.38189697265625, + "learning_rate": 2.729114491244487e-06, + "loss": 14.2025, + "step": 342530 + }, + { + "epoch": 0.6919524719514215, + "grad_norm": 499.23553466796875, + "learning_rate": 2.728803510025913e-06, + "loss": 16.8967, + "step": 342540 + }, + { + "epoch": 0.6919726725841053, + "grad_norm": 363.2895202636719, + "learning_rate": 2.728492539876877e-06, + "loss": 13.3384, + "step": 342550 + }, + { + "epoch": 0.6919928732167892, + "grad_norm": 587.162841796875, + "learning_rate": 2.728181580798891e-06, + "loss": 11.0642, + "step": 342560 + }, + { + "epoch": 0.692013073849473, + "grad_norm": 361.87200927734375, + "learning_rate": 
2.727870632793474e-06, + "loss": 12.462, + "step": 342570 + }, + { + "epoch": 0.6920332744821568, + "grad_norm": 535.1802368164062, + "learning_rate": 2.7275596958621397e-06, + "loss": 25.5524, + "step": 342580 + }, + { + "epoch": 0.6920534751148406, + "grad_norm": 839.19482421875, + "learning_rate": 2.7272487700064027e-06, + "loss": 23.5786, + "step": 342590 + }, + { + "epoch": 0.6920736757475244, + "grad_norm": 384.6982727050781, + "learning_rate": 2.726937855227781e-06, + "loss": 34.0722, + "step": 342600 + }, + { + "epoch": 0.6920938763802083, + "grad_norm": 692.6864624023438, + "learning_rate": 2.7266269515277887e-06, + "loss": 13.7394, + "step": 342610 + }, + { + "epoch": 0.6921140770128921, + "grad_norm": 257.4065856933594, + "learning_rate": 2.7263160589079385e-06, + "loss": 34.1307, + "step": 342620 + }, + { + "epoch": 0.6921342776455759, + "grad_norm": 430.5124816894531, + "learning_rate": 2.726005177369749e-06, + "loss": 20.9934, + "step": 342630 + }, + { + "epoch": 0.6921544782782597, + "grad_norm": 414.63714599609375, + "learning_rate": 2.7256943069147375e-06, + "loss": 3.8553, + "step": 342640 + }, + { + "epoch": 0.6921746789109435, + "grad_norm": 603.077880859375, + "learning_rate": 2.7253834475444126e-06, + "loss": 19.9507, + "step": 342650 + }, + { + "epoch": 0.6921948795436274, + "grad_norm": 500.0039978027344, + "learning_rate": 2.7250725992602926e-06, + "loss": 35.457, + "step": 342660 + }, + { + "epoch": 0.6922150801763112, + "grad_norm": 536.0646362304688, + "learning_rate": 2.724761762063895e-06, + "loss": 14.2985, + "step": 342670 + }, + { + "epoch": 0.692235280808995, + "grad_norm": 561.66748046875, + "learning_rate": 2.724450935956733e-06, + "loss": 17.293, + "step": 342680 + }, + { + "epoch": 0.6922554814416787, + "grad_norm": 486.9822998046875, + "learning_rate": 2.7241401209403185e-06, + "loss": 20.2485, + "step": 342690 + }, + { + "epoch": 0.6922756820743625, + "grad_norm": 383.0431213378906, + "learning_rate": 2.723829317016169e-06, + "loss": 12.8665, + "step": 342700 + }, + { + "epoch": 0.6922958827070463, + "grad_norm": 338.18701171875, + "learning_rate": 2.723518524185804e-06, + "loss": 27.404, + "step": 342710 + }, + { + "epoch": 0.6923160833397302, + "grad_norm": 38.59807205200195, + "learning_rate": 2.723207742450729e-06, + "loss": 7.1109, + "step": 342720 + }, + { + "epoch": 0.692336283972414, + "grad_norm": 194.36668395996094, + "learning_rate": 2.7228969718124638e-06, + "loss": 23.9927, + "step": 342730 + }, + { + "epoch": 0.6923564846050978, + "grad_norm": 262.5284423828125, + "learning_rate": 2.7225862122725243e-06, + "loss": 24.375, + "step": 342740 + }, + { + "epoch": 0.6923766852377816, + "grad_norm": 473.5325012207031, + "learning_rate": 2.7222754638324235e-06, + "loss": 19.0865, + "step": 342750 + }, + { + "epoch": 0.6923968858704654, + "grad_norm": 458.2725524902344, + "learning_rate": 2.7219647264936733e-06, + "loss": 28.6746, + "step": 342760 + }, + { + "epoch": 0.6924170865031493, + "grad_norm": 5.04648494720459, + "learning_rate": 2.7216540002577933e-06, + "loss": 20.2492, + "step": 342770 + }, + { + "epoch": 0.6924372871358331, + "grad_norm": 5.904820442199707, + "learning_rate": 2.7213432851262955e-06, + "loss": 10.9156, + "step": 342780 + }, + { + "epoch": 0.6924574877685169, + "grad_norm": 299.0598449707031, + "learning_rate": 2.721032581100692e-06, + "loss": 18.7586, + "step": 342790 + }, + { + "epoch": 0.6924776884012007, + "grad_norm": 481.9532165527344, + "learning_rate": 2.7207218881825016e-06, + "loss": 20.6702, + "step": 342800 
+ }, + { + "epoch": 0.6924978890338845, + "grad_norm": 837.5463256835938, + "learning_rate": 2.7204112063732337e-06, + "loss": 12.8458, + "step": 342810 + }, + { + "epoch": 0.6925180896665684, + "grad_norm": 293.55029296875, + "learning_rate": 2.720100535674407e-06, + "loss": 7.2574, + "step": 342820 + }, + { + "epoch": 0.6925382902992522, + "grad_norm": 532.0534057617188, + "learning_rate": 2.719789876087532e-06, + "loss": 25.8048, + "step": 342830 + }, + { + "epoch": 0.692558490931936, + "grad_norm": 1.385595440864563, + "learning_rate": 2.719479227614127e-06, + "loss": 9.461, + "step": 342840 + }, + { + "epoch": 0.6925786915646198, + "grad_norm": 495.5453186035156, + "learning_rate": 2.719168590255703e-06, + "loss": 18.9211, + "step": 342850 + }, + { + "epoch": 0.6925988921973036, + "grad_norm": 656.9524536132812, + "learning_rate": 2.718857964013773e-06, + "loss": 24.4337, + "step": 342860 + }, + { + "epoch": 0.6926190928299875, + "grad_norm": 786.6329345703125, + "learning_rate": 2.7185473488898544e-06, + "loss": 38.7861, + "step": 342870 + }, + { + "epoch": 0.6926392934626713, + "grad_norm": 637.03076171875, + "learning_rate": 2.7182367448854573e-06, + "loss": 16.2283, + "step": 342880 + }, + { + "epoch": 0.6926594940953551, + "grad_norm": 251.8146209716797, + "learning_rate": 2.7179261520021005e-06, + "loss": 12.2752, + "step": 342890 + }, + { + "epoch": 0.6926796947280389, + "grad_norm": 207.01556396484375, + "learning_rate": 2.717615570241294e-06, + "loss": 11.8785, + "step": 342900 + }, + { + "epoch": 0.6926998953607227, + "grad_norm": 193.8882293701172, + "learning_rate": 2.71730499960455e-06, + "loss": 13.7648, + "step": 342910 + }, + { + "epoch": 0.6927200959934066, + "grad_norm": 607.9854125976562, + "learning_rate": 2.7169944400933872e-06, + "loss": 11.8336, + "step": 342920 + }, + { + "epoch": 0.6927402966260904, + "grad_norm": 299.84722900390625, + "learning_rate": 2.7166838917093163e-06, + "loss": 25.2349, + "step": 342930 + }, + { + "epoch": 0.6927604972587742, + "grad_norm": 350.7240905761719, + "learning_rate": 2.7163733544538496e-06, + "loss": 17.3845, + "step": 342940 + }, + { + "epoch": 0.6927806978914579, + "grad_norm": 230.2483673095703, + "learning_rate": 2.716062828328502e-06, + "loss": 8.2107, + "step": 342950 + }, + { + "epoch": 0.6928008985241417, + "grad_norm": 169.19032287597656, + "learning_rate": 2.7157523133347906e-06, + "loss": 12.2735, + "step": 342960 + }, + { + "epoch": 0.6928210991568255, + "grad_norm": 297.84771728515625, + "learning_rate": 2.7154418094742222e-06, + "loss": 7.1174, + "step": 342970 + }, + { + "epoch": 0.6928412997895094, + "grad_norm": 267.66998291015625, + "learning_rate": 2.7151313167483127e-06, + "loss": 20.0398, + "step": 342980 + }, + { + "epoch": 0.6928615004221932, + "grad_norm": 495.4940185546875, + "learning_rate": 2.714820835158579e-06, + "loss": 22.8677, + "step": 342990 + }, + { + "epoch": 0.692881701054877, + "grad_norm": 300.28643798828125, + "learning_rate": 2.714510364706531e-06, + "loss": 16.3664, + "step": 343000 + }, + { + "epoch": 0.6929019016875608, + "grad_norm": 328.9463806152344, + "learning_rate": 2.7141999053936795e-06, + "loss": 21.4665, + "step": 343010 + }, + { + "epoch": 0.6929221023202446, + "grad_norm": 421.4502258300781, + "learning_rate": 2.713889457221541e-06, + "loss": 24.8321, + "step": 343020 + }, + { + "epoch": 0.6929423029529285, + "grad_norm": 117.91653442382812, + "learning_rate": 2.713579020191632e-06, + "loss": 21.2125, + "step": 343030 + }, + { + "epoch": 0.6929625035856123, + 
"grad_norm": 330.5600280761719, + "learning_rate": 2.713268594305458e-06, + "loss": 23.6342, + "step": 343040 + }, + { + "epoch": 0.6929827042182961, + "grad_norm": 432.38043212890625, + "learning_rate": 2.712958179564535e-06, + "loss": 16.9824, + "step": 343050 + }, + { + "epoch": 0.6930029048509799, + "grad_norm": 393.30328369140625, + "learning_rate": 2.7126477759703786e-06, + "loss": 14.0953, + "step": 343060 + }, + { + "epoch": 0.6930231054836637, + "grad_norm": 300.6133728027344, + "learning_rate": 2.7123373835244994e-06, + "loss": 16.7631, + "step": 343070 + }, + { + "epoch": 0.6930433061163476, + "grad_norm": 1277.46826171875, + "learning_rate": 2.712027002228408e-06, + "loss": 30.9857, + "step": 343080 + }, + { + "epoch": 0.6930635067490314, + "grad_norm": 453.1453857421875, + "learning_rate": 2.711716632083622e-06, + "loss": 10.205, + "step": 343090 + }, + { + "epoch": 0.6930837073817152, + "grad_norm": 170.03887939453125, + "learning_rate": 2.7114062730916513e-06, + "loss": 14.3898, + "step": 343100 + }, + { + "epoch": 0.693103908014399, + "grad_norm": 506.34442138671875, + "learning_rate": 2.711095925254007e-06, + "loss": 22.0371, + "step": 343110 + }, + { + "epoch": 0.6931241086470828, + "grad_norm": 848.0342407226562, + "learning_rate": 2.7107855885722036e-06, + "loss": 15.3399, + "step": 343120 + }, + { + "epoch": 0.6931443092797667, + "grad_norm": 136.1507568359375, + "learning_rate": 2.710475263047756e-06, + "loss": 15.8781, + "step": 343130 + }, + { + "epoch": 0.6931645099124505, + "grad_norm": 319.15411376953125, + "learning_rate": 2.7101649486821735e-06, + "loss": 21.6083, + "step": 343140 + }, + { + "epoch": 0.6931847105451343, + "grad_norm": 131.8520965576172, + "learning_rate": 2.7098546454769683e-06, + "loss": 10.8579, + "step": 343150 + }, + { + "epoch": 0.6932049111778181, + "grad_norm": 261.65594482421875, + "learning_rate": 2.7095443534336545e-06, + "loss": 13.2355, + "step": 343160 + }, + { + "epoch": 0.693225111810502, + "grad_norm": 152.35980224609375, + "learning_rate": 2.7092340725537446e-06, + "loss": 15.7499, + "step": 343170 + }, + { + "epoch": 0.6932453124431858, + "grad_norm": 1050.298828125, + "learning_rate": 2.708923802838748e-06, + "loss": 22.562, + "step": 343180 + }, + { + "epoch": 0.6932655130758696, + "grad_norm": 331.61590576171875, + "learning_rate": 2.7086135442901806e-06, + "loss": 14.9861, + "step": 343190 + }, + { + "epoch": 0.6932857137085533, + "grad_norm": 564.1597900390625, + "learning_rate": 2.708303296909551e-06, + "loss": 25.4975, + "step": 343200 + }, + { + "epoch": 0.6933059143412371, + "grad_norm": 433.4954833984375, + "learning_rate": 2.7079930606983753e-06, + "loss": 12.8945, + "step": 343210 + }, + { + "epoch": 0.6933261149739209, + "grad_norm": 160.9016876220703, + "learning_rate": 2.707682835658163e-06, + "loss": 21.6092, + "step": 343220 + }, + { + "epoch": 0.6933463156066048, + "grad_norm": 507.4945983886719, + "learning_rate": 2.7073726217904247e-06, + "loss": 17.8813, + "step": 343230 + }, + { + "epoch": 0.6933665162392886, + "grad_norm": 208.25315856933594, + "learning_rate": 2.7070624190966744e-06, + "loss": 13.1097, + "step": 343240 + }, + { + "epoch": 0.6933867168719724, + "grad_norm": 137.00123596191406, + "learning_rate": 2.7067522275784275e-06, + "loss": 14.4099, + "step": 343250 + }, + { + "epoch": 0.6934069175046562, + "grad_norm": 420.7992248535156, + "learning_rate": 2.7064420472371876e-06, + "loss": 11.5682, + "step": 343260 + }, + { + "epoch": 0.69342711813734, + "grad_norm": 290.5668640136719, + 
"learning_rate": 2.706131878074472e-06, + "loss": 14.6584, + "step": 343270 + }, + { + "epoch": 0.6934473187700239, + "grad_norm": 201.93861389160156, + "learning_rate": 2.7058217200917934e-06, + "loss": 14.4573, + "step": 343280 + }, + { + "epoch": 0.6934675194027077, + "grad_norm": 375.8645935058594, + "learning_rate": 2.705511573290661e-06, + "loss": 17.5655, + "step": 343290 + }, + { + "epoch": 0.6934877200353915, + "grad_norm": 368.1650085449219, + "learning_rate": 2.705201437672585e-06, + "loss": 15.0431, + "step": 343300 + }, + { + "epoch": 0.6935079206680753, + "grad_norm": 841.0086669921875, + "learning_rate": 2.7048913132390787e-06, + "loss": 26.5423, + "step": 343310 + }, + { + "epoch": 0.6935281213007591, + "grad_norm": 496.6707763671875, + "learning_rate": 2.7045811999916583e-06, + "loss": 23.5289, + "step": 343320 + }, + { + "epoch": 0.693548321933443, + "grad_norm": 398.8883056640625, + "learning_rate": 2.7042710979318266e-06, + "loss": 11.9281, + "step": 343330 + }, + { + "epoch": 0.6935685225661268, + "grad_norm": 236.6054229736328, + "learning_rate": 2.703961007061099e-06, + "loss": 12.007, + "step": 343340 + }, + { + "epoch": 0.6935887231988106, + "grad_norm": 148.3018341064453, + "learning_rate": 2.7036509273809887e-06, + "loss": 14.4419, + "step": 343350 + }, + { + "epoch": 0.6936089238314944, + "grad_norm": 311.6505432128906, + "learning_rate": 2.7033408588930054e-06, + "loss": 24.959, + "step": 343360 + }, + { + "epoch": 0.6936291244641782, + "grad_norm": 365.60302734375, + "learning_rate": 2.7030308015986583e-06, + "loss": 21.7858, + "step": 343370 + }, + { + "epoch": 0.6936493250968621, + "grad_norm": 258.2699279785156, + "learning_rate": 2.7027207554994627e-06, + "loss": 31.418, + "step": 343380 + }, + { + "epoch": 0.6936695257295459, + "grad_norm": 312.8044738769531, + "learning_rate": 2.702410720596927e-06, + "loss": 35.4766, + "step": 343390 + }, + { + "epoch": 0.6936897263622297, + "grad_norm": 534.6698608398438, + "learning_rate": 2.7021006968925613e-06, + "loss": 18.845, + "step": 343400 + }, + { + "epoch": 0.6937099269949135, + "grad_norm": 186.9039306640625, + "learning_rate": 2.7017906843878795e-06, + "loss": 17.7089, + "step": 343410 + }, + { + "epoch": 0.6937301276275973, + "grad_norm": 207.4452362060547, + "learning_rate": 2.7014806830843897e-06, + "loss": 12.9498, + "step": 343420 + }, + { + "epoch": 0.6937503282602812, + "grad_norm": 319.1103515625, + "learning_rate": 2.7011706929836056e-06, + "loss": 19.4653, + "step": 343430 + }, + { + "epoch": 0.693770528892965, + "grad_norm": 443.2889099121094, + "learning_rate": 2.700860714087036e-06, + "loss": 12.2581, + "step": 343440 + }, + { + "epoch": 0.6937907295256488, + "grad_norm": 166.7688446044922, + "learning_rate": 2.700550746396193e-06, + "loss": 16.6311, + "step": 343450 + }, + { + "epoch": 0.6938109301583325, + "grad_norm": 348.3745422363281, + "learning_rate": 2.7002407899125883e-06, + "loss": 15.0211, + "step": 343460 + }, + { + "epoch": 0.6938311307910163, + "grad_norm": 464.27130126953125, + "learning_rate": 2.699930844637728e-06, + "loss": 25.4117, + "step": 343470 + }, + { + "epoch": 0.6938513314237001, + "grad_norm": 367.8930358886719, + "learning_rate": 2.6996209105731273e-06, + "loss": 23.6164, + "step": 343480 + }, + { + "epoch": 0.693871532056384, + "grad_norm": 478.6580810546875, + "learning_rate": 2.6993109877202942e-06, + "loss": 7.154, + "step": 343490 + }, + { + "epoch": 0.6938917326890678, + "grad_norm": 455.1324462890625, + "learning_rate": 2.699001076080742e-06, + "loss": 
26.1042, + "step": 343500 + }, + { + "epoch": 0.6939119333217516, + "grad_norm": 547.47119140625, + "learning_rate": 2.6986911756559795e-06, + "loss": 23.7914, + "step": 343510 + }, + { + "epoch": 0.6939321339544354, + "grad_norm": 767.7376098632812, + "learning_rate": 2.698381286447516e-06, + "loss": 44.6646, + "step": 343520 + }, + { + "epoch": 0.6939523345871192, + "grad_norm": 445.3045959472656, + "learning_rate": 2.698071408456864e-06, + "loss": 24.4303, + "step": 343530 + }, + { + "epoch": 0.6939725352198031, + "grad_norm": 459.85736083984375, + "learning_rate": 2.6977615416855325e-06, + "loss": 14.9492, + "step": 343540 + }, + { + "epoch": 0.6939927358524869, + "grad_norm": 566.3602905273438, + "learning_rate": 2.697451686135031e-06, + "loss": 20.4367, + "step": 343550 + }, + { + "epoch": 0.6940129364851707, + "grad_norm": 142.90953063964844, + "learning_rate": 2.6971418418068696e-06, + "loss": 24.6062, + "step": 343560 + }, + { + "epoch": 0.6940331371178545, + "grad_norm": 356.7073669433594, + "learning_rate": 2.696832008702564e-06, + "loss": 23.8435, + "step": 343570 + }, + { + "epoch": 0.6940533377505383, + "grad_norm": 380.256103515625, + "learning_rate": 2.6965221868236156e-06, + "loss": 29.407, + "step": 343580 + }, + { + "epoch": 0.6940735383832222, + "grad_norm": 238.80799865722656, + "learning_rate": 2.6962123761715395e-06, + "loss": 12.5504, + "step": 343590 + }, + { + "epoch": 0.694093739015906, + "grad_norm": 292.9700622558594, + "learning_rate": 2.6959025767478466e-06, + "loss": 13.9543, + "step": 343600 + }, + { + "epoch": 0.6941139396485898, + "grad_norm": 168.14193725585938, + "learning_rate": 2.6955927885540444e-06, + "loss": 13.1834, + "step": 343610 + }, + { + "epoch": 0.6941341402812736, + "grad_norm": 557.2796630859375, + "learning_rate": 2.6952830115916417e-06, + "loss": 15.7199, + "step": 343620 + }, + { + "epoch": 0.6941543409139574, + "grad_norm": 348.6419982910156, + "learning_rate": 2.6949732458621502e-06, + "loss": 15.3388, + "step": 343630 + }, + { + "epoch": 0.6941745415466413, + "grad_norm": 370.0677490234375, + "learning_rate": 2.694663491367084e-06, + "loss": 21.5792, + "step": 343640 + }, + { + "epoch": 0.6941947421793251, + "grad_norm": 630.1292114257812, + "learning_rate": 2.694353748107944e-06, + "loss": 28.5514, + "step": 343650 + }, + { + "epoch": 0.6942149428120089, + "grad_norm": 40.72578048706055, + "learning_rate": 2.694044016086244e-06, + "loss": 10.9335, + "step": 343660 + }, + { + "epoch": 0.6942351434446927, + "grad_norm": 267.8238220214844, + "learning_rate": 2.6937342953034963e-06, + "loss": 16.2739, + "step": 343670 + }, + { + "epoch": 0.6942553440773765, + "grad_norm": 513.903564453125, + "learning_rate": 2.6934245857612074e-06, + "loss": 21.8557, + "step": 343680 + }, + { + "epoch": 0.6942755447100604, + "grad_norm": 330.5280456542969, + "learning_rate": 2.6931148874608854e-06, + "loss": 22.1439, + "step": 343690 + }, + { + "epoch": 0.6942957453427442, + "grad_norm": 257.3049621582031, + "learning_rate": 2.692805200404044e-06, + "loss": 14.5472, + "step": 343700 + }, + { + "epoch": 0.6943159459754279, + "grad_norm": 534.4154052734375, + "learning_rate": 2.69249552459219e-06, + "loss": 33.5646, + "step": 343710 + }, + { + "epoch": 0.6943361466081117, + "grad_norm": 420.3919982910156, + "learning_rate": 2.6921858600268304e-06, + "loss": 16.5254, + "step": 343720 + }, + { + "epoch": 0.6943563472407955, + "grad_norm": 475.1569519042969, + "learning_rate": 2.6918762067094776e-06, + "loss": 21.7642, + "step": 343730 + }, + { + "epoch": 
0.6943765478734794, + "grad_norm": 318.7789001464844, + "learning_rate": 2.6915665646416423e-06, + "loss": 22.4433, + "step": 343740 + }, + { + "epoch": 0.6943967485061632, + "grad_norm": 815.83740234375, + "learning_rate": 2.6912569338248317e-06, + "loss": 11.2253, + "step": 343750 + }, + { + "epoch": 0.694416949138847, + "grad_norm": 271.6921081542969, + "learning_rate": 2.6909473142605522e-06, + "loss": 13.2603, + "step": 343760 + }, + { + "epoch": 0.6944371497715308, + "grad_norm": 355.1222229003906, + "learning_rate": 2.6906377059503176e-06, + "loss": 20.8389, + "step": 343770 + }, + { + "epoch": 0.6944573504042146, + "grad_norm": 414.8699645996094, + "learning_rate": 2.690328108895635e-06, + "loss": 21.0342, + "step": 343780 + }, + { + "epoch": 0.6944775510368985, + "grad_norm": 24.91457748413086, + "learning_rate": 2.6900185230980115e-06, + "loss": 14.9175, + "step": 343790 + }, + { + "epoch": 0.6944977516695823, + "grad_norm": 302.8378601074219, + "learning_rate": 2.6897089485589584e-06, + "loss": 16.5532, + "step": 343800 + }, + { + "epoch": 0.6945179523022661, + "grad_norm": 547.6952514648438, + "learning_rate": 2.6893993852799827e-06, + "loss": 31.2786, + "step": 343810 + }, + { + "epoch": 0.6945381529349499, + "grad_norm": 455.7846984863281, + "learning_rate": 2.689089833262595e-06, + "loss": 14.3525, + "step": 343820 + }, + { + "epoch": 0.6945583535676337, + "grad_norm": 153.60443115234375, + "learning_rate": 2.688780292508304e-06, + "loss": 12.6853, + "step": 343830 + }, + { + "epoch": 0.6945785542003176, + "grad_norm": 671.5533447265625, + "learning_rate": 2.6884707630186158e-06, + "loss": 30.678, + "step": 343840 + }, + { + "epoch": 0.6945987548330014, + "grad_norm": 421.84185791015625, + "learning_rate": 2.6881612447950425e-06, + "loss": 14.1414, + "step": 343850 + }, + { + "epoch": 0.6946189554656852, + "grad_norm": 296.41375732421875, + "learning_rate": 2.6878517378390906e-06, + "loss": 18.8036, + "step": 343860 + }, + { + "epoch": 0.694639156098369, + "grad_norm": 399.1142272949219, + "learning_rate": 2.6875422421522667e-06, + "loss": 23.7339, + "step": 343870 + }, + { + "epoch": 0.6946593567310528, + "grad_norm": 164.97605895996094, + "learning_rate": 2.6872327577360822e-06, + "loss": 16.1473, + "step": 343880 + }, + { + "epoch": 0.6946795573637367, + "grad_norm": 187.7085418701172, + "learning_rate": 2.6869232845920466e-06, + "loss": 16.9912, + "step": 343890 + }, + { + "epoch": 0.6946997579964205, + "grad_norm": 507.7225341796875, + "learning_rate": 2.686613822721666e-06, + "loss": 28.3052, + "step": 343900 + }, + { + "epoch": 0.6947199586291043, + "grad_norm": 281.36639404296875, + "learning_rate": 2.686304372126447e-06, + "loss": 17.0173, + "step": 343910 + }, + { + "epoch": 0.6947401592617881, + "grad_norm": 643.44189453125, + "learning_rate": 2.6859949328079005e-06, + "loss": 17.3979, + "step": 343920 + }, + { + "epoch": 0.6947603598944719, + "grad_norm": 78.4656753540039, + "learning_rate": 2.685685504767537e-06, + "loss": 13.7249, + "step": 343930 + }, + { + "epoch": 0.6947805605271558, + "grad_norm": 664.6371459960938, + "learning_rate": 2.6853760880068587e-06, + "loss": 40.8635, + "step": 343940 + }, + { + "epoch": 0.6948007611598396, + "grad_norm": 369.2539367675781, + "learning_rate": 2.6850666825273762e-06, + "loss": 15.5132, + "step": 343950 + }, + { + "epoch": 0.6948209617925234, + "grad_norm": 163.865966796875, + "learning_rate": 2.6847572883305993e-06, + "loss": 20.9875, + "step": 343960 + }, + { + "epoch": 0.6948411624252071, + "grad_norm": 
114.55418395996094, + "learning_rate": 2.6844479054180354e-06, + "loss": 16.347, + "step": 343970 + }, + { + "epoch": 0.6948613630578909, + "grad_norm": 505.979736328125, + "learning_rate": 2.6841385337911895e-06, + "loss": 17.2984, + "step": 343980 + }, + { + "epoch": 0.6948815636905747, + "grad_norm": 784.9866333007812, + "learning_rate": 2.683829173451573e-06, + "loss": 27.368, + "step": 343990 + }, + { + "epoch": 0.6949017643232586, + "grad_norm": 260.9141845703125, + "learning_rate": 2.683519824400693e-06, + "loss": 19.7886, + "step": 344000 + }, + { + "epoch": 0.6949219649559424, + "grad_norm": 217.6226806640625, + "learning_rate": 2.683210486640054e-06, + "loss": 9.5596, + "step": 344010 + }, + { + "epoch": 0.6949421655886262, + "grad_norm": 317.6123962402344, + "learning_rate": 2.682901160171168e-06, + "loss": 17.6249, + "step": 344020 + }, + { + "epoch": 0.69496236622131, + "grad_norm": 202.72772216796875, + "learning_rate": 2.6825918449955386e-06, + "loss": 15.8701, + "step": 344030 + }, + { + "epoch": 0.6949825668539938, + "grad_norm": 364.443359375, + "learning_rate": 2.682282541114678e-06, + "loss": 10.4873, + "step": 344040 + }, + { + "epoch": 0.6950027674866777, + "grad_norm": 299.5184631347656, + "learning_rate": 2.6819732485300887e-06, + "loss": 19.8617, + "step": 344050 + }, + { + "epoch": 0.6950229681193615, + "grad_norm": 178.6079559326172, + "learning_rate": 2.6816639672432826e-06, + "loss": 14.4802, + "step": 344060 + }, + { + "epoch": 0.6950431687520453, + "grad_norm": 554.738525390625, + "learning_rate": 2.681354697255765e-06, + "loss": 14.2377, + "step": 344070 + }, + { + "epoch": 0.6950633693847291, + "grad_norm": 383.87725830078125, + "learning_rate": 2.681045438569042e-06, + "loss": 18.7799, + "step": 344080 + }, + { + "epoch": 0.6950835700174129, + "grad_norm": 129.1428985595703, + "learning_rate": 2.680736191184624e-06, + "loss": 17.5231, + "step": 344090 + }, + { + "epoch": 0.6951037706500968, + "grad_norm": 183.20083618164062, + "learning_rate": 2.680426955104014e-06, + "loss": 9.6862, + "step": 344100 + }, + { + "epoch": 0.6951239712827806, + "grad_norm": 206.9559783935547, + "learning_rate": 2.6801177303287247e-06, + "loss": 13.197, + "step": 344110 + }, + { + "epoch": 0.6951441719154644, + "grad_norm": 320.4137878417969, + "learning_rate": 2.6798085168602595e-06, + "loss": 21.8238, + "step": 344120 + }, + { + "epoch": 0.6951643725481482, + "grad_norm": 327.3988037109375, + "learning_rate": 2.6794993147001246e-06, + "loss": 22.6297, + "step": 344130 + }, + { + "epoch": 0.695184573180832, + "grad_norm": 263.87213134765625, + "learning_rate": 2.6791901238498302e-06, + "loss": 17.3819, + "step": 344140 + }, + { + "epoch": 0.6952047738135159, + "grad_norm": 215.83154296875, + "learning_rate": 2.678880944310882e-06, + "loss": 25.1242, + "step": 344150 + }, + { + "epoch": 0.6952249744461997, + "grad_norm": 817.7406005859375, + "learning_rate": 2.678571776084784e-06, + "loss": 14.7458, + "step": 344160 + }, + { + "epoch": 0.6952451750788835, + "grad_norm": 190.51718139648438, + "learning_rate": 2.6782626191730466e-06, + "loss": 24.2561, + "step": 344170 + }, + { + "epoch": 0.6952653757115673, + "grad_norm": 2117.508056640625, + "learning_rate": 2.677953473577177e-06, + "loss": 17.2426, + "step": 344180 + }, + { + "epoch": 0.6952855763442511, + "grad_norm": 1233.36669921875, + "learning_rate": 2.6776443392986808e-06, + "loss": 42.3313, + "step": 344190 + }, + { + "epoch": 0.695305776976935, + "grad_norm": 316.6412353515625, + "learning_rate": 
2.677335216339062e-06, + "loss": 18.3216, + "step": 344200 + }, + { + "epoch": 0.6953259776096188, + "grad_norm": 514.594482421875, + "learning_rate": 2.6770261046998315e-06, + "loss": 20.8065, + "step": 344210 + }, + { + "epoch": 0.6953461782423026, + "grad_norm": 220.20916748046875, + "learning_rate": 2.6767170043824942e-06, + "loss": 32.971, + "step": 344220 + }, + { + "epoch": 0.6953663788749863, + "grad_norm": 7.986196994781494, + "learning_rate": 2.6764079153885547e-06, + "loss": 15.0296, + "step": 344230 + }, + { + "epoch": 0.6953865795076701, + "grad_norm": 253.78794860839844, + "learning_rate": 2.6760988377195206e-06, + "loss": 12.7176, + "step": 344240 + }, + { + "epoch": 0.695406780140354, + "grad_norm": 332.3077392578125, + "learning_rate": 2.675789771376904e-06, + "loss": 27.8419, + "step": 344250 + }, + { + "epoch": 0.6954269807730378, + "grad_norm": 477.7747802734375, + "learning_rate": 2.6754807163622014e-06, + "loss": 15.0112, + "step": 344260 + }, + { + "epoch": 0.6954471814057216, + "grad_norm": 45.76880645751953, + "learning_rate": 2.6751716726769237e-06, + "loss": 14.3162, + "step": 344270 + }, + { + "epoch": 0.6954673820384054, + "grad_norm": 211.90830993652344, + "learning_rate": 2.67486264032258e-06, + "loss": 21.739, + "step": 344280 + }, + { + "epoch": 0.6954875826710892, + "grad_norm": 343.0636901855469, + "learning_rate": 2.674553619300673e-06, + "loss": 21.8155, + "step": 344290 + }, + { + "epoch": 0.695507783303773, + "grad_norm": 350.2322998046875, + "learning_rate": 2.6742446096127086e-06, + "loss": 12.1763, + "step": 344300 + }, + { + "epoch": 0.6955279839364569, + "grad_norm": 514.8064575195312, + "learning_rate": 2.673935611260195e-06, + "loss": 15.4479, + "step": 344310 + }, + { + "epoch": 0.6955481845691407, + "grad_norm": 208.349365234375, + "learning_rate": 2.6736266242446372e-06, + "loss": 8.9711, + "step": 344320 + }, + { + "epoch": 0.6955683852018245, + "grad_norm": 209.54495239257812, + "learning_rate": 2.6733176485675396e-06, + "loss": 17.0446, + "step": 344330 + }, + { + "epoch": 0.6955885858345083, + "grad_norm": 585.2893676757812, + "learning_rate": 2.6730086842304093e-06, + "loss": 15.8475, + "step": 344340 + }, + { + "epoch": 0.6956087864671922, + "grad_norm": 56.8120231628418, + "learning_rate": 2.6726997312347546e-06, + "loss": 24.2784, + "step": 344350 + }, + { + "epoch": 0.695628987099876, + "grad_norm": 65.433349609375, + "learning_rate": 2.672390789582079e-06, + "loss": 19.357, + "step": 344360 + }, + { + "epoch": 0.6956491877325598, + "grad_norm": 289.3479919433594, + "learning_rate": 2.672081859273886e-06, + "loss": 22.835, + "step": 344370 + }, + { + "epoch": 0.6956693883652436, + "grad_norm": 128.19265747070312, + "learning_rate": 2.6717729403116866e-06, + "loss": 8.0582, + "step": 344380 + }, + { + "epoch": 0.6956895889979274, + "grad_norm": 446.22857666015625, + "learning_rate": 2.671464032696982e-06, + "loss": 26.8629, + "step": 344390 + }, + { + "epoch": 0.6957097896306113, + "grad_norm": 1369.6859130859375, + "learning_rate": 2.671155136431279e-06, + "loss": 30.2338, + "step": 344400 + }, + { + "epoch": 0.6957299902632951, + "grad_norm": 652.7861938476562, + "learning_rate": 2.6708462515160845e-06, + "loss": 30.2917, + "step": 344410 + }, + { + "epoch": 0.6957501908959789, + "grad_norm": 532.6014404296875, + "learning_rate": 2.670537377952901e-06, + "loss": 38.7639, + "step": 344420 + }, + { + "epoch": 0.6957703915286627, + "grad_norm": 59.8785285949707, + "learning_rate": 2.670228515743238e-06, + "loss": 13.5877, + "step": 
344430 + }, + { + "epoch": 0.6957905921613465, + "grad_norm": 486.962890625, + "learning_rate": 2.6699196648885984e-06, + "loss": 27.2722, + "step": 344440 + }, + { + "epoch": 0.6958107927940304, + "grad_norm": 132.49996948242188, + "learning_rate": 2.6696108253904856e-06, + "loss": 16.1759, + "step": 344450 + }, + { + "epoch": 0.6958309934267142, + "grad_norm": 17.91014289855957, + "learning_rate": 2.669301997250409e-06, + "loss": 24.9406, + "step": 344460 + }, + { + "epoch": 0.695851194059398, + "grad_norm": 221.14280700683594, + "learning_rate": 2.668993180469872e-06, + "loss": 12.3098, + "step": 344470 + }, + { + "epoch": 0.6958713946920817, + "grad_norm": 319.5102844238281, + "learning_rate": 2.668684375050378e-06, + "loss": 9.2963, + "step": 344480 + }, + { + "epoch": 0.6958915953247655, + "grad_norm": 344.1924743652344, + "learning_rate": 2.668375580993433e-06, + "loss": 13.7195, + "step": 344490 + }, + { + "epoch": 0.6959117959574493, + "grad_norm": 654.0018920898438, + "learning_rate": 2.6680667983005446e-06, + "loss": 17.6545, + "step": 344500 + }, + { + "epoch": 0.6959319965901332, + "grad_norm": 421.8318786621094, + "learning_rate": 2.667758026973216e-06, + "loss": 21.6775, + "step": 344510 + }, + { + "epoch": 0.695952197222817, + "grad_norm": 372.5625915527344, + "learning_rate": 2.667449267012949e-06, + "loss": 19.3328, + "step": 344520 + }, + { + "epoch": 0.6959723978555008, + "grad_norm": 73.21385192871094, + "learning_rate": 2.667140518421255e-06, + "loss": 12.0675, + "step": 344530 + }, + { + "epoch": 0.6959925984881846, + "grad_norm": 593.1376342773438, + "learning_rate": 2.6668317811996342e-06, + "loss": 17.3688, + "step": 344540 + }, + { + "epoch": 0.6960127991208684, + "grad_norm": 272.7303161621094, + "learning_rate": 2.66652305534959e-06, + "loss": 21.8176, + "step": 344550 + }, + { + "epoch": 0.6960329997535523, + "grad_norm": 352.7306213378906, + "learning_rate": 2.6662143408726306e-06, + "loss": 17.7654, + "step": 344560 + }, + { + "epoch": 0.6960532003862361, + "grad_norm": 185.99951171875, + "learning_rate": 2.6659056377702606e-06, + "loss": 18.1175, + "step": 344570 + }, + { + "epoch": 0.6960734010189199, + "grad_norm": 565.5797729492188, + "learning_rate": 2.6655969460439835e-06, + "loss": 23.5426, + "step": 344580 + }, + { + "epoch": 0.6960936016516037, + "grad_norm": 1198.6705322265625, + "learning_rate": 2.6652882656953016e-06, + "loss": 30.6016, + "step": 344590 + }, + { + "epoch": 0.6961138022842875, + "grad_norm": 271.77508544921875, + "learning_rate": 2.6649795967257243e-06, + "loss": 18.6716, + "step": 344600 + }, + { + "epoch": 0.6961340029169714, + "grad_norm": 478.1930236816406, + "learning_rate": 2.6646709391367524e-06, + "loss": 15.9826, + "step": 344610 + }, + { + "epoch": 0.6961542035496552, + "grad_norm": 493.9571838378906, + "learning_rate": 2.6643622929298896e-06, + "loss": 29.0761, + "step": 344620 + }, + { + "epoch": 0.696174404182339, + "grad_norm": 291.86895751953125, + "learning_rate": 2.6640536581066434e-06, + "loss": 22.289, + "step": 344630 + }, + { + "epoch": 0.6961946048150228, + "grad_norm": 647.7007446289062, + "learning_rate": 2.6637450346685145e-06, + "loss": 20.7047, + "step": 344640 + }, + { + "epoch": 0.6962148054477066, + "grad_norm": 372.8083801269531, + "learning_rate": 2.6634364226170105e-06, + "loss": 8.2205, + "step": 344650 + }, + { + "epoch": 0.6962350060803905, + "grad_norm": 396.89886474609375, + "learning_rate": 2.663127821953633e-06, + "loss": 12.5007, + "step": 344660 + }, + { + "epoch": 0.6962552067130743, + 
"grad_norm": 92.28195190429688, + "learning_rate": 2.6628192326798884e-06, + "loss": 15.7482, + "step": 344670 + }, + { + "epoch": 0.6962754073457581, + "grad_norm": 251.61859130859375, + "learning_rate": 2.662510654797279e-06, + "loss": 13.5063, + "step": 344680 + }, + { + "epoch": 0.6962956079784419, + "grad_norm": 314.4949035644531, + "learning_rate": 2.662202088307308e-06, + "loss": 15.6295, + "step": 344690 + }, + { + "epoch": 0.6963158086111257, + "grad_norm": 568.0778198242188, + "learning_rate": 2.661893533211482e-06, + "loss": 22.3975, + "step": 344700 + }, + { + "epoch": 0.6963360092438096, + "grad_norm": 1040.8778076171875, + "learning_rate": 2.6615849895113014e-06, + "loss": 18.2789, + "step": 344710 + }, + { + "epoch": 0.6963562098764934, + "grad_norm": 16.518474578857422, + "learning_rate": 2.661276457208274e-06, + "loss": 27.5529, + "step": 344720 + }, + { + "epoch": 0.6963764105091772, + "grad_norm": 29.804773330688477, + "learning_rate": 2.660967936303902e-06, + "loss": 8.306, + "step": 344730 + }, + { + "epoch": 0.6963966111418609, + "grad_norm": 0.0, + "learning_rate": 2.6606594267996853e-06, + "loss": 15.3719, + "step": 344740 + }, + { + "epoch": 0.6964168117745447, + "grad_norm": 557.9322509765625, + "learning_rate": 2.6603509286971342e-06, + "loss": 16.6565, + "step": 344750 + }, + { + "epoch": 0.6964370124072286, + "grad_norm": 569.5855712890625, + "learning_rate": 2.660042441997748e-06, + "loss": 19.1198, + "step": 344760 + }, + { + "epoch": 0.6964572130399124, + "grad_norm": 226.26670837402344, + "learning_rate": 2.6597339667030296e-06, + "loss": 17.1782, + "step": 344770 + }, + { + "epoch": 0.6964774136725962, + "grad_norm": 681.4418334960938, + "learning_rate": 2.659425502814484e-06, + "loss": 20.0888, + "step": 344780 + }, + { + "epoch": 0.69649761430528, + "grad_norm": 229.27304077148438, + "learning_rate": 2.659117050333616e-06, + "loss": 21.8857, + "step": 344790 + }, + { + "epoch": 0.6965178149379638, + "grad_norm": 235.35556030273438, + "learning_rate": 2.658808609261928e-06, + "loss": 9.5892, + "step": 344800 + }, + { + "epoch": 0.6965380155706477, + "grad_norm": 313.0237121582031, + "learning_rate": 2.658500179600921e-06, + "loss": 21.6451, + "step": 344810 + }, + { + "epoch": 0.6965582162033315, + "grad_norm": 36.43192672729492, + "learning_rate": 2.6581917613521026e-06, + "loss": 14.984, + "step": 344820 + }, + { + "epoch": 0.6965784168360153, + "grad_norm": 697.332763671875, + "learning_rate": 2.657883354516973e-06, + "loss": 12.2152, + "step": 344830 + }, + { + "epoch": 0.6965986174686991, + "grad_norm": 267.1387939453125, + "learning_rate": 2.6575749590970336e-06, + "loss": 17.5607, + "step": 344840 + }, + { + "epoch": 0.6966188181013829, + "grad_norm": 502.197509765625, + "learning_rate": 2.6572665750937898e-06, + "loss": 17.2962, + "step": 344850 + }, + { + "epoch": 0.6966390187340668, + "grad_norm": 291.0636291503906, + "learning_rate": 2.656958202508749e-06, + "loss": 21.6046, + "step": 344860 + }, + { + "epoch": 0.6966592193667506, + "grad_norm": 379.7904968261719, + "learning_rate": 2.656649841343406e-06, + "loss": 15.628, + "step": 344870 + }, + { + "epoch": 0.6966794199994344, + "grad_norm": 357.1584777832031, + "learning_rate": 2.656341491599267e-06, + "loss": 22.4748, + "step": 344880 + }, + { + "epoch": 0.6966996206321182, + "grad_norm": 65.4222640991211, + "learning_rate": 2.6560331532778373e-06, + "loss": 12.0059, + "step": 344890 + }, + { + "epoch": 0.696719821264802, + "grad_norm": 566.2289428710938, + "learning_rate": 
2.6557248263806175e-06, + "loss": 16.9036, + "step": 344900 + }, + { + "epoch": 0.6967400218974859, + "grad_norm": 344.33990478515625, + "learning_rate": 2.655416510909109e-06, + "loss": 12.362, + "step": 344910 + }, + { + "epoch": 0.6967602225301697, + "grad_norm": 277.7760314941406, + "learning_rate": 2.655108206864818e-06, + "loss": 13.9431, + "step": 344920 + }, + { + "epoch": 0.6967804231628535, + "grad_norm": 390.2586669921875, + "learning_rate": 2.654799914249245e-06, + "loss": 12.7993, + "step": 344930 + }, + { + "epoch": 0.6968006237955373, + "grad_norm": 163.2816162109375, + "learning_rate": 2.65449163306389e-06, + "loss": 16.1431, + "step": 344940 + }, + { + "epoch": 0.6968208244282211, + "grad_norm": 2.7255659103393555, + "learning_rate": 2.65418336331026e-06, + "loss": 22.8669, + "step": 344950 + }, + { + "epoch": 0.696841025060905, + "grad_norm": 343.7413635253906, + "learning_rate": 2.653875104989857e-06, + "loss": 20.7097, + "step": 344960 + }, + { + "epoch": 0.6968612256935888, + "grad_norm": 133.50035095214844, + "learning_rate": 2.653566858104182e-06, + "loss": 12.405, + "step": 344970 + }, + { + "epoch": 0.6968814263262726, + "grad_norm": 454.4327392578125, + "learning_rate": 2.6532586226547354e-06, + "loss": 19.0066, + "step": 344980 + }, + { + "epoch": 0.6969016269589563, + "grad_norm": 358.3306884765625, + "learning_rate": 2.652950398643024e-06, + "loss": 17.5057, + "step": 344990 + }, + { + "epoch": 0.6969218275916401, + "grad_norm": 527.6987915039062, + "learning_rate": 2.6526421860705474e-06, + "loss": 29.4849, + "step": 345000 + }, + { + "epoch": 0.6969420282243239, + "grad_norm": 429.4691162109375, + "learning_rate": 2.6523339849388065e-06, + "loss": 22.8431, + "step": 345010 + }, + { + "epoch": 0.6969622288570078, + "grad_norm": 443.98284912109375, + "learning_rate": 2.6520257952493066e-06, + "loss": 19.1371, + "step": 345020 + }, + { + "epoch": 0.6969824294896916, + "grad_norm": 71.12145233154297, + "learning_rate": 2.6517176170035463e-06, + "loss": 13.3549, + "step": 345030 + }, + { + "epoch": 0.6970026301223754, + "grad_norm": 422.64208984375, + "learning_rate": 2.651409450203032e-06, + "loss": 12.5471, + "step": 345040 + }, + { + "epoch": 0.6970228307550592, + "grad_norm": 558.01513671875, + "learning_rate": 2.6511012948492625e-06, + "loss": 20.7775, + "step": 345050 + }, + { + "epoch": 0.697043031387743, + "grad_norm": 464.9036865234375, + "learning_rate": 2.650793150943739e-06, + "loss": 21.6904, + "step": 345060 + }, + { + "epoch": 0.6970632320204269, + "grad_norm": 609.20849609375, + "learning_rate": 2.650485018487966e-06, + "loss": 15.1591, + "step": 345070 + }, + { + "epoch": 0.6970834326531107, + "grad_norm": 720.4866333007812, + "learning_rate": 2.6501768974834453e-06, + "loss": 15.6479, + "step": 345080 + }, + { + "epoch": 0.6971036332857945, + "grad_norm": 306.485107421875, + "learning_rate": 2.649868787931674e-06, + "loss": 11.1549, + "step": 345090 + }, + { + "epoch": 0.6971238339184783, + "grad_norm": 402.8655090332031, + "learning_rate": 2.649560689834158e-06, + "loss": 32.3631, + "step": 345100 + }, + { + "epoch": 0.6971440345511621, + "grad_norm": 440.0959777832031, + "learning_rate": 2.6492526031924005e-06, + "loss": 37.2021, + "step": 345110 + }, + { + "epoch": 0.697164235183846, + "grad_norm": 353.9069519042969, + "learning_rate": 2.6489445280078998e-06, + "loss": 8.6506, + "step": 345120 + }, + { + "epoch": 0.6971844358165298, + "grad_norm": 306.89111328125, + "learning_rate": 2.6486364642821565e-06, + "loss": 23.3063, + "step": 345130 + 
}, + { + "epoch": 0.6972046364492136, + "grad_norm": 109.22026824951172, + "learning_rate": 2.6483284120166762e-06, + "loss": 7.5639, + "step": 345140 + }, + { + "epoch": 0.6972248370818974, + "grad_norm": 244.5216064453125, + "learning_rate": 2.6480203712129583e-06, + "loss": 6.595, + "step": 345150 + }, + { + "epoch": 0.6972450377145812, + "grad_norm": 465.40179443359375, + "learning_rate": 2.647712341872501e-06, + "loss": 25.7738, + "step": 345160 + }, + { + "epoch": 0.6972652383472651, + "grad_norm": 604.065185546875, + "learning_rate": 2.647404323996809e-06, + "loss": 22.5741, + "step": 345170 + }, + { + "epoch": 0.6972854389799489, + "grad_norm": 230.20556640625, + "learning_rate": 2.647096317587385e-06, + "loss": 10.2197, + "step": 345180 + }, + { + "epoch": 0.6973056396126327, + "grad_norm": 268.8053283691406, + "learning_rate": 2.646788322645728e-06, + "loss": 21.6019, + "step": 345190 + }, + { + "epoch": 0.6973258402453165, + "grad_norm": 301.2347717285156, + "learning_rate": 2.646480339173337e-06, + "loss": 20.836, + "step": 345200 + }, + { + "epoch": 0.6973460408780003, + "grad_norm": 633.9385375976562, + "learning_rate": 2.6461723671717177e-06, + "loss": 18.6043, + "step": 345210 + }, + { + "epoch": 0.6973662415106842, + "grad_norm": 0.0, + "learning_rate": 2.645864406642369e-06, + "loss": 18.6629, + "step": 345220 + }, + { + "epoch": 0.697386442143368, + "grad_norm": 78.62548065185547, + "learning_rate": 2.6455564575867893e-06, + "loss": 26.5985, + "step": 345230 + }, + { + "epoch": 0.6974066427760518, + "grad_norm": 211.503662109375, + "learning_rate": 2.645248520006482e-06, + "loss": 15.8581, + "step": 345240 + }, + { + "epoch": 0.6974268434087355, + "grad_norm": 440.4510192871094, + "learning_rate": 2.64494059390295e-06, + "loss": 33.8293, + "step": 345250 + }, + { + "epoch": 0.6974470440414193, + "grad_norm": 476.35699462890625, + "learning_rate": 2.644632679277692e-06, + "loss": 15.2913, + "step": 345260 + }, + { + "epoch": 0.6974672446741031, + "grad_norm": 797.4821166992188, + "learning_rate": 2.644324776132206e-06, + "loss": 29.4788, + "step": 345270 + }, + { + "epoch": 0.697487445306787, + "grad_norm": 491.3204345703125, + "learning_rate": 2.6440168844679983e-06, + "loss": 21.4707, + "step": 345280 + }, + { + "epoch": 0.6975076459394708, + "grad_norm": 816.2991333007812, + "learning_rate": 2.6437090042865655e-06, + "loss": 15.4586, + "step": 345290 + }, + { + "epoch": 0.6975278465721546, + "grad_norm": 30.84978675842285, + "learning_rate": 2.6434011355894074e-06, + "loss": 14.2834, + "step": 345300 + }, + { + "epoch": 0.6975480472048384, + "grad_norm": 582.0877075195312, + "learning_rate": 2.643093278378029e-06, + "loss": 14.9192, + "step": 345310 + }, + { + "epoch": 0.6975682478375222, + "grad_norm": 137.56689453125, + "learning_rate": 2.642785432653926e-06, + "loss": 11.3768, + "step": 345320 + }, + { + "epoch": 0.6975884484702061, + "grad_norm": 379.98486328125, + "learning_rate": 2.6424775984186024e-06, + "loss": 16.0875, + "step": 345330 + }, + { + "epoch": 0.6976086491028899, + "grad_norm": 195.0530242919922, + "learning_rate": 2.6421697756735577e-06, + "loss": 23.8646, + "step": 345340 + }, + { + "epoch": 0.6976288497355737, + "grad_norm": 253.6774139404297, + "learning_rate": 2.641861964420289e-06, + "loss": 12.3859, + "step": 345350 + }, + { + "epoch": 0.6976490503682575, + "grad_norm": 186.47940063476562, + "learning_rate": 2.641554164660301e-06, + "loss": 15.902, + "step": 345360 + }, + { + "epoch": 0.6976692510009413, + "grad_norm": 576.4246215820312, + 
"learning_rate": 2.6412463763950925e-06, + "loss": 10.885, + "step": 345370 + }, + { + "epoch": 0.6976894516336252, + "grad_norm": 4.220212936401367, + "learning_rate": 2.6409385996261606e-06, + "loss": 14.7602, + "step": 345380 + }, + { + "epoch": 0.697709652266309, + "grad_norm": 267.7427673339844, + "learning_rate": 2.640630834355008e-06, + "loss": 16.3859, + "step": 345390 + }, + { + "epoch": 0.6977298528989928, + "grad_norm": 441.47271728515625, + "learning_rate": 2.640323080583137e-06, + "loss": 22.2648, + "step": 345400 + }, + { + "epoch": 0.6977500535316766, + "grad_norm": 202.620361328125, + "learning_rate": 2.640015338312044e-06, + "loss": 14.9657, + "step": 345410 + }, + { + "epoch": 0.6977702541643604, + "grad_norm": 353.13739013671875, + "learning_rate": 2.6397076075432294e-06, + "loss": 14.2941, + "step": 345420 + }, + { + "epoch": 0.6977904547970443, + "grad_norm": 426.14117431640625, + "learning_rate": 2.6393998882781945e-06, + "loss": 17.4602, + "step": 345430 + }, + { + "epoch": 0.6978106554297281, + "grad_norm": 741.8760375976562, + "learning_rate": 2.6390921805184387e-06, + "loss": 30.9822, + "step": 345440 + }, + { + "epoch": 0.6978308560624119, + "grad_norm": 184.38121032714844, + "learning_rate": 2.63878448426546e-06, + "loss": 15.771, + "step": 345450 + }, + { + "epoch": 0.6978510566950957, + "grad_norm": 236.48931884765625, + "learning_rate": 2.6384767995207584e-06, + "loss": 16.7433, + "step": 345460 + }, + { + "epoch": 0.6978712573277795, + "grad_norm": 165.02105712890625, + "learning_rate": 2.6381691262858385e-06, + "loss": 10.6148, + "step": 345470 + }, + { + "epoch": 0.6978914579604634, + "grad_norm": 153.6220703125, + "learning_rate": 2.6378614645621916e-06, + "loss": 28.2366, + "step": 345480 + }, + { + "epoch": 0.6979116585931472, + "grad_norm": 200.2836151123047, + "learning_rate": 2.6375538143513225e-06, + "loss": 8.507, + "step": 345490 + }, + { + "epoch": 0.697931859225831, + "grad_norm": 234.54251098632812, + "learning_rate": 2.637246175654731e-06, + "loss": 9.2595, + "step": 345500 + }, + { + "epoch": 0.6979520598585147, + "grad_norm": 316.4593505859375, + "learning_rate": 2.6369385484739143e-06, + "loss": 24.5696, + "step": 345510 + }, + { + "epoch": 0.6979722604911985, + "grad_norm": 107.1210708618164, + "learning_rate": 2.636630932810371e-06, + "loss": 18.6488, + "step": 345520 + }, + { + "epoch": 0.6979924611238824, + "grad_norm": 533.1016235351562, + "learning_rate": 2.6363233286656044e-06, + "loss": 14.3121, + "step": 345530 + }, + { + "epoch": 0.6980126617565662, + "grad_norm": 634.0641479492188, + "learning_rate": 2.636015736041111e-06, + "loss": 20.5269, + "step": 345540 + }, + { + "epoch": 0.69803286238925, + "grad_norm": 392.26788330078125, + "learning_rate": 2.6357081549383877e-06, + "loss": 14.8525, + "step": 345550 + }, + { + "epoch": 0.6980530630219338, + "grad_norm": 620.7987060546875, + "learning_rate": 2.635400585358937e-06, + "loss": 24.258, + "step": 345560 + }, + { + "epoch": 0.6980732636546176, + "grad_norm": 419.3951721191406, + "learning_rate": 2.6350930273042587e-06, + "loss": 15.7807, + "step": 345570 + }, + { + "epoch": 0.6980934642873015, + "grad_norm": 685.9359741210938, + "learning_rate": 2.63478548077585e-06, + "loss": 16.4399, + "step": 345580 + }, + { + "epoch": 0.6981136649199853, + "grad_norm": 500.3210144042969, + "learning_rate": 2.634477945775208e-06, + "loss": 11.0419, + "step": 345590 + }, + { + "epoch": 0.6981338655526691, + "grad_norm": 286.71722412109375, + "learning_rate": 2.634170422303835e-06, + "loss": 
13.9834, + "step": 345600 + }, + { + "epoch": 0.6981540661853529, + "grad_norm": 155.51107788085938, + "learning_rate": 2.633862910363229e-06, + "loss": 23.2099, + "step": 345610 + }, + { + "epoch": 0.6981742668180367, + "grad_norm": 81.69243621826172, + "learning_rate": 2.6335554099548865e-06, + "loss": 10.7837, + "step": 345620 + }, + { + "epoch": 0.6981944674507206, + "grad_norm": 412.6045837402344, + "learning_rate": 2.63324792108031e-06, + "loss": 28.8401, + "step": 345630 + }, + { + "epoch": 0.6982146680834044, + "grad_norm": 1176.1641845703125, + "learning_rate": 2.6329404437409934e-06, + "loss": 20.9388, + "step": 345640 + }, + { + "epoch": 0.6982348687160882, + "grad_norm": 1.3624160289764404, + "learning_rate": 2.6326329779384397e-06, + "loss": 13.8064, + "step": 345650 + }, + { + "epoch": 0.698255069348772, + "grad_norm": 276.4120178222656, + "learning_rate": 2.6323255236741465e-06, + "loss": 13.0461, + "step": 345660 + }, + { + "epoch": 0.6982752699814558, + "grad_norm": 289.1661376953125, + "learning_rate": 2.63201808094961e-06, + "loss": 30.3161, + "step": 345670 + }, + { + "epoch": 0.6982954706141397, + "grad_norm": 206.7049560546875, + "learning_rate": 2.6317106497663316e-06, + "loss": 7.163, + "step": 345680 + }, + { + "epoch": 0.6983156712468235, + "grad_norm": 146.7299346923828, + "learning_rate": 2.6314032301258072e-06, + "loss": 26.6436, + "step": 345690 + }, + { + "epoch": 0.6983358718795073, + "grad_norm": 336.31182861328125, + "learning_rate": 2.6310958220295356e-06, + "loss": 15.1083, + "step": 345700 + }, + { + "epoch": 0.6983560725121911, + "grad_norm": 124.25076293945312, + "learning_rate": 2.630788425479015e-06, + "loss": 16.3743, + "step": 345710 + }, + { + "epoch": 0.6983762731448749, + "grad_norm": 574.3560791015625, + "learning_rate": 2.6304810404757465e-06, + "loss": 22.2036, + "step": 345720 + }, + { + "epoch": 0.6983964737775588, + "grad_norm": 580.5028076171875, + "learning_rate": 2.6301736670212263e-06, + "loss": 19.0385, + "step": 345730 + }, + { + "epoch": 0.6984166744102426, + "grad_norm": 41.32448196411133, + "learning_rate": 2.62986630511695e-06, + "loss": 26.0171, + "step": 345740 + }, + { + "epoch": 0.6984368750429264, + "grad_norm": 804.4308471679688, + "learning_rate": 2.6295589547644195e-06, + "loss": 32.7556, + "step": 345750 + }, + { + "epoch": 0.6984570756756101, + "grad_norm": 118.28096008300781, + "learning_rate": 2.6292516159651317e-06, + "loss": 31.4055, + "step": 345760 + }, + { + "epoch": 0.6984772763082939, + "grad_norm": 291.58050537109375, + "learning_rate": 2.628944288720582e-06, + "loss": 23.683, + "step": 345770 + }, + { + "epoch": 0.6984974769409777, + "grad_norm": 651.7496948242188, + "learning_rate": 2.6286369730322693e-06, + "loss": 21.9618, + "step": 345780 + }, + { + "epoch": 0.6985176775736616, + "grad_norm": 316.13653564453125, + "learning_rate": 2.6283296689016953e-06, + "loss": 36.7003, + "step": 345790 + }, + { + "epoch": 0.6985378782063454, + "grad_norm": 636.0591430664062, + "learning_rate": 2.6280223763303546e-06, + "loss": 13.3677, + "step": 345800 + }, + { + "epoch": 0.6985580788390292, + "grad_norm": 351.2723083496094, + "learning_rate": 2.6277150953197427e-06, + "loss": 10.4449, + "step": 345810 + }, + { + "epoch": 0.698578279471713, + "grad_norm": 187.9607391357422, + "learning_rate": 2.6274078258713626e-06, + "loss": 11.7961, + "step": 345820 + }, + { + "epoch": 0.6985984801043968, + "grad_norm": 460.8187255859375, + "learning_rate": 2.627100567986709e-06, + "loss": 24.5505, + "step": 345830 + }, + { + 
"epoch": 0.6986186807370807, + "grad_norm": 497.2703552246094, + "learning_rate": 2.626793321667277e-06, + "loss": 35.9925, + "step": 345840 + }, + { + "epoch": 0.6986388813697645, + "grad_norm": 643.0888061523438, + "learning_rate": 2.626486086914566e-06, + "loss": 23.4985, + "step": 345850 + }, + { + "epoch": 0.6986590820024483, + "grad_norm": 764.6570434570312, + "learning_rate": 2.626178863730077e-06, + "loss": 27.3257, + "step": 345860 + }, + { + "epoch": 0.6986792826351321, + "grad_norm": 219.7559814453125, + "learning_rate": 2.6258716521153034e-06, + "loss": 12.841, + "step": 345870 + }, + { + "epoch": 0.698699483267816, + "grad_norm": 286.03521728515625, + "learning_rate": 2.6255644520717417e-06, + "loss": 20.3383, + "step": 345880 + }, + { + "epoch": 0.6987196839004998, + "grad_norm": 447.294921875, + "learning_rate": 2.6252572636008934e-06, + "loss": 26.421, + "step": 345890 + }, + { + "epoch": 0.6987398845331836, + "grad_norm": 512.5075073242188, + "learning_rate": 2.6249500867042523e-06, + "loss": 14.1046, + "step": 345900 + }, + { + "epoch": 0.6987600851658674, + "grad_norm": 755.806396484375, + "learning_rate": 2.6246429213833146e-06, + "loss": 17.2237, + "step": 345910 + }, + { + "epoch": 0.6987802857985512, + "grad_norm": 284.7914123535156, + "learning_rate": 2.624335767639582e-06, + "loss": 12.9259, + "step": 345920 + }, + { + "epoch": 0.698800486431235, + "grad_norm": 17.339370727539062, + "learning_rate": 2.624028625474546e-06, + "loss": 24.0289, + "step": 345930 + }, + { + "epoch": 0.6988206870639189, + "grad_norm": 398.7443542480469, + "learning_rate": 2.6237214948897084e-06, + "loss": 12.7268, + "step": 345940 + }, + { + "epoch": 0.6988408876966027, + "grad_norm": 144.84312438964844, + "learning_rate": 2.623414375886564e-06, + "loss": 18.3145, + "step": 345950 + }, + { + "epoch": 0.6988610883292865, + "grad_norm": 658.30908203125, + "learning_rate": 2.623107268466608e-06, + "loss": 28.3886, + "step": 345960 + }, + { + "epoch": 0.6988812889619703, + "grad_norm": 559.9882202148438, + "learning_rate": 2.6228001726313406e-06, + "loss": 18.2379, + "step": 345970 + }, + { + "epoch": 0.6989014895946541, + "grad_norm": 389.85107421875, + "learning_rate": 2.622493088382257e-06, + "loss": 40.59, + "step": 345980 + }, + { + "epoch": 0.698921690227338, + "grad_norm": 285.4831237792969, + "learning_rate": 2.6221860157208516e-06, + "loss": 33.7344, + "step": 345990 + }, + { + "epoch": 0.6989418908600218, + "grad_norm": 296.01312255859375, + "learning_rate": 2.6218789546486235e-06, + "loss": 13.7665, + "step": 346000 + }, + { + "epoch": 0.6989620914927056, + "grad_norm": 0.0, + "learning_rate": 2.6215719051670705e-06, + "loss": 8.9669, + "step": 346010 + }, + { + "epoch": 0.6989822921253893, + "grad_norm": 437.0624694824219, + "learning_rate": 2.6212648672776874e-06, + "loss": 13.5886, + "step": 346020 + }, + { + "epoch": 0.6990024927580731, + "grad_norm": 183.48011779785156, + "learning_rate": 2.620957840981969e-06, + "loss": 12.9059, + "step": 346030 + }, + { + "epoch": 0.699022693390757, + "grad_norm": 559.4110107421875, + "learning_rate": 2.6206508262814164e-06, + "loss": 15.7754, + "step": 346040 + }, + { + "epoch": 0.6990428940234408, + "grad_norm": 41.547645568847656, + "learning_rate": 2.6203438231775224e-06, + "loss": 10.2372, + "step": 346050 + }, + { + "epoch": 0.6990630946561246, + "grad_norm": 202.2870635986328, + "learning_rate": 2.6200368316717816e-06, + "loss": 14.3801, + "step": 346060 + }, + { + "epoch": 0.6990832952888084, + "grad_norm": 452.917236328125, + 
"learning_rate": 2.6197298517656933e-06, + "loss": 14.5969, + "step": 346070 + }, + { + "epoch": 0.6991034959214922, + "grad_norm": 570.240478515625, + "learning_rate": 2.6194228834607567e-06, + "loss": 42.6146, + "step": 346080 + }, + { + "epoch": 0.699123696554176, + "grad_norm": 356.0271301269531, + "learning_rate": 2.6191159267584604e-06, + "loss": 20.1366, + "step": 346090 + }, + { + "epoch": 0.6991438971868599, + "grad_norm": 162.37998962402344, + "learning_rate": 2.618808981660304e-06, + "loss": 24.5838, + "step": 346100 + }, + { + "epoch": 0.6991640978195437, + "grad_norm": 320.3457946777344, + "learning_rate": 2.618502048167786e-06, + "loss": 10.9108, + "step": 346110 + }, + { + "epoch": 0.6991842984522275, + "grad_norm": 279.5674133300781, + "learning_rate": 2.6181951262824e-06, + "loss": 22.2888, + "step": 346120 + }, + { + "epoch": 0.6992044990849113, + "grad_norm": 244.40220642089844, + "learning_rate": 2.617888216005641e-06, + "loss": 21.7767, + "step": 346130 + }, + { + "epoch": 0.6992246997175952, + "grad_norm": 406.1513977050781, + "learning_rate": 2.6175813173390063e-06, + "loss": 32.9853, + "step": 346140 + }, + { + "epoch": 0.699244900350279, + "grad_norm": 236.93878173828125, + "learning_rate": 2.6172744302839925e-06, + "loss": 25.957, + "step": 346150 + }, + { + "epoch": 0.6992651009829628, + "grad_norm": 341.7259216308594, + "learning_rate": 2.616967554842092e-06, + "loss": 20.1887, + "step": 346160 + }, + { + "epoch": 0.6992853016156466, + "grad_norm": 119.66575622558594, + "learning_rate": 2.6166606910148024e-06, + "loss": 13.2485, + "step": 346170 + }, + { + "epoch": 0.6993055022483304, + "grad_norm": 385.6126708984375, + "learning_rate": 2.6163538388036213e-06, + "loss": 22.6795, + "step": 346180 + }, + { + "epoch": 0.6993257028810143, + "grad_norm": 148.49256896972656, + "learning_rate": 2.6160469982100426e-06, + "loss": 11.1807, + "step": 346190 + }, + { + "epoch": 0.6993459035136981, + "grad_norm": 189.14205932617188, + "learning_rate": 2.61574016923556e-06, + "loss": 13.4204, + "step": 346200 + }, + { + "epoch": 0.6993661041463819, + "grad_norm": 784.8663330078125, + "learning_rate": 2.6154333518816727e-06, + "loss": 32.9437, + "step": 346210 + }, + { + "epoch": 0.6993863047790657, + "grad_norm": 207.66246032714844, + "learning_rate": 2.6151265461498737e-06, + "loss": 12.0641, + "step": 346220 + }, + { + "epoch": 0.6994065054117495, + "grad_norm": 248.05140686035156, + "learning_rate": 2.6148197520416567e-06, + "loss": 30.4625, + "step": 346230 + }, + { + "epoch": 0.6994267060444334, + "grad_norm": 375.9503173828125, + "learning_rate": 2.6145129695585213e-06, + "loss": 13.1731, + "step": 346240 + }, + { + "epoch": 0.6994469066771172, + "grad_norm": 309.3663635253906, + "learning_rate": 2.614206198701958e-06, + "loss": 14.8673, + "step": 346250 + }, + { + "epoch": 0.699467107309801, + "grad_norm": 114.35416412353516, + "learning_rate": 2.6138994394734663e-06, + "loss": 8.6712, + "step": 346260 + }, + { + "epoch": 0.6994873079424847, + "grad_norm": 404.1855773925781, + "learning_rate": 2.613592691874539e-06, + "loss": 14.2193, + "step": 346270 + }, + { + "epoch": 0.6995075085751685, + "grad_norm": 118.95695495605469, + "learning_rate": 2.6132859559066704e-06, + "loss": 10.8912, + "step": 346280 + }, + { + "epoch": 0.6995277092078523, + "grad_norm": 329.8266906738281, + "learning_rate": 2.6129792315713576e-06, + "loss": 14.2744, + "step": 346290 + }, + { + "epoch": 0.6995479098405362, + "grad_norm": 1208.884033203125, + "learning_rate": 2.612672518870093e-06, + 
"loss": 39.5541, + "step": 346300 + }, + { + "epoch": 0.69956811047322, + "grad_norm": 714.0730590820312, + "learning_rate": 2.6123658178043753e-06, + "loss": 16.1393, + "step": 346310 + }, + { + "epoch": 0.6995883111059038, + "grad_norm": 214.5430145263672, + "learning_rate": 2.6120591283756946e-06, + "loss": 29.9662, + "step": 346320 + }, + { + "epoch": 0.6996085117385876, + "grad_norm": 950.339111328125, + "learning_rate": 2.6117524505855507e-06, + "loss": 26.5155, + "step": 346330 + }, + { + "epoch": 0.6996287123712714, + "grad_norm": 319.1742858886719, + "learning_rate": 2.611445784435435e-06, + "loss": 18.1529, + "step": 346340 + }, + { + "epoch": 0.6996489130039553, + "grad_norm": 517.3515625, + "learning_rate": 2.6111391299268406e-06, + "loss": 14.5742, + "step": 346350 + }, + { + "epoch": 0.6996691136366391, + "grad_norm": 83.57072448730469, + "learning_rate": 2.6108324870612674e-06, + "loss": 11.5814, + "step": 346360 + }, + { + "epoch": 0.6996893142693229, + "grad_norm": 113.90496063232422, + "learning_rate": 2.610525855840206e-06, + "loss": 11.7873, + "step": 346370 + }, + { + "epoch": 0.6997095149020067, + "grad_norm": 337.7570495605469, + "learning_rate": 2.61021923626515e-06, + "loss": 34.2954, + "step": 346380 + }, + { + "epoch": 0.6997297155346905, + "grad_norm": 380.3957214355469, + "learning_rate": 2.609912628337596e-06, + "loss": 17.7364, + "step": 346390 + }, + { + "epoch": 0.6997499161673744, + "grad_norm": 61.789344787597656, + "learning_rate": 2.6096060320590393e-06, + "loss": 8.3513, + "step": 346400 + }, + { + "epoch": 0.6997701168000582, + "grad_norm": 216.51742553710938, + "learning_rate": 2.609299447430973e-06, + "loss": 16.2906, + "step": 346410 + }, + { + "epoch": 0.699790317432742, + "grad_norm": 595.2395629882812, + "learning_rate": 2.60899287445489e-06, + "loss": 16.2307, + "step": 346420 + }, + { + "epoch": 0.6998105180654258, + "grad_norm": 41.3694953918457, + "learning_rate": 2.608686313132287e-06, + "loss": 16.0588, + "step": 346430 + }, + { + "epoch": 0.6998307186981096, + "grad_norm": 108.62818908691406, + "learning_rate": 2.6083797634646567e-06, + "loss": 14.3768, + "step": 346440 + }, + { + "epoch": 0.6998509193307935, + "grad_norm": 300.60406494140625, + "learning_rate": 2.608073225453492e-06, + "loss": 18.2154, + "step": 346450 + }, + { + "epoch": 0.6998711199634773, + "grad_norm": 297.52813720703125, + "learning_rate": 2.607766699100288e-06, + "loss": 21.0632, + "step": 346460 + }, + { + "epoch": 0.6998913205961611, + "grad_norm": 135.4776611328125, + "learning_rate": 2.6074601844065407e-06, + "loss": 12.4945, + "step": 346470 + }, + { + "epoch": 0.6999115212288449, + "grad_norm": 260.7397155761719, + "learning_rate": 2.607153681373743e-06, + "loss": 26.7468, + "step": 346480 + }, + { + "epoch": 0.6999317218615287, + "grad_norm": 420.61676025390625, + "learning_rate": 2.6068471900033852e-06, + "loss": 28.9963, + "step": 346490 + }, + { + "epoch": 0.6999519224942126, + "grad_norm": 104.43257904052734, + "learning_rate": 2.6065407102969664e-06, + "loss": 20.6861, + "step": 346500 + }, + { + "epoch": 0.6999721231268964, + "grad_norm": 306.8272399902344, + "learning_rate": 2.6062342422559776e-06, + "loss": 12.7433, + "step": 346510 + }, + { + "epoch": 0.6999923237595802, + "grad_norm": 365.0401916503906, + "learning_rate": 2.605927785881911e-06, + "loss": 18.0941, + "step": 346520 + }, + { + "epoch": 0.7000125243922639, + "grad_norm": 265.7608947753906, + "learning_rate": 2.6056213411762645e-06, + "loss": 18.0347, + "step": 346530 + }, + { + 
"epoch": 0.7000327250249477, + "grad_norm": 504.5923156738281, + "learning_rate": 2.6053149081405267e-06, + "loss": 14.9236, + "step": 346540 + }, + { + "epoch": 0.7000529256576316, + "grad_norm": 469.1474609375, + "learning_rate": 2.6050084867761953e-06, + "loss": 19.0025, + "step": 346550 + }, + { + "epoch": 0.7000731262903154, + "grad_norm": 1204.1434326171875, + "learning_rate": 2.6047020770847618e-06, + "loss": 34.2178, + "step": 346560 + }, + { + "epoch": 0.7000933269229992, + "grad_norm": 425.03106689453125, + "learning_rate": 2.6043956790677195e-06, + "loss": 21.6887, + "step": 346570 + }, + { + "epoch": 0.700113527555683, + "grad_norm": 427.1048278808594, + "learning_rate": 2.6040892927265627e-06, + "loss": 20.1533, + "step": 346580 + }, + { + "epoch": 0.7001337281883668, + "grad_norm": 212.57095336914062, + "learning_rate": 2.603782918062784e-06, + "loss": 19.1921, + "step": 346590 + }, + { + "epoch": 0.7001539288210507, + "grad_norm": 226.52755737304688, + "learning_rate": 2.6034765550778753e-06, + "loss": 15.4578, + "step": 346600 + }, + { + "epoch": 0.7001741294537345, + "grad_norm": 323.2734680175781, + "learning_rate": 2.603170203773331e-06, + "loss": 18.7106, + "step": 346610 + }, + { + "epoch": 0.7001943300864183, + "grad_norm": 703.8001708984375, + "learning_rate": 2.6028638641506464e-06, + "loss": 12.5089, + "step": 346620 + }, + { + "epoch": 0.7002145307191021, + "grad_norm": 203.0648956298828, + "learning_rate": 2.602557536211313e-06, + "loss": 25.0396, + "step": 346630 + }, + { + "epoch": 0.7002347313517859, + "grad_norm": 447.15386962890625, + "learning_rate": 2.6022512199568205e-06, + "loss": 19.8693, + "step": 346640 + }, + { + "epoch": 0.7002549319844698, + "grad_norm": 77.42908477783203, + "learning_rate": 2.601944915388668e-06, + "loss": 20.6841, + "step": 346650 + }, + { + "epoch": 0.7002751326171536, + "grad_norm": 807.8881225585938, + "learning_rate": 2.6016386225083438e-06, + "loss": 17.1238, + "step": 346660 + }, + { + "epoch": 0.7002953332498374, + "grad_norm": 653.5703125, + "learning_rate": 2.6013323413173408e-06, + "loss": 20.1987, + "step": 346670 + }, + { + "epoch": 0.7003155338825212, + "grad_norm": 404.2229309082031, + "learning_rate": 2.601026071817153e-06, + "loss": 22.7176, + "step": 346680 + }, + { + "epoch": 0.700335734515205, + "grad_norm": 227.59866333007812, + "learning_rate": 2.600719814009277e-06, + "loss": 20.0885, + "step": 346690 + }, + { + "epoch": 0.7003559351478889, + "grad_norm": 349.4118957519531, + "learning_rate": 2.600413567895198e-06, + "loss": 14.6721, + "step": 346700 + }, + { + "epoch": 0.7003761357805727, + "grad_norm": 532.9380493164062, + "learning_rate": 2.6001073334764117e-06, + "loss": 8.393, + "step": 346710 + }, + { + "epoch": 0.7003963364132565, + "grad_norm": 193.77354431152344, + "learning_rate": 2.5998011107544134e-06, + "loss": 10.4868, + "step": 346720 + }, + { + "epoch": 0.7004165370459403, + "grad_norm": 356.72210693359375, + "learning_rate": 2.5994948997306935e-06, + "loss": 34.4526, + "step": 346730 + }, + { + "epoch": 0.7004367376786241, + "grad_norm": 557.6670532226562, + "learning_rate": 2.599188700406743e-06, + "loss": 19.6684, + "step": 346740 + }, + { + "epoch": 0.700456938311308, + "grad_norm": 303.978271484375, + "learning_rate": 2.5988825127840547e-06, + "loss": 13.9076, + "step": 346750 + }, + { + "epoch": 0.7004771389439918, + "grad_norm": 590.5808715820312, + "learning_rate": 2.5985763368641253e-06, + "loss": 15.3695, + "step": 346760 + }, + { + "epoch": 0.7004973395766756, + "grad_norm": 
279.01910400390625, + "learning_rate": 2.5982701726484405e-06, + "loss": 27.293, + "step": 346770 + }, + { + "epoch": 0.7005175402093593, + "grad_norm": 216.29835510253906, + "learning_rate": 2.5979640201384953e-06, + "loss": 21.8219, + "step": 346780 + }, + { + "epoch": 0.7005377408420431, + "grad_norm": 411.185302734375, + "learning_rate": 2.597657879335784e-06, + "loss": 24.5489, + "step": 346790 + }, + { + "epoch": 0.7005579414747269, + "grad_norm": 192.6901397705078, + "learning_rate": 2.5973517502417966e-06, + "loss": 18.8785, + "step": 346800 + }, + { + "epoch": 0.7005781421074108, + "grad_norm": 137.09548950195312, + "learning_rate": 2.597045632858024e-06, + "loss": 10.2786, + "step": 346810 + }, + { + "epoch": 0.7005983427400946, + "grad_norm": 856.15966796875, + "learning_rate": 2.5967395271859614e-06, + "loss": 31.4287, + "step": 346820 + }, + { + "epoch": 0.7006185433727784, + "grad_norm": 40.23459243774414, + "learning_rate": 2.596433433227099e-06, + "loss": 7.1841, + "step": 346830 + }, + { + "epoch": 0.7006387440054622, + "grad_norm": 188.1201171875, + "learning_rate": 2.596127350982926e-06, + "loss": 5.4929, + "step": 346840 + }, + { + "epoch": 0.700658944638146, + "grad_norm": 338.2161865234375, + "learning_rate": 2.5958212804549387e-06, + "loss": 17.0177, + "step": 346850 + }, + { + "epoch": 0.7006791452708299, + "grad_norm": 118.79478454589844, + "learning_rate": 2.5955152216446255e-06, + "loss": 14.1635, + "step": 346860 + }, + { + "epoch": 0.7006993459035137, + "grad_norm": 137.65213012695312, + "learning_rate": 2.595209174553481e-06, + "loss": 12.1935, + "step": 346870 + }, + { + "epoch": 0.7007195465361975, + "grad_norm": 185.00650024414062, + "learning_rate": 2.594903139182996e-06, + "loss": 14.265, + "step": 346880 + }, + { + "epoch": 0.7007397471688813, + "grad_norm": 545.932373046875, + "learning_rate": 2.594597115534658e-06, + "loss": 17.734, + "step": 346890 + }, + { + "epoch": 0.7007599478015651, + "grad_norm": 1035.7293701171875, + "learning_rate": 2.5942911036099657e-06, + "loss": 26.7553, + "step": 346900 + }, + { + "epoch": 0.700780148434249, + "grad_norm": 385.90435791015625, + "learning_rate": 2.5939851034104035e-06, + "loss": 11.4597, + "step": 346910 + }, + { + "epoch": 0.7008003490669328, + "grad_norm": 587.3248291015625, + "learning_rate": 2.5936791149374686e-06, + "loss": 14.9327, + "step": 346920 + }, + { + "epoch": 0.7008205496996166, + "grad_norm": 234.338134765625, + "learning_rate": 2.5933731381926473e-06, + "loss": 22.0711, + "step": 346930 + }, + { + "epoch": 0.7008407503323004, + "grad_norm": 155.63592529296875, + "learning_rate": 2.593067173177436e-06, + "loss": 49.627, + "step": 346940 + }, + { + "epoch": 0.7008609509649842, + "grad_norm": 52.495059967041016, + "learning_rate": 2.5927612198933237e-06, + "loss": 22.2556, + "step": 346950 + }, + { + "epoch": 0.7008811515976681, + "grad_norm": 363.7169494628906, + "learning_rate": 2.592455278341799e-06, + "loss": 19.1529, + "step": 346960 + }, + { + "epoch": 0.7009013522303519, + "grad_norm": 88.28634643554688, + "learning_rate": 2.5921493485243566e-06, + "loss": 9.0732, + "step": 346970 + }, + { + "epoch": 0.7009215528630357, + "grad_norm": 40.72396469116211, + "learning_rate": 2.5918434304424867e-06, + "loss": 17.0255, + "step": 346980 + }, + { + "epoch": 0.7009417534957195, + "grad_norm": 554.2332763671875, + "learning_rate": 2.591537524097678e-06, + "loss": 21.5423, + "step": 346990 + }, + { + "epoch": 0.7009619541284033, + "grad_norm": 195.70889282226562, + "learning_rate": 
2.5912316294914232e-06, + "loss": 11.9071, + "step": 347000 + }, + { + "epoch": 0.7009821547610872, + "grad_norm": 477.51763916015625, + "learning_rate": 2.590925746625217e-06, + "loss": 19.4229, + "step": 347010 + }, + { + "epoch": 0.701002355393771, + "grad_norm": 214.69302368164062, + "learning_rate": 2.590619875500543e-06, + "loss": 13.2143, + "step": 347020 + }, + { + "epoch": 0.7010225560264548, + "grad_norm": 521.0151977539062, + "learning_rate": 2.590314016118895e-06, + "loss": 16.2183, + "step": 347030 + }, + { + "epoch": 0.7010427566591385, + "grad_norm": 197.3564910888672, + "learning_rate": 2.5900081684817667e-06, + "loss": 20.2638, + "step": 347040 + }, + { + "epoch": 0.7010629572918223, + "grad_norm": 134.8383331298828, + "learning_rate": 2.5897023325906458e-06, + "loss": 23.2689, + "step": 347050 + }, + { + "epoch": 0.7010831579245062, + "grad_norm": 341.59088134765625, + "learning_rate": 2.589396508447022e-06, + "loss": 16.9646, + "step": 347060 + }, + { + "epoch": 0.70110335855719, + "grad_norm": 380.99041748046875, + "learning_rate": 2.5890906960523865e-06, + "loss": 11.1826, + "step": 347070 + }, + { + "epoch": 0.7011235591898738, + "grad_norm": 302.3643493652344, + "learning_rate": 2.588784895408235e-06, + "loss": 17.5542, + "step": 347080 + }, + { + "epoch": 0.7011437598225576, + "grad_norm": 407.2210388183594, + "learning_rate": 2.58847910651605e-06, + "loss": 13.7203, + "step": 347090 + }, + { + "epoch": 0.7011639604552414, + "grad_norm": 1867.4381103515625, + "learning_rate": 2.588173329377324e-06, + "loss": 29.6082, + "step": 347100 + }, + { + "epoch": 0.7011841610879253, + "grad_norm": 669.7487182617188, + "learning_rate": 2.587867563993552e-06, + "loss": 13.0289, + "step": 347110 + }, + { + "epoch": 0.7012043617206091, + "grad_norm": 50.876983642578125, + "learning_rate": 2.5875618103662204e-06, + "loss": 11.7301, + "step": 347120 + }, + { + "epoch": 0.7012245623532929, + "grad_norm": 49.40553665161133, + "learning_rate": 2.5872560684968175e-06, + "loss": 12.6641, + "step": 347130 + }, + { + "epoch": 0.7012447629859767, + "grad_norm": 378.6500549316406, + "learning_rate": 2.5869503383868387e-06, + "loss": 44.4532, + "step": 347140 + }, + { + "epoch": 0.7012649636186605, + "grad_norm": 221.18179321289062, + "learning_rate": 2.5866446200377688e-06, + "loss": 14.7554, + "step": 347150 + }, + { + "epoch": 0.7012851642513444, + "grad_norm": 81.32159423828125, + "learning_rate": 2.5863389134511024e-06, + "loss": 11.0102, + "step": 347160 + }, + { + "epoch": 0.7013053648840282, + "grad_norm": 567.9331665039062, + "learning_rate": 2.5860332186283277e-06, + "loss": 12.9765, + "step": 347170 + }, + { + "epoch": 0.701325565516712, + "grad_norm": 326.4610900878906, + "learning_rate": 2.5857275355709317e-06, + "loss": 10.4179, + "step": 347180 + }, + { + "epoch": 0.7013457661493958, + "grad_norm": 81.64095306396484, + "learning_rate": 2.585421864280409e-06, + "loss": 19.6015, + "step": 347190 + }, + { + "epoch": 0.7013659667820796, + "grad_norm": 2042.0657958984375, + "learning_rate": 2.5851162047582477e-06, + "loss": 25.512, + "step": 347200 + }, + { + "epoch": 0.7013861674147635, + "grad_norm": 341.677734375, + "learning_rate": 2.5848105570059346e-06, + "loss": 16.6826, + "step": 347210 + }, + { + "epoch": 0.7014063680474473, + "grad_norm": 276.0132751464844, + "learning_rate": 2.584504921024963e-06, + "loss": 18.1895, + "step": 347220 + }, + { + "epoch": 0.7014265686801311, + "grad_norm": 313.1415100097656, + "learning_rate": 2.5841992968168224e-06, + "loss": 12.0104, + 
"step": 347230 + }, + { + "epoch": 0.7014467693128149, + "grad_norm": 726.3991088867188, + "learning_rate": 2.5838936843830015e-06, + "loss": 13.0599, + "step": 347240 + }, + { + "epoch": 0.7014669699454987, + "grad_norm": 335.7645568847656, + "learning_rate": 2.5835880837249884e-06, + "loss": 23.8675, + "step": 347250 + }, + { + "epoch": 0.7014871705781826, + "grad_norm": 423.79449462890625, + "learning_rate": 2.5832824948442747e-06, + "loss": 13.2148, + "step": 347260 + }, + { + "epoch": 0.7015073712108664, + "grad_norm": 392.97625732421875, + "learning_rate": 2.5829769177423504e-06, + "loss": 26.6486, + "step": 347270 + }, + { + "epoch": 0.7015275718435502, + "grad_norm": 255.8606719970703, + "learning_rate": 2.5826713524207e-06, + "loss": 7.65, + "step": 347280 + }, + { + "epoch": 0.701547772476234, + "grad_norm": 249.2724609375, + "learning_rate": 2.5823657988808176e-06, + "loss": 12.9573, + "step": 347290 + }, + { + "epoch": 0.7015679731089177, + "grad_norm": 584.9038696289062, + "learning_rate": 2.582060257124195e-06, + "loss": 20.7267, + "step": 347300 + }, + { + "epoch": 0.7015881737416015, + "grad_norm": 586.8082275390625, + "learning_rate": 2.5817547271523124e-06, + "loss": 12.8741, + "step": 347310 + }, + { + "epoch": 0.7016083743742854, + "grad_norm": 324.95489501953125, + "learning_rate": 2.5814492089666642e-06, + "loss": 19.4545, + "step": 347320 + }, + { + "epoch": 0.7016285750069692, + "grad_norm": 535.495361328125, + "learning_rate": 2.581143702568742e-06, + "loss": 29.9428, + "step": 347330 + }, + { + "epoch": 0.701648775639653, + "grad_norm": 323.0057067871094, + "learning_rate": 2.5808382079600315e-06, + "loss": 13.38, + "step": 347340 + }, + { + "epoch": 0.7016689762723368, + "grad_norm": 477.53656005859375, + "learning_rate": 2.5805327251420205e-06, + "loss": 12.4908, + "step": 347350 + }, + { + "epoch": 0.7016891769050206, + "grad_norm": 0.0, + "learning_rate": 2.580227254116199e-06, + "loss": 14.4245, + "step": 347360 + }, + { + "epoch": 0.7017093775377045, + "grad_norm": 209.73056030273438, + "learning_rate": 2.5799217948840603e-06, + "loss": 26.2392, + "step": 347370 + }, + { + "epoch": 0.7017295781703883, + "grad_norm": 520.84716796875, + "learning_rate": 2.579616347447086e-06, + "loss": 24.331, + "step": 347380 + }, + { + "epoch": 0.7017497788030721, + "grad_norm": 537.9876098632812, + "learning_rate": 2.579310911806768e-06, + "loss": 18.937, + "step": 347390 + }, + { + "epoch": 0.7017699794357559, + "grad_norm": 362.0723876953125, + "learning_rate": 2.5790054879645964e-06, + "loss": 14.1632, + "step": 347400 + }, + { + "epoch": 0.7017901800684397, + "grad_norm": 427.97357177734375, + "learning_rate": 2.5787000759220592e-06, + "loss": 12.2612, + "step": 347410 + }, + { + "epoch": 0.7018103807011236, + "grad_norm": 450.44036865234375, + "learning_rate": 2.578394675680641e-06, + "loss": 19.8099, + "step": 347420 + }, + { + "epoch": 0.7018305813338074, + "grad_norm": 362.4385986328125, + "learning_rate": 2.578089287241836e-06, + "loss": 15.9942, + "step": 347430 + }, + { + "epoch": 0.7018507819664912, + "grad_norm": 404.1336975097656, + "learning_rate": 2.5777839106071308e-06, + "loss": 16.5669, + "step": 347440 + }, + { + "epoch": 0.701870982599175, + "grad_norm": 296.78582763671875, + "learning_rate": 2.5774785457780107e-06, + "loss": 18.6781, + "step": 347450 + }, + { + "epoch": 0.7018911832318588, + "grad_norm": 148.4103546142578, + "learning_rate": 2.577173192755968e-06, + "loss": 12.5277, + "step": 347460 + }, + { + "epoch": 0.7019113838645427, + 
"grad_norm": 533.5665283203125, + "learning_rate": 2.576867851542487e-06, + "loss": 20.6553, + "step": 347470 + }, + { + "epoch": 0.7019315844972265, + "grad_norm": 359.1020202636719, + "learning_rate": 2.576562522139061e-06, + "loss": 16.1798, + "step": 347480 + }, + { + "epoch": 0.7019517851299103, + "grad_norm": 115.88701629638672, + "learning_rate": 2.5762572045471744e-06, + "loss": 16.8293, + "step": 347490 + }, + { + "epoch": 0.7019719857625941, + "grad_norm": 268.5501708984375, + "learning_rate": 2.5759518987683154e-06, + "loss": 19.6261, + "step": 347500 + }, + { + "epoch": 0.7019921863952779, + "grad_norm": 328.0009460449219, + "learning_rate": 2.575646604803974e-06, + "loss": 36.7878, + "step": 347510 + }, + { + "epoch": 0.7020123870279618, + "grad_norm": 827.8441162109375, + "learning_rate": 2.5753413226556356e-06, + "loss": 22.8495, + "step": 347520 + }, + { + "epoch": 0.7020325876606456, + "grad_norm": 458.2436828613281, + "learning_rate": 2.575036052324791e-06, + "loss": 14.1508, + "step": 347530 + }, + { + "epoch": 0.7020527882933294, + "grad_norm": 467.7879943847656, + "learning_rate": 2.5747307938129245e-06, + "loss": 14.7493, + "step": 347540 + }, + { + "epoch": 0.7020729889260131, + "grad_norm": 622.4500122070312, + "learning_rate": 2.5744255471215284e-06, + "loss": 20.4477, + "step": 347550 + }, + { + "epoch": 0.7020931895586969, + "grad_norm": 494.28387451171875, + "learning_rate": 2.5741203122520876e-06, + "loss": 21.0831, + "step": 347560 + }, + { + "epoch": 0.7021133901913807, + "grad_norm": 286.85528564453125, + "learning_rate": 2.573815089206089e-06, + "loss": 29.851, + "step": 347570 + }, + { + "epoch": 0.7021335908240646, + "grad_norm": 182.8809814453125, + "learning_rate": 2.573509877985022e-06, + "loss": 19.086, + "step": 347580 + }, + { + "epoch": 0.7021537914567484, + "grad_norm": 284.8392639160156, + "learning_rate": 2.5732046785903744e-06, + "loss": 16.4531, + "step": 347590 + }, + { + "epoch": 0.7021739920894322, + "grad_norm": 0.0, + "learning_rate": 2.5728994910236304e-06, + "loss": 16.0378, + "step": 347600 + }, + { + "epoch": 0.702194192722116, + "grad_norm": 355.5620422363281, + "learning_rate": 2.572594315286281e-06, + "loss": 22.9755, + "step": 347610 + }, + { + "epoch": 0.7022143933547998, + "grad_norm": 272.7740783691406, + "learning_rate": 2.5722891513798156e-06, + "loss": 16.932, + "step": 347620 + }, + { + "epoch": 0.7022345939874837, + "grad_norm": 410.8348083496094, + "learning_rate": 2.5719839993057143e-06, + "loss": 26.2486, + "step": 347630 + }, + { + "epoch": 0.7022547946201675, + "grad_norm": 197.9288787841797, + "learning_rate": 2.571678859065469e-06, + "loss": 35.5925, + "step": 347640 + }, + { + "epoch": 0.7022749952528513, + "grad_norm": 485.03631591796875, + "learning_rate": 2.571373730660568e-06, + "loss": 22.3784, + "step": 347650 + }, + { + "epoch": 0.7022951958855351, + "grad_norm": 501.2548522949219, + "learning_rate": 2.571068614092497e-06, + "loss": 15.8858, + "step": 347660 + }, + { + "epoch": 0.702315396518219, + "grad_norm": 314.9706726074219, + "learning_rate": 2.5707635093627415e-06, + "loss": 35.8188, + "step": 347670 + }, + { + "epoch": 0.7023355971509028, + "grad_norm": 250.5843505859375, + "learning_rate": 2.5704584164727898e-06, + "loss": 29.6213, + "step": 347680 + }, + { + "epoch": 0.7023557977835866, + "grad_norm": 84.46544647216797, + "learning_rate": 2.5701533354241325e-06, + "loss": 14.2155, + "step": 347690 + }, + { + "epoch": 0.7023759984162704, + "grad_norm": 104.89397430419922, + "learning_rate": 
2.5698482662182494e-06, + "loss": 19.7077, + "step": 347700 + }, + { + "epoch": 0.7023961990489542, + "grad_norm": 1.461729884147644, + "learning_rate": 2.5695432088566313e-06, + "loss": 8.5402, + "step": 347710 + }, + { + "epoch": 0.702416399681638, + "grad_norm": 338.3124084472656, + "learning_rate": 2.5692381633407672e-06, + "loss": 18.6766, + "step": 347720 + }, + { + "epoch": 0.7024366003143219, + "grad_norm": 484.132080078125, + "learning_rate": 2.568933129672141e-06, + "loss": 21.331, + "step": 347730 + }, + { + "epoch": 0.7024568009470057, + "grad_norm": 435.5819396972656, + "learning_rate": 2.568628107852238e-06, + "loss": 23.2717, + "step": 347740 + }, + { + "epoch": 0.7024770015796895, + "grad_norm": 593.2225952148438, + "learning_rate": 2.568323097882548e-06, + "loss": 33.024, + "step": 347750 + }, + { + "epoch": 0.7024972022123733, + "grad_norm": 360.1360168457031, + "learning_rate": 2.5680180997645577e-06, + "loss": 9.6854, + "step": 347760 + }, + { + "epoch": 0.7025174028450571, + "grad_norm": 450.7495422363281, + "learning_rate": 2.567713113499749e-06, + "loss": 24.615, + "step": 347770 + }, + { + "epoch": 0.702537603477741, + "grad_norm": 413.72882080078125, + "learning_rate": 2.5674081390896146e-06, + "loss": 14.1096, + "step": 347780 + }, + { + "epoch": 0.7025578041104248, + "grad_norm": 130.60775756835938, + "learning_rate": 2.567103176535635e-06, + "loss": 10.5338, + "step": 347790 + }, + { + "epoch": 0.7025780047431086, + "grad_norm": 277.84588623046875, + "learning_rate": 2.5667982258393016e-06, + "loss": 26.829, + "step": 347800 + }, + { + "epoch": 0.7025982053757923, + "grad_norm": 211.7849578857422, + "learning_rate": 2.5664932870020966e-06, + "loss": 13.9786, + "step": 347810 + }, + { + "epoch": 0.7026184060084761, + "grad_norm": 252.23204040527344, + "learning_rate": 2.5661883600255107e-06, + "loss": 10.5075, + "step": 347820 + }, + { + "epoch": 0.70263860664116, + "grad_norm": 339.7162170410156, + "learning_rate": 2.565883444911025e-06, + "loss": 21.0565, + "step": 347830 + }, + { + "epoch": 0.7026588072738438, + "grad_norm": 387.20281982421875, + "learning_rate": 2.5655785416601297e-06, + "loss": 20.3326, + "step": 347840 + }, + { + "epoch": 0.7026790079065276, + "grad_norm": 764.5905151367188, + "learning_rate": 2.5652736502743105e-06, + "loss": 22.7976, + "step": 347850 + }, + { + "epoch": 0.7026992085392114, + "grad_norm": 473.9588623046875, + "learning_rate": 2.56496877075505e-06, + "loss": 13.2835, + "step": 347860 + }, + { + "epoch": 0.7027194091718952, + "grad_norm": 2.3222599029541016, + "learning_rate": 2.564663903103838e-06, + "loss": 10.7422, + "step": 347870 + }, + { + "epoch": 0.7027396098045791, + "grad_norm": 3.153733015060425, + "learning_rate": 2.564359047322158e-06, + "loss": 10.4594, + "step": 347880 + }, + { + "epoch": 0.7027598104372629, + "grad_norm": 310.48565673828125, + "learning_rate": 2.5640542034114955e-06, + "loss": 13.2866, + "step": 347890 + }, + { + "epoch": 0.7027800110699467, + "grad_norm": 368.23065185546875, + "learning_rate": 2.5637493713733376e-06, + "loss": 20.0134, + "step": 347900 + }, + { + "epoch": 0.7028002117026305, + "grad_norm": 217.38487243652344, + "learning_rate": 2.5634445512091733e-06, + "loss": 12.3388, + "step": 347910 + }, + { + "epoch": 0.7028204123353143, + "grad_norm": 1.3076226711273193, + "learning_rate": 2.563139742920481e-06, + "loss": 13.5034, + "step": 347920 + }, + { + "epoch": 0.7028406129679982, + "grad_norm": 269.5832824707031, + "learning_rate": 2.5628349465087498e-06, + "loss": 20.369, + 
"step": 347930 + }, + { + "epoch": 0.702860813600682, + "grad_norm": 1015.3848266601562, + "learning_rate": 2.5625301619754678e-06, + "loss": 37.5008, + "step": 347940 + }, + { + "epoch": 0.7028810142333658, + "grad_norm": 353.0274658203125, + "learning_rate": 2.5622253893221176e-06, + "loss": 21.5301, + "step": 347950 + }, + { + "epoch": 0.7029012148660496, + "grad_norm": 160.7222900390625, + "learning_rate": 2.561920628550184e-06, + "loss": 16.6286, + "step": 347960 + }, + { + "epoch": 0.7029214154987334, + "grad_norm": 95.90129852294922, + "learning_rate": 2.5616158796611527e-06, + "loss": 15.1646, + "step": 347970 + }, + { + "epoch": 0.7029416161314173, + "grad_norm": 433.74139404296875, + "learning_rate": 2.5613111426565144e-06, + "loss": 21.9443, + "step": 347980 + }, + { + "epoch": 0.7029618167641011, + "grad_norm": 260.3633728027344, + "learning_rate": 2.5610064175377456e-06, + "loss": 18.6269, + "step": 347990 + }, + { + "epoch": 0.7029820173967849, + "grad_norm": 445.6225891113281, + "learning_rate": 2.560701704306336e-06, + "loss": 16.5316, + "step": 348000 + }, + { + "epoch": 0.7030022180294687, + "grad_norm": 217.5462646484375, + "learning_rate": 2.5603970029637727e-06, + "loss": 12.7375, + "step": 348010 + }, + { + "epoch": 0.7030224186621525, + "grad_norm": 435.4579162597656, + "learning_rate": 2.5600923135115374e-06, + "loss": 37.3925, + "step": 348020 + }, + { + "epoch": 0.7030426192948364, + "grad_norm": 334.6219177246094, + "learning_rate": 2.5597876359511153e-06, + "loss": 22.5415, + "step": 348030 + }, + { + "epoch": 0.7030628199275202, + "grad_norm": 241.5675811767578, + "learning_rate": 2.5594829702839937e-06, + "loss": 19.2447, + "step": 348040 + }, + { + "epoch": 0.703083020560204, + "grad_norm": 163.2465362548828, + "learning_rate": 2.5591783165116563e-06, + "loss": 11.7875, + "step": 348050 + }, + { + "epoch": 0.7031032211928877, + "grad_norm": 132.82749938964844, + "learning_rate": 2.5588736746355858e-06, + "loss": 21.8375, + "step": 348060 + }, + { + "epoch": 0.7031234218255715, + "grad_norm": 150.1280975341797, + "learning_rate": 2.5585690446572708e-06, + "loss": 26.0591, + "step": 348070 + }, + { + "epoch": 0.7031436224582553, + "grad_norm": 583.20654296875, + "learning_rate": 2.558264426578192e-06, + "loss": 19.3685, + "step": 348080 + }, + { + "epoch": 0.7031638230909392, + "grad_norm": 589.6922607421875, + "learning_rate": 2.557959820399839e-06, + "loss": 24.9182, + "step": 348090 + }, + { + "epoch": 0.703184023723623, + "grad_norm": 358.9769287109375, + "learning_rate": 2.557655226123693e-06, + "loss": 13.0542, + "step": 348100 + }, + { + "epoch": 0.7032042243563068, + "grad_norm": 482.1541442871094, + "learning_rate": 2.5573506437512374e-06, + "loss": 32.9986, + "step": 348110 + }, + { + "epoch": 0.7032244249889906, + "grad_norm": 782.5233764648438, + "learning_rate": 2.55704607328396e-06, + "loss": 23.4075, + "step": 348120 + }, + { + "epoch": 0.7032446256216744, + "grad_norm": 537.6296997070312, + "learning_rate": 2.556741514723342e-06, + "loss": 31.0836, + "step": 348130 + }, + { + "epoch": 0.7032648262543583, + "grad_norm": 13.034551620483398, + "learning_rate": 2.556436968070872e-06, + "loss": 17.012, + "step": 348140 + }, + { + "epoch": 0.7032850268870421, + "grad_norm": 384.6843566894531, + "learning_rate": 2.55613243332803e-06, + "loss": 18.9023, + "step": 348150 + }, + { + "epoch": 0.7033052275197259, + "grad_norm": 142.6124267578125, + "learning_rate": 2.5558279104963037e-06, + "loss": 15.5146, + "step": 348160 + }, + { + "epoch": 
0.7033254281524097, + "grad_norm": 686.4570922851562, + "learning_rate": 2.5555233995771757e-06, + "loss": 19.0457, + "step": 348170 + }, + { + "epoch": 0.7033456287850935, + "grad_norm": 362.68115234375, + "learning_rate": 2.555218900572128e-06, + "loss": 10.2994, + "step": 348180 + }, + { + "epoch": 0.7033658294177774, + "grad_norm": 10.302240371704102, + "learning_rate": 2.5549144134826487e-06, + "loss": 18.6784, + "step": 348190 + }, + { + "epoch": 0.7033860300504612, + "grad_norm": 609.0881958007812, + "learning_rate": 2.5546099383102206e-06, + "loss": 16.0359, + "step": 348200 + }, + { + "epoch": 0.703406230683145, + "grad_norm": 620.99609375, + "learning_rate": 2.5543054750563246e-06, + "loss": 13.1333, + "step": 348210 + }, + { + "epoch": 0.7034264313158288, + "grad_norm": 14.392234802246094, + "learning_rate": 2.5540010237224476e-06, + "loss": 13.039, + "step": 348220 + }, + { + "epoch": 0.7034466319485126, + "grad_norm": 644.1470947265625, + "learning_rate": 2.5536965843100764e-06, + "loss": 18.3325, + "step": 348230 + }, + { + "epoch": 0.7034668325811965, + "grad_norm": 750.2973022460938, + "learning_rate": 2.5533921568206876e-06, + "loss": 13.2997, + "step": 348240 + }, + { + "epoch": 0.7034870332138803, + "grad_norm": 251.7254180908203, + "learning_rate": 2.5530877412557684e-06, + "loss": 12.3368, + "step": 348250 + }, + { + "epoch": 0.7035072338465641, + "grad_norm": 288.3773193359375, + "learning_rate": 2.5527833376168055e-06, + "loss": 10.5416, + "step": 348260 + }, + { + "epoch": 0.7035274344792479, + "grad_norm": 314.669189453125, + "learning_rate": 2.552478945905279e-06, + "loss": 14.8445, + "step": 348270 + }, + { + "epoch": 0.7035476351119317, + "grad_norm": 538.0570068359375, + "learning_rate": 2.5521745661226717e-06, + "loss": 14.2398, + "step": 348280 + }, + { + "epoch": 0.7035678357446156, + "grad_norm": 245.4825439453125, + "learning_rate": 2.5518701982704684e-06, + "loss": 12.9883, + "step": 348290 + }, + { + "epoch": 0.7035880363772994, + "grad_norm": 530.113525390625, + "learning_rate": 2.5515658423501573e-06, + "loss": 14.3787, + "step": 348300 + }, + { + "epoch": 0.7036082370099832, + "grad_norm": 60.87131118774414, + "learning_rate": 2.551261498363213e-06, + "loss": 25.7569, + "step": 348310 + }, + { + "epoch": 0.7036284376426669, + "grad_norm": 302.0447692871094, + "learning_rate": 2.5509571663111233e-06, + "loss": 17.5409, + "step": 348320 + }, + { + "epoch": 0.7036486382753507, + "grad_norm": 573.7368774414062, + "learning_rate": 2.5506528461953726e-06, + "loss": 7.0166, + "step": 348330 + }, + { + "epoch": 0.7036688389080346, + "grad_norm": 41.724220275878906, + "learning_rate": 2.5503485380174443e-06, + "loss": 26.0337, + "step": 348340 + }, + { + "epoch": 0.7036890395407184, + "grad_norm": 337.8447570800781, + "learning_rate": 2.5500442417788176e-06, + "loss": 15.9246, + "step": 348350 + }, + { + "epoch": 0.7037092401734022, + "grad_norm": 222.74319458007812, + "learning_rate": 2.549739957480979e-06, + "loss": 25.3731, + "step": 348360 + }, + { + "epoch": 0.703729440806086, + "grad_norm": 106.73396301269531, + "learning_rate": 2.549435685125412e-06, + "loss": 13.1698, + "step": 348370 + }, + { + "epoch": 0.7037496414387698, + "grad_norm": 35.89967727661133, + "learning_rate": 2.5491314247135955e-06, + "loss": 19.4369, + "step": 348380 + }, + { + "epoch": 0.7037698420714537, + "grad_norm": 150.50033569335938, + "learning_rate": 2.5488271762470172e-06, + "loss": 37.7394, + "step": 348390 + }, + { + "epoch": 0.7037900427041375, + "grad_norm": 
311.3075866699219, + "learning_rate": 2.5485229397271567e-06, + "loss": 19.6422, + "step": 348400 + }, + { + "epoch": 0.7038102433368213, + "grad_norm": 756.06884765625, + "learning_rate": 2.5482187151554994e-06, + "loss": 18.5072, + "step": 348410 + }, + { + "epoch": 0.7038304439695051, + "grad_norm": 356.7169189453125, + "learning_rate": 2.547914502533525e-06, + "loss": 26.5348, + "step": 348420 + }, + { + "epoch": 0.7038506446021889, + "grad_norm": 1041.09521484375, + "learning_rate": 2.5476103018627195e-06, + "loss": 17.3727, + "step": 348430 + }, + { + "epoch": 0.7038708452348728, + "grad_norm": 146.49102783203125, + "learning_rate": 2.547306113144564e-06, + "loss": 24.9171, + "step": 348440 + }, + { + "epoch": 0.7038910458675566, + "grad_norm": 1084.015380859375, + "learning_rate": 2.54700193638054e-06, + "loss": 26.2919, + "step": 348450 + }, + { + "epoch": 0.7039112465002404, + "grad_norm": 119.42090606689453, + "learning_rate": 2.546697771572132e-06, + "loss": 18.9228, + "step": 348460 + }, + { + "epoch": 0.7039314471329242, + "grad_norm": 465.013427734375, + "learning_rate": 2.5463936187208198e-06, + "loss": 21.561, + "step": 348470 + }, + { + "epoch": 0.703951647765608, + "grad_norm": 510.8338317871094, + "learning_rate": 2.54608947782809e-06, + "loss": 22.8123, + "step": 348480 + }, + { + "epoch": 0.7039718483982919, + "grad_norm": 240.90560913085938, + "learning_rate": 2.5457853488954214e-06, + "loss": 32.2432, + "step": 348490 + }, + { + "epoch": 0.7039920490309757, + "grad_norm": 242.47865295410156, + "learning_rate": 2.545481231924296e-06, + "loss": 32.7862, + "step": 348500 + }, + { + "epoch": 0.7040122496636595, + "grad_norm": 326.0067443847656, + "learning_rate": 2.5451771269161996e-06, + "loss": 20.4139, + "step": 348510 + }, + { + "epoch": 0.7040324502963433, + "grad_norm": 292.7941589355469, + "learning_rate": 2.544873033872611e-06, + "loss": 21.531, + "step": 348520 + }, + { + "epoch": 0.7040526509290271, + "grad_norm": 296.0098571777344, + "learning_rate": 2.5445689527950135e-06, + "loss": 22.2272, + "step": 348530 + }, + { + "epoch": 0.704072851561711, + "grad_norm": 405.1947937011719, + "learning_rate": 2.5442648836848877e-06, + "loss": 17.4805, + "step": 348540 + }, + { + "epoch": 0.7040930521943948, + "grad_norm": 143.13543701171875, + "learning_rate": 2.5439608265437186e-06, + "loss": 13.4997, + "step": 348550 + }, + { + "epoch": 0.7041132528270786, + "grad_norm": 540.0049438476562, + "learning_rate": 2.5436567813729877e-06, + "loss": 13.2646, + "step": 348560 + }, + { + "epoch": 0.7041334534597624, + "grad_norm": 531.985107421875, + "learning_rate": 2.543352748174173e-06, + "loss": 20.3237, + "step": 348570 + }, + { + "epoch": 0.7041536540924461, + "grad_norm": 654.4515380859375, + "learning_rate": 2.54304872694876e-06, + "loss": 18.5625, + "step": 348580 + }, + { + "epoch": 0.7041738547251299, + "grad_norm": 200.78884887695312, + "learning_rate": 2.5427447176982323e-06, + "loss": 24.3229, + "step": 348590 + }, + { + "epoch": 0.7041940553578138, + "grad_norm": 525.7599487304688, + "learning_rate": 2.5424407204240653e-06, + "loss": 23.0156, + "step": 348600 + }, + { + "epoch": 0.7042142559904976, + "grad_norm": 540.3980712890625, + "learning_rate": 2.542136735127744e-06, + "loss": 11.9227, + "step": 348610 + }, + { + "epoch": 0.7042344566231814, + "grad_norm": 221.6465301513672, + "learning_rate": 2.541832761810753e-06, + "loss": 31.0744, + "step": 348620 + }, + { + "epoch": 0.7042546572558652, + "grad_norm": 542.7943115234375, + "learning_rate": 
2.5415288004745697e-06, + "loss": 17.2589, + "step": 348630 + }, + { + "epoch": 0.704274857888549, + "grad_norm": 391.7570495605469, + "learning_rate": 2.541224851120676e-06, + "loss": 16.0362, + "step": 348640 + }, + { + "epoch": 0.7042950585212329, + "grad_norm": 17.678213119506836, + "learning_rate": 2.540920913750555e-06, + "loss": 13.9695, + "step": 348650 + }, + { + "epoch": 0.7043152591539167, + "grad_norm": 744.2957153320312, + "learning_rate": 2.5406169883656883e-06, + "loss": 25.7589, + "step": 348660 + }, + { + "epoch": 0.7043354597866005, + "grad_norm": 141.6887969970703, + "learning_rate": 2.5403130749675537e-06, + "loss": 12.5907, + "step": 348670 + }, + { + "epoch": 0.7043556604192843, + "grad_norm": 115.41643524169922, + "learning_rate": 2.540009173557637e-06, + "loss": 14.5657, + "step": 348680 + }, + { + "epoch": 0.7043758610519681, + "grad_norm": 8.622913360595703, + "learning_rate": 2.5397052841374147e-06, + "loss": 10.0912, + "step": 348690 + }, + { + "epoch": 0.704396061684652, + "grad_norm": 1027.541015625, + "learning_rate": 2.539401406708373e-06, + "loss": 32.8672, + "step": 348700 + }, + { + "epoch": 0.7044162623173358, + "grad_norm": 919.50244140625, + "learning_rate": 2.5390975412719897e-06, + "loss": 17.1704, + "step": 348710 + }, + { + "epoch": 0.7044364629500196, + "grad_norm": 239.37896728515625, + "learning_rate": 2.5387936878297452e-06, + "loss": 51.5828, + "step": 348720 + }, + { + "epoch": 0.7044566635827034, + "grad_norm": 619.6591186523438, + "learning_rate": 2.5384898463831237e-06, + "loss": 20.6317, + "step": 348730 + }, + { + "epoch": 0.7044768642153872, + "grad_norm": 422.139404296875, + "learning_rate": 2.538186016933602e-06, + "loss": 24.9099, + "step": 348740 + }, + { + "epoch": 0.7044970648480711, + "grad_norm": 387.4186706542969, + "learning_rate": 2.5378821994826654e-06, + "loss": 18.9311, + "step": 348750 + }, + { + "epoch": 0.7045172654807549, + "grad_norm": 963.0557250976562, + "learning_rate": 2.53757839403179e-06, + "loss": 23.9692, + "step": 348760 + }, + { + "epoch": 0.7045374661134387, + "grad_norm": 423.4918518066406, + "learning_rate": 2.5372746005824605e-06, + "loss": 13.7943, + "step": 348770 + }, + { + "epoch": 0.7045576667461225, + "grad_norm": 134.3823699951172, + "learning_rate": 2.5369708191361565e-06, + "loss": 13.4868, + "step": 348780 + }, + { + "epoch": 0.7045778673788063, + "grad_norm": 156.3392791748047, + "learning_rate": 2.5366670496943557e-06, + "loss": 32.6647, + "step": 348790 + }, + { + "epoch": 0.7045980680114902, + "grad_norm": 231.02288818359375, + "learning_rate": 2.536363292258543e-06, + "loss": 12.9722, + "step": 348800 + }, + { + "epoch": 0.704618268644174, + "grad_norm": 361.2793884277344, + "learning_rate": 2.5360595468301966e-06, + "loss": 12.8922, + "step": 348810 + }, + { + "epoch": 0.7046384692768578, + "grad_norm": 13.414840698242188, + "learning_rate": 2.5357558134107958e-06, + "loss": 13.9536, + "step": 348820 + }, + { + "epoch": 0.7046586699095415, + "grad_norm": 413.43621826171875, + "learning_rate": 2.5354520920018215e-06, + "loss": 20.3253, + "step": 348830 + }, + { + "epoch": 0.7046788705422253, + "grad_norm": 291.3251037597656, + "learning_rate": 2.5351483826047595e-06, + "loss": 15.2389, + "step": 348840 + }, + { + "epoch": 0.7046990711749092, + "grad_norm": 378.5207214355469, + "learning_rate": 2.5348446852210807e-06, + "loss": 26.6477, + "step": 348850 + }, + { + "epoch": 0.704719271807593, + "grad_norm": 36.3099365234375, + "learning_rate": 2.5345409998522704e-06, + "loss": 9.6446, + 
"step": 348860 + }, + { + "epoch": 0.7047394724402768, + "grad_norm": 485.4751281738281, + "learning_rate": 2.53423732649981e-06, + "loss": 27.9962, + "step": 348870 + }, + { + "epoch": 0.7047596730729606, + "grad_norm": 662.0821533203125, + "learning_rate": 2.533933665165178e-06, + "loss": 15.3202, + "step": 348880 + }, + { + "epoch": 0.7047798737056444, + "grad_norm": 298.1793212890625, + "learning_rate": 2.5336300158498518e-06, + "loss": 22.0453, + "step": 348890 + }, + { + "epoch": 0.7048000743383283, + "grad_norm": 718.59423828125, + "learning_rate": 2.533326378555314e-06, + "loss": 21.5961, + "step": 348900 + }, + { + "epoch": 0.7048202749710121, + "grad_norm": 336.1101379394531, + "learning_rate": 2.5330227532830483e-06, + "loss": 13.5072, + "step": 348910 + }, + { + "epoch": 0.7048404756036959, + "grad_norm": 305.3926696777344, + "learning_rate": 2.5327191400345262e-06, + "loss": 14.4775, + "step": 348920 + }, + { + "epoch": 0.7048606762363797, + "grad_norm": 873.168701171875, + "learning_rate": 2.5324155388112326e-06, + "loss": 31.5757, + "step": 348930 + }, + { + "epoch": 0.7048808768690635, + "grad_norm": 1145.8697509765625, + "learning_rate": 2.5321119496146472e-06, + "loss": 23.1881, + "step": 348940 + }, + { + "epoch": 0.7049010775017474, + "grad_norm": 1285.3612060546875, + "learning_rate": 2.5318083724462495e-06, + "loss": 27.1833, + "step": 348950 + }, + { + "epoch": 0.7049212781344312, + "grad_norm": 328.0639343261719, + "learning_rate": 2.5315048073075166e-06, + "loss": 26.5006, + "step": 348960 + }, + { + "epoch": 0.704941478767115, + "grad_norm": 371.9706115722656, + "learning_rate": 2.531201254199932e-06, + "loss": 16.9027, + "step": 348970 + }, + { + "epoch": 0.7049616793997988, + "grad_norm": 533.75927734375, + "learning_rate": 2.5308977131249724e-06, + "loss": 18.4212, + "step": 348980 + }, + { + "epoch": 0.7049818800324826, + "grad_norm": 897.893798828125, + "learning_rate": 2.5305941840841163e-06, + "loss": 23.0169, + "step": 348990 + }, + { + "epoch": 0.7050020806651665, + "grad_norm": 179.62530517578125, + "learning_rate": 2.5302906670788463e-06, + "loss": 17.0761, + "step": 349000 + }, + { + "epoch": 0.7050222812978503, + "grad_norm": 224.842041015625, + "learning_rate": 2.5299871621106387e-06, + "loss": 18.5162, + "step": 349010 + }, + { + "epoch": 0.7050424819305341, + "grad_norm": 158.41604614257812, + "learning_rate": 2.529683669180976e-06, + "loss": 13.6336, + "step": 349020 + }, + { + "epoch": 0.7050626825632179, + "grad_norm": 224.61642456054688, + "learning_rate": 2.5293801882913327e-06, + "loss": 12.4215, + "step": 349030 + }, + { + "epoch": 0.7050828831959017, + "grad_norm": 507.733642578125, + "learning_rate": 2.529076719443193e-06, + "loss": 9.5354, + "step": 349040 + }, + { + "epoch": 0.7051030838285856, + "grad_norm": 302.4479064941406, + "learning_rate": 2.528773262638034e-06, + "loss": 14.3375, + "step": 349050 + }, + { + "epoch": 0.7051232844612694, + "grad_norm": 642.5108642578125, + "learning_rate": 2.5284698178773327e-06, + "loss": 25.7173, + "step": 349060 + }, + { + "epoch": 0.7051434850939532, + "grad_norm": 372.1785583496094, + "learning_rate": 2.5281663851625703e-06, + "loss": 20.8116, + "step": 349070 + }, + { + "epoch": 0.705163685726637, + "grad_norm": 587.0077514648438, + "learning_rate": 2.5278629644952245e-06, + "loss": 19.7193, + "step": 349080 + }, + { + "epoch": 0.7051838863593207, + "grad_norm": 301.967041015625, + "learning_rate": 2.527559555876776e-06, + "loss": 16.2101, + "step": 349090 + }, + { + "epoch": 
0.7052040869920045, + "grad_norm": 286.00799560546875, + "learning_rate": 2.527256159308703e-06, + "loss": 17.1478, + "step": 349100 + }, + { + "epoch": 0.7052242876246884, + "grad_norm": 474.2564392089844, + "learning_rate": 2.5269527747924816e-06, + "loss": 38.9327, + "step": 349110 + }, + { + "epoch": 0.7052444882573722, + "grad_norm": 166.05015563964844, + "learning_rate": 2.526649402329594e-06, + "loss": 19.6407, + "step": 349120 + }, + { + "epoch": 0.705264688890056, + "grad_norm": 138.1863555908203, + "learning_rate": 2.526346041921518e-06, + "loss": 20.5006, + "step": 349130 + }, + { + "epoch": 0.7052848895227398, + "grad_norm": 414.12799072265625, + "learning_rate": 2.5260426935697286e-06, + "loss": 24.341, + "step": 349140 + }, + { + "epoch": 0.7053050901554236, + "grad_norm": 475.72662353515625, + "learning_rate": 2.5257393572757073e-06, + "loss": 19.4902, + "step": 349150 + }, + { + "epoch": 0.7053252907881075, + "grad_norm": 261.77862548828125, + "learning_rate": 2.5254360330409343e-06, + "loss": 15.5935, + "step": 349160 + }, + { + "epoch": 0.7053454914207913, + "grad_norm": 126.8353500366211, + "learning_rate": 2.5251327208668856e-06, + "loss": 19.4454, + "step": 349170 + }, + { + "epoch": 0.7053656920534751, + "grad_norm": 1070.47802734375, + "learning_rate": 2.5248294207550383e-06, + "loss": 18.289, + "step": 349180 + }, + { + "epoch": 0.7053858926861589, + "grad_norm": 850.1143798828125, + "learning_rate": 2.5245261327068736e-06, + "loss": 20.9405, + "step": 349190 + }, + { + "epoch": 0.7054060933188427, + "grad_norm": 292.16888427734375, + "learning_rate": 2.524222856723869e-06, + "loss": 10.4333, + "step": 349200 + }, + { + "epoch": 0.7054262939515266, + "grad_norm": 822.7456665039062, + "learning_rate": 2.5239195928075e-06, + "loss": 17.5991, + "step": 349210 + }, + { + "epoch": 0.7054464945842104, + "grad_norm": 196.78997802734375, + "learning_rate": 2.5236163409592464e-06, + "loss": 22.1525, + "step": 349220 + }, + { + "epoch": 0.7054666952168942, + "grad_norm": 224.603515625, + "learning_rate": 2.523313101180588e-06, + "loss": 20.539, + "step": 349230 + }, + { + "epoch": 0.705486895849578, + "grad_norm": 231.63055419921875, + "learning_rate": 2.5230098734730014e-06, + "loss": 7.1304, + "step": 349240 + }, + { + "epoch": 0.7055070964822618, + "grad_norm": 544.2684936523438, + "learning_rate": 2.5227066578379624e-06, + "loss": 22.3781, + "step": 349250 + }, + { + "epoch": 0.7055272971149457, + "grad_norm": 375.712158203125, + "learning_rate": 2.522403454276952e-06, + "loss": 16.2288, + "step": 349260 + }, + { + "epoch": 0.7055474977476295, + "grad_norm": 286.46942138671875, + "learning_rate": 2.522100262791447e-06, + "loss": 15.4103, + "step": 349270 + }, + { + "epoch": 0.7055676983803133, + "grad_norm": 307.37689208984375, + "learning_rate": 2.521797083382923e-06, + "loss": 23.6451, + "step": 349280 + }, + { + "epoch": 0.7055878990129971, + "grad_norm": 483.400146484375, + "learning_rate": 2.521493916052862e-06, + "loss": 30.4754, + "step": 349290 + }, + { + "epoch": 0.7056080996456809, + "grad_norm": 425.2827453613281, + "learning_rate": 2.5211907608027366e-06, + "loss": 23.0068, + "step": 349300 + }, + { + "epoch": 0.7056283002783648, + "grad_norm": 26.249313354492188, + "learning_rate": 2.5208876176340285e-06, + "loss": 19.1362, + "step": 349310 + }, + { + "epoch": 0.7056485009110486, + "grad_norm": 129.19873046875, + "learning_rate": 2.5205844865482115e-06, + "loss": 11.3333, + "step": 349320 + }, + { + "epoch": 0.7056687015437324, + "grad_norm": 
255.25157165527344, + "learning_rate": 2.5202813675467675e-06, + "loss": 49.9998, + "step": 349330 + }, + { + "epoch": 0.7056889021764161, + "grad_norm": 575.8396606445312, + "learning_rate": 2.5199782606311708e-06, + "loss": 28.7747, + "step": 349340 + }, + { + "epoch": 0.7057091028090999, + "grad_norm": 208.08485412597656, + "learning_rate": 2.5196751658028972e-06, + "loss": 18.6068, + "step": 349350 + }, + { + "epoch": 0.7057293034417838, + "grad_norm": 50.586368560791016, + "learning_rate": 2.5193720830634284e-06, + "loss": 18.1747, + "step": 349360 + }, + { + "epoch": 0.7057495040744676, + "grad_norm": 54.649024963378906, + "learning_rate": 2.5190690124142368e-06, + "loss": 9.2091, + "step": 349370 + }, + { + "epoch": 0.7057697047071514, + "grad_norm": 223.4710235595703, + "learning_rate": 2.5187659538568043e-06, + "loss": 12.027, + "step": 349380 + }, + { + "epoch": 0.7057899053398352, + "grad_norm": 315.0380554199219, + "learning_rate": 2.518462907392606e-06, + "loss": 15.8469, + "step": 349390 + }, + { + "epoch": 0.705810105972519, + "grad_norm": 687.1735229492188, + "learning_rate": 2.518159873023116e-06, + "loss": 11.4713, + "step": 349400 + }, + { + "epoch": 0.7058303066052029, + "grad_norm": 8.193793296813965, + "learning_rate": 2.5178568507498156e-06, + "loss": 19.8824, + "step": 349410 + }, + { + "epoch": 0.7058505072378867, + "grad_norm": 761.634521484375, + "learning_rate": 2.51755384057418e-06, + "loss": 25.2532, + "step": 349420 + }, + { + "epoch": 0.7058707078705705, + "grad_norm": 233.63406372070312, + "learning_rate": 2.5172508424976837e-06, + "loss": 25.4617, + "step": 349430 + }, + { + "epoch": 0.7058909085032543, + "grad_norm": 350.4383544921875, + "learning_rate": 2.5169478565218065e-06, + "loss": 13.2413, + "step": 349440 + }, + { + "epoch": 0.7059111091359381, + "grad_norm": 474.5752258300781, + "learning_rate": 2.5166448826480274e-06, + "loss": 9.202, + "step": 349450 + }, + { + "epoch": 0.705931309768622, + "grad_norm": 298.2665100097656, + "learning_rate": 2.516341920877816e-06, + "loss": 16.7047, + "step": 349460 + }, + { + "epoch": 0.7059515104013058, + "grad_norm": 440.1244201660156, + "learning_rate": 2.5160389712126537e-06, + "loss": 26.5362, + "step": 349470 + }, + { + "epoch": 0.7059717110339896, + "grad_norm": 346.0702819824219, + "learning_rate": 2.5157360336540175e-06, + "loss": 18.0136, + "step": 349480 + }, + { + "epoch": 0.7059919116666734, + "grad_norm": 612.431640625, + "learning_rate": 2.5154331082033823e-06, + "loss": 12.4774, + "step": 349490 + }, + { + "epoch": 0.7060121122993572, + "grad_norm": 118.4017333984375, + "learning_rate": 2.5151301948622235e-06, + "loss": 17.7558, + "step": 349500 + }, + { + "epoch": 0.706032312932041, + "grad_norm": 542.0604248046875, + "learning_rate": 2.5148272936320186e-06, + "loss": 17.6797, + "step": 349510 + }, + { + "epoch": 0.7060525135647249, + "grad_norm": 158.3154296875, + "learning_rate": 2.514524404514248e-06, + "loss": 8.8993, + "step": 349520 + }, + { + "epoch": 0.7060727141974087, + "grad_norm": 5338.759765625, + "learning_rate": 2.51422152751038e-06, + "loss": 12.7294, + "step": 349530 + }, + { + "epoch": 0.7060929148300925, + "grad_norm": 98.99118041992188, + "learning_rate": 2.513918662621894e-06, + "loss": 26.2244, + "step": 349540 + }, + { + "epoch": 0.7061131154627763, + "grad_norm": 849.1727294921875, + "learning_rate": 2.51361580985027e-06, + "loss": 18.898, + "step": 349550 + }, + { + "epoch": 0.7061333160954602, + "grad_norm": 421.4109802246094, + "learning_rate": 2.5133129691969806e-06, 
+ "loss": 15.1216, + "step": 349560 + }, + { + "epoch": 0.706153516728144, + "grad_norm": 1055.987060546875, + "learning_rate": 2.5130101406635e-06, + "loss": 22.7242, + "step": 349570 + }, + { + "epoch": 0.7061737173608278, + "grad_norm": 44.73744201660156, + "learning_rate": 2.5127073242513083e-06, + "loss": 37.3095, + "step": 349580 + }, + { + "epoch": 0.7061939179935116, + "grad_norm": 306.37249755859375, + "learning_rate": 2.5124045199618795e-06, + "loss": 20.5377, + "step": 349590 + }, + { + "epoch": 0.7062141186261953, + "grad_norm": 29.394678115844727, + "learning_rate": 2.5121017277966875e-06, + "loss": 18.109, + "step": 349600 + }, + { + "epoch": 0.7062343192588791, + "grad_norm": 267.1628112792969, + "learning_rate": 2.5117989477572126e-06, + "loss": 14.5406, + "step": 349610 + }, + { + "epoch": 0.706254519891563, + "grad_norm": 317.5898742675781, + "learning_rate": 2.5114961798449245e-06, + "loss": 11.5739, + "step": 349620 + }, + { + "epoch": 0.7062747205242468, + "grad_norm": 359.2458801269531, + "learning_rate": 2.511193424061305e-06, + "loss": 33.4177, + "step": 349630 + }, + { + "epoch": 0.7062949211569306, + "grad_norm": 533.362548828125, + "learning_rate": 2.510890680407825e-06, + "loss": 12.9816, + "step": 349640 + }, + { + "epoch": 0.7063151217896144, + "grad_norm": 253.7837677001953, + "learning_rate": 2.5105879488859635e-06, + "loss": 16.3754, + "step": 349650 + }, + { + "epoch": 0.7063353224222982, + "grad_norm": 256.9443664550781, + "learning_rate": 2.510285229497195e-06, + "loss": 12.2255, + "step": 349660 + }, + { + "epoch": 0.7063555230549821, + "grad_norm": 338.7027282714844, + "learning_rate": 2.5099825222429918e-06, + "loss": 10.8429, + "step": 349670 + }, + { + "epoch": 0.7063757236876659, + "grad_norm": 578.6676025390625, + "learning_rate": 2.5096798271248337e-06, + "loss": 14.5646, + "step": 349680 + }, + { + "epoch": 0.7063959243203497, + "grad_norm": 226.984619140625, + "learning_rate": 2.5093771441441923e-06, + "loss": 8.6617, + "step": 349690 + }, + { + "epoch": 0.7064161249530335, + "grad_norm": 283.9709777832031, + "learning_rate": 2.509074473302546e-06, + "loss": 8.8569, + "step": 349700 + }, + { + "epoch": 0.7064363255857173, + "grad_norm": 755.0188598632812, + "learning_rate": 2.5087718146013697e-06, + "loss": 19.6124, + "step": 349710 + }, + { + "epoch": 0.7064565262184012, + "grad_norm": 257.248046875, + "learning_rate": 2.5084691680421346e-06, + "loss": 29.3938, + "step": 349720 + }, + { + "epoch": 0.706476726851085, + "grad_norm": 141.60836791992188, + "learning_rate": 2.508166533626321e-06, + "loss": 10.2731, + "step": 349730 + }, + { + "epoch": 0.7064969274837688, + "grad_norm": 189.88119506835938, + "learning_rate": 2.5078639113554017e-06, + "loss": 15.4532, + "step": 349740 + }, + { + "epoch": 0.7065171281164526, + "grad_norm": 423.1811828613281, + "learning_rate": 2.507561301230849e-06, + "loss": 34.6921, + "step": 349750 + }, + { + "epoch": 0.7065373287491364, + "grad_norm": 60.76221466064453, + "learning_rate": 2.5072587032541407e-06, + "loss": 28.9149, + "step": 349760 + }, + { + "epoch": 0.7065575293818203, + "grad_norm": 607.290283203125, + "learning_rate": 2.5069561174267524e-06, + "loss": 22.9158, + "step": 349770 + }, + { + "epoch": 0.7065777300145041, + "grad_norm": 565.7552490234375, + "learning_rate": 2.5066535437501584e-06, + "loss": 15.0228, + "step": 349780 + }, + { + "epoch": 0.7065979306471879, + "grad_norm": 172.1897735595703, + "learning_rate": 2.50635098222583e-06, + "loss": 16.5305, + "step": 349790 + }, + { + "epoch": 
0.7066181312798717, + "grad_norm": 397.20611572265625, + "learning_rate": 2.506048432855247e-06, + "loss": 27.4047, + "step": 349800 + }, + { + "epoch": 0.7066383319125555, + "grad_norm": 185.18716430664062, + "learning_rate": 2.5057458956398806e-06, + "loss": 17.7929, + "step": 349810 + }, + { + "epoch": 0.7066585325452394, + "grad_norm": 0.0, + "learning_rate": 2.5054433705812054e-06, + "loss": 13.086, + "step": 349820 + }, + { + "epoch": 0.7066787331779232, + "grad_norm": 294.8137512207031, + "learning_rate": 2.505140857680696e-06, + "loss": 28.368, + "step": 349830 + }, + { + "epoch": 0.706698933810607, + "grad_norm": 364.5111389160156, + "learning_rate": 2.504838356939829e-06, + "loss": 14.5719, + "step": 349840 + }, + { + "epoch": 0.7067191344432907, + "grad_norm": 977.1616821289062, + "learning_rate": 2.504535868360078e-06, + "loss": 24.5785, + "step": 349850 + }, + { + "epoch": 0.7067393350759745, + "grad_norm": 155.75967407226562, + "learning_rate": 2.504233391942914e-06, + "loss": 18.3162, + "step": 349860 + }, + { + "epoch": 0.7067595357086583, + "grad_norm": 290.4385986328125, + "learning_rate": 2.503930927689816e-06, + "loss": 18.4517, + "step": 349870 + }, + { + "epoch": 0.7067797363413422, + "grad_norm": 437.519775390625, + "learning_rate": 2.503628475602256e-06, + "loss": 19.7228, + "step": 349880 + }, + { + "epoch": 0.706799936974026, + "grad_norm": 397.1817932128906, + "learning_rate": 2.5033260356817056e-06, + "loss": 18.5204, + "step": 349890 + }, + { + "epoch": 0.7068201376067098, + "grad_norm": 295.46319580078125, + "learning_rate": 2.5030236079296443e-06, + "loss": 13.8248, + "step": 349900 + }, + { + "epoch": 0.7068403382393936, + "grad_norm": 774.9948120117188, + "learning_rate": 2.50272119234754e-06, + "loss": 19.6042, + "step": 349910 + }, + { + "epoch": 0.7068605388720774, + "grad_norm": 205.68801879882812, + "learning_rate": 2.502418788936872e-06, + "loss": 12.7801, + "step": 349920 + }, + { + "epoch": 0.7068807395047613, + "grad_norm": 75.88280487060547, + "learning_rate": 2.5021163976991103e-06, + "loss": 9.6228, + "step": 349930 + }, + { + "epoch": 0.7069009401374451, + "grad_norm": 474.43792724609375, + "learning_rate": 2.501814018635732e-06, + "loss": 23.6048, + "step": 349940 + }, + { + "epoch": 0.7069211407701289, + "grad_norm": 470.50616455078125, + "learning_rate": 2.5015116517482097e-06, + "loss": 20.9527, + "step": 349950 + }, + { + "epoch": 0.7069413414028127, + "grad_norm": 618.3411254882812, + "learning_rate": 2.501209297038014e-06, + "loss": 21.1948, + "step": 349960 + }, + { + "epoch": 0.7069615420354965, + "grad_norm": 170.8068389892578, + "learning_rate": 2.500906954506623e-06, + "loss": 9.9451, + "step": 349970 + }, + { + "epoch": 0.7069817426681804, + "grad_norm": 291.6849365234375, + "learning_rate": 2.5006046241555073e-06, + "loss": 9.2926, + "step": 349980 + }, + { + "epoch": 0.7070019433008642, + "grad_norm": 490.95391845703125, + "learning_rate": 2.500302305986142e-06, + "loss": 26.7192, + "step": 349990 + }, + { + "epoch": 0.707022143933548, + "grad_norm": 476.1736145019531, + "learning_rate": 2.5000000000000015e-06, + "loss": 26.904, + "step": 350000 + }, + { + "epoch": 0.7070423445662318, + "grad_norm": 111.90296936035156, + "learning_rate": 2.499697706198555e-06, + "loss": 16.3807, + "step": 350010 + }, + { + "epoch": 0.7070625451989156, + "grad_norm": 402.1435546875, + "learning_rate": 2.499395424583281e-06, + "loss": 15.6745, + "step": 350020 + }, + { + "epoch": 0.7070827458315995, + "grad_norm": 297.2471618652344, + 
"learning_rate": 2.49909315515565e-06, + "loss": 14.4463, + "step": 350030 + }, + { + "epoch": 0.7071029464642833, + "grad_norm": 164.84974670410156, + "learning_rate": 2.498790897917134e-06, + "loss": 36.1733, + "step": 350040 + }, + { + "epoch": 0.7071231470969671, + "grad_norm": 350.8130187988281, + "learning_rate": 2.4984886528692076e-06, + "loss": 18.4681, + "step": 350050 + }, + { + "epoch": 0.7071433477296509, + "grad_norm": 655.9428100585938, + "learning_rate": 2.4981864200133483e-06, + "loss": 19.3289, + "step": 350060 + }, + { + "epoch": 0.7071635483623347, + "grad_norm": 495.717041015625, + "learning_rate": 2.4978841993510213e-06, + "loss": 18.2208, + "step": 350070 + }, + { + "epoch": 0.7071837489950186, + "grad_norm": 129.1037139892578, + "learning_rate": 2.4975819908837024e-06, + "loss": 12.0158, + "step": 350080 + }, + { + "epoch": 0.7072039496277024, + "grad_norm": 337.0894775390625, + "learning_rate": 2.497279794612868e-06, + "loss": 17.966, + "step": 350090 + }, + { + "epoch": 0.7072241502603862, + "grad_norm": 510.8279113769531, + "learning_rate": 2.496977610539988e-06, + "loss": 27.7402, + "step": 350100 + }, + { + "epoch": 0.7072443508930699, + "grad_norm": 960.7432861328125, + "learning_rate": 2.496675438666534e-06, + "loss": 19.3872, + "step": 350110 + }, + { + "epoch": 0.7072645515257537, + "grad_norm": 236.84434509277344, + "learning_rate": 2.49637327899398e-06, + "loss": 46.7402, + "step": 350120 + }, + { + "epoch": 0.7072847521584376, + "grad_norm": 832.6741333007812, + "learning_rate": 2.4960711315238027e-06, + "loss": 20.6005, + "step": 350130 + }, + { + "epoch": 0.7073049527911214, + "grad_norm": 924.15576171875, + "learning_rate": 2.495768996257467e-06, + "loss": 26.5795, + "step": 350140 + }, + { + "epoch": 0.7073251534238052, + "grad_norm": 359.6117858886719, + "learning_rate": 2.49546687319645e-06, + "loss": 18.2028, + "step": 350150 + }, + { + "epoch": 0.707345354056489, + "grad_norm": 179.73480224609375, + "learning_rate": 2.4951647623422256e-06, + "loss": 25.4375, + "step": 350160 + }, + { + "epoch": 0.7073655546891728, + "grad_norm": 489.7877197265625, + "learning_rate": 2.4948626636962643e-06, + "loss": 18.0493, + "step": 350170 + }, + { + "epoch": 0.7073857553218567, + "grad_norm": 301.5377197265625, + "learning_rate": 2.4945605772600364e-06, + "loss": 19.2083, + "step": 350180 + }, + { + "epoch": 0.7074059559545405, + "grad_norm": 364.8948974609375, + "learning_rate": 2.494258503035018e-06, + "loss": 13.9501, + "step": 350190 + }, + { + "epoch": 0.7074261565872243, + "grad_norm": 448.95379638671875, + "learning_rate": 2.49395644102268e-06, + "loss": 19.7066, + "step": 350200 + }, + { + "epoch": 0.7074463572199081, + "grad_norm": 423.24188232421875, + "learning_rate": 2.493654391224493e-06, + "loss": 42.3437, + "step": 350210 + }, + { + "epoch": 0.7074665578525919, + "grad_norm": 222.1471710205078, + "learning_rate": 2.493352353641932e-06, + "loss": 20.5775, + "step": 350220 + }, + { + "epoch": 0.7074867584852758, + "grad_norm": 150.0435333251953, + "learning_rate": 2.4930503282764658e-06, + "loss": 13.4709, + "step": 350230 + }, + { + "epoch": 0.7075069591179596, + "grad_norm": 416.463623046875, + "learning_rate": 2.49274831512957e-06, + "loss": 14.5652, + "step": 350240 + }, + { + "epoch": 0.7075271597506434, + "grad_norm": 228.19183349609375, + "learning_rate": 2.492446314202713e-06, + "loss": 9.9245, + "step": 350250 + }, + { + "epoch": 0.7075473603833272, + "grad_norm": 288.11798095703125, + "learning_rate": 2.4921443254973705e-06, + "loss": 
12.6007, + "step": 350260 + }, + { + "epoch": 0.707567561016011, + "grad_norm": 140.6687774658203, + "learning_rate": 2.491842349015012e-06, + "loss": 13.3591, + "step": 350270 + }, + { + "epoch": 0.7075877616486949, + "grad_norm": 645.8302612304688, + "learning_rate": 2.4915403847571083e-06, + "loss": 21.0156, + "step": 350280 + }, + { + "epoch": 0.7076079622813787, + "grad_norm": 14.02106761932373, + "learning_rate": 2.4912384327251344e-06, + "loss": 9.1831, + "step": 350290 + }, + { + "epoch": 0.7076281629140625, + "grad_norm": 0.8506768345832825, + "learning_rate": 2.4909364929205575e-06, + "loss": 19.6283, + "step": 350300 + }, + { + "epoch": 0.7076483635467463, + "grad_norm": 635.20361328125, + "learning_rate": 2.4906345653448545e-06, + "loss": 22.548, + "step": 350310 + }, + { + "epoch": 0.7076685641794301, + "grad_norm": 434.19683837890625, + "learning_rate": 2.490332649999494e-06, + "loss": 16.8567, + "step": 350320 + }, + { + "epoch": 0.707688764812114, + "grad_norm": 349.81982421875, + "learning_rate": 2.490030746885946e-06, + "loss": 25.0367, + "step": 350330 + }, + { + "epoch": 0.7077089654447978, + "grad_norm": 188.83731079101562, + "learning_rate": 2.4897288560056854e-06, + "loss": 11.8076, + "step": 350340 + }, + { + "epoch": 0.7077291660774816, + "grad_norm": 285.6274719238281, + "learning_rate": 2.489426977360182e-06, + "loss": 21.5039, + "step": 350350 + }, + { + "epoch": 0.7077493667101654, + "grad_norm": 272.9427795410156, + "learning_rate": 2.4891251109509053e-06, + "loss": 15.4717, + "step": 350360 + }, + { + "epoch": 0.7077695673428491, + "grad_norm": 132.57620239257812, + "learning_rate": 2.488823256779328e-06, + "loss": 8.4129, + "step": 350370 + }, + { + "epoch": 0.707789767975533, + "grad_norm": 69.85224914550781, + "learning_rate": 2.488521414846923e-06, + "loss": 14.1976, + "step": 350380 + }, + { + "epoch": 0.7078099686082168, + "grad_norm": 81.13935852050781, + "learning_rate": 2.488219585155161e-06, + "loss": 17.9706, + "step": 350390 + }, + { + "epoch": 0.7078301692409006, + "grad_norm": 784.9371948242188, + "learning_rate": 2.48791776770551e-06, + "loss": 25.0036, + "step": 350400 + }, + { + "epoch": 0.7078503698735844, + "grad_norm": 618.5388793945312, + "learning_rate": 2.4876159624994443e-06, + "loss": 16.6287, + "step": 350410 + }, + { + "epoch": 0.7078705705062682, + "grad_norm": 415.8648681640625, + "learning_rate": 2.4873141695384346e-06, + "loss": 19.047, + "step": 350420 + }, + { + "epoch": 0.707890771138952, + "grad_norm": 415.9384460449219, + "learning_rate": 2.4870123888239485e-06, + "loss": 17.0729, + "step": 350430 + }, + { + "epoch": 0.7079109717716359, + "grad_norm": 351.33514404296875, + "learning_rate": 2.486710620357459e-06, + "loss": 11.8749, + "step": 350440 + }, + { + "epoch": 0.7079311724043197, + "grad_norm": 288.665283203125, + "learning_rate": 2.4864088641404398e-06, + "loss": 13.4284, + "step": 350450 + }, + { + "epoch": 0.7079513730370035, + "grad_norm": 206.8216552734375, + "learning_rate": 2.4861071201743586e-06, + "loss": 10.4989, + "step": 350460 + }, + { + "epoch": 0.7079715736696873, + "grad_norm": 333.7101135253906, + "learning_rate": 2.4858053884606843e-06, + "loss": 20.4974, + "step": 350470 + }, + { + "epoch": 0.7079917743023711, + "grad_norm": 258.67803955078125, + "learning_rate": 2.4855036690008918e-06, + "loss": 5.2349, + "step": 350480 + }, + { + "epoch": 0.708011974935055, + "grad_norm": 411.38092041015625, + "learning_rate": 2.485201961796449e-06, + "loss": 17.3271, + "step": 350490 + }, + { + "epoch": 
0.7080321755677388, + "grad_norm": 316.57391357421875, + "learning_rate": 2.484900266848825e-06, + "loss": 21.3045, + "step": 350500 + }, + { + "epoch": 0.7080523762004226, + "grad_norm": 115.9952392578125, + "learning_rate": 2.484598584159494e-06, + "loss": 8.3172, + "step": 350510 + }, + { + "epoch": 0.7080725768331064, + "grad_norm": 404.4654541015625, + "learning_rate": 2.484296913729923e-06, + "loss": 12.901, + "step": 350520 + }, + { + "epoch": 0.7080927774657902, + "grad_norm": 265.7141418457031, + "learning_rate": 2.4839952555615846e-06, + "loss": 14.5914, + "step": 350530 + }, + { + "epoch": 0.7081129780984741, + "grad_norm": 41.656776428222656, + "learning_rate": 2.483693609655947e-06, + "loss": 12.2818, + "step": 350540 + }, + { + "epoch": 0.7081331787311579, + "grad_norm": 391.55859375, + "learning_rate": 2.4833919760144838e-06, + "loss": 29.0186, + "step": 350550 + }, + { + "epoch": 0.7081533793638417, + "grad_norm": 244.14015197753906, + "learning_rate": 2.483090354638662e-06, + "loss": 12.6377, + "step": 350560 + }, + { + "epoch": 0.7081735799965255, + "grad_norm": 446.8510437011719, + "learning_rate": 2.4827887455299516e-06, + "loss": 12.4944, + "step": 350570 + }, + { + "epoch": 0.7081937806292093, + "grad_norm": 274.75274658203125, + "learning_rate": 2.4824871486898244e-06, + "loss": 25.0839, + "step": 350580 + }, + { + "epoch": 0.7082139812618932, + "grad_norm": 234.93728637695312, + "learning_rate": 2.4821855641197483e-06, + "loss": 32.2213, + "step": 350590 + }, + { + "epoch": 0.708234181894577, + "grad_norm": 484.5325927734375, + "learning_rate": 2.4818839918211963e-06, + "loss": 15.9191, + "step": 350600 + }, + { + "epoch": 0.7082543825272608, + "grad_norm": 192.70309448242188, + "learning_rate": 2.4815824317956363e-06, + "loss": 16.686, + "step": 350610 + }, + { + "epoch": 0.7082745831599445, + "grad_norm": 0.34595391154289246, + "learning_rate": 2.4812808840445357e-06, + "loss": 15.7636, + "step": 350620 + }, + { + "epoch": 0.7082947837926283, + "grad_norm": 106.8895492553711, + "learning_rate": 2.480979348569369e-06, + "loss": 21.4576, + "step": 350630 + }, + { + "epoch": 0.7083149844253122, + "grad_norm": 388.26007080078125, + "learning_rate": 2.480677825371603e-06, + "loss": 27.7207, + "step": 350640 + }, + { + "epoch": 0.708335185057996, + "grad_norm": 116.71707153320312, + "learning_rate": 2.480376314452706e-06, + "loss": 11.684, + "step": 350650 + }, + { + "epoch": 0.7083553856906798, + "grad_norm": 438.4858093261719, + "learning_rate": 2.48007481581415e-06, + "loss": 31.7195, + "step": 350660 + }, + { + "epoch": 0.7083755863233636, + "grad_norm": 350.0809020996094, + "learning_rate": 2.479773329457406e-06, + "loss": 21.4821, + "step": 350670 + }, + { + "epoch": 0.7083957869560474, + "grad_norm": 65.26168060302734, + "learning_rate": 2.4794718553839387e-06, + "loss": 25.7347, + "step": 350680 + }, + { + "epoch": 0.7084159875887313, + "grad_norm": 442.9814147949219, + "learning_rate": 2.4791703935952193e-06, + "loss": 16.2535, + "step": 350690 + }, + { + "epoch": 0.7084361882214151, + "grad_norm": 259.0112609863281, + "learning_rate": 2.4788689440927193e-06, + "loss": 10.223, + "step": 350700 + }, + { + "epoch": 0.7084563888540989, + "grad_norm": 447.5721435546875, + "learning_rate": 2.478567506877907e-06, + "loss": 14.2684, + "step": 350710 + }, + { + "epoch": 0.7084765894867827, + "grad_norm": 725.1121826171875, + "learning_rate": 2.478266081952248e-06, + "loss": 8.9663, + "step": 350720 + }, + { + "epoch": 0.7084967901194665, + "grad_norm": 563.68359375, + 
"learning_rate": 2.477964669317215e-06, + "loss": 15.6711, + "step": 350730 + }, + { + "epoch": 0.7085169907521504, + "grad_norm": 408.3504638671875, + "learning_rate": 2.4776632689742803e-06, + "loss": 10.5074, + "step": 350740 + }, + { + "epoch": 0.7085371913848342, + "grad_norm": 265.1654968261719, + "learning_rate": 2.4773618809249045e-06, + "loss": 26.6782, + "step": 350750 + }, + { + "epoch": 0.708557392017518, + "grad_norm": 548.5407104492188, + "learning_rate": 2.477060505170561e-06, + "loss": 35.9407, + "step": 350760 + }, + { + "epoch": 0.7085775926502018, + "grad_norm": 151.9861602783203, + "learning_rate": 2.4767591417127207e-06, + "loss": 18.9445, + "step": 350770 + }, + { + "epoch": 0.7085977932828856, + "grad_norm": 320.5681457519531, + "learning_rate": 2.4764577905528503e-06, + "loss": 28.4419, + "step": 350780 + }, + { + "epoch": 0.7086179939155695, + "grad_norm": 472.62884521484375, + "learning_rate": 2.476156451692416e-06, + "loss": 31.2559, + "step": 350790 + }, + { + "epoch": 0.7086381945482533, + "grad_norm": 614.879638671875, + "learning_rate": 2.4758551251328923e-06, + "loss": 19.3613, + "step": 350800 + }, + { + "epoch": 0.7086583951809371, + "grad_norm": 126.76094818115234, + "learning_rate": 2.4755538108757436e-06, + "loss": 25.4135, + "step": 350810 + }, + { + "epoch": 0.7086785958136209, + "grad_norm": 384.14593505859375, + "learning_rate": 2.475252508922438e-06, + "loss": 12.4958, + "step": 350820 + }, + { + "epoch": 0.7086987964463047, + "grad_norm": 334.7979736328125, + "learning_rate": 2.4749512192744473e-06, + "loss": 9.4986, + "step": 350830 + }, + { + "epoch": 0.7087189970789886, + "grad_norm": 213.6591033935547, + "learning_rate": 2.474649941933236e-06, + "loss": 18.3898, + "step": 350840 + }, + { + "epoch": 0.7087391977116724, + "grad_norm": 289.82159423828125, + "learning_rate": 2.4743486769002767e-06, + "loss": 14.2574, + "step": 350850 + }, + { + "epoch": 0.7087593983443562, + "grad_norm": 579.4589233398438, + "learning_rate": 2.4740474241770333e-06, + "loss": 14.4531, + "step": 350860 + }, + { + "epoch": 0.70877959897704, + "grad_norm": 111.13056182861328, + "learning_rate": 2.4737461837649784e-06, + "loss": 16.4243, + "step": 350870 + }, + { + "epoch": 0.7087997996097237, + "grad_norm": 424.7178039550781, + "learning_rate": 2.4734449556655786e-06, + "loss": 15.5977, + "step": 350880 + }, + { + "epoch": 0.7088200002424075, + "grad_norm": 347.6862487792969, + "learning_rate": 2.4731437398802998e-06, + "loss": 13.8698, + "step": 350890 + }, + { + "epoch": 0.7088402008750914, + "grad_norm": 163.52963256835938, + "learning_rate": 2.4728425364106136e-06, + "loss": 22.4221, + "step": 350900 + }, + { + "epoch": 0.7088604015077752, + "grad_norm": 200.37506103515625, + "learning_rate": 2.472541345257984e-06, + "loss": 12.9967, + "step": 350910 + }, + { + "epoch": 0.708880602140459, + "grad_norm": 203.79859924316406, + "learning_rate": 2.4722401664238837e-06, + "loss": 13.5091, + "step": 350920 + }, + { + "epoch": 0.7089008027731428, + "grad_norm": 481.20965576171875, + "learning_rate": 2.4719389999097787e-06, + "loss": 11.0495, + "step": 350930 + }, + { + "epoch": 0.7089210034058266, + "grad_norm": 283.4579772949219, + "learning_rate": 2.471637845717134e-06, + "loss": 16.7812, + "step": 350940 + }, + { + "epoch": 0.7089412040385105, + "grad_norm": 273.2008361816406, + "learning_rate": 2.471336703847422e-06, + "loss": 11.4263, + "step": 350950 + }, + { + "epoch": 0.7089614046711943, + "grad_norm": 108.56293487548828, + "learning_rate": 
2.4710355743021077e-06, + "loss": 18.0725, + "step": 350960 + }, + { + "epoch": 0.7089816053038781, + "grad_norm": 425.48089599609375, + "learning_rate": 2.4707344570826576e-06, + "loss": 24.4652, + "step": 350970 + }, + { + "epoch": 0.7090018059365619, + "grad_norm": 156.72557067871094, + "learning_rate": 2.470433352190541e-06, + "loss": 15.688, + "step": 350980 + }, + { + "epoch": 0.7090220065692457, + "grad_norm": 1711.9622802734375, + "learning_rate": 2.470132259627227e-06, + "loss": 35.6115, + "step": 350990 + }, + { + "epoch": 0.7090422072019296, + "grad_norm": 322.0758361816406, + "learning_rate": 2.469831179394182e-06, + "loss": 19.3249, + "step": 351000 + }, + { + "epoch": 0.7090624078346134, + "grad_norm": 184.63465881347656, + "learning_rate": 2.469530111492871e-06, + "loss": 16.4322, + "step": 351010 + }, + { + "epoch": 0.7090826084672972, + "grad_norm": 368.8414611816406, + "learning_rate": 2.4692290559247652e-06, + "loss": 17.1046, + "step": 351020 + }, + { + "epoch": 0.709102809099981, + "grad_norm": 373.5274353027344, + "learning_rate": 2.4689280126913302e-06, + "loss": 23.63, + "step": 351030 + }, + { + "epoch": 0.7091230097326648, + "grad_norm": 268.5332336425781, + "learning_rate": 2.4686269817940306e-06, + "loss": 10.7349, + "step": 351040 + }, + { + "epoch": 0.7091432103653487, + "grad_norm": 293.2986145019531, + "learning_rate": 2.4683259632343363e-06, + "loss": 20.8799, + "step": 351050 + }, + { + "epoch": 0.7091634109980325, + "grad_norm": 174.36912536621094, + "learning_rate": 2.4680249570137166e-06, + "loss": 23.0473, + "step": 351060 + }, + { + "epoch": 0.7091836116307163, + "grad_norm": 113.24224853515625, + "learning_rate": 2.467723963133636e-06, + "loss": 19.4785, + "step": 351070 + }, + { + "epoch": 0.7092038122634001, + "grad_norm": 129.29562377929688, + "learning_rate": 2.4674229815955596e-06, + "loss": 10.8498, + "step": 351080 + }, + { + "epoch": 0.7092240128960839, + "grad_norm": 103.91205596923828, + "learning_rate": 2.467122012400958e-06, + "loss": 33.7763, + "step": 351090 + }, + { + "epoch": 0.7092442135287678, + "grad_norm": 552.5250854492188, + "learning_rate": 2.4668210555512974e-06, + "loss": 14.3251, + "step": 351100 + }, + { + "epoch": 0.7092644141614516, + "grad_norm": 110.7344741821289, + "learning_rate": 2.466520111048041e-06, + "loss": 16.1519, + "step": 351110 + }, + { + "epoch": 0.7092846147941354, + "grad_norm": 321.0063171386719, + "learning_rate": 2.46621917889266e-06, + "loss": 13.9278, + "step": 351120 + }, + { + "epoch": 0.7093048154268191, + "grad_norm": 1080.12841796875, + "learning_rate": 2.4659182590866183e-06, + "loss": 26.4543, + "step": 351130 + }, + { + "epoch": 0.7093250160595029, + "grad_norm": 423.43524169921875, + "learning_rate": 2.4656173516313852e-06, + "loss": 12.3552, + "step": 351140 + }, + { + "epoch": 0.7093452166921868, + "grad_norm": 663.8313598632812, + "learning_rate": 2.465316456528424e-06, + "loss": 22.1683, + "step": 351150 + }, + { + "epoch": 0.7093654173248706, + "grad_norm": 132.00119018554688, + "learning_rate": 2.465015573779205e-06, + "loss": 32.2573, + "step": 351160 + }, + { + "epoch": 0.7093856179575544, + "grad_norm": 402.5078125, + "learning_rate": 2.464714703385192e-06, + "loss": 11.4204, + "step": 351170 + }, + { + "epoch": 0.7094058185902382, + "grad_norm": 302.7805480957031, + "learning_rate": 2.4644138453478504e-06, + "loss": 17.0685, + "step": 351180 + }, + { + "epoch": 0.709426019222922, + "grad_norm": 346.5506591796875, + "learning_rate": 2.464112999668651e-06, + "loss": 16.594, + 
"step": 351190 + }, + { + "epoch": 0.7094462198556059, + "grad_norm": 154.2192840576172, + "learning_rate": 2.4638121663490546e-06, + "loss": 26.4545, + "step": 351200 + }, + { + "epoch": 0.7094664204882897, + "grad_norm": 383.23046875, + "learning_rate": 2.463511345390532e-06, + "loss": 8.9706, + "step": 351210 + }, + { + "epoch": 0.7094866211209735, + "grad_norm": 396.2514343261719, + "learning_rate": 2.463210536794547e-06, + "loss": 13.9642, + "step": 351220 + }, + { + "epoch": 0.7095068217536573, + "grad_norm": 270.4677429199219, + "learning_rate": 2.4629097405625645e-06, + "loss": 18.0537, + "step": 351230 + }, + { + "epoch": 0.7095270223863411, + "grad_norm": 402.54486083984375, + "learning_rate": 2.4626089566960546e-06, + "loss": 13.869, + "step": 351240 + }, + { + "epoch": 0.709547223019025, + "grad_norm": 592.2306518554688, + "learning_rate": 2.462308185196481e-06, + "loss": 14.6964, + "step": 351250 + }, + { + "epoch": 0.7095674236517088, + "grad_norm": 229.1131134033203, + "learning_rate": 2.462007426065307e-06, + "loss": 19.8668, + "step": 351260 + }, + { + "epoch": 0.7095876242843926, + "grad_norm": 365.9820556640625, + "learning_rate": 2.4617066793040012e-06, + "loss": 24.1304, + "step": 351270 + }, + { + "epoch": 0.7096078249170764, + "grad_norm": 208.26876831054688, + "learning_rate": 2.461405944914033e-06, + "loss": 17.9849, + "step": 351280 + }, + { + "epoch": 0.7096280255497602, + "grad_norm": 27.316822052001953, + "learning_rate": 2.4611052228968606e-06, + "loss": 11.6133, + "step": 351290 + }, + { + "epoch": 0.709648226182444, + "grad_norm": 423.08477783203125, + "learning_rate": 2.4608045132539536e-06, + "loss": 20.7744, + "step": 351300 + }, + { + "epoch": 0.7096684268151279, + "grad_norm": 683.5103149414062, + "learning_rate": 2.460503815986779e-06, + "loss": 26.1462, + "step": 351310 + }, + { + "epoch": 0.7096886274478117, + "grad_norm": 421.5254821777344, + "learning_rate": 2.4602031310968013e-06, + "loss": 12.5355, + "step": 351320 + }, + { + "epoch": 0.7097088280804955, + "grad_norm": 373.98773193359375, + "learning_rate": 2.459902458585483e-06, + "loss": 9.5074, + "step": 351330 + }, + { + "epoch": 0.7097290287131793, + "grad_norm": 35.10133361816406, + "learning_rate": 2.459601798454292e-06, + "loss": 15.7518, + "step": 351340 + }, + { + "epoch": 0.7097492293458632, + "grad_norm": 270.9390869140625, + "learning_rate": 2.4593011507046976e-06, + "loss": 26.5067, + "step": 351350 + }, + { + "epoch": 0.709769429978547, + "grad_norm": 50.27530288696289, + "learning_rate": 2.459000515338158e-06, + "loss": 19.8882, + "step": 351360 + }, + { + "epoch": 0.7097896306112308, + "grad_norm": 496.9534912109375, + "learning_rate": 2.4586998923561412e-06, + "loss": 18.3404, + "step": 351370 + }, + { + "epoch": 0.7098098312439146, + "grad_norm": 417.775146484375, + "learning_rate": 2.458399281760115e-06, + "loss": 15.2598, + "step": 351380 + }, + { + "epoch": 0.7098300318765983, + "grad_norm": 707.9732666015625, + "learning_rate": 2.4580986835515423e-06, + "loss": 23.1386, + "step": 351390 + }, + { + "epoch": 0.7098502325092821, + "grad_norm": 232.51669311523438, + "learning_rate": 2.4577980977318866e-06, + "loss": 17.2174, + "step": 351400 + }, + { + "epoch": 0.709870433141966, + "grad_norm": 31.553802490234375, + "learning_rate": 2.457497524302616e-06, + "loss": 17.3653, + "step": 351410 + }, + { + "epoch": 0.7098906337746498, + "grad_norm": 114.79324340820312, + "learning_rate": 2.457196963265195e-06, + "loss": 12.134, + "step": 351420 + }, + { + "epoch": 
0.7099108344073336, + "grad_norm": 601.8326416015625, + "learning_rate": 2.456896414621085e-06, + "loss": 24.023, + "step": 351430 + }, + { + "epoch": 0.7099310350400174, + "grad_norm": 115.26791381835938, + "learning_rate": 2.4565958783717534e-06, + "loss": 19.1904, + "step": 351440 + }, + { + "epoch": 0.7099512356727012, + "grad_norm": 409.48663330078125, + "learning_rate": 2.4562953545186675e-06, + "loss": 12.8175, + "step": 351450 + }, + { + "epoch": 0.7099714363053851, + "grad_norm": 385.10211181640625, + "learning_rate": 2.455994843063289e-06, + "loss": 20.6888, + "step": 351460 + }, + { + "epoch": 0.7099916369380689, + "grad_norm": 307.5181884765625, + "learning_rate": 2.455694344007082e-06, + "loss": 31.795, + "step": 351470 + }, + { + "epoch": 0.7100118375707527, + "grad_norm": 263.39544677734375, + "learning_rate": 2.455393857351513e-06, + "loss": 17.189, + "step": 351480 + }, + { + "epoch": 0.7100320382034365, + "grad_norm": 265.06463623046875, + "learning_rate": 2.455093383098046e-06, + "loss": 19.1293, + "step": 351490 + }, + { + "epoch": 0.7100522388361203, + "grad_norm": 855.9002685546875, + "learning_rate": 2.4547929212481436e-06, + "loss": 32.2827, + "step": 351500 + }, + { + "epoch": 0.7100724394688042, + "grad_norm": 422.6793212890625, + "learning_rate": 2.454492471803274e-06, + "loss": 39.923, + "step": 351510 + }, + { + "epoch": 0.710092640101488, + "grad_norm": 90.21331787109375, + "learning_rate": 2.454192034764897e-06, + "loss": 16.5796, + "step": 351520 + }, + { + "epoch": 0.7101128407341718, + "grad_norm": 340.8531188964844, + "learning_rate": 2.4538916101344806e-06, + "loss": 21.9852, + "step": 351530 + }, + { + "epoch": 0.7101330413668556, + "grad_norm": 507.1401062011719, + "learning_rate": 2.4535911979134884e-06, + "loss": 18.3361, + "step": 351540 + }, + { + "epoch": 0.7101532419995394, + "grad_norm": 130.31398010253906, + "learning_rate": 2.4532907981033822e-06, + "loss": 12.3717, + "step": 351550 + }, + { + "epoch": 0.7101734426322233, + "grad_norm": 1278.9815673828125, + "learning_rate": 2.452990410705629e-06, + "loss": 24.3266, + "step": 351560 + }, + { + "epoch": 0.7101936432649071, + "grad_norm": 389.86334228515625, + "learning_rate": 2.452690035721692e-06, + "loss": 15.1485, + "step": 351570 + }, + { + "epoch": 0.7102138438975909, + "grad_norm": 133.64479064941406, + "learning_rate": 2.4523896731530327e-06, + "loss": 12.0792, + "step": 351580 + }, + { + "epoch": 0.7102340445302747, + "grad_norm": 331.3221435546875, + "learning_rate": 2.4520893230011174e-06, + "loss": 23.9486, + "step": 351590 + }, + { + "epoch": 0.7102542451629585, + "grad_norm": 287.4932556152344, + "learning_rate": 2.4517889852674114e-06, + "loss": 17.338, + "step": 351600 + }, + { + "epoch": 0.7102744457956424, + "grad_norm": 577.8561401367188, + "learning_rate": 2.4514886599533773e-06, + "loss": 18.4957, + "step": 351610 + }, + { + "epoch": 0.7102946464283262, + "grad_norm": 81.04473114013672, + "learning_rate": 2.4511883470604757e-06, + "loss": 18.9512, + "step": 351620 + }, + { + "epoch": 0.71031484706101, + "grad_norm": 133.50791931152344, + "learning_rate": 2.450888046590175e-06, + "loss": 19.3605, + "step": 351630 + }, + { + "epoch": 0.7103350476936938, + "grad_norm": 211.41757202148438, + "learning_rate": 2.4505877585439376e-06, + "loss": 15.7611, + "step": 351640 + }, + { + "epoch": 0.7103552483263775, + "grad_norm": 1024.399169921875, + "learning_rate": 2.4502874829232238e-06, + "loss": 23.0618, + "step": 351650 + }, + { + "epoch": 0.7103754489590614, + "grad_norm": 
552.9322509765625, + "learning_rate": 2.4499872197294992e-06, + "loss": 14.6487, + "step": 351660 + }, + { + "epoch": 0.7103956495917452, + "grad_norm": 395.48712158203125, + "learning_rate": 2.449686968964232e-06, + "loss": 18.742, + "step": 351670 + }, + { + "epoch": 0.710415850224429, + "grad_norm": 777.3709106445312, + "learning_rate": 2.4493867306288772e-06, + "loss": 22.8613, + "step": 351680 + }, + { + "epoch": 0.7104360508571128, + "grad_norm": 526.1470947265625, + "learning_rate": 2.449086504724902e-06, + "loss": 42.0243, + "step": 351690 + }, + { + "epoch": 0.7104562514897966, + "grad_norm": 618.8796997070312, + "learning_rate": 2.448786291253772e-06, + "loss": 24.4049, + "step": 351700 + }, + { + "epoch": 0.7104764521224805, + "grad_norm": 248.31163024902344, + "learning_rate": 2.4484860902169477e-06, + "loss": 26.042, + "step": 351710 + }, + { + "epoch": 0.7104966527551643, + "grad_norm": 344.298095703125, + "learning_rate": 2.4481859016158913e-06, + "loss": 14.6626, + "step": 351720 + }, + { + "epoch": 0.7105168533878481, + "grad_norm": 600.903076171875, + "learning_rate": 2.4478857254520688e-06, + "loss": 17.7351, + "step": 351730 + }, + { + "epoch": 0.7105370540205319, + "grad_norm": 654.8291625976562, + "learning_rate": 2.44758556172694e-06, + "loss": 26.584, + "step": 351740 + }, + { + "epoch": 0.7105572546532157, + "grad_norm": 464.43414306640625, + "learning_rate": 2.4472854104419717e-06, + "loss": 18.7963, + "step": 351750 + }, + { + "epoch": 0.7105774552858996, + "grad_norm": 152.92335510253906, + "learning_rate": 2.4469852715986232e-06, + "loss": 16.9666, + "step": 351760 + }, + { + "epoch": 0.7105976559185834, + "grad_norm": 385.8680114746094, + "learning_rate": 2.44668514519836e-06, + "loss": 11.9277, + "step": 351770 + }, + { + "epoch": 0.7106178565512672, + "grad_norm": 654.5179443359375, + "learning_rate": 2.446385031242644e-06, + "loss": 14.0661, + "step": 351780 + }, + { + "epoch": 0.710638057183951, + "grad_norm": 781.8509521484375, + "learning_rate": 2.4460849297329355e-06, + "loss": 16.5214, + "step": 351790 + }, + { + "epoch": 0.7106582578166348, + "grad_norm": 650.5888671875, + "learning_rate": 2.4457848406707014e-06, + "loss": 18.0695, + "step": 351800 + }, + { + "epoch": 0.7106784584493187, + "grad_norm": 249.22930908203125, + "learning_rate": 2.4454847640574004e-06, + "loss": 9.1941, + "step": 351810 + }, + { + "epoch": 0.7106986590820025, + "grad_norm": 281.6560974121094, + "learning_rate": 2.4451846998944985e-06, + "loss": 13.0573, + "step": 351820 + }, + { + "epoch": 0.7107188597146863, + "grad_norm": 369.3673095703125, + "learning_rate": 2.4448846481834566e-06, + "loss": 13.1717, + "step": 351830 + }, + { + "epoch": 0.7107390603473701, + "grad_norm": 397.47998046875, + "learning_rate": 2.4445846089257354e-06, + "loss": 14.0664, + "step": 351840 + }, + { + "epoch": 0.7107592609800539, + "grad_norm": 50.95933532714844, + "learning_rate": 2.4442845821228005e-06, + "loss": 12.9487, + "step": 351850 + }, + { + "epoch": 0.7107794616127378, + "grad_norm": 222.64434814453125, + "learning_rate": 2.4439845677761124e-06, + "loss": 16.0556, + "step": 351860 + }, + { + "epoch": 0.7107996622454216, + "grad_norm": 381.6914367675781, + "learning_rate": 2.4436845658871317e-06, + "loss": 15.1181, + "step": 351870 + }, + { + "epoch": 0.7108198628781054, + "grad_norm": 618.4828491210938, + "learning_rate": 2.4433845764573225e-06, + "loss": 24.6758, + "step": 351880 + }, + { + "epoch": 0.7108400635107892, + "grad_norm": 311.1064758300781, + "learning_rate": 
2.4430845994881507e-06, + "loss": 30.1891, + "step": 351890 + }, + { + "epoch": 0.7108602641434729, + "grad_norm": 855.1177368164062, + "learning_rate": 2.442784634981071e-06, + "loss": 20.8685, + "step": 351900 + }, + { + "epoch": 0.7108804647761567, + "grad_norm": 230.25753784179688, + "learning_rate": 2.442484682937548e-06, + "loss": 37.8443, + "step": 351910 + }, + { + "epoch": 0.7109006654088406, + "grad_norm": 247.62557983398438, + "learning_rate": 2.4421847433590466e-06, + "loss": 11.3085, + "step": 351920 + }, + { + "epoch": 0.7109208660415244, + "grad_norm": 690.3058471679688, + "learning_rate": 2.4418848162470273e-06, + "loss": 22.4823, + "step": 351930 + }, + { + "epoch": 0.7109410666742082, + "grad_norm": 371.29913330078125, + "learning_rate": 2.441584901602948e-06, + "loss": 12.9227, + "step": 351940 + }, + { + "epoch": 0.710961267306892, + "grad_norm": 197.8428192138672, + "learning_rate": 2.4412849994282744e-06, + "loss": 12.8529, + "step": 351950 + }, + { + "epoch": 0.7109814679395758, + "grad_norm": 476.21197509765625, + "learning_rate": 2.4409851097244708e-06, + "loss": 21.9375, + "step": 351960 + }, + { + "epoch": 0.7110016685722597, + "grad_norm": 245.45703125, + "learning_rate": 2.440685232492992e-06, + "loss": 15.0306, + "step": 351970 + }, + { + "epoch": 0.7110218692049435, + "grad_norm": 465.3127746582031, + "learning_rate": 2.440385367735303e-06, + "loss": 21.7765, + "step": 351980 + }, + { + "epoch": 0.7110420698376273, + "grad_norm": 300.7366638183594, + "learning_rate": 2.440085515452867e-06, + "loss": 20.5044, + "step": 351990 + }, + { + "epoch": 0.7110622704703111, + "grad_norm": 634.2356567382812, + "learning_rate": 2.4397856756471435e-06, + "loss": 36.8186, + "step": 352000 + }, + { + "epoch": 0.7110824711029949, + "grad_norm": 967.7387084960938, + "learning_rate": 2.4394858483195923e-06, + "loss": 29.0341, + "step": 352010 + }, + { + "epoch": 0.7111026717356788, + "grad_norm": 313.5653381347656, + "learning_rate": 2.4391860334716783e-06, + "loss": 12.2251, + "step": 352020 + }, + { + "epoch": 0.7111228723683626, + "grad_norm": 165.4011688232422, + "learning_rate": 2.438886231104861e-06, + "loss": 22.8992, + "step": 352030 + }, + { + "epoch": 0.7111430730010464, + "grad_norm": 460.9947814941406, + "learning_rate": 2.4385864412206e-06, + "loss": 26.6358, + "step": 352040 + }, + { + "epoch": 0.7111632736337302, + "grad_norm": 92.2685317993164, + "learning_rate": 2.4382866638203578e-06, + "loss": 13.4465, + "step": 352050 + }, + { + "epoch": 0.711183474266414, + "grad_norm": 89.22782135009766, + "learning_rate": 2.4379868989055976e-06, + "loss": 17.6479, + "step": 352060 + }, + { + "epoch": 0.7112036748990979, + "grad_norm": 95.64362335205078, + "learning_rate": 2.4376871464777792e-06, + "loss": 10.2265, + "step": 352070 + }, + { + "epoch": 0.7112238755317817, + "grad_norm": 234.10340881347656, + "learning_rate": 2.43738740653836e-06, + "loss": 17.5689, + "step": 352080 + }, + { + "epoch": 0.7112440761644655, + "grad_norm": 84.29734802246094, + "learning_rate": 2.437087679088806e-06, + "loss": 12.9642, + "step": 352090 + }, + { + "epoch": 0.7112642767971493, + "grad_norm": 189.70095825195312, + "learning_rate": 2.4367879641305757e-06, + "loss": 21.265, + "step": 352100 + }, + { + "epoch": 0.7112844774298331, + "grad_norm": 516.434326171875, + "learning_rate": 2.4364882616651288e-06, + "loss": 14.5156, + "step": 352110 + }, + { + "epoch": 0.711304678062517, + "grad_norm": 394.54852294921875, + "learning_rate": 2.436188571693928e-06, + "loss": 18.9324, + "step": 
352120 + }, + { + "epoch": 0.7113248786952008, + "grad_norm": 157.44735717773438, + "learning_rate": 2.4358888942184324e-06, + "loss": 17.6764, + "step": 352130 + }, + { + "epoch": 0.7113450793278846, + "grad_norm": 46.21310806274414, + "learning_rate": 2.4355892292401044e-06, + "loss": 21.4303, + "step": 352140 + }, + { + "epoch": 0.7113652799605684, + "grad_norm": 199.23959350585938, + "learning_rate": 2.4352895767604036e-06, + "loss": 16.9316, + "step": 352150 + }, + { + "epoch": 0.7113854805932521, + "grad_norm": 443.296142578125, + "learning_rate": 2.4349899367807885e-06, + "loss": 16.1591, + "step": 352160 + }, + { + "epoch": 0.711405681225936, + "grad_norm": 1023.393310546875, + "learning_rate": 2.4346903093027237e-06, + "loss": 22.3192, + "step": 352170 + }, + { + "epoch": 0.7114258818586198, + "grad_norm": 289.38592529296875, + "learning_rate": 2.434390694327666e-06, + "loss": 21.6989, + "step": 352180 + }, + { + "epoch": 0.7114460824913036, + "grad_norm": 344.63568115234375, + "learning_rate": 2.434091091857076e-06, + "loss": 21.0206, + "step": 352190 + }, + { + "epoch": 0.7114662831239874, + "grad_norm": 132.6818084716797, + "learning_rate": 2.4337915018924147e-06, + "loss": 13.4311, + "step": 352200 + }, + { + "epoch": 0.7114864837566712, + "grad_norm": 500.07904052734375, + "learning_rate": 2.433491924435144e-06, + "loss": 15.8135, + "step": 352210 + }, + { + "epoch": 0.711506684389355, + "grad_norm": 16.9533634185791, + "learning_rate": 2.433192359486723e-06, + "loss": 16.036, + "step": 352220 + }, + { + "epoch": 0.7115268850220389, + "grad_norm": 554.3599243164062, + "learning_rate": 2.4328928070486086e-06, + "loss": 18.9378, + "step": 352230 + }, + { + "epoch": 0.7115470856547227, + "grad_norm": 179.26231384277344, + "learning_rate": 2.432593267122265e-06, + "loss": 12.1016, + "step": 352240 + }, + { + "epoch": 0.7115672862874065, + "grad_norm": 92.05696105957031, + "learning_rate": 2.432293739709151e-06, + "loss": 16.263, + "step": 352250 + }, + { + "epoch": 0.7115874869200903, + "grad_norm": 422.00677490234375, + "learning_rate": 2.4319942248107236e-06, + "loss": 40.672, + "step": 352260 + }, + { + "epoch": 0.7116076875527741, + "grad_norm": 447.24139404296875, + "learning_rate": 2.4316947224284454e-06, + "loss": 13.0432, + "step": 352270 + }, + { + "epoch": 0.711627888185458, + "grad_norm": 416.1942138671875, + "learning_rate": 2.431395232563779e-06, + "loss": 11.9405, + "step": 352280 + }, + { + "epoch": 0.7116480888181418, + "grad_norm": 649.938232421875, + "learning_rate": 2.431095755218177e-06, + "loss": 11.7699, + "step": 352290 + }, + { + "epoch": 0.7116682894508256, + "grad_norm": 157.4993896484375, + "learning_rate": 2.4307962903931025e-06, + "loss": 9.6227, + "step": 352300 + }, + { + "epoch": 0.7116884900835094, + "grad_norm": 280.8648376464844, + "learning_rate": 2.430496838090017e-06, + "loss": 28.5089, + "step": 352310 + }, + { + "epoch": 0.7117086907161932, + "grad_norm": 405.6296691894531, + "learning_rate": 2.4301973983103793e-06, + "loss": 11.0124, + "step": 352320 + }, + { + "epoch": 0.7117288913488771, + "grad_norm": 180.7757110595703, + "learning_rate": 2.429897971055645e-06, + "loss": 18.2415, + "step": 352330 + }, + { + "epoch": 0.7117490919815609, + "grad_norm": 500.71588134765625, + "learning_rate": 2.4295985563272785e-06, + "loss": 18.0885, + "step": 352340 + }, + { + "epoch": 0.7117692926142447, + "grad_norm": 383.4118347167969, + "learning_rate": 2.4292991541267368e-06, + "loss": 19.4345, + "step": 352350 + }, + { + "epoch": 0.7117894932469285, 
+ "grad_norm": 593.0022583007812, + "learning_rate": 2.4289997644554775e-06, + "loss": 46.2417, + "step": 352360 + }, + { + "epoch": 0.7118096938796123, + "grad_norm": 434.1874694824219, + "learning_rate": 2.428700387314961e-06, + "loss": 13.0681, + "step": 352370 + }, + { + "epoch": 0.7118298945122962, + "grad_norm": 185.2329864501953, + "learning_rate": 2.4284010227066495e-06, + "loss": 19.5999, + "step": 352380 + }, + { + "epoch": 0.71185009514498, + "grad_norm": 548.0687255859375, + "learning_rate": 2.4281016706319992e-06, + "loss": 20.2285, + "step": 352390 + }, + { + "epoch": 0.7118702957776638, + "grad_norm": 463.87506103515625, + "learning_rate": 2.4278023310924676e-06, + "loss": 13.8517, + "step": 352400 + }, + { + "epoch": 0.7118904964103475, + "grad_norm": 875.9000244140625, + "learning_rate": 2.4275030040895178e-06, + "loss": 9.5436, + "step": 352410 + }, + { + "epoch": 0.7119106970430313, + "grad_norm": 699.1371459960938, + "learning_rate": 2.4272036896246054e-06, + "loss": 29.8913, + "step": 352420 + }, + { + "epoch": 0.7119308976757152, + "grad_norm": 446.48834228515625, + "learning_rate": 2.4269043876991888e-06, + "loss": 18.5582, + "step": 352430 + }, + { + "epoch": 0.711951098308399, + "grad_norm": 636.35400390625, + "learning_rate": 2.4266050983147298e-06, + "loss": 22.3689, + "step": 352440 + }, + { + "epoch": 0.7119712989410828, + "grad_norm": 577.2525024414062, + "learning_rate": 2.4263058214726844e-06, + "loss": 12.0827, + "step": 352450 + }, + { + "epoch": 0.7119914995737666, + "grad_norm": 456.84130859375, + "learning_rate": 2.426006557174513e-06, + "loss": 17.5838, + "step": 352460 + }, + { + "epoch": 0.7120117002064504, + "grad_norm": 158.64451599121094, + "learning_rate": 2.425707305421674e-06, + "loss": 17.0695, + "step": 352470 + }, + { + "epoch": 0.7120319008391343, + "grad_norm": 328.5570373535156, + "learning_rate": 2.425408066215623e-06, + "loss": 16.1986, + "step": 352480 + }, + { + "epoch": 0.7120521014718181, + "grad_norm": 564.2173461914062, + "learning_rate": 2.4251088395578214e-06, + "loss": 27.737, + "step": 352490 + }, + { + "epoch": 0.7120723021045019, + "grad_norm": 866.6751708984375, + "learning_rate": 2.424809625449729e-06, + "loss": 29.3532, + "step": 352500 + }, + { + "epoch": 0.7120925027371857, + "grad_norm": 114.92594909667969, + "learning_rate": 2.424510423892802e-06, + "loss": 15.4919, + "step": 352510 + }, + { + "epoch": 0.7121127033698695, + "grad_norm": 201.77572631835938, + "learning_rate": 2.424211234888497e-06, + "loss": 12.8224, + "step": 352520 + }, + { + "epoch": 0.7121329040025534, + "grad_norm": 789.9775390625, + "learning_rate": 2.4239120584382757e-06, + "loss": 20.3586, + "step": 352530 + }, + { + "epoch": 0.7121531046352372, + "grad_norm": 274.7069396972656, + "learning_rate": 2.4236128945435944e-06, + "loss": 17.7998, + "step": 352540 + }, + { + "epoch": 0.712173305267921, + "grad_norm": 642.5575561523438, + "learning_rate": 2.42331374320591e-06, + "loss": 24.558, + "step": 352550 + }, + { + "epoch": 0.7121935059006048, + "grad_norm": 1134.72314453125, + "learning_rate": 2.423014604426682e-06, + "loss": 25.1194, + "step": 352560 + }, + { + "epoch": 0.7122137065332886, + "grad_norm": 169.52781677246094, + "learning_rate": 2.4227154782073716e-06, + "loss": 16.6606, + "step": 352570 + }, + { + "epoch": 0.7122339071659725, + "grad_norm": 86.97180938720703, + "learning_rate": 2.422416364549429e-06, + "loss": 14.4027, + "step": 352580 + }, + { + "epoch": 0.7122541077986563, + "grad_norm": 102.85459899902344, + "learning_rate": 
2.4221172634543177e-06, + "loss": 12.2192, + "step": 352590 + }, + { + "epoch": 0.7122743084313401, + "grad_norm": 264.3111267089844, + "learning_rate": 2.4218181749234954e-06, + "loss": 20.4156, + "step": 352600 + }, + { + "epoch": 0.7122945090640239, + "grad_norm": 495.5534362792969, + "learning_rate": 2.4215190989584187e-06, + "loss": 10.5141, + "step": 352610 + }, + { + "epoch": 0.7123147096967077, + "grad_norm": 79.8003921508789, + "learning_rate": 2.4212200355605433e-06, + "loss": 33.404, + "step": 352620 + }, + { + "epoch": 0.7123349103293916, + "grad_norm": 461.2104187011719, + "learning_rate": 2.4209209847313302e-06, + "loss": 10.4754, + "step": 352630 + }, + { + "epoch": 0.7123551109620754, + "grad_norm": 338.4543151855469, + "learning_rate": 2.4206219464722356e-06, + "loss": 15.9289, + "step": 352640 + }, + { + "epoch": 0.7123753115947592, + "grad_norm": 372.7828674316406, + "learning_rate": 2.4203229207847155e-06, + "loss": 12.2598, + "step": 352650 + }, + { + "epoch": 0.712395512227443, + "grad_norm": 850.4609985351562, + "learning_rate": 2.420023907670228e-06, + "loss": 31.5782, + "step": 352660 + }, + { + "epoch": 0.7124157128601267, + "grad_norm": 354.53204345703125, + "learning_rate": 2.419724907130233e-06, + "loss": 18.0651, + "step": 352670 + }, + { + "epoch": 0.7124359134928105, + "grad_norm": 261.14532470703125, + "learning_rate": 2.4194259191661864e-06, + "loss": 22.9313, + "step": 352680 + }, + { + "epoch": 0.7124561141254944, + "grad_norm": 581.628662109375, + "learning_rate": 2.419126943779543e-06, + "loss": 26.5322, + "step": 352690 + }, + { + "epoch": 0.7124763147581782, + "grad_norm": 470.56378173828125, + "learning_rate": 2.418827980971763e-06, + "loss": 29.114, + "step": 352700 + }, + { + "epoch": 0.712496515390862, + "grad_norm": 173.2490997314453, + "learning_rate": 2.4185290307443025e-06, + "loss": 20.4095, + "step": 352710 + }, + { + "epoch": 0.7125167160235458, + "grad_norm": 597.8363647460938, + "learning_rate": 2.418230093098617e-06, + "loss": 20.5333, + "step": 352720 + }, + { + "epoch": 0.7125369166562296, + "grad_norm": 731.7879638671875, + "learning_rate": 2.417931168036166e-06, + "loss": 23.9049, + "step": 352730 + }, + { + "epoch": 0.7125571172889135, + "grad_norm": 3.5769662857055664, + "learning_rate": 2.417632255558404e-06, + "loss": 11.3522, + "step": 352740 + }, + { + "epoch": 0.7125773179215973, + "grad_norm": 54.508094787597656, + "learning_rate": 2.4173333556667912e-06, + "loss": 26.4764, + "step": 352750 + }, + { + "epoch": 0.7125975185542811, + "grad_norm": 694.1456298828125, + "learning_rate": 2.417034468362782e-06, + "loss": 22.3851, + "step": 352760 + }, + { + "epoch": 0.7126177191869649, + "grad_norm": 208.19479370117188, + "learning_rate": 2.416735593647832e-06, + "loss": 19.3907, + "step": 352770 + }, + { + "epoch": 0.7126379198196487, + "grad_norm": 163.6040802001953, + "learning_rate": 2.416436731523401e-06, + "loss": 13.7756, + "step": 352780 + }, + { + "epoch": 0.7126581204523326, + "grad_norm": 699.3252563476562, + "learning_rate": 2.4161378819909444e-06, + "loss": 20.9277, + "step": 352790 + }, + { + "epoch": 0.7126783210850164, + "grad_norm": 131.13226318359375, + "learning_rate": 2.415839045051916e-06, + "loss": 19.9862, + "step": 352800 + }, + { + "epoch": 0.7126985217177002, + "grad_norm": 203.00201416015625, + "learning_rate": 2.415540220707775e-06, + "loss": 19.0849, + "step": 352810 + }, + { + "epoch": 0.712718722350384, + "grad_norm": 190.422119140625, + "learning_rate": 2.4152414089599798e-06, + "loss": 15.3859, + 
"step": 352820 + }, + { + "epoch": 0.7127389229830678, + "grad_norm": 147.30853271484375, + "learning_rate": 2.4149426098099836e-06, + "loss": 14.347, + "step": 352830 + }, + { + "epoch": 0.7127591236157517, + "grad_norm": 149.81625366210938, + "learning_rate": 2.4146438232592425e-06, + "loss": 12.5917, + "step": 352840 + }, + { + "epoch": 0.7127793242484355, + "grad_norm": 273.17840576171875, + "learning_rate": 2.4143450493092146e-06, + "loss": 9.3379, + "step": 352850 + }, + { + "epoch": 0.7127995248811193, + "grad_norm": 340.0224304199219, + "learning_rate": 2.414046287961356e-06, + "loss": 22.2239, + "step": 352860 + }, + { + "epoch": 0.7128197255138031, + "grad_norm": 0.5263413190841675, + "learning_rate": 2.4137475392171204e-06, + "loss": 14.0726, + "step": 352870 + }, + { + "epoch": 0.712839926146487, + "grad_norm": 246.65101623535156, + "learning_rate": 2.4134488030779657e-06, + "loss": 12.8788, + "step": 352880 + }, + { + "epoch": 0.7128601267791708, + "grad_norm": 982.08251953125, + "learning_rate": 2.4131500795453515e-06, + "loss": 26.5317, + "step": 352890 + }, + { + "epoch": 0.7128803274118546, + "grad_norm": 252.6681365966797, + "learning_rate": 2.412851368620726e-06, + "loss": 23.6088, + "step": 352900 + }, + { + "epoch": 0.7129005280445384, + "grad_norm": 219.7015838623047, + "learning_rate": 2.41255267030555e-06, + "loss": 20.815, + "step": 352910 + }, + { + "epoch": 0.7129207286772221, + "grad_norm": 462.3797607421875, + "learning_rate": 2.412253984601279e-06, + "loss": 29.5089, + "step": 352920 + }, + { + "epoch": 0.7129409293099059, + "grad_norm": 352.5789489746094, + "learning_rate": 2.411955311509369e-06, + "loss": 34.6314, + "step": 352930 + }, + { + "epoch": 0.7129611299425898, + "grad_norm": 0.0, + "learning_rate": 2.4116566510312734e-06, + "loss": 22.799, + "step": 352940 + }, + { + "epoch": 0.7129813305752736, + "grad_norm": 267.8687744140625, + "learning_rate": 2.4113580031684487e-06, + "loss": 21.5248, + "step": 352950 + }, + { + "epoch": 0.7130015312079574, + "grad_norm": 264.94580078125, + "learning_rate": 2.4110593679223547e-06, + "loss": 20.9346, + "step": 352960 + }, + { + "epoch": 0.7130217318406412, + "grad_norm": 306.8415222167969, + "learning_rate": 2.41076074529444e-06, + "loss": 17.5703, + "step": 352970 + }, + { + "epoch": 0.713041932473325, + "grad_norm": 553.2825927734375, + "learning_rate": 2.4104621352861633e-06, + "loss": 19.9164, + "step": 352980 + }, + { + "epoch": 0.7130621331060089, + "grad_norm": 238.46238708496094, + "learning_rate": 2.4101635378989823e-06, + "loss": 18.6934, + "step": 352990 + }, + { + "epoch": 0.7130823337386927, + "grad_norm": 355.835205078125, + "learning_rate": 2.40986495313435e-06, + "loss": 20.9812, + "step": 353000 + }, + { + "epoch": 0.7131025343713765, + "grad_norm": 296.1120300292969, + "learning_rate": 2.4095663809937198e-06, + "loss": 17.2678, + "step": 353010 + }, + { + "epoch": 0.7131227350040603, + "grad_norm": 171.39669799804688, + "learning_rate": 2.4092678214785508e-06, + "loss": 7.5386, + "step": 353020 + }, + { + "epoch": 0.7131429356367441, + "grad_norm": 459.44488525390625, + "learning_rate": 2.408969274590296e-06, + "loss": 17.5729, + "step": 353030 + }, + { + "epoch": 0.713163136269428, + "grad_norm": 435.53857421875, + "learning_rate": 2.408670740330409e-06, + "loss": 14.0548, + "step": 353040 + }, + { + "epoch": 0.7131833369021118, + "grad_norm": 866.0286254882812, + "learning_rate": 2.4083722187003483e-06, + "loss": 26.1176, + "step": 353050 + }, + { + "epoch": 0.7132035375347956, + 
"grad_norm": 369.73272705078125, + "learning_rate": 2.408073709701565e-06, + "loss": 30.3595, + "step": 353060 + }, + { + "epoch": 0.7132237381674794, + "grad_norm": 481.9102478027344, + "learning_rate": 2.407775213335518e-06, + "loss": 13.01, + "step": 353070 + }, + { + "epoch": 0.7132439388001632, + "grad_norm": 276.68603515625, + "learning_rate": 2.407476729603661e-06, + "loss": 19.3233, + "step": 353080 + }, + { + "epoch": 0.713264139432847, + "grad_norm": 318.9827575683594, + "learning_rate": 2.4071782585074453e-06, + "loss": 18.0887, + "step": 353090 + }, + { + "epoch": 0.7132843400655309, + "grad_norm": 309.0885009765625, + "learning_rate": 2.4068798000483306e-06, + "loss": 28.781, + "step": 353100 + }, + { + "epoch": 0.7133045406982147, + "grad_norm": 500.3667907714844, + "learning_rate": 2.406581354227767e-06, + "loss": 30.4355, + "step": 353110 + }, + { + "epoch": 0.7133247413308985, + "grad_norm": 502.664306640625, + "learning_rate": 2.406282921047213e-06, + "loss": 22.4138, + "step": 353120 + }, + { + "epoch": 0.7133449419635823, + "grad_norm": 402.8708190917969, + "learning_rate": 2.40598450050812e-06, + "loss": 19.5594, + "step": 353130 + }, + { + "epoch": 0.7133651425962662, + "grad_norm": 581.7855834960938, + "learning_rate": 2.405686092611946e-06, + "loss": 10.1923, + "step": 353140 + }, + { + "epoch": 0.71338534322895, + "grad_norm": 378.47686767578125, + "learning_rate": 2.405387697360143e-06, + "loss": 25.2502, + "step": 353150 + }, + { + "epoch": 0.7134055438616338, + "grad_norm": 0.0, + "learning_rate": 2.4050893147541643e-06, + "loss": 15.7241, + "step": 353160 + }, + { + "epoch": 0.7134257444943176, + "grad_norm": 485.62335205078125, + "learning_rate": 2.4047909447954647e-06, + "loss": 13.8162, + "step": 353170 + }, + { + "epoch": 0.7134459451270013, + "grad_norm": 249.17730712890625, + "learning_rate": 2.4044925874855035e-06, + "loss": 23.7092, + "step": 353180 + }, + { + "epoch": 0.7134661457596851, + "grad_norm": 52.380401611328125, + "learning_rate": 2.404194242825727e-06, + "loss": 8.8809, + "step": 353190 + }, + { + "epoch": 0.713486346392369, + "grad_norm": 129.6929931640625, + "learning_rate": 2.403895910817593e-06, + "loss": 19.2553, + "step": 353200 + }, + { + "epoch": 0.7135065470250528, + "grad_norm": 1098.8128662109375, + "learning_rate": 2.403597591462557e-06, + "loss": 16.2623, + "step": 353210 + }, + { + "epoch": 0.7135267476577366, + "grad_norm": 145.89706420898438, + "learning_rate": 2.403299284762071e-06, + "loss": 12.9106, + "step": 353220 + }, + { + "epoch": 0.7135469482904204, + "grad_norm": 347.21124267578125, + "learning_rate": 2.403000990717588e-06, + "loss": 12.9401, + "step": 353230 + }, + { + "epoch": 0.7135671489231042, + "grad_norm": 327.05682373046875, + "learning_rate": 2.4027027093305655e-06, + "loss": 15.7113, + "step": 353240 + }, + { + "epoch": 0.7135873495557881, + "grad_norm": 391.7794494628906, + "learning_rate": 2.402404440602455e-06, + "loss": 12.8125, + "step": 353250 + }, + { + "epoch": 0.7136075501884719, + "grad_norm": 266.3108215332031, + "learning_rate": 2.4021061845347076e-06, + "loss": 28.6932, + "step": 353260 + }, + { + "epoch": 0.7136277508211557, + "grad_norm": 986.6097412109375, + "learning_rate": 2.40180794112878e-06, + "loss": 28.6111, + "step": 353270 + }, + { + "epoch": 0.7136479514538395, + "grad_norm": 476.0670471191406, + "learning_rate": 2.401509710386127e-06, + "loss": 12.6359, + "step": 353280 + }, + { + "epoch": 0.7136681520865233, + "grad_norm": 116.15657043457031, + "learning_rate": 
2.4012114923082007e-06, + "loss": 13.8278, + "step": 353290 + }, + { + "epoch": 0.7136883527192072, + "grad_norm": 93.48078155517578, + "learning_rate": 2.4009132868964525e-06, + "loss": 13.8956, + "step": 353300 + }, + { + "epoch": 0.713708553351891, + "grad_norm": 472.78204345703125, + "learning_rate": 2.400615094152339e-06, + "loss": 15.9945, + "step": 353310 + }, + { + "epoch": 0.7137287539845748, + "grad_norm": 329.33526611328125, + "learning_rate": 2.4003169140773132e-06, + "loss": 23.1479, + "step": 353320 + }, + { + "epoch": 0.7137489546172586, + "grad_norm": 490.73138427734375, + "learning_rate": 2.4000187466728253e-06, + "loss": 26.9908, + "step": 353330 + }, + { + "epoch": 0.7137691552499424, + "grad_norm": 427.5204772949219, + "learning_rate": 2.3997205919403323e-06, + "loss": 11.6373, + "step": 353340 + }, + { + "epoch": 0.7137893558826263, + "grad_norm": 289.8179016113281, + "learning_rate": 2.399422449881284e-06, + "loss": 12.7979, + "step": 353350 + }, + { + "epoch": 0.7138095565153101, + "grad_norm": 429.68603515625, + "learning_rate": 2.399124320497137e-06, + "loss": 26.3519, + "step": 353360 + }, + { + "epoch": 0.7138297571479939, + "grad_norm": 353.4993591308594, + "learning_rate": 2.398826203789343e-06, + "loss": 20.2615, + "step": 353370 + }, + { + "epoch": 0.7138499577806777, + "grad_norm": 53.866371154785156, + "learning_rate": 2.3985280997593523e-06, + "loss": 10.5225, + "step": 353380 + }, + { + "epoch": 0.7138701584133615, + "grad_norm": 235.62281799316406, + "learning_rate": 2.3982300084086224e-06, + "loss": 15.1585, + "step": 353390 + }, + { + "epoch": 0.7138903590460454, + "grad_norm": 629.7739868164062, + "learning_rate": 2.3979319297386035e-06, + "loss": 23.6567, + "step": 353400 + }, + { + "epoch": 0.7139105596787292, + "grad_norm": 106.13060760498047, + "learning_rate": 2.397633863750747e-06, + "loss": 22.0218, + "step": 353410 + }, + { + "epoch": 0.713930760311413, + "grad_norm": 728.9324951171875, + "learning_rate": 2.397335810446508e-06, + "loss": 17.6712, + "step": 353420 + }, + { + "epoch": 0.7139509609440968, + "grad_norm": 259.8564758300781, + "learning_rate": 2.3970377698273396e-06, + "loss": 49.6211, + "step": 353430 + }, + { + "epoch": 0.7139711615767805, + "grad_norm": 410.4757995605469, + "learning_rate": 2.3967397418946937e-06, + "loss": 22.1843, + "step": 353440 + }, + { + "epoch": 0.7139913622094644, + "grad_norm": 547.3377685546875, + "learning_rate": 2.396441726650021e-06, + "loss": 9.9852, + "step": 353450 + }, + { + "epoch": 0.7140115628421482, + "grad_norm": 138.32223510742188, + "learning_rate": 2.396143724094777e-06, + "loss": 11.1795, + "step": 353460 + }, + { + "epoch": 0.714031763474832, + "grad_norm": 659.0087280273438, + "learning_rate": 2.395845734230413e-06, + "loss": 10.0586, + "step": 353470 + }, + { + "epoch": 0.7140519641075158, + "grad_norm": 173.6551513671875, + "learning_rate": 2.395547757058379e-06, + "loss": 20.8068, + "step": 353480 + }, + { + "epoch": 0.7140721647401996, + "grad_norm": 369.30169677734375, + "learning_rate": 2.395249792580129e-06, + "loss": 15.9972, + "step": 353490 + }, + { + "epoch": 0.7140923653728835, + "grad_norm": 230.31594848632812, + "learning_rate": 2.39495184079712e-06, + "loss": 22.5841, + "step": 353500 + }, + { + "epoch": 0.7141125660055673, + "grad_norm": 160.00787353515625, + "learning_rate": 2.3946539017107963e-06, + "loss": 65.5788, + "step": 353510 + }, + { + "epoch": 0.7141327666382511, + "grad_norm": 3.456495523452759, + "learning_rate": 2.3943559753226124e-06, + "loss": 25.4125, + 
"step": 353520 + }, + { + "epoch": 0.7141529672709349, + "grad_norm": 109.46832275390625, + "learning_rate": 2.3940580616340244e-06, + "loss": 24.6163, + "step": 353530 + }, + { + "epoch": 0.7141731679036187, + "grad_norm": 741.8111572265625, + "learning_rate": 2.3937601606464807e-06, + "loss": 23.2918, + "step": 353540 + }, + { + "epoch": 0.7141933685363026, + "grad_norm": 190.29891967773438, + "learning_rate": 2.393462272361432e-06, + "loss": 17.0141, + "step": 353550 + }, + { + "epoch": 0.7142135691689864, + "grad_norm": 151.7148895263672, + "learning_rate": 2.393164396780332e-06, + "loss": 19.2695, + "step": 353560 + }, + { + "epoch": 0.7142337698016702, + "grad_norm": 563.07373046875, + "learning_rate": 2.3928665339046363e-06, + "loss": 10.5755, + "step": 353570 + }, + { + "epoch": 0.714253970434354, + "grad_norm": 546.0304565429688, + "learning_rate": 2.3925686837357898e-06, + "loss": 10.5596, + "step": 353580 + }, + { + "epoch": 0.7142741710670378, + "grad_norm": 44.742496490478516, + "learning_rate": 2.3922708462752466e-06, + "loss": 13.7291, + "step": 353590 + }, + { + "epoch": 0.7142943716997217, + "grad_norm": 217.78912353515625, + "learning_rate": 2.391973021524461e-06, + "loss": 13.751, + "step": 353600 + }, + { + "epoch": 0.7143145723324055, + "grad_norm": 448.3959655761719, + "learning_rate": 2.391675209484883e-06, + "loss": 17.5057, + "step": 353610 + }, + { + "epoch": 0.7143347729650893, + "grad_norm": 6.279942989349365, + "learning_rate": 2.391377410157961e-06, + "loss": 18.6252, + "step": 353620 + }, + { + "epoch": 0.7143549735977731, + "grad_norm": 85.3839340209961, + "learning_rate": 2.391079623545152e-06, + "loss": 12.992, + "step": 353630 + }, + { + "epoch": 0.7143751742304569, + "grad_norm": 436.60076904296875, + "learning_rate": 2.390781849647904e-06, + "loss": 19.5454, + "step": 353640 + }, + { + "epoch": 0.7143953748631408, + "grad_norm": 149.72430419921875, + "learning_rate": 2.3904840884676665e-06, + "loss": 16.6928, + "step": 353650 + }, + { + "epoch": 0.7144155754958246, + "grad_norm": 946.8202514648438, + "learning_rate": 2.3901863400058954e-06, + "loss": 13.3227, + "step": 353660 + }, + { + "epoch": 0.7144357761285084, + "grad_norm": 112.90775299072266, + "learning_rate": 2.389888604264038e-06, + "loss": 10.5937, + "step": 353670 + }, + { + "epoch": 0.7144559767611922, + "grad_norm": 284.0372314453125, + "learning_rate": 2.389590881243548e-06, + "loss": 12.4341, + "step": 353680 + }, + { + "epoch": 0.7144761773938759, + "grad_norm": 425.15069580078125, + "learning_rate": 2.389293170945876e-06, + "loss": 24.0845, + "step": 353690 + }, + { + "epoch": 0.7144963780265597, + "grad_norm": 538.189453125, + "learning_rate": 2.3889954733724708e-06, + "loss": 22.6088, + "step": 353700 + }, + { + "epoch": 0.7145165786592436, + "grad_norm": 515.6146240234375, + "learning_rate": 2.3886977885247866e-06, + "loss": 31.1808, + "step": 353710 + }, + { + "epoch": 0.7145367792919274, + "grad_norm": 600.225341796875, + "learning_rate": 2.388400116404271e-06, + "loss": 20.5745, + "step": 353720 + }, + { + "epoch": 0.7145569799246112, + "grad_norm": 92.6533432006836, + "learning_rate": 2.3881024570123777e-06, + "loss": 18.8627, + "step": 353730 + }, + { + "epoch": 0.714577180557295, + "grad_norm": 390.71588134765625, + "learning_rate": 2.387804810350555e-06, + "loss": 14.4567, + "step": 353740 + }, + { + "epoch": 0.7145973811899788, + "grad_norm": 395.343017578125, + "learning_rate": 2.387507176420256e-06, + "loss": 19.7061, + "step": 353750 + }, + { + "epoch": 
0.7146175818226627, + "grad_norm": 193.63609313964844, + "learning_rate": 2.387209555222931e-06, + "loss": 33.2489, + "step": 353760 + }, + { + "epoch": 0.7146377824553465, + "grad_norm": 339.81512451171875, + "learning_rate": 2.3869119467600273e-06, + "loss": 23.1288, + "step": 353770 + }, + { + "epoch": 0.7146579830880303, + "grad_norm": 276.13140869140625, + "learning_rate": 2.3866143510329998e-06, + "loss": 10.3668, + "step": 353780 + }, + { + "epoch": 0.7146781837207141, + "grad_norm": 676.8049926757812, + "learning_rate": 2.3863167680432975e-06, + "loss": 22.302, + "step": 353790 + }, + { + "epoch": 0.7146983843533979, + "grad_norm": 263.4266052246094, + "learning_rate": 2.3860191977923673e-06, + "loss": 18.8042, + "step": 353800 + }, + { + "epoch": 0.7147185849860818, + "grad_norm": 813.533203125, + "learning_rate": 2.3857216402816635e-06, + "loss": 18.3054, + "step": 353810 + }, + { + "epoch": 0.7147387856187656, + "grad_norm": 686.9315185546875, + "learning_rate": 2.385424095512637e-06, + "loss": 13.7513, + "step": 353820 + }, + { + "epoch": 0.7147589862514494, + "grad_norm": 291.71368408203125, + "learning_rate": 2.3851265634867358e-06, + "loss": 13.1515, + "step": 353830 + }, + { + "epoch": 0.7147791868841332, + "grad_norm": 699.3037109375, + "learning_rate": 2.3848290442054096e-06, + "loss": 30.5805, + "step": 353840 + }, + { + "epoch": 0.714799387516817, + "grad_norm": 155.97952270507812, + "learning_rate": 2.3845315376701112e-06, + "loss": 6.559, + "step": 353850 + }, + { + "epoch": 0.7148195881495009, + "grad_norm": 284.4759826660156, + "learning_rate": 2.384234043882288e-06, + "loss": 18.174, + "step": 353860 + }, + { + "epoch": 0.7148397887821847, + "grad_norm": 75.58837890625, + "learning_rate": 2.38393656284339e-06, + "loss": 13.5558, + "step": 353870 + }, + { + "epoch": 0.7148599894148685, + "grad_norm": 394.6120300292969, + "learning_rate": 2.3836390945548672e-06, + "loss": 26.4915, + "step": 353880 + }, + { + "epoch": 0.7148801900475523, + "grad_norm": 119.50384521484375, + "learning_rate": 2.3833416390181723e-06, + "loss": 8.0645, + "step": 353890 + }, + { + "epoch": 0.7149003906802361, + "grad_norm": 141.528076171875, + "learning_rate": 2.3830441962347528e-06, + "loss": 27.5925, + "step": 353900 + }, + { + "epoch": 0.71492059131292, + "grad_norm": 1280.6165771484375, + "learning_rate": 2.3827467662060565e-06, + "loss": 23.4032, + "step": 353910 + }, + { + "epoch": 0.7149407919456038, + "grad_norm": 489.8740234375, + "learning_rate": 2.382449348933537e-06, + "loss": 11.2119, + "step": 353920 + }, + { + "epoch": 0.7149609925782876, + "grad_norm": 0.8326438069343567, + "learning_rate": 2.382151944418642e-06, + "loss": 15.309, + "step": 353930 + }, + { + "epoch": 0.7149811932109714, + "grad_norm": 364.7646789550781, + "learning_rate": 2.381854552662819e-06, + "loss": 17.5244, + "step": 353940 + }, + { + "epoch": 0.7150013938436551, + "grad_norm": 180.71661376953125, + "learning_rate": 2.3815571736675214e-06, + "loss": 12.8768, + "step": 353950 + }, + { + "epoch": 0.715021594476339, + "grad_norm": 326.84393310546875, + "learning_rate": 2.381259807434194e-06, + "loss": 14.069, + "step": 353960 + }, + { + "epoch": 0.7150417951090228, + "grad_norm": 227.0348358154297, + "learning_rate": 2.3809624539642913e-06, + "loss": 34.223, + "step": 353970 + }, + { + "epoch": 0.7150619957417066, + "grad_norm": 11.7371244430542, + "learning_rate": 2.3806651132592597e-06, + "loss": 13.6704, + "step": 353980 + }, + { + "epoch": 0.7150821963743904, + "grad_norm": 260.1331481933594, + 
"learning_rate": 2.3803677853205465e-06, + "loss": 17.0676, + "step": 353990 + }, + { + "epoch": 0.7151023970070742, + "grad_norm": 7237.70263671875, + "learning_rate": 2.380070470149605e-06, + "loss": 23.8909, + "step": 354000 + }, + { + "epoch": 0.715122597639758, + "grad_norm": 551.85302734375, + "learning_rate": 2.3797731677478808e-06, + "loss": 29.154, + "step": 354010 + }, + { + "epoch": 0.7151427982724419, + "grad_norm": 845.3187255859375, + "learning_rate": 2.379475878116826e-06, + "loss": 18.3291, + "step": 354020 + }, + { + "epoch": 0.7151629989051257, + "grad_norm": 265.6539611816406, + "learning_rate": 2.379178601257886e-06, + "loss": 18.141, + "step": 354030 + }, + { + "epoch": 0.7151831995378095, + "grad_norm": 149.31614685058594, + "learning_rate": 2.3788813371725133e-06, + "loss": 14.8972, + "step": 354040 + }, + { + "epoch": 0.7152034001704933, + "grad_norm": 407.98004150390625, + "learning_rate": 2.3785840858621556e-06, + "loss": 17.6841, + "step": 354050 + }, + { + "epoch": 0.7152236008031772, + "grad_norm": 3.9735329151153564, + "learning_rate": 2.3782868473282587e-06, + "loss": 26.4006, + "step": 354060 + }, + { + "epoch": 0.715243801435861, + "grad_norm": 471.94683837890625, + "learning_rate": 2.3779896215722765e-06, + "loss": 15.9451, + "step": 354070 + }, + { + "epoch": 0.7152640020685448, + "grad_norm": 226.0657196044922, + "learning_rate": 2.3776924085956536e-06, + "loss": 17.6065, + "step": 354080 + }, + { + "epoch": 0.7152842027012286, + "grad_norm": 488.97723388671875, + "learning_rate": 2.3773952083998392e-06, + "loss": 20.6488, + "step": 354090 + }, + { + "epoch": 0.7153044033339124, + "grad_norm": 128.92112731933594, + "learning_rate": 2.3770980209862814e-06, + "loss": 25.5519, + "step": 354100 + }, + { + "epoch": 0.7153246039665963, + "grad_norm": 314.02978515625, + "learning_rate": 2.376800846356434e-06, + "loss": 20.6244, + "step": 354110 + }, + { + "epoch": 0.7153448045992801, + "grad_norm": 359.4573059082031, + "learning_rate": 2.3765036845117373e-06, + "loss": 25.4475, + "step": 354120 + }, + { + "epoch": 0.7153650052319639, + "grad_norm": 79.7017593383789, + "learning_rate": 2.3762065354536436e-06, + "loss": 6.9899, + "step": 354130 + }, + { + "epoch": 0.7153852058646477, + "grad_norm": 251.77645874023438, + "learning_rate": 2.375909399183603e-06, + "loss": 23.4261, + "step": 354140 + }, + { + "epoch": 0.7154054064973315, + "grad_norm": 495.43450927734375, + "learning_rate": 2.3756122757030614e-06, + "loss": 16.1308, + "step": 354150 + }, + { + "epoch": 0.7154256071300154, + "grad_norm": 0.0, + "learning_rate": 2.3753151650134655e-06, + "loss": 11.1015, + "step": 354160 + }, + { + "epoch": 0.7154458077626992, + "grad_norm": 213.82225036621094, + "learning_rate": 2.3750180671162656e-06, + "loss": 9.9914, + "step": 354170 + }, + { + "epoch": 0.715466008395383, + "grad_norm": 1957.6202392578125, + "learning_rate": 2.3747209820129117e-06, + "loss": 29.8445, + "step": 354180 + }, + { + "epoch": 0.7154862090280668, + "grad_norm": 1360.60400390625, + "learning_rate": 2.3744239097048465e-06, + "loss": 28.059, + "step": 354190 + }, + { + "epoch": 0.7155064096607505, + "grad_norm": 716.8304443359375, + "learning_rate": 2.3741268501935212e-06, + "loss": 21.6099, + "step": 354200 + }, + { + "epoch": 0.7155266102934343, + "grad_norm": 475.5372009277344, + "learning_rate": 2.373829803480384e-06, + "loss": 10.761, + "step": 354210 + }, + { + "epoch": 0.7155468109261182, + "grad_norm": 5.335785865783691, + "learning_rate": 2.3735327695668823e-06, + "loss": 15.5514, + 
"step": 354220 + }, + { + "epoch": 0.715567011558802, + "grad_norm": 306.8415832519531, + "learning_rate": 2.3732357484544616e-06, + "loss": 13.4526, + "step": 354230 + }, + { + "epoch": 0.7155872121914858, + "grad_norm": 357.27764892578125, + "learning_rate": 2.372938740144573e-06, + "loss": 19.6147, + "step": 354240 + }, + { + "epoch": 0.7156074128241696, + "grad_norm": 218.32611083984375, + "learning_rate": 2.372641744638662e-06, + "loss": 15.3973, + "step": 354250 + }, + { + "epoch": 0.7156276134568534, + "grad_norm": 593.254638671875, + "learning_rate": 2.3723447619381756e-06, + "loss": 22.3607, + "step": 354260 + }, + { + "epoch": 0.7156478140895373, + "grad_norm": 415.7333068847656, + "learning_rate": 2.3720477920445633e-06, + "loss": 11.2397, + "step": 354270 + }, + { + "epoch": 0.7156680147222211, + "grad_norm": 501.7664794921875, + "learning_rate": 2.3717508349592695e-06, + "loss": 22.7659, + "step": 354280 + }, + { + "epoch": 0.7156882153549049, + "grad_norm": 339.79876708984375, + "learning_rate": 2.3714538906837452e-06, + "loss": 25.0175, + "step": 354290 + }, + { + "epoch": 0.7157084159875887, + "grad_norm": 362.44024658203125, + "learning_rate": 2.3711569592194363e-06, + "loss": 25.5803, + "step": 354300 + }, + { + "epoch": 0.7157286166202725, + "grad_norm": 261.34521484375, + "learning_rate": 2.370860040567787e-06, + "loss": 14.9646, + "step": 354310 + }, + { + "epoch": 0.7157488172529564, + "grad_norm": 75.80406188964844, + "learning_rate": 2.3705631347302492e-06, + "loss": 13.9618, + "step": 354320 + }, + { + "epoch": 0.7157690178856402, + "grad_norm": 484.663818359375, + "learning_rate": 2.3702662417082655e-06, + "loss": 12.7879, + "step": 354330 + }, + { + "epoch": 0.715789218518324, + "grad_norm": 824.4431762695312, + "learning_rate": 2.369969361503288e-06, + "loss": 13.1514, + "step": 354340 + }, + { + "epoch": 0.7158094191510078, + "grad_norm": 326.220947265625, + "learning_rate": 2.3696724941167583e-06, + "loss": 13.5248, + "step": 354350 + }, + { + "epoch": 0.7158296197836916, + "grad_norm": 577.2223510742188, + "learning_rate": 2.369375639550127e-06, + "loss": 15.4694, + "step": 354360 + }, + { + "epoch": 0.7158498204163755, + "grad_norm": 0.0, + "learning_rate": 2.369078797804841e-06, + "loss": 31.32, + "step": 354370 + }, + { + "epoch": 0.7158700210490593, + "grad_norm": 318.8240966796875, + "learning_rate": 2.368781968882343e-06, + "loss": 25.9194, + "step": 354380 + }, + { + "epoch": 0.7158902216817431, + "grad_norm": 249.54876708984375, + "learning_rate": 2.368485152784086e-06, + "loss": 18.2879, + "step": 354390 + }, + { + "epoch": 0.7159104223144269, + "grad_norm": 357.135498046875, + "learning_rate": 2.3681883495115114e-06, + "loss": 17.8518, + "step": 354400 + }, + { + "epoch": 0.7159306229471107, + "grad_norm": 523.0523071289062, + "learning_rate": 2.3678915590660667e-06, + "loss": 12.5804, + "step": 354410 + }, + { + "epoch": 0.7159508235797946, + "grad_norm": 399.6500244140625, + "learning_rate": 2.367594781449199e-06, + "loss": 18.5693, + "step": 354420 + }, + { + "epoch": 0.7159710242124784, + "grad_norm": 665.1788940429688, + "learning_rate": 2.367298016662357e-06, + "loss": 7.4977, + "step": 354430 + }, + { + "epoch": 0.7159912248451622, + "grad_norm": 237.6899871826172, + "learning_rate": 2.3670012647069852e-06, + "loss": 17.5121, + "step": 354440 + }, + { + "epoch": 0.716011425477846, + "grad_norm": 349.6956787109375, + "learning_rate": 2.3667045255845276e-06, + "loss": 18.4566, + "step": 354450 + }, + { + "epoch": 0.7160316261105297, + 
"grad_norm": 423.8206787109375, + "learning_rate": 2.3664077992964356e-06, + "loss": 11.8242, + "step": 354460 + }, + { + "epoch": 0.7160518267432135, + "grad_norm": 174.9776611328125, + "learning_rate": 2.3661110858441517e-06, + "loss": 10.2079, + "step": 354470 + }, + { + "epoch": 0.7160720273758974, + "grad_norm": 612.3877563476562, + "learning_rate": 2.3658143852291214e-06, + "loss": 16.587, + "step": 354480 + }, + { + "epoch": 0.7160922280085812, + "grad_norm": 282.3558654785156, + "learning_rate": 2.3655176974527922e-06, + "loss": 16.1453, + "step": 354490 + }, + { + "epoch": 0.716112428641265, + "grad_norm": 438.8551940917969, + "learning_rate": 2.3652210225166122e-06, + "loss": 13.2628, + "step": 354500 + }, + { + "epoch": 0.7161326292739488, + "grad_norm": 310.1200256347656, + "learning_rate": 2.364924360422025e-06, + "loss": 20.7244, + "step": 354510 + }, + { + "epoch": 0.7161528299066326, + "grad_norm": 202.9417266845703, + "learning_rate": 2.3646277111704756e-06, + "loss": 25.7473, + "step": 354520 + }, + { + "epoch": 0.7161730305393165, + "grad_norm": 156.94667053222656, + "learning_rate": 2.364331074763413e-06, + "loss": 17.2835, + "step": 354530 + }, + { + "epoch": 0.7161932311720003, + "grad_norm": 327.7087707519531, + "learning_rate": 2.3640344512022807e-06, + "loss": 12.6792, + "step": 354540 + }, + { + "epoch": 0.7162134318046841, + "grad_norm": 524.0986328125, + "learning_rate": 2.3637378404885224e-06, + "loss": 15.3804, + "step": 354550 + }, + { + "epoch": 0.7162336324373679, + "grad_norm": 133.65394592285156, + "learning_rate": 2.3634412426235886e-06, + "loss": 19.5644, + "step": 354560 + }, + { + "epoch": 0.7162538330700517, + "grad_norm": 212.6671600341797, + "learning_rate": 2.3631446576089205e-06, + "loss": 23.5408, + "step": 354570 + }, + { + "epoch": 0.7162740337027356, + "grad_norm": 436.2182922363281, + "learning_rate": 2.362848085445968e-06, + "loss": 17.9776, + "step": 354580 + }, + { + "epoch": 0.7162942343354194, + "grad_norm": 410.3316955566406, + "learning_rate": 2.362551526136173e-06, + "loss": 16.9427, + "step": 354590 + }, + { + "epoch": 0.7163144349681032, + "grad_norm": 126.34648895263672, + "learning_rate": 2.3622549796809807e-06, + "loss": 20.3769, + "step": 354600 + }, + { + "epoch": 0.716334635600787, + "grad_norm": 24.165029525756836, + "learning_rate": 2.3619584460818397e-06, + "loss": 15.8446, + "step": 354610 + }, + { + "epoch": 0.7163548362334708, + "grad_norm": 271.6394958496094, + "learning_rate": 2.3616619253401913e-06, + "loss": 7.4026, + "step": 354620 + }, + { + "epoch": 0.7163750368661547, + "grad_norm": 118.4200668334961, + "learning_rate": 2.361365417457484e-06, + "loss": 14.8887, + "step": 354630 + }, + { + "epoch": 0.7163952374988385, + "grad_norm": 270.2481689453125, + "learning_rate": 2.36106892243516e-06, + "loss": 14.6306, + "step": 354640 + }, + { + "epoch": 0.7164154381315223, + "grad_norm": 660.8930053710938, + "learning_rate": 2.3607724402746685e-06, + "loss": 12.6815, + "step": 354650 + }, + { + "epoch": 0.7164356387642061, + "grad_norm": 606.572021484375, + "learning_rate": 2.3604759709774514e-06, + "loss": 31.5383, + "step": 354660 + }, + { + "epoch": 0.71645583939689, + "grad_norm": 577.8751220703125, + "learning_rate": 2.3601795145449525e-06, + "loss": 14.3754, + "step": 354670 + }, + { + "epoch": 0.7164760400295738, + "grad_norm": 3.1835930347442627, + "learning_rate": 2.3598830709786206e-06, + "loss": 19.7489, + "step": 354680 + }, + { + "epoch": 0.7164962406622576, + "grad_norm": 67.04893493652344, + 
"learning_rate": 2.3595866402798983e-06, + "loss": 15.8679, + "step": 354690 + }, + { + "epoch": 0.7165164412949414, + "grad_norm": 745.938232421875, + "learning_rate": 2.3592902224502284e-06, + "loss": 13.4207, + "step": 354700 + }, + { + "epoch": 0.7165366419276252, + "grad_norm": 268.83355712890625, + "learning_rate": 2.3589938174910577e-06, + "loss": 19.7198, + "step": 354710 + }, + { + "epoch": 0.7165568425603089, + "grad_norm": 404.059326171875, + "learning_rate": 2.3586974254038347e-06, + "loss": 14.0341, + "step": 354720 + }, + { + "epoch": 0.7165770431929928, + "grad_norm": 218.82638549804688, + "learning_rate": 2.3584010461899966e-06, + "loss": 24.7808, + "step": 354730 + }, + { + "epoch": 0.7165972438256766, + "grad_norm": 342.35198974609375, + "learning_rate": 2.358104679850991e-06, + "loss": 12.6807, + "step": 354740 + }, + { + "epoch": 0.7166174444583604, + "grad_norm": 414.22064208984375, + "learning_rate": 2.357808326388265e-06, + "loss": 18.3535, + "step": 354750 + }, + { + "epoch": 0.7166376450910442, + "grad_norm": 83.78343963623047, + "learning_rate": 2.3575119858032604e-06, + "loss": 30.4336, + "step": 354760 + }, + { + "epoch": 0.716657845723728, + "grad_norm": 486.3948974609375, + "learning_rate": 2.3572156580974205e-06, + "loss": 18.1159, + "step": 354770 + }, + { + "epoch": 0.7166780463564119, + "grad_norm": 1177.810791015625, + "learning_rate": 2.3569193432721904e-06, + "loss": 17.2594, + "step": 354780 + }, + { + "epoch": 0.7166982469890957, + "grad_norm": 466.76275634765625, + "learning_rate": 2.3566230413290186e-06, + "loss": 24.7981, + "step": 354790 + }, + { + "epoch": 0.7167184476217795, + "grad_norm": 403.2004699707031, + "learning_rate": 2.356326752269342e-06, + "loss": 13.501, + "step": 354800 + }, + { + "epoch": 0.7167386482544633, + "grad_norm": 106.48176574707031, + "learning_rate": 2.356030476094608e-06, + "loss": 24.7516, + "step": 354810 + }, + { + "epoch": 0.7167588488871471, + "grad_norm": 349.6374816894531, + "learning_rate": 2.355734212806263e-06, + "loss": 17.2748, + "step": 354820 + }, + { + "epoch": 0.716779049519831, + "grad_norm": 378.016357421875, + "learning_rate": 2.3554379624057485e-06, + "loss": 39.0445, + "step": 354830 + }, + { + "epoch": 0.7167992501525148, + "grad_norm": 666.3162231445312, + "learning_rate": 2.355141724894507e-06, + "loss": 18.3326, + "step": 354840 + }, + { + "epoch": 0.7168194507851986, + "grad_norm": 450.85235595703125, + "learning_rate": 2.354845500273985e-06, + "loss": 32.1428, + "step": 354850 + }, + { + "epoch": 0.7168396514178824, + "grad_norm": 335.59295654296875, + "learning_rate": 2.354549288545626e-06, + "loss": 17.131, + "step": 354860 + }, + { + "epoch": 0.7168598520505662, + "grad_norm": 353.0129089355469, + "learning_rate": 2.354253089710871e-06, + "loss": 28.5203, + "step": 354870 + }, + { + "epoch": 0.7168800526832501, + "grad_norm": 123.41834259033203, + "learning_rate": 2.3539569037711675e-06, + "loss": 16.2803, + "step": 354880 + }, + { + "epoch": 0.7169002533159339, + "grad_norm": 165.06300354003906, + "learning_rate": 2.3536607307279546e-06, + "loss": 5.1878, + "step": 354890 + }, + { + "epoch": 0.7169204539486177, + "grad_norm": 858.8455810546875, + "learning_rate": 2.353364570582681e-06, + "loss": 26.3914, + "step": 354900 + }, + { + "epoch": 0.7169406545813015, + "grad_norm": 135.5835418701172, + "learning_rate": 2.353068423336787e-06, + "loss": 11.9173, + "step": 354910 + }, + { + "epoch": 0.7169608552139853, + "grad_norm": 569.444091796875, + "learning_rate": 2.3527722889917147e-06, + 
"loss": 20.383, + "step": 354920 + }, + { + "epoch": 0.7169810558466692, + "grad_norm": 332.755615234375, + "learning_rate": 2.352476167548911e-06, + "loss": 10.1888, + "step": 354930 + }, + { + "epoch": 0.717001256479353, + "grad_norm": 100.6562728881836, + "learning_rate": 2.3521800590098153e-06, + "loss": 12.2876, + "step": 354940 + }, + { + "epoch": 0.7170214571120368, + "grad_norm": 173.51876831054688, + "learning_rate": 2.351883963375875e-06, + "loss": 22.0638, + "step": 354950 + }, + { + "epoch": 0.7170416577447206, + "grad_norm": 368.49957275390625, + "learning_rate": 2.3515878806485292e-06, + "loss": 30.4389, + "step": 354960 + }, + { + "epoch": 0.7170618583774043, + "grad_norm": 757.2198486328125, + "learning_rate": 2.351291810829225e-06, + "loss": 17.8596, + "step": 354970 + }, + { + "epoch": 0.7170820590100881, + "grad_norm": 6.089599609375, + "learning_rate": 2.3509957539194028e-06, + "loss": 15.726, + "step": 354980 + }, + { + "epoch": 0.717102259642772, + "grad_norm": 21.2546443939209, + "learning_rate": 2.350699709920504e-06, + "loss": 16.9506, + "step": 354990 + }, + { + "epoch": 0.7171224602754558, + "grad_norm": 328.02374267578125, + "learning_rate": 2.3504036788339763e-06, + "loss": 17.9776, + "step": 355000 + }, + { + "epoch": 0.7171426609081396, + "grad_norm": 270.3284606933594, + "learning_rate": 2.3501076606612587e-06, + "loss": 15.238, + "step": 355010 + }, + { + "epoch": 0.7171628615408234, + "grad_norm": 303.3382263183594, + "learning_rate": 2.3498116554037937e-06, + "loss": 27.3261, + "step": 355020 + }, + { + "epoch": 0.7171830621735072, + "grad_norm": 128.95709228515625, + "learning_rate": 2.349515663063025e-06, + "loss": 11.8511, + "step": 355030 + }, + { + "epoch": 0.7172032628061911, + "grad_norm": 198.04559326171875, + "learning_rate": 2.3492196836403974e-06, + "loss": 15.5393, + "step": 355040 + }, + { + "epoch": 0.7172234634388749, + "grad_norm": 123.14707946777344, + "learning_rate": 2.348923717137352e-06, + "loss": 15.7005, + "step": 355050 + }, + { + "epoch": 0.7172436640715587, + "grad_norm": 261.7399597167969, + "learning_rate": 2.3486277635553282e-06, + "loss": 17.2241, + "step": 355060 + }, + { + "epoch": 0.7172638647042425, + "grad_norm": 516.3812866210938, + "learning_rate": 2.3483318228957734e-06, + "loss": 12.9015, + "step": 355070 + }, + { + "epoch": 0.7172840653369263, + "grad_norm": 69.616455078125, + "learning_rate": 2.348035895160128e-06, + "loss": 10.3203, + "step": 355080 + }, + { + "epoch": 0.7173042659696102, + "grad_norm": 106.06146240234375, + "learning_rate": 2.347739980349831e-06, + "loss": 12.7461, + "step": 355090 + }, + { + "epoch": 0.717324466602294, + "grad_norm": 972.2819213867188, + "learning_rate": 2.3474440784663287e-06, + "loss": 23.5822, + "step": 355100 + }, + { + "epoch": 0.7173446672349778, + "grad_norm": 239.9462890625, + "learning_rate": 2.3471481895110643e-06, + "loss": 17.7041, + "step": 355110 + }, + { + "epoch": 0.7173648678676616, + "grad_norm": 480.9305114746094, + "learning_rate": 2.346852313485477e-06, + "loss": 42.3706, + "step": 355120 + }, + { + "epoch": 0.7173850685003454, + "grad_norm": 544.0484619140625, + "learning_rate": 2.346556450391009e-06, + "loss": 16.3635, + "step": 355130 + }, + { + "epoch": 0.7174052691330293, + "grad_norm": 444.17205810546875, + "learning_rate": 2.346260600229104e-06, + "loss": 17.6372, + "step": 355140 + }, + { + "epoch": 0.7174254697657131, + "grad_norm": 105.86084747314453, + "learning_rate": 2.3459647630012026e-06, + "loss": 20.5306, + "step": 355150 + }, + { + "epoch": 
0.7174456703983969, + "grad_norm": 58.388790130615234, + "learning_rate": 2.345668938708746e-06, + "loss": 17.3686, + "step": 355160 + }, + { + "epoch": 0.7174658710310807, + "grad_norm": 141.6204071044922, + "learning_rate": 2.345373127353179e-06, + "loss": 10.0835, + "step": 355170 + }, + { + "epoch": 0.7174860716637645, + "grad_norm": 544.3477172851562, + "learning_rate": 2.345077328935939e-06, + "loss": 24.7715, + "step": 355180 + }, + { + "epoch": 0.7175062722964484, + "grad_norm": 74.02786254882812, + "learning_rate": 2.344781543458472e-06, + "loss": 18.9166, + "step": 355190 + }, + { + "epoch": 0.7175264729291322, + "grad_norm": 59.02185821533203, + "learning_rate": 2.344485770922218e-06, + "loss": 14.772, + "step": 355200 + }, + { + "epoch": 0.717546673561816, + "grad_norm": 837.1063842773438, + "learning_rate": 2.3441900113286164e-06, + "loss": 31.2862, + "step": 355210 + }, + { + "epoch": 0.7175668741944998, + "grad_norm": 478.86492919921875, + "learning_rate": 2.3438942646791125e-06, + "loss": 22.5689, + "step": 355220 + }, + { + "epoch": 0.7175870748271835, + "grad_norm": 133.06336975097656, + "learning_rate": 2.3435985309751436e-06, + "loss": 29.2806, + "step": 355230 + }, + { + "epoch": 0.7176072754598674, + "grad_norm": 161.58766174316406, + "learning_rate": 2.3433028102181553e-06, + "loss": 24.9607, + "step": 355240 + }, + { + "epoch": 0.7176274760925512, + "grad_norm": 725.4195556640625, + "learning_rate": 2.3430071024095853e-06, + "loss": 25.7237, + "step": 355250 + }, + { + "epoch": 0.717647676725235, + "grad_norm": 288.8263244628906, + "learning_rate": 2.3427114075508776e-06, + "loss": 19.4521, + "step": 355260 + }, + { + "epoch": 0.7176678773579188, + "grad_norm": 556.9649047851562, + "learning_rate": 2.342415725643473e-06, + "loss": 17.8697, + "step": 355270 + }, + { + "epoch": 0.7176880779906026, + "grad_norm": 261.4717102050781, + "learning_rate": 2.3421200566888096e-06, + "loss": 10.0185, + "step": 355280 + }, + { + "epoch": 0.7177082786232865, + "grad_norm": 201.7537384033203, + "learning_rate": 2.341824400688333e-06, + "loss": 22.7394, + "step": 355290 + }, + { + "epoch": 0.7177284792559703, + "grad_norm": 531.0883178710938, + "learning_rate": 2.3415287576434807e-06, + "loss": 19.9155, + "step": 355300 + }, + { + "epoch": 0.7177486798886541, + "grad_norm": 435.77392578125, + "learning_rate": 2.3412331275556936e-06, + "loss": 37.4234, + "step": 355310 + }, + { + "epoch": 0.7177688805213379, + "grad_norm": 218.02395629882812, + "learning_rate": 2.3409375104264144e-06, + "loss": 33.8755, + "step": 355320 + }, + { + "epoch": 0.7177890811540217, + "grad_norm": 130.5308837890625, + "learning_rate": 2.340641906257086e-06, + "loss": 16.9654, + "step": 355330 + }, + { + "epoch": 0.7178092817867056, + "grad_norm": 613.1275634765625, + "learning_rate": 2.3403463150491434e-06, + "loss": 15.5211, + "step": 355340 + }, + { + "epoch": 0.7178294824193894, + "grad_norm": 270.5167236328125, + "learning_rate": 2.34005073680403e-06, + "loss": 17.8015, + "step": 355350 + }, + { + "epoch": 0.7178496830520732, + "grad_norm": 260.85516357421875, + "learning_rate": 2.3397551715231887e-06, + "loss": 18.6767, + "step": 355360 + }, + { + "epoch": 0.717869883684757, + "grad_norm": 1112.9285888671875, + "learning_rate": 2.3394596192080575e-06, + "loss": 23.5197, + "step": 355370 + }, + { + "epoch": 0.7178900843174408, + "grad_norm": 546.1166381835938, + "learning_rate": 2.3391640798600762e-06, + "loss": 20.2109, + "step": 355380 + }, + { + "epoch": 0.7179102849501247, + "grad_norm": 
433.3735046386719, + "learning_rate": 2.3388685534806865e-06, + "loss": 18.0769, + "step": 355390 + }, + { + "epoch": 0.7179304855828085, + "grad_norm": 316.08782958984375, + "learning_rate": 2.338573040071332e-06, + "loss": 20.6479, + "step": 355400 + }, + { + "epoch": 0.7179506862154923, + "grad_norm": 340.77081298828125, + "learning_rate": 2.338277539633446e-06, + "loss": 20.0103, + "step": 355410 + }, + { + "epoch": 0.7179708868481761, + "grad_norm": 376.880126953125, + "learning_rate": 2.3379820521684727e-06, + "loss": 25.7699, + "step": 355420 + }, + { + "epoch": 0.7179910874808599, + "grad_norm": 290.0224304199219, + "learning_rate": 2.337686577677854e-06, + "loss": 12.8468, + "step": 355430 + }, + { + "epoch": 0.7180112881135438, + "grad_norm": 1088.810302734375, + "learning_rate": 2.3373911161630274e-06, + "loss": 26.5678, + "step": 355440 + }, + { + "epoch": 0.7180314887462276, + "grad_norm": 246.1015167236328, + "learning_rate": 2.3370956676254327e-06, + "loss": 25.9341, + "step": 355450 + }, + { + "epoch": 0.7180516893789114, + "grad_norm": 282.4861755371094, + "learning_rate": 2.3368002320665118e-06, + "loss": 15.3598, + "step": 355460 + }, + { + "epoch": 0.7180718900115952, + "grad_norm": 746.9185180664062, + "learning_rate": 2.3365048094877033e-06, + "loss": 25.822, + "step": 355470 + }, + { + "epoch": 0.7180920906442789, + "grad_norm": 1026.7742919921875, + "learning_rate": 2.336209399890446e-06, + "loss": 29.8923, + "step": 355480 + }, + { + "epoch": 0.7181122912769627, + "grad_norm": 192.3336639404297, + "learning_rate": 2.3359140032761827e-06, + "loss": 16.082, + "step": 355490 + }, + { + "epoch": 0.7181324919096466, + "grad_norm": 365.6981506347656, + "learning_rate": 2.3356186196463497e-06, + "loss": 24.8557, + "step": 355500 + }, + { + "epoch": 0.7181526925423304, + "grad_norm": 964.8790283203125, + "learning_rate": 2.335323249002391e-06, + "loss": 17.6356, + "step": 355510 + }, + { + "epoch": 0.7181728931750142, + "grad_norm": 198.53807067871094, + "learning_rate": 2.3350278913457404e-06, + "loss": 14.7058, + "step": 355520 + }, + { + "epoch": 0.718193093807698, + "grad_norm": 631.6527099609375, + "learning_rate": 2.334732546677843e-06, + "loss": 19.1467, + "step": 355530 + }, + { + "epoch": 0.7182132944403818, + "grad_norm": 717.8720092773438, + "learning_rate": 2.334437215000136e-06, + "loss": 18.5422, + "step": 355540 + }, + { + "epoch": 0.7182334950730657, + "grad_norm": 925.7899169921875, + "learning_rate": 2.334141896314057e-06, + "loss": 15.6616, + "step": 355550 + }, + { + "epoch": 0.7182536957057495, + "grad_norm": 651.7544555664062, + "learning_rate": 2.333846590621049e-06, + "loss": 27.0347, + "step": 355560 + }, + { + "epoch": 0.7182738963384333, + "grad_norm": 57.169681549072266, + "learning_rate": 2.333551297922547e-06, + "loss": 22.672, + "step": 355570 + }, + { + "epoch": 0.7182940969711171, + "grad_norm": 371.8760070800781, + "learning_rate": 2.333256018219995e-06, + "loss": 14.3969, + "step": 355580 + }, + { + "epoch": 0.7183142976038009, + "grad_norm": 2.247293472290039, + "learning_rate": 2.3329607515148287e-06, + "loss": 28.4106, + "step": 355590 + }, + { + "epoch": 0.7183344982364848, + "grad_norm": 368.8210144042969, + "learning_rate": 2.3326654978084872e-06, + "loss": 20.3566, + "step": 355600 + }, + { + "epoch": 0.7183546988691686, + "grad_norm": 92.35023498535156, + "learning_rate": 2.3323702571024125e-06, + "loss": 15.0502, + "step": 355610 + }, + { + "epoch": 0.7183748995018524, + "grad_norm": 412.3029479980469, + "learning_rate": 
2.3320750293980416e-06, + "loss": 25.6768, + "step": 355620 + }, + { + "epoch": 0.7183951001345362, + "grad_norm": 662.3887939453125, + "learning_rate": 2.3317798146968113e-06, + "loss": 22.7219, + "step": 355630 + }, + { + "epoch": 0.71841530076722, + "grad_norm": 241.14309692382812, + "learning_rate": 2.3314846130001622e-06, + "loss": 13.5183, + "step": 355640 + }, + { + "epoch": 0.7184355013999039, + "grad_norm": 408.4259948730469, + "learning_rate": 2.3311894243095363e-06, + "loss": 13.5035, + "step": 355650 + }, + { + "epoch": 0.7184557020325877, + "grad_norm": 262.0238342285156, + "learning_rate": 2.3308942486263685e-06, + "loss": 16.8217, + "step": 355660 + }, + { + "epoch": 0.7184759026652715, + "grad_norm": 180.18206787109375, + "learning_rate": 2.3305990859520974e-06, + "loss": 12.8303, + "step": 355670 + }, + { + "epoch": 0.7184961032979553, + "grad_norm": 157.4781494140625, + "learning_rate": 2.3303039362881634e-06, + "loss": 20.4091, + "step": 355680 + }, + { + "epoch": 0.7185163039306391, + "grad_norm": 311.36297607421875, + "learning_rate": 2.3300087996360053e-06, + "loss": 21.4781, + "step": 355690 + }, + { + "epoch": 0.718536504563323, + "grad_norm": 801.7392578125, + "learning_rate": 2.329713675997058e-06, + "loss": 22.1854, + "step": 355700 + }, + { + "epoch": 0.7185567051960068, + "grad_norm": 546.9130859375, + "learning_rate": 2.3294185653727623e-06, + "loss": 36.6917, + "step": 355710 + }, + { + "epoch": 0.7185769058286906, + "grad_norm": 107.47648620605469, + "learning_rate": 2.329123467764559e-06, + "loss": 8.9922, + "step": 355720 + }, + { + "epoch": 0.7185971064613744, + "grad_norm": 405.75994873046875, + "learning_rate": 2.3288283831738834e-06, + "loss": 23.6942, + "step": 355730 + }, + { + "epoch": 0.7186173070940581, + "grad_norm": 392.80364990234375, + "learning_rate": 2.328533311602173e-06, + "loss": 25.28, + "step": 355740 + }, + { + "epoch": 0.718637507726742, + "grad_norm": 146.98622131347656, + "learning_rate": 2.3282382530508683e-06, + "loss": 17.9209, + "step": 355750 + }, + { + "epoch": 0.7186577083594258, + "grad_norm": 250.06829833984375, + "learning_rate": 2.327943207521407e-06, + "loss": 18.7913, + "step": 355760 + }, + { + "epoch": 0.7186779089921096, + "grad_norm": 395.7095947265625, + "learning_rate": 2.3276481750152245e-06, + "loss": 14.4532, + "step": 355770 + }, + { + "epoch": 0.7186981096247934, + "grad_norm": 83.01255798339844, + "learning_rate": 2.3273531555337624e-06, + "loss": 17.7567, + "step": 355780 + }, + { + "epoch": 0.7187183102574772, + "grad_norm": 613.611572265625, + "learning_rate": 2.327058149078455e-06, + "loss": 20.1002, + "step": 355790 + }, + { + "epoch": 0.718738510890161, + "grad_norm": 208.4181365966797, + "learning_rate": 2.3267631556507443e-06, + "loss": 22.4576, + "step": 355800 + }, + { + "epoch": 0.7187587115228449, + "grad_norm": 419.4081726074219, + "learning_rate": 2.326468175252065e-06, + "loss": 17.6618, + "step": 355810 + }, + { + "epoch": 0.7187789121555287, + "grad_norm": 224.1723175048828, + "learning_rate": 2.326173207883854e-06, + "loss": 21.159, + "step": 355820 + }, + { + "epoch": 0.7187991127882125, + "grad_norm": 557.3604125976562, + "learning_rate": 2.325878253547552e-06, + "loss": 20.6774, + "step": 355830 + }, + { + "epoch": 0.7188193134208963, + "grad_norm": 222.83775329589844, + "learning_rate": 2.3255833122445937e-06, + "loss": 22.6103, + "step": 355840 + }, + { + "epoch": 0.7188395140535802, + "grad_norm": 745.8064575195312, + "learning_rate": 2.32528838397642e-06, + "loss": 23.5769, + "step": 
355850 + }, + { + "epoch": 0.718859714686264, + "grad_norm": 553.0786743164062, + "learning_rate": 2.3249934687444642e-06, + "loss": 19.6189, + "step": 355860 + }, + { + "epoch": 0.7188799153189478, + "grad_norm": 283.5133056640625, + "learning_rate": 2.3246985665501674e-06, + "loss": 22.1916, + "step": 355870 + }, + { + "epoch": 0.7189001159516316, + "grad_norm": 569.7648315429688, + "learning_rate": 2.3244036773949658e-06, + "loss": 20.4876, + "step": 355880 + }, + { + "epoch": 0.7189203165843154, + "grad_norm": 474.4637451171875, + "learning_rate": 2.3241088012802938e-06, + "loss": 15.6569, + "step": 355890 + }, + { + "epoch": 0.7189405172169993, + "grad_norm": 564.4053955078125, + "learning_rate": 2.323813938207593e-06, + "loss": 17.7916, + "step": 355900 + }, + { + "epoch": 0.7189607178496831, + "grad_norm": 130.00132751464844, + "learning_rate": 2.323519088178299e-06, + "loss": 17.8169, + "step": 355910 + }, + { + "epoch": 0.7189809184823669, + "grad_norm": 610.6986083984375, + "learning_rate": 2.3232242511938452e-06, + "loss": 20.7983, + "step": 355920 + }, + { + "epoch": 0.7190011191150507, + "grad_norm": 3521.993896484375, + "learning_rate": 2.322929427255673e-06, + "loss": 53.2133, + "step": 355930 + }, + { + "epoch": 0.7190213197477345, + "grad_norm": 74.32054901123047, + "learning_rate": 2.3226346163652213e-06, + "loss": 15.5488, + "step": 355940 + }, + { + "epoch": 0.7190415203804184, + "grad_norm": 316.9515380859375, + "learning_rate": 2.32233981852392e-06, + "loss": 17.8092, + "step": 355950 + }, + { + "epoch": 0.7190617210131022, + "grad_norm": 1.8933401107788086, + "learning_rate": 2.3220450337332097e-06, + "loss": 14.9994, + "step": 355960 + }, + { + "epoch": 0.719081921645786, + "grad_norm": 56.94375228881836, + "learning_rate": 2.321750261994529e-06, + "loss": 13.7345, + "step": 355970 + }, + { + "epoch": 0.7191021222784698, + "grad_norm": 478.0467834472656, + "learning_rate": 2.321455503309313e-06, + "loss": 11.1437, + "step": 355980 + }, + { + "epoch": 0.7191223229111535, + "grad_norm": 171.91444396972656, + "learning_rate": 2.3211607576789958e-06, + "loss": 17.4037, + "step": 355990 + }, + { + "epoch": 0.7191425235438373, + "grad_norm": 87.4278335571289, + "learning_rate": 2.320866025105016e-06, + "loss": 20.4756, + "step": 356000 + }, + { + "epoch": 0.7191627241765212, + "grad_norm": 380.328125, + "learning_rate": 2.320571305588814e-06, + "loss": 22.9658, + "step": 356010 + }, + { + "epoch": 0.719182924809205, + "grad_norm": 368.05047607421875, + "learning_rate": 2.3202765991318195e-06, + "loss": 11.3421, + "step": 356020 + }, + { + "epoch": 0.7192031254418888, + "grad_norm": 47.237091064453125, + "learning_rate": 2.3199819057354712e-06, + "loss": 23.8725, + "step": 356030 + }, + { + "epoch": 0.7192233260745726, + "grad_norm": 213.0360565185547, + "learning_rate": 2.3196872254012075e-06, + "loss": 30.4471, + "step": 356040 + }, + { + "epoch": 0.7192435267072564, + "grad_norm": 340.695556640625, + "learning_rate": 2.319392558130464e-06, + "loss": 23.5536, + "step": 356050 + }, + { + "epoch": 0.7192637273399403, + "grad_norm": 348.7161865234375, + "learning_rate": 2.3190979039246738e-06, + "loss": 25.9111, + "step": 356060 + }, + { + "epoch": 0.7192839279726241, + "grad_norm": 517.1141357421875, + "learning_rate": 2.318803262785277e-06, + "loss": 27.4346, + "step": 356070 + }, + { + "epoch": 0.7193041286053079, + "grad_norm": 408.94342041015625, + "learning_rate": 2.318508634713708e-06, + "loss": 18.317, + "step": 356080 + }, + { + "epoch": 0.7193243292379917, + 
"grad_norm": 936.9793090820312, + "learning_rate": 2.318214019711401e-06, + "loss": 20.7434, + "step": 356090 + }, + { + "epoch": 0.7193445298706755, + "grad_norm": 680.93896484375, + "learning_rate": 2.3179194177797954e-06, + "loss": 12.71, + "step": 356100 + }, + { + "epoch": 0.7193647305033594, + "grad_norm": 309.80169677734375, + "learning_rate": 2.3176248289203237e-06, + "loss": 21.2214, + "step": 356110 + }, + { + "epoch": 0.7193849311360432, + "grad_norm": 296.0082702636719, + "learning_rate": 2.3173302531344243e-06, + "loss": 17.1504, + "step": 356120 + }, + { + "epoch": 0.719405131768727, + "grad_norm": 426.7873840332031, + "learning_rate": 2.317035690423531e-06, + "loss": 22.7068, + "step": 356130 + }, + { + "epoch": 0.7194253324014108, + "grad_norm": 324.6080627441406, + "learning_rate": 2.3167411407890814e-06, + "loss": 22.9607, + "step": 356140 + }, + { + "epoch": 0.7194455330340946, + "grad_norm": 696.2737426757812, + "learning_rate": 2.3164466042325106e-06, + "loss": 17.5741, + "step": 356150 + }, + { + "epoch": 0.7194657336667785, + "grad_norm": 359.4756774902344, + "learning_rate": 2.316152080755251e-06, + "loss": 22.4907, + "step": 356160 + }, + { + "epoch": 0.7194859342994623, + "grad_norm": 251.14215087890625, + "learning_rate": 2.315857570358743e-06, + "loss": 16.7082, + "step": 356170 + }, + { + "epoch": 0.7195061349321461, + "grad_norm": 295.7928161621094, + "learning_rate": 2.3155630730444185e-06, + "loss": 11.9237, + "step": 356180 + }, + { + "epoch": 0.7195263355648299, + "grad_norm": 336.5198974609375, + "learning_rate": 2.315268588813715e-06, + "loss": 10.258, + "step": 356190 + }, + { + "epoch": 0.7195465361975137, + "grad_norm": 1.7450218200683594, + "learning_rate": 2.3149741176680666e-06, + "loss": 8.8111, + "step": 356200 + }, + { + "epoch": 0.7195667368301976, + "grad_norm": 376.4080810546875, + "learning_rate": 2.314679659608907e-06, + "loss": 34.4667, + "step": 356210 + }, + { + "epoch": 0.7195869374628814, + "grad_norm": 273.2095031738281, + "learning_rate": 2.3143852146376754e-06, + "loss": 30.1214, + "step": 356220 + }, + { + "epoch": 0.7196071380955652, + "grad_norm": 1135.8089599609375, + "learning_rate": 2.314090782755804e-06, + "loss": 16.1298, + "step": 356230 + }, + { + "epoch": 0.719627338728249, + "grad_norm": 1103.915283203125, + "learning_rate": 2.313796363964727e-06, + "loss": 38.4009, + "step": 356240 + }, + { + "epoch": 0.7196475393609327, + "grad_norm": 333.146728515625, + "learning_rate": 2.3135019582658803e-06, + "loss": 20.5211, + "step": 356250 + }, + { + "epoch": 0.7196677399936166, + "grad_norm": 269.69256591796875, + "learning_rate": 2.3132075656607034e-06, + "loss": 11.4717, + "step": 356260 + }, + { + "epoch": 0.7196879406263004, + "grad_norm": 143.51089477539062, + "learning_rate": 2.3129131861506225e-06, + "loss": 25.2597, + "step": 356270 + }, + { + "epoch": 0.7197081412589842, + "grad_norm": 412.6779479980469, + "learning_rate": 2.3126188197370773e-06, + "loss": 15.8539, + "step": 356280 + }, + { + "epoch": 0.719728341891668, + "grad_norm": 358.5265808105469, + "learning_rate": 2.312324466421504e-06, + "loss": 28.5921, + "step": 356290 + }, + { + "epoch": 0.7197485425243518, + "grad_norm": 256.109375, + "learning_rate": 2.312030126205335e-06, + "loss": 8.2917, + "step": 356300 + }, + { + "epoch": 0.7197687431570357, + "grad_norm": 477.0332336425781, + "learning_rate": 2.3117357990900034e-06, + "loss": 27.9492, + "step": 356310 + }, + { + "epoch": 0.7197889437897195, + "grad_norm": 81.21957397460938, + "learning_rate": 
2.3114414850769458e-06, + "loss": 14.0278, + "step": 356320 + }, + { + "epoch": 0.7198091444224033, + "grad_norm": 209.64715576171875, + "learning_rate": 2.3111471841675993e-06, + "loss": 26.2375, + "step": 356330 + }, + { + "epoch": 0.7198293450550871, + "grad_norm": 479.06219482421875, + "learning_rate": 2.310852896363392e-06, + "loss": 12.472, + "step": 356340 + }, + { + "epoch": 0.7198495456877709, + "grad_norm": 509.81658935546875, + "learning_rate": 2.3105586216657616e-06, + "loss": 27.761, + "step": 356350 + }, + { + "epoch": 0.7198697463204548, + "grad_norm": 616.83984375, + "learning_rate": 2.3102643600761445e-06, + "loss": 38.2269, + "step": 356360 + }, + { + "epoch": 0.7198899469531386, + "grad_norm": 180.7018585205078, + "learning_rate": 2.3099701115959715e-06, + "loss": 10.2843, + "step": 356370 + }, + { + "epoch": 0.7199101475858224, + "grad_norm": 259.7471618652344, + "learning_rate": 2.309675876226677e-06, + "loss": 12.6841, + "step": 356380 + }, + { + "epoch": 0.7199303482185062, + "grad_norm": 483.06158447265625, + "learning_rate": 2.309381653969698e-06, + "loss": 32.0829, + "step": 356390 + }, + { + "epoch": 0.71995054885119, + "grad_norm": 139.95729064941406, + "learning_rate": 2.309087444826464e-06, + "loss": 10.3258, + "step": 356400 + }, + { + "epoch": 0.7199707494838739, + "grad_norm": 650.1048583984375, + "learning_rate": 2.308793248798414e-06, + "loss": 20.2942, + "step": 356410 + }, + { + "epoch": 0.7199909501165577, + "grad_norm": 389.9107971191406, + "learning_rate": 2.308499065886978e-06, + "loss": 24.8322, + "step": 356420 + }, + { + "epoch": 0.7200111507492415, + "grad_norm": 207.32176208496094, + "learning_rate": 2.3082048960935905e-06, + "loss": 8.6192, + "step": 356430 + }, + { + "epoch": 0.7200313513819253, + "grad_norm": 474.74658203125, + "learning_rate": 2.3079107394196875e-06, + "loss": 6.9978, + "step": 356440 + }, + { + "epoch": 0.7200515520146091, + "grad_norm": 248.63880920410156, + "learning_rate": 2.3076165958666992e-06, + "loss": 9.2451, + "step": 356450 + }, + { + "epoch": 0.720071752647293, + "grad_norm": 478.6060791015625, + "learning_rate": 2.3073224654360627e-06, + "loss": 20.6712, + "step": 356460 + }, + { + "epoch": 0.7200919532799768, + "grad_norm": 191.9722900390625, + "learning_rate": 2.3070283481292077e-06, + "loss": 12.5131, + "step": 356470 + }, + { + "epoch": 0.7201121539126606, + "grad_norm": 671.8963623046875, + "learning_rate": 2.306734243947572e-06, + "loss": 25.6819, + "step": 356480 + }, + { + "epoch": 0.7201323545453444, + "grad_norm": 217.21632385253906, + "learning_rate": 2.3064401528925872e-06, + "loss": 14.4012, + "step": 356490 + }, + { + "epoch": 0.7201525551780282, + "grad_norm": 164.06991577148438, + "learning_rate": 2.3061460749656844e-06, + "loss": 12.9598, + "step": 356500 + }, + { + "epoch": 0.7201727558107119, + "grad_norm": 129.89657592773438, + "learning_rate": 2.3058520101683012e-06, + "loss": 26.4054, + "step": 356510 + }, + { + "epoch": 0.7201929564433958, + "grad_norm": 463.0583190917969, + "learning_rate": 2.3055579585018685e-06, + "loss": 19.5261, + "step": 356520 + }, + { + "epoch": 0.7202131570760796, + "grad_norm": 328.9757995605469, + "learning_rate": 2.3052639199678167e-06, + "loss": 13.6301, + "step": 356530 + }, + { + "epoch": 0.7202333577087634, + "grad_norm": 440.01763916015625, + "learning_rate": 2.3049698945675826e-06, + "loss": 13.7399, + "step": 356540 + }, + { + "epoch": 0.7202535583414472, + "grad_norm": 149.9425811767578, + "learning_rate": 2.3046758823026018e-06, + "loss": 14.2513, + 
"step": 356550 + }, + { + "epoch": 0.720273758974131, + "grad_norm": 842.3128662109375, + "learning_rate": 2.3043818831743003e-06, + "loss": 14.4759, + "step": 356560 + }, + { + "epoch": 0.7202939596068149, + "grad_norm": 432.9392395019531, + "learning_rate": 2.304087897184114e-06, + "loss": 24.7468, + "step": 356570 + }, + { + "epoch": 0.7203141602394987, + "grad_norm": 89.6991958618164, + "learning_rate": 2.303793924333479e-06, + "loss": 23.3948, + "step": 356580 + }, + { + "epoch": 0.7203343608721825, + "grad_norm": 412.6732482910156, + "learning_rate": 2.303499964623825e-06, + "loss": 17.0822, + "step": 356590 + }, + { + "epoch": 0.7203545615048663, + "grad_norm": 328.019287109375, + "learning_rate": 2.303206018056583e-06, + "loss": 13.4234, + "step": 356600 + }, + { + "epoch": 0.7203747621375501, + "grad_norm": 299.4187927246094, + "learning_rate": 2.3029120846331883e-06, + "loss": 16.7432, + "step": 356610 + }, + { + "epoch": 0.720394962770234, + "grad_norm": 35.62937927246094, + "learning_rate": 2.3026181643550767e-06, + "loss": 25.7069, + "step": 356620 + }, + { + "epoch": 0.7204151634029178, + "grad_norm": 350.29095458984375, + "learning_rate": 2.3023242572236728e-06, + "loss": 21.3234, + "step": 356630 + }, + { + "epoch": 0.7204353640356016, + "grad_norm": 261.7138671875, + "learning_rate": 2.3020303632404132e-06, + "loss": 15.6534, + "step": 356640 + }, + { + "epoch": 0.7204555646682854, + "grad_norm": 1417.159423828125, + "learning_rate": 2.301736482406733e-06, + "loss": 26.2665, + "step": 356650 + }, + { + "epoch": 0.7204757653009692, + "grad_norm": 655.8534545898438, + "learning_rate": 2.3014426147240614e-06, + "loss": 12.1236, + "step": 356660 + }, + { + "epoch": 0.7204959659336531, + "grad_norm": 349.3835754394531, + "learning_rate": 2.3011487601938292e-06, + "loss": 16.5535, + "step": 356670 + }, + { + "epoch": 0.7205161665663369, + "grad_norm": 194.57681274414062, + "learning_rate": 2.3008549188174728e-06, + "loss": 17.0723, + "step": 356680 + }, + { + "epoch": 0.7205363671990207, + "grad_norm": 254.3366241455078, + "learning_rate": 2.300561090596422e-06, + "loss": 13.1158, + "step": 356690 + }, + { + "epoch": 0.7205565678317045, + "grad_norm": 190.81695556640625, + "learning_rate": 2.3002672755321076e-06, + "loss": 15.1789, + "step": 356700 + }, + { + "epoch": 0.7205767684643883, + "grad_norm": 307.341064453125, + "learning_rate": 2.2999734736259644e-06, + "loss": 12.2914, + "step": 356710 + }, + { + "epoch": 0.7205969690970722, + "grad_norm": 644.7877807617188, + "learning_rate": 2.299679684879421e-06, + "loss": 31.196, + "step": 356720 + }, + { + "epoch": 0.720617169729756, + "grad_norm": 31.456466674804688, + "learning_rate": 2.2993859092939136e-06, + "loss": 13.6381, + "step": 356730 + }, + { + "epoch": 0.7206373703624398, + "grad_norm": 724.866455078125, + "learning_rate": 2.29909214687087e-06, + "loss": 17.2696, + "step": 356740 + }, + { + "epoch": 0.7206575709951236, + "grad_norm": 454.0261535644531, + "learning_rate": 2.298798397611725e-06, + "loss": 18.3462, + "step": 356750 + }, + { + "epoch": 0.7206777716278073, + "grad_norm": 8.363052368164062, + "learning_rate": 2.2985046615179098e-06, + "loss": 4.4754, + "step": 356760 + }, + { + "epoch": 0.7206979722604911, + "grad_norm": 264.83489990234375, + "learning_rate": 2.2982109385908524e-06, + "loss": 15.6975, + "step": 356770 + }, + { + "epoch": 0.720718172893175, + "grad_norm": 393.46734619140625, + "learning_rate": 2.29791722883199e-06, + "loss": 18.1938, + "step": 356780 + }, + { + "epoch": 0.7207383735258588, 
+ "grad_norm": 801.3164672851562, + "learning_rate": 2.2976235322427487e-06, + "loss": 41.4712, + "step": 356790 + }, + { + "epoch": 0.7207585741585426, + "grad_norm": 150.0250244140625, + "learning_rate": 2.297329848824565e-06, + "loss": 8.4385, + "step": 356800 + }, + { + "epoch": 0.7207787747912264, + "grad_norm": 398.1314697265625, + "learning_rate": 2.2970361785788673e-06, + "loss": 17.7207, + "step": 356810 + }, + { + "epoch": 0.7207989754239102, + "grad_norm": 170.1879119873047, + "learning_rate": 2.296742521507086e-06, + "loss": 9.9429, + "step": 356820 + }, + { + "epoch": 0.7208191760565941, + "grad_norm": 645.7498779296875, + "learning_rate": 2.296448877610655e-06, + "loss": 18.6266, + "step": 356830 + }, + { + "epoch": 0.7208393766892779, + "grad_norm": 266.1501159667969, + "learning_rate": 2.2961552468910048e-06, + "loss": 16.7124, + "step": 356840 + }, + { + "epoch": 0.7208595773219617, + "grad_norm": 222.91485595703125, + "learning_rate": 2.295861629349564e-06, + "loss": 14.8486, + "step": 356850 + }, + { + "epoch": 0.7208797779546455, + "grad_norm": 614.7936401367188, + "learning_rate": 2.295568024987766e-06, + "loss": 42.9427, + "step": 356860 + }, + { + "epoch": 0.7208999785873293, + "grad_norm": 453.291015625, + "learning_rate": 2.2952744338070447e-06, + "loss": 18.9652, + "step": 356870 + }, + { + "epoch": 0.7209201792200132, + "grad_norm": 555.583984375, + "learning_rate": 2.2949808558088243e-06, + "loss": 21.7545, + "step": 356880 + }, + { + "epoch": 0.720940379852697, + "grad_norm": 117.07242584228516, + "learning_rate": 2.2946872909945387e-06, + "loss": 64.4816, + "step": 356890 + }, + { + "epoch": 0.7209605804853808, + "grad_norm": 117.32300567626953, + "learning_rate": 2.294393739365621e-06, + "loss": 31.0427, + "step": 356900 + }, + { + "epoch": 0.7209807811180646, + "grad_norm": 314.8080139160156, + "learning_rate": 2.294100200923501e-06, + "loss": 21.1378, + "step": 356910 + }, + { + "epoch": 0.7210009817507484, + "grad_norm": 287.3998107910156, + "learning_rate": 2.293806675669606e-06, + "loss": 18.6618, + "step": 356920 + }, + { + "epoch": 0.7210211823834323, + "grad_norm": 179.3436737060547, + "learning_rate": 2.2935131636053687e-06, + "loss": 12.9096, + "step": 356930 + }, + { + "epoch": 0.7210413830161161, + "grad_norm": 181.98316955566406, + "learning_rate": 2.293219664732224e-06, + "loss": 18.5185, + "step": 356940 + }, + { + "epoch": 0.7210615836487999, + "grad_norm": 148.46475219726562, + "learning_rate": 2.2929261790515944e-06, + "loss": 21.5742, + "step": 356950 + }, + { + "epoch": 0.7210817842814837, + "grad_norm": 298.6468200683594, + "learning_rate": 2.2926327065649144e-06, + "loss": 9.8013, + "step": 356960 + }, + { + "epoch": 0.7211019849141675, + "grad_norm": 2026.063232421875, + "learning_rate": 2.292339247273617e-06, + "loss": 35.6442, + "step": 356970 + }, + { + "epoch": 0.7211221855468514, + "grad_norm": 202.90513610839844, + "learning_rate": 2.2920458011791287e-06, + "loss": 19.6064, + "step": 356980 + }, + { + "epoch": 0.7211423861795352, + "grad_norm": 276.17608642578125, + "learning_rate": 2.291752368282879e-06, + "loss": 19.9596, + "step": 356990 + }, + { + "epoch": 0.721162586812219, + "grad_norm": 287.6712951660156, + "learning_rate": 2.2914589485863015e-06, + "loss": 27.0526, + "step": 357000 + }, + { + "epoch": 0.7211827874449028, + "grad_norm": 560.3438110351562, + "learning_rate": 2.2911655420908247e-06, + "loss": 19.6648, + "step": 357010 + }, + { + "epoch": 0.7212029880775865, + "grad_norm": 320.8586120605469, + "learning_rate": 
2.290872148797877e-06, + "loss": 25.0655, + "step": 357020 + }, + { + "epoch": 0.7212231887102704, + "grad_norm": 209.60411071777344, + "learning_rate": 2.2905787687088914e-06, + "loss": 14.534, + "step": 357030 + }, + { + "epoch": 0.7212433893429542, + "grad_norm": 598.6663208007812, + "learning_rate": 2.2902854018252945e-06, + "loss": 17.7608, + "step": 357040 + }, + { + "epoch": 0.721263589975638, + "grad_norm": 455.5414123535156, + "learning_rate": 2.2899920481485192e-06, + "loss": 27.8769, + "step": 357050 + }, + { + "epoch": 0.7212837906083218, + "grad_norm": 809.207275390625, + "learning_rate": 2.2896987076799933e-06, + "loss": 20.7547, + "step": 357060 + }, + { + "epoch": 0.7213039912410056, + "grad_norm": 378.81134033203125, + "learning_rate": 2.289405380421148e-06, + "loss": 20.7372, + "step": 357070 + }, + { + "epoch": 0.7213241918736895, + "grad_norm": 579.6322631835938, + "learning_rate": 2.289112066373411e-06, + "loss": 14.606, + "step": 357080 + }, + { + "epoch": 0.7213443925063733, + "grad_norm": 318.2368469238281, + "learning_rate": 2.2888187655382143e-06, + "loss": 9.5947, + "step": 357090 + }, + { + "epoch": 0.7213645931390571, + "grad_norm": 452.0503845214844, + "learning_rate": 2.288525477916986e-06, + "loss": 15.6629, + "step": 357100 + }, + { + "epoch": 0.7213847937717409, + "grad_norm": 696.1923217773438, + "learning_rate": 2.2882322035111543e-06, + "loss": 14.907, + "step": 357110 + }, + { + "epoch": 0.7214049944044247, + "grad_norm": 836.78564453125, + "learning_rate": 2.2879389423221514e-06, + "loss": 16.9761, + "step": 357120 + }, + { + "epoch": 0.7214251950371086, + "grad_norm": 494.79388427734375, + "learning_rate": 2.287645694351406e-06, + "loss": 29.4495, + "step": 357130 + }, + { + "epoch": 0.7214453956697924, + "grad_norm": 474.1503601074219, + "learning_rate": 2.287352459600344e-06, + "loss": 9.4509, + "step": 357140 + }, + { + "epoch": 0.7214655963024762, + "grad_norm": 395.12396240234375, + "learning_rate": 2.287059238070397e-06, + "loss": 11.9516, + "step": 357150 + }, + { + "epoch": 0.72148579693516, + "grad_norm": 8.561695098876953, + "learning_rate": 2.2867660297629977e-06, + "loss": 13.3588, + "step": 357160 + }, + { + "epoch": 0.7215059975678438, + "grad_norm": 93.96231079101562, + "learning_rate": 2.2864728346795686e-06, + "loss": 14.2886, + "step": 357170 + }, + { + "epoch": 0.7215261982005277, + "grad_norm": 215.66087341308594, + "learning_rate": 2.286179652821542e-06, + "loss": 23.7562, + "step": 357180 + }, + { + "epoch": 0.7215463988332115, + "grad_norm": 438.2885437011719, + "learning_rate": 2.285886484190348e-06, + "loss": 19.0968, + "step": 357190 + }, + { + "epoch": 0.7215665994658953, + "grad_norm": 21.059423446655273, + "learning_rate": 2.285593328787414e-06, + "loss": 13.9654, + "step": 357200 + }, + { + "epoch": 0.7215868000985791, + "grad_norm": 185.17214965820312, + "learning_rate": 2.285300186614167e-06, + "loss": 12.5266, + "step": 357210 + }, + { + "epoch": 0.7216070007312629, + "grad_norm": 395.4156494140625, + "learning_rate": 2.285007057672038e-06, + "loss": 20.2223, + "step": 357220 + }, + { + "epoch": 0.7216272013639468, + "grad_norm": 411.3515319824219, + "learning_rate": 2.2847139419624585e-06, + "loss": 21.688, + "step": 357230 + }, + { + "epoch": 0.7216474019966306, + "grad_norm": 241.63412475585938, + "learning_rate": 2.2844208394868504e-06, + "loss": 14.3528, + "step": 357240 + }, + { + "epoch": 0.7216676026293144, + "grad_norm": 587.6914672851562, + "learning_rate": 2.284127750246646e-06, + "loss": 14.3501, + "step": 
357250 + }, + { + "epoch": 0.7216878032619982, + "grad_norm": 674.5878295898438, + "learning_rate": 2.2838346742432753e-06, + "loss": 24.0324, + "step": 357260 + }, + { + "epoch": 0.7217080038946819, + "grad_norm": 461.4089050292969, + "learning_rate": 2.283541611478165e-06, + "loss": 10.5924, + "step": 357270 + }, + { + "epoch": 0.7217282045273657, + "grad_norm": 278.19781494140625, + "learning_rate": 2.2832485619527417e-06, + "loss": 15.6594, + "step": 357280 + }, + { + "epoch": 0.7217484051600496, + "grad_norm": 623.696044921875, + "learning_rate": 2.2829555256684372e-06, + "loss": 22.9163, + "step": 357290 + }, + { + "epoch": 0.7217686057927334, + "grad_norm": 394.54296875, + "learning_rate": 2.282662502626678e-06, + "loss": 12.9196, + "step": 357300 + }, + { + "epoch": 0.7217888064254172, + "grad_norm": 155.50816345214844, + "learning_rate": 2.2823694928288897e-06, + "loss": 20.3786, + "step": 357310 + }, + { + "epoch": 0.721809007058101, + "grad_norm": 265.4114685058594, + "learning_rate": 2.282076496276506e-06, + "loss": 15.7187, + "step": 357320 + }, + { + "epoch": 0.7218292076907848, + "grad_norm": 697.023193359375, + "learning_rate": 2.2817835129709486e-06, + "loss": 14.364, + "step": 357330 + }, + { + "epoch": 0.7218494083234687, + "grad_norm": 487.0653381347656, + "learning_rate": 2.2814905429136515e-06, + "loss": 17.4101, + "step": 357340 + }, + { + "epoch": 0.7218696089561525, + "grad_norm": 276.76165771484375, + "learning_rate": 2.281197586106037e-06, + "loss": 37.9181, + "step": 357350 + }, + { + "epoch": 0.7218898095888363, + "grad_norm": 136.1702117919922, + "learning_rate": 2.2809046425495386e-06, + "loss": 21.9253, + "step": 357360 + }, + { + "epoch": 0.7219100102215201, + "grad_norm": 11.655019760131836, + "learning_rate": 2.2806117122455806e-06, + "loss": 13.4374, + "step": 357370 + }, + { + "epoch": 0.721930210854204, + "grad_norm": 251.75360107421875, + "learning_rate": 2.280318795195589e-06, + "loss": 23.9527, + "step": 357380 + }, + { + "epoch": 0.7219504114868878, + "grad_norm": 801.7538452148438, + "learning_rate": 2.2800258914009966e-06, + "loss": 26.2478, + "step": 357390 + }, + { + "epoch": 0.7219706121195716, + "grad_norm": 335.84063720703125, + "learning_rate": 2.2797330008632255e-06, + "loss": 22.8577, + "step": 357400 + }, + { + "epoch": 0.7219908127522554, + "grad_norm": 40.592529296875, + "learning_rate": 2.2794401235837083e-06, + "loss": 11.7794, + "step": 357410 + }, + { + "epoch": 0.7220110133849392, + "grad_norm": 244.24241638183594, + "learning_rate": 2.2791472595638693e-06, + "loss": 19.2623, + "step": 357420 + }, + { + "epoch": 0.722031214017623, + "grad_norm": 183.85824584960938, + "learning_rate": 2.278854408805135e-06, + "loss": 13.9928, + "step": 357430 + }, + { + "epoch": 0.7220514146503069, + "grad_norm": 282.90216064453125, + "learning_rate": 2.2785615713089363e-06, + "loss": 20.9458, + "step": 357440 + }, + { + "epoch": 0.7220716152829907, + "grad_norm": 418.7989807128906, + "learning_rate": 2.2782687470766985e-06, + "loss": 21.4213, + "step": 357450 + }, + { + "epoch": 0.7220918159156745, + "grad_norm": 579.4332885742188, + "learning_rate": 2.277975936109846e-06, + "loss": 15.887, + "step": 357460 + }, + { + "epoch": 0.7221120165483583, + "grad_norm": 184.46786499023438, + "learning_rate": 2.2776831384098096e-06, + "loss": 15.6152, + "step": 357470 + }, + { + "epoch": 0.7221322171810421, + "grad_norm": 233.37539672851562, + "learning_rate": 2.277390353978019e-06, + "loss": 13.5625, + "step": 357480 + }, + { + "epoch": 0.722152417813726, 
+ "grad_norm": 301.2912902832031, + "learning_rate": 2.2770975828158936e-06, + "loss": 19.0677, + "step": 357490 + }, + { + "epoch": 0.7221726184464098, + "grad_norm": 203.8992462158203, + "learning_rate": 2.2768048249248648e-06, + "loss": 9.9747, + "step": 357500 + }, + { + "epoch": 0.7221928190790936, + "grad_norm": 56.02370071411133, + "learning_rate": 2.27651208030636e-06, + "loss": 20.9339, + "step": 357510 + }, + { + "epoch": 0.7222130197117774, + "grad_norm": 509.4747619628906, + "learning_rate": 2.2762193489618057e-06, + "loss": 26.1781, + "step": 357520 + }, + { + "epoch": 0.7222332203444611, + "grad_norm": 324.1914367675781, + "learning_rate": 2.2759266308926257e-06, + "loss": 14.2268, + "step": 357530 + }, + { + "epoch": 0.722253420977145, + "grad_norm": 175.71090698242188, + "learning_rate": 2.275633926100249e-06, + "loss": 17.0575, + "step": 357540 + }, + { + "epoch": 0.7222736216098288, + "grad_norm": 3198.631103515625, + "learning_rate": 2.2753412345861065e-06, + "loss": 31.1765, + "step": 357550 + }, + { + "epoch": 0.7222938222425126, + "grad_norm": 219.0963592529297, + "learning_rate": 2.2750485563516154e-06, + "loss": 20.2615, + "step": 357560 + }, + { + "epoch": 0.7223140228751964, + "grad_norm": 77.74398040771484, + "learning_rate": 2.2747558913982084e-06, + "loss": 22.1748, + "step": 357570 + }, + { + "epoch": 0.7223342235078802, + "grad_norm": 1779.8966064453125, + "learning_rate": 2.2744632397273113e-06, + "loss": 34.767, + "step": 357580 + }, + { + "epoch": 0.722354424140564, + "grad_norm": 331.18829345703125, + "learning_rate": 2.2741706013403507e-06, + "loss": 21.3225, + "step": 357590 + }, + { + "epoch": 0.7223746247732479, + "grad_norm": 14.230195045471191, + "learning_rate": 2.27387797623875e-06, + "loss": 10.3357, + "step": 357600 + }, + { + "epoch": 0.7223948254059317, + "grad_norm": 434.1004943847656, + "learning_rate": 2.273585364423939e-06, + "loss": 22.1532, + "step": 357610 + }, + { + "epoch": 0.7224150260386155, + "grad_norm": 384.7958068847656, + "learning_rate": 2.2732927658973427e-06, + "loss": 23.2884, + "step": 357620 + }, + { + "epoch": 0.7224352266712993, + "grad_norm": 1325.3875732421875, + "learning_rate": 2.273000180660384e-06, + "loss": 32.3256, + "step": 357630 + }, + { + "epoch": 0.7224554273039832, + "grad_norm": 320.1987609863281, + "learning_rate": 2.272707608714493e-06, + "loss": 33.4472, + "step": 357640 + }, + { + "epoch": 0.722475627936667, + "grad_norm": 0.4604489505290985, + "learning_rate": 2.2724150500610946e-06, + "loss": 26.7996, + "step": 357650 + }, + { + "epoch": 0.7224958285693508, + "grad_norm": 269.8821716308594, + "learning_rate": 2.2721225047016153e-06, + "loss": 14.3756, + "step": 357660 + }, + { + "epoch": 0.7225160292020346, + "grad_norm": 134.71096801757812, + "learning_rate": 2.2718299726374786e-06, + "loss": 11.6502, + "step": 357670 + }, + { + "epoch": 0.7225362298347184, + "grad_norm": 215.3944091796875, + "learning_rate": 2.271537453870113e-06, + "loss": 17.0625, + "step": 357680 + }, + { + "epoch": 0.7225564304674023, + "grad_norm": 436.587646484375, + "learning_rate": 2.271244948400943e-06, + "loss": 18.9358, + "step": 357690 + }, + { + "epoch": 0.7225766311000861, + "grad_norm": 388.4676513671875, + "learning_rate": 2.2709524562313923e-06, + "loss": 26.0947, + "step": 357700 + }, + { + "epoch": 0.7225968317327699, + "grad_norm": 602.5151977539062, + "learning_rate": 2.2706599773628906e-06, + "loss": 13.6967, + "step": 357710 + }, + { + "epoch": 0.7226170323654537, + "grad_norm": 531.5065307617188, + 
"learning_rate": 2.270367511796859e-06, + "loss": 22.3433, + "step": 357720 + }, + { + "epoch": 0.7226372329981375, + "grad_norm": 413.5867004394531, + "learning_rate": 2.2700750595347263e-06, + "loss": 14.5211, + "step": 357730 + }, + { + "epoch": 0.7226574336308214, + "grad_norm": 352.24969482421875, + "learning_rate": 2.2697826205779178e-06, + "loss": 8.6732, + "step": 357740 + }, + { + "epoch": 0.7226776342635052, + "grad_norm": 385.8419494628906, + "learning_rate": 2.2694901949278554e-06, + "loss": 9.7671, + "step": 357750 + }, + { + "epoch": 0.722697834896189, + "grad_norm": 489.5587158203125, + "learning_rate": 2.269197782585968e-06, + "loss": 11.0093, + "step": 357760 + }, + { + "epoch": 0.7227180355288728, + "grad_norm": 330.181396484375, + "learning_rate": 2.26890538355368e-06, + "loss": 15.1593, + "step": 357770 + }, + { + "epoch": 0.7227382361615566, + "grad_norm": 253.468994140625, + "learning_rate": 2.2686129978324134e-06, + "loss": 13.8927, + "step": 357780 + }, + { + "epoch": 0.7227584367942403, + "grad_norm": 261.2122802734375, + "learning_rate": 2.2683206254235962e-06, + "loss": 16.3722, + "step": 357790 + }, + { + "epoch": 0.7227786374269242, + "grad_norm": 259.9643249511719, + "learning_rate": 2.268028266328655e-06, + "loss": 14.7177, + "step": 357800 + }, + { + "epoch": 0.722798838059608, + "grad_norm": 372.77593994140625, + "learning_rate": 2.2677359205490122e-06, + "loss": 20.6044, + "step": 357810 + }, + { + "epoch": 0.7228190386922918, + "grad_norm": 205.45460510253906, + "learning_rate": 2.267443588086092e-06, + "loss": 14.7403, + "step": 357820 + }, + { + "epoch": 0.7228392393249756, + "grad_norm": 9.897441864013672, + "learning_rate": 2.26715126894132e-06, + "loss": 13.5756, + "step": 357830 + }, + { + "epoch": 0.7228594399576594, + "grad_norm": 275.323486328125, + "learning_rate": 2.2668589631161246e-06, + "loss": 21.7044, + "step": 357840 + }, + { + "epoch": 0.7228796405903433, + "grad_norm": 136.56597900390625, + "learning_rate": 2.2665666706119237e-06, + "loss": 20.5228, + "step": 357850 + }, + { + "epoch": 0.7228998412230271, + "grad_norm": 436.6207580566406, + "learning_rate": 2.2662743914301455e-06, + "loss": 15.2756, + "step": 357860 + }, + { + "epoch": 0.7229200418557109, + "grad_norm": 663.1101684570312, + "learning_rate": 2.265982125572216e-06, + "loss": 23.2945, + "step": 357870 + }, + { + "epoch": 0.7229402424883947, + "grad_norm": 528.8939819335938, + "learning_rate": 2.2656898730395575e-06, + "loss": 30.845, + "step": 357880 + }, + { + "epoch": 0.7229604431210785, + "grad_norm": 165.55613708496094, + "learning_rate": 2.2653976338335936e-06, + "loss": 17.8566, + "step": 357890 + }, + { + "epoch": 0.7229806437537624, + "grad_norm": 754.408935546875, + "learning_rate": 2.265105407955752e-06, + "loss": 45.743, + "step": 357900 + }, + { + "epoch": 0.7230008443864462, + "grad_norm": 56.507568359375, + "learning_rate": 2.2648131954074546e-06, + "loss": 30.7881, + "step": 357910 + }, + { + "epoch": 0.72302104501913, + "grad_norm": 204.65957641601562, + "learning_rate": 2.264520996190124e-06, + "loss": 12.5634, + "step": 357920 + }, + { + "epoch": 0.7230412456518138, + "grad_norm": 152.5059356689453, + "learning_rate": 2.264228810305189e-06, + "loss": 7.7773, + "step": 357930 + }, + { + "epoch": 0.7230614462844976, + "grad_norm": 373.3321228027344, + "learning_rate": 2.2639366377540684e-06, + "loss": 17.3394, + "step": 357940 + }, + { + "epoch": 0.7230816469171815, + "grad_norm": 493.6985168457031, + "learning_rate": 2.263644478538191e-06, + "loss": 
32.958, + "step": 357950 + }, + { + "epoch": 0.7231018475498653, + "grad_norm": 1540.5264892578125, + "learning_rate": 2.263352332658976e-06, + "loss": 24.3123, + "step": 357960 + }, + { + "epoch": 0.7231220481825491, + "grad_norm": 121.92477416992188, + "learning_rate": 2.2630602001178524e-06, + "loss": 20.463, + "step": 357970 + }, + { + "epoch": 0.7231422488152329, + "grad_norm": 182.40499877929688, + "learning_rate": 2.262768080916241e-06, + "loss": 17.9135, + "step": 357980 + }, + { + "epoch": 0.7231624494479167, + "grad_norm": 377.84173583984375, + "learning_rate": 2.2624759750555642e-06, + "loss": 14.76, + "step": 357990 + }, + { + "epoch": 0.7231826500806006, + "grad_norm": 338.09686279296875, + "learning_rate": 2.2621838825372496e-06, + "loss": 9.3154, + "step": 358000 + }, + { + "epoch": 0.7232028507132844, + "grad_norm": 426.4371337890625, + "learning_rate": 2.2618918033627168e-06, + "loss": 25.7333, + "step": 358010 + }, + { + "epoch": 0.7232230513459682, + "grad_norm": 198.6202392578125, + "learning_rate": 2.2615997375333926e-06, + "loss": 8.7386, + "step": 358020 + }, + { + "epoch": 0.723243251978652, + "grad_norm": 73.05398559570312, + "learning_rate": 2.2613076850506997e-06, + "loss": 17.6739, + "step": 358030 + }, + { + "epoch": 0.7232634526113357, + "grad_norm": 131.81849670410156, + "learning_rate": 2.261015645916059e-06, + "loss": 12.5919, + "step": 358040 + }, + { + "epoch": 0.7232836532440196, + "grad_norm": 698.3875122070312, + "learning_rate": 2.2607236201308974e-06, + "loss": 23.4516, + "step": 358050 + }, + { + "epoch": 0.7233038538767034, + "grad_norm": 333.82537841796875, + "learning_rate": 2.260431607696637e-06, + "loss": 17.0055, + "step": 358060 + }, + { + "epoch": 0.7233240545093872, + "grad_norm": 268.1274108886719, + "learning_rate": 2.260139608614699e-06, + "loss": 24.7118, + "step": 358070 + }, + { + "epoch": 0.723344255142071, + "grad_norm": 542.8685302734375, + "learning_rate": 2.2598476228865078e-06, + "loss": 19.3529, + "step": 358080 + }, + { + "epoch": 0.7233644557747548, + "grad_norm": 201.8508758544922, + "learning_rate": 2.2595556505134885e-06, + "loss": 11.3707, + "step": 358090 + }, + { + "epoch": 0.7233846564074387, + "grad_norm": 434.41180419921875, + "learning_rate": 2.2592636914970633e-06, + "loss": 14.799, + "step": 358100 + }, + { + "epoch": 0.7234048570401225, + "grad_norm": 118.51235961914062, + "learning_rate": 2.258971745838652e-06, + "loss": 12.1212, + "step": 358110 + }, + { + "epoch": 0.7234250576728063, + "grad_norm": 507.77349853515625, + "learning_rate": 2.2586798135396824e-06, + "loss": 15.2521, + "step": 358120 + }, + { + "epoch": 0.7234452583054901, + "grad_norm": 77.28484344482422, + "learning_rate": 2.258387894601575e-06, + "loss": 7.3264, + "step": 358130 + }, + { + "epoch": 0.7234654589381739, + "grad_norm": 337.06976318359375, + "learning_rate": 2.2580959890257496e-06, + "loss": 17.3473, + "step": 358140 + }, + { + "epoch": 0.7234856595708578, + "grad_norm": 400.00372314453125, + "learning_rate": 2.2578040968136326e-06, + "loss": 26.0037, + "step": 358150 + }, + { + "epoch": 0.7235058602035416, + "grad_norm": 315.7028503417969, + "learning_rate": 2.25751221796665e-06, + "loss": 22.5793, + "step": 358160 + }, + { + "epoch": 0.7235260608362254, + "grad_norm": 118.08807373046875, + "learning_rate": 2.257220352486216e-06, + "loss": 18.7497, + "step": 358170 + }, + { + "epoch": 0.7235462614689092, + "grad_norm": 493.3076171875, + "learning_rate": 2.2569285003737567e-06, + "loss": 15.3999, + "step": 358180 + }, + { + "epoch": 
0.723566462101593, + "grad_norm": 480.72161865234375, + "learning_rate": 2.256636661630698e-06, + "loss": 10.4665, + "step": 358190 + }, + { + "epoch": 0.7235866627342769, + "grad_norm": 428.39788818359375, + "learning_rate": 2.256344836258459e-06, + "loss": 17.419, + "step": 358200 + }, + { + "epoch": 0.7236068633669607, + "grad_norm": 467.6040954589844, + "learning_rate": 2.2560530242584604e-06, + "loss": 16.9549, + "step": 358210 + }, + { + "epoch": 0.7236270639996445, + "grad_norm": 719.1596069335938, + "learning_rate": 2.255761225632129e-06, + "loss": 28.8495, + "step": 358220 + }, + { + "epoch": 0.7236472646323283, + "grad_norm": 287.503173828125, + "learning_rate": 2.255469440380885e-06, + "loss": 19.0372, + "step": 358230 + }, + { + "epoch": 0.7236674652650121, + "grad_norm": 181.43899536132812, + "learning_rate": 2.255177668506147e-06, + "loss": 12.2521, + "step": 358240 + }, + { + "epoch": 0.723687665897696, + "grad_norm": 32.46775817871094, + "learning_rate": 2.254885910009341e-06, + "loss": 10.1338, + "step": 358250 + }, + { + "epoch": 0.7237078665303798, + "grad_norm": 760.5326538085938, + "learning_rate": 2.2545941648918897e-06, + "loss": 23.1847, + "step": 358260 + }, + { + "epoch": 0.7237280671630636, + "grad_norm": 41.78430938720703, + "learning_rate": 2.2543024331552133e-06, + "loss": 19.854, + "step": 358270 + }, + { + "epoch": 0.7237482677957474, + "grad_norm": 402.6669006347656, + "learning_rate": 2.2540107148007316e-06, + "loss": 20.7295, + "step": 358280 + }, + { + "epoch": 0.7237684684284312, + "grad_norm": 19.97368812561035, + "learning_rate": 2.253719009829871e-06, + "loss": 8.8038, + "step": 358290 + }, + { + "epoch": 0.7237886690611149, + "grad_norm": 168.602294921875, + "learning_rate": 2.2534273182440515e-06, + "loss": 12.4717, + "step": 358300 + }, + { + "epoch": 0.7238088696937988, + "grad_norm": 44.03022003173828, + "learning_rate": 2.2531356400446913e-06, + "loss": 12.1233, + "step": 358310 + }, + { + "epoch": 0.7238290703264826, + "grad_norm": 20.101224899291992, + "learning_rate": 2.252843975233217e-06, + "loss": 13.4733, + "step": 358320 + }, + { + "epoch": 0.7238492709591664, + "grad_norm": 644.1679077148438, + "learning_rate": 2.2525523238110465e-06, + "loss": 17.2313, + "step": 358330 + }, + { + "epoch": 0.7238694715918502, + "grad_norm": 230.2259979248047, + "learning_rate": 2.2522606857796036e-06, + "loss": 12.693, + "step": 358340 + }, + { + "epoch": 0.723889672224534, + "grad_norm": 1635.599853515625, + "learning_rate": 2.25196906114031e-06, + "loss": 17.1778, + "step": 358350 + }, + { + "epoch": 0.7239098728572179, + "grad_norm": 440.3619689941406, + "learning_rate": 2.251677449894583e-06, + "loss": 21.9231, + "step": 358360 + }, + { + "epoch": 0.7239300734899017, + "grad_norm": 32.30497741699219, + "learning_rate": 2.2513858520438497e-06, + "loss": 9.506, + "step": 358370 + }, + { + "epoch": 0.7239502741225855, + "grad_norm": 501.3995361328125, + "learning_rate": 2.2510942675895277e-06, + "loss": 14.3097, + "step": 358380 + }, + { + "epoch": 0.7239704747552693, + "grad_norm": 821.6167602539062, + "learning_rate": 2.250802696533037e-06, + "loss": 24.4095, + "step": 358390 + }, + { + "epoch": 0.7239906753879531, + "grad_norm": 449.1909484863281, + "learning_rate": 2.250511138875801e-06, + "loss": 7.4873, + "step": 358400 + }, + { + "epoch": 0.724010876020637, + "grad_norm": 615.5599975585938, + "learning_rate": 2.250219594619242e-06, + "loss": 17.3436, + "step": 358410 + }, + { + "epoch": 0.7240310766533208, + "grad_norm": 159.68350219726562, + 
"learning_rate": 2.2499280637647785e-06, + "loss": 22.6658, + "step": 358420 + }, + { + "epoch": 0.7240512772860046, + "grad_norm": 319.6164855957031, + "learning_rate": 2.249636546313831e-06, + "loss": 19.9201, + "step": 358430 + }, + { + "epoch": 0.7240714779186884, + "grad_norm": 166.0934295654297, + "learning_rate": 2.2493450422678224e-06, + "loss": 25.8329, + "step": 358440 + }, + { + "epoch": 0.7240916785513722, + "grad_norm": 441.7103576660156, + "learning_rate": 2.249053551628173e-06, + "loss": 22.1586, + "step": 358450 + }, + { + "epoch": 0.7241118791840561, + "grad_norm": 322.5674743652344, + "learning_rate": 2.248762074396301e-06, + "loss": 27.2304, + "step": 358460 + }, + { + "epoch": 0.7241320798167399, + "grad_norm": 541.81787109375, + "learning_rate": 2.2484706105736294e-06, + "loss": 12.4397, + "step": 358470 + }, + { + "epoch": 0.7241522804494237, + "grad_norm": 408.0065612792969, + "learning_rate": 2.2481791601615797e-06, + "loss": 12.977, + "step": 358480 + }, + { + "epoch": 0.7241724810821075, + "grad_norm": 417.0727844238281, + "learning_rate": 2.247887723161571e-06, + "loss": 30.8202, + "step": 358490 + }, + { + "epoch": 0.7241926817147913, + "grad_norm": 809.7229614257812, + "learning_rate": 2.2475962995750224e-06, + "loss": 18.3792, + "step": 358500 + }, + { + "epoch": 0.7242128823474752, + "grad_norm": 109.99610900878906, + "learning_rate": 2.2473048894033566e-06, + "loss": 8.9393, + "step": 358510 + }, + { + "epoch": 0.724233082980159, + "grad_norm": 638.1373901367188, + "learning_rate": 2.247013492647994e-06, + "loss": 20.4283, + "step": 358520 + }, + { + "epoch": 0.7242532836128428, + "grad_norm": 411.397216796875, + "learning_rate": 2.246722109310351e-06, + "loss": 17.646, + "step": 358530 + }, + { + "epoch": 0.7242734842455266, + "grad_norm": 500.1414489746094, + "learning_rate": 2.2464307393918523e-06, + "loss": 26.2394, + "step": 358540 + }, + { + "epoch": 0.7242936848782103, + "grad_norm": 370.89263916015625, + "learning_rate": 2.246139382893915e-06, + "loss": 26.8334, + "step": 358550 + }, + { + "epoch": 0.7243138855108942, + "grad_norm": 463.3133850097656, + "learning_rate": 2.2458480398179615e-06, + "loss": 37.4234, + "step": 358560 + }, + { + "epoch": 0.724334086143578, + "grad_norm": 316.4405212402344, + "learning_rate": 2.245556710165409e-06, + "loss": 13.3972, + "step": 358570 + }, + { + "epoch": 0.7243542867762618, + "grad_norm": 553.1693115234375, + "learning_rate": 2.245265393937681e-06, + "loss": 22.1872, + "step": 358580 + }, + { + "epoch": 0.7243744874089456, + "grad_norm": 591.702392578125, + "learning_rate": 2.2449740911361955e-06, + "loss": 20.8448, + "step": 358590 + }, + { + "epoch": 0.7243946880416294, + "grad_norm": 276.9403991699219, + "learning_rate": 2.24468280176237e-06, + "loss": 18.095, + "step": 358600 + }, + { + "epoch": 0.7244148886743133, + "grad_norm": 455.9407653808594, + "learning_rate": 2.2443915258176283e-06, + "loss": 18.1615, + "step": 358610 + }, + { + "epoch": 0.7244350893069971, + "grad_norm": 66.06970977783203, + "learning_rate": 2.2441002633033865e-06, + "loss": 12.1589, + "step": 358620 + }, + { + "epoch": 0.7244552899396809, + "grad_norm": 451.0813293457031, + "learning_rate": 2.243809014221068e-06, + "loss": 19.9946, + "step": 358630 + }, + { + "epoch": 0.7244754905723647, + "grad_norm": 7.21042013168335, + "learning_rate": 2.243517778572089e-06, + "loss": 28.7723, + "step": 358640 + }, + { + "epoch": 0.7244956912050485, + "grad_norm": 191.8756866455078, + "learning_rate": 2.2432265563578686e-06, + "loss": 
7.0977, + "step": 358650 + }, + { + "epoch": 0.7245158918377324, + "grad_norm": 216.77684020996094, + "learning_rate": 2.2429353475798298e-06, + "loss": 7.8373, + "step": 358660 + }, + { + "epoch": 0.7245360924704162, + "grad_norm": 332.76300048828125, + "learning_rate": 2.2426441522393893e-06, + "loss": 15.3892, + "step": 358670 + }, + { + "epoch": 0.7245562931031, + "grad_norm": 317.4374084472656, + "learning_rate": 2.2423529703379646e-06, + "loss": 23.7859, + "step": 358680 + }, + { + "epoch": 0.7245764937357838, + "grad_norm": 66.8053970336914, + "learning_rate": 2.242061801876978e-06, + "loss": 9.3861, + "step": 358690 + }, + { + "epoch": 0.7245966943684676, + "grad_norm": 214.3506317138672, + "learning_rate": 2.2417706468578495e-06, + "loss": 16.2656, + "step": 358700 + }, + { + "epoch": 0.7246168950011515, + "grad_norm": 537.5006103515625, + "learning_rate": 2.2414795052819956e-06, + "loss": 32.6843, + "step": 358710 + }, + { + "epoch": 0.7246370956338353, + "grad_norm": 456.9745178222656, + "learning_rate": 2.241188377150834e-06, + "loss": 24.5563, + "step": 358720 + }, + { + "epoch": 0.7246572962665191, + "grad_norm": 153.896484375, + "learning_rate": 2.240897262465788e-06, + "loss": 22.6297, + "step": 358730 + }, + { + "epoch": 0.7246774968992029, + "grad_norm": 14.554478645324707, + "learning_rate": 2.240606161228274e-06, + "loss": 12.3954, + "step": 358740 + }, + { + "epoch": 0.7246976975318867, + "grad_norm": 398.4756164550781, + "learning_rate": 2.2403150734397095e-06, + "loss": 16.2786, + "step": 358750 + }, + { + "epoch": 0.7247178981645706, + "grad_norm": 197.67860412597656, + "learning_rate": 2.2400239991015144e-06, + "loss": 15.9143, + "step": 358760 + }, + { + "epoch": 0.7247380987972544, + "grad_norm": 453.6308288574219, + "learning_rate": 2.239732938215111e-06, + "loss": 10.0401, + "step": 358770 + }, + { + "epoch": 0.7247582994299382, + "grad_norm": 549.3597412109375, + "learning_rate": 2.239441890781911e-06, + "loss": 14.2409, + "step": 358780 + }, + { + "epoch": 0.724778500062622, + "grad_norm": 312.85858154296875, + "learning_rate": 2.239150856803336e-06, + "loss": 16.799, + "step": 358790 + }, + { + "epoch": 0.7247987006953058, + "grad_norm": 590.366943359375, + "learning_rate": 2.2388598362808074e-06, + "loss": 27.8185, + "step": 358800 + }, + { + "epoch": 0.7248189013279895, + "grad_norm": 219.06785583496094, + "learning_rate": 2.2385688292157405e-06, + "loss": 10.0919, + "step": 358810 + }, + { + "epoch": 0.7248391019606734, + "grad_norm": 148.28330993652344, + "learning_rate": 2.2382778356095524e-06, + "loss": 14.3372, + "step": 358820 + }, + { + "epoch": 0.7248593025933572, + "grad_norm": 195.47874450683594, + "learning_rate": 2.2379868554636653e-06, + "loss": 15.591, + "step": 358830 + }, + { + "epoch": 0.724879503226041, + "grad_norm": 206.07144165039062, + "learning_rate": 2.2376958887794953e-06, + "loss": 18.501, + "step": 358840 + }, + { + "epoch": 0.7248997038587248, + "grad_norm": 447.396728515625, + "learning_rate": 2.2374049355584583e-06, + "loss": 9.9212, + "step": 358850 + }, + { + "epoch": 0.7249199044914086, + "grad_norm": 368.84735107421875, + "learning_rate": 2.237113995801975e-06, + "loss": 36.9193, + "step": 358860 + }, + { + "epoch": 0.7249401051240925, + "grad_norm": 2.169508695602417, + "learning_rate": 2.2368230695114644e-06, + "loss": 29.4848, + "step": 358870 + }, + { + "epoch": 0.7249603057567763, + "grad_norm": 60.03493118286133, + "learning_rate": 2.2365321566883437e-06, + "loss": 28.9763, + "step": 358880 + }, + { + "epoch": 
0.7249805063894601, + "grad_norm": 682.3919067382812, + "learning_rate": 2.2362412573340274e-06, + "loss": 22.0694, + "step": 358890 + }, + { + "epoch": 0.7250007070221439, + "grad_norm": 59.70888137817383, + "learning_rate": 2.235950371449938e-06, + "loss": 13.3987, + "step": 358900 + }, + { + "epoch": 0.7250209076548277, + "grad_norm": 578.2684936523438, + "learning_rate": 2.235659499037492e-06, + "loss": 13.9095, + "step": 358910 + }, + { + "epoch": 0.7250411082875116, + "grad_norm": 189.75296020507812, + "learning_rate": 2.2353686400981038e-06, + "loss": 22.0163, + "step": 358920 + }, + { + "epoch": 0.7250613089201954, + "grad_norm": 378.1733703613281, + "learning_rate": 2.235077794633196e-06, + "loss": 16.2982, + "step": 358930 + }, + { + "epoch": 0.7250815095528792, + "grad_norm": 368.48858642578125, + "learning_rate": 2.234786962644181e-06, + "loss": 15.0167, + "step": 358940 + }, + { + "epoch": 0.725101710185563, + "grad_norm": 871.95166015625, + "learning_rate": 2.2344961441324814e-06, + "loss": 21.6351, + "step": 358950 + }, + { + "epoch": 0.7251219108182468, + "grad_norm": 389.7361755371094, + "learning_rate": 2.2342053390995117e-06, + "loss": 16.9612, + "step": 358960 + }, + { + "epoch": 0.7251421114509307, + "grad_norm": 15.658724784851074, + "learning_rate": 2.2339145475466885e-06, + "loss": 15.0011, + "step": 358970 + }, + { + "epoch": 0.7251623120836145, + "grad_norm": 202.39169311523438, + "learning_rate": 2.2336237694754314e-06, + "loss": 24.985, + "step": 358980 + }, + { + "epoch": 0.7251825127162983, + "grad_norm": 297.51947021484375, + "learning_rate": 2.233333004887157e-06, + "loss": 15.1063, + "step": 358990 + }, + { + "epoch": 0.7252027133489821, + "grad_norm": 93.51701354980469, + "learning_rate": 2.23304225378328e-06, + "loss": 8.9163, + "step": 359000 + }, + { + "epoch": 0.7252229139816659, + "grad_norm": 467.2501220703125, + "learning_rate": 2.2327515161652196e-06, + "loss": 15.1077, + "step": 359010 + }, + { + "epoch": 0.7252431146143498, + "grad_norm": 326.148681640625, + "learning_rate": 2.232460792034395e-06, + "loss": 15.2704, + "step": 359020 + }, + { + "epoch": 0.7252633152470336, + "grad_norm": 526.8595581054688, + "learning_rate": 2.2321700813922205e-06, + "loss": 12.8883, + "step": 359030 + }, + { + "epoch": 0.7252835158797174, + "grad_norm": 31.045886993408203, + "learning_rate": 2.231879384240111e-06, + "loss": 22.5578, + "step": 359040 + }, + { + "epoch": 0.7253037165124012, + "grad_norm": 115.10821533203125, + "learning_rate": 2.231588700579488e-06, + "loss": 5.4195, + "step": 359050 + }, + { + "epoch": 0.7253239171450849, + "grad_norm": 341.2144775390625, + "learning_rate": 2.2312980304117656e-06, + "loss": 13.2967, + "step": 359060 + }, + { + "epoch": 0.7253441177777687, + "grad_norm": 7.237173557281494, + "learning_rate": 2.2310073737383593e-06, + "loss": 15.3578, + "step": 359070 + }, + { + "epoch": 0.7253643184104526, + "grad_norm": 742.6315307617188, + "learning_rate": 2.230716730560687e-06, + "loss": 21.257, + "step": 359080 + }, + { + "epoch": 0.7253845190431364, + "grad_norm": 143.5435333251953, + "learning_rate": 2.230426100880167e-06, + "loss": 19.4306, + "step": 359090 + }, + { + "epoch": 0.7254047196758202, + "grad_norm": 376.4156494140625, + "learning_rate": 2.2301354846982148e-06, + "loss": 16.1877, + "step": 359100 + }, + { + "epoch": 0.725424920308504, + "grad_norm": 538.2612915039062, + "learning_rate": 2.2298448820162438e-06, + "loss": 12.3151, + "step": 359110 + }, + { + "epoch": 0.7254451209411878, + "grad_norm": 
255.2987518310547, + "learning_rate": 2.2295542928356755e-06, + "loss": 18.9394, + "step": 359120 + }, + { + "epoch": 0.7254653215738717, + "grad_norm": 330.2919616699219, + "learning_rate": 2.229263717157923e-06, + "loss": 12.4454, + "step": 359130 + }, + { + "epoch": 0.7254855222065555, + "grad_norm": 315.2470397949219, + "learning_rate": 2.2289731549844018e-06, + "loss": 42.2872, + "step": 359140 + }, + { + "epoch": 0.7255057228392393, + "grad_norm": 715.8504638671875, + "learning_rate": 2.228682606316529e-06, + "loss": 18.0506, + "step": 359150 + }, + { + "epoch": 0.7255259234719231, + "grad_norm": 201.93016052246094, + "learning_rate": 2.2283920711557226e-06, + "loss": 17.994, + "step": 359160 + }, + { + "epoch": 0.725546124104607, + "grad_norm": 697.1846923828125, + "learning_rate": 2.2281015495033975e-06, + "loss": 16.3907, + "step": 359170 + }, + { + "epoch": 0.7255663247372908, + "grad_norm": 544.1822509765625, + "learning_rate": 2.227811041360967e-06, + "loss": 25.6777, + "step": 359180 + }, + { + "epoch": 0.7255865253699746, + "grad_norm": 347.1976318359375, + "learning_rate": 2.2275205467298515e-06, + "loss": 24.672, + "step": 359190 + }, + { + "epoch": 0.7256067260026584, + "grad_norm": 760.2865600585938, + "learning_rate": 2.2272300656114648e-06, + "loss": 18.438, + "step": 359200 + }, + { + "epoch": 0.7256269266353422, + "grad_norm": 488.0180969238281, + "learning_rate": 2.2269395980072206e-06, + "loss": 21.7027, + "step": 359210 + }, + { + "epoch": 0.725647127268026, + "grad_norm": 682.90966796875, + "learning_rate": 2.226649143918538e-06, + "loss": 17.3114, + "step": 359220 + }, + { + "epoch": 0.7256673279007099, + "grad_norm": 1414.949462890625, + "learning_rate": 2.2263587033468293e-06, + "loss": 18.8927, + "step": 359230 + }, + { + "epoch": 0.7256875285333937, + "grad_norm": 313.8021240234375, + "learning_rate": 2.2260682762935137e-06, + "loss": 14.85, + "step": 359240 + }, + { + "epoch": 0.7257077291660775, + "grad_norm": 1083.0970458984375, + "learning_rate": 2.2257778627600044e-06, + "loss": 21.5205, + "step": 359250 + }, + { + "epoch": 0.7257279297987613, + "grad_norm": 270.201416015625, + "learning_rate": 2.2254874627477164e-06, + "loss": 14.2119, + "step": 359260 + }, + { + "epoch": 0.7257481304314451, + "grad_norm": 263.2742004394531, + "learning_rate": 2.2251970762580675e-06, + "loss": 19.2875, + "step": 359270 + }, + { + "epoch": 0.725768331064129, + "grad_norm": 40.12912368774414, + "learning_rate": 2.2249067032924715e-06, + "loss": 23.795, + "step": 359280 + }, + { + "epoch": 0.7257885316968128, + "grad_norm": 365.149658203125, + "learning_rate": 2.2246163438523417e-06, + "loss": 17.3139, + "step": 359290 + }, + { + "epoch": 0.7258087323294966, + "grad_norm": 222.59262084960938, + "learning_rate": 2.224325997939095e-06, + "loss": 12.1256, + "step": 359300 + }, + { + "epoch": 0.7258289329621804, + "grad_norm": 289.6069030761719, + "learning_rate": 2.2240356655541488e-06, + "loss": 19.4232, + "step": 359310 + }, + { + "epoch": 0.7258491335948641, + "grad_norm": 27.476863861083984, + "learning_rate": 2.223745346698917e-06, + "loss": 10.5382, + "step": 359320 + }, + { + "epoch": 0.725869334227548, + "grad_norm": 269.6813659667969, + "learning_rate": 2.2234550413748106e-06, + "loss": 17.7383, + "step": 359330 + }, + { + "epoch": 0.7258895348602318, + "grad_norm": 1166.08056640625, + "learning_rate": 2.2231647495832496e-06, + "loss": 22.4043, + "step": 359340 + }, + { + "epoch": 0.7259097354929156, + "grad_norm": 1.4344022274017334, + "learning_rate": 
2.222874471325647e-06, + "loss": 14.3152, + "step": 359350 + }, + { + "epoch": 0.7259299361255994, + "grad_norm": 540.9111328125, + "learning_rate": 2.222584206603416e-06, + "loss": 16.838, + "step": 359360 + }, + { + "epoch": 0.7259501367582832, + "grad_norm": 253.33688354492188, + "learning_rate": 2.222293955417972e-06, + "loss": 16.5393, + "step": 359370 + }, + { + "epoch": 0.7259703373909671, + "grad_norm": 393.6908874511719, + "learning_rate": 2.2220037177707342e-06, + "loss": 21.5705, + "step": 359380 + }, + { + "epoch": 0.7259905380236509, + "grad_norm": 423.1177062988281, + "learning_rate": 2.2217134936631095e-06, + "loss": 16.9682, + "step": 359390 + }, + { + "epoch": 0.7260107386563347, + "grad_norm": 285.3522644042969, + "learning_rate": 2.221423283096517e-06, + "loss": 13.07, + "step": 359400 + }, + { + "epoch": 0.7260309392890185, + "grad_norm": 474.0448303222656, + "learning_rate": 2.221133086072372e-06, + "loss": 23.2813, + "step": 359410 + }, + { + "epoch": 0.7260511399217023, + "grad_norm": 299.09234619140625, + "learning_rate": 2.220842902592087e-06, + "loss": 16.1896, + "step": 359420 + }, + { + "epoch": 0.7260713405543862, + "grad_norm": 10.753003120422363, + "learning_rate": 2.220552732657075e-06, + "loss": 17.9483, + "step": 359430 + }, + { + "epoch": 0.72609154118707, + "grad_norm": 603.6778564453125, + "learning_rate": 2.2202625762687533e-06, + "loss": 14.1939, + "step": 359440 + }, + { + "epoch": 0.7261117418197538, + "grad_norm": 158.3985595703125, + "learning_rate": 2.219972433428535e-06, + "loss": 15.2814, + "step": 359450 + }, + { + "epoch": 0.7261319424524376, + "grad_norm": 170.3075408935547, + "learning_rate": 2.2196823041378325e-06, + "loss": 12.2966, + "step": 359460 + }, + { + "epoch": 0.7261521430851214, + "grad_norm": 190.19564819335938, + "learning_rate": 2.21939218839806e-06, + "loss": 7.367, + "step": 359470 + }, + { + "epoch": 0.7261723437178053, + "grad_norm": 199.4613037109375, + "learning_rate": 2.2191020862106353e-06, + "loss": 15.0406, + "step": 359480 + }, + { + "epoch": 0.7261925443504891, + "grad_norm": 362.06243896484375, + "learning_rate": 2.21881199757697e-06, + "loss": 25.7381, + "step": 359490 + }, + { + "epoch": 0.7262127449831729, + "grad_norm": 168.42156982421875, + "learning_rate": 2.218521922498476e-06, + "loss": 20.4386, + "step": 359500 + }, + { + "epoch": 0.7262329456158567, + "grad_norm": 173.8197784423828, + "learning_rate": 2.2182318609765703e-06, + "loss": 13.2371, + "step": 359510 + }, + { + "epoch": 0.7262531462485405, + "grad_norm": 264.0271301269531, + "learning_rate": 2.217941813012665e-06, + "loss": 32.4108, + "step": 359520 + }, + { + "epoch": 0.7262733468812244, + "grad_norm": 69.82245635986328, + "learning_rate": 2.217651778608172e-06, + "loss": 15.0553, + "step": 359530 + }, + { + "epoch": 0.7262935475139082, + "grad_norm": 291.6565246582031, + "learning_rate": 2.217361757764509e-06, + "loss": 6.4934, + "step": 359540 + }, + { + "epoch": 0.726313748146592, + "grad_norm": 569.568359375, + "learning_rate": 2.217071750483085e-06, + "loss": 14.8901, + "step": 359550 + }, + { + "epoch": 0.7263339487792758, + "grad_norm": 269.9482727050781, + "learning_rate": 2.2167817567653176e-06, + "loss": 15.6459, + "step": 359560 + }, + { + "epoch": 0.7263541494119596, + "grad_norm": 1034.3734130859375, + "learning_rate": 2.216491776612619e-06, + "loss": 29.7321, + "step": 359570 + }, + { + "epoch": 0.7263743500446433, + "grad_norm": 213.13063049316406, + "learning_rate": 2.2162018100263995e-06, + "loss": 57.3121, + "step": 359580 + 
}, + { + "epoch": 0.7263945506773272, + "grad_norm": 59.16593551635742, + "learning_rate": 2.215911857008077e-06, + "loss": 11.3962, + "step": 359590 + }, + { + "epoch": 0.726414751310011, + "grad_norm": 550.7051391601562, + "learning_rate": 2.2156219175590623e-06, + "loss": 19.9069, + "step": 359600 + }, + { + "epoch": 0.7264349519426948, + "grad_norm": 348.9745788574219, + "learning_rate": 2.215331991680766e-06, + "loss": 11.6251, + "step": 359610 + }, + { + "epoch": 0.7264551525753786, + "grad_norm": 405.37017822265625, + "learning_rate": 2.215042079374605e-06, + "loss": 12.169, + "step": 359620 + }, + { + "epoch": 0.7264753532080624, + "grad_norm": 200.28878784179688, + "learning_rate": 2.214752180641992e-06, + "loss": 17.7441, + "step": 359630 + }, + { + "epoch": 0.7264955538407463, + "grad_norm": 612.6473388671875, + "learning_rate": 2.2144622954843396e-06, + "loss": 24.8813, + "step": 359640 + }, + { + "epoch": 0.7265157544734301, + "grad_norm": 311.94775390625, + "learning_rate": 2.214172423903058e-06, + "loss": 19.8329, + "step": 359650 + }, + { + "epoch": 0.7265359551061139, + "grad_norm": 314.4627990722656, + "learning_rate": 2.2138825658995645e-06, + "loss": 20.2682, + "step": 359660 + }, + { + "epoch": 0.7265561557387977, + "grad_norm": 598.5054931640625, + "learning_rate": 2.213592721475269e-06, + "loss": 17.5923, + "step": 359670 + }, + { + "epoch": 0.7265763563714815, + "grad_norm": 406.8619384765625, + "learning_rate": 2.213302890631583e-06, + "loss": 14.9745, + "step": 359680 + }, + { + "epoch": 0.7265965570041654, + "grad_norm": 597.0632934570312, + "learning_rate": 2.2130130733699206e-06, + "loss": 24.9578, + "step": 359690 + }, + { + "epoch": 0.7266167576368492, + "grad_norm": 425.6470947265625, + "learning_rate": 2.212723269691697e-06, + "loss": 22.5632, + "step": 359700 + }, + { + "epoch": 0.726636958269533, + "grad_norm": 408.3787536621094, + "learning_rate": 2.212433479598321e-06, + "loss": 10.5233, + "step": 359710 + }, + { + "epoch": 0.7266571589022168, + "grad_norm": 617.8192138671875, + "learning_rate": 2.2121437030912045e-06, + "loss": 19.0784, + "step": 359720 + }, + { + "epoch": 0.7266773595349006, + "grad_norm": 407.09674072265625, + "learning_rate": 2.2118539401717636e-06, + "loss": 28.2398, + "step": 359730 + }, + { + "epoch": 0.7266975601675845, + "grad_norm": 458.1068420410156, + "learning_rate": 2.2115641908414087e-06, + "loss": 22.6853, + "step": 359740 + }, + { + "epoch": 0.7267177608002683, + "grad_norm": 517.1664428710938, + "learning_rate": 2.2112744551015496e-06, + "loss": 13.1747, + "step": 359750 + }, + { + "epoch": 0.7267379614329521, + "grad_norm": 1.197109341621399, + "learning_rate": 2.2109847329536005e-06, + "loss": 10.3271, + "step": 359760 + }, + { + "epoch": 0.7267581620656359, + "grad_norm": 245.50851440429688, + "learning_rate": 2.2106950243989754e-06, + "loss": 12.7855, + "step": 359770 + }, + { + "epoch": 0.7267783626983197, + "grad_norm": 318.2196960449219, + "learning_rate": 2.2104053294390847e-06, + "loss": 22.7982, + "step": 359780 + }, + { + "epoch": 0.7267985633310036, + "grad_norm": 89.09252166748047, + "learning_rate": 2.210115648075338e-06, + "loss": 13.4377, + "step": 359790 + }, + { + "epoch": 0.7268187639636874, + "grad_norm": 324.0706787109375, + "learning_rate": 2.209825980309151e-06, + "loss": 19.3334, + "step": 359800 + }, + { + "epoch": 0.7268389645963712, + "grad_norm": 443.2936706542969, + "learning_rate": 2.209536326141934e-06, + "loss": 14.9058, + "step": 359810 + }, + { + "epoch": 0.726859165229055, + 
"grad_norm": 1070.38720703125, + "learning_rate": 2.2092466855750966e-06, + "loss": 23.2808, + "step": 359820 + }, + { + "epoch": 0.7268793658617387, + "grad_norm": 216.96498107910156, + "learning_rate": 2.2089570586100545e-06, + "loss": 14.0101, + "step": 359830 + }, + { + "epoch": 0.7268995664944226, + "grad_norm": 445.51409912109375, + "learning_rate": 2.208667445248215e-06, + "loss": 11.5647, + "step": 359840 + }, + { + "epoch": 0.7269197671271064, + "grad_norm": 454.36480712890625, + "learning_rate": 2.208377845490994e-06, + "loss": 28.9318, + "step": 359850 + }, + { + "epoch": 0.7269399677597902, + "grad_norm": 401.3663635253906, + "learning_rate": 2.2080882593398e-06, + "loss": 24.741, + "step": 359860 + }, + { + "epoch": 0.726960168392474, + "grad_norm": 186.1900634765625, + "learning_rate": 2.2077986867960436e-06, + "loss": 16.7676, + "step": 359870 + }, + { + "epoch": 0.7269803690251578, + "grad_norm": 250.18862915039062, + "learning_rate": 2.20750912786114e-06, + "loss": 15.4508, + "step": 359880 + }, + { + "epoch": 0.7270005696578417, + "grad_norm": 360.76416015625, + "learning_rate": 2.2072195825364983e-06, + "loss": 23.8939, + "step": 359890 + }, + { + "epoch": 0.7270207702905255, + "grad_norm": 297.14190673828125, + "learning_rate": 2.2069300508235273e-06, + "loss": 15.5774, + "step": 359900 + }, + { + "epoch": 0.7270409709232093, + "grad_norm": 65.08010864257812, + "learning_rate": 2.2066405327236413e-06, + "loss": 10.4941, + "step": 359910 + }, + { + "epoch": 0.7270611715558931, + "grad_norm": 192.5146942138672, + "learning_rate": 2.2063510282382517e-06, + "loss": 21.6264, + "step": 359920 + }, + { + "epoch": 0.7270813721885769, + "grad_norm": 142.77598571777344, + "learning_rate": 2.206061537368768e-06, + "loss": 17.3337, + "step": 359930 + }, + { + "epoch": 0.7271015728212608, + "grad_norm": 297.79437255859375, + "learning_rate": 2.2057720601166004e-06, + "loss": 18.7999, + "step": 359940 + }, + { + "epoch": 0.7271217734539446, + "grad_norm": 270.64678955078125, + "learning_rate": 2.2054825964831627e-06, + "loss": 30.1399, + "step": 359950 + }, + { + "epoch": 0.7271419740866284, + "grad_norm": 384.5345153808594, + "learning_rate": 2.2051931464698636e-06, + "loss": 21.4343, + "step": 359960 + }, + { + "epoch": 0.7271621747193122, + "grad_norm": 163.68165588378906, + "learning_rate": 2.2049037100781125e-06, + "loss": 8.9799, + "step": 359970 + }, + { + "epoch": 0.727182375351996, + "grad_norm": 129.2692413330078, + "learning_rate": 2.204614287309321e-06, + "loss": 21.4616, + "step": 359980 + }, + { + "epoch": 0.7272025759846799, + "grad_norm": 555.4015502929688, + "learning_rate": 2.204324878164905e-06, + "loss": 17.6524, + "step": 359990 + }, + { + "epoch": 0.7272227766173637, + "grad_norm": 461.436279296875, + "learning_rate": 2.204035482646267e-06, + "loss": 16.2615, + "step": 360000 + }, + { + "epoch": 0.7272429772500475, + "grad_norm": 596.1917724609375, + "learning_rate": 2.20374610075482e-06, + "loss": 30.5351, + "step": 360010 + }, + { + "epoch": 0.7272631778827313, + "grad_norm": 264.5061340332031, + "learning_rate": 2.2034567324919774e-06, + "loss": 5.905, + "step": 360020 + }, + { + "epoch": 0.7272833785154151, + "grad_norm": 446.2514953613281, + "learning_rate": 2.2031673778591477e-06, + "loss": 20.922, + "step": 360030 + }, + { + "epoch": 0.727303579148099, + "grad_norm": 235.1972198486328, + "learning_rate": 2.2028780368577395e-06, + "loss": 22.4989, + "step": 360040 + }, + { + "epoch": 0.7273237797807828, + "grad_norm": 25.34808921813965, + "learning_rate": 
2.2025887094891657e-06, + "loss": 19.9513, + "step": 360050 + }, + { + "epoch": 0.7273439804134666, + "grad_norm": 333.74322509765625, + "learning_rate": 2.202299395754836e-06, + "loss": 13.5665, + "step": 360060 + }, + { + "epoch": 0.7273641810461504, + "grad_norm": 451.43218994140625, + "learning_rate": 2.2020100956561576e-06, + "loss": 23.9477, + "step": 360070 + }, + { + "epoch": 0.7273843816788342, + "grad_norm": 320.87115478515625, + "learning_rate": 2.201720809194542e-06, + "loss": 11.5491, + "step": 360080 + }, + { + "epoch": 0.7274045823115179, + "grad_norm": 331.8887023925781, + "learning_rate": 2.201431536371402e-06, + "loss": 13.0425, + "step": 360090 + }, + { + "epoch": 0.7274247829442018, + "grad_norm": 295.6164245605469, + "learning_rate": 2.201142277188146e-06, + "loss": 21.2351, + "step": 360100 + }, + { + "epoch": 0.7274449835768856, + "grad_norm": 327.21002197265625, + "learning_rate": 2.20085303164618e-06, + "loss": 36.5531, + "step": 360110 + }, + { + "epoch": 0.7274651842095694, + "grad_norm": 538.5135498046875, + "learning_rate": 2.2005637997469194e-06, + "loss": 23.5763, + "step": 360120 + }, + { + "epoch": 0.7274853848422532, + "grad_norm": 529.24951171875, + "learning_rate": 2.2002745814917716e-06, + "loss": 21.429, + "step": 360130 + }, + { + "epoch": 0.727505585474937, + "grad_norm": 602.7332763671875, + "learning_rate": 2.1999853768821433e-06, + "loss": 18.329, + "step": 360140 + }, + { + "epoch": 0.7275257861076209, + "grad_norm": 626.5838623046875, + "learning_rate": 2.1996961859194487e-06, + "loss": 12.5332, + "step": 360150 + }, + { + "epoch": 0.7275459867403047, + "grad_norm": 685.1061401367188, + "learning_rate": 2.1994070086050937e-06, + "loss": 18.306, + "step": 360160 + }, + { + "epoch": 0.7275661873729885, + "grad_norm": 506.77349853515625, + "learning_rate": 2.199117844940491e-06, + "loss": 13.3728, + "step": 360170 + }, + { + "epoch": 0.7275863880056723, + "grad_norm": 92.83052062988281, + "learning_rate": 2.198828694927048e-06, + "loss": 18.4025, + "step": 360180 + }, + { + "epoch": 0.7276065886383561, + "grad_norm": 318.7611083984375, + "learning_rate": 2.198539558566173e-06, + "loss": 29.3137, + "step": 360190 + }, + { + "epoch": 0.72762678927104, + "grad_norm": 338.2325744628906, + "learning_rate": 2.1982504358592777e-06, + "loss": 28.2511, + "step": 360200 + }, + { + "epoch": 0.7276469899037238, + "grad_norm": 525.8775024414062, + "learning_rate": 2.1979613268077684e-06, + "loss": 10.6956, + "step": 360210 + }, + { + "epoch": 0.7276671905364076, + "grad_norm": 461.5638122558594, + "learning_rate": 2.1976722314130576e-06, + "loss": 11.5493, + "step": 360220 + }, + { + "epoch": 0.7276873911690914, + "grad_norm": 217.5858917236328, + "learning_rate": 2.1973831496765503e-06, + "loss": 16.0767, + "step": 360230 + }, + { + "epoch": 0.7277075918017752, + "grad_norm": 566.3673095703125, + "learning_rate": 2.1970940815996592e-06, + "loss": 17.7488, + "step": 360240 + }, + { + "epoch": 0.7277277924344591, + "grad_norm": 407.6756286621094, + "learning_rate": 2.1968050271837926e-06, + "loss": 15.9521, + "step": 360250 + }, + { + "epoch": 0.7277479930671429, + "grad_norm": 502.3351135253906, + "learning_rate": 2.196515986430356e-06, + "loss": 34.51, + "step": 360260 + }, + { + "epoch": 0.7277681936998267, + "grad_norm": 275.16143798828125, + "learning_rate": 2.196226959340762e-06, + "loss": 6.6301, + "step": 360270 + }, + { + "epoch": 0.7277883943325105, + "grad_norm": 381.4097595214844, + "learning_rate": 2.195937945916418e-06, + "loss": 19.1713, + "step": 
360280 + }, + { + "epoch": 0.7278085949651943, + "grad_norm": 1045.807861328125, + "learning_rate": 2.1956489461587307e-06, + "loss": 24.8398, + "step": 360290 + }, + { + "epoch": 0.7278287955978782, + "grad_norm": 420.18475341796875, + "learning_rate": 2.19535996006911e-06, + "loss": 22.2127, + "step": 360300 + }, + { + "epoch": 0.727848996230562, + "grad_norm": 378.78338623046875, + "learning_rate": 2.195070987648966e-06, + "loss": 22.7336, + "step": 360310 + }, + { + "epoch": 0.7278691968632458, + "grad_norm": 132.0506134033203, + "learning_rate": 2.1947820288997067e-06, + "loss": 8.3921, + "step": 360320 + }, + { + "epoch": 0.7278893974959296, + "grad_norm": 358.63775634765625, + "learning_rate": 2.1944930838227374e-06, + "loss": 12.0968, + "step": 360330 + }, + { + "epoch": 0.7279095981286133, + "grad_norm": 411.3817443847656, + "learning_rate": 2.1942041524194705e-06, + "loss": 19.8867, + "step": 360340 + }, + { + "epoch": 0.7279297987612972, + "grad_norm": 130.55499267578125, + "learning_rate": 2.193915234691312e-06, + "loss": 12.3914, + "step": 360350 + }, + { + "epoch": 0.727949999393981, + "grad_norm": 60.1270637512207, + "learning_rate": 2.1936263306396688e-06, + "loss": 14.733, + "step": 360360 + }, + { + "epoch": 0.7279702000266648, + "grad_norm": 4.043726921081543, + "learning_rate": 2.1933374402659502e-06, + "loss": 27.5651, + "step": 360370 + }, + { + "epoch": 0.7279904006593486, + "grad_norm": 363.79461669921875, + "learning_rate": 2.1930485635715665e-06, + "loss": 19.1083, + "step": 360380 + }, + { + "epoch": 0.7280106012920324, + "grad_norm": 241.27272033691406, + "learning_rate": 2.1927597005579236e-06, + "loss": 19.0199, + "step": 360390 + }, + { + "epoch": 0.7280308019247163, + "grad_norm": 1405.54638671875, + "learning_rate": 2.192470851226428e-06, + "loss": 20.4921, + "step": 360400 + }, + { + "epoch": 0.7280510025574001, + "grad_norm": 268.7628479003906, + "learning_rate": 2.19218201557849e-06, + "loss": 13.2207, + "step": 360410 + }, + { + "epoch": 0.7280712031900839, + "grad_norm": 589.1351318359375, + "learning_rate": 2.1918931936155167e-06, + "loss": 22.0355, + "step": 360420 + }, + { + "epoch": 0.7280914038227677, + "grad_norm": 483.664794921875, + "learning_rate": 2.191604385338914e-06, + "loss": 16.1735, + "step": 360430 + }, + { + "epoch": 0.7281116044554515, + "grad_norm": 0.7184778451919556, + "learning_rate": 2.1913155907500923e-06, + "loss": 17.3139, + "step": 360440 + }, + { + "epoch": 0.7281318050881354, + "grad_norm": 99.97135162353516, + "learning_rate": 2.1910268098504562e-06, + "loss": 8.8662, + "step": 360450 + }, + { + "epoch": 0.7281520057208192, + "grad_norm": 220.3773956298828, + "learning_rate": 2.190738042641416e-06, + "loss": 10.7225, + "step": 360460 + }, + { + "epoch": 0.728172206353503, + "grad_norm": 187.96905517578125, + "learning_rate": 2.1904492891243785e-06, + "loss": 26.9043, + "step": 360470 + }, + { + "epoch": 0.7281924069861868, + "grad_norm": 525.996337890625, + "learning_rate": 2.190160549300748e-06, + "loss": 21.2937, + "step": 360480 + }, + { + "epoch": 0.7282126076188706, + "grad_norm": 152.04421997070312, + "learning_rate": 2.189871823171936e-06, + "loss": 10.7987, + "step": 360490 + }, + { + "epoch": 0.7282328082515545, + "grad_norm": 235.4153594970703, + "learning_rate": 2.1895831107393485e-06, + "loss": 17.4104, + "step": 360500 + }, + { + "epoch": 0.7282530088842383, + "grad_norm": 248.13519287109375, + "learning_rate": 2.18929441200439e-06, + "loss": 24.46, + "step": 360510 + }, + { + "epoch": 0.7282732095169221, + 
"grad_norm": 198.44969177246094, + "learning_rate": 2.1890057269684695e-06, + "loss": 22.6974, + "step": 360520 + }, + { + "epoch": 0.7282934101496059, + "grad_norm": 25.584314346313477, + "learning_rate": 2.1887170556329962e-06, + "loss": 9.6163, + "step": 360530 + }, + { + "epoch": 0.7283136107822897, + "grad_norm": 116.18987274169922, + "learning_rate": 2.188428397999375e-06, + "loss": 13.5682, + "step": 360540 + }, + { + "epoch": 0.7283338114149736, + "grad_norm": 2.7935242652893066, + "learning_rate": 2.1881397540690106e-06, + "loss": 7.1686, + "step": 360550 + }, + { + "epoch": 0.7283540120476574, + "grad_norm": 591.6446533203125, + "learning_rate": 2.187851123843314e-06, + "loss": 26.2548, + "step": 360560 + }, + { + "epoch": 0.7283742126803412, + "grad_norm": 102.64051055908203, + "learning_rate": 2.18756250732369e-06, + "loss": 13.3199, + "step": 360570 + }, + { + "epoch": 0.728394413313025, + "grad_norm": 532.3738403320312, + "learning_rate": 2.187273904511544e-06, + "loss": 17.5984, + "step": 360580 + }, + { + "epoch": 0.7284146139457088, + "grad_norm": 400.3085021972656, + "learning_rate": 2.1869853154082828e-06, + "loss": 12.8936, + "step": 360590 + }, + { + "epoch": 0.7284348145783925, + "grad_norm": 470.28582763671875, + "learning_rate": 2.1866967400153184e-06, + "loss": 15.0249, + "step": 360600 + }, + { + "epoch": 0.7284550152110764, + "grad_norm": 550.6588745117188, + "learning_rate": 2.1864081783340484e-06, + "loss": 16.8647, + "step": 360610 + }, + { + "epoch": 0.7284752158437602, + "grad_norm": 639.104248046875, + "learning_rate": 2.1861196303658843e-06, + "loss": 12.5242, + "step": 360620 + }, + { + "epoch": 0.728495416476444, + "grad_norm": 295.1681823730469, + "learning_rate": 2.1858310961122336e-06, + "loss": 10.8901, + "step": 360630 + }, + { + "epoch": 0.7285156171091278, + "grad_norm": 416.6802062988281, + "learning_rate": 2.185542575574501e-06, + "loss": 18.2717, + "step": 360640 + }, + { + "epoch": 0.7285358177418116, + "grad_norm": 258.6806640625, + "learning_rate": 2.18525406875409e-06, + "loss": 15.2738, + "step": 360650 + }, + { + "epoch": 0.7285560183744955, + "grad_norm": 222.70498657226562, + "learning_rate": 2.184965575652412e-06, + "loss": 13.6236, + "step": 360660 + }, + { + "epoch": 0.7285762190071793, + "grad_norm": 530.6907958984375, + "learning_rate": 2.18467709627087e-06, + "loss": 13.6382, + "step": 360670 + }, + { + "epoch": 0.7285964196398631, + "grad_norm": 36.37629318237305, + "learning_rate": 2.1843886306108686e-06, + "loss": 11.1797, + "step": 360680 + }, + { + "epoch": 0.7286166202725469, + "grad_norm": 540.2681884765625, + "learning_rate": 2.184100178673815e-06, + "loss": 9.2794, + "step": 360690 + }, + { + "epoch": 0.7286368209052307, + "grad_norm": 260.7170104980469, + "learning_rate": 2.183811740461118e-06, + "loss": 32.9272, + "step": 360700 + }, + { + "epoch": 0.7286570215379146, + "grad_norm": 446.9761962890625, + "learning_rate": 2.183523315974181e-06, + "loss": 19.565, + "step": 360710 + }, + { + "epoch": 0.7286772221705984, + "grad_norm": 366.071533203125, + "learning_rate": 2.183234905214408e-06, + "loss": 24.6809, + "step": 360720 + }, + { + "epoch": 0.7286974228032822, + "grad_norm": 342.06671142578125, + "learning_rate": 2.182946508183208e-06, + "loss": 31.346, + "step": 360730 + }, + { + "epoch": 0.728717623435966, + "grad_norm": 393.7168273925781, + "learning_rate": 2.182658124881985e-06, + "loss": 23.4002, + "step": 360740 + }, + { + "epoch": 0.7287378240686498, + "grad_norm": 369.3158874511719, + "learning_rate": 
2.1823697553121432e-06, + "loss": 15.1723, + "step": 360750 + }, + { + "epoch": 0.7287580247013337, + "grad_norm": 240.69386291503906, + "learning_rate": 2.1820813994750904e-06, + "loss": 16.7641, + "step": 360760 + }, + { + "epoch": 0.7287782253340175, + "grad_norm": 238.80050659179688, + "learning_rate": 2.18179305737223e-06, + "loss": 12.4301, + "step": 360770 + }, + { + "epoch": 0.7287984259667013, + "grad_norm": 618.7152099609375, + "learning_rate": 2.1815047290049707e-06, + "loss": 15.9741, + "step": 360780 + }, + { + "epoch": 0.7288186265993851, + "grad_norm": 65.00995635986328, + "learning_rate": 2.1812164143747143e-06, + "loss": 12.7006, + "step": 360790 + }, + { + "epoch": 0.7288388272320689, + "grad_norm": 265.7799987792969, + "learning_rate": 2.1809281134828663e-06, + "loss": 17.9888, + "step": 360800 + }, + { + "epoch": 0.7288590278647528, + "grad_norm": 147.77239990234375, + "learning_rate": 2.1806398263308343e-06, + "loss": 14.6698, + "step": 360810 + }, + { + "epoch": 0.7288792284974366, + "grad_norm": 270.9167175292969, + "learning_rate": 2.1803515529200204e-06, + "loss": 26.5043, + "step": 360820 + }, + { + "epoch": 0.7288994291301204, + "grad_norm": 265.6896057128906, + "learning_rate": 2.1800632932518325e-06, + "loss": 17.3283, + "step": 360830 + }, + { + "epoch": 0.7289196297628042, + "grad_norm": 1087.7618408203125, + "learning_rate": 2.179775047327672e-06, + "loss": 26.0571, + "step": 360840 + }, + { + "epoch": 0.728939830395488, + "grad_norm": 304.3948974609375, + "learning_rate": 2.179486815148948e-06, + "loss": 14.0474, + "step": 360850 + }, + { + "epoch": 0.7289600310281718, + "grad_norm": 428.5285949707031, + "learning_rate": 2.179198596717063e-06, + "loss": 32.7469, + "step": 360860 + }, + { + "epoch": 0.7289802316608556, + "grad_norm": 1042.8258056640625, + "learning_rate": 2.1789103920334205e-06, + "loss": 29.2223, + "step": 360870 + }, + { + "epoch": 0.7290004322935394, + "grad_norm": 143.35415649414062, + "learning_rate": 2.178622201099428e-06, + "loss": 34.5574, + "step": 360880 + }, + { + "epoch": 0.7290206329262232, + "grad_norm": 880.0578002929688, + "learning_rate": 2.178334023916489e-06, + "loss": 26.2843, + "step": 360890 + }, + { + "epoch": 0.729040833558907, + "grad_norm": 395.4434509277344, + "learning_rate": 2.1780458604860056e-06, + "loss": 14.6299, + "step": 360900 + }, + { + "epoch": 0.7290610341915909, + "grad_norm": 1124.6776123046875, + "learning_rate": 2.1777577108093843e-06, + "loss": 13.5058, + "step": 360910 + }, + { + "epoch": 0.7290812348242747, + "grad_norm": 241.42478942871094, + "learning_rate": 2.177469574888034e-06, + "loss": 16.9511, + "step": 360920 + }, + { + "epoch": 0.7291014354569585, + "grad_norm": 59.13896179199219, + "learning_rate": 2.17718145272335e-06, + "loss": 16.9234, + "step": 360930 + }, + { + "epoch": 0.7291216360896423, + "grad_norm": 430.2388916015625, + "learning_rate": 2.1768933443167423e-06, + "loss": 23.9968, + "step": 360940 + }, + { + "epoch": 0.7291418367223261, + "grad_norm": 301.6485900878906, + "learning_rate": 2.1766052496696155e-06, + "loss": 28.9483, + "step": 360950 + }, + { + "epoch": 0.72916203735501, + "grad_norm": 409.0451965332031, + "learning_rate": 2.176317168783372e-06, + "loss": 11.3576, + "step": 360960 + }, + { + "epoch": 0.7291822379876938, + "grad_norm": 58.25111770629883, + "learning_rate": 2.1760291016594143e-06, + "loss": 10.1496, + "step": 360970 + }, + { + "epoch": 0.7292024386203776, + "grad_norm": 538.2581787109375, + "learning_rate": 2.1757410482991488e-06, + "loss": 12.4929, 
+ "step": 360980 + }, + { + "epoch": 0.7292226392530614, + "grad_norm": 107.94898986816406, + "learning_rate": 2.17545300870398e-06, + "loss": 24.9767, + "step": 360990 + }, + { + "epoch": 0.7292428398857452, + "grad_norm": 119.3606185913086, + "learning_rate": 2.175164982875311e-06, + "loss": 11.8385, + "step": 361000 + }, + { + "epoch": 0.729263040518429, + "grad_norm": 326.6532287597656, + "learning_rate": 2.1748769708145435e-06, + "loss": 14.3606, + "step": 361010 + }, + { + "epoch": 0.7292832411511129, + "grad_norm": 335.1154479980469, + "learning_rate": 2.1745889725230845e-06, + "loss": 20.6894, + "step": 361020 + }, + { + "epoch": 0.7293034417837967, + "grad_norm": 300.31842041015625, + "learning_rate": 2.1743009880023364e-06, + "loss": 13.3313, + "step": 361030 + }, + { + "epoch": 0.7293236424164805, + "grad_norm": 244.2138671875, + "learning_rate": 2.174013017253701e-06, + "loss": 14.7413, + "step": 361040 + }, + { + "epoch": 0.7293438430491643, + "grad_norm": 961.990234375, + "learning_rate": 2.173725060278585e-06, + "loss": 18.6023, + "step": 361050 + }, + { + "epoch": 0.7293640436818482, + "grad_norm": 275.3308410644531, + "learning_rate": 2.1734371170783888e-06, + "loss": 17.4297, + "step": 361060 + }, + { + "epoch": 0.729384244314532, + "grad_norm": 465.2654724121094, + "learning_rate": 2.173149187654518e-06, + "loss": 23.1155, + "step": 361070 + }, + { + "epoch": 0.7294044449472158, + "grad_norm": 352.1817626953125, + "learning_rate": 2.1728612720083764e-06, + "loss": 24.5433, + "step": 361080 + }, + { + "epoch": 0.7294246455798996, + "grad_norm": 684.2299194335938, + "learning_rate": 2.172573370141364e-06, + "loss": 17.5574, + "step": 361090 + }, + { + "epoch": 0.7294448462125834, + "grad_norm": 357.49310302734375, + "learning_rate": 2.1722854820548873e-06, + "loss": 17.079, + "step": 361100 + }, + { + "epoch": 0.7294650468452671, + "grad_norm": 167.45614624023438, + "learning_rate": 2.1719976077503484e-06, + "loss": 16.0663, + "step": 361110 + }, + { + "epoch": 0.729485247477951, + "grad_norm": 86.17201232910156, + "learning_rate": 2.171709747229149e-06, + "loss": 15.0983, + "step": 361120 + }, + { + "epoch": 0.7295054481106348, + "grad_norm": 363.0920104980469, + "learning_rate": 2.1714219004926923e-06, + "loss": 13.2577, + "step": 361130 + }, + { + "epoch": 0.7295256487433186, + "grad_norm": 263.7752685546875, + "learning_rate": 2.1711340675423847e-06, + "loss": 14.69, + "step": 361140 + }, + { + "epoch": 0.7295458493760024, + "grad_norm": 576.8699951171875, + "learning_rate": 2.1708462483796263e-06, + "loss": 18.0602, + "step": 361150 + }, + { + "epoch": 0.7295660500086862, + "grad_norm": 64.69669342041016, + "learning_rate": 2.170558443005818e-06, + "loss": 26.0218, + "step": 361160 + }, + { + "epoch": 0.7295862506413701, + "grad_norm": 292.1713562011719, + "learning_rate": 2.170270651422367e-06, + "loss": 12.5334, + "step": 361170 + }, + { + "epoch": 0.7296064512740539, + "grad_norm": 402.4986877441406, + "learning_rate": 2.1699828736306736e-06, + "loss": 22.3744, + "step": 361180 + }, + { + "epoch": 0.7296266519067377, + "grad_norm": 252.6857452392578, + "learning_rate": 2.1696951096321383e-06, + "loss": 16.728, + "step": 361190 + }, + { + "epoch": 0.7296468525394215, + "grad_norm": 362.59912109375, + "learning_rate": 2.1694073594281663e-06, + "loss": 36.7554, + "step": 361200 + }, + { + "epoch": 0.7296670531721053, + "grad_norm": 179.68382263183594, + "learning_rate": 2.1691196230201626e-06, + "loss": 12.6639, + "step": 361210 + }, + { + "epoch": 0.7296872538047892, 
+ "grad_norm": 3.6797828674316406, + "learning_rate": 2.168831900409523e-06, + "loss": 12.1016, + "step": 361220 + }, + { + "epoch": 0.729707454437473, + "grad_norm": 505.0819091796875, + "learning_rate": 2.1685441915976537e-06, + "loss": 20.3073, + "step": 361230 + }, + { + "epoch": 0.7297276550701568, + "grad_norm": 261.9149169921875, + "learning_rate": 2.168256496585958e-06, + "loss": 15.1085, + "step": 361240 + }, + { + "epoch": 0.7297478557028406, + "grad_norm": 434.857177734375, + "learning_rate": 2.1679688153758373e-06, + "loss": 16.0192, + "step": 361250 + }, + { + "epoch": 0.7297680563355244, + "grad_norm": 522.893310546875, + "learning_rate": 2.1676811479686905e-06, + "loss": 9.4449, + "step": 361260 + }, + { + "epoch": 0.7297882569682083, + "grad_norm": 710.4201049804688, + "learning_rate": 2.1673934943659226e-06, + "loss": 27.2911, + "step": 361270 + }, + { + "epoch": 0.7298084576008921, + "grad_norm": 2422.255126953125, + "learning_rate": 2.1671058545689387e-06, + "loss": 42.0225, + "step": 361280 + }, + { + "epoch": 0.7298286582335759, + "grad_norm": 645.8966064453125, + "learning_rate": 2.166818228579134e-06, + "loss": 28.2227, + "step": 361290 + }, + { + "epoch": 0.7298488588662597, + "grad_norm": 327.0567321777344, + "learning_rate": 2.1665306163979132e-06, + "loss": 6.7001, + "step": 361300 + }, + { + "epoch": 0.7298690594989435, + "grad_norm": 793.0493774414062, + "learning_rate": 2.1662430180266808e-06, + "loss": 14.0256, + "step": 361310 + }, + { + "epoch": 0.7298892601316274, + "grad_norm": 216.51014709472656, + "learning_rate": 2.1659554334668364e-06, + "loss": 26.4455, + "step": 361320 + }, + { + "epoch": 0.7299094607643112, + "grad_norm": 245.2882080078125, + "learning_rate": 2.1656678627197793e-06, + "loss": 6.7295, + "step": 361330 + }, + { + "epoch": 0.729929661396995, + "grad_norm": 0.20949004590511322, + "learning_rate": 2.165380305786915e-06, + "loss": 24.76, + "step": 361340 + }, + { + "epoch": 0.7299498620296788, + "grad_norm": 953.6806640625, + "learning_rate": 2.165092762669643e-06, + "loss": 26.3631, + "step": 361350 + }, + { + "epoch": 0.7299700626623626, + "grad_norm": 348.4391174316406, + "learning_rate": 2.164805233369364e-06, + "loss": 18.4093, + "step": 361360 + }, + { + "epoch": 0.7299902632950463, + "grad_norm": 468.8824768066406, + "learning_rate": 2.1645177178874817e-06, + "loss": 19.1577, + "step": 361370 + }, + { + "epoch": 0.7300104639277302, + "grad_norm": 79.99201965332031, + "learning_rate": 2.164230216225395e-06, + "loss": 26.3549, + "step": 361380 + }, + { + "epoch": 0.730030664560414, + "grad_norm": 448.7281188964844, + "learning_rate": 2.163942728384507e-06, + "loss": 16.6217, + "step": 361390 + }, + { + "epoch": 0.7300508651930978, + "grad_norm": 101.54920959472656, + "learning_rate": 2.1636552543662187e-06, + "loss": 13.2712, + "step": 361400 + }, + { + "epoch": 0.7300710658257816, + "grad_norm": 374.1964416503906, + "learning_rate": 2.163367794171929e-06, + "loss": 13.4904, + "step": 361410 + }, + { + "epoch": 0.7300912664584654, + "grad_norm": 597.1946411132812, + "learning_rate": 2.1630803478030428e-06, + "loss": 17.7211, + "step": 361420 + }, + { + "epoch": 0.7301114670911493, + "grad_norm": 320.4565734863281, + "learning_rate": 2.162792915260956e-06, + "loss": 21.6194, + "step": 361430 + }, + { + "epoch": 0.7301316677238331, + "grad_norm": 114.46507263183594, + "learning_rate": 2.1625054965470754e-06, + "loss": 22.1658, + "step": 361440 + }, + { + "epoch": 0.7301518683565169, + "grad_norm": 415.8811340332031, + "learning_rate": 
2.1622180916627964e-06, + "loss": 10.7178, + "step": 361450 + }, + { + "epoch": 0.7301720689892007, + "grad_norm": 271.2681884765625, + "learning_rate": 2.161930700609524e-06, + "loss": 23.4488, + "step": 361460 + }, + { + "epoch": 0.7301922696218845, + "grad_norm": 103.7387466430664, + "learning_rate": 2.1616433233886576e-06, + "loss": 30.7911, + "step": 361470 + }, + { + "epoch": 0.7302124702545684, + "grad_norm": 404.3741455078125, + "learning_rate": 2.1613559600015955e-06, + "loss": 21.6233, + "step": 361480 + }, + { + "epoch": 0.7302326708872522, + "grad_norm": 82.00901794433594, + "learning_rate": 2.1610686104497413e-06, + "loss": 14.4869, + "step": 361490 + }, + { + "epoch": 0.730252871519936, + "grad_norm": 433.1382141113281, + "learning_rate": 2.1607812747344955e-06, + "loss": 21.366, + "step": 361500 + }, + { + "epoch": 0.7302730721526198, + "grad_norm": 121.11111450195312, + "learning_rate": 2.160493952857255e-06, + "loss": 10.8664, + "step": 361510 + }, + { + "epoch": 0.7302932727853036, + "grad_norm": 890.6771850585938, + "learning_rate": 2.160206644819422e-06, + "loss": 24.7509, + "step": 361520 + }, + { + "epoch": 0.7303134734179875, + "grad_norm": 10.892768859863281, + "learning_rate": 2.159919350622402e-06, + "loss": 9.2625, + "step": 361530 + }, + { + "epoch": 0.7303336740506713, + "grad_norm": 637.7189331054688, + "learning_rate": 2.1596320702675867e-06, + "loss": 9.0058, + "step": 361540 + }, + { + "epoch": 0.7303538746833551, + "grad_norm": 342.6564636230469, + "learning_rate": 2.1593448037563795e-06, + "loss": 26.8792, + "step": 361550 + }, + { + "epoch": 0.7303740753160389, + "grad_norm": 457.1528625488281, + "learning_rate": 2.159057551090184e-06, + "loss": 20.4412, + "step": 361560 + }, + { + "epoch": 0.7303942759487227, + "grad_norm": 440.42083740234375, + "learning_rate": 2.158770312270397e-06, + "loss": 28.5649, + "step": 361570 + }, + { + "epoch": 0.7304144765814066, + "grad_norm": 130.75962829589844, + "learning_rate": 2.158483087298417e-06, + "loss": 28.6687, + "step": 361580 + }, + { + "epoch": 0.7304346772140904, + "grad_norm": 759.0496215820312, + "learning_rate": 2.158195876175646e-06, + "loss": 22.2783, + "step": 361590 + }, + { + "epoch": 0.7304548778467742, + "grad_norm": 350.0136413574219, + "learning_rate": 2.157908678903487e-06, + "loss": 15.8889, + "step": 361600 + }, + { + "epoch": 0.730475078479458, + "grad_norm": 213.92550659179688, + "learning_rate": 2.157621495483333e-06, + "loss": 11.6819, + "step": 361610 + }, + { + "epoch": 0.7304952791121417, + "grad_norm": 368.8323669433594, + "learning_rate": 2.157334325916587e-06, + "loss": 19.16, + "step": 361620 + }, + { + "epoch": 0.7305154797448256, + "grad_norm": 870.4979858398438, + "learning_rate": 2.1570471702046504e-06, + "loss": 20.1488, + "step": 361630 + }, + { + "epoch": 0.7305356803775094, + "grad_norm": 259.45025634765625, + "learning_rate": 2.1567600283489213e-06, + "loss": 17.0631, + "step": 361640 + }, + { + "epoch": 0.7305558810101932, + "grad_norm": 109.738037109375, + "learning_rate": 2.1564729003507974e-06, + "loss": 16.7987, + "step": 361650 + }, + { + "epoch": 0.730576081642877, + "grad_norm": 443.196044921875, + "learning_rate": 2.156185786211681e-06, + "loss": 20.6028, + "step": 361660 + }, + { + "epoch": 0.7305962822755608, + "grad_norm": 176.04074096679688, + "learning_rate": 2.15589868593297e-06, + "loss": 20.3453, + "step": 361670 + }, + { + "epoch": 0.7306164829082447, + "grad_norm": 333.56732177734375, + "learning_rate": 2.1556115995160624e-06, + "loss": 15.0055, + 
"step": 361680 + }, + { + "epoch": 0.7306366835409285, + "grad_norm": 559.19970703125, + "learning_rate": 2.155324526962361e-06, + "loss": 29.2269, + "step": 361690 + }, + { + "epoch": 0.7306568841736123, + "grad_norm": 230.17852783203125, + "learning_rate": 2.1550374682732605e-06, + "loss": 32.9297, + "step": 361700 + }, + { + "epoch": 0.7306770848062961, + "grad_norm": 135.4713134765625, + "learning_rate": 2.154750423450165e-06, + "loss": 13.8467, + "step": 361710 + }, + { + "epoch": 0.7306972854389799, + "grad_norm": 223.9093475341797, + "learning_rate": 2.154463392494468e-06, + "loss": 14.1095, + "step": 361720 + }, + { + "epoch": 0.7307174860716638, + "grad_norm": 262.5910949707031, + "learning_rate": 2.1541763754075732e-06, + "loss": 19.8624, + "step": 361730 + }, + { + "epoch": 0.7307376867043476, + "grad_norm": 146.00656127929688, + "learning_rate": 2.1538893721908766e-06, + "loss": 12.8752, + "step": 361740 + }, + { + "epoch": 0.7307578873370314, + "grad_norm": 350.9228210449219, + "learning_rate": 2.1536023828457793e-06, + "loss": 12.8664, + "step": 361750 + }, + { + "epoch": 0.7307780879697152, + "grad_norm": 281.30767822265625, + "learning_rate": 2.153315407373679e-06, + "loss": 20.9679, + "step": 361760 + }, + { + "epoch": 0.730798288602399, + "grad_norm": 817.6254272460938, + "learning_rate": 2.153028445775972e-06, + "loss": 34.3722, + "step": 361770 + }, + { + "epoch": 0.7308184892350829, + "grad_norm": 308.9070739746094, + "learning_rate": 2.1527414980540607e-06, + "loss": 34.6558, + "step": 361780 + }, + { + "epoch": 0.7308386898677667, + "grad_norm": 254.76809692382812, + "learning_rate": 2.1524545642093426e-06, + "loss": 42.2831, + "step": 361790 + }, + { + "epoch": 0.7308588905004505, + "grad_norm": 476.56787109375, + "learning_rate": 2.152167644243213e-06, + "loss": 12.8899, + "step": 361800 + }, + { + "epoch": 0.7308790911331343, + "grad_norm": 294.74993896484375, + "learning_rate": 2.1518807381570737e-06, + "loss": 12.2191, + "step": 361810 + }, + { + "epoch": 0.7308992917658181, + "grad_norm": 232.15695190429688, + "learning_rate": 2.1515938459523254e-06, + "loss": 15.1084, + "step": 361820 + }, + { + "epoch": 0.730919492398502, + "grad_norm": 174.69699096679688, + "learning_rate": 2.15130696763036e-06, + "loss": 8.9743, + "step": 361830 + }, + { + "epoch": 0.7309396930311858, + "grad_norm": 249.19667053222656, + "learning_rate": 2.151020103192579e-06, + "loss": 9.6197, + "step": 361840 + }, + { + "epoch": 0.7309598936638696, + "grad_norm": 425.8226318359375, + "learning_rate": 2.1507332526403814e-06, + "loss": 31.1412, + "step": 361850 + }, + { + "epoch": 0.7309800942965534, + "grad_norm": 325.6577453613281, + "learning_rate": 2.1504464159751646e-06, + "loss": 12.526, + "step": 361860 + }, + { + "epoch": 0.7310002949292372, + "grad_norm": 501.7792053222656, + "learning_rate": 2.1501595931983256e-06, + "loss": 13.087, + "step": 361870 + }, + { + "epoch": 0.731020495561921, + "grad_norm": 319.7770080566406, + "learning_rate": 2.149872784311262e-06, + "loss": 16.9793, + "step": 361880 + }, + { + "epoch": 0.7310406961946048, + "grad_norm": 593.9952392578125, + "learning_rate": 2.149585989315377e-06, + "loss": 21.6624, + "step": 361890 + }, + { + "epoch": 0.7310608968272886, + "grad_norm": 138.48013305664062, + "learning_rate": 2.14929920821206e-06, + "loss": 32.0402, + "step": 361900 + }, + { + "epoch": 0.7310810974599724, + "grad_norm": 433.41741943359375, + "learning_rate": 2.1490124410027137e-06, + "loss": 20.2227, + "step": 361910 + }, + { + "epoch": 
0.7311012980926562, + "grad_norm": 589.3394165039062, + "learning_rate": 2.1487256876887356e-06, + "loss": 20.6278, + "step": 361920 + }, + { + "epoch": 0.73112149872534, + "grad_norm": 243.6151123046875, + "learning_rate": 2.148438948271524e-06, + "loss": 12.6422, + "step": 361930 + }, + { + "epoch": 0.7311416993580239, + "grad_norm": 405.2738037109375, + "learning_rate": 2.1481522227524725e-06, + "loss": 10.863, + "step": 361940 + }, + { + "epoch": 0.7311618999907077, + "grad_norm": 325.8308410644531, + "learning_rate": 2.147865511132983e-06, + "loss": 19.621, + "step": 361950 + }, + { + "epoch": 0.7311821006233915, + "grad_norm": 265.0723571777344, + "learning_rate": 2.1475788134144516e-06, + "loss": 22.2872, + "step": 361960 + }, + { + "epoch": 0.7312023012560753, + "grad_norm": 210.59898376464844, + "learning_rate": 2.147292129598273e-06, + "loss": 13.0862, + "step": 361970 + }, + { + "epoch": 0.7312225018887591, + "grad_norm": 188.1290740966797, + "learning_rate": 2.147005459685848e-06, + "loss": 18.2052, + "step": 361980 + }, + { + "epoch": 0.731242702521443, + "grad_norm": 324.91693115234375, + "learning_rate": 2.1467188036785706e-06, + "loss": 11.8528, + "step": 361990 + }, + { + "epoch": 0.7312629031541268, + "grad_norm": 15.264313697814941, + "learning_rate": 2.146432161577842e-06, + "loss": 8.4361, + "step": 362000 + }, + { + "epoch": 0.7312831037868106, + "grad_norm": 483.80743408203125, + "learning_rate": 2.146145533385057e-06, + "loss": 12.2032, + "step": 362010 + }, + { + "epoch": 0.7313033044194944, + "grad_norm": 517.9437255859375, + "learning_rate": 2.1458589191016103e-06, + "loss": 47.8655, + "step": 362020 + }, + { + "epoch": 0.7313235050521782, + "grad_norm": 259.2720642089844, + "learning_rate": 2.1455723187289028e-06, + "loss": 18.0934, + "step": 362030 + }, + { + "epoch": 0.7313437056848621, + "grad_norm": 319.8258056640625, + "learning_rate": 2.1452857322683285e-06, + "loss": 17.4515, + "step": 362040 + }, + { + "epoch": 0.7313639063175459, + "grad_norm": 0.0, + "learning_rate": 2.1449991597212865e-06, + "loss": 29.6921, + "step": 362050 + }, + { + "epoch": 0.7313841069502297, + "grad_norm": 54.035953521728516, + "learning_rate": 2.1447126010891704e-06, + "loss": 15.0086, + "step": 362060 + }, + { + "epoch": 0.7314043075829135, + "grad_norm": 148.7391815185547, + "learning_rate": 2.144426056373381e-06, + "loss": 14.8681, + "step": 362070 + }, + { + "epoch": 0.7314245082155973, + "grad_norm": 387.33441162109375, + "learning_rate": 2.144139525575313e-06, + "loss": 23.0641, + "step": 362080 + }, + { + "epoch": 0.7314447088482812, + "grad_norm": 139.35191345214844, + "learning_rate": 2.14385300869636e-06, + "loss": 23.601, + "step": 362090 + }, + { + "epoch": 0.731464909480965, + "grad_norm": 433.0995178222656, + "learning_rate": 2.1435665057379233e-06, + "loss": 24.6062, + "step": 362100 + }, + { + "epoch": 0.7314851101136488, + "grad_norm": 56.32212448120117, + "learning_rate": 2.143280016701397e-06, + "loss": 18.4387, + "step": 362110 + }, + { + "epoch": 0.7315053107463326, + "grad_norm": 640.1629638671875, + "learning_rate": 2.1429935415881753e-06, + "loss": 24.2324, + "step": 362120 + }, + { + "epoch": 0.7315255113790163, + "grad_norm": 150.6722869873047, + "learning_rate": 2.1427070803996565e-06, + "loss": 7.2841, + "step": 362130 + }, + { + "epoch": 0.7315457120117002, + "grad_norm": 277.1022644042969, + "learning_rate": 2.142420633137241e-06, + "loss": 22.3922, + "step": 362140 + }, + { + "epoch": 0.731565912644384, + "grad_norm": 275.8918151855469, + 
"learning_rate": 2.1421341998023167e-06, + "loss": 13.4643, + "step": 362150 + }, + { + "epoch": 0.7315861132770678, + "grad_norm": 332.7549133300781, + "learning_rate": 2.141847780396284e-06, + "loss": 9.5429, + "step": 362160 + }, + { + "epoch": 0.7316063139097516, + "grad_norm": 1105.1734619140625, + "learning_rate": 2.14156137492054e-06, + "loss": 18.1477, + "step": 362170 + }, + { + "epoch": 0.7316265145424354, + "grad_norm": 191.01303100585938, + "learning_rate": 2.141274983376479e-06, + "loss": 7.7928, + "step": 362180 + }, + { + "epoch": 0.7316467151751193, + "grad_norm": 261.5278015136719, + "learning_rate": 2.1409886057654963e-06, + "loss": 15.5697, + "step": 362190 + }, + { + "epoch": 0.7316669158078031, + "grad_norm": 4180.95361328125, + "learning_rate": 2.140702242088987e-06, + "loss": 34.039, + "step": 362200 + }, + { + "epoch": 0.7316871164404869, + "grad_norm": 341.9481201171875, + "learning_rate": 2.1404158923483524e-06, + "loss": 23.2512, + "step": 362210 + }, + { + "epoch": 0.7317073170731707, + "grad_norm": 1053.8857421875, + "learning_rate": 2.1401295565449803e-06, + "loss": 29.5673, + "step": 362220 + }, + { + "epoch": 0.7317275177058545, + "grad_norm": 314.30804443359375, + "learning_rate": 2.13984323468027e-06, + "loss": 29.4899, + "step": 362230 + }, + { + "epoch": 0.7317477183385384, + "grad_norm": 8.269257545471191, + "learning_rate": 2.1395569267556187e-06, + "loss": 24.012, + "step": 362240 + }, + { + "epoch": 0.7317679189712222, + "grad_norm": 237.9320526123047, + "learning_rate": 2.13927063277242e-06, + "loss": 17.2456, + "step": 362250 + }, + { + "epoch": 0.731788119603906, + "grad_norm": 124.94193267822266, + "learning_rate": 2.1389843527320675e-06, + "loss": 12.9626, + "step": 362260 + }, + { + "epoch": 0.7318083202365898, + "grad_norm": 341.7077331542969, + "learning_rate": 2.1386980866359595e-06, + "loss": 44.6113, + "step": 362270 + }, + { + "epoch": 0.7318285208692736, + "grad_norm": 854.4281005859375, + "learning_rate": 2.1384118344854906e-06, + "loss": 14.9032, + "step": 362280 + }, + { + "epoch": 0.7318487215019575, + "grad_norm": 444.77606201171875, + "learning_rate": 2.1381255962820535e-06, + "loss": 12.3953, + "step": 362290 + }, + { + "epoch": 0.7318689221346413, + "grad_norm": 666.7606811523438, + "learning_rate": 2.137839372027047e-06, + "loss": 19.9273, + "step": 362300 + }, + { + "epoch": 0.7318891227673251, + "grad_norm": 670.42431640625, + "learning_rate": 2.137553161721862e-06, + "loss": 29.0313, + "step": 362310 + }, + { + "epoch": 0.7319093234000089, + "grad_norm": 217.4117889404297, + "learning_rate": 2.137266965367898e-06, + "loss": 11.4348, + "step": 362320 + }, + { + "epoch": 0.7319295240326927, + "grad_norm": 450.91351318359375, + "learning_rate": 2.1369807829665455e-06, + "loss": 18.4128, + "step": 362330 + }, + { + "epoch": 0.7319497246653766, + "grad_norm": 628.2463989257812, + "learning_rate": 2.136694614519203e-06, + "loss": 18.8765, + "step": 362340 + }, + { + "epoch": 0.7319699252980604, + "grad_norm": 302.05169677734375, + "learning_rate": 2.1364084600272645e-06, + "loss": 10.7668, + "step": 362350 + }, + { + "epoch": 0.7319901259307442, + "grad_norm": 264.4158935546875, + "learning_rate": 2.1361223194921214e-06, + "loss": 31.4827, + "step": 362360 + }, + { + "epoch": 0.732010326563428, + "grad_norm": 285.2078552246094, + "learning_rate": 2.135836192915173e-06, + "loss": 19.0411, + "step": 362370 + }, + { + "epoch": 0.7320305271961118, + "grad_norm": 193.60736083984375, + "learning_rate": 2.1355500802978093e-06, + "loss": 
20.2884, + "step": 362380 + }, + { + "epoch": 0.7320507278287955, + "grad_norm": 283.0826110839844, + "learning_rate": 2.135263981641429e-06, + "loss": 16.9832, + "step": 362390 + }, + { + "epoch": 0.7320709284614794, + "grad_norm": 211.36553955078125, + "learning_rate": 2.134977896947425e-06, + "loss": 9.025, + "step": 362400 + }, + { + "epoch": 0.7320911290941632, + "grad_norm": 319.20904541015625, + "learning_rate": 2.134691826217189e-06, + "loss": 27.5033, + "step": 362410 + }, + { + "epoch": 0.732111329726847, + "grad_norm": 214.47494506835938, + "learning_rate": 2.1344057694521177e-06, + "loss": 6.9164, + "step": 362420 + }, + { + "epoch": 0.7321315303595308, + "grad_norm": 302.7760925292969, + "learning_rate": 2.1341197266536085e-06, + "loss": 22.4382, + "step": 362430 + }, + { + "epoch": 0.7321517309922146, + "grad_norm": 530.4078979492188, + "learning_rate": 2.1338336978230487e-06, + "loss": 20.3697, + "step": 362440 + }, + { + "epoch": 0.7321719316248985, + "grad_norm": 113.6771240234375, + "learning_rate": 2.1335476829618364e-06, + "loss": 16.3201, + "step": 362450 + }, + { + "epoch": 0.7321921322575823, + "grad_norm": 25.262041091918945, + "learning_rate": 2.133261682071366e-06, + "loss": 15.1948, + "step": 362460 + }, + { + "epoch": 0.7322123328902661, + "grad_norm": 303.3648986816406, + "learning_rate": 2.1329756951530307e-06, + "loss": 10.07, + "step": 362470 + }, + { + "epoch": 0.7322325335229499, + "grad_norm": 554.2640380859375, + "learning_rate": 2.132689722208223e-06, + "loss": 20.4004, + "step": 362480 + }, + { + "epoch": 0.7322527341556337, + "grad_norm": 342.9158935546875, + "learning_rate": 2.132403763238337e-06, + "loss": 10.8332, + "step": 362490 + }, + { + "epoch": 0.7322729347883176, + "grad_norm": 274.4956359863281, + "learning_rate": 2.132117818244771e-06, + "loss": 23.1871, + "step": 362500 + }, + { + "epoch": 0.7322931354210014, + "grad_norm": 234.41029357910156, + "learning_rate": 2.1318318872289117e-06, + "loss": 25.0619, + "step": 362510 + }, + { + "epoch": 0.7323133360536852, + "grad_norm": 304.1642150878906, + "learning_rate": 2.1315459701921553e-06, + "loss": 13.4672, + "step": 362520 + }, + { + "epoch": 0.732333536686369, + "grad_norm": 323.67913818359375, + "learning_rate": 2.1312600671358983e-06, + "loss": 22.5587, + "step": 362530 + }, + { + "epoch": 0.7323537373190528, + "grad_norm": 316.3114013671875, + "learning_rate": 2.1309741780615316e-06, + "loss": 25.9001, + "step": 362540 + }, + { + "epoch": 0.7323739379517367, + "grad_norm": 398.9940490722656, + "learning_rate": 2.1306883029704472e-06, + "loss": 12.2283, + "step": 362550 + }, + { + "epoch": 0.7323941385844205, + "grad_norm": 121.77845764160156, + "learning_rate": 2.130402441864041e-06, + "loss": 8.5003, + "step": 362560 + }, + { + "epoch": 0.7324143392171043, + "grad_norm": 956.2951049804688, + "learning_rate": 2.1301165947437064e-06, + "loss": 27.1598, + "step": 362570 + }, + { + "epoch": 0.7324345398497881, + "grad_norm": 247.68553161621094, + "learning_rate": 2.129830761610833e-06, + "loss": 27.7892, + "step": 362580 + }, + { + "epoch": 0.7324547404824719, + "grad_norm": 239.00120544433594, + "learning_rate": 2.1295449424668184e-06, + "loss": 23.0604, + "step": 362590 + }, + { + "epoch": 0.7324749411151558, + "grad_norm": 707.9892578125, + "learning_rate": 2.1292591373130515e-06, + "loss": 17.0722, + "step": 362600 + }, + { + "epoch": 0.7324951417478396, + "grad_norm": 485.00299072265625, + "learning_rate": 2.1289733461509294e-06, + "loss": 26.1827, + "step": 362610 + }, + { + "epoch": 
0.7325153423805234, + "grad_norm": 385.3713684082031, + "learning_rate": 2.128687568981843e-06, + "loss": 17.5272, + "step": 362620 + }, + { + "epoch": 0.7325355430132072, + "grad_norm": 668.2521362304688, + "learning_rate": 2.1284018058071833e-06, + "loss": 24.3644, + "step": 362630 + }, + { + "epoch": 0.732555743645891, + "grad_norm": 293.8020324707031, + "learning_rate": 2.1281160566283466e-06, + "loss": 27.0207, + "step": 362640 + }, + { + "epoch": 0.7325759442785748, + "grad_norm": 271.657958984375, + "learning_rate": 2.127830321446722e-06, + "loss": 7.6152, + "step": 362650 + }, + { + "epoch": 0.7325961449112586, + "grad_norm": 633.6864624023438, + "learning_rate": 2.1275446002637063e-06, + "loss": 28.675, + "step": 362660 + }, + { + "epoch": 0.7326163455439424, + "grad_norm": 360.24310302734375, + "learning_rate": 2.127258893080688e-06, + "loss": 14.7315, + "step": 362670 + }, + { + "epoch": 0.7326365461766262, + "grad_norm": 375.8663635253906, + "learning_rate": 2.126973199899063e-06, + "loss": 12.8284, + "step": 362680 + }, + { + "epoch": 0.73265674680931, + "grad_norm": 423.78326416015625, + "learning_rate": 2.126687520720222e-06, + "loss": 15.7296, + "step": 362690 + }, + { + "epoch": 0.7326769474419939, + "grad_norm": 321.8089599609375, + "learning_rate": 2.1264018555455563e-06, + "loss": 26.7895, + "step": 362700 + }, + { + "epoch": 0.7326971480746777, + "grad_norm": 260.6068115234375, + "learning_rate": 2.1261162043764606e-06, + "loss": 20.3698, + "step": 362710 + }, + { + "epoch": 0.7327173487073615, + "grad_norm": 484.1304931640625, + "learning_rate": 2.1258305672143265e-06, + "loss": 24.1116, + "step": 362720 + }, + { + "epoch": 0.7327375493400453, + "grad_norm": 525.1082763671875, + "learning_rate": 2.1255449440605436e-06, + "loss": 17.53, + "step": 362730 + }, + { + "epoch": 0.7327577499727291, + "grad_norm": 357.7300720214844, + "learning_rate": 2.1252593349165056e-06, + "loss": 13.1004, + "step": 362740 + }, + { + "epoch": 0.732777950605413, + "grad_norm": 305.9688415527344, + "learning_rate": 2.124973739783609e-06, + "loss": 5.4386, + "step": 362750 + }, + { + "epoch": 0.7327981512380968, + "grad_norm": 309.69061279296875, + "learning_rate": 2.1246881586632384e-06, + "loss": 23.2326, + "step": 362760 + }, + { + "epoch": 0.7328183518707806, + "grad_norm": 311.6143493652344, + "learning_rate": 2.1244025915567883e-06, + "loss": 17.7003, + "step": 362770 + }, + { + "epoch": 0.7328385525034644, + "grad_norm": 166.311279296875, + "learning_rate": 2.1241170384656533e-06, + "loss": 11.4186, + "step": 362780 + }, + { + "epoch": 0.7328587531361482, + "grad_norm": 181.48782348632812, + "learning_rate": 2.123831499391223e-06, + "loss": 32.3941, + "step": 362790 + }, + { + "epoch": 0.732878953768832, + "grad_norm": 193.65078735351562, + "learning_rate": 2.1235459743348874e-06, + "loss": 14.3278, + "step": 362800 + }, + { + "epoch": 0.7328991544015159, + "grad_norm": 165.1879425048828, + "learning_rate": 2.12326046329804e-06, + "loss": 8.6508, + "step": 362810 + }, + { + "epoch": 0.7329193550341997, + "grad_norm": 749.2440185546875, + "learning_rate": 2.1229749662820754e-06, + "loss": 27.1321, + "step": 362820 + }, + { + "epoch": 0.7329395556668835, + "grad_norm": 734.5176391601562, + "learning_rate": 2.122689483288379e-06, + "loss": 22.8381, + "step": 362830 + }, + { + "epoch": 0.7329597562995673, + "grad_norm": 287.7867736816406, + "learning_rate": 2.1224040143183444e-06, + "loss": 10.9977, + "step": 362840 + }, + { + "epoch": 0.7329799569322512, + "grad_norm": 639.458984375, + 
"learning_rate": 2.122118559373366e-06, + "loss": 17.4001, + "step": 362850 + }, + { + "epoch": 0.733000157564935, + "grad_norm": 136.86932373046875, + "learning_rate": 2.121833118454832e-06, + "loss": 15.538, + "step": 362860 + }, + { + "epoch": 0.7330203581976188, + "grad_norm": 598.6109619140625, + "learning_rate": 2.1215476915641327e-06, + "loss": 27.0943, + "step": 362870 + }, + { + "epoch": 0.7330405588303026, + "grad_norm": 202.90081787109375, + "learning_rate": 2.1212622787026626e-06, + "loss": 13.3531, + "step": 362880 + }, + { + "epoch": 0.7330607594629864, + "grad_norm": 148.5986785888672, + "learning_rate": 2.120976879871811e-06, + "loss": 9.3221, + "step": 362890 + }, + { + "epoch": 0.7330809600956701, + "grad_norm": 447.45245361328125, + "learning_rate": 2.1206914950729673e-06, + "loss": 15.6156, + "step": 362900 + }, + { + "epoch": 0.733101160728354, + "grad_norm": 339.8567810058594, + "learning_rate": 2.1204061243075257e-06, + "loss": 14.7264, + "step": 362910 + }, + { + "epoch": 0.7331213613610378, + "grad_norm": 191.8043212890625, + "learning_rate": 2.1201207675768738e-06, + "loss": 13.6121, + "step": 362920 + }, + { + "epoch": 0.7331415619937216, + "grad_norm": 382.62396240234375, + "learning_rate": 2.1198354248824057e-06, + "loss": 15.7879, + "step": 362930 + }, + { + "epoch": 0.7331617626264054, + "grad_norm": 206.1188201904297, + "learning_rate": 2.1195500962255084e-06, + "loss": 10.5715, + "step": 362940 + }, + { + "epoch": 0.7331819632590892, + "grad_norm": 211.1554718017578, + "learning_rate": 2.119264781607577e-06, + "loss": 21.8638, + "step": 362950 + }, + { + "epoch": 0.7332021638917731, + "grad_norm": 245.4528350830078, + "learning_rate": 2.118979481029999e-06, + "loss": 11.091, + "step": 362960 + }, + { + "epoch": 0.7332223645244569, + "grad_norm": 1.5007383823394775, + "learning_rate": 2.118694194494164e-06, + "loss": 17.3465, + "step": 362970 + }, + { + "epoch": 0.7332425651571407, + "grad_norm": 504.3922424316406, + "learning_rate": 2.1184089220014657e-06, + "loss": 24.4486, + "step": 362980 + }, + { + "epoch": 0.7332627657898245, + "grad_norm": 268.60662841796875, + "learning_rate": 2.1181236635532913e-06, + "loss": 15.5253, + "step": 362990 + }, + { + "epoch": 0.7332829664225083, + "grad_norm": 218.5037384033203, + "learning_rate": 2.1178384191510344e-06, + "loss": 18.9683, + "step": 363000 + }, + { + "epoch": 0.7333031670551922, + "grad_norm": 459.7041931152344, + "learning_rate": 2.1175531887960834e-06, + "loss": 17.4342, + "step": 363010 + }, + { + "epoch": 0.733323367687876, + "grad_norm": 352.9919738769531, + "learning_rate": 2.1172679724898264e-06, + "loss": 20.0649, + "step": 363020 + }, + { + "epoch": 0.7333435683205598, + "grad_norm": 272.325439453125, + "learning_rate": 2.116982770233658e-06, + "loss": 16.1177, + "step": 363030 + }, + { + "epoch": 0.7333637689532436, + "grad_norm": 20.618610382080078, + "learning_rate": 2.116697582028966e-06, + "loss": 16.507, + "step": 363040 + }, + { + "epoch": 0.7333839695859274, + "grad_norm": 419.63336181640625, + "learning_rate": 2.116412407877138e-06, + "loss": 21.0784, + "step": 363050 + }, + { + "epoch": 0.7334041702186113, + "grad_norm": 742.9035034179688, + "learning_rate": 2.116127247779566e-06, + "loss": 18.4874, + "step": 363060 + }, + { + "epoch": 0.7334243708512951, + "grad_norm": 20.84160804748535, + "learning_rate": 2.1158421017376423e-06, + "loss": 27.8615, + "step": 363070 + }, + { + "epoch": 0.7334445714839789, + "grad_norm": 535.6408081054688, + "learning_rate": 2.1155569697527546e-06, + 
"loss": 20.284, + "step": 363080 + }, + { + "epoch": 0.7334647721166627, + "grad_norm": 193.92379760742188, + "learning_rate": 2.1152718518262903e-06, + "loss": 19.2419, + "step": 363090 + }, + { + "epoch": 0.7334849727493465, + "grad_norm": 211.2849578857422, + "learning_rate": 2.114986747959643e-06, + "loss": 17.1762, + "step": 363100 + }, + { + "epoch": 0.7335051733820304, + "grad_norm": 306.9668273925781, + "learning_rate": 2.1147016581542e-06, + "loss": 16.8532, + "step": 363110 + }, + { + "epoch": 0.7335253740147142, + "grad_norm": 67.09207916259766, + "learning_rate": 2.11441658241135e-06, + "loss": 19.1871, + "step": 363120 + }, + { + "epoch": 0.733545574647398, + "grad_norm": 190.48162841796875, + "learning_rate": 2.114131520732483e-06, + "loss": 17.9457, + "step": 363130 + }, + { + "epoch": 0.7335657752800818, + "grad_norm": 506.0332336425781, + "learning_rate": 2.113846473118991e-06, + "loss": 9.4889, + "step": 363140 + }, + { + "epoch": 0.7335859759127656, + "grad_norm": 303.1412048339844, + "learning_rate": 2.1135614395722613e-06, + "loss": 26.3246, + "step": 363150 + }, + { + "epoch": 0.7336061765454494, + "grad_norm": 370.9874267578125, + "learning_rate": 2.113276420093681e-06, + "loss": 17.4242, + "step": 363160 + }, + { + "epoch": 0.7336263771781332, + "grad_norm": 296.4694519042969, + "learning_rate": 2.1129914146846435e-06, + "loss": 27.9499, + "step": 363170 + }, + { + "epoch": 0.733646577810817, + "grad_norm": 622.138916015625, + "learning_rate": 2.1127064233465354e-06, + "loss": 23.5313, + "step": 363180 + }, + { + "epoch": 0.7336667784435008, + "grad_norm": 292.4410400390625, + "learning_rate": 2.1124214460807446e-06, + "loss": 22.4486, + "step": 363190 + }, + { + "epoch": 0.7336869790761846, + "grad_norm": 380.1524963378906, + "learning_rate": 2.112136482888663e-06, + "loss": 34.3623, + "step": 363200 + }, + { + "epoch": 0.7337071797088685, + "grad_norm": 230.6260223388672, + "learning_rate": 2.111851533771676e-06, + "loss": 12.5961, + "step": 363210 + }, + { + "epoch": 0.7337273803415523, + "grad_norm": 473.07080078125, + "learning_rate": 2.111566598731176e-06, + "loss": 15.4202, + "step": 363220 + }, + { + "epoch": 0.7337475809742361, + "grad_norm": 317.4713439941406, + "learning_rate": 2.1112816777685506e-06, + "loss": 20.3979, + "step": 363230 + }, + { + "epoch": 0.7337677816069199, + "grad_norm": 322.885498046875, + "learning_rate": 2.110996770885186e-06, + "loss": 14.5344, + "step": 363240 + }, + { + "epoch": 0.7337879822396037, + "grad_norm": 0.08826649934053421, + "learning_rate": 2.1107118780824744e-06, + "loss": 21.5315, + "step": 363250 + }, + { + "epoch": 0.7338081828722876, + "grad_norm": 552.6737060546875, + "learning_rate": 2.1104269993618008e-06, + "loss": 20.2855, + "step": 363260 + }, + { + "epoch": 0.7338283835049714, + "grad_norm": 497.2496643066406, + "learning_rate": 2.1101421347245576e-06, + "loss": 22.2619, + "step": 363270 + }, + { + "epoch": 0.7338485841376552, + "grad_norm": 0.0, + "learning_rate": 2.109857284172129e-06, + "loss": 23.4409, + "step": 363280 + }, + { + "epoch": 0.733868784770339, + "grad_norm": 398.0042724609375, + "learning_rate": 2.1095724477059077e-06, + "loss": 12.7938, + "step": 363290 + }, + { + "epoch": 0.7338889854030228, + "grad_norm": 451.1884765625, + "learning_rate": 2.1092876253272793e-06, + "loss": 13.8234, + "step": 363300 + }, + { + "epoch": 0.7339091860357067, + "grad_norm": 298.0521545410156, + "learning_rate": 2.1090028170376307e-06, + "loss": 8.3172, + "step": 363310 + }, + { + "epoch": 
0.7339293866683905, + "grad_norm": 160.68760681152344, + "learning_rate": 2.1087180228383536e-06, + "loss": 17.8757, + "step": 363320 + }, + { + "epoch": 0.7339495873010743, + "grad_norm": 760.4774169921875, + "learning_rate": 2.108433242730834e-06, + "loss": 18.7513, + "step": 363330 + }, + { + "epoch": 0.7339697879337581, + "grad_norm": 243.92681884765625, + "learning_rate": 2.1081484767164584e-06, + "loss": 10.6218, + "step": 363340 + }, + { + "epoch": 0.7339899885664419, + "grad_norm": 377.17681884765625, + "learning_rate": 2.1078637247966166e-06, + "loss": 13.2281, + "step": 363350 + }, + { + "epoch": 0.7340101891991258, + "grad_norm": 127.73228454589844, + "learning_rate": 2.1075789869726998e-06, + "loss": 19.4942, + "step": 363360 + }, + { + "epoch": 0.7340303898318096, + "grad_norm": 378.4305725097656, + "learning_rate": 2.1072942632460887e-06, + "loss": 17.1761, + "step": 363370 + }, + { + "epoch": 0.7340505904644934, + "grad_norm": 493.0738830566406, + "learning_rate": 2.107009553618174e-06, + "loss": 10.5615, + "step": 363380 + }, + { + "epoch": 0.7340707910971772, + "grad_norm": 651.5400390625, + "learning_rate": 2.106724858090346e-06, + "loss": 26.7237, + "step": 363390 + }, + { + "epoch": 0.734090991729861, + "grad_norm": 356.9836120605469, + "learning_rate": 2.10644017666399e-06, + "loss": 15.6841, + "step": 363400 + }, + { + "epoch": 0.7341111923625447, + "grad_norm": 338.3780212402344, + "learning_rate": 2.1061555093404917e-06, + "loss": 7.3456, + "step": 363410 + }, + { + "epoch": 0.7341313929952286, + "grad_norm": 175.104248046875, + "learning_rate": 2.105870856121241e-06, + "loss": 20.5585, + "step": 363420 + }, + { + "epoch": 0.7341515936279124, + "grad_norm": 449.11956787109375, + "learning_rate": 2.105586217007628e-06, + "loss": 14.4432, + "step": 363430 + }, + { + "epoch": 0.7341717942605962, + "grad_norm": 171.7938232421875, + "learning_rate": 2.1053015920010328e-06, + "loss": 5.9366, + "step": 363440 + }, + { + "epoch": 0.73419199489328, + "grad_norm": 489.88165283203125, + "learning_rate": 2.105016981102847e-06, + "loss": 10.2844, + "step": 363450 + }, + { + "epoch": 0.7342121955259638, + "grad_norm": 585.947265625, + "learning_rate": 2.104732384314459e-06, + "loss": 12.4221, + "step": 363460 + }, + { + "epoch": 0.7342323961586477, + "grad_norm": 564.4567260742188, + "learning_rate": 2.1044478016372544e-06, + "loss": 25.8418, + "step": 363470 + }, + { + "epoch": 0.7342525967913315, + "grad_norm": 870.10400390625, + "learning_rate": 2.104163233072618e-06, + "loss": 28.1137, + "step": 363480 + }, + { + "epoch": 0.7342727974240153, + "grad_norm": 273.01324462890625, + "learning_rate": 2.1038786786219405e-06, + "loss": 11.7085, + "step": 363490 + }, + { + "epoch": 0.7342929980566991, + "grad_norm": 37.50141906738281, + "learning_rate": 2.103594138286607e-06, + "loss": 31.0749, + "step": 363500 + }, + { + "epoch": 0.7343131986893829, + "grad_norm": 134.88909912109375, + "learning_rate": 2.103309612068003e-06, + "loss": 8.8806, + "step": 363510 + }, + { + "epoch": 0.7343333993220668, + "grad_norm": 263.0030212402344, + "learning_rate": 2.1030250999675184e-06, + "loss": 8.9125, + "step": 363520 + }, + { + "epoch": 0.7343535999547506, + "grad_norm": 207.8386993408203, + "learning_rate": 2.102740601986536e-06, + "loss": 12.5714, + "step": 363530 + }, + { + "epoch": 0.7343738005874344, + "grad_norm": 197.8450164794922, + "learning_rate": 2.1024561181264464e-06, + "loss": 22.7507, + "step": 363540 + }, + { + "epoch": 0.7343940012201182, + "grad_norm": 496.8077087402344, + 
"learning_rate": 2.1021716483886323e-06, + "loss": 17.176, + "step": 363550 + }, + { + "epoch": 0.734414201852802, + "grad_norm": 425.3866882324219, + "learning_rate": 2.1018871927744844e-06, + "loss": 11.5271, + "step": 363560 + }, + { + "epoch": 0.7344344024854859, + "grad_norm": 545.3555908203125, + "learning_rate": 2.1016027512853864e-06, + "loss": 12.7917, + "step": 363570 + }, + { + "epoch": 0.7344546031181697, + "grad_norm": 65.3341293334961, + "learning_rate": 2.101318323922723e-06, + "loss": 15.5178, + "step": 363580 + }, + { + "epoch": 0.7344748037508535, + "grad_norm": 304.2625732421875, + "learning_rate": 2.1010339106878846e-06, + "loss": 25.6953, + "step": 363590 + }, + { + "epoch": 0.7344950043835373, + "grad_norm": 9.351652145385742, + "learning_rate": 2.100749511582254e-06, + "loss": 14.0082, + "step": 363600 + }, + { + "epoch": 0.7345152050162211, + "grad_norm": 312.57421875, + "learning_rate": 2.10046512660722e-06, + "loss": 20.842, + "step": 363610 + }, + { + "epoch": 0.734535405648905, + "grad_norm": 342.7440490722656, + "learning_rate": 2.1001807557641673e-06, + "loss": 18.5986, + "step": 363620 + }, + { + "epoch": 0.7345556062815888, + "grad_norm": 0.0, + "learning_rate": 2.09989639905448e-06, + "loss": 13.1004, + "step": 363630 + }, + { + "epoch": 0.7345758069142726, + "grad_norm": 324.0922546386719, + "learning_rate": 2.099612056479548e-06, + "loss": 14.762, + "step": 363640 + }, + { + "epoch": 0.7345960075469564, + "grad_norm": 384.5039978027344, + "learning_rate": 2.0993277280407547e-06, + "loss": 11.5096, + "step": 363650 + }, + { + "epoch": 0.7346162081796402, + "grad_norm": 225.64239501953125, + "learning_rate": 2.099043413739485e-06, + "loss": 21.6839, + "step": 363660 + }, + { + "epoch": 0.734636408812324, + "grad_norm": 271.57293701171875, + "learning_rate": 2.098759113577125e-06, + "loss": 22.0194, + "step": 363670 + }, + { + "epoch": 0.7346566094450078, + "grad_norm": 186.3390350341797, + "learning_rate": 2.098474827555064e-06, + "loss": 35.961, + "step": 363680 + }, + { + "epoch": 0.7346768100776916, + "grad_norm": 1.4856864213943481, + "learning_rate": 2.098190555674684e-06, + "loss": 20.7479, + "step": 363690 + }, + { + "epoch": 0.7346970107103754, + "grad_norm": 263.3894348144531, + "learning_rate": 2.09790629793737e-06, + "loss": 13.7656, + "step": 363700 + }, + { + "epoch": 0.7347172113430592, + "grad_norm": 338.7405090332031, + "learning_rate": 2.09762205434451e-06, + "loss": 20.4562, + "step": 363710 + }, + { + "epoch": 0.734737411975743, + "grad_norm": 268.4193115234375, + "learning_rate": 2.0973378248974884e-06, + "loss": 12.3929, + "step": 363720 + }, + { + "epoch": 0.7347576126084269, + "grad_norm": 332.6508483886719, + "learning_rate": 2.0970536095976884e-06, + "loss": 24.0123, + "step": 363730 + }, + { + "epoch": 0.7347778132411107, + "grad_norm": 111.83285522460938, + "learning_rate": 2.0967694084464973e-06, + "loss": 23.1527, + "step": 363740 + }, + { + "epoch": 0.7347980138737945, + "grad_norm": 655.0184326171875, + "learning_rate": 2.096485221445301e-06, + "loss": 31.2206, + "step": 363750 + }, + { + "epoch": 0.7348182145064783, + "grad_norm": 398.7709045410156, + "learning_rate": 2.0962010485954844e-06, + "loss": 14.0068, + "step": 363760 + }, + { + "epoch": 0.7348384151391621, + "grad_norm": 366.68621826171875, + "learning_rate": 2.0959168898984295e-06, + "loss": 16.8575, + "step": 363770 + }, + { + "epoch": 0.734858615771846, + "grad_norm": 261.1799621582031, + "learning_rate": 2.095632745355525e-06, + "loss": 19.9537, + "step": 363780 
+ }, + { + "epoch": 0.7348788164045298, + "grad_norm": 368.545654296875, + "learning_rate": 2.0953486149681553e-06, + "loss": 15.8502, + "step": 363790 + }, + { + "epoch": 0.7348990170372136, + "grad_norm": 387.75653076171875, + "learning_rate": 2.095064498737701e-06, + "loss": 21.5372, + "step": 363800 + }, + { + "epoch": 0.7349192176698974, + "grad_norm": 573.4590454101562, + "learning_rate": 2.0947803966655526e-06, + "loss": 19.9252, + "step": 363810 + }, + { + "epoch": 0.7349394183025812, + "grad_norm": 515.4793701171875, + "learning_rate": 2.09449630875309e-06, + "loss": 12.6202, + "step": 363820 + }, + { + "epoch": 0.7349596189352651, + "grad_norm": 206.53895568847656, + "learning_rate": 2.0942122350017023e-06, + "loss": 9.7034, + "step": 363830 + }, + { + "epoch": 0.7349798195679489, + "grad_norm": 666.141357421875, + "learning_rate": 2.0939281754127695e-06, + "loss": 28.0815, + "step": 363840 + }, + { + "epoch": 0.7350000202006327, + "grad_norm": 162.98211669921875, + "learning_rate": 2.0936441299876803e-06, + "loss": 9.8799, + "step": 363850 + }, + { + "epoch": 0.7350202208333165, + "grad_norm": 344.7902526855469, + "learning_rate": 2.093360098727817e-06, + "loss": 12.8903, + "step": 363860 + }, + { + "epoch": 0.7350404214660003, + "grad_norm": 506.5223388671875, + "learning_rate": 2.0930760816345626e-06, + "loss": 18.2629, + "step": 363870 + }, + { + "epoch": 0.7350606220986842, + "grad_norm": 480.6791687011719, + "learning_rate": 2.092792078709304e-06, + "loss": 21.4682, + "step": 363880 + }, + { + "epoch": 0.735080822731368, + "grad_norm": 44.390289306640625, + "learning_rate": 2.0925080899534227e-06, + "loss": 15.0543, + "step": 363890 + }, + { + "epoch": 0.7351010233640518, + "grad_norm": 616.2040405273438, + "learning_rate": 2.0922241153683064e-06, + "loss": 13.2332, + "step": 363900 + }, + { + "epoch": 0.7351212239967356, + "grad_norm": 359.86737060546875, + "learning_rate": 2.091940154955337e-06, + "loss": 13.5108, + "step": 363910 + }, + { + "epoch": 0.7351414246294194, + "grad_norm": 0.5225976705551147, + "learning_rate": 2.0916562087158964e-06, + "loss": 14.2642, + "step": 363920 + }, + { + "epoch": 0.7351616252621032, + "grad_norm": 285.5023193359375, + "learning_rate": 2.091372276651373e-06, + "loss": 34.4661, + "step": 363930 + }, + { + "epoch": 0.735181825894787, + "grad_norm": 525.5093994140625, + "learning_rate": 2.0910883587631476e-06, + "loss": 13.6851, + "step": 363940 + }, + { + "epoch": 0.7352020265274708, + "grad_norm": 472.0049743652344, + "learning_rate": 2.0908044550526034e-06, + "loss": 16.949, + "step": 363950 + }, + { + "epoch": 0.7352222271601546, + "grad_norm": 181.69915771484375, + "learning_rate": 2.0905205655211257e-06, + "loss": 19.0975, + "step": 363960 + }, + { + "epoch": 0.7352424277928384, + "grad_norm": 816.1010131835938, + "learning_rate": 2.090236690170101e-06, + "loss": 23.5879, + "step": 363970 + }, + { + "epoch": 0.7352626284255223, + "grad_norm": 320.40216064453125, + "learning_rate": 2.0899528290009065e-06, + "loss": 16.6658, + "step": 363980 + }, + { + "epoch": 0.7352828290582061, + "grad_norm": 337.8548583984375, + "learning_rate": 2.089668982014929e-06, + "loss": 16.0121, + "step": 363990 + }, + { + "epoch": 0.7353030296908899, + "grad_norm": 363.7285461425781, + "learning_rate": 2.0893851492135536e-06, + "loss": 24.7558, + "step": 364000 + }, + { + "epoch": 0.7353232303235737, + "grad_norm": 506.2754211425781, + "learning_rate": 2.0891013305981615e-06, + "loss": 16.5288, + "step": 364010 + }, + { + "epoch": 0.7353434309562575, + 
"grad_norm": 606.0123901367188, + "learning_rate": 2.0888175261701355e-06, + "loss": 22.4118, + "step": 364020 + }, + { + "epoch": 0.7353636315889414, + "grad_norm": 555.2738647460938, + "learning_rate": 2.0885337359308592e-06, + "loss": 27.5469, + "step": 364030 + }, + { + "epoch": 0.7353838322216252, + "grad_norm": 167.02308654785156, + "learning_rate": 2.08824995988172e-06, + "loss": 7.4998, + "step": 364040 + }, + { + "epoch": 0.735404032854309, + "grad_norm": 167.9142608642578, + "learning_rate": 2.087966198024094e-06, + "loss": 20.0514, + "step": 364050 + }, + { + "epoch": 0.7354242334869928, + "grad_norm": 50.869422912597656, + "learning_rate": 2.0876824503593673e-06, + "loss": 23.0109, + "step": 364060 + }, + { + "epoch": 0.7354444341196766, + "grad_norm": 720.871826171875, + "learning_rate": 2.087398716888925e-06, + "loss": 22.7871, + "step": 364070 + }, + { + "epoch": 0.7354646347523605, + "grad_norm": 174.04391479492188, + "learning_rate": 2.0871149976141484e-06, + "loss": 12.0311, + "step": 364080 + }, + { + "epoch": 0.7354848353850443, + "grad_norm": 84.24659729003906, + "learning_rate": 2.086831292536418e-06, + "loss": 17.644, + "step": 364090 + }, + { + "epoch": 0.7355050360177281, + "grad_norm": 522.3529663085938, + "learning_rate": 2.0865476016571206e-06, + "loss": 13.5571, + "step": 364100 + }, + { + "epoch": 0.7355252366504119, + "grad_norm": 631.71826171875, + "learning_rate": 2.0862639249776364e-06, + "loss": 16.4983, + "step": 364110 + }, + { + "epoch": 0.7355454372830957, + "grad_norm": 1105.38330078125, + "learning_rate": 2.085980262499347e-06, + "loss": 34.7617, + "step": 364120 + }, + { + "epoch": 0.7355656379157796, + "grad_norm": 649.3748168945312, + "learning_rate": 2.085696614223638e-06, + "loss": 12.8657, + "step": 364130 + }, + { + "epoch": 0.7355858385484634, + "grad_norm": 285.0609130859375, + "learning_rate": 2.085412980151888e-06, + "loss": 13.6714, + "step": 364140 + }, + { + "epoch": 0.7356060391811472, + "grad_norm": 337.8122253417969, + "learning_rate": 2.085129360285484e-06, + "loss": 13.7497, + "step": 364150 + }, + { + "epoch": 0.735626239813831, + "grad_norm": 132.0164337158203, + "learning_rate": 2.0848457546258037e-06, + "loss": 18.5715, + "step": 364160 + }, + { + "epoch": 0.7356464404465148, + "grad_norm": 1003.1087036132812, + "learning_rate": 2.0845621631742342e-06, + "loss": 45.9939, + "step": 364170 + }, + { + "epoch": 0.7356666410791985, + "grad_norm": 329.656982421875, + "learning_rate": 2.0842785859321545e-06, + "loss": 16.9075, + "step": 364180 + }, + { + "epoch": 0.7356868417118824, + "grad_norm": 368.9480895996094, + "learning_rate": 2.083995022900946e-06, + "loss": 13.125, + "step": 364190 + }, + { + "epoch": 0.7357070423445662, + "grad_norm": 306.953369140625, + "learning_rate": 2.083711474081993e-06, + "loss": 15.056, + "step": 364200 + }, + { + "epoch": 0.73572724297725, + "grad_norm": 574.099365234375, + "learning_rate": 2.0834279394766755e-06, + "loss": 17.3683, + "step": 364210 + }, + { + "epoch": 0.7357474436099338, + "grad_norm": 419.91485595703125, + "learning_rate": 2.083144419086378e-06, + "loss": 22.846, + "step": 364220 + }, + { + "epoch": 0.7357676442426176, + "grad_norm": 361.4034423828125, + "learning_rate": 2.082860912912481e-06, + "loss": 15.6339, + "step": 364230 + }, + { + "epoch": 0.7357878448753015, + "grad_norm": 498.6043395996094, + "learning_rate": 2.082577420956364e-06, + "loss": 10.3416, + "step": 364240 + }, + { + "epoch": 0.7358080455079853, + "grad_norm": 689.240478515625, + "learning_rate": 
2.0822939432194134e-06, + "loss": 15.2039, + "step": 364250 + }, + { + "epoch": 0.7358282461406691, + "grad_norm": 245.316650390625, + "learning_rate": 2.082010479703008e-06, + "loss": 12.0396, + "step": 364260 + }, + { + "epoch": 0.7358484467733529, + "grad_norm": 293.8541564941406, + "learning_rate": 2.0817270304085273e-06, + "loss": 14.1903, + "step": 364270 + }, + { + "epoch": 0.7358686474060367, + "grad_norm": 469.2142333984375, + "learning_rate": 2.0814435953373554e-06, + "loss": 43.1208, + "step": 364280 + }, + { + "epoch": 0.7358888480387206, + "grad_norm": 59.95088577270508, + "learning_rate": 2.081160174490875e-06, + "loss": 19.9318, + "step": 364290 + }, + { + "epoch": 0.7359090486714044, + "grad_norm": 199.74713134765625, + "learning_rate": 2.080876767870466e-06, + "loss": 17.9222, + "step": 364300 + }, + { + "epoch": 0.7359292493040882, + "grad_norm": 863.7758178710938, + "learning_rate": 2.0805933754775083e-06, + "loss": 26.1903, + "step": 364310 + }, + { + "epoch": 0.735949449936772, + "grad_norm": 372.6961669921875, + "learning_rate": 2.0803099973133856e-06, + "loss": 10.2383, + "step": 364320 + }, + { + "epoch": 0.7359696505694558, + "grad_norm": 341.9979553222656, + "learning_rate": 2.080026633379478e-06, + "loss": 25.43, + "step": 364330 + }, + { + "epoch": 0.7359898512021397, + "grad_norm": 58.08280944824219, + "learning_rate": 2.079743283677165e-06, + "loss": 15.3824, + "step": 364340 + }, + { + "epoch": 0.7360100518348235, + "grad_norm": 293.8395080566406, + "learning_rate": 2.079459948207828e-06, + "loss": 15.1824, + "step": 364350 + }, + { + "epoch": 0.7360302524675073, + "grad_norm": 176.7348175048828, + "learning_rate": 2.079176626972852e-06, + "loss": 15.4581, + "step": 364360 + }, + { + "epoch": 0.7360504531001911, + "grad_norm": 0.0, + "learning_rate": 2.0788933199736145e-06, + "loss": 17.6577, + "step": 364370 + }, + { + "epoch": 0.736070653732875, + "grad_norm": 269.6431884765625, + "learning_rate": 2.0786100272114943e-06, + "loss": 13.2815, + "step": 364380 + }, + { + "epoch": 0.7360908543655588, + "grad_norm": 343.29443359375, + "learning_rate": 2.0783267486878773e-06, + "loss": 13.7139, + "step": 364390 + }, + { + "epoch": 0.7361110549982426, + "grad_norm": 25.994674682617188, + "learning_rate": 2.07804348440414e-06, + "loss": 11.1712, + "step": 364400 + }, + { + "epoch": 0.7361312556309264, + "grad_norm": 241.93959045410156, + "learning_rate": 2.077760234361664e-06, + "loss": 11.0591, + "step": 364410 + }, + { + "epoch": 0.7361514562636102, + "grad_norm": 423.5716552734375, + "learning_rate": 2.0774769985618317e-06, + "loss": 15.3195, + "step": 364420 + }, + { + "epoch": 0.736171656896294, + "grad_norm": 230.12428283691406, + "learning_rate": 2.07719377700602e-06, + "loss": 34.281, + "step": 364430 + }, + { + "epoch": 0.7361918575289778, + "grad_norm": 215.46192932128906, + "learning_rate": 2.0769105696956128e-06, + "loss": 21.9807, + "step": 364440 + }, + { + "epoch": 0.7362120581616616, + "grad_norm": 353.5723571777344, + "learning_rate": 2.0766273766319873e-06, + "loss": 18.0947, + "step": 364450 + }, + { + "epoch": 0.7362322587943454, + "grad_norm": 421.5323791503906, + "learning_rate": 2.076344197816527e-06, + "loss": 26.1596, + "step": 364460 + }, + { + "epoch": 0.7362524594270292, + "grad_norm": 398.92974853515625, + "learning_rate": 2.076061033250611e-06, + "loss": 23.5638, + "step": 364470 + }, + { + "epoch": 0.736272660059713, + "grad_norm": 1340.0908203125, + "learning_rate": 2.0757778829356175e-06, + "loss": 16.1693, + "step": 364480 + }, + { 
+ "epoch": 0.7362928606923969, + "grad_norm": 318.86724853515625, + "learning_rate": 2.0754947468729285e-06, + "loss": 15.0761, + "step": 364490 + }, + { + "epoch": 0.7363130613250807, + "grad_norm": 371.62493896484375, + "learning_rate": 2.075211625063923e-06, + "loss": 22.8919, + "step": 364500 + }, + { + "epoch": 0.7363332619577645, + "grad_norm": 436.4801330566406, + "learning_rate": 2.074928517509982e-06, + "loss": 23.7027, + "step": 364510 + }, + { + "epoch": 0.7363534625904483, + "grad_norm": 197.2120819091797, + "learning_rate": 2.0746454242124846e-06, + "loss": 12.4145, + "step": 364520 + }, + { + "epoch": 0.7363736632231321, + "grad_norm": 721.41357421875, + "learning_rate": 2.0743623451728096e-06, + "loss": 15.5012, + "step": 364530 + }, + { + "epoch": 0.736393863855816, + "grad_norm": 423.0234069824219, + "learning_rate": 2.074079280392339e-06, + "loss": 18.1207, + "step": 364540 + }, + { + "epoch": 0.7364140644884998, + "grad_norm": 756.3760375976562, + "learning_rate": 2.0737962298724513e-06, + "loss": 25.2786, + "step": 364550 + }, + { + "epoch": 0.7364342651211836, + "grad_norm": 466.7777404785156, + "learning_rate": 2.0735131936145237e-06, + "loss": 9.0022, + "step": 364560 + }, + { + "epoch": 0.7364544657538674, + "grad_norm": 189.84327697753906, + "learning_rate": 2.073230171619938e-06, + "loss": 19.0023, + "step": 364570 + }, + { + "epoch": 0.7364746663865512, + "grad_norm": 412.6581115722656, + "learning_rate": 2.0729471638900772e-06, + "loss": 27.9567, + "step": 364580 + }, + { + "epoch": 0.736494867019235, + "grad_norm": 321.45556640625, + "learning_rate": 2.0726641704263133e-06, + "loss": 25.5634, + "step": 364590 + }, + { + "epoch": 0.7365150676519189, + "grad_norm": 626.9800415039062, + "learning_rate": 2.0723811912300295e-06, + "loss": 11.3805, + "step": 364600 + }, + { + "epoch": 0.7365352682846027, + "grad_norm": 456.0321044921875, + "learning_rate": 2.072098226302606e-06, + "loss": 15.3584, + "step": 364610 + }, + { + "epoch": 0.7365554689172865, + "grad_norm": 230.5832977294922, + "learning_rate": 2.0718152756454207e-06, + "loss": 21.3752, + "step": 364620 + }, + { + "epoch": 0.7365756695499703, + "grad_norm": 338.4997253417969, + "learning_rate": 2.071532339259851e-06, + "loss": 24.3353, + "step": 364630 + }, + { + "epoch": 0.7365958701826542, + "grad_norm": 361.087646484375, + "learning_rate": 2.0712494171472776e-06, + "loss": 15.6216, + "step": 364640 + }, + { + "epoch": 0.736616070815338, + "grad_norm": 315.47674560546875, + "learning_rate": 2.0709665093090824e-06, + "loss": 14.0099, + "step": 364650 + }, + { + "epoch": 0.7366362714480218, + "grad_norm": 89.35796356201172, + "learning_rate": 2.0706836157466383e-06, + "loss": 13.5254, + "step": 364660 + }, + { + "epoch": 0.7366564720807056, + "grad_norm": 202.1490936279297, + "learning_rate": 2.0704007364613266e-06, + "loss": 14.4107, + "step": 364670 + }, + { + "epoch": 0.7366766727133894, + "grad_norm": 467.12591552734375, + "learning_rate": 2.0701178714545285e-06, + "loss": 18.407, + "step": 364680 + }, + { + "epoch": 0.7366968733460731, + "grad_norm": 582.572998046875, + "learning_rate": 2.0698350207276204e-06, + "loss": 15.5343, + "step": 364690 + }, + { + "epoch": 0.736717073978757, + "grad_norm": 25.415082931518555, + "learning_rate": 2.0695521842819788e-06, + "loss": 17.5226, + "step": 364700 + }, + { + "epoch": 0.7367372746114408, + "grad_norm": 229.2875518798828, + "learning_rate": 2.0692693621189864e-06, + "loss": 14.4092, + "step": 364710 + }, + { + "epoch": 0.7367574752441246, + "grad_norm": 
126.3668212890625, + "learning_rate": 2.0689865542400196e-06, + "loss": 25.4265, + "step": 364720 + }, + { + "epoch": 0.7367776758768084, + "grad_norm": 288.700927734375, + "learning_rate": 2.0687037606464554e-06, + "loss": 20.3068, + "step": 364730 + }, + { + "epoch": 0.7367978765094922, + "grad_norm": 22.80805206298828, + "learning_rate": 2.0684209813396748e-06, + "loss": 14.9529, + "step": 364740 + }, + { + "epoch": 0.7368180771421761, + "grad_norm": 362.77783203125, + "learning_rate": 2.0681382163210533e-06, + "loss": 16.4827, + "step": 364750 + }, + { + "epoch": 0.7368382777748599, + "grad_norm": 321.43145751953125, + "learning_rate": 2.0678554655919725e-06, + "loss": 15.38, + "step": 364760 + }, + { + "epoch": 0.7368584784075437, + "grad_norm": 318.1789245605469, + "learning_rate": 2.0675727291538068e-06, + "loss": 20.9938, + "step": 364770 + }, + { + "epoch": 0.7368786790402275, + "grad_norm": 430.7816467285156, + "learning_rate": 2.0672900070079375e-06, + "loss": 14.727, + "step": 364780 + }, + { + "epoch": 0.7368988796729113, + "grad_norm": 599.7792358398438, + "learning_rate": 2.067007299155741e-06, + "loss": 18.0324, + "step": 364790 + }, + { + "epoch": 0.7369190803055952, + "grad_norm": 183.4662628173828, + "learning_rate": 2.066724605598594e-06, + "loss": 13.3307, + "step": 364800 + }, + { + "epoch": 0.736939280938279, + "grad_norm": 332.3937683105469, + "learning_rate": 2.0664419263378764e-06, + "loss": 19.6866, + "step": 364810 + }, + { + "epoch": 0.7369594815709628, + "grad_norm": 208.60269165039062, + "learning_rate": 2.066159261374964e-06, + "loss": 21.2156, + "step": 364820 + }, + { + "epoch": 0.7369796822036466, + "grad_norm": 124.0291519165039, + "learning_rate": 2.0658766107112367e-06, + "loss": 25.1262, + "step": 364830 + }, + { + "epoch": 0.7369998828363304, + "grad_norm": 454.86767578125, + "learning_rate": 2.0655939743480714e-06, + "loss": 15.669, + "step": 364840 + }, + { + "epoch": 0.7370200834690143, + "grad_norm": 172.296875, + "learning_rate": 2.0653113522868427e-06, + "loss": 11.7845, + "step": 364850 + }, + { + "epoch": 0.7370402841016981, + "grad_norm": 840.0567626953125, + "learning_rate": 2.065028744528933e-06, + "loss": 29.7223, + "step": 364860 + }, + { + "epoch": 0.7370604847343819, + "grad_norm": 548.4918823242188, + "learning_rate": 2.0647461510757173e-06, + "loss": 21.1387, + "step": 364870 + }, + { + "epoch": 0.7370806853670657, + "grad_norm": 306.392333984375, + "learning_rate": 2.0644635719285704e-06, + "loss": 18.6181, + "step": 364880 + }, + { + "epoch": 0.7371008859997495, + "grad_norm": 270.8236999511719, + "learning_rate": 2.064181007088873e-06, + "loss": 15.3122, + "step": 364890 + }, + { + "epoch": 0.7371210866324334, + "grad_norm": 476.0078430175781, + "learning_rate": 2.063898456558002e-06, + "loss": 27.3251, + "step": 364900 + }, + { + "epoch": 0.7371412872651172, + "grad_norm": 406.9973449707031, + "learning_rate": 2.0636159203373344e-06, + "loss": 29.6369, + "step": 364910 + }, + { + "epoch": 0.737161487897801, + "grad_norm": 354.7281494140625, + "learning_rate": 2.063333398428245e-06, + "loss": 15.3554, + "step": 364920 + }, + { + "epoch": 0.7371816885304848, + "grad_norm": 301.2372131347656, + "learning_rate": 2.063050890832114e-06, + "loss": 14.0986, + "step": 364930 + }, + { + "epoch": 0.7372018891631686, + "grad_norm": 245.56309509277344, + "learning_rate": 2.0627683975503165e-06, + "loss": 12.8766, + "step": 364940 + }, + { + "epoch": 0.7372220897958524, + "grad_norm": 196.98167419433594, + "learning_rate": 
2.0624859185842284e-06, + "loss": 21.5888, + "step": 364950 + }, + { + "epoch": 0.7372422904285362, + "grad_norm": 216.22962951660156, + "learning_rate": 2.062203453935227e-06, + "loss": 12.4488, + "step": 364960 + }, + { + "epoch": 0.73726249106122, + "grad_norm": 132.98841857910156, + "learning_rate": 2.0619210036046923e-06, + "loss": 14.3631, + "step": 364970 + }, + { + "epoch": 0.7372826916939038, + "grad_norm": 459.241943359375, + "learning_rate": 2.0616385675939977e-06, + "loss": 14.7004, + "step": 364980 + }, + { + "epoch": 0.7373028923265876, + "grad_norm": 532.1847534179688, + "learning_rate": 2.0613561459045184e-06, + "loss": 30.1397, + "step": 364990 + }, + { + "epoch": 0.7373230929592715, + "grad_norm": 301.25482177734375, + "learning_rate": 2.061073738537635e-06, + "loss": 20.9194, + "step": 365000 + }, + { + "epoch": 0.7373432935919553, + "grad_norm": 62.32251739501953, + "learning_rate": 2.0607913454947215e-06, + "loss": 16.0964, + "step": 365010 + }, + { + "epoch": 0.7373634942246391, + "grad_norm": 180.23123168945312, + "learning_rate": 2.060508966777153e-06, + "loss": 9.9189, + "step": 365020 + }, + { + "epoch": 0.7373836948573229, + "grad_norm": 269.3304443359375, + "learning_rate": 2.0602266023863088e-06, + "loss": 16.3484, + "step": 365030 + }, + { + "epoch": 0.7374038954900067, + "grad_norm": 101.0495376586914, + "learning_rate": 2.059944252323562e-06, + "loss": 13.0723, + "step": 365040 + }, + { + "epoch": 0.7374240961226906, + "grad_norm": 83.1161880493164, + "learning_rate": 2.0596619165902916e-06, + "loss": 17.9313, + "step": 365050 + }, + { + "epoch": 0.7374442967553744, + "grad_norm": 159.0005645751953, + "learning_rate": 2.059379595187871e-06, + "loss": 25.5723, + "step": 365060 + }, + { + "epoch": 0.7374644973880582, + "grad_norm": 489.9853515625, + "learning_rate": 2.0590972881176798e-06, + "loss": 17.5896, + "step": 365070 + }, + { + "epoch": 0.737484698020742, + "grad_norm": 276.6214599609375, + "learning_rate": 2.058814995381091e-06, + "loss": 11.981, + "step": 365080 + }, + { + "epoch": 0.7375048986534258, + "grad_norm": 334.879150390625, + "learning_rate": 2.0585327169794796e-06, + "loss": 25.8994, + "step": 365090 + }, + { + "epoch": 0.7375250992861097, + "grad_norm": 153.3126678466797, + "learning_rate": 2.0582504529142248e-06, + "loss": 25.3136, + "step": 365100 + }, + { + "epoch": 0.7375452999187935, + "grad_norm": 860.970947265625, + "learning_rate": 2.0579682031866988e-06, + "loss": 23.6348, + "step": 365110 + }, + { + "epoch": 0.7375655005514773, + "grad_norm": 4.6487836837768555, + "learning_rate": 2.057685967798281e-06, + "loss": 14.9108, + "step": 365120 + }, + { + "epoch": 0.7375857011841611, + "grad_norm": 63.38954162597656, + "learning_rate": 2.0574037467503444e-06, + "loss": 17.4898, + "step": 365130 + }, + { + "epoch": 0.7376059018168449, + "grad_norm": 324.40863037109375, + "learning_rate": 2.0571215400442634e-06, + "loss": 11.4311, + "step": 365140 + }, + { + "epoch": 0.7376261024495288, + "grad_norm": 185.480224609375, + "learning_rate": 2.056839347681417e-06, + "loss": 17.5421, + "step": 365150 + }, + { + "epoch": 0.7376463030822126, + "grad_norm": 262.0285949707031, + "learning_rate": 2.056557169663179e-06, + "loss": 16.7033, + "step": 365160 + }, + { + "epoch": 0.7376665037148964, + "grad_norm": 415.1700744628906, + "learning_rate": 2.056275005990922e-06, + "loss": 20.0006, + "step": 365170 + }, + { + "epoch": 0.7376867043475802, + "grad_norm": 502.0487060546875, + "learning_rate": 2.0559928566660235e-06, + "loss": 17.7509, + "step": 
365180 + }, + { + "epoch": 0.737706904980264, + "grad_norm": 340.950439453125, + "learning_rate": 2.055710721689863e-06, + "loss": 31.2926, + "step": 365190 + }, + { + "epoch": 0.7377271056129477, + "grad_norm": 87.57659149169922, + "learning_rate": 2.0554286010638076e-06, + "loss": 28.3744, + "step": 365200 + }, + { + "epoch": 0.7377473062456316, + "grad_norm": 794.2094116210938, + "learning_rate": 2.055146494789236e-06, + "loss": 24.5646, + "step": 365210 + }, + { + "epoch": 0.7377675068783154, + "grad_norm": 286.29437255859375, + "learning_rate": 2.0548644028675246e-06, + "loss": 18.8776, + "step": 365220 + }, + { + "epoch": 0.7377877075109992, + "grad_norm": 427.6775817871094, + "learning_rate": 2.054582325300047e-06, + "loss": 13.636, + "step": 365230 + }, + { + "epoch": 0.737807908143683, + "grad_norm": 364.1160888671875, + "learning_rate": 2.0543002620881764e-06, + "loss": 12.5557, + "step": 365240 + }, + { + "epoch": 0.7378281087763668, + "grad_norm": 641.0657958984375, + "learning_rate": 2.0540182132332886e-06, + "loss": 19.4537, + "step": 365250 + }, + { + "epoch": 0.7378483094090507, + "grad_norm": 264.384765625, + "learning_rate": 2.0537361787367625e-06, + "loss": 15.7444, + "step": 365260 + }, + { + "epoch": 0.7378685100417345, + "grad_norm": 192.15878295898438, + "learning_rate": 2.0534541585999656e-06, + "loss": 6.6334, + "step": 365270 + }, + { + "epoch": 0.7378887106744183, + "grad_norm": 156.77110290527344, + "learning_rate": 2.053172152824276e-06, + "loss": 15.3243, + "step": 365280 + }, + { + "epoch": 0.7379089113071021, + "grad_norm": 263.1060485839844, + "learning_rate": 2.052890161411069e-06, + "loss": 26.4381, + "step": 365290 + }, + { + "epoch": 0.7379291119397859, + "grad_norm": 848.5113525390625, + "learning_rate": 2.0526081843617183e-06, + "loss": 20.6396, + "step": 365300 + }, + { + "epoch": 0.7379493125724698, + "grad_norm": 642.378173828125, + "learning_rate": 2.0523262216775962e-06, + "loss": 21.4196, + "step": 365310 + }, + { + "epoch": 0.7379695132051536, + "grad_norm": 191.56842041015625, + "learning_rate": 2.0520442733600805e-06, + "loss": 20.8087, + "step": 365320 + }, + { + "epoch": 0.7379897138378374, + "grad_norm": 317.2579650878906, + "learning_rate": 2.0517623394105427e-06, + "loss": 22.4094, + "step": 365330 + }, + { + "epoch": 0.7380099144705212, + "grad_norm": 251.9071807861328, + "learning_rate": 2.0514804198303565e-06, + "loss": 19.7969, + "step": 365340 + }, + { + "epoch": 0.738030115103205, + "grad_norm": 276.1114501953125, + "learning_rate": 2.0511985146208966e-06, + "loss": 14.7182, + "step": 365350 + }, + { + "epoch": 0.7380503157358889, + "grad_norm": 352.9045715332031, + "learning_rate": 2.0509166237835398e-06, + "loss": 43.914, + "step": 365360 + }, + { + "epoch": 0.7380705163685727, + "grad_norm": 1177.640380859375, + "learning_rate": 2.0506347473196582e-06, + "loss": 23.1351, + "step": 365370 + }, + { + "epoch": 0.7380907170012565, + "grad_norm": 346.06512451171875, + "learning_rate": 2.0503528852306225e-06, + "loss": 17.196, + "step": 365380 + }, + { + "epoch": 0.7381109176339403, + "grad_norm": 154.27084350585938, + "learning_rate": 2.0500710375178107e-06, + "loss": 8.2563, + "step": 365390 + }, + { + "epoch": 0.7381311182666241, + "grad_norm": 439.40386962890625, + "learning_rate": 2.049789204182596e-06, + "loss": 16.077, + "step": 365400 + }, + { + "epoch": 0.738151318899308, + "grad_norm": 498.5280456542969, + "learning_rate": 2.0495073852263476e-06, + "loss": 20.0574, + "step": 365410 + }, + { + "epoch": 0.7381715195319918, + 
"grad_norm": 197.1608428955078, + "learning_rate": 2.0492255806504453e-06, + "loss": 27.3639, + "step": 365420 + }, + { + "epoch": 0.7381917201646756, + "grad_norm": 387.186767578125, + "learning_rate": 2.048943790456257e-06, + "loss": 27.2629, + "step": 365430 + }, + { + "epoch": 0.7382119207973594, + "grad_norm": 679.3992919921875, + "learning_rate": 2.0486620146451613e-06, + "loss": 27.7594, + "step": 365440 + }, + { + "epoch": 0.7382321214300432, + "grad_norm": 132.18710327148438, + "learning_rate": 2.0483802532185286e-06, + "loss": 15.0273, + "step": 365450 + }, + { + "epoch": 0.738252322062727, + "grad_norm": 401.27386474609375, + "learning_rate": 2.048098506177731e-06, + "loss": 12.9366, + "step": 365460 + }, + { + "epoch": 0.7382725226954108, + "grad_norm": 429.3953857421875, + "learning_rate": 2.0478167735241443e-06, + "loss": 21.7013, + "step": 365470 + }, + { + "epoch": 0.7382927233280946, + "grad_norm": 186.17623901367188, + "learning_rate": 2.0475350552591418e-06, + "loss": 24.4907, + "step": 365480 + }, + { + "epoch": 0.7383129239607784, + "grad_norm": 295.16973876953125, + "learning_rate": 2.0472533513840923e-06, + "loss": 14.8181, + "step": 365490 + }, + { + "epoch": 0.7383331245934622, + "grad_norm": 18.06108283996582, + "learning_rate": 2.046971661900373e-06, + "loss": 14.5095, + "step": 365500 + }, + { + "epoch": 0.738353325226146, + "grad_norm": 492.65283203125, + "learning_rate": 2.0466899868093566e-06, + "loss": 18.0839, + "step": 365510 + }, + { + "epoch": 0.7383735258588299, + "grad_norm": 302.4225158691406, + "learning_rate": 2.0464083261124156e-06, + "loss": 24.0759, + "step": 365520 + }, + { + "epoch": 0.7383937264915137, + "grad_norm": 238.61460876464844, + "learning_rate": 2.04612667981092e-06, + "loss": 27.7483, + "step": 365530 + }, + { + "epoch": 0.7384139271241975, + "grad_norm": 437.5618896484375, + "learning_rate": 2.0458450479062465e-06, + "loss": 11.6448, + "step": 365540 + }, + { + "epoch": 0.7384341277568813, + "grad_norm": 511.740234375, + "learning_rate": 2.045563430399766e-06, + "loss": 18.7123, + "step": 365550 + }, + { + "epoch": 0.7384543283895652, + "grad_norm": 397.40032958984375, + "learning_rate": 2.0452818272928493e-06, + "loss": 14.0414, + "step": 365560 + }, + { + "epoch": 0.738474529022249, + "grad_norm": 583.4635009765625, + "learning_rate": 2.0450002385868707e-06, + "loss": 24.7034, + "step": 365570 + }, + { + "epoch": 0.7384947296549328, + "grad_norm": 377.3338928222656, + "learning_rate": 2.0447186642832063e-06, + "loss": 11.6895, + "step": 365580 + }, + { + "epoch": 0.7385149302876166, + "grad_norm": 451.6911926269531, + "learning_rate": 2.0444371043832208e-06, + "loss": 15.0496, + "step": 365590 + }, + { + "epoch": 0.7385351309203004, + "grad_norm": 454.59991455078125, + "learning_rate": 2.04415555888829e-06, + "loss": 23.482, + "step": 365600 + }, + { + "epoch": 0.7385553315529843, + "grad_norm": 329.60504150390625, + "learning_rate": 2.0438740277997888e-06, + "loss": 21.0239, + "step": 365610 + }, + { + "epoch": 0.7385755321856681, + "grad_norm": 72.90430450439453, + "learning_rate": 2.0435925111190867e-06, + "loss": 9.1965, + "step": 365620 + }, + { + "epoch": 0.7385957328183519, + "grad_norm": 332.781494140625, + "learning_rate": 2.043311008847555e-06, + "loss": 26.3267, + "step": 365630 + }, + { + "epoch": 0.7386159334510357, + "grad_norm": 383.88885498046875, + "learning_rate": 2.043029520986568e-06, + "loss": 16.9502, + "step": 365640 + }, + { + "epoch": 0.7386361340837195, + "grad_norm": 175.68109130859375, + 
"learning_rate": 2.042748047537495e-06, + "loss": 21.7168, + "step": 365650 + }, + { + "epoch": 0.7386563347164034, + "grad_norm": 461.905517578125, + "learning_rate": 2.0424665885017114e-06, + "loss": 19.2489, + "step": 365660 + }, + { + "epoch": 0.7386765353490872, + "grad_norm": 205.0062255859375, + "learning_rate": 2.0421851438805845e-06, + "loss": 14.8898, + "step": 365670 + }, + { + "epoch": 0.738696735981771, + "grad_norm": 146.28482055664062, + "learning_rate": 2.0419037136754903e-06, + "loss": 13.7219, + "step": 365680 + }, + { + "epoch": 0.7387169366144548, + "grad_norm": 561.8257446289062, + "learning_rate": 2.0416222978877993e-06, + "loss": 22.6266, + "step": 365690 + }, + { + "epoch": 0.7387371372471386, + "grad_norm": 231.55657958984375, + "learning_rate": 2.04134089651888e-06, + "loss": 21.4549, + "step": 365700 + }, + { + "epoch": 0.7387573378798225, + "grad_norm": 353.3453369140625, + "learning_rate": 2.0410595095701084e-06, + "loss": 23.9026, + "step": 365710 + }, + { + "epoch": 0.7387775385125062, + "grad_norm": 359.21295166015625, + "learning_rate": 2.040778137042852e-06, + "loss": 21.8223, + "step": 365720 + }, + { + "epoch": 0.73879773914519, + "grad_norm": 386.9476318359375, + "learning_rate": 2.040496778938486e-06, + "loss": 16.4884, + "step": 365730 + }, + { + "epoch": 0.7388179397778738, + "grad_norm": 222.1886444091797, + "learning_rate": 2.04021543525838e-06, + "loss": 26.9491, + "step": 365740 + }, + { + "epoch": 0.7388381404105576, + "grad_norm": 301.1086120605469, + "learning_rate": 2.0399341060039023e-06, + "loss": 25.0354, + "step": 365750 + }, + { + "epoch": 0.7388583410432414, + "grad_norm": 253.30914306640625, + "learning_rate": 2.039652791176429e-06, + "loss": 9.767, + "step": 365760 + }, + { + "epoch": 0.7388785416759253, + "grad_norm": 424.1557922363281, + "learning_rate": 2.0393714907773294e-06, + "loss": 20.6632, + "step": 365770 + }, + { + "epoch": 0.7388987423086091, + "grad_norm": 509.7571105957031, + "learning_rate": 2.0390902048079717e-06, + "loss": 19.995, + "step": 365780 + }, + { + "epoch": 0.7389189429412929, + "grad_norm": 571.0275268554688, + "learning_rate": 2.038808933269729e-06, + "loss": 16.7485, + "step": 365790 + }, + { + "epoch": 0.7389391435739767, + "grad_norm": 301.386962890625, + "learning_rate": 2.0385276761639768e-06, + "loss": 6.9186, + "step": 365800 + }, + { + "epoch": 0.7389593442066605, + "grad_norm": 545.816650390625, + "learning_rate": 2.0382464334920774e-06, + "loss": 16.2437, + "step": 365810 + }, + { + "epoch": 0.7389795448393444, + "grad_norm": 138.1280975341797, + "learning_rate": 2.037965205255406e-06, + "loss": 10.7938, + "step": 365820 + }, + { + "epoch": 0.7389997454720282, + "grad_norm": 185.7974395751953, + "learning_rate": 2.037683991455334e-06, + "loss": 28.174, + "step": 365830 + }, + { + "epoch": 0.739019946104712, + "grad_norm": 184.17898559570312, + "learning_rate": 2.037402792093232e-06, + "loss": 13.9978, + "step": 365840 + }, + { + "epoch": 0.7390401467373958, + "grad_norm": 367.27703857421875, + "learning_rate": 2.0371216071704667e-06, + "loss": 18.0003, + "step": 365850 + }, + { + "epoch": 0.7390603473700796, + "grad_norm": 599.73974609375, + "learning_rate": 2.036840436688412e-06, + "loss": 21.7555, + "step": 365860 + }, + { + "epoch": 0.7390805480027635, + "grad_norm": 321.1237487792969, + "learning_rate": 2.036559280648441e-06, + "loss": 24.0683, + "step": 365870 + }, + { + "epoch": 0.7391007486354473, + "grad_norm": 313.18206787109375, + "learning_rate": 2.036278139051917e-06, + "loss": 
19.0138, + "step": 365880 + }, + { + "epoch": 0.7391209492681311, + "grad_norm": 298.0772399902344, + "learning_rate": 2.0359970119002143e-06, + "loss": 26.005, + "step": 365890 + }, + { + "epoch": 0.7391411499008149, + "grad_norm": 270.0126647949219, + "learning_rate": 2.035715899194704e-06, + "loss": 17.485, + "step": 365900 + }, + { + "epoch": 0.7391613505334987, + "grad_norm": 308.9616394042969, + "learning_rate": 2.0354348009367554e-06, + "loss": 13.9394, + "step": 365910 + }, + { + "epoch": 0.7391815511661826, + "grad_norm": 398.1065368652344, + "learning_rate": 2.0351537171277353e-06, + "loss": 10.6025, + "step": 365920 + }, + { + "epoch": 0.7392017517988664, + "grad_norm": 622.6244506835938, + "learning_rate": 2.034872647769019e-06, + "loss": 27.7901, + "step": 365930 + }, + { + "epoch": 0.7392219524315502, + "grad_norm": 437.3424072265625, + "learning_rate": 2.0345915928619737e-06, + "loss": 19.379, + "step": 365940 + }, + { + "epoch": 0.739242153064234, + "grad_norm": 9.678370475769043, + "learning_rate": 2.034310552407967e-06, + "loss": 13.0048, + "step": 365950 + }, + { + "epoch": 0.7392623536969178, + "grad_norm": 586.5205078125, + "learning_rate": 2.0340295264083716e-06, + "loss": 17.0872, + "step": 365960 + }, + { + "epoch": 0.7392825543296015, + "grad_norm": 1070.67431640625, + "learning_rate": 2.033748514864558e-06, + "loss": 28.4841, + "step": 365970 + }, + { + "epoch": 0.7393027549622854, + "grad_norm": 292.5230712890625, + "learning_rate": 2.0334675177778938e-06, + "loss": 27.6959, + "step": 365980 + }, + { + "epoch": 0.7393229555949692, + "grad_norm": 60.216163635253906, + "learning_rate": 2.033186535149748e-06, + "loss": 25.453, + "step": 365990 + }, + { + "epoch": 0.739343156227653, + "grad_norm": 558.8899536132812, + "learning_rate": 2.0329055669814936e-06, + "loss": 17.6868, + "step": 366000 + }, + { + "epoch": 0.7393633568603368, + "grad_norm": 97.86579895019531, + "learning_rate": 2.0326246132744963e-06, + "loss": 17.3967, + "step": 366010 + }, + { + "epoch": 0.7393835574930206, + "grad_norm": 756.5426025390625, + "learning_rate": 2.0323436740301262e-06, + "loss": 36.4764, + "step": 366020 + }, + { + "epoch": 0.7394037581257045, + "grad_norm": 248.0166473388672, + "learning_rate": 2.0320627492497543e-06, + "loss": 8.8571, + "step": 366030 + }, + { + "epoch": 0.7394239587583883, + "grad_norm": 334.6541442871094, + "learning_rate": 2.0317818389347468e-06, + "loss": 21.99, + "step": 366040 + }, + { + "epoch": 0.7394441593910721, + "grad_norm": 0.0, + "learning_rate": 2.0315009430864762e-06, + "loss": 23.731, + "step": 366050 + }, + { + "epoch": 0.7394643600237559, + "grad_norm": 157.05633544921875, + "learning_rate": 2.03122006170631e-06, + "loss": 15.738, + "step": 366060 + }, + { + "epoch": 0.7394845606564397, + "grad_norm": 419.2681884765625, + "learning_rate": 2.030939194795616e-06, + "loss": 20.4409, + "step": 366070 + }, + { + "epoch": 0.7395047612891236, + "grad_norm": 427.7028503417969, + "learning_rate": 2.030658342355765e-06, + "loss": 14.2418, + "step": 366080 + }, + { + "epoch": 0.7395249619218074, + "grad_norm": 109.60071563720703, + "learning_rate": 2.030377504388126e-06, + "loss": 21.0627, + "step": 366090 + }, + { + "epoch": 0.7395451625544912, + "grad_norm": 201.05209350585938, + "learning_rate": 2.030096680894065e-06, + "loss": 14.6214, + "step": 366100 + }, + { + "epoch": 0.739565363187175, + "grad_norm": 97.4200210571289, + "learning_rate": 2.029815871874952e-06, + "loss": 11.6279, + "step": 366110 + }, + { + "epoch": 0.7395855638198588, + 
"grad_norm": 242.57472229003906, + "learning_rate": 2.0295350773321583e-06, + "loss": 16.6682, + "step": 366120 + }, + { + "epoch": 0.7396057644525427, + "grad_norm": 204.07839965820312, + "learning_rate": 2.0292542972670503e-06, + "loss": 31.5801, + "step": 366130 + }, + { + "epoch": 0.7396259650852265, + "grad_norm": 18.23077392578125, + "learning_rate": 2.0289735316809948e-06, + "loss": 30.5035, + "step": 366140 + }, + { + "epoch": 0.7396461657179103, + "grad_norm": 265.7449645996094, + "learning_rate": 2.028692780575364e-06, + "loss": 23.7089, + "step": 366150 + }, + { + "epoch": 0.7396663663505941, + "grad_norm": 210.06321716308594, + "learning_rate": 2.028412043951524e-06, + "loss": 16.4642, + "step": 366160 + }, + { + "epoch": 0.739686566983278, + "grad_norm": 519.0916137695312, + "learning_rate": 2.028131321810841e-06, + "loss": 23.075, + "step": 366170 + }, + { + "epoch": 0.7397067676159618, + "grad_norm": 759.4010620117188, + "learning_rate": 2.027850614154686e-06, + "loss": 24.0642, + "step": 366180 + }, + { + "epoch": 0.7397269682486456, + "grad_norm": 335.1924133300781, + "learning_rate": 2.0275699209844306e-06, + "loss": 19.3043, + "step": 366190 + }, + { + "epoch": 0.7397471688813294, + "grad_norm": 234.74412536621094, + "learning_rate": 2.027289242301435e-06, + "loss": 15.7694, + "step": 366200 + }, + { + "epoch": 0.7397673695140132, + "grad_norm": 66.03064727783203, + "learning_rate": 2.0270085781070707e-06, + "loss": 15.6784, + "step": 366210 + }, + { + "epoch": 0.739787570146697, + "grad_norm": 200.4514617919922, + "learning_rate": 2.026727928402708e-06, + "loss": 11.6553, + "step": 366220 + }, + { + "epoch": 0.7398077707793808, + "grad_norm": 1297.374755859375, + "learning_rate": 2.0264472931897135e-06, + "loss": 23.5428, + "step": 366230 + }, + { + "epoch": 0.7398279714120646, + "grad_norm": 452.533935546875, + "learning_rate": 2.026166672469451e-06, + "loss": 19.6167, + "step": 366240 + }, + { + "epoch": 0.7398481720447484, + "grad_norm": 71.55281066894531, + "learning_rate": 2.0258860662432946e-06, + "loss": 21.2165, + "step": 366250 + }, + { + "epoch": 0.7398683726774322, + "grad_norm": 15.032710075378418, + "learning_rate": 2.025605474512608e-06, + "loss": 13.4593, + "step": 366260 + }, + { + "epoch": 0.739888573310116, + "grad_norm": 171.6467742919922, + "learning_rate": 2.025324897278758e-06, + "loss": 18.3105, + "step": 366270 + }, + { + "epoch": 0.7399087739427999, + "grad_norm": 635.5283203125, + "learning_rate": 2.0250443345431135e-06, + "loss": 35.5155, + "step": 366280 + }, + { + "epoch": 0.7399289745754837, + "grad_norm": 21.338153839111328, + "learning_rate": 2.0247637863070446e-06, + "loss": 17.7918, + "step": 366290 + }, + { + "epoch": 0.7399491752081675, + "grad_norm": 310.5945129394531, + "learning_rate": 2.0244832525719155e-06, + "loss": 11.9054, + "step": 366300 + }, + { + "epoch": 0.7399693758408513, + "grad_norm": 327.890625, + "learning_rate": 2.0242027333390924e-06, + "loss": 40.5949, + "step": 366310 + }, + { + "epoch": 0.7399895764735351, + "grad_norm": 215.30043029785156, + "learning_rate": 2.0239222286099463e-06, + "loss": 16.9784, + "step": 366320 + }, + { + "epoch": 0.740009777106219, + "grad_norm": 324.63348388671875, + "learning_rate": 2.0236417383858404e-06, + "loss": 25.1119, + "step": 366330 + }, + { + "epoch": 0.7400299777389028, + "grad_norm": 218.94415283203125, + "learning_rate": 2.0233612626681454e-06, + "loss": 20.3333, + "step": 366340 + }, + { + "epoch": 0.7400501783715866, + "grad_norm": 830.9730224609375, + "learning_rate": 
2.0230808014582265e-06, + "loss": 19.89, + "step": 366350 + }, + { + "epoch": 0.7400703790042704, + "grad_norm": 100.47584533691406, + "learning_rate": 2.0228003547574488e-06, + "loss": 26.5495, + "step": 366360 + }, + { + "epoch": 0.7400905796369542, + "grad_norm": 0.0, + "learning_rate": 2.022519922567183e-06, + "loss": 8.8323, + "step": 366370 + }, + { + "epoch": 0.7401107802696381, + "grad_norm": 463.47674560546875, + "learning_rate": 2.0222395048887942e-06, + "loss": 13.7695, + "step": 366380 + }, + { + "epoch": 0.7401309809023219, + "grad_norm": 197.24440002441406, + "learning_rate": 2.0219591017236474e-06, + "loss": 22.9665, + "step": 366390 + }, + { + "epoch": 0.7401511815350057, + "grad_norm": 345.9573974609375, + "learning_rate": 2.02167871307311e-06, + "loss": 18.8581, + "step": 366400 + }, + { + "epoch": 0.7401713821676895, + "grad_norm": 442.7614440917969, + "learning_rate": 2.021398338938552e-06, + "loss": 23.2895, + "step": 366410 + }, + { + "epoch": 0.7401915828003733, + "grad_norm": 760.4876708984375, + "learning_rate": 2.021117979321336e-06, + "loss": 19.8918, + "step": 366420 + }, + { + "epoch": 0.7402117834330572, + "grad_norm": 165.46810913085938, + "learning_rate": 2.0208376342228287e-06, + "loss": 20.7642, + "step": 366430 + }, + { + "epoch": 0.740231984065741, + "grad_norm": 604.820556640625, + "learning_rate": 2.0205573036443994e-06, + "loss": 19.4534, + "step": 366440 + }, + { + "epoch": 0.7402521846984248, + "grad_norm": 214.176025390625, + "learning_rate": 2.020276987587412e-06, + "loss": 15.5619, + "step": 366450 + }, + { + "epoch": 0.7402723853311086, + "grad_norm": 376.1199951171875, + "learning_rate": 2.019996686053231e-06, + "loss": 22.0531, + "step": 366460 + }, + { + "epoch": 0.7402925859637924, + "grad_norm": 159.35194396972656, + "learning_rate": 2.0197163990432256e-06, + "loss": 21.4652, + "step": 366470 + }, + { + "epoch": 0.7403127865964761, + "grad_norm": 170.47312927246094, + "learning_rate": 2.0194361265587644e-06, + "loss": 7.2859, + "step": 366480 + }, + { + "epoch": 0.74033298722916, + "grad_norm": 204.39385986328125, + "learning_rate": 2.019155868601206e-06, + "loss": 8.9417, + "step": 366490 + }, + { + "epoch": 0.7403531878618438, + "grad_norm": 403.3724365234375, + "learning_rate": 2.0188756251719204e-06, + "loss": 19.4494, + "step": 366500 + }, + { + "epoch": 0.7403733884945276, + "grad_norm": 56.86149215698242, + "learning_rate": 2.018595396272275e-06, + "loss": 18.7177, + "step": 366510 + }, + { + "epoch": 0.7403935891272114, + "grad_norm": 418.9256591796875, + "learning_rate": 2.018315181903635e-06, + "loss": 19.4897, + "step": 366520 + }, + { + "epoch": 0.7404137897598952, + "grad_norm": 246.70790100097656, + "learning_rate": 2.018034982067363e-06, + "loss": 25.4961, + "step": 366530 + }, + { + "epoch": 0.7404339903925791, + "grad_norm": 89.4341049194336, + "learning_rate": 2.0177547967648283e-06, + "loss": 19.213, + "step": 366540 + }, + { + "epoch": 0.7404541910252629, + "grad_norm": 323.2135009765625, + "learning_rate": 2.0174746259973956e-06, + "loss": 9.7069, + "step": 366550 + }, + { + "epoch": 0.7404743916579467, + "grad_norm": 690.9506225585938, + "learning_rate": 2.0171944697664277e-06, + "loss": 16.364, + "step": 366560 + }, + { + "epoch": 0.7404945922906305, + "grad_norm": 481.0787658691406, + "learning_rate": 2.0169143280732916e-06, + "loss": 14.3124, + "step": 366570 + }, + { + "epoch": 0.7405147929233143, + "grad_norm": 379.80987548828125, + "learning_rate": 2.016634200919356e-06, + "loss": 14.8808, + "step": 366580 + }, + 
{ + "epoch": 0.7405349935559982, + "grad_norm": 745.8098754882812, + "learning_rate": 2.016354088305983e-06, + "loss": 15.8693, + "step": 366590 + }, + { + "epoch": 0.740555194188682, + "grad_norm": 520.9889526367188, + "learning_rate": 2.016073990234536e-06, + "loss": 32.575, + "step": 366600 + }, + { + "epoch": 0.7405753948213658, + "grad_norm": 59.87366485595703, + "learning_rate": 2.0157939067063848e-06, + "loss": 21.0093, + "step": 366610 + }, + { + "epoch": 0.7405955954540496, + "grad_norm": 469.46917724609375, + "learning_rate": 2.0155138377228924e-06, + "loss": 23.9371, + "step": 366620 + }, + { + "epoch": 0.7406157960867334, + "grad_norm": 12.194167137145996, + "learning_rate": 2.0152337832854213e-06, + "loss": 13.9133, + "step": 366630 + }, + { + "epoch": 0.7406359967194173, + "grad_norm": 0.0, + "learning_rate": 2.014953743395341e-06, + "loss": 10.9898, + "step": 366640 + }, + { + "epoch": 0.7406561973521011, + "grad_norm": 320.4013366699219, + "learning_rate": 2.014673718054012e-06, + "loss": 42.721, + "step": 366650 + }, + { + "epoch": 0.7406763979847849, + "grad_norm": 73.00108337402344, + "learning_rate": 2.0143937072628033e-06, + "loss": 13.0563, + "step": 366660 + }, + { + "epoch": 0.7406965986174687, + "grad_norm": 440.5721740722656, + "learning_rate": 2.014113711023077e-06, + "loss": 17.716, + "step": 366670 + }, + { + "epoch": 0.7407167992501525, + "grad_norm": 623.513427734375, + "learning_rate": 2.013833729336197e-06, + "loss": 30.0989, + "step": 366680 + }, + { + "epoch": 0.7407369998828364, + "grad_norm": 161.8341522216797, + "learning_rate": 2.0135537622035313e-06, + "loss": 13.7036, + "step": 366690 + }, + { + "epoch": 0.7407572005155202, + "grad_norm": 262.3466796875, + "learning_rate": 2.0132738096264415e-06, + "loss": 12.5189, + "step": 366700 + }, + { + "epoch": 0.740777401148204, + "grad_norm": 335.18231201171875, + "learning_rate": 2.0129938716062917e-06, + "loss": 21.4074, + "step": 366710 + }, + { + "epoch": 0.7407976017808878, + "grad_norm": 95.01959991455078, + "learning_rate": 2.0127139481444475e-06, + "loss": 20.8021, + "step": 366720 + }, + { + "epoch": 0.7408178024135716, + "grad_norm": 374.58447265625, + "learning_rate": 2.0124340392422746e-06, + "loss": 20.3548, + "step": 366730 + }, + { + "epoch": 0.7408380030462554, + "grad_norm": 525.8010864257812, + "learning_rate": 2.012154144901136e-06, + "loss": 32.7077, + "step": 366740 + }, + { + "epoch": 0.7408582036789392, + "grad_norm": 623.1885986328125, + "learning_rate": 2.0118742651223944e-06, + "loss": 19.2108, + "step": 366750 + }, + { + "epoch": 0.740878404311623, + "grad_norm": 284.1596984863281, + "learning_rate": 2.0115943999074167e-06, + "loss": 14.0548, + "step": 366760 + }, + { + "epoch": 0.7408986049443068, + "grad_norm": 253.09112548828125, + "learning_rate": 2.011314549257565e-06, + "loss": 19.0957, + "step": 366770 + }, + { + "epoch": 0.7409188055769906, + "grad_norm": 166.9971466064453, + "learning_rate": 2.0110347131742024e-06, + "loss": 6.3661, + "step": 366780 + }, + { + "epoch": 0.7409390062096745, + "grad_norm": 408.9407958984375, + "learning_rate": 2.0107548916586946e-06, + "loss": 16.7069, + "step": 366790 + }, + { + "epoch": 0.7409592068423583, + "grad_norm": 455.688720703125, + "learning_rate": 2.0104750847124075e-06, + "loss": 15.5309, + "step": 366800 + }, + { + "epoch": 0.7409794074750421, + "grad_norm": 351.8607482910156, + "learning_rate": 2.010195292336699e-06, + "loss": 23.2376, + "step": 366810 + }, + { + "epoch": 0.7409996081077259, + "grad_norm": 279.016357421875, + 
"learning_rate": 2.0099155145329364e-06, + "loss": 19.2799, + "step": 366820 + }, + { + "epoch": 0.7410198087404097, + "grad_norm": 470.3839416503906, + "learning_rate": 2.009635751302484e-06, + "loss": 15.1497, + "step": 366830 + }, + { + "epoch": 0.7410400093730936, + "grad_norm": 163.67852783203125, + "learning_rate": 2.0093560026467046e-06, + "loss": 11.5575, + "step": 366840 + }, + { + "epoch": 0.7410602100057774, + "grad_norm": 179.66949462890625, + "learning_rate": 2.0090762685669597e-06, + "loss": 17.2575, + "step": 366850 + }, + { + "epoch": 0.7410804106384612, + "grad_norm": 490.8962097167969, + "learning_rate": 2.0087965490646144e-06, + "loss": 21.9387, + "step": 366860 + }, + { + "epoch": 0.741100611271145, + "grad_norm": 519.0337524414062, + "learning_rate": 2.0085168441410352e-06, + "loss": 13.6265, + "step": 366870 + }, + { + "epoch": 0.7411208119038288, + "grad_norm": 381.9383850097656, + "learning_rate": 2.0082371537975786e-06, + "loss": 17.2182, + "step": 366880 + }, + { + "epoch": 0.7411410125365127, + "grad_norm": 292.76849365234375, + "learning_rate": 2.0079574780356116e-06, + "loss": 22.6855, + "step": 366890 + }, + { + "epoch": 0.7411612131691965, + "grad_norm": 453.6347961425781, + "learning_rate": 2.007677816856498e-06, + "loss": 13.1982, + "step": 366900 + }, + { + "epoch": 0.7411814138018803, + "grad_norm": 824.3630981445312, + "learning_rate": 2.0073981702616e-06, + "loss": 27.8966, + "step": 366910 + }, + { + "epoch": 0.7412016144345641, + "grad_norm": 304.8612976074219, + "learning_rate": 2.007118538252279e-06, + "loss": 23.3396, + "step": 366920 + }, + { + "epoch": 0.7412218150672479, + "grad_norm": 638.951416015625, + "learning_rate": 2.006838920829901e-06, + "loss": 20.9227, + "step": 366930 + }, + { + "epoch": 0.7412420156999318, + "grad_norm": 14.768926620483398, + "learning_rate": 2.0065593179958267e-06, + "loss": 31.2564, + "step": 366940 + }, + { + "epoch": 0.7412622163326156, + "grad_norm": 358.9869384765625, + "learning_rate": 2.0062797297514176e-06, + "loss": 11.3001, + "step": 366950 + }, + { + "epoch": 0.7412824169652994, + "grad_norm": 147.42005920410156, + "learning_rate": 2.0060001560980395e-06, + "loss": 23.4147, + "step": 366960 + }, + { + "epoch": 0.7413026175979832, + "grad_norm": 247.12164306640625, + "learning_rate": 2.0057205970370524e-06, + "loss": 26.8701, + "step": 366970 + }, + { + "epoch": 0.741322818230667, + "grad_norm": 462.36322021484375, + "learning_rate": 2.0054410525698217e-06, + "loss": 21.2175, + "step": 366980 + }, + { + "epoch": 0.7413430188633509, + "grad_norm": 263.4853210449219, + "learning_rate": 2.0051615226977072e-06, + "loss": 8.8489, + "step": 366990 + }, + { + "epoch": 0.7413632194960346, + "grad_norm": 752.6406860351562, + "learning_rate": 2.0048820074220716e-06, + "loss": 28.5969, + "step": 367000 + }, + { + "epoch": 0.7413834201287184, + "grad_norm": 470.063232421875, + "learning_rate": 2.0046025067442788e-06, + "loss": 22.4267, + "step": 367010 + }, + { + "epoch": 0.7414036207614022, + "grad_norm": 632.08837890625, + "learning_rate": 2.0043230206656884e-06, + "loss": 26.7387, + "step": 367020 + }, + { + "epoch": 0.741423821394086, + "grad_norm": 281.9903259277344, + "learning_rate": 2.0040435491876666e-06, + "loss": 14.0005, + "step": 367030 + }, + { + "epoch": 0.7414440220267698, + "grad_norm": 371.42437744140625, + "learning_rate": 2.0037640923115704e-06, + "loss": 39.7928, + "step": 367040 + }, + { + "epoch": 0.7414642226594537, + "grad_norm": 402.6060485839844, + "learning_rate": 2.0034846500387674e-06, + 
"loss": 12.355, + "step": 367050 + }, + { + "epoch": 0.7414844232921375, + "grad_norm": 42.73507308959961, + "learning_rate": 2.003205222370616e-06, + "loss": 25.2388, + "step": 367060 + }, + { + "epoch": 0.7415046239248213, + "grad_norm": 271.7084045410156, + "learning_rate": 2.0029258093084774e-06, + "loss": 19.5723, + "step": 367070 + }, + { + "epoch": 0.7415248245575051, + "grad_norm": 407.57977294921875, + "learning_rate": 2.0026464108537153e-06, + "loss": 19.6693, + "step": 367080 + }, + { + "epoch": 0.7415450251901889, + "grad_norm": 506.03558349609375, + "learning_rate": 2.002367027007694e-06, + "loss": 8.3654, + "step": 367090 + }, + { + "epoch": 0.7415652258228728, + "grad_norm": 222.3409881591797, + "learning_rate": 2.002087657771769e-06, + "loss": 14.7323, + "step": 367100 + }, + { + "epoch": 0.7415854264555566, + "grad_norm": 387.82733154296875, + "learning_rate": 2.001808303147305e-06, + "loss": 28.677, + "step": 367110 + }, + { + "epoch": 0.7416056270882404, + "grad_norm": 324.7660217285156, + "learning_rate": 2.0015289631356654e-06, + "loss": 11.4134, + "step": 367120 + }, + { + "epoch": 0.7416258277209242, + "grad_norm": 710.8705444335938, + "learning_rate": 2.00124963773821e-06, + "loss": 17.5392, + "step": 367130 + }, + { + "epoch": 0.741646028353608, + "grad_norm": 384.54461669921875, + "learning_rate": 2.000970326956299e-06, + "loss": 15.3054, + "step": 367140 + }, + { + "epoch": 0.7416662289862919, + "grad_norm": 347.185791015625, + "learning_rate": 2.0006910307912965e-06, + "loss": 32.0539, + "step": 367150 + }, + { + "epoch": 0.7416864296189757, + "grad_norm": 236.717529296875, + "learning_rate": 2.0004117492445614e-06, + "loss": 13.4359, + "step": 367160 + }, + { + "epoch": 0.7417066302516595, + "grad_norm": 206.33102416992188, + "learning_rate": 2.0001324823174544e-06, + "loss": 26.4592, + "step": 367170 + }, + { + "epoch": 0.7417268308843433, + "grad_norm": 247.7039031982422, + "learning_rate": 1.9998532300113376e-06, + "loss": 17.3294, + "step": 367180 + }, + { + "epoch": 0.7417470315170271, + "grad_norm": 389.9203186035156, + "learning_rate": 1.9995739923275743e-06, + "loss": 22.1714, + "step": 367190 + }, + { + "epoch": 0.741767232149711, + "grad_norm": 388.1050109863281, + "learning_rate": 1.999294769267523e-06, + "loss": 12.2662, + "step": 367200 + }, + { + "epoch": 0.7417874327823948, + "grad_norm": 601.4808959960938, + "learning_rate": 1.999015560832544e-06, + "loss": 22.3924, + "step": 367210 + }, + { + "epoch": 0.7418076334150786, + "grad_norm": 716.5521850585938, + "learning_rate": 1.9987363670240006e-06, + "loss": 18.4349, + "step": 367220 + }, + { + "epoch": 0.7418278340477624, + "grad_norm": 840.4054565429688, + "learning_rate": 1.998457187843252e-06, + "loss": 19.8814, + "step": 367230 + }, + { + "epoch": 0.7418480346804462, + "grad_norm": 443.71624755859375, + "learning_rate": 1.998178023291657e-06, + "loss": 14.4573, + "step": 367240 + }, + { + "epoch": 0.74186823531313, + "grad_norm": 293.313232421875, + "learning_rate": 1.9978988733705807e-06, + "loss": 16.8452, + "step": 367250 + }, + { + "epoch": 0.7418884359458138, + "grad_norm": 1023.288330078125, + "learning_rate": 1.997619738081379e-06, + "loss": 21.4529, + "step": 367260 + }, + { + "epoch": 0.7419086365784976, + "grad_norm": 309.85845947265625, + "learning_rate": 1.997340617425416e-06, + "loss": 10.65, + "step": 367270 + }, + { + "epoch": 0.7419288372111814, + "grad_norm": 870.9706420898438, + "learning_rate": 1.9970615114040514e-06, + "loss": 16.24, + "step": 367280 + }, + { + "epoch": 
0.7419490378438652, + "grad_norm": 375.987060546875, + "learning_rate": 1.9967824200186426e-06, + "loss": 18.7024, + "step": 367290 + }, + { + "epoch": 0.741969238476549, + "grad_norm": 365.5154724121094, + "learning_rate": 1.996503343270554e-06, + "loss": 11.1557, + "step": 367300 + }, + { + "epoch": 0.7419894391092329, + "grad_norm": 272.2825012207031, + "learning_rate": 1.9962242811611437e-06, + "loss": 13.8497, + "step": 367310 + }, + { + "epoch": 0.7420096397419167, + "grad_norm": 450.99810791015625, + "learning_rate": 1.99594523369177e-06, + "loss": 17.0733, + "step": 367320 + }, + { + "epoch": 0.7420298403746005, + "grad_norm": 144.34202575683594, + "learning_rate": 1.995666200863795e-06, + "loss": 21.3699, + "step": 367330 + }, + { + "epoch": 0.7420500410072843, + "grad_norm": 823.3612670898438, + "learning_rate": 1.9953871826785804e-06, + "loss": 18.1909, + "step": 367340 + }, + { + "epoch": 0.7420702416399682, + "grad_norm": 48.882633209228516, + "learning_rate": 1.9951081791374843e-06, + "loss": 18.089, + "step": 367350 + }, + { + "epoch": 0.742090442272652, + "grad_norm": 25.4761905670166, + "learning_rate": 1.994829190241865e-06, + "loss": 10.5992, + "step": 367360 + }, + { + "epoch": 0.7421106429053358, + "grad_norm": 531.5303344726562, + "learning_rate": 1.9945502159930846e-06, + "loss": 11.2596, + "step": 367370 + }, + { + "epoch": 0.7421308435380196, + "grad_norm": 352.27239990234375, + "learning_rate": 1.994271256392503e-06, + "loss": 11.6738, + "step": 367380 + }, + { + "epoch": 0.7421510441707034, + "grad_norm": 214.4779052734375, + "learning_rate": 1.993992311441476e-06, + "loss": 16.8358, + "step": 367390 + }, + { + "epoch": 0.7421712448033873, + "grad_norm": 144.89553833007812, + "learning_rate": 1.9937133811413666e-06, + "loss": 18.4599, + "step": 367400 + }, + { + "epoch": 0.7421914454360711, + "grad_norm": 260.8878479003906, + "learning_rate": 1.9934344654935367e-06, + "loss": 11.9296, + "step": 367410 + }, + { + "epoch": 0.7422116460687549, + "grad_norm": 366.73394775390625, + "learning_rate": 1.9931555644993395e-06, + "loss": 20.6212, + "step": 367420 + }, + { + "epoch": 0.7422318467014387, + "grad_norm": 654.520751953125, + "learning_rate": 1.9928766781601366e-06, + "loss": 21.1741, + "step": 367430 + }, + { + "epoch": 0.7422520473341225, + "grad_norm": 323.9236755371094, + "learning_rate": 1.9925978064772904e-06, + "loss": 31.874, + "step": 367440 + }, + { + "epoch": 0.7422722479668064, + "grad_norm": 132.22869873046875, + "learning_rate": 1.9923189494521576e-06, + "loss": 11.6857, + "step": 367450 + }, + { + "epoch": 0.7422924485994902, + "grad_norm": 245.091552734375, + "learning_rate": 1.9920401070860955e-06, + "loss": 17.0472, + "step": 367460 + }, + { + "epoch": 0.742312649232174, + "grad_norm": 216.7820281982422, + "learning_rate": 1.991761279380466e-06, + "loss": 9.716, + "step": 367470 + }, + { + "epoch": 0.7423328498648578, + "grad_norm": 432.0198669433594, + "learning_rate": 1.9914824663366296e-06, + "loss": 22.8366, + "step": 367480 + }, + { + "epoch": 0.7423530504975416, + "grad_norm": 383.17138671875, + "learning_rate": 1.9912036679559397e-06, + "loss": 20.7813, + "step": 367490 + }, + { + "epoch": 0.7423732511302255, + "grad_norm": 346.9214172363281, + "learning_rate": 1.990924884239758e-06, + "loss": 22.4922, + "step": 367500 + }, + { + "epoch": 0.7423934517629092, + "grad_norm": 435.1873474121094, + "learning_rate": 1.990646115189446e-06, + "loss": 20.0185, + "step": 367510 + }, + { + "epoch": 0.742413652395593, + "grad_norm": 106.90283203125, 
+ "learning_rate": 1.990367360806359e-06, + "loss": 12.1977, + "step": 367520 + }, + { + "epoch": 0.7424338530282768, + "grad_norm": 413.74432373046875, + "learning_rate": 1.9900886210918547e-06, + "loss": 19.9012, + "step": 367530 + }, + { + "epoch": 0.7424540536609606, + "grad_norm": 185.722412109375, + "learning_rate": 1.989809896047295e-06, + "loss": 26.3624, + "step": 367540 + }, + { + "epoch": 0.7424742542936444, + "grad_norm": 444.6708068847656, + "learning_rate": 1.989531185674037e-06, + "loss": 26.142, + "step": 367550 + }, + { + "epoch": 0.7424944549263283, + "grad_norm": 334.2047424316406, + "learning_rate": 1.989252489973438e-06, + "loss": 9.756, + "step": 367560 + }, + { + "epoch": 0.7425146555590121, + "grad_norm": 1283.1856689453125, + "learning_rate": 1.988973808946858e-06, + "loss": 12.4231, + "step": 367570 + }, + { + "epoch": 0.7425348561916959, + "grad_norm": 350.5517883300781, + "learning_rate": 1.988695142595653e-06, + "loss": 7.4296, + "step": 367580 + }, + { + "epoch": 0.7425550568243797, + "grad_norm": 188.34580993652344, + "learning_rate": 1.988416490921184e-06, + "loss": 11.0043, + "step": 367590 + }, + { + "epoch": 0.7425752574570635, + "grad_norm": 588.7089233398438, + "learning_rate": 1.988137853924808e-06, + "loss": 31.801, + "step": 367600 + }, + { + "epoch": 0.7425954580897474, + "grad_norm": 527.1619873046875, + "learning_rate": 1.9878592316078813e-06, + "loss": 22.5599, + "step": 367610 + }, + { + "epoch": 0.7426156587224312, + "grad_norm": 412.13873291015625, + "learning_rate": 1.987580623971765e-06, + "loss": 34.7493, + "step": 367620 + }, + { + "epoch": 0.742635859355115, + "grad_norm": 441.67449951171875, + "learning_rate": 1.987302031017814e-06, + "loss": 21.1823, + "step": 367630 + }, + { + "epoch": 0.7426560599877988, + "grad_norm": 319.75970458984375, + "learning_rate": 1.9870234527473886e-06, + "loss": 29.5131, + "step": 367640 + }, + { + "epoch": 0.7426762606204826, + "grad_norm": 506.9483337402344, + "learning_rate": 1.986744889161844e-06, + "loss": 16.4514, + "step": 367650 + }, + { + "epoch": 0.7426964612531665, + "grad_norm": 471.54632568359375, + "learning_rate": 1.986466340262541e-06, + "loss": 20.7602, + "step": 367660 + }, + { + "epoch": 0.7427166618858503, + "grad_norm": 56.814937591552734, + "learning_rate": 1.9861878060508357e-06, + "loss": 30.1332, + "step": 367670 + }, + { + "epoch": 0.7427368625185341, + "grad_norm": 0.0, + "learning_rate": 1.985909286528084e-06, + "loss": 16.3546, + "step": 367680 + }, + { + "epoch": 0.7427570631512179, + "grad_norm": 160.3397216796875, + "learning_rate": 1.985630781695646e-06, + "loss": 16.0622, + "step": 367690 + }, + { + "epoch": 0.7427772637839017, + "grad_norm": 320.552490234375, + "learning_rate": 1.9853522915548777e-06, + "loss": 28.14, + "step": 367700 + }, + { + "epoch": 0.7427974644165856, + "grad_norm": 378.26947021484375, + "learning_rate": 1.985073816107136e-06, + "loss": 27.7358, + "step": 367710 + }, + { + "epoch": 0.7428176650492694, + "grad_norm": 176.5840606689453, + "learning_rate": 1.984795355353778e-06, + "loss": 24.6097, + "step": 367720 + }, + { + "epoch": 0.7428378656819532, + "grad_norm": 1436.84619140625, + "learning_rate": 1.9845169092961643e-06, + "loss": 22.231, + "step": 367730 + }, + { + "epoch": 0.742858066314637, + "grad_norm": 200.1486358642578, + "learning_rate": 1.984238477935649e-06, + "loss": 17.1255, + "step": 367740 + }, + { + "epoch": 0.7428782669473208, + "grad_norm": 455.57196044921875, + "learning_rate": 1.9839600612735877e-06, + "loss": 16.9428, + 
"step": 367750 + }, + { + "epoch": 0.7428984675800046, + "grad_norm": 479.2783203125, + "learning_rate": 1.983681659311341e-06, + "loss": 26.5434, + "step": 367760 + }, + { + "epoch": 0.7429186682126884, + "grad_norm": 207.67135620117188, + "learning_rate": 1.9834032720502646e-06, + "loss": 22.968, + "step": 367770 + }, + { + "epoch": 0.7429388688453722, + "grad_norm": 313.4549560546875, + "learning_rate": 1.9831248994917123e-06, + "loss": 23.2637, + "step": 367780 + }, + { + "epoch": 0.742959069478056, + "grad_norm": 177.2661590576172, + "learning_rate": 1.9828465416370434e-06, + "loss": 9.194, + "step": 367790 + }, + { + "epoch": 0.7429792701107398, + "grad_norm": 293.1524353027344, + "learning_rate": 1.9825681984876173e-06, + "loss": 23.7921, + "step": 367800 + }, + { + "epoch": 0.7429994707434237, + "grad_norm": 463.06427001953125, + "learning_rate": 1.982289870044787e-06, + "loss": 24.1851, + "step": 367810 + }, + { + "epoch": 0.7430196713761075, + "grad_norm": 150.6734161376953, + "learning_rate": 1.982011556309908e-06, + "loss": 9.0601, + "step": 367820 + }, + { + "epoch": 0.7430398720087913, + "grad_norm": 360.19970703125, + "learning_rate": 1.9817332572843408e-06, + "loss": 22.3678, + "step": 367830 + }, + { + "epoch": 0.7430600726414751, + "grad_norm": 94.46986389160156, + "learning_rate": 1.9814549729694395e-06, + "loss": 12.3882, + "step": 367840 + }, + { + "epoch": 0.7430802732741589, + "grad_norm": 250.29745483398438, + "learning_rate": 1.9811767033665587e-06, + "loss": 44.3132, + "step": 367850 + }, + { + "epoch": 0.7431004739068428, + "grad_norm": 233.59835815429688, + "learning_rate": 1.9808984484770577e-06, + "loss": 18.3321, + "step": 367860 + }, + { + "epoch": 0.7431206745395266, + "grad_norm": 373.7940368652344, + "learning_rate": 1.9806202083022906e-06, + "loss": 13.86, + "step": 367870 + }, + { + "epoch": 0.7431408751722104, + "grad_norm": 178.30247497558594, + "learning_rate": 1.980341982843616e-06, + "loss": 17.0042, + "step": 367880 + }, + { + "epoch": 0.7431610758048942, + "grad_norm": 291.4422912597656, + "learning_rate": 1.980063772102388e-06, + "loss": 10.8339, + "step": 367890 + }, + { + "epoch": 0.743181276437578, + "grad_norm": 419.87493896484375, + "learning_rate": 1.979785576079961e-06, + "loss": 13.6699, + "step": 367900 + }, + { + "epoch": 0.7432014770702619, + "grad_norm": 219.6775360107422, + "learning_rate": 1.9795073947776955e-06, + "loss": 18.5805, + "step": 367910 + }, + { + "epoch": 0.7432216777029457, + "grad_norm": 129.4747314453125, + "learning_rate": 1.979229228196942e-06, + "loss": 7.3325, + "step": 367920 + }, + { + "epoch": 0.7432418783356295, + "grad_norm": 263.1779479980469, + "learning_rate": 1.9789510763390605e-06, + "loss": 17.6758, + "step": 367930 + }, + { + "epoch": 0.7432620789683133, + "grad_norm": 149.30722045898438, + "learning_rate": 1.978672939205404e-06, + "loss": 12.421, + "step": 367940 + }, + { + "epoch": 0.7432822796009971, + "grad_norm": 261.9346008300781, + "learning_rate": 1.9783948167973306e-06, + "loss": 21.0661, + "step": 367950 + }, + { + "epoch": 0.743302480233681, + "grad_norm": 276.88958740234375, + "learning_rate": 1.9781167091161944e-06, + "loss": 19.6049, + "step": 367960 + }, + { + "epoch": 0.7433226808663648, + "grad_norm": 366.97808837890625, + "learning_rate": 1.977838616163349e-06, + "loss": 27.8706, + "step": 367970 + }, + { + "epoch": 0.7433428814990486, + "grad_norm": 335.1759948730469, + "learning_rate": 1.9775605379401534e-06, + "loss": 15.9381, + "step": 367980 + }, + { + "epoch": 
0.7433630821317324, + "grad_norm": 501.2055969238281, + "learning_rate": 1.9772824744479613e-06, + "loss": 26.8819, + "step": 367990 + }, + { + "epoch": 0.7433832827644162, + "grad_norm": 185.55311584472656, + "learning_rate": 1.977004425688126e-06, + "loss": 6.9506, + "step": 368000 + }, + { + "epoch": 0.7434034833971, + "grad_norm": 314.1372375488281, + "learning_rate": 1.9767263916620043e-06, + "loss": 15.9037, + "step": 368010 + }, + { + "epoch": 0.7434236840297838, + "grad_norm": 596.9950561523438, + "learning_rate": 1.9764483723709555e-06, + "loss": 18.8883, + "step": 368020 + }, + { + "epoch": 0.7434438846624676, + "grad_norm": 353.4117736816406, + "learning_rate": 1.9761703678163267e-06, + "loss": 14.5118, + "step": 368030 + }, + { + "epoch": 0.7434640852951514, + "grad_norm": 300.2447509765625, + "learning_rate": 1.975892377999477e-06, + "loss": 7.5033, + "step": 368040 + }, + { + "epoch": 0.7434842859278352, + "grad_norm": 43.761016845703125, + "learning_rate": 1.9756144029217626e-06, + "loss": 15.808, + "step": 368050 + }, + { + "epoch": 0.743504486560519, + "grad_norm": 447.7980041503906, + "learning_rate": 1.975336442584537e-06, + "loss": 17.3498, + "step": 368060 + }, + { + "epoch": 0.7435246871932029, + "grad_norm": 431.0140075683594, + "learning_rate": 1.975058496989153e-06, + "loss": 9.2232, + "step": 368070 + }, + { + "epoch": 0.7435448878258867, + "grad_norm": 271.3089904785156, + "learning_rate": 1.9747805661369662e-06, + "loss": 24.2961, + "step": 368080 + }, + { + "epoch": 0.7435650884585705, + "grad_norm": 271.5911560058594, + "learning_rate": 1.974502650029336e-06, + "loss": 26.4251, + "step": 368090 + }, + { + "epoch": 0.7435852890912543, + "grad_norm": 13.378753662109375, + "learning_rate": 1.97422474866761e-06, + "loss": 19.3268, + "step": 368100 + }, + { + "epoch": 0.7436054897239381, + "grad_norm": 551.256103515625, + "learning_rate": 1.9739468620531448e-06, + "loss": 15.8235, + "step": 368110 + }, + { + "epoch": 0.743625690356622, + "grad_norm": 234.52906799316406, + "learning_rate": 1.973668990187298e-06, + "loss": 16.2694, + "step": 368120 + }, + { + "epoch": 0.7436458909893058, + "grad_norm": 319.56573486328125, + "learning_rate": 1.9733911330714213e-06, + "loss": 15.377, + "step": 368130 + }, + { + "epoch": 0.7436660916219896, + "grad_norm": 472.1600036621094, + "learning_rate": 1.973113290706867e-06, + "loss": 13.2866, + "step": 368140 + }, + { + "epoch": 0.7436862922546734, + "grad_norm": 90.01313781738281, + "learning_rate": 1.9728354630949935e-06, + "loss": 6.5488, + "step": 368150 + }, + { + "epoch": 0.7437064928873572, + "grad_norm": 265.7916259765625, + "learning_rate": 1.972557650237153e-06, + "loss": 14.189, + "step": 368160 + }, + { + "epoch": 0.7437266935200411, + "grad_norm": 367.3469543457031, + "learning_rate": 1.972279852134697e-06, + "loss": 19.1696, + "step": 368170 + }, + { + "epoch": 0.7437468941527249, + "grad_norm": 360.18450927734375, + "learning_rate": 1.972002068788984e-06, + "loss": 9.7707, + "step": 368180 + }, + { + "epoch": 0.7437670947854087, + "grad_norm": 333.741455078125, + "learning_rate": 1.9717243002013636e-06, + "loss": 20.8472, + "step": 368190 + }, + { + "epoch": 0.7437872954180925, + "grad_norm": 368.5290832519531, + "learning_rate": 1.9714465463731934e-06, + "loss": 26.3683, + "step": 368200 + }, + { + "epoch": 0.7438074960507763, + "grad_norm": 574.5621337890625, + "learning_rate": 1.9711688073058262e-06, + "loss": 21.3149, + "step": 368210 + }, + { + "epoch": 0.7438276966834602, + "grad_norm": 483.143798828125, + 
"learning_rate": 1.9708910830006124e-06, + "loss": 32.7721, + "step": 368220 + }, + { + "epoch": 0.743847897316144, + "grad_norm": 59.498416900634766, + "learning_rate": 1.97061337345891e-06, + "loss": 16.5077, + "step": 368230 + }, + { + "epoch": 0.7438680979488278, + "grad_norm": 526.3450317382812, + "learning_rate": 1.9703356786820687e-06, + "loss": 16.4204, + "step": 368240 + }, + { + "epoch": 0.7438882985815116, + "grad_norm": 532.1583862304688, + "learning_rate": 1.970057998671446e-06, + "loss": 11.2544, + "step": 368250 + }, + { + "epoch": 0.7439084992141954, + "grad_norm": 916.2235717773438, + "learning_rate": 1.9697803334283906e-06, + "loss": 21.3499, + "step": 368260 + }, + { + "epoch": 0.7439286998468791, + "grad_norm": 188.19058227539062, + "learning_rate": 1.9695026829542607e-06, + "loss": 14.5245, + "step": 368270 + }, + { + "epoch": 0.743948900479563, + "grad_norm": 342.7704772949219, + "learning_rate": 1.969225047250407e-06, + "loss": 18.4819, + "step": 368280 + }, + { + "epoch": 0.7439691011122468, + "grad_norm": 296.64520263671875, + "learning_rate": 1.9689474263181814e-06, + "loss": 17.9191, + "step": 368290 + }, + { + "epoch": 0.7439893017449306, + "grad_norm": 271.92340087890625, + "learning_rate": 1.9686698201589395e-06, + "loss": 17.2526, + "step": 368300 + }, + { + "epoch": 0.7440095023776144, + "grad_norm": 653.4396362304688, + "learning_rate": 1.968392228774034e-06, + "loss": 16.0667, + "step": 368310 + }, + { + "epoch": 0.7440297030102982, + "grad_norm": 137.953857421875, + "learning_rate": 1.968114652164815e-06, + "loss": 11.8034, + "step": 368320 + }, + { + "epoch": 0.7440499036429821, + "grad_norm": 158.80421447753906, + "learning_rate": 1.967837090332637e-06, + "loss": 12.5297, + "step": 368330 + }, + { + "epoch": 0.7440701042756659, + "grad_norm": 225.25830078125, + "learning_rate": 1.967559543278856e-06, + "loss": 23.5659, + "step": 368340 + }, + { + "epoch": 0.7440903049083497, + "grad_norm": 403.22662353515625, + "learning_rate": 1.9672820110048207e-06, + "loss": 14.903, + "step": 368350 + }, + { + "epoch": 0.7441105055410335, + "grad_norm": 486.71124267578125, + "learning_rate": 1.967004493511884e-06, + "loss": 28.5848, + "step": 368360 + }, + { + "epoch": 0.7441307061737173, + "grad_norm": 417.70623779296875, + "learning_rate": 1.966726990801402e-06, + "loss": 13.3004, + "step": 368370 + }, + { + "epoch": 0.7441509068064012, + "grad_norm": 31.580720901489258, + "learning_rate": 1.966449502874724e-06, + "loss": 9.2592, + "step": 368380 + }, + { + "epoch": 0.744171107439085, + "grad_norm": 6.129324913024902, + "learning_rate": 1.9661720297332014e-06, + "loss": 28.3694, + "step": 368390 + }, + { + "epoch": 0.7441913080717688, + "grad_norm": 278.25164794921875, + "learning_rate": 1.9658945713781883e-06, + "loss": 20.7193, + "step": 368400 + }, + { + "epoch": 0.7442115087044526, + "grad_norm": 237.9478302001953, + "learning_rate": 1.9656171278110394e-06, + "loss": 22.9121, + "step": 368410 + }, + { + "epoch": 0.7442317093371364, + "grad_norm": 745.4280395507812, + "learning_rate": 1.9653396990331043e-06, + "loss": 48.5694, + "step": 368420 + }, + { + "epoch": 0.7442519099698203, + "grad_norm": 570.2730102539062, + "learning_rate": 1.965062285045733e-06, + "loss": 27.6345, + "step": 368430 + }, + { + "epoch": 0.7442721106025041, + "grad_norm": 141.58511352539062, + "learning_rate": 1.9647848858502825e-06, + "loss": 21.2795, + "step": 368440 + }, + { + "epoch": 0.7442923112351879, + "grad_norm": 430.83642578125, + "learning_rate": 1.9645075014481024e-06, + 
"loss": 14.071, + "step": 368450 + }, + { + "epoch": 0.7443125118678717, + "grad_norm": 197.9840545654297, + "learning_rate": 1.964230131840543e-06, + "loss": 12.0083, + "step": 368460 + }, + { + "epoch": 0.7443327125005555, + "grad_norm": 186.36111450195312, + "learning_rate": 1.9639527770289586e-06, + "loss": 12.0492, + "step": 368470 + }, + { + "epoch": 0.7443529131332394, + "grad_norm": 369.485107421875, + "learning_rate": 1.9636754370146987e-06, + "loss": 15.3095, + "step": 368480 + }, + { + "epoch": 0.7443731137659232, + "grad_norm": 205.3795928955078, + "learning_rate": 1.9633981117991186e-06, + "loss": 20.2946, + "step": 368490 + }, + { + "epoch": 0.744393314398607, + "grad_norm": 236.73345947265625, + "learning_rate": 1.9631208013835677e-06, + "loss": 10.602, + "step": 368500 + }, + { + "epoch": 0.7444135150312908, + "grad_norm": 22.172441482543945, + "learning_rate": 1.9628435057693963e-06, + "loss": 10.0532, + "step": 368510 + }, + { + "epoch": 0.7444337156639746, + "grad_norm": 456.3912658691406, + "learning_rate": 1.9625662249579586e-06, + "loss": 21.0028, + "step": 368520 + }, + { + "epoch": 0.7444539162966584, + "grad_norm": 620.5680541992188, + "learning_rate": 1.962288958950603e-06, + "loss": 12.2597, + "step": 368530 + }, + { + "epoch": 0.7444741169293422, + "grad_norm": 368.8133544921875, + "learning_rate": 1.9620117077486838e-06, + "loss": 15.5148, + "step": 368540 + }, + { + "epoch": 0.744494317562026, + "grad_norm": 612.2443237304688, + "learning_rate": 1.9617344713535503e-06, + "loss": 11.3829, + "step": 368550 + }, + { + "epoch": 0.7445145181947098, + "grad_norm": 190.12802124023438, + "learning_rate": 1.9614572497665555e-06, + "loss": 40.4976, + "step": 368560 + }, + { + "epoch": 0.7445347188273936, + "grad_norm": 697.5142211914062, + "learning_rate": 1.9611800429890497e-06, + "loss": 19.6238, + "step": 368570 + }, + { + "epoch": 0.7445549194600775, + "grad_norm": 64.01460266113281, + "learning_rate": 1.960902851022382e-06, + "loss": 25.6799, + "step": 368580 + }, + { + "epoch": 0.7445751200927613, + "grad_norm": 388.7540283203125, + "learning_rate": 1.9606256738679074e-06, + "loss": 17.4432, + "step": 368590 + }, + { + "epoch": 0.7445953207254451, + "grad_norm": 560.4470825195312, + "learning_rate": 1.9603485115269743e-06, + "loss": 26.4945, + "step": 368600 + }, + { + "epoch": 0.7446155213581289, + "grad_norm": 796.9616088867188, + "learning_rate": 1.960071364000932e-06, + "loss": 13.8728, + "step": 368610 + }, + { + "epoch": 0.7446357219908127, + "grad_norm": 342.3143310546875, + "learning_rate": 1.959794231291134e-06, + "loss": 13.0166, + "step": 368620 + }, + { + "epoch": 0.7446559226234966, + "grad_norm": 156.66897583007812, + "learning_rate": 1.959517113398933e-06, + "loss": 15.2025, + "step": 368630 + }, + { + "epoch": 0.7446761232561804, + "grad_norm": 993.8450927734375, + "learning_rate": 1.959240010325673e-06, + "loss": 23.725, + "step": 368640 + }, + { + "epoch": 0.7446963238888642, + "grad_norm": 696.0406494140625, + "learning_rate": 1.958962922072709e-06, + "loss": 21.7202, + "step": 368650 + }, + { + "epoch": 0.744716524521548, + "grad_norm": 275.1782531738281, + "learning_rate": 1.9586858486413923e-06, + "loss": 15.1027, + "step": 368660 + }, + { + "epoch": 0.7447367251542318, + "grad_norm": 611.896484375, + "learning_rate": 1.958408790033072e-06, + "loss": 34.5304, + "step": 368670 + }, + { + "epoch": 0.7447569257869157, + "grad_norm": 401.76611328125, + "learning_rate": 1.958131746249097e-06, + "loss": 16.009, + "step": 368680 + }, + { + "epoch": 
0.7447771264195995, + "grad_norm": 310.48065185546875, + "learning_rate": 1.9578547172908185e-06, + "loss": 25.6982, + "step": 368690 + }, + { + "epoch": 0.7447973270522833, + "grad_norm": 470.43499755859375, + "learning_rate": 1.9575777031595906e-06, + "loss": 19.3149, + "step": 368700 + }, + { + "epoch": 0.7448175276849671, + "grad_norm": 282.6390686035156, + "learning_rate": 1.9573007038567565e-06, + "loss": 12.1436, + "step": 368710 + }, + { + "epoch": 0.7448377283176509, + "grad_norm": 0.0, + "learning_rate": 1.95702371938367e-06, + "loss": 14.1782, + "step": 368720 + }, + { + "epoch": 0.7448579289503348, + "grad_norm": 705.910400390625, + "learning_rate": 1.956746749741682e-06, + "loss": 11.0068, + "step": 368730 + }, + { + "epoch": 0.7448781295830186, + "grad_norm": 953.3394165039062, + "learning_rate": 1.9564697949321417e-06, + "loss": 15.2012, + "step": 368740 + }, + { + "epoch": 0.7448983302157024, + "grad_norm": 622.295654296875, + "learning_rate": 1.956192854956397e-06, + "loss": 31.6843, + "step": 368750 + }, + { + "epoch": 0.7449185308483862, + "grad_norm": 204.8721466064453, + "learning_rate": 1.9559159298158e-06, + "loss": 25.308, + "step": 368760 + }, + { + "epoch": 0.74493873148107, + "grad_norm": 1993.9295654296875, + "learning_rate": 1.9556390195117004e-06, + "loss": 18.91, + "step": 368770 + }, + { + "epoch": 0.7449589321137539, + "grad_norm": 74.58755493164062, + "learning_rate": 1.955362124045445e-06, + "loss": 23.2488, + "step": 368780 + }, + { + "epoch": 0.7449791327464376, + "grad_norm": 392.92547607421875, + "learning_rate": 1.955085243418387e-06, + "loss": 25.7019, + "step": 368790 + }, + { + "epoch": 0.7449993333791214, + "grad_norm": 441.37969970703125, + "learning_rate": 1.9548083776318727e-06, + "loss": 10.7071, + "step": 368800 + }, + { + "epoch": 0.7450195340118052, + "grad_norm": 597.580078125, + "learning_rate": 1.9545315266872545e-06, + "loss": 16.4676, + "step": 368810 + }, + { + "epoch": 0.745039734644489, + "grad_norm": 325.194091796875, + "learning_rate": 1.95425469058588e-06, + "loss": 16.3694, + "step": 368820 + }, + { + "epoch": 0.7450599352771728, + "grad_norm": 1044.6522216796875, + "learning_rate": 1.9539778693290976e-06, + "loss": 11.8188, + "step": 368830 + }, + { + "epoch": 0.7450801359098567, + "grad_norm": 442.4576416015625, + "learning_rate": 1.953701062918259e-06, + "loss": 20.3352, + "step": 368840 + }, + { + "epoch": 0.7451003365425405, + "grad_norm": 1567.712158203125, + "learning_rate": 1.95342427135471e-06, + "loss": 33.4006, + "step": 368850 + }, + { + "epoch": 0.7451205371752243, + "grad_norm": 390.2557373046875, + "learning_rate": 1.953147494639804e-06, + "loss": 16.9342, + "step": 368860 + }, + { + "epoch": 0.7451407378079081, + "grad_norm": 1089.302978515625, + "learning_rate": 1.9528707327748853e-06, + "loss": 39.3832, + "step": 368870 + }, + { + "epoch": 0.745160938440592, + "grad_norm": 56.755516052246094, + "learning_rate": 1.9525939857613075e-06, + "loss": 15.0005, + "step": 368880 + }, + { + "epoch": 0.7451811390732758, + "grad_norm": 568.4422607421875, + "learning_rate": 1.9523172536004165e-06, + "loss": 10.7274, + "step": 368890 + }, + { + "epoch": 0.7452013397059596, + "grad_norm": 51.04914093017578, + "learning_rate": 1.95204053629356e-06, + "loss": 17.0731, + "step": 368900 + }, + { + "epoch": 0.7452215403386434, + "grad_norm": 449.2515869140625, + "learning_rate": 1.9517638338420898e-06, + "loss": 15.5755, + "step": 368910 + }, + { + "epoch": 0.7452417409713272, + "grad_norm": 463.6322021484375, + "learning_rate": 
1.9514871462473527e-06, + "loss": 19.8048, + "step": 368920 + }, + { + "epoch": 0.745261941604011, + "grad_norm": 82.99703979492188, + "learning_rate": 1.951210473510696e-06, + "loss": 19.7288, + "step": 368930 + }, + { + "epoch": 0.7452821422366949, + "grad_norm": 536.100830078125, + "learning_rate": 1.9509338156334695e-06, + "loss": 26.1445, + "step": 368940 + }, + { + "epoch": 0.7453023428693787, + "grad_norm": 776.7899169921875, + "learning_rate": 1.9506571726170236e-06, + "loss": 15.4703, + "step": 368950 + }, + { + "epoch": 0.7453225435020625, + "grad_norm": 735.1712036132812, + "learning_rate": 1.9503805444627054e-06, + "loss": 21.861, + "step": 368960 + }, + { + "epoch": 0.7453427441347463, + "grad_norm": 1005.1720581054688, + "learning_rate": 1.95010393117186e-06, + "loss": 33.4139, + "step": 368970 + }, + { + "epoch": 0.7453629447674301, + "grad_norm": 311.2734069824219, + "learning_rate": 1.9498273327458405e-06, + "loss": 14.2049, + "step": 368980 + }, + { + "epoch": 0.745383145400114, + "grad_norm": 22.720521926879883, + "learning_rate": 1.9495507491859922e-06, + "loss": 21.7195, + "step": 368990 + }, + { + "epoch": 0.7454033460327978, + "grad_norm": 203.66107177734375, + "learning_rate": 1.9492741804936623e-06, + "loss": 9.2814, + "step": 369000 + }, + { + "epoch": 0.7454235466654816, + "grad_norm": 160.48106384277344, + "learning_rate": 1.9489976266702e-06, + "loss": 17.9409, + "step": 369010 + }, + { + "epoch": 0.7454437472981654, + "grad_norm": 332.17327880859375, + "learning_rate": 1.9487210877169545e-06, + "loss": 14.8505, + "step": 369020 + }, + { + "epoch": 0.7454639479308492, + "grad_norm": 408.85784912109375, + "learning_rate": 1.9484445636352724e-06, + "loss": 14.3842, + "step": 369030 + }, + { + "epoch": 0.745484148563533, + "grad_norm": 713.8599853515625, + "learning_rate": 1.9481680544264995e-06, + "loss": 23.6859, + "step": 369040 + }, + { + "epoch": 0.7455043491962168, + "grad_norm": 602.9070434570312, + "learning_rate": 1.9478915600919877e-06, + "loss": 18.816, + "step": 369050 + }, + { + "epoch": 0.7455245498289006, + "grad_norm": 171.35777282714844, + "learning_rate": 1.9476150806330816e-06, + "loss": 9.5252, + "step": 369060 + }, + { + "epoch": 0.7455447504615844, + "grad_norm": 36.31989288330078, + "learning_rate": 1.947338616051129e-06, + "loss": 11.3571, + "step": 369070 + }, + { + "epoch": 0.7455649510942682, + "grad_norm": 386.3040771484375, + "learning_rate": 1.947062166347478e-06, + "loss": 32.9928, + "step": 369080 + }, + { + "epoch": 0.7455851517269521, + "grad_norm": 853.2674560546875, + "learning_rate": 1.9467857315234746e-06, + "loss": 23.3887, + "step": 369090 + }, + { + "epoch": 0.7456053523596359, + "grad_norm": 237.78518676757812, + "learning_rate": 1.946509311580469e-06, + "loss": 20.5703, + "step": 369100 + }, + { + "epoch": 0.7456255529923197, + "grad_norm": 693.06396484375, + "learning_rate": 1.946232906519806e-06, + "loss": 24.2468, + "step": 369110 + }, + { + "epoch": 0.7456457536250035, + "grad_norm": 171.52392578125, + "learning_rate": 1.9459565163428322e-06, + "loss": 21.2309, + "step": 369120 + }, + { + "epoch": 0.7456659542576873, + "grad_norm": 453.17706298828125, + "learning_rate": 1.945680141050898e-06, + "loss": 17.6323, + "step": 369130 + }, + { + "epoch": 0.7456861548903712, + "grad_norm": 78.51905059814453, + "learning_rate": 1.945403780645346e-06, + "loss": 4.4487, + "step": 369140 + }, + { + "epoch": 0.745706355523055, + "grad_norm": 19.49560546875, + "learning_rate": 1.945127435127528e-06, + "loss": 21.8167, + "step": 
369150 + }, + { + "epoch": 0.7457265561557388, + "grad_norm": 395.67755126953125, + "learning_rate": 1.9448511044987862e-06, + "loss": 17.1365, + "step": 369160 + }, + { + "epoch": 0.7457467567884226, + "grad_norm": 508.52960205078125, + "learning_rate": 1.944574788760471e-06, + "loss": 27.2976, + "step": 369170 + }, + { + "epoch": 0.7457669574211064, + "grad_norm": 603.98095703125, + "learning_rate": 1.944298487913928e-06, + "loss": 62.7787, + "step": 369180 + }, + { + "epoch": 0.7457871580537903, + "grad_norm": 387.5748596191406, + "learning_rate": 1.9440222019605022e-06, + "loss": 22.3551, + "step": 369190 + }, + { + "epoch": 0.7458073586864741, + "grad_norm": 0.17242459952831268, + "learning_rate": 1.9437459309015426e-06, + "loss": 6.1827, + "step": 369200 + }, + { + "epoch": 0.7458275593191579, + "grad_norm": 0.0, + "learning_rate": 1.9434696747383946e-06, + "loss": 21.0599, + "step": 369210 + }, + { + "epoch": 0.7458477599518417, + "grad_norm": 272.6597595214844, + "learning_rate": 1.9431934334724035e-06, + "loss": 20.5769, + "step": 369220 + }, + { + "epoch": 0.7458679605845255, + "grad_norm": 90.93065643310547, + "learning_rate": 1.942917207104917e-06, + "loss": 19.9338, + "step": 369230 + }, + { + "epoch": 0.7458881612172094, + "grad_norm": 374.5639953613281, + "learning_rate": 1.942640995637284e-06, + "loss": 18.8268, + "step": 369240 + }, + { + "epoch": 0.7459083618498932, + "grad_norm": 261.09100341796875, + "learning_rate": 1.942364799070845e-06, + "loss": 32.9503, + "step": 369250 + }, + { + "epoch": 0.745928562482577, + "grad_norm": 687.7623901367188, + "learning_rate": 1.9420886174069486e-06, + "loss": 18.6524, + "step": 369260 + }, + { + "epoch": 0.7459487631152608, + "grad_norm": 505.5316467285156, + "learning_rate": 1.9418124506469437e-06, + "loss": 13.9148, + "step": 369270 + }, + { + "epoch": 0.7459689637479446, + "grad_norm": 334.58233642578125, + "learning_rate": 1.9415362987921737e-06, + "loss": 12.9861, + "step": 369280 + }, + { + "epoch": 0.7459891643806285, + "grad_norm": 99.5759048461914, + "learning_rate": 1.9412601618439834e-06, + "loss": 19.2453, + "step": 369290 + }, + { + "epoch": 0.7460093650133122, + "grad_norm": 322.212890625, + "learning_rate": 1.94098403980372e-06, + "loss": 21.7992, + "step": 369300 + }, + { + "epoch": 0.746029565645996, + "grad_norm": 528.5283203125, + "learning_rate": 1.940707932672733e-06, + "loss": 8.8681, + "step": 369310 + }, + { + "epoch": 0.7460497662786798, + "grad_norm": 255.1359405517578, + "learning_rate": 1.9404318404523605e-06, + "loss": 15.9722, + "step": 369320 + }, + { + "epoch": 0.7460699669113636, + "grad_norm": 46.65135192871094, + "learning_rate": 1.9401557631439526e-06, + "loss": 27.8944, + "step": 369330 + }, + { + "epoch": 0.7460901675440474, + "grad_norm": 284.31573486328125, + "learning_rate": 1.939879700748856e-06, + "loss": 16.722, + "step": 369340 + }, + { + "epoch": 0.7461103681767313, + "grad_norm": 125.75949096679688, + "learning_rate": 1.939603653268414e-06, + "loss": 73.6868, + "step": 369350 + }, + { + "epoch": 0.7461305688094151, + "grad_norm": 289.5494689941406, + "learning_rate": 1.9393276207039717e-06, + "loss": 17.8466, + "step": 369360 + }, + { + "epoch": 0.7461507694420989, + "grad_norm": 419.5796203613281, + "learning_rate": 1.9390516030568767e-06, + "loss": 19.4291, + "step": 369370 + }, + { + "epoch": 0.7461709700747827, + "grad_norm": 228.23182678222656, + "learning_rate": 1.938775600328473e-06, + "loss": 15.3995, + "step": 369380 + }, + { + "epoch": 0.7461911707074665, + "grad_norm": 
398.65203857421875, + "learning_rate": 1.9384996125201045e-06, + "loss": 23.2901, + "step": 369390 + }, + { + "epoch": 0.7462113713401504, + "grad_norm": 32.43281555175781, + "learning_rate": 1.938223639633119e-06, + "loss": 25.4916, + "step": 369400 + }, + { + "epoch": 0.7462315719728342, + "grad_norm": 576.2562866210938, + "learning_rate": 1.937947681668858e-06, + "loss": 17.5294, + "step": 369410 + }, + { + "epoch": 0.746251772605518, + "grad_norm": 129.07254028320312, + "learning_rate": 1.9376717386286703e-06, + "loss": 17.2375, + "step": 369420 + }, + { + "epoch": 0.7462719732382018, + "grad_norm": 769.473388671875, + "learning_rate": 1.9373958105138997e-06, + "loss": 14.399, + "step": 369430 + }, + { + "epoch": 0.7462921738708856, + "grad_norm": 538.255126953125, + "learning_rate": 1.937119897325889e-06, + "loss": 16.4073, + "step": 369440 + }, + { + "epoch": 0.7463123745035695, + "grad_norm": 267.184326171875, + "learning_rate": 1.936843999065985e-06, + "loss": 30.4707, + "step": 369450 + }, + { + "epoch": 0.7463325751362533, + "grad_norm": 5.306746482849121, + "learning_rate": 1.936568115735531e-06, + "loss": 27.5811, + "step": 369460 + }, + { + "epoch": 0.7463527757689371, + "grad_norm": 548.7133178710938, + "learning_rate": 1.9362922473358735e-06, + "loss": 22.3497, + "step": 369470 + }, + { + "epoch": 0.7463729764016209, + "grad_norm": 367.3851013183594, + "learning_rate": 1.936016393868355e-06, + "loss": 35.1907, + "step": 369480 + }, + { + "epoch": 0.7463931770343047, + "grad_norm": 699.6803588867188, + "learning_rate": 1.9357405553343224e-06, + "loss": 14.7062, + "step": 369490 + }, + { + "epoch": 0.7464133776669886, + "grad_norm": 171.78594970703125, + "learning_rate": 1.9354647317351187e-06, + "loss": 10.5206, + "step": 369500 + }, + { + "epoch": 0.7464335782996724, + "grad_norm": 186.3427276611328, + "learning_rate": 1.9351889230720866e-06, + "loss": 7.8387, + "step": 369510 + }, + { + "epoch": 0.7464537789323562, + "grad_norm": 393.7889404296875, + "learning_rate": 1.9349131293465732e-06, + "loss": 12.3001, + "step": 369520 + }, + { + "epoch": 0.74647397956504, + "grad_norm": 475.2576599121094, + "learning_rate": 1.934637350559922e-06, + "loss": 30.4372, + "step": 369530 + }, + { + "epoch": 0.7464941801977238, + "grad_norm": 401.1710510253906, + "learning_rate": 1.9343615867134748e-06, + "loss": 44.3436, + "step": 369540 + }, + { + "epoch": 0.7465143808304076, + "grad_norm": 312.4425354003906, + "learning_rate": 1.9340858378085777e-06, + "loss": 19.5031, + "step": 369550 + }, + { + "epoch": 0.7465345814630914, + "grad_norm": 565.4642333984375, + "learning_rate": 1.933810103846575e-06, + "loss": 24.1692, + "step": 369560 + }, + { + "epoch": 0.7465547820957752, + "grad_norm": 597.1283569335938, + "learning_rate": 1.933534384828811e-06, + "loss": 21.642, + "step": 369570 + }, + { + "epoch": 0.746574982728459, + "grad_norm": 151.046875, + "learning_rate": 1.933258680756627e-06, + "loss": 12.984, + "step": 369580 + }, + { + "epoch": 0.7465951833611428, + "grad_norm": 240.71826171875, + "learning_rate": 1.9329829916313684e-06, + "loss": 8.2591, + "step": 369590 + }, + { + "epoch": 0.7466153839938267, + "grad_norm": 202.32093811035156, + "learning_rate": 1.93270731745438e-06, + "loss": 25.7117, + "step": 369600 + }, + { + "epoch": 0.7466355846265105, + "grad_norm": 500.51055908203125, + "learning_rate": 1.9324316582270025e-06, + "loss": 11.8643, + "step": 369610 + }, + { + "epoch": 0.7466557852591943, + "grad_norm": 385.1941833496094, + "learning_rate": 1.93215601395058e-06, + 
"loss": 20.8431, + "step": 369620 + }, + { + "epoch": 0.7466759858918781, + "grad_norm": 513.0110473632812, + "learning_rate": 1.93188038462646e-06, + "loss": 18.9674, + "step": 369630 + }, + { + "epoch": 0.7466961865245619, + "grad_norm": 902.2584228515625, + "learning_rate": 1.931604770255982e-06, + "loss": 22.0942, + "step": 369640 + }, + { + "epoch": 0.7467163871572458, + "grad_norm": 372.42059326171875, + "learning_rate": 1.9313291708404885e-06, + "loss": 12.7421, + "step": 369650 + }, + { + "epoch": 0.7467365877899296, + "grad_norm": 527.2333984375, + "learning_rate": 1.9310535863813266e-06, + "loss": 20.7038, + "step": 369660 + }, + { + "epoch": 0.7467567884226134, + "grad_norm": 530.1795043945312, + "learning_rate": 1.9307780168798374e-06, + "loss": 17.0725, + "step": 369670 + }, + { + "epoch": 0.7467769890552972, + "grad_norm": 121.6954116821289, + "learning_rate": 1.930502462337362e-06, + "loss": 16.0094, + "step": 369680 + }, + { + "epoch": 0.746797189687981, + "grad_norm": 450.1278991699219, + "learning_rate": 1.9302269227552465e-06, + "loss": 22.1541, + "step": 369690 + }, + { + "epoch": 0.7468173903206649, + "grad_norm": 278.0421142578125, + "learning_rate": 1.929951398134832e-06, + "loss": 19.6431, + "step": 369700 + }, + { + "epoch": 0.7468375909533487, + "grad_norm": 3.5465619564056396, + "learning_rate": 1.9296758884774624e-06, + "loss": 15.6716, + "step": 369710 + }, + { + "epoch": 0.7468577915860325, + "grad_norm": 555.057861328125, + "learning_rate": 1.9294003937844806e-06, + "loss": 19.467, + "step": 369720 + }, + { + "epoch": 0.7468779922187163, + "grad_norm": 412.9222412109375, + "learning_rate": 1.9291249140572275e-06, + "loss": 16.6906, + "step": 369730 + }, + { + "epoch": 0.7468981928514001, + "grad_norm": 943.5767822265625, + "learning_rate": 1.9288494492970487e-06, + "loss": 17.9049, + "step": 369740 + }, + { + "epoch": 0.746918393484084, + "grad_norm": 188.947265625, + "learning_rate": 1.928573999505284e-06, + "loss": 19.5368, + "step": 369750 + }, + { + "epoch": 0.7469385941167678, + "grad_norm": 436.8934326171875, + "learning_rate": 1.928298564683278e-06, + "loss": 10.6879, + "step": 369760 + }, + { + "epoch": 0.7469587947494516, + "grad_norm": 188.24301147460938, + "learning_rate": 1.928023144832371e-06, + "loss": 12.9682, + "step": 369770 + }, + { + "epoch": 0.7469789953821354, + "grad_norm": 294.8847961425781, + "learning_rate": 1.927747739953908e-06, + "loss": 17.8445, + "step": 369780 + }, + { + "epoch": 0.7469991960148192, + "grad_norm": 155.76507568359375, + "learning_rate": 1.9274723500492304e-06, + "loss": 11.2441, + "step": 369790 + }, + { + "epoch": 0.747019396647503, + "grad_norm": 534.6624145507812, + "learning_rate": 1.927196975119678e-06, + "loss": 13.1074, + "step": 369800 + }, + { + "epoch": 0.7470395972801868, + "grad_norm": 279.3785400390625, + "learning_rate": 1.926921615166596e-06, + "loss": 17.9435, + "step": 369810 + }, + { + "epoch": 0.7470597979128706, + "grad_norm": 627.2294921875, + "learning_rate": 1.926646270191326e-06, + "loss": 16.5889, + "step": 369820 + }, + { + "epoch": 0.7470799985455544, + "grad_norm": 327.37646484375, + "learning_rate": 1.9263709401952076e-06, + "loss": 25.8833, + "step": 369830 + }, + { + "epoch": 0.7471001991782382, + "grad_norm": 108.8963394165039, + "learning_rate": 1.926095625179584e-06, + "loss": 16.5024, + "step": 369840 + }, + { + "epoch": 0.747120399810922, + "grad_norm": 233.37591552734375, + "learning_rate": 1.9258203251458012e-06, + "loss": 17.9614, + "step": 369850 + }, + { + "epoch": 
0.7471406004436059, + "grad_norm": 440.4516906738281, + "learning_rate": 1.9255450400951937e-06, + "loss": 21.2581, + "step": 369860 + }, + { + "epoch": 0.7471608010762897, + "grad_norm": 380.8348693847656, + "learning_rate": 1.925269770029107e-06, + "loss": 9.627, + "step": 369870 + }, + { + "epoch": 0.7471810017089735, + "grad_norm": 127.53816986083984, + "learning_rate": 1.924994514948884e-06, + "loss": 8.6012, + "step": 369880 + }, + { + "epoch": 0.7472012023416573, + "grad_norm": 287.0458984375, + "learning_rate": 1.9247192748558648e-06, + "loss": 14.8855, + "step": 369890 + }, + { + "epoch": 0.7472214029743411, + "grad_norm": 413.32171630859375, + "learning_rate": 1.9244440497513895e-06, + "loss": 21.4359, + "step": 369900 + }, + { + "epoch": 0.747241603607025, + "grad_norm": 420.88275146484375, + "learning_rate": 1.9241688396368e-06, + "loss": 16.8756, + "step": 369910 + }, + { + "epoch": 0.7472618042397088, + "grad_norm": 814.9009399414062, + "learning_rate": 1.923893644513443e-06, + "loss": 17.2182, + "step": 369920 + }, + { + "epoch": 0.7472820048723926, + "grad_norm": 73.95588684082031, + "learning_rate": 1.9236184643826515e-06, + "loss": 22.9281, + "step": 369930 + }, + { + "epoch": 0.7473022055050764, + "grad_norm": 147.46900939941406, + "learning_rate": 1.9233432992457708e-06, + "loss": 15.6789, + "step": 369940 + }, + { + "epoch": 0.7473224061377602, + "grad_norm": 506.54510498046875, + "learning_rate": 1.9230681491041425e-06, + "loss": 28.0695, + "step": 369950 + }, + { + "epoch": 0.7473426067704441, + "grad_norm": 422.76824951171875, + "learning_rate": 1.9227930139591077e-06, + "loss": 21.3278, + "step": 369960 + }, + { + "epoch": 0.7473628074031279, + "grad_norm": 919.2847290039062, + "learning_rate": 1.922517893812004e-06, + "loss": 16.2156, + "step": 369970 + }, + { + "epoch": 0.7473830080358117, + "grad_norm": 392.3123779296875, + "learning_rate": 1.9222427886641774e-06, + "loss": 16.2128, + "step": 369980 + }, + { + "epoch": 0.7474032086684955, + "grad_norm": 311.8834228515625, + "learning_rate": 1.921967698516966e-06, + "loss": 23.6828, + "step": 369990 + }, + { + "epoch": 0.7474234093011793, + "grad_norm": 256.5654296875, + "learning_rate": 1.9216926233717087e-06, + "loss": 21.8324, + "step": 370000 + }, + { + "epoch": 0.7474436099338632, + "grad_norm": 71.87821960449219, + "learning_rate": 1.9214175632297503e-06, + "loss": 13.2317, + "step": 370010 + }, + { + "epoch": 0.747463810566547, + "grad_norm": 28.057043075561523, + "learning_rate": 1.9211425180924274e-06, + "loss": 19.1946, + "step": 370020 + }, + { + "epoch": 0.7474840111992308, + "grad_norm": 113.76498413085938, + "learning_rate": 1.920867487961084e-06, + "loss": 12.4828, + "step": 370030 + }, + { + "epoch": 0.7475042118319146, + "grad_norm": 341.0687561035156, + "learning_rate": 1.920592472837057e-06, + "loss": 27.8353, + "step": 370040 + }, + { + "epoch": 0.7475244124645984, + "grad_norm": 912.1026000976562, + "learning_rate": 1.920317472721691e-06, + "loss": 31.9558, + "step": 370050 + }, + { + "epoch": 0.7475446130972823, + "grad_norm": 287.0536193847656, + "learning_rate": 1.9200424876163244e-06, + "loss": 10.8399, + "step": 370060 + }, + { + "epoch": 0.747564813729966, + "grad_norm": 523.6887817382812, + "learning_rate": 1.9197675175222954e-06, + "loss": 17.1597, + "step": 370070 + }, + { + "epoch": 0.7475850143626498, + "grad_norm": 408.5846862792969, + "learning_rate": 1.919492562440947e-06, + "loss": 10.8312, + "step": 370080 + }, + { + "epoch": 0.7476052149953336, + "grad_norm": 
1629.015380859375, + "learning_rate": 1.919217622373617e-06, + "loss": 33.6798, + "step": 370090 + }, + { + "epoch": 0.7476254156280174, + "grad_norm": 375.0655212402344, + "learning_rate": 1.9189426973216478e-06, + "loss": 20.4349, + "step": 370100 + }, + { + "epoch": 0.7476456162607013, + "grad_norm": 109.400146484375, + "learning_rate": 1.918667787286379e-06, + "loss": 20.1551, + "step": 370110 + }, + { + "epoch": 0.7476658168933851, + "grad_norm": 301.20184326171875, + "learning_rate": 1.9183928922691474e-06, + "loss": 19.1174, + "step": 370120 + }, + { + "epoch": 0.7476860175260689, + "grad_norm": 139.5936279296875, + "learning_rate": 1.918118012271297e-06, + "loss": 10.2495, + "step": 370130 + }, + { + "epoch": 0.7477062181587527, + "grad_norm": 35.07410430908203, + "learning_rate": 1.917843147294166e-06, + "loss": 10.8955, + "step": 370140 + }, + { + "epoch": 0.7477264187914365, + "grad_norm": 687.4677734375, + "learning_rate": 1.917568297339091e-06, + "loss": 24.3905, + "step": 370150 + }, + { + "epoch": 0.7477466194241204, + "grad_norm": 777.4478149414062, + "learning_rate": 1.9172934624074153e-06, + "loss": 28.4684, + "step": 370160 + }, + { + "epoch": 0.7477668200568042, + "grad_norm": 251.4214324951172, + "learning_rate": 1.9170186425004805e-06, + "loss": 12.5224, + "step": 370170 + }, + { + "epoch": 0.747787020689488, + "grad_norm": 622.1026000976562, + "learning_rate": 1.916743837619619e-06, + "loss": 19.8279, + "step": 370180 + }, + { + "epoch": 0.7478072213221718, + "grad_norm": 658.2227783203125, + "learning_rate": 1.9164690477661746e-06, + "loss": 24.3781, + "step": 370190 + }, + { + "epoch": 0.7478274219548556, + "grad_norm": 0.4346986413002014, + "learning_rate": 1.9161942729414876e-06, + "loss": 28.2475, + "step": 370200 + }, + { + "epoch": 0.7478476225875395, + "grad_norm": 338.2659912109375, + "learning_rate": 1.9159195131468955e-06, + "loss": 13.7039, + "step": 370210 + }, + { + "epoch": 0.7478678232202233, + "grad_norm": 345.5289001464844, + "learning_rate": 1.9156447683837365e-06, + "loss": 15.8404, + "step": 370220 + }, + { + "epoch": 0.7478880238529071, + "grad_norm": 229.8165740966797, + "learning_rate": 1.9153700386533502e-06, + "loss": 24.3089, + "step": 370230 + }, + { + "epoch": 0.7479082244855909, + "grad_norm": 400.5947265625, + "learning_rate": 1.9150953239570784e-06, + "loss": 11.5835, + "step": 370240 + }, + { + "epoch": 0.7479284251182747, + "grad_norm": 289.86627197265625, + "learning_rate": 1.9148206242962575e-06, + "loss": 7.1895, + "step": 370250 + }, + { + "epoch": 0.7479486257509586, + "grad_norm": 267.56402587890625, + "learning_rate": 1.9145459396722248e-06, + "loss": 21.1082, + "step": 370260 + }, + { + "epoch": 0.7479688263836424, + "grad_norm": 563.6002197265625, + "learning_rate": 1.914271270086323e-06, + "loss": 13.0804, + "step": 370270 + }, + { + "epoch": 0.7479890270163262, + "grad_norm": 114.8333740234375, + "learning_rate": 1.9139966155398894e-06, + "loss": 13.9258, + "step": 370280 + }, + { + "epoch": 0.74800922764901, + "grad_norm": 452.27496337890625, + "learning_rate": 1.913721976034259e-06, + "loss": 21.8796, + "step": 370290 + }, + { + "epoch": 0.7480294282816938, + "grad_norm": 278.9789733886719, + "learning_rate": 1.913447351570776e-06, + "loss": 30.854, + "step": 370300 + }, + { + "epoch": 0.7480496289143777, + "grad_norm": 602.8372802734375, + "learning_rate": 1.913172742150774e-06, + "loss": 19.3653, + "step": 370310 + }, + { + "epoch": 0.7480698295470614, + "grad_norm": 317.3916931152344, + "learning_rate": 
1.912898147775596e-06, + "loss": 11.7023, + "step": 370320 + }, + { + "epoch": 0.7480900301797452, + "grad_norm": 201.62997436523438, + "learning_rate": 1.912623568446578e-06, + "loss": 17.2515, + "step": 370330 + }, + { + "epoch": 0.748110230812429, + "grad_norm": 344.6565246582031, + "learning_rate": 1.9123490041650556e-06, + "loss": 11.0799, + "step": 370340 + }, + { + "epoch": 0.7481304314451128, + "grad_norm": 315.00152587890625, + "learning_rate": 1.912074454932372e-06, + "loss": 24.2449, + "step": 370350 + }, + { + "epoch": 0.7481506320777966, + "grad_norm": 229.8583984375, + "learning_rate": 1.911799920749861e-06, + "loss": 8.638, + "step": 370360 + }, + { + "epoch": 0.7481708327104805, + "grad_norm": 397.9170837402344, + "learning_rate": 1.911525401618865e-06, + "loss": 25.7534, + "step": 370370 + }, + { + "epoch": 0.7481910333431643, + "grad_norm": 218.24066162109375, + "learning_rate": 1.9112508975407173e-06, + "loss": 32.6129, + "step": 370380 + }, + { + "epoch": 0.7482112339758481, + "grad_norm": 260.3525390625, + "learning_rate": 1.9109764085167604e-06, + "loss": 22.2521, + "step": 370390 + }, + { + "epoch": 0.7482314346085319, + "grad_norm": 249.40843200683594, + "learning_rate": 1.910701934548329e-06, + "loss": 18.8474, + "step": 370400 + }, + { + "epoch": 0.7482516352412157, + "grad_norm": 347.7652282714844, + "learning_rate": 1.9104274756367606e-06, + "loss": 14.7549, + "step": 370410 + }, + { + "epoch": 0.7482718358738996, + "grad_norm": 585.7051391601562, + "learning_rate": 1.9101530317833957e-06, + "loss": 17.4191, + "step": 370420 + }, + { + "epoch": 0.7482920365065834, + "grad_norm": 986.23291015625, + "learning_rate": 1.9098786029895698e-06, + "loss": 27.654, + "step": 370430 + }, + { + "epoch": 0.7483122371392672, + "grad_norm": 522.16796875, + "learning_rate": 1.909604189256619e-06, + "loss": 28.5049, + "step": 370440 + }, + { + "epoch": 0.748332437771951, + "grad_norm": 580.5750122070312, + "learning_rate": 1.9093297905858833e-06, + "loss": 14.9824, + "step": 370450 + }, + { + "epoch": 0.7483526384046348, + "grad_norm": 461.26007080078125, + "learning_rate": 1.909055406978702e-06, + "loss": 14.1847, + "step": 370460 + }, + { + "epoch": 0.7483728390373187, + "grad_norm": 438.3565368652344, + "learning_rate": 1.908781038436407e-06, + "loss": 20.7148, + "step": 370470 + }, + { + "epoch": 0.7483930396700025, + "grad_norm": 304.63580322265625, + "learning_rate": 1.9085066849603377e-06, + "loss": 14.8347, + "step": 370480 + }, + { + "epoch": 0.7484132403026863, + "grad_norm": 596.1948852539062, + "learning_rate": 1.908232346551834e-06, + "loss": 19.4836, + "step": 370490 + }, + { + "epoch": 0.7484334409353701, + "grad_norm": 506.2995910644531, + "learning_rate": 1.90795802321223e-06, + "loss": 24.1062, + "step": 370500 + }, + { + "epoch": 0.7484536415680539, + "grad_norm": 343.956787109375, + "learning_rate": 1.907683714942863e-06, + "loss": 16.1206, + "step": 370510 + }, + { + "epoch": 0.7484738422007378, + "grad_norm": 139.79502868652344, + "learning_rate": 1.90740942174507e-06, + "loss": 23.2229, + "step": 370520 + }, + { + "epoch": 0.7484940428334216, + "grad_norm": 329.6191711425781, + "learning_rate": 1.9071351436201918e-06, + "loss": 12.6936, + "step": 370530 + }, + { + "epoch": 0.7485142434661054, + "grad_norm": 618.066162109375, + "learning_rate": 1.9068608805695588e-06, + "loss": 16.9842, + "step": 370540 + }, + { + "epoch": 0.7485344440987892, + "grad_norm": 103.03219604492188, + "learning_rate": 1.9065866325945099e-06, + "loss": 28.5514, + "step": 370550 + 
}, + { + "epoch": 0.748554644731473, + "grad_norm": 645.9698486328125, + "learning_rate": 1.906312399696385e-06, + "loss": 19.0781, + "step": 370560 + }, + { + "epoch": 0.7485748453641569, + "grad_norm": 230.70558166503906, + "learning_rate": 1.9060381818765177e-06, + "loss": 17.82, + "step": 370570 + }, + { + "epoch": 0.7485950459968406, + "grad_norm": 216.55380249023438, + "learning_rate": 1.9057639791362437e-06, + "loss": 18.2331, + "step": 370580 + }, + { + "epoch": 0.7486152466295244, + "grad_norm": 80.94849395751953, + "learning_rate": 1.9054897914769028e-06, + "loss": 20.7204, + "step": 370590 + }, + { + "epoch": 0.7486354472622082, + "grad_norm": 348.4039611816406, + "learning_rate": 1.9052156188998284e-06, + "loss": 8.6221, + "step": 370600 + }, + { + "epoch": 0.748655647894892, + "grad_norm": 256.8065185546875, + "learning_rate": 1.9049414614063566e-06, + "loss": 18.6516, + "step": 370610 + }, + { + "epoch": 0.7486758485275758, + "grad_norm": 596.1468505859375, + "learning_rate": 1.9046673189978266e-06, + "loss": 17.3199, + "step": 370620 + }, + { + "epoch": 0.7486960491602597, + "grad_norm": 148.25144958496094, + "learning_rate": 1.904393191675571e-06, + "loss": 22.5702, + "step": 370630 + }, + { + "epoch": 0.7487162497929435, + "grad_norm": 331.7828674316406, + "learning_rate": 1.9041190794409287e-06, + "loss": 16.0017, + "step": 370640 + }, + { + "epoch": 0.7487364504256273, + "grad_norm": 85.73043823242188, + "learning_rate": 1.9038449822952331e-06, + "loss": 10.475, + "step": 370650 + }, + { + "epoch": 0.7487566510583111, + "grad_norm": 2970.264404296875, + "learning_rate": 1.9035709002398234e-06, + "loss": 36.6707, + "step": 370660 + }, + { + "epoch": 0.748776851690995, + "grad_norm": 318.8187561035156, + "learning_rate": 1.9032968332760331e-06, + "loss": 14.7505, + "step": 370670 + }, + { + "epoch": 0.7487970523236788, + "grad_norm": 275.8929748535156, + "learning_rate": 1.903022781405197e-06, + "loss": 10.1959, + "step": 370680 + }, + { + "epoch": 0.7488172529563626, + "grad_norm": 53.88206100463867, + "learning_rate": 1.902748744628654e-06, + "loss": 18.7687, + "step": 370690 + }, + { + "epoch": 0.7488374535890464, + "grad_norm": 460.0052185058594, + "learning_rate": 1.9024747229477365e-06, + "loss": 20.5616, + "step": 370700 + }, + { + "epoch": 0.7488576542217302, + "grad_norm": 275.89794921875, + "learning_rate": 1.9022007163637829e-06, + "loss": 19.8112, + "step": 370710 + }, + { + "epoch": 0.748877854854414, + "grad_norm": 557.7904052734375, + "learning_rate": 1.9019267248781276e-06, + "loss": 12.6103, + "step": 370720 + }, + { + "epoch": 0.7488980554870979, + "grad_norm": 845.3037109375, + "learning_rate": 1.9016527484921037e-06, + "loss": 20.1345, + "step": 370730 + }, + { + "epoch": 0.7489182561197817, + "grad_norm": 370.7670593261719, + "learning_rate": 1.9013787872070506e-06, + "loss": 25.4725, + "step": 370740 + }, + { + "epoch": 0.7489384567524655, + "grad_norm": 131.9552764892578, + "learning_rate": 1.9011048410243011e-06, + "loss": 36.2493, + "step": 370750 + }, + { + "epoch": 0.7489586573851493, + "grad_norm": 355.9578552246094, + "learning_rate": 1.900830909945189e-06, + "loss": 21.0548, + "step": 370760 + }, + { + "epoch": 0.7489788580178331, + "grad_norm": 73.87676239013672, + "learning_rate": 1.900556993971051e-06, + "loss": 10.1273, + "step": 370770 + }, + { + "epoch": 0.748999058650517, + "grad_norm": 938.6024780273438, + "learning_rate": 1.9002830931032262e-06, + "loss": 20.8951, + "step": 370780 + }, + { + "epoch": 0.7490192592832008, + "grad_norm": 
505.7022705078125, + "learning_rate": 1.900009207343042e-06, + "loss": 18.4569, + "step": 370790 + }, + { + "epoch": 0.7490394599158846, + "grad_norm": 200.5887451171875, + "learning_rate": 1.8997353366918369e-06, + "loss": 19.4817, + "step": 370800 + }, + { + "epoch": 0.7490596605485684, + "grad_norm": 267.5477600097656, + "learning_rate": 1.8994614811509475e-06, + "loss": 17.4729, + "step": 370810 + }, + { + "epoch": 0.7490798611812522, + "grad_norm": 203.84954833984375, + "learning_rate": 1.8991876407217068e-06, + "loss": 15.9631, + "step": 370820 + }, + { + "epoch": 0.749100061813936, + "grad_norm": 203.7384796142578, + "learning_rate": 1.8989138154054482e-06, + "loss": 33.4527, + "step": 370830 + }, + { + "epoch": 0.7491202624466198, + "grad_norm": 657.7221069335938, + "learning_rate": 1.898640005203507e-06, + "loss": 26.5684, + "step": 370840 + }, + { + "epoch": 0.7491404630793036, + "grad_norm": 270.6231384277344, + "learning_rate": 1.8983662101172217e-06, + "loss": 7.4513, + "step": 370850 + }, + { + "epoch": 0.7491606637119874, + "grad_norm": 1.5963205099105835, + "learning_rate": 1.8980924301479199e-06, + "loss": 25.2977, + "step": 370860 + }, + { + "epoch": 0.7491808643446712, + "grad_norm": 360.6601867675781, + "learning_rate": 1.8978186652969394e-06, + "loss": 25.598, + "step": 370870 + }, + { + "epoch": 0.7492010649773551, + "grad_norm": 296.4714050292969, + "learning_rate": 1.8975449155656162e-06, + "loss": 19.0337, + "step": 370880 + }, + { + "epoch": 0.7492212656100389, + "grad_norm": 253.92222595214844, + "learning_rate": 1.897271180955283e-06, + "loss": 15.944, + "step": 370890 + }, + { + "epoch": 0.7492414662427227, + "grad_norm": 1114.2794189453125, + "learning_rate": 1.896997461467272e-06, + "loss": 18.0087, + "step": 370900 + }, + { + "epoch": 0.7492616668754065, + "grad_norm": 29.295988082885742, + "learning_rate": 1.8967237571029207e-06, + "loss": 12.9869, + "step": 370910 + }, + { + "epoch": 0.7492818675080903, + "grad_norm": 71.8803482055664, + "learning_rate": 1.896450067863561e-06, + "loss": 10.9449, + "step": 370920 + }, + { + "epoch": 0.7493020681407742, + "grad_norm": 293.2554016113281, + "learning_rate": 1.8961763937505262e-06, + "loss": 12.5658, + "step": 370930 + }, + { + "epoch": 0.749322268773458, + "grad_norm": 281.9087219238281, + "learning_rate": 1.8959027347651527e-06, + "loss": 11.3054, + "step": 370940 + }, + { + "epoch": 0.7493424694061418, + "grad_norm": 805.2517700195312, + "learning_rate": 1.895629090908771e-06, + "loss": 19.4157, + "step": 370950 + }, + { + "epoch": 0.7493626700388256, + "grad_norm": 286.0586242675781, + "learning_rate": 1.895355462182718e-06, + "loss": 21.2542, + "step": 370960 + }, + { + "epoch": 0.7493828706715094, + "grad_norm": 423.8516845703125, + "learning_rate": 1.8950818485883248e-06, + "loss": 12.2588, + "step": 370970 + }, + { + "epoch": 0.7494030713041933, + "grad_norm": 0.0, + "learning_rate": 1.8948082501269272e-06, + "loss": 12.0584, + "step": 370980 + }, + { + "epoch": 0.7494232719368771, + "grad_norm": 943.635498046875, + "learning_rate": 1.8945346667998566e-06, + "loss": 12.7964, + "step": 370990 + }, + { + "epoch": 0.7494434725695609, + "grad_norm": 670.3473510742188, + "learning_rate": 1.8942610986084487e-06, + "loss": 16.2354, + "step": 371000 + }, + { + "epoch": 0.7494636732022447, + "grad_norm": 621.8496704101562, + "learning_rate": 1.8939875455540352e-06, + "loss": 26.1257, + "step": 371010 + }, + { + "epoch": 0.7494838738349285, + "grad_norm": 114.41722106933594, + "learning_rate": 
1.8937140076379484e-06, + "loss": 33.2578, + "step": 371020 + }, + { + "epoch": 0.7495040744676124, + "grad_norm": 399.6213684082031, + "learning_rate": 1.8934404848615245e-06, + "loss": 16.3593, + "step": 371030 + }, + { + "epoch": 0.7495242751002962, + "grad_norm": 555.8201293945312, + "learning_rate": 1.8931669772260946e-06, + "loss": 12.3215, + "step": 371040 + }, + { + "epoch": 0.74954447573298, + "grad_norm": 191.23582458496094, + "learning_rate": 1.8928934847329905e-06, + "loss": 15.6306, + "step": 371050 + }, + { + "epoch": 0.7495646763656638, + "grad_norm": 353.723876953125, + "learning_rate": 1.8926200073835466e-06, + "loss": 32.8761, + "step": 371060 + }, + { + "epoch": 0.7495848769983476, + "grad_norm": 422.55938720703125, + "learning_rate": 1.8923465451790997e-06, + "loss": 18.5121, + "step": 371070 + }, + { + "epoch": 0.7496050776310315, + "grad_norm": 154.19549560546875, + "learning_rate": 1.892073098120975e-06, + "loss": 11.1005, + "step": 371080 + }, + { + "epoch": 0.7496252782637152, + "grad_norm": 859.382080078125, + "learning_rate": 1.8917996662105092e-06, + "loss": 25.7056, + "step": 371090 + }, + { + "epoch": 0.749645478896399, + "grad_norm": 477.7334899902344, + "learning_rate": 1.8915262494490366e-06, + "loss": 19.7303, + "step": 371100 + }, + { + "epoch": 0.7496656795290828, + "grad_norm": 570.6571655273438, + "learning_rate": 1.8912528478378877e-06, + "loss": 33.5792, + "step": 371110 + }, + { + "epoch": 0.7496858801617666, + "grad_norm": 196.02267456054688, + "learning_rate": 1.8909794613783943e-06, + "loss": 27.0937, + "step": 371120 + }, + { + "epoch": 0.7497060807944504, + "grad_norm": 53.689823150634766, + "learning_rate": 1.8907060900718894e-06, + "loss": 16.336, + "step": 371130 + }, + { + "epoch": 0.7497262814271343, + "grad_norm": 1805.9793701171875, + "learning_rate": 1.8904327339197098e-06, + "loss": 11.4277, + "step": 371140 + }, + { + "epoch": 0.7497464820598181, + "grad_norm": 316.41802978515625, + "learning_rate": 1.8901593929231804e-06, + "loss": 27.9684, + "step": 371150 + }, + { + "epoch": 0.7497666826925019, + "grad_norm": 128.99745178222656, + "learning_rate": 1.8898860670836367e-06, + "loss": 14.9042, + "step": 371160 + }, + { + "epoch": 0.7497868833251857, + "grad_norm": 23.5936222076416, + "learning_rate": 1.8896127564024124e-06, + "loss": 19.2889, + "step": 371170 + }, + { + "epoch": 0.7498070839578695, + "grad_norm": 121.90223693847656, + "learning_rate": 1.8893394608808391e-06, + "loss": 27.0936, + "step": 371180 + }, + { + "epoch": 0.7498272845905534, + "grad_norm": 451.2678527832031, + "learning_rate": 1.889066180520246e-06, + "loss": 17.8427, + "step": 371190 + }, + { + "epoch": 0.7498474852232372, + "grad_norm": 367.1835021972656, + "learning_rate": 1.8887929153219687e-06, + "loss": 22.2723, + "step": 371200 + }, + { + "epoch": 0.749867685855921, + "grad_norm": 175.74130249023438, + "learning_rate": 1.8885196652873372e-06, + "loss": 18.2782, + "step": 371210 + }, + { + "epoch": 0.7498878864886048, + "grad_norm": 141.18226623535156, + "learning_rate": 1.8882464304176817e-06, + "loss": 27.077, + "step": 371220 + }, + { + "epoch": 0.7499080871212886, + "grad_norm": 253.47525024414062, + "learning_rate": 1.8879732107143378e-06, + "loss": 28.0933, + "step": 371230 + }, + { + "epoch": 0.7499282877539725, + "grad_norm": 446.2903747558594, + "learning_rate": 1.8877000061786333e-06, + "loss": 20.4453, + "step": 371240 + }, + { + "epoch": 0.7499484883866563, + "grad_norm": 182.23312377929688, + "learning_rate": 1.887426816811903e-06, + "loss": 
13.7468, + "step": 371250 + }, + { + "epoch": 0.7499686890193401, + "grad_norm": 179.2864227294922, + "learning_rate": 1.8871536426154752e-06, + "loss": 17.0753, + "step": 371260 + }, + { + "epoch": 0.7499888896520239, + "grad_norm": 485.5817565917969, + "learning_rate": 1.8868804835906845e-06, + "loss": 21.7351, + "step": 371270 + }, + { + "epoch": 0.7500090902847077, + "grad_norm": 865.7108154296875, + "learning_rate": 1.8866073397388612e-06, + "loss": 15.9035, + "step": 371280 + }, + { + "epoch": 0.7500292909173916, + "grad_norm": 118.02152252197266, + "learning_rate": 1.8863342110613342e-06, + "loss": 13.5667, + "step": 371290 + }, + { + "epoch": 0.7500494915500754, + "grad_norm": 93.32388305664062, + "learning_rate": 1.8860610975594384e-06, + "loss": 12.3167, + "step": 371300 + }, + { + "epoch": 0.7500696921827592, + "grad_norm": 0.05210625007748604, + "learning_rate": 1.8857879992345013e-06, + "loss": 16.3564, + "step": 371310 + }, + { + "epoch": 0.750089892815443, + "grad_norm": 540.0073852539062, + "learning_rate": 1.8855149160878571e-06, + "loss": 18.4973, + "step": 371320 + }, + { + "epoch": 0.7501100934481268, + "grad_norm": 225.62303161621094, + "learning_rate": 1.8852418481208362e-06, + "loss": 14.9685, + "step": 371330 + }, + { + "epoch": 0.7501302940808106, + "grad_norm": 257.6683654785156, + "learning_rate": 1.8849687953347666e-06, + "loss": 24.1979, + "step": 371340 + }, + { + "epoch": 0.7501504947134944, + "grad_norm": 608.3507080078125, + "learning_rate": 1.8846957577309832e-06, + "loss": 16.5048, + "step": 371350 + }, + { + "epoch": 0.7501706953461782, + "grad_norm": 158.879150390625, + "learning_rate": 1.8844227353108146e-06, + "loss": 21.7614, + "step": 371360 + }, + { + "epoch": 0.750190895978862, + "grad_norm": 449.2908020019531, + "learning_rate": 1.8841497280755906e-06, + "loss": 26.0557, + "step": 371370 + }, + { + "epoch": 0.7502110966115458, + "grad_norm": 344.0711364746094, + "learning_rate": 1.8838767360266425e-06, + "loss": 20.2887, + "step": 371380 + }, + { + "epoch": 0.7502312972442297, + "grad_norm": 637.5467529296875, + "learning_rate": 1.8836037591653044e-06, + "loss": 18.1995, + "step": 371390 + }, + { + "epoch": 0.7502514978769135, + "grad_norm": 267.91094970703125, + "learning_rate": 1.8833307974929006e-06, + "loss": 18.5801, + "step": 371400 + }, + { + "epoch": 0.7502716985095973, + "grad_norm": 685.9423217773438, + "learning_rate": 1.8830578510107638e-06, + "loss": 33.9838, + "step": 371410 + }, + { + "epoch": 0.7502918991422811, + "grad_norm": 621.2180786132812, + "learning_rate": 1.8827849197202275e-06, + "loss": 29.6183, + "step": 371420 + }, + { + "epoch": 0.7503120997749649, + "grad_norm": 369.6567687988281, + "learning_rate": 1.8825120036226192e-06, + "loss": 9.9979, + "step": 371430 + }, + { + "epoch": 0.7503323004076488, + "grad_norm": 165.22293090820312, + "learning_rate": 1.8822391027192677e-06, + "loss": 23.3534, + "step": 371440 + }, + { + "epoch": 0.7503525010403326, + "grad_norm": 528.44873046875, + "learning_rate": 1.8819662170115043e-06, + "loss": 12.3577, + "step": 371450 + }, + { + "epoch": 0.7503727016730164, + "grad_norm": 627.9049682617188, + "learning_rate": 1.881693346500663e-06, + "loss": 19.3407, + "step": 371460 + }, + { + "epoch": 0.7503929023057002, + "grad_norm": 310.8790588378906, + "learning_rate": 1.8814204911880667e-06, + "loss": 22.8446, + "step": 371470 + }, + { + "epoch": 0.750413102938384, + "grad_norm": 421.85400390625, + "learning_rate": 1.8811476510750486e-06, + "loss": 14.8666, + "step": 371480 + }, + { + 
"epoch": 0.7504333035710679, + "grad_norm": 563.498779296875, + "learning_rate": 1.8808748261629406e-06, + "loss": 17.9732, + "step": 371490 + }, + { + "epoch": 0.7504535042037517, + "grad_norm": 248.99783325195312, + "learning_rate": 1.8806020164530702e-06, + "loss": 19.4519, + "step": 371500 + }, + { + "epoch": 0.7504737048364355, + "grad_norm": 622.2474365234375, + "learning_rate": 1.8803292219467656e-06, + "loss": 32.7135, + "step": 371510 + }, + { + "epoch": 0.7504939054691193, + "grad_norm": 436.3412170410156, + "learning_rate": 1.8800564426453595e-06, + "loss": 22.4725, + "step": 371520 + }, + { + "epoch": 0.7505141061018031, + "grad_norm": 659.15234375, + "learning_rate": 1.87978367855018e-06, + "loss": 15.2118, + "step": 371530 + }, + { + "epoch": 0.750534306734487, + "grad_norm": 39.72431945800781, + "learning_rate": 1.8795109296625546e-06, + "loss": 5.0009, + "step": 371540 + }, + { + "epoch": 0.7505545073671708, + "grad_norm": 325.27874755859375, + "learning_rate": 1.8792381959838147e-06, + "loss": 13.0658, + "step": 371550 + }, + { + "epoch": 0.7505747079998546, + "grad_norm": 302.402099609375, + "learning_rate": 1.878965477515291e-06, + "loss": 18.2643, + "step": 371560 + }, + { + "epoch": 0.7505949086325384, + "grad_norm": 243.29498291015625, + "learning_rate": 1.8786927742583111e-06, + "loss": 17.85, + "step": 371570 + }, + { + "epoch": 0.7506151092652222, + "grad_norm": 321.30810546875, + "learning_rate": 1.878420086214202e-06, + "loss": 20.9506, + "step": 371580 + }, + { + "epoch": 0.7506353098979061, + "grad_norm": 5.587765216827393, + "learning_rate": 1.8781474133842963e-06, + "loss": 19.0845, + "step": 371590 + }, + { + "epoch": 0.7506555105305898, + "grad_norm": 417.1700439453125, + "learning_rate": 1.8778747557699223e-06, + "loss": 26.0506, + "step": 371600 + }, + { + "epoch": 0.7506757111632736, + "grad_norm": 0.44337430596351624, + "learning_rate": 1.877602113372406e-06, + "loss": 15.0031, + "step": 371610 + }, + { + "epoch": 0.7506959117959574, + "grad_norm": 190.23757934570312, + "learning_rate": 1.8773294861930797e-06, + "loss": 15.9604, + "step": 371620 + }, + { + "epoch": 0.7507161124286412, + "grad_norm": 186.6510772705078, + "learning_rate": 1.8770568742332695e-06, + "loss": 18.3207, + "step": 371630 + }, + { + "epoch": 0.750736313061325, + "grad_norm": 498.6097412109375, + "learning_rate": 1.8767842774943068e-06, + "loss": 18.6611, + "step": 371640 + }, + { + "epoch": 0.7507565136940089, + "grad_norm": 0.26976367831230164, + "learning_rate": 1.8765116959775187e-06, + "loss": 17.8622, + "step": 371650 + }, + { + "epoch": 0.7507767143266927, + "grad_norm": 404.4729309082031, + "learning_rate": 1.876239129684232e-06, + "loss": 8.5176, + "step": 371660 + }, + { + "epoch": 0.7507969149593765, + "grad_norm": 457.6100769042969, + "learning_rate": 1.875966578615777e-06, + "loss": 19.4443, + "step": 371670 + }, + { + "epoch": 0.7508171155920603, + "grad_norm": 556.7343139648438, + "learning_rate": 1.8756940427734854e-06, + "loss": 21.3292, + "step": 371680 + }, + { + "epoch": 0.7508373162247441, + "grad_norm": 671.5337524414062, + "learning_rate": 1.8754215221586785e-06, + "loss": 32.8127, + "step": 371690 + }, + { + "epoch": 0.750857516857428, + "grad_norm": 243.16249084472656, + "learning_rate": 1.8751490167726888e-06, + "loss": 15.6399, + "step": 371700 + }, + { + "epoch": 0.7508777174901118, + "grad_norm": 540.6301879882812, + "learning_rate": 1.874876526616845e-06, + "loss": 18.4024, + "step": 371710 + }, + { + "epoch": 0.7508979181227956, + "grad_norm": 
226.1687774658203, + "learning_rate": 1.874604051692474e-06, + "loss": 19.5006, + "step": 371720 + }, + { + "epoch": 0.7509181187554794, + "grad_norm": 560.4127807617188, + "learning_rate": 1.874331592000902e-06, + "loss": 16.1051, + "step": 371730 + }, + { + "epoch": 0.7509383193881632, + "grad_norm": 698.4486083984375, + "learning_rate": 1.8740591475434588e-06, + "loss": 20.4588, + "step": 371740 + }, + { + "epoch": 0.7509585200208471, + "grad_norm": 267.2878723144531, + "learning_rate": 1.873786718321476e-06, + "loss": 14.6198, + "step": 371750 + }, + { + "epoch": 0.7509787206535309, + "grad_norm": 133.45802307128906, + "learning_rate": 1.8735143043362735e-06, + "loss": 27.8448, + "step": 371760 + }, + { + "epoch": 0.7509989212862147, + "grad_norm": 689.1096801757812, + "learning_rate": 1.8732419055891832e-06, + "loss": 31.4137, + "step": 371770 + }, + { + "epoch": 0.7510191219188985, + "grad_norm": 214.43348693847656, + "learning_rate": 1.8729695220815346e-06, + "loss": 12.4934, + "step": 371780 + }, + { + "epoch": 0.7510393225515823, + "grad_norm": 236.0517120361328, + "learning_rate": 1.8726971538146532e-06, + "loss": 15.2071, + "step": 371790 + }, + { + "epoch": 0.7510595231842662, + "grad_norm": 236.72003173828125, + "learning_rate": 1.8724248007898648e-06, + "loss": 25.1316, + "step": 371800 + }, + { + "epoch": 0.75107972381695, + "grad_norm": 2.4329068660736084, + "learning_rate": 1.8721524630085003e-06, + "loss": 9.839, + "step": 371810 + }, + { + "epoch": 0.7510999244496338, + "grad_norm": 401.3538513183594, + "learning_rate": 1.8718801404718856e-06, + "loss": 15.5504, + "step": 371820 + }, + { + "epoch": 0.7511201250823176, + "grad_norm": 290.0550231933594, + "learning_rate": 1.8716078331813459e-06, + "loss": 21.7786, + "step": 371830 + }, + { + "epoch": 0.7511403257150014, + "grad_norm": 801.5999755859375, + "learning_rate": 1.8713355411382117e-06, + "loss": 20.3351, + "step": 371840 + }, + { + "epoch": 0.7511605263476853, + "grad_norm": 56.34940719604492, + "learning_rate": 1.871063264343807e-06, + "loss": 14.2141, + "step": 371850 + }, + { + "epoch": 0.751180726980369, + "grad_norm": 272.6626281738281, + "learning_rate": 1.870791002799462e-06, + "loss": 19.8764, + "step": 371860 + }, + { + "epoch": 0.7512009276130528, + "grad_norm": 316.6409606933594, + "learning_rate": 1.8705187565065003e-06, + "loss": 19.6626, + "step": 371870 + }, + { + "epoch": 0.7512211282457366, + "grad_norm": 503.186767578125, + "learning_rate": 1.8702465254662527e-06, + "loss": 13.1025, + "step": 371880 + }, + { + "epoch": 0.7512413288784204, + "grad_norm": 190.72361755371094, + "learning_rate": 1.8699743096800438e-06, + "loss": 18.0219, + "step": 371890 + }, + { + "epoch": 0.7512615295111043, + "grad_norm": 1024.67822265625, + "learning_rate": 1.8697021091491991e-06, + "loss": 26.6758, + "step": 371900 + }, + { + "epoch": 0.7512817301437881, + "grad_norm": 238.12718200683594, + "learning_rate": 1.869429923875048e-06, + "loss": 19.7015, + "step": 371910 + }, + { + "epoch": 0.7513019307764719, + "grad_norm": 681.9209594726562, + "learning_rate": 1.869157753858914e-06, + "loss": 23.4534, + "step": 371920 + }, + { + "epoch": 0.7513221314091557, + "grad_norm": 86.79539489746094, + "learning_rate": 1.8688855991021272e-06, + "loss": 18.3068, + "step": 371930 + }, + { + "epoch": 0.7513423320418395, + "grad_norm": 628.2562866210938, + "learning_rate": 1.8686134596060123e-06, + "loss": 14.6956, + "step": 371940 + }, + { + "epoch": 0.7513625326745234, + "grad_norm": 146.2157745361328, + "learning_rate": 
1.8683413353718937e-06, + "loss": 18.6938, + "step": 371950 + }, + { + "epoch": 0.7513827333072072, + "grad_norm": 1092.339111328125, + "learning_rate": 1.8680692264011014e-06, + "loss": 20.8303, + "step": 371960 + }, + { + "epoch": 0.751402933939891, + "grad_norm": 400.3479919433594, + "learning_rate": 1.8677971326949602e-06, + "loss": 13.7188, + "step": 371970 + }, + { + "epoch": 0.7514231345725748, + "grad_norm": 281.03680419921875, + "learning_rate": 1.867525054254794e-06, + "loss": 10.1167, + "step": 371980 + }, + { + "epoch": 0.7514433352052586, + "grad_norm": 443.2901611328125, + "learning_rate": 1.8672529910819305e-06, + "loss": 32.2682, + "step": 371990 + }, + { + "epoch": 0.7514635358379425, + "grad_norm": 372.9542236328125, + "learning_rate": 1.8669809431776991e-06, + "loss": 31.8958, + "step": 372000 + }, + { + "epoch": 0.7514837364706263, + "grad_norm": 306.41961669921875, + "learning_rate": 1.86670891054342e-06, + "loss": 13.3648, + "step": 372010 + }, + { + "epoch": 0.7515039371033101, + "grad_norm": 335.14239501953125, + "learning_rate": 1.8664368931804211e-06, + "loss": 10.7412, + "step": 372020 + }, + { + "epoch": 0.7515241377359939, + "grad_norm": 165.44140625, + "learning_rate": 1.8661648910900303e-06, + "loss": 24.0779, + "step": 372030 + }, + { + "epoch": 0.7515443383686777, + "grad_norm": 384.6484069824219, + "learning_rate": 1.8658929042735725e-06, + "loss": 16.3811, + "step": 372040 + }, + { + "epoch": 0.7515645390013616, + "grad_norm": 221.1503143310547, + "learning_rate": 1.8656209327323704e-06, + "loss": 24.0299, + "step": 372050 + }, + { + "epoch": 0.7515847396340454, + "grad_norm": 505.26861572265625, + "learning_rate": 1.8653489764677512e-06, + "loss": 13.4609, + "step": 372060 + }, + { + "epoch": 0.7516049402667292, + "grad_norm": 340.41748046875, + "learning_rate": 1.865077035481045e-06, + "loss": 15.3707, + "step": 372070 + }, + { + "epoch": 0.751625140899413, + "grad_norm": 709.1183471679688, + "learning_rate": 1.8648051097735697e-06, + "loss": 17.3134, + "step": 372080 + }, + { + "epoch": 0.7516453415320968, + "grad_norm": 372.26153564453125, + "learning_rate": 1.8645331993466537e-06, + "loss": 11.1038, + "step": 372090 + }, + { + "epoch": 0.7516655421647807, + "grad_norm": 358.8562927246094, + "learning_rate": 1.8642613042016245e-06, + "loss": 26.0, + "step": 372100 + }, + { + "epoch": 0.7516857427974644, + "grad_norm": 435.4578552246094, + "learning_rate": 1.8639894243398055e-06, + "loss": 13.9236, + "step": 372110 + }, + { + "epoch": 0.7517059434301482, + "grad_norm": 1.341683030128479, + "learning_rate": 1.8637175597625195e-06, + "loss": 15.2352, + "step": 372120 + }, + { + "epoch": 0.751726144062832, + "grad_norm": 226.56320190429688, + "learning_rate": 1.8634457104710956e-06, + "loss": 11.6611, + "step": 372130 + }, + { + "epoch": 0.7517463446955158, + "grad_norm": 437.05535888671875, + "learning_rate": 1.8631738764668571e-06, + "loss": 17.6793, + "step": 372140 + }, + { + "epoch": 0.7517665453281996, + "grad_norm": 255.4073944091797, + "learning_rate": 1.862902057751127e-06, + "loss": 8.3148, + "step": 372150 + }, + { + "epoch": 0.7517867459608835, + "grad_norm": 225.7238006591797, + "learning_rate": 1.8626302543252317e-06, + "loss": 11.3706, + "step": 372160 + }, + { + "epoch": 0.7518069465935673, + "grad_norm": 28.877674102783203, + "learning_rate": 1.8623584661904976e-06, + "loss": 10.5569, + "step": 372170 + }, + { + "epoch": 0.7518271472262511, + "grad_norm": 248.7672576904297, + "learning_rate": 1.862086693348248e-06, + "loss": 17.5141, + 
"step": 372180 + }, + { + "epoch": 0.7518473478589349, + "grad_norm": 244.1647186279297, + "learning_rate": 1.8618149357998055e-06, + "loss": 8.8961, + "step": 372190 + }, + { + "epoch": 0.7518675484916187, + "grad_norm": 394.9271240234375, + "learning_rate": 1.8615431935464984e-06, + "loss": 17.3364, + "step": 372200 + }, + { + "epoch": 0.7518877491243026, + "grad_norm": 102.46836853027344, + "learning_rate": 1.8612714665896486e-06, + "loss": 10.7787, + "step": 372210 + }, + { + "epoch": 0.7519079497569864, + "grad_norm": 137.36032104492188, + "learning_rate": 1.8609997549305792e-06, + "loss": 17.4078, + "step": 372220 + }, + { + "epoch": 0.7519281503896702, + "grad_norm": 0.998528242111206, + "learning_rate": 1.8607280585706183e-06, + "loss": 25.5399, + "step": 372230 + }, + { + "epoch": 0.751948351022354, + "grad_norm": 1166.321533203125, + "learning_rate": 1.8604563775110868e-06, + "loss": 28.0414, + "step": 372240 + }, + { + "epoch": 0.7519685516550378, + "grad_norm": 163.6189727783203, + "learning_rate": 1.8601847117533112e-06, + "loss": 15.5508, + "step": 372250 + }, + { + "epoch": 0.7519887522877217, + "grad_norm": 167.6587677001953, + "learning_rate": 1.859913061298615e-06, + "loss": 11.8528, + "step": 372260 + }, + { + "epoch": 0.7520089529204055, + "grad_norm": 169.41229248046875, + "learning_rate": 1.8596414261483192e-06, + "loss": 14.1934, + "step": 372270 + }, + { + "epoch": 0.7520291535530893, + "grad_norm": 794.1710815429688, + "learning_rate": 1.8593698063037525e-06, + "loss": 23.826, + "step": 372280 + }, + { + "epoch": 0.7520493541857731, + "grad_norm": 183.20425415039062, + "learning_rate": 1.8590982017662362e-06, + "loss": 24.6968, + "step": 372290 + }, + { + "epoch": 0.7520695548184569, + "grad_norm": 659.823974609375, + "learning_rate": 1.8588266125370929e-06, + "loss": 23.0166, + "step": 372300 + }, + { + "epoch": 0.7520897554511408, + "grad_norm": 427.4971008300781, + "learning_rate": 1.8585550386176476e-06, + "loss": 14.9007, + "step": 372310 + }, + { + "epoch": 0.7521099560838246, + "grad_norm": 486.5269775390625, + "learning_rate": 1.858283480009226e-06, + "loss": 19.578, + "step": 372320 + }, + { + "epoch": 0.7521301567165084, + "grad_norm": 605.6756591796875, + "learning_rate": 1.8580119367131487e-06, + "loss": 19.1646, + "step": 372330 + }, + { + "epoch": 0.7521503573491922, + "grad_norm": 253.02023315429688, + "learning_rate": 1.8577404087307394e-06, + "loss": 11.5323, + "step": 372340 + }, + { + "epoch": 0.752170557981876, + "grad_norm": 584.7437133789062, + "learning_rate": 1.8574688960633236e-06, + "loss": 20.9569, + "step": 372350 + }, + { + "epoch": 0.7521907586145599, + "grad_norm": 744.9182739257812, + "learning_rate": 1.8571973987122233e-06, + "loss": 13.9706, + "step": 372360 + }, + { + "epoch": 0.7522109592472436, + "grad_norm": 220.01039123535156, + "learning_rate": 1.85692591667876e-06, + "loss": 20.734, + "step": 372370 + }, + { + "epoch": 0.7522311598799274, + "grad_norm": 663.4364624023438, + "learning_rate": 1.8566544499642587e-06, + "loss": 22.673, + "step": 372380 + }, + { + "epoch": 0.7522513605126112, + "grad_norm": 220.8157958984375, + "learning_rate": 1.8563829985700444e-06, + "loss": 16.6396, + "step": 372390 + }, + { + "epoch": 0.752271561145295, + "grad_norm": 304.0716247558594, + "learning_rate": 1.8561115624974374e-06, + "loss": 12.1302, + "step": 372400 + }, + { + "epoch": 0.7522917617779789, + "grad_norm": 390.99932861328125, + "learning_rate": 1.8558401417477602e-06, + "loss": 22.9299, + "step": 372410 + }, + { + "epoch": 
0.7523119624106627, + "grad_norm": 851.5274658203125, + "learning_rate": 1.855568736322338e-06, + "loss": 21.7617, + "step": 372420 + }, + { + "epoch": 0.7523321630433465, + "grad_norm": 474.37847900390625, + "learning_rate": 1.8552973462224926e-06, + "loss": 34.2556, + "step": 372430 + }, + { + "epoch": 0.7523523636760303, + "grad_norm": 908.5614624023438, + "learning_rate": 1.8550259714495444e-06, + "loss": 26.9689, + "step": 372440 + }, + { + "epoch": 0.7523725643087141, + "grad_norm": 505.4935302734375, + "learning_rate": 1.8547546120048204e-06, + "loss": 16.8324, + "step": 372450 + }, + { + "epoch": 0.752392764941398, + "grad_norm": 653.8145751953125, + "learning_rate": 1.854483267889639e-06, + "loss": 8.4588, + "step": 372460 + }, + { + "epoch": 0.7524129655740818, + "grad_norm": 447.92877197265625, + "learning_rate": 1.854211939105327e-06, + "loss": 18.8001, + "step": 372470 + }, + { + "epoch": 0.7524331662067656, + "grad_norm": 584.7274169921875, + "learning_rate": 1.8539406256532022e-06, + "loss": 12.1158, + "step": 372480 + }, + { + "epoch": 0.7524533668394494, + "grad_norm": 372.8336486816406, + "learning_rate": 1.8536693275345908e-06, + "loss": 26.4126, + "step": 372490 + }, + { + "epoch": 0.7524735674721332, + "grad_norm": 447.24200439453125, + "learning_rate": 1.8533980447508138e-06, + "loss": 36.0696, + "step": 372500 + }, + { + "epoch": 0.752493768104817, + "grad_norm": 353.8642578125, + "learning_rate": 1.8531267773031913e-06, + "loss": 16.9937, + "step": 372510 + }, + { + "epoch": 0.7525139687375009, + "grad_norm": 451.4334411621094, + "learning_rate": 1.8528555251930492e-06, + "loss": 22.1797, + "step": 372520 + }, + { + "epoch": 0.7525341693701847, + "grad_norm": 643.447998046875, + "learning_rate": 1.8525842884217055e-06, + "loss": 21.9101, + "step": 372530 + }, + { + "epoch": 0.7525543700028685, + "grad_norm": 0.0, + "learning_rate": 1.852313066990486e-06, + "loss": 13.9883, + "step": 372540 + }, + { + "epoch": 0.7525745706355523, + "grad_norm": 420.5641784667969, + "learning_rate": 1.8520418609007107e-06, + "loss": 14.4659, + "step": 372550 + }, + { + "epoch": 0.7525947712682362, + "grad_norm": 444.2103576660156, + "learning_rate": 1.8517706701536998e-06, + "loss": 12.8427, + "step": 372560 + }, + { + "epoch": 0.75261497190092, + "grad_norm": 232.41436767578125, + "learning_rate": 1.8514994947507787e-06, + "loss": 15.8466, + "step": 372570 + }, + { + "epoch": 0.7526351725336038, + "grad_norm": 533.5691528320312, + "learning_rate": 1.8512283346932675e-06, + "loss": 16.6797, + "step": 372580 + }, + { + "epoch": 0.7526553731662876, + "grad_norm": 678.6180419921875, + "learning_rate": 1.8509571899824851e-06, + "loss": 26.8828, + "step": 372590 + }, + { + "epoch": 0.7526755737989714, + "grad_norm": 181.0961151123047, + "learning_rate": 1.8506860606197564e-06, + "loss": 15.2489, + "step": 372600 + }, + { + "epoch": 0.7526957744316553, + "grad_norm": 133.0625457763672, + "learning_rate": 1.850414946606403e-06, + "loss": 19.4113, + "step": 372610 + }, + { + "epoch": 0.752715975064339, + "grad_norm": 516.4111938476562, + "learning_rate": 1.850143847943745e-06, + "loss": 25.9688, + "step": 372620 + }, + { + "epoch": 0.7527361756970228, + "grad_norm": 473.9225769042969, + "learning_rate": 1.8498727646331022e-06, + "loss": 24.7476, + "step": 372630 + }, + { + "epoch": 0.7527563763297066, + "grad_norm": 908.9474487304688, + "learning_rate": 1.8496016966757996e-06, + "loss": 27.9903, + "step": 372640 + }, + { + "epoch": 0.7527765769623904, + "grad_norm": 126.64356231689453, + 
"learning_rate": 1.8493306440731557e-06, + "loss": 14.6101, + "step": 372650 + }, + { + "epoch": 0.7527967775950742, + "grad_norm": 174.29595947265625, + "learning_rate": 1.849059606826491e-06, + "loss": 7.9262, + "step": 372660 + }, + { + "epoch": 0.7528169782277581, + "grad_norm": 580.3689575195312, + "learning_rate": 1.8487885849371268e-06, + "loss": 22.3703, + "step": 372670 + }, + { + "epoch": 0.7528371788604419, + "grad_norm": 543.1556396484375, + "learning_rate": 1.848517578406389e-06, + "loss": 20.5304, + "step": 372680 + }, + { + "epoch": 0.7528573794931257, + "grad_norm": 374.93121337890625, + "learning_rate": 1.8482465872355904e-06, + "loss": 22.1513, + "step": 372690 + }, + { + "epoch": 0.7528775801258095, + "grad_norm": 551.6804809570312, + "learning_rate": 1.8479756114260562e-06, + "loss": 19.926, + "step": 372700 + }, + { + "epoch": 0.7528977807584933, + "grad_norm": 397.40899658203125, + "learning_rate": 1.847704650979108e-06, + "loss": 15.0318, + "step": 372710 + }, + { + "epoch": 0.7529179813911772, + "grad_norm": 358.8426818847656, + "learning_rate": 1.8474337058960646e-06, + "loss": 23.3054, + "step": 372720 + }, + { + "epoch": 0.752938182023861, + "grad_norm": 432.9877014160156, + "learning_rate": 1.8471627761782457e-06, + "loss": 13.7821, + "step": 372730 + }, + { + "epoch": 0.7529583826565448, + "grad_norm": 602.4999389648438, + "learning_rate": 1.8468918618269749e-06, + "loss": 21.9916, + "step": 372740 + }, + { + "epoch": 0.7529785832892286, + "grad_norm": 267.88726806640625, + "learning_rate": 1.8466209628435705e-06, + "loss": 14.1686, + "step": 372750 + }, + { + "epoch": 0.7529987839219124, + "grad_norm": 152.61849975585938, + "learning_rate": 1.846350079229351e-06, + "loss": 21.7504, + "step": 372760 + }, + { + "epoch": 0.7530189845545963, + "grad_norm": 404.3066711425781, + "learning_rate": 1.846079210985639e-06, + "loss": 42.8575, + "step": 372770 + }, + { + "epoch": 0.7530391851872801, + "grad_norm": 1.7670810222625732, + "learning_rate": 1.8458083581137565e-06, + "loss": 10.9234, + "step": 372780 + }, + { + "epoch": 0.7530593858199639, + "grad_norm": 480.45086669921875, + "learning_rate": 1.8455375206150212e-06, + "loss": 19.2333, + "step": 372790 + }, + { + "epoch": 0.7530795864526477, + "grad_norm": 336.90460205078125, + "learning_rate": 1.8452666984907519e-06, + "loss": 11.0934, + "step": 372800 + }, + { + "epoch": 0.7530997870853315, + "grad_norm": 764.4038696289062, + "learning_rate": 1.8449958917422712e-06, + "loss": 18.3545, + "step": 372810 + }, + { + "epoch": 0.7531199877180154, + "grad_norm": 1.3576780557632446, + "learning_rate": 1.8447251003708982e-06, + "loss": 22.2995, + "step": 372820 + }, + { + "epoch": 0.7531401883506992, + "grad_norm": 146.26312255859375, + "learning_rate": 1.8444543243779512e-06, + "loss": 35.3476, + "step": 372830 + }, + { + "epoch": 0.753160388983383, + "grad_norm": 342.9661865234375, + "learning_rate": 1.844183563764752e-06, + "loss": 12.6528, + "step": 372840 + }, + { + "epoch": 0.7531805896160668, + "grad_norm": 553.3638916015625, + "learning_rate": 1.8439128185326183e-06, + "loss": 22.5289, + "step": 372850 + }, + { + "epoch": 0.7532007902487506, + "grad_norm": 85.49042510986328, + "learning_rate": 1.843642088682872e-06, + "loss": 24.871, + "step": 372860 + }, + { + "epoch": 0.7532209908814345, + "grad_norm": 363.81671142578125, + "learning_rate": 1.8433713742168313e-06, + "loss": 13.3764, + "step": 372870 + }, + { + "epoch": 0.7532411915141182, + "grad_norm": 103.99618530273438, + "learning_rate": 
1.8431006751358143e-06, + "loss": 14.6951, + "step": 372880 + }, + { + "epoch": 0.753261392146802, + "grad_norm": 136.24386596679688, + "learning_rate": 1.8428299914411435e-06, + "loss": 16.6602, + "step": 372890 + }, + { + "epoch": 0.7532815927794858, + "grad_norm": 310.4085998535156, + "learning_rate": 1.842559323134136e-06, + "loss": 15.7493, + "step": 372900 + }, + { + "epoch": 0.7533017934121696, + "grad_norm": 249.01113891601562, + "learning_rate": 1.8422886702161098e-06, + "loss": 24.468, + "step": 372910 + }, + { + "epoch": 0.7533219940448534, + "grad_norm": 352.1138000488281, + "learning_rate": 1.8420180326883857e-06, + "loss": 31.5453, + "step": 372920 + }, + { + "epoch": 0.7533421946775373, + "grad_norm": 254.39141845703125, + "learning_rate": 1.8417474105522849e-06, + "loss": 20.3214, + "step": 372930 + }, + { + "epoch": 0.7533623953102211, + "grad_norm": 494.2662658691406, + "learning_rate": 1.8414768038091235e-06, + "loss": 9.3419, + "step": 372940 + }, + { + "epoch": 0.7533825959429049, + "grad_norm": 141.86717224121094, + "learning_rate": 1.8412062124602192e-06, + "loss": 19.459, + "step": 372950 + }, + { + "epoch": 0.7534027965755887, + "grad_norm": 585.3483276367188, + "learning_rate": 1.8409356365068947e-06, + "loss": 30.1718, + "step": 372960 + }, + { + "epoch": 0.7534229972082725, + "grad_norm": 661.2196044921875, + "learning_rate": 1.8406650759504667e-06, + "loss": 13.9914, + "step": 372970 + }, + { + "epoch": 0.7534431978409564, + "grad_norm": 122.52154541015625, + "learning_rate": 1.8403945307922526e-06, + "loss": 13.9282, + "step": 372980 + }, + { + "epoch": 0.7534633984736402, + "grad_norm": 907.5955810546875, + "learning_rate": 1.8401240010335725e-06, + "loss": 19.1787, + "step": 372990 + }, + { + "epoch": 0.753483599106324, + "grad_norm": 313.92523193359375, + "learning_rate": 1.8398534866757455e-06, + "loss": 15.3005, + "step": 373000 + }, + { + "epoch": 0.7535037997390078, + "grad_norm": 64.90787506103516, + "learning_rate": 1.8395829877200904e-06, + "loss": 15.5811, + "step": 373010 + }, + { + "epoch": 0.7535240003716916, + "grad_norm": 997.3680419921875, + "learning_rate": 1.8393125041679221e-06, + "loss": 15.6607, + "step": 373020 + }, + { + "epoch": 0.7535442010043755, + "grad_norm": 480.28204345703125, + "learning_rate": 1.8390420360205635e-06, + "loss": 13.6431, + "step": 373030 + }, + { + "epoch": 0.7535644016370593, + "grad_norm": 125.03921508789062, + "learning_rate": 1.83877158327933e-06, + "loss": 7.5408, + "step": 373040 + }, + { + "epoch": 0.7535846022697431, + "grad_norm": 580.4336547851562, + "learning_rate": 1.8385011459455394e-06, + "loss": 15.5241, + "step": 373050 + }, + { + "epoch": 0.7536048029024269, + "grad_norm": 108.73404693603516, + "learning_rate": 1.83823072402051e-06, + "loss": 17.1433, + "step": 373060 + }, + { + "epoch": 0.7536250035351107, + "grad_norm": 793.8583984375, + "learning_rate": 1.8379603175055628e-06, + "loss": 18.2797, + "step": 373070 + }, + { + "epoch": 0.7536452041677946, + "grad_norm": 4.538880348205566, + "learning_rate": 1.8376899264020138e-06, + "loss": 34.743, + "step": 373080 + }, + { + "epoch": 0.7536654048004784, + "grad_norm": 658.1224365234375, + "learning_rate": 1.8374195507111781e-06, + "loss": 19.0254, + "step": 373090 + }, + { + "epoch": 0.7536856054331622, + "grad_norm": 216.00830078125, + "learning_rate": 1.837149190434378e-06, + "loss": 18.4801, + "step": 373100 + }, + { + "epoch": 0.753705806065846, + "grad_norm": 600.862060546875, + "learning_rate": 1.8368788455729292e-06, + "loss": 34.0035, + 
"step": 373110 + }, + { + "epoch": 0.7537260066985298, + "grad_norm": 495.9053649902344, + "learning_rate": 1.8366085161281477e-06, + "loss": 16.4508, + "step": 373120 + }, + { + "epoch": 0.7537462073312137, + "grad_norm": 495.1671447753906, + "learning_rate": 1.8363382021013536e-06, + "loss": 12.3908, + "step": 373130 + }, + { + "epoch": 0.7537664079638974, + "grad_norm": 1.0381710529327393, + "learning_rate": 1.8360679034938628e-06, + "loss": 21.47, + "step": 373140 + }, + { + "epoch": 0.7537866085965812, + "grad_norm": 365.1124267578125, + "learning_rate": 1.8357976203069943e-06, + "loss": 18.5855, + "step": 373150 + }, + { + "epoch": 0.753806809229265, + "grad_norm": 541.2918090820312, + "learning_rate": 1.8355273525420642e-06, + "loss": 26.7612, + "step": 373160 + }, + { + "epoch": 0.7538270098619488, + "grad_norm": 779.7386474609375, + "learning_rate": 1.8352571002003888e-06, + "loss": 22.4891, + "step": 373170 + }, + { + "epoch": 0.7538472104946327, + "grad_norm": 611.7274169921875, + "learning_rate": 1.834986863283288e-06, + "loss": 11.2856, + "step": 373180 + }, + { + "epoch": 0.7538674111273165, + "grad_norm": 85.95925903320312, + "learning_rate": 1.8347166417920776e-06, + "loss": 20.2332, + "step": 373190 + }, + { + "epoch": 0.7538876117600003, + "grad_norm": 201.36541748046875, + "learning_rate": 1.8344464357280722e-06, + "loss": 11.7059, + "step": 373200 + }, + { + "epoch": 0.7539078123926841, + "grad_norm": 306.60211181640625, + "learning_rate": 1.834176245092591e-06, + "loss": 17.1396, + "step": 373210 + }, + { + "epoch": 0.7539280130253679, + "grad_norm": 374.44140625, + "learning_rate": 1.8339060698869526e-06, + "loss": 25.0919, + "step": 373220 + }, + { + "epoch": 0.7539482136580518, + "grad_norm": 293.8785400390625, + "learning_rate": 1.8336359101124724e-06, + "loss": 8.6249, + "step": 373230 + }, + { + "epoch": 0.7539684142907356, + "grad_norm": 530.725341796875, + "learning_rate": 1.8333657657704645e-06, + "loss": 13.0651, + "step": 373240 + }, + { + "epoch": 0.7539886149234194, + "grad_norm": 324.19317626953125, + "learning_rate": 1.8330956368622498e-06, + "loss": 27.7408, + "step": 373250 + }, + { + "epoch": 0.7540088155561032, + "grad_norm": 556.4000244140625, + "learning_rate": 1.8328255233891428e-06, + "loss": 12.6205, + "step": 373260 + }, + { + "epoch": 0.754029016188787, + "grad_norm": 425.33197021484375, + "learning_rate": 1.8325554253524585e-06, + "loss": 16.1365, + "step": 373270 + }, + { + "epoch": 0.7540492168214709, + "grad_norm": 401.85260009765625, + "learning_rate": 1.8322853427535148e-06, + "loss": 12.5885, + "step": 373280 + }, + { + "epoch": 0.7540694174541547, + "grad_norm": 236.56997680664062, + "learning_rate": 1.832015275593631e-06, + "loss": 8.9223, + "step": 373290 + }, + { + "epoch": 0.7540896180868385, + "grad_norm": 170.80523681640625, + "learning_rate": 1.831745223874118e-06, + "loss": 17.5782, + "step": 373300 + }, + { + "epoch": 0.7541098187195223, + "grad_norm": 1027.2984619140625, + "learning_rate": 1.8314751875962939e-06, + "loss": 34.7551, + "step": 373310 + }, + { + "epoch": 0.7541300193522061, + "grad_norm": 741.0579223632812, + "learning_rate": 1.8312051667614772e-06, + "loss": 27.0635, + "step": 373320 + }, + { + "epoch": 0.75415021998489, + "grad_norm": 385.8459777832031, + "learning_rate": 1.8309351613709825e-06, + "loss": 25.9867, + "step": 373330 + }, + { + "epoch": 0.7541704206175738, + "grad_norm": 126.38776397705078, + "learning_rate": 1.8306651714261237e-06, + "loss": 22.6648, + "step": 373340 + }, + { + "epoch": 
0.7541906212502576, + "grad_norm": 412.8068542480469, + "learning_rate": 1.8303951969282202e-06, + "loss": 17.6643, + "step": 373350 + }, + { + "epoch": 0.7542108218829414, + "grad_norm": 544.8564453125, + "learning_rate": 1.8301252378785856e-06, + "loss": 15.0653, + "step": 373360 + }, + { + "epoch": 0.7542310225156252, + "grad_norm": 284.7385559082031, + "learning_rate": 1.8298552942785352e-06, + "loss": 27.7315, + "step": 373370 + }, + { + "epoch": 0.7542512231483091, + "grad_norm": 386.16339111328125, + "learning_rate": 1.829585366129385e-06, + "loss": 14.34, + "step": 373380 + }, + { + "epoch": 0.7542714237809928, + "grad_norm": 339.692138671875, + "learning_rate": 1.8293154534324531e-06, + "loss": 19.847, + "step": 373390 + }, + { + "epoch": 0.7542916244136766, + "grad_norm": 148.24313354492188, + "learning_rate": 1.829045556189053e-06, + "loss": 26.0274, + "step": 373400 + }, + { + "epoch": 0.7543118250463604, + "grad_norm": 588.754638671875, + "learning_rate": 1.8287756744004986e-06, + "loss": 42.3484, + "step": 373410 + }, + { + "epoch": 0.7543320256790442, + "grad_norm": 226.2706298828125, + "learning_rate": 1.828505808068109e-06, + "loss": 9.4534, + "step": 373420 + }, + { + "epoch": 0.754352226311728, + "grad_norm": 75.25081634521484, + "learning_rate": 1.8282359571931968e-06, + "loss": 7.8139, + "step": 373430 + }, + { + "epoch": 0.7543724269444119, + "grad_norm": 184.63485717773438, + "learning_rate": 1.8279661217770766e-06, + "loss": 12.659, + "step": 373440 + }, + { + "epoch": 0.7543926275770957, + "grad_norm": 299.3465270996094, + "learning_rate": 1.8276963018210664e-06, + "loss": 15.0745, + "step": 373450 + }, + { + "epoch": 0.7544128282097795, + "grad_norm": 85.56561279296875, + "learning_rate": 1.8274264973264782e-06, + "loss": 20.9809, + "step": 373460 + }, + { + "epoch": 0.7544330288424633, + "grad_norm": 418.154296875, + "learning_rate": 1.8271567082946302e-06, + "loss": 23.4097, + "step": 373470 + }, + { + "epoch": 0.7544532294751471, + "grad_norm": 223.2666015625, + "learning_rate": 1.8268869347268348e-06, + "loss": 16.1004, + "step": 373480 + }, + { + "epoch": 0.754473430107831, + "grad_norm": 652.4942626953125, + "learning_rate": 1.8266171766244067e-06, + "loss": 21.3322, + "step": 373490 + }, + { + "epoch": 0.7544936307405148, + "grad_norm": 722.2883911132812, + "learning_rate": 1.8263474339886628e-06, + "loss": 15.3794, + "step": 373500 + }, + { + "epoch": 0.7545138313731986, + "grad_norm": 6.032987594604492, + "learning_rate": 1.8260777068209168e-06, + "loss": 8.9061, + "step": 373510 + }, + { + "epoch": 0.7545340320058824, + "grad_norm": 2.141902446746826, + "learning_rate": 1.8258079951224816e-06, + "loss": 13.8676, + "step": 373520 + }, + { + "epoch": 0.7545542326385662, + "grad_norm": 54.33201599121094, + "learning_rate": 1.825538298894673e-06, + "loss": 23.2468, + "step": 373530 + }, + { + "epoch": 0.7545744332712501, + "grad_norm": 623.4483032226562, + "learning_rate": 1.825268618138808e-06, + "loss": 20.8951, + "step": 373540 + }, + { + "epoch": 0.7545946339039339, + "grad_norm": 263.5401916503906, + "learning_rate": 1.824998952856198e-06, + "loss": 17.8034, + "step": 373550 + }, + { + "epoch": 0.7546148345366177, + "grad_norm": 540.0736694335938, + "learning_rate": 1.8247293030481568e-06, + "loss": 22.1899, + "step": 373560 + }, + { + "epoch": 0.7546350351693015, + "grad_norm": 550.5327758789062, + "learning_rate": 1.824459668716001e-06, + "loss": 25.1992, + "step": 373570 + }, + { + "epoch": 0.7546552358019853, + "grad_norm": 517.9869995117188, + 
"learning_rate": 1.8241900498610438e-06, + "loss": 20.0201, + "step": 373580 + }, + { + "epoch": 0.7546754364346692, + "grad_norm": 290.59112548828125, + "learning_rate": 1.8239204464845978e-06, + "loss": 15.7157, + "step": 373590 + }, + { + "epoch": 0.754695637067353, + "grad_norm": 101.77208709716797, + "learning_rate": 1.8236508585879781e-06, + "loss": 19.5796, + "step": 373600 + }, + { + "epoch": 0.7547158377000368, + "grad_norm": 542.2976684570312, + "learning_rate": 1.8233812861725002e-06, + "loss": 11.8657, + "step": 373610 + }, + { + "epoch": 0.7547360383327206, + "grad_norm": 47.22188949584961, + "learning_rate": 1.8231117292394772e-06, + "loss": 16.9453, + "step": 373620 + }, + { + "epoch": 0.7547562389654044, + "grad_norm": 200.20924377441406, + "learning_rate": 1.8228421877902203e-06, + "loss": 19.2468, + "step": 373630 + }, + { + "epoch": 0.7547764395980883, + "grad_norm": 533.7518310546875, + "learning_rate": 1.822572661826047e-06, + "loss": 26.7474, + "step": 373640 + }, + { + "epoch": 0.754796640230772, + "grad_norm": 244.7967529296875, + "learning_rate": 1.8223031513482692e-06, + "loss": 10.8895, + "step": 373650 + }, + { + "epoch": 0.7548168408634558, + "grad_norm": 307.4630432128906, + "learning_rate": 1.8220336563581986e-06, + "loss": 18.4101, + "step": 373660 + }, + { + "epoch": 0.7548370414961396, + "grad_norm": 408.6343688964844, + "learning_rate": 1.821764176857151e-06, + "loss": 23.4212, + "step": 373670 + }, + { + "epoch": 0.7548572421288234, + "grad_norm": 285.11029052734375, + "learning_rate": 1.8214947128464406e-06, + "loss": 12.0743, + "step": 373680 + }, + { + "epoch": 0.7548774427615073, + "grad_norm": 551.1350708007812, + "learning_rate": 1.8212252643273797e-06, + "loss": 29.6394, + "step": 373690 + }, + { + "epoch": 0.7548976433941911, + "grad_norm": 254.38385009765625, + "learning_rate": 1.8209558313012792e-06, + "loss": 23.6601, + "step": 373700 + }, + { + "epoch": 0.7549178440268749, + "grad_norm": 602.5098266601562, + "learning_rate": 1.8206864137694563e-06, + "loss": 13.4353, + "step": 373710 + }, + { + "epoch": 0.7549380446595587, + "grad_norm": 372.99005126953125, + "learning_rate": 1.8204170117332226e-06, + "loss": 20.7797, + "step": 373720 + }, + { + "epoch": 0.7549582452922425, + "grad_norm": 300.43499755859375, + "learning_rate": 1.8201476251938888e-06, + "loss": 26.6204, + "step": 373730 + }, + { + "epoch": 0.7549784459249264, + "grad_norm": 646.2213134765625, + "learning_rate": 1.8198782541527715e-06, + "loss": 12.8761, + "step": 373740 + }, + { + "epoch": 0.7549986465576102, + "grad_norm": 294.9193420410156, + "learning_rate": 1.8196088986111798e-06, + "loss": 20.0258, + "step": 373750 + }, + { + "epoch": 0.755018847190294, + "grad_norm": 283.4877624511719, + "learning_rate": 1.819339558570431e-06, + "loss": 6.8631, + "step": 373760 + }, + { + "epoch": 0.7550390478229778, + "grad_norm": 209.648681640625, + "learning_rate": 1.819070234031835e-06, + "loss": 15.8996, + "step": 373770 + }, + { + "epoch": 0.7550592484556616, + "grad_norm": 34.476966857910156, + "learning_rate": 1.818800924996703e-06, + "loss": 18.4766, + "step": 373780 + }, + { + "epoch": 0.7550794490883455, + "grad_norm": 311.11376953125, + "learning_rate": 1.8185316314663515e-06, + "loss": 26.3973, + "step": 373790 + }, + { + "epoch": 0.7550996497210293, + "grad_norm": 318.308349609375, + "learning_rate": 1.8182623534420906e-06, + "loss": 21.0494, + "step": 373800 + }, + { + "epoch": 0.7551198503537131, + "grad_norm": 307.3672790527344, + "learning_rate": 1.817993090925232e-06, 
+ "loss": 7.7701, + "step": 373810 + }, + { + "epoch": 0.7551400509863969, + "grad_norm": 565.0277709960938, + "learning_rate": 1.8177238439170885e-06, + "loss": 11.7383, + "step": 373820 + }, + { + "epoch": 0.7551602516190807, + "grad_norm": 573.1658325195312, + "learning_rate": 1.8174546124189752e-06, + "loss": 16.0305, + "step": 373830 + }, + { + "epoch": 0.7551804522517646, + "grad_norm": 95.29529571533203, + "learning_rate": 1.8171853964322016e-06, + "loss": 9.8451, + "step": 373840 + }, + { + "epoch": 0.7552006528844484, + "grad_norm": 429.97845458984375, + "learning_rate": 1.8169161959580795e-06, + "loss": 17.8444, + "step": 373850 + }, + { + "epoch": 0.7552208535171322, + "grad_norm": 385.33087158203125, + "learning_rate": 1.816647010997923e-06, + "loss": 18.6532, + "step": 373860 + }, + { + "epoch": 0.755241054149816, + "grad_norm": 267.79779052734375, + "learning_rate": 1.8163778415530425e-06, + "loss": 22.2014, + "step": 373870 + }, + { + "epoch": 0.7552612547824998, + "grad_norm": 199.0067596435547, + "learning_rate": 1.8161086876247492e-06, + "loss": 29.8439, + "step": 373880 + }, + { + "epoch": 0.7552814554151837, + "grad_norm": 215.98484802246094, + "learning_rate": 1.8158395492143555e-06, + "loss": 18.1653, + "step": 373890 + }, + { + "epoch": 0.7553016560478674, + "grad_norm": 755.5444946289062, + "learning_rate": 1.8155704263231777e-06, + "loss": 36.9149, + "step": 373900 + }, + { + "epoch": 0.7553218566805512, + "grad_norm": 54.62074661254883, + "learning_rate": 1.8153013189525192e-06, + "loss": 23.4045, + "step": 373910 + }, + { + "epoch": 0.755342057313235, + "grad_norm": 573.7791748046875, + "learning_rate": 1.8150322271036962e-06, + "loss": 42.773, + "step": 373920 + }, + { + "epoch": 0.7553622579459188, + "grad_norm": 394.81207275390625, + "learning_rate": 1.8147631507780217e-06, + "loss": 23.2269, + "step": 373930 + }, + { + "epoch": 0.7553824585786026, + "grad_norm": 72.78643035888672, + "learning_rate": 1.814494089976805e-06, + "loss": 19.492, + "step": 373940 + }, + { + "epoch": 0.7554026592112865, + "grad_norm": 352.05078125, + "learning_rate": 1.8142250447013566e-06, + "loss": 18.0564, + "step": 373950 + }, + { + "epoch": 0.7554228598439703, + "grad_norm": 692.9324951171875, + "learning_rate": 1.81395601495299e-06, + "loss": 17.2259, + "step": 373960 + }, + { + "epoch": 0.7554430604766541, + "grad_norm": 429.7369384765625, + "learning_rate": 1.8136870007330155e-06, + "loss": 9.6317, + "step": 373970 + }, + { + "epoch": 0.7554632611093379, + "grad_norm": 206.9267120361328, + "learning_rate": 1.8134180020427423e-06, + "loss": 37.7886, + "step": 373980 + }, + { + "epoch": 0.7554834617420217, + "grad_norm": 0.6846423149108887, + "learning_rate": 1.8131490188834837e-06, + "loss": 8.6686, + "step": 373990 + }, + { + "epoch": 0.7555036623747056, + "grad_norm": 151.581298828125, + "learning_rate": 1.8128800512565514e-06, + "loss": 10.8003, + "step": 374000 + }, + { + "epoch": 0.7555238630073894, + "grad_norm": 380.9773254394531, + "learning_rate": 1.8126110991632556e-06, + "loss": 12.3034, + "step": 374010 + }, + { + "epoch": 0.7555440636400732, + "grad_norm": 272.1925354003906, + "learning_rate": 1.8123421626049048e-06, + "loss": 14.0785, + "step": 374020 + }, + { + "epoch": 0.755564264272757, + "grad_norm": 874.1373901367188, + "learning_rate": 1.8120732415828135e-06, + "loss": 14.6445, + "step": 374030 + }, + { + "epoch": 0.7555844649054408, + "grad_norm": 188.32449340820312, + "learning_rate": 1.8118043360982906e-06, + "loss": 14.4632, + "step": 374040 + }, + { + 
"epoch": 0.7556046655381247, + "grad_norm": 297.5085144042969, + "learning_rate": 1.8115354461526453e-06, + "loss": 26.6521, + "step": 374050 + }, + { + "epoch": 0.7556248661708085, + "grad_norm": 246.0274658203125, + "learning_rate": 1.8112665717471905e-06, + "loss": 23.6608, + "step": 374060 + }, + { + "epoch": 0.7556450668034923, + "grad_norm": 280.06146240234375, + "learning_rate": 1.8109977128832346e-06, + "loss": 27.8095, + "step": 374070 + }, + { + "epoch": 0.7556652674361761, + "grad_norm": 387.3278503417969, + "learning_rate": 1.8107288695620905e-06, + "loss": 21.2384, + "step": 374080 + }, + { + "epoch": 0.75568546806886, + "grad_norm": 631.174072265625, + "learning_rate": 1.810460041785067e-06, + "loss": 9.841, + "step": 374090 + }, + { + "epoch": 0.7557056687015438, + "grad_norm": 481.58599853515625, + "learning_rate": 1.810191229553473e-06, + "loss": 21.0722, + "step": 374100 + }, + { + "epoch": 0.7557258693342276, + "grad_norm": 424.12042236328125, + "learning_rate": 1.8099224328686216e-06, + "loss": 30.1242, + "step": 374110 + }, + { + "epoch": 0.7557460699669114, + "grad_norm": 471.8075256347656, + "learning_rate": 1.8096536517318196e-06, + "loss": 20.2933, + "step": 374120 + }, + { + "epoch": 0.7557662705995952, + "grad_norm": 582.2421875, + "learning_rate": 1.8093848861443802e-06, + "loss": 11.7692, + "step": 374130 + }, + { + "epoch": 0.755786471232279, + "grad_norm": 490.2405090332031, + "learning_rate": 1.809116136107611e-06, + "loss": 17.1522, + "step": 374140 + }, + { + "epoch": 0.7558066718649629, + "grad_norm": 642.3689575195312, + "learning_rate": 1.8088474016228236e-06, + "loss": 38.366, + "step": 374150 + }, + { + "epoch": 0.7558268724976466, + "grad_norm": 207.1797637939453, + "learning_rate": 1.808578682691327e-06, + "loss": 9.4646, + "step": 374160 + }, + { + "epoch": 0.7558470731303304, + "grad_norm": 464.04241943359375, + "learning_rate": 1.8083099793144299e-06, + "loss": 34.519, + "step": 374170 + }, + { + "epoch": 0.7558672737630142, + "grad_norm": 336.1421203613281, + "learning_rate": 1.8080412914934436e-06, + "loss": 11.8767, + "step": 374180 + }, + { + "epoch": 0.755887474395698, + "grad_norm": 266.2442626953125, + "learning_rate": 1.8077726192296774e-06, + "loss": 10.2188, + "step": 374190 + }, + { + "epoch": 0.7559076750283819, + "grad_norm": 140.14678955078125, + "learning_rate": 1.807503962524439e-06, + "loss": 10.7977, + "step": 374200 + }, + { + "epoch": 0.7559278756610657, + "grad_norm": 249.89939880371094, + "learning_rate": 1.8072353213790383e-06, + "loss": 17.3283, + "step": 374210 + }, + { + "epoch": 0.7559480762937495, + "grad_norm": 184.39439392089844, + "learning_rate": 1.8069666957947873e-06, + "loss": 13.4435, + "step": 374220 + }, + { + "epoch": 0.7559682769264333, + "grad_norm": 99.02581024169922, + "learning_rate": 1.8066980857729937e-06, + "loss": 16.3724, + "step": 374230 + }, + { + "epoch": 0.7559884775591171, + "grad_norm": 76.87078094482422, + "learning_rate": 1.8064294913149645e-06, + "loss": 14.3106, + "step": 374240 + }, + { + "epoch": 0.756008678191801, + "grad_norm": 746.1737060546875, + "learning_rate": 1.806160912422012e-06, + "loss": 17.5373, + "step": 374250 + }, + { + "epoch": 0.7560288788244848, + "grad_norm": 147.80593872070312, + "learning_rate": 1.8058923490954443e-06, + "loss": 9.2528, + "step": 374260 + }, + { + "epoch": 0.7560490794571686, + "grad_norm": 408.75677490234375, + "learning_rate": 1.8056238013365679e-06, + "loss": 22.3378, + "step": 374270 + }, + { + "epoch": 0.7560692800898524, + "grad_norm": 
53.84675979614258, + "learning_rate": 1.8053552691466936e-06, + "loss": 15.5796, + "step": 374280 + }, + { + "epoch": 0.7560894807225362, + "grad_norm": 541.00830078125, + "learning_rate": 1.805086752527132e-06, + "loss": 14.2719, + "step": 374290 + }, + { + "epoch": 0.75610968135522, + "grad_norm": 1040.6136474609375, + "learning_rate": 1.8048182514791901e-06, + "loss": 26.303, + "step": 374300 + }, + { + "epoch": 0.7561298819879039, + "grad_norm": 5.465455532073975, + "learning_rate": 1.804549766004175e-06, + "loss": 20.0012, + "step": 374310 + }, + { + "epoch": 0.7561500826205877, + "grad_norm": 313.8343200683594, + "learning_rate": 1.8042812961033983e-06, + "loss": 12.539, + "step": 374320 + }, + { + "epoch": 0.7561702832532715, + "grad_norm": 0.7055712342262268, + "learning_rate": 1.8040128417781672e-06, + "loss": 17.5081, + "step": 374330 + }, + { + "epoch": 0.7561904838859553, + "grad_norm": 81.33342742919922, + "learning_rate": 1.8037444030297878e-06, + "loss": 25.3508, + "step": 374340 + }, + { + "epoch": 0.7562106845186392, + "grad_norm": 764.7306518554688, + "learning_rate": 1.8034759798595724e-06, + "loss": 17.5529, + "step": 374350 + }, + { + "epoch": 0.756230885151323, + "grad_norm": 490.69293212890625, + "learning_rate": 1.803207572268826e-06, + "loss": 10.7631, + "step": 374360 + }, + { + "epoch": 0.7562510857840068, + "grad_norm": 157.39866638183594, + "learning_rate": 1.8029391802588598e-06, + "loss": 17.3602, + "step": 374370 + }, + { + "epoch": 0.7562712864166906, + "grad_norm": 1128.905029296875, + "learning_rate": 1.8026708038309797e-06, + "loss": 16.8336, + "step": 374380 + }, + { + "epoch": 0.7562914870493744, + "grad_norm": 389.259033203125, + "learning_rate": 1.8024024429864928e-06, + "loss": 10.3319, + "step": 374390 + }, + { + "epoch": 0.7563116876820583, + "grad_norm": 313.4371032714844, + "learning_rate": 1.8021340977267104e-06, + "loss": 15.9083, + "step": 374400 + }, + { + "epoch": 0.756331888314742, + "grad_norm": 1055.1304931640625, + "learning_rate": 1.801865768052939e-06, + "loss": 29.6975, + "step": 374410 + }, + { + "epoch": 0.7563520889474258, + "grad_norm": 490.5217590332031, + "learning_rate": 1.8015974539664839e-06, + "loss": 18.6402, + "step": 374420 + }, + { + "epoch": 0.7563722895801096, + "grad_norm": 279.38360595703125, + "learning_rate": 1.8013291554686547e-06, + "loss": 14.5644, + "step": 374430 + }, + { + "epoch": 0.7563924902127934, + "grad_norm": 394.5645446777344, + "learning_rate": 1.801060872560761e-06, + "loss": 13.438, + "step": 374440 + }, + { + "epoch": 0.7564126908454772, + "grad_norm": 100.2540054321289, + "learning_rate": 1.800792605244109e-06, + "loss": 13.5408, + "step": 374450 + }, + { + "epoch": 0.7564328914781611, + "grad_norm": 129.83828735351562, + "learning_rate": 1.8005243535200034e-06, + "loss": 22.0487, + "step": 374460 + }, + { + "epoch": 0.7564530921108449, + "grad_norm": 660.2085571289062, + "learning_rate": 1.8002561173897564e-06, + "loss": 12.8716, + "step": 374470 + }, + { + "epoch": 0.7564732927435287, + "grad_norm": 0.35584282875061035, + "learning_rate": 1.7999878968546724e-06, + "loss": 17.5631, + "step": 374480 + }, + { + "epoch": 0.7564934933762125, + "grad_norm": 427.03399658203125, + "learning_rate": 1.7997196919160582e-06, + "loss": 16.2899, + "step": 374490 + }, + { + "epoch": 0.7565136940088963, + "grad_norm": 77.68339538574219, + "learning_rate": 1.799451502575222e-06, + "loss": 12.8195, + "step": 374500 + }, + { + "epoch": 0.7565338946415802, + "grad_norm": 712.9091796875, + "learning_rate": 
1.7991833288334742e-06, + "loss": 18.7816, + "step": 374510 + }, + { + "epoch": 0.756554095274264, + "grad_norm": 238.63438415527344, + "learning_rate": 1.7989151706921155e-06, + "loss": 12.6859, + "step": 374520 + }, + { + "epoch": 0.7565742959069478, + "grad_norm": 777.8014526367188, + "learning_rate": 1.7986470281524555e-06, + "loss": 18.9677, + "step": 374530 + }, + { + "epoch": 0.7565944965396316, + "grad_norm": 615.5677490234375, + "learning_rate": 1.7983789012158037e-06, + "loss": 16.7531, + "step": 374540 + }, + { + "epoch": 0.7566146971723154, + "grad_norm": 430.4071044921875, + "learning_rate": 1.798110789883465e-06, + "loss": 20.0846, + "step": 374550 + }, + { + "epoch": 0.7566348978049993, + "grad_norm": 11.2094087600708, + "learning_rate": 1.7978426941567435e-06, + "loss": 11.201, + "step": 374560 + }, + { + "epoch": 0.7566550984376831, + "grad_norm": 267.1214599609375, + "learning_rate": 1.7975746140369505e-06, + "loss": 16.2487, + "step": 374570 + }, + { + "epoch": 0.7566752990703669, + "grad_norm": 454.36309814453125, + "learning_rate": 1.7973065495253905e-06, + "loss": 21.9082, + "step": 374580 + }, + { + "epoch": 0.7566954997030507, + "grad_norm": 214.96356201171875, + "learning_rate": 1.7970385006233682e-06, + "loss": 15.3024, + "step": 374590 + }, + { + "epoch": 0.7567157003357345, + "grad_norm": 288.4180908203125, + "learning_rate": 1.7967704673321917e-06, + "loss": 25.1004, + "step": 374600 + }, + { + "epoch": 0.7567359009684184, + "grad_norm": 213.95083618164062, + "learning_rate": 1.796502449653169e-06, + "loss": 11.2068, + "step": 374610 + }, + { + "epoch": 0.7567561016011022, + "grad_norm": 280.2714538574219, + "learning_rate": 1.7962344475876054e-06, + "loss": 13.2888, + "step": 374620 + }, + { + "epoch": 0.756776302233786, + "grad_norm": 158.43423461914062, + "learning_rate": 1.7959664611368043e-06, + "loss": 20.3864, + "step": 374630 + }, + { + "epoch": 0.7567965028664698, + "grad_norm": 419.7962951660156, + "learning_rate": 1.7956984903020757e-06, + "loss": 23.4935, + "step": 374640 + }, + { + "epoch": 0.7568167034991536, + "grad_norm": 701.8889770507812, + "learning_rate": 1.7954305350847246e-06, + "loss": 27.9078, + "step": 374650 + }, + { + "epoch": 0.7568369041318375, + "grad_norm": 245.12318420410156, + "learning_rate": 1.7951625954860541e-06, + "loss": 18.8585, + "step": 374660 + }, + { + "epoch": 0.7568571047645212, + "grad_norm": 487.855224609375, + "learning_rate": 1.7948946715073744e-06, + "loss": 19.2059, + "step": 374670 + }, + { + "epoch": 0.756877305397205, + "grad_norm": 0.0, + "learning_rate": 1.7946267631499874e-06, + "loss": 29.6986, + "step": 374680 + }, + { + "epoch": 0.7568975060298888, + "grad_norm": 488.7209777832031, + "learning_rate": 1.7943588704152033e-06, + "loss": 17.4271, + "step": 374690 + }, + { + "epoch": 0.7569177066625726, + "grad_norm": 276.59039306640625, + "learning_rate": 1.7940909933043243e-06, + "loss": 15.1514, + "step": 374700 + }, + { + "epoch": 0.7569379072952565, + "grad_norm": 526.3798828125, + "learning_rate": 1.7938231318186555e-06, + "loss": 18.1403, + "step": 374710 + }, + { + "epoch": 0.7569581079279403, + "grad_norm": 0.0, + "learning_rate": 1.7935552859595058e-06, + "loss": 14.2099, + "step": 374720 + }, + { + "epoch": 0.7569783085606241, + "grad_norm": 476.27899169921875, + "learning_rate": 1.793287455728177e-06, + "loss": 31.5622, + "step": 374730 + }, + { + "epoch": 0.7569985091933079, + "grad_norm": 487.2760009765625, + "learning_rate": 1.7930196411259782e-06, + "loss": 23.4855, + "step": 374740 + }, + 
{ + "epoch": 0.7570187098259917, + "grad_norm": 281.9328918457031, + "learning_rate": 1.7927518421542106e-06, + "loss": 18.0875, + "step": 374750 + }, + { + "epoch": 0.7570389104586756, + "grad_norm": 311.88751220703125, + "learning_rate": 1.7924840588141829e-06, + "loss": 9.8626, + "step": 374760 + }, + { + "epoch": 0.7570591110913594, + "grad_norm": 582.0269775390625, + "learning_rate": 1.7922162911071993e-06, + "loss": 18.8568, + "step": 374770 + }, + { + "epoch": 0.7570793117240432, + "grad_norm": 459.2454833984375, + "learning_rate": 1.7919485390345631e-06, + "loss": 22.8015, + "step": 374780 + }, + { + "epoch": 0.757099512356727, + "grad_norm": 343.797607421875, + "learning_rate": 1.791680802597582e-06, + "loss": 13.3459, + "step": 374790 + }, + { + "epoch": 0.7571197129894108, + "grad_norm": 302.8917236328125, + "learning_rate": 1.7914130817975595e-06, + "loss": 18.7183, + "step": 374800 + }, + { + "epoch": 0.7571399136220947, + "grad_norm": 764.393798828125, + "learning_rate": 1.7911453766357983e-06, + "loss": 28.0872, + "step": 374810 + }, + { + "epoch": 0.7571601142547785, + "grad_norm": 26.692785263061523, + "learning_rate": 1.7908776871136063e-06, + "loss": 30.2047, + "step": 374820 + }, + { + "epoch": 0.7571803148874623, + "grad_norm": 113.09803771972656, + "learning_rate": 1.7906100132322902e-06, + "loss": 12.2912, + "step": 374830 + }, + { + "epoch": 0.7572005155201461, + "grad_norm": 238.6380615234375, + "learning_rate": 1.7903423549931482e-06, + "loss": 15.993, + "step": 374840 + }, + { + "epoch": 0.7572207161528299, + "grad_norm": 149.5492401123047, + "learning_rate": 1.7900747123974882e-06, + "loss": 37.5172, + "step": 374850 + }, + { + "epoch": 0.7572409167855138, + "grad_norm": 377.97052001953125, + "learning_rate": 1.7898070854466165e-06, + "loss": 17.2467, + "step": 374860 + }, + { + "epoch": 0.7572611174181976, + "grad_norm": 868.7412109375, + "learning_rate": 1.7895394741418355e-06, + "loss": 32.1399, + "step": 374870 + }, + { + "epoch": 0.7572813180508814, + "grad_norm": 453.3160705566406, + "learning_rate": 1.7892718784844482e-06, + "loss": 14.5478, + "step": 374880 + }, + { + "epoch": 0.7573015186835652, + "grad_norm": 242.47076416015625, + "learning_rate": 1.7890042984757605e-06, + "loss": 31.7516, + "step": 374890 + }, + { + "epoch": 0.757321719316249, + "grad_norm": 257.50445556640625, + "learning_rate": 1.7887367341170781e-06, + "loss": 14.959, + "step": 374900 + }, + { + "epoch": 0.7573419199489329, + "grad_norm": 208.0048065185547, + "learning_rate": 1.788469185409703e-06, + "loss": 20.22, + "step": 374910 + }, + { + "epoch": 0.7573621205816167, + "grad_norm": 97.57205200195312, + "learning_rate": 1.7882016523549383e-06, + "loss": 12.312, + "step": 374920 + }, + { + "epoch": 0.7573823212143004, + "grad_norm": 493.3318786621094, + "learning_rate": 1.7879341349540907e-06, + "loss": 23.5748, + "step": 374930 + }, + { + "epoch": 0.7574025218469842, + "grad_norm": 191.0437469482422, + "learning_rate": 1.787666633208463e-06, + "loss": 13.13, + "step": 374940 + }, + { + "epoch": 0.757422722479668, + "grad_norm": 291.8238525390625, + "learning_rate": 1.7873991471193563e-06, + "loss": 34.0035, + "step": 374950 + }, + { + "epoch": 0.7574429231123518, + "grad_norm": 245.30130004882812, + "learning_rate": 1.787131676688078e-06, + "loss": 28.5045, + "step": 374960 + }, + { + "epoch": 0.7574631237450357, + "grad_norm": 2033.75244140625, + "learning_rate": 1.7868642219159292e-06, + "loss": 32.8364, + "step": 374970 + }, + { + "epoch": 0.7574833243777195, + "grad_norm": 
80.31974792480469, + "learning_rate": 1.7865967828042158e-06, + "loss": 13.944, + "step": 374980 + }, + { + "epoch": 0.7575035250104033, + "grad_norm": 995.2206420898438, + "learning_rate": 1.7863293593542402e-06, + "loss": 31.8257, + "step": 374990 + }, + { + "epoch": 0.7575237256430871, + "grad_norm": 381.6286315917969, + "learning_rate": 1.7860619515673034e-06, + "loss": 17.0038, + "step": 375000 + }, + { + "epoch": 0.7575439262757709, + "grad_norm": 260.1957092285156, + "learning_rate": 1.7857945594447128e-06, + "loss": 18.8672, + "step": 375010 + }, + { + "epoch": 0.7575641269084548, + "grad_norm": 89.29084014892578, + "learning_rate": 1.7855271829877696e-06, + "loss": 16.7167, + "step": 375020 + }, + { + "epoch": 0.7575843275411386, + "grad_norm": 548.8855590820312, + "learning_rate": 1.7852598221977757e-06, + "loss": 25.9282, + "step": 375030 + }, + { + "epoch": 0.7576045281738224, + "grad_norm": 1157.1932373046875, + "learning_rate": 1.7849924770760352e-06, + "loss": 15.5239, + "step": 375040 + }, + { + "epoch": 0.7576247288065062, + "grad_norm": 375.1731262207031, + "learning_rate": 1.784725147623853e-06, + "loss": 13.8027, + "step": 375050 + }, + { + "epoch": 0.75764492943919, + "grad_norm": 384.16510009765625, + "learning_rate": 1.7844578338425306e-06, + "loss": 17.4332, + "step": 375060 + }, + { + "epoch": 0.7576651300718739, + "grad_norm": 388.6846923828125, + "learning_rate": 1.7841905357333688e-06, + "loss": 9.5122, + "step": 375070 + }, + { + "epoch": 0.7576853307045577, + "grad_norm": 149.29364013671875, + "learning_rate": 1.7839232532976746e-06, + "loss": 21.0735, + "step": 375080 + }, + { + "epoch": 0.7577055313372415, + "grad_norm": 122.9616470336914, + "learning_rate": 1.783655986536748e-06, + "loss": 14.2662, + "step": 375090 + }, + { + "epoch": 0.7577257319699253, + "grad_norm": 224.22518920898438, + "learning_rate": 1.7833887354518902e-06, + "loss": 9.137, + "step": 375100 + }, + { + "epoch": 0.7577459326026091, + "grad_norm": 309.8247375488281, + "learning_rate": 1.7831215000444057e-06, + "loss": 10.1294, + "step": 375110 + }, + { + "epoch": 0.757766133235293, + "grad_norm": 232.92469787597656, + "learning_rate": 1.7828542803156007e-06, + "loss": 17.5009, + "step": 375120 + }, + { + "epoch": 0.7577863338679768, + "grad_norm": 265.9322204589844, + "learning_rate": 1.7825870762667696e-06, + "loss": 15.1142, + "step": 375130 + }, + { + "epoch": 0.7578065345006606, + "grad_norm": 513.8374633789062, + "learning_rate": 1.7823198878992192e-06, + "loss": 12.3842, + "step": 375140 + }, + { + "epoch": 0.7578267351333444, + "grad_norm": 223.05540466308594, + "learning_rate": 1.7820527152142531e-06, + "loss": 17.299, + "step": 375150 + }, + { + "epoch": 0.7578469357660282, + "grad_norm": 260.6461181640625, + "learning_rate": 1.781785558213172e-06, + "loss": 23.935, + "step": 375160 + }, + { + "epoch": 0.7578671363987121, + "grad_norm": 231.95819091796875, + "learning_rate": 1.781518416897276e-06, + "loss": 24.2113, + "step": 375170 + }, + { + "epoch": 0.7578873370313958, + "grad_norm": 597.959228515625, + "learning_rate": 1.7812512912678687e-06, + "loss": 16.3873, + "step": 375180 + }, + { + "epoch": 0.7579075376640796, + "grad_norm": 210.48451232910156, + "learning_rate": 1.7809841813262558e-06, + "loss": 10.5464, + "step": 375190 + }, + { + "epoch": 0.7579277382967634, + "grad_norm": 129.9409942626953, + "learning_rate": 1.7807170870737317e-06, + "loss": 14.1919, + "step": 375200 + }, + { + "epoch": 0.7579479389294472, + "grad_norm": 534.8091430664062, + "learning_rate": 
1.7804500085116022e-06, + "loss": 12.0085, + "step": 375210 + }, + { + "epoch": 0.757968139562131, + "grad_norm": 338.99273681640625, + "learning_rate": 1.7801829456411713e-06, + "loss": 10.6398, + "step": 375220 + }, + { + "epoch": 0.7579883401948149, + "grad_norm": 183.349365234375, + "learning_rate": 1.7799158984637372e-06, + "loss": 23.207, + "step": 375230 + }, + { + "epoch": 0.7580085408274987, + "grad_norm": 426.0802307128906, + "learning_rate": 1.779648866980601e-06, + "loss": 9.9525, + "step": 375240 + }, + { + "epoch": 0.7580287414601825, + "grad_norm": 308.3338317871094, + "learning_rate": 1.7793818511930678e-06, + "loss": 24.7034, + "step": 375250 + }, + { + "epoch": 0.7580489420928663, + "grad_norm": 926.7468872070312, + "learning_rate": 1.779114851102437e-06, + "loss": 28.6146, + "step": 375260 + }, + { + "epoch": 0.7580691427255501, + "grad_norm": 465.4128112792969, + "learning_rate": 1.7788478667100074e-06, + "loss": 12.0828, + "step": 375270 + }, + { + "epoch": 0.758089343358234, + "grad_norm": 27.61420440673828, + "learning_rate": 1.7785808980170848e-06, + "loss": 11.1147, + "step": 375280 + }, + { + "epoch": 0.7581095439909178, + "grad_norm": 559.3772583007812, + "learning_rate": 1.7783139450249664e-06, + "loss": 24.2235, + "step": 375290 + }, + { + "epoch": 0.7581297446236016, + "grad_norm": 297.36480712890625, + "learning_rate": 1.7780470077349566e-06, + "loss": 20.765, + "step": 375300 + }, + { + "epoch": 0.7581499452562854, + "grad_norm": 486.7535095214844, + "learning_rate": 1.7777800861483552e-06, + "loss": 13.895, + "step": 375310 + }, + { + "epoch": 0.7581701458889692, + "grad_norm": 242.015380859375, + "learning_rate": 1.7775131802664608e-06, + "loss": 17.063, + "step": 375320 + }, + { + "epoch": 0.7581903465216531, + "grad_norm": 957.19189453125, + "learning_rate": 1.777246290090578e-06, + "loss": 49.9769, + "step": 375330 + }, + { + "epoch": 0.7582105471543369, + "grad_norm": 338.21856689453125, + "learning_rate": 1.7769794156220043e-06, + "loss": 24.6517, + "step": 375340 + }, + { + "epoch": 0.7582307477870207, + "grad_norm": 163.7085723876953, + "learning_rate": 1.7767125568620442e-06, + "loss": 27.5348, + "step": 375350 + }, + { + "epoch": 0.7582509484197045, + "grad_norm": 382.12322998046875, + "learning_rate": 1.776445713811994e-06, + "loss": 12.7062, + "step": 375360 + }, + { + "epoch": 0.7582711490523883, + "grad_norm": 244.8220977783203, + "learning_rate": 1.7761788864731582e-06, + "loss": 20.3112, + "step": 375370 + }, + { + "epoch": 0.7582913496850722, + "grad_norm": 492.9038391113281, + "learning_rate": 1.7759120748468356e-06, + "loss": 20.0269, + "step": 375380 + }, + { + "epoch": 0.758311550317756, + "grad_norm": 348.23773193359375, + "learning_rate": 1.7756452789343243e-06, + "loss": 27.5935, + "step": 375390 + }, + { + "epoch": 0.7583317509504398, + "grad_norm": 266.7994689941406, + "learning_rate": 1.7753784987369287e-06, + "loss": 24.2155, + "step": 375400 + }, + { + "epoch": 0.7583519515831236, + "grad_norm": 279.92987060546875, + "learning_rate": 1.7751117342559477e-06, + "loss": 12.4588, + "step": 375410 + }, + { + "epoch": 0.7583721522158074, + "grad_norm": 1162.434814453125, + "learning_rate": 1.7748449854926792e-06, + "loss": 40.0468, + "step": 375420 + }, + { + "epoch": 0.7583923528484913, + "grad_norm": 31.6923770904541, + "learning_rate": 1.774578252448425e-06, + "loss": 8.9493, + "step": 375430 + }, + { + "epoch": 0.758412553481175, + "grad_norm": 194.1206512451172, + "learning_rate": 1.7743115351244883e-06, + "loss": 14.6561, + 
"step": 375440 + }, + { + "epoch": 0.7584327541138588, + "grad_norm": 337.4588317871094, + "learning_rate": 1.7740448335221628e-06, + "loss": 9.4775, + "step": 375450 + }, + { + "epoch": 0.7584529547465426, + "grad_norm": 322.32781982421875, + "learning_rate": 1.7737781476427511e-06, + "loss": 29.0225, + "step": 375460 + }, + { + "epoch": 0.7584731553792264, + "grad_norm": 420.2108459472656, + "learning_rate": 1.7735114774875556e-06, + "loss": 19.4391, + "step": 375470 + }, + { + "epoch": 0.7584933560119103, + "grad_norm": 3.5831351280212402, + "learning_rate": 1.7732448230578743e-06, + "loss": 9.7425, + "step": 375480 + }, + { + "epoch": 0.7585135566445941, + "grad_norm": 258.252197265625, + "learning_rate": 1.7729781843550036e-06, + "loss": 12.9734, + "step": 375490 + }, + { + "epoch": 0.7585337572772779, + "grad_norm": 553.4739990234375, + "learning_rate": 1.7727115613802465e-06, + "loss": 30.479, + "step": 375500 + }, + { + "epoch": 0.7585539579099617, + "grad_norm": 39.61473083496094, + "learning_rate": 1.7724449541349048e-06, + "loss": 17.0065, + "step": 375510 + }, + { + "epoch": 0.7585741585426455, + "grad_norm": 770.8367309570312, + "learning_rate": 1.772178362620272e-06, + "loss": 22.7148, + "step": 375520 + }, + { + "epoch": 0.7585943591753294, + "grad_norm": 1040.59619140625, + "learning_rate": 1.77191178683765e-06, + "loss": 27.546, + "step": 375530 + }, + { + "epoch": 0.7586145598080132, + "grad_norm": 78.16218566894531, + "learning_rate": 1.7716452267883404e-06, + "loss": 33.4048, + "step": 375540 + }, + { + "epoch": 0.758634760440697, + "grad_norm": 380.9717712402344, + "learning_rate": 1.7713786824736406e-06, + "loss": 32.5716, + "step": 375550 + }, + { + "epoch": 0.7586549610733808, + "grad_norm": 368.9980163574219, + "learning_rate": 1.7711121538948473e-06, + "loss": 17.0607, + "step": 375560 + }, + { + "epoch": 0.7586751617060646, + "grad_norm": 460.2774353027344, + "learning_rate": 1.7708456410532637e-06, + "loss": 22.7964, + "step": 375570 + }, + { + "epoch": 0.7586953623387485, + "grad_norm": 50.77473831176758, + "learning_rate": 1.7705791439501851e-06, + "loss": 15.9386, + "step": 375580 + }, + { + "epoch": 0.7587155629714323, + "grad_norm": 324.2077331542969, + "learning_rate": 1.7703126625869138e-06, + "loss": 14.2888, + "step": 375590 + }, + { + "epoch": 0.7587357636041161, + "grad_norm": 667.908935546875, + "learning_rate": 1.770046196964747e-06, + "loss": 29.7749, + "step": 375600 + }, + { + "epoch": 0.7587559642367999, + "grad_norm": 496.64434814453125, + "learning_rate": 1.769779747084981e-06, + "loss": 17.4577, + "step": 375610 + }, + { + "epoch": 0.7587761648694837, + "grad_norm": 305.63262939453125, + "learning_rate": 1.769513312948919e-06, + "loss": 13.8207, + "step": 375620 + }, + { + "epoch": 0.7587963655021676, + "grad_norm": 618.8668212890625, + "learning_rate": 1.7692468945578572e-06, + "loss": 18.8828, + "step": 375630 + }, + { + "epoch": 0.7588165661348514, + "grad_norm": 278.47723388671875, + "learning_rate": 1.768980491913092e-06, + "loss": 15.5401, + "step": 375640 + }, + { + "epoch": 0.7588367667675352, + "grad_norm": 236.46444702148438, + "learning_rate": 1.7687141050159246e-06, + "loss": 20.985, + "step": 375650 + }, + { + "epoch": 0.758856967400219, + "grad_norm": 313.394287109375, + "learning_rate": 1.7684477338676543e-06, + "loss": 13.4524, + "step": 375660 + }, + { + "epoch": 0.7588771680329028, + "grad_norm": 161.9228057861328, + "learning_rate": 1.7681813784695778e-06, + "loss": 14.2453, + "step": 375670 + }, + { + "epoch": 
0.7588973686655867, + "grad_norm": 340.1613464355469, + "learning_rate": 1.7679150388229916e-06, + "loss": 11.2734, + "step": 375680 + }, + { + "epoch": 0.7589175692982704, + "grad_norm": 270.6866760253906, + "learning_rate": 1.7676487149291972e-06, + "loss": 21.4311, + "step": 375690 + }, + { + "epoch": 0.7589377699309542, + "grad_norm": 413.8176574707031, + "learning_rate": 1.7673824067894912e-06, + "loss": 13.3455, + "step": 375700 + }, + { + "epoch": 0.758957970563638, + "grad_norm": 400.6026306152344, + "learning_rate": 1.767116114405169e-06, + "loss": 20.54, + "step": 375710 + }, + { + "epoch": 0.7589781711963218, + "grad_norm": 476.71063232421875, + "learning_rate": 1.7668498377775312e-06, + "loss": 15.2724, + "step": 375720 + }, + { + "epoch": 0.7589983718290056, + "grad_norm": 417.1393127441406, + "learning_rate": 1.7665835769078782e-06, + "loss": 12.251, + "step": 375730 + }, + { + "epoch": 0.7590185724616895, + "grad_norm": 2.2373547554016113, + "learning_rate": 1.7663173317975012e-06, + "loss": 9.704, + "step": 375740 + }, + { + "epoch": 0.7590387730943733, + "grad_norm": 412.61566162109375, + "learning_rate": 1.7660511024477018e-06, + "loss": 29.7417, + "step": 375750 + }, + { + "epoch": 0.7590589737270571, + "grad_norm": 271.6612854003906, + "learning_rate": 1.765784888859779e-06, + "loss": 17.1338, + "step": 375760 + }, + { + "epoch": 0.7590791743597409, + "grad_norm": 358.768310546875, + "learning_rate": 1.7655186910350276e-06, + "loss": 22.6152, + "step": 375770 + }, + { + "epoch": 0.7590993749924247, + "grad_norm": 253.42044067382812, + "learning_rate": 1.7652525089747447e-06, + "loss": 21.7026, + "step": 375780 + }, + { + "epoch": 0.7591195756251086, + "grad_norm": 601.6935424804688, + "learning_rate": 1.7649863426802283e-06, + "loss": 28.4537, + "step": 375790 + }, + { + "epoch": 0.7591397762577924, + "grad_norm": 317.54400634765625, + "learning_rate": 1.7647201921527802e-06, + "loss": 14.1507, + "step": 375800 + }, + { + "epoch": 0.7591599768904762, + "grad_norm": 226.88931274414062, + "learning_rate": 1.7644540573936892e-06, + "loss": 10.0781, + "step": 375810 + }, + { + "epoch": 0.75918017752316, + "grad_norm": 469.7894592285156, + "learning_rate": 1.7641879384042571e-06, + "loss": 23.0613, + "step": 375820 + }, + { + "epoch": 0.7592003781558438, + "grad_norm": 657.5131225585938, + "learning_rate": 1.7639218351857824e-06, + "loss": 26.1201, + "step": 375830 + }, + { + "epoch": 0.7592205787885277, + "grad_norm": 550.1307373046875, + "learning_rate": 1.76365574773956e-06, + "loss": 33.5979, + "step": 375840 + }, + { + "epoch": 0.7592407794212115, + "grad_norm": 411.6669616699219, + "learning_rate": 1.763389676066885e-06, + "loss": 15.1043, + "step": 375850 + }, + { + "epoch": 0.7592609800538953, + "grad_norm": 618.7662963867188, + "learning_rate": 1.7631236201690583e-06, + "loss": 10.8901, + "step": 375860 + }, + { + "epoch": 0.7592811806865791, + "grad_norm": 263.6335144042969, + "learning_rate": 1.7628575800473747e-06, + "loss": 23.4085, + "step": 375870 + }, + { + "epoch": 0.759301381319263, + "grad_norm": 565.1019287109375, + "learning_rate": 1.7625915557031287e-06, + "loss": 19.3755, + "step": 375880 + }, + { + "epoch": 0.7593215819519468, + "grad_norm": 341.90289306640625, + "learning_rate": 1.7623255471376199e-06, + "loss": 17.2587, + "step": 375890 + }, + { + "epoch": 0.7593417825846306, + "grad_norm": 203.76864624023438, + "learning_rate": 1.762059554352143e-06, + "loss": 34.235, + "step": 375900 + }, + { + "epoch": 0.7593619832173144, + "grad_norm": 
605.4048461914062, + "learning_rate": 1.761793577347996e-06, + "loss": 20.0809, + "step": 375910 + }, + { + "epoch": 0.7593821838499982, + "grad_norm": 321.06439208984375, + "learning_rate": 1.761527616126475e-06, + "loss": 17.8851, + "step": 375920 + }, + { + "epoch": 0.759402384482682, + "grad_norm": 319.451416015625, + "learning_rate": 1.7612616706888735e-06, + "loss": 5.3906, + "step": 375930 + }, + { + "epoch": 0.7594225851153659, + "grad_norm": 418.46124267578125, + "learning_rate": 1.7609957410364913e-06, + "loss": 17.0868, + "step": 375940 + }, + { + "epoch": 0.7594427857480496, + "grad_norm": 216.66879272460938, + "learning_rate": 1.760729827170622e-06, + "loss": 19.261, + "step": 375950 + }, + { + "epoch": 0.7594629863807334, + "grad_norm": 174.04296875, + "learning_rate": 1.760463929092564e-06, + "loss": 10.1454, + "step": 375960 + }, + { + "epoch": 0.7594831870134172, + "grad_norm": 330.9078674316406, + "learning_rate": 1.76019804680361e-06, + "loss": 24.3022, + "step": 375970 + }, + { + "epoch": 0.759503387646101, + "grad_norm": 453.86749267578125, + "learning_rate": 1.7599321803050595e-06, + "loss": 13.8887, + "step": 375980 + }, + { + "epoch": 0.7595235882787849, + "grad_norm": 210.9562530517578, + "learning_rate": 1.7596663295982069e-06, + "loss": 29.3843, + "step": 375990 + }, + { + "epoch": 0.7595437889114687, + "grad_norm": 324.6178894042969, + "learning_rate": 1.7594004946843458e-06, + "loss": 13.822, + "step": 376000 + }, + { + "epoch": 0.7595639895441525, + "grad_norm": 1.545017123222351, + "learning_rate": 1.7591346755647754e-06, + "loss": 11.5382, + "step": 376010 + }, + { + "epoch": 0.7595841901768363, + "grad_norm": 298.6878356933594, + "learning_rate": 1.7588688722407892e-06, + "loss": 19.7182, + "step": 376020 + }, + { + "epoch": 0.7596043908095201, + "grad_norm": 495.82452392578125, + "learning_rate": 1.7586030847136815e-06, + "loss": 20.0121, + "step": 376030 + }, + { + "epoch": 0.759624591442204, + "grad_norm": 501.1104431152344, + "learning_rate": 1.7583373129847493e-06, + "loss": 41.7348, + "step": 376040 + }, + { + "epoch": 0.7596447920748878, + "grad_norm": 964.5734252929688, + "learning_rate": 1.758071557055291e-06, + "loss": 32.0864, + "step": 376050 + }, + { + "epoch": 0.7596649927075716, + "grad_norm": 415.458251953125, + "learning_rate": 1.7578058169265954e-06, + "loss": 34.0849, + "step": 376060 + }, + { + "epoch": 0.7596851933402554, + "grad_norm": 24.043859481811523, + "learning_rate": 1.7575400925999613e-06, + "loss": 10.3754, + "step": 376070 + }, + { + "epoch": 0.7597053939729392, + "grad_norm": 319.2466125488281, + "learning_rate": 1.7572743840766854e-06, + "loss": 19.9899, + "step": 376080 + }, + { + "epoch": 0.7597255946056231, + "grad_norm": 253.73214721679688, + "learning_rate": 1.7570086913580603e-06, + "loss": 13.9276, + "step": 376090 + }, + { + "epoch": 0.7597457952383069, + "grad_norm": 637.227294921875, + "learning_rate": 1.7567430144453801e-06, + "loss": 13.6044, + "step": 376100 + }, + { + "epoch": 0.7597659958709907, + "grad_norm": 288.0482177734375, + "learning_rate": 1.756477353339941e-06, + "loss": 15.9136, + "step": 376110 + }, + { + "epoch": 0.7597861965036745, + "grad_norm": 709.585205078125, + "learning_rate": 1.7562117080430413e-06, + "loss": 35.3148, + "step": 376120 + }, + { + "epoch": 0.7598063971363583, + "grad_norm": 401.94012451171875, + "learning_rate": 1.755946078555969e-06, + "loss": 15.7747, + "step": 376130 + }, + { + "epoch": 0.7598265977690422, + "grad_norm": 281.07598876953125, + "learning_rate": 
1.755680464880022e-06, + "loss": 8.5536, + "step": 376140 + }, + { + "epoch": 0.759846798401726, + "grad_norm": 445.91845703125, + "learning_rate": 1.7554148670164966e-06, + "loss": 22.7897, + "step": 376150 + }, + { + "epoch": 0.7598669990344098, + "grad_norm": 166.84934997558594, + "learning_rate": 1.7551492849666857e-06, + "loss": 15.9211, + "step": 376160 + }, + { + "epoch": 0.7598871996670936, + "grad_norm": 587.87841796875, + "learning_rate": 1.7548837187318817e-06, + "loss": 21.8101, + "step": 376170 + }, + { + "epoch": 0.7599074002997774, + "grad_norm": 66.2043228149414, + "learning_rate": 1.7546181683133829e-06, + "loss": 18.6671, + "step": 376180 + }, + { + "epoch": 0.7599276009324613, + "grad_norm": 522.960693359375, + "learning_rate": 1.7543526337124817e-06, + "loss": 21.857, + "step": 376190 + }, + { + "epoch": 0.7599478015651451, + "grad_norm": 322.857666015625, + "learning_rate": 1.75408711493047e-06, + "loss": 19.3971, + "step": 376200 + }, + { + "epoch": 0.7599680021978288, + "grad_norm": 423.8174133300781, + "learning_rate": 1.7538216119686457e-06, + "loss": 16.8876, + "step": 376210 + }, + { + "epoch": 0.7599882028305126, + "grad_norm": 687.6094970703125, + "learning_rate": 1.7535561248282994e-06, + "loss": 15.8927, + "step": 376220 + }, + { + "epoch": 0.7600084034631964, + "grad_norm": 347.0122375488281, + "learning_rate": 1.7532906535107286e-06, + "loss": 8.7866, + "step": 376230 + }, + { + "epoch": 0.7600286040958802, + "grad_norm": 1143.4398193359375, + "learning_rate": 1.7530251980172241e-06, + "loss": 44.6102, + "step": 376240 + }, + { + "epoch": 0.7600488047285641, + "grad_norm": 329.41644287109375, + "learning_rate": 1.7527597583490825e-06, + "loss": 11.3206, + "step": 376250 + }, + { + "epoch": 0.7600690053612479, + "grad_norm": 250.16851806640625, + "learning_rate": 1.7524943345075957e-06, + "loss": 12.0135, + "step": 376260 + }, + { + "epoch": 0.7600892059939317, + "grad_norm": 74.37361145019531, + "learning_rate": 1.7522289264940557e-06, + "loss": 8.2757, + "step": 376270 + }, + { + "epoch": 0.7601094066266155, + "grad_norm": 785.9454345703125, + "learning_rate": 1.7519635343097601e-06, + "loss": 23.9677, + "step": 376280 + }, + { + "epoch": 0.7601296072592993, + "grad_norm": 468.9040832519531, + "learning_rate": 1.7516981579559987e-06, + "loss": 17.7546, + "step": 376290 + }, + { + "epoch": 0.7601498078919832, + "grad_norm": 215.666748046875, + "learning_rate": 1.751432797434068e-06, + "loss": 19.3954, + "step": 376300 + }, + { + "epoch": 0.760170008524667, + "grad_norm": 315.82989501953125, + "learning_rate": 1.75116745274526e-06, + "loss": 24.7293, + "step": 376310 + }, + { + "epoch": 0.7601902091573508, + "grad_norm": 281.9214782714844, + "learning_rate": 1.7509021238908659e-06, + "loss": 12.1708, + "step": 376320 + }, + { + "epoch": 0.7602104097900346, + "grad_norm": 243.76895141601562, + "learning_rate": 1.750636810872181e-06, + "loss": 7.8583, + "step": 376330 + }, + { + "epoch": 0.7602306104227184, + "grad_norm": 484.5269775390625, + "learning_rate": 1.7503715136905014e-06, + "loss": 19.7257, + "step": 376340 + }, + { + "epoch": 0.7602508110554023, + "grad_norm": 266.2414245605469, + "learning_rate": 1.7501062323471136e-06, + "loss": 11.5703, + "step": 376350 + }, + { + "epoch": 0.7602710116880861, + "grad_norm": 462.5458068847656, + "learning_rate": 1.7498409668433135e-06, + "loss": 12.6849, + "step": 376360 + }, + { + "epoch": 0.7602912123207699, + "grad_norm": 376.05426025390625, + "learning_rate": 1.7495757171803967e-06, + "loss": 7.2245, + 
"step": 376370 + }, + { + "epoch": 0.7603114129534537, + "grad_norm": 179.6309051513672, + "learning_rate": 1.749310483359653e-06, + "loss": 25.1848, + "step": 376380 + }, + { + "epoch": 0.7603316135861375, + "grad_norm": 282.795654296875, + "learning_rate": 1.7490452653823747e-06, + "loss": 18.6978, + "step": 376390 + }, + { + "epoch": 0.7603518142188214, + "grad_norm": 239.79541015625, + "learning_rate": 1.7487800632498547e-06, + "loss": 17.3848, + "step": 376400 + }, + { + "epoch": 0.7603720148515052, + "grad_norm": 274.5397644042969, + "learning_rate": 1.7485148769633903e-06, + "loss": 23.4847, + "step": 376410 + }, + { + "epoch": 0.760392215484189, + "grad_norm": 0.0, + "learning_rate": 1.7482497065242665e-06, + "loss": 14.6895, + "step": 376420 + }, + { + "epoch": 0.7604124161168728, + "grad_norm": 1420.82470703125, + "learning_rate": 1.7479845519337795e-06, + "loss": 29.4314, + "step": 376430 + }, + { + "epoch": 0.7604326167495566, + "grad_norm": 713.2938232421875, + "learning_rate": 1.7477194131932229e-06, + "loss": 22.3533, + "step": 376440 + }, + { + "epoch": 0.7604528173822405, + "grad_norm": 261.40106201171875, + "learning_rate": 1.747454290303887e-06, + "loss": 20.2105, + "step": 376450 + }, + { + "epoch": 0.7604730180149242, + "grad_norm": 211.13002014160156, + "learning_rate": 1.747189183267063e-06, + "loss": 16.6662, + "step": 376460 + }, + { + "epoch": 0.760493218647608, + "grad_norm": 264.2223205566406, + "learning_rate": 1.7469240920840463e-06, + "loss": 22.8406, + "step": 376470 + }, + { + "epoch": 0.7605134192802918, + "grad_norm": 301.1449279785156, + "learning_rate": 1.746659016756127e-06, + "loss": 23.6323, + "step": 376480 + }, + { + "epoch": 0.7605336199129756, + "grad_norm": 300.9630126953125, + "learning_rate": 1.7463939572845951e-06, + "loss": 13.9824, + "step": 376490 + }, + { + "epoch": 0.7605538205456595, + "grad_norm": 218.0428009033203, + "learning_rate": 1.746128913670746e-06, + "loss": 13.2248, + "step": 376500 + }, + { + "epoch": 0.7605740211783433, + "grad_norm": 198.65106201171875, + "learning_rate": 1.7458638859158688e-06, + "loss": 12.9668, + "step": 376510 + }, + { + "epoch": 0.7605942218110271, + "grad_norm": 931.1880493164062, + "learning_rate": 1.7455988740212576e-06, + "loss": 23.6093, + "step": 376520 + }, + { + "epoch": 0.7606144224437109, + "grad_norm": 88.32315826416016, + "learning_rate": 1.7453338779882029e-06, + "loss": 24.4658, + "step": 376530 + }, + { + "epoch": 0.7606346230763947, + "grad_norm": 170.65892028808594, + "learning_rate": 1.7450688978179947e-06, + "loss": 20.2341, + "step": 376540 + }, + { + "epoch": 0.7606548237090786, + "grad_norm": 787.7627563476562, + "learning_rate": 1.7448039335119272e-06, + "loss": 24.3571, + "step": 376550 + }, + { + "epoch": 0.7606750243417624, + "grad_norm": 154.41603088378906, + "learning_rate": 1.744538985071289e-06, + "loss": 18.6799, + "step": 376560 + }, + { + "epoch": 0.7606952249744462, + "grad_norm": 308.78009033203125, + "learning_rate": 1.7442740524973744e-06, + "loss": 21.146, + "step": 376570 + }, + { + "epoch": 0.76071542560713, + "grad_norm": 6.902546405792236, + "learning_rate": 1.7440091357914718e-06, + "loss": 20.52, + "step": 376580 + }, + { + "epoch": 0.7607356262398138, + "grad_norm": 789.2552490234375, + "learning_rate": 1.7437442349548756e-06, + "loss": 18.2658, + "step": 376590 + }, + { + "epoch": 0.7607558268724977, + "grad_norm": 231.39454650878906, + "learning_rate": 1.7434793499888746e-06, + "loss": 15.8216, + "step": 376600 + }, + { + "epoch": 0.7607760275051815, + 
"grad_norm": 601.401123046875, + "learning_rate": 1.7432144808947587e-06, + "loss": 18.9456, + "step": 376610 + }, + { + "epoch": 0.7607962281378653, + "grad_norm": 235.45130920410156, + "learning_rate": 1.7429496276738223e-06, + "loss": 17.1497, + "step": 376620 + }, + { + "epoch": 0.7608164287705491, + "grad_norm": 443.2958679199219, + "learning_rate": 1.7426847903273547e-06, + "loss": 25.5497, + "step": 376630 + }, + { + "epoch": 0.7608366294032329, + "grad_norm": 429.2977600097656, + "learning_rate": 1.742419968856644e-06, + "loss": 19.2875, + "step": 376640 + }, + { + "epoch": 0.7608568300359168, + "grad_norm": 333.5958557128906, + "learning_rate": 1.7421551632629835e-06, + "loss": 15.7969, + "step": 376650 + }, + { + "epoch": 0.7608770306686006, + "grad_norm": 301.61822509765625, + "learning_rate": 1.7418903735476673e-06, + "loss": 27.5749, + "step": 376660 + }, + { + "epoch": 0.7608972313012844, + "grad_norm": 244.7122344970703, + "learning_rate": 1.7416255997119786e-06, + "loss": 18.7674, + "step": 376670 + }, + { + "epoch": 0.7609174319339682, + "grad_norm": 261.3179016113281, + "learning_rate": 1.7413608417572114e-06, + "loss": 16.8346, + "step": 376680 + }, + { + "epoch": 0.760937632566652, + "grad_norm": 485.1816711425781, + "learning_rate": 1.7410960996846583e-06, + "loss": 13.3663, + "step": 376690 + }, + { + "epoch": 0.7609578331993359, + "grad_norm": 878.5574340820312, + "learning_rate": 1.7408313734956074e-06, + "loss": 40.5127, + "step": 376700 + }, + { + "epoch": 0.7609780338320197, + "grad_norm": 110.62535095214844, + "learning_rate": 1.7405666631913475e-06, + "loss": 21.6779, + "step": 376710 + }, + { + "epoch": 0.7609982344647034, + "grad_norm": 265.9751281738281, + "learning_rate": 1.7403019687731704e-06, + "loss": 12.9539, + "step": 376720 + }, + { + "epoch": 0.7610184350973872, + "grad_norm": 473.6266174316406, + "learning_rate": 1.740037290242369e-06, + "loss": 12.9548, + "step": 376730 + }, + { + "epoch": 0.761038635730071, + "grad_norm": 351.79290771484375, + "learning_rate": 1.7397726276002274e-06, + "loss": 13.2225, + "step": 376740 + }, + { + "epoch": 0.7610588363627548, + "grad_norm": 320.6704406738281, + "learning_rate": 1.7395079808480386e-06, + "loss": 28.4213, + "step": 376750 + }, + { + "epoch": 0.7610790369954387, + "grad_norm": 175.11500549316406, + "learning_rate": 1.7392433499870941e-06, + "loss": 28.7987, + "step": 376760 + }, + { + "epoch": 0.7610992376281225, + "grad_norm": 798.2296142578125, + "learning_rate": 1.738978735018682e-06, + "loss": 15.7704, + "step": 376770 + }, + { + "epoch": 0.7611194382608063, + "grad_norm": 272.8622741699219, + "learning_rate": 1.7387141359440907e-06, + "loss": 26.9554, + "step": 376780 + }, + { + "epoch": 0.7611396388934901, + "grad_norm": 130.2759552001953, + "learning_rate": 1.7384495527646127e-06, + "loss": 19.8157, + "step": 376790 + }, + { + "epoch": 0.7611598395261739, + "grad_norm": 867.0805053710938, + "learning_rate": 1.738184985481536e-06, + "loss": 18.2362, + "step": 376800 + }, + { + "epoch": 0.7611800401588578, + "grad_norm": 538.44482421875, + "learning_rate": 1.7379204340961481e-06, + "loss": 15.0707, + "step": 376810 + }, + { + "epoch": 0.7612002407915416, + "grad_norm": 425.69573974609375, + "learning_rate": 1.7376558986097424e-06, + "loss": 20.5225, + "step": 376820 + }, + { + "epoch": 0.7612204414242254, + "grad_norm": 251.30618286132812, + "learning_rate": 1.737391379023604e-06, + "loss": 11.0112, + "step": 376830 + }, + { + "epoch": 0.7612406420569092, + "grad_norm": 557.7199096679688, + 
"learning_rate": 1.7371268753390265e-06, + "loss": 35.7515, + "step": 376840 + }, + { + "epoch": 0.761260842689593, + "grad_norm": 102.01097869873047, + "learning_rate": 1.7368623875572948e-06, + "loss": 15.3064, + "step": 376850 + }, + { + "epoch": 0.7612810433222769, + "grad_norm": 234.31521606445312, + "learning_rate": 1.736597915679702e-06, + "loss": 16.7485, + "step": 376860 + }, + { + "epoch": 0.7613012439549607, + "grad_norm": 334.3034362792969, + "learning_rate": 1.7363334597075354e-06, + "loss": 29.7615, + "step": 376870 + }, + { + "epoch": 0.7613214445876445, + "grad_norm": 175.65499877929688, + "learning_rate": 1.7360690196420816e-06, + "loss": 11.9786, + "step": 376880 + }, + { + "epoch": 0.7613416452203283, + "grad_norm": 273.6047668457031, + "learning_rate": 1.735804595484633e-06, + "loss": 16.8305, + "step": 376890 + }, + { + "epoch": 0.7613618458530121, + "grad_norm": 356.8277282714844, + "learning_rate": 1.7355401872364759e-06, + "loss": 28.5677, + "step": 376900 + }, + { + "epoch": 0.761382046485696, + "grad_norm": 453.5097961425781, + "learning_rate": 1.7352757948989012e-06, + "loss": 34.673, + "step": 376910 + }, + { + "epoch": 0.7614022471183798, + "grad_norm": 114.28955078125, + "learning_rate": 1.7350114184731965e-06, + "loss": 24.7503, + "step": 376920 + }, + { + "epoch": 0.7614224477510636, + "grad_norm": 123.94576263427734, + "learning_rate": 1.7347470579606478e-06, + "loss": 15.0611, + "step": 376930 + }, + { + "epoch": 0.7614426483837474, + "grad_norm": 51.32515335083008, + "learning_rate": 1.7344827133625487e-06, + "loss": 13.312, + "step": 376940 + }, + { + "epoch": 0.7614628490164312, + "grad_norm": 0.0, + "learning_rate": 1.734218384680184e-06, + "loss": 18.5802, + "step": 376950 + }, + { + "epoch": 0.7614830496491151, + "grad_norm": 9.909598350524902, + "learning_rate": 1.7339540719148417e-06, + "loss": 16.478, + "step": 376960 + }, + { + "epoch": 0.7615032502817988, + "grad_norm": 215.57704162597656, + "learning_rate": 1.7336897750678106e-06, + "loss": 15.0508, + "step": 376970 + }, + { + "epoch": 0.7615234509144826, + "grad_norm": 612.6788330078125, + "learning_rate": 1.733425494140381e-06, + "loss": 13.8191, + "step": 376980 + }, + { + "epoch": 0.7615436515471664, + "grad_norm": 387.7210388183594, + "learning_rate": 1.7331612291338402e-06, + "loss": 19.9209, + "step": 376990 + }, + { + "epoch": 0.7615638521798502, + "grad_norm": 377.4043884277344, + "learning_rate": 1.7328969800494727e-06, + "loss": 15.2365, + "step": 377000 + }, + { + "epoch": 0.761584052812534, + "grad_norm": 31.66712188720703, + "learning_rate": 1.7326327468885695e-06, + "loss": 21.8383, + "step": 377010 + }, + { + "epoch": 0.7616042534452179, + "grad_norm": 58.340267181396484, + "learning_rate": 1.7323685296524212e-06, + "loss": 18.2436, + "step": 377020 + }, + { + "epoch": 0.7616244540779017, + "grad_norm": 673.8753662109375, + "learning_rate": 1.7321043283423094e-06, + "loss": 20.8677, + "step": 377030 + }, + { + "epoch": 0.7616446547105855, + "grad_norm": 205.27183532714844, + "learning_rate": 1.7318401429595244e-06, + "loss": 16.2658, + "step": 377040 + }, + { + "epoch": 0.7616648553432693, + "grad_norm": 0.0, + "learning_rate": 1.7315759735053562e-06, + "loss": 11.151, + "step": 377050 + }, + { + "epoch": 0.7616850559759532, + "grad_norm": 1154.3646240234375, + "learning_rate": 1.7313118199810897e-06, + "loss": 21.8191, + "step": 377060 + }, + { + "epoch": 0.761705256608637, + "grad_norm": 54.69978332519531, + "learning_rate": 1.7310476823880118e-06, + "loss": 27.5796, + "step": 
377070 + }, + { + "epoch": 0.7617254572413208, + "grad_norm": 659.6768798828125, + "learning_rate": 1.7307835607274125e-06, + "loss": 22.9341, + "step": 377080 + }, + { + "epoch": 0.7617456578740046, + "grad_norm": 41.843936920166016, + "learning_rate": 1.7305194550005776e-06, + "loss": 14.992, + "step": 377090 + }, + { + "epoch": 0.7617658585066884, + "grad_norm": 483.52044677734375, + "learning_rate": 1.7302553652087927e-06, + "loss": 10.2882, + "step": 377100 + }, + { + "epoch": 0.7617860591393723, + "grad_norm": 670.2805786132812, + "learning_rate": 1.7299912913533485e-06, + "loss": 15.0992, + "step": 377110 + }, + { + "epoch": 0.7618062597720561, + "grad_norm": 184.13381958007812, + "learning_rate": 1.729727233435528e-06, + "loss": 23.6193, + "step": 377120 + }, + { + "epoch": 0.7618264604047399, + "grad_norm": 127.78020477294922, + "learning_rate": 1.7294631914566222e-06, + "loss": 17.8214, + "step": 377130 + }, + { + "epoch": 0.7618466610374237, + "grad_norm": 945.5842895507812, + "learning_rate": 1.7291991654179163e-06, + "loss": 21.2076, + "step": 377140 + }, + { + "epoch": 0.7618668616701075, + "grad_norm": 12.364981651306152, + "learning_rate": 1.7289351553206952e-06, + "loss": 19.2048, + "step": 377150 + }, + { + "epoch": 0.7618870623027914, + "grad_norm": 207.98306274414062, + "learning_rate": 1.7286711611662488e-06, + "loss": 9.4154, + "step": 377160 + }, + { + "epoch": 0.7619072629354752, + "grad_norm": 261.1643371582031, + "learning_rate": 1.7284071829558606e-06, + "loss": 10.8977, + "step": 377170 + }, + { + "epoch": 0.761927463568159, + "grad_norm": 807.3040161132812, + "learning_rate": 1.7281432206908211e-06, + "loss": 25.6677, + "step": 377180 + }, + { + "epoch": 0.7619476642008428, + "grad_norm": 552.6361694335938, + "learning_rate": 1.7278792743724133e-06, + "loss": 32.2756, + "step": 377190 + }, + { + "epoch": 0.7619678648335266, + "grad_norm": 669.0811767578125, + "learning_rate": 1.727615344001926e-06, + "loss": 16.0056, + "step": 377200 + }, + { + "epoch": 0.7619880654662105, + "grad_norm": 229.4485626220703, + "learning_rate": 1.7273514295806454e-06, + "loss": 18.1232, + "step": 377210 + }, + { + "epoch": 0.7620082660988943, + "grad_norm": 67.4647216796875, + "learning_rate": 1.727087531109855e-06, + "loss": 19.872, + "step": 377220 + }, + { + "epoch": 0.762028466731578, + "grad_norm": 313.5301513671875, + "learning_rate": 1.7268236485908446e-06, + "loss": 21.8244, + "step": 377230 + }, + { + "epoch": 0.7620486673642618, + "grad_norm": 387.4441223144531, + "learning_rate": 1.7265597820248987e-06, + "loss": 15.3841, + "step": 377240 + }, + { + "epoch": 0.7620688679969456, + "grad_norm": 537.0117797851562, + "learning_rate": 1.7262959314133015e-06, + "loss": 24.9926, + "step": 377250 + }, + { + "epoch": 0.7620890686296294, + "grad_norm": 375.2696533203125, + "learning_rate": 1.7260320967573413e-06, + "loss": 17.9223, + "step": 377260 + }, + { + "epoch": 0.7621092692623133, + "grad_norm": 92.41923522949219, + "learning_rate": 1.7257682780583068e-06, + "loss": 21.5852, + "step": 377270 + }, + { + "epoch": 0.7621294698949971, + "grad_norm": 501.506591796875, + "learning_rate": 1.7255044753174778e-06, + "loss": 26.0706, + "step": 377280 + }, + { + "epoch": 0.7621496705276809, + "grad_norm": 665.3644409179688, + "learning_rate": 1.7252406885361416e-06, + "loss": 16.7149, + "step": 377290 + }, + { + "epoch": 0.7621698711603647, + "grad_norm": 637.6961059570312, + "learning_rate": 1.7249769177155879e-06, + "loss": 30.2187, + "step": 377300 + }, + { + "epoch": 
0.7621900717930485, + "grad_norm": 745.5427856445312, + "learning_rate": 1.724713162857099e-06, + "loss": 17.7424, + "step": 377310 + }, + { + "epoch": 0.7622102724257324, + "grad_norm": 108.52301025390625, + "learning_rate": 1.7244494239619592e-06, + "loss": 17.9354, + "step": 377320 + }, + { + "epoch": 0.7622304730584162, + "grad_norm": 512.7685546875, + "learning_rate": 1.7241857010314555e-06, + "loss": 11.6889, + "step": 377330 + }, + { + "epoch": 0.7622506736911, + "grad_norm": 328.822265625, + "learning_rate": 1.7239219940668771e-06, + "loss": 16.8154, + "step": 377340 + }, + { + "epoch": 0.7622708743237838, + "grad_norm": 314.33148193359375, + "learning_rate": 1.723658303069502e-06, + "loss": 19.154, + "step": 377350 + }, + { + "epoch": 0.7622910749564676, + "grad_norm": 368.0884704589844, + "learning_rate": 1.7233946280406193e-06, + "loss": 24.4742, + "step": 377360 + }, + { + "epoch": 0.7623112755891515, + "grad_norm": 575.9612426757812, + "learning_rate": 1.723130968981515e-06, + "loss": 23.5788, + "step": 377370 + }, + { + "epoch": 0.7623314762218353, + "grad_norm": 393.7576904296875, + "learning_rate": 1.722867325893473e-06, + "loss": 22.748, + "step": 377380 + }, + { + "epoch": 0.7623516768545191, + "grad_norm": 506.5479736328125, + "learning_rate": 1.7226036987777767e-06, + "loss": 16.1577, + "step": 377390 + }, + { + "epoch": 0.7623718774872029, + "grad_norm": 405.98748779296875, + "learning_rate": 1.7223400876357144e-06, + "loss": 21.2211, + "step": 377400 + }, + { + "epoch": 0.7623920781198867, + "grad_norm": 191.85047912597656, + "learning_rate": 1.7220764924685685e-06, + "loss": 18.3298, + "step": 377410 + }, + { + "epoch": 0.7624122787525706, + "grad_norm": 398.3681640625, + "learning_rate": 1.7218129132776224e-06, + "loss": 25.5017, + "step": 377420 + }, + { + "epoch": 0.7624324793852544, + "grad_norm": 109.55448150634766, + "learning_rate": 1.7215493500641645e-06, + "loss": 12.3465, + "step": 377430 + }, + { + "epoch": 0.7624526800179382, + "grad_norm": 247.6683349609375, + "learning_rate": 1.721285802829476e-06, + "loss": 23.2204, + "step": 377440 + }, + { + "epoch": 0.762472880650622, + "grad_norm": 323.5118408203125, + "learning_rate": 1.7210222715748443e-06, + "loss": 21.2569, + "step": 377450 + }, + { + "epoch": 0.7624930812833058, + "grad_norm": 557.070556640625, + "learning_rate": 1.7207587563015505e-06, + "loss": 35.0821, + "step": 377460 + }, + { + "epoch": 0.7625132819159897, + "grad_norm": 469.9717102050781, + "learning_rate": 1.720495257010883e-06, + "loss": 14.0372, + "step": 377470 + }, + { + "epoch": 0.7625334825486734, + "grad_norm": 512.7113037109375, + "learning_rate": 1.7202317737041235e-06, + "loss": 19.707, + "step": 377480 + }, + { + "epoch": 0.7625536831813572, + "grad_norm": 335.61767578125, + "learning_rate": 1.7199683063825544e-06, + "loss": 20.3734, + "step": 377490 + }, + { + "epoch": 0.762573883814041, + "grad_norm": 0.8463869690895081, + "learning_rate": 1.7197048550474643e-06, + "loss": 11.2797, + "step": 377500 + }, + { + "epoch": 0.7625940844467248, + "grad_norm": 324.1897888183594, + "learning_rate": 1.719441419700133e-06, + "loss": 28.4015, + "step": 377510 + }, + { + "epoch": 0.7626142850794086, + "grad_norm": 509.4043273925781, + "learning_rate": 1.7191780003418485e-06, + "loss": 24.1996, + "step": 377520 + }, + { + "epoch": 0.7626344857120925, + "grad_norm": 212.92823791503906, + "learning_rate": 1.7189145969738918e-06, + "loss": 21.1147, + "step": 377530 + }, + { + "epoch": 0.7626546863447763, + "grad_norm": 132.7948455810547, + 
"learning_rate": 1.718651209597546e-06, + "loss": 14.4377, + "step": 377540 + }, + { + "epoch": 0.7626748869774601, + "grad_norm": 620.8007202148438, + "learning_rate": 1.7183878382140978e-06, + "loss": 17.0333, + "step": 377550 + }, + { + "epoch": 0.7626950876101439, + "grad_norm": 89.54065704345703, + "learning_rate": 1.7181244828248294e-06, + "loss": 10.121, + "step": 377560 + }, + { + "epoch": 0.7627152882428277, + "grad_norm": 311.8543701171875, + "learning_rate": 1.7178611434310221e-06, + "loss": 36.6704, + "step": 377570 + }, + { + "epoch": 0.7627354888755116, + "grad_norm": 1.616960048675537, + "learning_rate": 1.7175978200339622e-06, + "loss": 9.524, + "step": 377580 + }, + { + "epoch": 0.7627556895081954, + "grad_norm": 124.14556121826172, + "learning_rate": 1.7173345126349339e-06, + "loss": 8.1849, + "step": 377590 + }, + { + "epoch": 0.7627758901408792, + "grad_norm": 258.3683776855469, + "learning_rate": 1.7170712212352187e-06, + "loss": 17.2788, + "step": 377600 + }, + { + "epoch": 0.762796090773563, + "grad_norm": 400.57159423828125, + "learning_rate": 1.7168079458360987e-06, + "loss": 26.6816, + "step": 377610 + }, + { + "epoch": 0.7628162914062468, + "grad_norm": 417.3874206542969, + "learning_rate": 1.7165446864388608e-06, + "loss": 14.7793, + "step": 377620 + }, + { + "epoch": 0.7628364920389307, + "grad_norm": 473.7869567871094, + "learning_rate": 1.7162814430447849e-06, + "loss": 18.1507, + "step": 377630 + }, + { + "epoch": 0.7628566926716145, + "grad_norm": 734.55078125, + "learning_rate": 1.7160182156551542e-06, + "loss": 16.1951, + "step": 377640 + }, + { + "epoch": 0.7628768933042983, + "grad_norm": 353.12628173828125, + "learning_rate": 1.7157550042712517e-06, + "loss": 22.855, + "step": 377650 + }, + { + "epoch": 0.7628970939369821, + "grad_norm": 220.4140167236328, + "learning_rate": 1.7154918088943629e-06, + "loss": 11.4678, + "step": 377660 + }, + { + "epoch": 0.762917294569666, + "grad_norm": 664.710205078125, + "learning_rate": 1.7152286295257687e-06, + "loss": 31.2423, + "step": 377670 + }, + { + "epoch": 0.7629374952023498, + "grad_norm": 313.5198059082031, + "learning_rate": 1.7149654661667503e-06, + "loss": 15.2182, + "step": 377680 + }, + { + "epoch": 0.7629576958350336, + "grad_norm": 550.048583984375, + "learning_rate": 1.714702318818593e-06, + "loss": 20.6036, + "step": 377690 + }, + { + "epoch": 0.7629778964677174, + "grad_norm": 634.5236206054688, + "learning_rate": 1.7144391874825784e-06, + "loss": 22.9805, + "step": 377700 + }, + { + "epoch": 0.7629980971004012, + "grad_norm": 112.58702850341797, + "learning_rate": 1.714176072159987e-06, + "loss": 34.8407, + "step": 377710 + }, + { + "epoch": 0.763018297733085, + "grad_norm": 234.19915771484375, + "learning_rate": 1.7139129728521048e-06, + "loss": 16.3787, + "step": 377720 + }, + { + "epoch": 0.7630384983657689, + "grad_norm": 320.8688049316406, + "learning_rate": 1.7136498895602105e-06, + "loss": 17.2428, + "step": 377730 + }, + { + "epoch": 0.7630586989984526, + "grad_norm": 265.7731628417969, + "learning_rate": 1.7133868222855893e-06, + "loss": 16.528, + "step": 377740 + }, + { + "epoch": 0.7630788996311364, + "grad_norm": 323.55340576171875, + "learning_rate": 1.7131237710295207e-06, + "loss": 22.871, + "step": 377750 + }, + { + "epoch": 0.7630991002638202, + "grad_norm": 351.5572814941406, + "learning_rate": 1.7128607357932903e-06, + "loss": 17.6728, + "step": 377760 + }, + { + "epoch": 0.763119300896504, + "grad_norm": 440.6859436035156, + "learning_rate": 1.7125977165781772e-06, + "loss": 
19.3434, + "step": 377770 + }, + { + "epoch": 0.7631395015291879, + "grad_norm": 445.04156494140625, + "learning_rate": 1.7123347133854628e-06, + "loss": 19.0785, + "step": 377780 + }, + { + "epoch": 0.7631597021618717, + "grad_norm": 332.620361328125, + "learning_rate": 1.7120717262164322e-06, + "loss": 20.1409, + "step": 377790 + }, + { + "epoch": 0.7631799027945555, + "grad_norm": 518.7366333007812, + "learning_rate": 1.7118087550723633e-06, + "loss": 14.5072, + "step": 377800 + }, + { + "epoch": 0.7632001034272393, + "grad_norm": 320.44488525390625, + "learning_rate": 1.711545799954541e-06, + "loss": 30.7372, + "step": 377810 + }, + { + "epoch": 0.7632203040599231, + "grad_norm": 953.9003295898438, + "learning_rate": 1.7112828608642463e-06, + "loss": 12.0028, + "step": 377820 + }, + { + "epoch": 0.763240504692607, + "grad_norm": 184.9840850830078, + "learning_rate": 1.7110199378027581e-06, + "loss": 22.6699, + "step": 377830 + }, + { + "epoch": 0.7632607053252908, + "grad_norm": 618.4388427734375, + "learning_rate": 1.7107570307713618e-06, + "loss": 16.9055, + "step": 377840 + }, + { + "epoch": 0.7632809059579746, + "grad_norm": 95.88978576660156, + "learning_rate": 1.710494139771336e-06, + "loss": 11.2177, + "step": 377850 + }, + { + "epoch": 0.7633011065906584, + "grad_norm": 228.05349731445312, + "learning_rate": 1.7102312648039616e-06, + "loss": 20.6369, + "step": 377860 + }, + { + "epoch": 0.7633213072233422, + "grad_norm": 472.37579345703125, + "learning_rate": 1.7099684058705212e-06, + "loss": 22.444, + "step": 377870 + }, + { + "epoch": 0.7633415078560261, + "grad_norm": 130.0430450439453, + "learning_rate": 1.7097055629722991e-06, + "loss": 5.8776, + "step": 377880 + }, + { + "epoch": 0.7633617084887099, + "grad_norm": 333.94732666015625, + "learning_rate": 1.7094427361105693e-06, + "loss": 13.7401, + "step": 377890 + }, + { + "epoch": 0.7633819091213937, + "grad_norm": 137.74929809570312, + "learning_rate": 1.709179925286617e-06, + "loss": 26.0631, + "step": 377900 + }, + { + "epoch": 0.7634021097540775, + "grad_norm": 956.1805419921875, + "learning_rate": 1.7089171305017238e-06, + "loss": 19.5247, + "step": 377910 + }, + { + "epoch": 0.7634223103867613, + "grad_norm": 302.4202575683594, + "learning_rate": 1.7086543517571697e-06, + "loss": 27.5805, + "step": 377920 + }, + { + "epoch": 0.7634425110194452, + "grad_norm": 483.156005859375, + "learning_rate": 1.7083915890542329e-06, + "loss": 25.8959, + "step": 377930 + }, + { + "epoch": 0.763462711652129, + "grad_norm": 574.2993774414062, + "learning_rate": 1.7081288423941967e-06, + "loss": 34.5771, + "step": 377940 + }, + { + "epoch": 0.7634829122848128, + "grad_norm": 16.331708908081055, + "learning_rate": 1.7078661117783447e-06, + "loss": 16.448, + "step": 377950 + }, + { + "epoch": 0.7635031129174966, + "grad_norm": 415.71783447265625, + "learning_rate": 1.7076033972079503e-06, + "loss": 17.2632, + "step": 377960 + }, + { + "epoch": 0.7635233135501804, + "grad_norm": 156.38693237304688, + "learning_rate": 1.7073406986842982e-06, + "loss": 17.7233, + "step": 377970 + }, + { + "epoch": 0.7635435141828643, + "grad_norm": 158.8320770263672, + "learning_rate": 1.7070780162086691e-06, + "loss": 19.0591, + "step": 377980 + }, + { + "epoch": 0.7635637148155481, + "grad_norm": 412.1049499511719, + "learning_rate": 1.7068153497823431e-06, + "loss": 16.6338, + "step": 377990 + }, + { + "epoch": 0.7635839154482318, + "grad_norm": 572.3352661132812, + "learning_rate": 1.7065526994065973e-06, + "loss": 8.5546, + "step": 378000 + }, + { + 
"epoch": 0.7636041160809156, + "grad_norm": 261.773193359375, + "learning_rate": 1.7062900650827163e-06, + "loss": 13.3251, + "step": 378010 + }, + { + "epoch": 0.7636243167135994, + "grad_norm": 413.029541015625, + "learning_rate": 1.7060274468119782e-06, + "loss": 31.3703, + "step": 378020 + }, + { + "epoch": 0.7636445173462832, + "grad_norm": 229.0946044921875, + "learning_rate": 1.7057648445956609e-06, + "loss": 14.7214, + "step": 378030 + }, + { + "epoch": 0.7636647179789671, + "grad_norm": 355.132080078125, + "learning_rate": 1.7055022584350477e-06, + "loss": 11.482, + "step": 378040 + }, + { + "epoch": 0.7636849186116509, + "grad_norm": 211.0098876953125, + "learning_rate": 1.7052396883314154e-06, + "loss": 16.8089, + "step": 378050 + }, + { + "epoch": 0.7637051192443347, + "grad_norm": 96.66175079345703, + "learning_rate": 1.704977134286047e-06, + "loss": 10.922, + "step": 378060 + }, + { + "epoch": 0.7637253198770185, + "grad_norm": 57.73271942138672, + "learning_rate": 1.7047145963002187e-06, + "loss": 15.9487, + "step": 378070 + }, + { + "epoch": 0.7637455205097023, + "grad_norm": 756.5963745117188, + "learning_rate": 1.7044520743752135e-06, + "loss": 20.7602, + "step": 378080 + }, + { + "epoch": 0.7637657211423862, + "grad_norm": 596.1071166992188, + "learning_rate": 1.7041895685123088e-06, + "loss": 25.3244, + "step": 378090 + }, + { + "epoch": 0.76378592177507, + "grad_norm": 689.9220581054688, + "learning_rate": 1.7039270787127832e-06, + "loss": 24.2241, + "step": 378100 + }, + { + "epoch": 0.7638061224077538, + "grad_norm": 252.2925567626953, + "learning_rate": 1.7036646049779188e-06, + "loss": 24.4682, + "step": 378110 + }, + { + "epoch": 0.7638263230404376, + "grad_norm": 465.9823913574219, + "learning_rate": 1.7034021473089918e-06, + "loss": 17.5219, + "step": 378120 + }, + { + "epoch": 0.7638465236731214, + "grad_norm": 342.3565368652344, + "learning_rate": 1.7031397057072846e-06, + "loss": 13.4319, + "step": 378130 + }, + { + "epoch": 0.7638667243058053, + "grad_norm": 216.5574188232422, + "learning_rate": 1.702877280174074e-06, + "loss": 12.3569, + "step": 378140 + }, + { + "epoch": 0.7638869249384891, + "grad_norm": 269.8787536621094, + "learning_rate": 1.7026148707106388e-06, + "loss": 14.3361, + "step": 378150 + }, + { + "epoch": 0.7639071255711729, + "grad_norm": 672.4157104492188, + "learning_rate": 1.7023524773182598e-06, + "loss": 10.6181, + "step": 378160 + }, + { + "epoch": 0.7639273262038567, + "grad_norm": 133.22813415527344, + "learning_rate": 1.7020900999982154e-06, + "loss": 9.1915, + "step": 378170 + }, + { + "epoch": 0.7639475268365405, + "grad_norm": 669.6143188476562, + "learning_rate": 1.7018277387517817e-06, + "loss": 21.729, + "step": 378180 + }, + { + "epoch": 0.7639677274692244, + "grad_norm": 131.20468139648438, + "learning_rate": 1.70156539358024e-06, + "loss": 18.019, + "step": 378190 + }, + { + "epoch": 0.7639879281019082, + "grad_norm": 433.8664855957031, + "learning_rate": 1.7013030644848698e-06, + "loss": 17.6022, + "step": 378200 + }, + { + "epoch": 0.764008128734592, + "grad_norm": 238.54808044433594, + "learning_rate": 1.7010407514669485e-06, + "loss": 32.016, + "step": 378210 + }, + { + "epoch": 0.7640283293672758, + "grad_norm": 586.1495971679688, + "learning_rate": 1.7007784545277528e-06, + "loss": 11.9752, + "step": 378220 + }, + { + "epoch": 0.7640485299999596, + "grad_norm": 231.04721069335938, + "learning_rate": 1.700516173668565e-06, + "loss": 19.4538, + "step": 378230 + }, + { + "epoch": 0.7640687306326435, + "grad_norm": 
713.0658569335938, + "learning_rate": 1.7002539088906606e-06, + "loss": 32.0958, + "step": 378240 + }, + { + "epoch": 0.7640889312653272, + "grad_norm": 42.475868225097656, + "learning_rate": 1.6999916601953165e-06, + "loss": 23.6006, + "step": 378250 + }, + { + "epoch": 0.764109131898011, + "grad_norm": 836.8502197265625, + "learning_rate": 1.6997294275838134e-06, + "loss": 32.8616, + "step": 378260 + }, + { + "epoch": 0.7641293325306948, + "grad_norm": 311.5104675292969, + "learning_rate": 1.69946721105743e-06, + "loss": 14.5967, + "step": 378270 + }, + { + "epoch": 0.7641495331633786, + "grad_norm": 401.9920654296875, + "learning_rate": 1.6992050106174435e-06, + "loss": 15.822, + "step": 378280 + }, + { + "epoch": 0.7641697337960625, + "grad_norm": 743.8467407226562, + "learning_rate": 1.6989428262651296e-06, + "loss": 27.7331, + "step": 378290 + }, + { + "epoch": 0.7641899344287463, + "grad_norm": 199.25067138671875, + "learning_rate": 1.6986806580017695e-06, + "loss": 11.5302, + "step": 378300 + }, + { + "epoch": 0.7642101350614301, + "grad_norm": 1117.6463623046875, + "learning_rate": 1.6984185058286396e-06, + "loss": 26.7681, + "step": 378310 + }, + { + "epoch": 0.7642303356941139, + "grad_norm": 425.97314453125, + "learning_rate": 1.698156369747016e-06, + "loss": 22.1579, + "step": 378320 + }, + { + "epoch": 0.7642505363267977, + "grad_norm": 516.744140625, + "learning_rate": 1.6978942497581797e-06, + "loss": 21.7841, + "step": 378330 + }, + { + "epoch": 0.7642707369594816, + "grad_norm": 193.90664672851562, + "learning_rate": 1.6976321458634036e-06, + "loss": 12.7344, + "step": 378340 + }, + { + "epoch": 0.7642909375921654, + "grad_norm": 492.00347900390625, + "learning_rate": 1.6973700580639707e-06, + "loss": 16.3136, + "step": 378350 + }, + { + "epoch": 0.7643111382248492, + "grad_norm": 647.6111450195312, + "learning_rate": 1.6971079863611534e-06, + "loss": 19.5634, + "step": 378360 + }, + { + "epoch": 0.764331338857533, + "grad_norm": 588.0462036132812, + "learning_rate": 1.6968459307562329e-06, + "loss": 17.8978, + "step": 378370 + }, + { + "epoch": 0.7643515394902168, + "grad_norm": 337.4934387207031, + "learning_rate": 1.6965838912504845e-06, + "loss": 20.2251, + "step": 378380 + }, + { + "epoch": 0.7643717401229007, + "grad_norm": 49.08926010131836, + "learning_rate": 1.6963218678451843e-06, + "loss": 26.6575, + "step": 378390 + }, + { + "epoch": 0.7643919407555845, + "grad_norm": 596.4000854492188, + "learning_rate": 1.6960598605416117e-06, + "loss": 14.4334, + "step": 378400 + }, + { + "epoch": 0.7644121413882683, + "grad_norm": 162.82354736328125, + "learning_rate": 1.6957978693410414e-06, + "loss": 20.7228, + "step": 378410 + }, + { + "epoch": 0.7644323420209521, + "grad_norm": 285.4637756347656, + "learning_rate": 1.695535894244753e-06, + "loss": 15.7035, + "step": 378420 + }, + { + "epoch": 0.7644525426536359, + "grad_norm": 299.3540954589844, + "learning_rate": 1.695273935254022e-06, + "loss": 20.7704, + "step": 378430 + }, + { + "epoch": 0.7644727432863198, + "grad_norm": 165.3634796142578, + "learning_rate": 1.6950119923701235e-06, + "loss": 18.8815, + "step": 378440 + }, + { + "epoch": 0.7644929439190036, + "grad_norm": 269.2787780761719, + "learning_rate": 1.6947500655943373e-06, + "loss": 19.3063, + "step": 378450 + }, + { + "epoch": 0.7645131445516874, + "grad_norm": 425.3699035644531, + "learning_rate": 1.6944881549279384e-06, + "loss": 23.9079, + "step": 378460 + }, + { + "epoch": 0.7645333451843712, + "grad_norm": 564.7520751953125, + "learning_rate": 
1.6942262603722015e-06, + "loss": 25.971, + "step": 378470 + }, + { + "epoch": 0.764553545817055, + "grad_norm": 308.5755615234375, + "learning_rate": 1.6939643819284051e-06, + "loss": 12.3883, + "step": 378480 + }, + { + "epoch": 0.7645737464497389, + "grad_norm": 349.83050537109375, + "learning_rate": 1.6937025195978286e-06, + "loss": 15.6415, + "step": 378490 + }, + { + "epoch": 0.7645939470824227, + "grad_norm": 453.6837158203125, + "learning_rate": 1.6934406733817417e-06, + "loss": 13.7756, + "step": 378500 + }, + { + "epoch": 0.7646141477151064, + "grad_norm": 269.7435607910156, + "learning_rate": 1.6931788432814233e-06, + "loss": 26.2361, + "step": 378510 + }, + { + "epoch": 0.7646343483477902, + "grad_norm": 241.135986328125, + "learning_rate": 1.6929170292981528e-06, + "loss": 18.1771, + "step": 378520 + }, + { + "epoch": 0.764654548980474, + "grad_norm": 360.90704345703125, + "learning_rate": 1.692655231433203e-06, + "loss": 16.855, + "step": 378530 + }, + { + "epoch": 0.7646747496131578, + "grad_norm": 507.8138427734375, + "learning_rate": 1.6923934496878485e-06, + "loss": 11.4303, + "step": 378540 + }, + { + "epoch": 0.7646949502458417, + "grad_norm": 195.2571258544922, + "learning_rate": 1.6921316840633678e-06, + "loss": 14.8914, + "step": 378550 + }, + { + "epoch": 0.7647151508785255, + "grad_norm": 1136.0404052734375, + "learning_rate": 1.6918699345610395e-06, + "loss": 22.6831, + "step": 378560 + }, + { + "epoch": 0.7647353515112093, + "grad_norm": 444.76324462890625, + "learning_rate": 1.6916082011821322e-06, + "loss": 19.5697, + "step": 378570 + }, + { + "epoch": 0.7647555521438931, + "grad_norm": 633.3739624023438, + "learning_rate": 1.6913464839279254e-06, + "loss": 19.0122, + "step": 378580 + }, + { + "epoch": 0.764775752776577, + "grad_norm": 669.9867553710938, + "learning_rate": 1.6910847827996961e-06, + "loss": 16.7808, + "step": 378590 + }, + { + "epoch": 0.7647959534092608, + "grad_norm": 761.240478515625, + "learning_rate": 1.6908230977987184e-06, + "loss": 19.101, + "step": 378600 + }, + { + "epoch": 0.7648161540419446, + "grad_norm": 447.00750732421875, + "learning_rate": 1.6905614289262657e-06, + "loss": 21.746, + "step": 378610 + }, + { + "epoch": 0.7648363546746284, + "grad_norm": 213.79307556152344, + "learning_rate": 1.690299776183617e-06, + "loss": 19.0353, + "step": 378620 + }, + { + "epoch": 0.7648565553073122, + "grad_norm": 227.6842498779297, + "learning_rate": 1.6900381395720455e-06, + "loss": 12.6426, + "step": 378630 + }, + { + "epoch": 0.764876755939996, + "grad_norm": 142.15248107910156, + "learning_rate": 1.6897765190928257e-06, + "loss": 17.1625, + "step": 378640 + }, + { + "epoch": 0.7648969565726799, + "grad_norm": 210.0847625732422, + "learning_rate": 1.6895149147472344e-06, + "loss": 13.9529, + "step": 378650 + }, + { + "epoch": 0.7649171572053637, + "grad_norm": 459.7899169921875, + "learning_rate": 1.6892533265365445e-06, + "loss": 13.3628, + "step": 378660 + }, + { + "epoch": 0.7649373578380475, + "grad_norm": 865.4264526367188, + "learning_rate": 1.6889917544620342e-06, + "loss": 23.15, + "step": 378670 + }, + { + "epoch": 0.7649575584707313, + "grad_norm": 523.2117309570312, + "learning_rate": 1.6887301985249754e-06, + "loss": 14.8688, + "step": 378680 + }, + { + "epoch": 0.7649777591034151, + "grad_norm": 149.56378173828125, + "learning_rate": 1.6884686587266446e-06, + "loss": 19.8172, + "step": 378690 + }, + { + "epoch": 0.764997959736099, + "grad_norm": 671.8781127929688, + "learning_rate": 1.6882071350683165e-06, + "loss": 16.2385, 
+ "step": 378700 + }, + { + "epoch": 0.7650181603687828, + "grad_norm": 197.93997192382812, + "learning_rate": 1.6879456275512634e-06, + "loss": 12.6514, + "step": 378710 + }, + { + "epoch": 0.7650383610014666, + "grad_norm": 417.5447998046875, + "learning_rate": 1.6876841361767637e-06, + "loss": 20.2361, + "step": 378720 + }, + { + "epoch": 0.7650585616341504, + "grad_norm": 414.91107177734375, + "learning_rate": 1.6874226609460875e-06, + "loss": 16.3534, + "step": 378730 + }, + { + "epoch": 0.7650787622668342, + "grad_norm": 372.59368896484375, + "learning_rate": 1.6871612018605131e-06, + "loss": 35.1785, + "step": 378740 + }, + { + "epoch": 0.7650989628995181, + "grad_norm": 215.13145446777344, + "learning_rate": 1.6868997589213138e-06, + "loss": 14.9734, + "step": 378750 + }, + { + "epoch": 0.7651191635322018, + "grad_norm": 149.23243713378906, + "learning_rate": 1.6866383321297614e-06, + "loss": 21.2211, + "step": 378760 + }, + { + "epoch": 0.7651393641648856, + "grad_norm": 367.0497741699219, + "learning_rate": 1.6863769214871334e-06, + "loss": 16.1369, + "step": 378770 + }, + { + "epoch": 0.7651595647975694, + "grad_norm": 1.4534255266189575, + "learning_rate": 1.6861155269947022e-06, + "loss": 19.1285, + "step": 378780 + }, + { + "epoch": 0.7651797654302532, + "grad_norm": 639.5684814453125, + "learning_rate": 1.6858541486537406e-06, + "loss": 21.6637, + "step": 378790 + }, + { + "epoch": 0.765199966062937, + "grad_norm": 999.3773803710938, + "learning_rate": 1.6855927864655241e-06, + "loss": 21.8322, + "step": 378800 + }, + { + "epoch": 0.7652201666956209, + "grad_norm": 87.37020111083984, + "learning_rate": 1.6853314404313275e-06, + "loss": 13.0347, + "step": 378810 + }, + { + "epoch": 0.7652403673283047, + "grad_norm": 579.7806396484375, + "learning_rate": 1.6850701105524236e-06, + "loss": 34.897, + "step": 378820 + }, + { + "epoch": 0.7652605679609885, + "grad_norm": 232.5875701904297, + "learning_rate": 1.6848087968300848e-06, + "loss": 16.0784, + "step": 378830 + }, + { + "epoch": 0.7652807685936723, + "grad_norm": 61.81998825073242, + "learning_rate": 1.684547499265587e-06, + "loss": 18.8138, + "step": 378840 + }, + { + "epoch": 0.7653009692263562, + "grad_norm": 482.0547180175781, + "learning_rate": 1.6842862178602026e-06, + "loss": 12.2082, + "step": 378850 + }, + { + "epoch": 0.76532116985904, + "grad_norm": 55.65410614013672, + "learning_rate": 1.6840249526152036e-06, + "loss": 13.8418, + "step": 378860 + }, + { + "epoch": 0.7653413704917238, + "grad_norm": 249.66575622558594, + "learning_rate": 1.6837637035318643e-06, + "loss": 14.0964, + "step": 378870 + }, + { + "epoch": 0.7653615711244076, + "grad_norm": 510.0167541503906, + "learning_rate": 1.6835024706114605e-06, + "loss": 25.2258, + "step": 378880 + }, + { + "epoch": 0.7653817717570914, + "grad_norm": 573.1727905273438, + "learning_rate": 1.6832412538552634e-06, + "loss": 12.4214, + "step": 378890 + }, + { + "epoch": 0.7654019723897753, + "grad_norm": 451.53851318359375, + "learning_rate": 1.6829800532645447e-06, + "loss": 6.745, + "step": 378900 + }, + { + "epoch": 0.7654221730224591, + "grad_norm": 620.3411865234375, + "learning_rate": 1.6827188688405805e-06, + "loss": 31.8535, + "step": 378910 + }, + { + "epoch": 0.7654423736551429, + "grad_norm": 703.267578125, + "learning_rate": 1.6824577005846421e-06, + "loss": 17.4161, + "step": 378920 + }, + { + "epoch": 0.7654625742878267, + "grad_norm": 76.35845184326172, + "learning_rate": 1.6821965484980007e-06, + "loss": 15.2875, + "step": 378930 + }, + { + "epoch": 
0.7654827749205105, + "grad_norm": 672.7890014648438, + "learning_rate": 1.6819354125819327e-06, + "loss": 19.937, + "step": 378940 + }, + { + "epoch": 0.7655029755531944, + "grad_norm": 501.7395324707031, + "learning_rate": 1.6816742928377072e-06, + "loss": 8.8874, + "step": 378950 + }, + { + "epoch": 0.7655231761858782, + "grad_norm": 189.4306182861328, + "learning_rate": 1.6814131892666009e-06, + "loss": 13.2997, + "step": 378960 + }, + { + "epoch": 0.765543376818562, + "grad_norm": 522.3626098632812, + "learning_rate": 1.6811521018698824e-06, + "loss": 25.9449, + "step": 378970 + }, + { + "epoch": 0.7655635774512458, + "grad_norm": 445.63043212890625, + "learning_rate": 1.6808910306488274e-06, + "loss": 15.6408, + "step": 378980 + }, + { + "epoch": 0.7655837780839296, + "grad_norm": 310.6912536621094, + "learning_rate": 1.6806299756047068e-06, + "loss": 17.2769, + "step": 378990 + }, + { + "epoch": 0.7656039787166135, + "grad_norm": 781.8789672851562, + "learning_rate": 1.680368936738792e-06, + "loss": 12.3257, + "step": 379000 + }, + { + "epoch": 0.7656241793492973, + "grad_norm": 924.9375610351562, + "learning_rate": 1.680107914052358e-06, + "loss": 24.7111, + "step": 379010 + }, + { + "epoch": 0.765644379981981, + "grad_norm": 133.7320098876953, + "learning_rate": 1.6798469075466734e-06, + "loss": 12.1958, + "step": 379020 + }, + { + "epoch": 0.7656645806146648, + "grad_norm": 1912.49951171875, + "learning_rate": 1.6795859172230139e-06, + "loss": 40.6826, + "step": 379030 + }, + { + "epoch": 0.7656847812473486, + "grad_norm": 36.367393493652344, + "learning_rate": 1.6793249430826502e-06, + "loss": 22.006, + "step": 379040 + }, + { + "epoch": 0.7657049818800324, + "grad_norm": 163.1468963623047, + "learning_rate": 1.679063985126852e-06, + "loss": 15.4548, + "step": 379050 + }, + { + "epoch": 0.7657251825127163, + "grad_norm": 444.4681396484375, + "learning_rate": 1.6788030433568953e-06, + "loss": 10.0607, + "step": 379060 + }, + { + "epoch": 0.7657453831454001, + "grad_norm": 319.3157653808594, + "learning_rate": 1.678542117774049e-06, + "loss": 14.2482, + "step": 379070 + }, + { + "epoch": 0.7657655837780839, + "grad_norm": 244.03370666503906, + "learning_rate": 1.6782812083795846e-06, + "loss": 34.7048, + "step": 379080 + }, + { + "epoch": 0.7657857844107677, + "grad_norm": 257.67388916015625, + "learning_rate": 1.6780203151747742e-06, + "loss": 8.6153, + "step": 379090 + }, + { + "epoch": 0.7658059850434515, + "grad_norm": 389.9267883300781, + "learning_rate": 1.6777594381608936e-06, + "loss": 26.8459, + "step": 379100 + }, + { + "epoch": 0.7658261856761354, + "grad_norm": 391.3517150878906, + "learning_rate": 1.6774985773392071e-06, + "loss": 23.6509, + "step": 379110 + }, + { + "epoch": 0.7658463863088192, + "grad_norm": 363.4808349609375, + "learning_rate": 1.6772377327109896e-06, + "loss": 11.718, + "step": 379120 + }, + { + "epoch": 0.765866586941503, + "grad_norm": 535.1600341796875, + "learning_rate": 1.6769769042775141e-06, + "loss": 20.8023, + "step": 379130 + }, + { + "epoch": 0.7658867875741868, + "grad_norm": 458.2337646484375, + "learning_rate": 1.67671609204005e-06, + "loss": 25.0361, + "step": 379140 + }, + { + "epoch": 0.7659069882068706, + "grad_norm": 545.01904296875, + "learning_rate": 1.676455295999867e-06, + "loss": 15.2905, + "step": 379150 + }, + { + "epoch": 0.7659271888395545, + "grad_norm": 502.02215576171875, + "learning_rate": 1.6761945161582382e-06, + "loss": 22.2201, + "step": 379160 + }, + { + "epoch": 0.7659473894722383, + "grad_norm": 
293.74493408203125, + "learning_rate": 1.675933752516437e-06, + "loss": 24.9691, + "step": 379170 + }, + { + "epoch": 0.7659675901049221, + "grad_norm": 410.0480041503906, + "learning_rate": 1.675673005075728e-06, + "loss": 18.7837, + "step": 379180 + }, + { + "epoch": 0.7659877907376059, + "grad_norm": 381.4063720703125, + "learning_rate": 1.6754122738373863e-06, + "loss": 31.3284, + "step": 379190 + }, + { + "epoch": 0.7660079913702897, + "grad_norm": 576.6478881835938, + "learning_rate": 1.6751515588026828e-06, + "loss": 17.7734, + "step": 379200 + }, + { + "epoch": 0.7660281920029736, + "grad_norm": 307.06597900390625, + "learning_rate": 1.674890859972888e-06, + "loss": 16.9801, + "step": 379210 + }, + { + "epoch": 0.7660483926356574, + "grad_norm": 795.3487548828125, + "learning_rate": 1.6746301773492701e-06, + "loss": 23.2835, + "step": 379220 + }, + { + "epoch": 0.7660685932683412, + "grad_norm": 300.02984619140625, + "learning_rate": 1.6743695109331027e-06, + "loss": 12.2691, + "step": 379230 + }, + { + "epoch": 0.766088793901025, + "grad_norm": 522.2974243164062, + "learning_rate": 1.6741088607256556e-06, + "loss": 25.5154, + "step": 379240 + }, + { + "epoch": 0.7661089945337088, + "grad_norm": 554.88232421875, + "learning_rate": 1.6738482267281963e-06, + "loss": 22.2439, + "step": 379250 + }, + { + "epoch": 0.7661291951663927, + "grad_norm": 266.3728332519531, + "learning_rate": 1.6735876089419973e-06, + "loss": 16.9184, + "step": 379260 + }, + { + "epoch": 0.7661493957990765, + "grad_norm": 210.74923706054688, + "learning_rate": 1.6733270073683305e-06, + "loss": 14.001, + "step": 379270 + }, + { + "epoch": 0.7661695964317602, + "grad_norm": 94.46199798583984, + "learning_rate": 1.6730664220084647e-06, + "loss": 16.1184, + "step": 379280 + }, + { + "epoch": 0.766189797064444, + "grad_norm": 274.54150390625, + "learning_rate": 1.6728058528636682e-06, + "loss": 15.9896, + "step": 379290 + }, + { + "epoch": 0.7662099976971278, + "grad_norm": 341.462646484375, + "learning_rate": 1.6725452999352137e-06, + "loss": 35.1971, + "step": 379300 + }, + { + "epoch": 0.7662301983298117, + "grad_norm": 339.0533142089844, + "learning_rate": 1.6722847632243699e-06, + "loss": 11.7799, + "step": 379310 + }, + { + "epoch": 0.7662503989624955, + "grad_norm": 208.22216796875, + "learning_rate": 1.6720242427324045e-06, + "loss": 14.5783, + "step": 379320 + }, + { + "epoch": 0.7662705995951793, + "grad_norm": 232.9193572998047, + "learning_rate": 1.6717637384605917e-06, + "loss": 11.5119, + "step": 379330 + }, + { + "epoch": 0.7662908002278631, + "grad_norm": 208.89015197753906, + "learning_rate": 1.6715032504101968e-06, + "loss": 10.4471, + "step": 379340 + }, + { + "epoch": 0.7663110008605469, + "grad_norm": 184.51535034179688, + "learning_rate": 1.671242778582493e-06, + "loss": 22.6559, + "step": 379350 + }, + { + "epoch": 0.7663312014932308, + "grad_norm": 313.58050537109375, + "learning_rate": 1.670982322978748e-06, + "loss": 9.7866, + "step": 379360 + }, + { + "epoch": 0.7663514021259146, + "grad_norm": 938.3388671875, + "learning_rate": 1.6707218836002298e-06, + "loss": 19.2327, + "step": 379370 + }, + { + "epoch": 0.7663716027585984, + "grad_norm": 487.086181640625, + "learning_rate": 1.6704614604482112e-06, + "loss": 18.2963, + "step": 379380 + }, + { + "epoch": 0.7663918033912822, + "grad_norm": 617.3723754882812, + "learning_rate": 1.67020105352396e-06, + "loss": 23.3614, + "step": 379390 + }, + { + "epoch": 0.766412004023966, + "grad_norm": 363.2884826660156, + "learning_rate": 
1.6699406628287423e-06, + "loss": 15.7168, + "step": 379400 + }, + { + "epoch": 0.7664322046566499, + "grad_norm": 303.956298828125, + "learning_rate": 1.6696802883638309e-06, + "loss": 23.3308, + "step": 379410 + }, + { + "epoch": 0.7664524052893337, + "grad_norm": 553.9864501953125, + "learning_rate": 1.6694199301304947e-06, + "loss": 12.3935, + "step": 379420 + }, + { + "epoch": 0.7664726059220175, + "grad_norm": 535.25537109375, + "learning_rate": 1.6691595881300026e-06, + "loss": 18.6105, + "step": 379430 + }, + { + "epoch": 0.7664928065547013, + "grad_norm": 86.4391098022461, + "learning_rate": 1.6688992623636201e-06, + "loss": 25.8113, + "step": 379440 + }, + { + "epoch": 0.7665130071873851, + "grad_norm": 404.76690673828125, + "learning_rate": 1.6686389528326214e-06, + "loss": 16.7689, + "step": 379450 + }, + { + "epoch": 0.766533207820069, + "grad_norm": 226.09596252441406, + "learning_rate": 1.6683786595382716e-06, + "loss": 15.676, + "step": 379460 + }, + { + "epoch": 0.7665534084527528, + "grad_norm": 346.31243896484375, + "learning_rate": 1.6681183824818387e-06, + "loss": 8.5206, + "step": 379470 + }, + { + "epoch": 0.7665736090854366, + "grad_norm": 447.7514343261719, + "learning_rate": 1.6678581216645928e-06, + "loss": 17.0264, + "step": 379480 + }, + { + "epoch": 0.7665938097181204, + "grad_norm": 532.4463500976562, + "learning_rate": 1.6675978770878055e-06, + "loss": 19.6064, + "step": 379490 + }, + { + "epoch": 0.7666140103508042, + "grad_norm": 216.76478576660156, + "learning_rate": 1.6673376487527382e-06, + "loss": 19.8939, + "step": 379500 + }, + { + "epoch": 0.766634210983488, + "grad_norm": 447.941162109375, + "learning_rate": 1.6670774366606635e-06, + "loss": 12.3378, + "step": 379510 + }, + { + "epoch": 0.7666544116161719, + "grad_norm": 737.269775390625, + "learning_rate": 1.6668172408128509e-06, + "loss": 18.2964, + "step": 379520 + }, + { + "epoch": 0.7666746122488556, + "grad_norm": 260.05401611328125, + "learning_rate": 1.6665570612105663e-06, + "loss": 9.79, + "step": 379530 + }, + { + "epoch": 0.7666948128815394, + "grad_norm": 428.9274597167969, + "learning_rate": 1.666296897855077e-06, + "loss": 16.6628, + "step": 379540 + }, + { + "epoch": 0.7667150135142232, + "grad_norm": 343.9048767089844, + "learning_rate": 1.6660367507476539e-06, + "loss": 19.8294, + "step": 379550 + }, + { + "epoch": 0.766735214146907, + "grad_norm": 892.6444702148438, + "learning_rate": 1.665776619889562e-06, + "loss": 28.0487, + "step": 379560 + }, + { + "epoch": 0.7667554147795909, + "grad_norm": 508.3505554199219, + "learning_rate": 1.6655165052820715e-06, + "loss": 9.3646, + "step": 379570 + }, + { + "epoch": 0.7667756154122747, + "grad_norm": 148.50218200683594, + "learning_rate": 1.6652564069264476e-06, + "loss": 9.8464, + "step": 379580 + }, + { + "epoch": 0.7667958160449585, + "grad_norm": 116.51488494873047, + "learning_rate": 1.6649963248239614e-06, + "loss": 26.0876, + "step": 379590 + }, + { + "epoch": 0.7668160166776423, + "grad_norm": 708.1141357421875, + "learning_rate": 1.6647362589758787e-06, + "loss": 28.6497, + "step": 379600 + }, + { + "epoch": 0.7668362173103261, + "grad_norm": 371.03057861328125, + "learning_rate": 1.6644762093834648e-06, + "loss": 11.6496, + "step": 379610 + }, + { + "epoch": 0.76685641794301, + "grad_norm": 274.0343017578125, + "learning_rate": 1.6642161760479913e-06, + "loss": 14.4446, + "step": 379620 + }, + { + "epoch": 0.7668766185756938, + "grad_norm": 322.36651611328125, + "learning_rate": 1.663956158970722e-06, + "loss": 16.6299, + 
"step": 379630 + }, + { + "epoch": 0.7668968192083776, + "grad_norm": 183.26901245117188, + "learning_rate": 1.6636961581529277e-06, + "loss": 17.6881, + "step": 379640 + }, + { + "epoch": 0.7669170198410614, + "grad_norm": 788.52392578125, + "learning_rate": 1.6634361735958731e-06, + "loss": 17.3198, + "step": 379650 + }, + { + "epoch": 0.7669372204737452, + "grad_norm": 446.07098388671875, + "learning_rate": 1.6631762053008244e-06, + "loss": 15.1415, + "step": 379660 + }, + { + "epoch": 0.7669574211064291, + "grad_norm": 570.2052001953125, + "learning_rate": 1.6629162532690517e-06, + "loss": 32.3281, + "step": 379670 + }, + { + "epoch": 0.7669776217391129, + "grad_norm": 355.68701171875, + "learning_rate": 1.6626563175018207e-06, + "loss": 10.5786, + "step": 379680 + }, + { + "epoch": 0.7669978223717967, + "grad_norm": 441.5621032714844, + "learning_rate": 1.6623963980003966e-06, + "loss": 16.6183, + "step": 379690 + }, + { + "epoch": 0.7670180230044805, + "grad_norm": 197.07032775878906, + "learning_rate": 1.6621364947660472e-06, + "loss": 12.0965, + "step": 379700 + }, + { + "epoch": 0.7670382236371643, + "grad_norm": 339.8711242675781, + "learning_rate": 1.661876607800043e-06, + "loss": 39.5871, + "step": 379710 + }, + { + "epoch": 0.7670584242698482, + "grad_norm": 241.8334197998047, + "learning_rate": 1.6616167371036446e-06, + "loss": 15.9962, + "step": 379720 + }, + { + "epoch": 0.767078624902532, + "grad_norm": 411.3114013671875, + "learning_rate": 1.6613568826781208e-06, + "loss": 22.2917, + "step": 379730 + }, + { + "epoch": 0.7670988255352158, + "grad_norm": 161.6168975830078, + "learning_rate": 1.6610970445247404e-06, + "loss": 27.7665, + "step": 379740 + }, + { + "epoch": 0.7671190261678996, + "grad_norm": 987.0467529296875, + "learning_rate": 1.6608372226447678e-06, + "loss": 20.4386, + "step": 379750 + }, + { + "epoch": 0.7671392268005834, + "grad_norm": 280.5501708984375, + "learning_rate": 1.6605774170394683e-06, + "loss": 24.2737, + "step": 379760 + }, + { + "epoch": 0.7671594274332673, + "grad_norm": 765.9490966796875, + "learning_rate": 1.6603176277101095e-06, + "loss": 24.2523, + "step": 379770 + }, + { + "epoch": 0.7671796280659511, + "grad_norm": 178.56707763671875, + "learning_rate": 1.6600578546579604e-06, + "loss": 19.8372, + "step": 379780 + }, + { + "epoch": 0.7671998286986348, + "grad_norm": 148.49171447753906, + "learning_rate": 1.6597980978842814e-06, + "loss": 23.115, + "step": 379790 + }, + { + "epoch": 0.7672200293313186, + "grad_norm": 148.5966033935547, + "learning_rate": 1.6595383573903412e-06, + "loss": 18.8011, + "step": 379800 + }, + { + "epoch": 0.7672402299640024, + "grad_norm": 203.92120361328125, + "learning_rate": 1.6592786331774075e-06, + "loss": 17.3271, + "step": 379810 + }, + { + "epoch": 0.7672604305966862, + "grad_norm": 636.1489868164062, + "learning_rate": 1.6590189252467448e-06, + "loss": 9.946, + "step": 379820 + }, + { + "epoch": 0.7672806312293701, + "grad_norm": 320.523193359375, + "learning_rate": 1.6587592335996167e-06, + "loss": 14.2966, + "step": 379830 + }, + { + "epoch": 0.7673008318620539, + "grad_norm": 268.8244934082031, + "learning_rate": 1.6584995582372926e-06, + "loss": 24.7064, + "step": 379840 + }, + { + "epoch": 0.7673210324947377, + "grad_norm": 533.8139038085938, + "learning_rate": 1.658239899161036e-06, + "loss": 10.481, + "step": 379850 + }, + { + "epoch": 0.7673412331274215, + "grad_norm": 655.6635131835938, + "learning_rate": 1.6579802563721115e-06, + "loss": 16.7972, + "step": 379860 + }, + { + "epoch": 
0.7673614337601053, + "grad_norm": 763.1399536132812, + "learning_rate": 1.6577206298717862e-06, + "loss": 31.9017, + "step": 379870 + }, + { + "epoch": 0.7673816343927892, + "grad_norm": 176.9679718017578, + "learning_rate": 1.657461019661326e-06, + "loss": 24.1936, + "step": 379880 + }, + { + "epoch": 0.767401835025473, + "grad_norm": 108.568115234375, + "learning_rate": 1.657201425741996e-06, + "loss": 12.1127, + "step": 379890 + }, + { + "epoch": 0.7674220356581568, + "grad_norm": 214.27066040039062, + "learning_rate": 1.6569418481150596e-06, + "loss": 14.8013, + "step": 379900 + }, + { + "epoch": 0.7674422362908406, + "grad_norm": 370.8332824707031, + "learning_rate": 1.656682286781784e-06, + "loss": 33.4163, + "step": 379910 + }, + { + "epoch": 0.7674624369235244, + "grad_norm": 223.8822021484375, + "learning_rate": 1.6564227417434336e-06, + "loss": 16.8652, + "step": 379920 + }, + { + "epoch": 0.7674826375562083, + "grad_norm": 254.4141082763672, + "learning_rate": 1.6561632130012716e-06, + "loss": 15.5237, + "step": 379930 + }, + { + "epoch": 0.7675028381888921, + "grad_norm": 177.2603302001953, + "learning_rate": 1.6559037005565665e-06, + "loss": 17.4667, + "step": 379940 + }, + { + "epoch": 0.7675230388215759, + "grad_norm": 701.9732666015625, + "learning_rate": 1.6556442044105797e-06, + "loss": 18.3868, + "step": 379950 + }, + { + "epoch": 0.7675432394542597, + "grad_norm": 306.0867614746094, + "learning_rate": 1.6553847245645787e-06, + "loss": 19.5346, + "step": 379960 + }, + { + "epoch": 0.7675634400869435, + "grad_norm": 286.0669250488281, + "learning_rate": 1.6551252610198266e-06, + "loss": 18.5203, + "step": 379970 + }, + { + "epoch": 0.7675836407196274, + "grad_norm": 222.2674102783203, + "learning_rate": 1.6548658137775868e-06, + "loss": 22.9019, + "step": 379980 + }, + { + "epoch": 0.7676038413523112, + "grad_norm": 333.3114013671875, + "learning_rate": 1.6546063828391272e-06, + "loss": 22.5526, + "step": 379990 + }, + { + "epoch": 0.767624041984995, + "grad_norm": 362.8835144042969, + "learning_rate": 1.6543469682057105e-06, + "loss": 11.9528, + "step": 380000 + }, + { + "epoch": 0.7676442426176788, + "grad_norm": 427.2572326660156, + "learning_rate": 1.654087569878599e-06, + "loss": 21.9404, + "step": 380010 + }, + { + "epoch": 0.7676644432503626, + "grad_norm": 424.8553466796875, + "learning_rate": 1.653828187859059e-06, + "loss": 12.2714, + "step": 380020 + }, + { + "epoch": 0.7676846438830465, + "grad_norm": 209.9966278076172, + "learning_rate": 1.6535688221483558e-06, + "loss": 28.7189, + "step": 380030 + }, + { + "epoch": 0.7677048445157302, + "grad_norm": 169.88417053222656, + "learning_rate": 1.6533094727477528e-06, + "loss": 12.2698, + "step": 380040 + }, + { + "epoch": 0.767725045148414, + "grad_norm": 308.2015075683594, + "learning_rate": 1.653050139658512e-06, + "loss": 35.0971, + "step": 380050 + }, + { + "epoch": 0.7677452457810978, + "grad_norm": 8.098689079284668, + "learning_rate": 1.6527908228819e-06, + "loss": 10.1755, + "step": 380060 + }, + { + "epoch": 0.7677654464137816, + "grad_norm": 228.56390380859375, + "learning_rate": 1.6525315224191795e-06, + "loss": 16.6736, + "step": 380070 + }, + { + "epoch": 0.7677856470464655, + "grad_norm": 865.73388671875, + "learning_rate": 1.6522722382716133e-06, + "loss": 10.5984, + "step": 380080 + }, + { + "epoch": 0.7678058476791493, + "grad_norm": 351.1080322265625, + "learning_rate": 1.6520129704404659e-06, + "loss": 23.2793, + "step": 380090 + }, + { + "epoch": 0.7678260483118331, + "grad_norm": 
366.80743408203125, + "learning_rate": 1.6517537189270043e-06, + "loss": 19.1603, + "step": 380100 + }, + { + "epoch": 0.7678462489445169, + "grad_norm": 536.4520874023438, + "learning_rate": 1.651494483732486e-06, + "loss": 27.78, + "step": 380110 + }, + { + "epoch": 0.7678664495772007, + "grad_norm": 366.5112609863281, + "learning_rate": 1.651235264858177e-06, + "loss": 16.3296, + "step": 380120 + }, + { + "epoch": 0.7678866502098846, + "grad_norm": 89.81643676757812, + "learning_rate": 1.6509760623053435e-06, + "loss": 20.7632, + "step": 380130 + }, + { + "epoch": 0.7679068508425684, + "grad_norm": 779.8565063476562, + "learning_rate": 1.6507168760752457e-06, + "loss": 33.6351, + "step": 380140 + }, + { + "epoch": 0.7679270514752522, + "grad_norm": 1.249252438545227, + "learning_rate": 1.6504577061691468e-06, + "loss": 11.8843, + "step": 380150 + }, + { + "epoch": 0.767947252107936, + "grad_norm": 348.9931640625, + "learning_rate": 1.650198552588312e-06, + "loss": 20.5949, + "step": 380160 + }, + { + "epoch": 0.7679674527406198, + "grad_norm": 340.8260803222656, + "learning_rate": 1.649939415334003e-06, + "loss": 6.7435, + "step": 380170 + }, + { + "epoch": 0.7679876533733037, + "grad_norm": 313.790771484375, + "learning_rate": 1.6496802944074814e-06, + "loss": 19.5737, + "step": 380180 + }, + { + "epoch": 0.7680078540059875, + "grad_norm": 6.209090709686279, + "learning_rate": 1.649421189810012e-06, + "loss": 19.2419, + "step": 380190 + }, + { + "epoch": 0.7680280546386713, + "grad_norm": 490.5793151855469, + "learning_rate": 1.6491621015428588e-06, + "loss": 17.369, + "step": 380200 + }, + { + "epoch": 0.7680482552713551, + "grad_norm": 500.19744873046875, + "learning_rate": 1.6489030296072833e-06, + "loss": 12.8155, + "step": 380210 + }, + { + "epoch": 0.7680684559040389, + "grad_norm": 331.2500305175781, + "learning_rate": 1.6486439740045463e-06, + "loss": 20.4228, + "step": 380220 + }, + { + "epoch": 0.7680886565367228, + "grad_norm": 141.11647033691406, + "learning_rate": 1.6483849347359132e-06, + "loss": 17.5376, + "step": 380230 + }, + { + "epoch": 0.7681088571694066, + "grad_norm": 797.0460815429688, + "learning_rate": 1.6481259118026444e-06, + "loss": 21.028, + "step": 380240 + }, + { + "epoch": 0.7681290578020904, + "grad_norm": 476.4696960449219, + "learning_rate": 1.6478669052060048e-06, + "loss": 16.271, + "step": 380250 + }, + { + "epoch": 0.7681492584347742, + "grad_norm": 290.9534606933594, + "learning_rate": 1.6476079149472552e-06, + "loss": 12.3833, + "step": 380260 + }, + { + "epoch": 0.768169459067458, + "grad_norm": 324.8664245605469, + "learning_rate": 1.6473489410276565e-06, + "loss": 30.8366, + "step": 380270 + }, + { + "epoch": 0.7681896597001419, + "grad_norm": 280.6238098144531, + "learning_rate": 1.6470899834484744e-06, + "loss": 9.0797, + "step": 380280 + }, + { + "epoch": 0.7682098603328257, + "grad_norm": 107.61328887939453, + "learning_rate": 1.6468310422109684e-06, + "loss": 8.9617, + "step": 380290 + }, + { + "epoch": 0.7682300609655094, + "grad_norm": 29.13918113708496, + "learning_rate": 1.6465721173164e-06, + "loss": 20.003, + "step": 380300 + }, + { + "epoch": 0.7682502615981932, + "grad_norm": 337.44024658203125, + "learning_rate": 1.6463132087660327e-06, + "loss": 9.134, + "step": 380310 + }, + { + "epoch": 0.768270462230877, + "grad_norm": 230.2394256591797, + "learning_rate": 1.6460543165611291e-06, + "loss": 15.5104, + "step": 380320 + }, + { + "epoch": 0.7682906628635608, + "grad_norm": 584.089111328125, + "learning_rate": 
1.6457954407029503e-06, + "loss": 19.6054, + "step": 380330 + }, + { + "epoch": 0.7683108634962447, + "grad_norm": 259.95806884765625, + "learning_rate": 1.6455365811927559e-06, + "loss": 8.7815, + "step": 380340 + }, + { + "epoch": 0.7683310641289285, + "grad_norm": 358.2709045410156, + "learning_rate": 1.6452777380318112e-06, + "loss": 16.9212, + "step": 380350 + }, + { + "epoch": 0.7683512647616123, + "grad_norm": 525.129638671875, + "learning_rate": 1.645018911221376e-06, + "loss": 17.4771, + "step": 380360 + }, + { + "epoch": 0.7683714653942961, + "grad_norm": 358.9599914550781, + "learning_rate": 1.6447601007627096e-06, + "loss": 14.5307, + "step": 380370 + }, + { + "epoch": 0.76839166602698, + "grad_norm": 914.122802734375, + "learning_rate": 1.6445013066570758e-06, + "loss": 12.692, + "step": 380380 + }, + { + "epoch": 0.7684118666596638, + "grad_norm": 220.31948852539062, + "learning_rate": 1.644242528905739e-06, + "loss": 8.3485, + "step": 380390 + }, + { + "epoch": 0.7684320672923476, + "grad_norm": 612.853515625, + "learning_rate": 1.643983767509954e-06, + "loss": 10.4811, + "step": 380400 + }, + { + "epoch": 0.7684522679250314, + "grad_norm": 105.07130432128906, + "learning_rate": 1.6437250224709844e-06, + "loss": 12.8862, + "step": 380410 + }, + { + "epoch": 0.7684724685577152, + "grad_norm": 395.0177307128906, + "learning_rate": 1.6434662937900942e-06, + "loss": 17.6527, + "step": 380420 + }, + { + "epoch": 0.768492669190399, + "grad_norm": 437.62164306640625, + "learning_rate": 1.6432075814685423e-06, + "loss": 24.6856, + "step": 380430 + }, + { + "epoch": 0.7685128698230829, + "grad_norm": 233.72462463378906, + "learning_rate": 1.6429488855075876e-06, + "loss": 18.4488, + "step": 380440 + }, + { + "epoch": 0.7685330704557667, + "grad_norm": 281.45330810546875, + "learning_rate": 1.6426902059084942e-06, + "loss": 16.5913, + "step": 380450 + }, + { + "epoch": 0.7685532710884505, + "grad_norm": 188.50828552246094, + "learning_rate": 1.6424315426725224e-06, + "loss": 12.7214, + "step": 380460 + }, + { + "epoch": 0.7685734717211343, + "grad_norm": 336.333740234375, + "learning_rate": 1.6421728958009298e-06, + "loss": 22.2692, + "step": 380470 + }, + { + "epoch": 0.7685936723538181, + "grad_norm": 510.2742919921875, + "learning_rate": 1.6419142652949793e-06, + "loss": 18.998, + "step": 380480 + }, + { + "epoch": 0.768613872986502, + "grad_norm": 558.2223510742188, + "learning_rate": 1.6416556511559329e-06, + "loss": 10.7758, + "step": 380490 + }, + { + "epoch": 0.7686340736191858, + "grad_norm": 375.12060546875, + "learning_rate": 1.6413970533850498e-06, + "loss": 14.4149, + "step": 380500 + }, + { + "epoch": 0.7686542742518696, + "grad_norm": 0.0005101134884171188, + "learning_rate": 1.6411384719835882e-06, + "loss": 12.5642, + "step": 380510 + }, + { + "epoch": 0.7686744748845534, + "grad_norm": 486.0578918457031, + "learning_rate": 1.6408799069528119e-06, + "loss": 16.8116, + "step": 380520 + }, + { + "epoch": 0.7686946755172372, + "grad_norm": 0.0, + "learning_rate": 1.6406213582939801e-06, + "loss": 12.1538, + "step": 380530 + }, + { + "epoch": 0.7687148761499211, + "grad_norm": 258.78338623046875, + "learning_rate": 1.6403628260083503e-06, + "loss": 19.9973, + "step": 380540 + }, + { + "epoch": 0.7687350767826048, + "grad_norm": 405.3760070800781, + "learning_rate": 1.6401043100971864e-06, + "loss": 16.9124, + "step": 380550 + }, + { + "epoch": 0.7687552774152886, + "grad_norm": 446.0802917480469, + "learning_rate": 1.639845810561745e-06, + "loss": 12.1335, + "step": 
380560 + }, + { + "epoch": 0.7687754780479724, + "grad_norm": 613.1295776367188, + "learning_rate": 1.6395873274032887e-06, + "loss": 12.5113, + "step": 380570 + }, + { + "epoch": 0.7687956786806562, + "grad_norm": 649.5859375, + "learning_rate": 1.6393288606230768e-06, + "loss": 29.3049, + "step": 380580 + }, + { + "epoch": 0.7688158793133401, + "grad_norm": 1147.98486328125, + "learning_rate": 1.6390704102223664e-06, + "loss": 31.9163, + "step": 380590 + }, + { + "epoch": 0.7688360799460239, + "grad_norm": 648.0134887695312, + "learning_rate": 1.6388119762024213e-06, + "loss": 20.687, + "step": 380600 + }, + { + "epoch": 0.7688562805787077, + "grad_norm": 368.980224609375, + "learning_rate": 1.6385535585644985e-06, + "loss": 14.665, + "step": 380610 + }, + { + "epoch": 0.7688764812113915, + "grad_norm": 139.56597900390625, + "learning_rate": 1.6382951573098565e-06, + "loss": 23.6349, + "step": 380620 + }, + { + "epoch": 0.7688966818440753, + "grad_norm": 420.3999938964844, + "learning_rate": 1.638036772439756e-06, + "loss": 15.8837, + "step": 380630 + }, + { + "epoch": 0.7689168824767592, + "grad_norm": 353.9872131347656, + "learning_rate": 1.6377784039554584e-06, + "loss": 20.016, + "step": 380640 + }, + { + "epoch": 0.768937083109443, + "grad_norm": 84.67881774902344, + "learning_rate": 1.6375200518582208e-06, + "loss": 24.0127, + "step": 380650 + }, + { + "epoch": 0.7689572837421268, + "grad_norm": 771.5448608398438, + "learning_rate": 1.6372617161493014e-06, + "loss": 15.1039, + "step": 380660 + }, + { + "epoch": 0.7689774843748106, + "grad_norm": 893.9058837890625, + "learning_rate": 1.637003396829962e-06, + "loss": 27.4231, + "step": 380670 + }, + { + "epoch": 0.7689976850074944, + "grad_norm": 719.2028198242188, + "learning_rate": 1.63674509390146e-06, + "loss": 34.8571, + "step": 380680 + }, + { + "epoch": 0.7690178856401783, + "grad_norm": 360.4425964355469, + "learning_rate": 1.636486807365053e-06, + "loss": 25.9847, + "step": 380690 + }, + { + "epoch": 0.7690380862728621, + "grad_norm": 483.6988220214844, + "learning_rate": 1.6362285372220016e-06, + "loss": 29.8262, + "step": 380700 + }, + { + "epoch": 0.7690582869055459, + "grad_norm": 199.2152099609375, + "learning_rate": 1.635970283473567e-06, + "loss": 16.3838, + "step": 380710 + }, + { + "epoch": 0.7690784875382297, + "grad_norm": 431.31427001953125, + "learning_rate": 1.6357120461210024e-06, + "loss": 21.6255, + "step": 380720 + }, + { + "epoch": 0.7690986881709135, + "grad_norm": 322.01214599609375, + "learning_rate": 1.6354538251655695e-06, + "loss": 15.2622, + "step": 380730 + }, + { + "epoch": 0.7691188888035974, + "grad_norm": 432.5312194824219, + "learning_rate": 1.6351956206085273e-06, + "loss": 19.7821, + "step": 380740 + }, + { + "epoch": 0.7691390894362812, + "grad_norm": 98.79090881347656, + "learning_rate": 1.6349374324511347e-06, + "loss": 16.8088, + "step": 380750 + }, + { + "epoch": 0.769159290068965, + "grad_norm": 501.28057861328125, + "learning_rate": 1.6346792606946466e-06, + "loss": 21.1545, + "step": 380760 + }, + { + "epoch": 0.7691794907016488, + "grad_norm": 439.8757629394531, + "learning_rate": 1.6344211053403253e-06, + "loss": 20.0453, + "step": 380770 + }, + { + "epoch": 0.7691996913343326, + "grad_norm": 334.0589599609375, + "learning_rate": 1.634162966389427e-06, + "loss": 12.4049, + "step": 380780 + }, + { + "epoch": 0.7692198919670165, + "grad_norm": 742.2249755859375, + "learning_rate": 1.6339048438432093e-06, + "loss": 22.8836, + "step": 380790 + }, + { + "epoch": 0.7692400925997003, + 
"grad_norm": 34.85905838012695, + "learning_rate": 1.6336467377029308e-06, + "loss": 12.9863, + "step": 380800 + }, + { + "epoch": 0.769260293232384, + "grad_norm": 215.7017822265625, + "learning_rate": 1.6333886479698514e-06, + "loss": 13.1861, + "step": 380810 + }, + { + "epoch": 0.7692804938650678, + "grad_norm": 849.7139892578125, + "learning_rate": 1.6331305746452276e-06, + "loss": 15.6368, + "step": 380820 + }, + { + "epoch": 0.7693006944977516, + "grad_norm": 118.31912231445312, + "learning_rate": 1.632872517730315e-06, + "loss": 16.8557, + "step": 380830 + }, + { + "epoch": 0.7693208951304354, + "grad_norm": 290.2739562988281, + "learning_rate": 1.6326144772263752e-06, + "loss": 22.0026, + "step": 380840 + }, + { + "epoch": 0.7693410957631193, + "grad_norm": 5.083776950836182, + "learning_rate": 1.6323564531346642e-06, + "loss": 9.5606, + "step": 380850 + }, + { + "epoch": 0.7693612963958031, + "grad_norm": 138.90684509277344, + "learning_rate": 1.6320984454564377e-06, + "loss": 21.7563, + "step": 380860 + }, + { + "epoch": 0.7693814970284869, + "grad_norm": 304.3311767578125, + "learning_rate": 1.6318404541929562e-06, + "loss": 22.3836, + "step": 380870 + }, + { + "epoch": 0.7694016976611707, + "grad_norm": 664.1461791992188, + "learning_rate": 1.6315824793454743e-06, + "loss": 23.9397, + "step": 380880 + }, + { + "epoch": 0.7694218982938545, + "grad_norm": 3.81365704536438, + "learning_rate": 1.631324520915253e-06, + "loss": 5.6078, + "step": 380890 + }, + { + "epoch": 0.7694420989265384, + "grad_norm": 230.58444213867188, + "learning_rate": 1.6310665789035468e-06, + "loss": 12.9682, + "step": 380900 + }, + { + "epoch": 0.7694622995592222, + "grad_norm": 395.6536560058594, + "learning_rate": 1.6308086533116125e-06, + "loss": 21.0332, + "step": 380910 + }, + { + "epoch": 0.769482500191906, + "grad_norm": 605.6498413085938, + "learning_rate": 1.6305507441407076e-06, + "loss": 34.0215, + "step": 380920 + }, + { + "epoch": 0.7695027008245898, + "grad_norm": 110.5271987915039, + "learning_rate": 1.6302928513920912e-06, + "loss": 17.813, + "step": 380930 + }, + { + "epoch": 0.7695229014572736, + "grad_norm": 1174.549560546875, + "learning_rate": 1.6300349750670191e-06, + "loss": 13.342, + "step": 380940 + }, + { + "epoch": 0.7695431020899575, + "grad_norm": 299.632568359375, + "learning_rate": 1.6297771151667462e-06, + "loss": 20.369, + "step": 380950 + }, + { + "epoch": 0.7695633027226413, + "grad_norm": 354.7998962402344, + "learning_rate": 1.6295192716925324e-06, + "loss": 17.0283, + "step": 380960 + }, + { + "epoch": 0.7695835033553251, + "grad_norm": 389.1010437011719, + "learning_rate": 1.6292614446456328e-06, + "loss": 13.1237, + "step": 380970 + }, + { + "epoch": 0.7696037039880089, + "grad_norm": 343.6322021484375, + "learning_rate": 1.629003634027303e-06, + "loss": 12.6781, + "step": 380980 + }, + { + "epoch": 0.7696239046206927, + "grad_norm": 459.3533630371094, + "learning_rate": 1.6287458398388001e-06, + "loss": 13.2048, + "step": 380990 + }, + { + "epoch": 0.7696441052533766, + "grad_norm": 340.8312072753906, + "learning_rate": 1.6284880620813847e-06, + "loss": 21.0156, + "step": 381000 + }, + { + "epoch": 0.7696643058860604, + "grad_norm": 93.18062591552734, + "learning_rate": 1.6282303007563061e-06, + "loss": 16.2557, + "step": 381010 + }, + { + "epoch": 0.7696845065187442, + "grad_norm": 328.4632568359375, + "learning_rate": 1.627972555864824e-06, + "loss": 11.7841, + "step": 381020 + }, + { + "epoch": 0.769704707151428, + "grad_norm": 454.50469970703125, + 
"learning_rate": 1.6277148274081962e-06, + "loss": 29.7739, + "step": 381030 + }, + { + "epoch": 0.7697249077841118, + "grad_norm": 40.51005172729492, + "learning_rate": 1.6274571153876777e-06, + "loss": 15.5865, + "step": 381040 + }, + { + "epoch": 0.7697451084167957, + "grad_norm": 316.1906433105469, + "learning_rate": 1.627199419804522e-06, + "loss": 11.8209, + "step": 381050 + }, + { + "epoch": 0.7697653090494795, + "grad_norm": 547.84765625, + "learning_rate": 1.6269417406599897e-06, + "loss": 17.5443, + "step": 381060 + }, + { + "epoch": 0.7697855096821632, + "grad_norm": 252.12637329101562, + "learning_rate": 1.626684077955334e-06, + "loss": 13.6713, + "step": 381070 + }, + { + "epoch": 0.769805710314847, + "grad_norm": 535.2305908203125, + "learning_rate": 1.6264264316918087e-06, + "loss": 21.1895, + "step": 381080 + }, + { + "epoch": 0.7698259109475308, + "grad_norm": 164.35350036621094, + "learning_rate": 1.6261688018706724e-06, + "loss": 8.5325, + "step": 381090 + }, + { + "epoch": 0.7698461115802147, + "grad_norm": 255.1110076904297, + "learning_rate": 1.6259111884931817e-06, + "loss": 18.8768, + "step": 381100 + }, + { + "epoch": 0.7698663122128985, + "grad_norm": 0.0, + "learning_rate": 1.6256535915605904e-06, + "loss": 9.0382, + "step": 381110 + }, + { + "epoch": 0.7698865128455823, + "grad_norm": 249.6778106689453, + "learning_rate": 1.6253960110741528e-06, + "loss": 17.7898, + "step": 381120 + }, + { + "epoch": 0.7699067134782661, + "grad_norm": 361.3498229980469, + "learning_rate": 1.6251384470351272e-06, + "loss": 21.6081, + "step": 381130 + }, + { + "epoch": 0.7699269141109499, + "grad_norm": 233.3412322998047, + "learning_rate": 1.6248808994447678e-06, + "loss": 17.5962, + "step": 381140 + }, + { + "epoch": 0.7699471147436338, + "grad_norm": 228.55316162109375, + "learning_rate": 1.6246233683043279e-06, + "loss": 17.8167, + "step": 381150 + }, + { + "epoch": 0.7699673153763176, + "grad_norm": 189.47850036621094, + "learning_rate": 1.6243658536150657e-06, + "loss": 15.8199, + "step": 381160 + }, + { + "epoch": 0.7699875160090014, + "grad_norm": 747.3308715820312, + "learning_rate": 1.6241083553782332e-06, + "loss": 17.8983, + "step": 381170 + }, + { + "epoch": 0.7700077166416852, + "grad_norm": 310.81427001953125, + "learning_rate": 1.6238508735950892e-06, + "loss": 19.3162, + "step": 381180 + }, + { + "epoch": 0.770027917274369, + "grad_norm": 432.24334716796875, + "learning_rate": 1.6235934082668863e-06, + "loss": 15.3876, + "step": 381190 + }, + { + "epoch": 0.7700481179070529, + "grad_norm": 334.8131103515625, + "learning_rate": 1.6233359593948777e-06, + "loss": 19.6963, + "step": 381200 + }, + { + "epoch": 0.7700683185397367, + "grad_norm": 135.47760009765625, + "learning_rate": 1.6230785269803223e-06, + "loss": 11.7463, + "step": 381210 + }, + { + "epoch": 0.7700885191724205, + "grad_norm": 184.69654846191406, + "learning_rate": 1.6228211110244725e-06, + "loss": 18.422, + "step": 381220 + }, + { + "epoch": 0.7701087198051043, + "grad_norm": 190.71502685546875, + "learning_rate": 1.6225637115285809e-06, + "loss": 19.1578, + "step": 381230 + }, + { + "epoch": 0.7701289204377881, + "grad_norm": 502.76531982421875, + "learning_rate": 1.6223063284939045e-06, + "loss": 24.0089, + "step": 381240 + }, + { + "epoch": 0.770149121070472, + "grad_norm": 576.7030029296875, + "learning_rate": 1.6220489619216988e-06, + "loss": 39.1933, + "step": 381250 + }, + { + "epoch": 0.7701693217031558, + "grad_norm": 834.5010986328125, + "learning_rate": 1.621791611813217e-06, + "loss": 
19.1321, + "step": 381260 + }, + { + "epoch": 0.7701895223358396, + "grad_norm": 410.09454345703125, + "learning_rate": 1.6215342781697118e-06, + "loss": 24.6805, + "step": 381270 + }, + { + "epoch": 0.7702097229685234, + "grad_norm": 625.7750244140625, + "learning_rate": 1.62127696099244e-06, + "loss": 38.776, + "step": 381280 + }, + { + "epoch": 0.7702299236012072, + "grad_norm": 405.533935546875, + "learning_rate": 1.6210196602826544e-06, + "loss": 13.8247, + "step": 381290 + }, + { + "epoch": 0.770250124233891, + "grad_norm": 301.3874816894531, + "learning_rate": 1.6207623760416074e-06, + "loss": 24.191, + "step": 381300 + }, + { + "epoch": 0.7702703248665749, + "grad_norm": 414.2416687011719, + "learning_rate": 1.6205051082705553e-06, + "loss": 23.5549, + "step": 381310 + }, + { + "epoch": 0.7702905254992586, + "grad_norm": 305.6679992675781, + "learning_rate": 1.620247856970754e-06, + "loss": 14.1989, + "step": 381320 + }, + { + "epoch": 0.7703107261319424, + "grad_norm": 180.5402374267578, + "learning_rate": 1.6199906221434525e-06, + "loss": 11.7204, + "step": 381330 + }, + { + "epoch": 0.7703309267646262, + "grad_norm": 360.4750671386719, + "learning_rate": 1.619733403789906e-06, + "loss": 9.5748, + "step": 381340 + }, + { + "epoch": 0.77035112739731, + "grad_norm": 525.697021484375, + "learning_rate": 1.6194762019113703e-06, + "loss": 14.7872, + "step": 381350 + }, + { + "epoch": 0.7703713280299939, + "grad_norm": 315.2054748535156, + "learning_rate": 1.6192190165090982e-06, + "loss": 13.9068, + "step": 381360 + }, + { + "epoch": 0.7703915286626777, + "grad_norm": 762.55419921875, + "learning_rate": 1.6189618475843406e-06, + "loss": 15.1304, + "step": 381370 + }, + { + "epoch": 0.7704117292953615, + "grad_norm": 140.41734313964844, + "learning_rate": 1.618704695138353e-06, + "loss": 17.7757, + "step": 381380 + }, + { + "epoch": 0.7704319299280453, + "grad_norm": 732.903076171875, + "learning_rate": 1.618447559172392e-06, + "loss": 23.0551, + "step": 381390 + }, + { + "epoch": 0.7704521305607291, + "grad_norm": 18.876867294311523, + "learning_rate": 1.6181904396877041e-06, + "loss": 9.4914, + "step": 381400 + }, + { + "epoch": 0.770472331193413, + "grad_norm": 252.22230529785156, + "learning_rate": 1.6179333366855455e-06, + "loss": 24.7016, + "step": 381410 + }, + { + "epoch": 0.7704925318260968, + "grad_norm": 225.93679809570312, + "learning_rate": 1.6176762501671717e-06, + "loss": 17.5837, + "step": 381420 + }, + { + "epoch": 0.7705127324587806, + "grad_norm": 394.24365234375, + "learning_rate": 1.6174191801338334e-06, + "loss": 13.2283, + "step": 381430 + }, + { + "epoch": 0.7705329330914644, + "grad_norm": 106.54386138916016, + "learning_rate": 1.617162126586782e-06, + "loss": 17.2904, + "step": 381440 + }, + { + "epoch": 0.7705531337241482, + "grad_norm": 46.939537048339844, + "learning_rate": 1.6169050895272743e-06, + "loss": 19.5087, + "step": 381450 + }, + { + "epoch": 0.7705733343568321, + "grad_norm": 132.36495971679688, + "learning_rate": 1.61664806895656e-06, + "loss": 9.7853, + "step": 381460 + }, + { + "epoch": 0.7705935349895159, + "grad_norm": 78.6220932006836, + "learning_rate": 1.6163910648758919e-06, + "loss": 18.6722, + "step": 381470 + }, + { + "epoch": 0.7706137356221997, + "grad_norm": 182.31996154785156, + "learning_rate": 1.6161340772865247e-06, + "loss": 10.6614, + "step": 381480 + }, + { + "epoch": 0.7706339362548835, + "grad_norm": 625.3024291992188, + "learning_rate": 1.6158771061897077e-06, + "loss": 17.6466, + "step": 381490 + }, + { + "epoch": 
0.7706541368875673, + "grad_norm": 312.5849609375, + "learning_rate": 1.6156201515866971e-06, + "loss": 16.7991, + "step": 381500 + }, + { + "epoch": 0.7706743375202512, + "grad_norm": 249.2038116455078, + "learning_rate": 1.6153632134787433e-06, + "loss": 36.6219, + "step": 381510 + }, + { + "epoch": 0.770694538152935, + "grad_norm": 327.1910095214844, + "learning_rate": 1.6151062918670973e-06, + "loss": 11.6358, + "step": 381520 + }, + { + "epoch": 0.7707147387856188, + "grad_norm": 197.6422119140625, + "learning_rate": 1.614849386753014e-06, + "loss": 15.3233, + "step": 381530 + }, + { + "epoch": 0.7707349394183026, + "grad_norm": 249.66806030273438, + "learning_rate": 1.6145924981377424e-06, + "loss": 7.9539, + "step": 381540 + }, + { + "epoch": 0.7707551400509864, + "grad_norm": 342.3058166503906, + "learning_rate": 1.6143356260225385e-06, + "loss": 14.0132, + "step": 381550 + }, + { + "epoch": 0.7707753406836703, + "grad_norm": 243.70208740234375, + "learning_rate": 1.6140787704086502e-06, + "loss": 18.7993, + "step": 381560 + }, + { + "epoch": 0.7707955413163541, + "grad_norm": 20.913881301879883, + "learning_rate": 1.6138219312973335e-06, + "loss": 19.2215, + "step": 381570 + }, + { + "epoch": 0.7708157419490378, + "grad_norm": 637.6466064453125, + "learning_rate": 1.6135651086898373e-06, + "loss": 21.3557, + "step": 381580 + }, + { + "epoch": 0.7708359425817216, + "grad_norm": 280.07177734375, + "learning_rate": 1.613308302587413e-06, + "loss": 19.861, + "step": 381590 + }, + { + "epoch": 0.7708561432144054, + "grad_norm": 442.05072021484375, + "learning_rate": 1.6130515129913144e-06, + "loss": 10.3194, + "step": 381600 + }, + { + "epoch": 0.7708763438470893, + "grad_norm": 229.8391876220703, + "learning_rate": 1.6127947399027922e-06, + "loss": 11.2043, + "step": 381610 + }, + { + "epoch": 0.7708965444797731, + "grad_norm": 403.2405700683594, + "learning_rate": 1.612537983323096e-06, + "loss": 22.6117, + "step": 381620 + }, + { + "epoch": 0.7709167451124569, + "grad_norm": 362.3747863769531, + "learning_rate": 1.6122812432534785e-06, + "loss": 22.793, + "step": 381630 + }, + { + "epoch": 0.7709369457451407, + "grad_norm": 135.65130615234375, + "learning_rate": 1.6120245196951933e-06, + "loss": 11.5768, + "step": 381640 + }, + { + "epoch": 0.7709571463778245, + "grad_norm": 551.9156494140625, + "learning_rate": 1.6117678126494895e-06, + "loss": 19.9291, + "step": 381650 + }, + { + "epoch": 0.7709773470105084, + "grad_norm": 398.4827575683594, + "learning_rate": 1.6115111221176171e-06, + "loss": 13.0153, + "step": 381660 + }, + { + "epoch": 0.7709975476431922, + "grad_norm": 250.5609588623047, + "learning_rate": 1.6112544481008296e-06, + "loss": 22.9284, + "step": 381670 + }, + { + "epoch": 0.771017748275876, + "grad_norm": 494.46868896484375, + "learning_rate": 1.6109977906003777e-06, + "loss": 22.3495, + "step": 381680 + }, + { + "epoch": 0.7710379489085598, + "grad_norm": 163.58030700683594, + "learning_rate": 1.610741149617509e-06, + "loss": 9.6822, + "step": 381690 + }, + { + "epoch": 0.7710581495412436, + "grad_norm": 190.7425537109375, + "learning_rate": 1.6104845251534772e-06, + "loss": 14.6259, + "step": 381700 + }, + { + "epoch": 0.7710783501739275, + "grad_norm": 387.62591552734375, + "learning_rate": 1.6102279172095348e-06, + "loss": 20.8189, + "step": 381710 + }, + { + "epoch": 0.7710985508066113, + "grad_norm": 175.18040466308594, + "learning_rate": 1.6099713257869303e-06, + "loss": 10.4614, + "step": 381720 + }, + { + "epoch": 0.7711187514392951, + "grad_norm": 
746.3145141601562, + "learning_rate": 1.6097147508869127e-06, + "loss": 19.7552, + "step": 381730 + }, + { + "epoch": 0.7711389520719789, + "grad_norm": 204.37940979003906, + "learning_rate": 1.6094581925107356e-06, + "loss": 9.6557, + "step": 381740 + }, + { + "epoch": 0.7711591527046627, + "grad_norm": 1955.2236328125, + "learning_rate": 1.6092016506596481e-06, + "loss": 21.758, + "step": 381750 + }, + { + "epoch": 0.7711793533373466, + "grad_norm": 298.4073181152344, + "learning_rate": 1.6089451253348987e-06, + "loss": 16.5147, + "step": 381760 + }, + { + "epoch": 0.7711995539700304, + "grad_norm": 454.9640808105469, + "learning_rate": 1.6086886165377414e-06, + "loss": 16.8713, + "step": 381770 + }, + { + "epoch": 0.7712197546027142, + "grad_norm": 171.67300415039062, + "learning_rate": 1.608432124269423e-06, + "loss": 13.2809, + "step": 381780 + }, + { + "epoch": 0.771239955235398, + "grad_norm": 196.25167846679688, + "learning_rate": 1.608175648531196e-06, + "loss": 16.3012, + "step": 381790 + }, + { + "epoch": 0.7712601558680818, + "grad_norm": 339.40155029296875, + "learning_rate": 1.6079191893243102e-06, + "loss": 17.3038, + "step": 381800 + }, + { + "epoch": 0.7712803565007657, + "grad_norm": 143.8892059326172, + "learning_rate": 1.6076627466500127e-06, + "loss": 17.3237, + "step": 381810 + }, + { + "epoch": 0.7713005571334495, + "grad_norm": 389.0728454589844, + "learning_rate": 1.6074063205095576e-06, + "loss": 41.0029, + "step": 381820 + }, + { + "epoch": 0.7713207577661332, + "grad_norm": 1105.4664306640625, + "learning_rate": 1.607149910904191e-06, + "loss": 30.0587, + "step": 381830 + }, + { + "epoch": 0.771340958398817, + "grad_norm": 548.2081909179688, + "learning_rate": 1.6068935178351657e-06, + "loss": 11.7212, + "step": 381840 + }, + { + "epoch": 0.7713611590315008, + "grad_norm": 386.83685302734375, + "learning_rate": 1.6066371413037286e-06, + "loss": 22.2225, + "step": 381850 + }, + { + "epoch": 0.7713813596641846, + "grad_norm": 316.70318603515625, + "learning_rate": 1.6063807813111315e-06, + "loss": 29.086, + "step": 381860 + }, + { + "epoch": 0.7714015602968685, + "grad_norm": 367.0750427246094, + "learning_rate": 1.6061244378586233e-06, + "loss": 19.2121, + "step": 381870 + }, + { + "epoch": 0.7714217609295523, + "grad_norm": 345.0139465332031, + "learning_rate": 1.6058681109474511e-06, + "loss": 16.8491, + "step": 381880 + }, + { + "epoch": 0.7714419615622361, + "grad_norm": 375.4194030761719, + "learning_rate": 1.605611800578868e-06, + "loss": 19.4328, + "step": 381890 + }, + { + "epoch": 0.7714621621949199, + "grad_norm": 1006.6027221679688, + "learning_rate": 1.605355506754121e-06, + "loss": 20.4447, + "step": 381900 + }, + { + "epoch": 0.7714823628276037, + "grad_norm": 578.6243286132812, + "learning_rate": 1.6050992294744578e-06, + "loss": 29.9668, + "step": 381910 + }, + { + "epoch": 0.7715025634602876, + "grad_norm": 136.8943328857422, + "learning_rate": 1.6048429687411294e-06, + "loss": 11.5305, + "step": 381920 + }, + { + "epoch": 0.7715227640929714, + "grad_norm": 386.48602294921875, + "learning_rate": 1.604586724555388e-06, + "loss": 20.3794, + "step": 381930 + }, + { + "epoch": 0.7715429647256552, + "grad_norm": 41.99283218383789, + "learning_rate": 1.6043304969184752e-06, + "loss": 18.3726, + "step": 381940 + }, + { + "epoch": 0.771563165358339, + "grad_norm": 658.3291015625, + "learning_rate": 1.6040742858316443e-06, + "loss": 22.7599, + "step": 381950 + }, + { + "epoch": 0.7715833659910228, + "grad_norm": 555.6497802734375, + "learning_rate": 
1.6038180912961455e-06, + "loss": 57.4267, + "step": 381960 + }, + { + "epoch": 0.7716035666237067, + "grad_norm": 280.7648620605469, + "learning_rate": 1.6035619133132247e-06, + "loss": 13.9776, + "step": 381970 + }, + { + "epoch": 0.7716237672563905, + "grad_norm": 430.380859375, + "learning_rate": 1.6033057518841294e-06, + "loss": 28.2771, + "step": 381980 + }, + { + "epoch": 0.7716439678890743, + "grad_norm": 596.4095458984375, + "learning_rate": 1.6030496070101104e-06, + "loss": 29.4352, + "step": 381990 + }, + { + "epoch": 0.7716641685217581, + "grad_norm": 219.3242645263672, + "learning_rate": 1.6027934786924187e-06, + "loss": 24.9992, + "step": 382000 + }, + { + "epoch": 0.7716843691544419, + "grad_norm": 126.5769271850586, + "learning_rate": 1.6025373669322963e-06, + "loss": 16.7645, + "step": 382010 + }, + { + "epoch": 0.7717045697871258, + "grad_norm": 350.49432373046875, + "learning_rate": 1.602281271730995e-06, + "loss": 7.0701, + "step": 382020 + }, + { + "epoch": 0.7717247704198096, + "grad_norm": 416.9051208496094, + "learning_rate": 1.602025193089764e-06, + "loss": 10.584, + "step": 382030 + }, + { + "epoch": 0.7717449710524934, + "grad_norm": 414.9043273925781, + "learning_rate": 1.60176913100985e-06, + "loss": 24.8061, + "step": 382040 + }, + { + "epoch": 0.7717651716851772, + "grad_norm": 322.49114990234375, + "learning_rate": 1.6015130854924999e-06, + "loss": 16.9898, + "step": 382050 + }, + { + "epoch": 0.771785372317861, + "grad_norm": 488.6578369140625, + "learning_rate": 1.601257056538964e-06, + "loss": 15.9868, + "step": 382060 + }, + { + "epoch": 0.7718055729505449, + "grad_norm": 138.75418090820312, + "learning_rate": 1.6010010441504891e-06, + "loss": 16.8504, + "step": 382070 + }, + { + "epoch": 0.7718257735832287, + "grad_norm": 693.740478515625, + "learning_rate": 1.6007450483283215e-06, + "loss": 25.4663, + "step": 382080 + }, + { + "epoch": 0.7718459742159124, + "grad_norm": 400.11407470703125, + "learning_rate": 1.6004890690737114e-06, + "loss": 26.5421, + "step": 382090 + }, + { + "epoch": 0.7718661748485962, + "grad_norm": 314.84747314453125, + "learning_rate": 1.600233106387904e-06, + "loss": 26.856, + "step": 382100 + }, + { + "epoch": 0.77188637548128, + "grad_norm": 316.89752197265625, + "learning_rate": 1.5999771602721499e-06, + "loss": 14.2165, + "step": 382110 + }, + { + "epoch": 0.7719065761139638, + "grad_norm": 109.03117370605469, + "learning_rate": 1.5997212307276943e-06, + "loss": 13.3175, + "step": 382120 + }, + { + "epoch": 0.7719267767466477, + "grad_norm": 645.1715698242188, + "learning_rate": 1.5994653177557833e-06, + "loss": 14.0254, + "step": 382130 + }, + { + "epoch": 0.7719469773793315, + "grad_norm": 871.7144775390625, + "learning_rate": 1.5992094213576682e-06, + "loss": 32.4052, + "step": 382140 + }, + { + "epoch": 0.7719671780120153, + "grad_norm": 479.74798583984375, + "learning_rate": 1.598953541534592e-06, + "loss": 24.3621, + "step": 382150 + }, + { + "epoch": 0.7719873786446991, + "grad_norm": 464.97760009765625, + "learning_rate": 1.598697678287805e-06, + "loss": 19.9426, + "step": 382160 + }, + { + "epoch": 0.772007579277383, + "grad_norm": 371.6749572753906, + "learning_rate": 1.5984418316185518e-06, + "loss": 16.0154, + "step": 382170 + }, + { + "epoch": 0.7720277799100668, + "grad_norm": 300.82476806640625, + "learning_rate": 1.598186001528082e-06, + "loss": 15.7633, + "step": 382180 + }, + { + "epoch": 0.7720479805427506, + "grad_norm": 1.938651442527771, + "learning_rate": 1.5979301880176407e-06, + "loss": 9.9504, + 
"step": 382190 + }, + { + "epoch": 0.7720681811754344, + "grad_norm": 88.23099517822266, + "learning_rate": 1.597674391088474e-06, + "loss": 13.4447, + "step": 382200 + }, + { + "epoch": 0.7720883818081182, + "grad_norm": 342.0703430175781, + "learning_rate": 1.597418610741831e-06, + "loss": 32.9549, + "step": 382210 + }, + { + "epoch": 0.772108582440802, + "grad_norm": 208.0787353515625, + "learning_rate": 1.5971628469789569e-06, + "loss": 11.0357, + "step": 382220 + }, + { + "epoch": 0.7721287830734859, + "grad_norm": 478.9021301269531, + "learning_rate": 1.5969070998010972e-06, + "loss": 16.869, + "step": 382230 + }, + { + "epoch": 0.7721489837061697, + "grad_norm": 797.6223754882812, + "learning_rate": 1.5966513692094992e-06, + "loss": 13.592, + "step": 382240 + }, + { + "epoch": 0.7721691843388535, + "grad_norm": 244.73097229003906, + "learning_rate": 1.596395655205411e-06, + "loss": 14.8758, + "step": 382250 + }, + { + "epoch": 0.7721893849715373, + "grad_norm": 2272.3330078125, + "learning_rate": 1.5961399577900783e-06, + "loss": 37.7566, + "step": 382260 + }, + { + "epoch": 0.7722095856042211, + "grad_norm": 439.3791809082031, + "learning_rate": 1.5958842769647442e-06, + "loss": 17.8415, + "step": 382270 + }, + { + "epoch": 0.772229786236905, + "grad_norm": 284.2250061035156, + "learning_rate": 1.5956286127306591e-06, + "loss": 17.0185, + "step": 382280 + }, + { + "epoch": 0.7722499868695888, + "grad_norm": 421.5446472167969, + "learning_rate": 1.5953729650890675e-06, + "loss": 16.382, + "step": 382290 + }, + { + "epoch": 0.7722701875022726, + "grad_norm": 405.8324279785156, + "learning_rate": 1.5951173340412134e-06, + "loss": 20.0054, + "step": 382300 + }, + { + "epoch": 0.7722903881349564, + "grad_norm": 276.3622741699219, + "learning_rate": 1.5948617195883448e-06, + "loss": 10.8424, + "step": 382310 + }, + { + "epoch": 0.7723105887676402, + "grad_norm": 543.3141479492188, + "learning_rate": 1.5946061217317082e-06, + "loss": 22.2879, + "step": 382320 + }, + { + "epoch": 0.7723307894003241, + "grad_norm": 1.156139612197876, + "learning_rate": 1.594350540472549e-06, + "loss": 17.0557, + "step": 382330 + }, + { + "epoch": 0.7723509900330079, + "grad_norm": 267.58477783203125, + "learning_rate": 1.59409497581211e-06, + "loss": 13.4345, + "step": 382340 + }, + { + "epoch": 0.7723711906656916, + "grad_norm": 72.86398315429688, + "learning_rate": 1.5938394277516412e-06, + "loss": 26.052, + "step": 382350 + }, + { + "epoch": 0.7723913912983754, + "grad_norm": 144.9212188720703, + "learning_rate": 1.5935838962923849e-06, + "loss": 13.0571, + "step": 382360 + }, + { + "epoch": 0.7724115919310592, + "grad_norm": 351.2877197265625, + "learning_rate": 1.5933283814355871e-06, + "loss": 22.0361, + "step": 382370 + }, + { + "epoch": 0.7724317925637431, + "grad_norm": 434.0843200683594, + "learning_rate": 1.5930728831824943e-06, + "loss": 13.3632, + "step": 382380 + }, + { + "epoch": 0.7724519931964269, + "grad_norm": 218.42372131347656, + "learning_rate": 1.59281740153435e-06, + "loss": 16.7192, + "step": 382390 + }, + { + "epoch": 0.7724721938291107, + "grad_norm": 431.15924072265625, + "learning_rate": 1.5925619364924016e-06, + "loss": 13.0559, + "step": 382400 + }, + { + "epoch": 0.7724923944617945, + "grad_norm": 329.8361511230469, + "learning_rate": 1.5923064880578937e-06, + "loss": 19.9663, + "step": 382410 + }, + { + "epoch": 0.7725125950944783, + "grad_norm": 403.38616943359375, + "learning_rate": 1.5920510562320685e-06, + "loss": 26.0965, + "step": 382420 + }, + { + "epoch": 
0.7725327957271622, + "grad_norm": 388.4576416015625, + "learning_rate": 1.591795641016175e-06, + "loss": 9.0168, + "step": 382430 + }, + { + "epoch": 0.772552996359846, + "grad_norm": 1017.5739135742188, + "learning_rate": 1.5915402424114545e-06, + "loss": 29.5508, + "step": 382440 + }, + { + "epoch": 0.7725731969925298, + "grad_norm": 560.5558471679688, + "learning_rate": 1.591284860419155e-06, + "loss": 11.9871, + "step": 382450 + }, + { + "epoch": 0.7725933976252136, + "grad_norm": 292.2740173339844, + "learning_rate": 1.591029495040518e-06, + "loss": 25.0928, + "step": 382460 + }, + { + "epoch": 0.7726135982578974, + "grad_norm": 1157.902587890625, + "learning_rate": 1.5907741462767916e-06, + "loss": 22.0831, + "step": 382470 + }, + { + "epoch": 0.7726337988905813, + "grad_norm": 1321.1202392578125, + "learning_rate": 1.5905188141292182e-06, + "loss": 22.8953, + "step": 382480 + }, + { + "epoch": 0.7726539995232651, + "grad_norm": 271.35601806640625, + "learning_rate": 1.5902634985990412e-06, + "loss": 15.5145, + "step": 382490 + }, + { + "epoch": 0.7726742001559489, + "grad_norm": 388.1890563964844, + "learning_rate": 1.5900081996875083e-06, + "loss": 18.603, + "step": 382500 + }, + { + "epoch": 0.7726944007886327, + "grad_norm": 155.25045776367188, + "learning_rate": 1.5897529173958615e-06, + "loss": 19.8483, + "step": 382510 + }, + { + "epoch": 0.7727146014213165, + "grad_norm": 203.2943572998047, + "learning_rate": 1.5894976517253436e-06, + "loss": 44.4506, + "step": 382520 + }, + { + "epoch": 0.7727348020540004, + "grad_norm": 518.5247192382812, + "learning_rate": 1.5892424026772008e-06, + "loss": 30.9917, + "step": 382530 + }, + { + "epoch": 0.7727550026866842, + "grad_norm": 248.99508666992188, + "learning_rate": 1.5889871702526799e-06, + "loss": 20.7534, + "step": 382540 + }, + { + "epoch": 0.772775203319368, + "grad_norm": 134.00927734375, + "learning_rate": 1.5887319544530182e-06, + "loss": 19.9531, + "step": 382550 + }, + { + "epoch": 0.7727954039520518, + "grad_norm": 527.314697265625, + "learning_rate": 1.5884767552794639e-06, + "loss": 15.5636, + "step": 382560 + }, + { + "epoch": 0.7728156045847356, + "grad_norm": 302.09124755859375, + "learning_rate": 1.5882215727332618e-06, + "loss": 18.4815, + "step": 382570 + }, + { + "epoch": 0.7728358052174195, + "grad_norm": 298.60162353515625, + "learning_rate": 1.5879664068156535e-06, + "loss": 16.8549, + "step": 382580 + }, + { + "epoch": 0.7728560058501033, + "grad_norm": 423.8826599121094, + "learning_rate": 1.5877112575278819e-06, + "loss": 10.2502, + "step": 382590 + }, + { + "epoch": 0.772876206482787, + "grad_norm": 211.8774871826172, + "learning_rate": 1.587456124871191e-06, + "loss": 24.0772, + "step": 382600 + }, + { + "epoch": 0.7728964071154708, + "grad_norm": 162.92193603515625, + "learning_rate": 1.5872010088468293e-06, + "loss": 10.396, + "step": 382610 + }, + { + "epoch": 0.7729166077481546, + "grad_norm": 704.2095336914062, + "learning_rate": 1.586945909456032e-06, + "loss": 10.3541, + "step": 382620 + }, + { + "epoch": 0.7729368083808384, + "grad_norm": 1019.6995239257812, + "learning_rate": 1.5866908267000464e-06, + "loss": 20.1296, + "step": 382630 + }, + { + "epoch": 0.7729570090135223, + "grad_norm": 48.223819732666016, + "learning_rate": 1.586435760580118e-06, + "loss": 14.7224, + "step": 382640 + }, + { + "epoch": 0.7729772096462061, + "grad_norm": 319.72552490234375, + "learning_rate": 1.5861807110974869e-06, + "loss": 18.7333, + "step": 382650 + }, + { + "epoch": 0.7729974102788899, + "grad_norm": 
380.0537109375, + "learning_rate": 1.585925678253396e-06, + "loss": 27.573, + "step": 382660 + }, + { + "epoch": 0.7730176109115737, + "grad_norm": 983.9434814453125, + "learning_rate": 1.5856706620490902e-06, + "loss": 22.5923, + "step": 382670 + }, + { + "epoch": 0.7730378115442575, + "grad_norm": 382.8108215332031, + "learning_rate": 1.5854156624858119e-06, + "loss": 21.1205, + "step": 382680 + }, + { + "epoch": 0.7730580121769414, + "grad_norm": 390.1148681640625, + "learning_rate": 1.5851606795648023e-06, + "loss": 18.5207, + "step": 382690 + }, + { + "epoch": 0.7730782128096252, + "grad_norm": 496.3921203613281, + "learning_rate": 1.5849057132873063e-06, + "loss": 13.3286, + "step": 382700 + }, + { + "epoch": 0.773098413442309, + "grad_norm": 454.15509033203125, + "learning_rate": 1.5846507636545645e-06, + "loss": 8.9551, + "step": 382710 + }, + { + "epoch": 0.7731186140749928, + "grad_norm": 617.4522094726562, + "learning_rate": 1.5843958306678219e-06, + "loss": 16.2409, + "step": 382720 + }, + { + "epoch": 0.7731388147076766, + "grad_norm": 58.4134407043457, + "learning_rate": 1.58414091432832e-06, + "loss": 11.9742, + "step": 382730 + }, + { + "epoch": 0.7731590153403605, + "grad_norm": 44.10279846191406, + "learning_rate": 1.5838860146372992e-06, + "loss": 12.1516, + "step": 382740 + }, + { + "epoch": 0.7731792159730443, + "grad_norm": 242.874755859375, + "learning_rate": 1.5836311315960051e-06, + "loss": 14.0989, + "step": 382750 + }, + { + "epoch": 0.7731994166057281, + "grad_norm": 386.6486511230469, + "learning_rate": 1.5833762652056773e-06, + "loss": 29.4466, + "step": 382760 + }, + { + "epoch": 0.7732196172384119, + "grad_norm": 244.56224060058594, + "learning_rate": 1.5831214154675605e-06, + "loss": 12.7304, + "step": 382770 + }, + { + "epoch": 0.7732398178710957, + "grad_norm": 321.93145751953125, + "learning_rate": 1.5828665823828943e-06, + "loss": 16.6896, + "step": 382780 + }, + { + "epoch": 0.7732600185037796, + "grad_norm": 521.9586181640625, + "learning_rate": 1.5826117659529234e-06, + "loss": 13.361, + "step": 382790 + }, + { + "epoch": 0.7732802191364634, + "grad_norm": 635.6339111328125, + "learning_rate": 1.582356966178888e-06, + "loss": 28.9973, + "step": 382800 + }, + { + "epoch": 0.7733004197691472, + "grad_norm": 109.14483642578125, + "learning_rate": 1.5821021830620287e-06, + "loss": 13.728, + "step": 382810 + }, + { + "epoch": 0.773320620401831, + "grad_norm": 430.0278625488281, + "learning_rate": 1.5818474166035907e-06, + "loss": 15.9968, + "step": 382820 + }, + { + "epoch": 0.7733408210345148, + "grad_norm": 169.47763061523438, + "learning_rate": 1.5815926668048138e-06, + "loss": 14.3138, + "step": 382830 + }, + { + "epoch": 0.7733610216671987, + "grad_norm": 941.049560546875, + "learning_rate": 1.5813379336669377e-06, + "loss": 18.2106, + "step": 382840 + }, + { + "epoch": 0.7733812222998825, + "grad_norm": 530.6898193359375, + "learning_rate": 1.581083217191206e-06, + "loss": 17.7321, + "step": 382850 + }, + { + "epoch": 0.7734014229325662, + "grad_norm": 179.96353149414062, + "learning_rate": 1.5808285173788617e-06, + "loss": 13.7731, + "step": 382860 + }, + { + "epoch": 0.77342162356525, + "grad_norm": 335.7576904296875, + "learning_rate": 1.5805738342311444e-06, + "loss": 9.9771, + "step": 382870 + }, + { + "epoch": 0.7734418241979338, + "grad_norm": 1528.3177490234375, + "learning_rate": 1.5803191677492941e-06, + "loss": 24.7554, + "step": 382880 + }, + { + "epoch": 0.7734620248306177, + "grad_norm": 286.49993896484375, + "learning_rate": 
1.5800645179345548e-06, + "loss": 23.7924, + "step": 382890 + }, + { + "epoch": 0.7734822254633015, + "grad_norm": 299.359130859375, + "learning_rate": 1.5798098847881664e-06, + "loss": 24.5248, + "step": 382900 + }, + { + "epoch": 0.7735024260959853, + "grad_norm": 167.763427734375, + "learning_rate": 1.5795552683113679e-06, + "loss": 20.9832, + "step": 382910 + }, + { + "epoch": 0.7735226267286691, + "grad_norm": 340.8807067871094, + "learning_rate": 1.579300668505403e-06, + "loss": 19.0091, + "step": 382920 + }, + { + "epoch": 0.7735428273613529, + "grad_norm": 749.7567749023438, + "learning_rate": 1.5790460853715123e-06, + "loss": 21.8559, + "step": 382930 + }, + { + "epoch": 0.7735630279940368, + "grad_norm": 55.97519302368164, + "learning_rate": 1.578791518910937e-06, + "loss": 12.544, + "step": 382940 + }, + { + "epoch": 0.7735832286267206, + "grad_norm": 268.6002197265625, + "learning_rate": 1.5785369691249147e-06, + "loss": 40.3558, + "step": 382950 + }, + { + "epoch": 0.7736034292594044, + "grad_norm": 138.2102508544922, + "learning_rate": 1.5782824360146897e-06, + "loss": 39.5704, + "step": 382960 + }, + { + "epoch": 0.7736236298920882, + "grad_norm": 119.10359954833984, + "learning_rate": 1.5780279195815018e-06, + "loss": 18.7883, + "step": 382970 + }, + { + "epoch": 0.773643830524772, + "grad_norm": 506.84320068359375, + "learning_rate": 1.5777734198265887e-06, + "loss": 22.4223, + "step": 382980 + }, + { + "epoch": 0.7736640311574559, + "grad_norm": 374.88555908203125, + "learning_rate": 1.5775189367511946e-06, + "loss": 15.4941, + "step": 382990 + }, + { + "epoch": 0.7736842317901397, + "grad_norm": 389.50445556640625, + "learning_rate": 1.5772644703565564e-06, + "loss": 12.4242, + "step": 383000 + }, + { + "epoch": 0.7737044324228235, + "grad_norm": 238.9445037841797, + "learning_rate": 1.5770100206439177e-06, + "loss": 17.647, + "step": 383010 + }, + { + "epoch": 0.7737246330555073, + "grad_norm": 309.0486145019531, + "learning_rate": 1.576755587614517e-06, + "loss": 28.6423, + "step": 383020 + }, + { + "epoch": 0.7737448336881911, + "grad_norm": 172.6853485107422, + "learning_rate": 1.5765011712695928e-06, + "loss": 11.7171, + "step": 383030 + }, + { + "epoch": 0.773765034320875, + "grad_norm": 702.5298461914062, + "learning_rate": 1.5762467716103884e-06, + "loss": 21.4869, + "step": 383040 + }, + { + "epoch": 0.7737852349535588, + "grad_norm": 339.43353271484375, + "learning_rate": 1.5759923886381402e-06, + "loss": 19.1555, + "step": 383050 + }, + { + "epoch": 0.7738054355862426, + "grad_norm": 338.2257995605469, + "learning_rate": 1.5757380223540914e-06, + "loss": 20.7537, + "step": 383060 + }, + { + "epoch": 0.7738256362189264, + "grad_norm": 78.8567886352539, + "learning_rate": 1.5754836727594786e-06, + "loss": 11.621, + "step": 383070 + }, + { + "epoch": 0.7738458368516102, + "grad_norm": 433.5685119628906, + "learning_rate": 1.5752293398555446e-06, + "loss": 14.2845, + "step": 383080 + }, + { + "epoch": 0.7738660374842941, + "grad_norm": 183.4577178955078, + "learning_rate": 1.5749750236435279e-06, + "loss": 11.6864, + "step": 383090 + }, + { + "epoch": 0.7738862381169779, + "grad_norm": 1075.549072265625, + "learning_rate": 1.5747207241246654e-06, + "loss": 23.7103, + "step": 383100 + }, + { + "epoch": 0.7739064387496616, + "grad_norm": 190.39039611816406, + "learning_rate": 1.5744664413002004e-06, + "loss": 12.5373, + "step": 383110 + }, + { + "epoch": 0.7739266393823454, + "grad_norm": 687.258056640625, + "learning_rate": 1.5742121751713708e-06, + "loss": 
20.1562, + "step": 383120 + }, + { + "epoch": 0.7739468400150292, + "grad_norm": 233.72378540039062, + "learning_rate": 1.5739579257394132e-06, + "loss": 18.9227, + "step": 383130 + }, + { + "epoch": 0.773967040647713, + "grad_norm": 2.5841612815856934, + "learning_rate": 1.57370369300557e-06, + "loss": 14.079, + "step": 383140 + }, + { + "epoch": 0.7739872412803969, + "grad_norm": 424.8268737792969, + "learning_rate": 1.5734494769710817e-06, + "loss": 12.2895, + "step": 383150 + }, + { + "epoch": 0.7740074419130807, + "grad_norm": 736.3742065429688, + "learning_rate": 1.5731952776371828e-06, + "loss": 15.886, + "step": 383160 + }, + { + "epoch": 0.7740276425457645, + "grad_norm": 557.7459716796875, + "learning_rate": 1.5729410950051139e-06, + "loss": 19.9959, + "step": 383170 + }, + { + "epoch": 0.7740478431784483, + "grad_norm": 377.92144775390625, + "learning_rate": 1.572686929076116e-06, + "loss": 19.8222, + "step": 383180 + }, + { + "epoch": 0.7740680438111321, + "grad_norm": 218.2261962890625, + "learning_rate": 1.5724327798514267e-06, + "loss": 22.5208, + "step": 383190 + }, + { + "epoch": 0.774088244443816, + "grad_norm": 151.34141540527344, + "learning_rate": 1.5721786473322825e-06, + "loss": 11.3797, + "step": 383200 + }, + { + "epoch": 0.7741084450764998, + "grad_norm": 483.9348449707031, + "learning_rate": 1.571924531519924e-06, + "loss": 20.1426, + "step": 383210 + }, + { + "epoch": 0.7741286457091836, + "grad_norm": 32.00010681152344, + "learning_rate": 1.571670432415593e-06, + "loss": 14.2402, + "step": 383220 + }, + { + "epoch": 0.7741488463418674, + "grad_norm": 31.30953598022461, + "learning_rate": 1.5714163500205203e-06, + "loss": 24.5089, + "step": 383230 + }, + { + "epoch": 0.7741690469745512, + "grad_norm": 777.2874145507812, + "learning_rate": 1.5711622843359492e-06, + "loss": 40.0035, + "step": 383240 + }, + { + "epoch": 0.7741892476072351, + "grad_norm": 375.6747131347656, + "learning_rate": 1.5709082353631188e-06, + "loss": 23.9119, + "step": 383250 + }, + { + "epoch": 0.7742094482399189, + "grad_norm": 434.52178955078125, + "learning_rate": 1.5706542031032663e-06, + "loss": 21.5206, + "step": 383260 + }, + { + "epoch": 0.7742296488726027, + "grad_norm": 461.241943359375, + "learning_rate": 1.570400187557627e-06, + "loss": 10.6992, + "step": 383270 + }, + { + "epoch": 0.7742498495052865, + "grad_norm": 490.8486022949219, + "learning_rate": 1.5701461887274428e-06, + "loss": 21.7874, + "step": 383280 + }, + { + "epoch": 0.7742700501379703, + "grad_norm": 435.8128662109375, + "learning_rate": 1.5698922066139505e-06, + "loss": 21.2518, + "step": 383290 + }, + { + "epoch": 0.7742902507706542, + "grad_norm": 376.1637878417969, + "learning_rate": 1.5696382412183853e-06, + "loss": 17.9095, + "step": 383300 + }, + { + "epoch": 0.774310451403338, + "grad_norm": 61.54003143310547, + "learning_rate": 1.5693842925419894e-06, + "loss": 17.7064, + "step": 383310 + }, + { + "epoch": 0.7743306520360218, + "grad_norm": 1618.9508056640625, + "learning_rate": 1.5691303605859964e-06, + "loss": 38.0497, + "step": 383320 + }, + { + "epoch": 0.7743508526687056, + "grad_norm": 594.95849609375, + "learning_rate": 1.568876445351648e-06, + "loss": 17.9375, + "step": 383330 + }, + { + "epoch": 0.7743710533013894, + "grad_norm": 187.2743377685547, + "learning_rate": 1.5686225468401795e-06, + "loss": 31.4732, + "step": 383340 + }, + { + "epoch": 0.7743912539340733, + "grad_norm": 350.3153381347656, + "learning_rate": 1.5683686650528267e-06, + "loss": 14.1288, + "step": 383350 + }, + { + 
"epoch": 0.7744114545667571, + "grad_norm": 310.48394775390625, + "learning_rate": 1.5681147999908308e-06, + "loss": 24.6163, + "step": 383360 + }, + { + "epoch": 0.7744316551994408, + "grad_norm": 636.476806640625, + "learning_rate": 1.567860951655425e-06, + "loss": 20.272, + "step": 383370 + }, + { + "epoch": 0.7744518558321246, + "grad_norm": 602.2848510742188, + "learning_rate": 1.5676071200478504e-06, + "loss": 30.9711, + "step": 383380 + }, + { + "epoch": 0.7744720564648084, + "grad_norm": 381.50213623046875, + "learning_rate": 1.5673533051693413e-06, + "loss": 18.0637, + "step": 383390 + }, + { + "epoch": 0.7744922570974923, + "grad_norm": 1670.898681640625, + "learning_rate": 1.567099507021137e-06, + "loss": 38.0073, + "step": 383400 + }, + { + "epoch": 0.7745124577301761, + "grad_norm": 166.84677124023438, + "learning_rate": 1.5668457256044733e-06, + "loss": 22.5448, + "step": 383410 + }, + { + "epoch": 0.7745326583628599, + "grad_norm": 5.103789806365967, + "learning_rate": 1.566591960920586e-06, + "loss": 23.9697, + "step": 383420 + }, + { + "epoch": 0.7745528589955437, + "grad_norm": 874.5416259765625, + "learning_rate": 1.5663382129707144e-06, + "loss": 24.9892, + "step": 383430 + }, + { + "epoch": 0.7745730596282275, + "grad_norm": 49.70535659790039, + "learning_rate": 1.5660844817560939e-06, + "loss": 12.3777, + "step": 383440 + }, + { + "epoch": 0.7745932602609114, + "grad_norm": 645.6976318359375, + "learning_rate": 1.5658307672779594e-06, + "loss": 17.1051, + "step": 383450 + }, + { + "epoch": 0.7746134608935952, + "grad_norm": 410.8448486328125, + "learning_rate": 1.5655770695375494e-06, + "loss": 10.888, + "step": 383460 + }, + { + "epoch": 0.774633661526279, + "grad_norm": 938.0996704101562, + "learning_rate": 1.5653233885361013e-06, + "loss": 24.6534, + "step": 383470 + }, + { + "epoch": 0.7746538621589628, + "grad_norm": 339.14263916015625, + "learning_rate": 1.5650697242748513e-06, + "loss": 15.8851, + "step": 383480 + }, + { + "epoch": 0.7746740627916466, + "grad_norm": 380.4107666015625, + "learning_rate": 1.5648160767550324e-06, + "loss": 16.784, + "step": 383490 + }, + { + "epoch": 0.7746942634243305, + "grad_norm": 259.8949890136719, + "learning_rate": 1.5645624459778858e-06, + "loss": 10.5215, + "step": 383500 + }, + { + "epoch": 0.7747144640570143, + "grad_norm": 1726.8370361328125, + "learning_rate": 1.5643088319446441e-06, + "loss": 26.8748, + "step": 383510 + }, + { + "epoch": 0.7747346646896981, + "grad_norm": 353.5360107421875, + "learning_rate": 1.5640552346565441e-06, + "loss": 24.7704, + "step": 383520 + }, + { + "epoch": 0.7747548653223819, + "grad_norm": 384.0005187988281, + "learning_rate": 1.563801654114821e-06, + "loss": 18.4352, + "step": 383530 + }, + { + "epoch": 0.7747750659550657, + "grad_norm": 416.81451416015625, + "learning_rate": 1.5635480903207139e-06, + "loss": 17.7697, + "step": 383540 + }, + { + "epoch": 0.7747952665877496, + "grad_norm": 829.3290405273438, + "learning_rate": 1.563294543275457e-06, + "loss": 21.636, + "step": 383550 + }, + { + "epoch": 0.7748154672204334, + "grad_norm": 925.5904541015625, + "learning_rate": 1.5630410129802837e-06, + "loss": 36.7419, + "step": 383560 + }, + { + "epoch": 0.7748356678531172, + "grad_norm": 170.02854919433594, + "learning_rate": 1.5627874994364335e-06, + "loss": 10.2682, + "step": 383570 + }, + { + "epoch": 0.774855868485801, + "grad_norm": 68.25321197509766, + "learning_rate": 1.5625340026451396e-06, + "loss": 12.093, + "step": 383580 + }, + { + "epoch": 0.7748760691184848, + 
"grad_norm": 423.32275390625, + "learning_rate": 1.562280522607637e-06, + "loss": 14.1, + "step": 383590 + }, + { + "epoch": 0.7748962697511687, + "grad_norm": 518.2012939453125, + "learning_rate": 1.5620270593251635e-06, + "loss": 18.0794, + "step": 383600 + }, + { + "epoch": 0.7749164703838525, + "grad_norm": 171.8751220703125, + "learning_rate": 1.561773612798952e-06, + "loss": 12.9444, + "step": 383610 + }, + { + "epoch": 0.7749366710165362, + "grad_norm": 553.6185302734375, + "learning_rate": 1.5615201830302402e-06, + "loss": 13.3761, + "step": 383620 + }, + { + "epoch": 0.77495687164922, + "grad_norm": 387.4228210449219, + "learning_rate": 1.5612667700202616e-06, + "loss": 17.978, + "step": 383630 + }, + { + "epoch": 0.7749770722819038, + "grad_norm": 317.31097412109375, + "learning_rate": 1.5610133737702503e-06, + "loss": 25.5296, + "step": 383640 + }, + { + "epoch": 0.7749972729145876, + "grad_norm": 90.6382827758789, + "learning_rate": 1.560759994281445e-06, + "loss": 13.9602, + "step": 383650 + }, + { + "epoch": 0.7750174735472715, + "grad_norm": 312.0516052246094, + "learning_rate": 1.5605066315550759e-06, + "loss": 13.112, + "step": 383660 + }, + { + "epoch": 0.7750376741799553, + "grad_norm": 305.4877624511719, + "learning_rate": 1.5602532855923824e-06, + "loss": 15.4547, + "step": 383670 + }, + { + "epoch": 0.7750578748126391, + "grad_norm": 603.0599975585938, + "learning_rate": 1.5599999563945955e-06, + "loss": 11.5011, + "step": 383680 + }, + { + "epoch": 0.7750780754453229, + "grad_norm": 338.0648498535156, + "learning_rate": 1.5597466439629532e-06, + "loss": 16.2301, + "step": 383690 + }, + { + "epoch": 0.7750982760780067, + "grad_norm": 606.4363403320312, + "learning_rate": 1.5594933482986885e-06, + "loss": 31.9384, + "step": 383700 + }, + { + "epoch": 0.7751184767106906, + "grad_norm": 1.41245436668396, + "learning_rate": 1.5592400694030342e-06, + "loss": 15.2708, + "step": 383710 + }, + { + "epoch": 0.7751386773433744, + "grad_norm": 676.3465576171875, + "learning_rate": 1.5589868072772279e-06, + "loss": 51.5045, + "step": 383720 + }, + { + "epoch": 0.7751588779760582, + "grad_norm": 7.440090179443359, + "learning_rate": 1.558733561922503e-06, + "loss": 7.3795, + "step": 383730 + }, + { + "epoch": 0.775179078608742, + "grad_norm": 759.7951049804688, + "learning_rate": 1.5584803333400917e-06, + "loss": 21.857, + "step": 383740 + }, + { + "epoch": 0.7751992792414258, + "grad_norm": 178.63719177246094, + "learning_rate": 1.5582271215312294e-06, + "loss": 15.4087, + "step": 383750 + }, + { + "epoch": 0.7752194798741097, + "grad_norm": 778.220947265625, + "learning_rate": 1.5579739264971544e-06, + "loss": 25.2869, + "step": 383760 + }, + { + "epoch": 0.7752396805067935, + "grad_norm": 338.1744079589844, + "learning_rate": 1.5577207482390933e-06, + "loss": 14.539, + "step": 383770 + }, + { + "epoch": 0.7752598811394773, + "grad_norm": 483.3032531738281, + "learning_rate": 1.5574675867582845e-06, + "loss": 17.1825, + "step": 383780 + }, + { + "epoch": 0.7752800817721611, + "grad_norm": 305.2230529785156, + "learning_rate": 1.557214442055962e-06, + "loss": 14.487, + "step": 383790 + }, + { + "epoch": 0.7753002824048449, + "grad_norm": 85.836669921875, + "learning_rate": 1.556961314133359e-06, + "loss": 16.7747, + "step": 383800 + }, + { + "epoch": 0.7753204830375288, + "grad_norm": 368.25970458984375, + "learning_rate": 1.5567082029917074e-06, + "loss": 12.5381, + "step": 383810 + }, + { + "epoch": 0.7753406836702126, + "grad_norm": 348.8280334472656, + "learning_rate": 
1.5564551086322428e-06, + "loss": 21.189, + "step": 383820 + }, + { + "epoch": 0.7753608843028964, + "grad_norm": 331.0599365234375, + "learning_rate": 1.556202031056201e-06, + "loss": 8.9081, + "step": 383830 + }, + { + "epoch": 0.7753810849355802, + "grad_norm": 248.07205200195312, + "learning_rate": 1.5559489702648096e-06, + "loss": 18.0033, + "step": 383840 + }, + { + "epoch": 0.775401285568264, + "grad_norm": 461.08135986328125, + "learning_rate": 1.5556959262593058e-06, + "loss": 15.9705, + "step": 383850 + }, + { + "epoch": 0.7754214862009479, + "grad_norm": 298.7308044433594, + "learning_rate": 1.5554428990409232e-06, + "loss": 13.7456, + "step": 383860 + }, + { + "epoch": 0.7754416868336317, + "grad_norm": 529.7130737304688, + "learning_rate": 1.5551898886108947e-06, + "loss": 15.5866, + "step": 383870 + }, + { + "epoch": 0.7754618874663154, + "grad_norm": 293.4848327636719, + "learning_rate": 1.5549368949704507e-06, + "loss": 18.0192, + "step": 383880 + }, + { + "epoch": 0.7754820880989992, + "grad_norm": 474.0185852050781, + "learning_rate": 1.5546839181208284e-06, + "loss": 14.8301, + "step": 383890 + }, + { + "epoch": 0.775502288731683, + "grad_norm": 489.7498474121094, + "learning_rate": 1.554430958063259e-06, + "loss": 25.3866, + "step": 383900 + }, + { + "epoch": 0.7755224893643669, + "grad_norm": 499.2673645019531, + "learning_rate": 1.5541780147989733e-06, + "loss": 16.6086, + "step": 383910 + }, + { + "epoch": 0.7755426899970507, + "grad_norm": 118.5300521850586, + "learning_rate": 1.5539250883292078e-06, + "loss": 20.088, + "step": 383920 + }, + { + "epoch": 0.7755628906297345, + "grad_norm": 288.1347351074219, + "learning_rate": 1.5536721786551918e-06, + "loss": 25.9228, + "step": 383930 + }, + { + "epoch": 0.7755830912624183, + "grad_norm": 574.680419921875, + "learning_rate": 1.5534192857781611e-06, + "loss": 11.4823, + "step": 383940 + }, + { + "epoch": 0.7756032918951021, + "grad_norm": 302.4960632324219, + "learning_rate": 1.5531664096993454e-06, + "loss": 11.9024, + "step": 383950 + }, + { + "epoch": 0.775623492527786, + "grad_norm": 5.106573581695557, + "learning_rate": 1.55291355041998e-06, + "loss": 14.0364, + "step": 383960 + }, + { + "epoch": 0.7756436931604698, + "grad_norm": 287.447265625, + "learning_rate": 1.552660707941296e-06, + "loss": 12.1343, + "step": 383970 + }, + { + "epoch": 0.7756638937931536, + "grad_norm": 295.5054016113281, + "learning_rate": 1.552407882264524e-06, + "loss": 18.9641, + "step": 383980 + }, + { + "epoch": 0.7756840944258374, + "grad_norm": 318.2136535644531, + "learning_rate": 1.552155073390899e-06, + "loss": 33.7459, + "step": 383990 + }, + { + "epoch": 0.7757042950585212, + "grad_norm": 103.75553131103516, + "learning_rate": 1.551902281321651e-06, + "loss": 16.6785, + "step": 384000 + }, + { + "epoch": 0.775724495691205, + "grad_norm": 128.6370849609375, + "learning_rate": 1.5516495060580145e-06, + "loss": 12.6248, + "step": 384010 + }, + { + "epoch": 0.7757446963238889, + "grad_norm": 266.0093688964844, + "learning_rate": 1.5513967476012198e-06, + "loss": 22.2741, + "step": 384020 + }, + { + "epoch": 0.7757648969565727, + "grad_norm": 451.62847900390625, + "learning_rate": 1.551144005952498e-06, + "loss": 18.3263, + "step": 384030 + }, + { + "epoch": 0.7757850975892565, + "grad_norm": 306.1957092285156, + "learning_rate": 1.5508912811130832e-06, + "loss": 8.4984, + "step": 384040 + }, + { + "epoch": 0.7758052982219403, + "grad_norm": 294.24359130859375, + "learning_rate": 1.5506385730842062e-06, + "loss": 21.9452, + "step": 
384050 + }, + { + "epoch": 0.7758254988546242, + "grad_norm": 214.49560546875, + "learning_rate": 1.5503858818670963e-06, + "loss": 11.1492, + "step": 384060 + }, + { + "epoch": 0.775845699487308, + "grad_norm": 338.5537109375, + "learning_rate": 1.5501332074629876e-06, + "loss": 24.4503, + "step": 384070 + }, + { + "epoch": 0.7758659001199918, + "grad_norm": 360.83349609375, + "learning_rate": 1.5498805498731146e-06, + "loss": 15.6625, + "step": 384080 + }, + { + "epoch": 0.7758861007526756, + "grad_norm": 462.2666931152344, + "learning_rate": 1.549627909098702e-06, + "loss": 19.1801, + "step": 384090 + }, + { + "epoch": 0.7759063013853594, + "grad_norm": 611.6724243164062, + "learning_rate": 1.5493752851409844e-06, + "loss": 31.4426, + "step": 384100 + }, + { + "epoch": 0.7759265020180433, + "grad_norm": 395.310546875, + "learning_rate": 1.5491226780011954e-06, + "loss": 37.8498, + "step": 384110 + }, + { + "epoch": 0.7759467026507271, + "grad_norm": 191.07583618164062, + "learning_rate": 1.548870087680563e-06, + "loss": 17.6473, + "step": 384120 + }, + { + "epoch": 0.7759669032834109, + "grad_norm": 781.8247680664062, + "learning_rate": 1.5486175141803177e-06, + "loss": 25.2453, + "step": 384130 + }, + { + "epoch": 0.7759871039160946, + "grad_norm": 204.58645629882812, + "learning_rate": 1.5483649575016929e-06, + "loss": 19.3628, + "step": 384140 + }, + { + "epoch": 0.7760073045487784, + "grad_norm": 261.19061279296875, + "learning_rate": 1.5481124176459195e-06, + "loss": 25.1665, + "step": 384150 + }, + { + "epoch": 0.7760275051814622, + "grad_norm": 277.91497802734375, + "learning_rate": 1.5478598946142277e-06, + "loss": 9.8039, + "step": 384160 + }, + { + "epoch": 0.7760477058141461, + "grad_norm": 472.0810241699219, + "learning_rate": 1.5476073884078463e-06, + "loss": 26.7061, + "step": 384170 + }, + { + "epoch": 0.7760679064468299, + "grad_norm": 609.7418212890625, + "learning_rate": 1.5473548990280097e-06, + "loss": 16.6024, + "step": 384180 + }, + { + "epoch": 0.7760881070795137, + "grad_norm": 554.5270385742188, + "learning_rate": 1.5471024264759466e-06, + "loss": 22.243, + "step": 384190 + }, + { + "epoch": 0.7761083077121975, + "grad_norm": 230.99478149414062, + "learning_rate": 1.5468499707528856e-06, + "loss": 13.1364, + "step": 384200 + }, + { + "epoch": 0.7761285083448813, + "grad_norm": 371.1297912597656, + "learning_rate": 1.5465975318600607e-06, + "loss": 21.091, + "step": 384210 + }, + { + "epoch": 0.7761487089775652, + "grad_norm": 273.14007568359375, + "learning_rate": 1.5463451097986993e-06, + "loss": 24.4958, + "step": 384220 + }, + { + "epoch": 0.776168909610249, + "grad_norm": 440.2649841308594, + "learning_rate": 1.5460927045700342e-06, + "loss": 26.5645, + "step": 384230 + }, + { + "epoch": 0.7761891102429328, + "grad_norm": 220.97445678710938, + "learning_rate": 1.5458403161752943e-06, + "loss": 18.2712, + "step": 384240 + }, + { + "epoch": 0.7762093108756166, + "grad_norm": 714.91845703125, + "learning_rate": 1.5455879446157084e-06, + "loss": 36.6765, + "step": 384250 + }, + { + "epoch": 0.7762295115083004, + "grad_norm": 690.603515625, + "learning_rate": 1.5453355898925094e-06, + "loss": 25.6641, + "step": 384260 + }, + { + "epoch": 0.7762497121409843, + "grad_norm": 524.8508911132812, + "learning_rate": 1.5450832520069241e-06, + "loss": 17.8045, + "step": 384270 + }, + { + "epoch": 0.7762699127736681, + "grad_norm": 715.39306640625, + "learning_rate": 1.5448309309601855e-06, + "loss": 26.0652, + "step": 384280 + }, + { + "epoch": 0.7762901134063519, + 
"grad_norm": 221.43211364746094, + "learning_rate": 1.5445786267535207e-06, + "loss": 12.6999, + "step": 384290 + }, + { + "epoch": 0.7763103140390357, + "grad_norm": 348.0744323730469, + "learning_rate": 1.5443263393881619e-06, + "loss": 19.6884, + "step": 384300 + }, + { + "epoch": 0.7763305146717195, + "grad_norm": 226.06593322753906, + "learning_rate": 1.5440740688653372e-06, + "loss": 17.7141, + "step": 384310 + }, + { + "epoch": 0.7763507153044034, + "grad_norm": 483.87689208984375, + "learning_rate": 1.543821815186275e-06, + "loss": 22.7634, + "step": 384320 + }, + { + "epoch": 0.7763709159370872, + "grad_norm": 778.535400390625, + "learning_rate": 1.5435695783522076e-06, + "loss": 24.2931, + "step": 384330 + }, + { + "epoch": 0.776391116569771, + "grad_norm": 217.14146423339844, + "learning_rate": 1.5433173583643628e-06, + "loss": 12.3832, + "step": 384340 + }, + { + "epoch": 0.7764113172024548, + "grad_norm": 433.1095886230469, + "learning_rate": 1.5430651552239684e-06, + "loss": 20.548, + "step": 384350 + }, + { + "epoch": 0.7764315178351386, + "grad_norm": 288.0685729980469, + "learning_rate": 1.5428129689322552e-06, + "loss": 20.827, + "step": 384360 + }, + { + "epoch": 0.7764517184678225, + "grad_norm": 240.14483642578125, + "learning_rate": 1.5425607994904552e-06, + "loss": 12.2251, + "step": 384370 + }, + { + "epoch": 0.7764719191005063, + "grad_norm": 120.36453247070312, + "learning_rate": 1.5423086468997917e-06, + "loss": 14.1546, + "step": 384380 + }, + { + "epoch": 0.77649211973319, + "grad_norm": 304.1021728515625, + "learning_rate": 1.5420565111614965e-06, + "loss": 17.2286, + "step": 384390 + }, + { + "epoch": 0.7765123203658738, + "grad_norm": 324.0277099609375, + "learning_rate": 1.5418043922768e-06, + "loss": 12.4004, + "step": 384400 + }, + { + "epoch": 0.7765325209985576, + "grad_norm": 408.1206359863281, + "learning_rate": 1.5415522902469293e-06, + "loss": 20.7216, + "step": 384410 + }, + { + "epoch": 0.7765527216312414, + "grad_norm": 309.0054931640625, + "learning_rate": 1.5413002050731118e-06, + "loss": 21.8756, + "step": 384420 + }, + { + "epoch": 0.7765729222639253, + "grad_norm": 208.31134033203125, + "learning_rate": 1.5410481367565777e-06, + "loss": 16.1911, + "step": 384430 + }, + { + "epoch": 0.7765931228966091, + "grad_norm": 348.1766052246094, + "learning_rate": 1.5407960852985582e-06, + "loss": 17.2752, + "step": 384440 + }, + { + "epoch": 0.7766133235292929, + "grad_norm": 481.5426025390625, + "learning_rate": 1.540544050700276e-06, + "loss": 7.244, + "step": 384450 + }, + { + "epoch": 0.7766335241619767, + "grad_norm": 551.3931274414062, + "learning_rate": 1.5402920329629627e-06, + "loss": 18.0558, + "step": 384460 + }, + { + "epoch": 0.7766537247946605, + "grad_norm": 240.8903350830078, + "learning_rate": 1.5400400320878484e-06, + "loss": 10.1459, + "step": 384470 + }, + { + "epoch": 0.7766739254273444, + "grad_norm": 313.7499084472656, + "learning_rate": 1.539788048076159e-06, + "loss": 20.1553, + "step": 384480 + }, + { + "epoch": 0.7766941260600282, + "grad_norm": 238.50909423828125, + "learning_rate": 1.539536080929121e-06, + "loss": 45.4269, + "step": 384490 + }, + { + "epoch": 0.776714326692712, + "grad_norm": 214.79942321777344, + "learning_rate": 1.5392841306479667e-06, + "loss": 5.5206, + "step": 384500 + }, + { + "epoch": 0.7767345273253958, + "grad_norm": 2.047757863998413, + "learning_rate": 1.539032197233921e-06, + "loss": 10.5082, + "step": 384510 + }, + { + "epoch": 0.7767547279580796, + "grad_norm": 163.90113830566406, + 
"learning_rate": 1.5387802806882118e-06, + "loss": 16.4249, + "step": 384520 + }, + { + "epoch": 0.7767749285907635, + "grad_norm": 77.99993133544922, + "learning_rate": 1.5385283810120688e-06, + "loss": 26.0398, + "step": 384530 + }, + { + "epoch": 0.7767951292234473, + "grad_norm": 662.6762084960938, + "learning_rate": 1.5382764982067172e-06, + "loss": 24.6799, + "step": 384540 + }, + { + "epoch": 0.7768153298561311, + "grad_norm": 535.010498046875, + "learning_rate": 1.5380246322733883e-06, + "loss": 43.2987, + "step": 384550 + }, + { + "epoch": 0.7768355304888149, + "grad_norm": 92.85782623291016, + "learning_rate": 1.5377727832133049e-06, + "loss": 16.2858, + "step": 384560 + }, + { + "epoch": 0.7768557311214987, + "grad_norm": 374.7579040527344, + "learning_rate": 1.537520951027699e-06, + "loss": 17.3287, + "step": 384570 + }, + { + "epoch": 0.7768759317541826, + "grad_norm": 161.86851501464844, + "learning_rate": 1.537269135717796e-06, + "loss": 16.0337, + "step": 384580 + }, + { + "epoch": 0.7768961323868664, + "grad_norm": 99.89845275878906, + "learning_rate": 1.5370173372848218e-06, + "loss": 8.8479, + "step": 384590 + }, + { + "epoch": 0.7769163330195502, + "grad_norm": 339.26947021484375, + "learning_rate": 1.5367655557300066e-06, + "loss": 10.8958, + "step": 384600 + }, + { + "epoch": 0.776936533652234, + "grad_norm": 823.4581298828125, + "learning_rate": 1.5365137910545747e-06, + "loss": 20.2525, + "step": 384610 + }, + { + "epoch": 0.7769567342849178, + "grad_norm": 163.99513244628906, + "learning_rate": 1.5362620432597559e-06, + "loss": 25.8464, + "step": 384620 + }, + { + "epoch": 0.7769769349176017, + "grad_norm": 422.450927734375, + "learning_rate": 1.5360103123467756e-06, + "loss": 31.5294, + "step": 384630 + }, + { + "epoch": 0.7769971355502855, + "grad_norm": 609.9090576171875, + "learning_rate": 1.5357585983168593e-06, + "loss": 32.4314, + "step": 384640 + }, + { + "epoch": 0.7770173361829692, + "grad_norm": 528.384765625, + "learning_rate": 1.5355069011712376e-06, + "loss": 18.6976, + "step": 384650 + }, + { + "epoch": 0.777037536815653, + "grad_norm": 425.56732177734375, + "learning_rate": 1.5352552209111344e-06, + "loss": 16.283, + "step": 384660 + }, + { + "epoch": 0.7770577374483368, + "grad_norm": 169.77854919433594, + "learning_rate": 1.535003557537776e-06, + "loss": 16.551, + "step": 384670 + }, + { + "epoch": 0.7770779380810207, + "grad_norm": 0.015986911952495575, + "learning_rate": 1.5347519110523895e-06, + "loss": 19.9182, + "step": 384680 + }, + { + "epoch": 0.7770981387137045, + "grad_norm": 245.8568572998047, + "learning_rate": 1.5345002814562055e-06, + "loss": 45.4147, + "step": 384690 + }, + { + "epoch": 0.7771183393463883, + "grad_norm": 205.8431854248047, + "learning_rate": 1.5342486687504432e-06, + "loss": 12.4595, + "step": 384700 + }, + { + "epoch": 0.7771385399790721, + "grad_norm": 4.2626519203186035, + "learning_rate": 1.533997072936333e-06, + "loss": 15.0956, + "step": 384710 + }, + { + "epoch": 0.7771587406117559, + "grad_norm": 430.7354431152344, + "learning_rate": 1.533745494015102e-06, + "loss": 13.0182, + "step": 384720 + }, + { + "epoch": 0.7771789412444398, + "grad_norm": 200.21856689453125, + "learning_rate": 1.533493931987975e-06, + "loss": 17.5419, + "step": 384730 + }, + { + "epoch": 0.7771991418771236, + "grad_norm": 34.58863067626953, + "learning_rate": 1.5332423868561769e-06, + "loss": 27.4933, + "step": 384740 + }, + { + "epoch": 0.7772193425098074, + "grad_norm": 474.97015380859375, + "learning_rate": 1.5329908586209347e-06, 
+ "loss": 10.175, + "step": 384750 + }, + { + "epoch": 0.7772395431424912, + "grad_norm": 428.1702575683594, + "learning_rate": 1.5327393472834772e-06, + "loss": 19.8074, + "step": 384760 + }, + { + "epoch": 0.777259743775175, + "grad_norm": 457.3119812011719, + "learning_rate": 1.5324878528450254e-06, + "loss": 19.01, + "step": 384770 + }, + { + "epoch": 0.7772799444078589, + "grad_norm": 192.80319213867188, + "learning_rate": 1.5322363753068064e-06, + "loss": 16.6181, + "step": 384780 + }, + { + "epoch": 0.7773001450405427, + "grad_norm": 33.13753128051758, + "learning_rate": 1.5319849146700488e-06, + "loss": 23.758, + "step": 384790 + }, + { + "epoch": 0.7773203456732265, + "grad_norm": 576.9069213867188, + "learning_rate": 1.531733470935976e-06, + "loss": 11.0886, + "step": 384800 + }, + { + "epoch": 0.7773405463059103, + "grad_norm": 564.5203857421875, + "learning_rate": 1.531482044105812e-06, + "loss": 16.9974, + "step": 384810 + }, + { + "epoch": 0.7773607469385941, + "grad_norm": 279.1705322265625, + "learning_rate": 1.5312306341807858e-06, + "loss": 28.6219, + "step": 384820 + }, + { + "epoch": 0.777380947571278, + "grad_norm": 271.1581726074219, + "learning_rate": 1.5309792411621204e-06, + "loss": 26.1332, + "step": 384830 + }, + { + "epoch": 0.7774011482039618, + "grad_norm": 542.3341064453125, + "learning_rate": 1.5307278650510399e-06, + "loss": 18.0816, + "step": 384840 + }, + { + "epoch": 0.7774213488366456, + "grad_norm": 387.3022155761719, + "learning_rate": 1.5304765058487725e-06, + "loss": 13.1619, + "step": 384850 + }, + { + "epoch": 0.7774415494693294, + "grad_norm": 269.654541015625, + "learning_rate": 1.5302251635565401e-06, + "loss": 41.7115, + "step": 384860 + }, + { + "epoch": 0.7774617501020132, + "grad_norm": 140.29644775390625, + "learning_rate": 1.5299738381755712e-06, + "loss": 15.5364, + "step": 384870 + }, + { + "epoch": 0.7774819507346971, + "grad_norm": 170.4852294921875, + "learning_rate": 1.5297225297070879e-06, + "loss": 23.2477, + "step": 384880 + }, + { + "epoch": 0.7775021513673809, + "grad_norm": 316.4628601074219, + "learning_rate": 1.5294712381523168e-06, + "loss": 15.9037, + "step": 384890 + }, + { + "epoch": 0.7775223520000646, + "grad_norm": 744.5531616210938, + "learning_rate": 1.529219963512481e-06, + "loss": 12.7941, + "step": 384900 + }, + { + "epoch": 0.7775425526327484, + "grad_norm": 254.7901153564453, + "learning_rate": 1.5289687057888075e-06, + "loss": 15.0023, + "step": 384910 + }, + { + "epoch": 0.7775627532654322, + "grad_norm": 443.2149963378906, + "learning_rate": 1.5287174649825194e-06, + "loss": 22.0366, + "step": 384920 + }, + { + "epoch": 0.777582953898116, + "grad_norm": 307.50421142578125, + "learning_rate": 1.5284662410948398e-06, + "loss": 13.2912, + "step": 384930 + }, + { + "epoch": 0.7776031545307999, + "grad_norm": 322.15985107421875, + "learning_rate": 1.5282150341269964e-06, + "loss": 15.5182, + "step": 384940 + }, + { + "epoch": 0.7776233551634837, + "grad_norm": 413.22955322265625, + "learning_rate": 1.5279638440802118e-06, + "loss": 11.9651, + "step": 384950 + }, + { + "epoch": 0.7776435557961675, + "grad_norm": 682.2948608398438, + "learning_rate": 1.5277126709557088e-06, + "loss": 13.074, + "step": 384960 + }, + { + "epoch": 0.7776637564288513, + "grad_norm": 397.0071716308594, + "learning_rate": 1.5274615147547128e-06, + "loss": 19.5731, + "step": 384970 + }, + { + "epoch": 0.7776839570615351, + "grad_norm": 140.2388916015625, + "learning_rate": 1.5272103754784517e-06, + "loss": 32.3869, + "step": 384980 + }, + 
{ + "epoch": 0.777704157694219, + "grad_norm": 374.5721435546875, + "learning_rate": 1.526959253128143e-06, + "loss": 12.7778, + "step": 384990 + }, + { + "epoch": 0.7777243583269028, + "grad_norm": 431.6618957519531, + "learning_rate": 1.5267081477050132e-06, + "loss": 21.2579, + "step": 385000 + }, + { + "epoch": 0.7777445589595866, + "grad_norm": 398.0082702636719, + "learning_rate": 1.5264570592102883e-06, + "loss": 21.9021, + "step": 385010 + }, + { + "epoch": 0.7777647595922704, + "grad_norm": 436.4931945800781, + "learning_rate": 1.5262059876451906e-06, + "loss": 22.1284, + "step": 385020 + }, + { + "epoch": 0.7777849602249542, + "grad_norm": 451.14666748046875, + "learning_rate": 1.5259549330109424e-06, + "loss": 12.1658, + "step": 385030 + }, + { + "epoch": 0.7778051608576381, + "grad_norm": 284.5849914550781, + "learning_rate": 1.5257038953087678e-06, + "loss": 16.0711, + "step": 385040 + }, + { + "epoch": 0.7778253614903219, + "grad_norm": 284.7397155761719, + "learning_rate": 1.5254528745398943e-06, + "loss": 20.7581, + "step": 385050 + }, + { + "epoch": 0.7778455621230057, + "grad_norm": 468.49114990234375, + "learning_rate": 1.5252018707055393e-06, + "loss": 21.8534, + "step": 385060 + }, + { + "epoch": 0.7778657627556895, + "grad_norm": 525.685791015625, + "learning_rate": 1.5249508838069287e-06, + "loss": 10.5381, + "step": 385070 + }, + { + "epoch": 0.7778859633883733, + "grad_norm": 178.24122619628906, + "learning_rate": 1.5246999138452878e-06, + "loss": 16.3678, + "step": 385080 + }, + { + "epoch": 0.7779061640210572, + "grad_norm": 438.45404052734375, + "learning_rate": 1.5244489608218376e-06, + "loss": 18.4635, + "step": 385090 + }, + { + "epoch": 0.777926364653741, + "grad_norm": 578.2142333984375, + "learning_rate": 1.5241980247378008e-06, + "loss": 23.6082, + "step": 385100 + }, + { + "epoch": 0.7779465652864248, + "grad_norm": 335.78887939453125, + "learning_rate": 1.5239471055944022e-06, + "loss": 13.7291, + "step": 385110 + }, + { + "epoch": 0.7779667659191086, + "grad_norm": 316.18280029296875, + "learning_rate": 1.5236962033928636e-06, + "loss": 21.4164, + "step": 385120 + }, + { + "epoch": 0.7779869665517924, + "grad_norm": 638.5242919921875, + "learning_rate": 1.5234453181344071e-06, + "loss": 23.4461, + "step": 385130 + }, + { + "epoch": 0.7780071671844763, + "grad_norm": 255.30441284179688, + "learning_rate": 1.5231944498202578e-06, + "loss": 18.7992, + "step": 385140 + }, + { + "epoch": 0.7780273678171601, + "grad_norm": 230.3295135498047, + "learning_rate": 1.5229435984516355e-06, + "loss": 24.4938, + "step": 385150 + }, + { + "epoch": 0.7780475684498438, + "grad_norm": 443.8564758300781, + "learning_rate": 1.5226927640297663e-06, + "loss": 35.4686, + "step": 385160 + }, + { + "epoch": 0.7780677690825276, + "grad_norm": 829.7617797851562, + "learning_rate": 1.5224419465558687e-06, + "loss": 18.2524, + "step": 385170 + }, + { + "epoch": 0.7780879697152114, + "grad_norm": 299.2265625, + "learning_rate": 1.522191146031169e-06, + "loss": 22.1634, + "step": 385180 + }, + { + "epoch": 0.7781081703478953, + "grad_norm": 668.59521484375, + "learning_rate": 1.521940362456888e-06, + "loss": 27.8544, + "step": 385190 + }, + { + "epoch": 0.7781283709805791, + "grad_norm": 44.44142150878906, + "learning_rate": 1.521689595834246e-06, + "loss": 30.3538, + "step": 385200 + }, + { + "epoch": 0.7781485716132629, + "grad_norm": 154.08029174804688, + "learning_rate": 1.521438846164469e-06, + "loss": 16.6562, + "step": 385210 + }, + { + "epoch": 0.7781687722459467, + 
"grad_norm": 655.61328125, + "learning_rate": 1.5211881134487755e-06, + "loss": 22.0776, + "step": 385220 + }, + { + "epoch": 0.7781889728786305, + "grad_norm": 195.63526916503906, + "learning_rate": 1.5209373976883906e-06, + "loss": 30.8979, + "step": 385230 + }, + { + "epoch": 0.7782091735113144, + "grad_norm": 293.49652099609375, + "learning_rate": 1.5206866988845348e-06, + "loss": 15.782, + "step": 385240 + }, + { + "epoch": 0.7782293741439982, + "grad_norm": 424.705078125, + "learning_rate": 1.5204360170384286e-06, + "loss": 14.5477, + "step": 385250 + }, + { + "epoch": 0.778249574776682, + "grad_norm": 2079.281982421875, + "learning_rate": 1.5201853521512967e-06, + "loss": 50.4745, + "step": 385260 + }, + { + "epoch": 0.7782697754093658, + "grad_norm": 310.99786376953125, + "learning_rate": 1.5199347042243595e-06, + "loss": 19.3026, + "step": 385270 + }, + { + "epoch": 0.7782899760420496, + "grad_norm": 245.80108642578125, + "learning_rate": 1.519684073258837e-06, + "loss": 23.6941, + "step": 385280 + }, + { + "epoch": 0.7783101766747335, + "grad_norm": 693.7467041015625, + "learning_rate": 1.5194334592559517e-06, + "loss": 21.5978, + "step": 385290 + }, + { + "epoch": 0.7783303773074173, + "grad_norm": 882.8553466796875, + "learning_rate": 1.519182862216929e-06, + "loss": 18.9106, + "step": 385300 + }, + { + "epoch": 0.7783505779401011, + "grad_norm": 227.40785217285156, + "learning_rate": 1.5189322821429842e-06, + "loss": 9.4096, + "step": 385310 + }, + { + "epoch": 0.7783707785727849, + "grad_norm": 302.2127380371094, + "learning_rate": 1.5186817190353404e-06, + "loss": 19.1014, + "step": 385320 + }, + { + "epoch": 0.7783909792054687, + "grad_norm": 514.7554931640625, + "learning_rate": 1.5184311728952216e-06, + "loss": 17.937, + "step": 385330 + }, + { + "epoch": 0.7784111798381526, + "grad_norm": 492.3889465332031, + "learning_rate": 1.5181806437238472e-06, + "loss": 12.4642, + "step": 385340 + }, + { + "epoch": 0.7784313804708364, + "grad_norm": 475.7500305175781, + "learning_rate": 1.5179301315224364e-06, + "loss": 24.705, + "step": 385350 + }, + { + "epoch": 0.7784515811035202, + "grad_norm": 249.72171020507812, + "learning_rate": 1.5176796362922119e-06, + "loss": 11.6964, + "step": 385360 + }, + { + "epoch": 0.778471781736204, + "grad_norm": 240.2904815673828, + "learning_rate": 1.5174291580343976e-06, + "loss": 18.1363, + "step": 385370 + }, + { + "epoch": 0.7784919823688878, + "grad_norm": 279.27685546875, + "learning_rate": 1.5171786967502078e-06, + "loss": 24.9632, + "step": 385380 + }, + { + "epoch": 0.7785121830015717, + "grad_norm": 449.6131286621094, + "learning_rate": 1.516928252440867e-06, + "loss": 34.3537, + "step": 385390 + }, + { + "epoch": 0.7785323836342555, + "grad_norm": 571.5137939453125, + "learning_rate": 1.5166778251075964e-06, + "loss": 16.6113, + "step": 385400 + }, + { + "epoch": 0.7785525842669393, + "grad_norm": 620.7178344726562, + "learning_rate": 1.516427414751616e-06, + "loss": 15.0394, + "step": 385410 + }, + { + "epoch": 0.778572784899623, + "grad_norm": 346.1143798828125, + "learning_rate": 1.5161770213741444e-06, + "loss": 18.6634, + "step": 385420 + }, + { + "epoch": 0.7785929855323068, + "grad_norm": 218.16488647460938, + "learning_rate": 1.5159266449764048e-06, + "loss": 14.5162, + "step": 385430 + }, + { + "epoch": 0.7786131861649906, + "grad_norm": 337.9759521484375, + "learning_rate": 1.5156762855596162e-06, + "loss": 12.06, + "step": 385440 + }, + { + "epoch": 0.7786333867976745, + "grad_norm": 527.7807006835938, + "learning_rate": 
1.5154259431249978e-06, + "loss": 17.1289, + "step": 385450 + }, + { + "epoch": 0.7786535874303583, + "grad_norm": 303.9666748046875, + "learning_rate": 1.5151756176737703e-06, + "loss": 15.2366, + "step": 385460 + }, + { + "epoch": 0.7786737880630421, + "grad_norm": 77.94291687011719, + "learning_rate": 1.5149253092071554e-06, + "loss": 16.9323, + "step": 385470 + }, + { + "epoch": 0.7786939886957259, + "grad_norm": 517.1173706054688, + "learning_rate": 1.5146750177263725e-06, + "loss": 21.6196, + "step": 385480 + }, + { + "epoch": 0.7787141893284097, + "grad_norm": 981.0882568359375, + "learning_rate": 1.5144247432326386e-06, + "loss": 22.6039, + "step": 385490 + }, + { + "epoch": 0.7787343899610936, + "grad_norm": 427.5642395019531, + "learning_rate": 1.514174485727178e-06, + "loss": 15.4875, + "step": 385500 + }, + { + "epoch": 0.7787545905937774, + "grad_norm": 326.907958984375, + "learning_rate": 1.5139242452112074e-06, + "loss": 24.2637, + "step": 385510 + }, + { + "epoch": 0.7787747912264612, + "grad_norm": 19.355066299438477, + "learning_rate": 1.5136740216859464e-06, + "loss": 12.2308, + "step": 385520 + }, + { + "epoch": 0.778794991859145, + "grad_norm": 416.51190185546875, + "learning_rate": 1.5134238151526166e-06, + "loss": 21.3602, + "step": 385530 + }, + { + "epoch": 0.7788151924918288, + "grad_norm": 213.76942443847656, + "learning_rate": 1.5131736256124346e-06, + "loss": 8.8589, + "step": 385540 + }, + { + "epoch": 0.7788353931245127, + "grad_norm": 50.654388427734375, + "learning_rate": 1.5129234530666232e-06, + "loss": 19.1779, + "step": 385550 + }, + { + "epoch": 0.7788555937571965, + "grad_norm": 325.6395263671875, + "learning_rate": 1.5126732975164e-06, + "loss": 12.2903, + "step": 385560 + }, + { + "epoch": 0.7788757943898803, + "grad_norm": 256.3102111816406, + "learning_rate": 1.5124231589629823e-06, + "loss": 6.6157, + "step": 385570 + }, + { + "epoch": 0.7788959950225641, + "grad_norm": 99.91606140136719, + "learning_rate": 1.5121730374075916e-06, + "loss": 14.7548, + "step": 385580 + }, + { + "epoch": 0.778916195655248, + "grad_norm": 195.95484924316406, + "learning_rate": 1.511922932851449e-06, + "loss": 6.047, + "step": 385590 + }, + { + "epoch": 0.7789363962879318, + "grad_norm": 527.3798217773438, + "learning_rate": 1.5116728452957686e-06, + "loss": 10.9641, + "step": 385600 + }, + { + "epoch": 0.7789565969206156, + "grad_norm": 1464.2406005859375, + "learning_rate": 1.511422774741771e-06, + "loss": 23.2647, + "step": 385610 + }, + { + "epoch": 0.7789767975532994, + "grad_norm": 194.63536071777344, + "learning_rate": 1.5111727211906774e-06, + "loss": 13.5937, + "step": 385620 + }, + { + "epoch": 0.7789969981859832, + "grad_norm": 174.94775390625, + "learning_rate": 1.5109226846437054e-06, + "loss": 29.5315, + "step": 385630 + }, + { + "epoch": 0.779017198818667, + "grad_norm": 402.3554382324219, + "learning_rate": 1.510672665102071e-06, + "loss": 20.5427, + "step": 385640 + }, + { + "epoch": 0.7790373994513509, + "grad_norm": 363.8941650390625, + "learning_rate": 1.5104226625669943e-06, + "loss": 9.9553, + "step": 385650 + }, + { + "epoch": 0.7790576000840347, + "grad_norm": 502.1727294921875, + "learning_rate": 1.5101726770396986e-06, + "loss": 28.6017, + "step": 385660 + }, + { + "epoch": 0.7790778007167184, + "grad_norm": 90.51630401611328, + "learning_rate": 1.509922708521394e-06, + "loss": 12.2599, + "step": 385670 + }, + { + "epoch": 0.7790980013494022, + "grad_norm": 494.1336975097656, + "learning_rate": 1.509672757013303e-06, + "loss": 28.2401, + 
"step": 385680 + }, + { + "epoch": 0.779118201982086, + "grad_norm": 391.7030944824219, + "learning_rate": 1.509422822516645e-06, + "loss": 15.7195, + "step": 385690 + }, + { + "epoch": 0.7791384026147699, + "grad_norm": 270.3523864746094, + "learning_rate": 1.5091729050326376e-06, + "loss": 14.2068, + "step": 385700 + }, + { + "epoch": 0.7791586032474537, + "grad_norm": 157.23414611816406, + "learning_rate": 1.5089230045624958e-06, + "loss": 27.5659, + "step": 385710 + }, + { + "epoch": 0.7791788038801375, + "grad_norm": 335.717041015625, + "learning_rate": 1.5086731211074418e-06, + "loss": 13.2512, + "step": 385720 + }, + { + "epoch": 0.7791990045128213, + "grad_norm": 418.17840576171875, + "learning_rate": 1.5084232546686911e-06, + "loss": 14.8485, + "step": 385730 + }, + { + "epoch": 0.7792192051455051, + "grad_norm": 560.2449340820312, + "learning_rate": 1.508173405247461e-06, + "loss": 25.812, + "step": 385740 + }, + { + "epoch": 0.779239405778189, + "grad_norm": 686.2391357421875, + "learning_rate": 1.5079235728449714e-06, + "loss": 16.8938, + "step": 385750 + }, + { + "epoch": 0.7792596064108728, + "grad_norm": 175.08425903320312, + "learning_rate": 1.5076737574624372e-06, + "loss": 14.4246, + "step": 385760 + }, + { + "epoch": 0.7792798070435566, + "grad_norm": 417.5024719238281, + "learning_rate": 1.5074239591010791e-06, + "loss": 6.5063, + "step": 385770 + }, + { + "epoch": 0.7793000076762404, + "grad_norm": 395.2223205566406, + "learning_rate": 1.507174177762112e-06, + "loss": 36.0661, + "step": 385780 + }, + { + "epoch": 0.7793202083089242, + "grad_norm": 490.0069580078125, + "learning_rate": 1.5069244134467553e-06, + "loss": 21.2514, + "step": 385790 + }, + { + "epoch": 0.779340408941608, + "grad_norm": 1034.65966796875, + "learning_rate": 1.5066746661562254e-06, + "loss": 22.4074, + "step": 385800 + }, + { + "epoch": 0.7793606095742919, + "grad_norm": 654.3142700195312, + "learning_rate": 1.5064249358917383e-06, + "loss": 21.8747, + "step": 385810 + }, + { + "epoch": 0.7793808102069757, + "grad_norm": 331.18170166015625, + "learning_rate": 1.5061752226545134e-06, + "loss": 8.6893, + "step": 385820 + }, + { + "epoch": 0.7794010108396595, + "grad_norm": 621.185546875, + "learning_rate": 1.5059255264457656e-06, + "loss": 20.6916, + "step": 385830 + }, + { + "epoch": 0.7794212114723433, + "grad_norm": 464.4343566894531, + "learning_rate": 1.5056758472667144e-06, + "loss": 23.6093, + "step": 385840 + }, + { + "epoch": 0.7794414121050272, + "grad_norm": 682.4192504882812, + "learning_rate": 1.5054261851185753e-06, + "loss": 18.9779, + "step": 385850 + }, + { + "epoch": 0.779461612737711, + "grad_norm": 190.57699584960938, + "learning_rate": 1.5051765400025636e-06, + "loss": 13.4317, + "step": 385860 + }, + { + "epoch": 0.7794818133703948, + "grad_norm": 3304.729248046875, + "learning_rate": 1.5049269119198988e-06, + "loss": 29.976, + "step": 385870 + }, + { + "epoch": 0.7795020140030786, + "grad_norm": 254.01058959960938, + "learning_rate": 1.5046773008717968e-06, + "loss": 21.1212, + "step": 385880 + }, + { + "epoch": 0.7795222146357624, + "grad_norm": 709.68115234375, + "learning_rate": 1.5044277068594721e-06, + "loss": 21.5708, + "step": 385890 + }, + { + "epoch": 0.7795424152684463, + "grad_norm": 734.3587646484375, + "learning_rate": 1.5041781298841424e-06, + "loss": 29.4084, + "step": 385900 + }, + { + "epoch": 0.7795626159011301, + "grad_norm": 330.8517150878906, + "learning_rate": 1.503928569947028e-06, + "loss": 23.806, + "step": 385910 + }, + { + "epoch": 
0.7795828165338139, + "grad_norm": 359.6206970214844, + "learning_rate": 1.5036790270493383e-06, + "loss": 16.4389, + "step": 385920 + }, + { + "epoch": 0.7796030171664976, + "grad_norm": 381.5948181152344, + "learning_rate": 1.5034295011922933e-06, + "loss": 15.3177, + "step": 385930 + }, + { + "epoch": 0.7796232177991814, + "grad_norm": 31.452110290527344, + "learning_rate": 1.5031799923771102e-06, + "loss": 22.716, + "step": 385940 + }, + { + "epoch": 0.7796434184318652, + "grad_norm": 238.20335388183594, + "learning_rate": 1.5029305006050038e-06, + "loss": 15.4614, + "step": 385950 + }, + { + "epoch": 0.7796636190645491, + "grad_norm": 5.884660720825195, + "learning_rate": 1.5026810258771885e-06, + "loss": 20.7957, + "step": 385960 + }, + { + "epoch": 0.7796838196972329, + "grad_norm": 679.9158935546875, + "learning_rate": 1.5024315681948815e-06, + "loss": 14.5647, + "step": 385970 + }, + { + "epoch": 0.7797040203299167, + "grad_norm": 1149.2484130859375, + "learning_rate": 1.5021821275593018e-06, + "loss": 32.3723, + "step": 385980 + }, + { + "epoch": 0.7797242209626005, + "grad_norm": 590.5807495117188, + "learning_rate": 1.5019327039716598e-06, + "loss": 21.5006, + "step": 385990 + }, + { + "epoch": 0.7797444215952843, + "grad_norm": 231.70526123046875, + "learning_rate": 1.5016832974331725e-06, + "loss": 31.0076, + "step": 386000 + }, + { + "epoch": 0.7797646222279682, + "grad_norm": 424.45361328125, + "learning_rate": 1.5014339079450586e-06, + "loss": 21.9043, + "step": 386010 + }, + { + "epoch": 0.779784822860652, + "grad_norm": 130.33021545410156, + "learning_rate": 1.501184535508532e-06, + "loss": 13.1588, + "step": 386020 + }, + { + "epoch": 0.7798050234933358, + "grad_norm": 269.3244323730469, + "learning_rate": 1.500935180124805e-06, + "loss": 13.8246, + "step": 386030 + }, + { + "epoch": 0.7798252241260196, + "grad_norm": 513.9899291992188, + "learning_rate": 1.500685841795098e-06, + "loss": 33.1769, + "step": 386040 + }, + { + "epoch": 0.7798454247587034, + "grad_norm": 424.0687255859375, + "learning_rate": 1.5004365205206235e-06, + "loss": 27.0779, + "step": 386050 + }, + { + "epoch": 0.7798656253913873, + "grad_norm": 343.50421142578125, + "learning_rate": 1.5001872163025954e-06, + "loss": 19.2403, + "step": 386060 + }, + { + "epoch": 0.7798858260240711, + "grad_norm": 28.880369186401367, + "learning_rate": 1.49993792914223e-06, + "loss": 22.104, + "step": 386070 + }, + { + "epoch": 0.7799060266567549, + "grad_norm": 223.69723510742188, + "learning_rate": 1.4996886590407445e-06, + "loss": 14.1746, + "step": 386080 + }, + { + "epoch": 0.7799262272894387, + "grad_norm": 277.2913818359375, + "learning_rate": 1.4994394059993522e-06, + "loss": 11.7292, + "step": 386090 + }, + { + "epoch": 0.7799464279221225, + "grad_norm": 345.3890075683594, + "learning_rate": 1.4991901700192657e-06, + "loss": 23.5399, + "step": 386100 + }, + { + "epoch": 0.7799666285548064, + "grad_norm": 256.9220275878906, + "learning_rate": 1.4989409511017034e-06, + "loss": 14.9563, + "step": 386110 + }, + { + "epoch": 0.7799868291874902, + "grad_norm": 107.40361785888672, + "learning_rate": 1.498691749247878e-06, + "loss": 13.2521, + "step": 386120 + }, + { + "epoch": 0.780007029820174, + "grad_norm": 168.9518280029297, + "learning_rate": 1.4984425644590033e-06, + "loss": 20.286, + "step": 386130 + }, + { + "epoch": 0.7800272304528578, + "grad_norm": 406.5779724121094, + "learning_rate": 1.498193396736296e-06, + "loss": 18.0538, + "step": 386140 + }, + { + "epoch": 0.7800474310855416, + "grad_norm": 
163.77590942382812, + "learning_rate": 1.4979442460809684e-06, + "loss": 6.0938, + "step": 386150 + }, + { + "epoch": 0.7800676317182255, + "grad_norm": 270.00030517578125, + "learning_rate": 1.4976951124942369e-06, + "loss": 15.4553, + "step": 386160 + }, + { + "epoch": 0.7800878323509093, + "grad_norm": 323.8747253417969, + "learning_rate": 1.4974459959773146e-06, + "loss": 12.7156, + "step": 386170 + }, + { + "epoch": 0.780108032983593, + "grad_norm": 144.15640258789062, + "learning_rate": 1.4971968965314143e-06, + "loss": 16.3609, + "step": 386180 + }, + { + "epoch": 0.7801282336162768, + "grad_norm": 542.04833984375, + "learning_rate": 1.4969478141577531e-06, + "loss": 12.2939, + "step": 386190 + }, + { + "epoch": 0.7801484342489606, + "grad_norm": 417.11761474609375, + "learning_rate": 1.496698748857543e-06, + "loss": 21.2529, + "step": 386200 + }, + { + "epoch": 0.7801686348816445, + "grad_norm": 515.2703247070312, + "learning_rate": 1.4964497006319972e-06, + "loss": 14.6671, + "step": 386210 + }, + { + "epoch": 0.7801888355143283, + "grad_norm": 909.078125, + "learning_rate": 1.4962006694823306e-06, + "loss": 25.3786, + "step": 386220 + }, + { + "epoch": 0.7802090361470121, + "grad_norm": 400.2494812011719, + "learning_rate": 1.4959516554097581e-06, + "loss": 14.9983, + "step": 386230 + }, + { + "epoch": 0.7802292367796959, + "grad_norm": 176.0259552001953, + "learning_rate": 1.4957026584154926e-06, + "loss": 15.1697, + "step": 386240 + }, + { + "epoch": 0.7802494374123797, + "grad_norm": 901.3413696289062, + "learning_rate": 1.4954536785007456e-06, + "loss": 25.1786, + "step": 386250 + }, + { + "epoch": 0.7802696380450636, + "grad_norm": 471.90386962890625, + "learning_rate": 1.4952047156667326e-06, + "loss": 24.8239, + "step": 386260 + }, + { + "epoch": 0.7802898386777474, + "grad_norm": 585.814697265625, + "learning_rate": 1.4949557699146694e-06, + "loss": 21.5982, + "step": 386270 + }, + { + "epoch": 0.7803100393104312, + "grad_norm": 512.4572143554688, + "learning_rate": 1.4947068412457639e-06, + "loss": 23.7617, + "step": 386280 + }, + { + "epoch": 0.780330239943115, + "grad_norm": 464.33660888671875, + "learning_rate": 1.4944579296612323e-06, + "loss": 20.042, + "step": 386290 + }, + { + "epoch": 0.7803504405757988, + "grad_norm": 496.6575927734375, + "learning_rate": 1.4942090351622884e-06, + "loss": 17.0011, + "step": 386300 + }, + { + "epoch": 0.7803706412084827, + "grad_norm": 361.2125549316406, + "learning_rate": 1.493960157750145e-06, + "loss": 15.9416, + "step": 386310 + }, + { + "epoch": 0.7803908418411665, + "grad_norm": 205.79136657714844, + "learning_rate": 1.493711297426013e-06, + "loss": 16.0879, + "step": 386320 + }, + { + "epoch": 0.7804110424738503, + "grad_norm": 500.2698974609375, + "learning_rate": 1.4934624541911086e-06, + "loss": 14.078, + "step": 386330 + }, + { + "epoch": 0.7804312431065341, + "grad_norm": 298.42779541015625, + "learning_rate": 1.4932136280466426e-06, + "loss": 23.3748, + "step": 386340 + }, + { + "epoch": 0.7804514437392179, + "grad_norm": 249.26150512695312, + "learning_rate": 1.492964818993826e-06, + "loss": 18.0938, + "step": 386350 + }, + { + "epoch": 0.7804716443719018, + "grad_norm": 0.031371522694826126, + "learning_rate": 1.492716027033876e-06, + "loss": 18.898, + "step": 386360 + }, + { + "epoch": 0.7804918450045856, + "grad_norm": 463.8792724609375, + "learning_rate": 1.4924672521680006e-06, + "loss": 14.4938, + "step": 386370 + }, + { + "epoch": 0.7805120456372694, + "grad_norm": 416.21453857421875, + "learning_rate": 
1.4922184943974167e-06, + "loss": 20.2173, + "step": 386380 + }, + { + "epoch": 0.7805322462699532, + "grad_norm": 966.876953125, + "learning_rate": 1.4919697537233318e-06, + "loss": 28.3099, + "step": 386390 + }, + { + "epoch": 0.780552446902637, + "grad_norm": 530.0706787109375, + "learning_rate": 1.491721030146963e-06, + "loss": 18.5152, + "step": 386400 + }, + { + "epoch": 0.7805726475353209, + "grad_norm": 160.47454833984375, + "learning_rate": 1.4914723236695206e-06, + "loss": 11.2257, + "step": 386410 + }, + { + "epoch": 0.7805928481680047, + "grad_norm": 191.1073760986328, + "learning_rate": 1.4912236342922143e-06, + "loss": 21.4983, + "step": 386420 + }, + { + "epoch": 0.7806130488006885, + "grad_norm": 625.6028442382812, + "learning_rate": 1.4909749620162605e-06, + "loss": 22.6572, + "step": 386430 + }, + { + "epoch": 0.7806332494333722, + "grad_norm": 236.0618438720703, + "learning_rate": 1.4907263068428673e-06, + "loss": 13.1335, + "step": 386440 + }, + { + "epoch": 0.780653450066056, + "grad_norm": 285.98828125, + "learning_rate": 1.4904776687732503e-06, + "loss": 5.3353, + "step": 386450 + }, + { + "epoch": 0.7806736506987398, + "grad_norm": 480.0706481933594, + "learning_rate": 1.4902290478086195e-06, + "loss": 19.6177, + "step": 386460 + }, + { + "epoch": 0.7806938513314237, + "grad_norm": 268.5168762207031, + "learning_rate": 1.4899804439501853e-06, + "loss": 34.3631, + "step": 386470 + }, + { + "epoch": 0.7807140519641075, + "grad_norm": 252.37997436523438, + "learning_rate": 1.4897318571991615e-06, + "loss": 21.8165, + "step": 386480 + }, + { + "epoch": 0.7807342525967913, + "grad_norm": 204.5614471435547, + "learning_rate": 1.4894832875567593e-06, + "loss": 11.2927, + "step": 386490 + }, + { + "epoch": 0.7807544532294751, + "grad_norm": 536.2254028320312, + "learning_rate": 1.489234735024188e-06, + "loss": 12.7074, + "step": 386500 + }, + { + "epoch": 0.7807746538621589, + "grad_norm": 90.27301788330078, + "learning_rate": 1.4889861996026617e-06, + "loss": 19.6775, + "step": 386510 + }, + { + "epoch": 0.7807948544948428, + "grad_norm": 606.7857055664062, + "learning_rate": 1.4887376812933913e-06, + "loss": 18.9317, + "step": 386520 + }, + { + "epoch": 0.7808150551275266, + "grad_norm": 4.590639591217041, + "learning_rate": 1.488489180097588e-06, + "loss": 33.1678, + "step": 386530 + }, + { + "epoch": 0.7808352557602104, + "grad_norm": 573.2119140625, + "learning_rate": 1.4882406960164615e-06, + "loss": 10.8807, + "step": 386540 + }, + { + "epoch": 0.7808554563928942, + "grad_norm": 573.2291870117188, + "learning_rate": 1.4879922290512244e-06, + "loss": 20.2384, + "step": 386550 + }, + { + "epoch": 0.780875657025578, + "grad_norm": 368.7428894042969, + "learning_rate": 1.487743779203088e-06, + "loss": 14.5099, + "step": 386560 + }, + { + "epoch": 0.7808958576582619, + "grad_norm": 234.43643188476562, + "learning_rate": 1.4874953464732606e-06, + "loss": 21.0713, + "step": 386570 + }, + { + "epoch": 0.7809160582909457, + "grad_norm": 647.460205078125, + "learning_rate": 1.487246930862955e-06, + "loss": 14.3458, + "step": 386580 + }, + { + "epoch": 0.7809362589236295, + "grad_norm": 195.44033813476562, + "learning_rate": 1.486998532373385e-06, + "loss": 12.5954, + "step": 386590 + }, + { + "epoch": 0.7809564595563133, + "grad_norm": 79.07245635986328, + "learning_rate": 1.4867501510057548e-06, + "loss": 18.258, + "step": 386600 + }, + { + "epoch": 0.7809766601889971, + "grad_norm": 403.3853454589844, + "learning_rate": 1.486501786761278e-06, + "loss": 19.6852, + "step": 
386610 + }, + { + "epoch": 0.780996860821681, + "grad_norm": 313.54241943359375, + "learning_rate": 1.4862534396411671e-06, + "loss": 14.3152, + "step": 386620 + }, + { + "epoch": 0.7810170614543648, + "grad_norm": 557.8474731445312, + "learning_rate": 1.486005109646631e-06, + "loss": 15.661, + "step": 386630 + }, + { + "epoch": 0.7810372620870486, + "grad_norm": 203.18238830566406, + "learning_rate": 1.4857567967788784e-06, + "loss": 18.5647, + "step": 386640 + }, + { + "epoch": 0.7810574627197324, + "grad_norm": 359.0729675292969, + "learning_rate": 1.4855085010391217e-06, + "loss": 20.258, + "step": 386650 + }, + { + "epoch": 0.7810776633524162, + "grad_norm": 720.0196533203125, + "learning_rate": 1.485260222428571e-06, + "loss": 17.099, + "step": 386660 + }, + { + "epoch": 0.7810978639851001, + "grad_norm": 574.5145874023438, + "learning_rate": 1.4850119609484342e-06, + "loss": 27.3897, + "step": 386670 + }, + { + "epoch": 0.7811180646177839, + "grad_norm": 474.090087890625, + "learning_rate": 1.4847637165999224e-06, + "loss": 18.5943, + "step": 386680 + }, + { + "epoch": 0.7811382652504676, + "grad_norm": 129.98526000976562, + "learning_rate": 1.4845154893842473e-06, + "loss": 18.4085, + "step": 386690 + }, + { + "epoch": 0.7811584658831514, + "grad_norm": 1041.7506103515625, + "learning_rate": 1.484267279302618e-06, + "loss": 20.6864, + "step": 386700 + }, + { + "epoch": 0.7811786665158352, + "grad_norm": 122.86719512939453, + "learning_rate": 1.4840190863562414e-06, + "loss": 15.1946, + "step": 386710 + }, + { + "epoch": 0.781198867148519, + "grad_norm": 701.8041381835938, + "learning_rate": 1.483770910546331e-06, + "loss": 18.744, + "step": 386720 + }, + { + "epoch": 0.7812190677812029, + "grad_norm": 99.49020385742188, + "learning_rate": 1.4835227518740951e-06, + "loss": 10.8952, + "step": 386730 + }, + { + "epoch": 0.7812392684138867, + "grad_norm": 263.1635437011719, + "learning_rate": 1.4832746103407409e-06, + "loss": 13.1393, + "step": 386740 + }, + { + "epoch": 0.7812594690465705, + "grad_norm": 142.5100555419922, + "learning_rate": 1.4830264859474814e-06, + "loss": 13.7558, + "step": 386750 + }, + { + "epoch": 0.7812796696792543, + "grad_norm": 924.5542602539062, + "learning_rate": 1.4827783786955224e-06, + "loss": 20.6192, + "step": 386760 + }, + { + "epoch": 0.7812998703119381, + "grad_norm": 235.34300231933594, + "learning_rate": 1.482530288586077e-06, + "loss": 16.9288, + "step": 386770 + }, + { + "epoch": 0.781320070944622, + "grad_norm": 28.140670776367188, + "learning_rate": 1.482282215620352e-06, + "loss": 13.6741, + "step": 386780 + }, + { + "epoch": 0.7813402715773058, + "grad_norm": 13.361457824707031, + "learning_rate": 1.4820341597995558e-06, + "loss": 14.6516, + "step": 386790 + }, + { + "epoch": 0.7813604722099896, + "grad_norm": 84.14933013916016, + "learning_rate": 1.4817861211248996e-06, + "loss": 24.9434, + "step": 386800 + }, + { + "epoch": 0.7813806728426734, + "grad_norm": 637.1309204101562, + "learning_rate": 1.4815380995975908e-06, + "loss": 21.3062, + "step": 386810 + }, + { + "epoch": 0.7814008734753572, + "grad_norm": 445.86187744140625, + "learning_rate": 1.4812900952188374e-06, + "loss": 14.363, + "step": 386820 + }, + { + "epoch": 0.7814210741080411, + "grad_norm": 1048.857177734375, + "learning_rate": 1.4810421079898495e-06, + "loss": 27.6814, + "step": 386830 + }, + { + "epoch": 0.7814412747407249, + "grad_norm": 641.3052978515625, + "learning_rate": 1.4807941379118368e-06, + "loss": 16.5968, + "step": 386840 + }, + { + "epoch": 
0.7814614753734087, + "grad_norm": 583.0075073242188, + "learning_rate": 1.480546184986007e-06, + "loss": 14.5565, + "step": 386850 + }, + { + "epoch": 0.7814816760060925, + "grad_norm": 204.2652587890625, + "learning_rate": 1.4802982492135664e-06, + "loss": 13.9256, + "step": 386860 + }, + { + "epoch": 0.7815018766387763, + "grad_norm": 338.5167236328125, + "learning_rate": 1.4800503305957264e-06, + "loss": 11.404, + "step": 386870 + }, + { + "epoch": 0.7815220772714602, + "grad_norm": 240.81396484375, + "learning_rate": 1.4798024291336949e-06, + "loss": 25.4999, + "step": 386880 + }, + { + "epoch": 0.781542277904144, + "grad_norm": 380.2215881347656, + "learning_rate": 1.4795545448286774e-06, + "loss": 29.0816, + "step": 386890 + }, + { + "epoch": 0.7815624785368278, + "grad_norm": 214.4364776611328, + "learning_rate": 1.4793066776818843e-06, + "loss": 13.0321, + "step": 386900 + }, + { + "epoch": 0.7815826791695116, + "grad_norm": 515.7333374023438, + "learning_rate": 1.479058827694525e-06, + "loss": 11.2031, + "step": 386910 + }, + { + "epoch": 0.7816028798021954, + "grad_norm": 532.2525024414062, + "learning_rate": 1.4788109948678058e-06, + "loss": 15.7329, + "step": 386920 + }, + { + "epoch": 0.7816230804348793, + "grad_norm": 111.3807144165039, + "learning_rate": 1.478563179202933e-06, + "loss": 52.7665, + "step": 386930 + }, + { + "epoch": 0.7816432810675631, + "grad_norm": 270.2540283203125, + "learning_rate": 1.4783153807011186e-06, + "loss": 18.9392, + "step": 386940 + }, + { + "epoch": 0.7816634817002468, + "grad_norm": 309.8558654785156, + "learning_rate": 1.4780675993635668e-06, + "loss": 15.1325, + "step": 386950 + }, + { + "epoch": 0.7816836823329306, + "grad_norm": 738.7531127929688, + "learning_rate": 1.4778198351914853e-06, + "loss": 14.0651, + "step": 386960 + }, + { + "epoch": 0.7817038829656144, + "grad_norm": 247.15045166015625, + "learning_rate": 1.4775720881860845e-06, + "loss": 15.0891, + "step": 386970 + }, + { + "epoch": 0.7817240835982983, + "grad_norm": 500.20452880859375, + "learning_rate": 1.4773243583485681e-06, + "loss": 21.4067, + "step": 386980 + }, + { + "epoch": 0.7817442842309821, + "grad_norm": 371.3633117675781, + "learning_rate": 1.4770766456801477e-06, + "loss": 25.5898, + "step": 386990 + }, + { + "epoch": 0.7817644848636659, + "grad_norm": 318.5390319824219, + "learning_rate": 1.4768289501820265e-06, + "loss": 14.3243, + "step": 387000 + }, + { + "epoch": 0.7817846854963497, + "grad_norm": 131.52696228027344, + "learning_rate": 1.476581271855415e-06, + "loss": 23.0765, + "step": 387010 + }, + { + "epoch": 0.7818048861290335, + "grad_norm": 1093.0257568359375, + "learning_rate": 1.4763336107015192e-06, + "loss": 11.2298, + "step": 387020 + }, + { + "epoch": 0.7818250867617174, + "grad_norm": 17.26053237915039, + "learning_rate": 1.4760859667215449e-06, + "loss": 20.0986, + "step": 387030 + }, + { + "epoch": 0.7818452873944012, + "grad_norm": 432.4856262207031, + "learning_rate": 1.4758383399167014e-06, + "loss": 16.8963, + "step": 387040 + }, + { + "epoch": 0.781865488027085, + "grad_norm": 344.38140869140625, + "learning_rate": 1.4755907302881927e-06, + "loss": 11.4162, + "step": 387050 + }, + { + "epoch": 0.7818856886597688, + "grad_norm": 882.4750366210938, + "learning_rate": 1.4753431378372291e-06, + "loss": 18.6084, + "step": 387060 + }, + { + "epoch": 0.7819058892924526, + "grad_norm": 471.34710693359375, + "learning_rate": 1.4750955625650153e-06, + "loss": 13.8958, + "step": 387070 + }, + { + "epoch": 0.7819260899251365, + "grad_norm": 
5.232560634613037, + "learning_rate": 1.474848004472757e-06, + "loss": 14.3645, + "step": 387080 + }, + { + "epoch": 0.7819462905578203, + "grad_norm": 269.0277404785156, + "learning_rate": 1.4746004635616634e-06, + "loss": 9.4807, + "step": 387090 + }, + { + "epoch": 0.7819664911905041, + "grad_norm": 645.8562622070312, + "learning_rate": 1.4743529398329393e-06, + "loss": 21.5985, + "step": 387100 + }, + { + "epoch": 0.7819866918231879, + "grad_norm": 218.50521850585938, + "learning_rate": 1.4741054332877902e-06, + "loss": 21.1489, + "step": 387110 + }, + { + "epoch": 0.7820068924558717, + "grad_norm": 216.4319610595703, + "learning_rate": 1.4738579439274236e-06, + "loss": 32.7325, + "step": 387120 + }, + { + "epoch": 0.7820270930885556, + "grad_norm": 581.9794921875, + "learning_rate": 1.473610471753047e-06, + "loss": 17.003, + "step": 387130 + }, + { + "epoch": 0.7820472937212394, + "grad_norm": 358.75482177734375, + "learning_rate": 1.4733630167658652e-06, + "loss": 19.3711, + "step": 387140 + }, + { + "epoch": 0.7820674943539232, + "grad_norm": 456.4960021972656, + "learning_rate": 1.473115578967083e-06, + "loss": 17.8683, + "step": 387150 + }, + { + "epoch": 0.782087694986607, + "grad_norm": 763.3374633789062, + "learning_rate": 1.4728681583579091e-06, + "loss": 16.0971, + "step": 387160 + }, + { + "epoch": 0.7821078956192908, + "grad_norm": 301.2882995605469, + "learning_rate": 1.4726207549395482e-06, + "loss": 16.7476, + "step": 387170 + }, + { + "epoch": 0.7821280962519747, + "grad_norm": 360.88348388671875, + "learning_rate": 1.4723733687132041e-06, + "loss": 43.6114, + "step": 387180 + }, + { + "epoch": 0.7821482968846585, + "grad_norm": 505.7632751464844, + "learning_rate": 1.4721259996800847e-06, + "loss": 17.4158, + "step": 387190 + }, + { + "epoch": 0.7821684975173423, + "grad_norm": 147.3037109375, + "learning_rate": 1.4718786478413983e-06, + "loss": 30.0771, + "step": 387200 + }, + { + "epoch": 0.782188698150026, + "grad_norm": 31.770463943481445, + "learning_rate": 1.471631313198344e-06, + "loss": 20.0511, + "step": 387210 + }, + { + "epoch": 0.7822088987827098, + "grad_norm": 451.4087219238281, + "learning_rate": 1.4713839957521315e-06, + "loss": 17.5128, + "step": 387220 + }, + { + "epoch": 0.7822290994153936, + "grad_norm": 6.938021183013916, + "learning_rate": 1.4711366955039664e-06, + "loss": 15.298, + "step": 387230 + }, + { + "epoch": 0.7822493000480775, + "grad_norm": 572.2969360351562, + "learning_rate": 1.4708894124550527e-06, + "loss": 21.4585, + "step": 387240 + }, + { + "epoch": 0.7822695006807613, + "grad_norm": 547.682861328125, + "learning_rate": 1.4706421466065952e-06, + "loss": 14.4308, + "step": 387250 + }, + { + "epoch": 0.7822897013134451, + "grad_norm": 713.3699340820312, + "learning_rate": 1.470394897959801e-06, + "loss": 15.0501, + "step": 387260 + }, + { + "epoch": 0.7823099019461289, + "grad_norm": 713.3102416992188, + "learning_rate": 1.4701476665158738e-06, + "loss": 26.4644, + "step": 387270 + }, + { + "epoch": 0.7823301025788127, + "grad_norm": 433.7578430175781, + "learning_rate": 1.4699004522760174e-06, + "loss": 17.473, + "step": 387280 + }, + { + "epoch": 0.7823503032114966, + "grad_norm": 315.8951110839844, + "learning_rate": 1.4696532552414383e-06, + "loss": 14.5305, + "step": 387290 + }, + { + "epoch": 0.7823705038441804, + "grad_norm": 400.2901611328125, + "learning_rate": 1.469406075413342e-06, + "loss": 10.748, + "step": 387300 + }, + { + "epoch": 0.7823907044768642, + "grad_norm": 160.92108154296875, + "learning_rate": 
1.4691589127929328e-06, + "loss": 17.2667, + "step": 387310 + }, + { + "epoch": 0.782410905109548, + "grad_norm": 266.0498046875, + "learning_rate": 1.4689117673814135e-06, + "loss": 30.5759, + "step": 387320 + }, + { + "epoch": 0.7824311057422318, + "grad_norm": 95.8498306274414, + "learning_rate": 1.4686646391799909e-06, + "loss": 14.7226, + "step": 387330 + }, + { + "epoch": 0.7824513063749157, + "grad_norm": 464.59521484375, + "learning_rate": 1.4684175281898688e-06, + "loss": 10.7037, + "step": 387340 + }, + { + "epoch": 0.7824715070075995, + "grad_norm": 242.17062377929688, + "learning_rate": 1.46817043441225e-06, + "loss": 16.1035, + "step": 387350 + }, + { + "epoch": 0.7824917076402833, + "grad_norm": 220.4538116455078, + "learning_rate": 1.4679233578483415e-06, + "loss": 21.0202, + "step": 387360 + }, + { + "epoch": 0.7825119082729671, + "grad_norm": 1055.5733642578125, + "learning_rate": 1.4676762984993443e-06, + "loss": 20.5439, + "step": 387370 + }, + { + "epoch": 0.782532108905651, + "grad_norm": 1321.3682861328125, + "learning_rate": 1.467429256366466e-06, + "loss": 17.3385, + "step": 387380 + }, + { + "epoch": 0.7825523095383348, + "grad_norm": 349.6588134765625, + "learning_rate": 1.4671822314509099e-06, + "loss": 23.1099, + "step": 387390 + }, + { + "epoch": 0.7825725101710186, + "grad_norm": 152.19427490234375, + "learning_rate": 1.4669352237538763e-06, + "loss": 18.292, + "step": 387400 + }, + { + "epoch": 0.7825927108037024, + "grad_norm": 261.75164794921875, + "learning_rate": 1.4666882332765747e-06, + "loss": 28.4234, + "step": 387410 + }, + { + "epoch": 0.7826129114363862, + "grad_norm": 257.6233825683594, + "learning_rate": 1.4664412600202056e-06, + "loss": 7.0353, + "step": 387420 + }, + { + "epoch": 0.78263311206907, + "grad_norm": 204.0417938232422, + "learning_rate": 1.4661943039859716e-06, + "loss": 9.2934, + "step": 387430 + }, + { + "epoch": 0.7826533127017539, + "grad_norm": 187.93104553222656, + "learning_rate": 1.4659473651750777e-06, + "loss": 15.1497, + "step": 387440 + }, + { + "epoch": 0.7826735133344377, + "grad_norm": 489.39453125, + "learning_rate": 1.4657004435887296e-06, + "loss": 18.4237, + "step": 387450 + }, + { + "epoch": 0.7826937139671214, + "grad_norm": 254.7746124267578, + "learning_rate": 1.4654535392281287e-06, + "loss": 18.6789, + "step": 387460 + }, + { + "epoch": 0.7827139145998052, + "grad_norm": 470.4129943847656, + "learning_rate": 1.4652066520944774e-06, + "loss": 26.8949, + "step": 387470 + }, + { + "epoch": 0.782734115232489, + "grad_norm": 369.17156982421875, + "learning_rate": 1.4649597821889817e-06, + "loss": 16.8451, + "step": 387480 + }, + { + "epoch": 0.7827543158651729, + "grad_norm": 56.73841094970703, + "learning_rate": 1.4647129295128426e-06, + "loss": 23.2344, + "step": 387490 + }, + { + "epoch": 0.7827745164978567, + "grad_norm": 490.67572021484375, + "learning_rate": 1.4644660940672628e-06, + "loss": 22.2702, + "step": 387500 + }, + { + "epoch": 0.7827947171305405, + "grad_norm": 12.69742202758789, + "learning_rate": 1.4642192758534463e-06, + "loss": 8.2552, + "step": 387510 + }, + { + "epoch": 0.7828149177632243, + "grad_norm": 294.6560363769531, + "learning_rate": 1.463972474872598e-06, + "loss": 16.5292, + "step": 387520 + }, + { + "epoch": 0.7828351183959081, + "grad_norm": 278.0723571777344, + "learning_rate": 1.463725691125919e-06, + "loss": 30.3822, + "step": 387530 + }, + { + "epoch": 0.782855319028592, + "grad_norm": 716.5742797851562, + "learning_rate": 1.4634789246146103e-06, + "loss": 25.5102, + "step": 
387540 + }, + { + "epoch": 0.7828755196612758, + "grad_norm": 356.6044616699219, + "learning_rate": 1.463232175339878e-06, + "loss": 14.8314, + "step": 387550 + }, + { + "epoch": 0.7828957202939596, + "grad_norm": 451.978271484375, + "learning_rate": 1.4629854433029234e-06, + "loss": 30.8221, + "step": 387560 + }, + { + "epoch": 0.7829159209266434, + "grad_norm": 394.9523620605469, + "learning_rate": 1.4627387285049465e-06, + "loss": 12.3614, + "step": 387570 + }, + { + "epoch": 0.7829361215593272, + "grad_norm": 239.18336486816406, + "learning_rate": 1.462492030947153e-06, + "loss": 15.4607, + "step": 387580 + }, + { + "epoch": 0.7829563221920111, + "grad_norm": 403.53228759765625, + "learning_rate": 1.462245350630745e-06, + "loss": 10.6075, + "step": 387590 + }, + { + "epoch": 0.7829765228246949, + "grad_norm": 783.1351318359375, + "learning_rate": 1.4619986875569247e-06, + "loss": 21.1282, + "step": 387600 + }, + { + "epoch": 0.7829967234573787, + "grad_norm": 482.3123474121094, + "learning_rate": 1.4617520417268916e-06, + "loss": 12.1442, + "step": 387610 + }, + { + "epoch": 0.7830169240900625, + "grad_norm": 422.8743591308594, + "learning_rate": 1.4615054131418521e-06, + "loss": 10.1425, + "step": 387620 + }, + { + "epoch": 0.7830371247227463, + "grad_norm": 251.38690185546875, + "learning_rate": 1.4612588018030055e-06, + "loss": 18.1368, + "step": 387630 + }, + { + "epoch": 0.7830573253554302, + "grad_norm": 333.3215637207031, + "learning_rate": 1.461012207711553e-06, + "loss": 29.8625, + "step": 387640 + }, + { + "epoch": 0.783077525988114, + "grad_norm": 687.0955810546875, + "learning_rate": 1.460765630868699e-06, + "loss": 25.649, + "step": 387650 + }, + { + "epoch": 0.7830977266207978, + "grad_norm": 12.569586753845215, + "learning_rate": 1.4605190712756428e-06, + "loss": 12.0924, + "step": 387660 + }, + { + "epoch": 0.7831179272534816, + "grad_norm": 559.3131103515625, + "learning_rate": 1.460272528933589e-06, + "loss": 28.3499, + "step": 387670 + }, + { + "epoch": 0.7831381278861654, + "grad_norm": 230.3157196044922, + "learning_rate": 1.4600260038437376e-06, + "loss": 17.6607, + "step": 387680 + }, + { + "epoch": 0.7831583285188493, + "grad_norm": 731.9522705078125, + "learning_rate": 1.459779496007288e-06, + "loss": 16.4776, + "step": 387690 + }, + { + "epoch": 0.7831785291515331, + "grad_norm": 290.20904541015625, + "learning_rate": 1.459533005425446e-06, + "loss": 11.4024, + "step": 387700 + }, + { + "epoch": 0.7831987297842169, + "grad_norm": 209.58937072753906, + "learning_rate": 1.4592865320994103e-06, + "loss": 17.2254, + "step": 387710 + }, + { + "epoch": 0.7832189304169006, + "grad_norm": 534.1865844726562, + "learning_rate": 1.4590400760303814e-06, + "loss": 16.533, + "step": 387720 + }, + { + "epoch": 0.7832391310495844, + "grad_norm": 245.14344787597656, + "learning_rate": 1.4587936372195611e-06, + "loss": 19.2951, + "step": 387730 + }, + { + "epoch": 0.7832593316822682, + "grad_norm": 13.814839363098145, + "learning_rate": 1.4585472156681535e-06, + "loss": 10.595, + "step": 387740 + }, + { + "epoch": 0.7832795323149521, + "grad_norm": 10.924476623535156, + "learning_rate": 1.4583008113773567e-06, + "loss": 17.6907, + "step": 387750 + }, + { + "epoch": 0.7832997329476359, + "grad_norm": 408.18524169921875, + "learning_rate": 1.4580544243483708e-06, + "loss": 17.6132, + "step": 387760 + }, + { + "epoch": 0.7833199335803197, + "grad_norm": 0.14816100895404816, + "learning_rate": 1.4578080545823991e-06, + "loss": 14.5498, + "step": 387770 + }, + { + "epoch": 
0.7833401342130035, + "grad_norm": 450.8954162597656, + "learning_rate": 1.457561702080642e-06, + "loss": 13.728, + "step": 387780 + }, + { + "epoch": 0.7833603348456873, + "grad_norm": 244.66517639160156, + "learning_rate": 1.457315366844298e-06, + "loss": 12.0544, + "step": 387790 + }, + { + "epoch": 0.7833805354783712, + "grad_norm": 418.8738708496094, + "learning_rate": 1.4570690488745687e-06, + "loss": 18.9703, + "step": 387800 + }, + { + "epoch": 0.783400736111055, + "grad_norm": 631.1736450195312, + "learning_rate": 1.4568227481726589e-06, + "loss": 10.4646, + "step": 387810 + }, + { + "epoch": 0.7834209367437388, + "grad_norm": 506.9795227050781, + "learning_rate": 1.4565764647397612e-06, + "loss": 11.6902, + "step": 387820 + }, + { + "epoch": 0.7834411373764226, + "grad_norm": 116.03436279296875, + "learning_rate": 1.4563301985770812e-06, + "loss": 17.3234, + "step": 387830 + }, + { + "epoch": 0.7834613380091064, + "grad_norm": 295.0504455566406, + "learning_rate": 1.4560839496858187e-06, + "loss": 13.5444, + "step": 387840 + }, + { + "epoch": 0.7834815386417903, + "grad_norm": 575.4901123046875, + "learning_rate": 1.4558377180671734e-06, + "loss": 18.5078, + "step": 387850 + }, + { + "epoch": 0.7835017392744741, + "grad_norm": 170.54981994628906, + "learning_rate": 1.4555915037223438e-06, + "loss": 16.1026, + "step": 387860 + }, + { + "epoch": 0.7835219399071579, + "grad_norm": 1.215683937072754, + "learning_rate": 1.455345306652533e-06, + "loss": 11.0265, + "step": 387870 + }, + { + "epoch": 0.7835421405398417, + "grad_norm": 47.566925048828125, + "learning_rate": 1.4550991268589393e-06, + "loss": 36.6043, + "step": 387880 + }, + { + "epoch": 0.7835623411725255, + "grad_norm": 127.637939453125, + "learning_rate": 1.4548529643427607e-06, + "loss": 15.5971, + "step": 387890 + }, + { + "epoch": 0.7835825418052094, + "grad_norm": 454.849853515625, + "learning_rate": 1.4546068191051988e-06, + "loss": 15.9606, + "step": 387900 + }, + { + "epoch": 0.7836027424378932, + "grad_norm": 167.77975463867188, + "learning_rate": 1.4543606911474545e-06, + "loss": 19.1838, + "step": 387910 + }, + { + "epoch": 0.783622943070577, + "grad_norm": 528.1384887695312, + "learning_rate": 1.4541145804707268e-06, + "loss": 27.7355, + "step": 387920 + }, + { + "epoch": 0.7836431437032608, + "grad_norm": 602.1045532226562, + "learning_rate": 1.4538684870762127e-06, + "loss": 18.1397, + "step": 387930 + }, + { + "epoch": 0.7836633443359446, + "grad_norm": 252.25518798828125, + "learning_rate": 1.4536224109651148e-06, + "loss": 13.3719, + "step": 387940 + }, + { + "epoch": 0.7836835449686285, + "grad_norm": 66.79181671142578, + "learning_rate": 1.4533763521386319e-06, + "loss": 21.1371, + "step": 387950 + }, + { + "epoch": 0.7837037456013123, + "grad_norm": 61.031314849853516, + "learning_rate": 1.4531303105979605e-06, + "loss": 6.9965, + "step": 387960 + }, + { + "epoch": 0.783723946233996, + "grad_norm": 180.56610107421875, + "learning_rate": 1.4528842863443033e-06, + "loss": 14.9373, + "step": 387970 + }, + { + "epoch": 0.7837441468666798, + "grad_norm": 278.0017395019531, + "learning_rate": 1.4526382793788564e-06, + "loss": 14.2005, + "step": 387980 + }, + { + "epoch": 0.7837643474993636, + "grad_norm": 169.85379028320312, + "learning_rate": 1.4523922897028215e-06, + "loss": 18.7049, + "step": 387990 + }, + { + "epoch": 0.7837845481320475, + "grad_norm": 412.85125732421875, + "learning_rate": 1.4521463173173966e-06, + "loss": 13.6385, + "step": 388000 + }, + { + "epoch": 0.7838047487647313, + "grad_norm": 
386.0448913574219, + "learning_rate": 1.4519003622237788e-06, + "loss": 13.6984, + "step": 388010 + }, + { + "epoch": 0.7838249493974151, + "grad_norm": 409.82012939453125, + "learning_rate": 1.4516544244231695e-06, + "loss": 18.8317, + "step": 388020 + }, + { + "epoch": 0.7838451500300989, + "grad_norm": 405.0011291503906, + "learning_rate": 1.4514085039167652e-06, + "loss": 5.4395, + "step": 388030 + }, + { + "epoch": 0.7838653506627827, + "grad_norm": 489.4347839355469, + "learning_rate": 1.4511626007057667e-06, + "loss": 11.3004, + "step": 388040 + }, + { + "epoch": 0.7838855512954666, + "grad_norm": 592.531982421875, + "learning_rate": 1.4509167147913693e-06, + "loss": 16.8958, + "step": 388050 + }, + { + "epoch": 0.7839057519281504, + "grad_norm": 428.1918029785156, + "learning_rate": 1.4506708461747754e-06, + "loss": 18.5203, + "step": 388060 + }, + { + "epoch": 0.7839259525608342, + "grad_norm": 538.6543579101562, + "learning_rate": 1.4504249948571814e-06, + "loss": 24.9611, + "step": 388070 + }, + { + "epoch": 0.783946153193518, + "grad_norm": 274.9072570800781, + "learning_rate": 1.4501791608397835e-06, + "loss": 28.5931, + "step": 388080 + }, + { + "epoch": 0.7839663538262018, + "grad_norm": 420.49224853515625, + "learning_rate": 1.449933344123784e-06, + "loss": 16.1609, + "step": 388090 + }, + { + "epoch": 0.7839865544588857, + "grad_norm": 643.5992431640625, + "learning_rate": 1.4496875447103781e-06, + "loss": 25.3927, + "step": 388100 + }, + { + "epoch": 0.7840067550915695, + "grad_norm": 307.52093505859375, + "learning_rate": 1.4494417626007633e-06, + "loss": 17.6272, + "step": 388110 + }, + { + "epoch": 0.7840269557242533, + "grad_norm": 335.6693115234375, + "learning_rate": 1.449195997796139e-06, + "loss": 12.8733, + "step": 388120 + }, + { + "epoch": 0.7840471563569371, + "grad_norm": 224.05914306640625, + "learning_rate": 1.4489502502977037e-06, + "loss": 12.8672, + "step": 388130 + }, + { + "epoch": 0.7840673569896209, + "grad_norm": 314.70001220703125, + "learning_rate": 1.4487045201066547e-06, + "loss": 19.8743, + "step": 388140 + }, + { + "epoch": 0.7840875576223048, + "grad_norm": 1050.0281982421875, + "learning_rate": 1.4484588072241873e-06, + "loss": 23.1158, + "step": 388150 + }, + { + "epoch": 0.7841077582549886, + "grad_norm": 0.7938860058784485, + "learning_rate": 1.4482131116515026e-06, + "loss": 10.3835, + "step": 388160 + }, + { + "epoch": 0.7841279588876724, + "grad_norm": 558.4970092773438, + "learning_rate": 1.4479674333897964e-06, + "loss": 15.0136, + "step": 388170 + }, + { + "epoch": 0.7841481595203562, + "grad_norm": 923.7531127929688, + "learning_rate": 1.4477217724402643e-06, + "loss": 17.374, + "step": 388180 + }, + { + "epoch": 0.78416836015304, + "grad_norm": 273.45819091796875, + "learning_rate": 1.4474761288041057e-06, + "loss": 13.9706, + "step": 388190 + }, + { + "epoch": 0.7841885607857239, + "grad_norm": 1157.492919921875, + "learning_rate": 1.4472305024825189e-06, + "loss": 25.9856, + "step": 388200 + }, + { + "epoch": 0.7842087614184077, + "grad_norm": 218.44546508789062, + "learning_rate": 1.4469848934767e-06, + "loss": 10.8597, + "step": 388210 + }, + { + "epoch": 0.7842289620510915, + "grad_norm": 193.62757873535156, + "learning_rate": 1.4467393017878444e-06, + "loss": 23.0175, + "step": 388220 + }, + { + "epoch": 0.7842491626837752, + "grad_norm": 452.9956970214844, + "learning_rate": 1.446493727417152e-06, + "loss": 27.1394, + "step": 388230 + }, + { + "epoch": 0.784269363316459, + "grad_norm": 159.25376892089844, + "learning_rate": 
1.4462481703658177e-06, + "loss": 8.6344, + "step": 388240 + }, + { + "epoch": 0.7842895639491428, + "grad_norm": 1031.0223388671875, + "learning_rate": 1.4460026306350378e-06, + "loss": 13.0682, + "step": 388250 + }, + { + "epoch": 0.7843097645818267, + "grad_norm": 596.4764404296875, + "learning_rate": 1.4457571082260113e-06, + "loss": 28.9894, + "step": 388260 + }, + { + "epoch": 0.7843299652145105, + "grad_norm": 267.9975280761719, + "learning_rate": 1.445511603139932e-06, + "loss": 12.5665, + "step": 388270 + }, + { + "epoch": 0.7843501658471943, + "grad_norm": 271.784423828125, + "learning_rate": 1.4452661153779996e-06, + "loss": 23.4446, + "step": 388280 + }, + { + "epoch": 0.7843703664798781, + "grad_norm": 432.1602783203125, + "learning_rate": 1.445020644941409e-06, + "loss": 21.4996, + "step": 388290 + }, + { + "epoch": 0.7843905671125619, + "grad_norm": 376.19134521484375, + "learning_rate": 1.4447751918313552e-06, + "loss": 14.6372, + "step": 388300 + }, + { + "epoch": 0.7844107677452458, + "grad_norm": 1015.6385498046875, + "learning_rate": 1.4445297560490373e-06, + "loss": 32.7653, + "step": 388310 + }, + { + "epoch": 0.7844309683779296, + "grad_norm": 476.18585205078125, + "learning_rate": 1.4442843375956506e-06, + "loss": 25.0618, + "step": 388320 + }, + { + "epoch": 0.7844511690106134, + "grad_norm": 355.6081237792969, + "learning_rate": 1.4440389364723889e-06, + "loss": 20.9087, + "step": 388330 + }, + { + "epoch": 0.7844713696432972, + "grad_norm": 290.85345458984375, + "learning_rate": 1.4437935526804497e-06, + "loss": 7.4059, + "step": 388340 + }, + { + "epoch": 0.784491570275981, + "grad_norm": 289.3037109375, + "learning_rate": 1.4435481862210315e-06, + "loss": 16.1818, + "step": 388350 + }, + { + "epoch": 0.7845117709086649, + "grad_norm": 163.52731323242188, + "learning_rate": 1.4433028370953279e-06, + "loss": 21.5489, + "step": 388360 + }, + { + "epoch": 0.7845319715413487, + "grad_norm": 447.26934814453125, + "learning_rate": 1.4430575053045337e-06, + "loss": 17.4456, + "step": 388370 + }, + { + "epoch": 0.7845521721740325, + "grad_norm": 31.244953155517578, + "learning_rate": 1.4428121908498472e-06, + "loss": 23.6349, + "step": 388380 + }, + { + "epoch": 0.7845723728067163, + "grad_norm": 456.913818359375, + "learning_rate": 1.4425668937324623e-06, + "loss": 14.1966, + "step": 388390 + }, + { + "epoch": 0.7845925734394001, + "grad_norm": 29.88794708251953, + "learning_rate": 1.4423216139535735e-06, + "loss": 22.1706, + "step": 388400 + }, + { + "epoch": 0.784612774072084, + "grad_norm": 295.7020568847656, + "learning_rate": 1.4420763515143777e-06, + "loss": 26.525, + "step": 388410 + }, + { + "epoch": 0.7846329747047678, + "grad_norm": 209.07150268554688, + "learning_rate": 1.4418311064160735e-06, + "loss": 16.2925, + "step": 388420 + }, + { + "epoch": 0.7846531753374516, + "grad_norm": 4.983144283294678, + "learning_rate": 1.4415858786598496e-06, + "loss": 14.472, + "step": 388430 + }, + { + "epoch": 0.7846733759701354, + "grad_norm": 388.2928466796875, + "learning_rate": 1.4413406682469044e-06, + "loss": 21.0465, + "step": 388440 + }, + { + "epoch": 0.7846935766028192, + "grad_norm": 700.1123046875, + "learning_rate": 1.4410954751784352e-06, + "loss": 26.1824, + "step": 388450 + }, + { + "epoch": 0.7847137772355031, + "grad_norm": 828.24560546875, + "learning_rate": 1.440850299455635e-06, + "loss": 29.1401, + "step": 388460 + }, + { + "epoch": 0.7847339778681869, + "grad_norm": 288.4026184082031, + "learning_rate": 1.4406051410796968e-06, + "loss": 22.7773, + 
"step": 388470 + }, + { + "epoch": 0.7847541785008707, + "grad_norm": 391.3822937011719, + "learning_rate": 1.4403600000518191e-06, + "loss": 13.4979, + "step": 388480 + }, + { + "epoch": 0.7847743791335544, + "grad_norm": 538.2525634765625, + "learning_rate": 1.4401148763731953e-06, + "loss": 12.9334, + "step": 388490 + }, + { + "epoch": 0.7847945797662382, + "grad_norm": 771.3076171875, + "learning_rate": 1.4398697700450181e-06, + "loss": 22.9988, + "step": 388500 + }, + { + "epoch": 0.784814780398922, + "grad_norm": 209.12530517578125, + "learning_rate": 1.4396246810684839e-06, + "loss": 16.0794, + "step": 388510 + }, + { + "epoch": 0.7848349810316059, + "grad_norm": 431.53363037109375, + "learning_rate": 1.4393796094447886e-06, + "loss": 22.1997, + "step": 388520 + }, + { + "epoch": 0.7848551816642897, + "grad_norm": 57.958675384521484, + "learning_rate": 1.4391345551751251e-06, + "loss": 17.6947, + "step": 388530 + }, + { + "epoch": 0.7848753822969735, + "grad_norm": 350.1054382324219, + "learning_rate": 1.4388895182606867e-06, + "loss": 10.1518, + "step": 388540 + }, + { + "epoch": 0.7848955829296573, + "grad_norm": 901.931884765625, + "learning_rate": 1.4386444987026705e-06, + "loss": 20.1345, + "step": 388550 + }, + { + "epoch": 0.7849157835623412, + "grad_norm": 329.38287353515625, + "learning_rate": 1.4383994965022684e-06, + "loss": 20.9889, + "step": 388560 + }, + { + "epoch": 0.784935984195025, + "grad_norm": 501.769775390625, + "learning_rate": 1.4381545116606744e-06, + "loss": 22.558, + "step": 388570 + }, + { + "epoch": 0.7849561848277088, + "grad_norm": 602.0369262695312, + "learning_rate": 1.4379095441790847e-06, + "loss": 28.9831, + "step": 388580 + }, + { + "epoch": 0.7849763854603926, + "grad_norm": 155.15145874023438, + "learning_rate": 1.4376645940586898e-06, + "loss": 21.0178, + "step": 388590 + }, + { + "epoch": 0.7849965860930764, + "grad_norm": 688.16650390625, + "learning_rate": 1.4374196613006874e-06, + "loss": 24.7662, + "step": 388600 + }, + { + "epoch": 0.7850167867257603, + "grad_norm": 244.4649200439453, + "learning_rate": 1.4371747459062695e-06, + "loss": 11.4804, + "step": 388610 + }, + { + "epoch": 0.7850369873584441, + "grad_norm": 156.22230529785156, + "learning_rate": 1.4369298478766286e-06, + "loss": 21.9342, + "step": 388620 + }, + { + "epoch": 0.7850571879911279, + "grad_norm": 138.66293334960938, + "learning_rate": 1.4366849672129607e-06, + "loss": 18.6234, + "step": 388630 + }, + { + "epoch": 0.7850773886238117, + "grad_norm": 18.091754913330078, + "learning_rate": 1.4364401039164566e-06, + "loss": 14.6982, + "step": 388640 + }, + { + "epoch": 0.7850975892564955, + "grad_norm": 1073.05517578125, + "learning_rate": 1.4361952579883127e-06, + "loss": 20.9805, + "step": 388650 + }, + { + "epoch": 0.7851177898891794, + "grad_norm": 239.39566040039062, + "learning_rate": 1.4359504294297195e-06, + "loss": 16.8774, + "step": 388660 + }, + { + "epoch": 0.7851379905218632, + "grad_norm": 326.1767272949219, + "learning_rate": 1.4357056182418727e-06, + "loss": 26.5385, + "step": 388670 + }, + { + "epoch": 0.785158191154547, + "grad_norm": 209.94781494140625, + "learning_rate": 1.4354608244259649e-06, + "loss": 19.8899, + "step": 388680 + }, + { + "epoch": 0.7851783917872308, + "grad_norm": 230.2615966796875, + "learning_rate": 1.4352160479831873e-06, + "loss": 10.7179, + "step": 388690 + }, + { + "epoch": 0.7851985924199146, + "grad_norm": 298.5816650390625, + "learning_rate": 1.4349712889147355e-06, + "loss": 12.5192, + "step": 388700 + }, + { + "epoch": 
0.7852187930525985, + "grad_norm": 267.53173828125, + "learning_rate": 1.4347265472218014e-06, + "loss": 19.6131, + "step": 388710 + }, + { + "epoch": 0.7852389936852823, + "grad_norm": 4.332077503204346, + "learning_rate": 1.4344818229055762e-06, + "loss": 13.5306, + "step": 388720 + }, + { + "epoch": 0.7852591943179661, + "grad_norm": 543.1563720703125, + "learning_rate": 1.434237115967254e-06, + "loss": 23.7846, + "step": 388730 + }, + { + "epoch": 0.7852793949506498, + "grad_norm": 250.99136352539062, + "learning_rate": 1.4339924264080308e-06, + "loss": 15.9756, + "step": 388740 + }, + { + "epoch": 0.7852995955833336, + "grad_norm": 584.9282836914062, + "learning_rate": 1.433747754229093e-06, + "loss": 24.7947, + "step": 388750 + }, + { + "epoch": 0.7853197962160174, + "grad_norm": 351.9540100097656, + "learning_rate": 1.4335030994316357e-06, + "loss": 15.2832, + "step": 388760 + }, + { + "epoch": 0.7853399968487013, + "grad_norm": 246.7580108642578, + "learning_rate": 1.4332584620168538e-06, + "loss": 19.1401, + "step": 388770 + }, + { + "epoch": 0.7853601974813851, + "grad_norm": 352.8351135253906, + "learning_rate": 1.4330138419859375e-06, + "loss": 32.0533, + "step": 388780 + }, + { + "epoch": 0.7853803981140689, + "grad_norm": 56.823421478271484, + "learning_rate": 1.4327692393400771e-06, + "loss": 12.9277, + "step": 388790 + }, + { + "epoch": 0.7854005987467527, + "grad_norm": 307.0126037597656, + "learning_rate": 1.4325246540804672e-06, + "loss": 22.0669, + "step": 388800 + }, + { + "epoch": 0.7854207993794365, + "grad_norm": 1.0302592515945435, + "learning_rate": 1.4322800862083009e-06, + "loss": 7.5862, + "step": 388810 + }, + { + "epoch": 0.7854410000121204, + "grad_norm": 655.0123291015625, + "learning_rate": 1.4320355357247689e-06, + "loss": 25.2072, + "step": 388820 + }, + { + "epoch": 0.7854612006448042, + "grad_norm": 152.23594665527344, + "learning_rate": 1.4317910026310611e-06, + "loss": 20.326, + "step": 388830 + }, + { + "epoch": 0.785481401277488, + "grad_norm": 1402.765869140625, + "learning_rate": 1.431546486928373e-06, + "loss": 27.2022, + "step": 388840 + }, + { + "epoch": 0.7855016019101718, + "grad_norm": 476.7830810546875, + "learning_rate": 1.4313019886178942e-06, + "loss": 16.5796, + "step": 388850 + }, + { + "epoch": 0.7855218025428556, + "grad_norm": 425.1496887207031, + "learning_rate": 1.4310575077008154e-06, + "loss": 18.6398, + "step": 388860 + }, + { + "epoch": 0.7855420031755395, + "grad_norm": 285.9603576660156, + "learning_rate": 1.4308130441783307e-06, + "loss": 15.7991, + "step": 388870 + }, + { + "epoch": 0.7855622038082233, + "grad_norm": 381.857666015625, + "learning_rate": 1.4305685980516293e-06, + "loss": 18.2687, + "step": 388880 + }, + { + "epoch": 0.7855824044409071, + "grad_norm": 216.32675170898438, + "learning_rate": 1.430324169321905e-06, + "loss": 6.9295, + "step": 388890 + }, + { + "epoch": 0.7856026050735909, + "grad_norm": 1054.424560546875, + "learning_rate": 1.4300797579903476e-06, + "loss": 33.8434, + "step": 388900 + }, + { + "epoch": 0.7856228057062747, + "grad_norm": 188.59909057617188, + "learning_rate": 1.429835364058147e-06, + "loss": 24.688, + "step": 388910 + }, + { + "epoch": 0.7856430063389586, + "grad_norm": 635.3093872070312, + "learning_rate": 1.4295909875264973e-06, + "loss": 15.7614, + "step": 388920 + }, + { + "epoch": 0.7856632069716424, + "grad_norm": 524.1882934570312, + "learning_rate": 1.4293466283965878e-06, + "loss": 15.9969, + "step": 388930 + }, + { + "epoch": 0.7856834076043262, + "grad_norm": 
350.4474182128906, + "learning_rate": 1.4291022866696086e-06, + "loss": 30.6318, + "step": 388940 + }, + { + "epoch": 0.78570360823701, + "grad_norm": 59.84031295776367, + "learning_rate": 1.428857962346752e-06, + "loss": 30.0063, + "step": 388950 + }, + { + "epoch": 0.7857238088696938, + "grad_norm": 250.73403930664062, + "learning_rate": 1.4286136554292096e-06, + "loss": 14.1806, + "step": 388960 + }, + { + "epoch": 0.7857440095023777, + "grad_norm": 203.14076232910156, + "learning_rate": 1.4283693659181713e-06, + "loss": 23.3837, + "step": 388970 + }, + { + "epoch": 0.7857642101350615, + "grad_norm": 313.9321594238281, + "learning_rate": 1.4281250938148262e-06, + "loss": 13.177, + "step": 388980 + }, + { + "epoch": 0.7857844107677453, + "grad_norm": 332.5777282714844, + "learning_rate": 1.4278808391203674e-06, + "loss": 11.7201, + "step": 388990 + }, + { + "epoch": 0.785804611400429, + "grad_norm": 412.3974914550781, + "learning_rate": 1.4276366018359845e-06, + "loss": 14.7089, + "step": 389000 + }, + { + "epoch": 0.7858248120331128, + "grad_norm": 235.46893310546875, + "learning_rate": 1.4273923819628654e-06, + "loss": 19.7594, + "step": 389010 + }, + { + "epoch": 0.7858450126657966, + "grad_norm": 308.99261474609375, + "learning_rate": 1.427148179502203e-06, + "loss": 26.9167, + "step": 389020 + }, + { + "epoch": 0.7858652132984805, + "grad_norm": 945.6626586914062, + "learning_rate": 1.42690399445519e-06, + "loss": 23.02, + "step": 389030 + }, + { + "epoch": 0.7858854139311643, + "grad_norm": 462.1242370605469, + "learning_rate": 1.4266598268230102e-06, + "loss": 19.9528, + "step": 389040 + }, + { + "epoch": 0.7859056145638481, + "grad_norm": 0.0, + "learning_rate": 1.4264156766068577e-06, + "loss": 21.8942, + "step": 389050 + }, + { + "epoch": 0.7859258151965319, + "grad_norm": 315.1378173828125, + "learning_rate": 1.4261715438079227e-06, + "loss": 27.4553, + "step": 389060 + }, + { + "epoch": 0.7859460158292157, + "grad_norm": 374.4578857421875, + "learning_rate": 1.4259274284273943e-06, + "loss": 18.1581, + "step": 389070 + }, + { + "epoch": 0.7859662164618996, + "grad_norm": 423.93682861328125, + "learning_rate": 1.4256833304664609e-06, + "loss": 20.6491, + "step": 389080 + }, + { + "epoch": 0.7859864170945834, + "grad_norm": 336.80517578125, + "learning_rate": 1.425439249926313e-06, + "loss": 17.3852, + "step": 389090 + }, + { + "epoch": 0.7860066177272672, + "grad_norm": 365.2979431152344, + "learning_rate": 1.4251951868081438e-06, + "loss": 21.3594, + "step": 389100 + }, + { + "epoch": 0.786026818359951, + "grad_norm": 153.6697540283203, + "learning_rate": 1.4249511411131367e-06, + "loss": 19.2336, + "step": 389110 + }, + { + "epoch": 0.7860470189926348, + "grad_norm": 480.56158447265625, + "learning_rate": 1.4247071128424838e-06, + "loss": 16.8388, + "step": 389120 + }, + { + "epoch": 0.7860672196253187, + "grad_norm": 406.1510925292969, + "learning_rate": 1.424463101997377e-06, + "loss": 13.4169, + "step": 389130 + }, + { + "epoch": 0.7860874202580025, + "grad_norm": 257.09716796875, + "learning_rate": 1.424219108579003e-06, + "loss": 15.5408, + "step": 389140 + }, + { + "epoch": 0.7861076208906863, + "grad_norm": 307.8927001953125, + "learning_rate": 1.4239751325885499e-06, + "loss": 15.6521, + "step": 389150 + }, + { + "epoch": 0.7861278215233701, + "grad_norm": 470.115234375, + "learning_rate": 1.4237311740272097e-06, + "loss": 18.4215, + "step": 389160 + }, + { + "epoch": 0.786148022156054, + "grad_norm": 0.0, + "learning_rate": 1.4234872328961702e-06, + "loss": 13.725, 
+ "step": 389170 + }, + { + "epoch": 0.7861682227887378, + "grad_norm": 764.4478759765625, + "learning_rate": 1.4232433091966187e-06, + "loss": 30.2444, + "step": 389180 + }, + { + "epoch": 0.7861884234214216, + "grad_norm": 119.15901947021484, + "learning_rate": 1.4229994029297467e-06, + "loss": 28.2807, + "step": 389190 + }, + { + "epoch": 0.7862086240541054, + "grad_norm": 978.8793334960938, + "learning_rate": 1.4227555140967402e-06, + "loss": 16.5327, + "step": 389200 + }, + { + "epoch": 0.7862288246867892, + "grad_norm": 932.5587158203125, + "learning_rate": 1.4225116426987916e-06, + "loss": 25.7453, + "step": 389210 + }, + { + "epoch": 0.786249025319473, + "grad_norm": 313.8899230957031, + "learning_rate": 1.4222677887370868e-06, + "loss": 6.8717, + "step": 389220 + }, + { + "epoch": 0.7862692259521569, + "grad_norm": 162.51959228515625, + "learning_rate": 1.4220239522128138e-06, + "loss": 22.4034, + "step": 389230 + }, + { + "epoch": 0.7862894265848407, + "grad_norm": 169.2135467529297, + "learning_rate": 1.421780133127163e-06, + "loss": 9.9378, + "step": 389240 + }, + { + "epoch": 0.7863096272175244, + "grad_norm": 394.2033386230469, + "learning_rate": 1.4215363314813208e-06, + "loss": 17.0908, + "step": 389250 + }, + { + "epoch": 0.7863298278502082, + "grad_norm": 191.8592987060547, + "learning_rate": 1.4212925472764777e-06, + "loss": 18.1082, + "step": 389260 + }, + { + "epoch": 0.786350028482892, + "grad_norm": 8.816691398620605, + "learning_rate": 1.4210487805138195e-06, + "loss": 29.5246, + "step": 389270 + }, + { + "epoch": 0.7863702291155759, + "grad_norm": 433.8766784667969, + "learning_rate": 1.4208050311945365e-06, + "loss": 15.919, + "step": 389280 + }, + { + "epoch": 0.7863904297482597, + "grad_norm": 452.135498046875, + "learning_rate": 1.4205612993198165e-06, + "loss": 25.2342, + "step": 389290 + }, + { + "epoch": 0.7864106303809435, + "grad_norm": 154.0487060546875, + "learning_rate": 1.420317584890844e-06, + "loss": 19.6334, + "step": 389300 + }, + { + "epoch": 0.7864308310136273, + "grad_norm": 342.992431640625, + "learning_rate": 1.4200738879088117e-06, + "loss": 17.9108, + "step": 389310 + }, + { + "epoch": 0.7864510316463111, + "grad_norm": 345.0018310546875, + "learning_rate": 1.4198302083749049e-06, + "loss": 11.1524, + "step": 389320 + }, + { + "epoch": 0.786471232278995, + "grad_norm": 84.22262573242188, + "learning_rate": 1.4195865462903102e-06, + "loss": 16.6948, + "step": 389330 + }, + { + "epoch": 0.7864914329116788, + "grad_norm": 710.9985961914062, + "learning_rate": 1.4193429016562161e-06, + "loss": 13.8726, + "step": 389340 + }, + { + "epoch": 0.7865116335443626, + "grad_norm": 524.85498046875, + "learning_rate": 1.4190992744738135e-06, + "loss": 15.7337, + "step": 389350 + }, + { + "epoch": 0.7865318341770464, + "grad_norm": 325.3512268066406, + "learning_rate": 1.4188556647442836e-06, + "loss": 20.1727, + "step": 389360 + }, + { + "epoch": 0.7865520348097302, + "grad_norm": 567.08447265625, + "learning_rate": 1.4186120724688169e-06, + "loss": 18.4844, + "step": 389370 + }, + { + "epoch": 0.7865722354424141, + "grad_norm": 511.7174072265625, + "learning_rate": 1.4183684976486024e-06, + "loss": 12.9145, + "step": 389380 + }, + { + "epoch": 0.7865924360750979, + "grad_norm": 251.32025146484375, + "learning_rate": 1.4181249402848246e-06, + "loss": 24.8652, + "step": 389390 + }, + { + "epoch": 0.7866126367077817, + "grad_norm": 790.8467407226562, + "learning_rate": 1.4178814003786706e-06, + "loss": 17.5066, + "step": 389400 + }, + { + "epoch": 
0.7866328373404655, + "grad_norm": 364.34979248046875, + "learning_rate": 1.4176378779313282e-06, + "loss": 21.0615, + "step": 389410 + }, + { + "epoch": 0.7866530379731493, + "grad_norm": 272.425048828125, + "learning_rate": 1.417394372943987e-06, + "loss": 11.4304, + "step": 389420 + }, + { + "epoch": 0.7866732386058332, + "grad_norm": 243.82444763183594, + "learning_rate": 1.4171508854178284e-06, + "loss": 13.9975, + "step": 389430 + }, + { + "epoch": 0.786693439238517, + "grad_norm": 302.4471740722656, + "learning_rate": 1.4169074153540418e-06, + "loss": 13.661, + "step": 389440 + }, + { + "epoch": 0.7867136398712008, + "grad_norm": 303.0475158691406, + "learning_rate": 1.4166639627538153e-06, + "loss": 12.0609, + "step": 389450 + }, + { + "epoch": 0.7867338405038846, + "grad_norm": 272.08837890625, + "learning_rate": 1.416420527618334e-06, + "loss": 21.7047, + "step": 389460 + }, + { + "epoch": 0.7867540411365684, + "grad_norm": 264.9498596191406, + "learning_rate": 1.4161771099487832e-06, + "loss": 16.6128, + "step": 389470 + }, + { + "epoch": 0.7867742417692523, + "grad_norm": 270.763671875, + "learning_rate": 1.4159337097463515e-06, + "loss": 14.9909, + "step": 389480 + }, + { + "epoch": 0.7867944424019361, + "grad_norm": 251.77639770507812, + "learning_rate": 1.415690327012223e-06, + "loss": 22.8092, + "step": 389490 + }, + { + "epoch": 0.7868146430346199, + "grad_norm": 51.446590423583984, + "learning_rate": 1.4154469617475864e-06, + "loss": 13.1177, + "step": 389500 + }, + { + "epoch": 0.7868348436673036, + "grad_norm": 1031.5816650390625, + "learning_rate": 1.4152036139536269e-06, + "loss": 16.6685, + "step": 389510 + }, + { + "epoch": 0.7868550442999874, + "grad_norm": 116.60513305664062, + "learning_rate": 1.4149602836315285e-06, + "loss": 14.5776, + "step": 389520 + }, + { + "epoch": 0.7868752449326712, + "grad_norm": 349.8798828125, + "learning_rate": 1.4147169707824805e-06, + "loss": 21.2689, + "step": 389530 + }, + { + "epoch": 0.7868954455653551, + "grad_norm": 327.8604431152344, + "learning_rate": 1.414473675407667e-06, + "loss": 15.3557, + "step": 389540 + }, + { + "epoch": 0.7869156461980389, + "grad_norm": 491.3643798828125, + "learning_rate": 1.4142303975082723e-06, + "loss": 22.4333, + "step": 389550 + }, + { + "epoch": 0.7869358468307227, + "grad_norm": 186.81837463378906, + "learning_rate": 1.413987137085484e-06, + "loss": 12.3864, + "step": 389560 + }, + { + "epoch": 0.7869560474634065, + "grad_norm": 299.52606201171875, + "learning_rate": 1.413743894140489e-06, + "loss": 26.4502, + "step": 389570 + }, + { + "epoch": 0.7869762480960903, + "grad_norm": 260.9884338378906, + "learning_rate": 1.4135006686744711e-06, + "loss": 19.6507, + "step": 389580 + }, + { + "epoch": 0.7869964487287742, + "grad_norm": 415.31402587890625, + "learning_rate": 1.4132574606886146e-06, + "loss": 14.6892, + "step": 389590 + }, + { + "epoch": 0.787016649361458, + "grad_norm": 306.3960876464844, + "learning_rate": 1.4130142701841076e-06, + "loss": 15.7391, + "step": 389600 + }, + { + "epoch": 0.7870368499941418, + "grad_norm": 994.6866455078125, + "learning_rate": 1.4127710971621339e-06, + "loss": 30.5757, + "step": 389610 + }, + { + "epoch": 0.7870570506268256, + "grad_norm": 623.6600341796875, + "learning_rate": 1.4125279416238773e-06, + "loss": 17.5492, + "step": 389620 + }, + { + "epoch": 0.7870772512595094, + "grad_norm": 107.94436645507812, + "learning_rate": 1.412284803570525e-06, + "loss": 8.8308, + "step": 389630 + }, + { + "epoch": 0.7870974518921933, + "grad_norm": 
372.19940185546875, + "learning_rate": 1.4120416830032641e-06, + "loss": 23.1538, + "step": 389640 + }, + { + "epoch": 0.7871176525248771, + "grad_norm": 226.026123046875, + "learning_rate": 1.4117985799232735e-06, + "loss": 18.6804, + "step": 389650 + }, + { + "epoch": 0.7871378531575609, + "grad_norm": 281.11907958984375, + "learning_rate": 1.4115554943317416e-06, + "loss": 21.3654, + "step": 389660 + }, + { + "epoch": 0.7871580537902447, + "grad_norm": 202.64096069335938, + "learning_rate": 1.4113124262298544e-06, + "loss": 36.7346, + "step": 389670 + }, + { + "epoch": 0.7871782544229285, + "grad_norm": 250.60726928710938, + "learning_rate": 1.4110693756187954e-06, + "loss": 21.4456, + "step": 389680 + }, + { + "epoch": 0.7871984550556124, + "grad_norm": 263.5244445800781, + "learning_rate": 1.4108263424997475e-06, + "loss": 22.3812, + "step": 389690 + }, + { + "epoch": 0.7872186556882962, + "grad_norm": 258.9317626953125, + "learning_rate": 1.4105833268738966e-06, + "loss": 9.4989, + "step": 389700 + }, + { + "epoch": 0.78723885632098, + "grad_norm": 674.4338989257812, + "learning_rate": 1.4103403287424306e-06, + "loss": 19.2276, + "step": 389710 + }, + { + "epoch": 0.7872590569536638, + "grad_norm": 111.07289123535156, + "learning_rate": 1.4100973481065266e-06, + "loss": 22.8958, + "step": 389720 + }, + { + "epoch": 0.7872792575863476, + "grad_norm": 216.6655731201172, + "learning_rate": 1.4098543849673734e-06, + "loss": 14.623, + "step": 389730 + }, + { + "epoch": 0.7872994582190315, + "grad_norm": 329.5108947753906, + "learning_rate": 1.4096114393261557e-06, + "loss": 19.5182, + "step": 389740 + }, + { + "epoch": 0.7873196588517153, + "grad_norm": 970.3660888671875, + "learning_rate": 1.4093685111840567e-06, + "loss": 14.4819, + "step": 389750 + }, + { + "epoch": 0.787339859484399, + "grad_norm": 113.45997619628906, + "learning_rate": 1.4091256005422583e-06, + "loss": 14.2308, + "step": 389760 + }, + { + "epoch": 0.7873600601170828, + "grad_norm": 589.2026977539062, + "learning_rate": 1.4088827074019479e-06, + "loss": 32.8889, + "step": 389770 + }, + { + "epoch": 0.7873802607497666, + "grad_norm": 574.441162109375, + "learning_rate": 1.4086398317643074e-06, + "loss": 29.1568, + "step": 389780 + }, + { + "epoch": 0.7874004613824505, + "grad_norm": 226.6656951904297, + "learning_rate": 1.4083969736305191e-06, + "loss": 16.7552, + "step": 389790 + }, + { + "epoch": 0.7874206620151343, + "grad_norm": 403.8798828125, + "learning_rate": 1.4081541330017706e-06, + "loss": 26.2683, + "step": 389800 + }, + { + "epoch": 0.7874408626478181, + "grad_norm": 461.10943603515625, + "learning_rate": 1.4079113098792413e-06, + "loss": 27.7547, + "step": 389810 + }, + { + "epoch": 0.7874610632805019, + "grad_norm": 98.00347900390625, + "learning_rate": 1.407668504264118e-06, + "loss": 17.0657, + "step": 389820 + }, + { + "epoch": 0.7874812639131857, + "grad_norm": 492.1479797363281, + "learning_rate": 1.4074257161575828e-06, + "loss": 23.8312, + "step": 389830 + }, + { + "epoch": 0.7875014645458696, + "grad_norm": 107.99175262451172, + "learning_rate": 1.407182945560817e-06, + "loss": 11.9698, + "step": 389840 + }, + { + "epoch": 0.7875216651785534, + "grad_norm": 247.5465087890625, + "learning_rate": 1.4069401924750082e-06, + "loss": 9.3671, + "step": 389850 + }, + { + "epoch": 0.7875418658112372, + "grad_norm": 466.1968078613281, + "learning_rate": 1.4066974569013346e-06, + "loss": 11.9463, + "step": 389860 + }, + { + "epoch": 0.787562066443921, + "grad_norm": 193.89556884765625, + "learning_rate": 
1.4064547388409838e-06, + "loss": 27.172, + "step": 389870 + }, + { + "epoch": 0.7875822670766048, + "grad_norm": 759.7708740234375, + "learning_rate": 1.4062120382951355e-06, + "loss": 20.6499, + "step": 389880 + }, + { + "epoch": 0.7876024677092887, + "grad_norm": 443.9864807128906, + "learning_rate": 1.405969355264975e-06, + "loss": 9.3756, + "step": 389890 + }, + { + "epoch": 0.7876226683419725, + "grad_norm": 378.2743225097656, + "learning_rate": 1.4057266897516842e-06, + "loss": 20.7845, + "step": 389900 + }, + { + "epoch": 0.7876428689746563, + "grad_norm": 456.9198913574219, + "learning_rate": 1.4054840417564436e-06, + "loss": 8.7446, + "step": 389910 + }, + { + "epoch": 0.7876630696073401, + "grad_norm": 230.49256896972656, + "learning_rate": 1.4052414112804396e-06, + "loss": 9.2653, + "step": 389920 + }, + { + "epoch": 0.7876832702400239, + "grad_norm": 243.60069274902344, + "learning_rate": 1.404998798324853e-06, + "loss": 11.8547, + "step": 389930 + }, + { + "epoch": 0.7877034708727078, + "grad_norm": 383.02056884765625, + "learning_rate": 1.404756202890865e-06, + "loss": 16.3119, + "step": 389940 + }, + { + "epoch": 0.7877236715053916, + "grad_norm": 726.7916870117188, + "learning_rate": 1.4045136249796588e-06, + "loss": 18.4984, + "step": 389950 + }, + { + "epoch": 0.7877438721380754, + "grad_norm": 303.83380126953125, + "learning_rate": 1.4042710645924207e-06, + "loss": 8.8965, + "step": 389960 + }, + { + "epoch": 0.7877640727707592, + "grad_norm": 423.7670593261719, + "learning_rate": 1.4040285217303256e-06, + "loss": 22.0491, + "step": 389970 + }, + { + "epoch": 0.787784273403443, + "grad_norm": 294.0246887207031, + "learning_rate": 1.4037859963945598e-06, + "loss": 16.3219, + "step": 389980 + }, + { + "epoch": 0.7878044740361269, + "grad_norm": 398.1592712402344, + "learning_rate": 1.4035434885863064e-06, + "loss": 12.661, + "step": 389990 + }, + { + "epoch": 0.7878246746688107, + "grad_norm": 119.3377456665039, + "learning_rate": 1.4033009983067454e-06, + "loss": 28.423, + "step": 390000 + }, + { + "epoch": 0.7878448753014945, + "grad_norm": 807.6712036132812, + "learning_rate": 1.4030585255570577e-06, + "loss": 25.2311, + "step": 390010 + }, + { + "epoch": 0.7878650759341782, + "grad_norm": 129.84024047851562, + "learning_rate": 1.402816070338427e-06, + "loss": 12.5746, + "step": 390020 + }, + { + "epoch": 0.787885276566862, + "grad_norm": 132.0088348388672, + "learning_rate": 1.4025736326520373e-06, + "loss": 15.5592, + "step": 390030 + }, + { + "epoch": 0.7879054771995458, + "grad_norm": 639.2557983398438, + "learning_rate": 1.402331212499064e-06, + "loss": 13.7056, + "step": 390040 + }, + { + "epoch": 0.7879256778322297, + "grad_norm": 332.296875, + "learning_rate": 1.4020888098806924e-06, + "loss": 20.4145, + "step": 390050 + }, + { + "epoch": 0.7879458784649135, + "grad_norm": 49.81679916381836, + "learning_rate": 1.401846424798105e-06, + "loss": 16.4018, + "step": 390060 + }, + { + "epoch": 0.7879660790975973, + "grad_norm": 509.4182434082031, + "learning_rate": 1.4016040572524813e-06, + "loss": 14.52, + "step": 390070 + }, + { + "epoch": 0.7879862797302811, + "grad_norm": 65.95465850830078, + "learning_rate": 1.4013617072450019e-06, + "loss": 32.8255, + "step": 390080 + }, + { + "epoch": 0.788006480362965, + "grad_norm": 180.2455291748047, + "learning_rate": 1.401119374776851e-06, + "loss": 15.8193, + "step": 390090 + }, + { + "epoch": 0.7880266809956488, + "grad_norm": 174.9076690673828, + "learning_rate": 1.4008770598492072e-06, + "loss": 8.4116, + "step": 
390100 + }, + { + "epoch": 0.7880468816283326, + "grad_norm": 356.658935546875, + "learning_rate": 1.4006347624632505e-06, + "loss": 23.3492, + "step": 390110 + }, + { + "epoch": 0.7880670822610164, + "grad_norm": 373.9306335449219, + "learning_rate": 1.4003924826201653e-06, + "loss": 19.9767, + "step": 390120 + }, + { + "epoch": 0.7880872828937002, + "grad_norm": 604.8828735351562, + "learning_rate": 1.4001502203211286e-06, + "loss": 19.2727, + "step": 390130 + }, + { + "epoch": 0.788107483526384, + "grad_norm": 292.82464599609375, + "learning_rate": 1.399907975567325e-06, + "loss": 23.8888, + "step": 390140 + }, + { + "epoch": 0.7881276841590679, + "grad_norm": 293.5496520996094, + "learning_rate": 1.3996657483599318e-06, + "loss": 11.5477, + "step": 390150 + }, + { + "epoch": 0.7881478847917517, + "grad_norm": 142.21266174316406, + "learning_rate": 1.3994235387001326e-06, + "loss": 11.2106, + "step": 390160 + }, + { + "epoch": 0.7881680854244355, + "grad_norm": 471.30609130859375, + "learning_rate": 1.3991813465891046e-06, + "loss": 14.1122, + "step": 390170 + }, + { + "epoch": 0.7881882860571193, + "grad_norm": 508.54669189453125, + "learning_rate": 1.3989391720280316e-06, + "loss": 21.9794, + "step": 390180 + }, + { + "epoch": 0.7882084866898031, + "grad_norm": 180.43858337402344, + "learning_rate": 1.3986970150180923e-06, + "loss": 19.0057, + "step": 390190 + }, + { + "epoch": 0.788228687322487, + "grad_norm": 37.364959716796875, + "learning_rate": 1.3984548755604655e-06, + "loss": 30.1706, + "step": 390200 + }, + { + "epoch": 0.7882488879551708, + "grad_norm": 385.6730651855469, + "learning_rate": 1.3982127536563345e-06, + "loss": 20.253, + "step": 390210 + }, + { + "epoch": 0.7882690885878546, + "grad_norm": 251.6427001953125, + "learning_rate": 1.3979706493068772e-06, + "loss": 9.2216, + "step": 390220 + }, + { + "epoch": 0.7882892892205384, + "grad_norm": 333.47467041015625, + "learning_rate": 1.397728562513273e-06, + "loss": 21.7927, + "step": 390230 + }, + { + "epoch": 0.7883094898532222, + "grad_norm": 986.24658203125, + "learning_rate": 1.397486493276703e-06, + "loss": 39.0049, + "step": 390240 + }, + { + "epoch": 0.7883296904859061, + "grad_norm": 308.32647705078125, + "learning_rate": 1.3972444415983495e-06, + "loss": 20.7179, + "step": 390250 + }, + { + "epoch": 0.7883498911185899, + "grad_norm": 242.95274353027344, + "learning_rate": 1.397002407479387e-06, + "loss": 19.1673, + "step": 390260 + }, + { + "epoch": 0.7883700917512737, + "grad_norm": 338.8032531738281, + "learning_rate": 1.3967603909209976e-06, + "loss": 22.7495, + "step": 390270 + }, + { + "epoch": 0.7883902923839574, + "grad_norm": 318.2375183105469, + "learning_rate": 1.3965183919243624e-06, + "loss": 16.4054, + "step": 390280 + }, + { + "epoch": 0.7884104930166412, + "grad_norm": 267.89013671875, + "learning_rate": 1.3962764104906596e-06, + "loss": 18.8724, + "step": 390290 + }, + { + "epoch": 0.788430693649325, + "grad_norm": 510.5446472167969, + "learning_rate": 1.3960344466210669e-06, + "loss": 42.5586, + "step": 390300 + }, + { + "epoch": 0.7884508942820089, + "grad_norm": 249.7393341064453, + "learning_rate": 1.3957925003167655e-06, + "loss": 21.2731, + "step": 390310 + }, + { + "epoch": 0.7884710949146927, + "grad_norm": 74.7342529296875, + "learning_rate": 1.3955505715789368e-06, + "loss": 15.1349, + "step": 390320 + }, + { + "epoch": 0.7884912955473765, + "grad_norm": 48.525596618652344, + "learning_rate": 1.395308660408755e-06, + "loss": 31.3871, + "step": 390330 + }, + { + "epoch": 
0.7885114961800603, + "grad_norm": 466.8329772949219, + "learning_rate": 1.3950667668074015e-06, + "loss": 21.7112, + "step": 390340 + }, + { + "epoch": 0.7885316968127442, + "grad_norm": 182.07376098632812, + "learning_rate": 1.3948248907760565e-06, + "loss": 22.0986, + "step": 390350 + }, + { + "epoch": 0.788551897445428, + "grad_norm": 33.70562744140625, + "learning_rate": 1.3945830323158982e-06, + "loss": 27.627, + "step": 390360 + }, + { + "epoch": 0.7885720980781118, + "grad_norm": 224.57284545898438, + "learning_rate": 1.394341191428103e-06, + "loss": 9.883, + "step": 390370 + }, + { + "epoch": 0.7885922987107956, + "grad_norm": 111.23213195800781, + "learning_rate": 1.3940993681138533e-06, + "loss": 21.0308, + "step": 390380 + }, + { + "epoch": 0.7886124993434794, + "grad_norm": 249.69235229492188, + "learning_rate": 1.3938575623743262e-06, + "loss": 26.5926, + "step": 390390 + }, + { + "epoch": 0.7886326999761633, + "grad_norm": 183.27928161621094, + "learning_rate": 1.3936157742106977e-06, + "loss": 13.7726, + "step": 390400 + }, + { + "epoch": 0.7886529006088471, + "grad_norm": 166.25120544433594, + "learning_rate": 1.3933740036241505e-06, + "loss": 21.9439, + "step": 390410 + }, + { + "epoch": 0.7886731012415309, + "grad_norm": 393.0563659667969, + "learning_rate": 1.3931322506158596e-06, + "loss": 16.4815, + "step": 390420 + }, + { + "epoch": 0.7886933018742147, + "grad_norm": 122.62916564941406, + "learning_rate": 1.3928905151870059e-06, + "loss": 16.9871, + "step": 390430 + }, + { + "epoch": 0.7887135025068985, + "grad_norm": 326.7325439453125, + "learning_rate": 1.3926487973387665e-06, + "loss": 8.6477, + "step": 390440 + }, + { + "epoch": 0.7887337031395824, + "grad_norm": 252.49375915527344, + "learning_rate": 1.3924070970723176e-06, + "loss": 13.2418, + "step": 390450 + }, + { + "epoch": 0.7887539037722662, + "grad_norm": 352.0291748046875, + "learning_rate": 1.3921654143888403e-06, + "loss": 13.5124, + "step": 390460 + }, + { + "epoch": 0.78877410440495, + "grad_norm": 333.3211364746094, + "learning_rate": 1.39192374928951e-06, + "loss": 17.1857, + "step": 390470 + }, + { + "epoch": 0.7887943050376338, + "grad_norm": 34.31233215332031, + "learning_rate": 1.3916821017755073e-06, + "loss": 16.9706, + "step": 390480 + }, + { + "epoch": 0.7888145056703176, + "grad_norm": 1086.7008056640625, + "learning_rate": 1.3914404718480067e-06, + "loss": 29.5429, + "step": 390490 + }, + { + "epoch": 0.7888347063030015, + "grad_norm": 357.86199951171875, + "learning_rate": 1.3911988595081894e-06, + "loss": 10.5794, + "step": 390500 + }, + { + "epoch": 0.7888549069356853, + "grad_norm": 258.2838134765625, + "learning_rate": 1.3909572647572312e-06, + "loss": 18.9244, + "step": 390510 + }, + { + "epoch": 0.7888751075683691, + "grad_norm": 258.4314880371094, + "learning_rate": 1.3907156875963073e-06, + "loss": 19.3583, + "step": 390520 + }, + { + "epoch": 0.7888953082010528, + "grad_norm": 253.37208557128906, + "learning_rate": 1.3904741280265998e-06, + "loss": 15.9204, + "step": 390530 + }, + { + "epoch": 0.7889155088337366, + "grad_norm": 528.6141967773438, + "learning_rate": 1.3902325860492832e-06, + "loss": 12.6963, + "step": 390540 + }, + { + "epoch": 0.7889357094664204, + "grad_norm": 1056.8690185546875, + "learning_rate": 1.3899910616655338e-06, + "loss": 39.604, + "step": 390550 + }, + { + "epoch": 0.7889559100991043, + "grad_norm": 804.58544921875, + "learning_rate": 1.38974955487653e-06, + "loss": 17.4595, + "step": 390560 + }, + { + "epoch": 0.7889761107317881, + "grad_norm": 
432.2843322753906, + "learning_rate": 1.389508065683452e-06, + "loss": 11.8486, + "step": 390570 + }, + { + "epoch": 0.7889963113644719, + "grad_norm": 760.0068969726562, + "learning_rate": 1.3892665940874705e-06, + "loss": 26.9778, + "step": 390580 + }, + { + "epoch": 0.7890165119971557, + "grad_norm": 304.84832763671875, + "learning_rate": 1.3890251400897663e-06, + "loss": 30.2592, + "step": 390590 + }, + { + "epoch": 0.7890367126298395, + "grad_norm": 258.3695983886719, + "learning_rate": 1.3887837036915169e-06, + "loss": 19.3298, + "step": 390600 + }, + { + "epoch": 0.7890569132625234, + "grad_norm": 174.30075073242188, + "learning_rate": 1.3885422848938974e-06, + "loss": 15.9546, + "step": 390610 + }, + { + "epoch": 0.7890771138952072, + "grad_norm": 350.92291259765625, + "learning_rate": 1.3883008836980837e-06, + "loss": 12.4152, + "step": 390620 + }, + { + "epoch": 0.789097314527891, + "grad_norm": 228.29713439941406, + "learning_rate": 1.3880595001052533e-06, + "loss": 14.4857, + "step": 390630 + }, + { + "epoch": 0.7891175151605748, + "grad_norm": 411.44049072265625, + "learning_rate": 1.3878181341165858e-06, + "loss": 16.262, + "step": 390640 + }, + { + "epoch": 0.7891377157932586, + "grad_norm": 518.0469970703125, + "learning_rate": 1.3875767857332512e-06, + "loss": 19.8735, + "step": 390650 + }, + { + "epoch": 0.7891579164259425, + "grad_norm": 364.7655944824219, + "learning_rate": 1.38733545495643e-06, + "loss": 32.1867, + "step": 390660 + }, + { + "epoch": 0.7891781170586263, + "grad_norm": 492.7501525878906, + "learning_rate": 1.3870941417872985e-06, + "loss": 18.8283, + "step": 390670 + }, + { + "epoch": 0.7891983176913101, + "grad_norm": 16.221332550048828, + "learning_rate": 1.3868528462270326e-06, + "loss": 17.0699, + "step": 390680 + }, + { + "epoch": 0.7892185183239939, + "grad_norm": 278.474853515625, + "learning_rate": 1.3866115682768055e-06, + "loss": 13.8446, + "step": 390690 + }, + { + "epoch": 0.7892387189566777, + "grad_norm": 201.19393920898438, + "learning_rate": 1.3863703079377971e-06, + "loss": 20.1051, + "step": 390700 + }, + { + "epoch": 0.7892589195893616, + "grad_norm": 149.2244873046875, + "learning_rate": 1.3861290652111819e-06, + "loss": 15.4303, + "step": 390710 + }, + { + "epoch": 0.7892791202220454, + "grad_norm": 588.6261596679688, + "learning_rate": 1.3858878400981335e-06, + "loss": 18.6694, + "step": 390720 + }, + { + "epoch": 0.7892993208547292, + "grad_norm": 505.7598571777344, + "learning_rate": 1.3856466325998307e-06, + "loss": 16.5566, + "step": 390730 + }, + { + "epoch": 0.789319521487413, + "grad_norm": 253.9817657470703, + "learning_rate": 1.3854054427174468e-06, + "loss": 20.0498, + "step": 390740 + }, + { + "epoch": 0.7893397221200968, + "grad_norm": 816.97802734375, + "learning_rate": 1.3851642704521596e-06, + "loss": 24.4637, + "step": 390750 + }, + { + "epoch": 0.7893599227527807, + "grad_norm": 622.67529296875, + "learning_rate": 1.3849231158051418e-06, + "loss": 13.5268, + "step": 390760 + }, + { + "epoch": 0.7893801233854645, + "grad_norm": 79.00733947753906, + "learning_rate": 1.3846819787775723e-06, + "loss": 13.9648, + "step": 390770 + }, + { + "epoch": 0.7894003240181483, + "grad_norm": 273.0272216796875, + "learning_rate": 1.3844408593706238e-06, + "loss": 13.587, + "step": 390780 + }, + { + "epoch": 0.789420524650832, + "grad_norm": 169.9261474609375, + "learning_rate": 1.3841997575854703e-06, + "loss": 12.3255, + "step": 390790 + }, + { + "epoch": 0.7894407252835158, + "grad_norm": 376.6979064941406, + "learning_rate": 
1.3839586734232907e-06, + "loss": 11.2065, + "step": 390800 + }, + { + "epoch": 0.7894609259161997, + "grad_norm": 87.50740814208984, + "learning_rate": 1.3837176068852565e-06, + "loss": 4.0255, + "step": 390810 + }, + { + "epoch": 0.7894811265488835, + "grad_norm": 231.6638946533203, + "learning_rate": 1.3834765579725452e-06, + "loss": 13.0445, + "step": 390820 + }, + { + "epoch": 0.7895013271815673, + "grad_norm": 188.67898559570312, + "learning_rate": 1.3832355266863307e-06, + "loss": 11.0469, + "step": 390830 + }, + { + "epoch": 0.7895215278142511, + "grad_norm": 247.35105895996094, + "learning_rate": 1.3829945130277861e-06, + "loss": 21.8402, + "step": 390840 + }, + { + "epoch": 0.7895417284469349, + "grad_norm": 713.51513671875, + "learning_rate": 1.3827535169980888e-06, + "loss": 17.8452, + "step": 390850 + }, + { + "epoch": 0.7895619290796188, + "grad_norm": 850.671875, + "learning_rate": 1.3825125385984123e-06, + "loss": 28.3461, + "step": 390860 + }, + { + "epoch": 0.7895821297123026, + "grad_norm": 772.30322265625, + "learning_rate": 1.3822715778299295e-06, + "loss": 21.265, + "step": 390870 + }, + { + "epoch": 0.7896023303449864, + "grad_norm": 133.23912048339844, + "learning_rate": 1.3820306346938161e-06, + "loss": 21.7986, + "step": 390880 + }, + { + "epoch": 0.7896225309776702, + "grad_norm": 674.4517822265625, + "learning_rate": 1.3817897091912485e-06, + "loss": 20.0414, + "step": 390890 + }, + { + "epoch": 0.789642731610354, + "grad_norm": 457.153564453125, + "learning_rate": 1.3815488013233986e-06, + "loss": 16.7648, + "step": 390900 + }, + { + "epoch": 0.7896629322430379, + "grad_norm": 712.7386474609375, + "learning_rate": 1.3813079110914396e-06, + "loss": 30.4711, + "step": 390910 + }, + { + "epoch": 0.7896831328757217, + "grad_norm": 241.47642517089844, + "learning_rate": 1.3810670384965469e-06, + "loss": 19.2033, + "step": 390920 + }, + { + "epoch": 0.7897033335084055, + "grad_norm": 442.91015625, + "learning_rate": 1.380826183539898e-06, + "loss": 32.3804, + "step": 390930 + }, + { + "epoch": 0.7897235341410893, + "grad_norm": 386.73651123046875, + "learning_rate": 1.38058534622266e-06, + "loss": 22.0351, + "step": 390940 + }, + { + "epoch": 0.7897437347737731, + "grad_norm": 645.64111328125, + "learning_rate": 1.3803445265460096e-06, + "loss": 7.6352, + "step": 390950 + }, + { + "epoch": 0.789763935406457, + "grad_norm": 337.479248046875, + "learning_rate": 1.3801037245111233e-06, + "loss": 30.6434, + "step": 390960 + }, + { + "epoch": 0.7897841360391408, + "grad_norm": 595.5153198242188, + "learning_rate": 1.3798629401191715e-06, + "loss": 20.989, + "step": 390970 + }, + { + "epoch": 0.7898043366718246, + "grad_norm": 278.4434509277344, + "learning_rate": 1.3796221733713278e-06, + "loss": 15.418, + "step": 390980 + }, + { + "epoch": 0.7898245373045084, + "grad_norm": 152.62255859375, + "learning_rate": 1.3793814242687676e-06, + "loss": 17.0787, + "step": 390990 + }, + { + "epoch": 0.7898447379371922, + "grad_norm": 111.47569274902344, + "learning_rate": 1.3791406928126638e-06, + "loss": 13.7548, + "step": 391000 + }, + { + "epoch": 0.789864938569876, + "grad_norm": 407.932861328125, + "learning_rate": 1.3788999790041867e-06, + "loss": 12.2205, + "step": 391010 + }, + { + "epoch": 0.7898851392025599, + "grad_norm": 503.17254638671875, + "learning_rate": 1.3786592828445144e-06, + "loss": 16.619, + "step": 391020 + }, + { + "epoch": 0.7899053398352437, + "grad_norm": 383.8878173828125, + "learning_rate": 1.3784186043348151e-06, + "loss": 8.142, + "step": 391030 + }, 
+ { + "epoch": 0.7899255404679274, + "grad_norm": 448.76788330078125, + "learning_rate": 1.3781779434762666e-06, + "loss": 39.0891, + "step": 391040 + }, + { + "epoch": 0.7899457411006112, + "grad_norm": 425.3940734863281, + "learning_rate": 1.3779373002700391e-06, + "loss": 18.9904, + "step": 391050 + }, + { + "epoch": 0.789965941733295, + "grad_norm": 663.6753540039062, + "learning_rate": 1.377696674717305e-06, + "loss": 19.2672, + "step": 391060 + }, + { + "epoch": 0.7899861423659789, + "grad_norm": 461.9291076660156, + "learning_rate": 1.3774560668192389e-06, + "loss": 15.2556, + "step": 391070 + }, + { + "epoch": 0.7900063429986627, + "grad_norm": 500.15228271484375, + "learning_rate": 1.3772154765770106e-06, + "loss": 25.29, + "step": 391080 + }, + { + "epoch": 0.7900265436313465, + "grad_norm": 176.2303466796875, + "learning_rate": 1.3769749039917968e-06, + "loss": 27.7685, + "step": 391090 + }, + { + "epoch": 0.7900467442640303, + "grad_norm": 576.8667602539062, + "learning_rate": 1.3767343490647668e-06, + "loss": 20.2137, + "step": 391100 + }, + { + "epoch": 0.7900669448967141, + "grad_norm": 556.3174438476562, + "learning_rate": 1.376493811797095e-06, + "loss": 19.5516, + "step": 391110 + }, + { + "epoch": 0.790087145529398, + "grad_norm": 488.4520568847656, + "learning_rate": 1.3762532921899529e-06, + "loss": 21.1224, + "step": 391120 + }, + { + "epoch": 0.7901073461620818, + "grad_norm": 227.12380981445312, + "learning_rate": 1.3760127902445114e-06, + "loss": 25.1898, + "step": 391130 + }, + { + "epoch": 0.7901275467947656, + "grad_norm": 618.3762817382812, + "learning_rate": 1.3757723059619455e-06, + "loss": 22.9171, + "step": 391140 + }, + { + "epoch": 0.7901477474274494, + "grad_norm": 223.0682830810547, + "learning_rate": 1.3755318393434259e-06, + "loss": 32.6294, + "step": 391150 + }, + { + "epoch": 0.7901679480601332, + "grad_norm": 148.1354522705078, + "learning_rate": 1.3752913903901227e-06, + "loss": 12.9979, + "step": 391160 + }, + { + "epoch": 0.7901881486928171, + "grad_norm": 240.58709716796875, + "learning_rate": 1.3750509591032102e-06, + "loss": 14.8266, + "step": 391170 + }, + { + "epoch": 0.7902083493255009, + "grad_norm": 216.39732360839844, + "learning_rate": 1.3748105454838623e-06, + "loss": 21.5026, + "step": 391180 + }, + { + "epoch": 0.7902285499581847, + "grad_norm": 152.46498107910156, + "learning_rate": 1.3745701495332447e-06, + "loss": 10.914, + "step": 391190 + }, + { + "epoch": 0.7902487505908685, + "grad_norm": 348.2789001464844, + "learning_rate": 1.3743297712525334e-06, + "loss": 18.8635, + "step": 391200 + }, + { + "epoch": 0.7902689512235523, + "grad_norm": 1.2223727703094482, + "learning_rate": 1.3740894106428997e-06, + "loss": 12.6695, + "step": 391210 + }, + { + "epoch": 0.7902891518562362, + "grad_norm": 180.6900634765625, + "learning_rate": 1.373849067705515e-06, + "loss": 16.0197, + "step": 391220 + }, + { + "epoch": 0.79030935248892, + "grad_norm": 344.8309631347656, + "learning_rate": 1.3736087424415483e-06, + "loss": 16.7079, + "step": 391230 + }, + { + "epoch": 0.7903295531216038, + "grad_norm": 408.68280029296875, + "learning_rate": 1.373368434852173e-06, + "loss": 11.0919, + "step": 391240 + }, + { + "epoch": 0.7903497537542876, + "grad_norm": 176.5845184326172, + "learning_rate": 1.373128144938563e-06, + "loss": 9.4938, + "step": 391250 + }, + { + "epoch": 0.7903699543869714, + "grad_norm": 595.7118530273438, + "learning_rate": 1.372887872701884e-06, + "loss": 16.1899, + "step": 391260 + }, + { + "epoch": 0.7903901550196553, + 
"grad_norm": 70.69458770751953, + "learning_rate": 1.372647618143309e-06, + "loss": 20.3304, + "step": 391270 + }, + { + "epoch": 0.7904103556523391, + "grad_norm": 634.7303466796875, + "learning_rate": 1.372407381264011e-06, + "loss": 31.9421, + "step": 391280 + }, + { + "epoch": 0.7904305562850229, + "grad_norm": 644.4024658203125, + "learning_rate": 1.37216716206516e-06, + "loss": 17.8286, + "step": 391290 + }, + { + "epoch": 0.7904507569177066, + "grad_norm": 440.9824523925781, + "learning_rate": 1.3719269605479241e-06, + "loss": 19.8846, + "step": 391300 + }, + { + "epoch": 0.7904709575503904, + "grad_norm": 396.9098205566406, + "learning_rate": 1.3716867767134783e-06, + "loss": 25.3281, + "step": 391310 + }, + { + "epoch": 0.7904911581830742, + "grad_norm": 55.08015823364258, + "learning_rate": 1.3714466105629908e-06, + "loss": 12.2769, + "step": 391320 + }, + { + "epoch": 0.7905113588157581, + "grad_norm": 279.33721923828125, + "learning_rate": 1.3712064620976305e-06, + "loss": 13.5692, + "step": 391330 + }, + { + "epoch": 0.7905315594484419, + "grad_norm": 662.1928100585938, + "learning_rate": 1.3709663313185723e-06, + "loss": 26.2303, + "step": 391340 + }, + { + "epoch": 0.7905517600811257, + "grad_norm": 307.8935546875, + "learning_rate": 1.3707262182269814e-06, + "loss": 13.6516, + "step": 391350 + }, + { + "epoch": 0.7905719607138095, + "grad_norm": 438.3340759277344, + "learning_rate": 1.370486122824033e-06, + "loss": 16.9934, + "step": 391360 + }, + { + "epoch": 0.7905921613464933, + "grad_norm": 624.7930908203125, + "learning_rate": 1.3702460451108934e-06, + "loss": 21.0231, + "step": 391370 + }, + { + "epoch": 0.7906123619791772, + "grad_norm": 230.44725036621094, + "learning_rate": 1.370005985088736e-06, + "loss": 18.2515, + "step": 391380 + }, + { + "epoch": 0.790632562611861, + "grad_norm": 449.1631164550781, + "learning_rate": 1.3697659427587284e-06, + "loss": 18.5813, + "step": 391390 + }, + { + "epoch": 0.7906527632445448, + "grad_norm": 390.2234802246094, + "learning_rate": 1.3695259181220405e-06, + "loss": 26.9494, + "step": 391400 + }, + { + "epoch": 0.7906729638772286, + "grad_norm": 292.9412841796875, + "learning_rate": 1.3692859111798446e-06, + "loss": 13.5067, + "step": 391410 + }, + { + "epoch": 0.7906931645099124, + "grad_norm": 374.9375915527344, + "learning_rate": 1.3690459219333068e-06, + "loss": 20.0724, + "step": 391420 + }, + { + "epoch": 0.7907133651425963, + "grad_norm": 415.5243225097656, + "learning_rate": 1.3688059503836004e-06, + "loss": 17.3517, + "step": 391430 + }, + { + "epoch": 0.7907335657752801, + "grad_norm": 420.6901550292969, + "learning_rate": 1.3685659965318937e-06, + "loss": 8.9723, + "step": 391440 + }, + { + "epoch": 0.7907537664079639, + "grad_norm": 334.8971862792969, + "learning_rate": 1.368326060379354e-06, + "loss": 10.9845, + "step": 391450 + }, + { + "epoch": 0.7907739670406477, + "grad_norm": 115.13829040527344, + "learning_rate": 1.368086141927154e-06, + "loss": 9.9702, + "step": 391460 + }, + { + "epoch": 0.7907941676733315, + "grad_norm": 394.88336181640625, + "learning_rate": 1.367846241176462e-06, + "loss": 23.7669, + "step": 391470 + }, + { + "epoch": 0.7908143683060154, + "grad_norm": 209.13877868652344, + "learning_rate": 1.3676063581284454e-06, + "loss": 16.8719, + "step": 391480 + }, + { + "epoch": 0.7908345689386992, + "grad_norm": 459.1510314941406, + "learning_rate": 1.367366492784275e-06, + "loss": 17.2768, + "step": 391490 + }, + { + "epoch": 0.790854769571383, + "grad_norm": 418.3271789550781, + 
"learning_rate": 1.3671266451451209e-06, + "loss": 28.1231, + "step": 391500 + }, + { + "epoch": 0.7908749702040668, + "grad_norm": 7.374743938446045, + "learning_rate": 1.3668868152121505e-06, + "loss": 14.3796, + "step": 391510 + }, + { + "epoch": 0.7908951708367506, + "grad_norm": 238.83999633789062, + "learning_rate": 1.3666470029865325e-06, + "loss": 24.2418, + "step": 391520 + }, + { + "epoch": 0.7909153714694345, + "grad_norm": 266.6180114746094, + "learning_rate": 1.3664072084694374e-06, + "loss": 16.7649, + "step": 391530 + }, + { + "epoch": 0.7909355721021183, + "grad_norm": 365.9168395996094, + "learning_rate": 1.3661674316620332e-06, + "loss": 12.9025, + "step": 391540 + }, + { + "epoch": 0.7909557727348021, + "grad_norm": 721.8485107421875, + "learning_rate": 1.3659276725654863e-06, + "loss": 15.6994, + "step": 391550 + }, + { + "epoch": 0.7909759733674858, + "grad_norm": 696.6354370117188, + "learning_rate": 1.3656879311809674e-06, + "loss": 20.8847, + "step": 391560 + }, + { + "epoch": 0.7909961740001696, + "grad_norm": 630.0370483398438, + "learning_rate": 1.365448207509646e-06, + "loss": 26.8489, + "step": 391570 + }, + { + "epoch": 0.7910163746328535, + "grad_norm": 446.8143615722656, + "learning_rate": 1.3652085015526895e-06, + "loss": 15.1067, + "step": 391580 + }, + { + "epoch": 0.7910365752655373, + "grad_norm": 447.8006896972656, + "learning_rate": 1.3649688133112644e-06, + "loss": 27.6257, + "step": 391590 + }, + { + "epoch": 0.7910567758982211, + "grad_norm": 569.8394165039062, + "learning_rate": 1.3647291427865417e-06, + "loss": 15.9739, + "step": 391600 + }, + { + "epoch": 0.7910769765309049, + "grad_norm": 596.05908203125, + "learning_rate": 1.364489489979688e-06, + "loss": 24.0909, + "step": 391610 + }, + { + "epoch": 0.7910971771635887, + "grad_norm": 747.9852905273438, + "learning_rate": 1.3642498548918704e-06, + "loss": 12.6635, + "step": 391620 + }, + { + "epoch": 0.7911173777962726, + "grad_norm": 701.9400634765625, + "learning_rate": 1.3640102375242598e-06, + "loss": 22.2039, + "step": 391630 + }, + { + "epoch": 0.7911375784289564, + "grad_norm": 184.07884216308594, + "learning_rate": 1.3637706378780209e-06, + "loss": 18.1476, + "step": 391640 + }, + { + "epoch": 0.7911577790616402, + "grad_norm": 318.0747985839844, + "learning_rate": 1.3635310559543235e-06, + "loss": 24.6376, + "step": 391650 + }, + { + "epoch": 0.791177979694324, + "grad_norm": 233.07652282714844, + "learning_rate": 1.3632914917543338e-06, + "loss": 15.8256, + "step": 391660 + }, + { + "epoch": 0.7911981803270078, + "grad_norm": 165.37388610839844, + "learning_rate": 1.3630519452792219e-06, + "loss": 17.8628, + "step": 391670 + }, + { + "epoch": 0.7912183809596917, + "grad_norm": 176.56077575683594, + "learning_rate": 1.3628124165301537e-06, + "loss": 12.3118, + "step": 391680 + }, + { + "epoch": 0.7912385815923755, + "grad_norm": 189.4002685546875, + "learning_rate": 1.362572905508295e-06, + "loss": 18.7685, + "step": 391690 + }, + { + "epoch": 0.7912587822250593, + "grad_norm": 375.29034423828125, + "learning_rate": 1.3623334122148164e-06, + "loss": 21.122, + "step": 391700 + }, + { + "epoch": 0.7912789828577431, + "grad_norm": 212.97000122070312, + "learning_rate": 1.3620939366508818e-06, + "loss": 9.0183, + "step": 391710 + }, + { + "epoch": 0.7912991834904269, + "grad_norm": 386.3677062988281, + "learning_rate": 1.361854478817662e-06, + "loss": 19.8714, + "step": 391720 + }, + { + "epoch": 0.7913193841231108, + "grad_norm": 34.30253601074219, + "learning_rate": 
1.361615038716322e-06, + "loss": 19.0238, + "step": 391730 + }, + { + "epoch": 0.7913395847557946, + "grad_norm": 350.99078369140625, + "learning_rate": 1.3613756163480275e-06, + "loss": 34.4645, + "step": 391740 + }, + { + "epoch": 0.7913597853884784, + "grad_norm": 314.75653076171875, + "learning_rate": 1.3611362117139481e-06, + "loss": 19.9491, + "step": 391750 + }, + { + "epoch": 0.7913799860211622, + "grad_norm": 260.4652404785156, + "learning_rate": 1.3608968248152498e-06, + "loss": 12.6497, + "step": 391760 + }, + { + "epoch": 0.791400186653846, + "grad_norm": 266.1238708496094, + "learning_rate": 1.3606574556530976e-06, + "loss": 16.1195, + "step": 391770 + }, + { + "epoch": 0.7914203872865299, + "grad_norm": 79.61407470703125, + "learning_rate": 1.3604181042286597e-06, + "loss": 13.4222, + "step": 391780 + }, + { + "epoch": 0.7914405879192137, + "grad_norm": 278.7680969238281, + "learning_rate": 1.3601787705431052e-06, + "loss": 16.558, + "step": 391790 + }, + { + "epoch": 0.7914607885518975, + "grad_norm": 515.685302734375, + "learning_rate": 1.3599394545975952e-06, + "loss": 13.3479, + "step": 391800 + }, + { + "epoch": 0.7914809891845812, + "grad_norm": 337.426025390625, + "learning_rate": 1.3597001563932982e-06, + "loss": 25.6135, + "step": 391810 + }, + { + "epoch": 0.791501189817265, + "grad_norm": 246.74937438964844, + "learning_rate": 1.3594608759313832e-06, + "loss": 10.5852, + "step": 391820 + }, + { + "epoch": 0.7915213904499488, + "grad_norm": 334.9891662597656, + "learning_rate": 1.3592216132130142e-06, + "loss": 18.746, + "step": 391830 + }, + { + "epoch": 0.7915415910826327, + "grad_norm": 3060.901123046875, + "learning_rate": 1.358982368239356e-06, + "loss": 17.8981, + "step": 391840 + }, + { + "epoch": 0.7915617917153165, + "grad_norm": 65.82020568847656, + "learning_rate": 1.3587431410115765e-06, + "loss": 10.5679, + "step": 391850 + }, + { + "epoch": 0.7915819923480003, + "grad_norm": 447.283447265625, + "learning_rate": 1.3585039315308436e-06, + "loss": 22.4207, + "step": 391860 + }, + { + "epoch": 0.7916021929806841, + "grad_norm": 1267.4034423828125, + "learning_rate": 1.3582647397983185e-06, + "loss": 39.3353, + "step": 391870 + }, + { + "epoch": 0.791622393613368, + "grad_norm": 27.980613708496094, + "learning_rate": 1.3580255658151687e-06, + "loss": 18.3648, + "step": 391880 + }, + { + "epoch": 0.7916425942460518, + "grad_norm": 139.32876586914062, + "learning_rate": 1.3577864095825627e-06, + "loss": 11.3425, + "step": 391890 + }, + { + "epoch": 0.7916627948787356, + "grad_norm": 185.72015380859375, + "learning_rate": 1.3575472711016634e-06, + "loss": 18.7808, + "step": 391900 + }, + { + "epoch": 0.7916829955114194, + "grad_norm": 657.601318359375, + "learning_rate": 1.3573081503736362e-06, + "loss": 14.2909, + "step": 391910 + }, + { + "epoch": 0.7917031961441032, + "grad_norm": 134.64427185058594, + "learning_rate": 1.3570690473996483e-06, + "loss": 19.7827, + "step": 391920 + }, + { + "epoch": 0.791723396776787, + "grad_norm": 399.5252380371094, + "learning_rate": 1.356829962180864e-06, + "loss": 17.1316, + "step": 391930 + }, + { + "epoch": 0.7917435974094709, + "grad_norm": 217.53396606445312, + "learning_rate": 1.356590894718447e-06, + "loss": 18.9685, + "step": 391940 + }, + { + "epoch": 0.7917637980421547, + "grad_norm": 192.8686981201172, + "learning_rate": 1.356351845013566e-06, + "loss": 18.3453, + "step": 391950 + }, + { + "epoch": 0.7917839986748385, + "grad_norm": 193.24600219726562, + "learning_rate": 1.3561128130673823e-06, + "loss": 
11.519, + "step": 391960 + }, + { + "epoch": 0.7918041993075223, + "grad_norm": 93.30621337890625, + "learning_rate": 1.3558737988810644e-06, + "loss": 17.8471, + "step": 391970 + }, + { + "epoch": 0.7918243999402061, + "grad_norm": 257.2477111816406, + "learning_rate": 1.3556348024557743e-06, + "loss": 11.8228, + "step": 391980 + }, + { + "epoch": 0.79184460057289, + "grad_norm": 411.4393615722656, + "learning_rate": 1.3553958237926794e-06, + "loss": 22.5637, + "step": 391990 + }, + { + "epoch": 0.7918648012055738, + "grad_norm": 919.718994140625, + "learning_rate": 1.3551568628929434e-06, + "loss": 34.8736, + "step": 392000 + }, + { + "epoch": 0.7918850018382576, + "grad_norm": 385.0460510253906, + "learning_rate": 1.3549179197577295e-06, + "loss": 14.2107, + "step": 392010 + }, + { + "epoch": 0.7919052024709414, + "grad_norm": 365.8689270019531, + "learning_rate": 1.3546789943882045e-06, + "loss": 17.5699, + "step": 392020 + }, + { + "epoch": 0.7919254031036252, + "grad_norm": 262.1510009765625, + "learning_rate": 1.3544400867855306e-06, + "loss": 24.0885, + "step": 392030 + }, + { + "epoch": 0.7919456037363091, + "grad_norm": 457.37420654296875, + "learning_rate": 1.3542011969508756e-06, + "loss": 19.9309, + "step": 392040 + }, + { + "epoch": 0.7919658043689929, + "grad_norm": 361.2774658203125, + "learning_rate": 1.3539623248854012e-06, + "loss": 11.8868, + "step": 392050 + }, + { + "epoch": 0.7919860050016767, + "grad_norm": 27.487533569335938, + "learning_rate": 1.3537234705902709e-06, + "loss": 9.8462, + "step": 392060 + }, + { + "epoch": 0.7920062056343604, + "grad_norm": 394.543212890625, + "learning_rate": 1.353484634066652e-06, + "loss": 8.8337, + "step": 392070 + }, + { + "epoch": 0.7920264062670442, + "grad_norm": 330.3654479980469, + "learning_rate": 1.3532458153157062e-06, + "loss": 8.504, + "step": 392080 + }, + { + "epoch": 0.7920466068997281, + "grad_norm": 178.21826171875, + "learning_rate": 1.3530070143385966e-06, + "loss": 13.4085, + "step": 392090 + }, + { + "epoch": 0.7920668075324119, + "grad_norm": 429.0102233886719, + "learning_rate": 1.3527682311364886e-06, + "loss": 20.5051, + "step": 392100 + }, + { + "epoch": 0.7920870081650957, + "grad_norm": 422.8012390136719, + "learning_rate": 1.3525294657105476e-06, + "loss": 9.6095, + "step": 392110 + }, + { + "epoch": 0.7921072087977795, + "grad_norm": 0.0, + "learning_rate": 1.352290718061935e-06, + "loss": 9.1739, + "step": 392120 + }, + { + "epoch": 0.7921274094304633, + "grad_norm": 463.0601501464844, + "learning_rate": 1.3520519881918143e-06, + "loss": 25.2802, + "step": 392130 + }, + { + "epoch": 0.7921476100631472, + "grad_norm": 143.3258819580078, + "learning_rate": 1.3518132761013509e-06, + "loss": 19.414, + "step": 392140 + }, + { + "epoch": 0.792167810695831, + "grad_norm": 310.7194519042969, + "learning_rate": 1.351574581791707e-06, + "loss": 21.0858, + "step": 392150 + }, + { + "epoch": 0.7921880113285148, + "grad_norm": 509.84344482421875, + "learning_rate": 1.3513359052640445e-06, + "loss": 19.4459, + "step": 392160 + }, + { + "epoch": 0.7922082119611986, + "grad_norm": 312.1219482421875, + "learning_rate": 1.3510972465195283e-06, + "loss": 24.1111, + "step": 392170 + }, + { + "epoch": 0.7922284125938824, + "grad_norm": 808.8726196289062, + "learning_rate": 1.350858605559323e-06, + "loss": 34.325, + "step": 392180 + }, + { + "epoch": 0.7922486132265663, + "grad_norm": 570.1800537109375, + "learning_rate": 1.3506199823845905e-06, + "loss": 24.7095, + "step": 392190 + }, + { + "epoch": 0.7922688138592501, 
+ "grad_norm": 271.8632507324219, + "learning_rate": 1.3503813769964923e-06, + "loss": 18.3585, + "step": 392200 + }, + { + "epoch": 0.7922890144919339, + "grad_norm": 146.16278076171875, + "learning_rate": 1.3501427893961938e-06, + "loss": 14.5004, + "step": 392210 + }, + { + "epoch": 0.7923092151246177, + "grad_norm": 56.90432357788086, + "learning_rate": 1.3499042195848571e-06, + "loss": 27.4381, + "step": 392220 + }, + { + "epoch": 0.7923294157573015, + "grad_norm": 128.71359252929688, + "learning_rate": 1.3496656675636427e-06, + "loss": 31.9426, + "step": 392230 + }, + { + "epoch": 0.7923496163899854, + "grad_norm": 190.23631286621094, + "learning_rate": 1.3494271333337162e-06, + "loss": 19.4458, + "step": 392240 + }, + { + "epoch": 0.7923698170226692, + "grad_norm": 35.6628532409668, + "learning_rate": 1.349188616896238e-06, + "loss": 30.5413, + "step": 392250 + }, + { + "epoch": 0.792390017655353, + "grad_norm": 349.12017822265625, + "learning_rate": 1.3489501182523735e-06, + "loss": 34.5905, + "step": 392260 + }, + { + "epoch": 0.7924102182880368, + "grad_norm": 432.92059326171875, + "learning_rate": 1.3487116374032811e-06, + "loss": 14.4094, + "step": 392270 + }, + { + "epoch": 0.7924304189207206, + "grad_norm": 453.3785705566406, + "learning_rate": 1.3484731743501272e-06, + "loss": 30.512, + "step": 392280 + }, + { + "epoch": 0.7924506195534045, + "grad_norm": 403.2294921875, + "learning_rate": 1.3482347290940723e-06, + "loss": 19.341, + "step": 392290 + }, + { + "epoch": 0.7924708201860883, + "grad_norm": 273.9934387207031, + "learning_rate": 1.3479963016362768e-06, + "loss": 21.9321, + "step": 392300 + }, + { + "epoch": 0.7924910208187721, + "grad_norm": 807.1016845703125, + "learning_rate": 1.3477578919779062e-06, + "loss": 26.1982, + "step": 392310 + }, + { + "epoch": 0.7925112214514558, + "grad_norm": 605.7578735351562, + "learning_rate": 1.3475195001201186e-06, + "loss": 29.8424, + "step": 392320 + }, + { + "epoch": 0.7925314220841396, + "grad_norm": 32.64149475097656, + "learning_rate": 1.34728112606408e-06, + "loss": 17.4115, + "step": 392330 + }, + { + "epoch": 0.7925516227168234, + "grad_norm": 239.3573455810547, + "learning_rate": 1.3470427698109496e-06, + "loss": 24.8317, + "step": 392340 + }, + { + "epoch": 0.7925718233495073, + "grad_norm": 159.31484985351562, + "learning_rate": 1.3468044313618883e-06, + "loss": 31.5261, + "step": 392350 + }, + { + "epoch": 0.7925920239821911, + "grad_norm": 235.64517211914062, + "learning_rate": 1.346566110718061e-06, + "loss": 17.4422, + "step": 392360 + }, + { + "epoch": 0.7926122246148749, + "grad_norm": 314.08721923828125, + "learning_rate": 1.3463278078806274e-06, + "loss": 19.0374, + "step": 392370 + }, + { + "epoch": 0.7926324252475587, + "grad_norm": 356.0980224609375, + "learning_rate": 1.346089522850747e-06, + "loss": 20.1301, + "step": 392380 + }, + { + "epoch": 0.7926526258802425, + "grad_norm": 180.72988891601562, + "learning_rate": 1.3458512556295833e-06, + "loss": 18.3178, + "step": 392390 + }, + { + "epoch": 0.7926728265129264, + "grad_norm": 219.0718536376953, + "learning_rate": 1.3456130062183003e-06, + "loss": 16.6303, + "step": 392400 + }, + { + "epoch": 0.7926930271456102, + "grad_norm": 306.7239990234375, + "learning_rate": 1.3453747746180535e-06, + "loss": 30.4495, + "step": 392410 + }, + { + "epoch": 0.792713227778294, + "grad_norm": 174.364501953125, + "learning_rate": 1.3451365608300066e-06, + "loss": 16.6836, + "step": 392420 + }, + { + "epoch": 0.7927334284109778, + "grad_norm": 496.9266357421875, + 
"learning_rate": 1.3448983648553227e-06, + "loss": 18.4559, + "step": 392430 + }, + { + "epoch": 0.7927536290436616, + "grad_norm": 360.1391296386719, + "learning_rate": 1.3446601866951604e-06, + "loss": 21.449, + "step": 392440 + }, + { + "epoch": 0.7927738296763455, + "grad_norm": 210.6453094482422, + "learning_rate": 1.3444220263506797e-06, + "loss": 20.3085, + "step": 392450 + }, + { + "epoch": 0.7927940303090293, + "grad_norm": 261.69805908203125, + "learning_rate": 1.3441838838230425e-06, + "loss": 16.1359, + "step": 392460 + }, + { + "epoch": 0.7928142309417131, + "grad_norm": 123.70108032226562, + "learning_rate": 1.343945759113413e-06, + "loss": 9.9051, + "step": 392470 + }, + { + "epoch": 0.7928344315743969, + "grad_norm": 181.96739196777344, + "learning_rate": 1.3437076522229454e-06, + "loss": 10.5061, + "step": 392480 + }, + { + "epoch": 0.7928546322070807, + "grad_norm": 142.8966522216797, + "learning_rate": 1.3434695631528028e-06, + "loss": 22.1133, + "step": 392490 + }, + { + "epoch": 0.7928748328397646, + "grad_norm": 422.648681640625, + "learning_rate": 1.3432314919041478e-06, + "loss": 16.3421, + "step": 392500 + }, + { + "epoch": 0.7928950334724484, + "grad_norm": 111.23240661621094, + "learning_rate": 1.342993438478139e-06, + "loss": 21.4282, + "step": 392510 + }, + { + "epoch": 0.7929152341051322, + "grad_norm": 219.13015747070312, + "learning_rate": 1.3427554028759355e-06, + "loss": 11.9455, + "step": 392520 + }, + { + "epoch": 0.792935434737816, + "grad_norm": 667.111572265625, + "learning_rate": 1.3425173850986994e-06, + "loss": 13.5365, + "step": 392530 + }, + { + "epoch": 0.7929556353704998, + "grad_norm": 497.7477111816406, + "learning_rate": 1.3422793851475907e-06, + "loss": 17.9203, + "step": 392540 + }, + { + "epoch": 0.7929758360031837, + "grad_norm": 183.91111755371094, + "learning_rate": 1.3420414030237667e-06, + "loss": 5.6767, + "step": 392550 + }, + { + "epoch": 0.7929960366358675, + "grad_norm": 481.2868347167969, + "learning_rate": 1.3418034387283907e-06, + "loss": 21.1909, + "step": 392560 + }, + { + "epoch": 0.7930162372685513, + "grad_norm": 325.7046203613281, + "learning_rate": 1.3415654922626198e-06, + "loss": 16.1793, + "step": 392570 + }, + { + "epoch": 0.793036437901235, + "grad_norm": 651.7623291015625, + "learning_rate": 1.3413275636276164e-06, + "loss": 23.6038, + "step": 392580 + }, + { + "epoch": 0.7930566385339188, + "grad_norm": 733.0885620117188, + "learning_rate": 1.3410896528245371e-06, + "loss": 32.7139, + "step": 392590 + }, + { + "epoch": 0.7930768391666027, + "grad_norm": 287.5675048828125, + "learning_rate": 1.3408517598545446e-06, + "loss": 17.5776, + "step": 392600 + }, + { + "epoch": 0.7930970397992865, + "grad_norm": 193.81536865234375, + "learning_rate": 1.3406138847187971e-06, + "loss": 9.7663, + "step": 392610 + }, + { + "epoch": 0.7931172404319703, + "grad_norm": 168.9902801513672, + "learning_rate": 1.340376027418452e-06, + "loss": 31.5444, + "step": 392620 + }, + { + "epoch": 0.7931374410646541, + "grad_norm": 209.39915466308594, + "learning_rate": 1.3401381879546716e-06, + "loss": 22.2623, + "step": 392630 + }, + { + "epoch": 0.7931576416973379, + "grad_norm": 677.6997680664062, + "learning_rate": 1.3399003663286125e-06, + "loss": 28.2255, + "step": 392640 + }, + { + "epoch": 0.7931778423300218, + "grad_norm": 264.1876525878906, + "learning_rate": 1.3396625625414362e-06, + "loss": 26.0871, + "step": 392650 + }, + { + "epoch": 0.7931980429627056, + "grad_norm": 228.8098602294922, + "learning_rate": 
1.3394247765943013e-06, + "loss": 13.0514, + "step": 392660 + }, + { + "epoch": 0.7932182435953894, + "grad_norm": 172.20529174804688, + "learning_rate": 1.339187008488364e-06, + "loss": 19.7662, + "step": 392670 + }, + { + "epoch": 0.7932384442280732, + "grad_norm": 303.61370849609375, + "learning_rate": 1.338949258224787e-06, + "loss": 22.0276, + "step": 392680 + }, + { + "epoch": 0.793258644860757, + "grad_norm": 505.311279296875, + "learning_rate": 1.3387115258047272e-06, + "loss": 14.4202, + "step": 392690 + }, + { + "epoch": 0.7932788454934409, + "grad_norm": 266.0522766113281, + "learning_rate": 1.3384738112293415e-06, + "loss": 18.1336, + "step": 392700 + }, + { + "epoch": 0.7932990461261247, + "grad_norm": 468.30267333984375, + "learning_rate": 1.3382361144997912e-06, + "loss": 19.9952, + "step": 392710 + }, + { + "epoch": 0.7933192467588085, + "grad_norm": 328.1174011230469, + "learning_rate": 1.337998435617235e-06, + "loss": 14.7651, + "step": 392720 + }, + { + "epoch": 0.7933394473914923, + "grad_norm": 688.8742065429688, + "learning_rate": 1.3377607745828302e-06, + "loss": 12.9913, + "step": 392730 + }, + { + "epoch": 0.7933596480241761, + "grad_norm": 296.8131103515625, + "learning_rate": 1.337523131397734e-06, + "loss": 14.5713, + "step": 392740 + }, + { + "epoch": 0.79337984865686, + "grad_norm": 164.2072296142578, + "learning_rate": 1.3372855060631067e-06, + "loss": 19.8475, + "step": 392750 + }, + { + "epoch": 0.7934000492895438, + "grad_norm": 210.5634307861328, + "learning_rate": 1.3370478985801062e-06, + "loss": 22.8115, + "step": 392760 + }, + { + "epoch": 0.7934202499222276, + "grad_norm": 176.11083984375, + "learning_rate": 1.3368103089498886e-06, + "loss": 21.7571, + "step": 392770 + }, + { + "epoch": 0.7934404505549114, + "grad_norm": 314.0799560546875, + "learning_rate": 1.3365727371736127e-06, + "loss": 16.8113, + "step": 392780 + }, + { + "epoch": 0.7934606511875952, + "grad_norm": 311.234375, + "learning_rate": 1.3363351832524385e-06, + "loss": 9.8002, + "step": 392790 + }, + { + "epoch": 0.793480851820279, + "grad_norm": 612.8961181640625, + "learning_rate": 1.3360976471875226e-06, + "loss": 16.0739, + "step": 392800 + }, + { + "epoch": 0.7935010524529629, + "grad_norm": 374.7096252441406, + "learning_rate": 1.3358601289800211e-06, + "loss": 17.2786, + "step": 392810 + }, + { + "epoch": 0.7935212530856467, + "grad_norm": 73.8741683959961, + "learning_rate": 1.335622628631094e-06, + "loss": 8.7438, + "step": 392820 + }, + { + "epoch": 0.7935414537183304, + "grad_norm": 321.8848571777344, + "learning_rate": 1.3353851461418976e-06, + "loss": 18.7795, + "step": 392830 + }, + { + "epoch": 0.7935616543510142, + "grad_norm": 510.4248046875, + "learning_rate": 1.3351476815135883e-06, + "loss": 19.6084, + "step": 392840 + }, + { + "epoch": 0.793581854983698, + "grad_norm": 207.49549865722656, + "learning_rate": 1.3349102347473264e-06, + "loss": 14.2602, + "step": 392850 + }, + { + "epoch": 0.7936020556163819, + "grad_norm": 4.39988374710083, + "learning_rate": 1.334672805844266e-06, + "loss": 10.138, + "step": 392860 + }, + { + "epoch": 0.7936222562490657, + "grad_norm": 22.672176361083984, + "learning_rate": 1.3344353948055672e-06, + "loss": 12.7341, + "step": 392870 + }, + { + "epoch": 0.7936424568817495, + "grad_norm": 358.9695129394531, + "learning_rate": 1.3341980016323841e-06, + "loss": 20.6453, + "step": 392880 + }, + { + "epoch": 0.7936626575144333, + "grad_norm": 294.8444519042969, + "learning_rate": 1.333960626325877e-06, + "loss": 38.9246, + "step": 392890 
+ }, + { + "epoch": 0.7936828581471171, + "grad_norm": 344.99530029296875, + "learning_rate": 1.333723268887201e-06, + "loss": 19.0355, + "step": 392900 + }, + { + "epoch": 0.793703058779801, + "grad_norm": 212.49868774414062, + "learning_rate": 1.3334859293175113e-06, + "loss": 15.9298, + "step": 392910 + }, + { + "epoch": 0.7937232594124848, + "grad_norm": 508.03704833984375, + "learning_rate": 1.3332486076179684e-06, + "loss": 21.3234, + "step": 392920 + }, + { + "epoch": 0.7937434600451686, + "grad_norm": 94.76934051513672, + "learning_rate": 1.3330113037897257e-06, + "loss": 8.6602, + "step": 392930 + }, + { + "epoch": 0.7937636606778524, + "grad_norm": 338.82281494140625, + "learning_rate": 1.3327740178339421e-06, + "loss": 7.4053, + "step": 392940 + }, + { + "epoch": 0.7937838613105362, + "grad_norm": 106.09545135498047, + "learning_rate": 1.3325367497517739e-06, + "loss": 13.124, + "step": 392950 + }, + { + "epoch": 0.7938040619432201, + "grad_norm": 221.4761199951172, + "learning_rate": 1.3322994995443744e-06, + "loss": 28.3337, + "step": 392960 + }, + { + "epoch": 0.7938242625759039, + "grad_norm": 269.02587890625, + "learning_rate": 1.3320622672129046e-06, + "loss": 19.4028, + "step": 392970 + }, + { + "epoch": 0.7938444632085877, + "grad_norm": 295.42327880859375, + "learning_rate": 1.331825052758518e-06, + "loss": 7.8659, + "step": 392980 + }, + { + "epoch": 0.7938646638412715, + "grad_norm": 80.26911926269531, + "learning_rate": 1.3315878561823697e-06, + "loss": 13.2659, + "step": 392990 + }, + { + "epoch": 0.7938848644739553, + "grad_norm": 492.2503356933594, + "learning_rate": 1.3313506774856177e-06, + "loss": 18.8608, + "step": 393000 + }, + { + "epoch": 0.7939050651066392, + "grad_norm": 72.5807876586914, + "learning_rate": 1.33111351666942e-06, + "loss": 10.6923, + "step": 393010 + }, + { + "epoch": 0.793925265739323, + "grad_norm": 520.0384521484375, + "learning_rate": 1.3308763737349273e-06, + "loss": 18.93, + "step": 393020 + }, + { + "epoch": 0.7939454663720068, + "grad_norm": 439.1123962402344, + "learning_rate": 1.3306392486832982e-06, + "loss": 23.7647, + "step": 393030 + }, + { + "epoch": 0.7939656670046906, + "grad_norm": 153.1498260498047, + "learning_rate": 1.3304021415156898e-06, + "loss": 11.0697, + "step": 393040 + }, + { + "epoch": 0.7939858676373744, + "grad_norm": 528.4383544921875, + "learning_rate": 1.3301650522332566e-06, + "loss": 19.9529, + "step": 393050 + }, + { + "epoch": 0.7940060682700583, + "grad_norm": 257.60479736328125, + "learning_rate": 1.3299279808371517e-06, + "loss": 12.547, + "step": 393060 + }, + { + "epoch": 0.7940262689027421, + "grad_norm": 290.8949279785156, + "learning_rate": 1.329690927328533e-06, + "loss": 9.9437, + "step": 393070 + }, + { + "epoch": 0.7940464695354259, + "grad_norm": 71.61394500732422, + "learning_rate": 1.3294538917085586e-06, + "loss": 22.1281, + "step": 393080 + }, + { + "epoch": 0.7940666701681096, + "grad_norm": 477.6353759765625, + "learning_rate": 1.329216873978378e-06, + "loss": 14.6355, + "step": 393090 + }, + { + "epoch": 0.7940868708007934, + "grad_norm": 275.8092956542969, + "learning_rate": 1.3289798741391486e-06, + "loss": 17.8101, + "step": 393100 + }, + { + "epoch": 0.7941070714334773, + "grad_norm": 606.7242431640625, + "learning_rate": 1.3287428921920275e-06, + "loss": 9.4685, + "step": 393110 + }, + { + "epoch": 0.7941272720661611, + "grad_norm": 59.06065368652344, + "learning_rate": 1.328505928138169e-06, + "loss": 21.1656, + "step": 393120 + }, + { + "epoch": 0.7941474726988449, + 
"grad_norm": 411.5826416015625, + "learning_rate": 1.3282689819787253e-06, + "loss": 9.8361, + "step": 393130 + }, + { + "epoch": 0.7941676733315287, + "grad_norm": 236.2435302734375, + "learning_rate": 1.328032053714855e-06, + "loss": 10.2996, + "step": 393140 + }, + { + "epoch": 0.7941878739642125, + "grad_norm": 38.0343017578125, + "learning_rate": 1.327795143347711e-06, + "loss": 6.9945, + "step": 393150 + }, + { + "epoch": 0.7942080745968964, + "grad_norm": 233.21644592285156, + "learning_rate": 1.3275582508784462e-06, + "loss": 19.6127, + "step": 393160 + }, + { + "epoch": 0.7942282752295802, + "grad_norm": 312.2197570800781, + "learning_rate": 1.3273213763082188e-06, + "loss": 17.6795, + "step": 393170 + }, + { + "epoch": 0.794248475862264, + "grad_norm": 1112.5133056640625, + "learning_rate": 1.3270845196381805e-06, + "loss": 30.4112, + "step": 393180 + }, + { + "epoch": 0.7942686764949478, + "grad_norm": 0.7452117800712585, + "learning_rate": 1.3268476808694881e-06, + "loss": 20.2499, + "step": 393190 + }, + { + "epoch": 0.7942888771276316, + "grad_norm": 106.0436019897461, + "learning_rate": 1.3266108600032928e-06, + "loss": 16.6449, + "step": 393200 + }, + { + "epoch": 0.7943090777603155, + "grad_norm": 344.6287841796875, + "learning_rate": 1.3263740570407524e-06, + "loss": 10.6484, + "step": 393210 + }, + { + "epoch": 0.7943292783929993, + "grad_norm": 774.5092163085938, + "learning_rate": 1.326137271983019e-06, + "loss": 18.0544, + "step": 393220 + }, + { + "epoch": 0.7943494790256831, + "grad_norm": 52.625030517578125, + "learning_rate": 1.3259005048312457e-06, + "loss": 13.479, + "step": 393230 + }, + { + "epoch": 0.7943696796583669, + "grad_norm": 178.127685546875, + "learning_rate": 1.3256637555865892e-06, + "loss": 8.8126, + "step": 393240 + }, + { + "epoch": 0.7943898802910507, + "grad_norm": 215.18649291992188, + "learning_rate": 1.3254270242502004e-06, + "loss": 7.4319, + "step": 393250 + }, + { + "epoch": 0.7944100809237346, + "grad_norm": 800.3228759765625, + "learning_rate": 1.3251903108232362e-06, + "loss": 26.9087, + "step": 393260 + }, + { + "epoch": 0.7944302815564184, + "grad_norm": 165.53640747070312, + "learning_rate": 1.3249536153068487e-06, + "loss": 18.4781, + "step": 393270 + }, + { + "epoch": 0.7944504821891022, + "grad_norm": 181.44790649414062, + "learning_rate": 1.3247169377021896e-06, + "loss": 17.2046, + "step": 393280 + }, + { + "epoch": 0.794470682821786, + "grad_norm": 31.7740421295166, + "learning_rate": 1.3244802780104166e-06, + "loss": 26.8276, + "step": 393290 + }, + { + "epoch": 0.7944908834544698, + "grad_norm": 98.43057250976562, + "learning_rate": 1.3242436362326804e-06, + "loss": 10.26, + "step": 393300 + }, + { + "epoch": 0.7945110840871537, + "grad_norm": 282.8909912109375, + "learning_rate": 1.3240070123701337e-06, + "loss": 8.1901, + "step": 393310 + }, + { + "epoch": 0.7945312847198375, + "grad_norm": 854.9933471679688, + "learning_rate": 1.323770406423931e-06, + "loss": 31.1475, + "step": 393320 + }, + { + "epoch": 0.7945514853525213, + "grad_norm": 523.5370483398438, + "learning_rate": 1.3235338183952268e-06, + "loss": 35.4954, + "step": 393330 + }, + { + "epoch": 0.7945716859852051, + "grad_norm": 452.68084716796875, + "learning_rate": 1.323297248285173e-06, + "loss": 19.9129, + "step": 393340 + }, + { + "epoch": 0.7945918866178888, + "grad_norm": 5.8730549812316895, + "learning_rate": 1.3230606960949204e-06, + "loss": 8.4925, + "step": 393350 + }, + { + "epoch": 0.7946120872505726, + "grad_norm": 288.4775085449219, + 
"learning_rate": 1.322824161825626e-06, + "loss": 14.5479, + "step": 393360 + }, + { + "epoch": 0.7946322878832565, + "grad_norm": 244.63304138183594, + "learning_rate": 1.3225876454784409e-06, + "loss": 23.3344, + "step": 393370 + }, + { + "epoch": 0.7946524885159403, + "grad_norm": 222.38368225097656, + "learning_rate": 1.3223511470545158e-06, + "loss": 13.8454, + "step": 393380 + }, + { + "epoch": 0.7946726891486241, + "grad_norm": 360.0292663574219, + "learning_rate": 1.3221146665550055e-06, + "loss": 20.8501, + "step": 393390 + }, + { + "epoch": 0.7946928897813079, + "grad_norm": 310.9561767578125, + "learning_rate": 1.3218782039810634e-06, + "loss": 18.6549, + "step": 393400 + }, + { + "epoch": 0.7947130904139917, + "grad_norm": 449.3248596191406, + "learning_rate": 1.321641759333841e-06, + "loss": 19.3592, + "step": 393410 + }, + { + "epoch": 0.7947332910466756, + "grad_norm": 129.15274047851562, + "learning_rate": 1.3214053326144888e-06, + "loss": 19.0694, + "step": 393420 + }, + { + "epoch": 0.7947534916793594, + "grad_norm": 0.25493475794792175, + "learning_rate": 1.321168923824162e-06, + "loss": 12.7415, + "step": 393430 + }, + { + "epoch": 0.7947736923120432, + "grad_norm": 217.77394104003906, + "learning_rate": 1.3209325329640126e-06, + "loss": 16.7637, + "step": 393440 + }, + { + "epoch": 0.794793892944727, + "grad_norm": 149.8814239501953, + "learning_rate": 1.3206961600351897e-06, + "loss": 13.435, + "step": 393450 + }, + { + "epoch": 0.7948140935774108, + "grad_norm": 295.07928466796875, + "learning_rate": 1.320459805038849e-06, + "loss": 30.2789, + "step": 393460 + }, + { + "epoch": 0.7948342942100947, + "grad_norm": 157.8231658935547, + "learning_rate": 1.32022346797614e-06, + "loss": 19.1555, + "step": 393470 + }, + { + "epoch": 0.7948544948427785, + "grad_norm": 319.9383850097656, + "learning_rate": 1.3199871488482163e-06, + "loss": 9.6858, + "step": 393480 + }, + { + "epoch": 0.7948746954754623, + "grad_norm": 271.5839538574219, + "learning_rate": 1.3197508476562277e-06, + "loss": 16.0141, + "step": 393490 + }, + { + "epoch": 0.7948948961081461, + "grad_norm": 424.5205383300781, + "learning_rate": 1.3195145644013286e-06, + "loss": 13.9018, + "step": 393500 + }, + { + "epoch": 0.7949150967408299, + "grad_norm": 212.56304931640625, + "learning_rate": 1.3192782990846692e-06, + "loss": 15.9282, + "step": 393510 + }, + { + "epoch": 0.7949352973735138, + "grad_norm": 476.3667907714844, + "learning_rate": 1.3190420517073993e-06, + "loss": 21.4514, + "step": 393520 + }, + { + "epoch": 0.7949554980061976, + "grad_norm": 473.9554748535156, + "learning_rate": 1.3188058222706735e-06, + "loss": 16.1802, + "step": 393530 + }, + { + "epoch": 0.7949756986388814, + "grad_norm": 100.92284393310547, + "learning_rate": 1.3185696107756402e-06, + "loss": 13.5259, + "step": 393540 + }, + { + "epoch": 0.7949958992715652, + "grad_norm": 217.48333740234375, + "learning_rate": 1.3183334172234536e-06, + "loss": 18.6937, + "step": 393550 + }, + { + "epoch": 0.795016099904249, + "grad_norm": 137.55055236816406, + "learning_rate": 1.3180972416152637e-06, + "loss": 11.7866, + "step": 393560 + }, + { + "epoch": 0.7950363005369329, + "grad_norm": 188.703857421875, + "learning_rate": 1.3178610839522193e-06, + "loss": 8.2441, + "step": 393570 + }, + { + "epoch": 0.7950565011696167, + "grad_norm": 672.42041015625, + "learning_rate": 1.317624944235475e-06, + "loss": 13.1175, + "step": 393580 + }, + { + "epoch": 0.7950767018023005, + "grad_norm": 858.04296875, + "learning_rate": 1.3173888224661802e-06, + 
"loss": 12.281, + "step": 393590 + }, + { + "epoch": 0.7950969024349842, + "grad_norm": 232.22882080078125, + "learning_rate": 1.317152718645484e-06, + "loss": 18.2014, + "step": 393600 + }, + { + "epoch": 0.795117103067668, + "grad_norm": 304.68597412109375, + "learning_rate": 1.3169166327745392e-06, + "loss": 28.0134, + "step": 393610 + }, + { + "epoch": 0.7951373037003518, + "grad_norm": 28.386598587036133, + "learning_rate": 1.316680564854499e-06, + "loss": 21.7759, + "step": 393620 + }, + { + "epoch": 0.7951575043330357, + "grad_norm": 0.0007220363477244973, + "learning_rate": 1.3164445148865073e-06, + "loss": 21.6391, + "step": 393630 + }, + { + "epoch": 0.7951777049657195, + "grad_norm": 126.11640167236328, + "learning_rate": 1.3162084828717187e-06, + "loss": 17.208, + "step": 393640 + }, + { + "epoch": 0.7951979055984033, + "grad_norm": 378.008056640625, + "learning_rate": 1.3159724688112846e-06, + "loss": 26.1479, + "step": 393650 + }, + { + "epoch": 0.7952181062310871, + "grad_norm": 313.0746154785156, + "learning_rate": 1.3157364727063542e-06, + "loss": 13.67, + "step": 393660 + }, + { + "epoch": 0.795238306863771, + "grad_norm": 280.4048156738281, + "learning_rate": 1.3155004945580757e-06, + "loss": 13.3941, + "step": 393670 + }, + { + "epoch": 0.7952585074964548, + "grad_norm": 149.05538940429688, + "learning_rate": 1.3152645343676007e-06, + "loss": 18.4442, + "step": 393680 + }, + { + "epoch": 0.7952787081291386, + "grad_norm": 307.603271484375, + "learning_rate": 1.3150285921360823e-06, + "loss": 18.4287, + "step": 393690 + }, + { + "epoch": 0.7952989087618224, + "grad_norm": 508.5350036621094, + "learning_rate": 1.314792667864665e-06, + "loss": 14.0235, + "step": 393700 + }, + { + "epoch": 0.7953191093945062, + "grad_norm": 434.1549987792969, + "learning_rate": 1.3145567615545013e-06, + "loss": 20.5497, + "step": 393710 + }, + { + "epoch": 0.79533931002719, + "grad_norm": 279.7898864746094, + "learning_rate": 1.3143208732067426e-06, + "loss": 15.663, + "step": 393720 + }, + { + "epoch": 0.7953595106598739, + "grad_norm": 97.50518035888672, + "learning_rate": 1.314085002822536e-06, + "loss": 14.4952, + "step": 393730 + }, + { + "epoch": 0.7953797112925577, + "grad_norm": 11.618754386901855, + "learning_rate": 1.3138491504030314e-06, + "loss": 9.7702, + "step": 393740 + }, + { + "epoch": 0.7953999119252415, + "grad_norm": 264.45269775390625, + "learning_rate": 1.3136133159493803e-06, + "loss": 19.0229, + "step": 393750 + }, + { + "epoch": 0.7954201125579253, + "grad_norm": 696.5134887695312, + "learning_rate": 1.3133774994627307e-06, + "loss": 32.1867, + "step": 393760 + }, + { + "epoch": 0.7954403131906091, + "grad_norm": 122.29059600830078, + "learning_rate": 1.313141700944231e-06, + "loss": 20.7587, + "step": 393770 + }, + { + "epoch": 0.795460513823293, + "grad_norm": 419.3998718261719, + "learning_rate": 1.3129059203950306e-06, + "loss": 21.3142, + "step": 393780 + }, + { + "epoch": 0.7954807144559768, + "grad_norm": 331.98126220703125, + "learning_rate": 1.312670157816282e-06, + "loss": 13.6463, + "step": 393790 + }, + { + "epoch": 0.7955009150886606, + "grad_norm": 246.06785583496094, + "learning_rate": 1.312434413209131e-06, + "loss": 14.7045, + "step": 393800 + }, + { + "epoch": 0.7955211157213444, + "grad_norm": 607.720947265625, + "learning_rate": 1.3121986865747267e-06, + "loss": 14.9481, + "step": 393810 + }, + { + "epoch": 0.7955413163540282, + "grad_norm": 448.0708923339844, + "learning_rate": 1.3119629779142196e-06, + "loss": 20.9657, + "step": 393820 + }, + { 
+ "epoch": 0.7955615169867121, + "grad_norm": 431.65411376953125, + "learning_rate": 1.3117272872287578e-06, + "loss": 21.2966, + "step": 393830 + }, + { + "epoch": 0.7955817176193959, + "grad_norm": 282.5703125, + "learning_rate": 1.3114916145194884e-06, + "loss": 26.2329, + "step": 393840 + }, + { + "epoch": 0.7956019182520797, + "grad_norm": 401.35076904296875, + "learning_rate": 1.3112559597875628e-06, + "loss": 19.5809, + "step": 393850 + }, + { + "epoch": 0.7956221188847634, + "grad_norm": 750.6825561523438, + "learning_rate": 1.3110203230341273e-06, + "loss": 24.4017, + "step": 393860 + }, + { + "epoch": 0.7956423195174472, + "grad_norm": 0.21479862928390503, + "learning_rate": 1.3107847042603328e-06, + "loss": 24.2475, + "step": 393870 + }, + { + "epoch": 0.7956625201501311, + "grad_norm": 139.98748779296875, + "learning_rate": 1.3105491034673256e-06, + "loss": 11.7494, + "step": 393880 + }, + { + "epoch": 0.7956827207828149, + "grad_norm": 269.61834716796875, + "learning_rate": 1.3103135206562535e-06, + "loss": 10.683, + "step": 393890 + }, + { + "epoch": 0.7957029214154987, + "grad_norm": 437.0902404785156, + "learning_rate": 1.3100779558282673e-06, + "loss": 16.1986, + "step": 393900 + }, + { + "epoch": 0.7957231220481825, + "grad_norm": 633.8558349609375, + "learning_rate": 1.3098424089845136e-06, + "loss": 25.6735, + "step": 393910 + }, + { + "epoch": 0.7957433226808663, + "grad_norm": 578.57763671875, + "learning_rate": 1.3096068801261386e-06, + "loss": 16.9048, + "step": 393920 + }, + { + "epoch": 0.7957635233135502, + "grad_norm": 0.0013803989859297872, + "learning_rate": 1.3093713692542925e-06, + "loss": 21.8858, + "step": 393930 + }, + { + "epoch": 0.795783723946234, + "grad_norm": 656.0813598632812, + "learning_rate": 1.309135876370124e-06, + "loss": 13.1338, + "step": 393940 + }, + { + "epoch": 0.7958039245789178, + "grad_norm": 378.0755615234375, + "learning_rate": 1.3089004014747797e-06, + "loss": 16.7051, + "step": 393950 + }, + { + "epoch": 0.7958241252116016, + "grad_norm": 325.6436767578125, + "learning_rate": 1.3086649445694056e-06, + "loss": 17.3309, + "step": 393960 + }, + { + "epoch": 0.7958443258442854, + "grad_norm": 126.03115844726562, + "learning_rate": 1.308429505655152e-06, + "loss": 19.0706, + "step": 393970 + }, + { + "epoch": 0.7958645264769693, + "grad_norm": 202.6767120361328, + "learning_rate": 1.3081940847331658e-06, + "loss": 13.6732, + "step": 393980 + }, + { + "epoch": 0.7958847271096531, + "grad_norm": 575.3368530273438, + "learning_rate": 1.3079586818045925e-06, + "loss": 26.4399, + "step": 393990 + }, + { + "epoch": 0.7959049277423369, + "grad_norm": 101.26583862304688, + "learning_rate": 1.3077232968705805e-06, + "loss": 10.4103, + "step": 394000 + }, + { + "epoch": 0.7959251283750207, + "grad_norm": 339.0704650878906, + "learning_rate": 1.3074879299322802e-06, + "loss": 14.2013, + "step": 394010 + }, + { + "epoch": 0.7959453290077045, + "grad_norm": 467.2294006347656, + "learning_rate": 1.3072525809908332e-06, + "loss": 14.1033, + "step": 394020 + }, + { + "epoch": 0.7959655296403884, + "grad_norm": 264.7349853515625, + "learning_rate": 1.3070172500473888e-06, + "loss": 20.4526, + "step": 394030 + }, + { + "epoch": 0.7959857302730722, + "grad_norm": 455.54400634765625, + "learning_rate": 1.3067819371030966e-06, + "loss": 21.736, + "step": 394040 + }, + { + "epoch": 0.796005930905756, + "grad_norm": 235.12664794921875, + "learning_rate": 1.3065466421591006e-06, + "loss": 31.3294, + "step": 394050 + }, + { + "epoch": 0.7960261315384398, + 
"grad_norm": 422.9604187011719, + "learning_rate": 1.306311365216547e-06, + "loss": 12.6948, + "step": 394060 + }, + { + "epoch": 0.7960463321711236, + "grad_norm": 171.83616638183594, + "learning_rate": 1.3060761062765853e-06, + "loss": 17.2273, + "step": 394070 + }, + { + "epoch": 0.7960665328038075, + "grad_norm": 396.9876403808594, + "learning_rate": 1.3058408653403609e-06, + "loss": 25.2987, + "step": 394080 + }, + { + "epoch": 0.7960867334364913, + "grad_norm": 424.2646484375, + "learning_rate": 1.3056056424090186e-06, + "loss": 24.977, + "step": 394090 + }, + { + "epoch": 0.7961069340691751, + "grad_norm": 205.80349731445312, + "learning_rate": 1.3053704374837063e-06, + "loss": 12.2314, + "step": 394100 + }, + { + "epoch": 0.7961271347018588, + "grad_norm": 121.14717864990234, + "learning_rate": 1.3051352505655713e-06, + "loss": 17.3948, + "step": 394110 + }, + { + "epoch": 0.7961473353345426, + "grad_norm": 323.67108154296875, + "learning_rate": 1.3049000816557595e-06, + "loss": 22.5437, + "step": 394120 + }, + { + "epoch": 0.7961675359672264, + "grad_norm": 168.21630859375, + "learning_rate": 1.304664930755415e-06, + "loss": 18.7534, + "step": 394130 + }, + { + "epoch": 0.7961877365999103, + "grad_norm": 405.3740234375, + "learning_rate": 1.3044297978656867e-06, + "loss": 14.6358, + "step": 394140 + }, + { + "epoch": 0.7962079372325941, + "grad_norm": 74.92567443847656, + "learning_rate": 1.3041946829877178e-06, + "loss": 28.3451, + "step": 394150 + }, + { + "epoch": 0.7962281378652779, + "grad_norm": 492.98333740234375, + "learning_rate": 1.3039595861226579e-06, + "loss": 20.2084, + "step": 394160 + }, + { + "epoch": 0.7962483384979617, + "grad_norm": 404.05169677734375, + "learning_rate": 1.3037245072716504e-06, + "loss": 27.0056, + "step": 394170 + }, + { + "epoch": 0.7962685391306455, + "grad_norm": 666.1986083984375, + "learning_rate": 1.3034894464358395e-06, + "loss": 11.6529, + "step": 394180 + }, + { + "epoch": 0.7962887397633294, + "grad_norm": 307.9724426269531, + "learning_rate": 1.3032544036163742e-06, + "loss": 8.2639, + "step": 394190 + }, + { + "epoch": 0.7963089403960132, + "grad_norm": 25.722850799560547, + "learning_rate": 1.3030193788143991e-06, + "loss": 18.8226, + "step": 394200 + }, + { + "epoch": 0.796329141028697, + "grad_norm": 617.319580078125, + "learning_rate": 1.3027843720310574e-06, + "loss": 24.744, + "step": 394210 + }, + { + "epoch": 0.7963493416613808, + "grad_norm": 281.46490478515625, + "learning_rate": 1.3025493832674963e-06, + "loss": 34.2395, + "step": 394220 + }, + { + "epoch": 0.7963695422940646, + "grad_norm": 1.4975876808166504, + "learning_rate": 1.302314412524862e-06, + "loss": 32.9228, + "step": 394230 + }, + { + "epoch": 0.7963897429267485, + "grad_norm": 275.6388854980469, + "learning_rate": 1.3020794598042996e-06, + "loss": 21.2948, + "step": 394240 + }, + { + "epoch": 0.7964099435594323, + "grad_norm": 291.4058837890625, + "learning_rate": 1.301844525106951e-06, + "loss": 12.3671, + "step": 394250 + }, + { + "epoch": 0.7964301441921161, + "grad_norm": 99.08885192871094, + "learning_rate": 1.3016096084339658e-06, + "loss": 21.0551, + "step": 394260 + }, + { + "epoch": 0.7964503448247999, + "grad_norm": 341.2049255371094, + "learning_rate": 1.301374709786487e-06, + "loss": 15.1307, + "step": 394270 + }, + { + "epoch": 0.7964705454574837, + "grad_norm": 277.8306884765625, + "learning_rate": 1.3011398291656575e-06, + "loss": 11.555, + "step": 394280 + }, + { + "epoch": 0.7964907460901676, + "grad_norm": 210.9628143310547, + 
"learning_rate": 1.3009049665726236e-06, + "loss": 16.8974, + "step": 394290 + }, + { + "epoch": 0.7965109467228514, + "grad_norm": 210.85662841796875, + "learning_rate": 1.3006701220085338e-06, + "loss": 17.8028, + "step": 394300 + }, + { + "epoch": 0.7965311473555352, + "grad_norm": 130.49485778808594, + "learning_rate": 1.3004352954745257e-06, + "loss": 22.2752, + "step": 394310 + }, + { + "epoch": 0.796551347988219, + "grad_norm": 1315.4393310546875, + "learning_rate": 1.3002004869717472e-06, + "loss": 20.5979, + "step": 394320 + }, + { + "epoch": 0.7965715486209028, + "grad_norm": 209.0332794189453, + "learning_rate": 1.2999656965013447e-06, + "loss": 9.4618, + "step": 394330 + }, + { + "epoch": 0.7965917492535867, + "grad_norm": 526.4258422851562, + "learning_rate": 1.2997309240644607e-06, + "loss": 12.1449, + "step": 394340 + }, + { + "epoch": 0.7966119498862705, + "grad_norm": 683.3538818359375, + "learning_rate": 1.299496169662237e-06, + "loss": 15.6115, + "step": 394350 + }, + { + "epoch": 0.7966321505189543, + "grad_norm": 208.3459014892578, + "learning_rate": 1.2992614332958226e-06, + "loss": 10.9918, + "step": 394360 + }, + { + "epoch": 0.796652351151638, + "grad_norm": 236.00804138183594, + "learning_rate": 1.2990267149663588e-06, + "loss": 19.7191, + "step": 394370 + }, + { + "epoch": 0.7966725517843218, + "grad_norm": 1093.6866455078125, + "learning_rate": 1.2987920146749883e-06, + "loss": 13.9512, + "step": 394380 + }, + { + "epoch": 0.7966927524170057, + "grad_norm": 572.6358032226562, + "learning_rate": 1.2985573324228568e-06, + "loss": 19.6329, + "step": 394390 + }, + { + "epoch": 0.7967129530496895, + "grad_norm": 202.0952606201172, + "learning_rate": 1.2983226682111094e-06, + "loss": 25.3241, + "step": 394400 + }, + { + "epoch": 0.7967331536823733, + "grad_norm": 302.8909606933594, + "learning_rate": 1.2980880220408887e-06, + "loss": 11.107, + "step": 394410 + }, + { + "epoch": 0.7967533543150571, + "grad_norm": 372.9821472167969, + "learning_rate": 1.2978533939133358e-06, + "loss": 15.4005, + "step": 394420 + }, + { + "epoch": 0.7967735549477409, + "grad_norm": 254.22006225585938, + "learning_rate": 1.2976187838295984e-06, + "loss": 13.6604, + "step": 394430 + }, + { + "epoch": 0.7967937555804248, + "grad_norm": 436.09619140625, + "learning_rate": 1.2973841917908175e-06, + "loss": 15.5366, + "step": 394440 + }, + { + "epoch": 0.7968139562131086, + "grad_norm": 280.6983947753906, + "learning_rate": 1.2971496177981362e-06, + "loss": 15.5349, + "step": 394450 + }, + { + "epoch": 0.7968341568457924, + "grad_norm": 330.1646423339844, + "learning_rate": 1.2969150618527e-06, + "loss": 32.7581, + "step": 394460 + }, + { + "epoch": 0.7968543574784762, + "grad_norm": 486.09503173828125, + "learning_rate": 1.2966805239556484e-06, + "loss": 23.7823, + "step": 394470 + }, + { + "epoch": 0.79687455811116, + "grad_norm": 6.7952494621276855, + "learning_rate": 1.2964460041081288e-06, + "loss": 15.3201, + "step": 394480 + }, + { + "epoch": 0.7968947587438439, + "grad_norm": 142.43624877929688, + "learning_rate": 1.296211502311282e-06, + "loss": 18.9219, + "step": 394490 + }, + { + "epoch": 0.7969149593765277, + "grad_norm": 529.5702514648438, + "learning_rate": 1.2959770185662502e-06, + "loss": 21.5324, + "step": 394500 + }, + { + "epoch": 0.7969351600092115, + "grad_norm": 240.99537658691406, + "learning_rate": 1.295742552874178e-06, + "loss": 17.789, + "step": 394510 + }, + { + "epoch": 0.7969553606418953, + "grad_norm": 984.2542724609375, + "learning_rate": 1.2955081052362072e-06, 
+ "loss": 29.0571, + "step": 394520 + }, + { + "epoch": 0.7969755612745791, + "grad_norm": 325.11138916015625, + "learning_rate": 1.2952736756534796e-06, + "loss": 8.1615, + "step": 394530 + }, + { + "epoch": 0.796995761907263, + "grad_norm": 291.4028015136719, + "learning_rate": 1.2950392641271386e-06, + "loss": 21.4612, + "step": 394540 + }, + { + "epoch": 0.7970159625399468, + "grad_norm": 379.6304626464844, + "learning_rate": 1.2948048706583284e-06, + "loss": 16.4679, + "step": 394550 + }, + { + "epoch": 0.7970361631726306, + "grad_norm": 614.7577514648438, + "learning_rate": 1.2945704952481896e-06, + "loss": 13.7075, + "step": 394560 + }, + { + "epoch": 0.7970563638053144, + "grad_norm": 528.6258544921875, + "learning_rate": 1.2943361378978636e-06, + "loss": 19.4213, + "step": 394570 + }, + { + "epoch": 0.7970765644379982, + "grad_norm": 379.50335693359375, + "learning_rate": 1.2941017986084953e-06, + "loss": 12.0558, + "step": 394580 + }, + { + "epoch": 0.7970967650706821, + "grad_norm": 528.6602172851562, + "learning_rate": 1.2938674773812255e-06, + "loss": 27.86, + "step": 394590 + }, + { + "epoch": 0.7971169657033659, + "grad_norm": 312.184814453125, + "learning_rate": 1.2936331742171943e-06, + "loss": 15.0957, + "step": 394600 + }, + { + "epoch": 0.7971371663360497, + "grad_norm": 321.55584716796875, + "learning_rate": 1.2933988891175458e-06, + "loss": 27.9651, + "step": 394610 + }, + { + "epoch": 0.7971573669687335, + "grad_norm": 77.93047332763672, + "learning_rate": 1.2931646220834242e-06, + "loss": 7.1763, + "step": 394620 + }, + { + "epoch": 0.7971775676014172, + "grad_norm": 1702.705322265625, + "learning_rate": 1.292930373115966e-06, + "loss": 13.2088, + "step": 394630 + }, + { + "epoch": 0.797197768234101, + "grad_norm": 409.6087646484375, + "learning_rate": 1.2926961422163154e-06, + "loss": 9.8329, + "step": 394640 + }, + { + "epoch": 0.7972179688667849, + "grad_norm": 258.40966796875, + "learning_rate": 1.2924619293856155e-06, + "loss": 13.6099, + "step": 394650 + }, + { + "epoch": 0.7972381694994687, + "grad_norm": 245.12200927734375, + "learning_rate": 1.2922277346250067e-06, + "loss": 14.0059, + "step": 394660 + }, + { + "epoch": 0.7972583701321525, + "grad_norm": 757.044189453125, + "learning_rate": 1.2919935579356285e-06, + "loss": 15.0933, + "step": 394670 + }, + { + "epoch": 0.7972785707648363, + "grad_norm": 137.48452758789062, + "learning_rate": 1.2917593993186257e-06, + "loss": 15.7164, + "step": 394680 + }, + { + "epoch": 0.7972987713975201, + "grad_norm": 1023.2304077148438, + "learning_rate": 1.2915252587751376e-06, + "loss": 14.3711, + "step": 394690 + }, + { + "epoch": 0.797318972030204, + "grad_norm": 248.52865600585938, + "learning_rate": 1.2912911363063048e-06, + "loss": 15.0697, + "step": 394700 + }, + { + "epoch": 0.7973391726628878, + "grad_norm": 464.6304016113281, + "learning_rate": 1.291057031913268e-06, + "loss": 44.176, + "step": 394710 + }, + { + "epoch": 0.7973593732955716, + "grad_norm": 434.9127197265625, + "learning_rate": 1.2908229455971717e-06, + "loss": 29.4214, + "step": 394720 + }, + { + "epoch": 0.7973795739282554, + "grad_norm": 490.8548278808594, + "learning_rate": 1.2905888773591546e-06, + "loss": 18.168, + "step": 394730 + }, + { + "epoch": 0.7973997745609392, + "grad_norm": 325.1541442871094, + "learning_rate": 1.2903548272003552e-06, + "loss": 13.8529, + "step": 394740 + }, + { + "epoch": 0.7974199751936231, + "grad_norm": 306.61810302734375, + "learning_rate": 1.2901207951219186e-06, + "loss": 8.425, + "step": 394750 + }, + { 
+ "epoch": 0.7974401758263069, + "grad_norm": 392.6197814941406, + "learning_rate": 1.2898867811249832e-06, + "loss": 30.7993, + "step": 394760 + }, + { + "epoch": 0.7974603764589907, + "grad_norm": 198.2019805908203, + "learning_rate": 1.2896527852106876e-06, + "loss": 18.272, + "step": 394770 + }, + { + "epoch": 0.7974805770916745, + "grad_norm": 498.5760803222656, + "learning_rate": 1.2894188073801766e-06, + "loss": 22.122, + "step": 394780 + }, + { + "epoch": 0.7975007777243583, + "grad_norm": 490.75921630859375, + "learning_rate": 1.2891848476345864e-06, + "loss": 15.0195, + "step": 394790 + }, + { + "epoch": 0.7975209783570422, + "grad_norm": 56.68571090698242, + "learning_rate": 1.2889509059750605e-06, + "loss": 10.773, + "step": 394800 + }, + { + "epoch": 0.797541178989726, + "grad_norm": 457.5586853027344, + "learning_rate": 1.288716982402738e-06, + "loss": 12.8355, + "step": 394810 + }, + { + "epoch": 0.7975613796224098, + "grad_norm": 1123.6107177734375, + "learning_rate": 1.2884830769187572e-06, + "loss": 18.1294, + "step": 394820 + }, + { + "epoch": 0.7975815802550936, + "grad_norm": 611.557373046875, + "learning_rate": 1.2882491895242599e-06, + "loss": 15.0549, + "step": 394830 + }, + { + "epoch": 0.7976017808877774, + "grad_norm": 259.11004638671875, + "learning_rate": 1.2880153202203877e-06, + "loss": 10.5003, + "step": 394840 + }, + { + "epoch": 0.7976219815204613, + "grad_norm": 612.1580200195312, + "learning_rate": 1.287781469008278e-06, + "loss": 32.6783, + "step": 394850 + }, + { + "epoch": 0.7976421821531451, + "grad_norm": 152.5412139892578, + "learning_rate": 1.2875476358890698e-06, + "loss": 10.7557, + "step": 394860 + }, + { + "epoch": 0.7976623827858289, + "grad_norm": 83.74934387207031, + "learning_rate": 1.2873138208639057e-06, + "loss": 10.8741, + "step": 394870 + }, + { + "epoch": 0.7976825834185126, + "grad_norm": 187.7102508544922, + "learning_rate": 1.2870800239339237e-06, + "loss": 9.8461, + "step": 394880 + }, + { + "epoch": 0.7977027840511964, + "grad_norm": 318.344482421875, + "learning_rate": 1.2868462451002623e-06, + "loss": 5.4056, + "step": 394890 + }, + { + "epoch": 0.7977229846838803, + "grad_norm": 446.8281555175781, + "learning_rate": 1.2866124843640614e-06, + "loss": 18.0713, + "step": 394900 + }, + { + "epoch": 0.7977431853165641, + "grad_norm": 336.1660461425781, + "learning_rate": 1.2863787417264639e-06, + "loss": 28.2284, + "step": 394910 + }, + { + "epoch": 0.7977633859492479, + "grad_norm": 119.48172760009766, + "learning_rate": 1.2861450171886037e-06, + "loss": 9.7535, + "step": 394920 + }, + { + "epoch": 0.7977835865819317, + "grad_norm": 499.10699462890625, + "learning_rate": 1.2859113107516212e-06, + "loss": 19.604, + "step": 394930 + }, + { + "epoch": 0.7978037872146155, + "grad_norm": 70.6404800415039, + "learning_rate": 1.2856776224166589e-06, + "loss": 9.231, + "step": 394940 + }, + { + "epoch": 0.7978239878472994, + "grad_norm": 175.56529235839844, + "learning_rate": 1.2854439521848526e-06, + "loss": 12.3219, + "step": 394950 + }, + { + "epoch": 0.7978441884799832, + "grad_norm": 773.7423706054688, + "learning_rate": 1.2852103000573413e-06, + "loss": 24.3522, + "step": 394960 + }, + { + "epoch": 0.797864389112667, + "grad_norm": 69.67157745361328, + "learning_rate": 1.2849766660352652e-06, + "loss": 7.5418, + "step": 394970 + }, + { + "epoch": 0.7978845897453508, + "grad_norm": 148.06202697753906, + "learning_rate": 1.2847430501197627e-06, + "loss": 18.4866, + "step": 394980 + }, + { + "epoch": 0.7979047903780346, + "grad_norm": 
75.63851928710938, + "learning_rate": 1.2845094523119706e-06, + "loss": 14.312, + "step": 394990 + }, + { + "epoch": 0.7979249910107185, + "grad_norm": 101.40110778808594, + "learning_rate": 1.2842758726130283e-06, + "loss": 10.1742, + "step": 395000 + }, + { + "epoch": 0.7979451916434023, + "grad_norm": 12.643241882324219, + "learning_rate": 1.2840423110240762e-06, + "loss": 30.2885, + "step": 395010 + }, + { + "epoch": 0.7979653922760861, + "grad_norm": 40.91701126098633, + "learning_rate": 1.2838087675462518e-06, + "loss": 21.6835, + "step": 395020 + }, + { + "epoch": 0.7979855929087699, + "grad_norm": 246.21238708496094, + "learning_rate": 1.2835752421806908e-06, + "loss": 16.0245, + "step": 395030 + }, + { + "epoch": 0.7980057935414537, + "grad_norm": 337.4515380859375, + "learning_rate": 1.283341734928535e-06, + "loss": 20.9604, + "step": 395040 + }, + { + "epoch": 0.7980259941741376, + "grad_norm": 152.22817993164062, + "learning_rate": 1.2831082457909206e-06, + "loss": 27.1174, + "step": 395050 + }, + { + "epoch": 0.7980461948068214, + "grad_norm": 337.61846923828125, + "learning_rate": 1.2828747747689846e-06, + "loss": 20.1692, + "step": 395060 + }, + { + "epoch": 0.7980663954395052, + "grad_norm": 192.54147338867188, + "learning_rate": 1.2826413218638672e-06, + "loss": 20.242, + "step": 395070 + }, + { + "epoch": 0.798086596072189, + "grad_norm": 581.4447021484375, + "learning_rate": 1.2824078870767036e-06, + "loss": 19.2308, + "step": 395080 + }, + { + "epoch": 0.7981067967048728, + "grad_norm": 1.4781781435012817, + "learning_rate": 1.2821744704086353e-06, + "loss": 24.4192, + "step": 395090 + }, + { + "epoch": 0.7981269973375567, + "grad_norm": 316.1722412109375, + "learning_rate": 1.2819410718607972e-06, + "loss": 40.5452, + "step": 395100 + }, + { + "epoch": 0.7981471979702405, + "grad_norm": 95.39334869384766, + "learning_rate": 1.2817076914343257e-06, + "loss": 81.6254, + "step": 395110 + }, + { + "epoch": 0.7981673986029243, + "grad_norm": 330.116455078125, + "learning_rate": 1.2814743291303616e-06, + "loss": 17.7754, + "step": 395120 + }, + { + "epoch": 0.7981875992356081, + "grad_norm": 485.01861572265625, + "learning_rate": 1.2812409849500408e-06, + "loss": 11.6857, + "step": 395130 + }, + { + "epoch": 0.7982077998682918, + "grad_norm": 302.6898193359375, + "learning_rate": 1.2810076588944987e-06, + "loss": 16.6038, + "step": 395140 + }, + { + "epoch": 0.7982280005009756, + "grad_norm": 268.6713562011719, + "learning_rate": 1.2807743509648745e-06, + "loss": 22.0135, + "step": 395150 + }, + { + "epoch": 0.7982482011336595, + "grad_norm": 271.824462890625, + "learning_rate": 1.280541061162306e-06, + "loss": 8.693, + "step": 395160 + }, + { + "epoch": 0.7982684017663433, + "grad_norm": 301.60888671875, + "learning_rate": 1.2803077894879296e-06, + "loss": 14.459, + "step": 395170 + }, + { + "epoch": 0.7982886023990271, + "grad_norm": 218.03836059570312, + "learning_rate": 1.2800745359428807e-06, + "loss": 20.5122, + "step": 395180 + }, + { + "epoch": 0.7983088030317109, + "grad_norm": 339.1699523925781, + "learning_rate": 1.2798413005282984e-06, + "loss": 19.4057, + "step": 395190 + }, + { + "epoch": 0.7983290036643947, + "grad_norm": 312.39691162109375, + "learning_rate": 1.2796080832453183e-06, + "loss": 10.9805, + "step": 395200 + }, + { + "epoch": 0.7983492042970786, + "grad_norm": 165.0194549560547, + "learning_rate": 1.279374884095076e-06, + "loss": 30.4858, + "step": 395210 + }, + { + "epoch": 0.7983694049297624, + "grad_norm": 172.35121154785156, + "learning_rate": 
1.279141703078709e-06, + "loss": 19.7434, + "step": 395220 + }, + { + "epoch": 0.7983896055624462, + "grad_norm": 230.8863067626953, + "learning_rate": 1.2789085401973572e-06, + "loss": 24.9739, + "step": 395230 + }, + { + "epoch": 0.79840980619513, + "grad_norm": 346.300048828125, + "learning_rate": 1.2786753954521508e-06, + "loss": 39.0584, + "step": 395240 + }, + { + "epoch": 0.7984300068278138, + "grad_norm": 454.89654541015625, + "learning_rate": 1.2784422688442294e-06, + "loss": 13.4399, + "step": 395250 + }, + { + "epoch": 0.7984502074604977, + "grad_norm": 237.23622131347656, + "learning_rate": 1.2782091603747304e-06, + "loss": 14.5553, + "step": 395260 + }, + { + "epoch": 0.7984704080931815, + "grad_norm": 336.5985107421875, + "learning_rate": 1.2779760700447885e-06, + "loss": 24.699, + "step": 395270 + }, + { + "epoch": 0.7984906087258653, + "grad_norm": 411.7083740234375, + "learning_rate": 1.2777429978555383e-06, + "loss": 19.8059, + "step": 395280 + }, + { + "epoch": 0.7985108093585491, + "grad_norm": 169.06935119628906, + "learning_rate": 1.2775099438081173e-06, + "loss": 15.3829, + "step": 395290 + }, + { + "epoch": 0.7985310099912329, + "grad_norm": 573.5012817382812, + "learning_rate": 1.2772769079036639e-06, + "loss": 22.6528, + "step": 395300 + }, + { + "epoch": 0.7985512106239168, + "grad_norm": 29.020320892333984, + "learning_rate": 1.277043890143309e-06, + "loss": 6.569, + "step": 395310 + }, + { + "epoch": 0.7985714112566006, + "grad_norm": 944.8809204101562, + "learning_rate": 1.2768108905281906e-06, + "loss": 17.0352, + "step": 395320 + }, + { + "epoch": 0.7985916118892844, + "grad_norm": 107.38935852050781, + "learning_rate": 1.2765779090594454e-06, + "loss": 21.9235, + "step": 395330 + }, + { + "epoch": 0.7986118125219682, + "grad_norm": 1010.5393676757812, + "learning_rate": 1.2763449457382083e-06, + "loss": 31.6762, + "step": 395340 + }, + { + "epoch": 0.798632013154652, + "grad_norm": 173.56773376464844, + "learning_rate": 1.2761120005656125e-06, + "loss": 30.6485, + "step": 395350 + }, + { + "epoch": 0.7986522137873359, + "grad_norm": 278.5769958496094, + "learning_rate": 1.2758790735427966e-06, + "loss": 11.0597, + "step": 395360 + }, + { + "epoch": 0.7986724144200197, + "grad_norm": 160.19850158691406, + "learning_rate": 1.275646164670895e-06, + "loss": 19.5009, + "step": 395370 + }, + { + "epoch": 0.7986926150527035, + "grad_norm": 171.7584686279297, + "learning_rate": 1.27541327395104e-06, + "loss": 23.2055, + "step": 395380 + }, + { + "epoch": 0.7987128156853872, + "grad_norm": 301.77984619140625, + "learning_rate": 1.275180401384371e-06, + "loss": 17.2965, + "step": 395390 + }, + { + "epoch": 0.798733016318071, + "grad_norm": 341.29913330078125, + "learning_rate": 1.2749475469720196e-06, + "loss": 25.0192, + "step": 395400 + }, + { + "epoch": 0.7987532169507549, + "grad_norm": 626.1749267578125, + "learning_rate": 1.274714710715123e-06, + "loss": 18.06, + "step": 395410 + }, + { + "epoch": 0.7987734175834387, + "grad_norm": 175.00750732421875, + "learning_rate": 1.2744818926148157e-06, + "loss": 10.0525, + "step": 395420 + }, + { + "epoch": 0.7987936182161225, + "grad_norm": 230.48179626464844, + "learning_rate": 1.2742490926722295e-06, + "loss": 23.7853, + "step": 395430 + }, + { + "epoch": 0.7988138188488063, + "grad_norm": 348.3998107910156, + "learning_rate": 1.2740163108885033e-06, + "loss": 15.9951, + "step": 395440 + }, + { + "epoch": 0.7988340194814901, + "grad_norm": 389.44061279296875, + "learning_rate": 1.2737835472647686e-06, + "loss": 
34.202, + "step": 395450 + }, + { + "epoch": 0.798854220114174, + "grad_norm": 458.6150817871094, + "learning_rate": 1.273550801802162e-06, + "loss": 24.2333, + "step": 395460 + }, + { + "epoch": 0.7988744207468578, + "grad_norm": 91.32477569580078, + "learning_rate": 1.2733180745018154e-06, + "loss": 16.5594, + "step": 395470 + }, + { + "epoch": 0.7988946213795416, + "grad_norm": 387.79022216796875, + "learning_rate": 1.2730853653648657e-06, + "loss": 18.4532, + "step": 395480 + }, + { + "epoch": 0.7989148220122254, + "grad_norm": 542.697509765625, + "learning_rate": 1.2728526743924462e-06, + "loss": 14.1246, + "step": 395490 + }, + { + "epoch": 0.7989350226449092, + "grad_norm": 182.57275390625, + "learning_rate": 1.2726200015856893e-06, + "loss": 19.1147, + "step": 395500 + }, + { + "epoch": 0.798955223277593, + "grad_norm": 179.49337768554688, + "learning_rate": 1.2723873469457304e-06, + "loss": 16.6853, + "step": 395510 + }, + { + "epoch": 0.7989754239102769, + "grad_norm": 235.9488525390625, + "learning_rate": 1.2721547104737065e-06, + "loss": 22.9069, + "step": 395520 + }, + { + "epoch": 0.7989956245429607, + "grad_norm": 117.95890045166016, + "learning_rate": 1.2719220921707453e-06, + "loss": 13.7428, + "step": 395530 + }, + { + "epoch": 0.7990158251756445, + "grad_norm": 0.0, + "learning_rate": 1.2716894920379835e-06, + "loss": 14.2216, + "step": 395540 + }, + { + "epoch": 0.7990360258083283, + "grad_norm": 355.38250732421875, + "learning_rate": 1.2714569100765567e-06, + "loss": 16.9524, + "step": 395550 + }, + { + "epoch": 0.7990562264410122, + "grad_norm": 432.5514831542969, + "learning_rate": 1.2712243462875967e-06, + "loss": 16.7309, + "step": 395560 + }, + { + "epoch": 0.799076427073696, + "grad_norm": 205.42091369628906, + "learning_rate": 1.2709918006722355e-06, + "loss": 33.1063, + "step": 395570 + }, + { + "epoch": 0.7990966277063798, + "grad_norm": 1120.8974609375, + "learning_rate": 1.2707592732316092e-06, + "loss": 23.3487, + "step": 395580 + }, + { + "epoch": 0.7991168283390636, + "grad_norm": 265.7613525390625, + "learning_rate": 1.2705267639668501e-06, + "loss": 14.2579, + "step": 395590 + }, + { + "epoch": 0.7991370289717474, + "grad_norm": 13.7578125, + "learning_rate": 1.2702942728790897e-06, + "loss": 10.8584, + "step": 395600 + }, + { + "epoch": 0.7991572296044313, + "grad_norm": 486.94915771484375, + "learning_rate": 1.2700617999694626e-06, + "loss": 19.4967, + "step": 395610 + }, + { + "epoch": 0.7991774302371151, + "grad_norm": 169.88043212890625, + "learning_rate": 1.2698293452391036e-06, + "loss": 20.9465, + "step": 395620 + }, + { + "epoch": 0.7991976308697989, + "grad_norm": 387.9173889160156, + "learning_rate": 1.2695969086891436e-06, + "loss": 21.2436, + "step": 395630 + }, + { + "epoch": 0.7992178315024827, + "grad_norm": 304.60455322265625, + "learning_rate": 1.2693644903207146e-06, + "loss": 12.9429, + "step": 395640 + }, + { + "epoch": 0.7992380321351664, + "grad_norm": 519.8191528320312, + "learning_rate": 1.2691320901349518e-06, + "loss": 14.3901, + "step": 395650 + }, + { + "epoch": 0.7992582327678502, + "grad_norm": 126.75354766845703, + "learning_rate": 1.2688997081329874e-06, + "loss": 18.2547, + "step": 395660 + }, + { + "epoch": 0.7992784334005341, + "grad_norm": 332.45257568359375, + "learning_rate": 1.2686673443159515e-06, + "loss": 16.2801, + "step": 395670 + }, + { + "epoch": 0.7992986340332179, + "grad_norm": 212.67904663085938, + "learning_rate": 1.2684349986849791e-06, + "loss": 21.5422, + "step": 395680 + }, + { + "epoch": 
0.7993188346659017, + "grad_norm": 366.7095947265625, + "learning_rate": 1.2682026712412016e-06, + "loss": 25.9652, + "step": 395690 + }, + { + "epoch": 0.7993390352985855, + "grad_norm": 611.2752685546875, + "learning_rate": 1.2679703619857525e-06, + "loss": 19.9384, + "step": 395700 + }, + { + "epoch": 0.7993592359312693, + "grad_norm": 352.1509094238281, + "learning_rate": 1.2677380709197634e-06, + "loss": 11.0338, + "step": 395710 + }, + { + "epoch": 0.7993794365639532, + "grad_norm": 146.7178497314453, + "learning_rate": 1.2675057980443644e-06, + "loss": 8.9294, + "step": 395720 + }, + { + "epoch": 0.799399637196637, + "grad_norm": 318.03125, + "learning_rate": 1.2672735433606914e-06, + "loss": 19.5553, + "step": 395730 + }, + { + "epoch": 0.7994198378293208, + "grad_norm": 471.7959289550781, + "learning_rate": 1.2670413068698745e-06, + "loss": 23.6517, + "step": 395740 + }, + { + "epoch": 0.7994400384620046, + "grad_norm": 507.58929443359375, + "learning_rate": 1.2668090885730439e-06, + "loss": 18.7919, + "step": 395750 + }, + { + "epoch": 0.7994602390946884, + "grad_norm": 731.7949829101562, + "learning_rate": 1.2665768884713326e-06, + "loss": 16.968, + "step": 395760 + }, + { + "epoch": 0.7994804397273723, + "grad_norm": 475.5935363769531, + "learning_rate": 1.2663447065658746e-06, + "loss": 29.8474, + "step": 395770 + }, + { + "epoch": 0.7995006403600561, + "grad_norm": 175.68096923828125, + "learning_rate": 1.2661125428577998e-06, + "loss": 14.3358, + "step": 395780 + }, + { + "epoch": 0.7995208409927399, + "grad_norm": 0.010071820579469204, + "learning_rate": 1.265880397348238e-06, + "loss": 21.9857, + "step": 395790 + }, + { + "epoch": 0.7995410416254237, + "grad_norm": 28.79306411743164, + "learning_rate": 1.2656482700383238e-06, + "loss": 17.6525, + "step": 395800 + }, + { + "epoch": 0.7995612422581075, + "grad_norm": 641.2723388671875, + "learning_rate": 1.2654161609291864e-06, + "loss": 14.5989, + "step": 395810 + }, + { + "epoch": 0.7995814428907914, + "grad_norm": 263.36883544921875, + "learning_rate": 1.265184070021957e-06, + "loss": 31.5257, + "step": 395820 + }, + { + "epoch": 0.7996016435234752, + "grad_norm": 250.98660278320312, + "learning_rate": 1.2649519973177672e-06, + "loss": 22.3223, + "step": 395830 + }, + { + "epoch": 0.799621844156159, + "grad_norm": 568.1674194335938, + "learning_rate": 1.2647199428177509e-06, + "loss": 43.9173, + "step": 395840 + }, + { + "epoch": 0.7996420447888428, + "grad_norm": 452.78033447265625, + "learning_rate": 1.2644879065230343e-06, + "loss": 29.0686, + "step": 395850 + }, + { + "epoch": 0.7996622454215266, + "grad_norm": 174.3638153076172, + "learning_rate": 1.26425588843475e-06, + "loss": 22.5565, + "step": 395860 + }, + { + "epoch": 0.7996824460542105, + "grad_norm": 311.9055480957031, + "learning_rate": 1.2640238885540313e-06, + "loss": 17.1418, + "step": 395870 + }, + { + "epoch": 0.7997026466868943, + "grad_norm": 186.3204345703125, + "learning_rate": 1.263791906882007e-06, + "loss": 15.7676, + "step": 395880 + }, + { + "epoch": 0.7997228473195781, + "grad_norm": 456.97198486328125, + "learning_rate": 1.263559943419806e-06, + "loss": 27.3653, + "step": 395890 + }, + { + "epoch": 0.7997430479522618, + "grad_norm": 795.5870361328125, + "learning_rate": 1.2633279981685608e-06, + "loss": 21.4389, + "step": 395900 + }, + { + "epoch": 0.7997632485849456, + "grad_norm": 357.4993591308594, + "learning_rate": 1.2630960711294049e-06, + "loss": 17.7066, + "step": 395910 + }, + { + "epoch": 0.7997834492176294, + "grad_norm": 
413.597412109375, + "learning_rate": 1.2628641623034627e-06, + "loss": 27.9636, + "step": 395920 + }, + { + "epoch": 0.7998036498503133, + "grad_norm": 437.23468017578125, + "learning_rate": 1.2626322716918672e-06, + "loss": 15.24, + "step": 395930 + }, + { + "epoch": 0.7998238504829971, + "grad_norm": 6900.7216796875, + "learning_rate": 1.2624003992957494e-06, + "loss": 55.5842, + "step": 395940 + }, + { + "epoch": 0.7998440511156809, + "grad_norm": 433.11737060546875, + "learning_rate": 1.2621685451162397e-06, + "loss": 11.88, + "step": 395950 + }, + { + "epoch": 0.7998642517483647, + "grad_norm": 690.3412475585938, + "learning_rate": 1.2619367091544654e-06, + "loss": 22.324, + "step": 395960 + }, + { + "epoch": 0.7998844523810485, + "grad_norm": 144.7787322998047, + "learning_rate": 1.2617048914115593e-06, + "loss": 21.3205, + "step": 395970 + }, + { + "epoch": 0.7999046530137324, + "grad_norm": 545.2615966796875, + "learning_rate": 1.2614730918886509e-06, + "loss": 26.1908, + "step": 395980 + }, + { + "epoch": 0.7999248536464162, + "grad_norm": 277.0710144042969, + "learning_rate": 1.261241310586867e-06, + "loss": 19.5164, + "step": 395990 + }, + { + "epoch": 0.7999450542791, + "grad_norm": 175.83204650878906, + "learning_rate": 1.2610095475073415e-06, + "loss": 20.0264, + "step": 396000 + }, + { + "epoch": 0.7999652549117838, + "grad_norm": 11.009918212890625, + "learning_rate": 1.2607778026512002e-06, + "loss": 16.6202, + "step": 396010 + }, + { + "epoch": 0.7999854555444676, + "grad_norm": 525.2348022460938, + "learning_rate": 1.2605460760195759e-06, + "loss": 19.783, + "step": 396020 + }, + { + "epoch": 0.8000056561771515, + "grad_norm": 188.04359436035156, + "learning_rate": 1.2603143676135965e-06, + "loss": 20.8798, + "step": 396030 + }, + { + "epoch": 0.8000258568098353, + "grad_norm": 24.16062355041504, + "learning_rate": 1.26008267743439e-06, + "loss": 14.6832, + "step": 396040 + }, + { + "epoch": 0.8000460574425191, + "grad_norm": 438.58056640625, + "learning_rate": 1.2598510054830888e-06, + "loss": 15.1189, + "step": 396050 + }, + { + "epoch": 0.8000662580752029, + "grad_norm": 303.39556884765625, + "learning_rate": 1.2596193517608179e-06, + "loss": 21.6028, + "step": 396060 + }, + { + "epoch": 0.8000864587078867, + "grad_norm": 307.9757080078125, + "learning_rate": 1.25938771626871e-06, + "loss": 26.5777, + "step": 396070 + }, + { + "epoch": 0.8001066593405706, + "grad_norm": 0.0, + "learning_rate": 1.259156099007892e-06, + "loss": 16.7807, + "step": 396080 + }, + { + "epoch": 0.8001268599732544, + "grad_norm": 318.7864685058594, + "learning_rate": 1.2589244999794947e-06, + "loss": 9.5164, + "step": 396090 + }, + { + "epoch": 0.8001470606059382, + "grad_norm": 228.4259490966797, + "learning_rate": 1.2586929191846453e-06, + "loss": 18.3174, + "step": 396100 + }, + { + "epoch": 0.800167261238622, + "grad_norm": 234.86070251464844, + "learning_rate": 1.2584613566244713e-06, + "loss": 21.9184, + "step": 396110 + }, + { + "epoch": 0.8001874618713058, + "grad_norm": 374.68157958984375, + "learning_rate": 1.2582298123001046e-06, + "loss": 20.8922, + "step": 396120 + }, + { + "epoch": 0.8002076625039897, + "grad_norm": 54.8770637512207, + "learning_rate": 1.2579982862126722e-06, + "loss": 9.7939, + "step": 396130 + }, + { + "epoch": 0.8002278631366735, + "grad_norm": 804.0513916015625, + "learning_rate": 1.2577667783633007e-06, + "loss": 12.1743, + "step": 396140 + }, + { + "epoch": 0.8002480637693573, + "grad_norm": 531.5899658203125, + "learning_rate": 1.25753528875312e-06, + 
"loss": 16.6296, + "step": 396150 + }, + { + "epoch": 0.800268264402041, + "grad_norm": 277.38885498046875, + "learning_rate": 1.2573038173832597e-06, + "loss": 30.3747, + "step": 396160 + }, + { + "epoch": 0.8002884650347248, + "grad_norm": 455.3837890625, + "learning_rate": 1.2570723642548465e-06, + "loss": 16.5166, + "step": 396170 + }, + { + "epoch": 0.8003086656674087, + "grad_norm": 337.4343566894531, + "learning_rate": 1.2568409293690077e-06, + "loss": 12.9993, + "step": 396180 + }, + { + "epoch": 0.8003288663000925, + "grad_norm": 39.89707946777344, + "learning_rate": 1.2566095127268734e-06, + "loss": 31.3835, + "step": 396190 + }, + { + "epoch": 0.8003490669327763, + "grad_norm": 470.049072265625, + "learning_rate": 1.2563781143295705e-06, + "loss": 35.6281, + "step": 396200 + }, + { + "epoch": 0.8003692675654601, + "grad_norm": 701.8652954101562, + "learning_rate": 1.2561467341782247e-06, + "loss": 23.626, + "step": 396210 + }, + { + "epoch": 0.8003894681981439, + "grad_norm": 103.31476593017578, + "learning_rate": 1.2559153722739658e-06, + "loss": 16.6704, + "step": 396220 + }, + { + "epoch": 0.8004096688308278, + "grad_norm": 75.45716094970703, + "learning_rate": 1.2556840286179234e-06, + "loss": 20.8166, + "step": 396230 + }, + { + "epoch": 0.8004298694635116, + "grad_norm": 125.75484466552734, + "learning_rate": 1.2554527032112224e-06, + "loss": 5.9354, + "step": 396240 + }, + { + "epoch": 0.8004500700961954, + "grad_norm": 353.441162109375, + "learning_rate": 1.2552213960549891e-06, + "loss": 14.4581, + "step": 396250 + }, + { + "epoch": 0.8004702707288792, + "grad_norm": 245.49456787109375, + "learning_rate": 1.254990107150354e-06, + "loss": 8.6626, + "step": 396260 + }, + { + "epoch": 0.800490471361563, + "grad_norm": 816.9166259765625, + "learning_rate": 1.2547588364984431e-06, + "loss": 23.9752, + "step": 396270 + }, + { + "epoch": 0.8005106719942469, + "grad_norm": 785.3043823242188, + "learning_rate": 1.2545275841003818e-06, + "loss": 15.4153, + "step": 396280 + }, + { + "epoch": 0.8005308726269307, + "grad_norm": 751.38232421875, + "learning_rate": 1.2542963499573007e-06, + "loss": 20.2525, + "step": 396290 + }, + { + "epoch": 0.8005510732596145, + "grad_norm": 422.921875, + "learning_rate": 1.2540651340703231e-06, + "loss": 22.5981, + "step": 396300 + }, + { + "epoch": 0.8005712738922983, + "grad_norm": 251.9776611328125, + "learning_rate": 1.253833936440579e-06, + "loss": 22.8785, + "step": 396310 + }, + { + "epoch": 0.8005914745249821, + "grad_norm": 328.57470703125, + "learning_rate": 1.2536027570691938e-06, + "loss": 16.0463, + "step": 396320 + }, + { + "epoch": 0.800611675157666, + "grad_norm": 490.6477966308594, + "learning_rate": 1.2533715959572935e-06, + "loss": 18.1275, + "step": 396330 + }, + { + "epoch": 0.8006318757903498, + "grad_norm": 616.2587890625, + "learning_rate": 1.253140453106007e-06, + "loss": 33.4304, + "step": 396340 + }, + { + "epoch": 0.8006520764230336, + "grad_norm": 308.67938232421875, + "learning_rate": 1.2529093285164579e-06, + "loss": 26.025, + "step": 396350 + }, + { + "epoch": 0.8006722770557174, + "grad_norm": 473.9761047363281, + "learning_rate": 1.2526782221897755e-06, + "loss": 7.2248, + "step": 396360 + }, + { + "epoch": 0.8006924776884012, + "grad_norm": 398.3085632324219, + "learning_rate": 1.252447134127084e-06, + "loss": 28.8853, + "step": 396370 + }, + { + "epoch": 0.8007126783210851, + "grad_norm": 387.9682312011719, + "learning_rate": 1.2522160643295112e-06, + "loss": 9.0265, + "step": 396380 + }, + { + "epoch": 
0.8007328789537689, + "grad_norm": 279.651611328125, + "learning_rate": 1.2519850127981836e-06, + "loss": 12.125, + "step": 396390 + }, + { + "epoch": 0.8007530795864527, + "grad_norm": 390.2597961425781, + "learning_rate": 1.2517539795342248e-06, + "loss": 22.5556, + "step": 396400 + }, + { + "epoch": 0.8007732802191365, + "grad_norm": 231.67532348632812, + "learning_rate": 1.2515229645387639e-06, + "loss": 11.2884, + "step": 396410 + }, + { + "epoch": 0.8007934808518202, + "grad_norm": 504.6092529296875, + "learning_rate": 1.2512919678129254e-06, + "loss": 10.5464, + "step": 396420 + }, + { + "epoch": 0.800813681484504, + "grad_norm": 539.6251220703125, + "learning_rate": 1.2510609893578335e-06, + "loss": 32.6886, + "step": 396430 + }, + { + "epoch": 0.8008338821171879, + "grad_norm": 903.2691650390625, + "learning_rate": 1.2508300291746162e-06, + "loss": 34.8789, + "step": 396440 + }, + { + "epoch": 0.8008540827498717, + "grad_norm": 308.7208557128906, + "learning_rate": 1.2505990872644008e-06, + "loss": 17.5599, + "step": 396450 + }, + { + "epoch": 0.8008742833825555, + "grad_norm": 523.7728271484375, + "learning_rate": 1.2503681636283082e-06, + "loss": 16.3667, + "step": 396460 + }, + { + "epoch": 0.8008944840152393, + "grad_norm": 218.46133422851562, + "learning_rate": 1.2501372582674665e-06, + "loss": 12.6188, + "step": 396470 + }, + { + "epoch": 0.8009146846479231, + "grad_norm": 177.7940216064453, + "learning_rate": 1.2499063711830023e-06, + "loss": 16.5464, + "step": 396480 + }, + { + "epoch": 0.800934885280607, + "grad_norm": 101.02649688720703, + "learning_rate": 1.2496755023760398e-06, + "loss": 24.4657, + "step": 396490 + }, + { + "epoch": 0.8009550859132908, + "grad_norm": 402.1471862792969, + "learning_rate": 1.2494446518477022e-06, + "loss": 12.5647, + "step": 396500 + }, + { + "epoch": 0.8009752865459746, + "grad_norm": 430.8567810058594, + "learning_rate": 1.249213819599117e-06, + "loss": 13.368, + "step": 396510 + }, + { + "epoch": 0.8009954871786584, + "grad_norm": 354.9683837890625, + "learning_rate": 1.248983005631411e-06, + "loss": 15.4742, + "step": 396520 + }, + { + "epoch": 0.8010156878113422, + "grad_norm": 836.7166748046875, + "learning_rate": 1.2487522099457044e-06, + "loss": 26.8622, + "step": 396530 + }, + { + "epoch": 0.8010358884440261, + "grad_norm": 420.2633361816406, + "learning_rate": 1.248521432543125e-06, + "loss": 29.0428, + "step": 396540 + }, + { + "epoch": 0.8010560890767099, + "grad_norm": 335.16082763671875, + "learning_rate": 1.248290673424798e-06, + "loss": 11.3361, + "step": 396550 + }, + { + "epoch": 0.8010762897093937, + "grad_norm": 80.3372802734375, + "learning_rate": 1.2480599325918474e-06, + "loss": 10.0713, + "step": 396560 + }, + { + "epoch": 0.8010964903420775, + "grad_norm": 36.19289016723633, + "learning_rate": 1.247829210045396e-06, + "loss": 32.7652, + "step": 396570 + }, + { + "epoch": 0.8011166909747613, + "grad_norm": 646.9605102539062, + "learning_rate": 1.2475985057865714e-06, + "loss": 17.6624, + "step": 396580 + }, + { + "epoch": 0.8011368916074452, + "grad_norm": 172.8927459716797, + "learning_rate": 1.2473678198164967e-06, + "loss": 20.7343, + "step": 396590 + }, + { + "epoch": 0.801157092240129, + "grad_norm": 106.66670989990234, + "learning_rate": 1.2471371521362946e-06, + "loss": 15.0763, + "step": 396600 + }, + { + "epoch": 0.8011772928728128, + "grad_norm": 533.6571044921875, + "learning_rate": 1.2469065027470923e-06, + "loss": 21.8675, + "step": 396610 + }, + { + "epoch": 0.8011974935054966, + "grad_norm": 0.0, + 
"learning_rate": 1.246675871650011e-06, + "loss": 11.3138, + "step": 396620 + }, + { + "epoch": 0.8012176941381804, + "grad_norm": 320.3057556152344, + "learning_rate": 1.2464452588461778e-06, + "loss": 8.236, + "step": 396630 + }, + { + "epoch": 0.8012378947708643, + "grad_norm": 307.5802307128906, + "learning_rate": 1.2462146643367156e-06, + "loss": 18.8169, + "step": 396640 + }, + { + "epoch": 0.8012580954035481, + "grad_norm": 136.44705200195312, + "learning_rate": 1.2459840881227458e-06, + "loss": 20.7404, + "step": 396650 + }, + { + "epoch": 0.8012782960362319, + "grad_norm": 513.2805786132812, + "learning_rate": 1.2457535302053957e-06, + "loss": 12.792, + "step": 396660 + }, + { + "epoch": 0.8012984966689156, + "grad_norm": 334.1730041503906, + "learning_rate": 1.2455229905857863e-06, + "loss": 12.3761, + "step": 396670 + }, + { + "epoch": 0.8013186973015994, + "grad_norm": 482.0895690917969, + "learning_rate": 1.2452924692650443e-06, + "loss": 20.5428, + "step": 396680 + }, + { + "epoch": 0.8013388979342833, + "grad_norm": 203.59678649902344, + "learning_rate": 1.2450619662442892e-06, + "loss": 10.6843, + "step": 396690 + }, + { + "epoch": 0.8013590985669671, + "grad_norm": 435.9341735839844, + "learning_rate": 1.2448314815246487e-06, + "loss": 13.401, + "step": 396700 + }, + { + "epoch": 0.8013792991996509, + "grad_norm": 315.55419921875, + "learning_rate": 1.244601015107244e-06, + "loss": 20.2064, + "step": 396710 + }, + { + "epoch": 0.8013994998323347, + "grad_norm": 574.0961303710938, + "learning_rate": 1.2443705669931966e-06, + "loss": 11.4189, + "step": 396720 + }, + { + "epoch": 0.8014197004650185, + "grad_norm": 1980.1788330078125, + "learning_rate": 1.2441401371836337e-06, + "loss": 55.0449, + "step": 396730 + }, + { + "epoch": 0.8014399010977024, + "grad_norm": 248.682373046875, + "learning_rate": 1.2439097256796756e-06, + "loss": 24.5463, + "step": 396740 + }, + { + "epoch": 0.8014601017303862, + "grad_norm": 281.3458557128906, + "learning_rate": 1.2436793324824448e-06, + "loss": 26.7099, + "step": 396750 + }, + { + "epoch": 0.80148030236307, + "grad_norm": 236.09158325195312, + "learning_rate": 1.2434489575930652e-06, + "loss": 7.0798, + "step": 396760 + }, + { + "epoch": 0.8015005029957538, + "grad_norm": 374.0956115722656, + "learning_rate": 1.2432186010126613e-06, + "loss": 13.3354, + "step": 396770 + }, + { + "epoch": 0.8015207036284376, + "grad_norm": 448.5460510253906, + "learning_rate": 1.2429882627423545e-06, + "loss": 22.8503, + "step": 396780 + }, + { + "epoch": 0.8015409042611215, + "grad_norm": 385.36029052734375, + "learning_rate": 1.2427579427832654e-06, + "loss": 18.5393, + "step": 396790 + }, + { + "epoch": 0.8015611048938053, + "grad_norm": 280.7149353027344, + "learning_rate": 1.24252764113652e-06, + "loss": 19.2721, + "step": 396800 + }, + { + "epoch": 0.8015813055264891, + "grad_norm": 190.941162109375, + "learning_rate": 1.2422973578032394e-06, + "loss": 15.9896, + "step": 396810 + }, + { + "epoch": 0.8016015061591729, + "grad_norm": 249.75369262695312, + "learning_rate": 1.2420670927845441e-06, + "loss": 27.7864, + "step": 396820 + }, + { + "epoch": 0.8016217067918567, + "grad_norm": 50.38788986206055, + "learning_rate": 1.2418368460815578e-06, + "loss": 16.6244, + "step": 396830 + }, + { + "epoch": 0.8016419074245406, + "grad_norm": 477.87506103515625, + "learning_rate": 1.2416066176954044e-06, + "loss": 15.3904, + "step": 396840 + }, + { + "epoch": 0.8016621080572244, + "grad_norm": 497.0939025878906, + "learning_rate": 1.241376407627205e-06, + 
"loss": 27.108, + "step": 396850 + }, + { + "epoch": 0.8016823086899082, + "grad_norm": 310.3155212402344, + "learning_rate": 1.2411462158780791e-06, + "loss": 28.3227, + "step": 396860 + }, + { + "epoch": 0.801702509322592, + "grad_norm": 265.06317138671875, + "learning_rate": 1.2409160424491524e-06, + "loss": 13.765, + "step": 396870 + }, + { + "epoch": 0.8017227099552758, + "grad_norm": 237.19984436035156, + "learning_rate": 1.240685887341545e-06, + "loss": 16.64, + "step": 396880 + }, + { + "epoch": 0.8017429105879597, + "grad_norm": 438.68988037109375, + "learning_rate": 1.240455750556377e-06, + "loss": 22.226, + "step": 396890 + }, + { + "epoch": 0.8017631112206435, + "grad_norm": 262.5321044921875, + "learning_rate": 1.240225632094773e-06, + "loss": 21.8872, + "step": 396900 + }, + { + "epoch": 0.8017833118533273, + "grad_norm": 634.115966796875, + "learning_rate": 1.2399955319578521e-06, + "loss": 28.4038, + "step": 396910 + }, + { + "epoch": 0.8018035124860111, + "grad_norm": 63.45467758178711, + "learning_rate": 1.2397654501467387e-06, + "loss": 18.5422, + "step": 396920 + }, + { + "epoch": 0.8018237131186948, + "grad_norm": 605.983642578125, + "learning_rate": 1.2395353866625521e-06, + "loss": 17.3402, + "step": 396930 + }, + { + "epoch": 0.8018439137513786, + "grad_norm": 199.43296813964844, + "learning_rate": 1.2393053415064121e-06, + "loss": 11.353, + "step": 396940 + }, + { + "epoch": 0.8018641143840625, + "grad_norm": 97.341796875, + "learning_rate": 1.2390753146794438e-06, + "loss": 20.3572, + "step": 396950 + }, + { + "epoch": 0.8018843150167463, + "grad_norm": 197.81533813476562, + "learning_rate": 1.2388453061827644e-06, + "loss": 14.9244, + "step": 396960 + }, + { + "epoch": 0.8019045156494301, + "grad_norm": 1986.1629638671875, + "learning_rate": 1.2386153160174986e-06, + "loss": 25.1023, + "step": 396970 + }, + { + "epoch": 0.8019247162821139, + "grad_norm": 363.91241455078125, + "learning_rate": 1.2383853441847638e-06, + "loss": 27.498, + "step": 396980 + }, + { + "epoch": 0.8019449169147977, + "grad_norm": 460.5152893066406, + "learning_rate": 1.2381553906856842e-06, + "loss": 19.7326, + "step": 396990 + }, + { + "epoch": 0.8019651175474816, + "grad_norm": 134.38975524902344, + "learning_rate": 1.2379254555213788e-06, + "loss": 23.5737, + "step": 397000 + }, + { + "epoch": 0.8019853181801654, + "grad_norm": 89.52670288085938, + "learning_rate": 1.2376955386929673e-06, + "loss": 10.0938, + "step": 397010 + }, + { + "epoch": 0.8020055188128492, + "grad_norm": 277.80548095703125, + "learning_rate": 1.2374656402015728e-06, + "loss": 21.5251, + "step": 397020 + }, + { + "epoch": 0.802025719445533, + "grad_norm": 438.4913024902344, + "learning_rate": 1.2372357600483142e-06, + "loss": 20.0417, + "step": 397030 + }, + { + "epoch": 0.8020459200782168, + "grad_norm": 557.4075317382812, + "learning_rate": 1.2370058982343109e-06, + "loss": 25.3092, + "step": 397040 + }, + { + "epoch": 0.8020661207109007, + "grad_norm": 263.2080383300781, + "learning_rate": 1.2367760547606844e-06, + "loss": 13.6746, + "step": 397050 + }, + { + "epoch": 0.8020863213435845, + "grad_norm": 215.56797790527344, + "learning_rate": 1.236546229628558e-06, + "loss": 12.948, + "step": 397060 + }, + { + "epoch": 0.8021065219762683, + "grad_norm": 32.382484436035156, + "learning_rate": 1.2363164228390456e-06, + "loss": 21.1817, + "step": 397070 + }, + { + "epoch": 0.8021267226089521, + "grad_norm": 313.60711669921875, + "learning_rate": 1.236086634393271e-06, + "loss": 6.7464, + "step": 397080 + }, + { + 
"epoch": 0.802146923241636, + "grad_norm": 342.4676208496094, + "learning_rate": 1.2358568642923546e-06, + "loss": 12.7703, + "step": 397090 + }, + { + "epoch": 0.8021671238743198, + "grad_norm": 247.1126708984375, + "learning_rate": 1.2356271125374153e-06, + "loss": 19.5519, + "step": 397100 + }, + { + "epoch": 0.8021873245070036, + "grad_norm": 73.73995971679688, + "learning_rate": 1.2353973791295715e-06, + "loss": 18.9362, + "step": 397110 + }, + { + "epoch": 0.8022075251396874, + "grad_norm": 462.5740051269531, + "learning_rate": 1.2351676640699444e-06, + "loss": 30.8039, + "step": 397120 + }, + { + "epoch": 0.8022277257723712, + "grad_norm": 273.22747802734375, + "learning_rate": 1.2349379673596568e-06, + "loss": 8.9714, + "step": 397130 + }, + { + "epoch": 0.802247926405055, + "grad_norm": 566.9596557617188, + "learning_rate": 1.2347082889998214e-06, + "loss": 15.8455, + "step": 397140 + }, + { + "epoch": 0.8022681270377389, + "grad_norm": 492.3504638671875, + "learning_rate": 1.234478628991561e-06, + "loss": 18.6748, + "step": 397150 + }, + { + "epoch": 0.8022883276704227, + "grad_norm": 333.3385314941406, + "learning_rate": 1.234248987335997e-06, + "loss": 16.2732, + "step": 397160 + }, + { + "epoch": 0.8023085283031065, + "grad_norm": 222.15713500976562, + "learning_rate": 1.234019364034247e-06, + "loss": 15.5507, + "step": 397170 + }, + { + "epoch": 0.8023287289357902, + "grad_norm": 364.03106689453125, + "learning_rate": 1.2337897590874275e-06, + "loss": 29.7851, + "step": 397180 + }, + { + "epoch": 0.802348929568474, + "grad_norm": 205.8481903076172, + "learning_rate": 1.2335601724966617e-06, + "loss": 12.525, + "step": 397190 + }, + { + "epoch": 0.8023691302011579, + "grad_norm": 452.48822021484375, + "learning_rate": 1.2333306042630672e-06, + "loss": 20.0239, + "step": 397200 + }, + { + "epoch": 0.8023893308338417, + "grad_norm": 397.8550720214844, + "learning_rate": 1.2331010543877608e-06, + "loss": 29.5752, + "step": 397210 + }, + { + "epoch": 0.8024095314665255, + "grad_norm": 351.6863098144531, + "learning_rate": 1.232871522871864e-06, + "loss": 23.35, + "step": 397220 + }, + { + "epoch": 0.8024297320992093, + "grad_norm": 88.65419006347656, + "learning_rate": 1.2326420097164938e-06, + "loss": 17.8378, + "step": 397230 + }, + { + "epoch": 0.8024499327318931, + "grad_norm": 241.8977508544922, + "learning_rate": 1.2324125149227705e-06, + "loss": 22.2992, + "step": 397240 + }, + { + "epoch": 0.802470133364577, + "grad_norm": 263.2344970703125, + "learning_rate": 1.2321830384918116e-06, + "loss": 18.148, + "step": 397250 + }, + { + "epoch": 0.8024903339972608, + "grad_norm": 130.0895538330078, + "learning_rate": 1.2319535804247345e-06, + "loss": 6.7968, + "step": 397260 + }, + { + "epoch": 0.8025105346299446, + "grad_norm": 520.0706787109375, + "learning_rate": 1.2317241407226598e-06, + "loss": 26.9545, + "step": 397270 + }, + { + "epoch": 0.8025307352626284, + "grad_norm": 229.78622436523438, + "learning_rate": 1.2314947193867034e-06, + "loss": 9.8173, + "step": 397280 + }, + { + "epoch": 0.8025509358953122, + "grad_norm": 122.68806457519531, + "learning_rate": 1.2312653164179861e-06, + "loss": 9.8707, + "step": 397290 + }, + { + "epoch": 0.802571136527996, + "grad_norm": 321.5652770996094, + "learning_rate": 1.2310359318176229e-06, + "loss": 17.3135, + "step": 397300 + }, + { + "epoch": 0.8025913371606799, + "grad_norm": 214.66514587402344, + "learning_rate": 1.2308065655867346e-06, + "loss": 20.6322, + "step": 397310 + }, + { + "epoch": 0.8026115377933637, + "grad_norm": 
366.59429931640625, + "learning_rate": 1.2305772177264385e-06, + "loss": 11.4024, + "step": 397320 + }, + { + "epoch": 0.8026317384260475, + "grad_norm": 274.4979553222656, + "learning_rate": 1.2303478882378506e-06, + "loss": 16.8746, + "step": 397330 + }, + { + "epoch": 0.8026519390587313, + "grad_norm": 280.9788513183594, + "learning_rate": 1.2301185771220907e-06, + "loss": 40.8339, + "step": 397340 + }, + { + "epoch": 0.8026721396914152, + "grad_norm": 299.7403564453125, + "learning_rate": 1.2298892843802756e-06, + "loss": 17.0093, + "step": 397350 + }, + { + "epoch": 0.802692340324099, + "grad_norm": 426.50408935546875, + "learning_rate": 1.2296600100135219e-06, + "loss": 27.7678, + "step": 397360 + }, + { + "epoch": 0.8027125409567828, + "grad_norm": 266.2039794921875, + "learning_rate": 1.2294307540229478e-06, + "loss": 19.4155, + "step": 397370 + }, + { + "epoch": 0.8027327415894666, + "grad_norm": 487.9114685058594, + "learning_rate": 1.2292015164096726e-06, + "loss": 15.9295, + "step": 397380 + }, + { + "epoch": 0.8027529422221504, + "grad_norm": 229.36537170410156, + "learning_rate": 1.2289722971748113e-06, + "loss": 17.6065, + "step": 397390 + }, + { + "epoch": 0.8027731428548343, + "grad_norm": 176.56796264648438, + "learning_rate": 1.2287430963194807e-06, + "loss": 22.1076, + "step": 397400 + }, + { + "epoch": 0.8027933434875181, + "grad_norm": 325.32183837890625, + "learning_rate": 1.2285139138448005e-06, + "loss": 11.1035, + "step": 397410 + }, + { + "epoch": 0.8028135441202019, + "grad_norm": 242.32443237304688, + "learning_rate": 1.2282847497518857e-06, + "loss": 24.5044, + "step": 397420 + }, + { + "epoch": 0.8028337447528857, + "grad_norm": 234.795166015625, + "learning_rate": 1.2280556040418517e-06, + "loss": 18.323, + "step": 397430 + }, + { + "epoch": 0.8028539453855694, + "grad_norm": 216.63821411132812, + "learning_rate": 1.2278264767158176e-06, + "loss": 29.2998, + "step": 397440 + }, + { + "epoch": 0.8028741460182532, + "grad_norm": 355.3334655761719, + "learning_rate": 1.2275973677749015e-06, + "loss": 8.1372, + "step": 397450 + }, + { + "epoch": 0.8028943466509371, + "grad_norm": 280.875, + "learning_rate": 1.2273682772202183e-06, + "loss": 18.087, + "step": 397460 + }, + { + "epoch": 0.8029145472836209, + "grad_norm": 353.6507873535156, + "learning_rate": 1.2271392050528825e-06, + "loss": 24.4111, + "step": 397470 + }, + { + "epoch": 0.8029347479163047, + "grad_norm": 532.7893676757812, + "learning_rate": 1.2269101512740145e-06, + "loss": 26.5952, + "step": 397480 + }, + { + "epoch": 0.8029549485489885, + "grad_norm": 314.47088623046875, + "learning_rate": 1.2266811158847285e-06, + "loss": 13.219, + "step": 397490 + }, + { + "epoch": 0.8029751491816723, + "grad_norm": 138.06919860839844, + "learning_rate": 1.22645209888614e-06, + "loss": 17.6856, + "step": 397500 + }, + { + "epoch": 0.8029953498143562, + "grad_norm": 666.2764892578125, + "learning_rate": 1.226223100279368e-06, + "loss": 19.3284, + "step": 397510 + }, + { + "epoch": 0.80301555044704, + "grad_norm": 509.1011962890625, + "learning_rate": 1.2259941200655246e-06, + "loss": 8.9179, + "step": 397520 + }, + { + "epoch": 0.8030357510797238, + "grad_norm": 386.4242248535156, + "learning_rate": 1.2257651582457302e-06, + "loss": 20.7277, + "step": 397530 + }, + { + "epoch": 0.8030559517124076, + "grad_norm": 325.4468688964844, + "learning_rate": 1.2255362148210987e-06, + "loss": 18.3119, + "step": 397540 + }, + { + "epoch": 0.8030761523450914, + "grad_norm": 291.003662109375, + "learning_rate": 
1.2253072897927437e-06, + "loss": 12.979, + "step": 397550 + }, + { + "epoch": 0.8030963529777753, + "grad_norm": 302.1463928222656, + "learning_rate": 1.2250783831617852e-06, + "loss": 9.8737, + "step": 397560 + }, + { + "epoch": 0.8031165536104591, + "grad_norm": 126.8495101928711, + "learning_rate": 1.2248494949293354e-06, + "loss": 31.0371, + "step": 397570 + }, + { + "epoch": 0.8031367542431429, + "grad_norm": 303.8444519042969, + "learning_rate": 1.2246206250965127e-06, + "loss": 23.4929, + "step": 397580 + }, + { + "epoch": 0.8031569548758267, + "grad_norm": 367.688720703125, + "learning_rate": 1.2243917736644296e-06, + "loss": 13.9533, + "step": 397590 + }, + { + "epoch": 0.8031771555085105, + "grad_norm": 562.4437255859375, + "learning_rate": 1.2241629406342048e-06, + "loss": 34.8172, + "step": 397600 + }, + { + "epoch": 0.8031973561411944, + "grad_norm": 373.95379638671875, + "learning_rate": 1.2239341260069516e-06, + "loss": 20.0785, + "step": 397610 + }, + { + "epoch": 0.8032175567738782, + "grad_norm": 217.52337646484375, + "learning_rate": 1.2237053297837841e-06, + "loss": 18.1145, + "step": 397620 + }, + { + "epoch": 0.803237757406562, + "grad_norm": 330.6252136230469, + "learning_rate": 1.2234765519658204e-06, + "loss": 17.3215, + "step": 397630 + }, + { + "epoch": 0.8032579580392458, + "grad_norm": 572.371826171875, + "learning_rate": 1.2232477925541736e-06, + "loss": 21.833, + "step": 397640 + }, + { + "epoch": 0.8032781586719296, + "grad_norm": 145.16079711914062, + "learning_rate": 1.223019051549958e-06, + "loss": 26.6915, + "step": 397650 + }, + { + "epoch": 0.8032983593046135, + "grad_norm": 128.88552856445312, + "learning_rate": 1.2227903289542892e-06, + "loss": 11.6647, + "step": 397660 + }, + { + "epoch": 0.8033185599372973, + "grad_norm": 228.22393798828125, + "learning_rate": 1.2225616247682848e-06, + "loss": 16.5292, + "step": 397670 + }, + { + "epoch": 0.8033387605699811, + "grad_norm": 77.66017150878906, + "learning_rate": 1.2223329389930544e-06, + "loss": 25.5775, + "step": 397680 + }, + { + "epoch": 0.8033589612026649, + "grad_norm": 143.2098846435547, + "learning_rate": 1.2221042716297148e-06, + "loss": 9.8505, + "step": 397690 + }, + { + "epoch": 0.8033791618353486, + "grad_norm": 935.0286254882812, + "learning_rate": 1.2218756226793827e-06, + "loss": 15.5926, + "step": 397700 + }, + { + "epoch": 0.8033993624680325, + "grad_norm": 279.19903564453125, + "learning_rate": 1.22164699214317e-06, + "loss": 17.6214, + "step": 397710 + }, + { + "epoch": 0.8034195631007163, + "grad_norm": 439.5152282714844, + "learning_rate": 1.2214183800221906e-06, + "loss": 29.815, + "step": 397720 + }, + { + "epoch": 0.8034397637334001, + "grad_norm": 267.2311096191406, + "learning_rate": 1.2211897863175597e-06, + "loss": 23.3302, + "step": 397730 + }, + { + "epoch": 0.8034599643660839, + "grad_norm": 328.5450439453125, + "learning_rate": 1.2209612110303941e-06, + "loss": 27.6764, + "step": 397740 + }, + { + "epoch": 0.8034801649987677, + "grad_norm": 0.0, + "learning_rate": 1.2207326541618024e-06, + "loss": 16.7296, + "step": 397750 + }, + { + "epoch": 0.8035003656314516, + "grad_norm": 427.4633483886719, + "learning_rate": 1.2205041157129017e-06, + "loss": 20.971, + "step": 397760 + }, + { + "epoch": 0.8035205662641354, + "grad_norm": 569.0933837890625, + "learning_rate": 1.2202755956848067e-06, + "loss": 13.5047, + "step": 397770 + }, + { + "epoch": 0.8035407668968192, + "grad_norm": 410.8171691894531, + "learning_rate": 1.2200470940786302e-06, + "loss": 14.1582, + "step": 
397780 + }, + { + "epoch": 0.803560967529503, + "grad_norm": 77.08499908447266, + "learning_rate": 1.219818610895484e-06, + "loss": 17.0055, + "step": 397790 + }, + { + "epoch": 0.8035811681621868, + "grad_norm": 594.0213623046875, + "learning_rate": 1.2195901461364851e-06, + "loss": 12.5723, + "step": 397800 + }, + { + "epoch": 0.8036013687948707, + "grad_norm": 128.66441345214844, + "learning_rate": 1.2193616998027452e-06, + "loss": 15.1553, + "step": 397810 + }, + { + "epoch": 0.8036215694275545, + "grad_norm": 294.89154052734375, + "learning_rate": 1.2191332718953763e-06, + "loss": 15.3579, + "step": 397820 + }, + { + "epoch": 0.8036417700602383, + "grad_norm": 577.250732421875, + "learning_rate": 1.2189048624154948e-06, + "loss": 20.3436, + "step": 397830 + }, + { + "epoch": 0.8036619706929221, + "grad_norm": 734.5863647460938, + "learning_rate": 1.2186764713642108e-06, + "loss": 18.3468, + "step": 397840 + }, + { + "epoch": 0.8036821713256059, + "grad_norm": 290.3452453613281, + "learning_rate": 1.218448098742641e-06, + "loss": 13.6241, + "step": 397850 + }, + { + "epoch": 0.8037023719582898, + "grad_norm": 194.62033081054688, + "learning_rate": 1.2182197445518946e-06, + "loss": 19.5496, + "step": 397860 + }, + { + "epoch": 0.8037225725909736, + "grad_norm": 214.85061645507812, + "learning_rate": 1.2179914087930884e-06, + "loss": 15.1666, + "step": 397870 + }, + { + "epoch": 0.8037427732236574, + "grad_norm": 273.3961181640625, + "learning_rate": 1.2177630914673327e-06, + "loss": 16.2342, + "step": 397880 + }, + { + "epoch": 0.8037629738563412, + "grad_norm": 601.1372680664062, + "learning_rate": 1.2175347925757397e-06, + "loss": 45.6443, + "step": 397890 + }, + { + "epoch": 0.803783174489025, + "grad_norm": 984.5829467773438, + "learning_rate": 1.217306512119425e-06, + "loss": 20.6048, + "step": 397900 + }, + { + "epoch": 0.8038033751217089, + "grad_norm": 319.5831298828125, + "learning_rate": 1.2170782500994983e-06, + "loss": 27.8044, + "step": 397910 + }, + { + "epoch": 0.8038235757543927, + "grad_norm": 169.63516235351562, + "learning_rate": 1.2168500065170747e-06, + "loss": 12.06, + "step": 397920 + }, + { + "epoch": 0.8038437763870765, + "grad_norm": 168.23828125, + "learning_rate": 1.216621781373265e-06, + "loss": 26.9941, + "step": 397930 + }, + { + "epoch": 0.8038639770197603, + "grad_norm": 262.0617370605469, + "learning_rate": 1.2163935746691807e-06, + "loss": 18.2955, + "step": 397940 + }, + { + "epoch": 0.803884177652444, + "grad_norm": 265.5741271972656, + "learning_rate": 1.216165386405937e-06, + "loss": 17.8595, + "step": 397950 + }, + { + "epoch": 0.8039043782851278, + "grad_norm": 483.8246154785156, + "learning_rate": 1.215937216584644e-06, + "loss": 20.2787, + "step": 397960 + }, + { + "epoch": 0.8039245789178117, + "grad_norm": 505.77178955078125, + "learning_rate": 1.2157090652064124e-06, + "loss": 21.4979, + "step": 397970 + }, + { + "epoch": 0.8039447795504955, + "grad_norm": 43.7061653137207, + "learning_rate": 1.215480932272356e-06, + "loss": 9.9025, + "step": 397980 + }, + { + "epoch": 0.8039649801831793, + "grad_norm": 357.3522033691406, + "learning_rate": 1.2152528177835892e-06, + "loss": 16.7999, + "step": 397990 + }, + { + "epoch": 0.8039851808158631, + "grad_norm": 564.4266967773438, + "learning_rate": 1.2150247217412186e-06, + "loss": 12.643, + "step": 398000 + }, + { + "epoch": 0.8040053814485469, + "grad_norm": 115.50165557861328, + "learning_rate": 1.2147966441463583e-06, + "loss": 19.1571, + "step": 398010 + }, + { + "epoch": 0.8040255820812308, + 
"grad_norm": 363.2052917480469, + "learning_rate": 1.2145685850001216e-06, + "loss": 15.7835, + "step": 398020 + }, + { + "epoch": 0.8040457827139146, + "grad_norm": 247.6719970703125, + "learning_rate": 1.2143405443036182e-06, + "loss": 14.8488, + "step": 398030 + }, + { + "epoch": 0.8040659833465984, + "grad_norm": 190.08120727539062, + "learning_rate": 1.2141125220579585e-06, + "loss": 9.337, + "step": 398040 + }, + { + "epoch": 0.8040861839792822, + "grad_norm": 444.0274963378906, + "learning_rate": 1.2138845182642555e-06, + "loss": 21.3661, + "step": 398050 + }, + { + "epoch": 0.804106384611966, + "grad_norm": 364.2178649902344, + "learning_rate": 1.2136565329236217e-06, + "loss": 13.6055, + "step": 398060 + }, + { + "epoch": 0.8041265852446499, + "grad_norm": 142.36663818359375, + "learning_rate": 1.2134285660371665e-06, + "loss": 6.8863, + "step": 398070 + }, + { + "epoch": 0.8041467858773337, + "grad_norm": 478.2822265625, + "learning_rate": 1.2132006176059997e-06, + "loss": 19.6823, + "step": 398080 + }, + { + "epoch": 0.8041669865100175, + "grad_norm": 169.50241088867188, + "learning_rate": 1.2129726876312348e-06, + "loss": 24.8239, + "step": 398090 + }, + { + "epoch": 0.8041871871427013, + "grad_norm": 357.72119140625, + "learning_rate": 1.2127447761139821e-06, + "loss": 15.6783, + "step": 398100 + }, + { + "epoch": 0.8042073877753851, + "grad_norm": 366.50384521484375, + "learning_rate": 1.2125168830553508e-06, + "loss": 27.5083, + "step": 398110 + }, + { + "epoch": 0.804227588408069, + "grad_norm": 204.8233642578125, + "learning_rate": 1.2122890084564542e-06, + "loss": 12.7033, + "step": 398120 + }, + { + "epoch": 0.8042477890407528, + "grad_norm": 649.1748046875, + "learning_rate": 1.2120611523184e-06, + "loss": 18.9861, + "step": 398130 + }, + { + "epoch": 0.8042679896734366, + "grad_norm": 341.6905212402344, + "learning_rate": 1.2118333146423016e-06, + "loss": 9.5192, + "step": 398140 + }, + { + "epoch": 0.8042881903061204, + "grad_norm": 489.960205078125, + "learning_rate": 1.2116054954292688e-06, + "loss": 29.6769, + "step": 398150 + }, + { + "epoch": 0.8043083909388042, + "grad_norm": 139.4357147216797, + "learning_rate": 1.2113776946804096e-06, + "loss": 36.3575, + "step": 398160 + }, + { + "epoch": 0.8043285915714881, + "grad_norm": 448.1143493652344, + "learning_rate": 1.2111499123968374e-06, + "loss": 15.5382, + "step": 398170 + }, + { + "epoch": 0.8043487922041719, + "grad_norm": 176.39828491210938, + "learning_rate": 1.2109221485796592e-06, + "loss": 25.3181, + "step": 398180 + }, + { + "epoch": 0.8043689928368557, + "grad_norm": 456.3003234863281, + "learning_rate": 1.210694403229989e-06, + "loss": 14.8564, + "step": 398190 + }, + { + "epoch": 0.8043891934695395, + "grad_norm": 422.00701904296875, + "learning_rate": 1.2104666763489326e-06, + "loss": 13.6546, + "step": 398200 + }, + { + "epoch": 0.8044093941022232, + "grad_norm": 217.01031494140625, + "learning_rate": 1.2102389679376037e-06, + "loss": 19.9483, + "step": 398210 + }, + { + "epoch": 0.804429594734907, + "grad_norm": 247.6773223876953, + "learning_rate": 1.2100112779971107e-06, + "loss": 16.3041, + "step": 398220 + }, + { + "epoch": 0.8044497953675909, + "grad_norm": 275.5755310058594, + "learning_rate": 1.2097836065285611e-06, + "loss": 13.8357, + "step": 398230 + }, + { + "epoch": 0.8044699960002747, + "grad_norm": 30.836162567138672, + "learning_rate": 1.2095559535330681e-06, + "loss": 5.0338, + "step": 398240 + }, + { + "epoch": 0.8044901966329585, + "grad_norm": 5.109502792358398, + 
"learning_rate": 1.20932831901174e-06, + "loss": 13.3135, + "step": 398250 + }, + { + "epoch": 0.8045103972656423, + "grad_norm": 451.49810791015625, + "learning_rate": 1.2091007029656843e-06, + "loss": 13.8472, + "step": 398260 + }, + { + "epoch": 0.8045305978983261, + "grad_norm": 332.7293395996094, + "learning_rate": 1.2088731053960118e-06, + "loss": 14.8156, + "step": 398270 + }, + { + "epoch": 0.80455079853101, + "grad_norm": 275.114013671875, + "learning_rate": 1.2086455263038349e-06, + "loss": 11.0916, + "step": 398280 + }, + { + "epoch": 0.8045709991636938, + "grad_norm": 233.9296875, + "learning_rate": 1.2084179656902573e-06, + "loss": 20.5777, + "step": 398290 + }, + { + "epoch": 0.8045911997963776, + "grad_norm": 264.5608215332031, + "learning_rate": 1.2081904235563908e-06, + "loss": 18.5516, + "step": 398300 + }, + { + "epoch": 0.8046114004290614, + "grad_norm": 252.69302368164062, + "learning_rate": 1.2079628999033449e-06, + "loss": 24.9003, + "step": 398310 + }, + { + "epoch": 0.8046316010617452, + "grad_norm": 644.6895141601562, + "learning_rate": 1.2077353947322284e-06, + "loss": 19.7171, + "step": 398320 + }, + { + "epoch": 0.8046518016944291, + "grad_norm": 223.94583129882812, + "learning_rate": 1.2075079080441482e-06, + "loss": 13.4161, + "step": 398330 + }, + { + "epoch": 0.8046720023271129, + "grad_norm": 150.11373901367188, + "learning_rate": 1.207280439840215e-06, + "loss": 9.5801, + "step": 398340 + }, + { + "epoch": 0.8046922029597967, + "grad_norm": 111.24942779541016, + "learning_rate": 1.2070529901215388e-06, + "loss": 19.0224, + "step": 398350 + }, + { + "epoch": 0.8047124035924805, + "grad_norm": 307.4959716796875, + "learning_rate": 1.206825558889224e-06, + "loss": 34.8143, + "step": 398360 + }, + { + "epoch": 0.8047326042251643, + "grad_norm": 467.3067932128906, + "learning_rate": 1.2065981461443815e-06, + "loss": 13.0586, + "step": 398370 + }, + { + "epoch": 0.8047528048578482, + "grad_norm": 515.9276733398438, + "learning_rate": 1.2063707518881207e-06, + "loss": 13.4302, + "step": 398380 + }, + { + "epoch": 0.804773005490532, + "grad_norm": 370.0353088378906, + "learning_rate": 1.206143376121549e-06, + "loss": 27.9527, + "step": 398390 + }, + { + "epoch": 0.8047932061232158, + "grad_norm": 265.7731628417969, + "learning_rate": 1.2059160188457724e-06, + "loss": 13.6677, + "step": 398400 + }, + { + "epoch": 0.8048134067558996, + "grad_norm": 647.421142578125, + "learning_rate": 1.2056886800619028e-06, + "loss": 23.1429, + "step": 398410 + }, + { + "epoch": 0.8048336073885834, + "grad_norm": 576.6586303710938, + "learning_rate": 1.2054613597710463e-06, + "loss": 13.7681, + "step": 398420 + }, + { + "epoch": 0.8048538080212673, + "grad_norm": 287.4187316894531, + "learning_rate": 1.2052340579743093e-06, + "loss": 9.2779, + "step": 398430 + }, + { + "epoch": 0.8048740086539511, + "grad_norm": 80.31151580810547, + "learning_rate": 1.2050067746728033e-06, + "loss": 15.723, + "step": 398440 + }, + { + "epoch": 0.8048942092866349, + "grad_norm": 610.2584228515625, + "learning_rate": 1.2047795098676317e-06, + "loss": 17.7203, + "step": 398450 + }, + { + "epoch": 0.8049144099193186, + "grad_norm": 503.1271057128906, + "learning_rate": 1.2045522635599066e-06, + "loss": 19.5396, + "step": 398460 + }, + { + "epoch": 0.8049346105520024, + "grad_norm": 115.04306030273438, + "learning_rate": 1.204325035750732e-06, + "loss": 16.2918, + "step": 398470 + }, + { + "epoch": 0.8049548111846863, + "grad_norm": 187.89588928222656, + "learning_rate": 1.204097826441218e-06, + 
"loss": 19.2752, + "step": 398480 + }, + { + "epoch": 0.8049750118173701, + "grad_norm": 167.96168518066406, + "learning_rate": 1.2038706356324703e-06, + "loss": 19.6608, + "step": 398490 + }, + { + "epoch": 0.8049952124500539, + "grad_norm": 274.8702087402344, + "learning_rate": 1.203643463325596e-06, + "loss": 11.9754, + "step": 398500 + }, + { + "epoch": 0.8050154130827377, + "grad_norm": 669.0701293945312, + "learning_rate": 1.2034163095217045e-06, + "loss": 15.9731, + "step": 398510 + }, + { + "epoch": 0.8050356137154215, + "grad_norm": 333.1326599121094, + "learning_rate": 1.2031891742218992e-06, + "loss": 20.093, + "step": 398520 + }, + { + "epoch": 0.8050558143481054, + "grad_norm": 161.7942352294922, + "learning_rate": 1.2029620574272916e-06, + "loss": 16.4859, + "step": 398530 + }, + { + "epoch": 0.8050760149807892, + "grad_norm": 116.2698974609375, + "learning_rate": 1.2027349591389858e-06, + "loss": 10.9212, + "step": 398540 + }, + { + "epoch": 0.805096215613473, + "grad_norm": 209.771728515625, + "learning_rate": 1.2025078793580885e-06, + "loss": 16.7754, + "step": 398550 + }, + { + "epoch": 0.8051164162461568, + "grad_norm": 156.68161010742188, + "learning_rate": 1.202280818085708e-06, + "loss": 12.0347, + "step": 398560 + }, + { + "epoch": 0.8051366168788406, + "grad_norm": 148.68417358398438, + "learning_rate": 1.2020537753229506e-06, + "loss": 27.6422, + "step": 398570 + }, + { + "epoch": 0.8051568175115245, + "grad_norm": 169.93348693847656, + "learning_rate": 1.2018267510709208e-06, + "loss": 13.968, + "step": 398580 + }, + { + "epoch": 0.8051770181442083, + "grad_norm": 184.7910614013672, + "learning_rate": 1.201599745330727e-06, + "loss": 15.3194, + "step": 398590 + }, + { + "epoch": 0.8051972187768921, + "grad_norm": 248.0599822998047, + "learning_rate": 1.2013727581034783e-06, + "loss": 14.647, + "step": 398600 + }, + { + "epoch": 0.8052174194095759, + "grad_norm": 319.2702941894531, + "learning_rate": 1.201145789390275e-06, + "loss": 17.0601, + "step": 398610 + }, + { + "epoch": 0.8052376200422597, + "grad_norm": 216.8593292236328, + "learning_rate": 1.2009188391922261e-06, + "loss": 15.7776, + "step": 398620 + }, + { + "epoch": 0.8052578206749436, + "grad_norm": 408.43060302734375, + "learning_rate": 1.2006919075104396e-06, + "loss": 23.2166, + "step": 398630 + }, + { + "epoch": 0.8052780213076274, + "grad_norm": 27.785552978515625, + "learning_rate": 1.20046499434602e-06, + "loss": 26.1633, + "step": 398640 + }, + { + "epoch": 0.8052982219403112, + "grad_norm": 575.4810791015625, + "learning_rate": 1.2002380997000717e-06, + "loss": 24.9866, + "step": 398650 + }, + { + "epoch": 0.805318422572995, + "grad_norm": 637.1857299804688, + "learning_rate": 1.200011223573702e-06, + "loss": 17.8723, + "step": 398660 + }, + { + "epoch": 0.8053386232056788, + "grad_norm": 750.9044189453125, + "learning_rate": 1.1997843659680202e-06, + "loss": 21.2408, + "step": 398670 + }, + { + "epoch": 0.8053588238383627, + "grad_norm": 129.53472900390625, + "learning_rate": 1.1995575268841254e-06, + "loss": 23.3928, + "step": 398680 + }, + { + "epoch": 0.8053790244710465, + "grad_norm": 357.01812744140625, + "learning_rate": 1.1993307063231258e-06, + "loss": 19.5596, + "step": 398690 + }, + { + "epoch": 0.8053992251037303, + "grad_norm": 7249.14892578125, + "learning_rate": 1.199103904286129e-06, + "loss": 32.026, + "step": 398700 + }, + { + "epoch": 0.8054194257364141, + "grad_norm": 414.0624084472656, + "learning_rate": 1.1988771207742388e-06, + "loss": 20.9807, + "step": 398710 + }, + { 
+ "epoch": 0.8054396263690978, + "grad_norm": 689.0746459960938, + "learning_rate": 1.1986503557885587e-06, + "loss": 20.1095, + "step": 398720 + }, + { + "epoch": 0.8054598270017816, + "grad_norm": 152.21286010742188, + "learning_rate": 1.1984236093301976e-06, + "loss": 19.3516, + "step": 398730 + }, + { + "epoch": 0.8054800276344655, + "grad_norm": 255.29714965820312, + "learning_rate": 1.1981968814002576e-06, + "loss": 14.5699, + "step": 398740 + }, + { + "epoch": 0.8055002282671493, + "grad_norm": 528.74560546875, + "learning_rate": 1.1979701719998454e-06, + "loss": 20.1821, + "step": 398750 + }, + { + "epoch": 0.8055204288998331, + "grad_norm": 278.7982482910156, + "learning_rate": 1.1977434811300664e-06, + "loss": 16.4397, + "step": 398760 + }, + { + "epoch": 0.8055406295325169, + "grad_norm": 609.253662109375, + "learning_rate": 1.1975168087920226e-06, + "loss": 13.1578, + "step": 398770 + }, + { + "epoch": 0.8055608301652007, + "grad_norm": 339.8735656738281, + "learning_rate": 1.1972901549868222e-06, + "loss": 15.2729, + "step": 398780 + }, + { + "epoch": 0.8055810307978846, + "grad_norm": 65.78746795654297, + "learning_rate": 1.1970635197155671e-06, + "loss": 24.322, + "step": 398790 + }, + { + "epoch": 0.8056012314305684, + "grad_norm": 178.88465881347656, + "learning_rate": 1.1968369029793642e-06, + "loss": 22.919, + "step": 398800 + }, + { + "epoch": 0.8056214320632522, + "grad_norm": 844.7586669921875, + "learning_rate": 1.1966103047793158e-06, + "loss": 24.4495, + "step": 398810 + }, + { + "epoch": 0.805641632695936, + "grad_norm": 152.448486328125, + "learning_rate": 1.196383725116529e-06, + "loss": 24.2517, + "step": 398820 + }, + { + "epoch": 0.8056618333286198, + "grad_norm": 318.3756408691406, + "learning_rate": 1.1961571639921066e-06, + "loss": 17.6758, + "step": 398830 + }, + { + "epoch": 0.8056820339613037, + "grad_norm": 178.51841735839844, + "learning_rate": 1.1959306214071508e-06, + "loss": 42.8888, + "step": 398840 + }, + { + "epoch": 0.8057022345939875, + "grad_norm": 289.67938232421875, + "learning_rate": 1.1957040973627698e-06, + "loss": 23.0294, + "step": 398850 + }, + { + "epoch": 0.8057224352266713, + "grad_norm": 182.93289184570312, + "learning_rate": 1.1954775918600658e-06, + "loss": 15.1336, + "step": 398860 + }, + { + "epoch": 0.8057426358593551, + "grad_norm": 261.6667785644531, + "learning_rate": 1.1952511049001407e-06, + "loss": 27.8058, + "step": 398870 + }, + { + "epoch": 0.805762836492039, + "grad_norm": 906.7447509765625, + "learning_rate": 1.1950246364841005e-06, + "loss": 20.0649, + "step": 398880 + }, + { + "epoch": 0.8057830371247228, + "grad_norm": 383.3402404785156, + "learning_rate": 1.1947981866130515e-06, + "loss": 9.2169, + "step": 398890 + }, + { + "epoch": 0.8058032377574066, + "grad_norm": 560.3885498046875, + "learning_rate": 1.1945717552880919e-06, + "loss": 14.4276, + "step": 398900 + }, + { + "epoch": 0.8058234383900904, + "grad_norm": 416.9820861816406, + "learning_rate": 1.194345342510328e-06, + "loss": 19.4729, + "step": 398910 + }, + { + "epoch": 0.8058436390227742, + "grad_norm": 204.42552185058594, + "learning_rate": 1.1941189482808645e-06, + "loss": 13.8222, + "step": 398920 + }, + { + "epoch": 0.805863839655458, + "grad_norm": 523.3088989257812, + "learning_rate": 1.193892572600804e-06, + "loss": 21.5251, + "step": 398930 + }, + { + "epoch": 0.8058840402881419, + "grad_norm": 207.76956176757812, + "learning_rate": 1.1936662154712475e-06, + "loss": 9.1551, + "step": 398940 + }, + { + "epoch": 0.8059042409208257, + 
"grad_norm": 422.6136169433594, + "learning_rate": 1.193439876893301e-06, + "loss": 12.6284, + "step": 398950 + }, + { + "epoch": 0.8059244415535095, + "grad_norm": 859.187255859375, + "learning_rate": 1.1932135568680691e-06, + "loss": 30.7848, + "step": 398960 + }, + { + "epoch": 0.8059446421861932, + "grad_norm": 398.545166015625, + "learning_rate": 1.1929872553966497e-06, + "loss": 13.7408, + "step": 398970 + }, + { + "epoch": 0.805964842818877, + "grad_norm": 436.9729919433594, + "learning_rate": 1.1927609724801492e-06, + "loss": 24.225, + "step": 398980 + }, + { + "epoch": 0.8059850434515609, + "grad_norm": 479.7727966308594, + "learning_rate": 1.1925347081196709e-06, + "loss": 14.9331, + "step": 398990 + }, + { + "epoch": 0.8060052440842447, + "grad_norm": 510.92584228515625, + "learning_rate": 1.1923084623163172e-06, + "loss": 15.4965, + "step": 399000 + }, + { + "epoch": 0.8060254447169285, + "grad_norm": 109.71276092529297, + "learning_rate": 1.192082235071188e-06, + "loss": 13.8335, + "step": 399010 + }, + { + "epoch": 0.8060456453496123, + "grad_norm": 846.4888305664062, + "learning_rate": 1.1918560263853902e-06, + "loss": 21.4266, + "step": 399020 + }, + { + "epoch": 0.8060658459822961, + "grad_norm": 16.963756561279297, + "learning_rate": 1.1916298362600243e-06, + "loss": 13.8133, + "step": 399030 + }, + { + "epoch": 0.80608604661498, + "grad_norm": 550.7333984375, + "learning_rate": 1.1914036646961907e-06, + "loss": 17.8056, + "step": 399040 + }, + { + "epoch": 0.8061062472476638, + "grad_norm": 31.84421730041504, + "learning_rate": 1.1911775116949958e-06, + "loss": 9.0356, + "step": 399050 + }, + { + "epoch": 0.8061264478803476, + "grad_norm": 362.2513427734375, + "learning_rate": 1.1909513772575383e-06, + "loss": 13.9265, + "step": 399060 + }, + { + "epoch": 0.8061466485130314, + "grad_norm": 150.13502502441406, + "learning_rate": 1.1907252613849224e-06, + "loss": 16.7787, + "step": 399070 + }, + { + "epoch": 0.8061668491457152, + "grad_norm": 151.5859832763672, + "learning_rate": 1.1904991640782487e-06, + "loss": 20.1903, + "step": 399080 + }, + { + "epoch": 0.8061870497783991, + "grad_norm": 353.0852966308594, + "learning_rate": 1.190273085338622e-06, + "loss": 16.6556, + "step": 399090 + }, + { + "epoch": 0.8062072504110829, + "grad_norm": 341.1142578125, + "learning_rate": 1.1900470251671415e-06, + "loss": 19.0918, + "step": 399100 + }, + { + "epoch": 0.8062274510437667, + "grad_norm": 312.563232421875, + "learning_rate": 1.1898209835649083e-06, + "loss": 13.2726, + "step": 399110 + }, + { + "epoch": 0.8062476516764505, + "grad_norm": 8.31265926361084, + "learning_rate": 1.189594960533027e-06, + "loss": 11.9841, + "step": 399120 + }, + { + "epoch": 0.8062678523091343, + "grad_norm": 277.1001892089844, + "learning_rate": 1.1893689560725963e-06, + "loss": 10.1152, + "step": 399130 + }, + { + "epoch": 0.8062880529418182, + "grad_norm": 129.29551696777344, + "learning_rate": 1.1891429701847207e-06, + "loss": 19.1193, + "step": 399140 + }, + { + "epoch": 0.806308253574502, + "grad_norm": 309.3577575683594, + "learning_rate": 1.1889170028705e-06, + "loss": 26.0598, + "step": 399150 + }, + { + "epoch": 0.8063284542071858, + "grad_norm": 761.1845092773438, + "learning_rate": 1.1886910541310342e-06, + "loss": 20.8598, + "step": 399160 + }, + { + "epoch": 0.8063486548398696, + "grad_norm": 447.2908935546875, + "learning_rate": 1.1884651239674272e-06, + "loss": 24.8396, + "step": 399170 + }, + { + "epoch": 0.8063688554725534, + "grad_norm": 429.6351623535156, + "learning_rate": 
1.188239212380779e-06, + "loss": 28.037, + "step": 399180 + }, + { + "epoch": 0.8063890561052373, + "grad_norm": 589.7367553710938, + "learning_rate": 1.1880133193721893e-06, + "loss": 8.272, + "step": 399190 + }, + { + "epoch": 0.8064092567379211, + "grad_norm": 292.37335205078125, + "learning_rate": 1.18778744494276e-06, + "loss": 13.0333, + "step": 399200 + }, + { + "epoch": 0.8064294573706049, + "grad_norm": 313.3642272949219, + "learning_rate": 1.1875615890935954e-06, + "loss": 18.613, + "step": 399210 + }, + { + "epoch": 0.8064496580032887, + "grad_norm": 112.42382049560547, + "learning_rate": 1.1873357518257905e-06, + "loss": 4.7573, + "step": 399220 + }, + { + "epoch": 0.8064698586359724, + "grad_norm": 40.10010528564453, + "learning_rate": 1.187109933140449e-06, + "loss": 5.4787, + "step": 399230 + }, + { + "epoch": 0.8064900592686562, + "grad_norm": 134.88690185546875, + "learning_rate": 1.186884133038672e-06, + "loss": 13.5613, + "step": 399240 + }, + { + "epoch": 0.8065102599013401, + "grad_norm": 177.24685668945312, + "learning_rate": 1.1866583515215597e-06, + "loss": 14.959, + "step": 399250 + }, + { + "epoch": 0.8065304605340239, + "grad_norm": 634.709716796875, + "learning_rate": 1.18643258859021e-06, + "loss": 18.0084, + "step": 399260 + }, + { + "epoch": 0.8065506611667077, + "grad_norm": 275.6827697753906, + "learning_rate": 1.1862068442457264e-06, + "loss": 10.1867, + "step": 399270 + }, + { + "epoch": 0.8065708617993915, + "grad_norm": 105.64037322998047, + "learning_rate": 1.18598111848921e-06, + "loss": 10.9114, + "step": 399280 + }, + { + "epoch": 0.8065910624320753, + "grad_norm": 290.1683044433594, + "learning_rate": 1.1857554113217568e-06, + "loss": 23.3434, + "step": 399290 + }, + { + "epoch": 0.8066112630647592, + "grad_norm": 15.911661148071289, + "learning_rate": 1.185529722744469e-06, + "loss": 14.5655, + "step": 399300 + }, + { + "epoch": 0.806631463697443, + "grad_norm": 416.8236389160156, + "learning_rate": 1.1853040527584475e-06, + "loss": 16.1546, + "step": 399310 + }, + { + "epoch": 0.8066516643301268, + "grad_norm": 84.52220153808594, + "learning_rate": 1.185078401364792e-06, + "loss": 19.1157, + "step": 399320 + }, + { + "epoch": 0.8066718649628106, + "grad_norm": 414.135986328125, + "learning_rate": 1.1848527685646e-06, + "loss": 13.6018, + "step": 399330 + }, + { + "epoch": 0.8066920655954944, + "grad_norm": 163.0150604248047, + "learning_rate": 1.1846271543589743e-06, + "loss": 17.9266, + "step": 399340 + }, + { + "epoch": 0.8067122662281783, + "grad_norm": 361.9199523925781, + "learning_rate": 1.1844015587490138e-06, + "loss": 20.8676, + "step": 399350 + }, + { + "epoch": 0.8067324668608621, + "grad_norm": 497.72503662109375, + "learning_rate": 1.184175981735815e-06, + "loss": 17.6227, + "step": 399360 + }, + { + "epoch": 0.8067526674935459, + "grad_norm": 197.9222869873047, + "learning_rate": 1.18395042332048e-06, + "loss": 29.214, + "step": 399370 + }, + { + "epoch": 0.8067728681262297, + "grad_norm": 170.6032257080078, + "learning_rate": 1.1837248835041093e-06, + "loss": 14.2644, + "step": 399380 + }, + { + "epoch": 0.8067930687589135, + "grad_norm": 610.2376098632812, + "learning_rate": 1.1834993622878004e-06, + "loss": 21.1365, + "step": 399390 + }, + { + "epoch": 0.8068132693915974, + "grad_norm": 298.6968994140625, + "learning_rate": 1.1832738596726518e-06, + "loss": 19.939, + "step": 399400 + }, + { + "epoch": 0.8068334700242812, + "grad_norm": 109.22898864746094, + "learning_rate": 1.1830483756597643e-06, + "loss": 26.7395, + "step": 
399410 + }, + { + "epoch": 0.806853670656965, + "grad_norm": 201.33030700683594, + "learning_rate": 1.1828229102502364e-06, + "loss": 11.6022, + "step": 399420 + }, + { + "epoch": 0.8068738712896488, + "grad_norm": 23.997217178344727, + "learning_rate": 1.1825974634451653e-06, + "loss": 22.8611, + "step": 399430 + }, + { + "epoch": 0.8068940719223326, + "grad_norm": 391.8190002441406, + "learning_rate": 1.1823720352456525e-06, + "loss": 13.7551, + "step": 399440 + }, + { + "epoch": 0.8069142725550165, + "grad_norm": 442.6551818847656, + "learning_rate": 1.1821466256527942e-06, + "loss": 10.4822, + "step": 399450 + }, + { + "epoch": 0.8069344731877003, + "grad_norm": 331.6322937011719, + "learning_rate": 1.181921234667691e-06, + "loss": 7.8176, + "step": 399460 + }, + { + "epoch": 0.8069546738203841, + "grad_norm": 204.58738708496094, + "learning_rate": 1.181695862291441e-06, + "loss": 22.6816, + "step": 399470 + }, + { + "epoch": 0.8069748744530679, + "grad_norm": 294.2364196777344, + "learning_rate": 1.181470508525141e-06, + "loss": 23.8682, + "step": 399480 + }, + { + "epoch": 0.8069950750857516, + "grad_norm": 19.0340576171875, + "learning_rate": 1.1812451733698905e-06, + "loss": 17.3152, + "step": 399490 + }, + { + "epoch": 0.8070152757184355, + "grad_norm": 401.44287109375, + "learning_rate": 1.1810198568267906e-06, + "loss": 27.1681, + "step": 399500 + }, + { + "epoch": 0.8070354763511193, + "grad_norm": 706.4053955078125, + "learning_rate": 1.180794558896934e-06, + "loss": 17.0426, + "step": 399510 + }, + { + "epoch": 0.8070556769838031, + "grad_norm": 1950.119873046875, + "learning_rate": 1.180569279581421e-06, + "loss": 28.3513, + "step": 399520 + }, + { + "epoch": 0.8070758776164869, + "grad_norm": 367.019287109375, + "learning_rate": 1.1803440188813526e-06, + "loss": 24.6241, + "step": 399530 + }, + { + "epoch": 0.8070960782491707, + "grad_norm": 414.6794128417969, + "learning_rate": 1.1801187767978234e-06, + "loss": 27.6976, + "step": 399540 + }, + { + "epoch": 0.8071162788818546, + "grad_norm": 26.97930145263672, + "learning_rate": 1.1798935533319305e-06, + "loss": 10.439, + "step": 399550 + }, + { + "epoch": 0.8071364795145384, + "grad_norm": 623.3947143554688, + "learning_rate": 1.1796683484847731e-06, + "loss": 21.4734, + "step": 399560 + }, + { + "epoch": 0.8071566801472222, + "grad_norm": 130.07791137695312, + "learning_rate": 1.179443162257452e-06, + "loss": 18.3431, + "step": 399570 + }, + { + "epoch": 0.807176880779906, + "grad_norm": 334.9212341308594, + "learning_rate": 1.179217994651059e-06, + "loss": 11.5006, + "step": 399580 + }, + { + "epoch": 0.8071970814125898, + "grad_norm": 210.55154418945312, + "learning_rate": 1.1789928456666933e-06, + "loss": 23.3158, + "step": 399590 + }, + { + "epoch": 0.8072172820452737, + "grad_norm": 448.5078125, + "learning_rate": 1.178767715305455e-06, + "loss": 21.8411, + "step": 399600 + }, + { + "epoch": 0.8072374826779575, + "grad_norm": 56.96674346923828, + "learning_rate": 1.1785426035684395e-06, + "loss": 30.0421, + "step": 399610 + }, + { + "epoch": 0.8072576833106413, + "grad_norm": 487.0782470703125, + "learning_rate": 1.1783175104567418e-06, + "loss": 23.2399, + "step": 399620 + }, + { + "epoch": 0.8072778839433251, + "grad_norm": 235.6630096435547, + "learning_rate": 1.178092435971463e-06, + "loss": 14.6871, + "step": 399630 + }, + { + "epoch": 0.8072980845760089, + "grad_norm": 367.70233154296875, + "learning_rate": 1.177867380113698e-06, + "loss": 20.5984, + "step": 399640 + }, + { + "epoch": 0.8073182852086928, + 
"grad_norm": 541.5439453125, + "learning_rate": 1.1776423428845423e-06, + "loss": 29.2618, + "step": 399650 + }, + { + "epoch": 0.8073384858413766, + "grad_norm": 537.5186767578125, + "learning_rate": 1.1774173242850955e-06, + "loss": 17.9111, + "step": 399660 + }, + { + "epoch": 0.8073586864740604, + "grad_norm": 301.9331359863281, + "learning_rate": 1.1771923243164518e-06, + "loss": 14.7783, + "step": 399670 + }, + { + "epoch": 0.8073788871067442, + "grad_norm": 402.8861083984375, + "learning_rate": 1.1769673429797107e-06, + "loss": 21.9451, + "step": 399680 + }, + { + "epoch": 0.807399087739428, + "grad_norm": 199.38743591308594, + "learning_rate": 1.1767423802759653e-06, + "loss": 11.2573, + "step": 399690 + }, + { + "epoch": 0.8074192883721119, + "grad_norm": 439.7703857421875, + "learning_rate": 1.1765174362063152e-06, + "loss": 11.169, + "step": 399700 + }, + { + "epoch": 0.8074394890047957, + "grad_norm": 24.274436950683594, + "learning_rate": 1.1762925107718558e-06, + "loss": 20.235, + "step": 399710 + }, + { + "epoch": 0.8074596896374795, + "grad_norm": 223.40164184570312, + "learning_rate": 1.1760676039736813e-06, + "loss": 17.247, + "step": 399720 + }, + { + "epoch": 0.8074798902701633, + "grad_norm": 638.76513671875, + "learning_rate": 1.175842715812891e-06, + "loss": 24.7132, + "step": 399730 + }, + { + "epoch": 0.807500090902847, + "grad_norm": 561.298828125, + "learning_rate": 1.1756178462905782e-06, + "loss": 20.7884, + "step": 399740 + }, + { + "epoch": 0.8075202915355308, + "grad_norm": 882.0572509765625, + "learning_rate": 1.1753929954078414e-06, + "loss": 18.2897, + "step": 399750 + }, + { + "epoch": 0.8075404921682147, + "grad_norm": 403.7325439453125, + "learning_rate": 1.1751681631657752e-06, + "loss": 21.9839, + "step": 399760 + }, + { + "epoch": 0.8075606928008985, + "grad_norm": 214.2028045654297, + "learning_rate": 1.1749433495654743e-06, + "loss": 19.8607, + "step": 399770 + }, + { + "epoch": 0.8075808934335823, + "grad_norm": 526.0719604492188, + "learning_rate": 1.174718554608037e-06, + "loss": 18.6298, + "step": 399780 + }, + { + "epoch": 0.8076010940662661, + "grad_norm": 233.352783203125, + "learning_rate": 1.174493778294557e-06, + "loss": 11.2677, + "step": 399790 + }, + { + "epoch": 0.8076212946989499, + "grad_norm": 390.91180419921875, + "learning_rate": 1.1742690206261293e-06, + "loss": 19.352, + "step": 399800 + }, + { + "epoch": 0.8076414953316338, + "grad_norm": 664.2025146484375, + "learning_rate": 1.1740442816038505e-06, + "loss": 25.8564, + "step": 399810 + }, + { + "epoch": 0.8076616959643176, + "grad_norm": 518.6954345703125, + "learning_rate": 1.173819561228819e-06, + "loss": 16.0014, + "step": 399820 + }, + { + "epoch": 0.8076818965970014, + "grad_norm": 183.4699249267578, + "learning_rate": 1.1735948595021234e-06, + "loss": 19.0936, + "step": 399830 + }, + { + "epoch": 0.8077020972296852, + "grad_norm": 551.70263671875, + "learning_rate": 1.1733701764248623e-06, + "loss": 24.6808, + "step": 399840 + }, + { + "epoch": 0.807722297862369, + "grad_norm": 214.64559936523438, + "learning_rate": 1.1731455119981327e-06, + "loss": 14.4356, + "step": 399850 + }, + { + "epoch": 0.8077424984950529, + "grad_norm": 281.9747314453125, + "learning_rate": 1.1729208662230273e-06, + "loss": 19.6334, + "step": 399860 + }, + { + "epoch": 0.8077626991277367, + "grad_norm": 78.07125854492188, + "learning_rate": 1.1726962391006409e-06, + "loss": 21.6389, + "step": 399870 + }, + { + "epoch": 0.8077828997604205, + "grad_norm": 382.7174072265625, + "learning_rate": 
1.1724716306320676e-06, + "loss": 19.6051, + "step": 399880 + }, + { + "epoch": 0.8078031003931043, + "grad_norm": 53.93277359008789, + "learning_rate": 1.1722470408184072e-06, + "loss": 13.8435, + "step": 399890 + }, + { + "epoch": 0.8078233010257881, + "grad_norm": 235.94053649902344, + "learning_rate": 1.1720224696607474e-06, + "loss": 11.6338, + "step": 399900 + }, + { + "epoch": 0.807843501658472, + "grad_norm": 1165.390380859375, + "learning_rate": 1.1717979171601857e-06, + "loss": 19.0239, + "step": 399910 + }, + { + "epoch": 0.8078637022911558, + "grad_norm": 440.1308288574219, + "learning_rate": 1.1715733833178178e-06, + "loss": 21.0809, + "step": 399920 + }, + { + "epoch": 0.8078839029238396, + "grad_norm": 807.2807006835938, + "learning_rate": 1.1713488681347375e-06, + "loss": 22.7856, + "step": 399930 + }, + { + "epoch": 0.8079041035565234, + "grad_norm": 48.46335220336914, + "learning_rate": 1.1711243716120363e-06, + "loss": 40.0194, + "step": 399940 + }, + { + "epoch": 0.8079243041892072, + "grad_norm": 275.75421142578125, + "learning_rate": 1.1708998937508126e-06, + "loss": 8.6214, + "step": 399950 + }, + { + "epoch": 0.8079445048218911, + "grad_norm": 530.797607421875, + "learning_rate": 1.1706754345521582e-06, + "loss": 19.6889, + "step": 399960 + }, + { + "epoch": 0.8079647054545749, + "grad_norm": 582.1223754882812, + "learning_rate": 1.1704509940171655e-06, + "loss": 21.8478, + "step": 399970 + }, + { + "epoch": 0.8079849060872587, + "grad_norm": 341.1997985839844, + "learning_rate": 1.1702265721469302e-06, + "loss": 8.5072, + "step": 399980 + }, + { + "epoch": 0.8080051067199425, + "grad_norm": 475.842041015625, + "learning_rate": 1.1700021689425478e-06, + "loss": 23.2782, + "step": 399990 + }, + { + "epoch": 0.8080253073526262, + "grad_norm": 137.1321258544922, + "learning_rate": 1.1697777844051105e-06, + "loss": 19.8697, + "step": 400000 + }, + { + "epoch": 0.80804550798531, + "grad_norm": 691.2139282226562, + "learning_rate": 1.16955341853571e-06, + "loss": 24.2845, + "step": 400010 + }, + { + "epoch": 0.8080657086179939, + "grad_norm": 349.56134033203125, + "learning_rate": 1.1693290713354433e-06, + "loss": 14.356, + "step": 400020 + }, + { + "epoch": 0.8080859092506777, + "grad_norm": 176.1207733154297, + "learning_rate": 1.169104742805402e-06, + "loss": 8.2134, + "step": 400030 + }, + { + "epoch": 0.8081061098833615, + "grad_norm": 275.6594543457031, + "learning_rate": 1.168880432946678e-06, + "loss": 9.6885, + "step": 400040 + }, + { + "epoch": 0.8081263105160453, + "grad_norm": 452.2191162109375, + "learning_rate": 1.1686561417603677e-06, + "loss": 19.1874, + "step": 400050 + }, + { + "epoch": 0.8081465111487292, + "grad_norm": 269.15997314453125, + "learning_rate": 1.168431869247561e-06, + "loss": 18.2211, + "step": 400060 + }, + { + "epoch": 0.808166711781413, + "grad_norm": 253.88807678222656, + "learning_rate": 1.1682076154093542e-06, + "loss": 36.2333, + "step": 400070 + }, + { + "epoch": 0.8081869124140968, + "grad_norm": 351.63763427734375, + "learning_rate": 1.1679833802468387e-06, + "loss": 12.8735, + "step": 400080 + }, + { + "epoch": 0.8082071130467806, + "grad_norm": 316.8739013671875, + "learning_rate": 1.1677591637611057e-06, + "loss": 14.9047, + "step": 400090 + }, + { + "epoch": 0.8082273136794644, + "grad_norm": 185.6357879638672, + "learning_rate": 1.1675349659532514e-06, + "loss": 26.8589, + "step": 400100 + }, + { + "epoch": 0.8082475143121483, + "grad_norm": 557.3394165039062, + "learning_rate": 1.1673107868243672e-06, + "loss": 16.9194, + 
"step": 400110 + }, + { + "epoch": 0.8082677149448321, + "grad_norm": 424.20867919921875, + "learning_rate": 1.1670866263755437e-06, + "loss": 26.7866, + "step": 400120 + }, + { + "epoch": 0.8082879155775159, + "grad_norm": 773.7348022460938, + "learning_rate": 1.1668624846078752e-06, + "loss": 25.3745, + "step": 400130 + }, + { + "epoch": 0.8083081162101997, + "grad_norm": 149.3612060546875, + "learning_rate": 1.1666383615224553e-06, + "loss": 15.7016, + "step": 400140 + }, + { + "epoch": 0.8083283168428835, + "grad_norm": 305.80084228515625, + "learning_rate": 1.1664142571203751e-06, + "loss": 16.0033, + "step": 400150 + }, + { + "epoch": 0.8083485174755674, + "grad_norm": 216.41941833496094, + "learning_rate": 1.1661901714027258e-06, + "loss": 11.1485, + "step": 400160 + }, + { + "epoch": 0.8083687181082512, + "grad_norm": 624.4144287109375, + "learning_rate": 1.1659661043706e-06, + "loss": 21.9399, + "step": 400170 + }, + { + "epoch": 0.808388918740935, + "grad_norm": 200.66453552246094, + "learning_rate": 1.1657420560250938e-06, + "loss": 18.7956, + "step": 400180 + }, + { + "epoch": 0.8084091193736188, + "grad_norm": 195.721923828125, + "learning_rate": 1.1655180263672928e-06, + "loss": 16.7465, + "step": 400190 + }, + { + "epoch": 0.8084293200063026, + "grad_norm": 199.95681762695312, + "learning_rate": 1.1652940153982917e-06, + "loss": 8.8618, + "step": 400200 + }, + { + "epoch": 0.8084495206389865, + "grad_norm": 287.88494873046875, + "learning_rate": 1.1650700231191842e-06, + "loss": 10.2409, + "step": 400210 + }, + { + "epoch": 0.8084697212716703, + "grad_norm": 242.36024475097656, + "learning_rate": 1.16484604953106e-06, + "loss": 16.0757, + "step": 400220 + }, + { + "epoch": 0.8084899219043541, + "grad_norm": 392.25262451171875, + "learning_rate": 1.1646220946350095e-06, + "loss": 15.1735, + "step": 400230 + }, + { + "epoch": 0.8085101225370379, + "grad_norm": 371.1950378417969, + "learning_rate": 1.1643981584321273e-06, + "loss": 16.5909, + "step": 400240 + }, + { + "epoch": 0.8085303231697216, + "grad_norm": 115.65122985839844, + "learning_rate": 1.164174240923503e-06, + "loss": 28.1685, + "step": 400250 + }, + { + "epoch": 0.8085505238024054, + "grad_norm": 267.21051025390625, + "learning_rate": 1.1639503421102272e-06, + "loss": 21.4339, + "step": 400260 + }, + { + "epoch": 0.8085707244350893, + "grad_norm": 256.7766418457031, + "learning_rate": 1.1637264619933936e-06, + "loss": 16.3704, + "step": 400270 + }, + { + "epoch": 0.8085909250677731, + "grad_norm": 255.3925018310547, + "learning_rate": 1.1635026005740902e-06, + "loss": 7.5286, + "step": 400280 + }, + { + "epoch": 0.8086111257004569, + "grad_norm": 560.7896728515625, + "learning_rate": 1.1632787578534116e-06, + "loss": 20.2387, + "step": 400290 + }, + { + "epoch": 0.8086313263331407, + "grad_norm": 715.2000122070312, + "learning_rate": 1.1630549338324454e-06, + "loss": 14.4149, + "step": 400300 + }, + { + "epoch": 0.8086515269658245, + "grad_norm": 468.9555358886719, + "learning_rate": 1.1628311285122857e-06, + "loss": 16.9689, + "step": 400310 + }, + { + "epoch": 0.8086717275985084, + "grad_norm": 799.9161376953125, + "learning_rate": 1.1626073418940214e-06, + "loss": 22.3565, + "step": 400320 + }, + { + "epoch": 0.8086919282311922, + "grad_norm": 234.2010498046875, + "learning_rate": 1.162383573978742e-06, + "loss": 25.0959, + "step": 400330 + }, + { + "epoch": 0.808712128863876, + "grad_norm": 352.39031982421875, + "learning_rate": 1.1621598247675415e-06, + "loss": 22.739, + "step": 400340 + }, + { + "epoch": 
0.8087323294965598, + "grad_norm": 591.6005859375, + "learning_rate": 1.1619360942615065e-06, + "loss": 22.8182, + "step": 400350 + }, + { + "epoch": 0.8087525301292436, + "grad_norm": 708.0045166015625, + "learning_rate": 1.1617123824617315e-06, + "loss": 26.2195, + "step": 400360 + }, + { + "epoch": 0.8087727307619275, + "grad_norm": 620.4705810546875, + "learning_rate": 1.1614886893693044e-06, + "loss": 24.1007, + "step": 400370 + }, + { + "epoch": 0.8087929313946113, + "grad_norm": 416.3150939941406, + "learning_rate": 1.1612650149853144e-06, + "loss": 28.3288, + "step": 400380 + }, + { + "epoch": 0.8088131320272951, + "grad_norm": 327.38043212890625, + "learning_rate": 1.161041359310855e-06, + "loss": 10.2856, + "step": 400390 + }, + { + "epoch": 0.8088333326599789, + "grad_norm": 266.3319091796875, + "learning_rate": 1.160817722347014e-06, + "loss": 15.2799, + "step": 400400 + }, + { + "epoch": 0.8088535332926627, + "grad_norm": 352.69488525390625, + "learning_rate": 1.1605941040948803e-06, + "loss": 13.1876, + "step": 400410 + }, + { + "epoch": 0.8088737339253466, + "grad_norm": 291.1202087402344, + "learning_rate": 1.1603705045555457e-06, + "loss": 13.842, + "step": 400420 + }, + { + "epoch": 0.8088939345580304, + "grad_norm": 369.0484924316406, + "learning_rate": 1.160146923730101e-06, + "loss": 15.03, + "step": 400430 + }, + { + "epoch": 0.8089141351907142, + "grad_norm": 0.0, + "learning_rate": 1.1599233616196343e-06, + "loss": 13.4674, + "step": 400440 + }, + { + "epoch": 0.808934335823398, + "grad_norm": 153.3062744140625, + "learning_rate": 1.159699818225234e-06, + "loss": 9.7743, + "step": 400450 + }, + { + "epoch": 0.8089545364560818, + "grad_norm": 529.4329223632812, + "learning_rate": 1.159476293547992e-06, + "loss": 18.5159, + "step": 400460 + }, + { + "epoch": 0.8089747370887657, + "grad_norm": 374.7325744628906, + "learning_rate": 1.1592527875889969e-06, + "loss": 15.755, + "step": 400470 + }, + { + "epoch": 0.8089949377214495, + "grad_norm": 301.7434387207031, + "learning_rate": 1.159029300349337e-06, + "loss": 26.1856, + "step": 400480 + }, + { + "epoch": 0.8090151383541333, + "grad_norm": 547.7313232421875, + "learning_rate": 1.1588058318301021e-06, + "loss": 14.8381, + "step": 400490 + }, + { + "epoch": 0.8090353389868171, + "grad_norm": 93.29722595214844, + "learning_rate": 1.1585823820323845e-06, + "loss": 9.381, + "step": 400500 + }, + { + "epoch": 0.8090555396195008, + "grad_norm": 110.5420913696289, + "learning_rate": 1.1583589509572679e-06, + "loss": 11.4279, + "step": 400510 + }, + { + "epoch": 0.8090757402521846, + "grad_norm": 319.33612060546875, + "learning_rate": 1.1581355386058434e-06, + "loss": 9.2072, + "step": 400520 + }, + { + "epoch": 0.8090959408848685, + "grad_norm": 369.46038818359375, + "learning_rate": 1.1579121449792018e-06, + "loss": 27.1392, + "step": 400530 + }, + { + "epoch": 0.8091161415175523, + "grad_norm": 140.03639221191406, + "learning_rate": 1.1576887700784307e-06, + "loss": 17.1667, + "step": 400540 + }, + { + "epoch": 0.8091363421502361, + "grad_norm": 458.397705078125, + "learning_rate": 1.1574654139046171e-06, + "loss": 27.9674, + "step": 400550 + }, + { + "epoch": 0.8091565427829199, + "grad_norm": 206.59007263183594, + "learning_rate": 1.1572420764588522e-06, + "loss": 9.2913, + "step": 400560 + }, + { + "epoch": 0.8091767434156037, + "grad_norm": 164.22911071777344, + "learning_rate": 1.1570187577422237e-06, + "loss": 11.4083, + "step": 400570 + }, + { + "epoch": 0.8091969440482876, + "grad_norm": 468.7096862792969, + 
"learning_rate": 1.1567954577558177e-06, + "loss": 11.0282, + "step": 400580 + }, + { + "epoch": 0.8092171446809714, + "grad_norm": 776.8186645507812, + "learning_rate": 1.1565721765007247e-06, + "loss": 32.2028, + "step": 400590 + }, + { + "epoch": 0.8092373453136552, + "grad_norm": 215.9740753173828, + "learning_rate": 1.1563489139780344e-06, + "loss": 17.1194, + "step": 400600 + }, + { + "epoch": 0.809257545946339, + "grad_norm": 386.4876708984375, + "learning_rate": 1.1561256701888335e-06, + "loss": 17.8536, + "step": 400610 + }, + { + "epoch": 0.8092777465790228, + "grad_norm": 869.1632690429688, + "learning_rate": 1.1559024451342082e-06, + "loss": 26.9815, + "step": 400620 + }, + { + "epoch": 0.8092979472117067, + "grad_norm": 1911.39697265625, + "learning_rate": 1.1556792388152494e-06, + "loss": 32.4074, + "step": 400630 + }, + { + "epoch": 0.8093181478443905, + "grad_norm": 314.6783752441406, + "learning_rate": 1.1554560512330437e-06, + "loss": 16.0918, + "step": 400640 + }, + { + "epoch": 0.8093383484770743, + "grad_norm": 484.771484375, + "learning_rate": 1.1552328823886776e-06, + "loss": 18.9869, + "step": 400650 + }, + { + "epoch": 0.8093585491097581, + "grad_norm": 294.3255920410156, + "learning_rate": 1.155009732283242e-06, + "loss": 16.7085, + "step": 400660 + }, + { + "epoch": 0.809378749742442, + "grad_norm": 336.3222961425781, + "learning_rate": 1.1547866009178204e-06, + "loss": 23.7809, + "step": 400670 + }, + { + "epoch": 0.8093989503751258, + "grad_norm": 173.28057861328125, + "learning_rate": 1.1545634882935048e-06, + "loss": 29.9988, + "step": 400680 + }, + { + "epoch": 0.8094191510078096, + "grad_norm": 137.8450469970703, + "learning_rate": 1.1543403944113797e-06, + "loss": 16.778, + "step": 400690 + }, + { + "epoch": 0.8094393516404934, + "grad_norm": 433.0418701171875, + "learning_rate": 1.154117319272532e-06, + "loss": 11.905, + "step": 400700 + }, + { + "epoch": 0.8094595522731772, + "grad_norm": 145.5989990234375, + "learning_rate": 1.1538942628780513e-06, + "loss": 16.4736, + "step": 400710 + }, + { + "epoch": 0.809479752905861, + "grad_norm": 370.5570068359375, + "learning_rate": 1.153671225229024e-06, + "loss": 16.5558, + "step": 400720 + }, + { + "epoch": 0.8094999535385449, + "grad_norm": 151.88180541992188, + "learning_rate": 1.1534482063265346e-06, + "loss": 9.2769, + "step": 400730 + }, + { + "epoch": 0.8095201541712287, + "grad_norm": 690.061279296875, + "learning_rate": 1.153225206171672e-06, + "loss": 23.9408, + "step": 400740 + }, + { + "epoch": 0.8095403548039125, + "grad_norm": 94.86724090576172, + "learning_rate": 1.1530022247655253e-06, + "loss": 11.7036, + "step": 400750 + }, + { + "epoch": 0.8095605554365963, + "grad_norm": 327.24957275390625, + "learning_rate": 1.1527792621091787e-06, + "loss": 21.9644, + "step": 400760 + }, + { + "epoch": 0.80958075606928, + "grad_norm": 589.7625122070312, + "learning_rate": 1.1525563182037181e-06, + "loss": 38.0113, + "step": 400770 + }, + { + "epoch": 0.8096009567019639, + "grad_norm": 518.3905029296875, + "learning_rate": 1.1523333930502323e-06, + "loss": 13.1313, + "step": 400780 + }, + { + "epoch": 0.8096211573346477, + "grad_norm": 514.0545654296875, + "learning_rate": 1.1521104866498073e-06, + "loss": 27.4999, + "step": 400790 + }, + { + "epoch": 0.8096413579673315, + "grad_norm": 324.7932434082031, + "learning_rate": 1.1518875990035278e-06, + "loss": 16.8554, + "step": 400800 + }, + { + "epoch": 0.8096615586000153, + "grad_norm": 340.92047119140625, + "learning_rate": 1.1516647301124812e-06, + 
"loss": 15.0591, + "step": 400810 + }, + { + "epoch": 0.8096817592326991, + "grad_norm": 830.7328491210938, + "learning_rate": 1.1514418799777554e-06, + "loss": 24.784, + "step": 400820 + }, + { + "epoch": 0.809701959865383, + "grad_norm": 300.0881042480469, + "learning_rate": 1.1512190486004353e-06, + "loss": 22.6477, + "step": 400830 + }, + { + "epoch": 0.8097221604980668, + "grad_norm": 184.5310821533203, + "learning_rate": 1.1509962359816052e-06, + "loss": 18.6409, + "step": 400840 + }, + { + "epoch": 0.8097423611307506, + "grad_norm": 214.54428100585938, + "learning_rate": 1.1507734421223544e-06, + "loss": 11.8894, + "step": 400850 + }, + { + "epoch": 0.8097625617634344, + "grad_norm": 119.18444061279297, + "learning_rate": 1.1505506670237664e-06, + "loss": 22.754, + "step": 400860 + }, + { + "epoch": 0.8097827623961182, + "grad_norm": 371.9199523925781, + "learning_rate": 1.1503279106869264e-06, + "loss": 17.2868, + "step": 400870 + }, + { + "epoch": 0.8098029630288021, + "grad_norm": 314.87274169921875, + "learning_rate": 1.1501051731129227e-06, + "loss": 8.8082, + "step": 400880 + }, + { + "epoch": 0.8098231636614859, + "grad_norm": 404.53485107421875, + "learning_rate": 1.149882454302838e-06, + "loss": 12.3033, + "step": 400890 + }, + { + "epoch": 0.8098433642941697, + "grad_norm": 143.2717742919922, + "learning_rate": 1.1496597542577603e-06, + "loss": 7.4328, + "step": 400900 + }, + { + "epoch": 0.8098635649268535, + "grad_norm": 333.5477294921875, + "learning_rate": 1.149437072978773e-06, + "loss": 26.1383, + "step": 400910 + }, + { + "epoch": 0.8098837655595373, + "grad_norm": 424.95855712890625, + "learning_rate": 1.1492144104669639e-06, + "loss": 10.5219, + "step": 400920 + }, + { + "epoch": 0.8099039661922212, + "grad_norm": 356.6832580566406, + "learning_rate": 1.1489917667234162e-06, + "loss": 17.2317, + "step": 400930 + }, + { + "epoch": 0.809924166824905, + "grad_norm": 295.1436462402344, + "learning_rate": 1.1487691417492147e-06, + "loss": 19.8804, + "step": 400940 + }, + { + "epoch": 0.8099443674575888, + "grad_norm": 407.83599853515625, + "learning_rate": 1.1485465355454467e-06, + "loss": 15.6823, + "step": 400950 + }, + { + "epoch": 0.8099645680902726, + "grad_norm": 286.6570129394531, + "learning_rate": 1.1483239481131942e-06, + "loss": 15.7468, + "step": 400960 + }, + { + "epoch": 0.8099847687229564, + "grad_norm": 223.4400177001953, + "learning_rate": 1.148101379453545e-06, + "loss": 10.8784, + "step": 400970 + }, + { + "epoch": 0.8100049693556403, + "grad_norm": 311.2491455078125, + "learning_rate": 1.1478788295675824e-06, + "loss": 14.4636, + "step": 400980 + }, + { + "epoch": 0.8100251699883241, + "grad_norm": 7.400152683258057, + "learning_rate": 1.14765629845639e-06, + "loss": 22.4408, + "step": 400990 + }, + { + "epoch": 0.8100453706210079, + "grad_norm": 79.97480010986328, + "learning_rate": 1.1474337861210543e-06, + "loss": 16.5655, + "step": 401000 + }, + { + "epoch": 0.8100655712536917, + "grad_norm": 113.31886291503906, + "learning_rate": 1.14721129256266e-06, + "loss": 11.061, + "step": 401010 + }, + { + "epoch": 0.8100857718863754, + "grad_norm": 447.0079345703125, + "learning_rate": 1.1469888177822885e-06, + "loss": 13.1508, + "step": 401020 + }, + { + "epoch": 0.8101059725190592, + "grad_norm": 125.6728744506836, + "learning_rate": 1.1467663617810264e-06, + "loss": 17.4938, + "step": 401030 + }, + { + "epoch": 0.8101261731517431, + "grad_norm": 152.93800354003906, + "learning_rate": 1.1465439245599591e-06, + "loss": 25.3538, + "step": 401040 + }, + 
{ + "epoch": 0.8101463737844269, + "grad_norm": 463.0459899902344, + "learning_rate": 1.1463215061201694e-06, + "loss": 24.4372, + "step": 401050 + }, + { + "epoch": 0.8101665744171107, + "grad_norm": 233.2440643310547, + "learning_rate": 1.1460991064627403e-06, + "loss": 11.7019, + "step": 401060 + }, + { + "epoch": 0.8101867750497945, + "grad_norm": 5.622211456298828, + "learning_rate": 1.1458767255887576e-06, + "loss": 19.5494, + "step": 401070 + }, + { + "epoch": 0.8102069756824783, + "grad_norm": 448.8087158203125, + "learning_rate": 1.1456543634993046e-06, + "loss": 22.6072, + "step": 401080 + }, + { + "epoch": 0.8102271763151622, + "grad_norm": 505.4259033203125, + "learning_rate": 1.1454320201954628e-06, + "loss": 17.0476, + "step": 401090 + }, + { + "epoch": 0.810247376947846, + "grad_norm": 204.0558319091797, + "learning_rate": 1.1452096956783181e-06, + "loss": 9.2969, + "step": 401100 + }, + { + "epoch": 0.8102675775805298, + "grad_norm": 655.0369262695312, + "learning_rate": 1.1449873899489566e-06, + "loss": 13.7258, + "step": 401110 + }, + { + "epoch": 0.8102877782132136, + "grad_norm": 169.9647979736328, + "learning_rate": 1.1447651030084567e-06, + "loss": 17.9725, + "step": 401120 + }, + { + "epoch": 0.8103079788458974, + "grad_norm": 395.2190246582031, + "learning_rate": 1.1445428348579035e-06, + "loss": 13.6773, + "step": 401130 + }, + { + "epoch": 0.8103281794785813, + "grad_norm": 215.7799835205078, + "learning_rate": 1.1443205854983824e-06, + "loss": 25.8711, + "step": 401140 + }, + { + "epoch": 0.8103483801112651, + "grad_norm": 424.9611511230469, + "learning_rate": 1.1440983549309753e-06, + "loss": 57.7286, + "step": 401150 + }, + { + "epoch": 0.8103685807439489, + "grad_norm": 405.81195068359375, + "learning_rate": 1.1438761431567641e-06, + "loss": 20.7329, + "step": 401160 + }, + { + "epoch": 0.8103887813766327, + "grad_norm": 194.59307861328125, + "learning_rate": 1.1436539501768334e-06, + "loss": 7.5697, + "step": 401170 + }, + { + "epoch": 0.8104089820093165, + "grad_norm": 96.64669036865234, + "learning_rate": 1.1434317759922664e-06, + "loss": 15.5658, + "step": 401180 + }, + { + "epoch": 0.8104291826420004, + "grad_norm": 246.2642822265625, + "learning_rate": 1.1432096206041438e-06, + "loss": 14.9546, + "step": 401190 + }, + { + "epoch": 0.8104493832746842, + "grad_norm": 88.6189956665039, + "learning_rate": 1.1429874840135492e-06, + "loss": 19.2168, + "step": 401200 + }, + { + "epoch": 0.810469583907368, + "grad_norm": 399.97540283203125, + "learning_rate": 1.1427653662215675e-06, + "loss": 14.9003, + "step": 401210 + }, + { + "epoch": 0.8104897845400518, + "grad_norm": 215.20079040527344, + "learning_rate": 1.1425432672292798e-06, + "loss": 28.3903, + "step": 401220 + }, + { + "epoch": 0.8105099851727356, + "grad_norm": 365.6468505859375, + "learning_rate": 1.1423211870377666e-06, + "loss": 27.7487, + "step": 401230 + }, + { + "epoch": 0.8105301858054195, + "grad_norm": 6.0168232917785645, + "learning_rate": 1.1420991256481133e-06, + "loss": 14.9814, + "step": 401240 + }, + { + "epoch": 0.8105503864381033, + "grad_norm": 379.3415832519531, + "learning_rate": 1.1418770830614012e-06, + "loss": 23.0263, + "step": 401250 + }, + { + "epoch": 0.8105705870707871, + "grad_norm": 1574.305908203125, + "learning_rate": 1.1416550592787106e-06, + "loss": 41.68, + "step": 401260 + }, + { + "epoch": 0.8105907877034709, + "grad_norm": 564.3516235351562, + "learning_rate": 1.141433054301127e-06, + "loss": 9.5871, + "step": 401270 + }, + { + "epoch": 0.8106109883361546, + 
"grad_norm": 430.0144348144531, + "learning_rate": 1.1412110681297296e-06, + "loss": 11.8704, + "step": 401280 + }, + { + "epoch": 0.8106311889688385, + "grad_norm": 207.62380981445312, + "learning_rate": 1.1409891007656022e-06, + "loss": 20.9196, + "step": 401290 + }, + { + "epoch": 0.8106513896015223, + "grad_norm": 391.6383361816406, + "learning_rate": 1.1407671522098262e-06, + "loss": 16.6244, + "step": 401300 + }, + { + "epoch": 0.8106715902342061, + "grad_norm": 261.6241760253906, + "learning_rate": 1.1405452224634817e-06, + "loss": 13.7126, + "step": 401310 + }, + { + "epoch": 0.8106917908668899, + "grad_norm": 500.1307373046875, + "learning_rate": 1.140323311527653e-06, + "loss": 56.0972, + "step": 401320 + }, + { + "epoch": 0.8107119914995737, + "grad_norm": 310.52386474609375, + "learning_rate": 1.1401014194034205e-06, + "loss": 22.8916, + "step": 401330 + }, + { + "epoch": 0.8107321921322576, + "grad_norm": 284.5379333496094, + "learning_rate": 1.1398795460918637e-06, + "loss": 11.5335, + "step": 401340 + }, + { + "epoch": 0.8107523927649414, + "grad_norm": 373.453369140625, + "learning_rate": 1.139657691594066e-06, + "loss": 24.3312, + "step": 401350 + }, + { + "epoch": 0.8107725933976252, + "grad_norm": 437.35235595703125, + "learning_rate": 1.1394358559111101e-06, + "loss": 16.2901, + "step": 401360 + }, + { + "epoch": 0.810792794030309, + "grad_norm": 190.43516540527344, + "learning_rate": 1.1392140390440754e-06, + "loss": 11.5918, + "step": 401370 + }, + { + "epoch": 0.8108129946629928, + "grad_norm": 412.5727233886719, + "learning_rate": 1.1389922409940423e-06, + "loss": 16.8753, + "step": 401380 + }, + { + "epoch": 0.8108331952956767, + "grad_norm": 447.2630310058594, + "learning_rate": 1.1387704617620937e-06, + "loss": 17.5915, + "step": 401390 + }, + { + "epoch": 0.8108533959283605, + "grad_norm": 28.676143646240234, + "learning_rate": 1.1385487013493095e-06, + "loss": 14.9792, + "step": 401400 + }, + { + "epoch": 0.8108735965610443, + "grad_norm": 321.4709167480469, + "learning_rate": 1.1383269597567691e-06, + "loss": 23.3169, + "step": 401410 + }, + { + "epoch": 0.8108937971937281, + "grad_norm": 696.7423095703125, + "learning_rate": 1.138105236985555e-06, + "loss": 15.216, + "step": 401420 + }, + { + "epoch": 0.8109139978264119, + "grad_norm": 45.329654693603516, + "learning_rate": 1.1378835330367494e-06, + "loss": 25.8943, + "step": 401430 + }, + { + "epoch": 0.8109341984590958, + "grad_norm": 373.6303405761719, + "learning_rate": 1.1376618479114304e-06, + "loss": 17.9609, + "step": 401440 + }, + { + "epoch": 0.8109543990917796, + "grad_norm": 500.3052978515625, + "learning_rate": 1.1374401816106778e-06, + "loss": 19.6485, + "step": 401450 + }, + { + "epoch": 0.8109745997244634, + "grad_norm": 51.37641143798828, + "learning_rate": 1.1372185341355746e-06, + "loss": 12.2529, + "step": 401460 + }, + { + "epoch": 0.8109948003571472, + "grad_norm": 101.87545776367188, + "learning_rate": 1.1369969054871998e-06, + "loss": 22.0237, + "step": 401470 + }, + { + "epoch": 0.811015000989831, + "grad_norm": 98.46989440917969, + "learning_rate": 1.1367752956666322e-06, + "loss": 16.4904, + "step": 401480 + }, + { + "epoch": 0.8110352016225149, + "grad_norm": 100.5164566040039, + "learning_rate": 1.1365537046749536e-06, + "loss": 26.3304, + "step": 401490 + }, + { + "epoch": 0.8110554022551987, + "grad_norm": 467.39208984375, + "learning_rate": 1.136332132513245e-06, + "loss": 9.3387, + "step": 401500 + }, + { + "epoch": 0.8110756028878825, + "grad_norm": 9.399454116821289, + 
"learning_rate": 1.1361105791825845e-06, + "loss": 17.5661, + "step": 401510 + }, + { + "epoch": 0.8110958035205663, + "grad_norm": 293.3193054199219, + "learning_rate": 1.1358890446840514e-06, + "loss": 21.871, + "step": 401520 + }, + { + "epoch": 0.81111600415325, + "grad_norm": 435.3813781738281, + "learning_rate": 1.135667529018728e-06, + "loss": 13.8553, + "step": 401530 + }, + { + "epoch": 0.8111362047859338, + "grad_norm": 682.3662719726562, + "learning_rate": 1.135446032187692e-06, + "loss": 31.9891, + "step": 401540 + }, + { + "epoch": 0.8111564054186177, + "grad_norm": 749.76171875, + "learning_rate": 1.135224554192022e-06, + "loss": 26.1557, + "step": 401550 + }, + { + "epoch": 0.8111766060513015, + "grad_norm": 210.04885864257812, + "learning_rate": 1.1350030950328001e-06, + "loss": 13.8013, + "step": 401560 + }, + { + "epoch": 0.8111968066839853, + "grad_norm": 48.78976821899414, + "learning_rate": 1.1347816547111029e-06, + "loss": 12.8956, + "step": 401570 + }, + { + "epoch": 0.8112170073166691, + "grad_norm": 362.14215087890625, + "learning_rate": 1.1345602332280125e-06, + "loss": 24.5604, + "step": 401580 + }, + { + "epoch": 0.811237207949353, + "grad_norm": 635.3172607421875, + "learning_rate": 1.134338830584607e-06, + "loss": 16.3891, + "step": 401590 + }, + { + "epoch": 0.8112574085820368, + "grad_norm": 936.0580444335938, + "learning_rate": 1.1341174467819637e-06, + "loss": 29.421, + "step": 401600 + }, + { + "epoch": 0.8112776092147206, + "grad_norm": 772.8532104492188, + "learning_rate": 1.1338960818211647e-06, + "loss": 21.7755, + "step": 401610 + }, + { + "epoch": 0.8112978098474044, + "grad_norm": 82.2054672241211, + "learning_rate": 1.1336747357032868e-06, + "loss": 10.8459, + "step": 401620 + }, + { + "epoch": 0.8113180104800882, + "grad_norm": 171.50927734375, + "learning_rate": 1.1334534084294084e-06, + "loss": 19.3667, + "step": 401630 + }, + { + "epoch": 0.811338211112772, + "grad_norm": 164.10752868652344, + "learning_rate": 1.1332321000006086e-06, + "loss": 25.8789, + "step": 401640 + }, + { + "epoch": 0.8113584117454559, + "grad_norm": 496.20977783203125, + "learning_rate": 1.1330108104179682e-06, + "loss": 15.3479, + "step": 401650 + }, + { + "epoch": 0.8113786123781397, + "grad_norm": 156.0892333984375, + "learning_rate": 1.1327895396825644e-06, + "loss": 16.3611, + "step": 401660 + }, + { + "epoch": 0.8113988130108235, + "grad_norm": 220.75396728515625, + "learning_rate": 1.1325682877954736e-06, + "loss": 17.7, + "step": 401670 + }, + { + "epoch": 0.8114190136435073, + "grad_norm": 446.0356750488281, + "learning_rate": 1.1323470547577774e-06, + "loss": 13.671, + "step": 401680 + }, + { + "epoch": 0.8114392142761911, + "grad_norm": 303.70318603515625, + "learning_rate": 1.1321258405705526e-06, + "loss": 18.6709, + "step": 401690 + }, + { + "epoch": 0.811459414908875, + "grad_norm": 7.471003532409668, + "learning_rate": 1.1319046452348758e-06, + "loss": 20.7449, + "step": 401700 + }, + { + "epoch": 0.8114796155415588, + "grad_norm": 344.658447265625, + "learning_rate": 1.131683468751827e-06, + "loss": 22.884, + "step": 401710 + }, + { + "epoch": 0.8114998161742426, + "grad_norm": 317.5362243652344, + "learning_rate": 1.1314623111224865e-06, + "loss": 17.6677, + "step": 401720 + }, + { + "epoch": 0.8115200168069264, + "grad_norm": 265.2457275390625, + "learning_rate": 1.1312411723479261e-06, + "loss": 15.9968, + "step": 401730 + }, + { + "epoch": 0.8115402174396102, + "grad_norm": 480.3129577636719, + "learning_rate": 1.1310200524292275e-06, + "loss": 
12.3708, + "step": 401740 + }, + { + "epoch": 0.8115604180722941, + "grad_norm": 82.8792495727539, + "learning_rate": 1.1307989513674695e-06, + "loss": 9.7043, + "step": 401750 + }, + { + "epoch": 0.8115806187049779, + "grad_norm": 109.63460540771484, + "learning_rate": 1.1305778691637276e-06, + "loss": 7.1868, + "step": 401760 + }, + { + "epoch": 0.8116008193376617, + "grad_norm": 279.5165710449219, + "learning_rate": 1.1303568058190789e-06, + "loss": 29.7321, + "step": 401770 + }, + { + "epoch": 0.8116210199703455, + "grad_norm": 64.94554138183594, + "learning_rate": 1.1301357613346032e-06, + "loss": 7.8405, + "step": 401780 + }, + { + "epoch": 0.8116412206030292, + "grad_norm": 741.6187744140625, + "learning_rate": 1.129914735711376e-06, + "loss": 18.0711, + "step": 401790 + }, + { + "epoch": 0.811661421235713, + "grad_norm": 361.603759765625, + "learning_rate": 1.129693728950474e-06, + "loss": 21.3723, + "step": 401800 + }, + { + "epoch": 0.8116816218683969, + "grad_norm": 37.61235427856445, + "learning_rate": 1.1294727410529754e-06, + "loss": 23.2052, + "step": 401810 + }, + { + "epoch": 0.8117018225010807, + "grad_norm": 303.11474609375, + "learning_rate": 1.1292517720199581e-06, + "loss": 18.4122, + "step": 401820 + }, + { + "epoch": 0.8117220231337645, + "grad_norm": 147.80593872070312, + "learning_rate": 1.1290308218524986e-06, + "loss": 17.6941, + "step": 401830 + }, + { + "epoch": 0.8117422237664483, + "grad_norm": 179.4315185546875, + "learning_rate": 1.128809890551672e-06, + "loss": 19.127, + "step": 401840 + }, + { + "epoch": 0.8117624243991322, + "grad_norm": 498.0691833496094, + "learning_rate": 1.1285889781185576e-06, + "loss": 15.6153, + "step": 401850 + }, + { + "epoch": 0.811782625031816, + "grad_norm": 442.79736328125, + "learning_rate": 1.1283680845542316e-06, + "loss": 15.8981, + "step": 401860 + }, + { + "epoch": 0.8118028256644998, + "grad_norm": 361.2605895996094, + "learning_rate": 1.128147209859768e-06, + "loss": 20.6368, + "step": 401870 + }, + { + "epoch": 0.8118230262971836, + "grad_norm": 14.113608360290527, + "learning_rate": 1.1279263540362468e-06, + "loss": 24.9256, + "step": 401880 + }, + { + "epoch": 0.8118432269298674, + "grad_norm": 386.19049072265625, + "learning_rate": 1.1277055170847416e-06, + "loss": 19.3577, + "step": 401890 + }, + { + "epoch": 0.8118634275625513, + "grad_norm": 362.01763916015625, + "learning_rate": 1.1274846990063314e-06, + "loss": 15.0212, + "step": 401900 + }, + { + "epoch": 0.8118836281952351, + "grad_norm": 625.5485229492188, + "learning_rate": 1.1272638998020913e-06, + "loss": 18.1125, + "step": 401910 + }, + { + "epoch": 0.8119038288279189, + "grad_norm": 103.92401885986328, + "learning_rate": 1.1270431194730959e-06, + "loss": 15.5105, + "step": 401920 + }, + { + "epoch": 0.8119240294606027, + "grad_norm": 26.25572395324707, + "learning_rate": 1.1268223580204234e-06, + "loss": 7.4456, + "step": 401930 + }, + { + "epoch": 0.8119442300932865, + "grad_norm": 218.33631896972656, + "learning_rate": 1.1266016154451492e-06, + "loss": 14.0734, + "step": 401940 + }, + { + "epoch": 0.8119644307259704, + "grad_norm": 111.6611099243164, + "learning_rate": 1.1263808917483476e-06, + "loss": 16.248, + "step": 401950 + }, + { + "epoch": 0.8119846313586542, + "grad_norm": 414.2197570800781, + "learning_rate": 1.1261601869310962e-06, + "loss": 14.3198, + "step": 401960 + }, + { + "epoch": 0.812004831991338, + "grad_norm": 59.31288146972656, + "learning_rate": 1.1259395009944713e-06, + "loss": 18.0006, + "step": 401970 + }, + { + "epoch": 
0.8120250326240218, + "grad_norm": 493.8407897949219, + "learning_rate": 1.125718833939547e-06, + "loss": 22.5595, + "step": 401980 + }, + { + "epoch": 0.8120452332567056, + "grad_norm": 589.6556396484375, + "learning_rate": 1.125498185767398e-06, + "loss": 15.9924, + "step": 401990 + }, + { + "epoch": 0.8120654338893895, + "grad_norm": 272.1883850097656, + "learning_rate": 1.1252775564791023e-06, + "loss": 16.4307, + "step": 402000 + }, + { + "epoch": 0.8120856345220733, + "grad_norm": 549.4430541992188, + "learning_rate": 1.1250569460757344e-06, + "loss": 26.6369, + "step": 402010 + }, + { + "epoch": 0.8121058351547571, + "grad_norm": 273.0378112792969, + "learning_rate": 1.1248363545583675e-06, + "loss": 21.2479, + "step": 402020 + }, + { + "epoch": 0.8121260357874409, + "grad_norm": 208.7742462158203, + "learning_rate": 1.1246157819280772e-06, + "loss": 19.5443, + "step": 402030 + }, + { + "epoch": 0.8121462364201246, + "grad_norm": 233.62985229492188, + "learning_rate": 1.1243952281859422e-06, + "loss": 26.6932, + "step": 402040 + }, + { + "epoch": 0.8121664370528084, + "grad_norm": 145.3607940673828, + "learning_rate": 1.1241746933330338e-06, + "loss": 13.2304, + "step": 402050 + }, + { + "epoch": 0.8121866376854923, + "grad_norm": 211.1546630859375, + "learning_rate": 1.123954177370427e-06, + "loss": 16.9454, + "step": 402060 + }, + { + "epoch": 0.8122068383181761, + "grad_norm": 799.8381958007812, + "learning_rate": 1.1237336802991989e-06, + "loss": 23.1495, + "step": 402070 + }, + { + "epoch": 0.8122270389508599, + "grad_norm": 293.4764404296875, + "learning_rate": 1.1235132021204226e-06, + "loss": 20.9247, + "step": 402080 + }, + { + "epoch": 0.8122472395835437, + "grad_norm": 585.5978393554688, + "learning_rate": 1.1232927428351714e-06, + "loss": 32.2018, + "step": 402090 + }, + { + "epoch": 0.8122674402162275, + "grad_norm": 543.6815185546875, + "learning_rate": 1.1230723024445212e-06, + "loss": 22.7182, + "step": 402100 + }, + { + "epoch": 0.8122876408489114, + "grad_norm": 592.7957153320312, + "learning_rate": 1.1228518809495475e-06, + "loss": 30.7611, + "step": 402110 + }, + { + "epoch": 0.8123078414815952, + "grad_norm": 706.2651977539062, + "learning_rate": 1.1226314783513238e-06, + "loss": 12.2706, + "step": 402120 + }, + { + "epoch": 0.812328042114279, + "grad_norm": 51.723060607910156, + "learning_rate": 1.122411094650922e-06, + "loss": 12.408, + "step": 402130 + }, + { + "epoch": 0.8123482427469628, + "grad_norm": 154.70314025878906, + "learning_rate": 1.1221907298494205e-06, + "loss": 13.539, + "step": 402140 + }, + { + "epoch": 0.8123684433796466, + "grad_norm": 353.5382995605469, + "learning_rate": 1.1219703839478907e-06, + "loss": 31.1909, + "step": 402150 + }, + { + "epoch": 0.8123886440123305, + "grad_norm": 184.29806518554688, + "learning_rate": 1.1217500569474054e-06, + "loss": 13.1566, + "step": 402160 + }, + { + "epoch": 0.8124088446450143, + "grad_norm": 745.1736450195312, + "learning_rate": 1.1215297488490412e-06, + "loss": 26.4486, + "step": 402170 + }, + { + "epoch": 0.8124290452776981, + "grad_norm": 188.0874481201172, + "learning_rate": 1.12130945965387e-06, + "loss": 16.0302, + "step": 402180 + }, + { + "epoch": 0.8124492459103819, + "grad_norm": 224.18624877929688, + "learning_rate": 1.1210891893629665e-06, + "loss": 13.0421, + "step": 402190 + }, + { + "epoch": 0.8124694465430657, + "grad_norm": 297.636474609375, + "learning_rate": 1.120868937977404e-06, + "loss": 12.9067, + "step": 402200 + }, + { + "epoch": 0.8124896471757496, + "grad_norm": 
20.107284545898438, + "learning_rate": 1.1206487054982551e-06, + "loss": 111.9853, + "step": 402210 + }, + { + "epoch": 0.8125098478084334, + "grad_norm": 322.66339111328125, + "learning_rate": 1.1204284919265946e-06, + "loss": 22.0205, + "step": 402220 + }, + { + "epoch": 0.8125300484411172, + "grad_norm": 590.3783569335938, + "learning_rate": 1.1202082972634952e-06, + "loss": 13.0414, + "step": 402230 + }, + { + "epoch": 0.812550249073801, + "grad_norm": 427.2761535644531, + "learning_rate": 1.119988121510029e-06, + "loss": 27.9798, + "step": 402240 + }, + { + "epoch": 0.8125704497064848, + "grad_norm": 238.13414001464844, + "learning_rate": 1.1197679646672698e-06, + "loss": 15.6964, + "step": 402250 + }, + { + "epoch": 0.8125906503391687, + "grad_norm": 638.0228271484375, + "learning_rate": 1.1195478267362924e-06, + "loss": 18.874, + "step": 402260 + }, + { + "epoch": 0.8126108509718525, + "grad_norm": 323.3406066894531, + "learning_rate": 1.119327707718168e-06, + "loss": 13.2257, + "step": 402270 + }, + { + "epoch": 0.8126310516045363, + "grad_norm": 1900.7822265625, + "learning_rate": 1.1191076076139684e-06, + "loss": 13.8603, + "step": 402280 + }, + { + "epoch": 0.8126512522372201, + "grad_norm": 554.580810546875, + "learning_rate": 1.1188875264247695e-06, + "loss": 32.6755, + "step": 402290 + }, + { + "epoch": 0.8126714528699038, + "grad_norm": 212.0137176513672, + "learning_rate": 1.1186674641516415e-06, + "loss": 10.6111, + "step": 402300 + }, + { + "epoch": 0.8126916535025877, + "grad_norm": 89.68228912353516, + "learning_rate": 1.1184474207956564e-06, + "loss": 18.3645, + "step": 402310 + }, + { + "epoch": 0.8127118541352715, + "grad_norm": 263.7377014160156, + "learning_rate": 1.1182273963578877e-06, + "loss": 40.6328, + "step": 402320 + }, + { + "epoch": 0.8127320547679553, + "grad_norm": 245.92799377441406, + "learning_rate": 1.1180073908394108e-06, + "loss": 16.7199, + "step": 402330 + }, + { + "epoch": 0.8127522554006391, + "grad_norm": 360.03826904296875, + "learning_rate": 1.1177874042412923e-06, + "loss": 19.2404, + "step": 402340 + }, + { + "epoch": 0.8127724560333229, + "grad_norm": 243.1678466796875, + "learning_rate": 1.1175674365646067e-06, + "loss": 12.9017, + "step": 402350 + }, + { + "epoch": 0.8127926566660068, + "grad_norm": 269.5718688964844, + "learning_rate": 1.1173474878104285e-06, + "loss": 25.318, + "step": 402360 + }, + { + "epoch": 0.8128128572986906, + "grad_norm": 154.96092224121094, + "learning_rate": 1.1171275579798274e-06, + "loss": 14.6612, + "step": 402370 + }, + { + "epoch": 0.8128330579313744, + "grad_norm": 350.7008972167969, + "learning_rate": 1.1169076470738744e-06, + "loss": 34.7705, + "step": 402380 + }, + { + "epoch": 0.8128532585640582, + "grad_norm": 145.260986328125, + "learning_rate": 1.1166877550936433e-06, + "loss": 12.0329, + "step": 402390 + }, + { + "epoch": 0.812873459196742, + "grad_norm": 246.95323181152344, + "learning_rate": 1.1164678820402059e-06, + "loss": 24.2772, + "step": 402400 + }, + { + "epoch": 0.8128936598294259, + "grad_norm": 189.96546936035156, + "learning_rate": 1.1162480279146315e-06, + "loss": 19.6054, + "step": 402410 + }, + { + "epoch": 0.8129138604621097, + "grad_norm": 525.8419799804688, + "learning_rate": 1.1160281927179927e-06, + "loss": 11.8074, + "step": 402420 + }, + { + "epoch": 0.8129340610947935, + "grad_norm": 326.4888610839844, + "learning_rate": 1.1158083764513634e-06, + "loss": 16.4423, + "step": 402430 + }, + { + "epoch": 0.8129542617274773, + "grad_norm": 236.17709350585938, + 
"learning_rate": 1.1155885791158128e-06, + "loss": 14.372, + "step": 402440 + }, + { + "epoch": 0.8129744623601611, + "grad_norm": 590.6957397460938, + "learning_rate": 1.1153688007124109e-06, + "loss": 18.3441, + "step": 402450 + }, + { + "epoch": 0.812994662992845, + "grad_norm": 403.65301513671875, + "learning_rate": 1.1151490412422316e-06, + "loss": 19.8515, + "step": 402460 + }, + { + "epoch": 0.8130148636255288, + "grad_norm": 221.52354431152344, + "learning_rate": 1.114929300706345e-06, + "loss": 14.8902, + "step": 402470 + }, + { + "epoch": 0.8130350642582126, + "grad_norm": 232.07037353515625, + "learning_rate": 1.1147095791058198e-06, + "loss": 26.7014, + "step": 402480 + }, + { + "epoch": 0.8130552648908964, + "grad_norm": 580.7557983398438, + "learning_rate": 1.114489876441731e-06, + "loss": 12.2789, + "step": 402490 + }, + { + "epoch": 0.8130754655235802, + "grad_norm": 486.24896240234375, + "learning_rate": 1.1142701927151456e-06, + "loss": 22.9559, + "step": 402500 + }, + { + "epoch": 0.813095666156264, + "grad_norm": 1497.037353515625, + "learning_rate": 1.1140505279271373e-06, + "loss": 31.7122, + "step": 402510 + }, + { + "epoch": 0.8131158667889479, + "grad_norm": 273.04931640625, + "learning_rate": 1.1138308820787752e-06, + "loss": 15.8489, + "step": 402520 + }, + { + "epoch": 0.8131360674216317, + "grad_norm": 215.6087188720703, + "learning_rate": 1.1136112551711293e-06, + "loss": 16.5628, + "step": 402530 + }, + { + "epoch": 0.8131562680543155, + "grad_norm": 201.78170776367188, + "learning_rate": 1.113391647205272e-06, + "loss": 11.3917, + "step": 402540 + }, + { + "epoch": 0.8131764686869993, + "grad_norm": 510.38427734375, + "learning_rate": 1.1131720581822703e-06, + "loss": 21.7584, + "step": 402550 + }, + { + "epoch": 0.813196669319683, + "grad_norm": 644.1688842773438, + "learning_rate": 1.1129524881031989e-06, + "loss": 10.8479, + "step": 402560 + }, + { + "epoch": 0.8132168699523669, + "grad_norm": 22.48255157470703, + "learning_rate": 1.1127329369691236e-06, + "loss": 14.5291, + "step": 402570 + }, + { + "epoch": 0.8132370705850507, + "grad_norm": 124.2413330078125, + "learning_rate": 1.1125134047811182e-06, + "loss": 9.475, + "step": 402580 + }, + { + "epoch": 0.8132572712177345, + "grad_norm": 159.1404266357422, + "learning_rate": 1.1122938915402508e-06, + "loss": 6.645, + "step": 402590 + }, + { + "epoch": 0.8132774718504183, + "grad_norm": 336.38653564453125, + "learning_rate": 1.11207439724759e-06, + "loss": 16.921, + "step": 402600 + }, + { + "epoch": 0.8132976724831021, + "grad_norm": 310.8020324707031, + "learning_rate": 1.1118549219042085e-06, + "loss": 17.5297, + "step": 402610 + }, + { + "epoch": 0.813317873115786, + "grad_norm": 317.6047058105469, + "learning_rate": 1.111635465511175e-06, + "loss": 16.1258, + "step": 402620 + }, + { + "epoch": 0.8133380737484698, + "grad_norm": 540.7821655273438, + "learning_rate": 1.1114160280695568e-06, + "loss": 15.7187, + "step": 402630 + }, + { + "epoch": 0.8133582743811536, + "grad_norm": 718.9857177734375, + "learning_rate": 1.1111966095804254e-06, + "loss": 16.3498, + "step": 402640 + }, + { + "epoch": 0.8133784750138374, + "grad_norm": 26.156211853027344, + "learning_rate": 1.1109772100448512e-06, + "loss": 19.6282, + "step": 402650 + }, + { + "epoch": 0.8133986756465212, + "grad_norm": 383.561767578125, + "learning_rate": 1.1107578294639026e-06, + "loss": 22.3377, + "step": 402660 + }, + { + "epoch": 0.8134188762792051, + "grad_norm": 652.242919921875, + "learning_rate": 1.1105384678386472e-06, + "loss": 
16.9119, + "step": 402670 + }, + { + "epoch": 0.8134390769118889, + "grad_norm": 687.17236328125, + "learning_rate": 1.1103191251701573e-06, + "loss": 12.5076, + "step": 402680 + }, + { + "epoch": 0.8134592775445727, + "grad_norm": 480.04132080078125, + "learning_rate": 1.1100998014594993e-06, + "loss": 15.089, + "step": 402690 + }, + { + "epoch": 0.8134794781772565, + "grad_norm": 418.9132995605469, + "learning_rate": 1.1098804967077425e-06, + "loss": 17.6941, + "step": 402700 + }, + { + "epoch": 0.8134996788099403, + "grad_norm": 492.0087585449219, + "learning_rate": 1.109661210915956e-06, + "loss": 14.0928, + "step": 402710 + }, + { + "epoch": 0.8135198794426242, + "grad_norm": 122.6615219116211, + "learning_rate": 1.1094419440852105e-06, + "loss": 19.9941, + "step": 402720 + }, + { + "epoch": 0.813540080075308, + "grad_norm": 428.9898681640625, + "learning_rate": 1.109222696216573e-06, + "loss": 16.79, + "step": 402730 + }, + { + "epoch": 0.8135602807079918, + "grad_norm": 1433.088623046875, + "learning_rate": 1.109003467311111e-06, + "loss": 37.8135, + "step": 402740 + }, + { + "epoch": 0.8135804813406756, + "grad_norm": 308.20782470703125, + "learning_rate": 1.1087842573698953e-06, + "loss": 24.2398, + "step": 402750 + }, + { + "epoch": 0.8136006819733594, + "grad_norm": 184.24591064453125, + "learning_rate": 1.1085650663939933e-06, + "loss": 26.7286, + "step": 402760 + }, + { + "epoch": 0.8136208826060433, + "grad_norm": 337.38543701171875, + "learning_rate": 1.1083458943844721e-06, + "loss": 21.6237, + "step": 402770 + }, + { + "epoch": 0.8136410832387271, + "grad_norm": 227.1483154296875, + "learning_rate": 1.1081267413424018e-06, + "loss": 19.5517, + "step": 402780 + }, + { + "epoch": 0.8136612838714109, + "grad_norm": 240.45953369140625, + "learning_rate": 1.1079076072688489e-06, + "loss": 13.4554, + "step": 402790 + }, + { + "epoch": 0.8136814845040947, + "grad_norm": 266.16412353515625, + "learning_rate": 1.1076884921648834e-06, + "loss": 21.7566, + "step": 402800 + }, + { + "epoch": 0.8137016851367784, + "grad_norm": 378.9514465332031, + "learning_rate": 1.1074693960315718e-06, + "loss": 15.3425, + "step": 402810 + }, + { + "epoch": 0.8137218857694622, + "grad_norm": 381.3688659667969, + "learning_rate": 1.1072503188699811e-06, + "loss": 17.5778, + "step": 402820 + }, + { + "epoch": 0.8137420864021461, + "grad_norm": 759.8544921875, + "learning_rate": 1.1070312606811816e-06, + "loss": 19.0493, + "step": 402830 + }, + { + "epoch": 0.8137622870348299, + "grad_norm": 390.9328308105469, + "learning_rate": 1.1068122214662397e-06, + "loss": 23.379, + "step": 402840 + }, + { + "epoch": 0.8137824876675137, + "grad_norm": 239.4142608642578, + "learning_rate": 1.1065932012262215e-06, + "loss": 18.6682, + "step": 402850 + }, + { + "epoch": 0.8138026883001975, + "grad_norm": 348.1329345703125, + "learning_rate": 1.1063741999621958e-06, + "loss": 14.7364, + "step": 402860 + }, + { + "epoch": 0.8138228889328813, + "grad_norm": 427.66949462890625, + "learning_rate": 1.1061552176752311e-06, + "loss": 20.0499, + "step": 402870 + }, + { + "epoch": 0.8138430895655652, + "grad_norm": 323.6460266113281, + "learning_rate": 1.1059362543663944e-06, + "loss": 17.2907, + "step": 402880 + }, + { + "epoch": 0.813863290198249, + "grad_norm": 0.04072647541761398, + "learning_rate": 1.1057173100367495e-06, + "loss": 10.3853, + "step": 402890 + }, + { + "epoch": 0.8138834908309328, + "grad_norm": 347.5143127441406, + "learning_rate": 1.1054983846873684e-06, + "loss": 19.3528, + "step": 402900 + }, + { + 
"epoch": 0.8139036914636166, + "grad_norm": 447.7604064941406, + "learning_rate": 1.1052794783193155e-06, + "loss": 20.1185, + "step": 402910 + }, + { + "epoch": 0.8139238920963004, + "grad_norm": 642.8087768554688, + "learning_rate": 1.105060590933657e-06, + "loss": 21.1317, + "step": 402920 + }, + { + "epoch": 0.8139440927289843, + "grad_norm": 67.33672332763672, + "learning_rate": 1.10484172253146e-06, + "loss": 26.4036, + "step": 402930 + }, + { + "epoch": 0.8139642933616681, + "grad_norm": 315.9908447265625, + "learning_rate": 1.1046228731137953e-06, + "loss": 13.0807, + "step": 402940 + }, + { + "epoch": 0.8139844939943519, + "grad_norm": 608.756103515625, + "learning_rate": 1.1044040426817237e-06, + "loss": 17.9246, + "step": 402950 + }, + { + "epoch": 0.8140046946270357, + "grad_norm": 44.12290573120117, + "learning_rate": 1.1041852312363144e-06, + "loss": 14.1365, + "step": 402960 + }, + { + "epoch": 0.8140248952597195, + "grad_norm": 533.66015625, + "learning_rate": 1.1039664387786348e-06, + "loss": 33.035, + "step": 402970 + }, + { + "epoch": 0.8140450958924034, + "grad_norm": 430.2449951171875, + "learning_rate": 1.1037476653097501e-06, + "loss": 23.2743, + "step": 402980 + }, + { + "epoch": 0.8140652965250872, + "grad_norm": 216.03126525878906, + "learning_rate": 1.1035289108307256e-06, + "loss": 15.3637, + "step": 402990 + }, + { + "epoch": 0.814085497157771, + "grad_norm": 23.619455337524414, + "learning_rate": 1.1033101753426285e-06, + "loss": 10.1855, + "step": 403000 + }, + { + "epoch": 0.8141056977904548, + "grad_norm": 436.244873046875, + "learning_rate": 1.1030914588465281e-06, + "loss": 16.8647, + "step": 403010 + }, + { + "epoch": 0.8141258984231386, + "grad_norm": 150.64334106445312, + "learning_rate": 1.1028727613434842e-06, + "loss": 30.5275, + "step": 403020 + }, + { + "epoch": 0.8141460990558225, + "grad_norm": 453.5550537109375, + "learning_rate": 1.1026540828345656e-06, + "loss": 19.0017, + "step": 403030 + }, + { + "epoch": 0.8141662996885063, + "grad_norm": 475.4017333984375, + "learning_rate": 1.10243542332084e-06, + "loss": 15.6427, + "step": 403040 + }, + { + "epoch": 0.8141865003211901, + "grad_norm": 134.8949432373047, + "learning_rate": 1.1022167828033715e-06, + "loss": 11.7787, + "step": 403050 + }, + { + "epoch": 0.8142067009538739, + "grad_norm": 276.0162048339844, + "learning_rate": 1.1019981612832243e-06, + "loss": 36.4231, + "step": 403060 + }, + { + "epoch": 0.8142269015865576, + "grad_norm": 258.1709899902344, + "learning_rate": 1.101779558761466e-06, + "loss": 9.948, + "step": 403070 + }, + { + "epoch": 0.8142471022192415, + "grad_norm": 640.7821044921875, + "learning_rate": 1.101560975239162e-06, + "loss": 16.1311, + "step": 403080 + }, + { + "epoch": 0.8142673028519253, + "grad_norm": 520.6428833007812, + "learning_rate": 1.1013424107173753e-06, + "loss": 17.7264, + "step": 403090 + }, + { + "epoch": 0.8142875034846091, + "grad_norm": 508.30267333984375, + "learning_rate": 1.1011238651971744e-06, + "loss": 19.0138, + "step": 403100 + }, + { + "epoch": 0.8143077041172929, + "grad_norm": 341.8509521484375, + "learning_rate": 1.1009053386796215e-06, + "loss": 17.658, + "step": 403110 + }, + { + "epoch": 0.8143279047499767, + "grad_norm": 457.42999267578125, + "learning_rate": 1.1006868311657848e-06, + "loss": 14.7793, + "step": 403120 + }, + { + "epoch": 0.8143481053826606, + "grad_norm": 904.3662719726562, + "learning_rate": 1.100468342656727e-06, + "loss": 23.9291, + "step": 403130 + }, + { + "epoch": 0.8143683060153444, + "grad_norm": 
259.1905212402344, + "learning_rate": 1.1002498731535123e-06, + "loss": 8.0702, + "step": 403140 + }, + { + "epoch": 0.8143885066480282, + "grad_norm": 458.9711608886719, + "learning_rate": 1.1000314226572083e-06, + "loss": 25.5438, + "step": 403150 + }, + { + "epoch": 0.814408707280712, + "grad_norm": 650.0687866210938, + "learning_rate": 1.0998129911688766e-06, + "loss": 22.2907, + "step": 403160 + }, + { + "epoch": 0.8144289079133958, + "grad_norm": 118.45144653320312, + "learning_rate": 1.0995945786895846e-06, + "loss": 18.8227, + "step": 403170 + }, + { + "epoch": 0.8144491085460797, + "grad_norm": 321.75457763671875, + "learning_rate": 1.0993761852203943e-06, + "loss": 12.9093, + "step": 403180 + }, + { + "epoch": 0.8144693091787635, + "grad_norm": 291.5635681152344, + "learning_rate": 1.099157810762373e-06, + "loss": 25.4308, + "step": 403190 + }, + { + "epoch": 0.8144895098114473, + "grad_norm": 570.944091796875, + "learning_rate": 1.0989394553165833e-06, + "loss": 32.1767, + "step": 403200 + }, + { + "epoch": 0.8145097104441311, + "grad_norm": 231.1138916015625, + "learning_rate": 1.098721118884088e-06, + "loss": 8.4851, + "step": 403210 + }, + { + "epoch": 0.8145299110768149, + "grad_norm": 8.792352676391602, + "learning_rate": 1.0985028014659544e-06, + "loss": 9.7354, + "step": 403220 + }, + { + "epoch": 0.8145501117094988, + "grad_norm": 379.2449645996094, + "learning_rate": 1.0982845030632445e-06, + "loss": 12.45, + "step": 403230 + }, + { + "epoch": 0.8145703123421826, + "grad_norm": 261.1451416015625, + "learning_rate": 1.0980662236770217e-06, + "loss": 10.7204, + "step": 403240 + }, + { + "epoch": 0.8145905129748664, + "grad_norm": 271.76934814453125, + "learning_rate": 1.097847963308351e-06, + "loss": 29.2455, + "step": 403250 + }, + { + "epoch": 0.8146107136075502, + "grad_norm": 296.5911865234375, + "learning_rate": 1.0976297219582988e-06, + "loss": 26.4947, + "step": 403260 + }, + { + "epoch": 0.814630914240234, + "grad_norm": 501.515869140625, + "learning_rate": 1.0974114996279229e-06, + "loss": 15.3263, + "step": 403270 + }, + { + "epoch": 0.8146511148729179, + "grad_norm": 437.312744140625, + "learning_rate": 1.09719329631829e-06, + "loss": 18.0124, + "step": 403280 + }, + { + "epoch": 0.8146713155056017, + "grad_norm": 2777.39013671875, + "learning_rate": 1.0969751120304656e-06, + "loss": 33.5099, + "step": 403290 + }, + { + "epoch": 0.8146915161382855, + "grad_norm": 155.07955932617188, + "learning_rate": 1.0967569467655104e-06, + "loss": 16.7757, + "step": 403300 + }, + { + "epoch": 0.8147117167709693, + "grad_norm": 64.98365783691406, + "learning_rate": 1.0965388005244876e-06, + "loss": 20.1442, + "step": 403310 + }, + { + "epoch": 0.814731917403653, + "grad_norm": 844.584228515625, + "learning_rate": 1.0963206733084607e-06, + "loss": 28.434, + "step": 403320 + }, + { + "epoch": 0.8147521180363368, + "grad_norm": 282.7172546386719, + "learning_rate": 1.096102565118497e-06, + "loss": 8.1605, + "step": 403330 + }, + { + "epoch": 0.8147723186690207, + "grad_norm": 216.3434600830078, + "learning_rate": 1.0958844759556525e-06, + "loss": 13.9023, + "step": 403340 + }, + { + "epoch": 0.8147925193017045, + "grad_norm": 35.27849578857422, + "learning_rate": 1.0956664058209936e-06, + "loss": 28.5152, + "step": 403350 + }, + { + "epoch": 0.8148127199343883, + "grad_norm": 423.6056823730469, + "learning_rate": 1.0954483547155846e-06, + "loss": 17.6159, + "step": 403360 + }, + { + "epoch": 0.8148329205670721, + "grad_norm": 84.75102233886719, + "learning_rate": 
1.095230322640487e-06, + "loss": 17.3009, + "step": 403370 + }, + { + "epoch": 0.814853121199756, + "grad_norm": 377.6807556152344, + "learning_rate": 1.0950123095967614e-06, + "loss": 7.4353, + "step": 403380 + }, + { + "epoch": 0.8148733218324398, + "grad_norm": 326.2807312011719, + "learning_rate": 1.094794315585474e-06, + "loss": 15.2747, + "step": 403390 + }, + { + "epoch": 0.8148935224651236, + "grad_norm": 167.96466064453125, + "learning_rate": 1.0945763406076837e-06, + "loss": 15.7253, + "step": 403400 + }, + { + "epoch": 0.8149137230978074, + "grad_norm": 331.5107116699219, + "learning_rate": 1.0943583846644561e-06, + "loss": 19.3244, + "step": 403410 + }, + { + "epoch": 0.8149339237304912, + "grad_norm": 530.7330932617188, + "learning_rate": 1.0941404477568524e-06, + "loss": 22.7064, + "step": 403420 + }, + { + "epoch": 0.814954124363175, + "grad_norm": 522.302490234375, + "learning_rate": 1.0939225298859324e-06, + "loss": 26.8466, + "step": 403430 + }, + { + "epoch": 0.8149743249958589, + "grad_norm": 593.5051879882812, + "learning_rate": 1.093704631052762e-06, + "loss": 14.2516, + "step": 403440 + }, + { + "epoch": 0.8149945256285427, + "grad_norm": 1005.314453125, + "learning_rate": 1.0934867512584013e-06, + "loss": 28.3563, + "step": 403450 + }, + { + "epoch": 0.8150147262612265, + "grad_norm": 551.16748046875, + "learning_rate": 1.0932688905039113e-06, + "loss": 24.4639, + "step": 403460 + }, + { + "epoch": 0.8150349268939103, + "grad_norm": 150.2791748046875, + "learning_rate": 1.0930510487903544e-06, + "loss": 24.9412, + "step": 403470 + }, + { + "epoch": 0.8150551275265941, + "grad_norm": 604.1342163085938, + "learning_rate": 1.0928332261187947e-06, + "loss": 19.7963, + "step": 403480 + }, + { + "epoch": 0.815075328159278, + "grad_norm": 290.0917053222656, + "learning_rate": 1.0926154224902919e-06, + "loss": 25.0408, + "step": 403490 + }, + { + "epoch": 0.8150955287919618, + "grad_norm": 314.20062255859375, + "learning_rate": 1.0923976379059059e-06, + "loss": 23.2143, + "step": 403500 + }, + { + "epoch": 0.8151157294246456, + "grad_norm": 665.75244140625, + "learning_rate": 1.0921798723667015e-06, + "loss": 24.1577, + "step": 403510 + }, + { + "epoch": 0.8151359300573294, + "grad_norm": 496.9930419921875, + "learning_rate": 1.0919621258737384e-06, + "loss": 19.7391, + "step": 403520 + }, + { + "epoch": 0.8151561306900132, + "grad_norm": 489.5891418457031, + "learning_rate": 1.0917443984280769e-06, + "loss": 14.7234, + "step": 403530 + }, + { + "epoch": 0.8151763313226971, + "grad_norm": 342.9373474121094, + "learning_rate": 1.0915266900307785e-06, + "loss": 22.7523, + "step": 403540 + }, + { + "epoch": 0.8151965319553809, + "grad_norm": 365.2608337402344, + "learning_rate": 1.0913090006829085e-06, + "loss": 31.3453, + "step": 403550 + }, + { + "epoch": 0.8152167325880647, + "grad_norm": 8.63314437866211, + "learning_rate": 1.0910913303855208e-06, + "loss": 12.228, + "step": 403560 + }, + { + "epoch": 0.8152369332207485, + "grad_norm": 114.32124328613281, + "learning_rate": 1.0908736791396807e-06, + "loss": 10.0802, + "step": 403570 + }, + { + "epoch": 0.8152571338534322, + "grad_norm": 497.6786193847656, + "learning_rate": 1.0906560469464488e-06, + "loss": 16.794, + "step": 403580 + }, + { + "epoch": 0.8152773344861161, + "grad_norm": 617.8209838867188, + "learning_rate": 1.0904384338068856e-06, + "loss": 17.6674, + "step": 403590 + }, + { + "epoch": 0.8152975351187999, + "grad_norm": 416.3782043457031, + "learning_rate": 1.09022083972205e-06, + "loss": 14.1802, + "step": 
403600 + }, + { + "epoch": 0.8153177357514837, + "grad_norm": 403.2692565917969, + "learning_rate": 1.090003264693003e-06, + "loss": 16.3625, + "step": 403610 + }, + { + "epoch": 0.8153379363841675, + "grad_norm": 113.26661682128906, + "learning_rate": 1.0897857087208098e-06, + "loss": 16.398, + "step": 403620 + }, + { + "epoch": 0.8153581370168513, + "grad_norm": 328.08807373046875, + "learning_rate": 1.0895681718065231e-06, + "loss": 21.6225, + "step": 403630 + }, + { + "epoch": 0.8153783376495352, + "grad_norm": 1026.9281005859375, + "learning_rate": 1.0893506539512071e-06, + "loss": 27.294, + "step": 403640 + }, + { + "epoch": 0.815398538282219, + "grad_norm": 102.67794799804688, + "learning_rate": 1.0891331551559237e-06, + "loss": 15.5267, + "step": 403650 + }, + { + "epoch": 0.8154187389149028, + "grad_norm": 361.410400390625, + "learning_rate": 1.0889156754217306e-06, + "loss": 17.3861, + "step": 403660 + }, + { + "epoch": 0.8154389395475866, + "grad_norm": 267.0630798339844, + "learning_rate": 1.0886982147496866e-06, + "loss": 22.795, + "step": 403670 + }, + { + "epoch": 0.8154591401802704, + "grad_norm": 220.8252716064453, + "learning_rate": 1.0884807731408542e-06, + "loss": 18.6385, + "step": 403680 + }, + { + "epoch": 0.8154793408129543, + "grad_norm": 350.0182800292969, + "learning_rate": 1.0882633505962924e-06, + "loss": 24.2768, + "step": 403690 + }, + { + "epoch": 0.8154995414456381, + "grad_norm": 535.690673828125, + "learning_rate": 1.0880459471170597e-06, + "loss": 25.3048, + "step": 403700 + }, + { + "epoch": 0.8155197420783219, + "grad_norm": 388.8749694824219, + "learning_rate": 1.0878285627042173e-06, + "loss": 17.7655, + "step": 403710 + }, + { + "epoch": 0.8155399427110057, + "grad_norm": 404.4278869628906, + "learning_rate": 1.0876111973588233e-06, + "loss": 15.9903, + "step": 403720 + }, + { + "epoch": 0.8155601433436895, + "grad_norm": 672.9110717773438, + "learning_rate": 1.0873938510819381e-06, + "loss": 23.4759, + "step": 403730 + }, + { + "epoch": 0.8155803439763734, + "grad_norm": 529.5740356445312, + "learning_rate": 1.0871765238746219e-06, + "loss": 24.3309, + "step": 403740 + }, + { + "epoch": 0.8156005446090572, + "grad_norm": 523.105224609375, + "learning_rate": 1.0869592157379305e-06, + "loss": 17.2011, + "step": 403750 + }, + { + "epoch": 0.815620745241741, + "grad_norm": 307.237060546875, + "learning_rate": 1.0867419266729274e-06, + "loss": 17.2972, + "step": 403760 + }, + { + "epoch": 0.8156409458744248, + "grad_norm": 136.25494384765625, + "learning_rate": 1.0865246566806676e-06, + "loss": 22.0616, + "step": 403770 + }, + { + "epoch": 0.8156611465071086, + "grad_norm": 69.41106414794922, + "learning_rate": 1.0863074057622136e-06, + "loss": 22.4569, + "step": 403780 + }, + { + "epoch": 0.8156813471397925, + "grad_norm": 529.0281982421875, + "learning_rate": 1.0860901739186209e-06, + "loss": 27.5257, + "step": 403790 + }, + { + "epoch": 0.8157015477724763, + "grad_norm": 33.62493896484375, + "learning_rate": 1.0858729611509516e-06, + "loss": 13.3982, + "step": 403800 + }, + { + "epoch": 0.8157217484051601, + "grad_norm": 362.707763671875, + "learning_rate": 1.085655767460263e-06, + "loss": 15.2563, + "step": 403810 + }, + { + "epoch": 0.8157419490378439, + "grad_norm": 314.35736083984375, + "learning_rate": 1.085438592847612e-06, + "loss": 19.942, + "step": 403820 + }, + { + "epoch": 0.8157621496705277, + "grad_norm": 461.19097900390625, + "learning_rate": 1.0852214373140596e-06, + "loss": 17.6933, + "step": 403830 + }, + { + "epoch": 
0.8157823503032114, + "grad_norm": 484.7992248535156, + "learning_rate": 1.085004300860663e-06, + "loss": 22.1922, + "step": 403840 + }, + { + "epoch": 0.8158025509358953, + "grad_norm": 338.7452087402344, + "learning_rate": 1.0847871834884798e-06, + "loss": 20.6989, + "step": 403850 + }, + { + "epoch": 0.8158227515685791, + "grad_norm": 1.6299223899841309, + "learning_rate": 1.0845700851985686e-06, + "loss": 43.439, + "step": 403860 + }, + { + "epoch": 0.8158429522012629, + "grad_norm": 495.8385009765625, + "learning_rate": 1.084353005991991e-06, + "loss": 16.6965, + "step": 403870 + }, + { + "epoch": 0.8158631528339467, + "grad_norm": 245.8248291015625, + "learning_rate": 1.0841359458697986e-06, + "loss": 10.4521, + "step": 403880 + }, + { + "epoch": 0.8158833534666305, + "grad_norm": 452.2076416015625, + "learning_rate": 1.0839189048330534e-06, + "loss": 23.4567, + "step": 403890 + }, + { + "epoch": 0.8159035540993144, + "grad_norm": 383.6417541503906, + "learning_rate": 1.0837018828828133e-06, + "loss": 11.7336, + "step": 403900 + }, + { + "epoch": 0.8159237547319982, + "grad_norm": 503.1542053222656, + "learning_rate": 1.0834848800201358e-06, + "loss": 17.9203, + "step": 403910 + }, + { + "epoch": 0.815943955364682, + "grad_norm": 663.6284790039062, + "learning_rate": 1.0832678962460759e-06, + "loss": 13.4979, + "step": 403920 + }, + { + "epoch": 0.8159641559973658, + "grad_norm": 263.2528991699219, + "learning_rate": 1.0830509315616938e-06, + "loss": 8.2308, + "step": 403930 + }, + { + "epoch": 0.8159843566300496, + "grad_norm": 56.36683654785156, + "learning_rate": 1.0828339859680487e-06, + "loss": 10.5519, + "step": 403940 + }, + { + "epoch": 0.8160045572627335, + "grad_norm": 387.2181701660156, + "learning_rate": 1.0826170594661933e-06, + "loss": 19.104, + "step": 403950 + }, + { + "epoch": 0.8160247578954173, + "grad_norm": 483.9422302246094, + "learning_rate": 1.082400152057187e-06, + "loss": 22.9121, + "step": 403960 + }, + { + "epoch": 0.8160449585281011, + "grad_norm": 285.2896423339844, + "learning_rate": 1.0821832637420887e-06, + "loss": 12.4481, + "step": 403970 + }, + { + "epoch": 0.8160651591607849, + "grad_norm": 646.0913696289062, + "learning_rate": 1.0819663945219538e-06, + "loss": 12.5675, + "step": 403980 + }, + { + "epoch": 0.8160853597934687, + "grad_norm": 20.109582901000977, + "learning_rate": 1.0817495443978381e-06, + "loss": 11.5095, + "step": 403990 + }, + { + "epoch": 0.8161055604261526, + "grad_norm": 417.9738464355469, + "learning_rate": 1.0815327133708015e-06, + "loss": 28.713, + "step": 404000 + }, + { + "epoch": 0.8161257610588364, + "grad_norm": 544.0221557617188, + "learning_rate": 1.0813159014418995e-06, + "loss": 13.9935, + "step": 404010 + }, + { + "epoch": 0.8161459616915202, + "grad_norm": 352.82421875, + "learning_rate": 1.0810991086121863e-06, + "loss": 19.4792, + "step": 404020 + }, + { + "epoch": 0.816166162324204, + "grad_norm": 199.01919555664062, + "learning_rate": 1.080882334882723e-06, + "loss": 14.4945, + "step": 404030 + }, + { + "epoch": 0.8161863629568878, + "grad_norm": 382.7222900390625, + "learning_rate": 1.0806655802545617e-06, + "loss": 25.3046, + "step": 404040 + }, + { + "epoch": 0.8162065635895717, + "grad_norm": 108.6363296508789, + "learning_rate": 1.080448844728763e-06, + "loss": 15.9086, + "step": 404050 + }, + { + "epoch": 0.8162267642222555, + "grad_norm": 161.2914581298828, + "learning_rate": 1.0802321283063794e-06, + "loss": 11.3683, + "step": 404060 + }, + { + "epoch": 0.8162469648549393, + "grad_norm": 
1235.232421875, + "learning_rate": 1.0800154309884708e-06, + "loss": 18.9475, + "step": 404070 + }, + { + "epoch": 0.8162671654876231, + "grad_norm": 730.6261596679688, + "learning_rate": 1.07979875277609e-06, + "loss": 18.1601, + "step": 404080 + }, + { + "epoch": 0.8162873661203068, + "grad_norm": 498.7947998046875, + "learning_rate": 1.079582093670296e-06, + "loss": 30.4366, + "step": 404090 + }, + { + "epoch": 0.8163075667529907, + "grad_norm": 422.489013671875, + "learning_rate": 1.0793654536721432e-06, + "loss": 24.6204, + "step": 404100 + }, + { + "epoch": 0.8163277673856745, + "grad_norm": 469.9093322753906, + "learning_rate": 1.0791488327826865e-06, + "loss": 13.0858, + "step": 404110 + }, + { + "epoch": 0.8163479680183583, + "grad_norm": 302.9147033691406, + "learning_rate": 1.0789322310029842e-06, + "loss": 18.3818, + "step": 404120 + }, + { + "epoch": 0.8163681686510421, + "grad_norm": 20.856220245361328, + "learning_rate": 1.0787156483340905e-06, + "loss": 35.7565, + "step": 404130 + }, + { + "epoch": 0.8163883692837259, + "grad_norm": 424.981689453125, + "learning_rate": 1.07849908477706e-06, + "loss": 14.2802, + "step": 404140 + }, + { + "epoch": 0.8164085699164098, + "grad_norm": 258.5683898925781, + "learning_rate": 1.0782825403329488e-06, + "loss": 10.4218, + "step": 404150 + }, + { + "epoch": 0.8164287705490936, + "grad_norm": 80.01229858398438, + "learning_rate": 1.0780660150028161e-06, + "loss": 16.5788, + "step": 404160 + }, + { + "epoch": 0.8164489711817774, + "grad_norm": 496.80877685546875, + "learning_rate": 1.077849508787711e-06, + "loss": 17.968, + "step": 404170 + }, + { + "epoch": 0.8164691718144612, + "grad_norm": 243.45626831054688, + "learning_rate": 1.077633021688692e-06, + "loss": 12.9528, + "step": 404180 + }, + { + "epoch": 0.816489372447145, + "grad_norm": 1015.302001953125, + "learning_rate": 1.0774165537068154e-06, + "loss": 23.2021, + "step": 404190 + }, + { + "epoch": 0.8165095730798289, + "grad_norm": 326.9949951171875, + "learning_rate": 1.077200104843134e-06, + "loss": 21.9942, + "step": 404200 + }, + { + "epoch": 0.8165297737125127, + "grad_norm": 580.8416137695312, + "learning_rate": 1.0769836750987028e-06, + "loss": 22.5684, + "step": 404210 + }, + { + "epoch": 0.8165499743451965, + "grad_norm": 247.5938720703125, + "learning_rate": 1.0767672644745774e-06, + "loss": 22.6101, + "step": 404220 + }, + { + "epoch": 0.8165701749778803, + "grad_norm": 343.797119140625, + "learning_rate": 1.076550872971815e-06, + "loss": 18.3597, + "step": 404230 + }, + { + "epoch": 0.8165903756105641, + "grad_norm": 482.3746337890625, + "learning_rate": 1.0763345005914649e-06, + "loss": 22.0443, + "step": 404240 + }, + { + "epoch": 0.816610576243248, + "grad_norm": 80.26362609863281, + "learning_rate": 1.076118147334585e-06, + "loss": 25.7038, + "step": 404250 + }, + { + "epoch": 0.8166307768759318, + "grad_norm": 228.49267578125, + "learning_rate": 1.0759018132022302e-06, + "loss": 16.9956, + "step": 404260 + }, + { + "epoch": 0.8166509775086156, + "grad_norm": 614.4641723632812, + "learning_rate": 1.0756854981954546e-06, + "loss": 17.7355, + "step": 404270 + }, + { + "epoch": 0.8166711781412994, + "grad_norm": 381.8021240234375, + "learning_rate": 1.0754692023153101e-06, + "loss": 12.6121, + "step": 404280 + }, + { + "epoch": 0.8166913787739832, + "grad_norm": 364.3545837402344, + "learning_rate": 1.0752529255628542e-06, + "loss": 12.8285, + "step": 404290 + }, + { + "epoch": 0.816711579406667, + "grad_norm": 631.591064453125, + "learning_rate": 
1.0750366679391393e-06, + "loss": 33.1447, + "step": 404300 + }, + { + "epoch": 0.8167317800393509, + "grad_norm": 119.57291412353516, + "learning_rate": 1.0748204294452187e-06, + "loss": 14.9711, + "step": 404310 + }, + { + "epoch": 0.8167519806720347, + "grad_norm": 201.0452423095703, + "learning_rate": 1.0746042100821485e-06, + "loss": 19.1192, + "step": 404320 + }, + { + "epoch": 0.8167721813047185, + "grad_norm": 16.87002944946289, + "learning_rate": 1.0743880098509802e-06, + "loss": 10.1328, + "step": 404330 + }, + { + "epoch": 0.8167923819374023, + "grad_norm": 395.2453918457031, + "learning_rate": 1.074171828752769e-06, + "loss": 17.627, + "step": 404340 + }, + { + "epoch": 0.816812582570086, + "grad_norm": 385.77197265625, + "learning_rate": 1.0739556667885692e-06, + "loss": 25.4675, + "step": 404350 + }, + { + "epoch": 0.8168327832027699, + "grad_norm": 399.2333984375, + "learning_rate": 1.0737395239594318e-06, + "loss": 14.6198, + "step": 404360 + }, + { + "epoch": 0.8168529838354537, + "grad_norm": 221.7762908935547, + "learning_rate": 1.0735234002664123e-06, + "loss": 15.0746, + "step": 404370 + }, + { + "epoch": 0.8168731844681375, + "grad_norm": 556.7322387695312, + "learning_rate": 1.0733072957105633e-06, + "loss": 16.0611, + "step": 404380 + }, + { + "epoch": 0.8168933851008213, + "grad_norm": 318.266845703125, + "learning_rate": 1.0730912102929392e-06, + "loss": 14.5884, + "step": 404390 + }, + { + "epoch": 0.8169135857335051, + "grad_norm": 185.9705352783203, + "learning_rate": 1.0728751440145907e-06, + "loss": 6.693, + "step": 404400 + }, + { + "epoch": 0.816933786366189, + "grad_norm": 246.94725036621094, + "learning_rate": 1.0726590968765738e-06, + "loss": 14.6006, + "step": 404410 + }, + { + "epoch": 0.8169539869988728, + "grad_norm": 938.7547607421875, + "learning_rate": 1.0724430688799402e-06, + "loss": 15.6411, + "step": 404420 + }, + { + "epoch": 0.8169741876315566, + "grad_norm": 113.59290313720703, + "learning_rate": 1.0722270600257411e-06, + "loss": 25.6076, + "step": 404430 + }, + { + "epoch": 0.8169943882642404, + "grad_norm": 1010.087646484375, + "learning_rate": 1.0720110703150327e-06, + "loss": 28.7306, + "step": 404440 + }, + { + "epoch": 0.8170145888969242, + "grad_norm": 516.1767578125, + "learning_rate": 1.0717950997488662e-06, + "loss": 12.0093, + "step": 404450 + }, + { + "epoch": 0.8170347895296081, + "grad_norm": 119.9769287109375, + "learning_rate": 1.0715791483282922e-06, + "loss": 26.2367, + "step": 404460 + }, + { + "epoch": 0.8170549901622919, + "grad_norm": 882.0177612304688, + "learning_rate": 1.0713632160543647e-06, + "loss": 20.4992, + "step": 404470 + }, + { + "epoch": 0.8170751907949757, + "grad_norm": 547.028076171875, + "learning_rate": 1.0711473029281394e-06, + "loss": 13.6764, + "step": 404480 + }, + { + "epoch": 0.8170953914276595, + "grad_norm": 143.71363830566406, + "learning_rate": 1.0709314089506634e-06, + "loss": 17.0387, + "step": 404490 + }, + { + "epoch": 0.8171155920603433, + "grad_norm": 488.9263000488281, + "learning_rate": 1.0707155341229902e-06, + "loss": 16.6229, + "step": 404500 + }, + { + "epoch": 0.8171357926930272, + "grad_norm": 382.60723876953125, + "learning_rate": 1.0704996784461753e-06, + "loss": 30.174, + "step": 404510 + }, + { + "epoch": 0.817155993325711, + "grad_norm": 285.2334289550781, + "learning_rate": 1.0702838419212674e-06, + "loss": 19.4932, + "step": 404520 + }, + { + "epoch": 0.8171761939583948, + "grad_norm": 410.0068664550781, + "learning_rate": 1.0700680245493188e-06, + "loss": 19.0309, + 
"step": 404530 + }, + { + "epoch": 0.8171963945910786, + "grad_norm": 619.8792724609375, + "learning_rate": 1.0698522263313816e-06, + "loss": 11.0317, + "step": 404540 + }, + { + "epoch": 0.8172165952237624, + "grad_norm": 278.36474609375, + "learning_rate": 1.0696364472685112e-06, + "loss": 9.48, + "step": 404550 + }, + { + "epoch": 0.8172367958564463, + "grad_norm": 166.591796875, + "learning_rate": 1.069420687361753e-06, + "loss": 9.1543, + "step": 404560 + }, + { + "epoch": 0.8172569964891301, + "grad_norm": 264.59930419921875, + "learning_rate": 1.0692049466121613e-06, + "loss": 11.8805, + "step": 404570 + }, + { + "epoch": 0.8172771971218139, + "grad_norm": 520.3172607421875, + "learning_rate": 1.0689892250207894e-06, + "loss": 16.2525, + "step": 404580 + }, + { + "epoch": 0.8172973977544977, + "grad_norm": 450.2430114746094, + "learning_rate": 1.0687735225886874e-06, + "loss": 24.1585, + "step": 404590 + }, + { + "epoch": 0.8173175983871814, + "grad_norm": 769.260009765625, + "learning_rate": 1.0685578393169054e-06, + "loss": 23.6105, + "step": 404600 + }, + { + "epoch": 0.8173377990198653, + "grad_norm": 858.8841552734375, + "learning_rate": 1.068342175206497e-06, + "loss": 15.8296, + "step": 404610 + }, + { + "epoch": 0.8173579996525491, + "grad_norm": 1134.909912109375, + "learning_rate": 1.0681265302585114e-06, + "loss": 28.3626, + "step": 404620 + }, + { + "epoch": 0.8173782002852329, + "grad_norm": 487.4468688964844, + "learning_rate": 1.0679109044739994e-06, + "loss": 12.7139, + "step": 404630 + }, + { + "epoch": 0.8173984009179167, + "grad_norm": 429.1744079589844, + "learning_rate": 1.067695297854014e-06, + "loss": 29.3317, + "step": 404640 + }, + { + "epoch": 0.8174186015506005, + "grad_norm": 230.45068359375, + "learning_rate": 1.0674797103996033e-06, + "loss": 11.5221, + "step": 404650 + }, + { + "epoch": 0.8174388021832844, + "grad_norm": 285.8327941894531, + "learning_rate": 1.0672641421118214e-06, + "loss": 15.1606, + "step": 404660 + }, + { + "epoch": 0.8174590028159682, + "grad_norm": 595.5794677734375, + "learning_rate": 1.067048592991715e-06, + "loss": 16.7449, + "step": 404670 + }, + { + "epoch": 0.817479203448652, + "grad_norm": 312.82275390625, + "learning_rate": 1.0668330630403383e-06, + "loss": 21.7909, + "step": 404680 + }, + { + "epoch": 0.8174994040813358, + "grad_norm": 646.6598510742188, + "learning_rate": 1.0666175522587402e-06, + "loss": 14.1596, + "step": 404690 + }, + { + "epoch": 0.8175196047140196, + "grad_norm": 343.3512268066406, + "learning_rate": 1.0664020606479702e-06, + "loss": 18.2318, + "step": 404700 + }, + { + "epoch": 0.8175398053467035, + "grad_norm": 286.7987060546875, + "learning_rate": 1.0661865882090805e-06, + "loss": 14.2659, + "step": 404710 + }, + { + "epoch": 0.8175600059793873, + "grad_norm": 522.711181640625, + "learning_rate": 1.0659711349431184e-06, + "loss": 16.0903, + "step": 404720 + }, + { + "epoch": 0.8175802066120711, + "grad_norm": 211.34999084472656, + "learning_rate": 1.0657557008511377e-06, + "loss": 15.7913, + "step": 404730 + }, + { + "epoch": 0.8176004072447549, + "grad_norm": 293.5294494628906, + "learning_rate": 1.0655402859341868e-06, + "loss": 21.6211, + "step": 404740 + }, + { + "epoch": 0.8176206078774387, + "grad_norm": 131.2279815673828, + "learning_rate": 1.065324890193314e-06, + "loss": 14.5233, + "step": 404750 + }, + { + "epoch": 0.8176408085101226, + "grad_norm": 132.0124969482422, + "learning_rate": 1.0651095136295713e-06, + "loss": 26.1818, + "step": 404760 + }, + { + "epoch": 0.8176610091428064, 
+ "grad_norm": 258.7015686035156, + "learning_rate": 1.064894156244008e-06, + "loss": 60.9527, + "step": 404770 + }, + { + "epoch": 0.8176812097754902, + "grad_norm": 0.49908149242401123, + "learning_rate": 1.0646788180376716e-06, + "loss": 18.8364, + "step": 404780 + }, + { + "epoch": 0.817701410408174, + "grad_norm": 522.22265625, + "learning_rate": 1.0644634990116132e-06, + "loss": 17.0823, + "step": 404790 + }, + { + "epoch": 0.8177216110408578, + "grad_norm": 166.57650756835938, + "learning_rate": 1.064248199166884e-06, + "loss": 20.4954, + "step": 404800 + }, + { + "epoch": 0.8177418116735417, + "grad_norm": 406.01080322265625, + "learning_rate": 1.0640329185045323e-06, + "loss": 24.5166, + "step": 404810 + }, + { + "epoch": 0.8177620123062255, + "grad_norm": 643.4286499023438, + "learning_rate": 1.0638176570256048e-06, + "loss": 16.3062, + "step": 404820 + }, + { + "epoch": 0.8177822129389093, + "grad_norm": 501.24493408203125, + "learning_rate": 1.0636024147311524e-06, + "loss": 34.6792, + "step": 404830 + }, + { + "epoch": 0.8178024135715931, + "grad_norm": 317.7840270996094, + "learning_rate": 1.0633871916222277e-06, + "loss": 17.0821, + "step": 404840 + }, + { + "epoch": 0.8178226142042769, + "grad_norm": 754.8977661132812, + "learning_rate": 1.0631719876998736e-06, + "loss": 13.5358, + "step": 404850 + }, + { + "epoch": 0.8178428148369606, + "grad_norm": 312.731201171875, + "learning_rate": 1.0629568029651416e-06, + "loss": 8.0272, + "step": 404860 + }, + { + "epoch": 0.8178630154696445, + "grad_norm": 443.60174560546875, + "learning_rate": 1.0627416374190818e-06, + "loss": 18.4988, + "step": 404870 + }, + { + "epoch": 0.8178832161023283, + "grad_norm": 319.082275390625, + "learning_rate": 1.062526491062742e-06, + "loss": 14.2685, + "step": 404880 + }, + { + "epoch": 0.8179034167350121, + "grad_norm": 471.2208251953125, + "learning_rate": 1.0623113638971688e-06, + "loss": 19.6446, + "step": 404890 + }, + { + "epoch": 0.8179236173676959, + "grad_norm": 170.76010131835938, + "learning_rate": 1.0620962559234144e-06, + "loss": 6.4314, + "step": 404900 + }, + { + "epoch": 0.8179438180003797, + "grad_norm": 688.2703857421875, + "learning_rate": 1.0618811671425244e-06, + "loss": 20.1725, + "step": 404910 + }, + { + "epoch": 0.8179640186330636, + "grad_norm": 306.8644714355469, + "learning_rate": 1.0616660975555476e-06, + "loss": 11.3003, + "step": 404920 + }, + { + "epoch": 0.8179842192657474, + "grad_norm": 526.6558227539062, + "learning_rate": 1.0614510471635332e-06, + "loss": 21.5544, + "step": 404930 + }, + { + "epoch": 0.8180044198984312, + "grad_norm": 33.974609375, + "learning_rate": 1.0612360159675278e-06, + "loss": 17.053, + "step": 404940 + }, + { + "epoch": 0.818024620531115, + "grad_norm": 545.7177124023438, + "learning_rate": 1.0610210039685815e-06, + "loss": 42.8789, + "step": 404950 + }, + { + "epoch": 0.8180448211637988, + "grad_norm": 299.2607727050781, + "learning_rate": 1.0608060111677409e-06, + "loss": 24.8136, + "step": 404960 + }, + { + "epoch": 0.8180650217964827, + "grad_norm": 329.0627136230469, + "learning_rate": 1.0605910375660527e-06, + "loss": 23.0713, + "step": 404970 + }, + { + "epoch": 0.8180852224291665, + "grad_norm": 301.731201171875, + "learning_rate": 1.0603760831645677e-06, + "loss": 20.3733, + "step": 404980 + }, + { + "epoch": 0.8181054230618503, + "grad_norm": 68.4891586303711, + "learning_rate": 1.0601611479643303e-06, + "loss": 24.1063, + "step": 404990 + }, + { + "epoch": 0.8181256236945341, + "grad_norm": 294.00238037109375, + 
"learning_rate": 1.0599462319663906e-06, + "loss": 12.5412, + "step": 405000 + }, + { + "epoch": 0.8181458243272179, + "grad_norm": 380.47210693359375, + "learning_rate": 1.0597313351717942e-06, + "loss": 24.804, + "step": 405010 + }, + { + "epoch": 0.8181660249599018, + "grad_norm": 228.90867614746094, + "learning_rate": 1.0595164575815909e-06, + "loss": 24.4986, + "step": 405020 + }, + { + "epoch": 0.8181862255925856, + "grad_norm": 764.163818359375, + "learning_rate": 1.0593015991968258e-06, + "loss": 17.5265, + "step": 405030 + }, + { + "epoch": 0.8182064262252694, + "grad_norm": 176.84999084472656, + "learning_rate": 1.0590867600185462e-06, + "loss": 16.077, + "step": 405040 + }, + { + "epoch": 0.8182266268579532, + "grad_norm": 328.25531005859375, + "learning_rate": 1.0588719400478004e-06, + "loss": 24.8989, + "step": 405050 + }, + { + "epoch": 0.818246827490637, + "grad_norm": 22.21516227722168, + "learning_rate": 1.0586571392856354e-06, + "loss": 13.0133, + "step": 405060 + }, + { + "epoch": 0.8182670281233209, + "grad_norm": 229.92356872558594, + "learning_rate": 1.0584423577330955e-06, + "loss": 16.3715, + "step": 405070 + }, + { + "epoch": 0.8182872287560047, + "grad_norm": 277.6341857910156, + "learning_rate": 1.0582275953912296e-06, + "loss": 19.9553, + "step": 405080 + }, + { + "epoch": 0.8183074293886885, + "grad_norm": 303.4682922363281, + "learning_rate": 1.0580128522610872e-06, + "loss": 10.4468, + "step": 405090 + }, + { + "epoch": 0.8183276300213723, + "grad_norm": 276.90863037109375, + "learning_rate": 1.0577981283437095e-06, + "loss": 27.1738, + "step": 405100 + }, + { + "epoch": 0.818347830654056, + "grad_norm": 449.3738098144531, + "learning_rate": 1.0575834236401455e-06, + "loss": 16.6618, + "step": 405110 + }, + { + "epoch": 0.8183680312867398, + "grad_norm": 418.2468566894531, + "learning_rate": 1.057368738151443e-06, + "loss": 18.0085, + "step": 405120 + }, + { + "epoch": 0.8183882319194237, + "grad_norm": 276.5181579589844, + "learning_rate": 1.0571540718786471e-06, + "loss": 21.9944, + "step": 405130 + }, + { + "epoch": 0.8184084325521075, + "grad_norm": 215.91197204589844, + "learning_rate": 1.0569394248228026e-06, + "loss": 22.492, + "step": 405140 + }, + { + "epoch": 0.8184286331847913, + "grad_norm": 284.6844787597656, + "learning_rate": 1.0567247969849576e-06, + "loss": 19.5159, + "step": 405150 + }, + { + "epoch": 0.8184488338174751, + "grad_norm": 509.63507080078125, + "learning_rate": 1.05651018836616e-06, + "loss": 13.3617, + "step": 405160 + }, + { + "epoch": 0.818469034450159, + "grad_norm": 59.847103118896484, + "learning_rate": 1.0562955989674506e-06, + "loss": 16.318, + "step": 405170 + }, + { + "epoch": 0.8184892350828428, + "grad_norm": 612.8087768554688, + "learning_rate": 1.0560810287898783e-06, + "loss": 17.2715, + "step": 405180 + }, + { + "epoch": 0.8185094357155266, + "grad_norm": 515.5489501953125, + "learning_rate": 1.05586647783449e-06, + "loss": 10.0786, + "step": 405190 + }, + { + "epoch": 0.8185296363482104, + "grad_norm": 211.81011962890625, + "learning_rate": 1.0556519461023301e-06, + "loss": 10.0412, + "step": 405200 + }, + { + "epoch": 0.8185498369808942, + "grad_norm": 277.3237609863281, + "learning_rate": 1.0554374335944429e-06, + "loss": 21.2124, + "step": 405210 + }, + { + "epoch": 0.818570037613578, + "grad_norm": 16.84105682373047, + "learning_rate": 1.055222940311877e-06, + "loss": 13.1539, + "step": 405220 + }, + { + "epoch": 0.8185902382462619, + "grad_norm": 501.70941162109375, + "learning_rate": 
1.0550084662556753e-06, + "loss": 18.1266, + "step": 405230 + }, + { + "epoch": 0.8186104388789457, + "grad_norm": 722.0267944335938, + "learning_rate": 1.0547940114268828e-06, + "loss": 27.4088, + "step": 405240 + }, + { + "epoch": 0.8186306395116295, + "grad_norm": 510.4221496582031, + "learning_rate": 1.0545795758265476e-06, + "loss": 18.9063, + "step": 405250 + }, + { + "epoch": 0.8186508401443133, + "grad_norm": 469.6507873535156, + "learning_rate": 1.0543651594557113e-06, + "loss": 23.375, + "step": 405260 + }, + { + "epoch": 0.8186710407769971, + "grad_norm": 726.9376831054688, + "learning_rate": 1.0541507623154218e-06, + "loss": 18.6523, + "step": 405270 + }, + { + "epoch": 0.818691241409681, + "grad_norm": 549.9287719726562, + "learning_rate": 1.0539363844067218e-06, + "loss": 12.9811, + "step": 405280 + }, + { + "epoch": 0.8187114420423648, + "grad_norm": 445.047119140625, + "learning_rate": 1.053722025730659e-06, + "loss": 22.0171, + "step": 405290 + }, + { + "epoch": 0.8187316426750486, + "grad_norm": 396.04705810546875, + "learning_rate": 1.053507686288276e-06, + "loss": 13.1831, + "step": 405300 + }, + { + "epoch": 0.8187518433077324, + "grad_norm": 203.05210876464844, + "learning_rate": 1.0532933660806166e-06, + "loss": 15.9318, + "step": 405310 + }, + { + "epoch": 0.8187720439404162, + "grad_norm": 362.0765686035156, + "learning_rate": 1.053079065108728e-06, + "loss": 18.4131, + "step": 405320 + }, + { + "epoch": 0.8187922445731001, + "grad_norm": 342.76129150390625, + "learning_rate": 1.0528647833736516e-06, + "loss": 7.8498, + "step": 405330 + }, + { + "epoch": 0.8188124452057839, + "grad_norm": 172.86709594726562, + "learning_rate": 1.0526505208764353e-06, + "loss": 14.6202, + "step": 405340 + }, + { + "epoch": 0.8188326458384677, + "grad_norm": 442.9986572265625, + "learning_rate": 1.052436277618122e-06, + "loss": 19.5566, + "step": 405350 + }, + { + "epoch": 0.8188528464711515, + "grad_norm": 801.3681030273438, + "learning_rate": 1.0522220535997534e-06, + "loss": 12.3308, + "step": 405360 + }, + { + "epoch": 0.8188730471038352, + "grad_norm": 31.05193519592285, + "learning_rate": 1.0520078488223772e-06, + "loss": 19.1345, + "step": 405370 + }, + { + "epoch": 0.8188932477365191, + "grad_norm": 49.77863311767578, + "learning_rate": 1.0517936632870362e-06, + "loss": 15.6791, + "step": 405380 + }, + { + "epoch": 0.8189134483692029, + "grad_norm": 21.847225189208984, + "learning_rate": 1.0515794969947724e-06, + "loss": 11.8785, + "step": 405390 + }, + { + "epoch": 0.8189336490018867, + "grad_norm": 493.5738525390625, + "learning_rate": 1.0513653499466315e-06, + "loss": 13.4028, + "step": 405400 + }, + { + "epoch": 0.8189538496345705, + "grad_norm": 441.8828125, + "learning_rate": 1.0511512221436581e-06, + "loss": 14.2989, + "step": 405410 + }, + { + "epoch": 0.8189740502672543, + "grad_norm": 462.55963134765625, + "learning_rate": 1.0509371135868945e-06, + "loss": 14.1361, + "step": 405420 + }, + { + "epoch": 0.8189942508999382, + "grad_norm": 739.9083862304688, + "learning_rate": 1.0507230242773836e-06, + "loss": 29.2888, + "step": 405430 + }, + { + "epoch": 0.819014451532622, + "grad_norm": 546.8694458007812, + "learning_rate": 1.0505089542161707e-06, + "loss": 16.9196, + "step": 405440 + }, + { + "epoch": 0.8190346521653058, + "grad_norm": 196.59988403320312, + "learning_rate": 1.0502949034042985e-06, + "loss": 20.0, + "step": 405450 + }, + { + "epoch": 0.8190548527979896, + "grad_norm": 31.419471740722656, + "learning_rate": 1.050080871842808e-06, + "loss": 11.916, + 
"step": 405460 + }, + { + "epoch": 0.8190750534306734, + "grad_norm": 297.56781005859375, + "learning_rate": 1.0498668595327448e-06, + "loss": 10.3404, + "step": 405470 + }, + { + "epoch": 0.8190952540633573, + "grad_norm": 281.243896484375, + "learning_rate": 1.0496528664751527e-06, + "loss": 11.0291, + "step": 405480 + }, + { + "epoch": 0.8191154546960411, + "grad_norm": 215.0363311767578, + "learning_rate": 1.049438892671073e-06, + "loss": 8.4872, + "step": 405490 + }, + { + "epoch": 0.8191356553287249, + "grad_norm": 351.2985534667969, + "learning_rate": 1.049224938121548e-06, + "loss": 11.865, + "step": 405500 + }, + { + "epoch": 0.8191558559614087, + "grad_norm": 323.0808410644531, + "learning_rate": 1.049011002827623e-06, + "loss": 28.4416, + "step": 405510 + }, + { + "epoch": 0.8191760565940925, + "grad_norm": 647.8573608398438, + "learning_rate": 1.0487970867903385e-06, + "loss": 14.6466, + "step": 405520 + }, + { + "epoch": 0.8191962572267764, + "grad_norm": 557.99072265625, + "learning_rate": 1.0485831900107368e-06, + "loss": 21.3212, + "step": 405530 + }, + { + "epoch": 0.8192164578594602, + "grad_norm": 197.75636291503906, + "learning_rate": 1.0483693124898631e-06, + "loss": 21.2272, + "step": 405540 + }, + { + "epoch": 0.819236658492144, + "grad_norm": 474.7149353027344, + "learning_rate": 1.0481554542287565e-06, + "loss": 18.4791, + "step": 405550 + }, + { + "epoch": 0.8192568591248278, + "grad_norm": 1080.687255859375, + "learning_rate": 1.0479416152284622e-06, + "loss": 20.9776, + "step": 405560 + }, + { + "epoch": 0.8192770597575116, + "grad_norm": 157.8067626953125, + "learning_rate": 1.0477277954900194e-06, + "loss": 20.7023, + "step": 405570 + }, + { + "epoch": 0.8192972603901955, + "grad_norm": 242.31314086914062, + "learning_rate": 1.047513995014474e-06, + "loss": 14.757, + "step": 405580 + }, + { + "epoch": 0.8193174610228793, + "grad_norm": 126.24282836914062, + "learning_rate": 1.0473002138028654e-06, + "loss": 25.1427, + "step": 405590 + }, + { + "epoch": 0.8193376616555631, + "grad_norm": 13.731419563293457, + "learning_rate": 1.047086451856235e-06, + "loss": 6.8258, + "step": 405600 + }, + { + "epoch": 0.8193578622882469, + "grad_norm": 394.7142333984375, + "learning_rate": 1.0468727091756275e-06, + "loss": 19.3574, + "step": 405610 + }, + { + "epoch": 0.8193780629209307, + "grad_norm": 895.7384643554688, + "learning_rate": 1.0466589857620813e-06, + "loss": 19.1313, + "step": 405620 + }, + { + "epoch": 0.8193982635536144, + "grad_norm": 374.3149719238281, + "learning_rate": 1.0464452816166416e-06, + "loss": 17.0161, + "step": 405630 + }, + { + "epoch": 0.8194184641862983, + "grad_norm": 1000.359130859375, + "learning_rate": 1.0462315967403475e-06, + "loss": 22.6076, + "step": 405640 + }, + { + "epoch": 0.8194386648189821, + "grad_norm": 472.3176574707031, + "learning_rate": 1.0460179311342394e-06, + "loss": 18.4308, + "step": 405650 + }, + { + "epoch": 0.8194588654516659, + "grad_norm": 121.66295623779297, + "learning_rate": 1.0458042847993627e-06, + "loss": 27.6165, + "step": 405660 + }, + { + "epoch": 0.8194790660843497, + "grad_norm": 315.0002746582031, + "learning_rate": 1.0455906577367553e-06, + "loss": 17.5618, + "step": 405670 + }, + { + "epoch": 0.8194992667170335, + "grad_norm": 223.4075927734375, + "learning_rate": 1.0453770499474585e-06, + "loss": 21.8781, + "step": 405680 + }, + { + "epoch": 0.8195194673497174, + "grad_norm": 386.3526611328125, + "learning_rate": 1.0451634614325146e-06, + "loss": 15.8846, + "step": 405690 + }, + { + "epoch": 
0.8195396679824012, + "grad_norm": 223.708984375, + "learning_rate": 1.0449498921929669e-06, + "loss": 13.9674, + "step": 405700 + }, + { + "epoch": 0.819559868615085, + "grad_norm": 194.04403686523438, + "learning_rate": 1.0447363422298507e-06, + "loss": 19.5599, + "step": 405710 + }, + { + "epoch": 0.8195800692477688, + "grad_norm": 119.22209930419922, + "learning_rate": 1.0445228115442102e-06, + "loss": 11.4453, + "step": 405720 + }, + { + "epoch": 0.8196002698804526, + "grad_norm": 268.7986145019531, + "learning_rate": 1.044309300137087e-06, + "loss": 12.7573, + "step": 405730 + }, + { + "epoch": 0.8196204705131365, + "grad_norm": 89.77639770507812, + "learning_rate": 1.0440958080095204e-06, + "loss": 10.0482, + "step": 405740 + }, + { + "epoch": 0.8196406711458203, + "grad_norm": 539.5626220703125, + "learning_rate": 1.04388233516255e-06, + "loss": 19.2888, + "step": 405750 + }, + { + "epoch": 0.8196608717785041, + "grad_norm": 179.42356872558594, + "learning_rate": 1.0436688815972168e-06, + "loss": 17.396, + "step": 405760 + }, + { + "epoch": 0.8196810724111879, + "grad_norm": 503.5247802734375, + "learning_rate": 1.0434554473145646e-06, + "loss": 16.8302, + "step": 405770 + }, + { + "epoch": 0.8197012730438717, + "grad_norm": 389.95904541015625, + "learning_rate": 1.0432420323156284e-06, + "loss": 18.0237, + "step": 405780 + }, + { + "epoch": 0.8197214736765556, + "grad_norm": 98.8677978515625, + "learning_rate": 1.0430286366014496e-06, + "loss": 17.0766, + "step": 405790 + }, + { + "epoch": 0.8197416743092394, + "grad_norm": 269.2354431152344, + "learning_rate": 1.0428152601730718e-06, + "loss": 25.5945, + "step": 405800 + }, + { + "epoch": 0.8197618749419232, + "grad_norm": 229.5015106201172, + "learning_rate": 1.0426019030315314e-06, + "loss": 15.0495, + "step": 405810 + }, + { + "epoch": 0.819782075574607, + "grad_norm": 477.2650146484375, + "learning_rate": 1.0423885651778688e-06, + "loss": 8.8372, + "step": 405820 + }, + { + "epoch": 0.8198022762072908, + "grad_norm": 599.321533203125, + "learning_rate": 1.0421752466131258e-06, + "loss": 19.5767, + "step": 405830 + }, + { + "epoch": 0.8198224768399747, + "grad_norm": 481.2432556152344, + "learning_rate": 1.0419619473383402e-06, + "loss": 23.8262, + "step": 405840 + }, + { + "epoch": 0.8198426774726585, + "grad_norm": 330.3309631347656, + "learning_rate": 1.0417486673545508e-06, + "loss": 16.1757, + "step": 405850 + }, + { + "epoch": 0.8198628781053423, + "grad_norm": 614.2745971679688, + "learning_rate": 1.0415354066627993e-06, + "loss": 10.8526, + "step": 405860 + }, + { + "epoch": 0.8198830787380261, + "grad_norm": 240.1322479248047, + "learning_rate": 1.041322165264123e-06, + "loss": 23.1328, + "step": 405870 + }, + { + "epoch": 0.8199032793707098, + "grad_norm": 213.60418701171875, + "learning_rate": 1.0411089431595639e-06, + "loss": 26.3818, + "step": 405880 + }, + { + "epoch": 0.8199234800033937, + "grad_norm": 461.4310607910156, + "learning_rate": 1.040895740350158e-06, + "loss": 20.5856, + "step": 405890 + }, + { + "epoch": 0.8199436806360775, + "grad_norm": 249.21217346191406, + "learning_rate": 1.0406825568369478e-06, + "loss": 12.6341, + "step": 405900 + }, + { + "epoch": 0.8199638812687613, + "grad_norm": 519.0851440429688, + "learning_rate": 1.0404693926209702e-06, + "loss": 17.4954, + "step": 405910 + }, + { + "epoch": 0.8199840819014451, + "grad_norm": 582.5599975585938, + "learning_rate": 1.0402562477032635e-06, + "loss": 25.6814, + "step": 405920 + }, + { + "epoch": 0.8200042825341289, + "grad_norm": 
310.9371337890625, + "learning_rate": 1.0400431220848688e-06, + "loss": 25.2741, + "step": 405930 + }, + { + "epoch": 0.8200244831668128, + "grad_norm": 643.0136108398438, + "learning_rate": 1.0398300157668222e-06, + "loss": 19.6303, + "step": 405940 + }, + { + "epoch": 0.8200446837994966, + "grad_norm": 202.9320526123047, + "learning_rate": 1.0396169287501652e-06, + "loss": 17.305, + "step": 405950 + }, + { + "epoch": 0.8200648844321804, + "grad_norm": 605.00634765625, + "learning_rate": 1.0394038610359352e-06, + "loss": 19.8948, + "step": 405960 + }, + { + "epoch": 0.8200850850648642, + "grad_norm": 91.46730041503906, + "learning_rate": 1.0391908126251688e-06, + "loss": 12.6257, + "step": 405970 + }, + { + "epoch": 0.820105285697548, + "grad_norm": 1556.552734375, + "learning_rate": 1.0389777835189075e-06, + "loss": 16.1427, + "step": 405980 + }, + { + "epoch": 0.8201254863302319, + "grad_norm": 220.1592254638672, + "learning_rate": 1.0387647737181877e-06, + "loss": 15.6865, + "step": 405990 + }, + { + "epoch": 0.8201456869629157, + "grad_norm": 589.5980834960938, + "learning_rate": 1.0385517832240472e-06, + "loss": 19.6536, + "step": 406000 + }, + { + "epoch": 0.8201658875955995, + "grad_norm": 489.4096374511719, + "learning_rate": 1.0383388120375242e-06, + "loss": 19.5343, + "step": 406010 + }, + { + "epoch": 0.8201860882282833, + "grad_norm": 311.42254638671875, + "learning_rate": 1.0381258601596594e-06, + "loss": 9.4658, + "step": 406020 + }, + { + "epoch": 0.8202062888609671, + "grad_norm": 356.68560791015625, + "learning_rate": 1.0379129275914878e-06, + "loss": 27.3836, + "step": 406030 + }, + { + "epoch": 0.820226489493651, + "grad_norm": 298.08416748046875, + "learning_rate": 1.037700014334047e-06, + "loss": 21.9449, + "step": 406040 + }, + { + "epoch": 0.8202466901263348, + "grad_norm": 156.88995361328125, + "learning_rate": 1.0374871203883774e-06, + "loss": 13.8985, + "step": 406050 + }, + { + "epoch": 0.8202668907590186, + "grad_norm": 187.8990478515625, + "learning_rate": 1.0372742457555151e-06, + "loss": 10.9844, + "step": 406060 + }, + { + "epoch": 0.8202870913917024, + "grad_norm": 527.1951904296875, + "learning_rate": 1.0370613904364957e-06, + "loss": 13.1193, + "step": 406070 + }, + { + "epoch": 0.8203072920243862, + "grad_norm": 364.440185546875, + "learning_rate": 1.0368485544323586e-06, + "loss": 17.7838, + "step": 406080 + }, + { + "epoch": 0.8203274926570701, + "grad_norm": 85.72505187988281, + "learning_rate": 1.0366357377441427e-06, + "loss": 14.7729, + "step": 406090 + }, + { + "epoch": 0.8203476932897539, + "grad_norm": 212.0575408935547, + "learning_rate": 1.036422940372883e-06, + "loss": 18.9541, + "step": 406100 + }, + { + "epoch": 0.8203678939224377, + "grad_norm": 139.4064178466797, + "learning_rate": 1.0362101623196158e-06, + "loss": 8.1081, + "step": 406110 + }, + { + "epoch": 0.8203880945551215, + "grad_norm": 284.49493408203125, + "learning_rate": 1.0359974035853814e-06, + "loss": 15.8686, + "step": 406120 + }, + { + "epoch": 0.8204082951878053, + "grad_norm": 494.59967041015625, + "learning_rate": 1.0357846641712143e-06, + "loss": 7.0495, + "step": 406130 + }, + { + "epoch": 0.820428495820489, + "grad_norm": 355.716064453125, + "learning_rate": 1.0355719440781508e-06, + "loss": 12.4157, + "step": 406140 + }, + { + "epoch": 0.8204486964531729, + "grad_norm": 434.7909240722656, + "learning_rate": 1.0353592433072302e-06, + "loss": 25.2197, + "step": 406150 + }, + { + "epoch": 0.8204688970858567, + "grad_norm": 312.6859130859375, + "learning_rate": 
1.035146561859487e-06, + "loss": 21.4402, + "step": 406160 + }, + { + "epoch": 0.8204890977185405, + "grad_norm": 82.07292938232422, + "learning_rate": 1.0349338997359593e-06, + "loss": 15.2223, + "step": 406170 + }, + { + "epoch": 0.8205092983512243, + "grad_norm": 236.8374786376953, + "learning_rate": 1.0347212569376814e-06, + "loss": 15.4553, + "step": 406180 + }, + { + "epoch": 0.8205294989839081, + "grad_norm": 140.76976013183594, + "learning_rate": 1.0345086334656929e-06, + "loss": 16.7068, + "step": 406190 + }, + { + "epoch": 0.820549699616592, + "grad_norm": 12.717177391052246, + "learning_rate": 1.0342960293210281e-06, + "loss": 19.1874, + "step": 406200 + }, + { + "epoch": 0.8205699002492758, + "grad_norm": 265.7485046386719, + "learning_rate": 1.034083444504722e-06, + "loss": 12.1559, + "step": 406210 + }, + { + "epoch": 0.8205901008819596, + "grad_norm": 134.8882598876953, + "learning_rate": 1.0338708790178136e-06, + "loss": 25.6306, + "step": 406220 + }, + { + "epoch": 0.8206103015146434, + "grad_norm": 183.5823211669922, + "learning_rate": 1.0336583328613364e-06, + "loss": 29.2117, + "step": 406230 + }, + { + "epoch": 0.8206305021473272, + "grad_norm": 452.5019226074219, + "learning_rate": 1.0334458060363289e-06, + "loss": 18.6306, + "step": 406240 + }, + { + "epoch": 0.8206507027800111, + "grad_norm": 338.053955078125, + "learning_rate": 1.0332332985438248e-06, + "loss": 11.713, + "step": 406250 + }, + { + "epoch": 0.8206709034126949, + "grad_norm": 416.09368896484375, + "learning_rate": 1.03302081038486e-06, + "loss": 14.9481, + "step": 406260 + }, + { + "epoch": 0.8206911040453787, + "grad_norm": 260.2134094238281, + "learning_rate": 1.032808341560471e-06, + "loss": 29.0405, + "step": 406270 + }, + { + "epoch": 0.8207113046780625, + "grad_norm": 779.87646484375, + "learning_rate": 1.032595892071694e-06, + "loss": 35.7845, + "step": 406280 + }, + { + "epoch": 0.8207315053107463, + "grad_norm": 0.04122190177440643, + "learning_rate": 1.0323834619195617e-06, + "loss": 9.071, + "step": 406290 + }, + { + "epoch": 0.8207517059434302, + "grad_norm": 116.37691497802734, + "learning_rate": 1.0321710511051108e-06, + "loss": 14.0588, + "step": 406300 + }, + { + "epoch": 0.820771906576114, + "grad_norm": 404.05853271484375, + "learning_rate": 1.03195865962938e-06, + "loss": 9.1866, + "step": 406310 + }, + { + "epoch": 0.8207921072087978, + "grad_norm": 256.5055847167969, + "learning_rate": 1.0317462874933987e-06, + "loss": 26.4847, + "step": 406320 + }, + { + "epoch": 0.8208123078414816, + "grad_norm": 599.2551879882812, + "learning_rate": 1.0315339346982044e-06, + "loss": 30.4853, + "step": 406330 + }, + { + "epoch": 0.8208325084741654, + "grad_norm": 187.8599395751953, + "learning_rate": 1.0313216012448341e-06, + "loss": 25.9453, + "step": 406340 + }, + { + "epoch": 0.8208527091068493, + "grad_norm": 612.1749267578125, + "learning_rate": 1.0311092871343209e-06, + "loss": 24.1508, + "step": 406350 + }, + { + "epoch": 0.8208729097395331, + "grad_norm": 468.5676574707031, + "learning_rate": 1.0308969923676987e-06, + "loss": 21.0865, + "step": 406360 + }, + { + "epoch": 0.8208931103722169, + "grad_norm": 348.3234558105469, + "learning_rate": 1.0306847169460028e-06, + "loss": 21.1996, + "step": 406370 + }, + { + "epoch": 0.8209133110049007, + "grad_norm": 1181.8826904296875, + "learning_rate": 1.0304724608702704e-06, + "loss": 14.7764, + "step": 406380 + }, + { + "epoch": 0.8209335116375844, + "grad_norm": 238.29066467285156, + "learning_rate": 1.0302602241415316e-06, + "loss": 12.5135, 
+ "step": 406390 + }, + { + "epoch": 0.8209537122702683, + "grad_norm": 268.7695617675781, + "learning_rate": 1.0300480067608232e-06, + "loss": 11.5064, + "step": 406400 + }, + { + "epoch": 0.8209739129029521, + "grad_norm": 123.91963958740234, + "learning_rate": 1.0298358087291803e-06, + "loss": 20.9362, + "step": 406410 + }, + { + "epoch": 0.8209941135356359, + "grad_norm": 407.3598327636719, + "learning_rate": 1.0296236300476359e-06, + "loss": 12.349, + "step": 406420 + }, + { + "epoch": 0.8210143141683197, + "grad_norm": 548.6702270507812, + "learning_rate": 1.0294114707172236e-06, + "loss": 15.9261, + "step": 406430 + }, + { + "epoch": 0.8210345148010035, + "grad_norm": 5.207859992980957, + "learning_rate": 1.0291993307389792e-06, + "loss": 18.7373, + "step": 406440 + }, + { + "epoch": 0.8210547154336874, + "grad_norm": 723.1630249023438, + "learning_rate": 1.0289872101139359e-06, + "loss": 16.8577, + "step": 406450 + }, + { + "epoch": 0.8210749160663712, + "grad_norm": 338.55328369140625, + "learning_rate": 1.0287751088431257e-06, + "loss": 12.0091, + "step": 406460 + }, + { + "epoch": 0.821095116699055, + "grad_norm": 458.1700744628906, + "learning_rate": 1.028563026927586e-06, + "loss": 15.2878, + "step": 406470 + }, + { + "epoch": 0.8211153173317388, + "grad_norm": 559.7627563476562, + "learning_rate": 1.0283509643683464e-06, + "loss": 14.1064, + "step": 406480 + }, + { + "epoch": 0.8211355179644226, + "grad_norm": 144.1278533935547, + "learning_rate": 1.0281389211664439e-06, + "loss": 16.2972, + "step": 406490 + }, + { + "epoch": 0.8211557185971065, + "grad_norm": 167.664306640625, + "learning_rate": 1.0279268973229089e-06, + "loss": 16.3451, + "step": 406500 + }, + { + "epoch": 0.8211759192297903, + "grad_norm": 10.996707916259766, + "learning_rate": 1.0277148928387788e-06, + "loss": 13.8368, + "step": 406510 + }, + { + "epoch": 0.8211961198624741, + "grad_norm": 299.57427978515625, + "learning_rate": 1.0275029077150838e-06, + "loss": 24.8445, + "step": 406520 + }, + { + "epoch": 0.8212163204951579, + "grad_norm": 190.02394104003906, + "learning_rate": 1.0272909419528565e-06, + "loss": 20.3937, + "step": 406530 + }, + { + "epoch": 0.8212365211278417, + "grad_norm": 256.25341796875, + "learning_rate": 1.0270789955531329e-06, + "loss": 12.2424, + "step": 406540 + }, + { + "epoch": 0.8212567217605256, + "grad_norm": 608.8441162109375, + "learning_rate": 1.026867068516943e-06, + "loss": 20.8768, + "step": 406550 + }, + { + "epoch": 0.8212769223932094, + "grad_norm": 897.0269775390625, + "learning_rate": 1.0266551608453224e-06, + "loss": 21.9482, + "step": 406560 + }, + { + "epoch": 0.8212971230258932, + "grad_norm": 308.4391784667969, + "learning_rate": 1.0264432725393026e-06, + "loss": 25.2127, + "step": 406570 + }, + { + "epoch": 0.821317323658577, + "grad_norm": 269.1169738769531, + "learning_rate": 1.026231403599915e-06, + "loss": 36.8312, + "step": 406580 + }, + { + "epoch": 0.8213375242912608, + "grad_norm": 422.3494873046875, + "learning_rate": 1.0260195540281948e-06, + "loss": 28.029, + "step": 406590 + }, + { + "epoch": 0.8213577249239447, + "grad_norm": 170.7576904296875, + "learning_rate": 1.0258077238251735e-06, + "loss": 9.9392, + "step": 406600 + }, + { + "epoch": 0.8213779255566285, + "grad_norm": 902.087158203125, + "learning_rate": 1.0255959129918825e-06, + "loss": 26.662, + "step": 406610 + }, + { + "epoch": 0.8213981261893123, + "grad_norm": 314.4224548339844, + "learning_rate": 1.0253841215293541e-06, + "loss": 25.7107, + "step": 406620 + }, + { + "epoch": 
0.8214183268219961, + "grad_norm": 343.95343017578125, + "learning_rate": 1.0251723494386234e-06, + "loss": 22.3633, + "step": 406630 + }, + { + "epoch": 0.8214385274546799, + "grad_norm": 878.520263671875, + "learning_rate": 1.0249605967207204e-06, + "loss": 16.6829, + "step": 406640 + }, + { + "epoch": 0.8214587280873636, + "grad_norm": 477.63262939453125, + "learning_rate": 1.0247488633766756e-06, + "loss": 20.8903, + "step": 406650 + }, + { + "epoch": 0.8214789287200475, + "grad_norm": 558.3587646484375, + "learning_rate": 1.0245371494075246e-06, + "loss": 13.331, + "step": 406660 + }, + { + "epoch": 0.8214991293527313, + "grad_norm": 383.0649108886719, + "learning_rate": 1.0243254548142973e-06, + "loss": 26.3895, + "step": 406670 + }, + { + "epoch": 0.8215193299854151, + "grad_norm": 453.21136474609375, + "learning_rate": 1.0241137795980239e-06, + "loss": 29.1606, + "step": 406680 + }, + { + "epoch": 0.8215395306180989, + "grad_norm": 252.2771453857422, + "learning_rate": 1.023902123759738e-06, + "loss": 24.245, + "step": 406690 + }, + { + "epoch": 0.8215597312507827, + "grad_norm": 376.6198425292969, + "learning_rate": 1.0236904873004722e-06, + "loss": 22.271, + "step": 406700 + }, + { + "epoch": 0.8215799318834666, + "grad_norm": 324.5942687988281, + "learning_rate": 1.023478870221256e-06, + "loss": 15.6521, + "step": 406710 + }, + { + "epoch": 0.8216001325161504, + "grad_norm": 809.4425048828125, + "learning_rate": 1.0232672725231213e-06, + "loss": 22.5213, + "step": 406720 + }, + { + "epoch": 0.8216203331488342, + "grad_norm": 568.722412109375, + "learning_rate": 1.0230556942071002e-06, + "loss": 23.1406, + "step": 406730 + }, + { + "epoch": 0.821640533781518, + "grad_norm": 170.7123565673828, + "learning_rate": 1.0228441352742236e-06, + "loss": 9.9331, + "step": 406740 + }, + { + "epoch": 0.8216607344142018, + "grad_norm": 430.9437255859375, + "learning_rate": 1.0226325957255207e-06, + "loss": 19.6494, + "step": 406750 + }, + { + "epoch": 0.8216809350468857, + "grad_norm": 935.859619140625, + "learning_rate": 1.0224210755620257e-06, + "loss": 28.973, + "step": 406760 + }, + { + "epoch": 0.8217011356795695, + "grad_norm": 383.5516052246094, + "learning_rate": 1.0222095747847666e-06, + "loss": 30.4782, + "step": 406770 + }, + { + "epoch": 0.8217213363122533, + "grad_norm": 83.07652282714844, + "learning_rate": 1.0219980933947772e-06, + "loss": 23.0471, + "step": 406780 + }, + { + "epoch": 0.8217415369449371, + "grad_norm": 255.9378662109375, + "learning_rate": 1.0217866313930847e-06, + "loss": 18.1824, + "step": 406790 + }, + { + "epoch": 0.8217617375776209, + "grad_norm": 358.04937744140625, + "learning_rate": 1.0215751887807228e-06, + "loss": 26.3627, + "step": 406800 + }, + { + "epoch": 0.8217819382103048, + "grad_norm": 1221.672119140625, + "learning_rate": 1.0213637655587216e-06, + "loss": 37.2455, + "step": 406810 + }, + { + "epoch": 0.8218021388429886, + "grad_norm": 171.9769287109375, + "learning_rate": 1.0211523617281095e-06, + "loss": 12.8814, + "step": 406820 + }, + { + "epoch": 0.8218223394756724, + "grad_norm": 124.1884536743164, + "learning_rate": 1.0209409772899192e-06, + "loss": 16.5848, + "step": 406830 + }, + { + "epoch": 0.8218425401083562, + "grad_norm": 113.08357238769531, + "learning_rate": 1.0207296122451789e-06, + "loss": 11.1415, + "step": 406840 + }, + { + "epoch": 0.82186274074104, + "grad_norm": 601.0794677734375, + "learning_rate": 1.020518266594921e-06, + "loss": 18.2521, + "step": 406850 + }, + { + "epoch": 0.8218829413737239, + "grad_norm": 
130.7305908203125, + "learning_rate": 1.0203069403401743e-06, + "loss": 17.6013, + "step": 406860 + }, + { + "epoch": 0.8219031420064077, + "grad_norm": 208.6722869873047, + "learning_rate": 1.0200956334819678e-06, + "loss": 21.3193, + "step": 406870 + }, + { + "epoch": 0.8219233426390915, + "grad_norm": 19.291900634765625, + "learning_rate": 1.0198843460213337e-06, + "loss": 8.2939, + "step": 406880 + }, + { + "epoch": 0.8219435432717753, + "grad_norm": 912.6710205078125, + "learning_rate": 1.0196730779593006e-06, + "loss": 20.773, + "step": 406890 + }, + { + "epoch": 0.8219637439044591, + "grad_norm": 477.4142150878906, + "learning_rate": 1.0194618292968972e-06, + "loss": 26.0923, + "step": 406900 + }, + { + "epoch": 0.8219839445371429, + "grad_norm": 184.98167419433594, + "learning_rate": 1.0192506000351532e-06, + "loss": 17.0479, + "step": 406910 + }, + { + "epoch": 0.8220041451698267, + "grad_norm": 34.30686569213867, + "learning_rate": 1.0190393901751023e-06, + "loss": 17.5799, + "step": 406920 + }, + { + "epoch": 0.8220243458025105, + "grad_norm": 249.46302795410156, + "learning_rate": 1.0188281997177679e-06, + "loss": 19.1335, + "step": 406930 + }, + { + "epoch": 0.8220445464351943, + "grad_norm": 379.62921142578125, + "learning_rate": 1.0186170286641816e-06, + "loss": 16.6195, + "step": 406940 + }, + { + "epoch": 0.8220647470678781, + "grad_norm": 304.0983581542969, + "learning_rate": 1.0184058770153748e-06, + "loss": 13.9507, + "step": 406950 + }, + { + "epoch": 0.822084947700562, + "grad_norm": 255.19227600097656, + "learning_rate": 1.0181947447723744e-06, + "loss": 20.8534, + "step": 406960 + }, + { + "epoch": 0.8221051483332458, + "grad_norm": 155.5648651123047, + "learning_rate": 1.017983631936209e-06, + "loss": 15.6805, + "step": 406970 + }, + { + "epoch": 0.8221253489659296, + "grad_norm": 0.8212962746620178, + "learning_rate": 1.0177725385079084e-06, + "loss": 8.0075, + "step": 406980 + }, + { + "epoch": 0.8221455495986134, + "grad_norm": 299.80841064453125, + "learning_rate": 1.017561464488504e-06, + "loss": 14.4082, + "step": 406990 + }, + { + "epoch": 0.8221657502312972, + "grad_norm": 185.31739807128906, + "learning_rate": 1.0173504098790188e-06, + "loss": 10.34, + "step": 407000 + }, + { + "epoch": 0.822185950863981, + "grad_norm": 307.5030517578125, + "learning_rate": 1.0171393746804854e-06, + "loss": 17.1441, + "step": 407010 + }, + { + "epoch": 0.8222061514966649, + "grad_norm": 243.54632568359375, + "learning_rate": 1.0169283588939326e-06, + "loss": 17.0271, + "step": 407020 + }, + { + "epoch": 0.8222263521293487, + "grad_norm": 498.6639099121094, + "learning_rate": 1.016717362520388e-06, + "loss": 25.0322, + "step": 407030 + }, + { + "epoch": 0.8222465527620325, + "grad_norm": 133.57798767089844, + "learning_rate": 1.0165063855608786e-06, + "loss": 6.8695, + "step": 407040 + }, + { + "epoch": 0.8222667533947163, + "grad_norm": 505.4360656738281, + "learning_rate": 1.016295428016435e-06, + "loss": 22.3942, + "step": 407050 + }, + { + "epoch": 0.8222869540274002, + "grad_norm": 1129.1591796875, + "learning_rate": 1.0160844898880845e-06, + "loss": 26.4678, + "step": 407060 + }, + { + "epoch": 0.822307154660084, + "grad_norm": 519.4834594726562, + "learning_rate": 1.0158735711768542e-06, + "loss": 22.2843, + "step": 407070 + }, + { + "epoch": 0.8223273552927678, + "grad_norm": 173.5504913330078, + "learning_rate": 1.0156626718837737e-06, + "loss": 28.651, + "step": 407080 + }, + { + "epoch": 0.8223475559254516, + "grad_norm": 436.7134094238281, + "learning_rate": 
1.0154517920098682e-06, + "loss": 11.499, + "step": 407090 + }, + { + "epoch": 0.8223677565581354, + "grad_norm": 116.7005615234375, + "learning_rate": 1.0152409315561696e-06, + "loss": 9.7145, + "step": 407100 + }, + { + "epoch": 0.8223879571908193, + "grad_norm": 642.4334106445312, + "learning_rate": 1.0150300905237015e-06, + "loss": 31.7975, + "step": 407110 + }, + { + "epoch": 0.8224081578235031, + "grad_norm": 207.56675720214844, + "learning_rate": 1.014819268913495e-06, + "loss": 16.446, + "step": 407120 + }, + { + "epoch": 0.8224283584561869, + "grad_norm": 400.88409423828125, + "learning_rate": 1.0146084667265766e-06, + "loss": 10.7764, + "step": 407130 + }, + { + "epoch": 0.8224485590888707, + "grad_norm": 490.64605712890625, + "learning_rate": 1.0143976839639713e-06, + "loss": 17.8865, + "step": 407140 + }, + { + "epoch": 0.8224687597215545, + "grad_norm": 617.2076416015625, + "learning_rate": 1.0141869206267097e-06, + "loss": 20.4385, + "step": 407150 + }, + { + "epoch": 0.8224889603542382, + "grad_norm": 373.3833923339844, + "learning_rate": 1.0139761767158158e-06, + "loss": 17.1855, + "step": 407160 + }, + { + "epoch": 0.8225091609869221, + "grad_norm": 1924.2100830078125, + "learning_rate": 1.0137654522323204e-06, + "loss": 25.3028, + "step": 407170 + }, + { + "epoch": 0.8225293616196059, + "grad_norm": 202.76271057128906, + "learning_rate": 1.0135547471772488e-06, + "loss": 12.4462, + "step": 407180 + }, + { + "epoch": 0.8225495622522897, + "grad_norm": 27.07765007019043, + "learning_rate": 1.013344061551626e-06, + "loss": 16.7884, + "step": 407190 + }, + { + "epoch": 0.8225697628849735, + "grad_norm": 275.3519287109375, + "learning_rate": 1.0131333953564825e-06, + "loss": 10.6686, + "step": 407200 + }, + { + "epoch": 0.8225899635176573, + "grad_norm": 603.3817749023438, + "learning_rate": 1.0129227485928432e-06, + "loss": 18.0398, + "step": 407210 + }, + { + "epoch": 0.8226101641503412, + "grad_norm": 275.7763366699219, + "learning_rate": 1.0127121212617335e-06, + "loss": 10.4766, + "step": 407220 + }, + { + "epoch": 0.822630364783025, + "grad_norm": 477.1412658691406, + "learning_rate": 1.0125015133641813e-06, + "loss": 21.3107, + "step": 407230 + }, + { + "epoch": 0.8226505654157088, + "grad_norm": 323.2248840332031, + "learning_rate": 1.0122909249012148e-06, + "loss": 17.129, + "step": 407240 + }, + { + "epoch": 0.8226707660483926, + "grad_norm": 165.0032196044922, + "learning_rate": 1.0120803558738585e-06, + "loss": 18.286, + "step": 407250 + }, + { + "epoch": 0.8226909666810764, + "grad_norm": 537.7188110351562, + "learning_rate": 1.0118698062831372e-06, + "loss": 24.8882, + "step": 407260 + }, + { + "epoch": 0.8227111673137603, + "grad_norm": 226.24362182617188, + "learning_rate": 1.0116592761300804e-06, + "loss": 9.4518, + "step": 407270 + }, + { + "epoch": 0.8227313679464441, + "grad_norm": 288.0339660644531, + "learning_rate": 1.0114487654157123e-06, + "loss": 21.0934, + "step": 407280 + }, + { + "epoch": 0.8227515685791279, + "grad_norm": 465.50848388671875, + "learning_rate": 1.0112382741410582e-06, + "loss": 21.5515, + "step": 407290 + }, + { + "epoch": 0.8227717692118117, + "grad_norm": 187.2500457763672, + "learning_rate": 1.0110278023071445e-06, + "loss": 11.8522, + "step": 407300 + }, + { + "epoch": 0.8227919698444955, + "grad_norm": 210.75131225585938, + "learning_rate": 1.0108173499149991e-06, + "loss": 16.1581, + "step": 407310 + }, + { + "epoch": 0.8228121704771794, + "grad_norm": 0.0, + "learning_rate": 1.0106069169656464e-06, + "loss": 10.5133, + 
"step": 407320 + }, + { + "epoch": 0.8228323711098632, + "grad_norm": 303.7751159667969, + "learning_rate": 1.0103965034601098e-06, + "loss": 19.7185, + "step": 407330 + }, + { + "epoch": 0.822852571742547, + "grad_norm": 655.2921142578125, + "learning_rate": 1.0101861093994182e-06, + "loss": 22.9425, + "step": 407340 + }, + { + "epoch": 0.8228727723752308, + "grad_norm": 270.5400390625, + "learning_rate": 1.0099757347845957e-06, + "loss": 20.7602, + "step": 407350 + }, + { + "epoch": 0.8228929730079146, + "grad_norm": 644.845458984375, + "learning_rate": 1.0097653796166662e-06, + "loss": 21.5414, + "step": 407360 + }, + { + "epoch": 0.8229131736405985, + "grad_norm": 206.19781494140625, + "learning_rate": 1.0095550438966578e-06, + "loss": 15.1997, + "step": 407370 + }, + { + "epoch": 0.8229333742732823, + "grad_norm": 733.670654296875, + "learning_rate": 1.009344727625593e-06, + "loss": 21.8256, + "step": 407380 + }, + { + "epoch": 0.8229535749059661, + "grad_norm": 148.4443817138672, + "learning_rate": 1.0091344308044987e-06, + "loss": 9.7204, + "step": 407390 + }, + { + "epoch": 0.8229737755386499, + "grad_norm": 319.5944519042969, + "learning_rate": 1.0089241534343986e-06, + "loss": 14.368, + "step": 407400 + }, + { + "epoch": 0.8229939761713337, + "grad_norm": 412.720458984375, + "learning_rate": 1.008713895516319e-06, + "loss": 19.7029, + "step": 407410 + }, + { + "epoch": 0.8230141768040174, + "grad_norm": 248.20687866210938, + "learning_rate": 1.0085036570512836e-06, + "loss": 14.2003, + "step": 407420 + }, + { + "epoch": 0.8230343774367013, + "grad_norm": 623.3434448242188, + "learning_rate": 1.0082934380403159e-06, + "loss": 29.236, + "step": 407430 + }, + { + "epoch": 0.8230545780693851, + "grad_norm": 305.50140380859375, + "learning_rate": 1.0080832384844437e-06, + "loss": 27.633, + "step": 407440 + }, + { + "epoch": 0.8230747787020689, + "grad_norm": 803.7622680664062, + "learning_rate": 1.0078730583846879e-06, + "loss": 18.9865, + "step": 407450 + }, + { + "epoch": 0.8230949793347527, + "grad_norm": 426.3224182128906, + "learning_rate": 1.0076628977420761e-06, + "loss": 21.8781, + "step": 407460 + }, + { + "epoch": 0.8231151799674365, + "grad_norm": 144.5908966064453, + "learning_rate": 1.0074527565576308e-06, + "loss": 8.5792, + "step": 407470 + }, + { + "epoch": 0.8231353806001204, + "grad_norm": 185.98281860351562, + "learning_rate": 1.0072426348323754e-06, + "loss": 14.8563, + "step": 407480 + }, + { + "epoch": 0.8231555812328042, + "grad_norm": 183.10203552246094, + "learning_rate": 1.0070325325673364e-06, + "loss": 31.4714, + "step": 407490 + }, + { + "epoch": 0.823175781865488, + "grad_norm": 391.8500671386719, + "learning_rate": 1.006822449763537e-06, + "loss": 23.1074, + "step": 407500 + }, + { + "epoch": 0.8231959824981718, + "grad_norm": 448.3106384277344, + "learning_rate": 1.006612386421999e-06, + "loss": 9.8672, + "step": 407510 + }, + { + "epoch": 0.8232161831308556, + "grad_norm": 247.23480224609375, + "learning_rate": 1.006402342543748e-06, + "loss": 11.6359, + "step": 407520 + }, + { + "epoch": 0.8232363837635395, + "grad_norm": 370.77362060546875, + "learning_rate": 1.00619231812981e-06, + "loss": 12.3722, + "step": 407530 + }, + { + "epoch": 0.8232565843962233, + "grad_norm": 337.4403076171875, + "learning_rate": 1.0059823131812035e-06, + "loss": 10.3751, + "step": 407540 + }, + { + "epoch": 0.8232767850289071, + "grad_norm": 332.5984802246094, + "learning_rate": 1.0057723276989551e-06, + "loss": 17.4446, + "step": 407550 + }, + { + "epoch": 
0.8232969856615909, + "grad_norm": 148.41029357910156, + "learning_rate": 1.0055623616840893e-06, + "loss": 12.2314, + "step": 407560 + }, + { + "epoch": 0.8233171862942747, + "grad_norm": 298.4598083496094, + "learning_rate": 1.0053524151376283e-06, + "loss": 8.6461, + "step": 407570 + }, + { + "epoch": 0.8233373869269586, + "grad_norm": 1214.2337646484375, + "learning_rate": 1.005142488060593e-06, + "loss": 31.857, + "step": 407580 + }, + { + "epoch": 0.8233575875596424, + "grad_norm": 397.0372619628906, + "learning_rate": 1.0049325804540094e-06, + "loss": 21.9878, + "step": 407590 + }, + { + "epoch": 0.8233777881923262, + "grad_norm": 315.916015625, + "learning_rate": 1.0047226923189024e-06, + "loss": 12.3157, + "step": 407600 + }, + { + "epoch": 0.82339798882501, + "grad_norm": 73.71172332763672, + "learning_rate": 1.0045128236562895e-06, + "loss": 27.1555, + "step": 407610 + }, + { + "epoch": 0.8234181894576938, + "grad_norm": 789.0155029296875, + "learning_rate": 1.0043029744671967e-06, + "loss": 14.7152, + "step": 407620 + }, + { + "epoch": 0.8234383900903777, + "grad_norm": 311.51495361328125, + "learning_rate": 1.004093144752648e-06, + "loss": 13.1633, + "step": 407630 + }, + { + "epoch": 0.8234585907230615, + "grad_norm": 305.6420593261719, + "learning_rate": 1.0038833345136644e-06, + "loss": 10.7144, + "step": 407640 + }, + { + "epoch": 0.8234787913557453, + "grad_norm": 812.8828125, + "learning_rate": 1.003673543751268e-06, + "loss": 12.7567, + "step": 407650 + }, + { + "epoch": 0.8234989919884291, + "grad_norm": 320.21844482421875, + "learning_rate": 1.0034637724664832e-06, + "loss": 13.4099, + "step": 407660 + }, + { + "epoch": 0.8235191926211128, + "grad_norm": 289.0506591796875, + "learning_rate": 1.0032540206603309e-06, + "loss": 19.9731, + "step": 407670 + }, + { + "epoch": 0.8235393932537967, + "grad_norm": 462.7651672363281, + "learning_rate": 1.0030442883338325e-06, + "loss": 21.0934, + "step": 407680 + }, + { + "epoch": 0.8235595938864805, + "grad_norm": 349.7567443847656, + "learning_rate": 1.0028345754880114e-06, + "loss": 29.654, + "step": 407690 + }, + { + "epoch": 0.8235797945191643, + "grad_norm": 237.719482421875, + "learning_rate": 1.0026248821238915e-06, + "loss": 20.4587, + "step": 407700 + }, + { + "epoch": 0.8235999951518481, + "grad_norm": 463.9545593261719, + "learning_rate": 1.0024152082424926e-06, + "loss": 9.4284, + "step": 407710 + }, + { + "epoch": 0.8236201957845319, + "grad_norm": 326.577392578125, + "learning_rate": 1.002205553844836e-06, + "loss": 26.6947, + "step": 407720 + }, + { + "epoch": 0.8236403964172158, + "grad_norm": 291.4574279785156, + "learning_rate": 1.0019959189319462e-06, + "loss": 16.8537, + "step": 407730 + }, + { + "epoch": 0.8236605970498996, + "grad_norm": 475.0122985839844, + "learning_rate": 1.0017863035048431e-06, + "loss": 18.0452, + "step": 407740 + }, + { + "epoch": 0.8236807976825834, + "grad_norm": 192.9997100830078, + "learning_rate": 1.0015767075645472e-06, + "loss": 22.6792, + "step": 407750 + }, + { + "epoch": 0.8237009983152672, + "grad_norm": 68.14810180664062, + "learning_rate": 1.0013671311120832e-06, + "loss": 9.7926, + "step": 407760 + }, + { + "epoch": 0.823721198947951, + "grad_norm": 359.591552734375, + "learning_rate": 1.001157574148469e-06, + "loss": 23.7008, + "step": 407770 + }, + { + "epoch": 0.8237413995806349, + "grad_norm": 270.8547058105469, + "learning_rate": 1.00094803667473e-06, + "loss": 20.2577, + "step": 407780 + }, + { + "epoch": 0.8237616002133187, + "grad_norm": 217.625244140625, + 
"learning_rate": 1.0007385186918844e-06, + "loss": 8.6643, + "step": 407790 + }, + { + "epoch": 0.8237818008460025, + "grad_norm": 104.18912506103516, + "learning_rate": 1.0005290202009533e-06, + "loss": 10.6152, + "step": 407800 + }, + { + "epoch": 0.8238020014786863, + "grad_norm": 300.1459045410156, + "learning_rate": 1.00031954120296e-06, + "loss": 16.6248, + "step": 407810 + }, + { + "epoch": 0.8238222021113701, + "grad_norm": 548.8994140625, + "learning_rate": 1.000110081698924e-06, + "loss": 31.0033, + "step": 407820 + }, + { + "epoch": 0.823842402744054, + "grad_norm": 732.3353881835938, + "learning_rate": 9.99900641689865e-07, + "loss": 16.5561, + "step": 407830 + }, + { + "epoch": 0.8238626033767378, + "grad_norm": 621.445556640625, + "learning_rate": 9.99691221176805e-07, + "loss": 13.8444, + "step": 407840 + }, + { + "epoch": 0.8238828040094216, + "grad_norm": 7.0599141120910645, + "learning_rate": 9.994818201607665e-07, + "loss": 28.752, + "step": 407850 + }, + { + "epoch": 0.8239030046421054, + "grad_norm": 457.2989501953125, + "learning_rate": 9.992724386427676e-07, + "loss": 22.4032, + "step": 407860 + }, + { + "epoch": 0.8239232052747892, + "grad_norm": 299.7660827636719, + "learning_rate": 9.990630766238292e-07, + "loss": 11.6483, + "step": 407870 + }, + { + "epoch": 0.8239434059074731, + "grad_norm": 306.6111755371094, + "learning_rate": 9.988537341049732e-07, + "loss": 11.5791, + "step": 407880 + }, + { + "epoch": 0.8239636065401569, + "grad_norm": 542.0485229492188, + "learning_rate": 9.986444110872185e-07, + "loss": 13.4904, + "step": 407890 + }, + { + "epoch": 0.8239838071728407, + "grad_norm": 178.08766174316406, + "learning_rate": 9.984351075715848e-07, + "loss": 10.2934, + "step": 407900 + }, + { + "epoch": 0.8240040078055245, + "grad_norm": 593.4957275390625, + "learning_rate": 9.982258235590926e-07, + "loss": 24.5771, + "step": 407910 + }, + { + "epoch": 0.8240242084382083, + "grad_norm": 244.8531494140625, + "learning_rate": 9.98016559050765e-07, + "loss": 27.4848, + "step": 407920 + }, + { + "epoch": 0.824044409070892, + "grad_norm": 223.80311584472656, + "learning_rate": 9.978073140476169e-07, + "loss": 13.6994, + "step": 407930 + }, + { + "epoch": 0.8240646097035759, + "grad_norm": 381.5936584472656, + "learning_rate": 9.975980885506708e-07, + "loss": 8.4069, + "step": 407940 + }, + { + "epoch": 0.8240848103362597, + "grad_norm": 174.5120086669922, + "learning_rate": 9.973888825609474e-07, + "loss": 22.6345, + "step": 407950 + }, + { + "epoch": 0.8241050109689435, + "grad_norm": 499.43511962890625, + "learning_rate": 9.971796960794644e-07, + "loss": 22.6273, + "step": 407960 + }, + { + "epoch": 0.8241252116016273, + "grad_norm": 228.4709930419922, + "learning_rate": 9.969705291072419e-07, + "loss": 30.4253, + "step": 407970 + }, + { + "epoch": 0.8241454122343111, + "grad_norm": 965.5435791015625, + "learning_rate": 9.967613816452997e-07, + "loss": 26.2806, + "step": 407980 + }, + { + "epoch": 0.824165612866995, + "grad_norm": 330.20074462890625, + "learning_rate": 9.965522536946564e-07, + "loss": 17.9362, + "step": 407990 + }, + { + "epoch": 0.8241858134996788, + "grad_norm": 527.2703857421875, + "learning_rate": 9.963431452563331e-07, + "loss": 21.5365, + "step": 408000 + }, + { + "epoch": 0.8242060141323626, + "grad_norm": 339.7175598144531, + "learning_rate": 9.96134056331346e-07, + "loss": 14.217, + "step": 408010 + }, + { + "epoch": 0.8242262147650464, + "grad_norm": 269.4036560058594, + "learning_rate": 9.959249869207177e-07, + "loss": 18.8896, + 
"step": 408020 + }, + { + "epoch": 0.8242464153977302, + "grad_norm": 357.4214782714844, + "learning_rate": 9.957159370254654e-07, + "loss": 19.9985, + "step": 408030 + }, + { + "epoch": 0.8242666160304141, + "grad_norm": 519.70068359375, + "learning_rate": 9.95506906646606e-07, + "loss": 15.1204, + "step": 408040 + }, + { + "epoch": 0.8242868166630979, + "grad_norm": 413.96832275390625, + "learning_rate": 9.952978957851622e-07, + "loss": 27.0462, + "step": 408050 + }, + { + "epoch": 0.8243070172957817, + "grad_norm": 632.7415771484375, + "learning_rate": 9.95088904442149e-07, + "loss": 11.7946, + "step": 408060 + }, + { + "epoch": 0.8243272179284655, + "grad_norm": 288.6258239746094, + "learning_rate": 9.948799326185886e-07, + "loss": 27.4577, + "step": 408070 + }, + { + "epoch": 0.8243474185611493, + "grad_norm": 181.74131774902344, + "learning_rate": 9.946709803154975e-07, + "loss": 15.3863, + "step": 408080 + }, + { + "epoch": 0.8243676191938332, + "grad_norm": 142.41555786132812, + "learning_rate": 9.94462047533893e-07, + "loss": 17.9423, + "step": 408090 + }, + { + "epoch": 0.824387819826517, + "grad_norm": 499.7061767578125, + "learning_rate": 9.942531342747953e-07, + "loss": 15.1426, + "step": 408100 + }, + { + "epoch": 0.8244080204592008, + "grad_norm": 324.0824890136719, + "learning_rate": 9.940442405392226e-07, + "loss": 12.2927, + "step": 408110 + }, + { + "epoch": 0.8244282210918846, + "grad_norm": 154.07171630859375, + "learning_rate": 9.938353663281908e-07, + "loss": 15.4776, + "step": 408120 + }, + { + "epoch": 0.8244484217245684, + "grad_norm": 251.8144073486328, + "learning_rate": 9.936265116427195e-07, + "loss": 19.2839, + "step": 408130 + }, + { + "epoch": 0.8244686223572523, + "grad_norm": 131.8602752685547, + "learning_rate": 9.9341767648383e-07, + "loss": 20.3942, + "step": 408140 + }, + { + "epoch": 0.8244888229899361, + "grad_norm": 423.2018127441406, + "learning_rate": 9.932088608525336e-07, + "loss": 24.359, + "step": 408150 + }, + { + "epoch": 0.8245090236226199, + "grad_norm": 493.63970947265625, + "learning_rate": 9.93000064749851e-07, + "loss": 22.8545, + "step": 408160 + }, + { + "epoch": 0.8245292242553037, + "grad_norm": 35.90886306762695, + "learning_rate": 9.92791288176802e-07, + "loss": 12.6635, + "step": 408170 + }, + { + "epoch": 0.8245494248879874, + "grad_norm": 402.9112548828125, + "learning_rate": 9.925825311344018e-07, + "loss": 14.5346, + "step": 408180 + }, + { + "epoch": 0.8245696255206713, + "grad_norm": 844.3985595703125, + "learning_rate": 9.923737936236671e-07, + "loss": 29.3343, + "step": 408190 + }, + { + "epoch": 0.8245898261533551, + "grad_norm": 941.7723999023438, + "learning_rate": 9.921650756456164e-07, + "loss": 28.1865, + "step": 408200 + }, + { + "epoch": 0.8246100267860389, + "grad_norm": 349.99188232421875, + "learning_rate": 9.919563772012697e-07, + "loss": 17.0764, + "step": 408210 + }, + { + "epoch": 0.8246302274187227, + "grad_norm": 561.3889770507812, + "learning_rate": 9.917476982916391e-07, + "loss": 18.3785, + "step": 408220 + }, + { + "epoch": 0.8246504280514065, + "grad_norm": 30.30302619934082, + "learning_rate": 9.915390389177438e-07, + "loss": 25.7135, + "step": 408230 + }, + { + "epoch": 0.8246706286840904, + "grad_norm": 1548.602294921875, + "learning_rate": 9.913303990806028e-07, + "loss": 19.1297, + "step": 408240 + }, + { + "epoch": 0.8246908293167742, + "grad_norm": 369.7165222167969, + "learning_rate": 9.911217787812305e-07, + "loss": 14.5312, + "step": 408250 + }, + { + "epoch": 0.824711029949458, + 
"grad_norm": 297.04144287109375, + "learning_rate": 9.909131780206437e-07, + "loss": 13.3768, + "step": 408260 + }, + { + "epoch": 0.8247312305821418, + "grad_norm": 348.3402404785156, + "learning_rate": 9.907045967998613e-07, + "loss": 9.3411, + "step": 408270 + }, + { + "epoch": 0.8247514312148256, + "grad_norm": 89.20021057128906, + "learning_rate": 9.904960351198977e-07, + "loss": 7.4829, + "step": 408280 + }, + { + "epoch": 0.8247716318475095, + "grad_norm": 986.4409790039062, + "learning_rate": 9.902874929817696e-07, + "loss": 17.8379, + "step": 408290 + }, + { + "epoch": 0.8247918324801933, + "grad_norm": 203.3545379638672, + "learning_rate": 9.900789703864933e-07, + "loss": 26.6569, + "step": 408300 + }, + { + "epoch": 0.8248120331128771, + "grad_norm": 214.7468719482422, + "learning_rate": 9.898704673350872e-07, + "loss": 15.4125, + "step": 408310 + }, + { + "epoch": 0.8248322337455609, + "grad_norm": 387.4945068359375, + "learning_rate": 9.896619838285664e-07, + "loss": 16.3487, + "step": 408320 + }, + { + "epoch": 0.8248524343782447, + "grad_norm": 273.38397216796875, + "learning_rate": 9.89453519867945e-07, + "loss": 21.7352, + "step": 408330 + }, + { + "epoch": 0.8248726350109286, + "grad_norm": 281.3521423339844, + "learning_rate": 9.892450754542427e-07, + "loss": 25.8193, + "step": 408340 + }, + { + "epoch": 0.8248928356436124, + "grad_norm": 515.4878540039062, + "learning_rate": 9.890366505884725e-07, + "loss": 16.4527, + "step": 408350 + }, + { + "epoch": 0.8249130362762962, + "grad_norm": 205.6038055419922, + "learning_rate": 9.888282452716507e-07, + "loss": 26.7113, + "step": 408360 + }, + { + "epoch": 0.82493323690898, + "grad_norm": 380.3154602050781, + "learning_rate": 9.88619859504794e-07, + "loss": 28.7859, + "step": 408370 + }, + { + "epoch": 0.8249534375416638, + "grad_norm": 358.2257080078125, + "learning_rate": 9.884114932889172e-07, + "loss": 16.7804, + "step": 408380 + }, + { + "epoch": 0.8249736381743477, + "grad_norm": 144.62191772460938, + "learning_rate": 9.88203146625037e-07, + "loss": 21.5211, + "step": 408390 + }, + { + "epoch": 0.8249938388070315, + "grad_norm": 251.671630859375, + "learning_rate": 9.879948195141681e-07, + "loss": 14.84, + "step": 408400 + }, + { + "epoch": 0.8250140394397153, + "grad_norm": 574.9043579101562, + "learning_rate": 9.877865119573249e-07, + "loss": 26.5908, + "step": 408410 + }, + { + "epoch": 0.8250342400723991, + "grad_norm": 955.4150390625, + "learning_rate": 9.87578223955525e-07, + "loss": 24.9013, + "step": 408420 + }, + { + "epoch": 0.8250544407050829, + "grad_norm": 375.3989562988281, + "learning_rate": 9.873699555097815e-07, + "loss": 18.8558, + "step": 408430 + }, + { + "epoch": 0.8250746413377666, + "grad_norm": 162.96932983398438, + "learning_rate": 9.871617066211092e-07, + "loss": 15.9927, + "step": 408440 + }, + { + "epoch": 0.8250948419704505, + "grad_norm": 380.4167175292969, + "learning_rate": 9.869534772905242e-07, + "loss": 8.1155, + "step": 408450 + }, + { + "epoch": 0.8251150426031343, + "grad_norm": 469.0326232910156, + "learning_rate": 9.867452675190425e-07, + "loss": 11.9704, + "step": 408460 + }, + { + "epoch": 0.8251352432358181, + "grad_norm": 679.4263305664062, + "learning_rate": 9.86537077307677e-07, + "loss": 21.8916, + "step": 408470 + }, + { + "epoch": 0.8251554438685019, + "grad_norm": 702.8475952148438, + "learning_rate": 9.863289066574426e-07, + "loss": 19.0194, + "step": 408480 + }, + { + "epoch": 0.8251756445011857, + "grad_norm": 143.41806030273438, + "learning_rate": 
9.861207555693552e-07, + "loss": 11.9352, + "step": 408490 + }, + { + "epoch": 0.8251958451338696, + "grad_norm": 417.0471496582031, + "learning_rate": 9.859126240444284e-07, + "loss": 17.789, + "step": 408500 + }, + { + "epoch": 0.8252160457665534, + "grad_norm": 118.43316650390625, + "learning_rate": 9.857045120836756e-07, + "loss": 19.5522, + "step": 408510 + }, + { + "epoch": 0.8252362463992372, + "grad_norm": 298.9678649902344, + "learning_rate": 9.854964196881117e-07, + "loss": 22.8157, + "step": 408520 + }, + { + "epoch": 0.825256447031921, + "grad_norm": 688.9229125976562, + "learning_rate": 9.852883468587544e-07, + "loss": 27.4831, + "step": 408530 + }, + { + "epoch": 0.8252766476646048, + "grad_norm": 346.8231201171875, + "learning_rate": 9.85080293596612e-07, + "loss": 14.2504, + "step": 408540 + }, + { + "epoch": 0.8252968482972887, + "grad_norm": 27.613510131835938, + "learning_rate": 9.848722599027012e-07, + "loss": 13.9152, + "step": 408550 + }, + { + "epoch": 0.8253170489299725, + "grad_norm": 590.6968383789062, + "learning_rate": 9.84664245778037e-07, + "loss": 25.6394, + "step": 408560 + }, + { + "epoch": 0.8253372495626563, + "grad_norm": 203.42098999023438, + "learning_rate": 9.844562512236327e-07, + "loss": 9.9276, + "step": 408570 + }, + { + "epoch": 0.8253574501953401, + "grad_norm": 381.7028503417969, + "learning_rate": 9.842482762405004e-07, + "loss": 25.0408, + "step": 408580 + }, + { + "epoch": 0.825377650828024, + "grad_norm": 407.7652587890625, + "learning_rate": 9.840403208296556e-07, + "loss": 21.7263, + "step": 408590 + }, + { + "epoch": 0.8253978514607078, + "grad_norm": 495.6151123046875, + "learning_rate": 9.838323849921123e-07, + "loss": 15.1366, + "step": 408600 + }, + { + "epoch": 0.8254180520933916, + "grad_norm": 481.92291259765625, + "learning_rate": 9.836244687288803e-07, + "loss": 14.103, + "step": 408610 + }, + { + "epoch": 0.8254382527260754, + "grad_norm": 376.790771484375, + "learning_rate": 9.834165720409767e-07, + "loss": 12.3299, + "step": 408620 + }, + { + "epoch": 0.8254584533587592, + "grad_norm": 731.8862915039062, + "learning_rate": 9.83208694929414e-07, + "loss": 16.4488, + "step": 408630 + }, + { + "epoch": 0.825478653991443, + "grad_norm": 870.9635009765625, + "learning_rate": 9.830008373952054e-07, + "loss": 29.8897, + "step": 408640 + }, + { + "epoch": 0.8254988546241269, + "grad_norm": 171.99343872070312, + "learning_rate": 9.82792999439362e-07, + "loss": 20.0473, + "step": 408650 + }, + { + "epoch": 0.8255190552568107, + "grad_norm": 441.4473876953125, + "learning_rate": 9.825851810628995e-07, + "loss": 20.2918, + "step": 408660 + }, + { + "epoch": 0.8255392558894945, + "grad_norm": 63.19327926635742, + "learning_rate": 9.823773822668298e-07, + "loss": 37.2769, + "step": 408670 + }, + { + "epoch": 0.8255594565221783, + "grad_norm": 215.95689392089844, + "learning_rate": 9.821696030521644e-07, + "loss": 15.3831, + "step": 408680 + }, + { + "epoch": 0.8255796571548621, + "grad_norm": 313.6595764160156, + "learning_rate": 9.81961843419918e-07, + "loss": 24.235, + "step": 408690 + }, + { + "epoch": 0.8255998577875459, + "grad_norm": 208.46145629882812, + "learning_rate": 9.81754103371101e-07, + "loss": 16.2879, + "step": 408700 + }, + { + "epoch": 0.8256200584202297, + "grad_norm": 224.81307983398438, + "learning_rate": 9.815463829067284e-07, + "loss": 10.0573, + "step": 408710 + }, + { + "epoch": 0.8256402590529135, + "grad_norm": 156.67495727539062, + "learning_rate": 9.813386820278114e-07, + "loss": 21.7002, + "step": 408720 + }, 
+ { + "epoch": 0.8256604596855973, + "grad_norm": 57.414039611816406, + "learning_rate": 9.811310007353608e-07, + "loss": 17.3652, + "step": 408730 + }, + { + "epoch": 0.8256806603182811, + "grad_norm": 553.4667358398438, + "learning_rate": 9.809233390303901e-07, + "loss": 26.2718, + "step": 408740 + }, + { + "epoch": 0.825700860950965, + "grad_norm": 508.50726318359375, + "learning_rate": 9.807156969139136e-07, + "loss": 10.7769, + "step": 408750 + }, + { + "epoch": 0.8257210615836488, + "grad_norm": 35.79631423950195, + "learning_rate": 9.805080743869406e-07, + "loss": 13.6019, + "step": 408760 + }, + { + "epoch": 0.8257412622163326, + "grad_norm": 267.25439453125, + "learning_rate": 9.803004714504827e-07, + "loss": 36.682, + "step": 408770 + }, + { + "epoch": 0.8257614628490164, + "grad_norm": 1.7341084480285645, + "learning_rate": 9.800928881055543e-07, + "loss": 31.0383, + "step": 408780 + }, + { + "epoch": 0.8257816634817002, + "grad_norm": 106.06595611572266, + "learning_rate": 9.798853243531654e-07, + "loss": 13.1472, + "step": 408790 + }, + { + "epoch": 0.825801864114384, + "grad_norm": 551.80224609375, + "learning_rate": 9.79677780194327e-07, + "loss": 29.5755, + "step": 408800 + }, + { + "epoch": 0.8258220647470679, + "grad_norm": 395.3323059082031, + "learning_rate": 9.794702556300505e-07, + "loss": 31.686, + "step": 408810 + }, + { + "epoch": 0.8258422653797517, + "grad_norm": 457.2997131347656, + "learning_rate": 9.792627506613517e-07, + "loss": 16.0439, + "step": 408820 + }, + { + "epoch": 0.8258624660124355, + "grad_norm": 62.50281524658203, + "learning_rate": 9.79055265289236e-07, + "loss": 8.3, + "step": 408830 + }, + { + "epoch": 0.8258826666451193, + "grad_norm": 554.3505859375, + "learning_rate": 9.788477995147173e-07, + "loss": 31.2342, + "step": 408840 + }, + { + "epoch": 0.8259028672778032, + "grad_norm": 340.3030700683594, + "learning_rate": 9.786403533388072e-07, + "loss": 15.5099, + "step": 408850 + }, + { + "epoch": 0.825923067910487, + "grad_norm": 438.2608642578125, + "learning_rate": 9.78432926762517e-07, + "loss": 22.3036, + "step": 408860 + }, + { + "epoch": 0.8259432685431708, + "grad_norm": 340.9075012207031, + "learning_rate": 9.782255197868556e-07, + "loss": 13.3878, + "step": 408870 + }, + { + "epoch": 0.8259634691758546, + "grad_norm": 128.02398681640625, + "learning_rate": 9.780181324128368e-07, + "loss": 7.8571, + "step": 408880 + }, + { + "epoch": 0.8259836698085384, + "grad_norm": 335.00469970703125, + "learning_rate": 9.778107646414691e-07, + "loss": 8.0132, + "step": 408890 + }, + { + "epoch": 0.8260038704412223, + "grad_norm": 497.5658874511719, + "learning_rate": 9.77603416473763e-07, + "loss": 13.8475, + "step": 408900 + }, + { + "epoch": 0.8260240710739061, + "grad_norm": 681.4411010742188, + "learning_rate": 9.773960879107303e-07, + "loss": 32.2834, + "step": 408910 + }, + { + "epoch": 0.8260442717065899, + "grad_norm": 569.7233276367188, + "learning_rate": 9.771887789533818e-07, + "loss": 21.9859, + "step": 408920 + }, + { + "epoch": 0.8260644723392737, + "grad_norm": 396.6499938964844, + "learning_rate": 9.76981489602728e-07, + "loss": 22.3758, + "step": 408930 + }, + { + "epoch": 0.8260846729719575, + "grad_norm": 422.7529602050781, + "learning_rate": 9.767742198597769e-07, + "loss": 20.051, + "step": 408940 + }, + { + "epoch": 0.8261048736046412, + "grad_norm": 278.30377197265625, + "learning_rate": 9.765669697255413e-07, + "loss": 21.6211, + "step": 408950 + }, + { + "epoch": 0.8261250742373251, + "grad_norm": 398.0464782714844, + 
"learning_rate": 9.763597392010304e-07, + "loss": 19.0091, + "step": 408960 + }, + { + "epoch": 0.8261452748700089, + "grad_norm": 233.4273223876953, + "learning_rate": 9.76152528287253e-07, + "loss": 8.3059, + "step": 408970 + }, + { + "epoch": 0.8261654755026927, + "grad_norm": 132.3906707763672, + "learning_rate": 9.759453369852213e-07, + "loss": 14.8877, + "step": 408980 + }, + { + "epoch": 0.8261856761353765, + "grad_norm": 229.14915466308594, + "learning_rate": 9.75738165295943e-07, + "loss": 17.5051, + "step": 408990 + }, + { + "epoch": 0.8262058767680603, + "grad_norm": 1248.712646484375, + "learning_rate": 9.7553101322043e-07, + "loss": 33.4552, + "step": 409000 + }, + { + "epoch": 0.8262260774007442, + "grad_norm": 239.10772705078125, + "learning_rate": 9.753238807596903e-07, + "loss": 16.8984, + "step": 409010 + }, + { + "epoch": 0.826246278033428, + "grad_norm": 1.1654975414276123, + "learning_rate": 9.751167679147328e-07, + "loss": 14.2042, + "step": 409020 + }, + { + "epoch": 0.8262664786661118, + "grad_norm": 348.69879150390625, + "learning_rate": 9.749096746865695e-07, + "loss": 20.265, + "step": 409030 + }, + { + "epoch": 0.8262866792987956, + "grad_norm": 96.15825653076172, + "learning_rate": 9.747026010762084e-07, + "loss": 9.8876, + "step": 409040 + }, + { + "epoch": 0.8263068799314794, + "grad_norm": 471.69818115234375, + "learning_rate": 9.744955470846567e-07, + "loss": 20.7782, + "step": 409050 + }, + { + "epoch": 0.8263270805641633, + "grad_norm": 855.0213623046875, + "learning_rate": 9.74288512712926e-07, + "loss": 17.223, + "step": 409060 + }, + { + "epoch": 0.8263472811968471, + "grad_norm": 515.3373413085938, + "learning_rate": 9.740814979620262e-07, + "loss": 25.8495, + "step": 409070 + }, + { + "epoch": 0.8263674818295309, + "grad_norm": 464.31170654296875, + "learning_rate": 9.738745028329643e-07, + "loss": 13.3487, + "step": 409080 + }, + { + "epoch": 0.8263876824622147, + "grad_norm": 697.5110473632812, + "learning_rate": 9.736675273267488e-07, + "loss": 17.6352, + "step": 409090 + }, + { + "epoch": 0.8264078830948985, + "grad_norm": 331.9514465332031, + "learning_rate": 9.734605714443906e-07, + "loss": 16.6086, + "step": 409100 + }, + { + "epoch": 0.8264280837275824, + "grad_norm": 419.98114013671875, + "learning_rate": 9.732536351868977e-07, + "loss": 14.8694, + "step": 409110 + }, + { + "epoch": 0.8264482843602662, + "grad_norm": 104.95368194580078, + "learning_rate": 9.730467185552762e-07, + "loss": 11.4161, + "step": 409120 + }, + { + "epoch": 0.82646848499295, + "grad_norm": 51.8902702331543, + "learning_rate": 9.728398215505369e-07, + "loss": 30.52, + "step": 409130 + }, + { + "epoch": 0.8264886856256338, + "grad_norm": 601.8125, + "learning_rate": 9.72632944173691e-07, + "loss": 40.3721, + "step": 409140 + }, + { + "epoch": 0.8265088862583176, + "grad_norm": 461.0328063964844, + "learning_rate": 9.724260864257401e-07, + "loss": 19.0334, + "step": 409150 + }, + { + "epoch": 0.8265290868910015, + "grad_norm": 313.46246337890625, + "learning_rate": 9.722192483076965e-07, + "loss": 17.4693, + "step": 409160 + }, + { + "epoch": 0.8265492875236853, + "grad_norm": 615.93212890625, + "learning_rate": 9.720124298205692e-07, + "loss": 20.7167, + "step": 409170 + }, + { + "epoch": 0.8265694881563691, + "grad_norm": 209.0207977294922, + "learning_rate": 9.718056309653646e-07, + "loss": 31.1384, + "step": 409180 + }, + { + "epoch": 0.8265896887890529, + "grad_norm": 490.0838623046875, + "learning_rate": 9.715988517430896e-07, + "loss": 19.0117, + "step": 409190 
+ }, + { + "epoch": 0.8266098894217367, + "grad_norm": 313.73626708984375, + "learning_rate": 9.713920921547532e-07, + "loss": 13.3818, + "step": 409200 + }, + { + "epoch": 0.8266300900544205, + "grad_norm": 177.05203247070312, + "learning_rate": 9.711853522013653e-07, + "loss": 26.8708, + "step": 409210 + }, + { + "epoch": 0.8266502906871043, + "grad_norm": 211.54241943359375, + "learning_rate": 9.709786318839293e-07, + "loss": 21.1137, + "step": 409220 + }, + { + "epoch": 0.8266704913197881, + "grad_norm": 538.9469604492188, + "learning_rate": 9.707719312034548e-07, + "loss": 10.6276, + "step": 409230 + }, + { + "epoch": 0.8266906919524719, + "grad_norm": 26.654558181762695, + "learning_rate": 9.705652501609503e-07, + "loss": 10.1592, + "step": 409240 + }, + { + "epoch": 0.8267108925851557, + "grad_norm": 358.67205810546875, + "learning_rate": 9.70358588757422e-07, + "loss": 12.8663, + "step": 409250 + }, + { + "epoch": 0.8267310932178396, + "grad_norm": 203.9763946533203, + "learning_rate": 9.701519469938759e-07, + "loss": 17.6682, + "step": 409260 + }, + { + "epoch": 0.8267512938505234, + "grad_norm": 100.33971405029297, + "learning_rate": 9.699453248713215e-07, + "loss": 13.6117, + "step": 409270 + }, + { + "epoch": 0.8267714944832072, + "grad_norm": 35.611656188964844, + "learning_rate": 9.69738722390765e-07, + "loss": 19.3551, + "step": 409280 + }, + { + "epoch": 0.826791695115891, + "grad_norm": 36.33995819091797, + "learning_rate": 9.69532139553212e-07, + "loss": 9.602, + "step": 409290 + }, + { + "epoch": 0.8268118957485748, + "grad_norm": 1031.1058349609375, + "learning_rate": 9.69325576359672e-07, + "loss": 19.8677, + "step": 409300 + }, + { + "epoch": 0.8268320963812587, + "grad_norm": 456.8401184082031, + "learning_rate": 9.691190328111488e-07, + "loss": 16.6949, + "step": 409310 + }, + { + "epoch": 0.8268522970139425, + "grad_norm": 153.07640075683594, + "learning_rate": 9.689125089086514e-07, + "loss": 14.1037, + "step": 409320 + }, + { + "epoch": 0.8268724976466263, + "grad_norm": 115.52783203125, + "learning_rate": 9.687060046531866e-07, + "loss": 20.026, + "step": 409330 + }, + { + "epoch": 0.8268926982793101, + "grad_norm": 486.40216064453125, + "learning_rate": 9.684995200457574e-07, + "loss": 18.2196, + "step": 409340 + }, + { + "epoch": 0.8269128989119939, + "grad_norm": 444.5055236816406, + "learning_rate": 9.682930550873742e-07, + "loss": 12.0368, + "step": 409350 + }, + { + "epoch": 0.8269330995446778, + "grad_norm": 383.5010986328125, + "learning_rate": 9.680866097790409e-07, + "loss": 9.2006, + "step": 409360 + }, + { + "epoch": 0.8269533001773616, + "grad_norm": 85.42082977294922, + "learning_rate": 9.67880184121765e-07, + "loss": 11.2452, + "step": 409370 + }, + { + "epoch": 0.8269735008100454, + "grad_norm": 515.4810180664062, + "learning_rate": 9.676737781165513e-07, + "loss": 16.0794, + "step": 409380 + }, + { + "epoch": 0.8269937014427292, + "grad_norm": 191.40391540527344, + "learning_rate": 9.674673917644072e-07, + "loss": 23.007, + "step": 409390 + }, + { + "epoch": 0.827013902075413, + "grad_norm": 231.99940490722656, + "learning_rate": 9.67261025066339e-07, + "loss": 21.2716, + "step": 409400 + }, + { + "epoch": 0.8270341027080969, + "grad_norm": 417.308837890625, + "learning_rate": 9.670546780233493e-07, + "loss": 16.8201, + "step": 409410 + }, + { + "epoch": 0.8270543033407807, + "grad_norm": 106.47950744628906, + "learning_rate": 9.668483506364462e-07, + "loss": 10.7087, + "step": 409420 + }, + { + "epoch": 0.8270745039734645, + "grad_norm": 
244.87522888183594, + "learning_rate": 9.666420429066376e-07, + "loss": 14.4273, + "step": 409430 + }, + { + "epoch": 0.8270947046061483, + "grad_norm": 456.020263671875, + "learning_rate": 9.66435754834924e-07, + "loss": 18.7284, + "step": 409440 + }, + { + "epoch": 0.8271149052388321, + "grad_norm": 318.734130859375, + "learning_rate": 9.662294864223132e-07, + "loss": 23.9312, + "step": 409450 + }, + { + "epoch": 0.8271351058715158, + "grad_norm": 193.71820068359375, + "learning_rate": 9.66023237669812e-07, + "loss": 13.9561, + "step": 409460 + }, + { + "epoch": 0.8271553065041997, + "grad_norm": 279.223388671875, + "learning_rate": 9.658170085784242e-07, + "loss": 18.7789, + "step": 409470 + }, + { + "epoch": 0.8271755071368835, + "grad_norm": 574.6185302734375, + "learning_rate": 9.656107991491536e-07, + "loss": 19.9033, + "step": 409480 + }, + { + "epoch": 0.8271957077695673, + "grad_norm": 125.4744644165039, + "learning_rate": 9.654046093830084e-07, + "loss": 18.6923, + "step": 409490 + }, + { + "epoch": 0.8272159084022511, + "grad_norm": 112.13639831542969, + "learning_rate": 9.651984392809916e-07, + "loss": 10.577, + "step": 409500 + }, + { + "epoch": 0.8272361090349349, + "grad_norm": 221.3907012939453, + "learning_rate": 9.649922888441065e-07, + "loss": 18.3657, + "step": 409510 + }, + { + "epoch": 0.8272563096676188, + "grad_norm": 281.2359313964844, + "learning_rate": 9.647861580733603e-07, + "loss": 22.1341, + "step": 409520 + }, + { + "epoch": 0.8272765103003026, + "grad_norm": 614.3504028320312, + "learning_rate": 9.645800469697575e-07, + "loss": 24.8991, + "step": 409530 + }, + { + "epoch": 0.8272967109329864, + "grad_norm": 302.5175476074219, + "learning_rate": 9.64373955534303e-07, + "loss": 20.377, + "step": 409540 + }, + { + "epoch": 0.8273169115656702, + "grad_norm": 362.9315185546875, + "learning_rate": 9.641678837679985e-07, + "loss": 26.6605, + "step": 409550 + }, + { + "epoch": 0.827337112198354, + "grad_norm": 357.0892639160156, + "learning_rate": 9.639618316718519e-07, + "loss": 17.2189, + "step": 409560 + }, + { + "epoch": 0.8273573128310379, + "grad_norm": 231.19859313964844, + "learning_rate": 9.637557992468655e-07, + "loss": 13.5761, + "step": 409570 + }, + { + "epoch": 0.8273775134637217, + "grad_norm": 276.0561828613281, + "learning_rate": 9.635497864940425e-07, + "loss": 15.793, + "step": 409580 + }, + { + "epoch": 0.8273977140964055, + "grad_norm": 300.688720703125, + "learning_rate": 9.633437934143896e-07, + "loss": 13.512, + "step": 409590 + }, + { + "epoch": 0.8274179147290893, + "grad_norm": 255.2481231689453, + "learning_rate": 9.631378200089082e-07, + "loss": 33.7828, + "step": 409600 + }, + { + "epoch": 0.8274381153617731, + "grad_norm": 938.2775268554688, + "learning_rate": 9.629318662786047e-07, + "loss": 24.1358, + "step": 409610 + }, + { + "epoch": 0.827458315994457, + "grad_norm": 257.51910400390625, + "learning_rate": 9.62725932224482e-07, + "loss": 14.944, + "step": 409620 + }, + { + "epoch": 0.8274785166271408, + "grad_norm": 453.6352233886719, + "learning_rate": 9.62520017847542e-07, + "loss": 10.9707, + "step": 409630 + }, + { + "epoch": 0.8274987172598246, + "grad_norm": 374.6878356933594, + "learning_rate": 9.623141231487904e-07, + "loss": 20.3693, + "step": 409640 + }, + { + "epoch": 0.8275189178925084, + "grad_norm": 401.16461181640625, + "learning_rate": 9.621082481292309e-07, + "loss": 12.5384, + "step": 409650 + }, + { + "epoch": 0.8275391185251922, + "grad_norm": 433.9198303222656, + "learning_rate": 9.61902392789864e-07, + 
"loss": 25.6992, + "step": 409660 + }, + { + "epoch": 0.8275593191578761, + "grad_norm": 450.6440734863281, + "learning_rate": 9.616965571316956e-07, + "loss": 25.1214, + "step": 409670 + }, + { + "epoch": 0.8275795197905599, + "grad_norm": 409.1781311035156, + "learning_rate": 9.6149074115573e-07, + "loss": 18.633, + "step": 409680 + }, + { + "epoch": 0.8275997204232437, + "grad_norm": 516.9926147460938, + "learning_rate": 9.61284944862968e-07, + "loss": 21.039, + "step": 409690 + }, + { + "epoch": 0.8276199210559275, + "grad_norm": 377.6710205078125, + "learning_rate": 9.610791682544123e-07, + "loss": 5.7048, + "step": 409700 + }, + { + "epoch": 0.8276401216886113, + "grad_norm": 592.4374389648438, + "learning_rate": 9.608734113310685e-07, + "loss": 32.1657, + "step": 409710 + }, + { + "epoch": 0.827660322321295, + "grad_norm": 182.7586212158203, + "learning_rate": 9.606676740939375e-07, + "loss": 17.4946, + "step": 409720 + }, + { + "epoch": 0.8276805229539789, + "grad_norm": 0.0, + "learning_rate": 9.60461956544021e-07, + "loss": 10.775, + "step": 409730 + }, + { + "epoch": 0.8277007235866627, + "grad_norm": 240.7188262939453, + "learning_rate": 9.602562586823232e-07, + "loss": 55.8408, + "step": 409740 + }, + { + "epoch": 0.8277209242193465, + "grad_norm": 155.72105407714844, + "learning_rate": 9.600505805098486e-07, + "loss": 4.9667, + "step": 409750 + }, + { + "epoch": 0.8277411248520303, + "grad_norm": 138.11083984375, + "learning_rate": 9.59844922027595e-07, + "loss": 17.2876, + "step": 409760 + }, + { + "epoch": 0.8277613254847141, + "grad_norm": 406.36859130859375, + "learning_rate": 9.596392832365676e-07, + "loss": 17.6788, + "step": 409770 + }, + { + "epoch": 0.827781526117398, + "grad_norm": 396.15576171875, + "learning_rate": 9.594336641377695e-07, + "loss": 13.3846, + "step": 409780 + }, + { + "epoch": 0.8278017267500818, + "grad_norm": 513.1149291992188, + "learning_rate": 9.592280647322015e-07, + "loss": 26.491, + "step": 409790 + }, + { + "epoch": 0.8278219273827656, + "grad_norm": 1416.871826171875, + "learning_rate": 9.590224850208645e-07, + "loss": 35.7224, + "step": 409800 + }, + { + "epoch": 0.8278421280154494, + "grad_norm": 564.0831909179688, + "learning_rate": 9.588169250047624e-07, + "loss": 13.2908, + "step": 409810 + }, + { + "epoch": 0.8278623286481332, + "grad_norm": 514.4425659179688, + "learning_rate": 9.586113846848982e-07, + "loss": 17.1973, + "step": 409820 + }, + { + "epoch": 0.8278825292808171, + "grad_norm": 400.3626708984375, + "learning_rate": 9.584058640622702e-07, + "loss": 14.8681, + "step": 409830 + }, + { + "epoch": 0.8279027299135009, + "grad_norm": 304.7544250488281, + "learning_rate": 9.58200363137881e-07, + "loss": 18.7906, + "step": 409840 + }, + { + "epoch": 0.8279229305461847, + "grad_norm": 199.91624450683594, + "learning_rate": 9.57994881912735e-07, + "loss": 19.397, + "step": 409850 + }, + { + "epoch": 0.8279431311788685, + "grad_norm": 708.1905517578125, + "learning_rate": 9.577894203878313e-07, + "loss": 30.0399, + "step": 409860 + }, + { + "epoch": 0.8279633318115523, + "grad_norm": 168.64984130859375, + "learning_rate": 9.575839785641706e-07, + "loss": 36.4297, + "step": 409870 + }, + { + "epoch": 0.8279835324442362, + "grad_norm": 377.0751953125, + "learning_rate": 9.573785564427563e-07, + "loss": 9.8389, + "step": 409880 + }, + { + "epoch": 0.82800373307692, + "grad_norm": 375.81939697265625, + "learning_rate": 9.571731540245887e-07, + "loss": 16.9625, + "step": 409890 + }, + { + "epoch": 0.8280239337096038, + "grad_norm": 
346.2193603515625, + "learning_rate": 9.569677713106673e-07, + "loss": 19.8607, + "step": 409900 + }, + { + "epoch": 0.8280441343422876, + "grad_norm": 308.16851806640625, + "learning_rate": 9.567624083019966e-07, + "loss": 22.3001, + "step": 409910 + }, + { + "epoch": 0.8280643349749714, + "grad_norm": 540.9521484375, + "learning_rate": 9.565570649995736e-07, + "loss": 23.195, + "step": 409920 + }, + { + "epoch": 0.8280845356076553, + "grad_norm": 7.390625476837158, + "learning_rate": 9.563517414044028e-07, + "loss": 11.8909, + "step": 409930 + }, + { + "epoch": 0.8281047362403391, + "grad_norm": 313.5946960449219, + "learning_rate": 9.561464375174827e-07, + "loss": 10.91, + "step": 409940 + }, + { + "epoch": 0.8281249368730229, + "grad_norm": 405.2649230957031, + "learning_rate": 9.559411533398139e-07, + "loss": 11.8811, + "step": 409950 + }, + { + "epoch": 0.8281451375057067, + "grad_norm": 400.70880126953125, + "learning_rate": 9.557358888723977e-07, + "loss": 11.1705, + "step": 409960 + }, + { + "epoch": 0.8281653381383905, + "grad_norm": 217.29420471191406, + "learning_rate": 9.555306441162337e-07, + "loss": 10.797, + "step": 409970 + }, + { + "epoch": 0.8281855387710743, + "grad_norm": 433.2265625, + "learning_rate": 9.553254190723239e-07, + "loss": 29.504, + "step": 409980 + }, + { + "epoch": 0.8282057394037581, + "grad_norm": 104.81578063964844, + "learning_rate": 9.55120213741666e-07, + "loss": 11.4437, + "step": 409990 + }, + { + "epoch": 0.8282259400364419, + "grad_norm": 110.06478881835938, + "learning_rate": 9.549150281252633e-07, + "loss": 14.5852, + "step": 410000 + }, + { + "epoch": 0.8282461406691257, + "grad_norm": 260.6749572753906, + "learning_rate": 9.54709862224114e-07, + "loss": 18.3852, + "step": 410010 + }, + { + "epoch": 0.8282663413018095, + "grad_norm": 134.43032836914062, + "learning_rate": 9.545047160392169e-07, + "loss": 13.3911, + "step": 410020 + }, + { + "epoch": 0.8282865419344934, + "grad_norm": 419.7550354003906, + "learning_rate": 9.54299589571574e-07, + "loss": 20.2757, + "step": 410030 + }, + { + "epoch": 0.8283067425671772, + "grad_norm": 461.2312316894531, + "learning_rate": 9.540944828221848e-07, + "loss": 16.329, + "step": 410040 + }, + { + "epoch": 0.828326943199861, + "grad_norm": 4.0432024002075195, + "learning_rate": 9.538893957920464e-07, + "loss": 6.0722, + "step": 410050 + }, + { + "epoch": 0.8283471438325448, + "grad_norm": 3.8514244556427, + "learning_rate": 9.536843284821612e-07, + "loss": 7.3329, + "step": 410060 + }, + { + "epoch": 0.8283673444652286, + "grad_norm": 149.021484375, + "learning_rate": 9.534792808935284e-07, + "loss": 10.045, + "step": 410070 + }, + { + "epoch": 0.8283875450979125, + "grad_norm": 102.64237213134766, + "learning_rate": 9.532742530271471e-07, + "loss": 13.7285, + "step": 410080 + }, + { + "epoch": 0.8284077457305963, + "grad_norm": 328.27374267578125, + "learning_rate": 9.530692448840151e-07, + "loss": 20.2738, + "step": 410090 + }, + { + "epoch": 0.8284279463632801, + "grad_norm": 646.1759643554688, + "learning_rate": 9.528642564651341e-07, + "loss": 32.3501, + "step": 410100 + }, + { + "epoch": 0.8284481469959639, + "grad_norm": 377.765380859375, + "learning_rate": 9.526592877715019e-07, + "loss": 15.0377, + "step": 410110 + }, + { + "epoch": 0.8284683476286477, + "grad_norm": 209.18014526367188, + "learning_rate": 9.524543388041157e-07, + "loss": 56.0616, + "step": 410120 + }, + { + "epoch": 0.8284885482613316, + "grad_norm": 128.7597198486328, + "learning_rate": 9.522494095639762e-07, + "loss": 
13.2762, + "step": 410130 + }, + { + "epoch": 0.8285087488940154, + "grad_norm": 7.26552677154541, + "learning_rate": 9.52044500052084e-07, + "loss": 7.3488, + "step": 410140 + }, + { + "epoch": 0.8285289495266992, + "grad_norm": 275.5406188964844, + "learning_rate": 9.518396102694355e-07, + "loss": 33.0551, + "step": 410150 + }, + { + "epoch": 0.828549150159383, + "grad_norm": 394.4302978515625, + "learning_rate": 9.516347402170284e-07, + "loss": 19.0952, + "step": 410160 + }, + { + "epoch": 0.8285693507920668, + "grad_norm": 260.6647033691406, + "learning_rate": 9.514298898958641e-07, + "loss": 27.6525, + "step": 410170 + }, + { + "epoch": 0.8285895514247507, + "grad_norm": 422.53643798828125, + "learning_rate": 9.512250593069394e-07, + "loss": 11.8781, + "step": 410180 + }, + { + "epoch": 0.8286097520574345, + "grad_norm": 531.523193359375, + "learning_rate": 9.510202484512516e-07, + "loss": 12.3363, + "step": 410190 + }, + { + "epoch": 0.8286299526901183, + "grad_norm": 101.8055419921875, + "learning_rate": 9.508154573298012e-07, + "loss": 18.8377, + "step": 410200 + }, + { + "epoch": 0.8286501533228021, + "grad_norm": 139.24330139160156, + "learning_rate": 9.506106859435838e-07, + "loss": 15.0086, + "step": 410210 + }, + { + "epoch": 0.8286703539554859, + "grad_norm": 263.025146484375, + "learning_rate": 9.504059342936001e-07, + "loss": 19.079, + "step": 410220 + }, + { + "epoch": 0.8286905545881696, + "grad_norm": 224.90972900390625, + "learning_rate": 9.502012023808466e-07, + "loss": 22.5412, + "step": 410230 + }, + { + "epoch": 0.8287107552208535, + "grad_norm": 428.4664611816406, + "learning_rate": 9.499964902063203e-07, + "loss": 15.266, + "step": 410240 + }, + { + "epoch": 0.8287309558535373, + "grad_norm": 381.9537658691406, + "learning_rate": 9.497917977710208e-07, + "loss": 22.7627, + "step": 410250 + }, + { + "epoch": 0.8287511564862211, + "grad_norm": 201.21485900878906, + "learning_rate": 9.495871250759437e-07, + "loss": 17.1629, + "step": 410260 + }, + { + "epoch": 0.8287713571189049, + "grad_norm": 417.35638427734375, + "learning_rate": 9.49382472122089e-07, + "loss": 20.4261, + "step": 410270 + }, + { + "epoch": 0.8287915577515887, + "grad_norm": 74.46168518066406, + "learning_rate": 9.491778389104511e-07, + "loss": 81.7224, + "step": 410280 + }, + { + "epoch": 0.8288117583842726, + "grad_norm": 80.65632629394531, + "learning_rate": 9.489732254420315e-07, + "loss": 12.7389, + "step": 410290 + }, + { + "epoch": 0.8288319590169564, + "grad_norm": 187.4225311279297, + "learning_rate": 9.487686317178241e-07, + "loss": 10.1137, + "step": 410300 + }, + { + "epoch": 0.8288521596496402, + "grad_norm": 145.52528381347656, + "learning_rate": 9.48564057738826e-07, + "loss": 16.1393, + "step": 410310 + }, + { + "epoch": 0.828872360282324, + "grad_norm": 100.24214172363281, + "learning_rate": 9.483595035060367e-07, + "loss": 10.1025, + "step": 410320 + }, + { + "epoch": 0.8288925609150078, + "grad_norm": 383.0196533203125, + "learning_rate": 9.481549690204517e-07, + "loss": 14.8997, + "step": 410330 + }, + { + "epoch": 0.8289127615476917, + "grad_norm": 258.4500427246094, + "learning_rate": 9.47950454283067e-07, + "loss": 24.6888, + "step": 410340 + }, + { + "epoch": 0.8289329621803755, + "grad_norm": 315.6438293457031, + "learning_rate": 9.477459592948796e-07, + "loss": 29.4249, + "step": 410350 + }, + { + "epoch": 0.8289531628130593, + "grad_norm": 337.36968994140625, + "learning_rate": 9.475414840568903e-07, + "loss": 13.5003, + "step": 410360 + }, + { + "epoch": 
0.8289733634457431, + "grad_norm": 419.51239013671875, + "learning_rate": 9.473370285700889e-07, + "loss": 19.1721, + "step": 410370 + }, + { + "epoch": 0.828993564078427, + "grad_norm": 99.75167083740234, + "learning_rate": 9.471325928354758e-07, + "loss": 19.118, + "step": 410380 + }, + { + "epoch": 0.8290137647111108, + "grad_norm": 387.23175048828125, + "learning_rate": 9.469281768540484e-07, + "loss": 22.0007, + "step": 410390 + }, + { + "epoch": 0.8290339653437946, + "grad_norm": 287.00994873046875, + "learning_rate": 9.467237806268009e-07, + "loss": 13.1687, + "step": 410400 + }, + { + "epoch": 0.8290541659764784, + "grad_norm": 621.3529052734375, + "learning_rate": 9.465194041547294e-07, + "loss": 19.163, + "step": 410410 + }, + { + "epoch": 0.8290743666091622, + "grad_norm": 392.2379455566406, + "learning_rate": 9.463150474388305e-07, + "loss": 23.21, + "step": 410420 + }, + { + "epoch": 0.829094567241846, + "grad_norm": 42.52976989746094, + "learning_rate": 9.461107104801026e-07, + "loss": 14.2209, + "step": 410430 + }, + { + "epoch": 0.8291147678745299, + "grad_norm": 2.3006131649017334, + "learning_rate": 9.45906393279537e-07, + "loss": 14.5232, + "step": 410440 + }, + { + "epoch": 0.8291349685072137, + "grad_norm": 663.354248046875, + "learning_rate": 9.457020958381324e-07, + "loss": 22.7271, + "step": 410450 + }, + { + "epoch": 0.8291551691398975, + "grad_norm": 302.34716796875, + "learning_rate": 9.454978181568847e-07, + "loss": 10.4355, + "step": 410460 + }, + { + "epoch": 0.8291753697725813, + "grad_norm": 488.5825500488281, + "learning_rate": 9.452935602367897e-07, + "loss": 15.7274, + "step": 410470 + }, + { + "epoch": 0.8291955704052651, + "grad_norm": 405.4479064941406, + "learning_rate": 9.450893220788399e-07, + "loss": 18.9441, + "step": 410480 + }, + { + "epoch": 0.8292157710379489, + "grad_norm": 227.84194946289062, + "learning_rate": 9.448851036840345e-07, + "loss": 25.4943, + "step": 410490 + }, + { + "epoch": 0.8292359716706327, + "grad_norm": 303.5299072265625, + "learning_rate": 9.446809050533679e-07, + "loss": 10.6827, + "step": 410500 + }, + { + "epoch": 0.8292561723033165, + "grad_norm": 159.5272979736328, + "learning_rate": 9.444767261878329e-07, + "loss": 22.9927, + "step": 410510 + }, + { + "epoch": 0.8292763729360003, + "grad_norm": 342.5478515625, + "learning_rate": 9.442725670884278e-07, + "loss": 13.5177, + "step": 410520 + }, + { + "epoch": 0.8292965735686841, + "grad_norm": 397.901611328125, + "learning_rate": 9.440684277561452e-07, + "loss": 17.6795, + "step": 410530 + }, + { + "epoch": 0.829316774201368, + "grad_norm": 678.2149047851562, + "learning_rate": 9.438643081919818e-07, + "loss": 36.0053, + "step": 410540 + }, + { + "epoch": 0.8293369748340518, + "grad_norm": 976.1620483398438, + "learning_rate": 9.436602083969326e-07, + "loss": 37.0097, + "step": 410550 + }, + { + "epoch": 0.8293571754667356, + "grad_norm": 924.8463745117188, + "learning_rate": 9.434561283719901e-07, + "loss": 21.1344, + "step": 410560 + }, + { + "epoch": 0.8293773760994194, + "grad_norm": 47.7503662109375, + "learning_rate": 9.432520681181512e-07, + "loss": 13.8507, + "step": 410570 + }, + { + "epoch": 0.8293975767321032, + "grad_norm": 175.39149475097656, + "learning_rate": 9.430480276364091e-07, + "loss": 18.1541, + "step": 410580 + }, + { + "epoch": 0.8294177773647871, + "grad_norm": 304.0611572265625, + "learning_rate": 9.428440069277595e-07, + "loss": 14.9955, + "step": 410590 + }, + { + "epoch": 0.8294379779974709, + "grad_norm": 15.601140022277832, + 
"learning_rate": 9.426400059931956e-07, + "loss": 17.8279, + "step": 410600 + }, + { + "epoch": 0.8294581786301547, + "grad_norm": 668.298095703125, + "learning_rate": 9.424360248337128e-07, + "loss": 29.1598, + "step": 410610 + }, + { + "epoch": 0.8294783792628385, + "grad_norm": 408.1661376953125, + "learning_rate": 9.422320634503052e-07, + "loss": 15.3479, + "step": 410620 + }, + { + "epoch": 0.8294985798955223, + "grad_norm": 19.777023315429688, + "learning_rate": 9.420281218439648e-07, + "loss": 16.2468, + "step": 410630 + }, + { + "epoch": 0.8295187805282062, + "grad_norm": 189.5535430908203, + "learning_rate": 9.418242000156886e-07, + "loss": 21.9398, + "step": 410640 + }, + { + "epoch": 0.82953898116089, + "grad_norm": 283.3282470703125, + "learning_rate": 9.41620297966469e-07, + "loss": 9.5893, + "step": 410650 + }, + { + "epoch": 0.8295591817935738, + "grad_norm": 670.09765625, + "learning_rate": 9.414164156972982e-07, + "loss": 24.5431, + "step": 410660 + }, + { + "epoch": 0.8295793824262576, + "grad_norm": 159.86378479003906, + "learning_rate": 9.41212553209172e-07, + "loss": 18.8697, + "step": 410670 + }, + { + "epoch": 0.8295995830589414, + "grad_norm": 290.81488037109375, + "learning_rate": 9.410087105030846e-07, + "loss": 21.4635, + "step": 410680 + }, + { + "epoch": 0.8296197836916253, + "grad_norm": 313.0645751953125, + "learning_rate": 9.408048875800286e-07, + "loss": 10.5297, + "step": 410690 + }, + { + "epoch": 0.8296399843243091, + "grad_norm": 242.60292053222656, + "learning_rate": 9.406010844409957e-07, + "loss": 12.4543, + "step": 410700 + }, + { + "epoch": 0.8296601849569929, + "grad_norm": 7.086318492889404, + "learning_rate": 9.403973010869826e-07, + "loss": 10.9747, + "step": 410710 + }, + { + "epoch": 0.8296803855896767, + "grad_norm": 358.39837646484375, + "learning_rate": 9.401935375189802e-07, + "loss": 13.273, + "step": 410720 + }, + { + "epoch": 0.8297005862223605, + "grad_norm": 1072.34130859375, + "learning_rate": 9.39989793737981e-07, + "loss": 26.2195, + "step": 410730 + }, + { + "epoch": 0.8297207868550442, + "grad_norm": 271.1131896972656, + "learning_rate": 9.39786069744979e-07, + "loss": 10.2916, + "step": 410740 + }, + { + "epoch": 0.8297409874877281, + "grad_norm": 144.45306396484375, + "learning_rate": 9.395823655409686e-07, + "loss": 13.8853, + "step": 410750 + }, + { + "epoch": 0.8297611881204119, + "grad_norm": 406.6188049316406, + "learning_rate": 9.393786811269418e-07, + "loss": 28.0983, + "step": 410760 + }, + { + "epoch": 0.8297813887530957, + "grad_norm": 303.7313537597656, + "learning_rate": 9.391750165038887e-07, + "loss": 31.1664, + "step": 410770 + }, + { + "epoch": 0.8298015893857795, + "grad_norm": 591.87841796875, + "learning_rate": 9.38971371672806e-07, + "loss": 11.3182, + "step": 410780 + }, + { + "epoch": 0.8298217900184633, + "grad_norm": 314.4297790527344, + "learning_rate": 9.387677466346839e-07, + "loss": 13.0558, + "step": 410790 + }, + { + "epoch": 0.8298419906511472, + "grad_norm": 261.5039978027344, + "learning_rate": 9.385641413905139e-07, + "loss": 10.7185, + "step": 410800 + }, + { + "epoch": 0.829862191283831, + "grad_norm": 632.2877807617188, + "learning_rate": 9.383605559412911e-07, + "loss": 23.6123, + "step": 410810 + }, + { + "epoch": 0.8298823919165148, + "grad_norm": 315.0278015136719, + "learning_rate": 9.38156990288005e-07, + "loss": 17.0746, + "step": 410820 + }, + { + "epoch": 0.8299025925491986, + "grad_norm": 507.8117980957031, + "learning_rate": 9.379534444316507e-07, + "loss": 25.6956, + "step": 
410830 + }, + { + "epoch": 0.8299227931818824, + "grad_norm": 389.24224853515625, + "learning_rate": 9.37749918373218e-07, + "loss": 8.4088, + "step": 410840 + }, + { + "epoch": 0.8299429938145663, + "grad_norm": 223.9938507080078, + "learning_rate": 9.375464121136984e-07, + "loss": 11.5526, + "step": 410850 + }, + { + "epoch": 0.8299631944472501, + "grad_norm": 231.66468811035156, + "learning_rate": 9.373429256540866e-07, + "loss": 21.1557, + "step": 410860 + }, + { + "epoch": 0.8299833950799339, + "grad_norm": 133.00701904296875, + "learning_rate": 9.371394589953714e-07, + "loss": 12.3585, + "step": 410870 + }, + { + "epoch": 0.8300035957126177, + "grad_norm": 711.957275390625, + "learning_rate": 9.369360121385463e-07, + "loss": 20.4118, + "step": 410880 + }, + { + "epoch": 0.8300237963453015, + "grad_norm": 536.5179443359375, + "learning_rate": 9.367325850846015e-07, + "loss": 12.788, + "step": 410890 + }, + { + "epoch": 0.8300439969779854, + "grad_norm": 569.9052124023438, + "learning_rate": 9.365291778345303e-07, + "loss": 16.8838, + "step": 410900 + }, + { + "epoch": 0.8300641976106692, + "grad_norm": 399.3359375, + "learning_rate": 9.363257903893235e-07, + "loss": 16.0271, + "step": 410910 + }, + { + "epoch": 0.830084398243353, + "grad_norm": 554.6058959960938, + "learning_rate": 9.361224227499704e-07, + "loss": 17.2537, + "step": 410920 + }, + { + "epoch": 0.8301045988760368, + "grad_norm": 352.2273864746094, + "learning_rate": 9.359190749174645e-07, + "loss": 35.0574, + "step": 410930 + }, + { + "epoch": 0.8301247995087206, + "grad_norm": 416.0232849121094, + "learning_rate": 9.357157468927969e-07, + "loss": 25.0878, + "step": 410940 + }, + { + "epoch": 0.8301450001414045, + "grad_norm": 665.92626953125, + "learning_rate": 9.355124386769559e-07, + "loss": 20.1041, + "step": 410950 + }, + { + "epoch": 0.8301652007740883, + "grad_norm": 138.0343475341797, + "learning_rate": 9.353091502709349e-07, + "loss": 21.9213, + "step": 410960 + }, + { + "epoch": 0.8301854014067721, + "grad_norm": 165.73696899414062, + "learning_rate": 9.351058816757264e-07, + "loss": 12.9137, + "step": 410970 + }, + { + "epoch": 0.8302056020394559, + "grad_norm": 386.0030212402344, + "learning_rate": 9.349026328923161e-07, + "loss": 16.0946, + "step": 410980 + }, + { + "epoch": 0.8302258026721397, + "grad_norm": 408.11669921875, + "learning_rate": 9.346994039216972e-07, + "loss": 11.2878, + "step": 410990 + }, + { + "epoch": 0.8302460033048235, + "grad_norm": 404.0555419921875, + "learning_rate": 9.344961947648624e-07, + "loss": 6.5853, + "step": 411000 + }, + { + "epoch": 0.8302662039375073, + "grad_norm": 180.45965576171875, + "learning_rate": 9.342930054227994e-07, + "loss": 17.7546, + "step": 411010 + }, + { + "epoch": 0.8302864045701911, + "grad_norm": 379.35809326171875, + "learning_rate": 9.340898358964978e-07, + "loss": 11.1587, + "step": 411020 + }, + { + "epoch": 0.8303066052028749, + "grad_norm": 179.00009155273438, + "learning_rate": 9.338866861869494e-07, + "loss": 23.9938, + "step": 411030 + }, + { + "epoch": 0.8303268058355587, + "grad_norm": 211.85879516601562, + "learning_rate": 9.336835562951468e-07, + "loss": 8.7912, + "step": 411040 + }, + { + "epoch": 0.8303470064682426, + "grad_norm": 167.1537628173828, + "learning_rate": 9.334804462220748e-07, + "loss": 14.0006, + "step": 411050 + }, + { + "epoch": 0.8303672071009264, + "grad_norm": 565.4570922851562, + "learning_rate": 9.332773559687258e-07, + "loss": 22.9532, + "step": 411060 + }, + { + "epoch": 0.8303874077336102, + "grad_norm": 
299.7813720703125, + "learning_rate": 9.330742855360914e-07, + "loss": 24.9923, + "step": 411070 + }, + { + "epoch": 0.830407608366294, + "grad_norm": 625.1273193359375, + "learning_rate": 9.32871234925159e-07, + "loss": 21.7001, + "step": 411080 + }, + { + "epoch": 0.8304278089989778, + "grad_norm": 334.21612548828125, + "learning_rate": 9.326682041369178e-07, + "loss": 19.3692, + "step": 411090 + }, + { + "epoch": 0.8304480096316617, + "grad_norm": 416.40594482421875, + "learning_rate": 9.3246519317236e-07, + "loss": 12.3695, + "step": 411100 + }, + { + "epoch": 0.8304682102643455, + "grad_norm": 254.921630859375, + "learning_rate": 9.322622020324734e-07, + "loss": 17.0086, + "step": 411110 + }, + { + "epoch": 0.8304884108970293, + "grad_norm": 12.822563171386719, + "learning_rate": 9.320592307182463e-07, + "loss": 27.2341, + "step": 411120 + }, + { + "epoch": 0.8305086115297131, + "grad_norm": 325.56964111328125, + "learning_rate": 9.318562792306707e-07, + "loss": 16.0775, + "step": 411130 + }, + { + "epoch": 0.8305288121623969, + "grad_norm": 686.0072631835938, + "learning_rate": 9.316533475707324e-07, + "loss": 14.179, + "step": 411140 + }, + { + "epoch": 0.8305490127950808, + "grad_norm": 207.0223388671875, + "learning_rate": 9.314504357394233e-07, + "loss": 18.4516, + "step": 411150 + }, + { + "epoch": 0.8305692134277646, + "grad_norm": 388.1821594238281, + "learning_rate": 9.312475437377322e-07, + "loss": 21.1226, + "step": 411160 + }, + { + "epoch": 0.8305894140604484, + "grad_norm": 361.5314636230469, + "learning_rate": 9.310446715666449e-07, + "loss": 15.0574, + "step": 411170 + }, + { + "epoch": 0.8306096146931322, + "grad_norm": 407.1485290527344, + "learning_rate": 9.30841819227154e-07, + "loss": 15.617, + "step": 411180 + }, + { + "epoch": 0.830629815325816, + "grad_norm": 270.5536804199219, + "learning_rate": 9.306389867202454e-07, + "loss": 18.7332, + "step": 411190 + }, + { + "epoch": 0.8306500159584999, + "grad_norm": 639.2105712890625, + "learning_rate": 9.304361740469103e-07, + "loss": 15.6452, + "step": 411200 + }, + { + "epoch": 0.8306702165911837, + "grad_norm": 461.1962585449219, + "learning_rate": 9.302333812081338e-07, + "loss": 22.9837, + "step": 411210 + }, + { + "epoch": 0.8306904172238675, + "grad_norm": 152.81753540039062, + "learning_rate": 9.300306082049082e-07, + "loss": 16.5258, + "step": 411220 + }, + { + "epoch": 0.8307106178565513, + "grad_norm": 199.67291259765625, + "learning_rate": 9.298278550382189e-07, + "loss": 6.3796, + "step": 411230 + }, + { + "epoch": 0.8307308184892351, + "grad_norm": 280.73388671875, + "learning_rate": 9.296251217090546e-07, + "loss": 18.1557, + "step": 411240 + }, + { + "epoch": 0.8307510191219188, + "grad_norm": 253.43238830566406, + "learning_rate": 9.294224082184045e-07, + "loss": 22.3909, + "step": 411250 + }, + { + "epoch": 0.8307712197546027, + "grad_norm": 312.5126647949219, + "learning_rate": 9.29219714567256e-07, + "loss": 7.3432, + "step": 411260 + }, + { + "epoch": 0.8307914203872865, + "grad_norm": 192.78823852539062, + "learning_rate": 9.290170407565957e-07, + "loss": 15.9798, + "step": 411270 + }, + { + "epoch": 0.8308116210199703, + "grad_norm": 375.00958251953125, + "learning_rate": 9.288143867874127e-07, + "loss": 23.6673, + "step": 411280 + }, + { + "epoch": 0.8308318216526541, + "grad_norm": 290.5165100097656, + "learning_rate": 9.286117526606958e-07, + "loss": 15.3995, + "step": 411290 + }, + { + "epoch": 0.8308520222853379, + "grad_norm": 379.6672668457031, + "learning_rate": 9.284091383774313e-07, + 
"loss": 16.1637, + "step": 411300 + }, + { + "epoch": 0.8308722229180218, + "grad_norm": 318.8584899902344, + "learning_rate": 9.282065439386057e-07, + "loss": 17.6491, + "step": 411310 + }, + { + "epoch": 0.8308924235507056, + "grad_norm": 13.372910499572754, + "learning_rate": 9.280039693452086e-07, + "loss": 9.7132, + "step": 411320 + }, + { + "epoch": 0.8309126241833894, + "grad_norm": 274.3443603515625, + "learning_rate": 9.278014145982261e-07, + "loss": 11.666, + "step": 411330 + }, + { + "epoch": 0.8309328248160732, + "grad_norm": 213.70191955566406, + "learning_rate": 9.275988796986451e-07, + "loss": 16.9673, + "step": 411340 + }, + { + "epoch": 0.830953025448757, + "grad_norm": 491.6261291503906, + "learning_rate": 9.273963646474527e-07, + "loss": 16.951, + "step": 411350 + }, + { + "epoch": 0.8309732260814409, + "grad_norm": 42.702850341796875, + "learning_rate": 9.271938694456378e-07, + "loss": 15.4695, + "step": 411360 + }, + { + "epoch": 0.8309934267141247, + "grad_norm": 469.57257080078125, + "learning_rate": 9.26991394094186e-07, + "loss": 22.5239, + "step": 411370 + }, + { + "epoch": 0.8310136273468085, + "grad_norm": 788.18017578125, + "learning_rate": 9.267889385940826e-07, + "loss": 33.9399, + "step": 411380 + }, + { + "epoch": 0.8310338279794923, + "grad_norm": 29.37600326538086, + "learning_rate": 9.265865029463178e-07, + "loss": 24.9759, + "step": 411390 + }, + { + "epoch": 0.8310540286121761, + "grad_norm": 793.1854858398438, + "learning_rate": 9.263840871518759e-07, + "loss": 28.1995, + "step": 411400 + }, + { + "epoch": 0.83107422924486, + "grad_norm": 246.13809204101562, + "learning_rate": 9.261816912117428e-07, + "loss": 25.9248, + "step": 411410 + }, + { + "epoch": 0.8310944298775438, + "grad_norm": 212.0892791748047, + "learning_rate": 9.259793151269075e-07, + "loss": 23.8625, + "step": 411420 + }, + { + "epoch": 0.8311146305102276, + "grad_norm": 0.5332641005516052, + "learning_rate": 9.257769588983533e-07, + "loss": 21.5472, + "step": 411430 + }, + { + "epoch": 0.8311348311429114, + "grad_norm": 309.0165100097656, + "learning_rate": 9.255746225270689e-07, + "loss": 15.9197, + "step": 411440 + }, + { + "epoch": 0.8311550317755952, + "grad_norm": 60.359275817871094, + "learning_rate": 9.253723060140407e-07, + "loss": 11.6712, + "step": 411450 + }, + { + "epoch": 0.8311752324082791, + "grad_norm": 333.2156066894531, + "learning_rate": 9.251700093602517e-07, + "loss": 29.5264, + "step": 411460 + }, + { + "epoch": 0.8311954330409629, + "grad_norm": 198.3731689453125, + "learning_rate": 9.249677325666912e-07, + "loss": 16.8443, + "step": 411470 + }, + { + "epoch": 0.8312156336736467, + "grad_norm": 412.7635803222656, + "learning_rate": 9.247654756343427e-07, + "loss": 25.6259, + "step": 411480 + }, + { + "epoch": 0.8312358343063305, + "grad_norm": 172.53404235839844, + "learning_rate": 9.24563238564194e-07, + "loss": 14.6385, + "step": 411490 + }, + { + "epoch": 0.8312560349390143, + "grad_norm": 399.63623046875, + "learning_rate": 9.243610213572285e-07, + "loss": 25.3241, + "step": 411500 + }, + { + "epoch": 0.831276235571698, + "grad_norm": 438.2187805175781, + "learning_rate": 9.241588240144345e-07, + "loss": 24.2764, + "step": 411510 + }, + { + "epoch": 0.8312964362043819, + "grad_norm": 141.4309844970703, + "learning_rate": 9.23956646536796e-07, + "loss": 25.2009, + "step": 411520 + }, + { + "epoch": 0.8313166368370657, + "grad_norm": 895.9459228515625, + "learning_rate": 9.237544889252969e-07, + "loss": 19.0765, + "step": 411530 + }, + { + "epoch": 
0.8313368374697495, + "grad_norm": 254.4736328125, + "learning_rate": 9.235523511809258e-07, + "loss": 15.787, + "step": 411540 + }, + { + "epoch": 0.8313570381024333, + "grad_norm": 673.9073486328125, + "learning_rate": 9.233502333046662e-07, + "loss": 18.7821, + "step": 411550 + }, + { + "epoch": 0.8313772387351172, + "grad_norm": 572.1876220703125, + "learning_rate": 9.231481352975014e-07, + "loss": 16.1685, + "step": 411560 + }, + { + "epoch": 0.831397439367801, + "grad_norm": 255.88951110839844, + "learning_rate": 9.229460571604182e-07, + "loss": 32.6987, + "step": 411570 + }, + { + "epoch": 0.8314176400004848, + "grad_norm": 721.99560546875, + "learning_rate": 9.227439988944042e-07, + "loss": 19.6822, + "step": 411580 + }, + { + "epoch": 0.8314378406331686, + "grad_norm": 574.9505615234375, + "learning_rate": 9.225419605004387e-07, + "loss": 32.5775, + "step": 411590 + }, + { + "epoch": 0.8314580412658524, + "grad_norm": 426.6134338378906, + "learning_rate": 9.223399419795093e-07, + "loss": 35.3379, + "step": 411600 + }, + { + "epoch": 0.8314782418985363, + "grad_norm": 1008.5599365234375, + "learning_rate": 9.221379433326017e-07, + "loss": 29.3601, + "step": 411610 + }, + { + "epoch": 0.8314984425312201, + "grad_norm": 346.8797912597656, + "learning_rate": 9.21935964560699e-07, + "loss": 17.456, + "step": 411620 + }, + { + "epoch": 0.8315186431639039, + "grad_norm": 47.59032440185547, + "learning_rate": 9.217340056647844e-07, + "loss": 10.1423, + "step": 411630 + }, + { + "epoch": 0.8315388437965877, + "grad_norm": 1047.11083984375, + "learning_rate": 9.215320666458438e-07, + "loss": 23.1623, + "step": 411640 + }, + { + "epoch": 0.8315590444292715, + "grad_norm": 1145.93212890625, + "learning_rate": 9.213301475048642e-07, + "loss": 25.3491, + "step": 411650 + }, + { + "epoch": 0.8315792450619554, + "grad_norm": 226.3491973876953, + "learning_rate": 9.211282482428241e-07, + "loss": 10.8633, + "step": 411660 + }, + { + "epoch": 0.8315994456946392, + "grad_norm": 306.0322570800781, + "learning_rate": 9.209263688607095e-07, + "loss": 20.6406, + "step": 411670 + }, + { + "epoch": 0.831619646327323, + "grad_norm": 467.8283386230469, + "learning_rate": 9.207245093595068e-07, + "loss": 11.8403, + "step": 411680 + }, + { + "epoch": 0.8316398469600068, + "grad_norm": 48.53239440917969, + "learning_rate": 9.205226697401981e-07, + "loss": 19.3827, + "step": 411690 + }, + { + "epoch": 0.8316600475926906, + "grad_norm": 256.9255676269531, + "learning_rate": 9.203208500037664e-07, + "loss": 12.6553, + "step": 411700 + }, + { + "epoch": 0.8316802482253745, + "grad_norm": 23.489212036132812, + "learning_rate": 9.201190501511964e-07, + "loss": 11.7247, + "step": 411710 + }, + { + "epoch": 0.8317004488580583, + "grad_norm": 510.6328430175781, + "learning_rate": 9.199172701834718e-07, + "loss": 15.9846, + "step": 411720 + }, + { + "epoch": 0.8317206494907421, + "grad_norm": 319.6410217285156, + "learning_rate": 9.197155101015742e-07, + "loss": 12.1113, + "step": 411730 + }, + { + "epoch": 0.8317408501234259, + "grad_norm": 601.1150512695312, + "learning_rate": 9.195137699064899e-07, + "loss": 16.7689, + "step": 411740 + }, + { + "epoch": 0.8317610507561097, + "grad_norm": 377.2607116699219, + "learning_rate": 9.193120495991986e-07, + "loss": 24.6712, + "step": 411750 + }, + { + "epoch": 0.8317812513887936, + "grad_norm": 795.7567749023438, + "learning_rate": 9.191103491806875e-07, + "loss": 30.6676, + "step": 411760 + }, + { + "epoch": 0.8318014520214773, + "grad_norm": 157.62258911132812, + 
"learning_rate": 9.189086686519361e-07, + "loss": 10.8706, + "step": 411770 + }, + { + "epoch": 0.8318216526541611, + "grad_norm": 531.1155395507812, + "learning_rate": 9.187070080139299e-07, + "loss": 16.4465, + "step": 411780 + }, + { + "epoch": 0.8318418532868449, + "grad_norm": 143.78717041015625, + "learning_rate": 9.185053672676508e-07, + "loss": 17.1793, + "step": 411790 + }, + { + "epoch": 0.8318620539195287, + "grad_norm": 249.1612548828125, + "learning_rate": 9.183037464140804e-07, + "loss": 44.7882, + "step": 411800 + }, + { + "epoch": 0.8318822545522125, + "grad_norm": 329.79327392578125, + "learning_rate": 9.181021454542033e-07, + "loss": 14.9824, + "step": 411810 + }, + { + "epoch": 0.8319024551848964, + "grad_norm": 186.31150817871094, + "learning_rate": 9.179005643890005e-07, + "loss": 16.0255, + "step": 411820 + }, + { + "epoch": 0.8319226558175802, + "grad_norm": 196.14999389648438, + "learning_rate": 9.176990032194566e-07, + "loss": 17.4042, + "step": 411830 + }, + { + "epoch": 0.831942856450264, + "grad_norm": 456.0150451660156, + "learning_rate": 9.174974619465521e-07, + "loss": 17.0844, + "step": 411840 + }, + { + "epoch": 0.8319630570829478, + "grad_norm": 132.55392456054688, + "learning_rate": 9.17295940571269e-07, + "loss": 19.5425, + "step": 411850 + }, + { + "epoch": 0.8319832577156316, + "grad_norm": 446.3443908691406, + "learning_rate": 9.170944390945918e-07, + "loss": 19.4433, + "step": 411860 + }, + { + "epoch": 0.8320034583483155, + "grad_norm": 6.196475505828857, + "learning_rate": 9.168929575175006e-07, + "loss": 20.4936, + "step": 411870 + }, + { + "epoch": 0.8320236589809993, + "grad_norm": 210.97549438476562, + "learning_rate": 9.166914958409767e-07, + "loss": 21.8871, + "step": 411880 + }, + { + "epoch": 0.8320438596136831, + "grad_norm": 47.52590560913086, + "learning_rate": 9.164900540660032e-07, + "loss": 13.5703, + "step": 411890 + }, + { + "epoch": 0.8320640602463669, + "grad_norm": 397.4734802246094, + "learning_rate": 9.162886321935632e-07, + "loss": 12.2626, + "step": 411900 + }, + { + "epoch": 0.8320842608790507, + "grad_norm": 859.37890625, + "learning_rate": 9.160872302246376e-07, + "loss": 20.3554, + "step": 411910 + }, + { + "epoch": 0.8321044615117346, + "grad_norm": 255.53822326660156, + "learning_rate": 9.158858481602057e-07, + "loss": 9.5881, + "step": 411920 + }, + { + "epoch": 0.8321246621444184, + "grad_norm": 338.6055908203125, + "learning_rate": 9.15684486001252e-07, + "loss": 24.1861, + "step": 411930 + }, + { + "epoch": 0.8321448627771022, + "grad_norm": 633.619384765625, + "learning_rate": 9.154831437487571e-07, + "loss": 34.5775, + "step": 411940 + }, + { + "epoch": 0.832165063409786, + "grad_norm": 275.36859130859375, + "learning_rate": 9.152818214037007e-07, + "loss": 35.7982, + "step": 411950 + }, + { + "epoch": 0.8321852640424698, + "grad_norm": 360.8341064453125, + "learning_rate": 9.150805189670653e-07, + "loss": 17.0425, + "step": 411960 + }, + { + "epoch": 0.8322054646751537, + "grad_norm": 310.9940185546875, + "learning_rate": 9.148792364398328e-07, + "loss": 12.6153, + "step": 411970 + }, + { + "epoch": 0.8322256653078375, + "grad_norm": 545.13232421875, + "learning_rate": 9.146779738229838e-07, + "loss": 33.1984, + "step": 411980 + }, + { + "epoch": 0.8322458659405213, + "grad_norm": 190.50120544433594, + "learning_rate": 9.144767311174979e-07, + "loss": 43.5754, + "step": 411990 + }, + { + "epoch": 0.8322660665732051, + "grad_norm": 486.85797119140625, + "learning_rate": 9.142755083243577e-07, + "loss": 26.0594, + 
"step": 412000 + }, + { + "epoch": 0.8322862672058889, + "grad_norm": 248.93751525878906, + "learning_rate": 9.140743054445434e-07, + "loss": 26.355, + "step": 412010 + }, + { + "epoch": 0.8323064678385727, + "grad_norm": 360.4058837890625, + "learning_rate": 9.138731224790337e-07, + "loss": 18.8557, + "step": 412020 + }, + { + "epoch": 0.8323266684712565, + "grad_norm": 476.5465087890625, + "learning_rate": 9.136719594288124e-07, + "loss": 23.4403, + "step": 412030 + }, + { + "epoch": 0.8323468691039403, + "grad_norm": 254.30259704589844, + "learning_rate": 9.134708162948575e-07, + "loss": 19.2192, + "step": 412040 + }, + { + "epoch": 0.8323670697366241, + "grad_norm": 320.5162048339844, + "learning_rate": 9.132696930781509e-07, + "loss": 19.1303, + "step": 412050 + }, + { + "epoch": 0.8323872703693079, + "grad_norm": 368.6312561035156, + "learning_rate": 9.130685897796721e-07, + "loss": 20.132, + "step": 412060 + }, + { + "epoch": 0.8324074710019918, + "grad_norm": 410.7149963378906, + "learning_rate": 9.128675064004006e-07, + "loss": 15.2506, + "step": 412070 + }, + { + "epoch": 0.8324276716346756, + "grad_norm": 75.53740692138672, + "learning_rate": 9.126664429413179e-07, + "loss": 16.6689, + "step": 412080 + }, + { + "epoch": 0.8324478722673594, + "grad_norm": 356.1733093261719, + "learning_rate": 9.124653994034022e-07, + "loss": 17.0235, + "step": 412090 + }, + { + "epoch": 0.8324680729000432, + "grad_norm": 258.3155822753906, + "learning_rate": 9.122643757876354e-07, + "loss": 7.5383, + "step": 412100 + }, + { + "epoch": 0.832488273532727, + "grad_norm": 229.26480102539062, + "learning_rate": 9.120633720949951e-07, + "loss": 22.1528, + "step": 412110 + }, + { + "epoch": 0.8325084741654108, + "grad_norm": 417.92156982421875, + "learning_rate": 9.118623883264633e-07, + "loss": 16.4424, + "step": 412120 + }, + { + "epoch": 0.8325286747980947, + "grad_norm": 139.851806640625, + "learning_rate": 9.116614244830186e-07, + "loss": 12.0887, + "step": 412130 + }, + { + "epoch": 0.8325488754307785, + "grad_norm": 849.2883911132812, + "learning_rate": 9.11460480565639e-07, + "loss": 22.8037, + "step": 412140 + }, + { + "epoch": 0.8325690760634623, + "grad_norm": 616.9066772460938, + "learning_rate": 9.112595565753063e-07, + "loss": 26.1492, + "step": 412150 + }, + { + "epoch": 0.8325892766961461, + "grad_norm": 414.7738952636719, + "learning_rate": 9.110586525129988e-07, + "loss": 10.7356, + "step": 412160 + }, + { + "epoch": 0.83260947732883, + "grad_norm": 107.51226043701172, + "learning_rate": 9.108577683796938e-07, + "loss": 11.3799, + "step": 412170 + }, + { + "epoch": 0.8326296779615138, + "grad_norm": 339.81304931640625, + "learning_rate": 9.106569041763725e-07, + "loss": 18.7923, + "step": 412180 + }, + { + "epoch": 0.8326498785941976, + "grad_norm": 277.03924560546875, + "learning_rate": 9.104560599040158e-07, + "loss": 11.1877, + "step": 412190 + }, + { + "epoch": 0.8326700792268814, + "grad_norm": 409.3796081542969, + "learning_rate": 9.10255235563598e-07, + "loss": 9.2172, + "step": 412200 + }, + { + "epoch": 0.8326902798595652, + "grad_norm": 397.9833068847656, + "learning_rate": 9.100544311561e-07, + "loss": 18.0096, + "step": 412210 + }, + { + "epoch": 0.832710480492249, + "grad_norm": 225.15573120117188, + "learning_rate": 9.098536466825014e-07, + "loss": 21.4823, + "step": 412220 + }, + { + "epoch": 0.8327306811249329, + "grad_norm": 524.3678588867188, + "learning_rate": 9.096528821437806e-07, + "loss": 21.0009, + "step": 412230 + }, + { + "epoch": 0.8327508817576167, + 
"grad_norm": 525.455810546875, + "learning_rate": 9.094521375409143e-07, + "loss": 23.6072, + "step": 412240 + }, + { + "epoch": 0.8327710823903005, + "grad_norm": 73.05036163330078, + "learning_rate": 9.09251412874882e-07, + "loss": 24.2346, + "step": 412250 + }, + { + "epoch": 0.8327912830229843, + "grad_norm": 256.35650634765625, + "learning_rate": 9.090507081466648e-07, + "loss": 18.9737, + "step": 412260 + }, + { + "epoch": 0.8328114836556681, + "grad_norm": 86.88150024414062, + "learning_rate": 9.088500233572356e-07, + "loss": 8.7211, + "step": 412270 + }, + { + "epoch": 0.8328316842883519, + "grad_norm": 199.1029052734375, + "learning_rate": 9.086493585075757e-07, + "loss": 24.8217, + "step": 412280 + }, + { + "epoch": 0.8328518849210357, + "grad_norm": 113.49649810791016, + "learning_rate": 9.08448713598663e-07, + "loss": 23.8517, + "step": 412290 + }, + { + "epoch": 0.8328720855537195, + "grad_norm": 601.8291625976562, + "learning_rate": 9.08248088631476e-07, + "loss": 17.3809, + "step": 412300 + }, + { + "epoch": 0.8328922861864033, + "grad_norm": 443.21893310546875, + "learning_rate": 9.080474836069896e-07, + "loss": 12.3209, + "step": 412310 + }, + { + "epoch": 0.8329124868190871, + "grad_norm": 333.4514465332031, + "learning_rate": 9.078468985261851e-07, + "loss": 10.3934, + "step": 412320 + }, + { + "epoch": 0.832932687451771, + "grad_norm": 150.7950439453125, + "learning_rate": 9.076463333900382e-07, + "loss": 15.1107, + "step": 412330 + }, + { + "epoch": 0.8329528880844548, + "grad_norm": 74.56997680664062, + "learning_rate": 9.074457881995252e-07, + "loss": 26.3782, + "step": 412340 + }, + { + "epoch": 0.8329730887171386, + "grad_norm": 474.6545104980469, + "learning_rate": 9.072452629556272e-07, + "loss": 17.4449, + "step": 412350 + }, + { + "epoch": 0.8329932893498224, + "grad_norm": 289.1212463378906, + "learning_rate": 9.070447576593172e-07, + "loss": 29.2215, + "step": 412360 + }, + { + "epoch": 0.8330134899825062, + "grad_norm": 527.9814453125, + "learning_rate": 9.068442723115766e-07, + "loss": 27.2809, + "step": 412370 + }, + { + "epoch": 0.8330336906151901, + "grad_norm": 858.263916015625, + "learning_rate": 9.066438069133787e-07, + "loss": 26.6856, + "step": 412380 + }, + { + "epoch": 0.8330538912478739, + "grad_norm": 259.76495361328125, + "learning_rate": 9.064433614657042e-07, + "loss": 14.1867, + "step": 412390 + }, + { + "epoch": 0.8330740918805577, + "grad_norm": 548.4869995117188, + "learning_rate": 9.06242935969528e-07, + "loss": 17.8981, + "step": 412400 + }, + { + "epoch": 0.8330942925132415, + "grad_norm": 194.95555114746094, + "learning_rate": 9.060425304258263e-07, + "loss": 17.1656, + "step": 412410 + }, + { + "epoch": 0.8331144931459253, + "grad_norm": 574.6517333984375, + "learning_rate": 9.058421448355775e-07, + "loss": 14.5448, + "step": 412420 + }, + { + "epoch": 0.8331346937786092, + "grad_norm": 333.46209716796875, + "learning_rate": 9.056417791997568e-07, + "loss": 11.9902, + "step": 412430 + }, + { + "epoch": 0.833154894411293, + "grad_norm": 179.5861053466797, + "learning_rate": 9.054414335193424e-07, + "loss": 17.2803, + "step": 412440 + }, + { + "epoch": 0.8331750950439768, + "grad_norm": 481.0077819824219, + "learning_rate": 9.052411077953099e-07, + "loss": 19.6829, + "step": 412450 + }, + { + "epoch": 0.8331952956766606, + "grad_norm": 673.9221801757812, + "learning_rate": 9.050408020286344e-07, + "loss": 18.1825, + "step": 412460 + }, + { + "epoch": 0.8332154963093444, + "grad_norm": 432.15472412109375, + "learning_rate": 
9.048405162202944e-07, + "loss": 20.572, + "step": 412470 + }, + { + "epoch": 0.8332356969420283, + "grad_norm": 394.2590026855469, + "learning_rate": 9.046402503712653e-07, + "loss": 20.2231, + "step": 412480 + }, + { + "epoch": 0.8332558975747121, + "grad_norm": 0.0, + "learning_rate": 9.044400044825219e-07, + "loss": 21.5337, + "step": 412490 + }, + { + "epoch": 0.8332760982073959, + "grad_norm": 275.9071044921875, + "learning_rate": 9.042397785550405e-07, + "loss": 18.5616, + "step": 412500 + }, + { + "epoch": 0.8332962988400797, + "grad_norm": 193.82522583007812, + "learning_rate": 9.04039572589801e-07, + "loss": 28.0304, + "step": 412510 + }, + { + "epoch": 0.8333164994727635, + "grad_norm": 559.5623168945312, + "learning_rate": 9.038393865877725e-07, + "loss": 26.5004, + "step": 412520 + }, + { + "epoch": 0.8333367001054472, + "grad_norm": 394.2783203125, + "learning_rate": 9.036392205499344e-07, + "loss": 12.8299, + "step": 412530 + }, + { + "epoch": 0.8333569007381311, + "grad_norm": 119.73848724365234, + "learning_rate": 9.034390744772637e-07, + "loss": 22.5734, + "step": 412540 + }, + { + "epoch": 0.8333771013708149, + "grad_norm": 119.30097961425781, + "learning_rate": 9.032389483707332e-07, + "loss": 22.4433, + "step": 412550 + }, + { + "epoch": 0.8333973020034987, + "grad_norm": 694.3966064453125, + "learning_rate": 9.030388422313185e-07, + "loss": 26.6818, + "step": 412560 + }, + { + "epoch": 0.8334175026361825, + "grad_norm": 45.48277282714844, + "learning_rate": 9.028387560599955e-07, + "loss": 17.5723, + "step": 412570 + }, + { + "epoch": 0.8334377032688663, + "grad_norm": 206.2269744873047, + "learning_rate": 9.026386898577417e-07, + "loss": 17.653, + "step": 412580 + }, + { + "epoch": 0.8334579039015502, + "grad_norm": 218.09690856933594, + "learning_rate": 9.024386436255278e-07, + "loss": 20.3943, + "step": 412590 + }, + { + "epoch": 0.833478104534234, + "grad_norm": 193.77537536621094, + "learning_rate": 9.022386173643305e-07, + "loss": 10.1374, + "step": 412600 + }, + { + "epoch": 0.8334983051669178, + "grad_norm": 94.75252532958984, + "learning_rate": 9.020386110751267e-07, + "loss": 11.1618, + "step": 412610 + }, + { + "epoch": 0.8335185057996016, + "grad_norm": 260.6609191894531, + "learning_rate": 9.018386247588901e-07, + "loss": 9.4794, + "step": 412620 + }, + { + "epoch": 0.8335387064322854, + "grad_norm": 758.8001098632812, + "learning_rate": 9.016386584165932e-07, + "loss": 38.9882, + "step": 412630 + }, + { + "epoch": 0.8335589070649693, + "grad_norm": 483.4865417480469, + "learning_rate": 9.014387120492141e-07, + "loss": 16.5911, + "step": 412640 + }, + { + "epoch": 0.8335791076976531, + "grad_norm": 469.7196350097656, + "learning_rate": 9.012387856577238e-07, + "loss": 22.2123, + "step": 412650 + }, + { + "epoch": 0.8335993083303369, + "grad_norm": 333.5057678222656, + "learning_rate": 9.010388792431002e-07, + "loss": 17.5803, + "step": 412660 + }, + { + "epoch": 0.8336195089630207, + "grad_norm": 61.50606918334961, + "learning_rate": 9.008389928063161e-07, + "loss": 20.5005, + "step": 412670 + }, + { + "epoch": 0.8336397095957045, + "grad_norm": 86.1005859375, + "learning_rate": 9.006391263483438e-07, + "loss": 6.5381, + "step": 412680 + }, + { + "epoch": 0.8336599102283884, + "grad_norm": 73.05723571777344, + "learning_rate": 9.004392798701605e-07, + "loss": 7.1045, + "step": 412690 + }, + { + "epoch": 0.8336801108610722, + "grad_norm": 180.08389282226562, + "learning_rate": 9.002394533727382e-07, + "loss": 19.4751, + "step": 412700 + }, + { + "epoch": 
0.833700311493756, + "grad_norm": 220.0135498046875, + "learning_rate": 9.000396468570527e-07, + "loss": 20.2486, + "step": 412710 + }, + { + "epoch": 0.8337205121264398, + "grad_norm": 930.2962036132812, + "learning_rate": 8.998398603240755e-07, + "loss": 33.2094, + "step": 412720 + }, + { + "epoch": 0.8337407127591236, + "grad_norm": 440.52740478515625, + "learning_rate": 8.996400937747823e-07, + "loss": 17.4437, + "step": 412730 + }, + { + "epoch": 0.8337609133918075, + "grad_norm": 323.77581787109375, + "learning_rate": 8.994403472101465e-07, + "loss": 11.9247, + "step": 412740 + }, + { + "epoch": 0.8337811140244913, + "grad_norm": 193.76463317871094, + "learning_rate": 8.992406206311394e-07, + "loss": 11.6993, + "step": 412750 + }, + { + "epoch": 0.8338013146571751, + "grad_norm": 407.347412109375, + "learning_rate": 8.990409140387374e-07, + "loss": 19.1552, + "step": 412760 + }, + { + "epoch": 0.8338215152898589, + "grad_norm": 499.37908935546875, + "learning_rate": 8.988412274339131e-07, + "loss": 25.1889, + "step": 412770 + }, + { + "epoch": 0.8338417159225427, + "grad_norm": 443.1248779296875, + "learning_rate": 8.986415608176375e-07, + "loss": 15.8374, + "step": 412780 + }, + { + "epoch": 0.8338619165552265, + "grad_norm": 390.159423828125, + "learning_rate": 8.984419141908857e-07, + "loss": 21.9738, + "step": 412790 + }, + { + "epoch": 0.8338821171879103, + "grad_norm": 1117.803466796875, + "learning_rate": 8.982422875546332e-07, + "loss": 19.0937, + "step": 412800 + }, + { + "epoch": 0.8339023178205941, + "grad_norm": 166.6840362548828, + "learning_rate": 8.980426809098475e-07, + "loss": 17.6338, + "step": 412810 + }, + { + "epoch": 0.8339225184532779, + "grad_norm": 23.828283309936523, + "learning_rate": 8.978430942575045e-07, + "loss": 15.1676, + "step": 412820 + }, + { + "epoch": 0.8339427190859617, + "grad_norm": 1324.7325439453125, + "learning_rate": 8.976435275985779e-07, + "loss": 23.5533, + "step": 412830 + }, + { + "epoch": 0.8339629197186456, + "grad_norm": 59.1019401550293, + "learning_rate": 8.974439809340391e-07, + "loss": 13.1922, + "step": 412840 + }, + { + "epoch": 0.8339831203513294, + "grad_norm": 763.927978515625, + "learning_rate": 8.972444542648595e-07, + "loss": 15.6377, + "step": 412850 + }, + { + "epoch": 0.8340033209840132, + "grad_norm": 167.46139526367188, + "learning_rate": 8.970449475920129e-07, + "loss": 19.5433, + "step": 412860 + }, + { + "epoch": 0.834023521616697, + "grad_norm": 569.6694946289062, + "learning_rate": 8.968454609164745e-07, + "loss": 38.9997, + "step": 412870 + }, + { + "epoch": 0.8340437222493808, + "grad_norm": 459.7696533203125, + "learning_rate": 8.966459942392108e-07, + "loss": 14.4037, + "step": 412880 + }, + { + "epoch": 0.8340639228820647, + "grad_norm": 281.04302978515625, + "learning_rate": 8.964465475611967e-07, + "loss": 12.7747, + "step": 412890 + }, + { + "epoch": 0.8340841235147485, + "grad_norm": 172.90126037597656, + "learning_rate": 8.962471208834056e-07, + "loss": 24.5858, + "step": 412900 + }, + { + "epoch": 0.8341043241474323, + "grad_norm": 370.8978576660156, + "learning_rate": 8.960477142068085e-07, + "loss": 12.1178, + "step": 412910 + }, + { + "epoch": 0.8341245247801161, + "grad_norm": 418.8710632324219, + "learning_rate": 8.958483275323759e-07, + "loss": 35.5518, + "step": 412920 + }, + { + "epoch": 0.8341447254127999, + "grad_norm": 87.06157684326172, + "learning_rate": 8.956489608610825e-07, + "loss": 11.6532, + "step": 412930 + }, + { + "epoch": 0.8341649260454838, + "grad_norm": 750.3912963867188, + 
"learning_rate": 8.954496141938973e-07, + "loss": 29.8639, + "step": 412940 + }, + { + "epoch": 0.8341851266781676, + "grad_norm": 410.21051025390625, + "learning_rate": 8.95250287531792e-07, + "loss": 14.5647, + "step": 412950 + }, + { + "epoch": 0.8342053273108514, + "grad_norm": 557.1311645507812, + "learning_rate": 8.950509808757408e-07, + "loss": 33.6352, + "step": 412960 + }, + { + "epoch": 0.8342255279435352, + "grad_norm": 266.1794738769531, + "learning_rate": 8.94851694226711e-07, + "loss": 14.0841, + "step": 412970 + }, + { + "epoch": 0.834245728576219, + "grad_norm": 312.001953125, + "learning_rate": 8.946524275856783e-07, + "loss": 19.4741, + "step": 412980 + }, + { + "epoch": 0.8342659292089029, + "grad_norm": 256.38763427734375, + "learning_rate": 8.9445318095361e-07, + "loss": 11.4826, + "step": 412990 + }, + { + "epoch": 0.8342861298415867, + "grad_norm": 211.61795043945312, + "learning_rate": 8.942539543314799e-07, + "loss": 22.2258, + "step": 413000 + }, + { + "epoch": 0.8343063304742705, + "grad_norm": 215.25364685058594, + "learning_rate": 8.940547477202588e-07, + "loss": 14.7484, + "step": 413010 + }, + { + "epoch": 0.8343265311069543, + "grad_norm": 427.4831848144531, + "learning_rate": 8.938555611209149e-07, + "loss": 23.3023, + "step": 413020 + }, + { + "epoch": 0.8343467317396381, + "grad_norm": 405.29644775390625, + "learning_rate": 8.936563945344229e-07, + "loss": 9.2948, + "step": 413030 + }, + { + "epoch": 0.8343669323723218, + "grad_norm": 405.0749816894531, + "learning_rate": 8.934572479617498e-07, + "loss": 10.7187, + "step": 413040 + }, + { + "epoch": 0.8343871330050057, + "grad_norm": 478.822265625, + "learning_rate": 8.932581214038693e-07, + "loss": 20.9375, + "step": 413050 + }, + { + "epoch": 0.8344073336376895, + "grad_norm": 371.0656433105469, + "learning_rate": 8.930590148617513e-07, + "loss": 18.6845, + "step": 413060 + }, + { + "epoch": 0.8344275342703733, + "grad_norm": 468.7947998046875, + "learning_rate": 8.928599283363637e-07, + "loss": 13.453, + "step": 413070 + }, + { + "epoch": 0.8344477349030571, + "grad_norm": 360.0147705078125, + "learning_rate": 8.926608618286797e-07, + "loss": 13.7299, + "step": 413080 + }, + { + "epoch": 0.834467935535741, + "grad_norm": 101.20233154296875, + "learning_rate": 8.924618153396691e-07, + "loss": 13.2995, + "step": 413090 + }, + { + "epoch": 0.8344881361684248, + "grad_norm": 223.70965576171875, + "learning_rate": 8.922627888703e-07, + "loss": 12.3701, + "step": 413100 + }, + { + "epoch": 0.8345083368011086, + "grad_norm": 874.8382568359375, + "learning_rate": 8.920637824215433e-07, + "loss": 18.3292, + "step": 413110 + }, + { + "epoch": 0.8345285374337924, + "grad_norm": 546.51416015625, + "learning_rate": 8.918647959943727e-07, + "loss": 25.3475, + "step": 413120 + }, + { + "epoch": 0.8345487380664762, + "grad_norm": 34.34029769897461, + "learning_rate": 8.916658295897523e-07, + "loss": 22.9749, + "step": 413130 + }, + { + "epoch": 0.83456893869916, + "grad_norm": 301.0979919433594, + "learning_rate": 8.914668832086543e-07, + "loss": 11.5164, + "step": 413140 + }, + { + "epoch": 0.8345891393318439, + "grad_norm": 563.997802734375, + "learning_rate": 8.912679568520494e-07, + "loss": 18.5705, + "step": 413150 + }, + { + "epoch": 0.8346093399645277, + "grad_norm": 593.2365112304688, + "learning_rate": 8.910690505209063e-07, + "loss": 17.4941, + "step": 413160 + }, + { + "epoch": 0.8346295405972115, + "grad_norm": 525.0170288085938, + "learning_rate": 8.908701642161927e-07, + "loss": 14.5259, + "step": 413170 
+ }, + { + "epoch": 0.8346497412298953, + "grad_norm": 374.2406005859375, + "learning_rate": 8.906712979388799e-07, + "loss": 22.2857, + "step": 413180 + }, + { + "epoch": 0.8346699418625791, + "grad_norm": 499.7447814941406, + "learning_rate": 8.904724516899394e-07, + "loss": 17.2774, + "step": 413190 + }, + { + "epoch": 0.834690142495263, + "grad_norm": 350.549072265625, + "learning_rate": 8.902736254703347e-07, + "loss": 19.1012, + "step": 413200 + }, + { + "epoch": 0.8347103431279468, + "grad_norm": 172.6784210205078, + "learning_rate": 8.900748192810387e-07, + "loss": 17.8349, + "step": 413210 + }, + { + "epoch": 0.8347305437606306, + "grad_norm": 233.56097412109375, + "learning_rate": 8.898760331230206e-07, + "loss": 18.4664, + "step": 413220 + }, + { + "epoch": 0.8347507443933144, + "grad_norm": 255.29087829589844, + "learning_rate": 8.896772669972475e-07, + "loss": 15.4086, + "step": 413230 + }, + { + "epoch": 0.8347709450259982, + "grad_norm": 453.50433349609375, + "learning_rate": 8.894785209046886e-07, + "loss": 28.1366, + "step": 413240 + }, + { + "epoch": 0.8347911456586821, + "grad_norm": 408.1929931640625, + "learning_rate": 8.892797948463134e-07, + "loss": 19.2199, + "step": 413250 + }, + { + "epoch": 0.8348113462913659, + "grad_norm": 413.4342041015625, + "learning_rate": 8.8908108882309e-07, + "loss": 30.1597, + "step": 413260 + }, + { + "epoch": 0.8348315469240497, + "grad_norm": 305.1564025878906, + "learning_rate": 8.888824028359855e-07, + "loss": 13.2087, + "step": 413270 + }, + { + "epoch": 0.8348517475567335, + "grad_norm": 78.74534606933594, + "learning_rate": 8.886837368859713e-07, + "loss": 11.1992, + "step": 413280 + }, + { + "epoch": 0.8348719481894173, + "grad_norm": 181.76473999023438, + "learning_rate": 8.884850909740123e-07, + "loss": 15.7444, + "step": 413290 + }, + { + "epoch": 0.834892148822101, + "grad_norm": 258.8009948730469, + "learning_rate": 8.882864651010798e-07, + "loss": 29.8117, + "step": 413300 + }, + { + "epoch": 0.8349123494547849, + "grad_norm": 207.19329833984375, + "learning_rate": 8.880878592681386e-07, + "loss": 16.7603, + "step": 413310 + }, + { + "epoch": 0.8349325500874687, + "grad_norm": 192.0643768310547, + "learning_rate": 8.878892734761602e-07, + "loss": 16.0246, + "step": 413320 + }, + { + "epoch": 0.8349527507201525, + "grad_norm": 370.33599853515625, + "learning_rate": 8.876907077261093e-07, + "loss": 8.2631, + "step": 413330 + }, + { + "epoch": 0.8349729513528363, + "grad_norm": 17.950401306152344, + "learning_rate": 8.874921620189564e-07, + "loss": 18.9132, + "step": 413340 + }, + { + "epoch": 0.8349931519855202, + "grad_norm": 5.475889682769775, + "learning_rate": 8.872936363556678e-07, + "loss": 14.4931, + "step": 413350 + }, + { + "epoch": 0.835013352618204, + "grad_norm": 235.54627990722656, + "learning_rate": 8.8709513073721e-07, + "loss": 10.696, + "step": 413360 + }, + { + "epoch": 0.8350335532508878, + "grad_norm": 354.1400146484375, + "learning_rate": 8.868966451645533e-07, + "loss": 17.4819, + "step": 413370 + }, + { + "epoch": 0.8350537538835716, + "grad_norm": 612.1651000976562, + "learning_rate": 8.866981796386631e-07, + "loss": 29.6195, + "step": 413380 + }, + { + "epoch": 0.8350739545162554, + "grad_norm": 201.45852661132812, + "learning_rate": 8.864997341605059e-07, + "loss": 15.0578, + "step": 413390 + }, + { + "epoch": 0.8350941551489393, + "grad_norm": 8.226776123046875, + "learning_rate": 8.863013087310502e-07, + "loss": 18.1037, + "step": 413400 + }, + { + "epoch": 0.8351143557816231, + "grad_norm": 
90.51039123535156, + "learning_rate": 8.861029033512652e-07, + "loss": 18.2073, + "step": 413410 + }, + { + "epoch": 0.8351345564143069, + "grad_norm": 283.7022399902344, + "learning_rate": 8.859045180221137e-07, + "loss": 16.5587, + "step": 413420 + }, + { + "epoch": 0.8351547570469907, + "grad_norm": 350.72943115234375, + "learning_rate": 8.857061527445643e-07, + "loss": 14.1672, + "step": 413430 + }, + { + "epoch": 0.8351749576796745, + "grad_norm": 636.0487670898438, + "learning_rate": 8.85507807519585e-07, + "loss": 28.9502, + "step": 413440 + }, + { + "epoch": 0.8351951583123584, + "grad_norm": 167.53924560546875, + "learning_rate": 8.853094823481423e-07, + "loss": 18.9471, + "step": 413450 + }, + { + "epoch": 0.8352153589450422, + "grad_norm": 599.519775390625, + "learning_rate": 8.851111772312004e-07, + "loss": 29.2826, + "step": 413460 + }, + { + "epoch": 0.835235559577726, + "grad_norm": 618.7284545898438, + "learning_rate": 8.849128921697276e-07, + "loss": 19.7537, + "step": 413470 + }, + { + "epoch": 0.8352557602104098, + "grad_norm": 221.83441162109375, + "learning_rate": 8.847146271646928e-07, + "loss": 21.1147, + "step": 413480 + }, + { + "epoch": 0.8352759608430936, + "grad_norm": 634.7052001953125, + "learning_rate": 8.845163822170577e-07, + "loss": 15.7934, + "step": 413490 + }, + { + "epoch": 0.8352961614757775, + "grad_norm": 320.6510925292969, + "learning_rate": 8.843181573277904e-07, + "loss": 13.7606, + "step": 413500 + }, + { + "epoch": 0.8353163621084613, + "grad_norm": 104.05023193359375, + "learning_rate": 8.841199524978583e-07, + "loss": 13.1243, + "step": 413510 + }, + { + "epoch": 0.8353365627411451, + "grad_norm": 225.48289489746094, + "learning_rate": 8.839217677282264e-07, + "loss": 17.1197, + "step": 413520 + }, + { + "epoch": 0.8353567633738289, + "grad_norm": 289.8655090332031, + "learning_rate": 8.837236030198593e-07, + "loss": 23.3992, + "step": 413530 + }, + { + "epoch": 0.8353769640065127, + "grad_norm": 438.6549987792969, + "learning_rate": 8.835254583737251e-07, + "loss": 32.0711, + "step": 413540 + }, + { + "epoch": 0.8353971646391966, + "grad_norm": 241.2873992919922, + "learning_rate": 8.833273337907888e-07, + "loss": 16.7628, + "step": 413550 + }, + { + "epoch": 0.8354173652718803, + "grad_norm": 504.1148376464844, + "learning_rate": 8.831292292720151e-07, + "loss": 10.8753, + "step": 413560 + }, + { + "epoch": 0.8354375659045641, + "grad_norm": 681.5499267578125, + "learning_rate": 8.829311448183708e-07, + "loss": 28.6235, + "step": 413570 + }, + { + "epoch": 0.8354577665372479, + "grad_norm": 392.9539794921875, + "learning_rate": 8.827330804308199e-07, + "loss": 11.4326, + "step": 413580 + }, + { + "epoch": 0.8354779671699317, + "grad_norm": 153.1405487060547, + "learning_rate": 8.825350361103291e-07, + "loss": 14.1093, + "step": 413590 + }, + { + "epoch": 0.8354981678026155, + "grad_norm": 123.26155090332031, + "learning_rate": 8.823370118578628e-07, + "loss": 19.5911, + "step": 413600 + }, + { + "epoch": 0.8355183684352994, + "grad_norm": 159.26515197753906, + "learning_rate": 8.821390076743874e-07, + "loss": 12.1363, + "step": 413610 + }, + { + "epoch": 0.8355385690679832, + "grad_norm": 175.86227416992188, + "learning_rate": 8.819410235608666e-07, + "loss": 18.3747, + "step": 413620 + }, + { + "epoch": 0.835558769700667, + "grad_norm": 307.37799072265625, + "learning_rate": 8.817430595182652e-07, + "loss": 10.5693, + "step": 413630 + }, + { + "epoch": 0.8355789703333508, + "grad_norm": 262.6300964355469, + "learning_rate": 
8.815451155475496e-07, + "loss": 11.735, + "step": 413640 + }, + { + "epoch": 0.8355991709660346, + "grad_norm": 489.4883728027344, + "learning_rate": 8.813471916496824e-07, + "loss": 12.7488, + "step": 413650 + }, + { + "epoch": 0.8356193715987185, + "grad_norm": 410.9041748046875, + "learning_rate": 8.811492878256306e-07, + "loss": 23.8351, + "step": 413660 + }, + { + "epoch": 0.8356395722314023, + "grad_norm": 559.9931640625, + "learning_rate": 8.809514040763578e-07, + "loss": 18.7952, + "step": 413670 + }, + { + "epoch": 0.8356597728640861, + "grad_norm": 250.8330535888672, + "learning_rate": 8.807535404028267e-07, + "loss": 66.0124, + "step": 413680 + }, + { + "epoch": 0.8356799734967699, + "grad_norm": 328.7674865722656, + "learning_rate": 8.805556968060047e-07, + "loss": 11.4522, + "step": 413690 + }, + { + "epoch": 0.8357001741294537, + "grad_norm": 482.66925048828125, + "learning_rate": 8.803578732868545e-07, + "loss": 15.3524, + "step": 413700 + }, + { + "epoch": 0.8357203747621376, + "grad_norm": 276.3381652832031, + "learning_rate": 8.801600698463397e-07, + "loss": 12.5239, + "step": 413710 + }, + { + "epoch": 0.8357405753948214, + "grad_norm": 268.27520751953125, + "learning_rate": 8.799622864854246e-07, + "loss": 18.2343, + "step": 413720 + }, + { + "epoch": 0.8357607760275052, + "grad_norm": 155.68911743164062, + "learning_rate": 8.797645232050761e-07, + "loss": 14.467, + "step": 413730 + }, + { + "epoch": 0.835780976660189, + "grad_norm": 501.7003173828125, + "learning_rate": 8.795667800062529e-07, + "loss": 19.7305, + "step": 413740 + }, + { + "epoch": 0.8358011772928728, + "grad_norm": 186.0370635986328, + "learning_rate": 8.793690568899216e-07, + "loss": 9.8526, + "step": 413750 + }, + { + "epoch": 0.8358213779255567, + "grad_norm": 79.15919494628906, + "learning_rate": 8.791713538570474e-07, + "loss": 12.5743, + "step": 413760 + }, + { + "epoch": 0.8358415785582405, + "grad_norm": 12.420004844665527, + "learning_rate": 8.789736709085917e-07, + "loss": 18.4904, + "step": 413770 + }, + { + "epoch": 0.8358617791909243, + "grad_norm": 295.5406188964844, + "learning_rate": 8.787760080455171e-07, + "loss": 14.9115, + "step": 413780 + }, + { + "epoch": 0.8358819798236081, + "grad_norm": 565.0404052734375, + "learning_rate": 8.78578365268789e-07, + "loss": 24.2188, + "step": 413790 + }, + { + "epoch": 0.8359021804562919, + "grad_norm": 546.0817260742188, + "learning_rate": 8.783807425793722e-07, + "loss": 15.092, + "step": 413800 + }, + { + "epoch": 0.8359223810889757, + "grad_norm": 164.84117126464844, + "learning_rate": 8.781831399782254e-07, + "loss": 13.3016, + "step": 413810 + }, + { + "epoch": 0.8359425817216595, + "grad_norm": 347.80291748046875, + "learning_rate": 8.779855574663138e-07, + "loss": 20.6909, + "step": 413820 + }, + { + "epoch": 0.8359627823543433, + "grad_norm": 471.7724914550781, + "learning_rate": 8.777879950446022e-07, + "loss": 20.3324, + "step": 413830 + }, + { + "epoch": 0.8359829829870271, + "grad_norm": 575.730712890625, + "learning_rate": 8.775904527140522e-07, + "loss": 24.1305, + "step": 413840 + }, + { + "epoch": 0.8360031836197109, + "grad_norm": 375.9475402832031, + "learning_rate": 8.773929304756246e-07, + "loss": 28.2862, + "step": 413850 + }, + { + "epoch": 0.8360233842523948, + "grad_norm": 444.1407165527344, + "learning_rate": 8.771954283302852e-07, + "loss": 16.1395, + "step": 413860 + }, + { + "epoch": 0.8360435848850786, + "grad_norm": 8.394010543823242, + "learning_rate": 8.769979462789957e-07, + "loss": 8.5753, + "step": 413870 + }, 
+ { + "epoch": 0.8360637855177624, + "grad_norm": 7.048148155212402, + "learning_rate": 8.768004843227162e-07, + "loss": 9.9166, + "step": 413880 + }, + { + "epoch": 0.8360839861504462, + "grad_norm": 636.8203735351562, + "learning_rate": 8.766030424624117e-07, + "loss": 20.2075, + "step": 413890 + }, + { + "epoch": 0.83610418678313, + "grad_norm": 98.16224670410156, + "learning_rate": 8.764056206990446e-07, + "loss": 17.7166, + "step": 413900 + }, + { + "epoch": 0.8361243874158139, + "grad_norm": 323.1768798828125, + "learning_rate": 8.762082190335763e-07, + "loss": 20.1645, + "step": 413910 + }, + { + "epoch": 0.8361445880484977, + "grad_norm": 378.2060546875, + "learning_rate": 8.760108374669679e-07, + "loss": 18.7097, + "step": 413920 + }, + { + "epoch": 0.8361647886811815, + "grad_norm": 379.65814208984375, + "learning_rate": 8.75813476000184e-07, + "loss": 10.7528, + "step": 413930 + }, + { + "epoch": 0.8361849893138653, + "grad_norm": 200.08131408691406, + "learning_rate": 8.756161346341851e-07, + "loss": 15.3422, + "step": 413940 + }, + { + "epoch": 0.8362051899465491, + "grad_norm": 359.909912109375, + "learning_rate": 8.754188133699316e-07, + "loss": 22.8936, + "step": 413950 + }, + { + "epoch": 0.836225390579233, + "grad_norm": 406.63525390625, + "learning_rate": 8.752215122083874e-07, + "loss": 19.9985, + "step": 413960 + }, + { + "epoch": 0.8362455912119168, + "grad_norm": 225.33741760253906, + "learning_rate": 8.750242311505125e-07, + "loss": 15.3774, + "step": 413970 + }, + { + "epoch": 0.8362657918446006, + "grad_norm": 235.2423858642578, + "learning_rate": 8.7482697019727e-07, + "loss": 36.1446, + "step": 413980 + }, + { + "epoch": 0.8362859924772844, + "grad_norm": 364.4989318847656, + "learning_rate": 8.746297293496209e-07, + "loss": 16.6937, + "step": 413990 + }, + { + "epoch": 0.8363061931099682, + "grad_norm": 370.710693359375, + "learning_rate": 8.744325086085248e-07, + "loss": 14.9425, + "step": 414000 + }, + { + "epoch": 0.836326393742652, + "grad_norm": 384.3143615722656, + "learning_rate": 8.74235307974945e-07, + "loss": 24.9753, + "step": 414010 + }, + { + "epoch": 0.8363465943753359, + "grad_norm": 116.01148986816406, + "learning_rate": 8.740381274498427e-07, + "loss": 16.6167, + "step": 414020 + }, + { + "epoch": 0.8363667950080197, + "grad_norm": 131.11326599121094, + "learning_rate": 8.738409670341764e-07, + "loss": 19.161, + "step": 414030 + }, + { + "epoch": 0.8363869956407035, + "grad_norm": 251.4010772705078, + "learning_rate": 8.736438267289088e-07, + "loss": 21.4255, + "step": 414040 + }, + { + "epoch": 0.8364071962733873, + "grad_norm": 141.95697021484375, + "learning_rate": 8.734467065350022e-07, + "loss": 26.4255, + "step": 414050 + }, + { + "epoch": 0.8364273969060712, + "grad_norm": 747.612548828125, + "learning_rate": 8.732496064534163e-07, + "loss": 30.1295, + "step": 414060 + }, + { + "epoch": 0.8364475975387549, + "grad_norm": 96.67201232910156, + "learning_rate": 8.730525264851092e-07, + "loss": 12.5954, + "step": 414070 + }, + { + "epoch": 0.8364677981714387, + "grad_norm": 289.9397277832031, + "learning_rate": 8.728554666310441e-07, + "loss": 15.3013, + "step": 414080 + }, + { + "epoch": 0.8364879988041225, + "grad_norm": 540.086669921875, + "learning_rate": 8.726584268921829e-07, + "loss": 13.3588, + "step": 414090 + }, + { + "epoch": 0.8365081994368063, + "grad_norm": 229.41238403320312, + "learning_rate": 8.72461407269482e-07, + "loss": 13.3224, + "step": 414100 + }, + { + "epoch": 0.8365284000694901, + "grad_norm": 403.1856689453125, + 
"learning_rate": 8.722644077639031e-07, + "loss": 29.2782, + "step": 414110 + }, + { + "epoch": 0.836548600702174, + "grad_norm": 540.8037719726562, + "learning_rate": 8.720674283764086e-07, + "loss": 18.8913, + "step": 414120 + }, + { + "epoch": 0.8365688013348578, + "grad_norm": 23.485515594482422, + "learning_rate": 8.718704691079566e-07, + "loss": 14.9548, + "step": 414130 + }, + { + "epoch": 0.8365890019675416, + "grad_norm": 145.4017791748047, + "learning_rate": 8.716735299595059e-07, + "loss": 21.5552, + "step": 414140 + }, + { + "epoch": 0.8366092026002254, + "grad_norm": 73.29997253417969, + "learning_rate": 8.714766109320188e-07, + "loss": 16.1105, + "step": 414150 + }, + { + "epoch": 0.8366294032329092, + "grad_norm": 320.2896423339844, + "learning_rate": 8.712797120264543e-07, + "loss": 18.929, + "step": 414160 + }, + { + "epoch": 0.8366496038655931, + "grad_norm": 272.5067138671875, + "learning_rate": 8.710828332437704e-07, + "loss": 17.7957, + "step": 414170 + }, + { + "epoch": 0.8366698044982769, + "grad_norm": 113.86924743652344, + "learning_rate": 8.70885974584929e-07, + "loss": 18.2122, + "step": 414180 + }, + { + "epoch": 0.8366900051309607, + "grad_norm": 122.1517333984375, + "learning_rate": 8.706891360508874e-07, + "loss": 16.3829, + "step": 414190 + }, + { + "epoch": 0.8367102057636445, + "grad_norm": 279.2169189453125, + "learning_rate": 8.704923176426072e-07, + "loss": 14.7806, + "step": 414200 + }, + { + "epoch": 0.8367304063963283, + "grad_norm": 163.48916625976562, + "learning_rate": 8.702955193610457e-07, + "loss": 8.444, + "step": 414210 + }, + { + "epoch": 0.8367506070290122, + "grad_norm": 406.5298156738281, + "learning_rate": 8.700987412071643e-07, + "loss": 16.508, + "step": 414220 + }, + { + "epoch": 0.836770807661696, + "grad_norm": 198.86090087890625, + "learning_rate": 8.699019831819206e-07, + "loss": 11.3153, + "step": 414230 + }, + { + "epoch": 0.8367910082943798, + "grad_norm": 460.2008361816406, + "learning_rate": 8.697052452862726e-07, + "loss": 12.9046, + "step": 414240 + }, + { + "epoch": 0.8368112089270636, + "grad_norm": 115.87354278564453, + "learning_rate": 8.695085275211812e-07, + "loss": 27.3795, + "step": 414250 + }, + { + "epoch": 0.8368314095597474, + "grad_norm": 499.9412536621094, + "learning_rate": 8.69311829887603e-07, + "loss": 16.2713, + "step": 414260 + }, + { + "epoch": 0.8368516101924313, + "grad_norm": 221.27325439453125, + "learning_rate": 8.691151523864993e-07, + "loss": 22.8541, + "step": 414270 + }, + { + "epoch": 0.8368718108251151, + "grad_norm": 495.54376220703125, + "learning_rate": 8.689184950188279e-07, + "loss": 22.65, + "step": 414280 + }, + { + "epoch": 0.8368920114577989, + "grad_norm": 119.42444610595703, + "learning_rate": 8.687218577855444e-07, + "loss": 20.8679, + "step": 414290 + }, + { + "epoch": 0.8369122120904827, + "grad_norm": 538.4122314453125, + "learning_rate": 8.685252406876116e-07, + "loss": 21.7959, + "step": 414300 + }, + { + "epoch": 0.8369324127231665, + "grad_norm": 268.2040710449219, + "learning_rate": 8.683286437259852e-07, + "loss": 18.0892, + "step": 414310 + }, + { + "epoch": 0.8369526133558503, + "grad_norm": 390.52825927734375, + "learning_rate": 8.68132066901623e-07, + "loss": 23.0368, + "step": 414320 + }, + { + "epoch": 0.8369728139885341, + "grad_norm": 639.8551025390625, + "learning_rate": 8.679355102154841e-07, + "loss": 16.519, + "step": 414330 + }, + { + "epoch": 0.8369930146212179, + "grad_norm": 275.55413818359375, + "learning_rate": 8.677389736685271e-07, + "loss": 13.5942, + 
"step": 414340 + }, + { + "epoch": 0.8370132152539017, + "grad_norm": 265.37274169921875, + "learning_rate": 8.675424572617092e-07, + "loss": 7.7294, + "step": 414350 + }, + { + "epoch": 0.8370334158865855, + "grad_norm": 790.9881591796875, + "learning_rate": 8.673459609959872e-07, + "loss": 18.9518, + "step": 414360 + }, + { + "epoch": 0.8370536165192694, + "grad_norm": 182.69476318359375, + "learning_rate": 8.671494848723211e-07, + "loss": 16.3131, + "step": 414370 + }, + { + "epoch": 0.8370738171519532, + "grad_norm": 337.262939453125, + "learning_rate": 8.669530288916667e-07, + "loss": 17.6301, + "step": 414380 + }, + { + "epoch": 0.837094017784637, + "grad_norm": 155.77891540527344, + "learning_rate": 8.667565930549809e-07, + "loss": 22.1242, + "step": 414390 + }, + { + "epoch": 0.8371142184173208, + "grad_norm": 434.0264892578125, + "learning_rate": 8.665601773632226e-07, + "loss": 12.7268, + "step": 414400 + }, + { + "epoch": 0.8371344190500046, + "grad_norm": 769.7467041015625, + "learning_rate": 8.663637818173504e-07, + "loss": 17.8749, + "step": 414410 + }, + { + "epoch": 0.8371546196826885, + "grad_norm": 300.7909851074219, + "learning_rate": 8.661674064183179e-07, + "loss": 17.9705, + "step": 414420 + }, + { + "epoch": 0.8371748203153723, + "grad_norm": 815.4336547851562, + "learning_rate": 8.659710511670838e-07, + "loss": 17.56, + "step": 414430 + }, + { + "epoch": 0.8371950209480561, + "grad_norm": 607.55810546875, + "learning_rate": 8.657747160646068e-07, + "loss": 19.1566, + "step": 414440 + }, + { + "epoch": 0.8372152215807399, + "grad_norm": 537.06298828125, + "learning_rate": 8.655784011118424e-07, + "loss": 22.8558, + "step": 414450 + }, + { + "epoch": 0.8372354222134237, + "grad_norm": 710.0247802734375, + "learning_rate": 8.653821063097462e-07, + "loss": 34.4708, + "step": 414460 + }, + { + "epoch": 0.8372556228461076, + "grad_norm": 312.3938903808594, + "learning_rate": 8.65185831659277e-07, + "loss": 11.5337, + "step": 414470 + }, + { + "epoch": 0.8372758234787914, + "grad_norm": 235.47027587890625, + "learning_rate": 8.649895771613909e-07, + "loss": 23.7216, + "step": 414480 + }, + { + "epoch": 0.8372960241114752, + "grad_norm": 494.0722351074219, + "learning_rate": 8.64793342817043e-07, + "loss": 24.8195, + "step": 414490 + }, + { + "epoch": 0.837316224744159, + "grad_norm": 194.82943725585938, + "learning_rate": 8.645971286271903e-07, + "loss": 15.1807, + "step": 414500 + }, + { + "epoch": 0.8373364253768428, + "grad_norm": 548.2518310546875, + "learning_rate": 8.644009345927912e-07, + "loss": 22.8696, + "step": 414510 + }, + { + "epoch": 0.8373566260095267, + "grad_norm": 832.0997314453125, + "learning_rate": 8.642047607148008e-07, + "loss": 18.8447, + "step": 414520 + }, + { + "epoch": 0.8373768266422105, + "grad_norm": 519.0682983398438, + "learning_rate": 8.640086069941727e-07, + "loss": 18.8811, + "step": 414530 + }, + { + "epoch": 0.8373970272748943, + "grad_norm": 783.4784545898438, + "learning_rate": 8.638124734318664e-07, + "loss": 30.9056, + "step": 414540 + }, + { + "epoch": 0.8374172279075781, + "grad_norm": 266.5477600097656, + "learning_rate": 8.636163600288372e-07, + "loss": 15.8093, + "step": 414550 + }, + { + "epoch": 0.8374374285402619, + "grad_norm": 440.7388916015625, + "learning_rate": 8.634202667860381e-07, + "loss": 25.2162, + "step": 414560 + }, + { + "epoch": 0.8374576291729458, + "grad_norm": 615.1576538085938, + "learning_rate": 8.632241937044283e-07, + "loss": 16.1164, + "step": 414570 + }, + { + "epoch": 0.8374778298056295, + 
"grad_norm": 29.328777313232422, + "learning_rate": 8.630281407849612e-07, + "loss": 16.8257, + "step": 414580 + }, + { + "epoch": 0.8374980304383133, + "grad_norm": 357.13653564453125, + "learning_rate": 8.628321080285945e-07, + "loss": 18.8659, + "step": 414590 + }, + { + "epoch": 0.8375182310709971, + "grad_norm": 660.4081420898438, + "learning_rate": 8.626360954362817e-07, + "loss": 12.5095, + "step": 414600 + }, + { + "epoch": 0.8375384317036809, + "grad_norm": 317.3007507324219, + "learning_rate": 8.62440103008978e-07, + "loss": 24.9659, + "step": 414610 + }, + { + "epoch": 0.8375586323363647, + "grad_norm": 625.5695190429688, + "learning_rate": 8.622441307476404e-07, + "loss": 27.2896, + "step": 414620 + }, + { + "epoch": 0.8375788329690486, + "grad_norm": 483.0949401855469, + "learning_rate": 8.62048178653223e-07, + "loss": 12.7133, + "step": 414630 + }, + { + "epoch": 0.8375990336017324, + "grad_norm": 152.7284393310547, + "learning_rate": 8.618522467266799e-07, + "loss": 12.2879, + "step": 414640 + }, + { + "epoch": 0.8376192342344162, + "grad_norm": 44.15868377685547, + "learning_rate": 8.616563349689672e-07, + "loss": 16.6405, + "step": 414650 + }, + { + "epoch": 0.8376394348671, + "grad_norm": 2.977247953414917, + "learning_rate": 8.614604433810408e-07, + "loss": 12.0111, + "step": 414660 + }, + { + "epoch": 0.8376596354997838, + "grad_norm": 424.8921813964844, + "learning_rate": 8.612645719638541e-07, + "loss": 10.2665, + "step": 414670 + }, + { + "epoch": 0.8376798361324677, + "grad_norm": 511.0644226074219, + "learning_rate": 8.610687207183604e-07, + "loss": 28.0615, + "step": 414680 + }, + { + "epoch": 0.8377000367651515, + "grad_norm": 341.1947937011719, + "learning_rate": 8.608728896455177e-07, + "loss": 9.8692, + "step": 414690 + }, + { + "epoch": 0.8377202373978353, + "grad_norm": 583.61865234375, + "learning_rate": 8.606770787462776e-07, + "loss": 23.7715, + "step": 414700 + }, + { + "epoch": 0.8377404380305191, + "grad_norm": 575.1815185546875, + "learning_rate": 8.604812880215946e-07, + "loss": 25.0434, + "step": 414710 + }, + { + "epoch": 0.8377606386632029, + "grad_norm": 721.6419677734375, + "learning_rate": 8.60285517472424e-07, + "loss": 19.6313, + "step": 414720 + }, + { + "epoch": 0.8377808392958868, + "grad_norm": 19.51460838317871, + "learning_rate": 8.600897670997205e-07, + "loss": 12.7547, + "step": 414730 + }, + { + "epoch": 0.8378010399285706, + "grad_norm": 238.0825958251953, + "learning_rate": 8.598940369044378e-07, + "loss": 25.7607, + "step": 414740 + }, + { + "epoch": 0.8378212405612544, + "grad_norm": 397.0168762207031, + "learning_rate": 8.596983268875281e-07, + "loss": 9.5079, + "step": 414750 + }, + { + "epoch": 0.8378414411939382, + "grad_norm": 1884.20556640625, + "learning_rate": 8.595026370499477e-07, + "loss": 22.7407, + "step": 414760 + }, + { + "epoch": 0.837861641826622, + "grad_norm": 219.27264404296875, + "learning_rate": 8.59306967392649e-07, + "loss": 13.0254, + "step": 414770 + }, + { + "epoch": 0.8378818424593059, + "grad_norm": 59.17811584472656, + "learning_rate": 8.59111317916585e-07, + "loss": 18.7953, + "step": 414780 + }, + { + "epoch": 0.8379020430919897, + "grad_norm": 513.6927490234375, + "learning_rate": 8.589156886227112e-07, + "loss": 15.032, + "step": 414790 + }, + { + "epoch": 0.8379222437246735, + "grad_norm": 222.7626953125, + "learning_rate": 8.587200795119793e-07, + "loss": 6.7889, + "step": 414800 + }, + { + "epoch": 0.8379424443573573, + "grad_norm": 223.1555938720703, + "learning_rate": 8.585244905853446e-07, 
+ "loss": 45.8468, + "step": 414810 + }, + { + "epoch": 0.8379626449900411, + "grad_norm": 179.75576782226562, + "learning_rate": 8.583289218437574e-07, + "loss": 40.9749, + "step": 414820 + }, + { + "epoch": 0.837982845622725, + "grad_norm": 193.07220458984375, + "learning_rate": 8.581333732881747e-07, + "loss": 8.5778, + "step": 414830 + }, + { + "epoch": 0.8380030462554087, + "grad_norm": 465.3057556152344, + "learning_rate": 8.579378449195469e-07, + "loss": 16.5517, + "step": 414840 + }, + { + "epoch": 0.8380232468880925, + "grad_norm": 628.0454711914062, + "learning_rate": 8.577423367388271e-07, + "loss": 25.3636, + "step": 414850 + }, + { + "epoch": 0.8380434475207763, + "grad_norm": 114.03556823730469, + "learning_rate": 8.575468487469696e-07, + "loss": 22.118, + "step": 414860 + }, + { + "epoch": 0.8380636481534601, + "grad_norm": 24.78908920288086, + "learning_rate": 8.573513809449252e-07, + "loss": 12.9919, + "step": 414870 + }, + { + "epoch": 0.838083848786144, + "grad_norm": 833.2183837890625, + "learning_rate": 8.571559333336488e-07, + "loss": 21.047, + "step": 414880 + }, + { + "epoch": 0.8381040494188278, + "grad_norm": 236.3020782470703, + "learning_rate": 8.569605059140923e-07, + "loss": 33.2618, + "step": 414890 + }, + { + "epoch": 0.8381242500515116, + "grad_norm": 305.19927978515625, + "learning_rate": 8.567650986872061e-07, + "loss": 18.8827, + "step": 414900 + }, + { + "epoch": 0.8381444506841954, + "grad_norm": 1474.306640625, + "learning_rate": 8.565697116539462e-07, + "loss": 27.5161, + "step": 414910 + }, + { + "epoch": 0.8381646513168792, + "grad_norm": 357.6324462890625, + "learning_rate": 8.563743448152623e-07, + "loss": 20.0884, + "step": 414920 + }, + { + "epoch": 0.838184851949563, + "grad_norm": 288.3275451660156, + "learning_rate": 8.561789981721064e-07, + "loss": 10.7723, + "step": 414930 + }, + { + "epoch": 0.8382050525822469, + "grad_norm": 393.3634948730469, + "learning_rate": 8.559836717254316e-07, + "loss": 29.6967, + "step": 414940 + }, + { + "epoch": 0.8382252532149307, + "grad_norm": 443.06097412109375, + "learning_rate": 8.557883654761906e-07, + "loss": 35.6726, + "step": 414950 + }, + { + "epoch": 0.8382454538476145, + "grad_norm": 396.83074951171875, + "learning_rate": 8.555930794253347e-07, + "loss": 9.0194, + "step": 414960 + }, + { + "epoch": 0.8382656544802983, + "grad_norm": 5.810521125793457, + "learning_rate": 8.553978135738139e-07, + "loss": 11.9886, + "step": 414970 + }, + { + "epoch": 0.8382858551129821, + "grad_norm": 207.85836791992188, + "learning_rate": 8.552025679225834e-07, + "loss": 11.3733, + "step": 414980 + }, + { + "epoch": 0.838306055745666, + "grad_norm": 350.19366455078125, + "learning_rate": 8.550073424725924e-07, + "loss": 17.0902, + "step": 414990 + }, + { + "epoch": 0.8383262563783498, + "grad_norm": 159.68637084960938, + "learning_rate": 8.54812137224792e-07, + "loss": 15.9937, + "step": 415000 + }, + { + "epoch": 0.8383464570110336, + "grad_norm": 285.9203796386719, + "learning_rate": 8.54616952180134e-07, + "loss": 25.6369, + "step": 415010 + }, + { + "epoch": 0.8383666576437174, + "grad_norm": 127.26937866210938, + "learning_rate": 8.544217873395727e-07, + "loss": 24.5282, + "step": 415020 + }, + { + "epoch": 0.8383868582764012, + "grad_norm": 0.43699193000793457, + "learning_rate": 8.542266427040546e-07, + "loss": 9.2404, + "step": 415030 + }, + { + "epoch": 0.8384070589090851, + "grad_norm": 324.5606384277344, + "learning_rate": 8.540315182745329e-07, + "loss": 17.5618, + "step": 415040 + }, + { + "epoch": 
0.8384272595417689, + "grad_norm": 216.83395385742188, + "learning_rate": 8.5383641405196e-07, + "loss": 25.6635, + "step": 415050 + }, + { + "epoch": 0.8384474601744527, + "grad_norm": 401.2691650390625, + "learning_rate": 8.536413300372859e-07, + "loss": 10.4735, + "step": 415060 + }, + { + "epoch": 0.8384676608071365, + "grad_norm": 264.07440185546875, + "learning_rate": 8.534462662314597e-07, + "loss": 17.2263, + "step": 415070 + }, + { + "epoch": 0.8384878614398203, + "grad_norm": 380.81732177734375, + "learning_rate": 8.532512226354345e-07, + "loss": 14.928, + "step": 415080 + }, + { + "epoch": 0.8385080620725041, + "grad_norm": 292.3482971191406, + "learning_rate": 8.530561992501596e-07, + "loss": 23.0051, + "step": 415090 + }, + { + "epoch": 0.8385282627051879, + "grad_norm": 237.59320068359375, + "learning_rate": 8.528611960765853e-07, + "loss": 21.9377, + "step": 415100 + }, + { + "epoch": 0.8385484633378717, + "grad_norm": 419.3064880371094, + "learning_rate": 8.526662131156621e-07, + "loss": 12.194, + "step": 415110 + }, + { + "epoch": 0.8385686639705555, + "grad_norm": 264.32537841796875, + "learning_rate": 8.524712503683419e-07, + "loss": 21.6371, + "step": 415120 + }, + { + "epoch": 0.8385888646032393, + "grad_norm": 455.37042236328125, + "learning_rate": 8.522763078355739e-07, + "loss": 11.2926, + "step": 415130 + }, + { + "epoch": 0.8386090652359232, + "grad_norm": 499.8186340332031, + "learning_rate": 8.520813855183069e-07, + "loss": 9.7159, + "step": 415140 + }, + { + "epoch": 0.838629265868607, + "grad_norm": 329.1452331542969, + "learning_rate": 8.518864834174939e-07, + "loss": 12.3609, + "step": 415150 + }, + { + "epoch": 0.8386494665012908, + "grad_norm": 677.871826171875, + "learning_rate": 8.516916015340826e-07, + "loss": 24.037, + "step": 415160 + }, + { + "epoch": 0.8386696671339746, + "grad_norm": 302.4140930175781, + "learning_rate": 8.514967398690215e-07, + "loss": 15.02, + "step": 415170 + }, + { + "epoch": 0.8386898677666584, + "grad_norm": 165.50599670410156, + "learning_rate": 8.513018984232641e-07, + "loss": 18.3824, + "step": 415180 + }, + { + "epoch": 0.8387100683993423, + "grad_norm": 470.12298583984375, + "learning_rate": 8.511070771977569e-07, + "loss": 18.0278, + "step": 415190 + }, + { + "epoch": 0.8387302690320261, + "grad_norm": 761.8771362304688, + "learning_rate": 8.509122761934519e-07, + "loss": 16.4101, + "step": 415200 + }, + { + "epoch": 0.8387504696647099, + "grad_norm": 521.2567138671875, + "learning_rate": 8.507174954112968e-07, + "loss": 16.6844, + "step": 415210 + }, + { + "epoch": 0.8387706702973937, + "grad_norm": 358.11822509765625, + "learning_rate": 8.505227348522404e-07, + "loss": 33.388, + "step": 415220 + }, + { + "epoch": 0.8387908709300775, + "grad_norm": 506.910400390625, + "learning_rate": 8.503279945172338e-07, + "loss": 20.2392, + "step": 415230 + }, + { + "epoch": 0.8388110715627614, + "grad_norm": 515.4656372070312, + "learning_rate": 8.501332744072255e-07, + "loss": 14.4212, + "step": 415240 + }, + { + "epoch": 0.8388312721954452, + "grad_norm": 222.73764038085938, + "learning_rate": 8.499385745231631e-07, + "loss": 13.0752, + "step": 415250 + }, + { + "epoch": 0.838851472828129, + "grad_norm": 453.6046142578125, + "learning_rate": 8.497438948659969e-07, + "loss": 22.6873, + "step": 415260 + }, + { + "epoch": 0.8388716734608128, + "grad_norm": 172.0892791748047, + "learning_rate": 8.495492354366764e-07, + "loss": 7.291, + "step": 415270 + }, + { + "epoch": 0.8388918740934966, + "grad_norm": 372.55084228515625, + 
"learning_rate": 8.493545962361499e-07, + "loss": 22.3061, + "step": 415280 + }, + { + "epoch": 0.8389120747261805, + "grad_norm": 1095.3577880859375, + "learning_rate": 8.491599772653647e-07, + "loss": 28.2512, + "step": 415290 + }, + { + "epoch": 0.8389322753588643, + "grad_norm": 397.1434631347656, + "learning_rate": 8.489653785252711e-07, + "loss": 22.9261, + "step": 415300 + }, + { + "epoch": 0.8389524759915481, + "grad_norm": 285.25030517578125, + "learning_rate": 8.487708000168166e-07, + "loss": 20.4244, + "step": 415310 + }, + { + "epoch": 0.8389726766242319, + "grad_norm": 429.2090148925781, + "learning_rate": 8.485762417409488e-07, + "loss": 24.278, + "step": 415320 + }, + { + "epoch": 0.8389928772569157, + "grad_norm": 68.62606048583984, + "learning_rate": 8.483817036986169e-07, + "loss": 12.0669, + "step": 415330 + }, + { + "epoch": 0.8390130778895996, + "grad_norm": 190.48098754882812, + "learning_rate": 8.481871858907703e-07, + "loss": 16.2902, + "step": 415340 + }, + { + "epoch": 0.8390332785222833, + "grad_norm": 252.4988555908203, + "learning_rate": 8.479926883183559e-07, + "loss": 18.1994, + "step": 415350 + }, + { + "epoch": 0.8390534791549671, + "grad_norm": 453.9949951171875, + "learning_rate": 8.477982109823202e-07, + "loss": 20.5837, + "step": 415360 + }, + { + "epoch": 0.8390736797876509, + "grad_norm": 4.822742462158203, + "learning_rate": 8.476037538836134e-07, + "loss": 13.4309, + "step": 415370 + }, + { + "epoch": 0.8390938804203347, + "grad_norm": 355.41363525390625, + "learning_rate": 8.474093170231828e-07, + "loss": 13.9954, + "step": 415380 + }, + { + "epoch": 0.8391140810530185, + "grad_norm": 598.54150390625, + "learning_rate": 8.472149004019742e-07, + "loss": 13.5178, + "step": 415390 + }, + { + "epoch": 0.8391342816857024, + "grad_norm": 425.3116760253906, + "learning_rate": 8.470205040209362e-07, + "loss": 18.0247, + "step": 415400 + }, + { + "epoch": 0.8391544823183862, + "grad_norm": 289.0229187011719, + "learning_rate": 8.46826127881018e-07, + "loss": 14.5099, + "step": 415410 + }, + { + "epoch": 0.83917468295107, + "grad_norm": 426.1451110839844, + "learning_rate": 8.466317719831657e-07, + "loss": 20.0752, + "step": 415420 + }, + { + "epoch": 0.8391948835837538, + "grad_norm": 130.5015411376953, + "learning_rate": 8.464374363283245e-07, + "loss": 19.8057, + "step": 415430 + }, + { + "epoch": 0.8392150842164376, + "grad_norm": 216.7658233642578, + "learning_rate": 8.462431209174454e-07, + "loss": 13.6538, + "step": 415440 + }, + { + "epoch": 0.8392352848491215, + "grad_norm": 760.1143188476562, + "learning_rate": 8.460488257514731e-07, + "loss": 34.9151, + "step": 415450 + }, + { + "epoch": 0.8392554854818053, + "grad_norm": 397.96795654296875, + "learning_rate": 8.458545508313543e-07, + "loss": 9.9936, + "step": 415460 + }, + { + "epoch": 0.8392756861144891, + "grad_norm": 272.09503173828125, + "learning_rate": 8.456602961580374e-07, + "loss": 16.4152, + "step": 415470 + }, + { + "epoch": 0.8392958867471729, + "grad_norm": 701.9149169921875, + "learning_rate": 8.454660617324672e-07, + "loss": 25.2512, + "step": 415480 + }, + { + "epoch": 0.8393160873798567, + "grad_norm": 280.76959228515625, + "learning_rate": 8.452718475555927e-07, + "loss": 13.5794, + "step": 415490 + }, + { + "epoch": 0.8393362880125406, + "grad_norm": 237.9507293701172, + "learning_rate": 8.450776536283594e-07, + "loss": 13.2203, + "step": 415500 + }, + { + "epoch": 0.8393564886452244, + "grad_norm": 284.7758483886719, + "learning_rate": 8.448834799517125e-07, + "loss": 19.6669, 
+ "step": 415510 + }, + { + "epoch": 0.8393766892779082, + "grad_norm": 102.53799438476562, + "learning_rate": 8.446893265266005e-07, + "loss": 13.1576, + "step": 415520 + }, + { + "epoch": 0.839396889910592, + "grad_norm": 178.54124450683594, + "learning_rate": 8.444951933539691e-07, + "loss": 16.2389, + "step": 415530 + }, + { + "epoch": 0.8394170905432758, + "grad_norm": 411.52203369140625, + "learning_rate": 8.443010804347629e-07, + "loss": 16.3723, + "step": 415540 + }, + { + "epoch": 0.8394372911759597, + "grad_norm": 486.77764892578125, + "learning_rate": 8.441069877699287e-07, + "loss": 27.4579, + "step": 415550 + }, + { + "epoch": 0.8394574918086435, + "grad_norm": 159.8814697265625, + "learning_rate": 8.439129153604148e-07, + "loss": 18.4531, + "step": 415560 + }, + { + "epoch": 0.8394776924413273, + "grad_norm": 840.0407104492188, + "learning_rate": 8.437188632071652e-07, + "loss": 24.8126, + "step": 415570 + }, + { + "epoch": 0.8394978930740111, + "grad_norm": 307.8695373535156, + "learning_rate": 8.435248313111244e-07, + "loss": 16.5966, + "step": 415580 + }, + { + "epoch": 0.839518093706695, + "grad_norm": 167.97000122070312, + "learning_rate": 8.433308196732403e-07, + "loss": 10.4787, + "step": 415590 + }, + { + "epoch": 0.8395382943393787, + "grad_norm": 611.5538330078125, + "learning_rate": 8.431368282944585e-07, + "loss": 25.2954, + "step": 415600 + }, + { + "epoch": 0.8395584949720625, + "grad_norm": 533.6309814453125, + "learning_rate": 8.42942857175722e-07, + "loss": 25.0081, + "step": 415610 + }, + { + "epoch": 0.8395786956047463, + "grad_norm": 599.7293701171875, + "learning_rate": 8.427489063179778e-07, + "loss": 19.8581, + "step": 415620 + }, + { + "epoch": 0.8395988962374301, + "grad_norm": 384.72125244140625, + "learning_rate": 8.425549757221734e-07, + "loss": 21.0972, + "step": 415630 + }, + { + "epoch": 0.8396190968701139, + "grad_norm": 245.0561065673828, + "learning_rate": 8.423610653892494e-07, + "loss": 21.7128, + "step": 415640 + }, + { + "epoch": 0.8396392975027978, + "grad_norm": 131.35284423828125, + "learning_rate": 8.421671753201538e-07, + "loss": 17.4895, + "step": 415650 + }, + { + "epoch": 0.8396594981354816, + "grad_norm": 97.52739715576172, + "learning_rate": 8.419733055158319e-07, + "loss": 11.632, + "step": 415660 + }, + { + "epoch": 0.8396796987681654, + "grad_norm": 512.2359008789062, + "learning_rate": 8.41779455977228e-07, + "loss": 16.4915, + "step": 415670 + }, + { + "epoch": 0.8396998994008492, + "grad_norm": 383.025146484375, + "learning_rate": 8.415856267052852e-07, + "loss": 11.6689, + "step": 415680 + }, + { + "epoch": 0.839720100033533, + "grad_norm": 433.9366760253906, + "learning_rate": 8.413918177009512e-07, + "loss": 14.7481, + "step": 415690 + }, + { + "epoch": 0.8397403006662169, + "grad_norm": 268.4071350097656, + "learning_rate": 8.411980289651689e-07, + "loss": 17.461, + "step": 415700 + }, + { + "epoch": 0.8397605012989007, + "grad_norm": 488.49359130859375, + "learning_rate": 8.410042604988822e-07, + "loss": 19.7896, + "step": 415710 + }, + { + "epoch": 0.8397807019315845, + "grad_norm": 401.6903381347656, + "learning_rate": 8.408105123030358e-07, + "loss": 15.051, + "step": 415720 + }, + { + "epoch": 0.8398009025642683, + "grad_norm": 1138.7222900390625, + "learning_rate": 8.406167843785762e-07, + "loss": 20.6059, + "step": 415730 + }, + { + "epoch": 0.8398211031969521, + "grad_norm": 875.8900756835938, + "learning_rate": 8.404230767264454e-07, + "loss": 16.6636, + "step": 415740 + }, + { + "epoch": 0.839841303829636, + 
"grad_norm": 299.91119384765625, + "learning_rate": 8.402293893475872e-07, + "loss": 30.0651, + "step": 415750 + }, + { + "epoch": 0.8398615044623198, + "grad_norm": 530.86669921875, + "learning_rate": 8.400357222429473e-07, + "loss": 10.671, + "step": 415760 + }, + { + "epoch": 0.8398817050950036, + "grad_norm": 395.6985168457031, + "learning_rate": 8.39842075413469e-07, + "loss": 21.6314, + "step": 415770 + }, + { + "epoch": 0.8399019057276874, + "grad_norm": 835.7567138671875, + "learning_rate": 8.396484488600948e-07, + "loss": 28.1015, + "step": 415780 + }, + { + "epoch": 0.8399221063603712, + "grad_norm": 20.669448852539062, + "learning_rate": 8.394548425837706e-07, + "loss": 11.6772, + "step": 415790 + }, + { + "epoch": 0.839942306993055, + "grad_norm": 303.6646728515625, + "learning_rate": 8.392612565854374e-07, + "loss": 15.3869, + "step": 415800 + }, + { + "epoch": 0.8399625076257389, + "grad_norm": 386.441650390625, + "learning_rate": 8.390676908660417e-07, + "loss": 20.5151, + "step": 415810 + }, + { + "epoch": 0.8399827082584227, + "grad_norm": 208.04490661621094, + "learning_rate": 8.388741454265254e-07, + "loss": 32.2923, + "step": 415820 + }, + { + "epoch": 0.8400029088911065, + "grad_norm": 1051.257080078125, + "learning_rate": 8.386806202678305e-07, + "loss": 34.7388, + "step": 415830 + }, + { + "epoch": 0.8400231095237903, + "grad_norm": 452.179931640625, + "learning_rate": 8.384871153909025e-07, + "loss": 28.9045, + "step": 415840 + }, + { + "epoch": 0.8400433101564742, + "grad_norm": 737.98974609375, + "learning_rate": 8.382936307966838e-07, + "loss": 16.3271, + "step": 415850 + }, + { + "epoch": 0.8400635107891579, + "grad_norm": 648.748291015625, + "learning_rate": 8.381001664861161e-07, + "loss": 18.4748, + "step": 415860 + }, + { + "epoch": 0.8400837114218417, + "grad_norm": 391.1263427734375, + "learning_rate": 8.379067224601433e-07, + "loss": 23.7422, + "step": 415870 + }, + { + "epoch": 0.8401039120545255, + "grad_norm": 632.5194702148438, + "learning_rate": 8.3771329871971e-07, + "loss": 25.7779, + "step": 415880 + }, + { + "epoch": 0.8401241126872093, + "grad_norm": 0.5611941814422607, + "learning_rate": 8.375198952657565e-07, + "loss": 15.2841, + "step": 415890 + }, + { + "epoch": 0.8401443133198931, + "grad_norm": 168.1457977294922, + "learning_rate": 8.373265120992252e-07, + "loss": 15.9845, + "step": 415900 + }, + { + "epoch": 0.840164513952577, + "grad_norm": 237.24925231933594, + "learning_rate": 8.371331492210611e-07, + "loss": 25.797, + "step": 415910 + }, + { + "epoch": 0.8401847145852608, + "grad_norm": 454.6050720214844, + "learning_rate": 8.369398066322049e-07, + "loss": 20.6318, + "step": 415920 + }, + { + "epoch": 0.8402049152179446, + "grad_norm": 564.5012817382812, + "learning_rate": 8.367464843335981e-07, + "loss": 17.6519, + "step": 415930 + }, + { + "epoch": 0.8402251158506284, + "grad_norm": 426.44891357421875, + "learning_rate": 8.365531823261841e-07, + "loss": 18.174, + "step": 415940 + }, + { + "epoch": 0.8402453164833122, + "grad_norm": 869.7213134765625, + "learning_rate": 8.363599006109057e-07, + "loss": 27.4623, + "step": 415950 + }, + { + "epoch": 0.8402655171159961, + "grad_norm": 242.62086486816406, + "learning_rate": 8.361666391887047e-07, + "loss": 17.5728, + "step": 415960 + }, + { + "epoch": 0.8402857177486799, + "grad_norm": 656.5515747070312, + "learning_rate": 8.359733980605211e-07, + "loss": 23.2706, + "step": 415970 + }, + { + "epoch": 0.8403059183813637, + "grad_norm": 274.0316162109375, + "learning_rate": 
8.357801772272988e-07, + "loss": 20.1416, + "step": 415980 + }, + { + "epoch": 0.8403261190140475, + "grad_norm": 312.2007751464844, + "learning_rate": 8.355869766899793e-07, + "loss": 24.8726, + "step": 415990 + }, + { + "epoch": 0.8403463196467313, + "grad_norm": 668.3038940429688, + "learning_rate": 8.353937964495029e-07, + "loss": 17.8256, + "step": 416000 + }, + { + "epoch": 0.8403665202794152, + "grad_norm": 373.26690673828125, + "learning_rate": 8.352006365068116e-07, + "loss": 19.7764, + "step": 416010 + }, + { + "epoch": 0.840386720912099, + "grad_norm": 471.3692932128906, + "learning_rate": 8.350074968628486e-07, + "loss": 12.6507, + "step": 416020 + }, + { + "epoch": 0.8404069215447828, + "grad_norm": 1139.7874755859375, + "learning_rate": 8.348143775185536e-07, + "loss": 34.0043, + "step": 416030 + }, + { + "epoch": 0.8404271221774666, + "grad_norm": 210.00518798828125, + "learning_rate": 8.346212784748676e-07, + "loss": 11.1326, + "step": 416040 + }, + { + "epoch": 0.8404473228101504, + "grad_norm": 234.03453063964844, + "learning_rate": 8.344281997327331e-07, + "loss": 16.5989, + "step": 416050 + }, + { + "epoch": 0.8404675234428343, + "grad_norm": 252.06436157226562, + "learning_rate": 8.342351412930899e-07, + "loss": 16.515, + "step": 416060 + }, + { + "epoch": 0.8404877240755181, + "grad_norm": 14.403572082519531, + "learning_rate": 8.340421031568791e-07, + "loss": 20.2605, + "step": 416070 + }, + { + "epoch": 0.8405079247082019, + "grad_norm": 516.9014282226562, + "learning_rate": 8.338490853250425e-07, + "loss": 17.6014, + "step": 416080 + }, + { + "epoch": 0.8405281253408857, + "grad_norm": 166.3125, + "learning_rate": 8.336560877985189e-07, + "loss": 19.7227, + "step": 416090 + }, + { + "epoch": 0.8405483259735695, + "grad_norm": 65.38711547851562, + "learning_rate": 8.334631105782515e-07, + "loss": 19.9046, + "step": 416100 + }, + { + "epoch": 0.8405685266062533, + "grad_norm": 191.06500244140625, + "learning_rate": 8.332701536651794e-07, + "loss": 12.5053, + "step": 416110 + }, + { + "epoch": 0.8405887272389371, + "grad_norm": 225.70530700683594, + "learning_rate": 8.330772170602424e-07, + "loss": 21.7426, + "step": 416120 + }, + { + "epoch": 0.8406089278716209, + "grad_norm": 202.8973388671875, + "learning_rate": 8.328843007643828e-07, + "loss": 8.0427, + "step": 416130 + }, + { + "epoch": 0.8406291285043047, + "grad_norm": 1216.110595703125, + "learning_rate": 8.326914047785395e-07, + "loss": 18.0305, + "step": 416140 + }, + { + "epoch": 0.8406493291369885, + "grad_norm": 569.0231323242188, + "learning_rate": 8.324985291036513e-07, + "loss": 24.0127, + "step": 416150 + }, + { + "epoch": 0.8406695297696724, + "grad_norm": 474.5058288574219, + "learning_rate": 8.323056737406604e-07, + "loss": 27.7385, + "step": 416160 + }, + { + "epoch": 0.8406897304023562, + "grad_norm": 415.0338439941406, + "learning_rate": 8.32112838690507e-07, + "loss": 38.8405, + "step": 416170 + }, + { + "epoch": 0.84070993103504, + "grad_norm": 262.8816833496094, + "learning_rate": 8.319200239541303e-07, + "loss": 14.3544, + "step": 416180 + }, + { + "epoch": 0.8407301316677238, + "grad_norm": 423.84130859375, + "learning_rate": 8.317272295324691e-07, + "loss": 27.0065, + "step": 416190 + }, + { + "epoch": 0.8407503323004076, + "grad_norm": 88.60386657714844, + "learning_rate": 8.315344554264643e-07, + "loss": 9.2607, + "step": 416200 + }, + { + "epoch": 0.8407705329330915, + "grad_norm": 319.1097412109375, + "learning_rate": 8.313417016370557e-07, + "loss": 12.6866, + "step": 416210 + }, + { 
+ "epoch": 0.8407907335657753, + "grad_norm": 797.659912109375, + "learning_rate": 8.311489681651803e-07, + "loss": 20.9408, + "step": 416220 + }, + { + "epoch": 0.8408109341984591, + "grad_norm": 328.889892578125, + "learning_rate": 8.309562550117789e-07, + "loss": 16.7754, + "step": 416230 + }, + { + "epoch": 0.8408311348311429, + "grad_norm": 94.6286849975586, + "learning_rate": 8.307635621777943e-07, + "loss": 17.0616, + "step": 416240 + }, + { + "epoch": 0.8408513354638267, + "grad_norm": 705.1524658203125, + "learning_rate": 8.305708896641596e-07, + "loss": 27.5486, + "step": 416250 + }, + { + "epoch": 0.8408715360965106, + "grad_norm": 618.1636962890625, + "learning_rate": 8.303782374718167e-07, + "loss": 18.6452, + "step": 416260 + }, + { + "epoch": 0.8408917367291944, + "grad_norm": 419.329345703125, + "learning_rate": 8.30185605601706e-07, + "loss": 18.2721, + "step": 416270 + }, + { + "epoch": 0.8409119373618782, + "grad_norm": 543.0031127929688, + "learning_rate": 8.299929940547646e-07, + "loss": 22.3321, + "step": 416280 + }, + { + "epoch": 0.840932137994562, + "grad_norm": 214.42633056640625, + "learning_rate": 8.298004028319306e-07, + "loss": 18.5606, + "step": 416290 + }, + { + "epoch": 0.8409523386272458, + "grad_norm": 243.7238311767578, + "learning_rate": 8.296078319341444e-07, + "loss": 23.4548, + "step": 416300 + }, + { + "epoch": 0.8409725392599297, + "grad_norm": 349.8535461425781, + "learning_rate": 8.294152813623446e-07, + "loss": 21.2033, + "step": 416310 + }, + { + "epoch": 0.8409927398926135, + "grad_norm": 1284.961181640625, + "learning_rate": 8.292227511174671e-07, + "loss": 27.151, + "step": 416320 + }, + { + "epoch": 0.8410129405252973, + "grad_norm": 60.14564514160156, + "learning_rate": 8.29030241200452e-07, + "loss": 20.0398, + "step": 416330 + }, + { + "epoch": 0.8410331411579811, + "grad_norm": 280.21649169921875, + "learning_rate": 8.288377516122393e-07, + "loss": 24.6728, + "step": 416340 + }, + { + "epoch": 0.8410533417906649, + "grad_norm": 468.47125244140625, + "learning_rate": 8.286452823537649e-07, + "loss": 13.5939, + "step": 416350 + }, + { + "epoch": 0.8410735424233488, + "grad_norm": 17.582555770874023, + "learning_rate": 8.284528334259667e-07, + "loss": 12.8961, + "step": 416360 + }, + { + "epoch": 0.8410937430560325, + "grad_norm": 144.01602172851562, + "learning_rate": 8.282604048297848e-07, + "loss": 12.5265, + "step": 416370 + }, + { + "epoch": 0.8411139436887163, + "grad_norm": 522.6692504882812, + "learning_rate": 8.280679965661554e-07, + "loss": 15.1801, + "step": 416380 + }, + { + "epoch": 0.8411341443214001, + "grad_norm": 663.3296508789062, + "learning_rate": 8.278756086360157e-07, + "loss": 14.2881, + "step": 416390 + }, + { + "epoch": 0.8411543449540839, + "grad_norm": 272.4671936035156, + "learning_rate": 8.276832410403051e-07, + "loss": 24.4269, + "step": 416400 + }, + { + "epoch": 0.8411745455867677, + "grad_norm": 730.2803955078125, + "learning_rate": 8.274908937799592e-07, + "loss": 18.7027, + "step": 416410 + }, + { + "epoch": 0.8411947462194516, + "grad_norm": 814.9945678710938, + "learning_rate": 8.27298566855918e-07, + "loss": 20.1449, + "step": 416420 + }, + { + "epoch": 0.8412149468521354, + "grad_norm": 343.75592041015625, + "learning_rate": 8.271062602691171e-07, + "loss": 41.2443, + "step": 416430 + }, + { + "epoch": 0.8412351474848192, + "grad_norm": 410.6811828613281, + "learning_rate": 8.269139740204935e-07, + "loss": 19.676, + "step": 416440 + }, + { + "epoch": 0.841255348117503, + "grad_norm": 
266.7729797363281, + "learning_rate": 8.267217081109863e-07, + "loss": 24.5343, + "step": 416450 + }, + { + "epoch": 0.8412755487501868, + "grad_norm": 360.75347900390625, + "learning_rate": 8.265294625415299e-07, + "loss": 11.8981, + "step": 416460 + }, + { + "epoch": 0.8412957493828707, + "grad_norm": 268.4195861816406, + "learning_rate": 8.263372373130635e-07, + "loss": 22.2304, + "step": 416470 + }, + { + "epoch": 0.8413159500155545, + "grad_norm": 24.121749877929688, + "learning_rate": 8.261450324265225e-07, + "loss": 14.1595, + "step": 416480 + }, + { + "epoch": 0.8413361506482383, + "grad_norm": 124.1473617553711, + "learning_rate": 8.259528478828455e-07, + "loss": 25.6739, + "step": 416490 + }, + { + "epoch": 0.8413563512809221, + "grad_norm": 485.75091552734375, + "learning_rate": 8.25760683682968e-07, + "loss": 28.142, + "step": 416500 + }, + { + "epoch": 0.8413765519136059, + "grad_norm": 702.4967041015625, + "learning_rate": 8.255685398278257e-07, + "loss": 26.1443, + "step": 416510 + }, + { + "epoch": 0.8413967525462898, + "grad_norm": 742.955810546875, + "learning_rate": 8.25376416318357e-07, + "loss": 16.2622, + "step": 416520 + }, + { + "epoch": 0.8414169531789736, + "grad_norm": 2220.86767578125, + "learning_rate": 8.25184313155497e-07, + "loss": 24.0307, + "step": 416530 + }, + { + "epoch": 0.8414371538116574, + "grad_norm": 689.5069580078125, + "learning_rate": 8.249922303401814e-07, + "loss": 12.3775, + "step": 416540 + }, + { + "epoch": 0.8414573544443412, + "grad_norm": 553.2268676757812, + "learning_rate": 8.248001678733475e-07, + "loss": 23.2607, + "step": 416550 + }, + { + "epoch": 0.841477555077025, + "grad_norm": 432.29266357421875, + "learning_rate": 8.246081257559324e-07, + "loss": 30.3421, + "step": 416560 + }, + { + "epoch": 0.8414977557097089, + "grad_norm": 283.49530029296875, + "learning_rate": 8.244161039888709e-07, + "loss": 10.0194, + "step": 416570 + }, + { + "epoch": 0.8415179563423927, + "grad_norm": 222.84986877441406, + "learning_rate": 8.242241025730974e-07, + "loss": 19.9328, + "step": 416580 + }, + { + "epoch": 0.8415381569750765, + "grad_norm": 332.9171142578125, + "learning_rate": 8.240321215095504e-07, + "loss": 25.312, + "step": 416590 + }, + { + "epoch": 0.8415583576077603, + "grad_norm": 182.77035522460938, + "learning_rate": 8.238401607991647e-07, + "loss": 21.3855, + "step": 416600 + }, + { + "epoch": 0.8415785582404441, + "grad_norm": 751.35888671875, + "learning_rate": 8.236482204428737e-07, + "loss": 14.0024, + "step": 416610 + }, + { + "epoch": 0.841598758873128, + "grad_norm": 531.2789916992188, + "learning_rate": 8.234563004416151e-07, + "loss": 8.2383, + "step": 416620 + }, + { + "epoch": 0.8416189595058117, + "grad_norm": 621.9959716796875, + "learning_rate": 8.232644007963253e-07, + "loss": 39.9086, + "step": 416630 + }, + { + "epoch": 0.8416391601384955, + "grad_norm": 384.6670227050781, + "learning_rate": 8.230725215079383e-07, + "loss": 19.9908, + "step": 416640 + }, + { + "epoch": 0.8416593607711793, + "grad_norm": 391.2987060546875, + "learning_rate": 8.228806625773878e-07, + "loss": 11.5974, + "step": 416650 + }, + { + "epoch": 0.8416795614038631, + "grad_norm": 168.9244842529297, + "learning_rate": 8.226888240056114e-07, + "loss": 12.1507, + "step": 416660 + }, + { + "epoch": 0.841699762036547, + "grad_norm": 642.2132568359375, + "learning_rate": 8.224970057935433e-07, + "loss": 15.2827, + "step": 416670 + }, + { + "epoch": 0.8417199626692308, + "grad_norm": 602.5213623046875, + "learning_rate": 8.223052079421167e-07, + 
"loss": 19.2457, + "step": 416680 + }, + { + "epoch": 0.8417401633019146, + "grad_norm": 894.4662475585938, + "learning_rate": 8.221134304522694e-07, + "loss": 20.9106, + "step": 416690 + }, + { + "epoch": 0.8417603639345984, + "grad_norm": 332.2417297363281, + "learning_rate": 8.21921673324933e-07, + "loss": 12.6066, + "step": 416700 + }, + { + "epoch": 0.8417805645672822, + "grad_norm": 448.7167053222656, + "learning_rate": 8.217299365610448e-07, + "loss": 16.0514, + "step": 416710 + }, + { + "epoch": 0.841800765199966, + "grad_norm": 254.240234375, + "learning_rate": 8.215382201615379e-07, + "loss": 16.29, + "step": 416720 + }, + { + "epoch": 0.8418209658326499, + "grad_norm": 257.3818664550781, + "learning_rate": 8.213465241273461e-07, + "loss": 13.0963, + "step": 416730 + }, + { + "epoch": 0.8418411664653337, + "grad_norm": 152.66146850585938, + "learning_rate": 8.211548484594057e-07, + "loss": 20.5531, + "step": 416740 + }, + { + "epoch": 0.8418613670980175, + "grad_norm": 290.1943359375, + "learning_rate": 8.209631931586499e-07, + "loss": 14.6181, + "step": 416750 + }, + { + "epoch": 0.8418815677307013, + "grad_norm": 276.91009521484375, + "learning_rate": 8.207715582260112e-07, + "loss": 28.6372, + "step": 416760 + }, + { + "epoch": 0.8419017683633852, + "grad_norm": 232.94760131835938, + "learning_rate": 8.205799436624251e-07, + "loss": 10.0377, + "step": 416770 + }, + { + "epoch": 0.841921968996069, + "grad_norm": 595.9349365234375, + "learning_rate": 8.203883494688264e-07, + "loss": 28.9553, + "step": 416780 + }, + { + "epoch": 0.8419421696287528, + "grad_norm": 459.4786682128906, + "learning_rate": 8.201967756461482e-07, + "loss": 14.0377, + "step": 416790 + }, + { + "epoch": 0.8419623702614366, + "grad_norm": 546.0062255859375, + "learning_rate": 8.200052221953231e-07, + "loss": 20.928, + "step": 416800 + }, + { + "epoch": 0.8419825708941204, + "grad_norm": 146.12875366210938, + "learning_rate": 8.198136891172864e-07, + "loss": 19.1887, + "step": 416810 + }, + { + "epoch": 0.8420027715268043, + "grad_norm": 302.6186218261719, + "learning_rate": 8.196221764129708e-07, + "loss": 19.0241, + "step": 416820 + }, + { + "epoch": 0.8420229721594881, + "grad_norm": 549.2744140625, + "learning_rate": 8.194306840833083e-07, + "loss": 30.9922, + "step": 416830 + }, + { + "epoch": 0.8420431727921719, + "grad_norm": 546.9652099609375, + "learning_rate": 8.192392121292336e-07, + "loss": 28.0652, + "step": 416840 + }, + { + "epoch": 0.8420633734248557, + "grad_norm": 156.42550659179688, + "learning_rate": 8.190477605516828e-07, + "loss": 19.8071, + "step": 416850 + }, + { + "epoch": 0.8420835740575395, + "grad_norm": 337.0184326171875, + "learning_rate": 8.188563293515834e-07, + "loss": 35.2146, + "step": 416860 + }, + { + "epoch": 0.8421037746902234, + "grad_norm": 279.08404541015625, + "learning_rate": 8.186649185298712e-07, + "loss": 13.8124, + "step": 416870 + }, + { + "epoch": 0.8421239753229071, + "grad_norm": 671.6292114257812, + "learning_rate": 8.184735280874801e-07, + "loss": 18.5853, + "step": 416880 + }, + { + "epoch": 0.8421441759555909, + "grad_norm": 474.8827819824219, + "learning_rate": 8.182821580253425e-07, + "loss": 28.7646, + "step": 416890 + }, + { + "epoch": 0.8421643765882747, + "grad_norm": 183.2584991455078, + "learning_rate": 8.180908083443884e-07, + "loss": 15.0636, + "step": 416900 + }, + { + "epoch": 0.8421845772209585, + "grad_norm": 540.3548583984375, + "learning_rate": 8.178994790455541e-07, + "loss": 28.3647, + "step": 416910 + }, + { + "epoch": 
0.8422047778536423, + "grad_norm": 61.81666946411133, + "learning_rate": 8.177081701297706e-07, + "loss": 15.9166, + "step": 416920 + }, + { + "epoch": 0.8422249784863262, + "grad_norm": 419.9620056152344, + "learning_rate": 8.175168815979689e-07, + "loss": 15.7197, + "step": 416930 + }, + { + "epoch": 0.84224517911901, + "grad_norm": 360.3706359863281, + "learning_rate": 8.173256134510827e-07, + "loss": 14.0688, + "step": 416940 + }, + { + "epoch": 0.8422653797516938, + "grad_norm": 632.1427612304688, + "learning_rate": 8.171343656900455e-07, + "loss": 19.6811, + "step": 416950 + }, + { + "epoch": 0.8422855803843776, + "grad_norm": 314.94830322265625, + "learning_rate": 8.169431383157877e-07, + "loss": 14.6761, + "step": 416960 + }, + { + "epoch": 0.8423057810170614, + "grad_norm": 198.35939025878906, + "learning_rate": 8.16751931329241e-07, + "loss": 18.2067, + "step": 416970 + }, + { + "epoch": 0.8423259816497453, + "grad_norm": 257.9476318359375, + "learning_rate": 8.16560744731339e-07, + "loss": 10.4845, + "step": 416980 + }, + { + "epoch": 0.8423461822824291, + "grad_norm": 317.8804931640625, + "learning_rate": 8.163695785230125e-07, + "loss": 24.2213, + "step": 416990 + }, + { + "epoch": 0.8423663829151129, + "grad_norm": 255.24928283691406, + "learning_rate": 8.161784327051919e-07, + "loss": 13.5445, + "step": 417000 + }, + { + "epoch": 0.8423865835477967, + "grad_norm": 29.380022048950195, + "learning_rate": 8.159873072788116e-07, + "loss": 14.2142, + "step": 417010 + }, + { + "epoch": 0.8424067841804805, + "grad_norm": 535.5570678710938, + "learning_rate": 8.157962022448001e-07, + "loss": 13.1268, + "step": 417020 + }, + { + "epoch": 0.8424269848131644, + "grad_norm": 971.4058837890625, + "learning_rate": 8.156051176040919e-07, + "loss": 20.5933, + "step": 417030 + }, + { + "epoch": 0.8424471854458482, + "grad_norm": 389.6575012207031, + "learning_rate": 8.154140533576171e-07, + "loss": 17.0994, + "step": 417040 + }, + { + "epoch": 0.842467386078532, + "grad_norm": 251.94143676757812, + "learning_rate": 8.152230095063051e-07, + "loss": 24.1352, + "step": 417050 + }, + { + "epoch": 0.8424875867112158, + "grad_norm": 322.27734375, + "learning_rate": 8.150319860510903e-07, + "loss": 12.5676, + "step": 417060 + }, + { + "epoch": 0.8425077873438996, + "grad_norm": 396.6203918457031, + "learning_rate": 8.148409829929005e-07, + "loss": 11.8115, + "step": 417070 + }, + { + "epoch": 0.8425279879765835, + "grad_norm": 240.94351196289062, + "learning_rate": 8.14650000332669e-07, + "loss": 9.2375, + "step": 417080 + }, + { + "epoch": 0.8425481886092673, + "grad_norm": 819.8513793945312, + "learning_rate": 8.144590380713252e-07, + "loss": 12.0466, + "step": 417090 + }, + { + "epoch": 0.8425683892419511, + "grad_norm": 297.76092529296875, + "learning_rate": 8.142680962098016e-07, + "loss": 14.8963, + "step": 417100 + }, + { + "epoch": 0.8425885898746349, + "grad_norm": 93.6225814819336, + "learning_rate": 8.140771747490273e-07, + "loss": 11.1213, + "step": 417110 + }, + { + "epoch": 0.8426087905073187, + "grad_norm": 474.6208801269531, + "learning_rate": 8.138862736899317e-07, + "loss": 16.4858, + "step": 417120 + }, + { + "epoch": 0.8426289911400026, + "grad_norm": 514.2171630859375, + "learning_rate": 8.136953930334484e-07, + "loss": 11.1815, + "step": 417130 + }, + { + "epoch": 0.8426491917726863, + "grad_norm": 234.20852661132812, + "learning_rate": 8.135045327805058e-07, + "loss": 18.8199, + "step": 417140 + }, + { + "epoch": 0.8426693924053701, + "grad_norm": 78.7138671875, + 
"learning_rate": 8.133136929320329e-07, + "loss": 22.1076, + "step": 417150 + }, + { + "epoch": 0.8426895930380539, + "grad_norm": 86.7906494140625, + "learning_rate": 8.131228734889618e-07, + "loss": 9.5779, + "step": 417160 + }, + { + "epoch": 0.8427097936707377, + "grad_norm": 358.8228759765625, + "learning_rate": 8.12932074452224e-07, + "loss": 15.5598, + "step": 417170 + }, + { + "epoch": 0.8427299943034215, + "grad_norm": 306.94622802734375, + "learning_rate": 8.127412958227454e-07, + "loss": 16.4282, + "step": 417180 + }, + { + "epoch": 0.8427501949361054, + "grad_norm": 5.045441627502441, + "learning_rate": 8.125505376014576e-07, + "loss": 18.441, + "step": 417190 + }, + { + "epoch": 0.8427703955687892, + "grad_norm": 221.3944091796875, + "learning_rate": 8.123597997892918e-07, + "loss": 20.5832, + "step": 417200 + }, + { + "epoch": 0.842790596201473, + "grad_norm": 186.3422393798828, + "learning_rate": 8.121690823871764e-07, + "loss": 20.6377, + "step": 417210 + }, + { + "epoch": 0.8428107968341568, + "grad_norm": 532.1461791992188, + "learning_rate": 8.119783853960401e-07, + "loss": 23.3955, + "step": 417220 + }, + { + "epoch": 0.8428309974668406, + "grad_norm": 127.2115478515625, + "learning_rate": 8.11787708816813e-07, + "loss": 18.1984, + "step": 417230 + }, + { + "epoch": 0.8428511980995245, + "grad_norm": 477.167236328125, + "learning_rate": 8.115970526504258e-07, + "loss": 18.1049, + "step": 417240 + }, + { + "epoch": 0.8428713987322083, + "grad_norm": 163.93153381347656, + "learning_rate": 8.114064168978064e-07, + "loss": 11.5624, + "step": 417250 + }, + { + "epoch": 0.8428915993648921, + "grad_norm": 777.0939331054688, + "learning_rate": 8.112158015598832e-07, + "loss": 20.3062, + "step": 417260 + }, + { + "epoch": 0.8429117999975759, + "grad_norm": 54.6539306640625, + "learning_rate": 8.110252066375873e-07, + "loss": 11.629, + "step": 417270 + }, + { + "epoch": 0.8429320006302597, + "grad_norm": 82.93055725097656, + "learning_rate": 8.108346321318467e-07, + "loss": 10.9843, + "step": 417280 + }, + { + "epoch": 0.8429522012629436, + "grad_norm": 660.4531860351562, + "learning_rate": 8.106440780435881e-07, + "loss": 13.2302, + "step": 417290 + }, + { + "epoch": 0.8429724018956274, + "grad_norm": 184.3472900390625, + "learning_rate": 8.104535443737438e-07, + "loss": 17.7074, + "step": 417300 + }, + { + "epoch": 0.8429926025283112, + "grad_norm": 586.7990112304688, + "learning_rate": 8.102630311232395e-07, + "loss": 19.8601, + "step": 417310 + }, + { + "epoch": 0.843012803160995, + "grad_norm": 348.7237854003906, + "learning_rate": 8.100725382930064e-07, + "loss": 20.8465, + "step": 417320 + }, + { + "epoch": 0.8430330037936788, + "grad_norm": 678.0906372070312, + "learning_rate": 8.098820658839718e-07, + "loss": 19.6183, + "step": 417330 + }, + { + "epoch": 0.8430532044263627, + "grad_norm": 380.850341796875, + "learning_rate": 8.096916138970623e-07, + "loss": 18.166, + "step": 417340 + }, + { + "epoch": 0.8430734050590465, + "grad_norm": 565.8128662109375, + "learning_rate": 8.095011823332089e-07, + "loss": 19.0084, + "step": 417350 + }, + { + "epoch": 0.8430936056917303, + "grad_norm": 253.3446502685547, + "learning_rate": 8.093107711933385e-07, + "loss": 6.7644, + "step": 417360 + }, + { + "epoch": 0.8431138063244141, + "grad_norm": 400.7567138671875, + "learning_rate": 8.091203804783776e-07, + "loss": 16.0196, + "step": 417370 + }, + { + "epoch": 0.843134006957098, + "grad_norm": 231.70989990234375, + "learning_rate": 8.089300101892561e-07, + "loss": 15.8512, + "step": 
417380 + }, + { + "epoch": 0.8431542075897817, + "grad_norm": 230.20811462402344, + "learning_rate": 8.087396603269027e-07, + "loss": 12.4343, + "step": 417390 + }, + { + "epoch": 0.8431744082224655, + "grad_norm": 86.41107940673828, + "learning_rate": 8.085493308922432e-07, + "loss": 15.9593, + "step": 417400 + }, + { + "epoch": 0.8431946088551493, + "grad_norm": 317.251220703125, + "learning_rate": 8.083590218862053e-07, + "loss": 22.2669, + "step": 417410 + }, + { + "epoch": 0.8432148094878331, + "grad_norm": 499.9970703125, + "learning_rate": 8.081687333097183e-07, + "loss": 23.5597, + "step": 417420 + }, + { + "epoch": 0.8432350101205169, + "grad_norm": 271.93731689453125, + "learning_rate": 8.079784651637084e-07, + "loss": 15.5502, + "step": 417430 + }, + { + "epoch": 0.8432552107532008, + "grad_norm": 416.82958984375, + "learning_rate": 8.077882174491014e-07, + "loss": 15.8095, + "step": 417440 + }, + { + "epoch": 0.8432754113858846, + "grad_norm": 464.6175537109375, + "learning_rate": 8.075979901668269e-07, + "loss": 23.8034, + "step": 417450 + }, + { + "epoch": 0.8432956120185684, + "grad_norm": 284.9903564453125, + "learning_rate": 8.074077833178135e-07, + "loss": 12.4611, + "step": 417460 + }, + { + "epoch": 0.8433158126512522, + "grad_norm": 143.05435180664062, + "learning_rate": 8.072175969029832e-07, + "loss": 11.4091, + "step": 417470 + }, + { + "epoch": 0.843336013283936, + "grad_norm": 227.09988403320312, + "learning_rate": 8.070274309232662e-07, + "loss": 16.5846, + "step": 417480 + }, + { + "epoch": 0.8433562139166199, + "grad_norm": 54.895660400390625, + "learning_rate": 8.068372853795903e-07, + "loss": 12.8017, + "step": 417490 + }, + { + "epoch": 0.8433764145493037, + "grad_norm": 195.83372497558594, + "learning_rate": 8.066471602728804e-07, + "loss": 13.6173, + "step": 417500 + }, + { + "epoch": 0.8433966151819875, + "grad_norm": 468.7023620605469, + "learning_rate": 8.064570556040629e-07, + "loss": 7.9044, + "step": 417510 + }, + { + "epoch": 0.8434168158146713, + "grad_norm": 811.3773193359375, + "learning_rate": 8.06266971374065e-07, + "loss": 71.1642, + "step": 417520 + }, + { + "epoch": 0.8434370164473551, + "grad_norm": 380.46160888671875, + "learning_rate": 8.060769075838154e-07, + "loss": 22.0033, + "step": 417530 + }, + { + "epoch": 0.843457217080039, + "grad_norm": 353.1867370605469, + "learning_rate": 8.058868642342366e-07, + "loss": 23.6593, + "step": 417540 + }, + { + "epoch": 0.8434774177127228, + "grad_norm": 255.4258575439453, + "learning_rate": 8.056968413262555e-07, + "loss": 20.9205, + "step": 417550 + }, + { + "epoch": 0.8434976183454066, + "grad_norm": 628.8818969726562, + "learning_rate": 8.055068388608011e-07, + "loss": 19.278, + "step": 417560 + }, + { + "epoch": 0.8435178189780904, + "grad_norm": 0.0, + "learning_rate": 8.053168568387976e-07, + "loss": 19.0558, + "step": 417570 + }, + { + "epoch": 0.8435380196107742, + "grad_norm": 553.6051025390625, + "learning_rate": 8.051268952611696e-07, + "loss": 20.5668, + "step": 417580 + }, + { + "epoch": 0.8435582202434581, + "grad_norm": 200.57460021972656, + "learning_rate": 8.04936954128846e-07, + "loss": 23.3032, + "step": 417590 + }, + { + "epoch": 0.8435784208761419, + "grad_norm": 540.6591186523438, + "learning_rate": 8.047470334427504e-07, + "loss": 25.1376, + "step": 417600 + }, + { + "epoch": 0.8435986215088257, + "grad_norm": 305.846435546875, + "learning_rate": 8.045571332038082e-07, + "loss": 16.2725, + "step": 417610 + }, + { + "epoch": 0.8436188221415095, + "grad_norm": 
129.34548950195312, + "learning_rate": 8.043672534129465e-07, + "loss": 22.8199, + "step": 417620 + }, + { + "epoch": 0.8436390227741933, + "grad_norm": 503.1650695800781, + "learning_rate": 8.041773940710884e-07, + "loss": 22.0007, + "step": 417630 + }, + { + "epoch": 0.8436592234068772, + "grad_norm": 598.3443603515625, + "learning_rate": 8.039875551791626e-07, + "loss": 19.9376, + "step": 417640 + }, + { + "epoch": 0.8436794240395609, + "grad_norm": 36.877967834472656, + "learning_rate": 8.037977367380922e-07, + "loss": 16.0755, + "step": 417650 + }, + { + "epoch": 0.8436996246722447, + "grad_norm": 80.91256713867188, + "learning_rate": 8.036079387488016e-07, + "loss": 9.9852, + "step": 417660 + }, + { + "epoch": 0.8437198253049285, + "grad_norm": 272.49676513671875, + "learning_rate": 8.034181612122183e-07, + "loss": 19.1621, + "step": 417670 + }, + { + "epoch": 0.8437400259376123, + "grad_norm": 422.8641052246094, + "learning_rate": 8.032284041292649e-07, + "loss": 29.3479, + "step": 417680 + }, + { + "epoch": 0.8437602265702961, + "grad_norm": 508.0213928222656, + "learning_rate": 8.030386675008678e-07, + "loss": 22.7619, + "step": 417690 + }, + { + "epoch": 0.84378042720298, + "grad_norm": 504.5783996582031, + "learning_rate": 8.028489513279503e-07, + "loss": 22.4872, + "step": 417700 + }, + { + "epoch": 0.8438006278356638, + "grad_norm": 847.2953491210938, + "learning_rate": 8.026592556114393e-07, + "loss": 31.6101, + "step": 417710 + }, + { + "epoch": 0.8438208284683476, + "grad_norm": 361.8731689453125, + "learning_rate": 8.02469580352258e-07, + "loss": 21.0525, + "step": 417720 + }, + { + "epoch": 0.8438410291010314, + "grad_norm": 255.57284545898438, + "learning_rate": 8.022799255513297e-07, + "loss": 15.0666, + "step": 417730 + }, + { + "epoch": 0.8438612297337152, + "grad_norm": 145.09686279296875, + "learning_rate": 8.020902912095807e-07, + "loss": 10.2129, + "step": 417740 + }, + { + "epoch": 0.8438814303663991, + "grad_norm": 383.4001770019531, + "learning_rate": 8.019006773279348e-07, + "loss": 23.5098, + "step": 417750 + }, + { + "epoch": 0.8439016309990829, + "grad_norm": 146.52294921875, + "learning_rate": 8.01711083907315e-07, + "loss": 10.6484, + "step": 417760 + }, + { + "epoch": 0.8439218316317667, + "grad_norm": 692.0272216796875, + "learning_rate": 8.015215109486457e-07, + "loss": 14.2041, + "step": 417770 + }, + { + "epoch": 0.8439420322644505, + "grad_norm": 436.42169189453125, + "learning_rate": 8.013319584528539e-07, + "loss": 23.2882, + "step": 417780 + }, + { + "epoch": 0.8439622328971343, + "grad_norm": 351.6768798828125, + "learning_rate": 8.011424264208584e-07, + "loss": 14.4282, + "step": 417790 + }, + { + "epoch": 0.8439824335298182, + "grad_norm": 14.351901054382324, + "learning_rate": 8.009529148535855e-07, + "loss": 25.9734, + "step": 417800 + }, + { + "epoch": 0.844002634162502, + "grad_norm": 185.14767456054688, + "learning_rate": 8.007634237519595e-07, + "loss": 20.9192, + "step": 417810 + }, + { + "epoch": 0.8440228347951858, + "grad_norm": 589.0468139648438, + "learning_rate": 8.005739531169044e-07, + "loss": 15.9933, + "step": 417820 + }, + { + "epoch": 0.8440430354278696, + "grad_norm": 383.037841796875, + "learning_rate": 8.003845029493407e-07, + "loss": 18.9315, + "step": 417830 + }, + { + "epoch": 0.8440632360605534, + "grad_norm": 417.3287048339844, + "learning_rate": 8.001950732501934e-07, + "loss": 12.2846, + "step": 417840 + }, + { + "epoch": 0.8440834366932373, + "grad_norm": 0.0, + "learning_rate": 8.000056640203885e-07, + "loss": 
15.2792, + "step": 417850 + }, + { + "epoch": 0.8441036373259211, + "grad_norm": 475.166015625, + "learning_rate": 7.99816275260844e-07, + "loss": 19.9353, + "step": 417860 + }, + { + "epoch": 0.8441238379586049, + "grad_norm": 279.0462951660156, + "learning_rate": 7.996269069724861e-07, + "loss": 20.397, + "step": 417870 + }, + { + "epoch": 0.8441440385912887, + "grad_norm": 289.1851501464844, + "learning_rate": 7.994375591562376e-07, + "loss": 10.9729, + "step": 417880 + }, + { + "epoch": 0.8441642392239725, + "grad_norm": 120.9384994506836, + "learning_rate": 7.992482318130218e-07, + "loss": 14.9548, + "step": 417890 + }, + { + "epoch": 0.8441844398566564, + "grad_norm": 785.109375, + "learning_rate": 7.990589249437591e-07, + "loss": 14.2628, + "step": 417900 + }, + { + "epoch": 0.8442046404893401, + "grad_norm": 314.58782958984375, + "learning_rate": 7.988696385493744e-07, + "loss": 17.008, + "step": 417910 + }, + { + "epoch": 0.8442248411220239, + "grad_norm": 226.8375701904297, + "learning_rate": 7.986803726307901e-07, + "loss": 15.2947, + "step": 417920 + }, + { + "epoch": 0.8442450417547077, + "grad_norm": 551.301025390625, + "learning_rate": 7.984911271889267e-07, + "loss": 19.3609, + "step": 417930 + }, + { + "epoch": 0.8442652423873915, + "grad_norm": 293.1257019042969, + "learning_rate": 7.983019022247096e-07, + "loss": 11.7711, + "step": 417940 + }, + { + "epoch": 0.8442854430200754, + "grad_norm": 529.7147216796875, + "learning_rate": 7.98112697739058e-07, + "loss": 20.5691, + "step": 417950 + }, + { + "epoch": 0.8443056436527592, + "grad_norm": 159.33741760253906, + "learning_rate": 7.979235137328961e-07, + "loss": 7.6765, + "step": 417960 + }, + { + "epoch": 0.844325844285443, + "grad_norm": 36.75069046020508, + "learning_rate": 7.97734350207145e-07, + "loss": 20.2914, + "step": 417970 + }, + { + "epoch": 0.8443460449181268, + "grad_norm": 212.27427673339844, + "learning_rate": 7.975452071627277e-07, + "loss": 17.0566, + "step": 417980 + }, + { + "epoch": 0.8443662455508106, + "grad_norm": 7.3990983963012695, + "learning_rate": 7.973560846005646e-07, + "loss": 38.3213, + "step": 417990 + }, + { + "epoch": 0.8443864461834945, + "grad_norm": 307.8514404296875, + "learning_rate": 7.971669825215789e-07, + "loss": 12.0889, + "step": 418000 + }, + { + "epoch": 0.8444066468161783, + "grad_norm": 210.28102111816406, + "learning_rate": 7.969779009266915e-07, + "loss": 11.6439, + "step": 418010 + }, + { + "epoch": 0.8444268474488621, + "grad_norm": 271.6314697265625, + "learning_rate": 7.967888398168233e-07, + "loss": 17.1989, + "step": 418020 + }, + { + "epoch": 0.8444470480815459, + "grad_norm": 108.26612091064453, + "learning_rate": 7.965997991928975e-07, + "loss": 22.6884, + "step": 418030 + }, + { + "epoch": 0.8444672487142297, + "grad_norm": 34.8552131652832, + "learning_rate": 7.964107790558345e-07, + "loss": 11.7574, + "step": 418040 + }, + { + "epoch": 0.8444874493469136, + "grad_norm": 261.3864440917969, + "learning_rate": 7.962217794065547e-07, + "loss": 15.4715, + "step": 418050 + }, + { + "epoch": 0.8445076499795974, + "grad_norm": 63.58285140991211, + "learning_rate": 7.960328002459794e-07, + "loss": 27.665, + "step": 418060 + }, + { + "epoch": 0.8445278506122812, + "grad_norm": 1134.8045654296875, + "learning_rate": 7.958438415750331e-07, + "loss": 19.2892, + "step": 418070 + }, + { + "epoch": 0.844548051244965, + "grad_norm": 412.56097412109375, + "learning_rate": 7.956549033946314e-07, + "loss": 11.5439, + "step": 418080 + }, + { + "epoch": 0.8445682518776488, + 
"grad_norm": 50.35539245605469, + "learning_rate": 7.954659857056984e-07, + "loss": 25.4583, + "step": 418090 + }, + { + "epoch": 0.8445884525103327, + "grad_norm": 220.02413940429688, + "learning_rate": 7.952770885091548e-07, + "loss": 10.1853, + "step": 418100 + }, + { + "epoch": 0.8446086531430165, + "grad_norm": 501.8572998046875, + "learning_rate": 7.950882118059211e-07, + "loss": 17.0491, + "step": 418110 + }, + { + "epoch": 0.8446288537757003, + "grad_norm": 3.201639175415039, + "learning_rate": 7.948993555969159e-07, + "loss": 18.2402, + "step": 418120 + }, + { + "epoch": 0.8446490544083841, + "grad_norm": 322.1613464355469, + "learning_rate": 7.947105198830612e-07, + "loss": 14.7168, + "step": 418130 + }, + { + "epoch": 0.8446692550410679, + "grad_norm": 709.388427734375, + "learning_rate": 7.945217046652804e-07, + "loss": 20.6433, + "step": 418140 + }, + { + "epoch": 0.8446894556737518, + "grad_norm": 165.0631561279297, + "learning_rate": 7.94332909944488e-07, + "loss": 31.5094, + "step": 418150 + }, + { + "epoch": 0.8447096563064355, + "grad_norm": 380.36480712890625, + "learning_rate": 7.941441357216068e-07, + "loss": 13.631, + "step": 418160 + }, + { + "epoch": 0.8447298569391193, + "grad_norm": 1242.5738525390625, + "learning_rate": 7.939553819975582e-07, + "loss": 34.1927, + "step": 418170 + }, + { + "epoch": 0.8447500575718031, + "grad_norm": 477.37554931640625, + "learning_rate": 7.937666487732609e-07, + "loss": 20.4424, + "step": 418180 + }, + { + "epoch": 0.8447702582044869, + "grad_norm": 306.4768981933594, + "learning_rate": 7.935779360496337e-07, + "loss": 17.0729, + "step": 418190 + }, + { + "epoch": 0.8447904588371707, + "grad_norm": 303.6390686035156, + "learning_rate": 7.933892438275987e-07, + "loss": 23.9531, + "step": 418200 + }, + { + "epoch": 0.8448106594698546, + "grad_norm": 258.2592468261719, + "learning_rate": 7.932005721080738e-07, + "loss": 15.5802, + "step": 418210 + }, + { + "epoch": 0.8448308601025384, + "grad_norm": 406.1451416015625, + "learning_rate": 7.930119208919784e-07, + "loss": 7.2556, + "step": 418220 + }, + { + "epoch": 0.8448510607352222, + "grad_norm": 448.5814208984375, + "learning_rate": 7.92823290180234e-07, + "loss": 15.0219, + "step": 418230 + }, + { + "epoch": 0.844871261367906, + "grad_norm": 554.1819458007812, + "learning_rate": 7.926346799737572e-07, + "loss": 21.8187, + "step": 418240 + }, + { + "epoch": 0.8448914620005898, + "grad_norm": 417.68475341796875, + "learning_rate": 7.924460902734698e-07, + "loss": 37.0044, + "step": 418250 + }, + { + "epoch": 0.8449116626332737, + "grad_norm": 555.5881958007812, + "learning_rate": 7.922575210802896e-07, + "loss": 17.1528, + "step": 418260 + }, + { + "epoch": 0.8449318632659575, + "grad_norm": 317.4004821777344, + "learning_rate": 7.920689723951353e-07, + "loss": 16.6019, + "step": 418270 + }, + { + "epoch": 0.8449520638986413, + "grad_norm": 28.36787223815918, + "learning_rate": 7.918804442189271e-07, + "loss": 23.7992, + "step": 418280 + }, + { + "epoch": 0.8449722645313251, + "grad_norm": 399.80255126953125, + "learning_rate": 7.916919365525827e-07, + "loss": 9.0299, + "step": 418290 + }, + { + "epoch": 0.8449924651640089, + "grad_norm": 201.8307647705078, + "learning_rate": 7.91503449397022e-07, + "loss": 21.9661, + "step": 418300 + }, + { + "epoch": 0.8450126657966928, + "grad_norm": 473.5652770996094, + "learning_rate": 7.913149827531619e-07, + "loss": 18.6611, + "step": 418310 + }, + { + "epoch": 0.8450328664293766, + "grad_norm": 422.7249755859375, + "learning_rate": 
7.911265366219234e-07, + "loss": 18.0575, + "step": 418320 + }, + { + "epoch": 0.8450530670620604, + "grad_norm": 108.836669921875, + "learning_rate": 7.909381110042241e-07, + "loss": 14.3215, + "step": 418330 + }, + { + "epoch": 0.8450732676947442, + "grad_norm": 638.8184814453125, + "learning_rate": 7.907497059009806e-07, + "loss": 22.2167, + "step": 418340 + }, + { + "epoch": 0.845093468327428, + "grad_norm": 88.75518798828125, + "learning_rate": 7.90561321313113e-07, + "loss": 10.6438, + "step": 418350 + }, + { + "epoch": 0.8451136689601119, + "grad_norm": 278.2799072265625, + "learning_rate": 7.903729572415397e-07, + "loss": 15.4431, + "step": 418360 + }, + { + "epoch": 0.8451338695927957, + "grad_norm": 168.87420654296875, + "learning_rate": 7.901846136871766e-07, + "loss": 11.0043, + "step": 418370 + }, + { + "epoch": 0.8451540702254795, + "grad_norm": 257.9417419433594, + "learning_rate": 7.899962906509434e-07, + "loss": 10.7762, + "step": 418380 + }, + { + "epoch": 0.8451742708581633, + "grad_norm": 434.875732421875, + "learning_rate": 7.898079881337594e-07, + "loss": 28.863, + "step": 418390 + }, + { + "epoch": 0.8451944714908471, + "grad_norm": 27.86065673828125, + "learning_rate": 7.89619706136539e-07, + "loss": 16.9371, + "step": 418400 + }, + { + "epoch": 0.845214672123531, + "grad_norm": 224.47476196289062, + "learning_rate": 7.894314446602013e-07, + "loss": 28.2371, + "step": 418410 + }, + { + "epoch": 0.8452348727562147, + "grad_norm": 769.4656982421875, + "learning_rate": 7.892432037056652e-07, + "loss": 16.6101, + "step": 418420 + }, + { + "epoch": 0.8452550733888985, + "grad_norm": 262.0355529785156, + "learning_rate": 7.890549832738465e-07, + "loss": 22.3285, + "step": 418430 + }, + { + "epoch": 0.8452752740215823, + "grad_norm": 214.3258819580078, + "learning_rate": 7.888667833656627e-07, + "loss": 16.3148, + "step": 418440 + }, + { + "epoch": 0.8452954746542661, + "grad_norm": 118.48668670654297, + "learning_rate": 7.88678603982031e-07, + "loss": 20.364, + "step": 418450 + }, + { + "epoch": 0.84531567528695, + "grad_norm": 140.25999450683594, + "learning_rate": 7.884904451238712e-07, + "loss": 8.9497, + "step": 418460 + }, + { + "epoch": 0.8453358759196338, + "grad_norm": 440.9783935546875, + "learning_rate": 7.883023067920964e-07, + "loss": 11.734, + "step": 418470 + }, + { + "epoch": 0.8453560765523176, + "grad_norm": 297.5727844238281, + "learning_rate": 7.881141889876248e-07, + "loss": 13.3235, + "step": 418480 + }, + { + "epoch": 0.8453762771850014, + "grad_norm": 0.0, + "learning_rate": 7.879260917113751e-07, + "loss": 13.3954, + "step": 418490 + }, + { + "epoch": 0.8453964778176852, + "grad_norm": 308.35198974609375, + "learning_rate": 7.877380149642628e-07, + "loss": 10.9062, + "step": 418500 + }, + { + "epoch": 0.845416678450369, + "grad_norm": 332.7234191894531, + "learning_rate": 7.875499587472035e-07, + "loss": 17.3936, + "step": 418510 + }, + { + "epoch": 0.8454368790830529, + "grad_norm": 69.22209930419922, + "learning_rate": 7.873619230611157e-07, + "loss": 21.6091, + "step": 418520 + }, + { + "epoch": 0.8454570797157367, + "grad_norm": 530.6533203125, + "learning_rate": 7.871739079069152e-07, + "loss": 16.756, + "step": 418530 + }, + { + "epoch": 0.8454772803484205, + "grad_norm": 450.29388427734375, + "learning_rate": 7.869859132855168e-07, + "loss": 21.5757, + "step": 418540 + }, + { + "epoch": 0.8454974809811043, + "grad_norm": 36.44185256958008, + "learning_rate": 7.867979391978398e-07, + "loss": 21.2164, + "step": 418550 + }, + { + "epoch": 
0.8455176816137882, + "grad_norm": 379.1933898925781, + "learning_rate": 7.866099856447968e-07, + "loss": 13.118, + "step": 418560 + }, + { + "epoch": 0.845537882246472, + "grad_norm": 519.5206298828125, + "learning_rate": 7.864220526273069e-07, + "loss": 21.3711, + "step": 418570 + }, + { + "epoch": 0.8455580828791558, + "grad_norm": 258.16107177734375, + "learning_rate": 7.862341401462842e-07, + "loss": 19.3362, + "step": 418580 + }, + { + "epoch": 0.8455782835118396, + "grad_norm": 466.6935119628906, + "learning_rate": 7.86046248202646e-07, + "loss": 21.6412, + "step": 418590 + }, + { + "epoch": 0.8455984841445234, + "grad_norm": 386.0509948730469, + "learning_rate": 7.858583767973071e-07, + "loss": 9.0111, + "step": 418600 + }, + { + "epoch": 0.8456186847772073, + "grad_norm": 476.1527099609375, + "learning_rate": 7.856705259311826e-07, + "loss": 16.6665, + "step": 418610 + }, + { + "epoch": 0.8456388854098911, + "grad_norm": 712.1090087890625, + "learning_rate": 7.854826956051897e-07, + "loss": 21.9178, + "step": 418620 + }, + { + "epoch": 0.8456590860425749, + "grad_norm": 798.2106323242188, + "learning_rate": 7.852948858202419e-07, + "loss": 36.1859, + "step": 418630 + }, + { + "epoch": 0.8456792866752587, + "grad_norm": 459.5579833984375, + "learning_rate": 7.851070965772572e-07, + "loss": 23.7613, + "step": 418640 + }, + { + "epoch": 0.8456994873079425, + "grad_norm": 570.1857299804688, + "learning_rate": 7.849193278771489e-07, + "loss": 18.8711, + "step": 418650 + }, + { + "epoch": 0.8457196879406264, + "grad_norm": 141.66217041015625, + "learning_rate": 7.847315797208316e-07, + "loss": 18.4804, + "step": 418660 + }, + { + "epoch": 0.8457398885733101, + "grad_norm": 412.1319580078125, + "learning_rate": 7.845438521092213e-07, + "loss": 12.6574, + "step": 418670 + }, + { + "epoch": 0.8457600892059939, + "grad_norm": 131.34910583496094, + "learning_rate": 7.843561450432352e-07, + "loss": 10.978, + "step": 418680 + }, + { + "epoch": 0.8457802898386777, + "grad_norm": 167.0421142578125, + "learning_rate": 7.841684585237836e-07, + "loss": 17.1971, + "step": 418690 + }, + { + "epoch": 0.8458004904713615, + "grad_norm": 510.4516296386719, + "learning_rate": 7.839807925517834e-07, + "loss": 17.4608, + "step": 418700 + }, + { + "epoch": 0.8458206911040453, + "grad_norm": 278.193115234375, + "learning_rate": 7.837931471281513e-07, + "loss": 11.2845, + "step": 418710 + }, + { + "epoch": 0.8458408917367292, + "grad_norm": 289.3529968261719, + "learning_rate": 7.836055222537997e-07, + "loss": 8.1362, + "step": 418720 + }, + { + "epoch": 0.845861092369413, + "grad_norm": 482.45391845703125, + "learning_rate": 7.834179179296419e-07, + "loss": 13.6992, + "step": 418730 + }, + { + "epoch": 0.8458812930020968, + "grad_norm": 90.5341567993164, + "learning_rate": 7.832303341565938e-07, + "loss": 15.0954, + "step": 418740 + }, + { + "epoch": 0.8459014936347806, + "grad_norm": 542.28466796875, + "learning_rate": 7.830427709355726e-07, + "loss": 10.9046, + "step": 418750 + }, + { + "epoch": 0.8459216942674644, + "grad_norm": 462.2651672363281, + "learning_rate": 7.828552282674867e-07, + "loss": 34.2711, + "step": 418760 + }, + { + "epoch": 0.8459418949001483, + "grad_norm": 730.208984375, + "learning_rate": 7.826677061532528e-07, + "loss": 19.8283, + "step": 418770 + }, + { + "epoch": 0.8459620955328321, + "grad_norm": 314.582275390625, + "learning_rate": 7.824802045937863e-07, + "loss": 27.1289, + "step": 418780 + }, + { + "epoch": 0.8459822961655159, + "grad_norm": 522.7733154296875, + 
"learning_rate": 7.822927235900001e-07, + "loss": 12.4487, + "step": 418790 + }, + { + "epoch": 0.8460024967981997, + "grad_norm": 107.94712829589844, + "learning_rate": 7.821052631428061e-07, + "loss": 12.2357, + "step": 418800 + }, + { + "epoch": 0.8460226974308835, + "grad_norm": 296.1025085449219, + "learning_rate": 7.819178232531205e-07, + "loss": 11.7953, + "step": 418810 + }, + { + "epoch": 0.8460428980635674, + "grad_norm": 190.7719268798828, + "learning_rate": 7.81730403921856e-07, + "loss": 20.8951, + "step": 418820 + }, + { + "epoch": 0.8460630986962512, + "grad_norm": 181.572509765625, + "learning_rate": 7.815430051499251e-07, + "loss": 17.1508, + "step": 418830 + }, + { + "epoch": 0.846083299328935, + "grad_norm": 123.58523559570312, + "learning_rate": 7.813556269382427e-07, + "loss": 17.396, + "step": 418840 + }, + { + "epoch": 0.8461034999616188, + "grad_norm": 284.181884765625, + "learning_rate": 7.811682692877204e-07, + "loss": 16.8144, + "step": 418850 + }, + { + "epoch": 0.8461237005943026, + "grad_norm": 437.01385498046875, + "learning_rate": 7.809809321992729e-07, + "loss": 18.8257, + "step": 418860 + }, + { + "epoch": 0.8461439012269865, + "grad_norm": 137.48239135742188, + "learning_rate": 7.807936156738133e-07, + "loss": 10.2114, + "step": 418870 + }, + { + "epoch": 0.8461641018596703, + "grad_norm": 38.34272384643555, + "learning_rate": 7.80606319712252e-07, + "loss": 13.6911, + "step": 418880 + }, + { + "epoch": 0.8461843024923541, + "grad_norm": 320.4749755859375, + "learning_rate": 7.804190443155057e-07, + "loss": 9.4036, + "step": 418890 + }, + { + "epoch": 0.8462045031250379, + "grad_norm": 527.709228515625, + "learning_rate": 7.802317894844835e-07, + "loss": 15.5926, + "step": 418900 + }, + { + "epoch": 0.8462247037577217, + "grad_norm": 262.3277282714844, + "learning_rate": 7.800445552201014e-07, + "loss": 22.2958, + "step": 418910 + }, + { + "epoch": 0.8462449043904056, + "grad_norm": 332.1363220214844, + "learning_rate": 7.798573415232686e-07, + "loss": 13.8192, + "step": 418920 + }, + { + "epoch": 0.8462651050230893, + "grad_norm": 266.5022277832031, + "learning_rate": 7.79670148394901e-07, + "loss": 20.8731, + "step": 418930 + }, + { + "epoch": 0.8462853056557731, + "grad_norm": 81.87886810302734, + "learning_rate": 7.794829758359085e-07, + "loss": 18.3454, + "step": 418940 + }, + { + "epoch": 0.8463055062884569, + "grad_norm": 615.016845703125, + "learning_rate": 7.792958238472037e-07, + "loss": 21.2683, + "step": 418950 + }, + { + "epoch": 0.8463257069211407, + "grad_norm": 299.83892822265625, + "learning_rate": 7.791086924296998e-07, + "loss": 20.1067, + "step": 418960 + }, + { + "epoch": 0.8463459075538246, + "grad_norm": 742.3004150390625, + "learning_rate": 7.789215815843082e-07, + "loss": 28.9742, + "step": 418970 + }, + { + "epoch": 0.8463661081865084, + "grad_norm": 306.6733703613281, + "learning_rate": 7.787344913119399e-07, + "loss": 9.6765, + "step": 418980 + }, + { + "epoch": 0.8463863088191922, + "grad_norm": 59.7943229675293, + "learning_rate": 7.785474216135081e-07, + "loss": 6.1867, + "step": 418990 + }, + { + "epoch": 0.846406509451876, + "grad_norm": 306.6423645019531, + "learning_rate": 7.783603724899258e-07, + "loss": 8.696, + "step": 419000 + }, + { + "epoch": 0.8464267100845598, + "grad_norm": 192.6134796142578, + "learning_rate": 7.781733439421013e-07, + "loss": 17.1364, + "step": 419010 + }, + { + "epoch": 0.8464469107172437, + "grad_norm": 316.57684326171875, + "learning_rate": 7.779863359709472e-07, + "loss": 13.7423, + "step": 
419020 + }, + { + "epoch": 0.8464671113499275, + "grad_norm": 244.40464782714844, + "learning_rate": 7.777993485773771e-07, + "loss": 7.499, + "step": 419030 + }, + { + "epoch": 0.8464873119826113, + "grad_norm": 271.5377502441406, + "learning_rate": 7.776123817623011e-07, + "loss": 17.381, + "step": 419040 + }, + { + "epoch": 0.8465075126152951, + "grad_norm": 281.7366027832031, + "learning_rate": 7.774254355266287e-07, + "loss": 6.5732, + "step": 419050 + }, + { + "epoch": 0.8465277132479789, + "grad_norm": 173.3346405029297, + "learning_rate": 7.772385098712731e-07, + "loss": 30.2304, + "step": 419060 + }, + { + "epoch": 0.8465479138806628, + "grad_norm": 68.60765075683594, + "learning_rate": 7.770516047971466e-07, + "loss": 10.1655, + "step": 419070 + }, + { + "epoch": 0.8465681145133466, + "grad_norm": 230.587646484375, + "learning_rate": 7.768647203051566e-07, + "loss": 17.9789, + "step": 419080 + }, + { + "epoch": 0.8465883151460304, + "grad_norm": 264.0446472167969, + "learning_rate": 7.766778563962152e-07, + "loss": 13.6752, + "step": 419090 + }, + { + "epoch": 0.8466085157787142, + "grad_norm": 503.3843994140625, + "learning_rate": 7.76491013071235e-07, + "loss": 29.5807, + "step": 419100 + }, + { + "epoch": 0.846628716411398, + "grad_norm": 331.93603515625, + "learning_rate": 7.763041903311258e-07, + "loss": 27.7832, + "step": 419110 + }, + { + "epoch": 0.8466489170440819, + "grad_norm": 235.54718017578125, + "learning_rate": 7.761173881767958e-07, + "loss": 18.7475, + "step": 419120 + }, + { + "epoch": 0.8466691176767657, + "grad_norm": 248.29202270507812, + "learning_rate": 7.759306066091593e-07, + "loss": 14.1553, + "step": 419130 + }, + { + "epoch": 0.8466893183094495, + "grad_norm": 10.38530445098877, + "learning_rate": 7.757438456291245e-07, + "loss": 15.3336, + "step": 419140 + }, + { + "epoch": 0.8467095189421333, + "grad_norm": 779.1312255859375, + "learning_rate": 7.755571052376004e-07, + "loss": 26.1951, + "step": 419150 + }, + { + "epoch": 0.8467297195748171, + "grad_norm": 179.73486328125, + "learning_rate": 7.753703854354999e-07, + "loss": 12.9748, + "step": 419160 + }, + { + "epoch": 0.846749920207501, + "grad_norm": 385.7361755371094, + "learning_rate": 7.751836862237305e-07, + "loss": 27.418, + "step": 419170 + }, + { + "epoch": 0.8467701208401847, + "grad_norm": 370.1129150390625, + "learning_rate": 7.749970076032048e-07, + "loss": 12.369, + "step": 419180 + }, + { + "epoch": 0.8467903214728685, + "grad_norm": 276.7247314453125, + "learning_rate": 7.748103495748299e-07, + "loss": 14.2185, + "step": 419190 + }, + { + "epoch": 0.8468105221055523, + "grad_norm": 461.1651611328125, + "learning_rate": 7.746237121395184e-07, + "loss": 17.3819, + "step": 419200 + }, + { + "epoch": 0.8468307227382361, + "grad_norm": 336.64678955078125, + "learning_rate": 7.744370952981778e-07, + "loss": 15.2645, + "step": 419210 + }, + { + "epoch": 0.8468509233709199, + "grad_norm": 239.9922637939453, + "learning_rate": 7.742504990517174e-07, + "loss": 20.9572, + "step": 419220 + }, + { + "epoch": 0.8468711240036038, + "grad_norm": 354.9892578125, + "learning_rate": 7.740639234010488e-07, + "loss": 17.1885, + "step": 419230 + }, + { + "epoch": 0.8468913246362876, + "grad_norm": 346.224853515625, + "learning_rate": 7.73877368347079e-07, + "loss": 21.1393, + "step": 419240 + }, + { + "epoch": 0.8469115252689714, + "grad_norm": 2.0196142196655273, + "learning_rate": 7.736908338907195e-07, + "loss": 28.4272, + "step": 419250 + }, + { + "epoch": 0.8469317259016552, + "grad_norm": 
301.7315979003906, + "learning_rate": 7.735043200328784e-07, + "loss": 11.9376, + "step": 419260 + }, + { + "epoch": 0.846951926534339, + "grad_norm": 120.46678924560547, + "learning_rate": 7.733178267744634e-07, + "loss": 23.0687, + "step": 419270 + }, + { + "epoch": 0.8469721271670229, + "grad_norm": 213.86441040039062, + "learning_rate": 7.73131354116386e-07, + "loss": 20.7339, + "step": 419280 + }, + { + "epoch": 0.8469923277997067, + "grad_norm": 305.5952453613281, + "learning_rate": 7.729449020595531e-07, + "loss": 20.777, + "step": 419290 + }, + { + "epoch": 0.8470125284323905, + "grad_norm": 324.5541076660156, + "learning_rate": 7.727584706048735e-07, + "loss": 11.8582, + "step": 419300 + }, + { + "epoch": 0.8470327290650743, + "grad_norm": 314.37835693359375, + "learning_rate": 7.72572059753256e-07, + "loss": 24.1484, + "step": 419310 + }, + { + "epoch": 0.8470529296977581, + "grad_norm": 577.531982421875, + "learning_rate": 7.723856695056109e-07, + "loss": 12.3574, + "step": 419320 + }, + { + "epoch": 0.847073130330442, + "grad_norm": 294.72174072265625, + "learning_rate": 7.721992998628452e-07, + "loss": 14.6806, + "step": 419330 + }, + { + "epoch": 0.8470933309631258, + "grad_norm": 275.53570556640625, + "learning_rate": 7.720129508258667e-07, + "loss": 22.1799, + "step": 419340 + }, + { + "epoch": 0.8471135315958096, + "grad_norm": 132.30506896972656, + "learning_rate": 7.71826622395585e-07, + "loss": 12.9379, + "step": 419350 + }, + { + "epoch": 0.8471337322284934, + "grad_norm": 49.43588638305664, + "learning_rate": 7.716403145729073e-07, + "loss": 26.2082, + "step": 419360 + }, + { + "epoch": 0.8471539328611772, + "grad_norm": 632.3158569335938, + "learning_rate": 7.714540273587412e-07, + "loss": 13.9699, + "step": 419370 + }, + { + "epoch": 0.8471741334938611, + "grad_norm": 576.3694458007812, + "learning_rate": 7.712677607539948e-07, + "loss": 18.9505, + "step": 419380 + }, + { + "epoch": 0.8471943341265449, + "grad_norm": 270.6705322265625, + "learning_rate": 7.710815147595779e-07, + "loss": 14.4009, + "step": 419390 + }, + { + "epoch": 0.8472145347592287, + "grad_norm": 484.50408935546875, + "learning_rate": 7.708952893763972e-07, + "loss": 21.9983, + "step": 419400 + }, + { + "epoch": 0.8472347353919125, + "grad_norm": 135.74899291992188, + "learning_rate": 7.707090846053577e-07, + "loss": 38.6355, + "step": 419410 + }, + { + "epoch": 0.8472549360245963, + "grad_norm": 633.8313598632812, + "learning_rate": 7.705229004473713e-07, + "loss": 14.0703, + "step": 419420 + }, + { + "epoch": 0.8472751366572802, + "grad_norm": 421.99761962890625, + "learning_rate": 7.703367369033432e-07, + "loss": 19.2649, + "step": 419430 + }, + { + "epoch": 0.8472953372899639, + "grad_norm": 155.74380493164062, + "learning_rate": 7.701505939741793e-07, + "loss": 17.2813, + "step": 419440 + }, + { + "epoch": 0.8473155379226477, + "grad_norm": 489.8328552246094, + "learning_rate": 7.699644716607896e-07, + "loss": 16.5044, + "step": 419450 + }, + { + "epoch": 0.8473357385553315, + "grad_norm": 95.47686004638672, + "learning_rate": 7.697783699640793e-07, + "loss": 14.6129, + "step": 419460 + }, + { + "epoch": 0.8473559391880153, + "grad_norm": 133.22848510742188, + "learning_rate": 7.695922888849566e-07, + "loss": 44.6609, + "step": 419470 + }, + { + "epoch": 0.8473761398206991, + "grad_norm": 166.31944274902344, + "learning_rate": 7.694062284243287e-07, + "loss": 16.1958, + "step": 419480 + }, + { + "epoch": 0.847396340453383, + "grad_norm": 258.57080078125, + "learning_rate": 
7.692201885831002e-07, + "loss": 36.7881, + "step": 419490 + }, + { + "epoch": 0.8474165410860668, + "grad_norm": 318.3566589355469, + "learning_rate": 7.690341693621805e-07, + "loss": 18.1619, + "step": 419500 + }, + { + "epoch": 0.8474367417187506, + "grad_norm": 139.5306854248047, + "learning_rate": 7.68848170762474e-07, + "loss": 18.9246, + "step": 419510 + }, + { + "epoch": 0.8474569423514344, + "grad_norm": 554.505615234375, + "learning_rate": 7.686621927848898e-07, + "loss": 19.0492, + "step": 419520 + }, + { + "epoch": 0.8474771429841182, + "grad_norm": 285.1543884277344, + "learning_rate": 7.684762354303316e-07, + "loss": 32.9261, + "step": 419530 + }, + { + "epoch": 0.8474973436168021, + "grad_norm": 286.7790832519531, + "learning_rate": 7.682902986997076e-07, + "loss": 6.3039, + "step": 419540 + }, + { + "epoch": 0.8475175442494859, + "grad_norm": 158.7056121826172, + "learning_rate": 7.681043825939238e-07, + "loss": 11.8037, + "step": 419550 + }, + { + "epoch": 0.8475377448821697, + "grad_norm": 227.37754821777344, + "learning_rate": 7.679184871138851e-07, + "loss": 10.6346, + "step": 419560 + }, + { + "epoch": 0.8475579455148535, + "grad_norm": 442.99200439453125, + "learning_rate": 7.677326122604995e-07, + "loss": 40.652, + "step": 419570 + }, + { + "epoch": 0.8475781461475373, + "grad_norm": 409.53076171875, + "learning_rate": 7.675467580346719e-07, + "loss": 19.2032, + "step": 419580 + }, + { + "epoch": 0.8475983467802212, + "grad_norm": 614.8516845703125, + "learning_rate": 7.673609244373065e-07, + "loss": 19.0753, + "step": 419590 + }, + { + "epoch": 0.847618547412905, + "grad_norm": 293.042724609375, + "learning_rate": 7.671751114693104e-07, + "loss": 21.46, + "step": 419600 + }, + { + "epoch": 0.8476387480455888, + "grad_norm": 289.71075439453125, + "learning_rate": 7.669893191315924e-07, + "loss": 18.8878, + "step": 419610 + }, + { + "epoch": 0.8476589486782726, + "grad_norm": 325.4839782714844, + "learning_rate": 7.668035474250523e-07, + "loss": 30.0041, + "step": 419620 + }, + { + "epoch": 0.8476791493109564, + "grad_norm": 173.85630798339844, + "learning_rate": 7.666177963505989e-07, + "loss": 8.7573, + "step": 419630 + }, + { + "epoch": 0.8476993499436403, + "grad_norm": 526.9801025390625, + "learning_rate": 7.664320659091373e-07, + "loss": 27.2767, + "step": 419640 + }, + { + "epoch": 0.8477195505763241, + "grad_norm": 909.6722412109375, + "learning_rate": 7.662463561015726e-07, + "loss": 16.1532, + "step": 419650 + }, + { + "epoch": 0.8477397512090079, + "grad_norm": 204.0998077392578, + "learning_rate": 7.66060666928809e-07, + "loss": 14.3266, + "step": 419660 + }, + { + "epoch": 0.8477599518416917, + "grad_norm": 357.2112121582031, + "learning_rate": 7.658749983917512e-07, + "loss": 23.5506, + "step": 419670 + }, + { + "epoch": 0.8477801524743755, + "grad_norm": 116.1423568725586, + "learning_rate": 7.656893504913082e-07, + "loss": 17.3412, + "step": 419680 + }, + { + "epoch": 0.8478003531070594, + "grad_norm": 1107.73388671875, + "learning_rate": 7.655037232283791e-07, + "loss": 29.388, + "step": 419690 + }, + { + "epoch": 0.8478205537397431, + "grad_norm": 449.1150817871094, + "learning_rate": 7.653181166038715e-07, + "loss": 25.4893, + "step": 419700 + }, + { + "epoch": 0.8478407543724269, + "grad_norm": 279.9544982910156, + "learning_rate": 7.651325306186908e-07, + "loss": 23.8791, + "step": 419710 + }, + { + "epoch": 0.8478609550051107, + "grad_norm": 207.33346557617188, + "learning_rate": 7.649469652737407e-07, + "loss": 22.5027, + "step": 419720 + }, + { 
+ "epoch": 0.8478811556377945, + "grad_norm": 521.3672485351562, + "learning_rate": 7.647614205699244e-07, + "loss": 22.6143, + "step": 419730 + }, + { + "epoch": 0.8479013562704784, + "grad_norm": 22.869937896728516, + "learning_rate": 7.645758965081478e-07, + "loss": 22.4272, + "step": 419740 + }, + { + "epoch": 0.8479215569031622, + "grad_norm": 543.2610473632812, + "learning_rate": 7.643903930893154e-07, + "loss": 16.8551, + "step": 419750 + }, + { + "epoch": 0.847941757535846, + "grad_norm": 526.971923828125, + "learning_rate": 7.64204910314329e-07, + "loss": 23.0066, + "step": 419760 + }, + { + "epoch": 0.8479619581685298, + "grad_norm": 616.48876953125, + "learning_rate": 7.640194481840951e-07, + "loss": 21.9162, + "step": 419770 + }, + { + "epoch": 0.8479821588012136, + "grad_norm": 275.0811462402344, + "learning_rate": 7.638340066995154e-07, + "loss": 13.5628, + "step": 419780 + }, + { + "epoch": 0.8480023594338975, + "grad_norm": 305.490966796875, + "learning_rate": 7.636485858614962e-07, + "loss": 21.4191, + "step": 419790 + }, + { + "epoch": 0.8480225600665813, + "grad_norm": 31.853561401367188, + "learning_rate": 7.63463185670939e-07, + "loss": 38.427, + "step": 419800 + }, + { + "epoch": 0.8480427606992651, + "grad_norm": 205.2315216064453, + "learning_rate": 7.632778061287494e-07, + "loss": 11.8003, + "step": 419810 + }, + { + "epoch": 0.8480629613319489, + "grad_norm": 0.2011181265115738, + "learning_rate": 7.630924472358304e-07, + "loss": 15.5303, + "step": 419820 + }, + { + "epoch": 0.8480831619646327, + "grad_norm": 191.06072998046875, + "learning_rate": 7.629071089930834e-07, + "loss": 18.2567, + "step": 419830 + }, + { + "epoch": 0.8481033625973166, + "grad_norm": 233.3933868408203, + "learning_rate": 7.62721791401414e-07, + "loss": 22.8896, + "step": 419840 + }, + { + "epoch": 0.8481235632300004, + "grad_norm": 440.032958984375, + "learning_rate": 7.625364944617242e-07, + "loss": 25.5893, + "step": 419850 + }, + { + "epoch": 0.8481437638626842, + "grad_norm": 341.0321960449219, + "learning_rate": 7.623512181749182e-07, + "loss": 14.6416, + "step": 419860 + }, + { + "epoch": 0.848163964495368, + "grad_norm": 494.1391906738281, + "learning_rate": 7.621659625418987e-07, + "loss": 25.9168, + "step": 419870 + }, + { + "epoch": 0.8481841651280518, + "grad_norm": 491.30035400390625, + "learning_rate": 7.619807275635672e-07, + "loss": 17.8396, + "step": 419880 + }, + { + "epoch": 0.8482043657607357, + "grad_norm": 390.82745361328125, + "learning_rate": 7.617955132408289e-07, + "loss": 11.7248, + "step": 419890 + }, + { + "epoch": 0.8482245663934195, + "grad_norm": 188.36827087402344, + "learning_rate": 7.61610319574585e-07, + "loss": 34.1716, + "step": 419900 + }, + { + "epoch": 0.8482447670261033, + "grad_norm": 960.0751953125, + "learning_rate": 7.614251465657374e-07, + "loss": 22.1919, + "step": 419910 + }, + { + "epoch": 0.8482649676587871, + "grad_norm": 12.805871963500977, + "learning_rate": 7.612399942151894e-07, + "loss": 12.9999, + "step": 419920 + }, + { + "epoch": 0.8482851682914709, + "grad_norm": 360.5621643066406, + "learning_rate": 7.610548625238445e-07, + "loss": 15.7093, + "step": 419930 + }, + { + "epoch": 0.8483053689241548, + "grad_norm": 436.2169494628906, + "learning_rate": 7.608697514926045e-07, + "loss": 13.3797, + "step": 419940 + }, + { + "epoch": 0.8483255695568385, + "grad_norm": 324.625732421875, + "learning_rate": 7.606846611223695e-07, + "loss": 11.3572, + "step": 419950 + }, + { + "epoch": 0.8483457701895223, + "grad_norm": 507.09783935546875, 
+ "learning_rate": 7.60499591414045e-07, + "loss": 34.2821, + "step": 419960 + }, + { + "epoch": 0.8483659708222061, + "grad_norm": 24.52530860900879, + "learning_rate": 7.60314542368531e-07, + "loss": 16.4815, + "step": 419970 + }, + { + "epoch": 0.8483861714548899, + "grad_norm": 225.76918029785156, + "learning_rate": 7.601295139867287e-07, + "loss": 18.7387, + "step": 419980 + }, + { + "epoch": 0.8484063720875737, + "grad_norm": 346.3161926269531, + "learning_rate": 7.599445062695404e-07, + "loss": 20.7518, + "step": 419990 + }, + { + "epoch": 0.8484265727202576, + "grad_norm": 215.38546752929688, + "learning_rate": 7.597595192178702e-07, + "loss": 7.1108, + "step": 420000 + }, + { + "epoch": 0.8484467733529414, + "grad_norm": 172.4610137939453, + "learning_rate": 7.595745528326176e-07, + "loss": 30.5344, + "step": 420010 + }, + { + "epoch": 0.8484669739856252, + "grad_norm": 339.5697021484375, + "learning_rate": 7.593896071146828e-07, + "loss": 11.9902, + "step": 420020 + }, + { + "epoch": 0.848487174618309, + "grad_norm": 445.3851013183594, + "learning_rate": 7.592046820649706e-07, + "loss": 13.3798, + "step": 420030 + }, + { + "epoch": 0.8485073752509928, + "grad_norm": 216.46893310546875, + "learning_rate": 7.5901977768438e-07, + "loss": 17.0299, + "step": 420040 + }, + { + "epoch": 0.8485275758836767, + "grad_norm": 429.12762451171875, + "learning_rate": 7.588348939738116e-07, + "loss": 13.6675, + "step": 420050 + }, + { + "epoch": 0.8485477765163605, + "grad_norm": 9.526630401611328, + "learning_rate": 7.586500309341682e-07, + "loss": 14.2783, + "step": 420060 + }, + { + "epoch": 0.8485679771490443, + "grad_norm": 292.997314453125, + "learning_rate": 7.584651885663497e-07, + "loss": 9.1951, + "step": 420070 + }, + { + "epoch": 0.8485881777817281, + "grad_norm": 168.2415313720703, + "learning_rate": 7.582803668712579e-07, + "loss": 11.4824, + "step": 420080 + }, + { + "epoch": 0.848608378414412, + "grad_norm": 236.3516387939453, + "learning_rate": 7.580955658497924e-07, + "loss": 21.5602, + "step": 420090 + }, + { + "epoch": 0.8486285790470958, + "grad_norm": 239.54022216796875, + "learning_rate": 7.579107855028562e-07, + "loss": 21.8654, + "step": 420100 + }, + { + "epoch": 0.8486487796797796, + "grad_norm": 361.20947265625, + "learning_rate": 7.577260258313474e-07, + "loss": 25.1178, + "step": 420110 + }, + { + "epoch": 0.8486689803124634, + "grad_norm": 521.0519409179688, + "learning_rate": 7.57541286836167e-07, + "loss": 15.5121, + "step": 420120 + }, + { + "epoch": 0.8486891809451472, + "grad_norm": 309.9486389160156, + "learning_rate": 7.573565685182166e-07, + "loss": 16.8341, + "step": 420130 + }, + { + "epoch": 0.848709381577831, + "grad_norm": 99.5003433227539, + "learning_rate": 7.571718708783948e-07, + "loss": 11.3829, + "step": 420140 + }, + { + "epoch": 0.8487295822105149, + "grad_norm": 501.2671203613281, + "learning_rate": 7.569871939176037e-07, + "loss": 22.9475, + "step": 420150 + }, + { + "epoch": 0.8487497828431987, + "grad_norm": 145.5573272705078, + "learning_rate": 7.568025376367422e-07, + "loss": 12.3609, + "step": 420160 + }, + { + "epoch": 0.8487699834758825, + "grad_norm": 389.3744201660156, + "learning_rate": 7.566179020367098e-07, + "loss": 18.8499, + "step": 420170 + }, + { + "epoch": 0.8487901841085663, + "grad_norm": 376.95501708984375, + "learning_rate": 7.564332871184077e-07, + "loss": 23.0914, + "step": 420180 + }, + { + "epoch": 0.8488103847412501, + "grad_norm": 844.72705078125, + "learning_rate": 7.562486928827356e-07, + "loss": 25.2918, + 
"step": 420190 + }, + { + "epoch": 0.848830585373934, + "grad_norm": 556.1143188476562, + "learning_rate": 7.560641193305912e-07, + "loss": 30.2421, + "step": 420200 + }, + { + "epoch": 0.8488507860066177, + "grad_norm": 452.9190979003906, + "learning_rate": 7.55879566462876e-07, + "loss": 20.9866, + "step": 420210 + }, + { + "epoch": 0.8488709866393015, + "grad_norm": 344.0130920410156, + "learning_rate": 7.556950342804908e-07, + "loss": 12.773, + "step": 420220 + }, + { + "epoch": 0.8488911872719853, + "grad_norm": 559.789794921875, + "learning_rate": 7.555105227843312e-07, + "loss": 22.4848, + "step": 420230 + }, + { + "epoch": 0.8489113879046691, + "grad_norm": 236.39991760253906, + "learning_rate": 7.553260319752986e-07, + "loss": 11.7751, + "step": 420240 + }, + { + "epoch": 0.848931588537353, + "grad_norm": 178.82774353027344, + "learning_rate": 7.551415618542928e-07, + "loss": 10.9281, + "step": 420250 + }, + { + "epoch": 0.8489517891700368, + "grad_norm": 416.56451416015625, + "learning_rate": 7.549571124222127e-07, + "loss": 29.0814, + "step": 420260 + }, + { + "epoch": 0.8489719898027206, + "grad_norm": 125.81304931640625, + "learning_rate": 7.547726836799551e-07, + "loss": 13.1673, + "step": 420270 + }, + { + "epoch": 0.8489921904354044, + "grad_norm": 317.0643615722656, + "learning_rate": 7.545882756284212e-07, + "loss": 7.6171, + "step": 420280 + }, + { + "epoch": 0.8490123910680882, + "grad_norm": 523.3759765625, + "learning_rate": 7.544038882685112e-07, + "loss": 25.3222, + "step": 420290 + }, + { + "epoch": 0.849032591700772, + "grad_norm": 430.9664306640625, + "learning_rate": 7.542195216011188e-07, + "loss": 18.5014, + "step": 420300 + }, + { + "epoch": 0.8490527923334559, + "grad_norm": 462.48175048828125, + "learning_rate": 7.540351756271464e-07, + "loss": 18.2764, + "step": 420310 + }, + { + "epoch": 0.8490729929661397, + "grad_norm": 312.3878173828125, + "learning_rate": 7.538508503474923e-07, + "loss": 19.0678, + "step": 420320 + }, + { + "epoch": 0.8490931935988235, + "grad_norm": 158.47137451171875, + "learning_rate": 7.536665457630544e-07, + "loss": 21.0344, + "step": 420330 + }, + { + "epoch": 0.8491133942315073, + "grad_norm": 358.35675048828125, + "learning_rate": 7.534822618747289e-07, + "loss": 33.1354, + "step": 420340 + }, + { + "epoch": 0.8491335948641912, + "grad_norm": 539.3812255859375, + "learning_rate": 7.532979986834177e-07, + "loss": 29.5604, + "step": 420350 + }, + { + "epoch": 0.849153795496875, + "grad_norm": 234.95236206054688, + "learning_rate": 7.53113756190017e-07, + "loss": 10.4691, + "step": 420360 + }, + { + "epoch": 0.8491739961295588, + "grad_norm": 205.41302490234375, + "learning_rate": 7.529295343954229e-07, + "loss": 9.7857, + "step": 420370 + }, + { + "epoch": 0.8491941967622426, + "grad_norm": 466.1488037109375, + "learning_rate": 7.527453333005368e-07, + "loss": 16.7474, + "step": 420380 + }, + { + "epoch": 0.8492143973949264, + "grad_norm": 1036.1993408203125, + "learning_rate": 7.525611529062538e-07, + "loss": 24.6821, + "step": 420390 + }, + { + "epoch": 0.8492345980276103, + "grad_norm": 448.77362060546875, + "learning_rate": 7.523769932134739e-07, + "loss": 17.0026, + "step": 420400 + }, + { + "epoch": 0.8492547986602941, + "grad_norm": 88.42417907714844, + "learning_rate": 7.521928542230916e-07, + "loss": 21.489, + "step": 420410 + }, + { + "epoch": 0.8492749992929779, + "grad_norm": 304.2235412597656, + "learning_rate": 7.520087359360073e-07, + "loss": 6.9169, + "step": 420420 + }, + { + "epoch": 0.8492951999256617, + 
"grad_norm": 195.1634063720703, + "learning_rate": 7.51824638353118e-07, + "loss": 17.1502, + "step": 420430 + }, + { + "epoch": 0.8493154005583455, + "grad_norm": 17.28790855407715, + "learning_rate": 7.51640561475318e-07, + "loss": 16.0534, + "step": 420440 + }, + { + "epoch": 0.8493356011910294, + "grad_norm": 185.24879455566406, + "learning_rate": 7.514565053035083e-07, + "loss": 10.7844, + "step": 420450 + }, + { + "epoch": 0.8493558018237131, + "grad_norm": 467.2895202636719, + "learning_rate": 7.512724698385831e-07, + "loss": 12.9716, + "step": 420460 + }, + { + "epoch": 0.8493760024563969, + "grad_norm": 195.4284210205078, + "learning_rate": 7.510884550814418e-07, + "loss": 13.6663, + "step": 420470 + }, + { + "epoch": 0.8493962030890807, + "grad_norm": 298.0000915527344, + "learning_rate": 7.509044610329803e-07, + "loss": 29.8925, + "step": 420480 + }, + { + "epoch": 0.8494164037217645, + "grad_norm": 297.3729248046875, + "learning_rate": 7.507204876940938e-07, + "loss": 13.3852, + "step": 420490 + }, + { + "epoch": 0.8494366043544483, + "grad_norm": 187.8828887939453, + "learning_rate": 7.505365350656813e-07, + "loss": 11.998, + "step": 420500 + }, + { + "epoch": 0.8494568049871322, + "grad_norm": 617.66552734375, + "learning_rate": 7.50352603148638e-07, + "loss": 25.929, + "step": 420510 + }, + { + "epoch": 0.849477005619816, + "grad_norm": 349.9392395019531, + "learning_rate": 7.5016869194386e-07, + "loss": 14.9932, + "step": 420520 + }, + { + "epoch": 0.8494972062524998, + "grad_norm": 259.2439270019531, + "learning_rate": 7.499848014522443e-07, + "loss": 19.1879, + "step": 420530 + }, + { + "epoch": 0.8495174068851836, + "grad_norm": 257.3936767578125, + "learning_rate": 7.498009316746879e-07, + "loss": 30.295, + "step": 420540 + }, + { + "epoch": 0.8495376075178674, + "grad_norm": 373.472900390625, + "learning_rate": 7.496170826120869e-07, + "loss": 19.4068, + "step": 420550 + }, + { + "epoch": 0.8495578081505513, + "grad_norm": 835.5751953125, + "learning_rate": 7.494332542653349e-07, + "loss": 18.3841, + "step": 420560 + }, + { + "epoch": 0.8495780087832351, + "grad_norm": 344.0382080078125, + "learning_rate": 7.492494466353317e-07, + "loss": 25.2305, + "step": 420570 + }, + { + "epoch": 0.8495982094159189, + "grad_norm": 439.5791320800781, + "learning_rate": 7.490656597229707e-07, + "loss": 34.9259, + "step": 420580 + }, + { + "epoch": 0.8496184100486027, + "grad_norm": 294.4809875488281, + "learning_rate": 7.488818935291465e-07, + "loss": 26.3275, + "step": 420590 + }, + { + "epoch": 0.8496386106812865, + "grad_norm": 312.11370849609375, + "learning_rate": 7.486981480547567e-07, + "loss": 30.0317, + "step": 420600 + }, + { + "epoch": 0.8496588113139704, + "grad_norm": 177.1890106201172, + "learning_rate": 7.48514423300698e-07, + "loss": 21.0186, + "step": 420610 + }, + { + "epoch": 0.8496790119466542, + "grad_norm": 83.07833862304688, + "learning_rate": 7.48330719267864e-07, + "loss": 9.8696, + "step": 420620 + }, + { + "epoch": 0.849699212579338, + "grad_norm": 548.93310546875, + "learning_rate": 7.481470359571497e-07, + "loss": 14.6292, + "step": 420630 + }, + { + "epoch": 0.8497194132120218, + "grad_norm": 274.3866882324219, + "learning_rate": 7.479633733694519e-07, + "loss": 14.8856, + "step": 420640 + }, + { + "epoch": 0.8497396138447056, + "grad_norm": 734.2679443359375, + "learning_rate": 7.477797315056645e-07, + "loss": 22.1115, + "step": 420650 + }, + { + "epoch": 0.8497598144773895, + "grad_norm": 447.2192687988281, + "learning_rate": 7.475961103666824e-07, + 
"loss": 17.7805, + "step": 420660 + }, + { + "epoch": 0.8497800151100733, + "grad_norm": 216.69761657714844, + "learning_rate": 7.474125099534019e-07, + "loss": 18.5491, + "step": 420670 + }, + { + "epoch": 0.8498002157427571, + "grad_norm": 214.6820831298828, + "learning_rate": 7.472289302667163e-07, + "loss": 13.082, + "step": 420680 + }, + { + "epoch": 0.8498204163754409, + "grad_norm": 361.2976379394531, + "learning_rate": 7.470453713075215e-07, + "loss": 24.109, + "step": 420690 + }, + { + "epoch": 0.8498406170081247, + "grad_norm": 281.5863037109375, + "learning_rate": 7.468618330767114e-07, + "loss": 11.7758, + "step": 420700 + }, + { + "epoch": 0.8498608176408086, + "grad_norm": 71.94729614257812, + "learning_rate": 7.466783155751816e-07, + "loss": 14.2478, + "step": 420710 + }, + { + "epoch": 0.8498810182734923, + "grad_norm": 362.0234375, + "learning_rate": 7.464948188038262e-07, + "loss": 13.8452, + "step": 420720 + }, + { + "epoch": 0.8499012189061761, + "grad_norm": 562.4525146484375, + "learning_rate": 7.463113427635376e-07, + "loss": 23.803, + "step": 420730 + }, + { + "epoch": 0.8499214195388599, + "grad_norm": 171.87557983398438, + "learning_rate": 7.461278874552131e-07, + "loss": 7.2911, + "step": 420740 + }, + { + "epoch": 0.8499416201715437, + "grad_norm": 215.71307373046875, + "learning_rate": 7.459444528797438e-07, + "loss": 9.4946, + "step": 420750 + }, + { + "epoch": 0.8499618208042276, + "grad_norm": 240.4515838623047, + "learning_rate": 7.457610390380265e-07, + "loss": 15.6298, + "step": 420760 + }, + { + "epoch": 0.8499820214369114, + "grad_norm": 410.1695251464844, + "learning_rate": 7.455776459309538e-07, + "loss": 15.6956, + "step": 420770 + }, + { + "epoch": 0.8500022220695952, + "grad_norm": 235.95631408691406, + "learning_rate": 7.453942735594189e-07, + "loss": 31.0048, + "step": 420780 + }, + { + "epoch": 0.850022422702279, + "grad_norm": 133.65708923339844, + "learning_rate": 7.452109219243175e-07, + "loss": 14.8682, + "step": 420790 + }, + { + "epoch": 0.8500426233349628, + "grad_norm": 217.16836547851562, + "learning_rate": 7.450275910265415e-07, + "loss": 29.9921, + "step": 420800 + }, + { + "epoch": 0.8500628239676467, + "grad_norm": 527.6741943359375, + "learning_rate": 7.448442808669842e-07, + "loss": 15.8221, + "step": 420810 + }, + { + "epoch": 0.8500830246003305, + "grad_norm": 312.0057373046875, + "learning_rate": 7.446609914465397e-07, + "loss": 18.7422, + "step": 420820 + }, + { + "epoch": 0.8501032252330143, + "grad_norm": 212.4395751953125, + "learning_rate": 7.444777227661037e-07, + "loss": 19.5951, + "step": 420830 + }, + { + "epoch": 0.8501234258656981, + "grad_norm": 257.22882080078125, + "learning_rate": 7.442944748265651e-07, + "loss": 16.0408, + "step": 420840 + }, + { + "epoch": 0.8501436264983819, + "grad_norm": 224.76571655273438, + "learning_rate": 7.441112476288187e-07, + "loss": 22.7621, + "step": 420850 + }, + { + "epoch": 0.8501638271310658, + "grad_norm": 111.08911895751953, + "learning_rate": 7.439280411737592e-07, + "loss": 11.3516, + "step": 420860 + }, + { + "epoch": 0.8501840277637496, + "grad_norm": 237.11033630371094, + "learning_rate": 7.437448554622783e-07, + "loss": 6.7287, + "step": 420870 + }, + { + "epoch": 0.8502042283964334, + "grad_norm": 395.3372497558594, + "learning_rate": 7.435616904952675e-07, + "loss": 19.3152, + "step": 420880 + }, + { + "epoch": 0.8502244290291172, + "grad_norm": 480.9623107910156, + "learning_rate": 7.433785462736209e-07, + "loss": 20.8863, + "step": 420890 + }, + { + "epoch": 
0.850244629661801, + "grad_norm": 382.73583984375, + "learning_rate": 7.43195422798233e-07, + "loss": 19.1049, + "step": 420900 + }, + { + "epoch": 0.8502648302944849, + "grad_norm": 1869.8851318359375, + "learning_rate": 7.430123200699924e-07, + "loss": 32.079, + "step": 420910 + }, + { + "epoch": 0.8502850309271687, + "grad_norm": 712.2645874023438, + "learning_rate": 7.428292380897933e-07, + "loss": 18.4824, + "step": 420920 + }, + { + "epoch": 0.8503052315598525, + "grad_norm": 398.8208312988281, + "learning_rate": 7.426461768585291e-07, + "loss": 23.8987, + "step": 420930 + }, + { + "epoch": 0.8503254321925363, + "grad_norm": 108.1100082397461, + "learning_rate": 7.424631363770912e-07, + "loss": 15.4419, + "step": 420940 + }, + { + "epoch": 0.8503456328252201, + "grad_norm": 335.90740966796875, + "learning_rate": 7.422801166463706e-07, + "loss": 20.4621, + "step": 420950 + }, + { + "epoch": 0.850365833457904, + "grad_norm": 288.9495544433594, + "learning_rate": 7.420971176672614e-07, + "loss": 17.5241, + "step": 420960 + }, + { + "epoch": 0.8503860340905878, + "grad_norm": 348.71038818359375, + "learning_rate": 7.419141394406543e-07, + "loss": 18.832, + "step": 420970 + }, + { + "epoch": 0.8504062347232715, + "grad_norm": 540.66650390625, + "learning_rate": 7.4173118196744e-07, + "loss": 33.4038, + "step": 420980 + }, + { + "epoch": 0.8504264353559553, + "grad_norm": 284.1331787109375, + "learning_rate": 7.415482452485129e-07, + "loss": 21.0569, + "step": 420990 + }, + { + "epoch": 0.8504466359886391, + "grad_norm": 199.09298706054688, + "learning_rate": 7.413653292847617e-07, + "loss": 22.0451, + "step": 421000 + }, + { + "epoch": 0.8504668366213229, + "grad_norm": 437.1142883300781, + "learning_rate": 7.411824340770813e-07, + "loss": 21.0575, + "step": 421010 + }, + { + "epoch": 0.8504870372540068, + "grad_norm": 7.600314617156982, + "learning_rate": 7.409995596263591e-07, + "loss": 10.5348, + "step": 421020 + }, + { + "epoch": 0.8505072378866906, + "grad_norm": 411.22314453125, + "learning_rate": 7.408167059334897e-07, + "loss": 35.8578, + "step": 421030 + }, + { + "epoch": 0.8505274385193744, + "grad_norm": 206.26797485351562, + "learning_rate": 7.40633872999364e-07, + "loss": 13.219, + "step": 421040 + }, + { + "epoch": 0.8505476391520582, + "grad_norm": 343.618896484375, + "learning_rate": 7.4045106082487e-07, + "loss": 27.9574, + "step": 421050 + }, + { + "epoch": 0.850567839784742, + "grad_norm": 161.28146362304688, + "learning_rate": 7.402682694109026e-07, + "loss": 18.7444, + "step": 421060 + }, + { + "epoch": 0.8505880404174259, + "grad_norm": 341.72161865234375, + "learning_rate": 7.4008549875835e-07, + "loss": 16.2181, + "step": 421070 + }, + { + "epoch": 0.8506082410501097, + "grad_norm": 453.3415222167969, + "learning_rate": 7.399027488681049e-07, + "loss": 28.2804, + "step": 421080 + }, + { + "epoch": 0.8506284416827935, + "grad_norm": 495.8848876953125, + "learning_rate": 7.39720019741057e-07, + "loss": 30.7922, + "step": 421090 + }, + { + "epoch": 0.8506486423154773, + "grad_norm": 277.91143798828125, + "learning_rate": 7.395373113780962e-07, + "loss": 18.172, + "step": 421100 + }, + { + "epoch": 0.8506688429481611, + "grad_norm": 302.0050964355469, + "learning_rate": 7.393546237801147e-07, + "loss": 20.2798, + "step": 421110 + }, + { + "epoch": 0.850689043580845, + "grad_norm": 3.2582924365997314, + "learning_rate": 7.391719569480021e-07, + "loss": 13.3446, + "step": 421120 + }, + { + "epoch": 0.8507092442135288, + "grad_norm": 554.4869384765625, + "learning_rate": 
7.389893108826473e-07, + "loss": 26.9601, + "step": 421130 + }, + { + "epoch": 0.8507294448462126, + "grad_norm": 320.6636047363281, + "learning_rate": 7.388066855849418e-07, + "loss": 14.6543, + "step": 421140 + }, + { + "epoch": 0.8507496454788964, + "grad_norm": 301.8233642578125, + "learning_rate": 7.386240810557771e-07, + "loss": 25.684, + "step": 421150 + }, + { + "epoch": 0.8507698461115802, + "grad_norm": 1380.050048828125, + "learning_rate": 7.384414972960419e-07, + "loss": 27.1782, + "step": 421160 + }, + { + "epoch": 0.8507900467442641, + "grad_norm": 360.6093444824219, + "learning_rate": 7.382589343066243e-07, + "loss": 14.7563, + "step": 421170 + }, + { + "epoch": 0.8508102473769479, + "grad_norm": 667.0159912109375, + "learning_rate": 7.380763920884171e-07, + "loss": 20.4568, + "step": 421180 + }, + { + "epoch": 0.8508304480096317, + "grad_norm": 194.67372131347656, + "learning_rate": 7.378938706423089e-07, + "loss": 13.4816, + "step": 421190 + }, + { + "epoch": 0.8508506486423155, + "grad_norm": 118.02854919433594, + "learning_rate": 7.377113699691879e-07, + "loss": 15.1407, + "step": 421200 + }, + { + "epoch": 0.8508708492749993, + "grad_norm": 446.3256530761719, + "learning_rate": 7.375288900699445e-07, + "loss": 10.0944, + "step": 421210 + }, + { + "epoch": 0.8508910499076832, + "grad_norm": 54.94810485839844, + "learning_rate": 7.373464309454698e-07, + "loss": 22.6813, + "step": 421220 + }, + { + "epoch": 0.8509112505403669, + "grad_norm": 132.21023559570312, + "learning_rate": 7.371639925966512e-07, + "loss": 11.2265, + "step": 421230 + }, + { + "epoch": 0.8509314511730507, + "grad_norm": 949.7929077148438, + "learning_rate": 7.369815750243769e-07, + "loss": 17.2051, + "step": 421240 + }, + { + "epoch": 0.8509516518057345, + "grad_norm": 444.0530090332031, + "learning_rate": 7.367991782295392e-07, + "loss": 23.2181, + "step": 421250 + }, + { + "epoch": 0.8509718524384183, + "grad_norm": 639.97900390625, + "learning_rate": 7.366168022130249e-07, + "loss": 24.6395, + "step": 421260 + }, + { + "epoch": 0.8509920530711022, + "grad_norm": 575.516845703125, + "learning_rate": 7.364344469757223e-07, + "loss": 21.0052, + "step": 421270 + }, + { + "epoch": 0.851012253703786, + "grad_norm": 327.63720703125, + "learning_rate": 7.362521125185218e-07, + "loss": 17.7251, + "step": 421280 + }, + { + "epoch": 0.8510324543364698, + "grad_norm": 325.7966003417969, + "learning_rate": 7.360697988423105e-07, + "loss": 13.0203, + "step": 421290 + }, + { + "epoch": 0.8510526549691536, + "grad_norm": 282.32525634765625, + "learning_rate": 7.358875059479792e-07, + "loss": 14.0752, + "step": 421300 + }, + { + "epoch": 0.8510728556018374, + "grad_norm": 528.5604858398438, + "learning_rate": 7.357052338364134e-07, + "loss": 14.7047, + "step": 421310 + }, + { + "epoch": 0.8510930562345213, + "grad_norm": 2.1461079120635986, + "learning_rate": 7.355229825085047e-07, + "loss": 9.7225, + "step": 421320 + }, + { + "epoch": 0.8511132568672051, + "grad_norm": 432.22021484375, + "learning_rate": 7.353407519651395e-07, + "loss": 27.3364, + "step": 421330 + }, + { + "epoch": 0.8511334574998889, + "grad_norm": 551.1565551757812, + "learning_rate": 7.351585422072049e-07, + "loss": 15.5955, + "step": 421340 + }, + { + "epoch": 0.8511536581325727, + "grad_norm": 288.9549560546875, + "learning_rate": 7.349763532355919e-07, + "loss": 14.2593, + "step": 421350 + }, + { + "epoch": 0.8511738587652565, + "grad_norm": 47.11952209472656, + "learning_rate": 7.347941850511853e-07, + "loss": 20.5868, + "step": 421360 + }, 
+ { + "epoch": 0.8511940593979404, + "grad_norm": 447.0443420410156, + "learning_rate": 7.34612037654876e-07, + "loss": 11.4741, + "step": 421370 + }, + { + "epoch": 0.8512142600306242, + "grad_norm": 382.4913024902344, + "learning_rate": 7.344299110475506e-07, + "loss": 21.1025, + "step": 421380 + }, + { + "epoch": 0.851234460663308, + "grad_norm": 304.9476318359375, + "learning_rate": 7.342478052300945e-07, + "loss": 21.573, + "step": 421390 + }, + { + "epoch": 0.8512546612959918, + "grad_norm": 327.41204833984375, + "learning_rate": 7.34065720203399e-07, + "loss": 16.9658, + "step": 421400 + }, + { + "epoch": 0.8512748619286756, + "grad_norm": 293.70294189453125, + "learning_rate": 7.338836559683493e-07, + "loss": 15.7225, + "step": 421410 + }, + { + "epoch": 0.8512950625613595, + "grad_norm": 349.07061767578125, + "learning_rate": 7.337016125258323e-07, + "loss": 17.4128, + "step": 421420 + }, + { + "epoch": 0.8513152631940433, + "grad_norm": 261.398193359375, + "learning_rate": 7.335195898767367e-07, + "loss": 25.8577, + "step": 421430 + }, + { + "epoch": 0.8513354638267271, + "grad_norm": 351.88543701171875, + "learning_rate": 7.333375880219507e-07, + "loss": 52.9905, + "step": 421440 + }, + { + "epoch": 0.8513556644594109, + "grad_norm": 0.34273359179496765, + "learning_rate": 7.33155606962358e-07, + "loss": 26.1271, + "step": 421450 + }, + { + "epoch": 0.8513758650920947, + "grad_norm": 578.9764404296875, + "learning_rate": 7.329736466988469e-07, + "loss": 11.8796, + "step": 421460 + }, + { + "epoch": 0.8513960657247786, + "grad_norm": 174.4795684814453, + "learning_rate": 7.327917072323065e-07, + "loss": 31.6347, + "step": 421470 + }, + { + "epoch": 0.8514162663574624, + "grad_norm": 111.70514678955078, + "learning_rate": 7.326097885636214e-07, + "loss": 11.7568, + "step": 421480 + }, + { + "epoch": 0.8514364669901461, + "grad_norm": 278.8871154785156, + "learning_rate": 7.324278906936771e-07, + "loss": 24.3871, + "step": 421490 + }, + { + "epoch": 0.8514566676228299, + "grad_norm": 619.1923828125, + "learning_rate": 7.322460136233622e-07, + "loss": 29.3718, + "step": 421500 + }, + { + "epoch": 0.8514768682555137, + "grad_norm": 228.79319763183594, + "learning_rate": 7.320641573535647e-07, + "loss": 19.2691, + "step": 421510 + }, + { + "epoch": 0.8514970688881975, + "grad_norm": 413.57049560546875, + "learning_rate": 7.318823218851668e-07, + "loss": 32.7849, + "step": 421520 + }, + { + "epoch": 0.8515172695208814, + "grad_norm": 396.7378845214844, + "learning_rate": 7.31700507219057e-07, + "loss": 20.0575, + "step": 421530 + }, + { + "epoch": 0.8515374701535652, + "grad_norm": 375.3517761230469, + "learning_rate": 7.315187133561219e-07, + "loss": 19.0317, + "step": 421540 + }, + { + "epoch": 0.851557670786249, + "grad_norm": 37.742340087890625, + "learning_rate": 7.31336940297247e-07, + "loss": 20.6924, + "step": 421550 + }, + { + "epoch": 0.8515778714189328, + "grad_norm": 427.2479248046875, + "learning_rate": 7.311551880433171e-07, + "loss": 13.0093, + "step": 421560 + }, + { + "epoch": 0.8515980720516166, + "grad_norm": 529.8531494140625, + "learning_rate": 7.309734565952198e-07, + "loss": 16.0609, + "step": 421570 + }, + { + "epoch": 0.8516182726843005, + "grad_norm": 294.2206726074219, + "learning_rate": 7.307917459538405e-07, + "loss": 22.9379, + "step": 421580 + }, + { + "epoch": 0.8516384733169843, + "grad_norm": 397.2265625, + "learning_rate": 7.30610056120063e-07, + "loss": 24.8546, + "step": 421590 + }, + { + "epoch": 0.8516586739496681, + "grad_norm": 
20.17185401916504, + "learning_rate": 7.304283870947748e-07, + "loss": 17.733, + "step": 421600 + }, + { + "epoch": 0.8516788745823519, + "grad_norm": 473.6755065917969, + "learning_rate": 7.302467388788614e-07, + "loss": 23.9961, + "step": 421610 + }, + { + "epoch": 0.8516990752150357, + "grad_norm": 362.61431884765625, + "learning_rate": 7.300651114732077e-07, + "loss": 20.499, + "step": 421620 + }, + { + "epoch": 0.8517192758477196, + "grad_norm": 176.23777770996094, + "learning_rate": 7.298835048786979e-07, + "loss": 15.5241, + "step": 421630 + }, + { + "epoch": 0.8517394764804034, + "grad_norm": 443.8497009277344, + "learning_rate": 7.29701919096219e-07, + "loss": 17.5838, + "step": 421640 + }, + { + "epoch": 0.8517596771130872, + "grad_norm": 549.1595458984375, + "learning_rate": 7.295203541266549e-07, + "loss": 21.9836, + "step": 421650 + }, + { + "epoch": 0.851779877745771, + "grad_norm": 238.44998168945312, + "learning_rate": 7.293388099708892e-07, + "loss": 17.0428, + "step": 421660 + }, + { + "epoch": 0.8518000783784548, + "grad_norm": 224.4907684326172, + "learning_rate": 7.291572866298102e-07, + "loss": 20.522, + "step": 421670 + }, + { + "epoch": 0.8518202790111387, + "grad_norm": 39.98009490966797, + "learning_rate": 7.289757841042988e-07, + "loss": 22.7961, + "step": 421680 + }, + { + "epoch": 0.8518404796438225, + "grad_norm": 395.55438232421875, + "learning_rate": 7.287943023952426e-07, + "loss": 14.4075, + "step": 421690 + }, + { + "epoch": 0.8518606802765063, + "grad_norm": 462.35174560546875, + "learning_rate": 7.286128415035249e-07, + "loss": 20.4092, + "step": 421700 + }, + { + "epoch": 0.8518808809091901, + "grad_norm": 342.838623046875, + "learning_rate": 7.284314014300292e-07, + "loss": 14.6635, + "step": 421710 + }, + { + "epoch": 0.8519010815418739, + "grad_norm": 402.89068603515625, + "learning_rate": 7.282499821756417e-07, + "loss": 14.4933, + "step": 421720 + }, + { + "epoch": 0.8519212821745578, + "grad_norm": 195.26206970214844, + "learning_rate": 7.28068583741246e-07, + "loss": 25.5276, + "step": 421730 + }, + { + "epoch": 0.8519414828072415, + "grad_norm": 632.72265625, + "learning_rate": 7.278872061277248e-07, + "loss": 21.9685, + "step": 421740 + }, + { + "epoch": 0.8519616834399253, + "grad_norm": 480.2913818359375, + "learning_rate": 7.277058493359629e-07, + "loss": 26.8477, + "step": 421750 + }, + { + "epoch": 0.8519818840726091, + "grad_norm": 457.3885192871094, + "learning_rate": 7.275245133668457e-07, + "loss": 21.4772, + "step": 421760 + }, + { + "epoch": 0.8520020847052929, + "grad_norm": 304.02252197265625, + "learning_rate": 7.273431982212559e-07, + "loss": 14.3607, + "step": 421770 + }, + { + "epoch": 0.8520222853379767, + "grad_norm": 395.3974304199219, + "learning_rate": 7.27161903900076e-07, + "loss": 14.1178, + "step": 421780 + }, + { + "epoch": 0.8520424859706606, + "grad_norm": 679.8463134765625, + "learning_rate": 7.269806304041915e-07, + "loss": 18.9653, + "step": 421790 + }, + { + "epoch": 0.8520626866033444, + "grad_norm": 689.3289184570312, + "learning_rate": 7.267993777344856e-07, + "loss": 24.3499, + "step": 421800 + }, + { + "epoch": 0.8520828872360282, + "grad_norm": 340.630126953125, + "learning_rate": 7.266181458918403e-07, + "loss": 33.4605, + "step": 421810 + }, + { + "epoch": 0.852103087868712, + "grad_norm": 313.085205078125, + "learning_rate": 7.264369348771394e-07, + "loss": 17.1183, + "step": 421820 + }, + { + "epoch": 0.8521232885013958, + "grad_norm": 342.37451171875, + "learning_rate": 7.262557446912693e-07, + 
"loss": 15.2027, + "step": 421830 + }, + { + "epoch": 0.8521434891340797, + "grad_norm": 316.3703918457031, + "learning_rate": 7.260745753351078e-07, + "loss": 14.5585, + "step": 421840 + }, + { + "epoch": 0.8521636897667635, + "grad_norm": 439.4654235839844, + "learning_rate": 7.258934268095402e-07, + "loss": 14.8989, + "step": 421850 + }, + { + "epoch": 0.8521838903994473, + "grad_norm": 593.458740234375, + "learning_rate": 7.257122991154514e-07, + "loss": 26.9237, + "step": 421860 + }, + { + "epoch": 0.8522040910321311, + "grad_norm": 509.9592590332031, + "learning_rate": 7.255311922537217e-07, + "loss": 21.6094, + "step": 421870 + }, + { + "epoch": 0.852224291664815, + "grad_norm": 44.265228271484375, + "learning_rate": 7.253501062252338e-07, + "loss": 17.5555, + "step": 421880 + }, + { + "epoch": 0.8522444922974988, + "grad_norm": 115.82080078125, + "learning_rate": 7.251690410308726e-07, + "loss": 21.4566, + "step": 421890 + }, + { + "epoch": 0.8522646929301826, + "grad_norm": 234.79441833496094, + "learning_rate": 7.249879966715174e-07, + "loss": 13.3536, + "step": 421900 + }, + { + "epoch": 0.8522848935628664, + "grad_norm": 245.02662658691406, + "learning_rate": 7.248069731480533e-07, + "loss": 11.3074, + "step": 421910 + }, + { + "epoch": 0.8523050941955502, + "grad_norm": 447.1869812011719, + "learning_rate": 7.246259704613606e-07, + "loss": 19.349, + "step": 421920 + }, + { + "epoch": 0.852325294828234, + "grad_norm": 258.03607177734375, + "learning_rate": 7.244449886123233e-07, + "loss": 20.7277, + "step": 421930 + }, + { + "epoch": 0.8523454954609179, + "grad_norm": 416.8277282714844, + "learning_rate": 7.242640276018226e-07, + "loss": 15.8615, + "step": 421940 + }, + { + "epoch": 0.8523656960936017, + "grad_norm": 9.882548332214355, + "learning_rate": 7.240830874307392e-07, + "loss": 21.3556, + "step": 421950 + }, + { + "epoch": 0.8523858967262855, + "grad_norm": 509.8556213378906, + "learning_rate": 7.239021680999575e-07, + "loss": 18.4977, + "step": 421960 + }, + { + "epoch": 0.8524060973589693, + "grad_norm": 307.5932922363281, + "learning_rate": 7.237212696103568e-07, + "loss": 19.4454, + "step": 421970 + }, + { + "epoch": 0.8524262979916531, + "grad_norm": 57.08131408691406, + "learning_rate": 7.235403919628214e-07, + "loss": 9.8599, + "step": 421980 + }, + { + "epoch": 0.852446498624337, + "grad_norm": 234.61434936523438, + "learning_rate": 7.233595351582313e-07, + "loss": 37.4964, + "step": 421990 + }, + { + "epoch": 0.8524666992570207, + "grad_norm": 192.13845825195312, + "learning_rate": 7.23178699197467e-07, + "loss": 19.1351, + "step": 422000 + }, + { + "epoch": 0.8524868998897045, + "grad_norm": 12.566235542297363, + "learning_rate": 7.229978840814122e-07, + "loss": 23.8078, + "step": 422010 + }, + { + "epoch": 0.8525071005223883, + "grad_norm": 335.6422119140625, + "learning_rate": 7.228170898109465e-07, + "loss": 11.2894, + "step": 422020 + }, + { + "epoch": 0.8525273011550721, + "grad_norm": 59.3913688659668, + "learning_rate": 7.22636316386951e-07, + "loss": 17.7019, + "step": 422030 + }, + { + "epoch": 0.852547501787756, + "grad_norm": 292.2152099609375, + "learning_rate": 7.22455563810307e-07, + "loss": 18.1457, + "step": 422040 + }, + { + "epoch": 0.8525677024204398, + "grad_norm": 203.8167266845703, + "learning_rate": 7.222748320818984e-07, + "loss": 13.9486, + "step": 422050 + }, + { + "epoch": 0.8525879030531236, + "grad_norm": 192.4440155029297, + "learning_rate": 7.220941212026005e-07, + "loss": 12.7467, + "step": 422060 + }, + { + "epoch": 
0.8526081036858074, + "grad_norm": 151.43333435058594, + "learning_rate": 7.219134311732978e-07, + "loss": 17.172, + "step": 422070 + }, + { + "epoch": 0.8526283043184912, + "grad_norm": 274.3697509765625, + "learning_rate": 7.217327619948705e-07, + "loss": 10.654, + "step": 422080 + }, + { + "epoch": 0.8526485049511751, + "grad_norm": 333.4923400878906, + "learning_rate": 7.215521136681997e-07, + "loss": 16.1211, + "step": 422090 + }, + { + "epoch": 0.8526687055838589, + "grad_norm": 509.5408020019531, + "learning_rate": 7.213714861941628e-07, + "loss": 21.663, + "step": 422100 + }, + { + "epoch": 0.8526889062165427, + "grad_norm": 252.9277801513672, + "learning_rate": 7.211908795736433e-07, + "loss": 15.0233, + "step": 422110 + }, + { + "epoch": 0.8527091068492265, + "grad_norm": 328.42047119140625, + "learning_rate": 7.210102938075225e-07, + "loss": 14.1027, + "step": 422120 + }, + { + "epoch": 0.8527293074819103, + "grad_norm": 867.2739868164062, + "learning_rate": 7.20829728896676e-07, + "loss": 30.1601, + "step": 422130 + }, + { + "epoch": 0.8527495081145942, + "grad_norm": 230.66310119628906, + "learning_rate": 7.206491848419867e-07, + "loss": 14.3198, + "step": 422140 + }, + { + "epoch": 0.852769708747278, + "grad_norm": 273.2495422363281, + "learning_rate": 7.204686616443352e-07, + "loss": 26.9474, + "step": 422150 + }, + { + "epoch": 0.8527899093799618, + "grad_norm": 440.524169921875, + "learning_rate": 7.202881593046002e-07, + "loss": 19.9583, + "step": 422160 + }, + { + "epoch": 0.8528101100126456, + "grad_norm": 270.63421630859375, + "learning_rate": 7.20107677823661e-07, + "loss": 26.173, + "step": 422170 + }, + { + "epoch": 0.8528303106453294, + "grad_norm": 816.7549438476562, + "learning_rate": 7.199272172023986e-07, + "loss": 30.6517, + "step": 422180 + }, + { + "epoch": 0.8528505112780133, + "grad_norm": 11.268199920654297, + "learning_rate": 7.197467774416921e-07, + "loss": 17.9889, + "step": 422190 + }, + { + "epoch": 0.8528707119106971, + "grad_norm": 447.5383605957031, + "learning_rate": 7.195663585424195e-07, + "loss": 13.2262, + "step": 422200 + }, + { + "epoch": 0.8528909125433809, + "grad_norm": 179.54664611816406, + "learning_rate": 7.193859605054615e-07, + "loss": 16.2462, + "step": 422210 + }, + { + "epoch": 0.8529111131760647, + "grad_norm": 292.2140197753906, + "learning_rate": 7.19205583331698e-07, + "loss": 10.8472, + "step": 422220 + }, + { + "epoch": 0.8529313138087485, + "grad_norm": 346.0629577636719, + "learning_rate": 7.190252270220071e-07, + "loss": 10.9012, + "step": 422230 + }, + { + "epoch": 0.8529515144414324, + "grad_norm": 255.02244567871094, + "learning_rate": 7.188448915772673e-07, + "loss": 11.8099, + "step": 422240 + }, + { + "epoch": 0.8529717150741161, + "grad_norm": 21.119182586669922, + "learning_rate": 7.186645769983591e-07, + "loss": 15.2602, + "step": 422250 + }, + { + "epoch": 0.8529919157067999, + "grad_norm": 140.7800750732422, + "learning_rate": 7.18484283286161e-07, + "loss": 34.9589, + "step": 422260 + }, + { + "epoch": 0.8530121163394837, + "grad_norm": 472.4410400390625, + "learning_rate": 7.183040104415495e-07, + "loss": 13.2531, + "step": 422270 + }, + { + "epoch": 0.8530323169721675, + "grad_norm": 12.594407081604004, + "learning_rate": 7.181237584654066e-07, + "loss": 10.1258, + "step": 422280 + }, + { + "epoch": 0.8530525176048513, + "grad_norm": 415.6765441894531, + "learning_rate": 7.179435273586078e-07, + "loss": 15.773, + "step": 422290 + }, + { + "epoch": 0.8530727182375352, + "grad_norm": 65.1563720703125, + 
"learning_rate": 7.177633171220339e-07, + "loss": 18.0508, + "step": 422300 + }, + { + "epoch": 0.853092918870219, + "grad_norm": 0.0, + "learning_rate": 7.17583127756562e-07, + "loss": 36.8546, + "step": 422310 + }, + { + "epoch": 0.8531131195029028, + "grad_norm": 316.9919128417969, + "learning_rate": 7.1740295926307e-07, + "loss": 17.3249, + "step": 422320 + }, + { + "epoch": 0.8531333201355866, + "grad_norm": 129.97496032714844, + "learning_rate": 7.172228116424374e-07, + "loss": 25.0414, + "step": 422330 + }, + { + "epoch": 0.8531535207682704, + "grad_norm": 284.4596252441406, + "learning_rate": 7.170426848955408e-07, + "loss": 16.1025, + "step": 422340 + }, + { + "epoch": 0.8531737214009543, + "grad_norm": 640.4921264648438, + "learning_rate": 7.168625790232586e-07, + "loss": 18.8267, + "step": 422350 + }, + { + "epoch": 0.8531939220336381, + "grad_norm": 1549.4013671875, + "learning_rate": 7.166824940264683e-07, + "loss": 28.6623, + "step": 422360 + }, + { + "epoch": 0.8532141226663219, + "grad_norm": 363.51800537109375, + "learning_rate": 7.165024299060486e-07, + "loss": 11.5337, + "step": 422370 + }, + { + "epoch": 0.8532343232990057, + "grad_norm": 534.2193603515625, + "learning_rate": 7.163223866628771e-07, + "loss": 14.7847, + "step": 422380 + }, + { + "epoch": 0.8532545239316895, + "grad_norm": 626.86865234375, + "learning_rate": 7.161423642978299e-07, + "loss": 22.6856, + "step": 422390 + }, + { + "epoch": 0.8532747245643734, + "grad_norm": 187.61221313476562, + "learning_rate": 7.159623628117856e-07, + "loss": 13.3871, + "step": 422400 + }, + { + "epoch": 0.8532949251970572, + "grad_norm": 282.8906555175781, + "learning_rate": 7.157823822056214e-07, + "loss": 12.2203, + "step": 422410 + }, + { + "epoch": 0.853315125829741, + "grad_norm": 233.69619750976562, + "learning_rate": 7.156024224802139e-07, + "loss": 14.9793, + "step": 422420 + }, + { + "epoch": 0.8533353264624248, + "grad_norm": 729.30517578125, + "learning_rate": 7.154224836364398e-07, + "loss": 23.6431, + "step": 422430 + }, + { + "epoch": 0.8533555270951086, + "grad_norm": 443.14154052734375, + "learning_rate": 7.152425656751794e-07, + "loss": 13.5563, + "step": 422440 + }, + { + "epoch": 0.8533757277277925, + "grad_norm": 488.0747985839844, + "learning_rate": 7.150626685973045e-07, + "loss": 16.0828, + "step": 422450 + }, + { + "epoch": 0.8533959283604763, + "grad_norm": 164.69842529296875, + "learning_rate": 7.148827924036944e-07, + "loss": 18.4377, + "step": 422460 + }, + { + "epoch": 0.8534161289931601, + "grad_norm": 345.8693542480469, + "learning_rate": 7.147029370952274e-07, + "loss": 24.7451, + "step": 422470 + }, + { + "epoch": 0.8534363296258439, + "grad_norm": 172.44485473632812, + "learning_rate": 7.145231026727783e-07, + "loss": 19.3036, + "step": 422480 + }, + { + "epoch": 0.8534565302585277, + "grad_norm": 492.9544372558594, + "learning_rate": 7.143432891372226e-07, + "loss": 16.763, + "step": 422490 + }, + { + "epoch": 0.8534767308912116, + "grad_norm": 627.2689208984375, + "learning_rate": 7.141634964894389e-07, + "loss": 18.8558, + "step": 422500 + }, + { + "epoch": 0.8534969315238953, + "grad_norm": 461.1217041015625, + "learning_rate": 7.139837247303027e-07, + "loss": 14.6763, + "step": 422510 + }, + { + "epoch": 0.8535171321565791, + "grad_norm": 225.9117889404297, + "learning_rate": 7.138039738606894e-07, + "loss": 33.9146, + "step": 422520 + }, + { + "epoch": 0.8535373327892629, + "grad_norm": 202.75625610351562, + "learning_rate": 7.13624243881475e-07, + "loss": 22.2583, + "step": 422530 + 
}, + { + "epoch": 0.8535575334219467, + "grad_norm": 566.3533935546875, + "learning_rate": 7.134445347935376e-07, + "loss": 16.8513, + "step": 422540 + }, + { + "epoch": 0.8535777340546306, + "grad_norm": 628.0380249023438, + "learning_rate": 7.132648465977515e-07, + "loss": 13.4277, + "step": 422550 + }, + { + "epoch": 0.8535979346873144, + "grad_norm": 36.08042526245117, + "learning_rate": 7.130851792949916e-07, + "loss": 12.6178, + "step": 422560 + }, + { + "epoch": 0.8536181353199982, + "grad_norm": 211.43484497070312, + "learning_rate": 7.129055328861356e-07, + "loss": 16.2987, + "step": 422570 + }, + { + "epoch": 0.853638335952682, + "grad_norm": 52.93839645385742, + "learning_rate": 7.127259073720571e-07, + "loss": 16.9325, + "step": 422580 + }, + { + "epoch": 0.8536585365853658, + "grad_norm": 228.2338409423828, + "learning_rate": 7.125463027536334e-07, + "loss": 24.4949, + "step": 422590 + }, + { + "epoch": 0.8536787372180497, + "grad_norm": 248.85826110839844, + "learning_rate": 7.123667190317396e-07, + "loss": 19.4014, + "step": 422600 + }, + { + "epoch": 0.8536989378507335, + "grad_norm": 218.89491271972656, + "learning_rate": 7.121871562072486e-07, + "loss": 28.9889, + "step": 422610 + }, + { + "epoch": 0.8537191384834173, + "grad_norm": 258.9183654785156, + "learning_rate": 7.12007614281039e-07, + "loss": 23.8125, + "step": 422620 + }, + { + "epoch": 0.8537393391161011, + "grad_norm": 481.2682800292969, + "learning_rate": 7.11828093253984e-07, + "loss": 16.0911, + "step": 422630 + }, + { + "epoch": 0.8537595397487849, + "grad_norm": 599.0542602539062, + "learning_rate": 7.116485931269573e-07, + "loss": 16.99, + "step": 422640 + }, + { + "epoch": 0.8537797403814688, + "grad_norm": 411.8247375488281, + "learning_rate": 7.114691139008356e-07, + "loss": 13.0052, + "step": 422650 + }, + { + "epoch": 0.8537999410141526, + "grad_norm": 409.2203674316406, + "learning_rate": 7.112896555764943e-07, + "loss": 13.5062, + "step": 422660 + }, + { + "epoch": 0.8538201416468364, + "grad_norm": 386.6644592285156, + "learning_rate": 7.111102181548074e-07, + "loss": 22.6078, + "step": 422670 + }, + { + "epoch": 0.8538403422795202, + "grad_norm": 340.1001281738281, + "learning_rate": 7.109308016366473e-07, + "loss": 10.6364, + "step": 422680 + }, + { + "epoch": 0.853860542912204, + "grad_norm": 299.4497375488281, + "learning_rate": 7.107514060228921e-07, + "loss": 18.9434, + "step": 422690 + }, + { + "epoch": 0.8538807435448879, + "grad_norm": 461.1625671386719, + "learning_rate": 7.105720313144143e-07, + "loss": 9.3554, + "step": 422700 + }, + { + "epoch": 0.8539009441775717, + "grad_norm": 300.0633544921875, + "learning_rate": 7.103926775120867e-07, + "loss": 33.4887, + "step": 422710 + }, + { + "epoch": 0.8539211448102555, + "grad_norm": 314.12481689453125, + "learning_rate": 7.102133446167847e-07, + "loss": 18.0671, + "step": 422720 + }, + { + "epoch": 0.8539413454429393, + "grad_norm": 172.38677978515625, + "learning_rate": 7.100340326293853e-07, + "loss": 9.6837, + "step": 422730 + }, + { + "epoch": 0.8539615460756231, + "grad_norm": 89.95143127441406, + "learning_rate": 7.098547415507572e-07, + "loss": 19.3794, + "step": 422740 + }, + { + "epoch": 0.853981746708307, + "grad_norm": 269.31787109375, + "learning_rate": 7.096754713817771e-07, + "loss": 21.4956, + "step": 422750 + }, + { + "epoch": 0.8540019473409908, + "grad_norm": 4.7827067375183105, + "learning_rate": 7.094962221233192e-07, + "loss": 12.2086, + "step": 422760 + }, + { + "epoch": 0.8540221479736745, + "grad_norm": 
681.1311645507812, + "learning_rate": 7.093169937762562e-07, + "loss": 18.2227, + "step": 422770 + }, + { + "epoch": 0.8540423486063583, + "grad_norm": 109.95421600341797, + "learning_rate": 7.091377863414611e-07, + "loss": 14.1634, + "step": 422780 + }, + { + "epoch": 0.8540625492390421, + "grad_norm": 179.98135375976562, + "learning_rate": 7.08958599819809e-07, + "loss": 15.3122, + "step": 422790 + }, + { + "epoch": 0.8540827498717259, + "grad_norm": 210.8217315673828, + "learning_rate": 7.087794342121724e-07, + "loss": 10.1048, + "step": 422800 + }, + { + "epoch": 0.8541029505044098, + "grad_norm": 285.1670837402344, + "learning_rate": 7.086002895194227e-07, + "loss": 15.6775, + "step": 422810 + }, + { + "epoch": 0.8541231511370936, + "grad_norm": 824.2320556640625, + "learning_rate": 7.08421165742435e-07, + "loss": 26.6279, + "step": 422820 + }, + { + "epoch": 0.8541433517697774, + "grad_norm": 56.3100471496582, + "learning_rate": 7.08242062882083e-07, + "loss": 21.4873, + "step": 422830 + }, + { + "epoch": 0.8541635524024612, + "grad_norm": 541.4666137695312, + "learning_rate": 7.080629809392392e-07, + "loss": 15.3471, + "step": 422840 + }, + { + "epoch": 0.854183753035145, + "grad_norm": 333.91851806640625, + "learning_rate": 7.078839199147741e-07, + "loss": 22.5128, + "step": 422850 + }, + { + "epoch": 0.8542039536678289, + "grad_norm": 545.7657470703125, + "learning_rate": 7.077048798095637e-07, + "loss": 28.4918, + "step": 422860 + }, + { + "epoch": 0.8542241543005127, + "grad_norm": 0.6666960716247559, + "learning_rate": 7.07525860624479e-07, + "loss": 15.1818, + "step": 422870 + }, + { + "epoch": 0.8542443549331965, + "grad_norm": 512.3268432617188, + "learning_rate": 7.073468623603918e-07, + "loss": 18.1457, + "step": 422880 + }, + { + "epoch": 0.8542645555658803, + "grad_norm": 398.10882568359375, + "learning_rate": 7.071678850181762e-07, + "loss": 23.5213, + "step": 422890 + }, + { + "epoch": 0.8542847561985641, + "grad_norm": 26.361709594726562, + "learning_rate": 7.069889285987025e-07, + "loss": 24.1029, + "step": 422900 + }, + { + "epoch": 0.854304956831248, + "grad_norm": 99.04013061523438, + "learning_rate": 7.068099931028449e-07, + "loss": 14.5572, + "step": 422910 + }, + { + "epoch": 0.8543251574639318, + "grad_norm": 487.67059326171875, + "learning_rate": 7.066310785314756e-07, + "loss": 14.2184, + "step": 422920 + }, + { + "epoch": 0.8543453580966156, + "grad_norm": 779.5738525390625, + "learning_rate": 7.064521848854639e-07, + "loss": 41.2217, + "step": 422930 + }, + { + "epoch": 0.8543655587292994, + "grad_norm": 250.2709503173828, + "learning_rate": 7.062733121656845e-07, + "loss": 17.1406, + "step": 422940 + }, + { + "epoch": 0.8543857593619832, + "grad_norm": 452.2339782714844, + "learning_rate": 7.060944603730086e-07, + "loss": 11.8047, + "step": 422950 + }, + { + "epoch": 0.8544059599946671, + "grad_norm": 194.9429168701172, + "learning_rate": 7.059156295083064e-07, + "loss": 16.9779, + "step": 422960 + }, + { + "epoch": 0.8544261606273509, + "grad_norm": 0.0, + "learning_rate": 7.057368195724506e-07, + "loss": 25.6859, + "step": 422970 + }, + { + "epoch": 0.8544463612600347, + "grad_norm": 120.65647888183594, + "learning_rate": 7.055580305663135e-07, + "loss": 12.0228, + "step": 422980 + }, + { + "epoch": 0.8544665618927185, + "grad_norm": 79.81140899658203, + "learning_rate": 7.053792624907662e-07, + "loss": 14.6664, + "step": 422990 + }, + { + "epoch": 0.8544867625254023, + "grad_norm": 401.9481201171875, + "learning_rate": 7.052005153466779e-07, + "loss": 
20.8853, + "step": 423000 + }, + { + "epoch": 0.8545069631580862, + "grad_norm": 73.91416931152344, + "learning_rate": 7.050217891349226e-07, + "loss": 15.3818, + "step": 423010 + }, + { + "epoch": 0.8545271637907699, + "grad_norm": 1922.633056640625, + "learning_rate": 7.048430838563708e-07, + "loss": 31.3682, + "step": 423020 + }, + { + "epoch": 0.8545473644234537, + "grad_norm": 451.913818359375, + "learning_rate": 7.046643995118913e-07, + "loss": 16.2877, + "step": 423030 + }, + { + "epoch": 0.8545675650561375, + "grad_norm": 74.22517395019531, + "learning_rate": 7.04485736102356e-07, + "loss": 27.027, + "step": 423040 + }, + { + "epoch": 0.8545877656888213, + "grad_norm": 294.8464050292969, + "learning_rate": 7.043070936286395e-07, + "loss": 11.8094, + "step": 423050 + }, + { + "epoch": 0.8546079663215052, + "grad_norm": 267.31085205078125, + "learning_rate": 7.041284720916064e-07, + "loss": 14.3946, + "step": 423060 + }, + { + "epoch": 0.854628166954189, + "grad_norm": 237.23765563964844, + "learning_rate": 7.0394987149213e-07, + "loss": 15.9205, + "step": 423070 + }, + { + "epoch": 0.8546483675868728, + "grad_norm": 169.7945098876953, + "learning_rate": 7.037712918310818e-07, + "loss": 21.465, + "step": 423080 + }, + { + "epoch": 0.8546685682195566, + "grad_norm": 1223.9046630859375, + "learning_rate": 7.035927331093318e-07, + "loss": 20.4954, + "step": 423090 + }, + { + "epoch": 0.8546887688522404, + "grad_norm": 288.6837463378906, + "learning_rate": 7.034141953277484e-07, + "loss": 14.3236, + "step": 423100 + }, + { + "epoch": 0.8547089694849243, + "grad_norm": 198.91371154785156, + "learning_rate": 7.032356784872035e-07, + "loss": 22.5588, + "step": 423110 + }, + { + "epoch": 0.8547291701176081, + "grad_norm": 362.96160888671875, + "learning_rate": 7.030571825885685e-07, + "loss": 12.3301, + "step": 423120 + }, + { + "epoch": 0.8547493707502919, + "grad_norm": 245.9905242919922, + "learning_rate": 7.028787076327093e-07, + "loss": 21.1567, + "step": 423130 + }, + { + "epoch": 0.8547695713829757, + "grad_norm": 548.1031494140625, + "learning_rate": 7.027002536204986e-07, + "loss": 17.0715, + "step": 423140 + }, + { + "epoch": 0.8547897720156595, + "grad_norm": 129.6185760498047, + "learning_rate": 7.025218205528061e-07, + "loss": 13.8482, + "step": 423150 + }, + { + "epoch": 0.8548099726483434, + "grad_norm": 187.77706909179688, + "learning_rate": 7.02343408430502e-07, + "loss": 12.9665, + "step": 423160 + }, + { + "epoch": 0.8548301732810272, + "grad_norm": 544.2252807617188, + "learning_rate": 7.021650172544531e-07, + "loss": 12.6351, + "step": 423170 + }, + { + "epoch": 0.854850373913711, + "grad_norm": 582.0858154296875, + "learning_rate": 7.019866470255315e-07, + "loss": 26.0964, + "step": 423180 + }, + { + "epoch": 0.8548705745463948, + "grad_norm": 290.2862548828125, + "learning_rate": 7.018082977446061e-07, + "loss": 12.2002, + "step": 423190 + }, + { + "epoch": 0.8548907751790786, + "grad_norm": 792.829833984375, + "learning_rate": 7.01629969412545e-07, + "loss": 17.0466, + "step": 423200 + }, + { + "epoch": 0.8549109758117625, + "grad_norm": 155.3640594482422, + "learning_rate": 7.014516620302186e-07, + "loss": 8.8565, + "step": 423210 + }, + { + "epoch": 0.8549311764444463, + "grad_norm": 230.0448760986328, + "learning_rate": 7.012733755984946e-07, + "loss": 10.1439, + "step": 423220 + }, + { + "epoch": 0.8549513770771301, + "grad_norm": 291.56732177734375, + "learning_rate": 7.010951101182439e-07, + "loss": 17.6992, + "step": 423230 + }, + { + "epoch": 
0.8549715777098139, + "grad_norm": 23.559616088867188, + "learning_rate": 7.009168655903342e-07, + "loss": 16.6611, + "step": 423240 + }, + { + "epoch": 0.8549917783424977, + "grad_norm": 102.11821746826172, + "learning_rate": 7.007386420156332e-07, + "loss": 16.0564, + "step": 423250 + }, + { + "epoch": 0.8550119789751816, + "grad_norm": 426.19720458984375, + "learning_rate": 7.005604393950116e-07, + "loss": 8.914, + "step": 423260 + }, + { + "epoch": 0.8550321796078654, + "grad_norm": 197.2255859375, + "learning_rate": 7.003822577293362e-07, + "loss": 13.2246, + "step": 423270 + }, + { + "epoch": 0.8550523802405491, + "grad_norm": 291.4448547363281, + "learning_rate": 7.002040970194768e-07, + "loss": 13.1481, + "step": 423280 + }, + { + "epoch": 0.8550725808732329, + "grad_norm": 600.7762451171875, + "learning_rate": 7.000259572663004e-07, + "loss": 43.9559, + "step": 423290 + }, + { + "epoch": 0.8550927815059167, + "grad_norm": 267.8164367675781, + "learning_rate": 6.99847838470677e-07, + "loss": 9.8765, + "step": 423300 + }, + { + "epoch": 0.8551129821386005, + "grad_norm": 551.2529296875, + "learning_rate": 6.996697406334735e-07, + "loss": 8.6453, + "step": 423310 + }, + { + "epoch": 0.8551331827712844, + "grad_norm": 306.5521240234375, + "learning_rate": 6.994916637555571e-07, + "loss": 15.5206, + "step": 423320 + }, + { + "epoch": 0.8551533834039682, + "grad_norm": 409.07958984375, + "learning_rate": 6.993136078377965e-07, + "loss": 11.0522, + "step": 423330 + }, + { + "epoch": 0.855173584036652, + "grad_norm": 609.0219116210938, + "learning_rate": 6.991355728810623e-07, + "loss": 29.754, + "step": 423340 + }, + { + "epoch": 0.8551937846693358, + "grad_norm": 440.0611877441406, + "learning_rate": 6.989575588862174e-07, + "loss": 19.6599, + "step": 423350 + }, + { + "epoch": 0.8552139853020196, + "grad_norm": 289.8558044433594, + "learning_rate": 6.987795658541319e-07, + "loss": 13.4273, + "step": 423360 + }, + { + "epoch": 0.8552341859347035, + "grad_norm": 370.3302917480469, + "learning_rate": 6.986015937856743e-07, + "loss": 26.0949, + "step": 423370 + }, + { + "epoch": 0.8552543865673873, + "grad_norm": 423.3115234375, + "learning_rate": 6.984236426817104e-07, + "loss": 29.1275, + "step": 423380 + }, + { + "epoch": 0.8552745872000711, + "grad_norm": 849.1698608398438, + "learning_rate": 6.982457125431069e-07, + "loss": 33.0457, + "step": 423390 + }, + { + "epoch": 0.8552947878327549, + "grad_norm": 520.9661254882812, + "learning_rate": 6.980678033707333e-07, + "loss": 19.85, + "step": 423400 + }, + { + "epoch": 0.8553149884654387, + "grad_norm": 812.6036376953125, + "learning_rate": 6.978899151654556e-07, + "loss": 22.3343, + "step": 423410 + }, + { + "epoch": 0.8553351890981226, + "grad_norm": 95.53800201416016, + "learning_rate": 6.977120479281396e-07, + "loss": 20.1647, + "step": 423420 + }, + { + "epoch": 0.8553553897308064, + "grad_norm": 571.2792358398438, + "learning_rate": 6.975342016596531e-07, + "loss": 24.6136, + "step": 423430 + }, + { + "epoch": 0.8553755903634902, + "grad_norm": 265.1282653808594, + "learning_rate": 6.973563763608643e-07, + "loss": 17.2472, + "step": 423440 + }, + { + "epoch": 0.855395790996174, + "grad_norm": 319.90191650390625, + "learning_rate": 6.971785720326385e-07, + "loss": 13.9207, + "step": 423450 + }, + { + "epoch": 0.8554159916288578, + "grad_norm": 482.8645324707031, + "learning_rate": 6.970007886758412e-07, + "loss": 20.3257, + "step": 423460 + }, + { + "epoch": 0.8554361922615417, + "grad_norm": 471.61529541015625, + "learning_rate": 
6.968230262913417e-07, + "loss": 21.8656, + "step": 423470 + }, + { + "epoch": 0.8554563928942255, + "grad_norm": 443.1946716308594, + "learning_rate": 6.966452848800043e-07, + "loss": 15.9943, + "step": 423480 + }, + { + "epoch": 0.8554765935269093, + "grad_norm": 578.9497680664062, + "learning_rate": 6.964675644426955e-07, + "loss": 18.8936, + "step": 423490 + }, + { + "epoch": 0.8554967941595931, + "grad_norm": 6.638950347900391, + "learning_rate": 6.962898649802824e-07, + "loss": 17.4078, + "step": 423500 + }, + { + "epoch": 0.8555169947922769, + "grad_norm": 1091.97998046875, + "learning_rate": 6.961121864936294e-07, + "loss": 22.4504, + "step": 423510 + }, + { + "epoch": 0.8555371954249608, + "grad_norm": 532.3468627929688, + "learning_rate": 6.95934528983605e-07, + "loss": 10.7549, + "step": 423520 + }, + { + "epoch": 0.8555573960576445, + "grad_norm": 306.2650146484375, + "learning_rate": 6.957568924510733e-07, + "loss": 11.9596, + "step": 423530 + }, + { + "epoch": 0.8555775966903283, + "grad_norm": 550.3804931640625, + "learning_rate": 6.955792768969e-07, + "loss": 18.5798, + "step": 423540 + }, + { + "epoch": 0.8555977973230121, + "grad_norm": 468.0177307128906, + "learning_rate": 6.954016823219517e-07, + "loss": 15.8299, + "step": 423550 + }, + { + "epoch": 0.8556179979556959, + "grad_norm": 265.7972717285156, + "learning_rate": 6.952241087270938e-07, + "loss": 12.0223, + "step": 423560 + }, + { + "epoch": 0.8556381985883798, + "grad_norm": 417.89630126953125, + "learning_rate": 6.950465561131903e-07, + "loss": 18.3307, + "step": 423570 + }, + { + "epoch": 0.8556583992210636, + "grad_norm": 390.70086669921875, + "learning_rate": 6.948690244811079e-07, + "loss": 22.9239, + "step": 423580 + }, + { + "epoch": 0.8556785998537474, + "grad_norm": 357.60009765625, + "learning_rate": 6.946915138317129e-07, + "loss": 13.8321, + "step": 423590 + }, + { + "epoch": 0.8556988004864312, + "grad_norm": 292.06292724609375, + "learning_rate": 6.945140241658688e-07, + "loss": 16.6138, + "step": 423600 + }, + { + "epoch": 0.855719001119115, + "grad_norm": 231.03797912597656, + "learning_rate": 6.943365554844406e-07, + "loss": 16.0941, + "step": 423610 + }, + { + "epoch": 0.8557392017517989, + "grad_norm": 178.90017700195312, + "learning_rate": 6.941591077882948e-07, + "loss": 26.9284, + "step": 423620 + }, + { + "epoch": 0.8557594023844827, + "grad_norm": 267.6416015625, + "learning_rate": 6.939816810782952e-07, + "loss": 22.531, + "step": 423630 + }, + { + "epoch": 0.8557796030171665, + "grad_norm": 483.8642883300781, + "learning_rate": 6.938042753553054e-07, + "loss": 33.9005, + "step": 423640 + }, + { + "epoch": 0.8557998036498503, + "grad_norm": 532.0093383789062, + "learning_rate": 6.936268906201915e-07, + "loss": 14.0307, + "step": 423650 + }, + { + "epoch": 0.8558200042825341, + "grad_norm": 672.6396484375, + "learning_rate": 6.934495268738195e-07, + "loss": 18.4205, + "step": 423660 + }, + { + "epoch": 0.855840204915218, + "grad_norm": 494.2569274902344, + "learning_rate": 6.932721841170503e-07, + "loss": 12.0525, + "step": 423670 + }, + { + "epoch": 0.8558604055479018, + "grad_norm": 488.8106689453125, + "learning_rate": 6.930948623507505e-07, + "loss": 16.517, + "step": 423680 + }, + { + "epoch": 0.8558806061805856, + "grad_norm": 716.0003662109375, + "learning_rate": 6.92917561575785e-07, + "loss": 24.3459, + "step": 423690 + }, + { + "epoch": 0.8559008068132694, + "grad_norm": 463.7527160644531, + "learning_rate": 6.927402817930168e-07, + "loss": 17.1598, + "step": 423700 + }, + { + 
"epoch": 0.8559210074459532, + "grad_norm": 303.1055908203125, + "learning_rate": 6.925630230033087e-07, + "loss": 24.3444, + "step": 423710 + }, + { + "epoch": 0.855941208078637, + "grad_norm": 637.7202758789062, + "learning_rate": 6.923857852075261e-07, + "loss": 17.3609, + "step": 423720 + }, + { + "epoch": 0.8559614087113209, + "grad_norm": 456.35491943359375, + "learning_rate": 6.922085684065349e-07, + "loss": 19.7045, + "step": 423730 + }, + { + "epoch": 0.8559816093440047, + "grad_norm": 126.94768524169922, + "learning_rate": 6.920313726011945e-07, + "loss": 6.9527, + "step": 423740 + }, + { + "epoch": 0.8560018099766885, + "grad_norm": 357.84619140625, + "learning_rate": 6.918541977923709e-07, + "loss": 23.9845, + "step": 423750 + }, + { + "epoch": 0.8560220106093723, + "grad_norm": 377.5257873535156, + "learning_rate": 6.916770439809283e-07, + "loss": 21.3561, + "step": 423760 + }, + { + "epoch": 0.8560422112420562, + "grad_norm": 135.76699829101562, + "learning_rate": 6.914999111677295e-07, + "loss": 16.4085, + "step": 423770 + }, + { + "epoch": 0.85606241187474, + "grad_norm": 181.76840209960938, + "learning_rate": 6.913227993536364e-07, + "loss": 23.1228, + "step": 423780 + }, + { + "epoch": 0.8560826125074237, + "grad_norm": 422.5714416503906, + "learning_rate": 6.911457085395146e-07, + "loss": 13.2301, + "step": 423790 + }, + { + "epoch": 0.8561028131401075, + "grad_norm": 458.74517822265625, + "learning_rate": 6.909686387262255e-07, + "loss": 13.2711, + "step": 423800 + }, + { + "epoch": 0.8561230137727913, + "grad_norm": 173.49359130859375, + "learning_rate": 6.907915899146322e-07, + "loss": 14.9355, + "step": 423810 + }, + { + "epoch": 0.8561432144054751, + "grad_norm": 420.94586181640625, + "learning_rate": 6.906145621055987e-07, + "loss": 15.7464, + "step": 423820 + }, + { + "epoch": 0.856163415038159, + "grad_norm": 325.2900695800781, + "learning_rate": 6.904375552999859e-07, + "loss": 14.5921, + "step": 423830 + }, + { + "epoch": 0.8561836156708428, + "grad_norm": 377.8846130371094, + "learning_rate": 6.902605694986592e-07, + "loss": 29.0642, + "step": 423840 + }, + { + "epoch": 0.8562038163035266, + "grad_norm": 424.69622802734375, + "learning_rate": 6.9008360470248e-07, + "loss": 10.3148, + "step": 423850 + }, + { + "epoch": 0.8562240169362104, + "grad_norm": 16.294780731201172, + "learning_rate": 6.89906660912309e-07, + "loss": 20.1662, + "step": 423860 + }, + { + "epoch": 0.8562442175688942, + "grad_norm": 467.6504821777344, + "learning_rate": 6.897297381290113e-07, + "loss": 12.0162, + "step": 423870 + }, + { + "epoch": 0.8562644182015781, + "grad_norm": 90.69451141357422, + "learning_rate": 6.895528363534476e-07, + "loss": 21.7523, + "step": 423880 + }, + { + "epoch": 0.8562846188342619, + "grad_norm": 572.353515625, + "learning_rate": 6.89375955586481e-07, + "loss": 9.6403, + "step": 423890 + }, + { + "epoch": 0.8563048194669457, + "grad_norm": 426.759765625, + "learning_rate": 6.891990958289724e-07, + "loss": 16.1095, + "step": 423900 + }, + { + "epoch": 0.8563250200996295, + "grad_norm": 261.89208984375, + "learning_rate": 6.890222570817856e-07, + "loss": 19.6324, + "step": 423910 + }, + { + "epoch": 0.8563452207323133, + "grad_norm": 768.3299560546875, + "learning_rate": 6.888454393457817e-07, + "loss": 23.239, + "step": 423920 + }, + { + "epoch": 0.8563654213649972, + "grad_norm": 167.47921752929688, + "learning_rate": 6.886686426218209e-07, + "loss": 16.6358, + "step": 423930 + }, + { + "epoch": 0.856385621997681, + "grad_norm": 671.823486328125, + 
"learning_rate": 6.884918669107671e-07, + "loss": 14.0608, + "step": 423940 + }, + { + "epoch": 0.8564058226303648, + "grad_norm": 208.45716857910156, + "learning_rate": 6.883151122134812e-07, + "loss": 13.198, + "step": 423950 + }, + { + "epoch": 0.8564260232630486, + "grad_norm": 64.32759857177734, + "learning_rate": 6.881383785308232e-07, + "loss": 15.9101, + "step": 423960 + }, + { + "epoch": 0.8564462238957324, + "grad_norm": 501.4256591796875, + "learning_rate": 6.879616658636562e-07, + "loss": 18.0617, + "step": 423970 + }, + { + "epoch": 0.8564664245284163, + "grad_norm": 332.1427917480469, + "learning_rate": 6.877849742128423e-07, + "loss": 15.8026, + "step": 423980 + }, + { + "epoch": 0.8564866251611001, + "grad_norm": 429.5103759765625, + "learning_rate": 6.876083035792408e-07, + "loss": 13.0902, + "step": 423990 + }, + { + "epoch": 0.8565068257937839, + "grad_norm": 616.64208984375, + "learning_rate": 6.874316539637127e-07, + "loss": 26.6816, + "step": 424000 + }, + { + "epoch": 0.8565270264264677, + "grad_norm": 420.34564208984375, + "learning_rate": 6.872550253671207e-07, + "loss": 23.2197, + "step": 424010 + }, + { + "epoch": 0.8565472270591515, + "grad_norm": 1443.5985107421875, + "learning_rate": 6.870784177903244e-07, + "loss": 31.7041, + "step": 424020 + }, + { + "epoch": 0.8565674276918354, + "grad_norm": 152.979248046875, + "learning_rate": 6.869018312341841e-07, + "loss": 12.8966, + "step": 424030 + }, + { + "epoch": 0.8565876283245192, + "grad_norm": 279.4204406738281, + "learning_rate": 6.86725265699561e-07, + "loss": 17.8656, + "step": 424040 + }, + { + "epoch": 0.8566078289572029, + "grad_norm": 646.5421142578125, + "learning_rate": 6.865487211873167e-07, + "loss": 19.8595, + "step": 424050 + }, + { + "epoch": 0.8566280295898867, + "grad_norm": 416.0545654296875, + "learning_rate": 6.863721976983112e-07, + "loss": 26.8425, + "step": 424060 + }, + { + "epoch": 0.8566482302225705, + "grad_norm": 922.7435302734375, + "learning_rate": 6.861956952334031e-07, + "loss": 17.239, + "step": 424070 + }, + { + "epoch": 0.8566684308552543, + "grad_norm": 240.82968139648438, + "learning_rate": 6.860192137934552e-07, + "loss": 19.2269, + "step": 424080 + }, + { + "epoch": 0.8566886314879382, + "grad_norm": 439.35784912109375, + "learning_rate": 6.858427533793261e-07, + "loss": 13.4926, + "step": 424090 + }, + { + "epoch": 0.856708832120622, + "grad_norm": 588.150146484375, + "learning_rate": 6.856663139918751e-07, + "loss": 10.181, + "step": 424100 + }, + { + "epoch": 0.8567290327533058, + "grad_norm": 288.3674011230469, + "learning_rate": 6.854898956319644e-07, + "loss": 25.9362, + "step": 424110 + }, + { + "epoch": 0.8567492333859896, + "grad_norm": 256.4996643066406, + "learning_rate": 6.853134983004517e-07, + "loss": 7.8227, + "step": 424120 + }, + { + "epoch": 0.8567694340186734, + "grad_norm": 452.1529846191406, + "learning_rate": 6.851371219981989e-07, + "loss": 16.0354, + "step": 424130 + }, + { + "epoch": 0.8567896346513573, + "grad_norm": 287.13458251953125, + "learning_rate": 6.849607667260643e-07, + "loss": 25.9335, + "step": 424140 + }, + { + "epoch": 0.8568098352840411, + "grad_norm": 425.8190002441406, + "learning_rate": 6.847844324849062e-07, + "loss": 40.5554, + "step": 424150 + }, + { + "epoch": 0.8568300359167249, + "grad_norm": 180.3892822265625, + "learning_rate": 6.846081192755871e-07, + "loss": 12.232, + "step": 424160 + }, + { + "epoch": 0.8568502365494087, + "grad_norm": 622.3618774414062, + "learning_rate": 6.844318270989631e-07, + "loss": 16.899, + 
"step": 424170 + }, + { + "epoch": 0.8568704371820925, + "grad_norm": 257.9329528808594, + "learning_rate": 6.842555559558961e-07, + "loss": 13.4867, + "step": 424180 + }, + { + "epoch": 0.8568906378147764, + "grad_norm": 330.57879638671875, + "learning_rate": 6.840793058472434e-07, + "loss": 11.9845, + "step": 424190 + }, + { + "epoch": 0.8569108384474602, + "grad_norm": 922.0054321289062, + "learning_rate": 6.839030767738653e-07, + "loss": 19.7079, + "step": 424200 + }, + { + "epoch": 0.856931039080144, + "grad_norm": 719.37109375, + "learning_rate": 6.837268687366199e-07, + "loss": 25.35, + "step": 424210 + }, + { + "epoch": 0.8569512397128278, + "grad_norm": 421.06842041015625, + "learning_rate": 6.835506817363657e-07, + "loss": 15.7544, + "step": 424220 + }, + { + "epoch": 0.8569714403455116, + "grad_norm": 64.48709869384766, + "learning_rate": 6.83374515773963e-07, + "loss": 25.4828, + "step": 424230 + }, + { + "epoch": 0.8569916409781955, + "grad_norm": 391.7761535644531, + "learning_rate": 6.831983708502693e-07, + "loss": 13.6241, + "step": 424240 + }, + { + "epoch": 0.8570118416108793, + "grad_norm": 389.82110595703125, + "learning_rate": 6.830222469661419e-07, + "loss": 10.5855, + "step": 424250 + }, + { + "epoch": 0.8570320422435631, + "grad_norm": 359.3149108886719, + "learning_rate": 6.828461441224405e-07, + "loss": 16.1136, + "step": 424260 + }, + { + "epoch": 0.8570522428762469, + "grad_norm": 162.98236083984375, + "learning_rate": 6.826700623200255e-07, + "loss": 12.2284, + "step": 424270 + }, + { + "epoch": 0.8570724435089307, + "grad_norm": 303.9280090332031, + "learning_rate": 6.824940015597514e-07, + "loss": 15.9212, + "step": 424280 + }, + { + "epoch": 0.8570926441416146, + "grad_norm": 93.1552963256836, + "learning_rate": 6.823179618424774e-07, + "loss": 6.9389, + "step": 424290 + }, + { + "epoch": 0.8571128447742983, + "grad_norm": 265.16131591796875, + "learning_rate": 6.821419431690629e-07, + "loss": 12.7053, + "step": 424300 + }, + { + "epoch": 0.8571330454069821, + "grad_norm": 131.3051300048828, + "learning_rate": 6.819659455403654e-07, + "loss": 12.4221, + "step": 424310 + }, + { + "epoch": 0.8571532460396659, + "grad_norm": 738.7040405273438, + "learning_rate": 6.817899689572405e-07, + "loss": 18.3596, + "step": 424320 + }, + { + "epoch": 0.8571734466723497, + "grad_norm": 505.9089660644531, + "learning_rate": 6.816140134205479e-07, + "loss": 17.632, + "step": 424330 + }, + { + "epoch": 0.8571936473050336, + "grad_norm": 440.5530700683594, + "learning_rate": 6.81438078931147e-07, + "loss": 14.6219, + "step": 424340 + }, + { + "epoch": 0.8572138479377174, + "grad_norm": 101.6639404296875, + "learning_rate": 6.81262165489891e-07, + "loss": 13.628, + "step": 424350 + }, + { + "epoch": 0.8572340485704012, + "grad_norm": 87.83068084716797, + "learning_rate": 6.810862730976392e-07, + "loss": 12.6355, + "step": 424360 + }, + { + "epoch": 0.857254249203085, + "grad_norm": 340.1312561035156, + "learning_rate": 6.809104017552503e-07, + "loss": 10.5935, + "step": 424370 + }, + { + "epoch": 0.8572744498357688, + "grad_norm": 155.0587158203125, + "learning_rate": 6.807345514635805e-07, + "loss": 17.6908, + "step": 424380 + }, + { + "epoch": 0.8572946504684527, + "grad_norm": 348.400146484375, + "learning_rate": 6.80558722223485e-07, + "loss": 13.1934, + "step": 424390 + }, + { + "epoch": 0.8573148511011365, + "grad_norm": 300.3902893066406, + "learning_rate": 6.803829140358237e-07, + "loss": 20.5829, + "step": 424400 + }, + { + "epoch": 0.8573350517338203, + "grad_norm": 
574.2684326171875, + "learning_rate": 6.802071269014527e-07, + "loss": 20.713, + "step": 424410 + }, + { + "epoch": 0.8573552523665041, + "grad_norm": 401.4273376464844, + "learning_rate": 6.800313608212261e-07, + "loss": 17.8327, + "step": 424420 + }, + { + "epoch": 0.8573754529991879, + "grad_norm": 308.0999450683594, + "learning_rate": 6.798556157960046e-07, + "loss": 13.0263, + "step": 424430 + }, + { + "epoch": 0.8573956536318718, + "grad_norm": 250.98672485351562, + "learning_rate": 6.796798918266417e-07, + "loss": 21.3625, + "step": 424440 + }, + { + "epoch": 0.8574158542645556, + "grad_norm": 9.2577543258667, + "learning_rate": 6.795041889139958e-07, + "loss": 17.9683, + "step": 424450 + }, + { + "epoch": 0.8574360548972394, + "grad_norm": 317.8941345214844, + "learning_rate": 6.793285070589229e-07, + "loss": 18.9148, + "step": 424460 + }, + { + "epoch": 0.8574562555299232, + "grad_norm": 299.7467346191406, + "learning_rate": 6.79152846262277e-07, + "loss": 29.3304, + "step": 424470 + }, + { + "epoch": 0.857476456162607, + "grad_norm": 391.6691589355469, + "learning_rate": 6.789772065249178e-07, + "loss": 12.1778, + "step": 424480 + }, + { + "epoch": 0.8574966567952909, + "grad_norm": 327.8114929199219, + "learning_rate": 6.788015878476983e-07, + "loss": 12.3476, + "step": 424490 + }, + { + "epoch": 0.8575168574279747, + "grad_norm": 172.25738525390625, + "learning_rate": 6.786259902314768e-07, + "loss": 12.297, + "step": 424500 + }, + { + "epoch": 0.8575370580606585, + "grad_norm": 308.32952880859375, + "learning_rate": 6.784504136771075e-07, + "loss": 25.0714, + "step": 424510 + }, + { + "epoch": 0.8575572586933423, + "grad_norm": 362.46392822265625, + "learning_rate": 6.782748581854471e-07, + "loss": 14.4293, + "step": 424520 + }, + { + "epoch": 0.8575774593260261, + "grad_norm": 24.034151077270508, + "learning_rate": 6.780993237573513e-07, + "loss": 10.6712, + "step": 424530 + }, + { + "epoch": 0.85759765995871, + "grad_norm": 383.0001220703125, + "learning_rate": 6.779238103936742e-07, + "loss": 17.6843, + "step": 424540 + }, + { + "epoch": 0.8576178605913938, + "grad_norm": 189.0929412841797, + "learning_rate": 6.777483180952732e-07, + "loss": 13.5381, + "step": 424550 + }, + { + "epoch": 0.8576380612240775, + "grad_norm": 70.52850341796875, + "learning_rate": 6.775728468630027e-07, + "loss": 20.7291, + "step": 424560 + }, + { + "epoch": 0.8576582618567613, + "grad_norm": 336.09002685546875, + "learning_rate": 6.773973966977165e-07, + "loss": 26.8984, + "step": 424570 + }, + { + "epoch": 0.8576784624894451, + "grad_norm": 489.1164245605469, + "learning_rate": 6.772219676002717e-07, + "loss": 18.0368, + "step": 424580 + }, + { + "epoch": 0.857698663122129, + "grad_norm": 230.7914581298828, + "learning_rate": 6.770465595715231e-07, + "loss": 16.2012, + "step": 424590 + }, + { + "epoch": 0.8577188637548128, + "grad_norm": 262.0990905761719, + "learning_rate": 6.768711726123261e-07, + "loss": 12.7744, + "step": 424600 + }, + { + "epoch": 0.8577390643874966, + "grad_norm": 402.1889343261719, + "learning_rate": 6.76695806723533e-07, + "loss": 15.6399, + "step": 424610 + }, + { + "epoch": 0.8577592650201804, + "grad_norm": 267.36810302734375, + "learning_rate": 6.765204619060012e-07, + "loss": 33.8587, + "step": 424620 + }, + { + "epoch": 0.8577794656528642, + "grad_norm": 326.3402404785156, + "learning_rate": 6.763451381605846e-07, + "loss": 15.6711, + "step": 424630 + }, + { + "epoch": 0.857799666285548, + "grad_norm": 11.367655754089355, + "learning_rate": 6.761698354881363e-07, 
+ "loss": 43.7316, + "step": 424640 + }, + { + "epoch": 0.8578198669182319, + "grad_norm": 635.5440673828125, + "learning_rate": 6.759945538895119e-07, + "loss": 23.3102, + "step": 424650 + }, + { + "epoch": 0.8578400675509157, + "grad_norm": 849.0969848632812, + "learning_rate": 6.758192933655667e-07, + "loss": 24.0309, + "step": 424660 + }, + { + "epoch": 0.8578602681835995, + "grad_norm": 350.1836242675781, + "learning_rate": 6.756440539171533e-07, + "loss": 17.4814, + "step": 424670 + }, + { + "epoch": 0.8578804688162833, + "grad_norm": 686.380615234375, + "learning_rate": 6.754688355451256e-07, + "loss": 16.0547, + "step": 424680 + }, + { + "epoch": 0.8579006694489671, + "grad_norm": 292.45770263671875, + "learning_rate": 6.752936382503394e-07, + "loss": 19.9808, + "step": 424690 + }, + { + "epoch": 0.857920870081651, + "grad_norm": 641.3974609375, + "learning_rate": 6.751184620336471e-07, + "loss": 21.2562, + "step": 424700 + }, + { + "epoch": 0.8579410707143348, + "grad_norm": 272.4770202636719, + "learning_rate": 6.749433068959022e-07, + "loss": 8.5196, + "step": 424710 + }, + { + "epoch": 0.8579612713470186, + "grad_norm": 308.81781005859375, + "learning_rate": 6.747681728379601e-07, + "loss": 16.7534, + "step": 424720 + }, + { + "epoch": 0.8579814719797024, + "grad_norm": 365.3183898925781, + "learning_rate": 6.745930598606721e-07, + "loss": 27.0763, + "step": 424730 + }, + { + "epoch": 0.8580016726123862, + "grad_norm": 418.2502746582031, + "learning_rate": 6.744179679648943e-07, + "loss": 14.3722, + "step": 424740 + }, + { + "epoch": 0.8580218732450701, + "grad_norm": 421.59576416015625, + "learning_rate": 6.742428971514786e-07, + "loss": 15.017, + "step": 424750 + }, + { + "epoch": 0.8580420738777539, + "grad_norm": 524.6824340820312, + "learning_rate": 6.74067847421277e-07, + "loss": 30.5549, + "step": 424760 + }, + { + "epoch": 0.8580622745104377, + "grad_norm": 423.94622802734375, + "learning_rate": 6.738928187751454e-07, + "loss": 17.7713, + "step": 424770 + }, + { + "epoch": 0.8580824751431215, + "grad_norm": 554.2315063476562, + "learning_rate": 6.737178112139342e-07, + "loss": 14.8536, + "step": 424780 + }, + { + "epoch": 0.8581026757758053, + "grad_norm": 244.727294921875, + "learning_rate": 6.735428247384989e-07, + "loss": 26.6349, + "step": 424790 + }, + { + "epoch": 0.8581228764084892, + "grad_norm": 278.85321044921875, + "learning_rate": 6.733678593496901e-07, + "loss": 13.8719, + "step": 424800 + }, + { + "epoch": 0.8581430770411729, + "grad_norm": 419.3354797363281, + "learning_rate": 6.731929150483624e-07, + "loss": 13.6345, + "step": 424810 + }, + { + "epoch": 0.8581632776738567, + "grad_norm": 365.9506530761719, + "learning_rate": 6.73017991835368e-07, + "loss": 15.2896, + "step": 424820 + }, + { + "epoch": 0.8581834783065405, + "grad_norm": 290.0882873535156, + "learning_rate": 6.728430897115578e-07, + "loss": 25.9369, + "step": 424830 + }, + { + "epoch": 0.8582036789392243, + "grad_norm": 205.87440490722656, + "learning_rate": 6.726682086777869e-07, + "loss": 14.2771, + "step": 424840 + }, + { + "epoch": 0.8582238795719082, + "grad_norm": 305.48828125, + "learning_rate": 6.724933487349061e-07, + "loss": 17.0883, + "step": 424850 + }, + { + "epoch": 0.858244080204592, + "grad_norm": 160.75222778320312, + "learning_rate": 6.723185098837665e-07, + "loss": 37.2009, + "step": 424860 + }, + { + "epoch": 0.8582642808372758, + "grad_norm": 272.7733154296875, + "learning_rate": 6.721436921252223e-07, + "loss": 31.2173, + "step": 424870 + }, + { + "epoch": 
0.8582844814699596, + "grad_norm": 170.22232055664062, + "learning_rate": 6.719688954601266e-07, + "loss": 27.0385, + "step": 424880 + }, + { + "epoch": 0.8583046821026434, + "grad_norm": 419.1109924316406, + "learning_rate": 6.717941198893274e-07, + "loss": 14.9501, + "step": 424890 + }, + { + "epoch": 0.8583248827353273, + "grad_norm": 344.0302734375, + "learning_rate": 6.716193654136788e-07, + "loss": 25.7425, + "step": 424900 + }, + { + "epoch": 0.8583450833680111, + "grad_norm": 112.04228210449219, + "learning_rate": 6.714446320340334e-07, + "loss": 11.3657, + "step": 424910 + }, + { + "epoch": 0.8583652840006949, + "grad_norm": 580.894775390625, + "learning_rate": 6.712699197512418e-07, + "loss": 11.5984, + "step": 424920 + }, + { + "epoch": 0.8583854846333787, + "grad_norm": 150.63763427734375, + "learning_rate": 6.710952285661549e-07, + "loss": 32.7941, + "step": 424930 + }, + { + "epoch": 0.8584056852660625, + "grad_norm": 569.3262939453125, + "learning_rate": 6.709205584796241e-07, + "loss": 15.6715, + "step": 424940 + }, + { + "epoch": 0.8584258858987464, + "grad_norm": 216.56661987304688, + "learning_rate": 6.707459094925045e-07, + "loss": 10.321, + "step": 424950 + }, + { + "epoch": 0.8584460865314302, + "grad_norm": 393.1278381347656, + "learning_rate": 6.705712816056415e-07, + "loss": 16.0812, + "step": 424960 + }, + { + "epoch": 0.858466287164114, + "grad_norm": 350.8961181640625, + "learning_rate": 6.703966748198892e-07, + "loss": 12.8694, + "step": 424970 + }, + { + "epoch": 0.8584864877967978, + "grad_norm": 113.81829833984375, + "learning_rate": 6.702220891360994e-07, + "loss": 11.2908, + "step": 424980 + }, + { + "epoch": 0.8585066884294816, + "grad_norm": 11.053807258605957, + "learning_rate": 6.700475245551218e-07, + "loss": 9.902, + "step": 424990 + }, + { + "epoch": 0.8585268890621655, + "grad_norm": 425.9871520996094, + "learning_rate": 6.698729810778065e-07, + "loss": 21.1804, + "step": 425000 + }, + { + "epoch": 0.8585470896948493, + "grad_norm": 581.9754638671875, + "learning_rate": 6.696984587050065e-07, + "loss": 12.3986, + "step": 425010 + }, + { + "epoch": 0.8585672903275331, + "grad_norm": 212.0026092529297, + "learning_rate": 6.695239574375706e-07, + "loss": 8.5076, + "step": 425020 + }, + { + "epoch": 0.8585874909602169, + "grad_norm": 72.91810607910156, + "learning_rate": 6.693494772763487e-07, + "loss": 11.4933, + "step": 425030 + }, + { + "epoch": 0.8586076915929007, + "grad_norm": 143.1288299560547, + "learning_rate": 6.691750182221935e-07, + "loss": 20.9185, + "step": 425040 + }, + { + "epoch": 0.8586278922255846, + "grad_norm": 425.71942138671875, + "learning_rate": 6.69000580275953e-07, + "loss": 13.3819, + "step": 425050 + }, + { + "epoch": 0.8586480928582684, + "grad_norm": 21.489431381225586, + "learning_rate": 6.688261634384791e-07, + "loss": 17.3936, + "step": 425060 + }, + { + "epoch": 0.8586682934909521, + "grad_norm": 41.76399230957031, + "learning_rate": 6.686517677106214e-07, + "loss": 13.0918, + "step": 425070 + }, + { + "epoch": 0.8586884941236359, + "grad_norm": 32.80811309814453, + "learning_rate": 6.684773930932281e-07, + "loss": 15.1117, + "step": 425080 + }, + { + "epoch": 0.8587086947563197, + "grad_norm": 69.80337524414062, + "learning_rate": 6.683030395871526e-07, + "loss": 30.6622, + "step": 425090 + }, + { + "epoch": 0.8587288953890035, + "grad_norm": 185.21798706054688, + "learning_rate": 6.681287071932408e-07, + "loss": 22.5686, + "step": 425100 + }, + { + "epoch": 0.8587490960216874, + "grad_norm": 478.83819580078125, + 
"learning_rate": 6.679543959123458e-07, + "loss": 20.441, + "step": 425110 + }, + { + "epoch": 0.8587692966543712, + "grad_norm": 399.27178955078125, + "learning_rate": 6.677801057453143e-07, + "loss": 16.9462, + "step": 425120 + }, + { + "epoch": 0.858789497287055, + "grad_norm": 411.01025390625, + "learning_rate": 6.676058366929988e-07, + "loss": 11.9585, + "step": 425130 + }, + { + "epoch": 0.8588096979197388, + "grad_norm": 322.119384765625, + "learning_rate": 6.674315887562466e-07, + "loss": 16.2444, + "step": 425140 + }, + { + "epoch": 0.8588298985524226, + "grad_norm": 285.550048828125, + "learning_rate": 6.672573619359063e-07, + "loss": 21.3185, + "step": 425150 + }, + { + "epoch": 0.8588500991851065, + "grad_norm": 260.2703857421875, + "learning_rate": 6.67083156232829e-07, + "loss": 28.233, + "step": 425160 + }, + { + "epoch": 0.8588702998177903, + "grad_norm": 317.91375732421875, + "learning_rate": 6.669089716478627e-07, + "loss": 24.1974, + "step": 425170 + }, + { + "epoch": 0.8588905004504741, + "grad_norm": 188.8263702392578, + "learning_rate": 6.667348081818559e-07, + "loss": 9.7247, + "step": 425180 + }, + { + "epoch": 0.8589107010831579, + "grad_norm": 616.5370483398438, + "learning_rate": 6.665606658356583e-07, + "loss": 16.824, + "step": 425190 + }, + { + "epoch": 0.8589309017158417, + "grad_norm": 105.1166000366211, + "learning_rate": 6.663865446101192e-07, + "loss": 18.8522, + "step": 425200 + }, + { + "epoch": 0.8589511023485256, + "grad_norm": 127.76112365722656, + "learning_rate": 6.662124445060863e-07, + "loss": 16.4562, + "step": 425210 + }, + { + "epoch": 0.8589713029812094, + "grad_norm": 267.9668273925781, + "learning_rate": 6.660383655244074e-07, + "loss": 12.7942, + "step": 425220 + }, + { + "epoch": 0.8589915036138932, + "grad_norm": 100.9757308959961, + "learning_rate": 6.658643076659327e-07, + "loss": 19.0906, + "step": 425230 + }, + { + "epoch": 0.859011704246577, + "grad_norm": 521.11767578125, + "learning_rate": 6.6569027093151e-07, + "loss": 22.8369, + "step": 425240 + }, + { + "epoch": 0.8590319048792608, + "grad_norm": 638.5565795898438, + "learning_rate": 6.655162553219862e-07, + "loss": 32.4391, + "step": 425250 + }, + { + "epoch": 0.8590521055119447, + "grad_norm": 581.9984741210938, + "learning_rate": 6.653422608382105e-07, + "loss": 28.779, + "step": 425260 + }, + { + "epoch": 0.8590723061446285, + "grad_norm": 107.42449951171875, + "learning_rate": 6.651682874810317e-07, + "loss": 10.0435, + "step": 425270 + }, + { + "epoch": 0.8590925067773123, + "grad_norm": 673.4070434570312, + "learning_rate": 6.649943352512972e-07, + "loss": 20.3799, + "step": 425280 + }, + { + "epoch": 0.8591127074099961, + "grad_norm": 507.18798828125, + "learning_rate": 6.648204041498534e-07, + "loss": 20.1533, + "step": 425290 + }, + { + "epoch": 0.8591329080426799, + "grad_norm": 315.055908203125, + "learning_rate": 6.646464941775499e-07, + "loss": 10.991, + "step": 425300 + }, + { + "epoch": 0.8591531086753638, + "grad_norm": 265.2852783203125, + "learning_rate": 6.64472605335234e-07, + "loss": 13.6276, + "step": 425310 + }, + { + "epoch": 0.8591733093080475, + "grad_norm": 124.01360321044922, + "learning_rate": 6.642987376237514e-07, + "loss": 29.7952, + "step": 425320 + }, + { + "epoch": 0.8591935099407313, + "grad_norm": 366.8377685546875, + "learning_rate": 6.641248910439518e-07, + "loss": 12.5253, + "step": 425330 + }, + { + "epoch": 0.8592137105734151, + "grad_norm": 379.7523193359375, + "learning_rate": 6.639510655966813e-07, + "loss": 15.6356, + "step": 
425340 + }, + { + "epoch": 0.8592339112060989, + "grad_norm": 288.5459289550781, + "learning_rate": 6.637772612827881e-07, + "loss": 13.3753, + "step": 425350 + }, + { + "epoch": 0.8592541118387828, + "grad_norm": 86.94245147705078, + "learning_rate": 6.636034781031181e-07, + "loss": 13.9261, + "step": 425360 + }, + { + "epoch": 0.8592743124714666, + "grad_norm": 391.8909912109375, + "learning_rate": 6.634297160585184e-07, + "loss": 22.205, + "step": 425370 + }, + { + "epoch": 0.8592945131041504, + "grad_norm": 401.6841125488281, + "learning_rate": 6.632559751498369e-07, + "loss": 16.0234, + "step": 425380 + }, + { + "epoch": 0.8593147137368342, + "grad_norm": 576.306640625, + "learning_rate": 6.630822553779193e-07, + "loss": 30.8965, + "step": 425390 + }, + { + "epoch": 0.859334914369518, + "grad_norm": 179.42271423339844, + "learning_rate": 6.629085567436133e-07, + "loss": 27.6414, + "step": 425400 + }, + { + "epoch": 0.8593551150022019, + "grad_norm": 441.8272705078125, + "learning_rate": 6.627348792477639e-07, + "loss": 20.9805, + "step": 425410 + }, + { + "epoch": 0.8593753156348857, + "grad_norm": 26.4918155670166, + "learning_rate": 6.625612228912199e-07, + "loss": 26.1555, + "step": 425420 + }, + { + "epoch": 0.8593955162675695, + "grad_norm": 261.5885009765625, + "learning_rate": 6.623875876748265e-07, + "loss": 23.7872, + "step": 425430 + }, + { + "epoch": 0.8594157169002533, + "grad_norm": 585.4600219726562, + "learning_rate": 6.622139735994288e-07, + "loss": 18.8284, + "step": 425440 + }, + { + "epoch": 0.8594359175329371, + "grad_norm": 181.7188262939453, + "learning_rate": 6.620403806658754e-07, + "loss": 8.9154, + "step": 425450 + }, + { + "epoch": 0.859456118165621, + "grad_norm": 448.3474426269531, + "learning_rate": 6.618668088750107e-07, + "loss": 23.9844, + "step": 425460 + }, + { + "epoch": 0.8594763187983048, + "grad_norm": 234.59646606445312, + "learning_rate": 6.616932582276798e-07, + "loss": 17.3415, + "step": 425470 + }, + { + "epoch": 0.8594965194309886, + "grad_norm": 122.73247528076172, + "learning_rate": 6.615197287247299e-07, + "loss": 15.1149, + "step": 425480 + }, + { + "epoch": 0.8595167200636724, + "grad_norm": 1209.4432373046875, + "learning_rate": 6.61346220367009e-07, + "loss": 10.0193, + "step": 425490 + }, + { + "epoch": 0.8595369206963562, + "grad_norm": 872.7515258789062, + "learning_rate": 6.611727331553585e-07, + "loss": 22.8392, + "step": 425500 + }, + { + "epoch": 0.85955712132904, + "grad_norm": 473.6070556640625, + "learning_rate": 6.609992670906251e-07, + "loss": 14.2966, + "step": 425510 + }, + { + "epoch": 0.8595773219617239, + "grad_norm": 349.6256103515625, + "learning_rate": 6.608258221736568e-07, + "loss": 18.4509, + "step": 425520 + }, + { + "epoch": 0.8595975225944077, + "grad_norm": 724.1307983398438, + "learning_rate": 6.60652398405297e-07, + "loss": 27.9618, + "step": 425530 + }, + { + "epoch": 0.8596177232270915, + "grad_norm": 521.7205810546875, + "learning_rate": 6.604789957863899e-07, + "loss": 18.731, + "step": 425540 + }, + { + "epoch": 0.8596379238597753, + "grad_norm": 652.4680786132812, + "learning_rate": 6.603056143177817e-07, + "loss": 16.1501, + "step": 425550 + }, + { + "epoch": 0.8596581244924592, + "grad_norm": 0.0, + "learning_rate": 6.601322540003202e-07, + "loss": 27.6205, + "step": 425560 + }, + { + "epoch": 0.859678325125143, + "grad_norm": 201.71774291992188, + "learning_rate": 6.599589148348451e-07, + "loss": 17.4737, + "step": 425570 + }, + { + "epoch": 0.8596985257578267, + "grad_norm": 604.082763671875, + 
"learning_rate": 6.597855968222038e-07, + "loss": 13.4022, + "step": 425580 + }, + { + "epoch": 0.8597187263905105, + "grad_norm": 367.8940734863281, + "learning_rate": 6.596122999632426e-07, + "loss": 21.1846, + "step": 425590 + }, + { + "epoch": 0.8597389270231943, + "grad_norm": 194.28854370117188, + "learning_rate": 6.594390242588044e-07, + "loss": 13.4595, + "step": 425600 + }, + { + "epoch": 0.8597591276558781, + "grad_norm": 5.756259441375732, + "learning_rate": 6.592657697097333e-07, + "loss": 18.238, + "step": 425610 + }, + { + "epoch": 0.859779328288562, + "grad_norm": 582.0173950195312, + "learning_rate": 6.590925363168749e-07, + "loss": 25.401, + "step": 425620 + }, + { + "epoch": 0.8597995289212458, + "grad_norm": 677.1604614257812, + "learning_rate": 6.589193240810732e-07, + "loss": 29.9261, + "step": 425630 + }, + { + "epoch": 0.8598197295539296, + "grad_norm": 0.6434399485588074, + "learning_rate": 6.587461330031714e-07, + "loss": 14.7036, + "step": 425640 + }, + { + "epoch": 0.8598399301866134, + "grad_norm": 413.88775634765625, + "learning_rate": 6.585729630840149e-07, + "loss": 17.6794, + "step": 425650 + }, + { + "epoch": 0.8598601308192972, + "grad_norm": 194.02024841308594, + "learning_rate": 6.583998143244463e-07, + "loss": 14.493, + "step": 425660 + }, + { + "epoch": 0.8598803314519811, + "grad_norm": 146.6978759765625, + "learning_rate": 6.582266867253118e-07, + "loss": 7.9778, + "step": 425670 + }, + { + "epoch": 0.8599005320846649, + "grad_norm": 225.89418029785156, + "learning_rate": 6.580535802874538e-07, + "loss": 15.6481, + "step": 425680 + }, + { + "epoch": 0.8599207327173487, + "grad_norm": 421.75567626953125, + "learning_rate": 6.578804950117146e-07, + "loss": 14.5297, + "step": 425690 + }, + { + "epoch": 0.8599409333500325, + "grad_norm": 206.58993530273438, + "learning_rate": 6.577074308989406e-07, + "loss": 22.0901, + "step": 425700 + }, + { + "epoch": 0.8599611339827163, + "grad_norm": 30.304372787475586, + "learning_rate": 6.575343879499729e-07, + "loss": 17.5016, + "step": 425710 + }, + { + "epoch": 0.8599813346154002, + "grad_norm": 396.6864318847656, + "learning_rate": 6.57361366165657e-07, + "loss": 18.5493, + "step": 425720 + }, + { + "epoch": 0.860001535248084, + "grad_norm": 469.9893493652344, + "learning_rate": 6.571883655468336e-07, + "loss": 22.4117, + "step": 425730 + }, + { + "epoch": 0.8600217358807678, + "grad_norm": 226.0044708251953, + "learning_rate": 6.57015386094349e-07, + "loss": 20.228, + "step": 425740 + }, + { + "epoch": 0.8600419365134516, + "grad_norm": 166.0838623046875, + "learning_rate": 6.568424278090446e-07, + "loss": 16.2481, + "step": 425750 + }, + { + "epoch": 0.8600621371461354, + "grad_norm": 412.2013854980469, + "learning_rate": 6.56669490691762e-07, + "loss": 17.67, + "step": 425760 + }, + { + "epoch": 0.8600823377788193, + "grad_norm": 566.22900390625, + "learning_rate": 6.564965747433472e-07, + "loss": 31.0132, + "step": 425770 + }, + { + "epoch": 0.8601025384115031, + "grad_norm": 529.25732421875, + "learning_rate": 6.563236799646405e-07, + "loss": 15.0801, + "step": 425780 + }, + { + "epoch": 0.8601227390441869, + "grad_norm": 350.4403381347656, + "learning_rate": 6.561508063564847e-07, + "loss": 13.9268, + "step": 425790 + }, + { + "epoch": 0.8601429396768707, + "grad_norm": 297.8817138671875, + "learning_rate": 6.559779539197231e-07, + "loss": 30.1499, + "step": 425800 + }, + { + "epoch": 0.8601631403095545, + "grad_norm": 724.6544189453125, + "learning_rate": 6.558051226551992e-07, + "loss": 19.1858, + 
"step": 425810 + }, + { + "epoch": 0.8601833409422384, + "grad_norm": 423.3988952636719, + "learning_rate": 6.556323125637542e-07, + "loss": 12.4289, + "step": 425820 + }, + { + "epoch": 0.8602035415749222, + "grad_norm": 3.7432050704956055, + "learning_rate": 6.554595236462291e-07, + "loss": 21.2461, + "step": 425830 + }, + { + "epoch": 0.8602237422076059, + "grad_norm": 308.4240417480469, + "learning_rate": 6.552867559034687e-07, + "loss": 39.6005, + "step": 425840 + }, + { + "epoch": 0.8602439428402897, + "grad_norm": 232.96771240234375, + "learning_rate": 6.551140093363135e-07, + "loss": 15.7731, + "step": 425850 + }, + { + "epoch": 0.8602641434729735, + "grad_norm": 339.595458984375, + "learning_rate": 6.549412839456048e-07, + "loss": 13.9972, + "step": 425860 + }, + { + "epoch": 0.8602843441056574, + "grad_norm": 749.7958984375, + "learning_rate": 6.547685797321851e-07, + "loss": 24.7508, + "step": 425870 + }, + { + "epoch": 0.8603045447383412, + "grad_norm": 102.43689727783203, + "learning_rate": 6.545958966968974e-07, + "loss": 8.3586, + "step": 425880 + }, + { + "epoch": 0.860324745371025, + "grad_norm": 429.5927429199219, + "learning_rate": 6.544232348405821e-07, + "loss": 18.1928, + "step": 425890 + }, + { + "epoch": 0.8603449460037088, + "grad_norm": 176.30628967285156, + "learning_rate": 6.542505941640803e-07, + "loss": 25.1116, + "step": 425900 + }, + { + "epoch": 0.8603651466363926, + "grad_norm": 614.5752563476562, + "learning_rate": 6.540779746682346e-07, + "loss": 30.6537, + "step": 425910 + }, + { + "epoch": 0.8603853472690765, + "grad_norm": 484.5924377441406, + "learning_rate": 6.53905376353886e-07, + "loss": 19.1837, + "step": 425920 + }, + { + "epoch": 0.8604055479017603, + "grad_norm": 508.8131408691406, + "learning_rate": 6.537327992218745e-07, + "loss": 10.3871, + "step": 425930 + }, + { + "epoch": 0.8604257485344441, + "grad_norm": 277.9728088378906, + "learning_rate": 6.535602432730432e-07, + "loss": 13.4423, + "step": 425940 + }, + { + "epoch": 0.8604459491671279, + "grad_norm": 355.0103759765625, + "learning_rate": 6.533877085082307e-07, + "loss": 12.4517, + "step": 425950 + }, + { + "epoch": 0.8604661497998117, + "grad_norm": 258.963623046875, + "learning_rate": 6.532151949282811e-07, + "loss": 17.3157, + "step": 425960 + }, + { + "epoch": 0.8604863504324956, + "grad_norm": 131.1427764892578, + "learning_rate": 6.53042702534033e-07, + "loss": 11.3277, + "step": 425970 + }, + { + "epoch": 0.8605065510651794, + "grad_norm": 262.5980529785156, + "learning_rate": 6.528702313263264e-07, + "loss": 15.6577, + "step": 425980 + }, + { + "epoch": 0.8605267516978632, + "grad_norm": 415.2807922363281, + "learning_rate": 6.526977813060042e-07, + "loss": 14.8311, + "step": 425990 + }, + { + "epoch": 0.860546952330547, + "grad_norm": 277.50701904296875, + "learning_rate": 6.52525352473905e-07, + "loss": 13.9653, + "step": 426000 + }, + { + "epoch": 0.8605671529632308, + "grad_norm": 771.4279174804688, + "learning_rate": 6.523529448308708e-07, + "loss": 18.6443, + "step": 426010 + }, + { + "epoch": 0.8605873535959147, + "grad_norm": 647.3812255859375, + "learning_rate": 6.521805583777396e-07, + "loss": 17.5931, + "step": 426020 + }, + { + "epoch": 0.8606075542285985, + "grad_norm": 317.23895263671875, + "learning_rate": 6.520081931153544e-07, + "loss": 17.4922, + "step": 426030 + }, + { + "epoch": 0.8606277548612823, + "grad_norm": 698.1004028320312, + "learning_rate": 6.518358490445542e-07, + "loss": 19.6825, + "step": 426040 + }, + { + "epoch": 0.8606479554939661, + 
"grad_norm": 270.4190979003906, + "learning_rate": 6.516635261661775e-07, + "loss": 14.7697, + "step": 426050 + }, + { + "epoch": 0.8606681561266499, + "grad_norm": 208.9061737060547, + "learning_rate": 6.514912244810662e-07, + "loss": 10.7142, + "step": 426060 + }, + { + "epoch": 0.8606883567593338, + "grad_norm": 288.39532470703125, + "learning_rate": 6.513189439900591e-07, + "loss": 16.5989, + "step": 426070 + }, + { + "epoch": 0.8607085573920176, + "grad_norm": 423.8705139160156, + "learning_rate": 6.511466846939956e-07, + "loss": 19.8654, + "step": 426080 + }, + { + "epoch": 0.8607287580247013, + "grad_norm": 144.19081115722656, + "learning_rate": 6.509744465937151e-07, + "loss": 17.9407, + "step": 426090 + }, + { + "epoch": 0.8607489586573851, + "grad_norm": 311.5626525878906, + "learning_rate": 6.508022296900601e-07, + "loss": 22.3135, + "step": 426100 + }, + { + "epoch": 0.8607691592900689, + "grad_norm": 19.088600158691406, + "learning_rate": 6.506300339838656e-07, + "loss": 6.1991, + "step": 426110 + }, + { + "epoch": 0.8607893599227527, + "grad_norm": 200.4326629638672, + "learning_rate": 6.504578594759725e-07, + "loss": 13.0629, + "step": 426120 + }, + { + "epoch": 0.8608095605554366, + "grad_norm": 175.73031616210938, + "learning_rate": 6.502857061672213e-07, + "loss": 24.9918, + "step": 426130 + }, + { + "epoch": 0.8608297611881204, + "grad_norm": 140.0410919189453, + "learning_rate": 6.501135740584502e-07, + "loss": 19.302, + "step": 426140 + }, + { + "epoch": 0.8608499618208042, + "grad_norm": 374.5769348144531, + "learning_rate": 6.499414631504969e-07, + "loss": 24.6695, + "step": 426150 + }, + { + "epoch": 0.860870162453488, + "grad_norm": 289.5231018066406, + "learning_rate": 6.497693734442007e-07, + "loss": 16.6754, + "step": 426160 + }, + { + "epoch": 0.8608903630861718, + "grad_norm": 633.6494750976562, + "learning_rate": 6.495973049404037e-07, + "loss": 17.9832, + "step": 426170 + }, + { + "epoch": 0.8609105637188557, + "grad_norm": 203.72691345214844, + "learning_rate": 6.494252576399395e-07, + "loss": 16.9029, + "step": 426180 + }, + { + "epoch": 0.8609307643515395, + "grad_norm": 121.83488464355469, + "learning_rate": 6.49253231543649e-07, + "loss": 20.0728, + "step": 426190 + }, + { + "epoch": 0.8609509649842233, + "grad_norm": 61.352237701416016, + "learning_rate": 6.490812266523716e-07, + "loss": 16.1185, + "step": 426200 + }, + { + "epoch": 0.8609711656169071, + "grad_norm": 234.11602783203125, + "learning_rate": 6.489092429669447e-07, + "loss": 14.0938, + "step": 426210 + }, + { + "epoch": 0.8609913662495909, + "grad_norm": 657.1200561523438, + "learning_rate": 6.487372804882053e-07, + "loss": 17.6869, + "step": 426220 + }, + { + "epoch": 0.8610115668822748, + "grad_norm": 588.2548828125, + "learning_rate": 6.485653392169938e-07, + "loss": 22.2285, + "step": 426230 + }, + { + "epoch": 0.8610317675149586, + "grad_norm": 148.45741271972656, + "learning_rate": 6.483934191541469e-07, + "loss": 15.8468, + "step": 426240 + }, + { + "epoch": 0.8610519681476424, + "grad_norm": 243.9552459716797, + "learning_rate": 6.482215203005016e-07, + "loss": 18.0027, + "step": 426250 + }, + { + "epoch": 0.8610721687803262, + "grad_norm": 399.02685546875, + "learning_rate": 6.480496426568983e-07, + "loss": 33.1014, + "step": 426260 + }, + { + "epoch": 0.86109236941301, + "grad_norm": 126.6474609375, + "learning_rate": 6.478777862241714e-07, + "loss": 12.1914, + "step": 426270 + }, + { + "epoch": 0.8611125700456939, + "grad_norm": 29.05548095703125, + "learning_rate": 
6.477059510031619e-07, + "loss": 18.6576, + "step": 426280 + }, + { + "epoch": 0.8611327706783777, + "grad_norm": 391.8978271484375, + "learning_rate": 6.475341369947047e-07, + "loss": 18.8539, + "step": 426290 + }, + { + "epoch": 0.8611529713110615, + "grad_norm": 301.7601318359375, + "learning_rate": 6.47362344199639e-07, + "loss": 11.3853, + "step": 426300 + }, + { + "epoch": 0.8611731719437453, + "grad_norm": 172.89324951171875, + "learning_rate": 6.471905726188015e-07, + "loss": 15.71, + "step": 426310 + }, + { + "epoch": 0.8611933725764291, + "grad_norm": 511.9766540527344, + "learning_rate": 6.470188222530282e-07, + "loss": 17.3988, + "step": 426320 + }, + { + "epoch": 0.861213573209113, + "grad_norm": 495.0720520019531, + "learning_rate": 6.468470931031584e-07, + "loss": 26.0315, + "step": 426330 + }, + { + "epoch": 0.8612337738417968, + "grad_norm": 554.4364624023438, + "learning_rate": 6.466753851700264e-07, + "loss": 14.4683, + "step": 426340 + }, + { + "epoch": 0.8612539744744805, + "grad_norm": 499.3392639160156, + "learning_rate": 6.465036984544721e-07, + "loss": 24.2076, + "step": 426350 + }, + { + "epoch": 0.8612741751071643, + "grad_norm": 135.09153747558594, + "learning_rate": 6.463320329573303e-07, + "loss": 9.2683, + "step": 426360 + }, + { + "epoch": 0.8612943757398481, + "grad_norm": 318.33416748046875, + "learning_rate": 6.46160388679437e-07, + "loss": 11.1335, + "step": 426370 + }, + { + "epoch": 0.861314576372532, + "grad_norm": 261.01849365234375, + "learning_rate": 6.459887656216318e-07, + "loss": 19.9245, + "step": 426380 + }, + { + "epoch": 0.8613347770052158, + "grad_norm": 608.0852661132812, + "learning_rate": 6.458171637847488e-07, + "loss": 12.4543, + "step": 426390 + }, + { + "epoch": 0.8613549776378996, + "grad_norm": 101.21218872070312, + "learning_rate": 6.456455831696234e-07, + "loss": 18.111, + "step": 426400 + }, + { + "epoch": 0.8613751782705834, + "grad_norm": 22.630373001098633, + "learning_rate": 6.454740237770934e-07, + "loss": 19.3144, + "step": 426410 + }, + { + "epoch": 0.8613953789032672, + "grad_norm": 196.15382385253906, + "learning_rate": 6.453024856079976e-07, + "loss": 19.9625, + "step": 426420 + }, + { + "epoch": 0.861415579535951, + "grad_norm": 61.11758804321289, + "learning_rate": 6.451309686631668e-07, + "loss": 14.8085, + "step": 426430 + }, + { + "epoch": 0.8614357801686349, + "grad_norm": 243.17691040039062, + "learning_rate": 6.449594729434394e-07, + "loss": 25.2256, + "step": 426440 + }, + { + "epoch": 0.8614559808013187, + "grad_norm": 221.3637237548828, + "learning_rate": 6.447879984496525e-07, + "loss": 24.9462, + "step": 426450 + }, + { + "epoch": 0.8614761814340025, + "grad_norm": 64.93826293945312, + "learning_rate": 6.446165451826409e-07, + "loss": 17.2373, + "step": 426460 + }, + { + "epoch": 0.8614963820666863, + "grad_norm": 665.0004272460938, + "learning_rate": 6.444451131432383e-07, + "loss": 28.1167, + "step": 426470 + }, + { + "epoch": 0.8615165826993701, + "grad_norm": 742.8054809570312, + "learning_rate": 6.442737023322826e-07, + "loss": 17.7511, + "step": 426480 + }, + { + "epoch": 0.861536783332054, + "grad_norm": 982.496337890625, + "learning_rate": 6.441023127506096e-07, + "loss": 25.0334, + "step": 426490 + }, + { + "epoch": 0.8615569839647378, + "grad_norm": 602.4315185546875, + "learning_rate": 6.439309443990532e-07, + "loss": 20.5195, + "step": 426500 + }, + { + "epoch": 0.8615771845974216, + "grad_norm": 155.8933868408203, + "learning_rate": 6.437595972784483e-07, + "loss": 19.0511, + "step": 426510 + 
}, + { + "epoch": 0.8615973852301054, + "grad_norm": 141.42478942871094, + "learning_rate": 6.435882713896319e-07, + "loss": 21.8023, + "step": 426520 + }, + { + "epoch": 0.8616175858627892, + "grad_norm": 461.2997131347656, + "learning_rate": 6.434169667334378e-07, + "loss": 16.3485, + "step": 426530 + }, + { + "epoch": 0.8616377864954731, + "grad_norm": 254.6463623046875, + "learning_rate": 6.432456833106998e-07, + "loss": 31.786, + "step": 426540 + }, + { + "epoch": 0.8616579871281569, + "grad_norm": 549.6669921875, + "learning_rate": 6.43074421122255e-07, + "loss": 15.4421, + "step": 426550 + }, + { + "epoch": 0.8616781877608407, + "grad_norm": 163.59596252441406, + "learning_rate": 6.429031801689362e-07, + "loss": 22.5572, + "step": 426560 + }, + { + "epoch": 0.8616983883935245, + "grad_norm": 173.46153259277344, + "learning_rate": 6.427319604515797e-07, + "loss": 11.6241, + "step": 426570 + }, + { + "epoch": 0.8617185890262083, + "grad_norm": 86.23184204101562, + "learning_rate": 6.425607619710195e-07, + "loss": 14.788, + "step": 426580 + }, + { + "epoch": 0.8617387896588922, + "grad_norm": 461.0710144042969, + "learning_rate": 6.423895847280881e-07, + "loss": 14.6491, + "step": 426590 + }, + { + "epoch": 0.8617589902915759, + "grad_norm": 449.5500793457031, + "learning_rate": 6.422184287236227e-07, + "loss": 15.9077, + "step": 426600 + }, + { + "epoch": 0.8617791909242597, + "grad_norm": 248.44351196289062, + "learning_rate": 6.420472939584549e-07, + "loss": 16.0009, + "step": 426610 + }, + { + "epoch": 0.8617993915569435, + "grad_norm": 529.0774536132812, + "learning_rate": 6.418761804334212e-07, + "loss": 23.6131, + "step": 426620 + }, + { + "epoch": 0.8618195921896273, + "grad_norm": 219.2325897216797, + "learning_rate": 6.417050881493536e-07, + "loss": 23.5891, + "step": 426630 + }, + { + "epoch": 0.8618397928223112, + "grad_norm": 363.240234375, + "learning_rate": 6.415340171070877e-07, + "loss": 13.0453, + "step": 426640 + }, + { + "epoch": 0.861859993454995, + "grad_norm": 181.5204315185547, + "learning_rate": 6.413629673074562e-07, + "loss": 10.0992, + "step": 426650 + }, + { + "epoch": 0.8618801940876788, + "grad_norm": 576.6511840820312, + "learning_rate": 6.411919387512922e-07, + "loss": 18.3, + "step": 426660 + }, + { + "epoch": 0.8619003947203626, + "grad_norm": 520.0042724609375, + "learning_rate": 6.410209314394305e-07, + "loss": 38.815, + "step": 426670 + }, + { + "epoch": 0.8619205953530464, + "grad_norm": 368.30169677734375, + "learning_rate": 6.408499453727046e-07, + "loss": 34.8511, + "step": 426680 + }, + { + "epoch": 0.8619407959857303, + "grad_norm": 88.484619140625, + "learning_rate": 6.406789805519464e-07, + "loss": 12.4439, + "step": 426690 + }, + { + "epoch": 0.8619609966184141, + "grad_norm": 312.1562805175781, + "learning_rate": 6.405080369779898e-07, + "loss": 17.1228, + "step": 426700 + }, + { + "epoch": 0.8619811972510979, + "grad_norm": 394.21160888671875, + "learning_rate": 6.403371146516707e-07, + "loss": 21.2892, + "step": 426710 + }, + { + "epoch": 0.8620013978837817, + "grad_norm": 180.46238708496094, + "learning_rate": 6.401662135738174e-07, + "loss": 14.238, + "step": 426720 + }, + { + "epoch": 0.8620215985164655, + "grad_norm": 1012.4619140625, + "learning_rate": 6.399953337452652e-07, + "loss": 21.8464, + "step": 426730 + }, + { + "epoch": 0.8620417991491494, + "grad_norm": 408.7126159667969, + "learning_rate": 6.398244751668481e-07, + "loss": 12.9432, + "step": 426740 + }, + { + "epoch": 0.8620619997818332, + "grad_norm": 258.2486877441406, 
+ "learning_rate": 6.396536378393975e-07, + "loss": 30.4977, + "step": 426750 + }, + { + "epoch": 0.862082200414517, + "grad_norm": 318.1420593261719, + "learning_rate": 6.394828217637455e-07, + "loss": 13.0637, + "step": 426760 + }, + { + "epoch": 0.8621024010472008, + "grad_norm": 563.1136474609375, + "learning_rate": 6.393120269407249e-07, + "loss": 30.0762, + "step": 426770 + }, + { + "epoch": 0.8621226016798846, + "grad_norm": 844.4476928710938, + "learning_rate": 6.391412533711711e-07, + "loss": 25.8394, + "step": 426780 + }, + { + "epoch": 0.8621428023125685, + "grad_norm": 198.59259033203125, + "learning_rate": 6.389705010559117e-07, + "loss": 15.6251, + "step": 426790 + }, + { + "epoch": 0.8621630029452523, + "grad_norm": 59.635990142822266, + "learning_rate": 6.387997699957815e-07, + "loss": 14.6566, + "step": 426800 + }, + { + "epoch": 0.8621832035779361, + "grad_norm": 131.53367614746094, + "learning_rate": 6.386290601916129e-07, + "loss": 12.1609, + "step": 426810 + }, + { + "epoch": 0.8622034042106199, + "grad_norm": 647.2199096679688, + "learning_rate": 6.384583716442371e-07, + "loss": 28.2555, + "step": 426820 + }, + { + "epoch": 0.8622236048433037, + "grad_norm": 488.17181396484375, + "learning_rate": 6.382877043544855e-07, + "loss": 8.1615, + "step": 426830 + }, + { + "epoch": 0.8622438054759876, + "grad_norm": 1174.1416015625, + "learning_rate": 6.381170583231916e-07, + "loss": 23.6752, + "step": 426840 + }, + { + "epoch": 0.8622640061086714, + "grad_norm": 1267.3780517578125, + "learning_rate": 6.379464335511859e-07, + "loss": 37.9311, + "step": 426850 + }, + { + "epoch": 0.8622842067413551, + "grad_norm": 1164.790771484375, + "learning_rate": 6.377758300392994e-07, + "loss": 21.5773, + "step": 426860 + }, + { + "epoch": 0.8623044073740389, + "grad_norm": 257.47064208984375, + "learning_rate": 6.376052477883655e-07, + "loss": 18.7969, + "step": 426870 + }, + { + "epoch": 0.8623246080067227, + "grad_norm": 348.642333984375, + "learning_rate": 6.374346867992138e-07, + "loss": 10.7251, + "step": 426880 + }, + { + "epoch": 0.8623448086394065, + "grad_norm": 360.3748779296875, + "learning_rate": 6.372641470726765e-07, + "loss": 20.3781, + "step": 426890 + }, + { + "epoch": 0.8623650092720904, + "grad_norm": 258.9471130371094, + "learning_rate": 6.370936286095842e-07, + "loss": 12.6942, + "step": 426900 + }, + { + "epoch": 0.8623852099047742, + "grad_norm": 261.1137390136719, + "learning_rate": 6.369231314107693e-07, + "loss": 19.6193, + "step": 426910 + }, + { + "epoch": 0.862405410537458, + "grad_norm": 265.93951416015625, + "learning_rate": 6.36752655477062e-07, + "loss": 24.8202, + "step": 426920 + }, + { + "epoch": 0.8624256111701418, + "grad_norm": 3038.77490234375, + "learning_rate": 6.36582200809292e-07, + "loss": 29.0701, + "step": 426930 + }, + { + "epoch": 0.8624458118028256, + "grad_norm": 328.9877624511719, + "learning_rate": 6.36411767408292e-07, + "loss": 17.7837, + "step": 426940 + }, + { + "epoch": 0.8624660124355095, + "grad_norm": 327.4744873046875, + "learning_rate": 6.362413552748908e-07, + "loss": 33.9574, + "step": 426950 + }, + { + "epoch": 0.8624862130681933, + "grad_norm": 380.6587219238281, + "learning_rate": 6.360709644099211e-07, + "loss": 20.3324, + "step": 426960 + }, + { + "epoch": 0.8625064137008771, + "grad_norm": 335.7801208496094, + "learning_rate": 6.359005948142122e-07, + "loss": 19.1392, + "step": 426970 + }, + { + "epoch": 0.8625266143335609, + "grad_norm": 442.7544860839844, + "learning_rate": 6.357302464885934e-07, + "loss": 15.0711, 
+ "step": 426980 + }, + { + "epoch": 0.8625468149662447, + "grad_norm": 639.4885864257812, + "learning_rate": 6.355599194338974e-07, + "loss": 28.2678, + "step": 426990 + }, + { + "epoch": 0.8625670155989286, + "grad_norm": 170.79458618164062, + "learning_rate": 6.353896136509524e-07, + "loss": 11.3887, + "step": 427000 + }, + { + "epoch": 0.8625872162316124, + "grad_norm": 80.47013854980469, + "learning_rate": 6.352193291405884e-07, + "loss": 9.2106, + "step": 427010 + }, + { + "epoch": 0.8626074168642962, + "grad_norm": 607.91259765625, + "learning_rate": 6.350490659036362e-07, + "loss": 23.6392, + "step": 427020 + }, + { + "epoch": 0.86262761749698, + "grad_norm": 184.9613494873047, + "learning_rate": 6.348788239409271e-07, + "loss": 11.4782, + "step": 427030 + }, + { + "epoch": 0.8626478181296638, + "grad_norm": 74.06925201416016, + "learning_rate": 6.347086032532873e-07, + "loss": 13.4751, + "step": 427040 + }, + { + "epoch": 0.8626680187623477, + "grad_norm": 476.8874816894531, + "learning_rate": 6.345384038415486e-07, + "loss": 14.9326, + "step": 427050 + }, + { + "epoch": 0.8626882193950315, + "grad_norm": 196.14111328125, + "learning_rate": 6.343682257065408e-07, + "loss": 15.7272, + "step": 427060 + }, + { + "epoch": 0.8627084200277153, + "grad_norm": 679.0256958007812, + "learning_rate": 6.341980688490934e-07, + "loss": 38.8679, + "step": 427070 + }, + { + "epoch": 0.8627286206603991, + "grad_norm": 1150.7294921875, + "learning_rate": 6.340279332700333e-07, + "loss": 22.0961, + "step": 427080 + }, + { + "epoch": 0.862748821293083, + "grad_norm": 202.36204528808594, + "learning_rate": 6.338578189701921e-07, + "loss": 12.8232, + "step": 427090 + }, + { + "epoch": 0.8627690219257668, + "grad_norm": 753.1251831054688, + "learning_rate": 6.336877259504004e-07, + "loss": 15.3143, + "step": 427100 + }, + { + "epoch": 0.8627892225584506, + "grad_norm": 265.86480712890625, + "learning_rate": 6.335176542114829e-07, + "loss": 21.6381, + "step": 427110 + }, + { + "epoch": 0.8628094231911343, + "grad_norm": 627.78076171875, + "learning_rate": 6.333476037542707e-07, + "loss": 20.5898, + "step": 427120 + }, + { + "epoch": 0.8628296238238181, + "grad_norm": 273.6964416503906, + "learning_rate": 6.331775745795937e-07, + "loss": 28.021, + "step": 427130 + }, + { + "epoch": 0.8628498244565019, + "grad_norm": 639.4112548828125, + "learning_rate": 6.330075666882795e-07, + "loss": 22.1546, + "step": 427140 + }, + { + "epoch": 0.8628700250891858, + "grad_norm": 623.170166015625, + "learning_rate": 6.328375800811559e-07, + "loss": 19.3433, + "step": 427150 + }, + { + "epoch": 0.8628902257218696, + "grad_norm": 471.6022033691406, + "learning_rate": 6.326676147590533e-07, + "loss": 19.5129, + "step": 427160 + }, + { + "epoch": 0.8629104263545534, + "grad_norm": 424.69146728515625, + "learning_rate": 6.324976707227993e-07, + "loss": 6.4254, + "step": 427170 + }, + { + "epoch": 0.8629306269872372, + "grad_norm": 0.0, + "learning_rate": 6.323277479732203e-07, + "loss": 13.1277, + "step": 427180 + }, + { + "epoch": 0.862950827619921, + "grad_norm": 962.615234375, + "learning_rate": 6.321578465111478e-07, + "loss": 26.358, + "step": 427190 + }, + { + "epoch": 0.8629710282526049, + "grad_norm": 536.439453125, + "learning_rate": 6.319879663374068e-07, + "loss": 22.4644, + "step": 427200 + }, + { + "epoch": 0.8629912288852887, + "grad_norm": 307.97930908203125, + "learning_rate": 6.318181074528279e-07, + "loss": 26.8442, + "step": 427210 + }, + { + "epoch": 0.8630114295179725, + "grad_norm": 415.0699768066406, + 
"learning_rate": 6.316482698582365e-07, + "loss": 9.4954, + "step": 427220 + }, + { + "epoch": 0.8630316301506563, + "grad_norm": 51.516761779785156, + "learning_rate": 6.314784535544627e-07, + "loss": 21.2377, + "step": 427230 + }, + { + "epoch": 0.8630518307833401, + "grad_norm": 110.73899841308594, + "learning_rate": 6.313086585423316e-07, + "loss": 14.6737, + "step": 427240 + }, + { + "epoch": 0.863072031416024, + "grad_norm": 384.1722106933594, + "learning_rate": 6.311388848226741e-07, + "loss": 21.8881, + "step": 427250 + }, + { + "epoch": 0.8630922320487078, + "grad_norm": 262.3652648925781, + "learning_rate": 6.309691323963152e-07, + "loss": 23.4864, + "step": 427260 + }, + { + "epoch": 0.8631124326813916, + "grad_norm": 3200.091552734375, + "learning_rate": 6.307994012640822e-07, + "loss": 31.6897, + "step": 427270 + }, + { + "epoch": 0.8631326333140754, + "grad_norm": 670.214599609375, + "learning_rate": 6.30629691426804e-07, + "loss": 18.9, + "step": 427280 + }, + { + "epoch": 0.8631528339467592, + "grad_norm": 600.1300659179688, + "learning_rate": 6.304600028853065e-07, + "loss": 17.5757, + "step": 427290 + }, + { + "epoch": 0.863173034579443, + "grad_norm": 689.7635498046875, + "learning_rate": 6.302903356404161e-07, + "loss": 19.7232, + "step": 427300 + }, + { + "epoch": 0.8631932352121269, + "grad_norm": 251.3146514892578, + "learning_rate": 6.301206896929607e-07, + "loss": 18.1964, + "step": 427310 + }, + { + "epoch": 0.8632134358448107, + "grad_norm": 415.42327880859375, + "learning_rate": 6.29951065043769e-07, + "loss": 17.6401, + "step": 427320 + }, + { + "epoch": 0.8632336364774945, + "grad_norm": 302.36083984375, + "learning_rate": 6.297814616936637e-07, + "loss": 23.2883, + "step": 427330 + }, + { + "epoch": 0.8632538371101783, + "grad_norm": 387.613037109375, + "learning_rate": 6.296118796434735e-07, + "loss": 16.3554, + "step": 427340 + }, + { + "epoch": 0.8632740377428622, + "grad_norm": 283.42108154296875, + "learning_rate": 6.294423188940263e-07, + "loss": 10.3294, + "step": 427350 + }, + { + "epoch": 0.863294238375546, + "grad_norm": 270.95489501953125, + "learning_rate": 6.292727794461468e-07, + "loss": 21.8911, + "step": 427360 + }, + { + "epoch": 0.8633144390082297, + "grad_norm": 473.91461181640625, + "learning_rate": 6.291032613006604e-07, + "loss": 23.1282, + "step": 427370 + }, + { + "epoch": 0.8633346396409135, + "grad_norm": 404.8705749511719, + "learning_rate": 6.289337644583949e-07, + "loss": 11.5368, + "step": 427380 + }, + { + "epoch": 0.8633548402735973, + "grad_norm": 357.8258361816406, + "learning_rate": 6.287642889201783e-07, + "loss": 16.3562, + "step": 427390 + }, + { + "epoch": 0.8633750409062811, + "grad_norm": 366.5943298339844, + "learning_rate": 6.28594834686832e-07, + "loss": 16.7686, + "step": 427400 + }, + { + "epoch": 0.863395241538965, + "grad_norm": 202.032470703125, + "learning_rate": 6.284254017591845e-07, + "loss": 14.6638, + "step": 427410 + }, + { + "epoch": 0.8634154421716488, + "grad_norm": 290.3363342285156, + "learning_rate": 6.282559901380625e-07, + "loss": 15.5777, + "step": 427420 + }, + { + "epoch": 0.8634356428043326, + "grad_norm": 314.4226989746094, + "learning_rate": 6.280865998242908e-07, + "loss": 17.3926, + "step": 427430 + }, + { + "epoch": 0.8634558434370164, + "grad_norm": 367.91033935546875, + "learning_rate": 6.279172308186931e-07, + "loss": 18.545, + "step": 427440 + }, + { + "epoch": 0.8634760440697002, + "grad_norm": 329.7330322265625, + "learning_rate": 6.277478831220979e-07, + "loss": 29.4346, + "step": 
427450 + }, + { + "epoch": 0.8634962447023841, + "grad_norm": 199.14669799804688, + "learning_rate": 6.275785567353293e-07, + "loss": 13.117, + "step": 427460 + }, + { + "epoch": 0.8635164453350679, + "grad_norm": 242.6150360107422, + "learning_rate": 6.274092516592111e-07, + "loss": 8.73, + "step": 427470 + }, + { + "epoch": 0.8635366459677517, + "grad_norm": 316.0828857421875, + "learning_rate": 6.272399678945712e-07, + "loss": 10.5379, + "step": 427480 + }, + { + "epoch": 0.8635568466004355, + "grad_norm": 629.6071166992188, + "learning_rate": 6.27070705442232e-07, + "loss": 18.8083, + "step": 427490 + }, + { + "epoch": 0.8635770472331193, + "grad_norm": 554.4690551757812, + "learning_rate": 6.269014643030214e-07, + "loss": 20.6939, + "step": 427500 + }, + { + "epoch": 0.8635972478658032, + "grad_norm": 251.9290313720703, + "learning_rate": 6.267322444777612e-07, + "loss": 13.3097, + "step": 427510 + }, + { + "epoch": 0.863617448498487, + "grad_norm": 288.9092712402344, + "learning_rate": 6.265630459672789e-07, + "loss": 9.0264, + "step": 427520 + }, + { + "epoch": 0.8636376491311708, + "grad_norm": 124.1415786743164, + "learning_rate": 6.263938687723981e-07, + "loss": 21.6229, + "step": 427530 + }, + { + "epoch": 0.8636578497638546, + "grad_norm": 357.16436767578125, + "learning_rate": 6.262247128939414e-07, + "loss": 10.0365, + "step": 427540 + }, + { + "epoch": 0.8636780503965384, + "grad_norm": 393.6729431152344, + "learning_rate": 6.260555783327366e-07, + "loss": 19.359, + "step": 427550 + }, + { + "epoch": 0.8636982510292223, + "grad_norm": 0.5614365339279175, + "learning_rate": 6.258864650896051e-07, + "loss": 10.1452, + "step": 427560 + }, + { + "epoch": 0.8637184516619061, + "grad_norm": 841.2181396484375, + "learning_rate": 6.257173731653738e-07, + "loss": 24.4382, + "step": 427570 + }, + { + "epoch": 0.8637386522945899, + "grad_norm": 343.8075256347656, + "learning_rate": 6.25548302560865e-07, + "loss": 24.539, + "step": 427580 + }, + { + "epoch": 0.8637588529272737, + "grad_norm": 555.2030029296875, + "learning_rate": 6.253792532769026e-07, + "loss": 12.5351, + "step": 427590 + }, + { + "epoch": 0.8637790535599575, + "grad_norm": 549.779052734375, + "learning_rate": 6.252102253143122e-07, + "loss": 26.9366, + "step": 427600 + }, + { + "epoch": 0.8637992541926414, + "grad_norm": 707.0358276367188, + "learning_rate": 6.250412186739163e-07, + "loss": 17.0704, + "step": 427610 + }, + { + "epoch": 0.8638194548253252, + "grad_norm": 679.9314575195312, + "learning_rate": 6.248722333565377e-07, + "loss": 19.7051, + "step": 427620 + }, + { + "epoch": 0.8638396554580089, + "grad_norm": 125.01164245605469, + "learning_rate": 6.247032693630012e-07, + "loss": 24.9688, + "step": 427630 + }, + { + "epoch": 0.8638598560906927, + "grad_norm": 502.79290771484375, + "learning_rate": 6.245343266941328e-07, + "loss": 18.026, + "step": 427640 + }, + { + "epoch": 0.8638800567233765, + "grad_norm": 7.125823020935059, + "learning_rate": 6.243654053507515e-07, + "loss": 10.2686, + "step": 427650 + }, + { + "epoch": 0.8639002573560604, + "grad_norm": 999.5502319335938, + "learning_rate": 6.241965053336818e-07, + "loss": 19.1553, + "step": 427660 + }, + { + "epoch": 0.8639204579887442, + "grad_norm": 560.1099853515625, + "learning_rate": 6.24027626643749e-07, + "loss": 19.8785, + "step": 427670 + }, + { + "epoch": 0.863940658621428, + "grad_norm": 584.9874877929688, + "learning_rate": 6.238587692817749e-07, + "loss": 12.1768, + "step": 427680 + }, + { + "epoch": 0.8639608592541118, + "grad_norm": 
467.38824462890625, + "learning_rate": 6.236899332485813e-07, + "loss": 15.8237, + "step": 427690 + }, + { + "epoch": 0.8639810598867956, + "grad_norm": 1058.6409912109375, + "learning_rate": 6.235211185449919e-07, + "loss": 14.8201, + "step": 427700 + }, + { + "epoch": 0.8640012605194795, + "grad_norm": 332.8944091796875, + "learning_rate": 6.233523251718321e-07, + "loss": 15.9929, + "step": 427710 + }, + { + "epoch": 0.8640214611521633, + "grad_norm": 237.4763946533203, + "learning_rate": 6.231835531299202e-07, + "loss": 24.3467, + "step": 427720 + }, + { + "epoch": 0.8640416617848471, + "grad_norm": 355.6075744628906, + "learning_rate": 6.23014802420081e-07, + "loss": 19.6161, + "step": 427730 + }, + { + "epoch": 0.8640618624175309, + "grad_norm": 207.7703399658203, + "learning_rate": 6.228460730431374e-07, + "loss": 12.1483, + "step": 427740 + }, + { + "epoch": 0.8640820630502147, + "grad_norm": 235.3800811767578, + "learning_rate": 6.226773649999113e-07, + "loss": 11.0554, + "step": 427750 + }, + { + "epoch": 0.8641022636828986, + "grad_norm": 405.9524230957031, + "learning_rate": 6.225086782912237e-07, + "loss": 21.9891, + "step": 427760 + }, + { + "epoch": 0.8641224643155824, + "grad_norm": 145.1119842529297, + "learning_rate": 6.223400129178992e-07, + "loss": 9.3089, + "step": 427770 + }, + { + "epoch": 0.8641426649482662, + "grad_norm": 275.88665771484375, + "learning_rate": 6.221713688807585e-07, + "loss": 32.3242, + "step": 427780 + }, + { + "epoch": 0.86416286558095, + "grad_norm": 425.7492980957031, + "learning_rate": 6.220027461806222e-07, + "loss": 10.1099, + "step": 427790 + }, + { + "epoch": 0.8641830662136338, + "grad_norm": 280.409423828125, + "learning_rate": 6.218341448183141e-07, + "loss": 23.433, + "step": 427800 + }, + { + "epoch": 0.8642032668463177, + "grad_norm": 453.26947021484375, + "learning_rate": 6.216655647946556e-07, + "loss": 14.3934, + "step": 427810 + }, + { + "epoch": 0.8642234674790015, + "grad_norm": 234.53407287597656, + "learning_rate": 6.214970061104686e-07, + "loss": 29.2459, + "step": 427820 + }, + { + "epoch": 0.8642436681116853, + "grad_norm": 383.7027587890625, + "learning_rate": 6.213284687665733e-07, + "loss": 25.2639, + "step": 427830 + }, + { + "epoch": 0.8642638687443691, + "grad_norm": 490.578125, + "learning_rate": 6.21159952763793e-07, + "loss": 13.2446, + "step": 427840 + }, + { + "epoch": 0.8642840693770529, + "grad_norm": 291.77685546875, + "learning_rate": 6.209914581029474e-07, + "loss": 32.4509, + "step": 427850 + }, + { + "epoch": 0.8643042700097368, + "grad_norm": 213.53958129882812, + "learning_rate": 6.20822984784858e-07, + "loss": 17.6725, + "step": 427860 + }, + { + "epoch": 0.8643244706424206, + "grad_norm": 241.1728057861328, + "learning_rate": 6.20654532810347e-07, + "loss": 19.7326, + "step": 427870 + }, + { + "epoch": 0.8643446712751043, + "grad_norm": 178.4251251220703, + "learning_rate": 6.204861021802333e-07, + "loss": 9.0884, + "step": 427880 + }, + { + "epoch": 0.8643648719077881, + "grad_norm": 417.46343994140625, + "learning_rate": 6.203176928953403e-07, + "loss": 17.9481, + "step": 427890 + }, + { + "epoch": 0.8643850725404719, + "grad_norm": 1527.8167724609375, + "learning_rate": 6.201493049564883e-07, + "loss": 14.5531, + "step": 427900 + }, + { + "epoch": 0.8644052731731557, + "grad_norm": 728.8560180664062, + "learning_rate": 6.199809383644956e-07, + "loss": 12.0885, + "step": 427910 + }, + { + "epoch": 0.8644254738058396, + "grad_norm": 236.08883666992188, + "learning_rate": 6.198125931201848e-07, + 
"loss": 21.4588, + "step": 427920 + }, + { + "epoch": 0.8644456744385234, + "grad_norm": 260.3927917480469, + "learning_rate": 6.196442692243787e-07, + "loss": 20.7814, + "step": 427930 + }, + { + "epoch": 0.8644658750712072, + "grad_norm": 662.6884155273438, + "learning_rate": 6.194759666778927e-07, + "loss": 19.546, + "step": 427940 + }, + { + "epoch": 0.864486075703891, + "grad_norm": 499.678466796875, + "learning_rate": 6.193076854815494e-07, + "loss": 19.0645, + "step": 427950 + }, + { + "epoch": 0.8645062763365748, + "grad_norm": 537.9235229492188, + "learning_rate": 6.191394256361699e-07, + "loss": 24.221, + "step": 427960 + }, + { + "epoch": 0.8645264769692587, + "grad_norm": 395.36358642578125, + "learning_rate": 6.189711871425741e-07, + "loss": 14.9715, + "step": 427970 + }, + { + "epoch": 0.8645466776019425, + "grad_norm": 348.86285400390625, + "learning_rate": 6.188029700015802e-07, + "loss": 13.2189, + "step": 427980 + }, + { + "epoch": 0.8645668782346263, + "grad_norm": 371.9176330566406, + "learning_rate": 6.186347742140092e-07, + "loss": 29.0738, + "step": 427990 + }, + { + "epoch": 0.8645870788673101, + "grad_norm": 878.5059814453125, + "learning_rate": 6.184665997806832e-07, + "loss": 22.1013, + "step": 428000 + }, + { + "epoch": 0.8646072794999939, + "grad_norm": 426.17926025390625, + "learning_rate": 6.182984467024173e-07, + "loss": 19.0282, + "step": 428010 + }, + { + "epoch": 0.8646274801326778, + "grad_norm": 299.5845031738281, + "learning_rate": 6.181303149800333e-07, + "loss": 18.7745, + "step": 428020 + }, + { + "epoch": 0.8646476807653616, + "grad_norm": 98.38887023925781, + "learning_rate": 6.179622046143513e-07, + "loss": 19.949, + "step": 428030 + }, + { + "epoch": 0.8646678813980454, + "grad_norm": 340.8511657714844, + "learning_rate": 6.177941156061906e-07, + "loss": 8.3363, + "step": 428040 + }, + { + "epoch": 0.8646880820307292, + "grad_norm": 50.793827056884766, + "learning_rate": 6.17626047956369e-07, + "loss": 15.9434, + "step": 428050 + }, + { + "epoch": 0.864708282663413, + "grad_norm": 231.07606506347656, + "learning_rate": 6.174580016657073e-07, + "loss": 13.5634, + "step": 428060 + }, + { + "epoch": 0.8647284832960969, + "grad_norm": 1061.10302734375, + "learning_rate": 6.172899767350238e-07, + "loss": 16.6211, + "step": 428070 + }, + { + "epoch": 0.8647486839287807, + "grad_norm": 386.4588623046875, + "learning_rate": 6.171219731651362e-07, + "loss": 24.283, + "step": 428080 + }, + { + "epoch": 0.8647688845614645, + "grad_norm": 548.4069213867188, + "learning_rate": 6.169539909568656e-07, + "loss": 17.2704, + "step": 428090 + }, + { + "epoch": 0.8647890851941483, + "grad_norm": 411.5999450683594, + "learning_rate": 6.167860301110284e-07, + "loss": 20.6446, + "step": 428100 + }, + { + "epoch": 0.8648092858268321, + "grad_norm": 124.37580871582031, + "learning_rate": 6.166180906284458e-07, + "loss": 21.915, + "step": 428110 + }, + { + "epoch": 0.864829486459516, + "grad_norm": 51.3354377746582, + "learning_rate": 6.164501725099342e-07, + "loss": 20.3813, + "step": 428120 + }, + { + "epoch": 0.8648496870921998, + "grad_norm": 313.509521484375, + "learning_rate": 6.162822757563136e-07, + "loss": 16.0153, + "step": 428130 + }, + { + "epoch": 0.8648698877248835, + "grad_norm": 571.181884765625, + "learning_rate": 6.161144003684017e-07, + "loss": 21.6709, + "step": 428140 + }, + { + "epoch": 0.8648900883575673, + "grad_norm": 57.8526496887207, + "learning_rate": 6.159465463470149e-07, + "loss": 13.1453, + "step": 428150 + }, + { + "epoch": 
0.8649102889902511, + "grad_norm": 282.853515625, + "learning_rate": 6.157787136929743e-07, + "loss": 13.9466, + "step": 428160 + }, + { + "epoch": 0.864930489622935, + "grad_norm": 47.31289291381836, + "learning_rate": 6.156109024070955e-07, + "loss": 19.1847, + "step": 428170 + }, + { + "epoch": 0.8649506902556188, + "grad_norm": 368.8774719238281, + "learning_rate": 6.154431124901983e-07, + "loss": 13.2376, + "step": 428180 + }, + { + "epoch": 0.8649708908883026, + "grad_norm": 938.6478271484375, + "learning_rate": 6.152753439430997e-07, + "loss": 18.7021, + "step": 428190 + }, + { + "epoch": 0.8649910915209864, + "grad_norm": 505.9247131347656, + "learning_rate": 6.151075967666165e-07, + "loss": 21.0578, + "step": 428200 + }, + { + "epoch": 0.8650112921536702, + "grad_norm": 360.1973571777344, + "learning_rate": 6.149398709615678e-07, + "loss": 14.0969, + "step": 428210 + }, + { + "epoch": 0.865031492786354, + "grad_norm": 419.23187255859375, + "learning_rate": 6.147721665287703e-07, + "loss": 9.2271, + "step": 428220 + }, + { + "epoch": 0.8650516934190379, + "grad_norm": 2.7760090827941895, + "learning_rate": 6.146044834690401e-07, + "loss": 10.1077, + "step": 428230 + }, + { + "epoch": 0.8650718940517217, + "grad_norm": 281.9516906738281, + "learning_rate": 6.144368217831965e-07, + "loss": 8.2609, + "step": 428240 + }, + { + "epoch": 0.8650920946844055, + "grad_norm": 337.73345947265625, + "learning_rate": 6.142691814720575e-07, + "loss": 20.3037, + "step": 428250 + }, + { + "epoch": 0.8651122953170893, + "grad_norm": 430.0217590332031, + "learning_rate": 6.141015625364366e-07, + "loss": 11.1121, + "step": 428260 + }, + { + "epoch": 0.8651324959497732, + "grad_norm": 234.089599609375, + "learning_rate": 6.139339649771525e-07, + "loss": 16.6557, + "step": 428270 + }, + { + "epoch": 0.865152696582457, + "grad_norm": 211.0261993408203, + "learning_rate": 6.137663887950235e-07, + "loss": 13.0847, + "step": 428280 + }, + { + "epoch": 0.8651728972151408, + "grad_norm": 1035.0299072265625, + "learning_rate": 6.135988339908655e-07, + "loss": 20.2821, + "step": 428290 + }, + { + "epoch": 0.8651930978478246, + "grad_norm": 463.2383728027344, + "learning_rate": 6.134313005654929e-07, + "loss": 17.1917, + "step": 428300 + }, + { + "epoch": 0.8652132984805084, + "grad_norm": 309.8400573730469, + "learning_rate": 6.132637885197251e-07, + "loss": 20.0468, + "step": 428310 + }, + { + "epoch": 0.8652334991131923, + "grad_norm": 226.93862915039062, + "learning_rate": 6.130962978543792e-07, + "loss": 12.4476, + "step": 428320 + }, + { + "epoch": 0.8652536997458761, + "grad_norm": 284.1596374511719, + "learning_rate": 6.129288285702672e-07, + "loss": 13.2608, + "step": 428330 + }, + { + "epoch": 0.8652739003785599, + "grad_norm": 140.4319610595703, + "learning_rate": 6.127613806682087e-07, + "loss": 22.1714, + "step": 428340 + }, + { + "epoch": 0.8652941010112437, + "grad_norm": 279.0960693359375, + "learning_rate": 6.1259395414902e-07, + "loss": 10.1577, + "step": 428350 + }, + { + "epoch": 0.8653143016439275, + "grad_norm": 213.5045166015625, + "learning_rate": 6.124265490135161e-07, + "loss": 18.4615, + "step": 428360 + }, + { + "epoch": 0.8653345022766114, + "grad_norm": 517.26416015625, + "learning_rate": 6.122591652625126e-07, + "loss": 14.2572, + "step": 428370 + }, + { + "epoch": 0.8653547029092952, + "grad_norm": 994.1997680664062, + "learning_rate": 6.120918028968265e-07, + "loss": 15.3615, + "step": 428380 + }, + { + "epoch": 0.8653749035419789, + "grad_norm": 991.473876953125, + 
"learning_rate": 6.119244619172727e-07, + "loss": 25.2808, + "step": 428390 + }, + { + "epoch": 0.8653951041746627, + "grad_norm": 304.0759582519531, + "learning_rate": 6.117571423246655e-07, + "loss": 18.7912, + "step": 428400 + }, + { + "epoch": 0.8654153048073465, + "grad_norm": 199.3155059814453, + "learning_rate": 6.11589844119822e-07, + "loss": 8.4047, + "step": 428410 + }, + { + "epoch": 0.8654355054400303, + "grad_norm": 1094.4544677734375, + "learning_rate": 6.114225673035584e-07, + "loss": 21.4444, + "step": 428420 + }, + { + "epoch": 0.8654557060727142, + "grad_norm": 207.91004943847656, + "learning_rate": 6.112553118766889e-07, + "loss": 25.2106, + "step": 428430 + }, + { + "epoch": 0.865475906705398, + "grad_norm": 201.0609588623047, + "learning_rate": 6.110880778400275e-07, + "loss": 17.329, + "step": 428440 + }, + { + "epoch": 0.8654961073380818, + "grad_norm": 327.4130859375, + "learning_rate": 6.109208651943921e-07, + "loss": 18.1264, + "step": 428450 + }, + { + "epoch": 0.8655163079707656, + "grad_norm": 22.62371253967285, + "learning_rate": 6.107536739405956e-07, + "loss": 17.9029, + "step": 428460 + }, + { + "epoch": 0.8655365086034494, + "grad_norm": 56.47309875488281, + "learning_rate": 6.105865040794523e-07, + "loss": 22.641, + "step": 428470 + }, + { + "epoch": 0.8655567092361333, + "grad_norm": 569.1929321289062, + "learning_rate": 6.104193556117793e-07, + "loss": 22.5186, + "step": 428480 + }, + { + "epoch": 0.8655769098688171, + "grad_norm": 389.8046875, + "learning_rate": 6.102522285383888e-07, + "loss": 10.6147, + "step": 428490 + }, + { + "epoch": 0.8655971105015009, + "grad_norm": 299.5357360839844, + "learning_rate": 6.100851228600974e-07, + "loss": 28.5346, + "step": 428500 + }, + { + "epoch": 0.8656173111341847, + "grad_norm": 633.14501953125, + "learning_rate": 6.099180385777192e-07, + "loss": 20.1365, + "step": 428510 + }, + { + "epoch": 0.8656375117668685, + "grad_norm": 677.9397583007812, + "learning_rate": 6.097509756920667e-07, + "loss": 25.9088, + "step": 428520 + }, + { + "epoch": 0.8656577123995524, + "grad_norm": 182.68701171875, + "learning_rate": 6.095839342039561e-07, + "loss": 13.7292, + "step": 428530 + }, + { + "epoch": 0.8656779130322362, + "grad_norm": 308.7190246582031, + "learning_rate": 6.094169141142014e-07, + "loss": 17.2095, + "step": 428540 + }, + { + "epoch": 0.86569811366492, + "grad_norm": 33.39704132080078, + "learning_rate": 6.092499154236148e-07, + "loss": 14.2919, + "step": 428550 + }, + { + "epoch": 0.8657183142976038, + "grad_norm": 427.1455383300781, + "learning_rate": 6.090829381330116e-07, + "loss": 21.8508, + "step": 428560 + }, + { + "epoch": 0.8657385149302876, + "grad_norm": 329.7239990234375, + "learning_rate": 6.089159822432073e-07, + "loss": 15.2621, + "step": 428570 + }, + { + "epoch": 0.8657587155629715, + "grad_norm": 314.5326843261719, + "learning_rate": 6.087490477550129e-07, + "loss": 20.3163, + "step": 428580 + }, + { + "epoch": 0.8657789161956553, + "grad_norm": 572.4465942382812, + "learning_rate": 6.085821346692427e-07, + "loss": 16.2445, + "step": 428590 + }, + { + "epoch": 0.8657991168283391, + "grad_norm": 400.1139831542969, + "learning_rate": 6.084152429867113e-07, + "loss": 13.0832, + "step": 428600 + }, + { + "epoch": 0.8658193174610229, + "grad_norm": 211.2682342529297, + "learning_rate": 6.082483727082317e-07, + "loss": 19.7007, + "step": 428610 + }, + { + "epoch": 0.8658395180937067, + "grad_norm": 22.129629135131836, + "learning_rate": 6.080815238346155e-07, + "loss": 12.4625, + "step": 428620 
+ }, + { + "epoch": 0.8658597187263906, + "grad_norm": 177.38450622558594, + "learning_rate": 6.079146963666777e-07, + "loss": 19.3025, + "step": 428630 + }, + { + "epoch": 0.8658799193590744, + "grad_norm": 248.06504821777344, + "learning_rate": 6.077478903052314e-07, + "loss": 10.8978, + "step": 428640 + }, + { + "epoch": 0.8659001199917581, + "grad_norm": 420.5555419921875, + "learning_rate": 6.075811056510894e-07, + "loss": 22.3603, + "step": 428650 + }, + { + "epoch": 0.8659203206244419, + "grad_norm": 462.0986633300781, + "learning_rate": 6.074143424050638e-07, + "loss": 12.2093, + "step": 428660 + }, + { + "epoch": 0.8659405212571257, + "grad_norm": 136.93148803710938, + "learning_rate": 6.072476005679684e-07, + "loss": 17.8271, + "step": 428670 + }, + { + "epoch": 0.8659607218898095, + "grad_norm": 218.40277099609375, + "learning_rate": 6.070808801406158e-07, + "loss": 14.5372, + "step": 428680 + }, + { + "epoch": 0.8659809225224934, + "grad_norm": 508.3802185058594, + "learning_rate": 6.069141811238166e-07, + "loss": 28.8907, + "step": 428690 + }, + { + "epoch": 0.8660011231551772, + "grad_norm": 515.6898803710938, + "learning_rate": 6.067475035183862e-07, + "loss": 19.3834, + "step": 428700 + }, + { + "epoch": 0.866021323787861, + "grad_norm": 187.1757354736328, + "learning_rate": 6.06580847325135e-07, + "loss": 23.0241, + "step": 428710 + }, + { + "epoch": 0.8660415244205448, + "grad_norm": 411.40753173828125, + "learning_rate": 6.064142125448763e-07, + "loss": 15.513, + "step": 428720 + }, + { + "epoch": 0.8660617250532286, + "grad_norm": 325.0508728027344, + "learning_rate": 6.062475991784211e-07, + "loss": 9.4605, + "step": 428730 + }, + { + "epoch": 0.8660819256859125, + "grad_norm": 573.1134643554688, + "learning_rate": 6.060810072265833e-07, + "loss": 16.3213, + "step": 428740 + }, + { + "epoch": 0.8661021263185963, + "grad_norm": 238.4241943359375, + "learning_rate": 6.059144366901737e-07, + "loss": 24.3547, + "step": 428750 + }, + { + "epoch": 0.8661223269512801, + "grad_norm": 197.23275756835938, + "learning_rate": 6.057478875700035e-07, + "loss": 17.9109, + "step": 428760 + }, + { + "epoch": 0.8661425275839639, + "grad_norm": 341.0602722167969, + "learning_rate": 6.055813598668853e-07, + "loss": 27.3504, + "step": 428770 + }, + { + "epoch": 0.8661627282166477, + "grad_norm": 847.5743408203125, + "learning_rate": 6.054148535816301e-07, + "loss": 22.3776, + "step": 428780 + }, + { + "epoch": 0.8661829288493316, + "grad_norm": 183.5742645263672, + "learning_rate": 6.052483687150512e-07, + "loss": 30.0743, + "step": 428790 + }, + { + "epoch": 0.8662031294820154, + "grad_norm": 780.5835571289062, + "learning_rate": 6.050819052679585e-07, + "loss": 19.2017, + "step": 428800 + }, + { + "epoch": 0.8662233301146992, + "grad_norm": 535.20263671875, + "learning_rate": 6.049154632411625e-07, + "loss": 16.9253, + "step": 428810 + }, + { + "epoch": 0.866243530747383, + "grad_norm": 549.9434814453125, + "learning_rate": 6.047490426354763e-07, + "loss": 39.4413, + "step": 428820 + }, + { + "epoch": 0.8662637313800668, + "grad_norm": 660.7290649414062, + "learning_rate": 6.045826434517104e-07, + "loss": 15.035, + "step": 428830 + }, + { + "epoch": 0.8662839320127507, + "grad_norm": 398.4607238769531, + "learning_rate": 6.044162656906744e-07, + "loss": 20.089, + "step": 428840 + }, + { + "epoch": 0.8663041326454345, + "grad_norm": 207.11167907714844, + "learning_rate": 6.042499093531806e-07, + "loss": 14.1729, + "step": 428850 + }, + { + "epoch": 0.8663243332781183, + "grad_norm": 
146.52442932128906, + "learning_rate": 6.040835744400403e-07, + "loss": 14.1465, + "step": 428860 + }, + { + "epoch": 0.8663445339108021, + "grad_norm": 394.9657287597656, + "learning_rate": 6.039172609520639e-07, + "loss": 19.8002, + "step": 428870 + }, + { + "epoch": 0.866364734543486, + "grad_norm": 185.9178466796875, + "learning_rate": 6.037509688900606e-07, + "loss": 17.8917, + "step": 428880 + }, + { + "epoch": 0.8663849351761698, + "grad_norm": 221.9687042236328, + "learning_rate": 6.035846982548427e-07, + "loss": 12.455, + "step": 428890 + }, + { + "epoch": 0.8664051358088536, + "grad_norm": 630.0784912109375, + "learning_rate": 6.034184490472195e-07, + "loss": 20.9911, + "step": 428900 + }, + { + "epoch": 0.8664253364415373, + "grad_norm": 412.6866149902344, + "learning_rate": 6.032522212680009e-07, + "loss": 23.1872, + "step": 428910 + }, + { + "epoch": 0.8664455370742211, + "grad_norm": 362.35296630859375, + "learning_rate": 6.030860149179973e-07, + "loss": 31.9858, + "step": 428920 + }, + { + "epoch": 0.8664657377069049, + "grad_norm": 606.3483276367188, + "learning_rate": 6.029198299980216e-07, + "loss": 19.8697, + "step": 428930 + }, + { + "epoch": 0.8664859383395888, + "grad_norm": 331.26861572265625, + "learning_rate": 6.027536665088795e-07, + "loss": 16.6162, + "step": 428940 + }, + { + "epoch": 0.8665061389722726, + "grad_norm": 681.3436889648438, + "learning_rate": 6.025875244513824e-07, + "loss": 22.1219, + "step": 428950 + }, + { + "epoch": 0.8665263396049564, + "grad_norm": 241.8258514404297, + "learning_rate": 6.024214038263415e-07, + "loss": 17.739, + "step": 428960 + }, + { + "epoch": 0.8665465402376402, + "grad_norm": 483.76239013671875, + "learning_rate": 6.022553046345647e-07, + "loss": 14.6526, + "step": 428970 + }, + { + "epoch": 0.866566740870324, + "grad_norm": 263.0108947753906, + "learning_rate": 6.020892268768619e-07, + "loss": 15.652, + "step": 428980 + }, + { + "epoch": 0.8665869415030079, + "grad_norm": 369.8650817871094, + "learning_rate": 6.019231705540435e-07, + "loss": 28.1917, + "step": 428990 + }, + { + "epoch": 0.8666071421356917, + "grad_norm": 325.9433288574219, + "learning_rate": 6.017571356669183e-07, + "loss": 18.0282, + "step": 429000 + }, + { + "epoch": 0.8666273427683755, + "grad_norm": 964.6558227539062, + "learning_rate": 6.015911222162946e-07, + "loss": 30.9763, + "step": 429010 + }, + { + "epoch": 0.8666475434010593, + "grad_norm": 114.1938705444336, + "learning_rate": 6.014251302029817e-07, + "loss": 14.6377, + "step": 429020 + }, + { + "epoch": 0.8666677440337431, + "grad_norm": 836.9354248046875, + "learning_rate": 6.012591596277906e-07, + "loss": 24.4932, + "step": 429030 + }, + { + "epoch": 0.866687944666427, + "grad_norm": 426.4087219238281, + "learning_rate": 6.01093210491529e-07, + "loss": 12.1576, + "step": 429040 + }, + { + "epoch": 0.8667081452991108, + "grad_norm": 317.5310974121094, + "learning_rate": 6.009272827950042e-07, + "loss": 24.4424, + "step": 429050 + }, + { + "epoch": 0.8667283459317946, + "grad_norm": 448.1875305175781, + "learning_rate": 6.007613765390274e-07, + "loss": 19.8337, + "step": 429060 + }, + { + "epoch": 0.8667485465644784, + "grad_norm": 399.0731201171875, + "learning_rate": 6.005954917244062e-07, + "loss": 19.61, + "step": 429070 + }, + { + "epoch": 0.8667687471971622, + "grad_norm": 395.09912109375, + "learning_rate": 6.004296283519478e-07, + "loss": 12.0017, + "step": 429080 + }, + { + "epoch": 0.8667889478298461, + "grad_norm": 44.819297790527344, + "learning_rate": 6.002637864224631e-07, + 
"loss": 23.6252, + "step": 429090 + }, + { + "epoch": 0.8668091484625299, + "grad_norm": 1196.097412109375, + "learning_rate": 6.000979659367579e-07, + "loss": 29.9458, + "step": 429100 + }, + { + "epoch": 0.8668293490952137, + "grad_norm": 607.6129150390625, + "learning_rate": 5.999321668956425e-07, + "loss": 33.875, + "step": 429110 + }, + { + "epoch": 0.8668495497278975, + "grad_norm": 7.330821990966797, + "learning_rate": 5.997663892999239e-07, + "loss": 15.4908, + "step": 429120 + }, + { + "epoch": 0.8668697503605813, + "grad_norm": 38.274208068847656, + "learning_rate": 5.996006331504095e-07, + "loss": 20.6134, + "step": 429130 + }, + { + "epoch": 0.8668899509932652, + "grad_norm": 188.052734375, + "learning_rate": 5.994348984479092e-07, + "loss": 9.7485, + "step": 429140 + }, + { + "epoch": 0.866910151625949, + "grad_norm": 266.77996826171875, + "learning_rate": 5.992691851932292e-07, + "loss": 15.0421, + "step": 429150 + }, + { + "epoch": 0.8669303522586327, + "grad_norm": 796.673095703125, + "learning_rate": 5.991034933871764e-07, + "loss": 26.3724, + "step": 429160 + }, + { + "epoch": 0.8669505528913165, + "grad_norm": 565.3130493164062, + "learning_rate": 5.989378230305592e-07, + "loss": 16.5069, + "step": 429170 + }, + { + "epoch": 0.8669707535240003, + "grad_norm": 660.585205078125, + "learning_rate": 5.987721741241864e-07, + "loss": 23.7941, + "step": 429180 + }, + { + "epoch": 0.8669909541566841, + "grad_norm": 213.3594512939453, + "learning_rate": 5.986065466688645e-07, + "loss": 8.1154, + "step": 429190 + }, + { + "epoch": 0.867011154789368, + "grad_norm": 346.75775146484375, + "learning_rate": 5.98440940665399e-07, + "loss": 6.3781, + "step": 429200 + }, + { + "epoch": 0.8670313554220518, + "grad_norm": 371.4981994628906, + "learning_rate": 5.982753561145999e-07, + "loss": 12.4109, + "step": 429210 + }, + { + "epoch": 0.8670515560547356, + "grad_norm": 529.7020263671875, + "learning_rate": 5.981097930172725e-07, + "loss": 12.5239, + "step": 429220 + }, + { + "epoch": 0.8670717566874194, + "grad_norm": 96.09205627441406, + "learning_rate": 5.979442513742234e-07, + "loss": 16.9727, + "step": 429230 + }, + { + "epoch": 0.8670919573201032, + "grad_norm": 101.05036926269531, + "learning_rate": 5.977787311862598e-07, + "loss": 29.9755, + "step": 429240 + }, + { + "epoch": 0.8671121579527871, + "grad_norm": 340.5410461425781, + "learning_rate": 5.9761323245419e-07, + "loss": 13.352, + "step": 429250 + }, + { + "epoch": 0.8671323585854709, + "grad_norm": 692.7017211914062, + "learning_rate": 5.974477551788194e-07, + "loss": 17.2747, + "step": 429260 + }, + { + "epoch": 0.8671525592181547, + "grad_norm": 6.047051429748535, + "learning_rate": 5.972822993609534e-07, + "loss": 20.4808, + "step": 429270 + }, + { + "epoch": 0.8671727598508385, + "grad_norm": 296.6640319824219, + "learning_rate": 5.971168650014008e-07, + "loss": 16.523, + "step": 429280 + }, + { + "epoch": 0.8671929604835223, + "grad_norm": 690.8693237304688, + "learning_rate": 5.969514521009662e-07, + "loss": 17.6326, + "step": 429290 + }, + { + "epoch": 0.8672131611162062, + "grad_norm": 161.1558837890625, + "learning_rate": 5.967860606604553e-07, + "loss": 9.8164, + "step": 429300 + }, + { + "epoch": 0.86723336174889, + "grad_norm": 797.1126708984375, + "learning_rate": 5.966206906806748e-07, + "loss": 21.5301, + "step": 429310 + }, + { + "epoch": 0.8672535623815738, + "grad_norm": 326.7194519042969, + "learning_rate": 5.964553421624325e-07, + "loss": 11.2839, + "step": 429320 + }, + { + "epoch": 0.8672737630142576, 
+ "grad_norm": 360.45458984375, + "learning_rate": 5.962900151065326e-07, + "loss": 13.3711, + "step": 429330 + }, + { + "epoch": 0.8672939636469414, + "grad_norm": 225.3975372314453, + "learning_rate": 5.961247095137795e-07, + "loss": 11.2614, + "step": 429340 + }, + { + "epoch": 0.8673141642796253, + "grad_norm": 301.3730163574219, + "learning_rate": 5.959594253849821e-07, + "loss": 21.4585, + "step": 429350 + }, + { + "epoch": 0.8673343649123091, + "grad_norm": 598.3665771484375, + "learning_rate": 5.95794162720944e-07, + "loss": 24.2816, + "step": 429360 + }, + { + "epoch": 0.8673545655449929, + "grad_norm": 180.07510375976562, + "learning_rate": 5.956289215224703e-07, + "loss": 7.2624, + "step": 429370 + }, + { + "epoch": 0.8673747661776767, + "grad_norm": 148.77638244628906, + "learning_rate": 5.95463701790368e-07, + "loss": 10.5682, + "step": 429380 + }, + { + "epoch": 0.8673949668103605, + "grad_norm": 296.96978759765625, + "learning_rate": 5.9529850352544e-07, + "loss": 5.4315, + "step": 429390 + }, + { + "epoch": 0.8674151674430444, + "grad_norm": 186.8556365966797, + "learning_rate": 5.951333267284942e-07, + "loss": 17.682, + "step": 429400 + }, + { + "epoch": 0.8674353680757282, + "grad_norm": 472.2164306640625, + "learning_rate": 5.949681714003347e-07, + "loss": 16.0475, + "step": 429410 + }, + { + "epoch": 0.8674555687084119, + "grad_norm": 502.9358215332031, + "learning_rate": 5.948030375417646e-07, + "loss": 18.0679, + "step": 429420 + }, + { + "epoch": 0.8674757693410957, + "grad_norm": 209.30079650878906, + "learning_rate": 5.946379251535911e-07, + "loss": 15.7121, + "step": 429430 + }, + { + "epoch": 0.8674959699737795, + "grad_norm": 0.22631804645061493, + "learning_rate": 5.944728342366179e-07, + "loss": 18.109, + "step": 429440 + }, + { + "epoch": 0.8675161706064634, + "grad_norm": 458.28662109375, + "learning_rate": 5.943077647916496e-07, + "loss": 13.4672, + "step": 429450 + }, + { + "epoch": 0.8675363712391472, + "grad_norm": 349.9730529785156, + "learning_rate": 5.941427168194902e-07, + "loss": 21.1921, + "step": 429460 + }, + { + "epoch": 0.867556571871831, + "grad_norm": 614.0407104492188, + "learning_rate": 5.93977690320946e-07, + "loss": 22.7234, + "step": 429470 + }, + { + "epoch": 0.8675767725045148, + "grad_norm": 400.67938232421875, + "learning_rate": 5.938126852968201e-07, + "loss": 11.0473, + "step": 429480 + }, + { + "epoch": 0.8675969731371986, + "grad_norm": 653.5919799804688, + "learning_rate": 5.936477017479158e-07, + "loss": 12.628, + "step": 429490 + }, + { + "epoch": 0.8676171737698825, + "grad_norm": 406.5254211425781, + "learning_rate": 5.934827396750392e-07, + "loss": 29.1455, + "step": 429500 + }, + { + "epoch": 0.8676373744025663, + "grad_norm": 265.0135498046875, + "learning_rate": 5.933177990789934e-07, + "loss": 12.9955, + "step": 429510 + }, + { + "epoch": 0.8676575750352501, + "grad_norm": 358.8462829589844, + "learning_rate": 5.931528799605813e-07, + "loss": 20.3956, + "step": 429520 + }, + { + "epoch": 0.8676777756679339, + "grad_norm": 660.5332641601562, + "learning_rate": 5.92987982320607e-07, + "loss": 28.7643, + "step": 429530 + }, + { + "epoch": 0.8676979763006177, + "grad_norm": 831.7608642578125, + "learning_rate": 5.928231061598772e-07, + "loss": 22.3911, + "step": 429540 + }, + { + "epoch": 0.8677181769333016, + "grad_norm": 287.30059814453125, + "learning_rate": 5.926582514791912e-07, + "loss": 15.9544, + "step": 429550 + }, + { + "epoch": 0.8677383775659854, + "grad_norm": 534.867431640625, + "learning_rate": 
5.92493418279354e-07, + "loss": 17.9074, + "step": 429560 + }, + { + "epoch": 0.8677585781986692, + "grad_norm": 2.071526288986206, + "learning_rate": 5.923286065611705e-07, + "loss": 10.6438, + "step": 429570 + }, + { + "epoch": 0.867778778831353, + "grad_norm": 794.7091674804688, + "learning_rate": 5.921638163254423e-07, + "loss": 26.6991, + "step": 429580 + }, + { + "epoch": 0.8677989794640368, + "grad_norm": 532.3543701171875, + "learning_rate": 5.919990475729725e-07, + "loss": 20.9693, + "step": 429590 + }, + { + "epoch": 0.8678191800967207, + "grad_norm": 124.79682922363281, + "learning_rate": 5.918343003045656e-07, + "loss": 7.699, + "step": 429600 + }, + { + "epoch": 0.8678393807294045, + "grad_norm": 1302.444580078125, + "learning_rate": 5.916695745210238e-07, + "loss": 25.4062, + "step": 429610 + }, + { + "epoch": 0.8678595813620883, + "grad_norm": 697.5823364257812, + "learning_rate": 5.915048702231491e-07, + "loss": 18.373, + "step": 429620 + }, + { + "epoch": 0.8678797819947721, + "grad_norm": 262.98980712890625, + "learning_rate": 5.913401874117447e-07, + "loss": 13.0781, + "step": 429630 + }, + { + "epoch": 0.8678999826274559, + "grad_norm": 508.6786193847656, + "learning_rate": 5.911755260876145e-07, + "loss": 10.9307, + "step": 429640 + }, + { + "epoch": 0.8679201832601398, + "grad_norm": 2.2486164569854736, + "learning_rate": 5.910108862515596e-07, + "loss": 16.3299, + "step": 429650 + }, + { + "epoch": 0.8679403838928236, + "grad_norm": 539.4609375, + "learning_rate": 5.908462679043825e-07, + "loss": 18.2002, + "step": 429660 + }, + { + "epoch": 0.8679605845255073, + "grad_norm": 812.7120971679688, + "learning_rate": 5.906816710468866e-07, + "loss": 17.6814, + "step": 429670 + }, + { + "epoch": 0.8679807851581911, + "grad_norm": 134.23193359375, + "learning_rate": 5.905170956798739e-07, + "loss": 18.2646, + "step": 429680 + }, + { + "epoch": 0.8680009857908749, + "grad_norm": 302.33538818359375, + "learning_rate": 5.903525418041445e-07, + "loss": 19.6878, + "step": 429690 + }, + { + "epoch": 0.8680211864235587, + "grad_norm": 690.2728881835938, + "learning_rate": 5.901880094205037e-07, + "loss": 27.2643, + "step": 429700 + }, + { + "epoch": 0.8680413870562426, + "grad_norm": 243.7892303466797, + "learning_rate": 5.900234985297498e-07, + "loss": 16.3223, + "step": 429710 + }, + { + "epoch": 0.8680615876889264, + "grad_norm": 343.6305236816406, + "learning_rate": 5.898590091326884e-07, + "loss": 12.9504, + "step": 429720 + }, + { + "epoch": 0.8680817883216102, + "grad_norm": 401.50335693359375, + "learning_rate": 5.896945412301186e-07, + "loss": 18.5425, + "step": 429730 + }, + { + "epoch": 0.868101988954294, + "grad_norm": 463.09027099609375, + "learning_rate": 5.895300948228421e-07, + "loss": 18.9193, + "step": 429740 + }, + { + "epoch": 0.8681221895869778, + "grad_norm": 31.94213104248047, + "learning_rate": 5.893656699116618e-07, + "loss": 12.961, + "step": 429750 + }, + { + "epoch": 0.8681423902196617, + "grad_norm": 168.0640106201172, + "learning_rate": 5.892012664973784e-07, + "loss": 15.7568, + "step": 429760 + }, + { + "epoch": 0.8681625908523455, + "grad_norm": 18.695362091064453, + "learning_rate": 5.89036884580792e-07, + "loss": 10.4548, + "step": 429770 + }, + { + "epoch": 0.8681827914850293, + "grad_norm": 192.1884307861328, + "learning_rate": 5.888725241627047e-07, + "loss": 26.1267, + "step": 429780 + }, + { + "epoch": 0.8682029921177131, + "grad_norm": 971.8569946289062, + "learning_rate": 5.887081852439186e-07, + "loss": 15.6991, + "step": 429790 + }, + { 
+ "epoch": 0.8682231927503969, + "grad_norm": 236.9096221923828, + "learning_rate": 5.885438678252342e-07, + "loss": 8.6657, + "step": 429800 + }, + { + "epoch": 0.8682433933830808, + "grad_norm": 144.9537811279297, + "learning_rate": 5.883795719074509e-07, + "loss": 14.095, + "step": 429810 + }, + { + "epoch": 0.8682635940157646, + "grad_norm": 165.6309814453125, + "learning_rate": 5.882152974913713e-07, + "loss": 11.5047, + "step": 429820 + }, + { + "epoch": 0.8682837946484484, + "grad_norm": 436.7717590332031, + "learning_rate": 5.880510445777954e-07, + "loss": 26.6042, + "step": 429830 + }, + { + "epoch": 0.8683039952811322, + "grad_norm": 427.2652893066406, + "learning_rate": 5.878868131675225e-07, + "loss": 11.8873, + "step": 429840 + }, + { + "epoch": 0.868324195913816, + "grad_norm": 63.67523956298828, + "learning_rate": 5.877226032613542e-07, + "loss": 13.2029, + "step": 429850 + }, + { + "epoch": 0.8683443965464999, + "grad_norm": 112.3509292602539, + "learning_rate": 5.875584148600916e-07, + "loss": 12.0538, + "step": 429860 + }, + { + "epoch": 0.8683645971791837, + "grad_norm": 80.30730438232422, + "learning_rate": 5.873942479645345e-07, + "loss": 17.1, + "step": 429870 + }, + { + "epoch": 0.8683847978118675, + "grad_norm": 356.8930358886719, + "learning_rate": 5.872301025754812e-07, + "loss": 11.842, + "step": 429880 + }, + { + "epoch": 0.8684049984445513, + "grad_norm": 548.4095458984375, + "learning_rate": 5.870659786937344e-07, + "loss": 18.5002, + "step": 429890 + }, + { + "epoch": 0.8684251990772351, + "grad_norm": 45.01439666748047, + "learning_rate": 5.869018763200929e-07, + "loss": 22.6678, + "step": 429900 + }, + { + "epoch": 0.868445399709919, + "grad_norm": 42.51400375366211, + "learning_rate": 5.867377954553555e-07, + "loss": 31.7126, + "step": 429910 + }, + { + "epoch": 0.8684656003426028, + "grad_norm": 472.7746276855469, + "learning_rate": 5.865737361003226e-07, + "loss": 17.5104, + "step": 429920 + }, + { + "epoch": 0.8684858009752865, + "grad_norm": 214.0835723876953, + "learning_rate": 5.864096982557949e-07, + "loss": 18.531, + "step": 429930 + }, + { + "epoch": 0.8685060016079703, + "grad_norm": 1023.9053344726562, + "learning_rate": 5.862456819225715e-07, + "loss": 21.1408, + "step": 429940 + }, + { + "epoch": 0.8685262022406541, + "grad_norm": 826.1692504882812, + "learning_rate": 5.860816871014496e-07, + "loss": 20.2905, + "step": 429950 + }, + { + "epoch": 0.868546402873338, + "grad_norm": 303.5501403808594, + "learning_rate": 5.859177137932315e-07, + "loss": 20.0418, + "step": 429960 + }, + { + "epoch": 0.8685666035060218, + "grad_norm": 949.9374389648438, + "learning_rate": 5.857537619987152e-07, + "loss": 16.031, + "step": 429970 + }, + { + "epoch": 0.8685868041387056, + "grad_norm": 184.94468688964844, + "learning_rate": 5.855898317186992e-07, + "loss": 13.5504, + "step": 429980 + }, + { + "epoch": 0.8686070047713894, + "grad_norm": 224.17349243164062, + "learning_rate": 5.854259229539833e-07, + "loss": 20.099, + "step": 429990 + }, + { + "epoch": 0.8686272054040732, + "grad_norm": 571.5230102539062, + "learning_rate": 5.852620357053651e-07, + "loss": 20.4905, + "step": 430000 + }, + { + "epoch": 0.868647406036757, + "grad_norm": 362.5823669433594, + "learning_rate": 5.850981699736453e-07, + "loss": 31.167, + "step": 430010 + }, + { + "epoch": 0.8686676066694409, + "grad_norm": 429.9981384277344, + "learning_rate": 5.849343257596218e-07, + "loss": 16.7991, + "step": 430020 + }, + { + "epoch": 0.8686878073021247, + "grad_norm": 447.98236083984375, + 
"learning_rate": 5.847705030640915e-07, + "loss": 23.4799, + "step": 430030 + }, + { + "epoch": 0.8687080079348085, + "grad_norm": 339.2086486816406, + "learning_rate": 5.84606701887856e-07, + "loss": 14.5216, + "step": 430040 + }, + { + "epoch": 0.8687282085674923, + "grad_norm": 9.948432922363281, + "learning_rate": 5.844429222317111e-07, + "loss": 4.6542, + "step": 430050 + }, + { + "epoch": 0.8687484092001762, + "grad_norm": 334.28228759765625, + "learning_rate": 5.842791640964551e-07, + "loss": 22.8723, + "step": 430060 + }, + { + "epoch": 0.86876860983286, + "grad_norm": 188.88925170898438, + "learning_rate": 5.841154274828869e-07, + "loss": 14.7107, + "step": 430070 + }, + { + "epoch": 0.8687888104655438, + "grad_norm": 496.1181335449219, + "learning_rate": 5.839517123918059e-07, + "loss": 18.6788, + "step": 430080 + }, + { + "epoch": 0.8688090110982276, + "grad_norm": 227.1697235107422, + "learning_rate": 5.83788018824008e-07, + "loss": 10.2072, + "step": 430090 + }, + { + "epoch": 0.8688292117309114, + "grad_norm": 239.81246948242188, + "learning_rate": 5.836243467802915e-07, + "loss": 15.5513, + "step": 430100 + }, + { + "epoch": 0.8688494123635953, + "grad_norm": 183.52682495117188, + "learning_rate": 5.834606962614548e-07, + "loss": 6.584, + "step": 430110 + }, + { + "epoch": 0.8688696129962791, + "grad_norm": 421.7367858886719, + "learning_rate": 5.832970672682948e-07, + "loss": 30.8429, + "step": 430120 + }, + { + "epoch": 0.8688898136289629, + "grad_norm": 890.8756103515625, + "learning_rate": 5.831334598016086e-07, + "loss": 21.5495, + "step": 430130 + }, + { + "epoch": 0.8689100142616467, + "grad_norm": 315.5503234863281, + "learning_rate": 5.829698738621941e-07, + "loss": 21.7944, + "step": 430140 + }, + { + "epoch": 0.8689302148943305, + "grad_norm": 303.3395080566406, + "learning_rate": 5.828063094508507e-07, + "loss": 22.6338, + "step": 430150 + }, + { + "epoch": 0.8689504155270144, + "grad_norm": 297.9224853515625, + "learning_rate": 5.826427665683715e-07, + "loss": 8.8286, + "step": 430160 + }, + { + "epoch": 0.8689706161596982, + "grad_norm": 394.8922119140625, + "learning_rate": 5.824792452155558e-07, + "loss": 15.724, + "step": 430170 + }, + { + "epoch": 0.868990816792382, + "grad_norm": 441.8057556152344, + "learning_rate": 5.823157453932015e-07, + "loss": 26.8445, + "step": 430180 + }, + { + "epoch": 0.8690110174250657, + "grad_norm": 26.113948822021484, + "learning_rate": 5.821522671021041e-07, + "loss": 20.7014, + "step": 430190 + }, + { + "epoch": 0.8690312180577495, + "grad_norm": 501.9127197265625, + "learning_rate": 5.819888103430598e-07, + "loss": 19.1681, + "step": 430200 + }, + { + "epoch": 0.8690514186904333, + "grad_norm": 878.7197875976562, + "learning_rate": 5.818253751168679e-07, + "loss": 17.2247, + "step": 430210 + }, + { + "epoch": 0.8690716193231172, + "grad_norm": 168.92588806152344, + "learning_rate": 5.816619614243224e-07, + "loss": 22.1551, + "step": 430220 + }, + { + "epoch": 0.869091819955801, + "grad_norm": 309.08953857421875, + "learning_rate": 5.814985692662201e-07, + "loss": 31.1003, + "step": 430230 + }, + { + "epoch": 0.8691120205884848, + "grad_norm": 260.6549072265625, + "learning_rate": 5.81335198643358e-07, + "loss": 12.7556, + "step": 430240 + }, + { + "epoch": 0.8691322212211686, + "grad_norm": 285.0781555175781, + "learning_rate": 5.811718495565327e-07, + "loss": 14.1524, + "step": 430250 + }, + { + "epoch": 0.8691524218538524, + "grad_norm": 851.3721923828125, + "learning_rate": 5.810085220065404e-07, + "loss": 42.9282, + 
"step": 430260 + }, + { + "epoch": 0.8691726224865363, + "grad_norm": 672.4266967773438, + "learning_rate": 5.808452159941752e-07, + "loss": 14.6863, + "step": 430270 + }, + { + "epoch": 0.8691928231192201, + "grad_norm": 127.9519271850586, + "learning_rate": 5.806819315202361e-07, + "loss": 15.2976, + "step": 430280 + }, + { + "epoch": 0.8692130237519039, + "grad_norm": 315.2915954589844, + "learning_rate": 5.805186685855163e-07, + "loss": 8.6507, + "step": 430290 + }, + { + "epoch": 0.8692332243845877, + "grad_norm": 301.9071960449219, + "learning_rate": 5.803554271908124e-07, + "loss": 15.8969, + "step": 430300 + }, + { + "epoch": 0.8692534250172715, + "grad_norm": 345.7572326660156, + "learning_rate": 5.801922073369203e-07, + "loss": 14.5178, + "step": 430310 + }, + { + "epoch": 0.8692736256499554, + "grad_norm": 429.4742431640625, + "learning_rate": 5.800290090246346e-07, + "loss": 12.8204, + "step": 430320 + }, + { + "epoch": 0.8692938262826392, + "grad_norm": 374.86956787109375, + "learning_rate": 5.798658322547529e-07, + "loss": 16.9373, + "step": 430330 + }, + { + "epoch": 0.869314026915323, + "grad_norm": 175.72686767578125, + "learning_rate": 5.797026770280683e-07, + "loss": 7.0061, + "step": 430340 + }, + { + "epoch": 0.8693342275480068, + "grad_norm": 4.458624362945557, + "learning_rate": 5.795395433453765e-07, + "loss": 14.7271, + "step": 430350 + }, + { + "epoch": 0.8693544281806906, + "grad_norm": 185.01858520507812, + "learning_rate": 5.793764312074735e-07, + "loss": 17.3475, + "step": 430360 + }, + { + "epoch": 0.8693746288133745, + "grad_norm": 254.61776733398438, + "learning_rate": 5.792133406151523e-07, + "loss": 12.7682, + "step": 430370 + }, + { + "epoch": 0.8693948294460583, + "grad_norm": 371.08416748046875, + "learning_rate": 5.790502715692104e-07, + "loss": 23.1745, + "step": 430380 + }, + { + "epoch": 0.8694150300787421, + "grad_norm": 311.84423828125, + "learning_rate": 5.788872240704402e-07, + "loss": 13.9985, + "step": 430390 + }, + { + "epoch": 0.8694352307114259, + "grad_norm": 510.8533935546875, + "learning_rate": 5.787241981196384e-07, + "loss": 25.0979, + "step": 430400 + }, + { + "epoch": 0.8694554313441097, + "grad_norm": 385.23028564453125, + "learning_rate": 5.785611937175989e-07, + "loss": 11.1401, + "step": 430410 + }, + { + "epoch": 0.8694756319767936, + "grad_norm": 78.8631591796875, + "learning_rate": 5.783982108651148e-07, + "loss": 16.0758, + "step": 430420 + }, + { + "epoch": 0.8694958326094774, + "grad_norm": 550.5646362304688, + "learning_rate": 5.782352495629822e-07, + "loss": 24.8675, + "step": 430430 + }, + { + "epoch": 0.8695160332421611, + "grad_norm": 36.11176681518555, + "learning_rate": 5.780723098119951e-07, + "loss": 24.2342, + "step": 430440 + }, + { + "epoch": 0.8695362338748449, + "grad_norm": 163.89772033691406, + "learning_rate": 5.779093916129464e-07, + "loss": 19.7238, + "step": 430450 + }, + { + "epoch": 0.8695564345075287, + "grad_norm": 196.0900421142578, + "learning_rate": 5.777464949666306e-07, + "loss": 36.1687, + "step": 430460 + }, + { + "epoch": 0.8695766351402126, + "grad_norm": 170.99618530273438, + "learning_rate": 5.775836198738427e-07, + "loss": 48.7488, + "step": 430470 + }, + { + "epoch": 0.8695968357728964, + "grad_norm": 68.98126220703125, + "learning_rate": 5.774207663353765e-07, + "loss": 17.3575, + "step": 430480 + }, + { + "epoch": 0.8696170364055802, + "grad_norm": 196.4060516357422, + "learning_rate": 5.772579343520241e-07, + "loss": 24.434, + "step": 430490 + }, + { + "epoch": 0.869637237038264, + 
"grad_norm": 221.60061645507812, + "learning_rate": 5.770951239245803e-07, + "loss": 21.6284, + "step": 430500 + }, + { + "epoch": 0.8696574376709478, + "grad_norm": 758.8854370117188, + "learning_rate": 5.769323350538391e-07, + "loss": 28.3884, + "step": 430510 + }, + { + "epoch": 0.8696776383036317, + "grad_norm": 156.29849243164062, + "learning_rate": 5.767695677405921e-07, + "loss": 21.5101, + "step": 430520 + }, + { + "epoch": 0.8696978389363155, + "grad_norm": 207.89292907714844, + "learning_rate": 5.766068219856341e-07, + "loss": 27.7899, + "step": 430530 + }, + { + "epoch": 0.8697180395689993, + "grad_norm": 300.2975158691406, + "learning_rate": 5.764440977897584e-07, + "loss": 21.9585, + "step": 430540 + }, + { + "epoch": 0.8697382402016831, + "grad_norm": 296.90606689453125, + "learning_rate": 5.762813951537582e-07, + "loss": 12.1937, + "step": 430550 + }, + { + "epoch": 0.8697584408343669, + "grad_norm": 179.23008728027344, + "learning_rate": 5.761187140784247e-07, + "loss": 13.7425, + "step": 430560 + }, + { + "epoch": 0.8697786414670508, + "grad_norm": 180.9647216796875, + "learning_rate": 5.759560545645527e-07, + "loss": 17.7472, + "step": 430570 + }, + { + "epoch": 0.8697988420997346, + "grad_norm": 295.97515869140625, + "learning_rate": 5.757934166129347e-07, + "loss": 11.9401, + "step": 430580 + }, + { + "epoch": 0.8698190427324184, + "grad_norm": 453.92840576171875, + "learning_rate": 5.756308002243622e-07, + "loss": 25.4231, + "step": 430590 + }, + { + "epoch": 0.8698392433651022, + "grad_norm": 279.5281066894531, + "learning_rate": 5.754682053996291e-07, + "loss": 13.0344, + "step": 430600 + }, + { + "epoch": 0.869859443997786, + "grad_norm": 364.5643005371094, + "learning_rate": 5.753056321395267e-07, + "loss": 13.5878, + "step": 430610 + }, + { + "epoch": 0.8698796446304699, + "grad_norm": 478.2242126464844, + "learning_rate": 5.751430804448488e-07, + "loss": 17.6104, + "step": 430620 + }, + { + "epoch": 0.8698998452631537, + "grad_norm": 193.85586547851562, + "learning_rate": 5.749805503163869e-07, + "loss": 10.5943, + "step": 430630 + }, + { + "epoch": 0.8699200458958375, + "grad_norm": 67.99600982666016, + "learning_rate": 5.748180417549321e-07, + "loss": 10.1801, + "step": 430640 + }, + { + "epoch": 0.8699402465285213, + "grad_norm": 151.18943786621094, + "learning_rate": 5.746555547612781e-07, + "loss": 24.1284, + "step": 430650 + }, + { + "epoch": 0.8699604471612051, + "grad_norm": 712.3665161132812, + "learning_rate": 5.744930893362166e-07, + "loss": 24.4889, + "step": 430660 + }, + { + "epoch": 0.869980647793889, + "grad_norm": 426.7364807128906, + "learning_rate": 5.74330645480538e-07, + "loss": 16.0659, + "step": 430670 + }, + { + "epoch": 0.8700008484265728, + "grad_norm": 322.21551513671875, + "learning_rate": 5.741682231950351e-07, + "loss": 21.5867, + "step": 430680 + }, + { + "epoch": 0.8700210490592566, + "grad_norm": 193.04405212402344, + "learning_rate": 5.740058224805001e-07, + "loss": 29.5392, + "step": 430690 + }, + { + "epoch": 0.8700412496919403, + "grad_norm": 57.63292694091797, + "learning_rate": 5.738434433377244e-07, + "loss": 18.641, + "step": 430700 + }, + { + "epoch": 0.8700614503246241, + "grad_norm": 256.9406433105469, + "learning_rate": 5.736810857674979e-07, + "loss": 30.6381, + "step": 430710 + }, + { + "epoch": 0.8700816509573079, + "grad_norm": 812.01513671875, + "learning_rate": 5.735187497706135e-07, + "loss": 25.1458, + "step": 430720 + }, + { + "epoch": 0.8701018515899918, + "grad_norm": 382.5735168457031, + "learning_rate": 
5.733564353478622e-07, + "loss": 13.3199, + "step": 430730 + }, + { + "epoch": 0.8701220522226756, + "grad_norm": 0.0, + "learning_rate": 5.731941425000337e-07, + "loss": 25.8635, + "step": 430740 + }, + { + "epoch": 0.8701422528553594, + "grad_norm": 259.4105529785156, + "learning_rate": 5.730318712279203e-07, + "loss": 7.6119, + "step": 430750 + }, + { + "epoch": 0.8701624534880432, + "grad_norm": 455.6169738769531, + "learning_rate": 5.728696215323143e-07, + "loss": 13.8466, + "step": 430760 + }, + { + "epoch": 0.870182654120727, + "grad_norm": 10.431163787841797, + "learning_rate": 5.727073934140026e-07, + "loss": 17.4464, + "step": 430770 + }, + { + "epoch": 0.8702028547534109, + "grad_norm": 20.1141414642334, + "learning_rate": 5.725451868737786e-07, + "loss": 10.5243, + "step": 430780 + }, + { + "epoch": 0.8702230553860947, + "grad_norm": 79.7918930053711, + "learning_rate": 5.723830019124332e-07, + "loss": 17.1926, + "step": 430790 + }, + { + "epoch": 0.8702432560187785, + "grad_norm": 243.2442169189453, + "learning_rate": 5.722208385307559e-07, + "loss": 11.0174, + "step": 430800 + }, + { + "epoch": 0.8702634566514623, + "grad_norm": 311.8316650390625, + "learning_rate": 5.720586967295366e-07, + "loss": 10.5594, + "step": 430810 + }, + { + "epoch": 0.8702836572841461, + "grad_norm": 1566.53173828125, + "learning_rate": 5.718965765095669e-07, + "loss": 18.4191, + "step": 430820 + }, + { + "epoch": 0.87030385791683, + "grad_norm": 16.48854637145996, + "learning_rate": 5.717344778716361e-07, + "loss": 10.0563, + "step": 430830 + }, + { + "epoch": 0.8703240585495138, + "grad_norm": 369.173095703125, + "learning_rate": 5.715724008165335e-07, + "loss": 18.6129, + "step": 430840 + }, + { + "epoch": 0.8703442591821976, + "grad_norm": 287.1148376464844, + "learning_rate": 5.714103453450498e-07, + "loss": 17.074, + "step": 430850 + }, + { + "epoch": 0.8703644598148814, + "grad_norm": 375.2651062011719, + "learning_rate": 5.712483114579758e-07, + "loss": 22.1905, + "step": 430860 + }, + { + "epoch": 0.8703846604475652, + "grad_norm": 144.95962524414062, + "learning_rate": 5.710862991561006e-07, + "loss": 13.5704, + "step": 430870 + }, + { + "epoch": 0.8704048610802491, + "grad_norm": 589.4137573242188, + "learning_rate": 5.709243084402128e-07, + "loss": 14.086, + "step": 430880 + }, + { + "epoch": 0.8704250617129329, + "grad_norm": 4.6374192237854, + "learning_rate": 5.70762339311104e-07, + "loss": 17.3641, + "step": 430890 + }, + { + "epoch": 0.8704452623456167, + "grad_norm": 481.83892822265625, + "learning_rate": 5.706003917695619e-07, + "loss": 26.1069, + "step": 430900 + }, + { + "epoch": 0.8704654629783005, + "grad_norm": 828.807861328125, + "learning_rate": 5.704384658163748e-07, + "loss": 20.8426, + "step": 430910 + }, + { + "epoch": 0.8704856636109843, + "grad_norm": 578.0156860351562, + "learning_rate": 5.702765614523354e-07, + "loss": 27.7354, + "step": 430920 + }, + { + "epoch": 0.8705058642436682, + "grad_norm": 473.4757995605469, + "learning_rate": 5.701146786782291e-07, + "loss": 22.8459, + "step": 430930 + }, + { + "epoch": 0.870526064876352, + "grad_norm": 713.3866577148438, + "learning_rate": 5.699528174948477e-07, + "loss": 103.8826, + "step": 430940 + }, + { + "epoch": 0.8705462655090357, + "grad_norm": 761.142578125, + "learning_rate": 5.697909779029786e-07, + "loss": 14.9914, + "step": 430950 + }, + { + "epoch": 0.8705664661417195, + "grad_norm": 440.9145202636719, + "learning_rate": 5.696291599034104e-07, + "loss": 37.4724, + "step": 430960 + }, + { + "epoch": 
0.8705866667744033, + "grad_norm": 239.36610412597656, + "learning_rate": 5.694673634969334e-07, + "loss": 9.299, + "step": 430970 + }, + { + "epoch": 0.8706068674070871, + "grad_norm": 616.1082763671875, + "learning_rate": 5.693055886843341e-07, + "loss": 17.9193, + "step": 430980 + }, + { + "epoch": 0.870627068039771, + "grad_norm": 220.93699645996094, + "learning_rate": 5.691438354664031e-07, + "loss": 25.5439, + "step": 430990 + }, + { + "epoch": 0.8706472686724548, + "grad_norm": 50.125919342041016, + "learning_rate": 5.689821038439264e-07, + "loss": 22.6964, + "step": 431000 + }, + { + "epoch": 0.8706674693051386, + "grad_norm": 86.68278503417969, + "learning_rate": 5.688203938176945e-07, + "loss": 18.2718, + "step": 431010 + }, + { + "epoch": 0.8706876699378224, + "grad_norm": 283.8215026855469, + "learning_rate": 5.686587053884946e-07, + "loss": 10.0713, + "step": 431020 + }, + { + "epoch": 0.8707078705705062, + "grad_norm": 289.13702392578125, + "learning_rate": 5.684970385571137e-07, + "loss": 12.529, + "step": 431030 + }, + { + "epoch": 0.8707280712031901, + "grad_norm": 564.9439697265625, + "learning_rate": 5.683353933243418e-07, + "loss": 27.3489, + "step": 431040 + }, + { + "epoch": 0.8707482718358739, + "grad_norm": 163.3947296142578, + "learning_rate": 5.681737696909656e-07, + "loss": 16.6905, + "step": 431050 + }, + { + "epoch": 0.8707684724685577, + "grad_norm": 573.1134643554688, + "learning_rate": 5.680121676577721e-07, + "loss": 13.4849, + "step": 431060 + }, + { + "epoch": 0.8707886731012415, + "grad_norm": 630.4998168945312, + "learning_rate": 5.678505872255496e-07, + "loss": 36.728, + "step": 431070 + }, + { + "epoch": 0.8708088737339253, + "grad_norm": 344.05316162109375, + "learning_rate": 5.676890283950881e-07, + "loss": 10.6528, + "step": 431080 + }, + { + "epoch": 0.8708290743666092, + "grad_norm": 374.6134948730469, + "learning_rate": 5.675274911671702e-07, + "loss": 14.498, + "step": 431090 + }, + { + "epoch": 0.870849274999293, + "grad_norm": 26.75878143310547, + "learning_rate": 5.673659755425859e-07, + "loss": 31.9518, + "step": 431100 + }, + { + "epoch": 0.8708694756319768, + "grad_norm": 45.3067512512207, + "learning_rate": 5.672044815221234e-07, + "loss": 17.2036, + "step": 431110 + }, + { + "epoch": 0.8708896762646606, + "grad_norm": 154.9571075439453, + "learning_rate": 5.670430091065682e-07, + "loss": 10.5864, + "step": 431120 + }, + { + "epoch": 0.8709098768973444, + "grad_norm": 662.7469482421875, + "learning_rate": 5.668815582967074e-07, + "loss": 17.816, + "step": 431130 + }, + { + "epoch": 0.8709300775300283, + "grad_norm": 295.2125244140625, + "learning_rate": 5.667201290933278e-07, + "loss": 9.9797, + "step": 431140 + }, + { + "epoch": 0.8709502781627121, + "grad_norm": 101.50379180908203, + "learning_rate": 5.665587214972173e-07, + "loss": 16.7628, + "step": 431150 + }, + { + "epoch": 0.8709704787953959, + "grad_norm": 238.20875549316406, + "learning_rate": 5.663973355091624e-07, + "loss": 22.1564, + "step": 431160 + }, + { + "epoch": 0.8709906794280797, + "grad_norm": 10.923376083374023, + "learning_rate": 5.662359711299481e-07, + "loss": 20.7295, + "step": 431170 + }, + { + "epoch": 0.8710108800607635, + "grad_norm": 430.8266906738281, + "learning_rate": 5.66074628360363e-07, + "loss": 13.5221, + "step": 431180 + }, + { + "epoch": 0.8710310806934474, + "grad_norm": 339.6048278808594, + "learning_rate": 5.659133072011919e-07, + "loss": 9.1787, + "step": 431190 + }, + { + "epoch": 0.8710512813261312, + "grad_norm": 364.2235412597656, + 
"learning_rate": 5.657520076532208e-07, + "loss": 23.0557, + "step": 431200 + }, + { + "epoch": 0.8710714819588149, + "grad_norm": 451.9517822265625, + "learning_rate": 5.655907297172375e-07, + "loss": 18.2305, + "step": 431210 + }, + { + "epoch": 0.8710916825914987, + "grad_norm": 389.908935546875, + "learning_rate": 5.654294733940263e-07, + "loss": 24.1222, + "step": 431220 + }, + { + "epoch": 0.8711118832241825, + "grad_norm": 146.44761657714844, + "learning_rate": 5.65268238684375e-07, + "loss": 11.2361, + "step": 431230 + }, + { + "epoch": 0.8711320838568664, + "grad_norm": 268.7914733886719, + "learning_rate": 5.651070255890689e-07, + "loss": 13.8313, + "step": 431240 + }, + { + "epoch": 0.8711522844895502, + "grad_norm": 70.17601013183594, + "learning_rate": 5.649458341088915e-07, + "loss": 12.3232, + "step": 431250 + }, + { + "epoch": 0.871172485122234, + "grad_norm": 253.52432250976562, + "learning_rate": 5.647846642446314e-07, + "loss": 23.8891, + "step": 431260 + }, + { + "epoch": 0.8711926857549178, + "grad_norm": 4.544342517852783, + "learning_rate": 5.646235159970731e-07, + "loss": 18.7502, + "step": 431270 + }, + { + "epoch": 0.8712128863876016, + "grad_norm": 505.71832275390625, + "learning_rate": 5.64462389367001e-07, + "loss": 11.3864, + "step": 431280 + }, + { + "epoch": 0.8712330870202855, + "grad_norm": 626.1432495117188, + "learning_rate": 5.64301284355201e-07, + "loss": 29.4871, + "step": 431290 + }, + { + "epoch": 0.8712532876529693, + "grad_norm": 345.0766296386719, + "learning_rate": 5.641402009624591e-07, + "loss": 15.6212, + "step": 431300 + }, + { + "epoch": 0.8712734882856531, + "grad_norm": 224.95736694335938, + "learning_rate": 5.639791391895605e-07, + "loss": 31.2785, + "step": 431310 + }, + { + "epoch": 0.8712936889183369, + "grad_norm": 168.00369262695312, + "learning_rate": 5.638180990372882e-07, + "loss": 14.0955, + "step": 431320 + }, + { + "epoch": 0.8713138895510207, + "grad_norm": 420.13336181640625, + "learning_rate": 5.6365708050643e-07, + "loss": 13.7055, + "step": 431330 + }, + { + "epoch": 0.8713340901837046, + "grad_norm": 414.8622131347656, + "learning_rate": 5.634960835977688e-07, + "loss": 13.434, + "step": 431340 + }, + { + "epoch": 0.8713542908163884, + "grad_norm": 77.1890869140625, + "learning_rate": 5.633351083120886e-07, + "loss": 15.1375, + "step": 431350 + }, + { + "epoch": 0.8713744914490722, + "grad_norm": 103.03490447998047, + "learning_rate": 5.631741546501746e-07, + "loss": 14.2117, + "step": 431360 + }, + { + "epoch": 0.871394692081756, + "grad_norm": 775.10791015625, + "learning_rate": 5.630132226128143e-07, + "loss": 14.5798, + "step": 431370 + }, + { + "epoch": 0.8714148927144398, + "grad_norm": 281.8716125488281, + "learning_rate": 5.628523122007867e-07, + "loss": 26.4327, + "step": 431380 + }, + { + "epoch": 0.8714350933471237, + "grad_norm": 1007.6710205078125, + "learning_rate": 5.626914234148794e-07, + "loss": 14.8941, + "step": 431390 + }, + { + "epoch": 0.8714552939798075, + "grad_norm": 223.94935607910156, + "learning_rate": 5.625305562558764e-07, + "loss": 19.0216, + "step": 431400 + }, + { + "epoch": 0.8714754946124913, + "grad_norm": 555.6204223632812, + "learning_rate": 5.623697107245619e-07, + "loss": 21.2055, + "step": 431410 + }, + { + "epoch": 0.8714956952451751, + "grad_norm": 259.8000183105469, + "learning_rate": 5.622088868217179e-07, + "loss": 10.3563, + "step": 431420 + }, + { + "epoch": 0.8715158958778589, + "grad_norm": 346.1632385253906, + "learning_rate": 5.620480845481291e-07, + "loss": 16.3341, + 
"step": 431430 + }, + { + "epoch": 0.8715360965105428, + "grad_norm": 343.7537536621094, + "learning_rate": 5.618873039045825e-07, + "loss": 17.5446, + "step": 431440 + }, + { + "epoch": 0.8715562971432266, + "grad_norm": 157.0694580078125, + "learning_rate": 5.617265448918563e-07, + "loss": 17.3536, + "step": 431450 + }, + { + "epoch": 0.8715764977759103, + "grad_norm": 279.52642822265625, + "learning_rate": 5.615658075107366e-07, + "loss": 23.2342, + "step": 431460 + }, + { + "epoch": 0.8715966984085941, + "grad_norm": 355.4528503417969, + "learning_rate": 5.614050917620084e-07, + "loss": 12.9763, + "step": 431470 + }, + { + "epoch": 0.8716168990412779, + "grad_norm": 355.9991760253906, + "learning_rate": 5.612443976464527e-07, + "loss": 16.9133, + "step": 431480 + }, + { + "epoch": 0.8716370996739617, + "grad_norm": 264.4765319824219, + "learning_rate": 5.610837251648532e-07, + "loss": 16.8319, + "step": 431490 + }, + { + "epoch": 0.8716573003066456, + "grad_norm": 707.2461547851562, + "learning_rate": 5.609230743179939e-07, + "loss": 14.6348, + "step": 431500 + }, + { + "epoch": 0.8716775009393294, + "grad_norm": 394.54473876953125, + "learning_rate": 5.607624451066568e-07, + "loss": 14.8244, + "step": 431510 + }, + { + "epoch": 0.8716977015720132, + "grad_norm": 259.0372314453125, + "learning_rate": 5.606018375316246e-07, + "loss": 12.0606, + "step": 431520 + }, + { + "epoch": 0.871717902204697, + "grad_norm": 168.06674194335938, + "learning_rate": 5.604412515936814e-07, + "loss": 19.1726, + "step": 431530 + }, + { + "epoch": 0.8717381028373808, + "grad_norm": 333.35650634765625, + "learning_rate": 5.602806872936079e-07, + "loss": 19.7599, + "step": 431540 + }, + { + "epoch": 0.8717583034700647, + "grad_norm": 696.573974609375, + "learning_rate": 5.601201446321891e-07, + "loss": 12.1974, + "step": 431550 + }, + { + "epoch": 0.8717785041027485, + "grad_norm": 419.63787841796875, + "learning_rate": 5.599596236102068e-07, + "loss": 19.8364, + "step": 431560 + }, + { + "epoch": 0.8717987047354323, + "grad_norm": 172.09666442871094, + "learning_rate": 5.597991242284407e-07, + "loss": 21.4207, + "step": 431570 + }, + { + "epoch": 0.8718189053681161, + "grad_norm": 339.2966613769531, + "learning_rate": 5.596386464876769e-07, + "loss": 22.9744, + "step": 431580 + }, + { + "epoch": 0.8718391060008, + "grad_norm": 554.4182739257812, + "learning_rate": 5.594781903886942e-07, + "loss": 17.6794, + "step": 431590 + }, + { + "epoch": 0.8718593066334838, + "grad_norm": 452.2076416015625, + "learning_rate": 5.593177559322776e-07, + "loss": 42.9131, + "step": 431600 + }, + { + "epoch": 0.8718795072661676, + "grad_norm": 175.66160583496094, + "learning_rate": 5.591573431192066e-07, + "loss": 16.0131, + "step": 431610 + }, + { + "epoch": 0.8718997078988514, + "grad_norm": 335.3212585449219, + "learning_rate": 5.589969519502652e-07, + "loss": 19.758, + "step": 431620 + }, + { + "epoch": 0.8719199085315352, + "grad_norm": 366.0373840332031, + "learning_rate": 5.588365824262343e-07, + "loss": 13.9156, + "step": 431630 + }, + { + "epoch": 0.871940109164219, + "grad_norm": 330.110107421875, + "learning_rate": 5.586762345478935e-07, + "loss": 12.1645, + "step": 431640 + }, + { + "epoch": 0.8719603097969029, + "grad_norm": 303.6749572753906, + "learning_rate": 5.585159083160274e-07, + "loss": 20.0114, + "step": 431650 + }, + { + "epoch": 0.8719805104295867, + "grad_norm": 408.7389221191406, + "learning_rate": 5.583556037314164e-07, + "loss": 9.4455, + "step": 431660 + }, + { + "epoch": 0.8720007110622705, + 
"grad_norm": 513.5108032226562, + "learning_rate": 5.581953207948404e-07, + "loss": 12.3123, + "step": 431670 + }, + { + "epoch": 0.8720209116949543, + "grad_norm": 308.0274658203125, + "learning_rate": 5.58035059507081e-07, + "loss": 11.2197, + "step": 431680 + }, + { + "epoch": 0.8720411123276381, + "grad_norm": 446.0270690917969, + "learning_rate": 5.578748198689226e-07, + "loss": 15.9516, + "step": 431690 + }, + { + "epoch": 0.872061312960322, + "grad_norm": 491.5663757324219, + "learning_rate": 5.577146018811419e-07, + "loss": 25.5518, + "step": 431700 + }, + { + "epoch": 0.8720815135930058, + "grad_norm": 377.6907958984375, + "learning_rate": 5.575544055445209e-07, + "loss": 18.1277, + "step": 431710 + }, + { + "epoch": 0.8721017142256895, + "grad_norm": 401.6126708984375, + "learning_rate": 5.573942308598418e-07, + "loss": 15.5451, + "step": 431720 + }, + { + "epoch": 0.8721219148583733, + "grad_norm": 740.29931640625, + "learning_rate": 5.572340778278845e-07, + "loss": 27.8595, + "step": 431730 + }, + { + "epoch": 0.8721421154910571, + "grad_norm": 167.87570190429688, + "learning_rate": 5.570739464494284e-07, + "loss": 24.6506, + "step": 431740 + }, + { + "epoch": 0.872162316123741, + "grad_norm": 236.99952697753906, + "learning_rate": 5.569138367252553e-07, + "loss": 7.629, + "step": 431750 + }, + { + "epoch": 0.8721825167564248, + "grad_norm": 101.96878051757812, + "learning_rate": 5.567537486561476e-07, + "loss": 10.9722, + "step": 431760 + }, + { + "epoch": 0.8722027173891086, + "grad_norm": 275.85247802734375, + "learning_rate": 5.565936822428808e-07, + "loss": 13.2331, + "step": 431770 + }, + { + "epoch": 0.8722229180217924, + "grad_norm": 294.6750183105469, + "learning_rate": 5.564336374862373e-07, + "loss": 13.1657, + "step": 431780 + }, + { + "epoch": 0.8722431186544762, + "grad_norm": 430.006103515625, + "learning_rate": 5.562736143869984e-07, + "loss": 15.3141, + "step": 431790 + }, + { + "epoch": 0.87226331928716, + "grad_norm": 288.45123291015625, + "learning_rate": 5.561136129459432e-07, + "loss": 11.9062, + "step": 431800 + }, + { + "epoch": 0.8722835199198439, + "grad_norm": 201.5088653564453, + "learning_rate": 5.559536331638498e-07, + "loss": 17.3725, + "step": 431810 + }, + { + "epoch": 0.8723037205525277, + "grad_norm": 431.68145751953125, + "learning_rate": 5.557936750415011e-07, + "loss": 11.8276, + "step": 431820 + }, + { + "epoch": 0.8723239211852115, + "grad_norm": 385.95745849609375, + "learning_rate": 5.556337385796734e-07, + "loss": 15.5291, + "step": 431830 + }, + { + "epoch": 0.8723441218178953, + "grad_norm": 78.91565704345703, + "learning_rate": 5.55473823779149e-07, + "loss": 17.6818, + "step": 431840 + }, + { + "epoch": 0.8723643224505792, + "grad_norm": 32.79534149169922, + "learning_rate": 5.553139306407062e-07, + "loss": 20.4146, + "step": 431850 + }, + { + "epoch": 0.872384523083263, + "grad_norm": 160.63734436035156, + "learning_rate": 5.551540591651234e-07, + "loss": 18.6493, + "step": 431860 + }, + { + "epoch": 0.8724047237159468, + "grad_norm": 673.2002563476562, + "learning_rate": 5.549942093531812e-07, + "loss": 27.3512, + "step": 431870 + }, + { + "epoch": 0.8724249243486306, + "grad_norm": 1010.4269409179688, + "learning_rate": 5.548343812056584e-07, + "loss": 24.5515, + "step": 431880 + }, + { + "epoch": 0.8724451249813144, + "grad_norm": 185.97637939453125, + "learning_rate": 5.546745747233323e-07, + "loss": 8.7487, + "step": 431890 + }, + { + "epoch": 0.8724653256139983, + "grad_norm": 88.39303588867188, + "learning_rate": 
5.545147899069836e-07, + "loss": 12.3746, + "step": 431900 + }, + { + "epoch": 0.8724855262466821, + "grad_norm": 796.1704711914062, + "learning_rate": 5.543550267573916e-07, + "loss": 21.8673, + "step": 431910 + }, + { + "epoch": 0.8725057268793659, + "grad_norm": 188.41055297851562, + "learning_rate": 5.541952852753341e-07, + "loss": 11.4995, + "step": 431920 + }, + { + "epoch": 0.8725259275120497, + "grad_norm": 184.47239685058594, + "learning_rate": 5.540355654615881e-07, + "loss": 13.8518, + "step": 431930 + }, + { + "epoch": 0.8725461281447335, + "grad_norm": 144.6045379638672, + "learning_rate": 5.538758673169348e-07, + "loss": 25.1702, + "step": 431940 + }, + { + "epoch": 0.8725663287774174, + "grad_norm": 425.9952392578125, + "learning_rate": 5.537161908421512e-07, + "loss": 17.863, + "step": 431950 + }, + { + "epoch": 0.8725865294101012, + "grad_norm": 225.549072265625, + "learning_rate": 5.535565360380146e-07, + "loss": 36.3956, + "step": 431960 + }, + { + "epoch": 0.872606730042785, + "grad_norm": 264.4824523925781, + "learning_rate": 5.533969029053043e-07, + "loss": 9.2637, + "step": 431970 + }, + { + "epoch": 0.8726269306754687, + "grad_norm": 424.3130798339844, + "learning_rate": 5.532372914448003e-07, + "loss": 17.5098, + "step": 431980 + }, + { + "epoch": 0.8726471313081525, + "grad_norm": 24.801950454711914, + "learning_rate": 5.530777016572763e-07, + "loss": 18.5786, + "step": 431990 + }, + { + "epoch": 0.8726673319408363, + "grad_norm": 175.92909240722656, + "learning_rate": 5.529181335435124e-07, + "loss": 25.0721, + "step": 432000 + }, + { + "epoch": 0.8726875325735202, + "grad_norm": 133.3310546875, + "learning_rate": 5.527585871042867e-07, + "loss": 35.488, + "step": 432010 + }, + { + "epoch": 0.872707733206204, + "grad_norm": 51.423831939697266, + "learning_rate": 5.525990623403765e-07, + "loss": 21.8321, + "step": 432020 + }, + { + "epoch": 0.8727279338388878, + "grad_norm": 757.439453125, + "learning_rate": 5.524395592525584e-07, + "loss": 14.381, + "step": 432030 + }, + { + "epoch": 0.8727481344715716, + "grad_norm": 194.53475952148438, + "learning_rate": 5.522800778416099e-07, + "loss": 38.3131, + "step": 432040 + }, + { + "epoch": 0.8727683351042554, + "grad_norm": 474.09283447265625, + "learning_rate": 5.521206181083111e-07, + "loss": 22.2005, + "step": 432050 + }, + { + "epoch": 0.8727885357369393, + "grad_norm": 117.4988021850586, + "learning_rate": 5.519611800534347e-07, + "loss": 12.3979, + "step": 432060 + }, + { + "epoch": 0.8728087363696231, + "grad_norm": 265.8170166015625, + "learning_rate": 5.518017636777606e-07, + "loss": 14.9616, + "step": 432070 + }, + { + "epoch": 0.8728289370023069, + "grad_norm": 243.57675170898438, + "learning_rate": 5.516423689820655e-07, + "loss": 22.1666, + "step": 432080 + }, + { + "epoch": 0.8728491376349907, + "grad_norm": 331.3977355957031, + "learning_rate": 5.514829959671264e-07, + "loss": 15.4174, + "step": 432090 + }, + { + "epoch": 0.8728693382676745, + "grad_norm": 238.21192932128906, + "learning_rate": 5.51323644633719e-07, + "loss": 6.8709, + "step": 432100 + }, + { + "epoch": 0.8728895389003584, + "grad_norm": 210.3321990966797, + "learning_rate": 5.511643149826206e-07, + "loss": 24.2467, + "step": 432110 + }, + { + "epoch": 0.8729097395330422, + "grad_norm": 312.65826416015625, + "learning_rate": 5.510050070146083e-07, + "loss": 14.3068, + "step": 432120 + }, + { + "epoch": 0.872929940165726, + "grad_norm": 549.9442138671875, + "learning_rate": 5.508457207304574e-07, + "loss": 24.7624, + "step": 432130 + }, + 
{ + "epoch": 0.8729501407984098, + "grad_norm": 259.71240234375, + "learning_rate": 5.506864561309455e-07, + "loss": 13.4766, + "step": 432140 + }, + { + "epoch": 0.8729703414310936, + "grad_norm": 175.85206604003906, + "learning_rate": 5.505272132168471e-07, + "loss": 12.3887, + "step": 432150 + }, + { + "epoch": 0.8729905420637775, + "grad_norm": 409.7877197265625, + "learning_rate": 5.503679919889404e-07, + "loss": 18.4446, + "step": 432160 + }, + { + "epoch": 0.8730107426964613, + "grad_norm": 258.2601623535156, + "learning_rate": 5.502087924480005e-07, + "loss": 26.1197, + "step": 432170 + }, + { + "epoch": 0.8730309433291451, + "grad_norm": 183.85809326171875, + "learning_rate": 5.50049614594802e-07, + "loss": 14.9734, + "step": 432180 + }, + { + "epoch": 0.8730511439618289, + "grad_norm": 342.5546569824219, + "learning_rate": 5.498904584301235e-07, + "loss": 22.1346, + "step": 432190 + }, + { + "epoch": 0.8730713445945127, + "grad_norm": 303.18170166015625, + "learning_rate": 5.497313239547374e-07, + "loss": 14.67, + "step": 432200 + }, + { + "epoch": 0.8730915452271966, + "grad_norm": 280.4842529296875, + "learning_rate": 5.49572211169423e-07, + "loss": 24.5363, + "step": 432210 + }, + { + "epoch": 0.8731117458598804, + "grad_norm": 332.1897277832031, + "learning_rate": 5.49413120074952e-07, + "loss": 21.0212, + "step": 432220 + }, + { + "epoch": 0.8731319464925641, + "grad_norm": 602.5610961914062, + "learning_rate": 5.492540506721033e-07, + "loss": 16.7147, + "step": 432230 + }, + { + "epoch": 0.8731521471252479, + "grad_norm": 0.8453227281570435, + "learning_rate": 5.490950029616504e-07, + "loss": 30.1204, + "step": 432240 + }, + { + "epoch": 0.8731723477579317, + "grad_norm": 269.9468994140625, + "learning_rate": 5.489359769443675e-07, + "loss": 21.4242, + "step": 432250 + }, + { + "epoch": 0.8731925483906156, + "grad_norm": 805.483154296875, + "learning_rate": 5.487769726210318e-07, + "loss": 16.6819, + "step": 432260 + }, + { + "epoch": 0.8732127490232994, + "grad_norm": 526.4840698242188, + "learning_rate": 5.486179899924171e-07, + "loss": 12.0968, + "step": 432270 + }, + { + "epoch": 0.8732329496559832, + "grad_norm": 212.0432891845703, + "learning_rate": 5.484590290592979e-07, + "loss": 21.728, + "step": 432280 + }, + { + "epoch": 0.873253150288667, + "grad_norm": 108.55610656738281, + "learning_rate": 5.483000898224494e-07, + "loss": 10.9841, + "step": 432290 + }, + { + "epoch": 0.8732733509213508, + "grad_norm": 398.4762878417969, + "learning_rate": 5.48141172282648e-07, + "loss": 55.5959, + "step": 432300 + }, + { + "epoch": 0.8732935515540347, + "grad_norm": 319.7445983886719, + "learning_rate": 5.479822764406645e-07, + "loss": 23.2687, + "step": 432310 + }, + { + "epoch": 0.8733137521867185, + "grad_norm": 391.5088195800781, + "learning_rate": 5.478234022972756e-07, + "loss": 14.2518, + "step": 432320 + }, + { + "epoch": 0.8733339528194023, + "grad_norm": 249.58383178710938, + "learning_rate": 5.476645498532567e-07, + "loss": 11.0005, + "step": 432330 + }, + { + "epoch": 0.8733541534520861, + "grad_norm": 648.7304077148438, + "learning_rate": 5.475057191093808e-07, + "loss": 16.0999, + "step": 432340 + }, + { + "epoch": 0.8733743540847699, + "grad_norm": 190.3310089111328, + "learning_rate": 5.473469100664208e-07, + "loss": 19.4721, + "step": 432350 + }, + { + "epoch": 0.8733945547174538, + "grad_norm": 461.1125183105469, + "learning_rate": 5.471881227251518e-07, + "loss": 14.6923, + "step": 432360 + }, + { + "epoch": 0.8734147553501376, + "grad_norm": 
504.35113525390625, + "learning_rate": 5.470293570863499e-07, + "loss": 20.7561, + "step": 432370 + }, + { + "epoch": 0.8734349559828214, + "grad_norm": 109.82476043701172, + "learning_rate": 5.46870613150785e-07, + "loss": 5.3293, + "step": 432380 + }, + { + "epoch": 0.8734551566155052, + "grad_norm": 343.56341552734375, + "learning_rate": 5.467118909192326e-07, + "loss": 22.4527, + "step": 432390 + }, + { + "epoch": 0.873475357248189, + "grad_norm": 465.1142578125, + "learning_rate": 5.46553190392467e-07, + "loss": 16.6082, + "step": 432400 + }, + { + "epoch": 0.8734955578808729, + "grad_norm": 420.0114440917969, + "learning_rate": 5.46394511571261e-07, + "loss": 11.3463, + "step": 432410 + }, + { + "epoch": 0.8735157585135567, + "grad_norm": 172.47666931152344, + "learning_rate": 5.462358544563873e-07, + "loss": 14.4073, + "step": 432420 + }, + { + "epoch": 0.8735359591462405, + "grad_norm": 168.5302734375, + "learning_rate": 5.460772190486208e-07, + "loss": 13.0114, + "step": 432430 + }, + { + "epoch": 0.8735561597789243, + "grad_norm": 69.3399429321289, + "learning_rate": 5.459186053487336e-07, + "loss": 14.7425, + "step": 432440 + }, + { + "epoch": 0.8735763604116081, + "grad_norm": 761.65478515625, + "learning_rate": 5.457600133574987e-07, + "loss": 24.4876, + "step": 432450 + }, + { + "epoch": 0.873596561044292, + "grad_norm": 211.42430114746094, + "learning_rate": 5.456014430756895e-07, + "loss": 15.9545, + "step": 432460 + }, + { + "epoch": 0.8736167616769758, + "grad_norm": 216.61505126953125, + "learning_rate": 5.454428945040774e-07, + "loss": 12.5682, + "step": 432470 + }, + { + "epoch": 0.8736369623096596, + "grad_norm": 194.28504943847656, + "learning_rate": 5.452843676434377e-07, + "loss": 17.7277, + "step": 432480 + }, + { + "epoch": 0.8736571629423433, + "grad_norm": 80.29365539550781, + "learning_rate": 5.45125862494541e-07, + "loss": 20.0275, + "step": 432490 + }, + { + "epoch": 0.8736773635750271, + "grad_norm": 152.5752716064453, + "learning_rate": 5.449673790581611e-07, + "loss": 24.6487, + "step": 432500 + }, + { + "epoch": 0.8736975642077109, + "grad_norm": 554.6441650390625, + "learning_rate": 5.448089173350696e-07, + "loss": 9.1202, + "step": 432510 + }, + { + "epoch": 0.8737177648403948, + "grad_norm": 277.0372314453125, + "learning_rate": 5.446504773260386e-07, + "loss": 34.0796, + "step": 432520 + }, + { + "epoch": 0.8737379654730786, + "grad_norm": 0.0, + "learning_rate": 5.44492059031842e-07, + "loss": 14.672, + "step": 432530 + }, + { + "epoch": 0.8737581661057624, + "grad_norm": 554.496337890625, + "learning_rate": 5.443336624532492e-07, + "loss": 18.5186, + "step": 432540 + }, + { + "epoch": 0.8737783667384462, + "grad_norm": 546.7005615234375, + "learning_rate": 5.44175287591035e-07, + "loss": 16.7326, + "step": 432550 + }, + { + "epoch": 0.87379856737113, + "grad_norm": 198.7493896484375, + "learning_rate": 5.440169344459701e-07, + "loss": 31.7625, + "step": 432560 + }, + { + "epoch": 0.8738187680038139, + "grad_norm": 22.46010398864746, + "learning_rate": 5.438586030188247e-07, + "loss": 9.2043, + "step": 432570 + }, + { + "epoch": 0.8738389686364977, + "grad_norm": 207.79080200195312, + "learning_rate": 5.437002933103724e-07, + "loss": 18.9447, + "step": 432580 + }, + { + "epoch": 0.8738591692691815, + "grad_norm": 162.22372436523438, + "learning_rate": 5.435420053213863e-07, + "loss": 18.3355, + "step": 432590 + }, + { + "epoch": 0.8738793699018653, + "grad_norm": 187.2777099609375, + "learning_rate": 5.433837390526341e-07, + "loss": 14.0661, + 
"step": 432600 + }, + { + "epoch": 0.8738995705345491, + "grad_norm": 232.63430786132812, + "learning_rate": 5.432254945048887e-07, + "loss": 10.7602, + "step": 432610 + }, + { + "epoch": 0.873919771167233, + "grad_norm": 56.217803955078125, + "learning_rate": 5.430672716789232e-07, + "loss": 10.8976, + "step": 432620 + }, + { + "epoch": 0.8739399717999168, + "grad_norm": 368.2447509765625, + "learning_rate": 5.429090705755069e-07, + "loss": 11.1776, + "step": 432630 + }, + { + "epoch": 0.8739601724326006, + "grad_norm": 455.3416748046875, + "learning_rate": 5.427508911954105e-07, + "loss": 13.8417, + "step": 432640 + }, + { + "epoch": 0.8739803730652844, + "grad_norm": 212.73867797851562, + "learning_rate": 5.425927335394054e-07, + "loss": 17.5974, + "step": 432650 + }, + { + "epoch": 0.8740005736979682, + "grad_norm": 362.45147705078125, + "learning_rate": 5.424345976082645e-07, + "loss": 18.1649, + "step": 432660 + }, + { + "epoch": 0.8740207743306521, + "grad_norm": 493.18914794921875, + "learning_rate": 5.42276483402755e-07, + "loss": 39.1498, + "step": 432670 + }, + { + "epoch": 0.8740409749633359, + "grad_norm": 467.24139404296875, + "learning_rate": 5.421183909236494e-07, + "loss": 12.8275, + "step": 432680 + }, + { + "epoch": 0.8740611755960197, + "grad_norm": 296.6934814453125, + "learning_rate": 5.419603201717189e-07, + "loss": 22.2164, + "step": 432690 + }, + { + "epoch": 0.8740813762287035, + "grad_norm": 17.59931182861328, + "learning_rate": 5.418022711477333e-07, + "loss": 19.6711, + "step": 432700 + }, + { + "epoch": 0.8741015768613873, + "grad_norm": 393.2613220214844, + "learning_rate": 5.416442438524616e-07, + "loss": 38.0818, + "step": 432710 + }, + { + "epoch": 0.8741217774940712, + "grad_norm": 290.41900634765625, + "learning_rate": 5.414862382866759e-07, + "loss": 17.1718, + "step": 432720 + }, + { + "epoch": 0.874141978126755, + "grad_norm": 296.6372375488281, + "learning_rate": 5.413282544511455e-07, + "loss": 12.0898, + "step": 432730 + }, + { + "epoch": 0.8741621787594387, + "grad_norm": 202.93516540527344, + "learning_rate": 5.4117029234664e-07, + "loss": 14.88, + "step": 432740 + }, + { + "epoch": 0.8741823793921225, + "grad_norm": 21.219100952148438, + "learning_rate": 5.410123519739302e-07, + "loss": 12.9098, + "step": 432750 + }, + { + "epoch": 0.8742025800248063, + "grad_norm": 372.3573913574219, + "learning_rate": 5.408544333337845e-07, + "loss": 15.8375, + "step": 432760 + }, + { + "epoch": 0.8742227806574902, + "grad_norm": 146.60308837890625, + "learning_rate": 5.406965364269745e-07, + "loss": 13.5756, + "step": 432770 + }, + { + "epoch": 0.874242981290174, + "grad_norm": 507.54498291015625, + "learning_rate": 5.405386612542685e-07, + "loss": 8.8647, + "step": 432780 + }, + { + "epoch": 0.8742631819228578, + "grad_norm": 238.99053955078125, + "learning_rate": 5.403808078164358e-07, + "loss": 8.5932, + "step": 432790 + }, + { + "epoch": 0.8742833825555416, + "grad_norm": 383.38189697265625, + "learning_rate": 5.402229761142464e-07, + "loss": 16.0073, + "step": 432800 + }, + { + "epoch": 0.8743035831882254, + "grad_norm": 186.34523010253906, + "learning_rate": 5.400651661484684e-07, + "loss": 7.5963, + "step": 432810 + }, + { + "epoch": 0.8743237838209093, + "grad_norm": 380.567138671875, + "learning_rate": 5.399073779198732e-07, + "loss": 15.2616, + "step": 432820 + }, + { + "epoch": 0.8743439844535931, + "grad_norm": 8.406536102294922, + "learning_rate": 5.397496114292278e-07, + "loss": 28.221, + "step": 432830 + }, + { + "epoch": 0.8743641850862769, + 
"grad_norm": 315.7178955078125, + "learning_rate": 5.395918666773026e-07, + "loss": 17.5182, + "step": 432840 + }, + { + "epoch": 0.8743843857189607, + "grad_norm": 289.23663330078125, + "learning_rate": 5.394341436648653e-07, + "loss": 22.2257, + "step": 432850 + }, + { + "epoch": 0.8744045863516445, + "grad_norm": 133.96200561523438, + "learning_rate": 5.392764423926844e-07, + "loss": 19.679, + "step": 432860 + }, + { + "epoch": 0.8744247869843284, + "grad_norm": 226.0999755859375, + "learning_rate": 5.391187628615296e-07, + "loss": 15.2748, + "step": 432870 + }, + { + "epoch": 0.8744449876170122, + "grad_norm": 555.56201171875, + "learning_rate": 5.389611050721694e-07, + "loss": 21.7419, + "step": 432880 + }, + { + "epoch": 0.874465188249696, + "grad_norm": 433.8108825683594, + "learning_rate": 5.388034690253701e-07, + "loss": 9.8576, + "step": 432890 + }, + { + "epoch": 0.8744853888823798, + "grad_norm": 171.49124145507812, + "learning_rate": 5.386458547219026e-07, + "loss": 21.4585, + "step": 432900 + }, + { + "epoch": 0.8745055895150636, + "grad_norm": 192.7606658935547, + "learning_rate": 5.384882621625353e-07, + "loss": 14.1203, + "step": 432910 + }, + { + "epoch": 0.8745257901477475, + "grad_norm": 1570.7572021484375, + "learning_rate": 5.383306913480335e-07, + "loss": 32.1453, + "step": 432920 + }, + { + "epoch": 0.8745459907804313, + "grad_norm": 495.3536376953125, + "learning_rate": 5.381731422791664e-07, + "loss": 25.808, + "step": 432930 + }, + { + "epoch": 0.8745661914131151, + "grad_norm": 662.15869140625, + "learning_rate": 5.380156149567034e-07, + "loss": 17.0348, + "step": 432940 + }, + { + "epoch": 0.8745863920457989, + "grad_norm": 261.1503601074219, + "learning_rate": 5.378581093814112e-07, + "loss": 9.5443, + "step": 432950 + }, + { + "epoch": 0.8746065926784827, + "grad_norm": 349.7860412597656, + "learning_rate": 5.377006255540562e-07, + "loss": 20.0274, + "step": 432960 + }, + { + "epoch": 0.8746267933111666, + "grad_norm": 153.81024169921875, + "learning_rate": 5.375431634754074e-07, + "loss": 16.5702, + "step": 432970 + }, + { + "epoch": 0.8746469939438504, + "grad_norm": 734.0849609375, + "learning_rate": 5.373857231462337e-07, + "loss": 35.6784, + "step": 432980 + }, + { + "epoch": 0.8746671945765342, + "grad_norm": 1650.5621337890625, + "learning_rate": 5.372283045672994e-07, + "loss": 32.2598, + "step": 432990 + }, + { + "epoch": 0.8746873952092179, + "grad_norm": 35.7860107421875, + "learning_rate": 5.370709077393721e-07, + "loss": 24.6236, + "step": 433000 + }, + { + "epoch": 0.8747075958419017, + "grad_norm": 258.3363952636719, + "learning_rate": 5.369135326632219e-07, + "loss": 27.3438, + "step": 433010 + }, + { + "epoch": 0.8747277964745855, + "grad_norm": 46.31103515625, + "learning_rate": 5.367561793396132e-07, + "loss": 17.3969, + "step": 433020 + }, + { + "epoch": 0.8747479971072694, + "grad_norm": 448.46282958984375, + "learning_rate": 5.365988477693124e-07, + "loss": 19.1224, + "step": 433030 + }, + { + "epoch": 0.8747681977399532, + "grad_norm": 178.7939453125, + "learning_rate": 5.364415379530891e-07, + "loss": 25.5598, + "step": 433040 + }, + { + "epoch": 0.874788398372637, + "grad_norm": 209.88671875, + "learning_rate": 5.362842498917081e-07, + "loss": 17.2414, + "step": 433050 + }, + { + "epoch": 0.8748085990053208, + "grad_norm": 262.6157531738281, + "learning_rate": 5.36126983585935e-07, + "loss": 14.1692, + "step": 433060 + }, + { + "epoch": 0.8748287996380046, + "grad_norm": 541.3820190429688, + "learning_rate": 5.359697390365387e-07, + 
"loss": 17.0255, + "step": 433070 + }, + { + "epoch": 0.8748490002706885, + "grad_norm": 379.27484130859375, + "learning_rate": 5.35812516244284e-07, + "loss": 18.0919, + "step": 433080 + }, + { + "epoch": 0.8748692009033723, + "grad_norm": 590.208251953125, + "learning_rate": 5.356553152099381e-07, + "loss": 24.4596, + "step": 433090 + }, + { + "epoch": 0.8748894015360561, + "grad_norm": 436.2502746582031, + "learning_rate": 5.354981359342659e-07, + "loss": 19.3351, + "step": 433100 + }, + { + "epoch": 0.8749096021687399, + "grad_norm": 154.7361297607422, + "learning_rate": 5.353409784180352e-07, + "loss": 6.1398, + "step": 433110 + }, + { + "epoch": 0.8749298028014237, + "grad_norm": 5.425511360168457, + "learning_rate": 5.35183842662011e-07, + "loss": 16.9334, + "step": 433120 + }, + { + "epoch": 0.8749500034341076, + "grad_norm": 375.8233947753906, + "learning_rate": 5.350267286669585e-07, + "loss": 10.4431, + "step": 433130 + }, + { + "epoch": 0.8749702040667914, + "grad_norm": 1288.437744140625, + "learning_rate": 5.348696364336448e-07, + "loss": 17.4038, + "step": 433140 + }, + { + "epoch": 0.8749904046994752, + "grad_norm": 644.7527465820312, + "learning_rate": 5.347125659628344e-07, + "loss": 18.2653, + "step": 433150 + }, + { + "epoch": 0.875010605332159, + "grad_norm": 102.53956604003906, + "learning_rate": 5.345555172552941e-07, + "loss": 32.7191, + "step": 433160 + }, + { + "epoch": 0.8750308059648428, + "grad_norm": 643.939208984375, + "learning_rate": 5.343984903117889e-07, + "loss": 10.4285, + "step": 433170 + }, + { + "epoch": 0.8750510065975267, + "grad_norm": 478.85107421875, + "learning_rate": 5.342414851330824e-07, + "loss": 28.7579, + "step": 433180 + }, + { + "epoch": 0.8750712072302105, + "grad_norm": 139.83828735351562, + "learning_rate": 5.340845017199425e-07, + "loss": 22.7458, + "step": 433190 + }, + { + "epoch": 0.8750914078628943, + "grad_norm": 238.2321319580078, + "learning_rate": 5.339275400731331e-07, + "loss": 24.4583, + "step": 433200 + }, + { + "epoch": 0.8751116084955781, + "grad_norm": 358.0097961425781, + "learning_rate": 5.337706001934184e-07, + "loss": 12.0504, + "step": 433210 + }, + { + "epoch": 0.8751318091282619, + "grad_norm": 76.72681427001953, + "learning_rate": 5.33613682081564e-07, + "loss": 15.6278, + "step": 433220 + }, + { + "epoch": 0.8751520097609458, + "grad_norm": 533.8015747070312, + "learning_rate": 5.334567857383354e-07, + "loss": 29.6115, + "step": 433230 + }, + { + "epoch": 0.8751722103936296, + "grad_norm": 436.1002502441406, + "learning_rate": 5.332999111644971e-07, + "loss": 17.0068, + "step": 433240 + }, + { + "epoch": 0.8751924110263134, + "grad_norm": 372.8768310546875, + "learning_rate": 5.331430583608122e-07, + "loss": 17.8014, + "step": 433250 + }, + { + "epoch": 0.8752126116589971, + "grad_norm": 635.5821533203125, + "learning_rate": 5.329862273280462e-07, + "loss": 28.728, + "step": 433260 + }, + { + "epoch": 0.8752328122916809, + "grad_norm": 17.68754768371582, + "learning_rate": 5.328294180669658e-07, + "loss": 15.5665, + "step": 433270 + }, + { + "epoch": 0.8752530129243647, + "grad_norm": 138.2192840576172, + "learning_rate": 5.326726305783308e-07, + "loss": 25.2673, + "step": 433280 + }, + { + "epoch": 0.8752732135570486, + "grad_norm": 27.268882751464844, + "learning_rate": 5.325158648629075e-07, + "loss": 18.3581, + "step": 433290 + }, + { + "epoch": 0.8752934141897324, + "grad_norm": 136.46170043945312, + "learning_rate": 5.323591209214612e-07, + "loss": 21.0438, + "step": 433300 + }, + { + "epoch": 
0.8753136148224162, + "grad_norm": 241.7820587158203, + "learning_rate": 5.322023987547547e-07, + "loss": 13.798, + "step": 433310 + }, + { + "epoch": 0.8753338154551, + "grad_norm": 367.8775329589844, + "learning_rate": 5.320456983635508e-07, + "loss": 16.049, + "step": 433320 + }, + { + "epoch": 0.8753540160877838, + "grad_norm": 531.8893432617188, + "learning_rate": 5.318890197486154e-07, + "loss": 20.8812, + "step": 433330 + }, + { + "epoch": 0.8753742167204677, + "grad_norm": 166.06248474121094, + "learning_rate": 5.317323629107108e-07, + "loss": 11.5503, + "step": 433340 + }, + { + "epoch": 0.8753944173531515, + "grad_norm": 269.6576232910156, + "learning_rate": 5.315757278505995e-07, + "loss": 18.7279, + "step": 433350 + }, + { + "epoch": 0.8754146179858353, + "grad_norm": 50.614158630371094, + "learning_rate": 5.314191145690473e-07, + "loss": 27.3247, + "step": 433360 + }, + { + "epoch": 0.8754348186185191, + "grad_norm": 360.9093017578125, + "learning_rate": 5.312625230668155e-07, + "loss": 22.016, + "step": 433370 + }, + { + "epoch": 0.875455019251203, + "grad_norm": 285.93548583984375, + "learning_rate": 5.311059533446694e-07, + "loss": 13.3252, + "step": 433380 + }, + { + "epoch": 0.8754752198838868, + "grad_norm": 201.05575561523438, + "learning_rate": 5.309494054033704e-07, + "loss": 11.7539, + "step": 433390 + }, + { + "epoch": 0.8754954205165706, + "grad_norm": 509.9866638183594, + "learning_rate": 5.307928792436812e-07, + "loss": 22.6127, + "step": 433400 + }, + { + "epoch": 0.8755156211492544, + "grad_norm": 513.7008056640625, + "learning_rate": 5.306363748663668e-07, + "loss": 26.1914, + "step": 433410 + }, + { + "epoch": 0.8755358217819382, + "grad_norm": 431.09210205078125, + "learning_rate": 5.304798922721871e-07, + "loss": 15.9364, + "step": 433420 + }, + { + "epoch": 0.875556022414622, + "grad_norm": 260.40850830078125, + "learning_rate": 5.303234314619071e-07, + "loss": 10.2867, + "step": 433430 + }, + { + "epoch": 0.8755762230473059, + "grad_norm": 502.4874267578125, + "learning_rate": 5.301669924362884e-07, + "loss": 23.3825, + "step": 433440 + }, + { + "epoch": 0.8755964236799897, + "grad_norm": 321.4576416015625, + "learning_rate": 5.300105751960943e-07, + "loss": 17.2701, + "step": 433450 + }, + { + "epoch": 0.8756166243126735, + "grad_norm": 139.70773315429688, + "learning_rate": 5.298541797420864e-07, + "loss": 12.7987, + "step": 433460 + }, + { + "epoch": 0.8756368249453573, + "grad_norm": 582.2942504882812, + "learning_rate": 5.296978060750257e-07, + "loss": 16.9464, + "step": 433470 + }, + { + "epoch": 0.8756570255780411, + "grad_norm": 487.0493469238281, + "learning_rate": 5.295414541956773e-07, + "loss": 24.6904, + "step": 433480 + }, + { + "epoch": 0.875677226210725, + "grad_norm": 231.55262756347656, + "learning_rate": 5.293851241048015e-07, + "loss": 11.211, + "step": 433490 + }, + { + "epoch": 0.8756974268434088, + "grad_norm": 110.7741928100586, + "learning_rate": 5.292288158031595e-07, + "loss": 15.8149, + "step": 433500 + }, + { + "epoch": 0.8757176274760925, + "grad_norm": 195.20785522460938, + "learning_rate": 5.290725292915138e-07, + "loss": 15.7708, + "step": 433510 + }, + { + "epoch": 0.8757378281087763, + "grad_norm": 195.65206909179688, + "learning_rate": 5.28916264570628e-07, + "loss": 20.5607, + "step": 433520 + }, + { + "epoch": 0.8757580287414601, + "grad_norm": 98.87440490722656, + "learning_rate": 5.287600216412609e-07, + "loss": 21.9518, + "step": 433530 + }, + { + "epoch": 0.875778229374144, + "grad_norm": 632.74267578125, + 
"learning_rate": 5.286038005041744e-07, + "loss": 27.4733, + "step": 433540 + }, + { + "epoch": 0.8757984300068278, + "grad_norm": 154.83694458007812, + "learning_rate": 5.28447601160132e-07, + "loss": 30.9031, + "step": 433550 + }, + { + "epoch": 0.8758186306395116, + "grad_norm": 567.2290649414062, + "learning_rate": 5.28291423609894e-07, + "loss": 20.3919, + "step": 433560 + }, + { + "epoch": 0.8758388312721954, + "grad_norm": 287.53558349609375, + "learning_rate": 5.281352678542195e-07, + "loss": 13.9235, + "step": 433570 + }, + { + "epoch": 0.8758590319048792, + "grad_norm": 43.357112884521484, + "learning_rate": 5.279791338938717e-07, + "loss": 22.6026, + "step": 433580 + }, + { + "epoch": 0.8758792325375631, + "grad_norm": 369.96173095703125, + "learning_rate": 5.278230217296132e-07, + "loss": 26.5123, + "step": 433590 + }, + { + "epoch": 0.8758994331702469, + "grad_norm": 405.46685791015625, + "learning_rate": 5.276669313622013e-07, + "loss": 18.1153, + "step": 433600 + }, + { + "epoch": 0.8759196338029307, + "grad_norm": 311.51251220703125, + "learning_rate": 5.275108627923975e-07, + "loss": 21.1751, + "step": 433610 + }, + { + "epoch": 0.8759398344356145, + "grad_norm": 322.4067077636719, + "learning_rate": 5.273548160209651e-07, + "loss": 20.5398, + "step": 433620 + }, + { + "epoch": 0.8759600350682983, + "grad_norm": 423.6556396484375, + "learning_rate": 5.271987910486625e-07, + "loss": 20.9919, + "step": 433630 + }, + { + "epoch": 0.8759802357009822, + "grad_norm": 623.9129638671875, + "learning_rate": 5.270427878762496e-07, + "loss": 16.2219, + "step": 433640 + }, + { + "epoch": 0.876000436333666, + "grad_norm": 353.4367370605469, + "learning_rate": 5.268868065044886e-07, + "loss": 18.939, + "step": 433650 + }, + { + "epoch": 0.8760206369663498, + "grad_norm": 227.28196716308594, + "learning_rate": 5.267308469341387e-07, + "loss": 20.1464, + "step": 433660 + }, + { + "epoch": 0.8760408375990336, + "grad_norm": 672.3381958007812, + "learning_rate": 5.265749091659589e-07, + "loss": 21.6426, + "step": 433670 + }, + { + "epoch": 0.8760610382317174, + "grad_norm": 361.6789855957031, + "learning_rate": 5.264189932007119e-07, + "loss": 13.6678, + "step": 433680 + }, + { + "epoch": 0.8760812388644013, + "grad_norm": 427.6011657714844, + "learning_rate": 5.262630990391549e-07, + "loss": 19.2339, + "step": 433690 + }, + { + "epoch": 0.8761014394970851, + "grad_norm": 503.2677307128906, + "learning_rate": 5.2610722668205e-07, + "loss": 19.9637, + "step": 433700 + }, + { + "epoch": 0.8761216401297689, + "grad_norm": 466.8319396972656, + "learning_rate": 5.259513761301549e-07, + "loss": 18.7197, + "step": 433710 + }, + { + "epoch": 0.8761418407624527, + "grad_norm": 389.1748046875, + "learning_rate": 5.257955473842314e-07, + "loss": 13.7403, + "step": 433720 + }, + { + "epoch": 0.8761620413951365, + "grad_norm": 258.1947937011719, + "learning_rate": 5.25639740445037e-07, + "loss": 17.3214, + "step": 433730 + }, + { + "epoch": 0.8761822420278204, + "grad_norm": 548.7359619140625, + "learning_rate": 5.254839553133312e-07, + "loss": 11.7706, + "step": 433740 + }, + { + "epoch": 0.8762024426605042, + "grad_norm": 1.1916102170944214, + "learning_rate": 5.253281919898751e-07, + "loss": 31.5134, + "step": 433750 + }, + { + "epoch": 0.876222643293188, + "grad_norm": 550.4560546875, + "learning_rate": 5.251724504754258e-07, + "loss": 10.0177, + "step": 433760 + }, + { + "epoch": 0.8762428439258717, + "grad_norm": 305.590576171875, + "learning_rate": 5.250167307707437e-07, + "loss": 17.4997, + 
"step": 433770 + }, + { + "epoch": 0.8762630445585555, + "grad_norm": 482.72845458984375, + "learning_rate": 5.24861032876588e-07, + "loss": 22.2059, + "step": 433780 + }, + { + "epoch": 0.8762832451912393, + "grad_norm": 302.8846435546875, + "learning_rate": 5.247053567937155e-07, + "loss": 21.9938, + "step": 433790 + }, + { + "epoch": 0.8763034458239232, + "grad_norm": 25.524728775024414, + "learning_rate": 5.245497025228874e-07, + "loss": 32.624, + "step": 433800 + }, + { + "epoch": 0.876323646456607, + "grad_norm": 563.2052612304688, + "learning_rate": 5.243940700648609e-07, + "loss": 14.6061, + "step": 433810 + }, + { + "epoch": 0.8763438470892908, + "grad_norm": 260.39739990234375, + "learning_rate": 5.242384594203942e-07, + "loss": 29.1842, + "step": 433820 + }, + { + "epoch": 0.8763640477219746, + "grad_norm": 326.4090270996094, + "learning_rate": 5.240828705902462e-07, + "loss": 6.721, + "step": 433830 + }, + { + "epoch": 0.8763842483546584, + "grad_norm": 285.9054870605469, + "learning_rate": 5.239273035751763e-07, + "loss": 28.3131, + "step": 433840 + }, + { + "epoch": 0.8764044489873423, + "grad_norm": 558.6888427734375, + "learning_rate": 5.237717583759421e-07, + "loss": 18.2302, + "step": 433850 + }, + { + "epoch": 0.8764246496200261, + "grad_norm": 381.3103332519531, + "learning_rate": 5.236162349933005e-07, + "loss": 16.1051, + "step": 433860 + }, + { + "epoch": 0.8764448502527099, + "grad_norm": 773.1160278320312, + "learning_rate": 5.234607334280117e-07, + "loss": 13.0957, + "step": 433870 + }, + { + "epoch": 0.8764650508853937, + "grad_norm": 564.5609130859375, + "learning_rate": 5.23305253680832e-07, + "loss": 19.1361, + "step": 433880 + }, + { + "epoch": 0.8764852515180775, + "grad_norm": 561.9713745117188, + "learning_rate": 5.231497957525184e-07, + "loss": 17.2799, + "step": 433890 + }, + { + "epoch": 0.8765054521507614, + "grad_norm": 131.5213165283203, + "learning_rate": 5.229943596438297e-07, + "loss": 18.6086, + "step": 433900 + }, + { + "epoch": 0.8765256527834452, + "grad_norm": 312.5356140136719, + "learning_rate": 5.22838945355525e-07, + "loss": 15.7515, + "step": 433910 + }, + { + "epoch": 0.876545853416129, + "grad_norm": 225.6712646484375, + "learning_rate": 5.2268355288836e-07, + "loss": 7.8059, + "step": 433920 + }, + { + "epoch": 0.8765660540488128, + "grad_norm": 158.82826232910156, + "learning_rate": 5.225281822430911e-07, + "loss": 17.0044, + "step": 433930 + }, + { + "epoch": 0.8765862546814966, + "grad_norm": 908.42578125, + "learning_rate": 5.22372833420478e-07, + "loss": 33.0427, + "step": 433940 + }, + { + "epoch": 0.8766064553141805, + "grad_norm": 103.19387817382812, + "learning_rate": 5.222175064212764e-07, + "loss": 9.7731, + "step": 433950 + }, + { + "epoch": 0.8766266559468643, + "grad_norm": 747.3312377929688, + "learning_rate": 5.220622012462429e-07, + "loss": 25.4204, + "step": 433960 + }, + { + "epoch": 0.8766468565795481, + "grad_norm": 188.887939453125, + "learning_rate": 5.219069178961361e-07, + "loss": 17.6252, + "step": 433970 + }, + { + "epoch": 0.8766670572122319, + "grad_norm": 572.9087524414062, + "learning_rate": 5.217516563717107e-07, + "loss": 26.657, + "step": 433980 + }, + { + "epoch": 0.8766872578449157, + "grad_norm": 1910.318603515625, + "learning_rate": 5.215964166737258e-07, + "loss": 22.2355, + "step": 433990 + }, + { + "epoch": 0.8767074584775996, + "grad_norm": 343.5889587402344, + "learning_rate": 5.214411988029355e-07, + "loss": 14.9556, + "step": 434000 + }, + { + "epoch": 0.8767276591102834, + "grad_norm": 
452.5372619628906, + "learning_rate": 5.212860027600986e-07, + "loss": 11.9828, + "step": 434010 + }, + { + "epoch": 0.8767478597429671, + "grad_norm": 744.7176513671875, + "learning_rate": 5.21130828545971e-07, + "loss": 9.0645, + "step": 434020 + }, + { + "epoch": 0.8767680603756509, + "grad_norm": 317.92132568359375, + "learning_rate": 5.209756761613072e-07, + "loss": 21.652, + "step": 434030 + }, + { + "epoch": 0.8767882610083347, + "grad_norm": 102.97876739501953, + "learning_rate": 5.208205456068655e-07, + "loss": 20.3339, + "step": 434040 + }, + { + "epoch": 0.8768084616410186, + "grad_norm": 526.6455078125, + "learning_rate": 5.206654368834002e-07, + "loss": 29.8651, + "step": 434050 + }, + { + "epoch": 0.8768286622737024, + "grad_norm": 283.0181579589844, + "learning_rate": 5.205103499916697e-07, + "loss": 10.6281, + "step": 434060 + }, + { + "epoch": 0.8768488629063862, + "grad_norm": 1044.4290771484375, + "learning_rate": 5.203552849324284e-07, + "loss": 18.4944, + "step": 434070 + }, + { + "epoch": 0.87686906353907, + "grad_norm": 282.4901428222656, + "learning_rate": 5.202002417064306e-07, + "loss": 28.6363, + "step": 434080 + }, + { + "epoch": 0.8768892641717538, + "grad_norm": 400.8611755371094, + "learning_rate": 5.200452203144352e-07, + "loss": 22.2381, + "step": 434090 + }, + { + "epoch": 0.8769094648044377, + "grad_norm": 384.8634033203125, + "learning_rate": 5.198902207571955e-07, + "loss": 12.7948, + "step": 434100 + }, + { + "epoch": 0.8769296654371215, + "grad_norm": 218.68125915527344, + "learning_rate": 5.197352430354669e-07, + "loss": 19.107, + "step": 434110 + }, + { + "epoch": 0.8769498660698053, + "grad_norm": 254.05931091308594, + "learning_rate": 5.19580287150005e-07, + "loss": 33.2748, + "step": 434120 + }, + { + "epoch": 0.8769700667024891, + "grad_norm": 872.2964477539062, + "learning_rate": 5.194253531015675e-07, + "loss": 17.5088, + "step": 434130 + }, + { + "epoch": 0.8769902673351729, + "grad_norm": 439.51593017578125, + "learning_rate": 5.192704408909055e-07, + "loss": 17.5281, + "step": 434140 + }, + { + "epoch": 0.8770104679678568, + "grad_norm": 149.76513671875, + "learning_rate": 5.191155505187756e-07, + "loss": 17.1009, + "step": 434150 + }, + { + "epoch": 0.8770306686005406, + "grad_norm": 145.25714111328125, + "learning_rate": 5.189606819859344e-07, + "loss": 11.5807, + "step": 434160 + }, + { + "epoch": 0.8770508692332244, + "grad_norm": 513.3689575195312, + "learning_rate": 5.188058352931352e-07, + "loss": 18.9419, + "step": 434170 + }, + { + "epoch": 0.8770710698659082, + "grad_norm": 817.205322265625, + "learning_rate": 5.186510104411319e-07, + "loss": 21.3864, + "step": 434180 + }, + { + "epoch": 0.877091270498592, + "grad_norm": 311.3988952636719, + "learning_rate": 5.184962074306798e-07, + "loss": 16.6342, + "step": 434190 + }, + { + "epoch": 0.8771114711312759, + "grad_norm": 298.127197265625, + "learning_rate": 5.183414262625364e-07, + "loss": 12.2656, + "step": 434200 + }, + { + "epoch": 0.8771316717639597, + "grad_norm": 143.401611328125, + "learning_rate": 5.181866669374507e-07, + "loss": 12.541, + "step": 434210 + }, + { + "epoch": 0.8771518723966435, + "grad_norm": 945.0745239257812, + "learning_rate": 5.180319294561797e-07, + "loss": 20.0791, + "step": 434220 + }, + { + "epoch": 0.8771720730293273, + "grad_norm": 256.93524169921875, + "learning_rate": 5.178772138194782e-07, + "loss": 15.6674, + "step": 434230 + }, + { + "epoch": 0.8771922736620111, + "grad_norm": 766.1445922851562, + "learning_rate": 5.177225200281e-07, + "loss": 
19.7, + "step": 434240 + }, + { + "epoch": 0.877212474294695, + "grad_norm": 280.7933044433594, + "learning_rate": 5.175678480827972e-07, + "loss": 15.9839, + "step": 434250 + }, + { + "epoch": 0.8772326749273788, + "grad_norm": 542.4453735351562, + "learning_rate": 5.174131979843266e-07, + "loss": 21.3026, + "step": 434260 + }, + { + "epoch": 0.8772528755600626, + "grad_norm": 401.15228271484375, + "learning_rate": 5.172585697334398e-07, + "loss": 22.7436, + "step": 434270 + }, + { + "epoch": 0.8772730761927463, + "grad_norm": 469.0462341308594, + "learning_rate": 5.171039633308905e-07, + "loss": 14.0386, + "step": 434280 + }, + { + "epoch": 0.8772932768254301, + "grad_norm": 233.45587158203125, + "learning_rate": 5.169493787774338e-07, + "loss": 13.6644, + "step": 434290 + }, + { + "epoch": 0.8773134774581139, + "grad_norm": 294.5813293457031, + "learning_rate": 5.167948160738206e-07, + "loss": 10.5622, + "step": 434300 + }, + { + "epoch": 0.8773336780907978, + "grad_norm": 11.028252601623535, + "learning_rate": 5.166402752208071e-07, + "loss": 8.3747, + "step": 434310 + }, + { + "epoch": 0.8773538787234816, + "grad_norm": 437.0497741699219, + "learning_rate": 5.164857562191439e-07, + "loss": 24.6234, + "step": 434320 + }, + { + "epoch": 0.8773740793561654, + "grad_norm": 663.6287231445312, + "learning_rate": 5.163312590695869e-07, + "loss": 21.4791, + "step": 434330 + }, + { + "epoch": 0.8773942799888492, + "grad_norm": 246.58670043945312, + "learning_rate": 5.161767837728871e-07, + "loss": 21.7751, + "step": 434340 + }, + { + "epoch": 0.877414480621533, + "grad_norm": 15.990674018859863, + "learning_rate": 5.160223303297967e-07, + "loss": 13.354, + "step": 434350 + }, + { + "epoch": 0.8774346812542169, + "grad_norm": 264.93389892578125, + "learning_rate": 5.15867898741071e-07, + "loss": 17.3039, + "step": 434360 + }, + { + "epoch": 0.8774548818869007, + "grad_norm": 337.698974609375, + "learning_rate": 5.1571348900746e-07, + "loss": 17.1138, + "step": 434370 + }, + { + "epoch": 0.8774750825195845, + "grad_norm": 445.1646423339844, + "learning_rate": 5.155591011297184e-07, + "loss": 33.2997, + "step": 434380 + }, + { + "epoch": 0.8774952831522683, + "grad_norm": 197.03204345703125, + "learning_rate": 5.154047351085983e-07, + "loss": 12.7977, + "step": 434390 + }, + { + "epoch": 0.8775154837849521, + "grad_norm": 262.495361328125, + "learning_rate": 5.152503909448503e-07, + "loss": 20.8684, + "step": 434400 + }, + { + "epoch": 0.877535684417636, + "grad_norm": 787.7115478515625, + "learning_rate": 5.150960686392293e-07, + "loss": 22.9129, + "step": 434410 + }, + { + "epoch": 0.8775558850503198, + "grad_norm": 477.249755859375, + "learning_rate": 5.149417681924856e-07, + "loss": 20.5048, + "step": 434420 + }, + { + "epoch": 0.8775760856830036, + "grad_norm": 42.68398666381836, + "learning_rate": 5.147874896053711e-07, + "loss": 14.379, + "step": 434430 + }, + { + "epoch": 0.8775962863156874, + "grad_norm": 145.18858337402344, + "learning_rate": 5.146332328786386e-07, + "loss": 21.0583, + "step": 434440 + }, + { + "epoch": 0.8776164869483712, + "grad_norm": 353.35400390625, + "learning_rate": 5.144789980130404e-07, + "loss": 19.2491, + "step": 434450 + }, + { + "epoch": 0.8776366875810551, + "grad_norm": 766.2012939453125, + "learning_rate": 5.143247850093274e-07, + "loss": 22.5219, + "step": 434460 + }, + { + "epoch": 0.8776568882137389, + "grad_norm": 329.0042419433594, + "learning_rate": 5.141705938682506e-07, + "loss": 20.0049, + "step": 434470 + }, + { + "epoch": 0.8776770888464227, + 
"grad_norm": 256.41632080078125, + "learning_rate": 5.140164245905633e-07, + "loss": 13.8661, + "step": 434480 + }, + { + "epoch": 0.8776972894791065, + "grad_norm": 472.8299255371094, + "learning_rate": 5.138622771770157e-07, + "loss": 19.9039, + "step": 434490 + }, + { + "epoch": 0.8777174901117903, + "grad_norm": 310.2403564453125, + "learning_rate": 5.137081516283582e-07, + "loss": 14.9185, + "step": 434500 + }, + { + "epoch": 0.8777376907444742, + "grad_norm": 534.4397583007812, + "learning_rate": 5.135540479453432e-07, + "loss": 17.3908, + "step": 434510 + }, + { + "epoch": 0.877757891377158, + "grad_norm": 316.0336608886719, + "learning_rate": 5.133999661287226e-07, + "loss": 26.7963, + "step": 434520 + }, + { + "epoch": 0.8777780920098417, + "grad_norm": 247.14096069335938, + "learning_rate": 5.13245906179246e-07, + "loss": 28.1687, + "step": 434530 + }, + { + "epoch": 0.8777982926425255, + "grad_norm": 113.46258544921875, + "learning_rate": 5.130918680976643e-07, + "loss": 13.7509, + "step": 434540 + }, + { + "epoch": 0.8778184932752093, + "grad_norm": 235.147705078125, + "learning_rate": 5.129378518847295e-07, + "loss": 23.4125, + "step": 434550 + }, + { + "epoch": 0.8778386939078932, + "grad_norm": 383.57684326171875, + "learning_rate": 5.127838575411908e-07, + "loss": 14.9999, + "step": 434560 + }, + { + "epoch": 0.877858894540577, + "grad_norm": 352.58856201171875, + "learning_rate": 5.126298850677991e-07, + "loss": 13.527, + "step": 434570 + }, + { + "epoch": 0.8778790951732608, + "grad_norm": 321.5787353515625, + "learning_rate": 5.124759344653057e-07, + "loss": 8.4193, + "step": 434580 + }, + { + "epoch": 0.8778992958059446, + "grad_norm": 500.7260437011719, + "learning_rate": 5.123220057344597e-07, + "loss": 22.2562, + "step": 434590 + }, + { + "epoch": 0.8779194964386284, + "grad_norm": 445.0096130371094, + "learning_rate": 5.121680988760125e-07, + "loss": 19.0204, + "step": 434600 + }, + { + "epoch": 0.8779396970713123, + "grad_norm": 222.2227325439453, + "learning_rate": 5.120142138907131e-07, + "loss": 37.4008, + "step": 434610 + }, + { + "epoch": 0.8779598977039961, + "grad_norm": 418.1203918457031, + "learning_rate": 5.11860350779313e-07, + "loss": 19.7539, + "step": 434620 + }, + { + "epoch": 0.8779800983366799, + "grad_norm": 516.3267822265625, + "learning_rate": 5.11706509542561e-07, + "loss": 14.7627, + "step": 434630 + }, + { + "epoch": 0.8780002989693637, + "grad_norm": 933.3505859375, + "learning_rate": 5.115526901812062e-07, + "loss": 25.2065, + "step": 434640 + }, + { + "epoch": 0.8780204996020475, + "grad_norm": 410.081787109375, + "learning_rate": 5.113988926960001e-07, + "loss": 15.1906, + "step": 434650 + }, + { + "epoch": 0.8780407002347314, + "grad_norm": 320.27349853515625, + "learning_rate": 5.112451170876903e-07, + "loss": 16.609, + "step": 434660 + }, + { + "epoch": 0.8780609008674152, + "grad_norm": 259.0826110839844, + "learning_rate": 5.110913633570286e-07, + "loss": 31.0852, + "step": 434670 + }, + { + "epoch": 0.878081101500099, + "grad_norm": 314.41046142578125, + "learning_rate": 5.109376315047632e-07, + "loss": 11.9574, + "step": 434680 + }, + { + "epoch": 0.8781013021327828, + "grad_norm": 643.1636352539062, + "learning_rate": 5.107839215316424e-07, + "loss": 27.3094, + "step": 434690 + }, + { + "epoch": 0.8781215027654666, + "grad_norm": 439.34320068359375, + "learning_rate": 5.106302334384172e-07, + "loss": 16.708, + "step": 434700 + }, + { + "epoch": 0.8781417033981505, + "grad_norm": 979.7433471679688, + "learning_rate": 
5.104765672258355e-07, + "loss": 25.1756, + "step": 434710 + }, + { + "epoch": 0.8781619040308343, + "grad_norm": 422.1972961425781, + "learning_rate": 5.103229228946455e-07, + "loss": 11.8049, + "step": 434720 + }, + { + "epoch": 0.8781821046635181, + "grad_norm": 145.0203857421875, + "learning_rate": 5.101693004455977e-07, + "loss": 10.6392, + "step": 434730 + }, + { + "epoch": 0.8782023052962019, + "grad_norm": 377.1507568359375, + "learning_rate": 5.100156998794415e-07, + "loss": 18.1032, + "step": 434740 + }, + { + "epoch": 0.8782225059288857, + "grad_norm": 272.1239929199219, + "learning_rate": 5.098621211969224e-07, + "loss": 21.4669, + "step": 434750 + }, + { + "epoch": 0.8782427065615696, + "grad_norm": 46.512596130371094, + "learning_rate": 5.09708564398791e-07, + "loss": 13.3603, + "step": 434760 + }, + { + "epoch": 0.8782629071942534, + "grad_norm": 281.3079528808594, + "learning_rate": 5.095550294857959e-07, + "loss": 14.2644, + "step": 434770 + }, + { + "epoch": 0.8782831078269372, + "grad_norm": 374.24896240234375, + "learning_rate": 5.094015164586852e-07, + "loss": 14.84, + "step": 434780 + }, + { + "epoch": 0.8783033084596209, + "grad_norm": 364.0821533203125, + "learning_rate": 5.092480253182058e-07, + "loss": 23.2679, + "step": 434790 + }, + { + "epoch": 0.8783235090923047, + "grad_norm": 162.59280395507812, + "learning_rate": 5.090945560651073e-07, + "loss": 15.5259, + "step": 434800 + }, + { + "epoch": 0.8783437097249885, + "grad_norm": 276.9872741699219, + "learning_rate": 5.08941108700139e-07, + "loss": 8.1555, + "step": 434810 + }, + { + "epoch": 0.8783639103576724, + "grad_norm": 326.7541198730469, + "learning_rate": 5.087876832240446e-07, + "loss": 27.8575, + "step": 434820 + }, + { + "epoch": 0.8783841109903562, + "grad_norm": 33.11380386352539, + "learning_rate": 5.086342796375749e-07, + "loss": 7.0672, + "step": 434830 + }, + { + "epoch": 0.87840431162304, + "grad_norm": 153.7439727783203, + "learning_rate": 5.084808979414779e-07, + "loss": 14.9328, + "step": 434840 + }, + { + "epoch": 0.8784245122557238, + "grad_norm": 171.47793579101562, + "learning_rate": 5.083275381364999e-07, + "loss": 26.4409, + "step": 434850 + }, + { + "epoch": 0.8784447128884076, + "grad_norm": 350.4378662109375, + "learning_rate": 5.081742002233881e-07, + "loss": 16.2276, + "step": 434860 + }, + { + "epoch": 0.8784649135210915, + "grad_norm": 450.28155517578125, + "learning_rate": 5.080208842028911e-07, + "loss": 25.9278, + "step": 434870 + }, + { + "epoch": 0.8784851141537753, + "grad_norm": 448.4052734375, + "learning_rate": 5.078675900757557e-07, + "loss": 20.3733, + "step": 434880 + }, + { + "epoch": 0.8785053147864591, + "grad_norm": 350.4577941894531, + "learning_rate": 5.07714317842728e-07, + "loss": 12.8354, + "step": 434890 + }, + { + "epoch": 0.8785255154191429, + "grad_norm": 610.2189331054688, + "learning_rate": 5.075610675045567e-07, + "loss": 17.948, + "step": 434900 + }, + { + "epoch": 0.8785457160518267, + "grad_norm": 399.9798889160156, + "learning_rate": 5.074078390619869e-07, + "loss": 21.844, + "step": 434910 + }, + { + "epoch": 0.8785659166845106, + "grad_norm": 383.8321838378906, + "learning_rate": 5.072546325157673e-07, + "loss": 13.7429, + "step": 434920 + }, + { + "epoch": 0.8785861173171944, + "grad_norm": 216.4840850830078, + "learning_rate": 5.071014478666425e-07, + "loss": 14.4042, + "step": 434930 + }, + { + "epoch": 0.8786063179498782, + "grad_norm": 457.54327392578125, + "learning_rate": 5.069482851153618e-07, + "loss": 18.8779, + "step": 434940 + }, + 
{ + "epoch": 0.878626518582562, + "grad_norm": 32.08904266357422, + "learning_rate": 5.0679514426267e-07, + "loss": 15.2914, + "step": 434950 + }, + { + "epoch": 0.8786467192152458, + "grad_norm": 35.61134719848633, + "learning_rate": 5.06642025309313e-07, + "loss": 11.8779, + "step": 434960 + }, + { + "epoch": 0.8786669198479297, + "grad_norm": 723.7139282226562, + "learning_rate": 5.064889282560382e-07, + "loss": 20.9908, + "step": 434970 + }, + { + "epoch": 0.8786871204806135, + "grad_norm": 102.7997817993164, + "learning_rate": 5.063358531035906e-07, + "loss": 13.4488, + "step": 434980 + }, + { + "epoch": 0.8787073211132973, + "grad_norm": 352.2797546386719, + "learning_rate": 5.06182799852718e-07, + "loss": 13.2957, + "step": 434990 + }, + { + "epoch": 0.8787275217459811, + "grad_norm": 886.9483032226562, + "learning_rate": 5.06029768504166e-07, + "loss": 22.359, + "step": 435000 + }, + { + "epoch": 0.8787477223786649, + "grad_norm": 218.36572265625, + "learning_rate": 5.058767590586783e-07, + "loss": 19.1126, + "step": 435010 + }, + { + "epoch": 0.8787679230113488, + "grad_norm": 287.1523742675781, + "learning_rate": 5.057237715170032e-07, + "loss": 14.2723, + "step": 435020 + }, + { + "epoch": 0.8787881236440326, + "grad_norm": 420.1017761230469, + "learning_rate": 5.055708058798853e-07, + "loss": 27.4834, + "step": 435030 + }, + { + "epoch": 0.8788083242767164, + "grad_norm": 525.8872680664062, + "learning_rate": 5.054178621480694e-07, + "loss": 16.9919, + "step": 435040 + }, + { + "epoch": 0.8788285249094001, + "grad_norm": 232.071533203125, + "learning_rate": 5.052649403223015e-07, + "loss": 18.4302, + "step": 435050 + }, + { + "epoch": 0.8788487255420839, + "grad_norm": 342.8013610839844, + "learning_rate": 5.051120404033283e-07, + "loss": 18.3008, + "step": 435060 + }, + { + "epoch": 0.8788689261747678, + "grad_norm": 6.585102081298828, + "learning_rate": 5.049591623918937e-07, + "loss": 26.1853, + "step": 435070 + }, + { + "epoch": 0.8788891268074516, + "grad_norm": 368.1114501953125, + "learning_rate": 5.04806306288742e-07, + "loss": 25.3293, + "step": 435080 + }, + { + "epoch": 0.8789093274401354, + "grad_norm": 515.1712036132812, + "learning_rate": 5.046534720946206e-07, + "loss": 16.9136, + "step": 435090 + }, + { + "epoch": 0.8789295280728192, + "grad_norm": 16.89857292175293, + "learning_rate": 5.045006598102725e-07, + "loss": 25.171, + "step": 435100 + }, + { + "epoch": 0.878949728705503, + "grad_norm": 346.8264465332031, + "learning_rate": 5.043478694364423e-07, + "loss": 16.4898, + "step": 435110 + }, + { + "epoch": 0.8789699293381869, + "grad_norm": 332.0619201660156, + "learning_rate": 5.04195100973875e-07, + "loss": 17.1752, + "step": 435120 + }, + { + "epoch": 0.8789901299708707, + "grad_norm": 107.52465057373047, + "learning_rate": 5.040423544233164e-07, + "loss": 17.1065, + "step": 435130 + }, + { + "epoch": 0.8790103306035545, + "grad_norm": 433.8957824707031, + "learning_rate": 5.0388962978551e-07, + "loss": 15.4823, + "step": 435140 + }, + { + "epoch": 0.8790305312362383, + "grad_norm": 242.65370178222656, + "learning_rate": 5.037369270611997e-07, + "loss": 14.8826, + "step": 435150 + }, + { + "epoch": 0.8790507318689221, + "grad_norm": 471.4053649902344, + "learning_rate": 5.035842462511309e-07, + "loss": 33.3922, + "step": 435160 + }, + { + "epoch": 0.879070932501606, + "grad_norm": 67.55804443359375, + "learning_rate": 5.034315873560475e-07, + "loss": 15.3738, + "step": 435170 + }, + { + "epoch": 0.8790911331342898, + "grad_norm": 534.8261108398438, + 
"learning_rate": 5.032789503766922e-07, + "loss": 19.2829, + "step": 435180 + }, + { + "epoch": 0.8791113337669736, + "grad_norm": 511.7774353027344, + "learning_rate": 5.031263353138105e-07, + "loss": 27.3329, + "step": 435190 + }, + { + "epoch": 0.8791315343996574, + "grad_norm": 279.4249267578125, + "learning_rate": 5.029737421681446e-07, + "loss": 28.538, + "step": 435200 + }, + { + "epoch": 0.8791517350323412, + "grad_norm": 345.3414306640625, + "learning_rate": 5.028211709404407e-07, + "loss": 24.7293, + "step": 435210 + }, + { + "epoch": 0.879171935665025, + "grad_norm": 673.9608764648438, + "learning_rate": 5.026686216314397e-07, + "loss": 16.0036, + "step": 435220 + }, + { + "epoch": 0.8791921362977089, + "grad_norm": 195.92652893066406, + "learning_rate": 5.025160942418872e-07, + "loss": 16.4735, + "step": 435230 + }, + { + "epoch": 0.8792123369303927, + "grad_norm": 606.4151611328125, + "learning_rate": 5.023635887725259e-07, + "loss": 17.4006, + "step": 435240 + }, + { + "epoch": 0.8792325375630765, + "grad_norm": 447.26043701171875, + "learning_rate": 5.022111052240985e-07, + "loss": 21.1823, + "step": 435250 + }, + { + "epoch": 0.8792527381957603, + "grad_norm": 122.67220306396484, + "learning_rate": 5.020586435973491e-07, + "loss": 25.98, + "step": 435260 + }, + { + "epoch": 0.8792729388284442, + "grad_norm": 458.9827575683594, + "learning_rate": 5.019062038930195e-07, + "loss": 21.6993, + "step": 435270 + }, + { + "epoch": 0.879293139461128, + "grad_norm": 315.67913818359375, + "learning_rate": 5.017537861118543e-07, + "loss": 27.674, + "step": 435280 + }, + { + "epoch": 0.8793133400938118, + "grad_norm": 158.3207244873047, + "learning_rate": 5.016013902545957e-07, + "loss": 13.9432, + "step": 435290 + }, + { + "epoch": 0.8793335407264955, + "grad_norm": 261.7013244628906, + "learning_rate": 5.014490163219854e-07, + "loss": 16.5056, + "step": 435300 + }, + { + "epoch": 0.8793537413591793, + "grad_norm": 59.75825500488281, + "learning_rate": 5.012966643147682e-07, + "loss": 10.7603, + "step": 435310 + }, + { + "epoch": 0.8793739419918631, + "grad_norm": 483.52099609375, + "learning_rate": 5.011443342336852e-07, + "loss": 8.7847, + "step": 435320 + }, + { + "epoch": 0.879394142624547, + "grad_norm": 192.5576934814453, + "learning_rate": 5.009920260794782e-07, + "loss": 9.5417, + "step": 435330 + }, + { + "epoch": 0.8794143432572308, + "grad_norm": 318.4565124511719, + "learning_rate": 5.008397398528903e-07, + "loss": 13.3563, + "step": 435340 + }, + { + "epoch": 0.8794345438899146, + "grad_norm": 380.82891845703125, + "learning_rate": 5.006874755546654e-07, + "loss": 17.2505, + "step": 435350 + }, + { + "epoch": 0.8794547445225984, + "grad_norm": 852.404296875, + "learning_rate": 5.005352331855423e-07, + "loss": 22.5348, + "step": 435360 + }, + { + "epoch": 0.8794749451552822, + "grad_norm": 502.7873229980469, + "learning_rate": 5.00383012746265e-07, + "loss": 34.0941, + "step": 435370 + }, + { + "epoch": 0.8794951457879661, + "grad_norm": 190.71434020996094, + "learning_rate": 5.002308142375762e-07, + "loss": 13.873, + "step": 435380 + }, + { + "epoch": 0.8795153464206499, + "grad_norm": 77.80549621582031, + "learning_rate": 5.000786376602162e-07, + "loss": 12.7138, + "step": 435390 + }, + { + "epoch": 0.8795355470533337, + "grad_norm": 485.7001037597656, + "learning_rate": 4.99926483014927e-07, + "loss": 13.9821, + "step": 435400 + }, + { + "epoch": 0.8795557476860175, + "grad_norm": 265.3295593261719, + "learning_rate": 4.997743503024494e-07, + "loss": 19.1495, + "step": 
435410 + }, + { + "epoch": 0.8795759483187013, + "grad_norm": 141.0780792236328, + "learning_rate": 4.996222395235283e-07, + "loss": 22.0904, + "step": 435420 + }, + { + "epoch": 0.8795961489513852, + "grad_norm": 273.9278869628906, + "learning_rate": 4.994701506789007e-07, + "loss": 17.5997, + "step": 435430 + }, + { + "epoch": 0.879616349584069, + "grad_norm": 353.7311706542969, + "learning_rate": 4.99318083769309e-07, + "loss": 13.0439, + "step": 435440 + }, + { + "epoch": 0.8796365502167528, + "grad_norm": 721.9375610351562, + "learning_rate": 4.991660387954967e-07, + "loss": 22.3529, + "step": 435450 + }, + { + "epoch": 0.8796567508494366, + "grad_norm": 301.629638671875, + "learning_rate": 4.990140157582036e-07, + "loss": 14.4415, + "step": 435460 + }, + { + "epoch": 0.8796769514821204, + "grad_norm": 149.6297607421875, + "learning_rate": 4.988620146581685e-07, + "loss": 16.6529, + "step": 435470 + }, + { + "epoch": 0.8796971521148043, + "grad_norm": 219.41262817382812, + "learning_rate": 4.987100354961355e-07, + "loss": 14.8243, + "step": 435480 + }, + { + "epoch": 0.8797173527474881, + "grad_norm": 404.56951904296875, + "learning_rate": 4.985580782728433e-07, + "loss": 18.9657, + "step": 435490 + }, + { + "epoch": 0.8797375533801719, + "grad_norm": 428.6075744628906, + "learning_rate": 4.984061429890324e-07, + "loss": 13.3605, + "step": 435500 + }, + { + "epoch": 0.8797577540128557, + "grad_norm": 303.6961975097656, + "learning_rate": 4.98254229645444e-07, + "loss": 23.9782, + "step": 435510 + }, + { + "epoch": 0.8797779546455395, + "grad_norm": 307.2866516113281, + "learning_rate": 4.981023382428196e-07, + "loss": 22.7079, + "step": 435520 + }, + { + "epoch": 0.8797981552782234, + "grad_norm": 423.6874084472656, + "learning_rate": 4.979504687818987e-07, + "loss": 21.0905, + "step": 435530 + }, + { + "epoch": 0.8798183559109072, + "grad_norm": 481.9783020019531, + "learning_rate": 4.977986212634195e-07, + "loss": 21.2299, + "step": 435540 + }, + { + "epoch": 0.879838556543591, + "grad_norm": 610.5613403320312, + "learning_rate": 4.976467956881254e-07, + "loss": 20.8345, + "step": 435550 + }, + { + "epoch": 0.8798587571762747, + "grad_norm": 238.90846252441406, + "learning_rate": 4.97494992056754e-07, + "loss": 18.398, + "step": 435560 + }, + { + "epoch": 0.8798789578089585, + "grad_norm": 228.98049926757812, + "learning_rate": 4.973432103700454e-07, + "loss": 25.6139, + "step": 435570 + }, + { + "epoch": 0.8798991584416423, + "grad_norm": 269.6488037109375, + "learning_rate": 4.971914506287407e-07, + "loss": 21.9952, + "step": 435580 + }, + { + "epoch": 0.8799193590743262, + "grad_norm": 191.76803588867188, + "learning_rate": 4.97039712833578e-07, + "loss": 30.8179, + "step": 435590 + }, + { + "epoch": 0.87993955970701, + "grad_norm": 510.7918701171875, + "learning_rate": 4.968879969852985e-07, + "loss": 21.6079, + "step": 435600 + }, + { + "epoch": 0.8799597603396938, + "grad_norm": 22.066001892089844, + "learning_rate": 4.967363030846406e-07, + "loss": 22.5894, + "step": 435610 + }, + { + "epoch": 0.8799799609723776, + "grad_norm": 355.6514892578125, + "learning_rate": 4.965846311323431e-07, + "loss": 24.1495, + "step": 435620 + }, + { + "epoch": 0.8800001616050614, + "grad_norm": 226.7723388671875, + "learning_rate": 4.964329811291463e-07, + "loss": 11.3769, + "step": 435630 + }, + { + "epoch": 0.8800203622377453, + "grad_norm": 76.03665161132812, + "learning_rate": 4.962813530757893e-07, + "loss": 14.7804, + "step": 435640 + }, + { + "epoch": 0.8800405628704291, + "grad_norm": 
199.2616729736328, + "learning_rate": 4.961297469730097e-07, + "loss": 13.2704, + "step": 435650 + }, + { + "epoch": 0.8800607635031129, + "grad_norm": 465.0576171875, + "learning_rate": 4.959781628215476e-07, + "loss": 19.5031, + "step": 435660 + }, + { + "epoch": 0.8800809641357967, + "grad_norm": 108.87104797363281, + "learning_rate": 4.95826600622143e-07, + "loss": 15.921, + "step": 435670 + }, + { + "epoch": 0.8801011647684805, + "grad_norm": 103.40303802490234, + "learning_rate": 4.956750603755328e-07, + "loss": 13.7844, + "step": 435680 + }, + { + "epoch": 0.8801213654011644, + "grad_norm": 320.8612365722656, + "learning_rate": 4.95523542082455e-07, + "loss": 15.81, + "step": 435690 + }, + { + "epoch": 0.8801415660338482, + "grad_norm": 696.59326171875, + "learning_rate": 4.9537204574365e-07, + "loss": 24.1962, + "step": 435700 + }, + { + "epoch": 0.880161766666532, + "grad_norm": 989.5188598632812, + "learning_rate": 4.952205713598557e-07, + "loss": 21.6382, + "step": 435710 + }, + { + "epoch": 0.8801819672992158, + "grad_norm": 198.38470458984375, + "learning_rate": 4.950691189318086e-07, + "loss": 12.5203, + "step": 435720 + }, + { + "epoch": 0.8802021679318996, + "grad_norm": 232.7144317626953, + "learning_rate": 4.949176884602486e-07, + "loss": 40.752, + "step": 435730 + }, + { + "epoch": 0.8802223685645835, + "grad_norm": 314.38751220703125, + "learning_rate": 4.947662799459152e-07, + "loss": 19.3201, + "step": 435740 + }, + { + "epoch": 0.8802425691972673, + "grad_norm": 347.6485595703125, + "learning_rate": 4.946148933895423e-07, + "loss": 22.7622, + "step": 435750 + }, + { + "epoch": 0.8802627698299511, + "grad_norm": 87.8727035522461, + "learning_rate": 4.944635287918703e-07, + "loss": 10.1187, + "step": 435760 + }, + { + "epoch": 0.8802829704626349, + "grad_norm": 192.29734802246094, + "learning_rate": 4.943121861536376e-07, + "loss": 20.3241, + "step": 435770 + }, + { + "epoch": 0.8803031710953187, + "grad_norm": 254.23399353027344, + "learning_rate": 4.941608654755808e-07, + "loss": 10.8488, + "step": 435780 + }, + { + "epoch": 0.8803233717280026, + "grad_norm": 515.1815185546875, + "learning_rate": 4.940095667584366e-07, + "loss": 21.5697, + "step": 435790 + }, + { + "epoch": 0.8803435723606864, + "grad_norm": 158.14749145507812, + "learning_rate": 4.938582900029437e-07, + "loss": 21.8464, + "step": 435800 + }, + { + "epoch": 0.8803637729933701, + "grad_norm": 957.723876953125, + "learning_rate": 4.937070352098384e-07, + "loss": 24.3145, + "step": 435810 + }, + { + "epoch": 0.8803839736260539, + "grad_norm": 341.580322265625, + "learning_rate": 4.935558023798592e-07, + "loss": 18.2231, + "step": 435820 + }, + { + "epoch": 0.8804041742587377, + "grad_norm": 276.53717041015625, + "learning_rate": 4.934045915137419e-07, + "loss": 9.082, + "step": 435830 + }, + { + "epoch": 0.8804243748914216, + "grad_norm": 673.6719970703125, + "learning_rate": 4.932534026122249e-07, + "loss": 12.0788, + "step": 435840 + }, + { + "epoch": 0.8804445755241054, + "grad_norm": 303.8342590332031, + "learning_rate": 4.931022356760439e-07, + "loss": 22.0684, + "step": 435850 + }, + { + "epoch": 0.8804647761567892, + "grad_norm": 380.82440185546875, + "learning_rate": 4.929510907059354e-07, + "loss": 20.524, + "step": 435860 + }, + { + "epoch": 0.880484976789473, + "grad_norm": 22.417442321777344, + "learning_rate": 4.927999677026374e-07, + "loss": 14.9668, + "step": 435870 + }, + { + "epoch": 0.8805051774221568, + "grad_norm": 0.30903351306915283, + "learning_rate": 4.926488666668844e-07, + 
"loss": 12.1088, + "step": 435880 + }, + { + "epoch": 0.8805253780548407, + "grad_norm": 529.1118774414062, + "learning_rate": 4.924977875994159e-07, + "loss": 19.5259, + "step": 435890 + }, + { + "epoch": 0.8805455786875245, + "grad_norm": 357.0335388183594, + "learning_rate": 4.92346730500966e-07, + "loss": 9.633, + "step": 435900 + }, + { + "epoch": 0.8805657793202083, + "grad_norm": 185.8363037109375, + "learning_rate": 4.921956953722701e-07, + "loss": 9.942, + "step": 435910 + }, + { + "epoch": 0.8805859799528921, + "grad_norm": 256.4708557128906, + "learning_rate": 4.920446822140673e-07, + "loss": 10.8701, + "step": 435920 + }, + { + "epoch": 0.8806061805855759, + "grad_norm": 474.8191833496094, + "learning_rate": 4.918936910270916e-07, + "loss": 12.3728, + "step": 435930 + }, + { + "epoch": 0.8806263812182598, + "grad_norm": 217.32952880859375, + "learning_rate": 4.917427218120785e-07, + "loss": 21.8445, + "step": 435940 + }, + { + "epoch": 0.8806465818509436, + "grad_norm": 248.072265625, + "learning_rate": 4.915917745697645e-07, + "loss": 20.431, + "step": 435950 + }, + { + "epoch": 0.8806667824836274, + "grad_norm": 288.0353698730469, + "learning_rate": 4.914408493008871e-07, + "loss": 25.1107, + "step": 435960 + }, + { + "epoch": 0.8806869831163112, + "grad_norm": 106.0359115600586, + "learning_rate": 4.912899460061787e-07, + "loss": 12.067, + "step": 435970 + }, + { + "epoch": 0.880707183748995, + "grad_norm": 354.09881591796875, + "learning_rate": 4.911390646863757e-07, + "loss": 17.9086, + "step": 435980 + }, + { + "epoch": 0.8807273843816789, + "grad_norm": 540.8861694335938, + "learning_rate": 4.909882053422154e-07, + "loss": 18.2216, + "step": 435990 + }, + { + "epoch": 0.8807475850143627, + "grad_norm": 184.07806396484375, + "learning_rate": 4.908373679744316e-07, + "loss": 17.5554, + "step": 436000 + }, + { + "epoch": 0.8807677856470465, + "grad_norm": 177.03599548339844, + "learning_rate": 4.90686552583759e-07, + "loss": 18.7642, + "step": 436010 + }, + { + "epoch": 0.8807879862797303, + "grad_norm": 166.44651794433594, + "learning_rate": 4.905357591709325e-07, + "loss": 43.1835, + "step": 436020 + }, + { + "epoch": 0.8808081869124141, + "grad_norm": 321.760986328125, + "learning_rate": 4.9038498773669e-07, + "loss": 16.6634, + "step": 436030 + }, + { + "epoch": 0.880828387545098, + "grad_norm": 341.0688171386719, + "learning_rate": 4.902342382817626e-07, + "loss": 32.7243, + "step": 436040 + }, + { + "epoch": 0.8808485881777818, + "grad_norm": 302.72125244140625, + "learning_rate": 4.900835108068863e-07, + "loss": 16.4093, + "step": 436050 + }, + { + "epoch": 0.8808687888104656, + "grad_norm": 336.9021301269531, + "learning_rate": 4.899328053127966e-07, + "loss": 21.199, + "step": 436060 + }, + { + "epoch": 0.8808889894431493, + "grad_norm": 434.4256896972656, + "learning_rate": 4.89782121800228e-07, + "loss": 14.7242, + "step": 436070 + }, + { + "epoch": 0.8809091900758331, + "grad_norm": 167.6675567626953, + "learning_rate": 4.896314602699126e-07, + "loss": 15.5623, + "step": 436080 + }, + { + "epoch": 0.880929390708517, + "grad_norm": 460.88494873046875, + "learning_rate": 4.894808207225882e-07, + "loss": 21.3126, + "step": 436090 + }, + { + "epoch": 0.8809495913412008, + "grad_norm": 345.532958984375, + "learning_rate": 4.893302031589864e-07, + "loss": 14.3713, + "step": 436100 + }, + { + "epoch": 0.8809697919738846, + "grad_norm": 349.746826171875, + "learning_rate": 4.891796075798416e-07, + "loss": 39.5377, + "step": 436110 + }, + { + "epoch": 0.8809899926065684, 
+ "grad_norm": 411.3088684082031, + "learning_rate": 4.890290339858883e-07, + "loss": 13.9997, + "step": 436120 + }, + { + "epoch": 0.8810101932392522, + "grad_norm": 324.4735107421875, + "learning_rate": 4.888784823778614e-07, + "loss": 15.4449, + "step": 436130 + }, + { + "epoch": 0.881030393871936, + "grad_norm": 259.8567199707031, + "learning_rate": 4.887279527564936e-07, + "loss": 9.0276, + "step": 436140 + }, + { + "epoch": 0.8810505945046199, + "grad_norm": 444.4129943847656, + "learning_rate": 4.885774451225178e-07, + "loss": 15.4865, + "step": 436150 + }, + { + "epoch": 0.8810707951373037, + "grad_norm": 343.4646911621094, + "learning_rate": 4.884269594766689e-07, + "loss": 18.7254, + "step": 436160 + }, + { + "epoch": 0.8810909957699875, + "grad_norm": 232.9482421875, + "learning_rate": 4.8827649581968e-07, + "loss": 16.868, + "step": 436170 + }, + { + "epoch": 0.8811111964026713, + "grad_norm": 549.541015625, + "learning_rate": 4.881260541522831e-07, + "loss": 28.0799, + "step": 436180 + }, + { + "epoch": 0.8811313970353551, + "grad_norm": 612.6248168945312, + "learning_rate": 4.87975634475214e-07, + "loss": 20.9709, + "step": 436190 + }, + { + "epoch": 0.881151597668039, + "grad_norm": 501.504638671875, + "learning_rate": 4.878252367892033e-07, + "loss": 16.7092, + "step": 436200 + }, + { + "epoch": 0.8811717983007228, + "grad_norm": 297.49261474609375, + "learning_rate": 4.87674861094986e-07, + "loss": 19.5409, + "step": 436210 + }, + { + "epoch": 0.8811919989334066, + "grad_norm": 224.5021209716797, + "learning_rate": 4.875245073932944e-07, + "loss": 10.7612, + "step": 436220 + }, + { + "epoch": 0.8812121995660904, + "grad_norm": 361.38916015625, + "learning_rate": 4.873741756848594e-07, + "loss": 23.1716, + "step": 436230 + }, + { + "epoch": 0.8812324001987742, + "grad_norm": 224.0435333251953, + "learning_rate": 4.87223865970417e-07, + "loss": 12.7234, + "step": 436240 + }, + { + "epoch": 0.8812526008314581, + "grad_norm": 342.0216369628906, + "learning_rate": 4.87073578250698e-07, + "loss": 13.4458, + "step": 436250 + }, + { + "epoch": 0.8812728014641419, + "grad_norm": 313.01409912109375, + "learning_rate": 4.869233125264339e-07, + "loss": 8.6048, + "step": 436260 + }, + { + "epoch": 0.8812930020968257, + "grad_norm": 258.0538330078125, + "learning_rate": 4.867730687983585e-07, + "loss": 15.3131, + "step": 436270 + }, + { + "epoch": 0.8813132027295095, + "grad_norm": 425.1286926269531, + "learning_rate": 4.866228470672041e-07, + "loss": 11.7544, + "step": 436280 + }, + { + "epoch": 0.8813334033621933, + "grad_norm": 173.1059112548828, + "learning_rate": 4.864726473337034e-07, + "loss": 21.4789, + "step": 436290 + }, + { + "epoch": 0.8813536039948772, + "grad_norm": 401.54052734375, + "learning_rate": 4.863224695985858e-07, + "loss": 23.8768, + "step": 436300 + }, + { + "epoch": 0.881373804627561, + "grad_norm": 436.904052734375, + "learning_rate": 4.861723138625862e-07, + "loss": 18.8533, + "step": 436310 + }, + { + "epoch": 0.8813940052602448, + "grad_norm": 455.9186706542969, + "learning_rate": 4.860221801264358e-07, + "loss": 17.9445, + "step": 436320 + }, + { + "epoch": 0.8814142058929285, + "grad_norm": 319.8497314453125, + "learning_rate": 4.858720683908646e-07, + "loss": 21.2574, + "step": 436330 + }, + { + "epoch": 0.8814344065256123, + "grad_norm": 468.0010986328125, + "learning_rate": 4.857219786566053e-07, + "loss": 24.4979, + "step": 436340 + }, + { + "epoch": 0.8814546071582962, + "grad_norm": 550.1976928710938, + "learning_rate": 4.855719109243917e-07, + 
"loss": 23.3478, + "step": 436350 + }, + { + "epoch": 0.88147480779098, + "grad_norm": 190.78244018554688, + "learning_rate": 4.85421865194951e-07, + "loss": 25.4633, + "step": 436360 + }, + { + "epoch": 0.8814950084236638, + "grad_norm": 798.060302734375, + "learning_rate": 4.852718414690166e-07, + "loss": 11.6966, + "step": 436370 + }, + { + "epoch": 0.8815152090563476, + "grad_norm": 268.206298828125, + "learning_rate": 4.851218397473206e-07, + "loss": 20.4789, + "step": 436380 + }, + { + "epoch": 0.8815354096890314, + "grad_norm": 858.3386840820312, + "learning_rate": 4.84971860030593e-07, + "loss": 25.3801, + "step": 436390 + }, + { + "epoch": 0.8815556103217153, + "grad_norm": 158.20469665527344, + "learning_rate": 4.848219023195644e-07, + "loss": 9.441, + "step": 436400 + }, + { + "epoch": 0.8815758109543991, + "grad_norm": 214.66708374023438, + "learning_rate": 4.846719666149668e-07, + "loss": 14.5835, + "step": 436410 + }, + { + "epoch": 0.8815960115870829, + "grad_norm": 338.07586669921875, + "learning_rate": 4.845220529175304e-07, + "loss": 16.8058, + "step": 436420 + }, + { + "epoch": 0.8816162122197667, + "grad_norm": 421.33880615234375, + "learning_rate": 4.84372161227985e-07, + "loss": 13.6267, + "step": 436430 + }, + { + "epoch": 0.8816364128524505, + "grad_norm": 0.18723739683628082, + "learning_rate": 4.842222915470618e-07, + "loss": 11.3648, + "step": 436440 + }, + { + "epoch": 0.8816566134851344, + "grad_norm": 26.688398361206055, + "learning_rate": 4.840724438754929e-07, + "loss": 23.6005, + "step": 436450 + }, + { + "epoch": 0.8816768141178182, + "grad_norm": 433.8655700683594, + "learning_rate": 4.839226182140072e-07, + "loss": 14.6859, + "step": 436460 + }, + { + "epoch": 0.881697014750502, + "grad_norm": 489.1601867675781, + "learning_rate": 4.837728145633335e-07, + "loss": 19.8474, + "step": 436470 + }, + { + "epoch": 0.8817172153831858, + "grad_norm": 393.7753601074219, + "learning_rate": 4.836230329242042e-07, + "loss": 20.2308, + "step": 436480 + }, + { + "epoch": 0.8817374160158696, + "grad_norm": 130.21282958984375, + "learning_rate": 4.83473273297348e-07, + "loss": 17.8237, + "step": 436490 + }, + { + "epoch": 0.8817576166485535, + "grad_norm": 400.9236755371094, + "learning_rate": 4.833235356834959e-07, + "loss": 15.2621, + "step": 436500 + }, + { + "epoch": 0.8817778172812373, + "grad_norm": 395.96966552734375, + "learning_rate": 4.831738200833775e-07, + "loss": 17.6387, + "step": 436510 + }, + { + "epoch": 0.8817980179139211, + "grad_norm": 605.7783813476562, + "learning_rate": 4.830241264977209e-07, + "loss": 19.7262, + "step": 436520 + }, + { + "epoch": 0.8818182185466049, + "grad_norm": 163.8187713623047, + "learning_rate": 4.828744549272579e-07, + "loss": 21.9274, + "step": 436530 + }, + { + "epoch": 0.8818384191792887, + "grad_norm": 571.732421875, + "learning_rate": 4.827248053727168e-07, + "loss": 18.2143, + "step": 436540 + }, + { + "epoch": 0.8818586198119726, + "grad_norm": 313.611083984375, + "learning_rate": 4.825751778348259e-07, + "loss": 16.4337, + "step": 436550 + }, + { + "epoch": 0.8818788204446564, + "grad_norm": 463.8468933105469, + "learning_rate": 4.824255723143162e-07, + "loss": 17.2454, + "step": 436560 + }, + { + "epoch": 0.8818990210773402, + "grad_norm": 476.1033020019531, + "learning_rate": 4.822759888119171e-07, + "loss": 15.0617, + "step": 436570 + }, + { + "epoch": 0.8819192217100239, + "grad_norm": 248.8014678955078, + "learning_rate": 4.821264273283566e-07, + "loss": 10.7048, + "step": 436580 + }, + { + "epoch": 
0.8819394223427077, + "grad_norm": 682.4763793945312, + "learning_rate": 4.819768878643633e-07, + "loss": 23.8681, + "step": 436590 + }, + { + "epoch": 0.8819596229753915, + "grad_norm": 253.24656677246094, + "learning_rate": 4.818273704206678e-07, + "loss": 11.7182, + "step": 436600 + }, + { + "epoch": 0.8819798236080754, + "grad_norm": 95.55328369140625, + "learning_rate": 4.816778749979973e-07, + "loss": 16.1974, + "step": 436610 + }, + { + "epoch": 0.8820000242407592, + "grad_norm": 72.05003356933594, + "learning_rate": 4.815284015970801e-07, + "loss": 10.8611, + "step": 436620 + }, + { + "epoch": 0.882020224873443, + "grad_norm": 675.322998046875, + "learning_rate": 4.813789502186456e-07, + "loss": 12.9505, + "step": 436630 + }, + { + "epoch": 0.8820404255061268, + "grad_norm": 151.66297912597656, + "learning_rate": 4.812295208634238e-07, + "loss": 21.0599, + "step": 436640 + }, + { + "epoch": 0.8820606261388106, + "grad_norm": 343.0258483886719, + "learning_rate": 4.810801135321391e-07, + "loss": 28.378, + "step": 436650 + }, + { + "epoch": 0.8820808267714945, + "grad_norm": 415.1305236816406, + "learning_rate": 4.809307282255221e-07, + "loss": 14.8969, + "step": 436660 + }, + { + "epoch": 0.8821010274041783, + "grad_norm": 659.5724487304688, + "learning_rate": 4.807813649443016e-07, + "loss": 24.2443, + "step": 436670 + }, + { + "epoch": 0.8821212280368621, + "grad_norm": 838.9188232421875, + "learning_rate": 4.806320236892048e-07, + "loss": 24.2559, + "step": 436680 + }, + { + "epoch": 0.8821414286695459, + "grad_norm": 328.0303039550781, + "learning_rate": 4.804827044609578e-07, + "loss": 18.7561, + "step": 436690 + }, + { + "epoch": 0.8821616293022297, + "grad_norm": 458.027099609375, + "learning_rate": 4.803334072602917e-07, + "loss": 16.5193, + "step": 436700 + }, + { + "epoch": 0.8821818299349136, + "grad_norm": 705.2664184570312, + "learning_rate": 4.801841320879319e-07, + "loss": 24.2991, + "step": 436710 + }, + { + "epoch": 0.8822020305675974, + "grad_norm": 440.40521240234375, + "learning_rate": 4.800348789446058e-07, + "loss": 17.4962, + "step": 436720 + }, + { + "epoch": 0.8822222312002812, + "grad_norm": 82.1038818359375, + "learning_rate": 4.798856478310409e-07, + "loss": 17.8321, + "step": 436730 + }, + { + "epoch": 0.882242431832965, + "grad_norm": 691.0404052734375, + "learning_rate": 4.797364387479664e-07, + "loss": 28.2128, + "step": 436740 + }, + { + "epoch": 0.8822626324656488, + "grad_norm": 657.4496459960938, + "learning_rate": 4.795872516961087e-07, + "loss": 15.4127, + "step": 436750 + }, + { + "epoch": 0.8822828330983327, + "grad_norm": 584.3892822265625, + "learning_rate": 4.794380866761928e-07, + "loss": 13.4703, + "step": 436760 + }, + { + "epoch": 0.8823030337310165, + "grad_norm": 325.6146545410156, + "learning_rate": 4.792889436889487e-07, + "loss": 7.1147, + "step": 436770 + }, + { + "epoch": 0.8823232343637003, + "grad_norm": 118.1163101196289, + "learning_rate": 4.791398227351024e-07, + "loss": 14.4561, + "step": 436780 + }, + { + "epoch": 0.8823434349963841, + "grad_norm": 243.56419372558594, + "learning_rate": 4.789907238153785e-07, + "loss": 21.6599, + "step": 436790 + }, + { + "epoch": 0.8823636356290679, + "grad_norm": 760.7359619140625, + "learning_rate": 4.788416469305068e-07, + "loss": 25.8602, + "step": 436800 + }, + { + "epoch": 0.8823838362617518, + "grad_norm": 113.09996032714844, + "learning_rate": 4.786925920812119e-07, + "loss": 12.4008, + "step": 436810 + }, + { + "epoch": 0.8824040368944356, + "grad_norm": 326.0145263671875, + 
"learning_rate": 4.78543559268222e-07, + "loss": 22.3032, + "step": 436820 + }, + { + "epoch": 0.8824242375271194, + "grad_norm": 238.98001098632812, + "learning_rate": 4.78394548492262e-07, + "loss": 20.334, + "step": 436830 + }, + { + "epoch": 0.8824444381598031, + "grad_norm": 637.3267822265625, + "learning_rate": 4.782455597540576e-07, + "loss": 30.5467, + "step": 436840 + }, + { + "epoch": 0.8824646387924869, + "grad_norm": 487.647216796875, + "learning_rate": 4.780965930543369e-07, + "loss": 12.0064, + "step": 436850 + }, + { + "epoch": 0.8824848394251708, + "grad_norm": 367.8666687011719, + "learning_rate": 4.779476483938251e-07, + "loss": 13.7615, + "step": 436860 + }, + { + "epoch": 0.8825050400578546, + "grad_norm": 679.5595092773438, + "learning_rate": 4.777987257732469e-07, + "loss": 20.4513, + "step": 436870 + }, + { + "epoch": 0.8825252406905384, + "grad_norm": 102.08183288574219, + "learning_rate": 4.776498251933292e-07, + "loss": 21.0092, + "step": 436880 + }, + { + "epoch": 0.8825454413232222, + "grad_norm": 578.4793701171875, + "learning_rate": 4.775009466547986e-07, + "loss": 20.5826, + "step": 436890 + }, + { + "epoch": 0.882565641955906, + "grad_norm": 64.65514373779297, + "learning_rate": 4.773520901583801e-07, + "loss": 19.3511, + "step": 436900 + }, + { + "epoch": 0.8825858425885899, + "grad_norm": 629.3908081054688, + "learning_rate": 4.772032557047984e-07, + "loss": 24.7698, + "step": 436910 + }, + { + "epoch": 0.8826060432212737, + "grad_norm": 752.3409423828125, + "learning_rate": 4.770544432947799e-07, + "loss": 17.0448, + "step": 436920 + }, + { + "epoch": 0.8826262438539575, + "grad_norm": 471.2159118652344, + "learning_rate": 4.769056529290495e-07, + "loss": 16.0433, + "step": 436930 + }, + { + "epoch": 0.8826464444866413, + "grad_norm": 449.1194763183594, + "learning_rate": 4.7675688460833145e-07, + "loss": 24.9503, + "step": 436940 + }, + { + "epoch": 0.8826666451193251, + "grad_norm": 366.4239807128906, + "learning_rate": 4.766081383333521e-07, + "loss": 29.5621, + "step": 436950 + }, + { + "epoch": 0.882686845752009, + "grad_norm": 390.2115173339844, + "learning_rate": 4.7645941410483733e-07, + "loss": 16.0112, + "step": 436960 + }, + { + "epoch": 0.8827070463846928, + "grad_norm": 189.2827606201172, + "learning_rate": 4.7631071192350943e-07, + "loss": 19.8507, + "step": 436970 + }, + { + "epoch": 0.8827272470173766, + "grad_norm": 124.29317474365234, + "learning_rate": 4.7616203179009445e-07, + "loss": 20.3787, + "step": 436980 + }, + { + "epoch": 0.8827474476500604, + "grad_norm": 133.61338806152344, + "learning_rate": 4.760133737053174e-07, + "loss": 7.5316, + "step": 436990 + }, + { + "epoch": 0.8827676482827442, + "grad_norm": 119.6464614868164, + "learning_rate": 4.758647376699033e-07, + "loss": 16.7015, + "step": 437000 + }, + { + "epoch": 0.882787848915428, + "grad_norm": 175.40997314453125, + "learning_rate": 4.757161236845742e-07, + "loss": 13.1048, + "step": 437010 + }, + { + "epoch": 0.8828080495481119, + "grad_norm": 1037.2186279296875, + "learning_rate": 4.755675317500569e-07, + "loss": 30.6978, + "step": 437020 + }, + { + "epoch": 0.8828282501807957, + "grad_norm": 235.47128295898438, + "learning_rate": 4.7541896186707517e-07, + "loss": 17.5572, + "step": 437030 + }, + { + "epoch": 0.8828484508134795, + "grad_norm": 591.5972290039062, + "learning_rate": 4.752704140363512e-07, + "loss": 25.8775, + "step": 437040 + }, + { + "epoch": 0.8828686514461633, + "grad_norm": 420.414794921875, + "learning_rate": 4.751218882586106e-07, + "loss": 
30.0815, + "step": 437050 + }, + { + "epoch": 0.8828888520788472, + "grad_norm": 216.95462036132812, + "learning_rate": 4.749733845345783e-07, + "loss": 17.1518, + "step": 437060 + }, + { + "epoch": 0.882909052711531, + "grad_norm": 728.5482177734375, + "learning_rate": 4.748249028649765e-07, + "loss": 23.1488, + "step": 437070 + }, + { + "epoch": 0.8829292533442148, + "grad_norm": 530.2343139648438, + "learning_rate": 4.7467644325052855e-07, + "loss": 16.5789, + "step": 437080 + }, + { + "epoch": 0.8829494539768985, + "grad_norm": 219.0844268798828, + "learning_rate": 4.7452800569195987e-07, + "loss": 13.1149, + "step": 437090 + }, + { + "epoch": 0.8829696546095823, + "grad_norm": 440.1877746582031, + "learning_rate": 4.743795901899928e-07, + "loss": 21.1264, + "step": 437100 + }, + { + "epoch": 0.8829898552422661, + "grad_norm": 140.57644653320312, + "learning_rate": 4.742311967453495e-07, + "loss": 14.4177, + "step": 437110 + }, + { + "epoch": 0.88301005587495, + "grad_norm": 552.50537109375, + "learning_rate": 4.7408282535875593e-07, + "loss": 29.2932, + "step": 437120 + }, + { + "epoch": 0.8830302565076338, + "grad_norm": 141.6434326171875, + "learning_rate": 4.739344760309322e-07, + "loss": 14.1547, + "step": 437130 + }, + { + "epoch": 0.8830504571403176, + "grad_norm": 53.88914108276367, + "learning_rate": 4.737861487626039e-07, + "loss": 11.355, + "step": 437140 + }, + { + "epoch": 0.8830706577730014, + "grad_norm": 602.4818115234375, + "learning_rate": 4.7363784355449303e-07, + "loss": 21.5469, + "step": 437150 + }, + { + "epoch": 0.8830908584056852, + "grad_norm": 606.8235473632812, + "learning_rate": 4.734895604073214e-07, + "loss": 14.414, + "step": 437160 + }, + { + "epoch": 0.8831110590383691, + "grad_norm": 586.7005004882812, + "learning_rate": 4.7334129932181283e-07, + "loss": 34.3757, + "step": 437170 + }, + { + "epoch": 0.8831312596710529, + "grad_norm": 158.1629638671875, + "learning_rate": 4.731930602986906e-07, + "loss": 9.9822, + "step": 437180 + }, + { + "epoch": 0.8831514603037367, + "grad_norm": 417.0316162109375, + "learning_rate": 4.730448433386764e-07, + "loss": 14.5091, + "step": 437190 + }, + { + "epoch": 0.8831716609364205, + "grad_norm": 861.2413940429688, + "learning_rate": 4.728966484424913e-07, + "loss": 26.3821, + "step": 437200 + }, + { + "epoch": 0.8831918615691043, + "grad_norm": 502.9021301269531, + "learning_rate": 4.727484756108602e-07, + "loss": 14.3446, + "step": 437210 + }, + { + "epoch": 0.8832120622017882, + "grad_norm": 319.57952880859375, + "learning_rate": 4.726003248445038e-07, + "loss": 17.3094, + "step": 437220 + }, + { + "epoch": 0.883232262834472, + "grad_norm": 108.85161590576172, + "learning_rate": 4.724521961441436e-07, + "loss": 14.5947, + "step": 437230 + }, + { + "epoch": 0.8832524634671558, + "grad_norm": 199.23394775390625, + "learning_rate": 4.723040895105019e-07, + "loss": 23.1646, + "step": 437240 + }, + { + "epoch": 0.8832726640998396, + "grad_norm": 285.6567687988281, + "learning_rate": 4.72156004944303e-07, + "loss": 13.2874, + "step": 437250 + }, + { + "epoch": 0.8832928647325234, + "grad_norm": 457.18017578125, + "learning_rate": 4.720079424462648e-07, + "loss": 20.3675, + "step": 437260 + }, + { + "epoch": 0.8833130653652073, + "grad_norm": 428.0270080566406, + "learning_rate": 4.718599020171105e-07, + "loss": 18.1246, + "step": 437270 + }, + { + "epoch": 0.8833332659978911, + "grad_norm": 94.82398223876953, + "learning_rate": 4.7171188365756235e-07, + "loss": 21.3042, + "step": 437280 + }, + { + "epoch": 
0.8833534666305749, + "grad_norm": 424.24517822265625, + "learning_rate": 4.71563887368342e-07, + "loss": 13.0863, + "step": 437290 + }, + { + "epoch": 0.8833736672632587, + "grad_norm": 0.6462593078613281, + "learning_rate": 4.714159131501689e-07, + "loss": 8.5118, + "step": 437300 + }, + { + "epoch": 0.8833938678959425, + "grad_norm": 181.89942932128906, + "learning_rate": 4.7126796100376625e-07, + "loss": 16.9697, + "step": 437310 + }, + { + "epoch": 0.8834140685286264, + "grad_norm": 22.54423713684082, + "learning_rate": 4.7112003092985414e-07, + "loss": 11.9234, + "step": 437320 + }, + { + "epoch": 0.8834342691613102, + "grad_norm": 212.46315002441406, + "learning_rate": 4.7097212292915307e-07, + "loss": 18.5752, + "step": 437330 + }, + { + "epoch": 0.883454469793994, + "grad_norm": 273.62518310546875, + "learning_rate": 4.7082423700238413e-07, + "loss": 12.7955, + "step": 437340 + }, + { + "epoch": 0.8834746704266777, + "grad_norm": 337.732421875, + "learning_rate": 4.7067637315027005e-07, + "loss": 11.8375, + "step": 437350 + }, + { + "epoch": 0.8834948710593615, + "grad_norm": 203.64193725585938, + "learning_rate": 4.705285313735297e-07, + "loss": 9.4601, + "step": 437360 + }, + { + "epoch": 0.8835150716920454, + "grad_norm": 468.43768310546875, + "learning_rate": 4.703807116728831e-07, + "loss": 14.4001, + "step": 437370 + }, + { + "epoch": 0.8835352723247292, + "grad_norm": 310.4579772949219, + "learning_rate": 4.7023291404905245e-07, + "loss": 13.6832, + "step": 437380 + }, + { + "epoch": 0.883555472957413, + "grad_norm": 489.82421875, + "learning_rate": 4.700851385027566e-07, + "loss": 18.8303, + "step": 437390 + }, + { + "epoch": 0.8835756735900968, + "grad_norm": 307.5848083496094, + "learning_rate": 4.699373850347161e-07, + "loss": 23.059, + "step": 437400 + }, + { + "epoch": 0.8835958742227806, + "grad_norm": 587.0242919921875, + "learning_rate": 4.69789653645652e-07, + "loss": 21.7008, + "step": 437410 + }, + { + "epoch": 0.8836160748554645, + "grad_norm": 445.4830322265625, + "learning_rate": 4.6964194433628317e-07, + "loss": 10.5014, + "step": 437420 + }, + { + "epoch": 0.8836362754881483, + "grad_norm": 395.11370849609375, + "learning_rate": 4.6949425710733076e-07, + "loss": 9.3218, + "step": 437430 + }, + { + "epoch": 0.8836564761208321, + "grad_norm": 300.8310852050781, + "learning_rate": 4.693465919595136e-07, + "loss": 10.4542, + "step": 437440 + }, + { + "epoch": 0.8836766767535159, + "grad_norm": 375.7260437011719, + "learning_rate": 4.691989488935511e-07, + "loss": 17.3353, + "step": 437450 + }, + { + "epoch": 0.8836968773861997, + "grad_norm": 503.7796325683594, + "learning_rate": 4.690513279101638e-07, + "loss": 17.4916, + "step": 437460 + }, + { + "epoch": 0.8837170780188836, + "grad_norm": 682.6212158203125, + "learning_rate": 4.689037290100712e-07, + "loss": 22.9069, + "step": 437470 + }, + { + "epoch": 0.8837372786515674, + "grad_norm": 276.8441162109375, + "learning_rate": 4.687561521939915e-07, + "loss": 14.4692, + "step": 437480 + }, + { + "epoch": 0.8837574792842512, + "grad_norm": 87.65137481689453, + "learning_rate": 4.686085974626442e-07, + "loss": 25.7139, + "step": 437490 + }, + { + "epoch": 0.883777679916935, + "grad_norm": 647.8373413085938, + "learning_rate": 4.6846106481675035e-07, + "loss": 35.8543, + "step": 437500 + }, + { + "epoch": 0.8837978805496188, + "grad_norm": 23.456647872924805, + "learning_rate": 4.683135542570277e-07, + "loss": 15.9017, + "step": 437510 + }, + { + "epoch": 0.8838180811823027, + "grad_norm": 539.7153930664062, + 
"learning_rate": 4.681660657841941e-07, + "loss": 12.9271, + "step": 437520 + }, + { + "epoch": 0.8838382818149865, + "grad_norm": 442.73876953125, + "learning_rate": 4.6801859939896997e-07, + "loss": 19.9689, + "step": 437530 + }, + { + "epoch": 0.8838584824476703, + "grad_norm": 325.82708740234375, + "learning_rate": 4.678711551020743e-07, + "loss": 19.9856, + "step": 437540 + }, + { + "epoch": 0.8838786830803541, + "grad_norm": 2447.803466796875, + "learning_rate": 4.677237328942236e-07, + "loss": 16.8712, + "step": 437550 + }, + { + "epoch": 0.8838988837130379, + "grad_norm": 508.7763671875, + "learning_rate": 4.6757633277613734e-07, + "loss": 15.6815, + "step": 437560 + }, + { + "epoch": 0.8839190843457218, + "grad_norm": 592.2590942382812, + "learning_rate": 4.674289547485367e-07, + "loss": 19.2942, + "step": 437570 + }, + { + "epoch": 0.8839392849784056, + "grad_norm": 809.1892700195312, + "learning_rate": 4.672815988121354e-07, + "loss": 19.2552, + "step": 437580 + }, + { + "epoch": 0.8839594856110894, + "grad_norm": 3.522529125213623, + "learning_rate": 4.6713426496765413e-07, + "loss": 19.2758, + "step": 437590 + }, + { + "epoch": 0.8839796862437731, + "grad_norm": 0.0, + "learning_rate": 4.6698695321581165e-07, + "loss": 19.1922, + "step": 437600 + }, + { + "epoch": 0.8839998868764569, + "grad_norm": 352.0887145996094, + "learning_rate": 4.6683966355732466e-07, + "loss": 12.766, + "step": 437610 + }, + { + "epoch": 0.8840200875091407, + "grad_norm": 241.43849182128906, + "learning_rate": 4.6669239599291093e-07, + "loss": 17.8853, + "step": 437620 + }, + { + "epoch": 0.8840402881418246, + "grad_norm": 608.0235595703125, + "learning_rate": 4.665451505232882e-07, + "loss": 14.7198, + "step": 437630 + }, + { + "epoch": 0.8840604887745084, + "grad_norm": 361.3467712402344, + "learning_rate": 4.663979271491764e-07, + "loss": 21.5801, + "step": 437640 + }, + { + "epoch": 0.8840806894071922, + "grad_norm": 461.13983154296875, + "learning_rate": 4.662507258712895e-07, + "loss": 12.0084, + "step": 437650 + }, + { + "epoch": 0.884100890039876, + "grad_norm": 745.8554077148438, + "learning_rate": 4.6610354669034686e-07, + "loss": 23.2169, + "step": 437660 + }, + { + "epoch": 0.8841210906725598, + "grad_norm": 785.3743896484375, + "learning_rate": 4.6595638960706624e-07, + "loss": 19.8191, + "step": 437670 + }, + { + "epoch": 0.8841412913052437, + "grad_norm": 748.1947631835938, + "learning_rate": 4.6580925462216487e-07, + "loss": 21.8141, + "step": 437680 + }, + { + "epoch": 0.8841614919379275, + "grad_norm": 268.165283203125, + "learning_rate": 4.656621417363577e-07, + "loss": 33.7911, + "step": 437690 + }, + { + "epoch": 0.8841816925706113, + "grad_norm": 100.24395751953125, + "learning_rate": 4.655150509503642e-07, + "loss": 7.0177, + "step": 437700 + }, + { + "epoch": 0.8842018932032951, + "grad_norm": 439.3194885253906, + "learning_rate": 4.65367982264901e-07, + "loss": 13.1335, + "step": 437710 + }, + { + "epoch": 0.8842220938359789, + "grad_norm": 580.4093627929688, + "learning_rate": 4.6522093568068307e-07, + "loss": 21.3427, + "step": 437720 + }, + { + "epoch": 0.8842422944686628, + "grad_norm": 221.25843811035156, + "learning_rate": 4.650739111984287e-07, + "loss": 17.7892, + "step": 437730 + }, + { + "epoch": 0.8842624951013466, + "grad_norm": 306.7304992675781, + "learning_rate": 4.649269088188535e-07, + "loss": 11.2845, + "step": 437740 + }, + { + "epoch": 0.8842826957340304, + "grad_norm": 288.1169738769531, + "learning_rate": 4.647799285426757e-07, + "loss": 19.4869, + 
"step": 437750 + }, + { + "epoch": 0.8843028963667142, + "grad_norm": 726.6117553710938, + "learning_rate": 4.646329703706104e-07, + "loss": 37.4337, + "step": 437760 + }, + { + "epoch": 0.884323096999398, + "grad_norm": 282.6123962402344, + "learning_rate": 4.644860343033725e-07, + "loss": 8.5612, + "step": 437770 + }, + { + "epoch": 0.8843432976320819, + "grad_norm": 184.43309020996094, + "learning_rate": 4.6433912034168083e-07, + "loss": 10.7296, + "step": 437780 + }, + { + "epoch": 0.8843634982647657, + "grad_norm": 313.0046691894531, + "learning_rate": 4.6419222848624933e-07, + "loss": 13.1905, + "step": 437790 + }, + { + "epoch": 0.8843836988974495, + "grad_norm": 424.1170349121094, + "learning_rate": 4.640453587377958e-07, + "loss": 17.5901, + "step": 437800 + }, + { + "epoch": 0.8844038995301333, + "grad_norm": 543.5725708007812, + "learning_rate": 4.63898511097034e-07, + "loss": 20.5456, + "step": 437810 + }, + { + "epoch": 0.8844241001628171, + "grad_norm": 267.98577880859375, + "learning_rate": 4.6375168556468175e-07, + "loss": 27.1274, + "step": 437820 + }, + { + "epoch": 0.884444300795501, + "grad_norm": 689.3540649414062, + "learning_rate": 4.636048821414535e-07, + "loss": 17.7252, + "step": 437830 + }, + { + "epoch": 0.8844645014281848, + "grad_norm": 445.6986999511719, + "learning_rate": 4.6345810082806363e-07, + "loss": 18.1701, + "step": 437840 + }, + { + "epoch": 0.8844847020608686, + "grad_norm": 373.1318359375, + "learning_rate": 4.6331134162522994e-07, + "loss": 15.1325, + "step": 437850 + }, + { + "epoch": 0.8845049026935523, + "grad_norm": 377.1880187988281, + "learning_rate": 4.631646045336663e-07, + "loss": 16.7919, + "step": 437860 + }, + { + "epoch": 0.8845251033262361, + "grad_norm": 185.0105438232422, + "learning_rate": 4.6301788955408765e-07, + "loss": 27.8121, + "step": 437870 + }, + { + "epoch": 0.88454530395892, + "grad_norm": 563.5680541992188, + "learning_rate": 4.62871196687209e-07, + "loss": 18.6658, + "step": 437880 + }, + { + "epoch": 0.8845655045916038, + "grad_norm": 231.3992919921875, + "learning_rate": 4.6272452593374763e-07, + "loss": 15.211, + "step": 437890 + }, + { + "epoch": 0.8845857052242876, + "grad_norm": 88.52085876464844, + "learning_rate": 4.625778772944156e-07, + "loss": 17.0757, + "step": 437900 + }, + { + "epoch": 0.8846059058569714, + "grad_norm": 181.04515075683594, + "learning_rate": 4.6243125076992857e-07, + "loss": 9.0486, + "step": 437910 + }, + { + "epoch": 0.8846261064896552, + "grad_norm": 112.04206085205078, + "learning_rate": 4.62284646361002e-07, + "loss": 14.2957, + "step": 437920 + }, + { + "epoch": 0.884646307122339, + "grad_norm": 370.3331604003906, + "learning_rate": 4.6213806406834926e-07, + "loss": 25.6922, + "step": 437930 + }, + { + "epoch": 0.8846665077550229, + "grad_norm": 485.3958740234375, + "learning_rate": 4.6199150389268476e-07, + "loss": 13.4734, + "step": 437940 + }, + { + "epoch": 0.8846867083877067, + "grad_norm": 288.86383056640625, + "learning_rate": 4.6184496583472293e-07, + "loss": 32.1188, + "step": 437950 + }, + { + "epoch": 0.8847069090203905, + "grad_norm": 38.7264518737793, + "learning_rate": 4.616984498951793e-07, + "loss": 12.0045, + "step": 437960 + }, + { + "epoch": 0.8847271096530743, + "grad_norm": 810.9624633789062, + "learning_rate": 4.6155195607476723e-07, + "loss": 14.6303, + "step": 437970 + }, + { + "epoch": 0.8847473102857581, + "grad_norm": 460.4407958984375, + "learning_rate": 4.614054843741994e-07, + "loss": 9.7327, + "step": 437980 + }, + { + "epoch": 0.884767510918442, + 
"grad_norm": 91.19581604003906, + "learning_rate": 4.61259034794192e-07, + "loss": 10.1597, + "step": 437990 + }, + { + "epoch": 0.8847877115511258, + "grad_norm": 616.4536743164062, + "learning_rate": 4.6111260733545714e-07, + "loss": 28.4931, + "step": 438000 + }, + { + "epoch": 0.8848079121838096, + "grad_norm": 162.45799255371094, + "learning_rate": 4.6096620199870824e-07, + "loss": 8.3742, + "step": 438010 + }, + { + "epoch": 0.8848281128164934, + "grad_norm": 215.92393493652344, + "learning_rate": 4.6081981878466077e-07, + "loss": 18.8802, + "step": 438020 + }, + { + "epoch": 0.8848483134491772, + "grad_norm": 255.54925537109375, + "learning_rate": 4.606734576940253e-07, + "loss": 16.6796, + "step": 438030 + }, + { + "epoch": 0.8848685140818611, + "grad_norm": 0.0, + "learning_rate": 4.6052711872751843e-07, + "loss": 10.8918, + "step": 438040 + }, + { + "epoch": 0.8848887147145449, + "grad_norm": 235.79869079589844, + "learning_rate": 4.6038080188585135e-07, + "loss": 21.5738, + "step": 438050 + }, + { + "epoch": 0.8849089153472287, + "grad_norm": 143.0699920654297, + "learning_rate": 4.602345071697373e-07, + "loss": 13.8974, + "step": 438060 + }, + { + "epoch": 0.8849291159799125, + "grad_norm": 160.64614868164062, + "learning_rate": 4.600882345798902e-07, + "loss": 24.4522, + "step": 438070 + }, + { + "epoch": 0.8849493166125963, + "grad_norm": 119.09025573730469, + "learning_rate": 4.599419841170216e-07, + "loss": 17.8335, + "step": 438080 + }, + { + "epoch": 0.8849695172452802, + "grad_norm": 638.122802734375, + "learning_rate": 4.5979575578184554e-07, + "loss": 18.7437, + "step": 438090 + }, + { + "epoch": 0.884989717877964, + "grad_norm": 433.4074401855469, + "learning_rate": 4.5964954957507414e-07, + "loss": 22.9923, + "step": 438100 + }, + { + "epoch": 0.8850099185106478, + "grad_norm": 442.2541809082031, + "learning_rate": 4.595033654974207e-07, + "loss": 11.311, + "step": 438110 + }, + { + "epoch": 0.8850301191433315, + "grad_norm": 338.8622131347656, + "learning_rate": 4.593572035495969e-07, + "loss": 16.6048, + "step": 438120 + }, + { + "epoch": 0.8850503197760153, + "grad_norm": 254.80043029785156, + "learning_rate": 4.592110637323149e-07, + "loss": 9.9581, + "step": 438130 + }, + { + "epoch": 0.8850705204086992, + "grad_norm": 468.2026062011719, + "learning_rate": 4.5906494604628816e-07, + "loss": 17.1238, + "step": 438140 + }, + { + "epoch": 0.885090721041383, + "grad_norm": 607.1072998046875, + "learning_rate": 4.5891885049222815e-07, + "loss": 10.1863, + "step": 438150 + }, + { + "epoch": 0.8851109216740668, + "grad_norm": 680.0186157226562, + "learning_rate": 4.587727770708461e-07, + "loss": 15.1077, + "step": 438160 + }, + { + "epoch": 0.8851311223067506, + "grad_norm": 596.2591552734375, + "learning_rate": 4.5862672578285475e-07, + "loss": 18.6161, + "step": 438170 + }, + { + "epoch": 0.8851513229394344, + "grad_norm": 493.0876159667969, + "learning_rate": 4.5848069662896786e-07, + "loss": 18.7473, + "step": 438180 + }, + { + "epoch": 0.8851715235721183, + "grad_norm": 283.8135070800781, + "learning_rate": 4.5833468960989333e-07, + "loss": 19.7269, + "step": 438190 + }, + { + "epoch": 0.8851917242048021, + "grad_norm": 155.3894805908203, + "learning_rate": 4.581887047263445e-07, + "loss": 36.0125, + "step": 438200 + }, + { + "epoch": 0.8852119248374859, + "grad_norm": 399.869384765625, + "learning_rate": 4.5804274197903396e-07, + "loss": 11.8754, + "step": 438210 + }, + { + "epoch": 0.8852321254701697, + "grad_norm": 407.58941650390625, + "learning_rate": 
4.5789680136867245e-07, + "loss": 14.0652, + "step": 438220 + }, + { + "epoch": 0.8852523261028535, + "grad_norm": 232.7967071533203, + "learning_rate": 4.577508828959698e-07, + "loss": 12.1223, + "step": 438230 + }, + { + "epoch": 0.8852725267355374, + "grad_norm": 279.1619873046875, + "learning_rate": 4.5760498656163886e-07, + "loss": 30.3401, + "step": 438240 + }, + { + "epoch": 0.8852927273682212, + "grad_norm": 716.46875, + "learning_rate": 4.5745911236639186e-07, + "loss": 25.7322, + "step": 438250 + }, + { + "epoch": 0.885312928000905, + "grad_norm": 555.6453857421875, + "learning_rate": 4.5731326031093645e-07, + "loss": 20.943, + "step": 438260 + }, + { + "epoch": 0.8853331286335888, + "grad_norm": 371.71453857421875, + "learning_rate": 4.57167430395985e-07, + "loss": 17.1643, + "step": 438270 + }, + { + "epoch": 0.8853533292662726, + "grad_norm": 604.136962890625, + "learning_rate": 4.5702162262224957e-07, + "loss": 15.5282, + "step": 438280 + }, + { + "epoch": 0.8853735298989565, + "grad_norm": 481.5717468261719, + "learning_rate": 4.5687583699044027e-07, + "loss": 20.4883, + "step": 438290 + }, + { + "epoch": 0.8853937305316403, + "grad_norm": 913.0048828125, + "learning_rate": 4.567300735012653e-07, + "loss": 19.11, + "step": 438300 + }, + { + "epoch": 0.8854139311643241, + "grad_norm": 522.569580078125, + "learning_rate": 4.565843321554386e-07, + "loss": 7.5831, + "step": 438310 + }, + { + "epoch": 0.8854341317970079, + "grad_norm": 355.49493408203125, + "learning_rate": 4.5643861295366854e-07, + "loss": 22.0143, + "step": 438320 + }, + { + "epoch": 0.8854543324296917, + "grad_norm": 793.0252075195312, + "learning_rate": 4.562929158966645e-07, + "loss": 19.7702, + "step": 438330 + }, + { + "epoch": 0.8854745330623756, + "grad_norm": 668.8243408203125, + "learning_rate": 4.561472409851386e-07, + "loss": 29.0895, + "step": 438340 + }, + { + "epoch": 0.8854947336950594, + "grad_norm": 279.86346435546875, + "learning_rate": 4.5600158821979933e-07, + "loss": 23.1765, + "step": 438350 + }, + { + "epoch": 0.8855149343277432, + "grad_norm": 11.049134254455566, + "learning_rate": 4.5585595760135825e-07, + "loss": 12.0037, + "step": 438360 + }, + { + "epoch": 0.8855351349604269, + "grad_norm": 108.68246459960938, + "learning_rate": 4.557103491305237e-07, + "loss": 22.9215, + "step": 438370 + }, + { + "epoch": 0.8855553355931107, + "grad_norm": 414.8169250488281, + "learning_rate": 4.555647628080051e-07, + "loss": 8.2424, + "step": 438380 + }, + { + "epoch": 0.8855755362257945, + "grad_norm": 265.68408203125, + "learning_rate": 4.554191986345136e-07, + "loss": 17.2626, + "step": 438390 + }, + { + "epoch": 0.8855957368584784, + "grad_norm": 139.4207000732422, + "learning_rate": 4.552736566107563e-07, + "loss": 9.0719, + "step": 438400 + }, + { + "epoch": 0.8856159374911622, + "grad_norm": 15.755369186401367, + "learning_rate": 4.551281367374455e-07, + "loss": 8.6049, + "step": 438410 + }, + { + "epoch": 0.885636138123846, + "grad_norm": 264.4195556640625, + "learning_rate": 4.5498263901528784e-07, + "loss": 13.7838, + "step": 438420 + }, + { + "epoch": 0.8856563387565298, + "grad_norm": 201.75054931640625, + "learning_rate": 4.548371634449944e-07, + "loss": 10.8078, + "step": 438430 + }, + { + "epoch": 0.8856765393892136, + "grad_norm": 353.6030578613281, + "learning_rate": 4.546917100272735e-07, + "loss": 8.0673, + "step": 438440 + }, + { + "epoch": 0.8856967400218975, + "grad_norm": 319.56085205078125, + "learning_rate": 4.5454627876283295e-07, + "loss": 18.6831, + "step": 438450 + }, + 
{ + "epoch": 0.8857169406545813, + "grad_norm": 398.4009094238281, + "learning_rate": 4.5440086965238326e-07, + "loss": 12.8228, + "step": 438460 + }, + { + "epoch": 0.8857371412872651, + "grad_norm": 1338.2640380859375, + "learning_rate": 4.542554826966328e-07, + "loss": 14.2016, + "step": 438470 + }, + { + "epoch": 0.8857573419199489, + "grad_norm": 801.7337646484375, + "learning_rate": 4.541101178962887e-07, + "loss": 23.2571, + "step": 438480 + }, + { + "epoch": 0.8857775425526327, + "grad_norm": 412.7310485839844, + "learning_rate": 4.539647752520604e-07, + "loss": 16.6522, + "step": 438490 + }, + { + "epoch": 0.8857977431853166, + "grad_norm": 84.2537841796875, + "learning_rate": 4.538194547646574e-07, + "loss": 29.8909, + "step": 438500 + }, + { + "epoch": 0.8858179438180004, + "grad_norm": 494.4392395019531, + "learning_rate": 4.5367415643478683e-07, + "loss": 27.2246, + "step": 438510 + }, + { + "epoch": 0.8858381444506842, + "grad_norm": 822.8221435546875, + "learning_rate": 4.5352888026315654e-07, + "loss": 27.4212, + "step": 438520 + }, + { + "epoch": 0.885858345083368, + "grad_norm": 83.42070007324219, + "learning_rate": 4.533836262504759e-07, + "loss": 15.1378, + "step": 438530 + }, + { + "epoch": 0.8858785457160518, + "grad_norm": 133.17446899414062, + "learning_rate": 4.5323839439745163e-07, + "loss": 42.554, + "step": 438540 + }, + { + "epoch": 0.8858987463487357, + "grad_norm": 579.7252807617188, + "learning_rate": 4.5309318470479144e-07, + "loss": 18.9857, + "step": 438550 + }, + { + "epoch": 0.8859189469814195, + "grad_norm": 142.4523162841797, + "learning_rate": 4.529479971732031e-07, + "loss": 15.7356, + "step": 438560 + }, + { + "epoch": 0.8859391476141033, + "grad_norm": 598.0130004882812, + "learning_rate": 4.528028318033961e-07, + "loss": 20.5556, + "step": 438570 + }, + { + "epoch": 0.8859593482467871, + "grad_norm": 274.994384765625, + "learning_rate": 4.526576885960765e-07, + "loss": 14.0988, + "step": 438580 + }, + { + "epoch": 0.885979548879471, + "grad_norm": 882.8165893554688, + "learning_rate": 4.5251256755195093e-07, + "loss": 17.3218, + "step": 438590 + }, + { + "epoch": 0.8859997495121548, + "grad_norm": 356.706787109375, + "learning_rate": 4.523674686717283e-07, + "loss": 13.0502, + "step": 438600 + }, + { + "epoch": 0.8860199501448386, + "grad_norm": 363.36566162109375, + "learning_rate": 4.522223919561153e-07, + "loss": 7.7626, + "step": 438610 + }, + { + "epoch": 0.8860401507775224, + "grad_norm": 416.0433654785156, + "learning_rate": 4.520773374058179e-07, + "loss": 14.5136, + "step": 438620 + }, + { + "epoch": 0.8860603514102061, + "grad_norm": 344.8145446777344, + "learning_rate": 4.519323050215446e-07, + "loss": 9.0433, + "step": 438630 + }, + { + "epoch": 0.8860805520428899, + "grad_norm": 2461.770263671875, + "learning_rate": 4.5178729480400084e-07, + "loss": 44.5676, + "step": 438640 + }, + { + "epoch": 0.8861007526755738, + "grad_norm": 302.51043701171875, + "learning_rate": 4.51642306753895e-07, + "loss": 11.8257, + "step": 438650 + }, + { + "epoch": 0.8861209533082576, + "grad_norm": 52.07389831542969, + "learning_rate": 4.514973408719331e-07, + "loss": 23.1165, + "step": 438660 + }, + { + "epoch": 0.8861411539409414, + "grad_norm": 351.7831115722656, + "learning_rate": 4.513523971588202e-07, + "loss": 15.1223, + "step": 438670 + }, + { + "epoch": 0.8861613545736252, + "grad_norm": 761.6973876953125, + "learning_rate": 4.512074756152651e-07, + "loss": 88.1393, + "step": 438680 + }, + { + "epoch": 0.886181555206309, + "grad_norm": 
458.8019104003906, + "learning_rate": 4.5106257624197237e-07, + "loss": 20.716, + "step": 438690 + }, + { + "epoch": 0.8862017558389929, + "grad_norm": 499.322998046875, + "learning_rate": 4.5091769903964965e-07, + "loss": 18.8507, + "step": 438700 + }, + { + "epoch": 0.8862219564716767, + "grad_norm": 28.082361221313477, + "learning_rate": 4.5077284400900147e-07, + "loss": 32.0778, + "step": 438710 + }, + { + "epoch": 0.8862421571043605, + "grad_norm": 210.52737426757812, + "learning_rate": 4.5062801115073607e-07, + "loss": 17.0969, + "step": 438720 + }, + { + "epoch": 0.8862623577370443, + "grad_norm": 71.61561584472656, + "learning_rate": 4.504832004655574e-07, + "loss": 10.2539, + "step": 438730 + }, + { + "epoch": 0.8862825583697281, + "grad_norm": 459.04461669921875, + "learning_rate": 4.503384119541709e-07, + "loss": 13.2212, + "step": 438740 + }, + { + "epoch": 0.886302759002412, + "grad_norm": 210.68026733398438, + "learning_rate": 4.501936456172845e-07, + "loss": 28.052, + "step": 438750 + }, + { + "epoch": 0.8863229596350958, + "grad_norm": 465.93450927734375, + "learning_rate": 4.50048901455602e-07, + "loss": 21.7153, + "step": 438760 + }, + { + "epoch": 0.8863431602677796, + "grad_norm": 143.4450225830078, + "learning_rate": 4.4990417946982836e-07, + "loss": 10.5338, + "step": 438770 + }, + { + "epoch": 0.8863633609004634, + "grad_norm": 1098.5234375, + "learning_rate": 4.4975947966067023e-07, + "loss": 29.5691, + "step": 438780 + }, + { + "epoch": 0.8863835615331472, + "grad_norm": 390.75439453125, + "learning_rate": 4.4961480202883434e-07, + "loss": 20.115, + "step": 438790 + }, + { + "epoch": 0.886403762165831, + "grad_norm": 428.8589782714844, + "learning_rate": 4.494701465750217e-07, + "loss": 25.0542, + "step": 438800 + }, + { + "epoch": 0.8864239627985149, + "grad_norm": 109.70050811767578, + "learning_rate": 4.4932551329994023e-07, + "loss": 13.2468, + "step": 438810 + }, + { + "epoch": 0.8864441634311987, + "grad_norm": 313.089111328125, + "learning_rate": 4.4918090220429476e-07, + "loss": 11.3645, + "step": 438820 + }, + { + "epoch": 0.8864643640638825, + "grad_norm": 143.77685546875, + "learning_rate": 4.490363132887904e-07, + "loss": 11.8531, + "step": 438830 + }, + { + "epoch": 0.8864845646965663, + "grad_norm": 922.7874145507812, + "learning_rate": 4.4889174655412924e-07, + "loss": 20.0964, + "step": 438840 + }, + { + "epoch": 0.8865047653292502, + "grad_norm": 204.7127685546875, + "learning_rate": 4.487472020010181e-07, + "loss": 9.0205, + "step": 438850 + }, + { + "epoch": 0.886524965961934, + "grad_norm": 307.23052978515625, + "learning_rate": 4.4860267963016293e-07, + "loss": 19.5553, + "step": 438860 + }, + { + "epoch": 0.8865451665946178, + "grad_norm": 117.01416778564453, + "learning_rate": 4.484581794422643e-07, + "loss": 15.8075, + "step": 438870 + }, + { + "epoch": 0.8865653672273015, + "grad_norm": 903.5155029296875, + "learning_rate": 4.48313701438029e-07, + "loss": 22.7876, + "step": 438880 + }, + { + "epoch": 0.8865855678599853, + "grad_norm": 392.1251220703125, + "learning_rate": 4.4816924561816076e-07, + "loss": 15.337, + "step": 438890 + }, + { + "epoch": 0.8866057684926691, + "grad_norm": 690.133056640625, + "learning_rate": 4.480248119833641e-07, + "loss": 12.9416, + "step": 438900 + }, + { + "epoch": 0.886625969125353, + "grad_norm": 261.4015197753906, + "learning_rate": 4.4788040053434124e-07, + "loss": 16.3067, + "step": 438910 + }, + { + "epoch": 0.8866461697580368, + "grad_norm": 405.3455810546875, + "learning_rate": 4.477360112717982e-07, 
+ "loss": 21.6496, + "step": 438920 + }, + { + "epoch": 0.8866663703907206, + "grad_norm": 555.2881469726562, + "learning_rate": 4.475916441964379e-07, + "loss": 18.7047, + "step": 438930 + }, + { + "epoch": 0.8866865710234044, + "grad_norm": 474.69940185546875, + "learning_rate": 4.474472993089629e-07, + "loss": 15.6627, + "step": 438940 + }, + { + "epoch": 0.8867067716560882, + "grad_norm": 689.9981689453125, + "learning_rate": 4.473029766100784e-07, + "loss": 37.1921, + "step": 438950 + }, + { + "epoch": 0.8867269722887721, + "grad_norm": 55.59153747558594, + "learning_rate": 4.471586761004859e-07, + "loss": 6.5716, + "step": 438960 + }, + { + "epoch": 0.8867471729214559, + "grad_norm": 115.98567199707031, + "learning_rate": 4.4701439778089105e-07, + "loss": 18.5614, + "step": 438970 + }, + { + "epoch": 0.8867673735541397, + "grad_norm": 6.016637802124023, + "learning_rate": 4.4687014165199547e-07, + "loss": 12.5303, + "step": 438980 + }, + { + "epoch": 0.8867875741868235, + "grad_norm": 264.12884521484375, + "learning_rate": 4.46725907714502e-07, + "loss": 36.1123, + "step": 438990 + }, + { + "epoch": 0.8868077748195073, + "grad_norm": 352.121826171875, + "learning_rate": 4.4658169596911493e-07, + "loss": 18.2095, + "step": 439000 + }, + { + "epoch": 0.8868279754521912, + "grad_norm": 496.9523620605469, + "learning_rate": 4.464375064165355e-07, + "loss": 17.7458, + "step": 439010 + }, + { + "epoch": 0.886848176084875, + "grad_norm": 726.5328979492188, + "learning_rate": 4.4629333905746864e-07, + "loss": 24.0773, + "step": 439020 + }, + { + "epoch": 0.8868683767175588, + "grad_norm": 915.4989624023438, + "learning_rate": 4.461491938926144e-07, + "loss": 25.1332, + "step": 439030 + }, + { + "epoch": 0.8868885773502426, + "grad_norm": 90.11317443847656, + "learning_rate": 4.4600507092267767e-07, + "loss": 15.5745, + "step": 439040 + }, + { + "epoch": 0.8869087779829264, + "grad_norm": 420.9406433105469, + "learning_rate": 4.4586097014836017e-07, + "loss": 11.2216, + "step": 439050 + }, + { + "epoch": 0.8869289786156103, + "grad_norm": 325.19110107421875, + "learning_rate": 4.4571689157036244e-07, + "loss": 9.7093, + "step": 439060 + }, + { + "epoch": 0.8869491792482941, + "grad_norm": 156.01129150390625, + "learning_rate": 4.455728351893895e-07, + "loss": 19.7621, + "step": 439070 + }, + { + "epoch": 0.8869693798809779, + "grad_norm": 1102.5689697265625, + "learning_rate": 4.454288010061425e-07, + "loss": 33.3687, + "step": 439080 + }, + { + "epoch": 0.8869895805136617, + "grad_norm": 310.2066955566406, + "learning_rate": 4.4528478902132187e-07, + "loss": 12.1877, + "step": 439090 + }, + { + "epoch": 0.8870097811463455, + "grad_norm": 66.99678039550781, + "learning_rate": 4.4514079923563103e-07, + "loss": 15.1627, + "step": 439100 + }, + { + "epoch": 0.8870299817790294, + "grad_norm": 365.5875244140625, + "learning_rate": 4.449968316497721e-07, + "loss": 15.9574, + "step": 439110 + }, + { + "epoch": 0.8870501824117132, + "grad_norm": 398.55133056640625, + "learning_rate": 4.448528862644458e-07, + "loss": 12.7694, + "step": 439120 + }, + { + "epoch": 0.887070383044397, + "grad_norm": 465.5559387207031, + "learning_rate": 4.447089630803536e-07, + "loss": 25.2559, + "step": 439130 + }, + { + "epoch": 0.8870905836770807, + "grad_norm": 1330.308837890625, + "learning_rate": 4.445650620981984e-07, + "loss": 17.1388, + "step": 439140 + }, + { + "epoch": 0.8871107843097645, + "grad_norm": 277.4283752441406, + "learning_rate": 4.444211833186807e-07, + "loss": 15.4694, + "step": 439150 + }, + { + 
"epoch": 0.8871309849424484, + "grad_norm": 719.4064331054688, + "learning_rate": 4.4427732674250045e-07, + "loss": 19.9625, + "step": 439160 + }, + { + "epoch": 0.8871511855751322, + "grad_norm": 396.2528076171875, + "learning_rate": 4.4413349237036e-07, + "loss": 15.4088, + "step": 439170 + }, + { + "epoch": 0.887171386207816, + "grad_norm": 250.67013549804688, + "learning_rate": 4.4398968020296143e-07, + "loss": 20.414, + "step": 439180 + }, + { + "epoch": 0.8871915868404998, + "grad_norm": 535.2349853515625, + "learning_rate": 4.4384589024100423e-07, + "loss": 28.5496, + "step": 439190 + }, + { + "epoch": 0.8872117874731836, + "grad_norm": 499.7408447265625, + "learning_rate": 4.4370212248518895e-07, + "loss": 18.146, + "step": 439200 + }, + { + "epoch": 0.8872319881058675, + "grad_norm": 579.1659545898438, + "learning_rate": 4.4355837693621786e-07, + "loss": 22.3271, + "step": 439210 + }, + { + "epoch": 0.8872521887385513, + "grad_norm": 432.2957458496094, + "learning_rate": 4.434146535947903e-07, + "loss": 20.5266, + "step": 439220 + }, + { + "epoch": 0.8872723893712351, + "grad_norm": 116.518310546875, + "learning_rate": 4.4327095246160636e-07, + "loss": 15.6317, + "step": 439230 + }, + { + "epoch": 0.8872925900039189, + "grad_norm": 329.53875732421875, + "learning_rate": 4.4312727353736816e-07, + "loss": 48.8665, + "step": 439240 + }, + { + "epoch": 0.8873127906366027, + "grad_norm": 527.8027954101562, + "learning_rate": 4.4298361682277355e-07, + "loss": 17.8723, + "step": 439250 + }, + { + "epoch": 0.8873329912692866, + "grad_norm": 389.74847412109375, + "learning_rate": 4.428399823185253e-07, + "loss": 16.563, + "step": 439260 + }, + { + "epoch": 0.8873531919019704, + "grad_norm": 277.07861328125, + "learning_rate": 4.426963700253223e-07, + "loss": 19.9283, + "step": 439270 + }, + { + "epoch": 0.8873733925346542, + "grad_norm": 542.4605712890625, + "learning_rate": 4.425527799438639e-07, + "loss": 12.7354, + "step": 439280 + }, + { + "epoch": 0.887393593167338, + "grad_norm": 369.82061767578125, + "learning_rate": 4.4240921207485077e-07, + "loss": 30.9794, + "step": 439290 + }, + { + "epoch": 0.8874137938000218, + "grad_norm": 419.03533935546875, + "learning_rate": 4.4226566641898173e-07, + "loss": 10.5437, + "step": 439300 + }, + { + "epoch": 0.8874339944327057, + "grad_norm": 9.967693328857422, + "learning_rate": 4.421221429769579e-07, + "loss": 13.8394, + "step": 439310 + }, + { + "epoch": 0.8874541950653895, + "grad_norm": 428.9529113769531, + "learning_rate": 4.4197864174947755e-07, + "loss": 11.1389, + "step": 439320 + }, + { + "epoch": 0.8874743956980733, + "grad_norm": 293.9654235839844, + "learning_rate": 4.418351627372408e-07, + "loss": 20.6097, + "step": 439330 + }, + { + "epoch": 0.8874945963307571, + "grad_norm": 1044.8182373046875, + "learning_rate": 4.416917059409465e-07, + "loss": 22.0248, + "step": 439340 + }, + { + "epoch": 0.8875147969634409, + "grad_norm": 149.45436096191406, + "learning_rate": 4.415482713612934e-07, + "loss": 15.2085, + "step": 439350 + }, + { + "epoch": 0.8875349975961248, + "grad_norm": 171.68978881835938, + "learning_rate": 4.414048589989822e-07, + "loss": 14.8334, + "step": 439360 + }, + { + "epoch": 0.8875551982288086, + "grad_norm": 443.9935607910156, + "learning_rate": 4.4126146885471067e-07, + "loss": 22.9411, + "step": 439370 + }, + { + "epoch": 0.8875753988614924, + "grad_norm": 236.66659545898438, + "learning_rate": 4.411181009291765e-07, + "loss": 28.307, + "step": 439380 + }, + { + "epoch": 0.8875955994941762, + "grad_norm": 
205.0904541015625, + "learning_rate": 4.409747552230803e-07, + "loss": 13.7862, + "step": 439390 + }, + { + "epoch": 0.8876158001268599, + "grad_norm": 697.9765625, + "learning_rate": 4.4083143173712207e-07, + "loss": 15.5063, + "step": 439400 + }, + { + "epoch": 0.8876360007595437, + "grad_norm": 472.9891052246094, + "learning_rate": 4.406881304719962e-07, + "loss": 16.3077, + "step": 439410 + }, + { + "epoch": 0.8876562013922276, + "grad_norm": 50.7934684753418, + "learning_rate": 4.405448514284039e-07, + "loss": 32.6937, + "step": 439420 + }, + { + "epoch": 0.8876764020249114, + "grad_norm": 613.9843139648438, + "learning_rate": 4.404015946070439e-07, + "loss": 22.2959, + "step": 439430 + }, + { + "epoch": 0.8876966026575952, + "grad_norm": 435.0783996582031, + "learning_rate": 4.40258360008613e-07, + "loss": 27.7951, + "step": 439440 + }, + { + "epoch": 0.887716803290279, + "grad_norm": 777.6680908203125, + "learning_rate": 4.401151476338095e-07, + "loss": 42.459, + "step": 439450 + }, + { + "epoch": 0.8877370039229628, + "grad_norm": 434.35491943359375, + "learning_rate": 4.3997195748333113e-07, + "loss": 11.0759, + "step": 439460 + }, + { + "epoch": 0.8877572045556467, + "grad_norm": 22.04349136352539, + "learning_rate": 4.3982878955787844e-07, + "loss": 14.6108, + "step": 439470 + }, + { + "epoch": 0.8877774051883305, + "grad_norm": 79.72455596923828, + "learning_rate": 4.396856438581454e-07, + "loss": 32.3698, + "step": 439480 + }, + { + "epoch": 0.8877976058210143, + "grad_norm": 269.5208435058594, + "learning_rate": 4.395425203848314e-07, + "loss": 16.2407, + "step": 439490 + }, + { + "epoch": 0.8878178064536981, + "grad_norm": 467.6042785644531, + "learning_rate": 4.3939941913863525e-07, + "loss": 17.5712, + "step": 439500 + }, + { + "epoch": 0.8878380070863819, + "grad_norm": 170.37184143066406, + "learning_rate": 4.392563401202526e-07, + "loss": 27.6455, + "step": 439510 + }, + { + "epoch": 0.8878582077190658, + "grad_norm": 74.18937683105469, + "learning_rate": 4.391132833303807e-07, + "loss": 19.3106, + "step": 439520 + }, + { + "epoch": 0.8878784083517496, + "grad_norm": 343.9047546386719, + "learning_rate": 4.389702487697189e-07, + "loss": 12.2741, + "step": 439530 + }, + { + "epoch": 0.8878986089844334, + "grad_norm": 33.56159973144531, + "learning_rate": 4.388272364389623e-07, + "loss": 12.6946, + "step": 439540 + }, + { + "epoch": 0.8879188096171172, + "grad_norm": 324.51470947265625, + "learning_rate": 4.38684246338808e-07, + "loss": 22.468, + "step": 439550 + }, + { + "epoch": 0.887939010249801, + "grad_norm": 296.24420166015625, + "learning_rate": 4.385412784699544e-07, + "loss": 32.3633, + "step": 439560 + }, + { + "epoch": 0.8879592108824849, + "grad_norm": 791.6322021484375, + "learning_rate": 4.3839833283309597e-07, + "loss": 28.1926, + "step": 439570 + }, + { + "epoch": 0.8879794115151687, + "grad_norm": 354.2049255371094, + "learning_rate": 4.3825540942893206e-07, + "loss": 19.6999, + "step": 439580 + }, + { + "epoch": 0.8879996121478525, + "grad_norm": 276.34783935546875, + "learning_rate": 4.381125082581583e-07, + "loss": 20.3004, + "step": 439590 + }, + { + "epoch": 0.8880198127805363, + "grad_norm": 525.9163818359375, + "learning_rate": 4.379696293214697e-07, + "loss": 21.0685, + "step": 439600 + }, + { + "epoch": 0.8880400134132201, + "grad_norm": 405.0637512207031, + "learning_rate": 4.378267726195645e-07, + "loss": 16.6269, + "step": 439610 + }, + { + "epoch": 0.888060214045904, + "grad_norm": 875.94873046875, + "learning_rate": 4.3768393815313723e-07, + 
"loss": 20.4529, + "step": 439620 + }, + { + "epoch": 0.8880804146785878, + "grad_norm": 78.52611541748047, + "learning_rate": 4.375411259228868e-07, + "loss": 16.3166, + "step": 439630 + }, + { + "epoch": 0.8881006153112716, + "grad_norm": 154.6929931640625, + "learning_rate": 4.373983359295059e-07, + "loss": 18.9563, + "step": 439640 + }, + { + "epoch": 0.8881208159439553, + "grad_norm": 183.49374389648438, + "learning_rate": 4.372555681736934e-07, + "loss": 23.9325, + "step": 439650 + }, + { + "epoch": 0.8881410165766391, + "grad_norm": 517.3644409179688, + "learning_rate": 4.3711282265614385e-07, + "loss": 17.1077, + "step": 439660 + }, + { + "epoch": 0.888161217209323, + "grad_norm": 266.4991455078125, + "learning_rate": 4.369700993775522e-07, + "loss": 11.7718, + "step": 439670 + }, + { + "epoch": 0.8881814178420068, + "grad_norm": 103.00946807861328, + "learning_rate": 4.368273983386157e-07, + "loss": 28.2479, + "step": 439680 + }, + { + "epoch": 0.8882016184746906, + "grad_norm": 126.4054946899414, + "learning_rate": 4.3668471954002864e-07, + "loss": 15.4523, + "step": 439690 + }, + { + "epoch": 0.8882218191073744, + "grad_norm": 629.47265625, + "learning_rate": 4.3654206298248625e-07, + "loss": 17.5671, + "step": 439700 + }, + { + "epoch": 0.8882420197400582, + "grad_norm": 250.27529907226562, + "learning_rate": 4.363994286666845e-07, + "loss": 20.4415, + "step": 439710 + }, + { + "epoch": 0.888262220372742, + "grad_norm": 315.4422607421875, + "learning_rate": 4.3625681659331895e-07, + "loss": 18.5077, + "step": 439720 + }, + { + "epoch": 0.8882824210054259, + "grad_norm": 205.74395751953125, + "learning_rate": 4.3611422676308413e-07, + "loss": 19.0239, + "step": 439730 + }, + { + "epoch": 0.8883026216381097, + "grad_norm": 144.6514892578125, + "learning_rate": 4.359716591766744e-07, + "loss": 10.1121, + "step": 439740 + }, + { + "epoch": 0.8883228222707935, + "grad_norm": 94.45883178710938, + "learning_rate": 4.3582911383478646e-07, + "loss": 23.2051, + "step": 439750 + }, + { + "epoch": 0.8883430229034773, + "grad_norm": 108.36637878417969, + "learning_rate": 4.3568659073811306e-07, + "loss": 14.929, + "step": 439760 + }, + { + "epoch": 0.8883632235361612, + "grad_norm": 606.2430419921875, + "learning_rate": 4.355440898873492e-07, + "loss": 10.0484, + "step": 439770 + }, + { + "epoch": 0.888383424168845, + "grad_norm": 298.37127685546875, + "learning_rate": 4.354016112831899e-07, + "loss": 7.4637, + "step": 439780 + }, + { + "epoch": 0.8884036248015288, + "grad_norm": 760.9266357421875, + "learning_rate": 4.352591549263302e-07, + "loss": 12.9479, + "step": 439790 + }, + { + "epoch": 0.8884238254342126, + "grad_norm": 621.0365600585938, + "learning_rate": 4.3511672081746393e-07, + "loss": 14.3445, + "step": 439800 + }, + { + "epoch": 0.8884440260668964, + "grad_norm": 407.5010070800781, + "learning_rate": 4.3497430895728444e-07, + "loss": 15.4422, + "step": 439810 + }, + { + "epoch": 0.8884642266995803, + "grad_norm": 734.0892333984375, + "learning_rate": 4.348319193464867e-07, + "loss": 36.5453, + "step": 439820 + }, + { + "epoch": 0.8884844273322641, + "grad_norm": 274.6903991699219, + "learning_rate": 4.3468955198576524e-07, + "loss": 13.928, + "step": 439830 + }, + { + "epoch": 0.8885046279649479, + "grad_norm": 403.5230712890625, + "learning_rate": 4.3454720687581165e-07, + "loss": 23.6129, + "step": 439840 + }, + { + "epoch": 0.8885248285976317, + "grad_norm": 741.6392211914062, + "learning_rate": 4.344048840173226e-07, + "loss": 13.0167, + "step": 439850 + }, + { + 
"epoch": 0.8885450292303155, + "grad_norm": 252.17994689941406, + "learning_rate": 4.3426258341098925e-07, + "loss": 6.1833, + "step": 439860 + }, + { + "epoch": 0.8885652298629994, + "grad_norm": 793.151611328125, + "learning_rate": 4.341203050575077e-07, + "loss": 20.799, + "step": 439870 + }, + { + "epoch": 0.8885854304956832, + "grad_norm": 447.8883056640625, + "learning_rate": 4.3397804895756957e-07, + "loss": 25.9996, + "step": 439880 + }, + { + "epoch": 0.888605631128367, + "grad_norm": 0.031039610505104065, + "learning_rate": 4.338358151118677e-07, + "loss": 9.6498, + "step": 439890 + }, + { + "epoch": 0.8886258317610508, + "grad_norm": 219.12828063964844, + "learning_rate": 4.33693603521097e-07, + "loss": 6.6652, + "step": 439900 + }, + { + "epoch": 0.8886460323937345, + "grad_norm": 318.5906677246094, + "learning_rate": 4.3355141418594926e-07, + "loss": 21.3146, + "step": 439910 + }, + { + "epoch": 0.8886662330264183, + "grad_norm": 336.47918701171875, + "learning_rate": 4.334092471071194e-07, + "loss": 17.1244, + "step": 439920 + }, + { + "epoch": 0.8886864336591022, + "grad_norm": 483.5345764160156, + "learning_rate": 4.3326710228529746e-07, + "loss": 16.6284, + "step": 439930 + }, + { + "epoch": 0.888706634291786, + "grad_norm": 651.7122192382812, + "learning_rate": 4.3312497972117895e-07, + "loss": 16.1954, + "step": 439940 + }, + { + "epoch": 0.8887268349244698, + "grad_norm": 283.4456481933594, + "learning_rate": 4.32982879415455e-07, + "loss": 19.7755, + "step": 439950 + }, + { + "epoch": 0.8887470355571536, + "grad_norm": 185.06582641601562, + "learning_rate": 4.3284080136881847e-07, + "loss": 25.6656, + "step": 439960 + }, + { + "epoch": 0.8887672361898374, + "grad_norm": 255.2400360107422, + "learning_rate": 4.32698745581962e-07, + "loss": 14.7366, + "step": 439970 + }, + { + "epoch": 0.8887874368225213, + "grad_norm": 445.416015625, + "learning_rate": 4.325567120555785e-07, + "loss": 11.0359, + "step": 439980 + }, + { + "epoch": 0.8888076374552051, + "grad_norm": 338.63629150390625, + "learning_rate": 4.324147007903584e-07, + "loss": 17.1912, + "step": 439990 + }, + { + "epoch": 0.8888278380878889, + "grad_norm": 143.6701202392578, + "learning_rate": 4.322727117869951e-07, + "loss": 9.3195, + "step": 440000 + }, + { + "epoch": 0.8888480387205727, + "grad_norm": 117.09489440917969, + "learning_rate": 4.3213074504618256e-07, + "loss": 12.1255, + "step": 440010 + }, + { + "epoch": 0.8888682393532565, + "grad_norm": 437.0494689941406, + "learning_rate": 4.31988800568609e-07, + "loss": 21.9564, + "step": 440020 + }, + { + "epoch": 0.8888884399859404, + "grad_norm": 12.793342590332031, + "learning_rate": 4.3184687835496784e-07, + "loss": 21.4166, + "step": 440030 + }, + { + "epoch": 0.8889086406186242, + "grad_norm": 170.0322723388672, + "learning_rate": 4.317049784059518e-07, + "loss": 12.801, + "step": 440040 + }, + { + "epoch": 0.888928841251308, + "grad_norm": 349.74859619140625, + "learning_rate": 4.315631007222515e-07, + "loss": 14.7523, + "step": 440050 + }, + { + "epoch": 0.8889490418839918, + "grad_norm": 217.57176208496094, + "learning_rate": 4.31421245304558e-07, + "loss": 19.6759, + "step": 440060 + }, + { + "epoch": 0.8889692425166756, + "grad_norm": 264.9936828613281, + "learning_rate": 4.3127941215356296e-07, + "loss": 18.8373, + "step": 440070 + }, + { + "epoch": 0.8889894431493595, + "grad_norm": 885.3839721679688, + "learning_rate": 4.3113760126995974e-07, + "loss": 22.7676, + "step": 440080 + }, + { + "epoch": 0.8890096437820433, + "grad_norm": 
178.55599975585938, + "learning_rate": 4.309958126544361e-07, + "loss": 19.6819, + "step": 440090 + }, + { + "epoch": 0.8890298444147271, + "grad_norm": 23.563514709472656, + "learning_rate": 4.308540463076849e-07, + "loss": 8.1549, + "step": 440100 + }, + { + "epoch": 0.8890500450474109, + "grad_norm": 475.083251953125, + "learning_rate": 4.3071230223039774e-07, + "loss": 22.2255, + "step": 440110 + }, + { + "epoch": 0.8890702456800947, + "grad_norm": 310.59912109375, + "learning_rate": 4.3057058042326407e-07, + "loss": 18.2259, + "step": 440120 + }, + { + "epoch": 0.8890904463127786, + "grad_norm": 249.81739807128906, + "learning_rate": 4.30428880886975e-07, + "loss": 9.7312, + "step": 440130 + }, + { + "epoch": 0.8891106469454624, + "grad_norm": 157.1270751953125, + "learning_rate": 4.3028720362222166e-07, + "loss": 22.6873, + "step": 440140 + }, + { + "epoch": 0.8891308475781462, + "grad_norm": 368.5738220214844, + "learning_rate": 4.301455486296946e-07, + "loss": 21.0228, + "step": 440150 + }, + { + "epoch": 0.8891510482108299, + "grad_norm": 474.0268249511719, + "learning_rate": 4.300039159100827e-07, + "loss": 18.5053, + "step": 440160 + }, + { + "epoch": 0.8891712488435137, + "grad_norm": 402.1085510253906, + "learning_rate": 4.298623054640788e-07, + "loss": 14.8153, + "step": 440170 + }, + { + "epoch": 0.8891914494761975, + "grad_norm": 357.016357421875, + "learning_rate": 4.2972071729237065e-07, + "loss": 11.0594, + "step": 440180 + }, + { + "epoch": 0.8892116501088814, + "grad_norm": 11.80178165435791, + "learning_rate": 4.295791513956504e-07, + "loss": 18.0563, + "step": 440190 + }, + { + "epoch": 0.8892318507415652, + "grad_norm": 288.9800109863281, + "learning_rate": 4.29437607774606e-07, + "loss": 17.9754, + "step": 440200 + }, + { + "epoch": 0.889252051374249, + "grad_norm": 327.2669677734375, + "learning_rate": 4.2929608642992894e-07, + "loss": 28.796, + "step": 440210 + }, + { + "epoch": 0.8892722520069328, + "grad_norm": 452.38348388671875, + "learning_rate": 4.291545873623087e-07, + "loss": 16.7511, + "step": 440220 + }, + { + "epoch": 0.8892924526396166, + "grad_norm": 530.0560302734375, + "learning_rate": 4.2901311057243377e-07, + "loss": 10.8369, + "step": 440230 + }, + { + "epoch": 0.8893126532723005, + "grad_norm": 489.7251892089844, + "learning_rate": 4.2887165606099513e-07, + "loss": 18.7304, + "step": 440240 + }, + { + "epoch": 0.8893328539049843, + "grad_norm": 356.1235656738281, + "learning_rate": 4.2873022382868115e-07, + "loss": 18.1181, + "step": 440250 + }, + { + "epoch": 0.8893530545376681, + "grad_norm": 407.93499755859375, + "learning_rate": 4.2858881387618235e-07, + "loss": 16.1849, + "step": 440260 + }, + { + "epoch": 0.8893732551703519, + "grad_norm": 684.4132080078125, + "learning_rate": 4.284474262041871e-07, + "loss": 24.9747, + "step": 440270 + }, + { + "epoch": 0.8893934558030357, + "grad_norm": 216.8534698486328, + "learning_rate": 4.283060608133843e-07, + "loss": 17.7716, + "step": 440280 + }, + { + "epoch": 0.8894136564357196, + "grad_norm": 476.00628662109375, + "learning_rate": 4.2816471770446343e-07, + "loss": 14.6045, + "step": 440290 + }, + { + "epoch": 0.8894338570684034, + "grad_norm": 329.6189880371094, + "learning_rate": 4.280233968781139e-07, + "loss": 14.645, + "step": 440300 + }, + { + "epoch": 0.8894540577010872, + "grad_norm": 216.16668701171875, + "learning_rate": 4.2788209833502237e-07, + "loss": 15.3206, + "step": 440310 + }, + { + "epoch": 0.889474258333771, + "grad_norm": 514.588134765625, + "learning_rate": 
4.277408220758794e-07, + "loss": 20.1569, + "step": 440320 + }, + { + "epoch": 0.8894944589664548, + "grad_norm": 256.3499755859375, + "learning_rate": 4.275995681013745e-07, + "loss": 30.1647, + "step": 440330 + }, + { + "epoch": 0.8895146595991387, + "grad_norm": 737.529296875, + "learning_rate": 4.2745833641219317e-07, + "loss": 16.7913, + "step": 440340 + }, + { + "epoch": 0.8895348602318225, + "grad_norm": 97.90251159667969, + "learning_rate": 4.273171270090254e-07, + "loss": 6.7977, + "step": 440350 + }, + { + "epoch": 0.8895550608645063, + "grad_norm": 356.3243408203125, + "learning_rate": 4.271759398925601e-07, + "loss": 13.1676, + "step": 440360 + }, + { + "epoch": 0.8895752614971901, + "grad_norm": 435.6765441894531, + "learning_rate": 4.270347750634846e-07, + "loss": 18.4452, + "step": 440370 + }, + { + "epoch": 0.889595462129874, + "grad_norm": 663.2234497070312, + "learning_rate": 4.2689363252248595e-07, + "loss": 21.1937, + "step": 440380 + }, + { + "epoch": 0.8896156627625578, + "grad_norm": 155.16162109375, + "learning_rate": 4.2675251227025315e-07, + "loss": 19.0472, + "step": 440390 + }, + { + "epoch": 0.8896358633952416, + "grad_norm": 344.0484313964844, + "learning_rate": 4.266114143074751e-07, + "loss": 10.4041, + "step": 440400 + }, + { + "epoch": 0.8896560640279254, + "grad_norm": 165.12008666992188, + "learning_rate": 4.264703386348384e-07, + "loss": 10.6243, + "step": 440410 + }, + { + "epoch": 0.8896762646606091, + "grad_norm": 223.59613037109375, + "learning_rate": 4.263292852530293e-07, + "loss": 23.7532, + "step": 440420 + }, + { + "epoch": 0.8896964652932929, + "grad_norm": 111.74337768554688, + "learning_rate": 4.261882541627377e-07, + "loss": 7.5019, + "step": 440430 + }, + { + "epoch": 0.8897166659259768, + "grad_norm": 448.3424377441406, + "learning_rate": 4.260472453646497e-07, + "loss": 27.4821, + "step": 440440 + }, + { + "epoch": 0.8897368665586606, + "grad_norm": 318.8260192871094, + "learning_rate": 4.2590625885945205e-07, + "loss": 18.8889, + "step": 440450 + }, + { + "epoch": 0.8897570671913444, + "grad_norm": 331.6166687011719, + "learning_rate": 4.25765294647833e-07, + "loss": 29.4402, + "step": 440460 + }, + { + "epoch": 0.8897772678240282, + "grad_norm": 598.9876708984375, + "learning_rate": 4.256243527304782e-07, + "loss": 18.679, + "step": 440470 + }, + { + "epoch": 0.889797468456712, + "grad_norm": 435.41290283203125, + "learning_rate": 4.2548343310807704e-07, + "loss": 17.4909, + "step": 440480 + }, + { + "epoch": 0.8898176690893959, + "grad_norm": 481.29132080078125, + "learning_rate": 4.25342535781314e-07, + "loss": 17.0876, + "step": 440490 + }, + { + "epoch": 0.8898378697220797, + "grad_norm": 258.2563781738281, + "learning_rate": 4.2520166075087635e-07, + "loss": 14.378, + "step": 440500 + }, + { + "epoch": 0.8898580703547635, + "grad_norm": 452.7650451660156, + "learning_rate": 4.250608080174512e-07, + "loss": 19.2819, + "step": 440510 + }, + { + "epoch": 0.8898782709874473, + "grad_norm": 223.28135681152344, + "learning_rate": 4.249199775817242e-07, + "loss": 16.1098, + "step": 440520 + }, + { + "epoch": 0.8898984716201311, + "grad_norm": 80.29216003417969, + "learning_rate": 4.247791694443837e-07, + "loss": 23.5703, + "step": 440530 + }, + { + "epoch": 0.889918672252815, + "grad_norm": 555.3062744140625, + "learning_rate": 4.24638383606113e-07, + "loss": 22.0077, + "step": 440540 + }, + { + "epoch": 0.8899388728854988, + "grad_norm": 1100.7147216796875, + "learning_rate": 4.24497620067601e-07, + "loss": 39.7739, + "step": 440550 + 
}, + { + "epoch": 0.8899590735181826, + "grad_norm": 675.494873046875, + "learning_rate": 4.2435687882953327e-07, + "loss": 17.158, + "step": 440560 + }, + { + "epoch": 0.8899792741508664, + "grad_norm": 292.1600341796875, + "learning_rate": 4.242161598925937e-07, + "loss": 15.5541, + "step": 440570 + }, + { + "epoch": 0.8899994747835502, + "grad_norm": 274.9527282714844, + "learning_rate": 4.240754632574706e-07, + "loss": 27.1441, + "step": 440580 + }, + { + "epoch": 0.8900196754162341, + "grad_norm": 48.35358428955078, + "learning_rate": 4.239347889248485e-07, + "loss": 13.2044, + "step": 440590 + }, + { + "epoch": 0.8900398760489179, + "grad_norm": 235.7996368408203, + "learning_rate": 4.237941368954124e-07, + "loss": 22.3997, + "step": 440600 + }, + { + "epoch": 0.8900600766816017, + "grad_norm": 11.193168640136719, + "learning_rate": 4.236535071698489e-07, + "loss": 17.1348, + "step": 440610 + }, + { + "epoch": 0.8900802773142855, + "grad_norm": 379.49041748046875, + "learning_rate": 4.2351289974884467e-07, + "loss": 23.3766, + "step": 440620 + }, + { + "epoch": 0.8901004779469693, + "grad_norm": 255.96241760253906, + "learning_rate": 4.2337231463308147e-07, + "loss": 14.9062, + "step": 440630 + }, + { + "epoch": 0.8901206785796532, + "grad_norm": 757.0723876953125, + "learning_rate": 4.2323175182324706e-07, + "loss": 29.2287, + "step": 440640 + }, + { + "epoch": 0.890140879212337, + "grad_norm": 357.3234558105469, + "learning_rate": 4.2309121132002695e-07, + "loss": 11.862, + "step": 440650 + }, + { + "epoch": 0.8901610798450208, + "grad_norm": 25.004135131835938, + "learning_rate": 4.2295069312410455e-07, + "loss": 18.1373, + "step": 440660 + }, + { + "epoch": 0.8901812804777045, + "grad_norm": 36.95526123046875, + "learning_rate": 4.228101972361648e-07, + "loss": 7.3536, + "step": 440670 + }, + { + "epoch": 0.8902014811103883, + "grad_norm": 207.34742736816406, + "learning_rate": 4.226697236568933e-07, + "loss": 16.4848, + "step": 440680 + }, + { + "epoch": 0.8902216817430721, + "grad_norm": 526.7403564453125, + "learning_rate": 4.225292723869762e-07, + "loss": 20.7376, + "step": 440690 + }, + { + "epoch": 0.890241882375756, + "grad_norm": 572.5574951171875, + "learning_rate": 4.2238884342709397e-07, + "loss": 23.5109, + "step": 440700 + }, + { + "epoch": 0.8902620830084398, + "grad_norm": 486.1275329589844, + "learning_rate": 4.222484367779334e-07, + "loss": 37.0887, + "step": 440710 + }, + { + "epoch": 0.8902822836411236, + "grad_norm": 314.53375244140625, + "learning_rate": 4.2210805244017993e-07, + "loss": 24.3133, + "step": 440720 + }, + { + "epoch": 0.8903024842738074, + "grad_norm": 374.93328857421875, + "learning_rate": 4.219676904145165e-07, + "loss": 31.6426, + "step": 440730 + }, + { + "epoch": 0.8903226849064912, + "grad_norm": 113.07835388183594, + "learning_rate": 4.218273507016263e-07, + "loss": 32.312, + "step": 440740 + }, + { + "epoch": 0.8903428855391751, + "grad_norm": 478.1295471191406, + "learning_rate": 4.2168703330219494e-07, + "loss": 22.5987, + "step": 440750 + }, + { + "epoch": 0.8903630861718589, + "grad_norm": 360.760498046875, + "learning_rate": 4.2154673821690585e-07, + "loss": 22.2875, + "step": 440760 + }, + { + "epoch": 0.8903832868045427, + "grad_norm": 177.20535278320312, + "learning_rate": 4.2140646544644227e-07, + "loss": 16.4268, + "step": 440770 + }, + { + "epoch": 0.8904034874372265, + "grad_norm": 525.95361328125, + "learning_rate": 4.212662149914887e-07, + "loss": 13.4369, + "step": 440780 + }, + { + "epoch": 0.8904236880699103, + 
"grad_norm": 216.5105438232422, + "learning_rate": 4.211259868527273e-07, + "loss": 27.3728, + "step": 440790 + }, + { + "epoch": 0.8904438887025942, + "grad_norm": 634.1834716796875, + "learning_rate": 4.2098578103084376e-07, + "loss": 22.5864, + "step": 440800 + }, + { + "epoch": 0.890464089335278, + "grad_norm": 52.97663116455078, + "learning_rate": 4.208455975265191e-07, + "loss": 18.4716, + "step": 440810 + }, + { + "epoch": 0.8904842899679618, + "grad_norm": 56.63849639892578, + "learning_rate": 4.2070543634043834e-07, + "loss": 9.3955, + "step": 440820 + }, + { + "epoch": 0.8905044906006456, + "grad_norm": 550.6646118164062, + "learning_rate": 4.205652974732838e-07, + "loss": 18.9752, + "step": 440830 + }, + { + "epoch": 0.8905246912333294, + "grad_norm": 559.0628051757812, + "learning_rate": 4.2042518092573814e-07, + "loss": 27.9919, + "step": 440840 + }, + { + "epoch": 0.8905448918660133, + "grad_norm": 128.8303680419922, + "learning_rate": 4.202850866984853e-07, + "loss": 27.2158, + "step": 440850 + }, + { + "epoch": 0.8905650924986971, + "grad_norm": 343.2501220703125, + "learning_rate": 4.201450147922065e-07, + "loss": 16.7919, + "step": 440860 + }, + { + "epoch": 0.8905852931313809, + "grad_norm": 185.21302795410156, + "learning_rate": 4.200049652075866e-07, + "loss": 20.326, + "step": 440870 + }, + { + "epoch": 0.8906054937640647, + "grad_norm": 244.53485107421875, + "learning_rate": 4.198649379453068e-07, + "loss": 12.922, + "step": 440880 + }, + { + "epoch": 0.8906256943967485, + "grad_norm": 532.2025756835938, + "learning_rate": 4.1972493300604877e-07, + "loss": 22.2626, + "step": 440890 + }, + { + "epoch": 0.8906458950294324, + "grad_norm": 89.53042602539062, + "learning_rate": 4.195849503904975e-07, + "loss": 12.9347, + "step": 440900 + }, + { + "epoch": 0.8906660956621162, + "grad_norm": 171.7433624267578, + "learning_rate": 4.1944499009933303e-07, + "loss": 8.9002, + "step": 440910 + }, + { + "epoch": 0.8906862962948, + "grad_norm": 118.01786041259766, + "learning_rate": 4.19305052133237e-07, + "loss": 7.3701, + "step": 440920 + }, + { + "epoch": 0.8907064969274837, + "grad_norm": 339.9176940917969, + "learning_rate": 4.1916513649289334e-07, + "loss": 17.031, + "step": 440930 + }, + { + "epoch": 0.8907266975601675, + "grad_norm": 315.4590759277344, + "learning_rate": 4.1902524317898427e-07, + "loss": 20.6522, + "step": 440940 + }, + { + "epoch": 0.8907468981928514, + "grad_norm": 231.2949676513672, + "learning_rate": 4.188853721921893e-07, + "loss": 17.2781, + "step": 440950 + }, + { + "epoch": 0.8907670988255352, + "grad_norm": 404.14935302734375, + "learning_rate": 4.1874552353319107e-07, + "loss": 20.6696, + "step": 440960 + }, + { + "epoch": 0.890787299458219, + "grad_norm": 1402.7301025390625, + "learning_rate": 4.186056972026725e-07, + "loss": 37.9456, + "step": 440970 + }, + { + "epoch": 0.8908075000909028, + "grad_norm": 425.55401611328125, + "learning_rate": 4.1846589320131415e-07, + "loss": 15.6599, + "step": 440980 + }, + { + "epoch": 0.8908277007235866, + "grad_norm": 712.2496337890625, + "learning_rate": 4.1832611152979655e-07, + "loss": 21.0699, + "step": 440990 + }, + { + "epoch": 0.8908479013562705, + "grad_norm": 487.39239501953125, + "learning_rate": 4.1818635218880186e-07, + "loss": 17.5509, + "step": 441000 + }, + { + "epoch": 0.8908681019889543, + "grad_norm": 75.24030303955078, + "learning_rate": 4.1804661517901244e-07, + "loss": 41.9425, + "step": 441010 + }, + { + "epoch": 0.8908883026216381, + "grad_norm": 488.7297058105469, + "learning_rate": 
4.179069005011066e-07, + "loss": 14.4235, + "step": 441020 + }, + { + "epoch": 0.8909085032543219, + "grad_norm": 662.9674072265625, + "learning_rate": 4.177672081557671e-07, + "loss": 12.6769, + "step": 441030 + }, + { + "epoch": 0.8909287038870057, + "grad_norm": 744.5567626953125, + "learning_rate": 4.176275381436751e-07, + "loss": 12.1562, + "step": 441040 + }, + { + "epoch": 0.8909489045196896, + "grad_norm": 169.5181427001953, + "learning_rate": 4.1748789046551055e-07, + "loss": 20.7962, + "step": 441050 + }, + { + "epoch": 0.8909691051523734, + "grad_norm": 230.7576141357422, + "learning_rate": 4.173482651219535e-07, + "loss": 12.6017, + "step": 441060 + }, + { + "epoch": 0.8909893057850572, + "grad_norm": 511.5341491699219, + "learning_rate": 4.1720866211368615e-07, + "loss": 23.1607, + "step": 441070 + }, + { + "epoch": 0.891009506417741, + "grad_norm": 373.091552734375, + "learning_rate": 4.1706908144138804e-07, + "loss": 14.3656, + "step": 441080 + }, + { + "epoch": 0.8910297070504248, + "grad_norm": 284.5761413574219, + "learning_rate": 4.1692952310573854e-07, + "loss": 21.3658, + "step": 441090 + }, + { + "epoch": 0.8910499076831087, + "grad_norm": 570.6357421875, + "learning_rate": 4.1678998710741936e-07, + "loss": 19.4725, + "step": 441100 + }, + { + "epoch": 0.8910701083157925, + "grad_norm": 967.0919189453125, + "learning_rate": 4.1665047344710887e-07, + "loss": 26.8212, + "step": 441110 + }, + { + "epoch": 0.8910903089484763, + "grad_norm": 259.86822509765625, + "learning_rate": 4.1651098212548923e-07, + "loss": 12.6998, + "step": 441120 + }, + { + "epoch": 0.8911105095811601, + "grad_norm": 206.50607299804688, + "learning_rate": 4.163715131432383e-07, + "loss": 10.5511, + "step": 441130 + }, + { + "epoch": 0.8911307102138439, + "grad_norm": 466.2878723144531, + "learning_rate": 4.162320665010372e-07, + "loss": 22.2893, + "step": 441140 + }, + { + "epoch": 0.8911509108465278, + "grad_norm": 11.808757781982422, + "learning_rate": 4.160926421995648e-07, + "loss": 19.4246, + "step": 441150 + }, + { + "epoch": 0.8911711114792116, + "grad_norm": 793.69775390625, + "learning_rate": 4.159532402395011e-07, + "loss": 27.9781, + "step": 441160 + }, + { + "epoch": 0.8911913121118954, + "grad_norm": 0.3489563763141632, + "learning_rate": 4.158138606215256e-07, + "loss": 9.1859, + "step": 441170 + }, + { + "epoch": 0.8912115127445792, + "grad_norm": 0.0, + "learning_rate": 4.1567450334631667e-07, + "loss": 21.4238, + "step": 441180 + }, + { + "epoch": 0.8912317133772629, + "grad_norm": 1309.4345703125, + "learning_rate": 4.155351684145548e-07, + "loss": 15.2316, + "step": 441190 + }, + { + "epoch": 0.8912519140099467, + "grad_norm": 204.02719116210938, + "learning_rate": 4.153958558269189e-07, + "loss": 14.4283, + "step": 441200 + }, + { + "epoch": 0.8912721146426306, + "grad_norm": 98.7807846069336, + "learning_rate": 4.1525656558408624e-07, + "loss": 11.4204, + "step": 441210 + }, + { + "epoch": 0.8912923152753144, + "grad_norm": 407.64825439453125, + "learning_rate": 4.151172976867374e-07, + "loss": 13.8096, + "step": 441220 + }, + { + "epoch": 0.8913125159079982, + "grad_norm": 458.1145324707031, + "learning_rate": 4.149780521355523e-07, + "loss": 14.3765, + "step": 441230 + }, + { + "epoch": 0.891332716540682, + "grad_norm": 275.7145690917969, + "learning_rate": 4.1483882893120606e-07, + "loss": 11.479, + "step": 441240 + }, + { + "epoch": 0.8913529171733658, + "grad_norm": 315.7733154296875, + "learning_rate": 4.146996280743798e-07, + "loss": 28.4609, + "step": 441250 + }, + { + 
"epoch": 0.8913731178060497, + "grad_norm": 152.9192657470703, + "learning_rate": 4.145604495657518e-07, + "loss": 4.4852, + "step": 441260 + }, + { + "epoch": 0.8913933184387335, + "grad_norm": 29.09087562561035, + "learning_rate": 4.144212934060005e-07, + "loss": 21.8849, + "step": 441270 + }, + { + "epoch": 0.8914135190714173, + "grad_norm": 118.08297729492188, + "learning_rate": 4.142821595958024e-07, + "loss": 24.1273, + "step": 441280 + }, + { + "epoch": 0.8914337197041011, + "grad_norm": 202.7879638671875, + "learning_rate": 4.1414304813583663e-07, + "loss": 25.2596, + "step": 441290 + }, + { + "epoch": 0.8914539203367849, + "grad_norm": 70.14551544189453, + "learning_rate": 4.140039590267836e-07, + "loss": 21.5711, + "step": 441300 + }, + { + "epoch": 0.8914741209694688, + "grad_norm": 55.28828048706055, + "learning_rate": 4.1386489226931723e-07, + "loss": 21.0769, + "step": 441310 + }, + { + "epoch": 0.8914943216021526, + "grad_norm": 189.6409149169922, + "learning_rate": 4.137258478641176e-07, + "loss": 21.3108, + "step": 441320 + }, + { + "epoch": 0.8915145222348364, + "grad_norm": 401.5644836425781, + "learning_rate": 4.135868258118625e-07, + "loss": 12.4659, + "step": 441330 + }, + { + "epoch": 0.8915347228675202, + "grad_norm": 46.09809112548828, + "learning_rate": 4.1344782611322855e-07, + "loss": 21.3478, + "step": 441340 + }, + { + "epoch": 0.891554923500204, + "grad_norm": 237.2362823486328, + "learning_rate": 4.13308848768893e-07, + "loss": 20.2728, + "step": 441350 + }, + { + "epoch": 0.8915751241328879, + "grad_norm": 277.4466857910156, + "learning_rate": 4.1316989377953477e-07, + "loss": 16.9617, + "step": 441360 + }, + { + "epoch": 0.8915953247655717, + "grad_norm": 219.892822265625, + "learning_rate": 4.1303096114583e-07, + "loss": 19.2814, + "step": 441370 + }, + { + "epoch": 0.8916155253982555, + "grad_norm": 448.60009765625, + "learning_rate": 4.128920508684553e-07, + "loss": 24.5058, + "step": 441380 + }, + { + "epoch": 0.8916357260309393, + "grad_norm": 351.42901611328125, + "learning_rate": 4.127531629480891e-07, + "loss": 19.3832, + "step": 441390 + }, + { + "epoch": 0.8916559266636231, + "grad_norm": 642.4378662109375, + "learning_rate": 4.1261429738540694e-07, + "loss": 31.0099, + "step": 441400 + }, + { + "epoch": 0.891676127296307, + "grad_norm": 496.5330810546875, + "learning_rate": 4.1247545418108715e-07, + "loss": 21.5189, + "step": 441410 + }, + { + "epoch": 0.8916963279289908, + "grad_norm": 206.7541046142578, + "learning_rate": 4.1233663333580474e-07, + "loss": 10.9033, + "step": 441420 + }, + { + "epoch": 0.8917165285616746, + "grad_norm": 428.3406677246094, + "learning_rate": 4.121978348502381e-07, + "loss": 27.4461, + "step": 441430 + }, + { + "epoch": 0.8917367291943583, + "grad_norm": 265.78924560546875, + "learning_rate": 4.1205905872506224e-07, + "loss": 17.8106, + "step": 441440 + }, + { + "epoch": 0.8917569298270421, + "grad_norm": 439.1598815917969, + "learning_rate": 4.119203049609538e-07, + "loss": 19.014, + "step": 441450 + }, + { + "epoch": 0.891777130459726, + "grad_norm": 4.566817760467529, + "learning_rate": 4.1178157355859005e-07, + "loss": 13.6683, + "step": 441460 + }, + { + "epoch": 0.8917973310924098, + "grad_norm": 406.9350891113281, + "learning_rate": 4.1164286451864543e-07, + "loss": 17.9122, + "step": 441470 + }, + { + "epoch": 0.8918175317250936, + "grad_norm": 272.4143371582031, + "learning_rate": 4.1150417784179776e-07, + "loss": 21.5863, + "step": 441480 + }, + { + "epoch": 0.8918377323577774, + "grad_norm": 
664.770263671875, + "learning_rate": 4.1136551352872256e-07, + "loss": 16.9244, + "step": 441490 + }, + { + "epoch": 0.8918579329904612, + "grad_norm": 657.5719604492188, + "learning_rate": 4.112268715800943e-07, + "loss": 23.7679, + "step": 441500 + }, + { + "epoch": 0.891878133623145, + "grad_norm": 150.0564727783203, + "learning_rate": 4.1108825199659087e-07, + "loss": 16.1455, + "step": 441510 + }, + { + "epoch": 0.8918983342558289, + "grad_norm": 410.1813659667969, + "learning_rate": 4.1094965477888605e-07, + "loss": 17.7182, + "step": 441520 + }, + { + "epoch": 0.8919185348885127, + "grad_norm": 194.68238830566406, + "learning_rate": 4.1081107992765546e-07, + "loss": 17.0886, + "step": 441530 + }, + { + "epoch": 0.8919387355211965, + "grad_norm": 333.56903076171875, + "learning_rate": 4.1067252744357524e-07, + "loss": 17.374, + "step": 441540 + }, + { + "epoch": 0.8919589361538803, + "grad_norm": 348.90771484375, + "learning_rate": 4.10533997327322e-07, + "loss": 10.7891, + "step": 441550 + }, + { + "epoch": 0.8919791367865642, + "grad_norm": 272.50164794921875, + "learning_rate": 4.1039548957956807e-07, + "loss": 16.9159, + "step": 441560 + }, + { + "epoch": 0.891999337419248, + "grad_norm": 54.76359558105469, + "learning_rate": 4.102570042009896e-07, + "loss": 17.3863, + "step": 441570 + }, + { + "epoch": 0.8920195380519318, + "grad_norm": 255.55389404296875, + "learning_rate": 4.101185411922626e-07, + "loss": 15.8582, + "step": 441580 + }, + { + "epoch": 0.8920397386846156, + "grad_norm": 70.89956665039062, + "learning_rate": 4.099801005540616e-07, + "loss": 17.2675, + "step": 441590 + }, + { + "epoch": 0.8920599393172994, + "grad_norm": 311.2065124511719, + "learning_rate": 4.0984168228705934e-07, + "loss": 13.5224, + "step": 441600 + }, + { + "epoch": 0.8920801399499833, + "grad_norm": 296.6814880371094, + "learning_rate": 4.0970328639193255e-07, + "loss": 17.2601, + "step": 441610 + }, + { + "epoch": 0.8921003405826671, + "grad_norm": 348.70703125, + "learning_rate": 4.0956491286935687e-07, + "loss": 40.7721, + "step": 441620 + }, + { + "epoch": 0.8921205412153509, + "grad_norm": 760.1993408203125, + "learning_rate": 4.0942656172000273e-07, + "loss": 19.1502, + "step": 441630 + }, + { + "epoch": 0.8921407418480347, + "grad_norm": 331.21807861328125, + "learning_rate": 4.0928823294454743e-07, + "loss": 27.8179, + "step": 441640 + }, + { + "epoch": 0.8921609424807185, + "grad_norm": 431.77606201171875, + "learning_rate": 4.091499265436649e-07, + "loss": 14.2931, + "step": 441650 + }, + { + "epoch": 0.8921811431134024, + "grad_norm": 814.52001953125, + "learning_rate": 4.0901164251802905e-07, + "loss": 16.915, + "step": 441660 + }, + { + "epoch": 0.8922013437460862, + "grad_norm": 197.07962036132812, + "learning_rate": 4.088733808683132e-07, + "loss": 19.4393, + "step": 441670 + }, + { + "epoch": 0.89222154437877, + "grad_norm": 327.1533203125, + "learning_rate": 4.087351415951918e-07, + "loss": 25.7358, + "step": 441680 + }, + { + "epoch": 0.8922417450114538, + "grad_norm": 617.658935546875, + "learning_rate": 4.085969246993388e-07, + "loss": 21.5491, + "step": 441690 + }, + { + "epoch": 0.8922619456441375, + "grad_norm": 484.689208984375, + "learning_rate": 4.084587301814269e-07, + "loss": 17.0248, + "step": 441700 + }, + { + "epoch": 0.8922821462768213, + "grad_norm": 317.9181823730469, + "learning_rate": 4.0832055804212957e-07, + "loss": 12.5236, + "step": 441710 + }, + { + "epoch": 0.8923023469095052, + "grad_norm": 6.746350288391113, + "learning_rate": 4.081824082821223e-07, 
+ "loss": 11.206, + "step": 441720 + }, + { + "epoch": 0.892322547542189, + "grad_norm": 467.7158203125, + "learning_rate": 4.080442809020774e-07, + "loss": 15.5601, + "step": 441730 + }, + { + "epoch": 0.8923427481748728, + "grad_norm": 175.54934692382812, + "learning_rate": 4.079061759026659e-07, + "loss": 19.0913, + "step": 441740 + }, + { + "epoch": 0.8923629488075566, + "grad_norm": 490.88568115234375, + "learning_rate": 4.0776809328456455e-07, + "loss": 14.066, + "step": 441750 + }, + { + "epoch": 0.8923831494402404, + "grad_norm": 570.3674926757812, + "learning_rate": 4.0763003304844395e-07, + "loss": 12.0989, + "step": 441760 + }, + { + "epoch": 0.8924033500729243, + "grad_norm": 483.3099670410156, + "learning_rate": 4.0749199519497686e-07, + "loss": 14.4682, + "step": 441770 + }, + { + "epoch": 0.8924235507056081, + "grad_norm": 312.3144836425781, + "learning_rate": 4.073539797248377e-07, + "loss": 21.0393, + "step": 441780 + }, + { + "epoch": 0.8924437513382919, + "grad_norm": 95.38319396972656, + "learning_rate": 4.0721598663869764e-07, + "loss": 18.0344, + "step": 441790 + }, + { + "epoch": 0.8924639519709757, + "grad_norm": 414.2052001953125, + "learning_rate": 4.0707801593723006e-07, + "loss": 33.5571, + "step": 441800 + }, + { + "epoch": 0.8924841526036595, + "grad_norm": 379.32366943359375, + "learning_rate": 4.069400676211077e-07, + "loss": 19.56, + "step": 441810 + }, + { + "epoch": 0.8925043532363434, + "grad_norm": 185.80258178710938, + "learning_rate": 4.0680214169100117e-07, + "loss": 14.8039, + "step": 441820 + }, + { + "epoch": 0.8925245538690272, + "grad_norm": 760.356689453125, + "learning_rate": 4.0666423814758436e-07, + "loss": 28.8158, + "step": 441830 + }, + { + "epoch": 0.892544754501711, + "grad_norm": 153.5394287109375, + "learning_rate": 4.065263569915301e-07, + "loss": 16.4005, + "step": 441840 + }, + { + "epoch": 0.8925649551343948, + "grad_norm": 312.73956298828125, + "learning_rate": 4.063884982235078e-07, + "loss": 21.2115, + "step": 441850 + }, + { + "epoch": 0.8925851557670786, + "grad_norm": 129.17160034179688, + "learning_rate": 4.062506618441908e-07, + "loss": 14.3664, + "step": 441860 + }, + { + "epoch": 0.8926053563997625, + "grad_norm": 255.9687042236328, + "learning_rate": 4.06112847854252e-07, + "loss": 28.4689, + "step": 441870 + }, + { + "epoch": 0.8926255570324463, + "grad_norm": 344.6517639160156, + "learning_rate": 4.059750562543618e-07, + "loss": 42.3723, + "step": 441880 + }, + { + "epoch": 0.8926457576651301, + "grad_norm": 280.7841796875, + "learning_rate": 4.05837287045191e-07, + "loss": 14.4088, + "step": 441890 + }, + { + "epoch": 0.8926659582978139, + "grad_norm": 324.40618896484375, + "learning_rate": 4.056995402274122e-07, + "loss": 22.3527, + "step": 441900 + }, + { + "epoch": 0.8926861589304977, + "grad_norm": 194.77801513671875, + "learning_rate": 4.0556181580169885e-07, + "loss": 15.2484, + "step": 441910 + }, + { + "epoch": 0.8927063595631816, + "grad_norm": 433.3874206542969, + "learning_rate": 4.054241137687176e-07, + "loss": 22.2178, + "step": 441920 + }, + { + "epoch": 0.8927265601958654, + "grad_norm": 102.00749969482422, + "learning_rate": 4.052864341291418e-07, + "loss": 11.2987, + "step": 441930 + }, + { + "epoch": 0.8927467608285492, + "grad_norm": 348.9513854980469, + "learning_rate": 4.051487768836443e-07, + "loss": 22.3919, + "step": 441940 + }, + { + "epoch": 0.8927669614612329, + "grad_norm": 229.98158264160156, + "learning_rate": 4.0501114203289395e-07, + "loss": 17.3857, + "step": 441950 + }, + { + 
"epoch": 0.8927871620939167, + "grad_norm": 416.3073425292969, + "learning_rate": 4.048735295775608e-07, + "loss": 16.0174, + "step": 441960 + }, + { + "epoch": 0.8928073627266006, + "grad_norm": 374.8637390136719, + "learning_rate": 4.0473593951831814e-07, + "loss": 12.0551, + "step": 441970 + }, + { + "epoch": 0.8928275633592844, + "grad_norm": 544.44580078125, + "learning_rate": 4.0459837185583497e-07, + "loss": 10.0, + "step": 441980 + }, + { + "epoch": 0.8928477639919682, + "grad_norm": 578.8648071289062, + "learning_rate": 4.044608265907807e-07, + "loss": 23.768, + "step": 441990 + }, + { + "epoch": 0.892867964624652, + "grad_norm": 93.16361236572266, + "learning_rate": 4.043233037238281e-07, + "loss": 22.4546, + "step": 442000 + }, + { + "epoch": 0.8928881652573358, + "grad_norm": 862.9176025390625, + "learning_rate": 4.041858032556456e-07, + "loss": 19.5332, + "step": 442010 + }, + { + "epoch": 0.8929083658900197, + "grad_norm": 319.9920654296875, + "learning_rate": 4.040483251869054e-07, + "loss": 12.6, + "step": 442020 + }, + { + "epoch": 0.8929285665227035, + "grad_norm": 28.0956974029541, + "learning_rate": 4.0391086951827474e-07, + "loss": 31.4243, + "step": 442030 + }, + { + "epoch": 0.8929487671553873, + "grad_norm": 325.6099548339844, + "learning_rate": 4.0377343625042587e-07, + "loss": 11.6039, + "step": 442040 + }, + { + "epoch": 0.8929689677880711, + "grad_norm": 81.76892852783203, + "learning_rate": 4.0363602538402823e-07, + "loss": 11.1132, + "step": 442050 + }, + { + "epoch": 0.8929891684207549, + "grad_norm": 668.8286743164062, + "learning_rate": 4.034986369197502e-07, + "loss": 26.1809, + "step": 442060 + }, + { + "epoch": 0.8930093690534388, + "grad_norm": 663.0156860351562, + "learning_rate": 4.0336127085826294e-07, + "loss": 22.1773, + "step": 442070 + }, + { + "epoch": 0.8930295696861226, + "grad_norm": 353.7117919921875, + "learning_rate": 4.032239272002347e-07, + "loss": 14.2357, + "step": 442080 + }, + { + "epoch": 0.8930497703188064, + "grad_norm": 571.19384765625, + "learning_rate": 4.030866059463362e-07, + "loss": 24.5401, + "step": 442090 + }, + { + "epoch": 0.8930699709514902, + "grad_norm": 277.9085388183594, + "learning_rate": 4.029493070972362e-07, + "loss": 15.2518, + "step": 442100 + }, + { + "epoch": 0.893090171584174, + "grad_norm": 212.6479949951172, + "learning_rate": 4.0281203065360265e-07, + "loss": 15.6186, + "step": 442110 + }, + { + "epoch": 0.8931103722168579, + "grad_norm": 3.895404100418091, + "learning_rate": 4.026747766161071e-07, + "loss": 14.8128, + "step": 442120 + }, + { + "epoch": 0.8931305728495417, + "grad_norm": 3.034308433532715, + "learning_rate": 4.025375449854163e-07, + "loss": 10.7396, + "step": 442130 + }, + { + "epoch": 0.8931507734822255, + "grad_norm": 376.6468200683594, + "learning_rate": 4.0240033576219974e-07, + "loss": 22.8036, + "step": 442140 + }, + { + "epoch": 0.8931709741149093, + "grad_norm": 591.7930297851562, + "learning_rate": 4.022631489471257e-07, + "loss": 21.005, + "step": 442150 + }, + { + "epoch": 0.8931911747475931, + "grad_norm": 67.9887924194336, + "learning_rate": 4.0212598454086596e-07, + "loss": 17.2758, + "step": 442160 + }, + { + "epoch": 0.893211375380277, + "grad_norm": 611.8180541992188, + "learning_rate": 4.019888425440838e-07, + "loss": 18.577, + "step": 442170 + }, + { + "epoch": 0.8932315760129608, + "grad_norm": 460.660400390625, + "learning_rate": 4.018517229574509e-07, + "loss": 21.4927, + "step": 442180 + }, + { + "epoch": 0.8932517766456446, + "grad_norm": 286.05133056640625, + 
"learning_rate": 4.0171462578163624e-07, + "loss": 17.3168, + "step": 442190 + }, + { + "epoch": 0.8932719772783284, + "grad_norm": 289.0408020019531, + "learning_rate": 4.0157755101730645e-07, + "loss": 13.6396, + "step": 442200 + }, + { + "epoch": 0.8932921779110121, + "grad_norm": 433.1505432128906, + "learning_rate": 4.014404986651288e-07, + "loss": 18.2587, + "step": 442210 + }, + { + "epoch": 0.8933123785436959, + "grad_norm": 505.6806945800781, + "learning_rate": 4.013034687257727e-07, + "loss": 21.5438, + "step": 442220 + }, + { + "epoch": 0.8933325791763798, + "grad_norm": 330.5233154296875, + "learning_rate": 4.011664611999072e-07, + "loss": 30.2277, + "step": 442230 + }, + { + "epoch": 0.8933527798090636, + "grad_norm": 633.8748779296875, + "learning_rate": 4.010294760881972e-07, + "loss": 25.41, + "step": 442240 + }, + { + "epoch": 0.8933729804417474, + "grad_norm": 301.0047607421875, + "learning_rate": 4.0089251339131164e-07, + "loss": 21.5253, + "step": 442250 + }, + { + "epoch": 0.8933931810744312, + "grad_norm": 2.294178009033203, + "learning_rate": 4.0075557310991886e-07, + "loss": 21.8428, + "step": 442260 + }, + { + "epoch": 0.893413381707115, + "grad_norm": 519.116455078125, + "learning_rate": 4.006186552446861e-07, + "loss": 18.2456, + "step": 442270 + }, + { + "epoch": 0.8934335823397989, + "grad_norm": 256.85467529296875, + "learning_rate": 4.00481759796279e-07, + "loss": 19.9572, + "step": 442280 + }, + { + "epoch": 0.8934537829724827, + "grad_norm": 501.7436828613281, + "learning_rate": 4.003448867653664e-07, + "loss": 18.7082, + "step": 442290 + }, + { + "epoch": 0.8934739836051665, + "grad_norm": 430.889404296875, + "learning_rate": 4.002080361526156e-07, + "loss": 16.348, + "step": 442300 + }, + { + "epoch": 0.8934941842378503, + "grad_norm": 591.6133422851562, + "learning_rate": 4.000712079586916e-07, + "loss": 16.6457, + "step": 442310 + }, + { + "epoch": 0.8935143848705341, + "grad_norm": 604.29736328125, + "learning_rate": 3.999344021842627e-07, + "loss": 23.2564, + "step": 442320 + }, + { + "epoch": 0.893534585503218, + "grad_norm": 216.2506103515625, + "learning_rate": 3.997976188299968e-07, + "loss": 16.2214, + "step": 442330 + }, + { + "epoch": 0.8935547861359018, + "grad_norm": 949.240966796875, + "learning_rate": 3.996608578965594e-07, + "loss": 22.458, + "step": 442340 + }, + { + "epoch": 0.8935749867685856, + "grad_norm": 247.2509307861328, + "learning_rate": 3.9952411938461557e-07, + "loss": 18.5836, + "step": 442350 + }, + { + "epoch": 0.8935951874012694, + "grad_norm": 260.24273681640625, + "learning_rate": 3.9938740329483473e-07, + "loss": 20.2889, + "step": 442360 + }, + { + "epoch": 0.8936153880339532, + "grad_norm": 475.4487609863281, + "learning_rate": 3.992507096278814e-07, + "loss": 23.5179, + "step": 442370 + }, + { + "epoch": 0.8936355886666371, + "grad_norm": 317.2352600097656, + "learning_rate": 3.991140383844211e-07, + "loss": 9.8048, + "step": 442380 + }, + { + "epoch": 0.8936557892993209, + "grad_norm": 841.1236572265625, + "learning_rate": 3.989773895651222e-07, + "loss": 18.5649, + "step": 442390 + }, + { + "epoch": 0.8936759899320047, + "grad_norm": 498.5187072753906, + "learning_rate": 3.9884076317064813e-07, + "loss": 20.668, + "step": 442400 + }, + { + "epoch": 0.8936961905646885, + "grad_norm": 311.369384765625, + "learning_rate": 3.9870415920166715e-07, + "loss": 16.4844, + "step": 442410 + }, + { + "epoch": 0.8937163911973723, + "grad_norm": 283.80987548828125, + "learning_rate": 3.9856757765884436e-07, + "loss": 10.47, + 
"step": 442420 + }, + { + "epoch": 0.8937365918300562, + "grad_norm": 37.27119445800781, + "learning_rate": 3.984310185428442e-07, + "loss": 33.6831, + "step": 442430 + }, + { + "epoch": 0.89375679246274, + "grad_norm": 382.70458984375, + "learning_rate": 3.9829448185433385e-07, + "loss": 25.1369, + "step": 442440 + }, + { + "epoch": 0.8937769930954238, + "grad_norm": 424.489501953125, + "learning_rate": 3.9815796759397783e-07, + "loss": 8.1771, + "step": 442450 + }, + { + "epoch": 0.8937971937281076, + "grad_norm": 0.0, + "learning_rate": 3.980214757624412e-07, + "loss": 20.0538, + "step": 442460 + }, + { + "epoch": 0.8938173943607913, + "grad_norm": 232.93911743164062, + "learning_rate": 3.978850063603895e-07, + "loss": 18.2064, + "step": 442470 + }, + { + "epoch": 0.8938375949934751, + "grad_norm": 588.0669555664062, + "learning_rate": 3.977485593884889e-07, + "loss": 27.8043, + "step": 442480 + }, + { + "epoch": 0.893857795626159, + "grad_norm": 289.8110656738281, + "learning_rate": 3.9761213484740435e-07, + "loss": 18.8844, + "step": 442490 + }, + { + "epoch": 0.8938779962588428, + "grad_norm": 455.50518798828125, + "learning_rate": 3.9747573273779816e-07, + "loss": 13.1274, + "step": 442500 + }, + { + "epoch": 0.8938981968915266, + "grad_norm": 263.20697021484375, + "learning_rate": 3.9733935306033756e-07, + "loss": 16.1889, + "step": 442510 + }, + { + "epoch": 0.8939183975242104, + "grad_norm": 463.04376220703125, + "learning_rate": 3.9720299581568865e-07, + "loss": 21.7175, + "step": 442520 + }, + { + "epoch": 0.8939385981568942, + "grad_norm": 302.4626770019531, + "learning_rate": 3.970666610045121e-07, + "loss": 19.4688, + "step": 442530 + }, + { + "epoch": 0.8939587987895781, + "grad_norm": 769.6148681640625, + "learning_rate": 3.969303486274745e-07, + "loss": 27.4903, + "step": 442540 + }, + { + "epoch": 0.8939789994222619, + "grad_norm": 316.3883361816406, + "learning_rate": 3.967940586852409e-07, + "loss": 12.6395, + "step": 442550 + }, + { + "epoch": 0.8939992000549457, + "grad_norm": 183.7150421142578, + "learning_rate": 3.966577911784747e-07, + "loss": 20.832, + "step": 442560 + }, + { + "epoch": 0.8940194006876295, + "grad_norm": 590.2752685546875, + "learning_rate": 3.965215461078392e-07, + "loss": 15.9476, + "step": 442570 + }, + { + "epoch": 0.8940396013203133, + "grad_norm": 534.8192138671875, + "learning_rate": 3.963853234740006e-07, + "loss": 19.1567, + "step": 442580 + }, + { + "epoch": 0.8940598019529972, + "grad_norm": 597.6304931640625, + "learning_rate": 3.962491232776211e-07, + "loss": 20.072, + "step": 442590 + }, + { + "epoch": 0.894080002585681, + "grad_norm": 381.5980529785156, + "learning_rate": 3.961129455193641e-07, + "loss": 32.0533, + "step": 442600 + }, + { + "epoch": 0.8941002032183648, + "grad_norm": 256.7167663574219, + "learning_rate": 3.959767901998957e-07, + "loss": 11.408, + "step": 442610 + }, + { + "epoch": 0.8941204038510486, + "grad_norm": 65.94268798828125, + "learning_rate": 3.958406573198764e-07, + "loss": 18.7583, + "step": 442620 + }, + { + "epoch": 0.8941406044837324, + "grad_norm": 288.7595520019531, + "learning_rate": 3.957045468799725e-07, + "loss": 22.4417, + "step": 442630 + }, + { + "epoch": 0.8941608051164163, + "grad_norm": 427.774169921875, + "learning_rate": 3.955684588808456e-07, + "loss": 29.2313, + "step": 442640 + }, + { + "epoch": 0.8941810057491001, + "grad_norm": 105.23033142089844, + "learning_rate": 3.954323933231602e-07, + "loss": 24.7779, + "step": 442650 + }, + { + "epoch": 0.8942012063817839, + "grad_norm": 
648.6617431640625, + "learning_rate": 3.952963502075791e-07, + "loss": 23.5429, + "step": 442660 + }, + { + "epoch": 0.8942214070144677, + "grad_norm": 456.74700927734375, + "learning_rate": 3.951603295347639e-07, + "loss": 21.8411, + "step": 442670 + }, + { + "epoch": 0.8942416076471515, + "grad_norm": 0.0741969645023346, + "learning_rate": 3.9502433130537977e-07, + "loss": 10.3931, + "step": 442680 + }, + { + "epoch": 0.8942618082798354, + "grad_norm": 482.71612548828125, + "learning_rate": 3.9488835552008773e-07, + "loss": 20.5551, + "step": 442690 + }, + { + "epoch": 0.8942820089125192, + "grad_norm": 61.67448806762695, + "learning_rate": 3.947524021795518e-07, + "loss": 18.076, + "step": 442700 + }, + { + "epoch": 0.894302209545203, + "grad_norm": 291.5075988769531, + "learning_rate": 3.946164712844347e-07, + "loss": 10.6352, + "step": 442710 + }, + { + "epoch": 0.8943224101778867, + "grad_norm": 536.745361328125, + "learning_rate": 3.9448056283539704e-07, + "loss": 19.3627, + "step": 442720 + }, + { + "epoch": 0.8943426108105705, + "grad_norm": 359.7989196777344, + "learning_rate": 3.9434467683310327e-07, + "loss": 13.5057, + "step": 442730 + }, + { + "epoch": 0.8943628114432544, + "grad_norm": 391.85369873046875, + "learning_rate": 3.942088132782157e-07, + "loss": 16.9426, + "step": 442740 + }, + { + "epoch": 0.8943830120759382, + "grad_norm": 234.9928741455078, + "learning_rate": 3.9407297217139427e-07, + "loss": 12.7332, + "step": 442750 + }, + { + "epoch": 0.894403212708622, + "grad_norm": 301.238037109375, + "learning_rate": 3.9393715351330243e-07, + "loss": 11.6003, + "step": 442760 + }, + { + "epoch": 0.8944234133413058, + "grad_norm": 177.99615478515625, + "learning_rate": 3.9380135730460347e-07, + "loss": 9.4544, + "step": 442770 + }, + { + "epoch": 0.8944436139739896, + "grad_norm": 565.592041015625, + "learning_rate": 3.9366558354595797e-07, + "loss": 23.9906, + "step": 442780 + }, + { + "epoch": 0.8944638146066735, + "grad_norm": 177.5765838623047, + "learning_rate": 3.935298322380271e-07, + "loss": 10.1951, + "step": 442790 + }, + { + "epoch": 0.8944840152393573, + "grad_norm": 451.1468505859375, + "learning_rate": 3.9339410338147363e-07, + "loss": 17.273, + "step": 442800 + }, + { + "epoch": 0.8945042158720411, + "grad_norm": 234.44549560546875, + "learning_rate": 3.9325839697695877e-07, + "loss": 11.3077, + "step": 442810 + }, + { + "epoch": 0.8945244165047249, + "grad_norm": 601.57275390625, + "learning_rate": 3.931227130251425e-07, + "loss": 16.1628, + "step": 442820 + }, + { + "epoch": 0.8945446171374087, + "grad_norm": 50.89834976196289, + "learning_rate": 3.929870515266876e-07, + "loss": 21.8412, + "step": 442830 + }, + { + "epoch": 0.8945648177700926, + "grad_norm": 312.83880615234375, + "learning_rate": 3.928514124822569e-07, + "loss": 18.6505, + "step": 442840 + }, + { + "epoch": 0.8945850184027764, + "grad_norm": 22.50575065612793, + "learning_rate": 3.9271579589250817e-07, + "loss": 27.6935, + "step": 442850 + }, + { + "epoch": 0.8946052190354602, + "grad_norm": 371.71063232421875, + "learning_rate": 3.925802017581032e-07, + "loss": 14.2174, + "step": 442860 + }, + { + "epoch": 0.894625419668144, + "grad_norm": 322.22113037109375, + "learning_rate": 3.924446300797052e-07, + "loss": 11.8918, + "step": 442870 + }, + { + "epoch": 0.8946456203008278, + "grad_norm": 245.89785766601562, + "learning_rate": 3.923090808579727e-07, + "loss": 19.6123, + "step": 442880 + }, + { + "epoch": 0.8946658209335117, + "grad_norm": 521.1366577148438, + "learning_rate": 
3.9217355409356614e-07, + "loss": 26.2525, + "step": 442890 + }, + { + "epoch": 0.8946860215661955, + "grad_norm": 397.6199645996094, + "learning_rate": 3.920380497871473e-07, + "loss": 15.6646, + "step": 442900 + }, + { + "epoch": 0.8947062221988793, + "grad_norm": 391.6208801269531, + "learning_rate": 3.9190256793937675e-07, + "loss": 20.3865, + "step": 442910 + }, + { + "epoch": 0.8947264228315631, + "grad_norm": 289.1727294921875, + "learning_rate": 3.9176710855091283e-07, + "loss": 9.2943, + "step": 442920 + }, + { + "epoch": 0.8947466234642469, + "grad_norm": 239.43304443359375, + "learning_rate": 3.916316716224172e-07, + "loss": 16.7488, + "step": 442930 + }, + { + "epoch": 0.8947668240969308, + "grad_norm": 349.6020202636719, + "learning_rate": 3.9149625715455107e-07, + "loss": 20.8104, + "step": 442940 + }, + { + "epoch": 0.8947870247296146, + "grad_norm": 96.1251449584961, + "learning_rate": 3.913608651479733e-07, + "loss": 26.9883, + "step": 442950 + }, + { + "epoch": 0.8948072253622984, + "grad_norm": 623.001953125, + "learning_rate": 3.912254956033423e-07, + "loss": 19.0216, + "step": 442960 + }, + { + "epoch": 0.8948274259949822, + "grad_norm": 338.7909851074219, + "learning_rate": 3.9109014852132035e-07, + "loss": 10.8443, + "step": 442970 + }, + { + "epoch": 0.8948476266276659, + "grad_norm": 490.4385070800781, + "learning_rate": 3.9095482390256624e-07, + "loss": 17.8714, + "step": 442980 + }, + { + "epoch": 0.8948678272603497, + "grad_norm": 648.5219116210938, + "learning_rate": 3.908195217477384e-07, + "loss": 19.2579, + "step": 442990 + }, + { + "epoch": 0.8948880278930336, + "grad_norm": 679.5826416015625, + "learning_rate": 3.90684242057498e-07, + "loss": 34.5945, + "step": 443000 + }, + { + "epoch": 0.8949082285257174, + "grad_norm": 291.563232421875, + "learning_rate": 3.9054898483250224e-07, + "loss": 20.5633, + "step": 443010 + }, + { + "epoch": 0.8949284291584012, + "grad_norm": 510.5856628417969, + "learning_rate": 3.904137500734129e-07, + "loss": 22.5774, + "step": 443020 + }, + { + "epoch": 0.894948629791085, + "grad_norm": 551.2908325195312, + "learning_rate": 3.902785377808882e-07, + "loss": 26.2614, + "step": 443030 + }, + { + "epoch": 0.8949688304237688, + "grad_norm": 251.3923797607422, + "learning_rate": 3.901433479555855e-07, + "loss": 18.5808, + "step": 443040 + }, + { + "epoch": 0.8949890310564527, + "grad_norm": 289.2093200683594, + "learning_rate": 3.9000818059816593e-07, + "loss": 24.0075, + "step": 443050 + }, + { + "epoch": 0.8950092316891365, + "grad_norm": 123.8757553100586, + "learning_rate": 3.898730357092878e-07, + "loss": 28.818, + "step": 443060 + }, + { + "epoch": 0.8950294323218203, + "grad_norm": 592.0748291015625, + "learning_rate": 3.8973791328960786e-07, + "loss": 18.4389, + "step": 443070 + }, + { + "epoch": 0.8950496329545041, + "grad_norm": 339.29248046875, + "learning_rate": 3.8960281333978667e-07, + "loss": 19.7948, + "step": 443080 + }, + { + "epoch": 0.895069833587188, + "grad_norm": 218.08489990234375, + "learning_rate": 3.894677358604826e-07, + "loss": 14.6797, + "step": 443090 + }, + { + "epoch": 0.8950900342198718, + "grad_norm": 459.9266662597656, + "learning_rate": 3.89332680852354e-07, + "loss": 17.3519, + "step": 443100 + }, + { + "epoch": 0.8951102348525556, + "grad_norm": 314.08953857421875, + "learning_rate": 3.8919764831605754e-07, + "loss": 9.2347, + "step": 443110 + }, + { + "epoch": 0.8951304354852394, + "grad_norm": 176.02169799804688, + "learning_rate": 3.890626382522539e-07, + "loss": 21.5893, + "step": 443120 
+ }, + { + "epoch": 0.8951506361179232, + "grad_norm": 822.2457275390625, + "learning_rate": 3.889276506615991e-07, + "loss": 14.4411, + "step": 443130 + }, + { + "epoch": 0.895170836750607, + "grad_norm": 414.7174987792969, + "learning_rate": 3.88792685544751e-07, + "loss": 23.4973, + "step": 443140 + }, + { + "epoch": 0.8951910373832909, + "grad_norm": 176.8641357421875, + "learning_rate": 3.88657742902368e-07, + "loss": 14.6913, + "step": 443150 + }, + { + "epoch": 0.8952112380159747, + "grad_norm": 465.1305847167969, + "learning_rate": 3.88522822735109e-07, + "loss": 25.9285, + "step": 443160 + }, + { + "epoch": 0.8952314386486585, + "grad_norm": 268.416748046875, + "learning_rate": 3.8838792504363066e-07, + "loss": 8.6215, + "step": 443170 + }, + { + "epoch": 0.8952516392813423, + "grad_norm": 1195.952392578125, + "learning_rate": 3.882530498285886e-07, + "loss": 16.573, + "step": 443180 + }, + { + "epoch": 0.8952718399140261, + "grad_norm": 293.72906494140625, + "learning_rate": 3.8811819709064336e-07, + "loss": 14.2375, + "step": 443190 + }, + { + "epoch": 0.89529204054671, + "grad_norm": 47.482784271240234, + "learning_rate": 3.879833668304506e-07, + "loss": 29.3211, + "step": 443200 + }, + { + "epoch": 0.8953122411793938, + "grad_norm": 433.0750427246094, + "learning_rate": 3.8784855904866637e-07, + "loss": 26.0019, + "step": 443210 + }, + { + "epoch": 0.8953324418120776, + "grad_norm": 113.45503234863281, + "learning_rate": 3.877137737459502e-07, + "loss": 11.3939, + "step": 443220 + }, + { + "epoch": 0.8953526424447613, + "grad_norm": 362.42205810546875, + "learning_rate": 3.875790109229566e-07, + "loss": 25.6576, + "step": 443230 + }, + { + "epoch": 0.8953728430774451, + "grad_norm": 107.12870788574219, + "learning_rate": 3.8744427058034384e-07, + "loss": 13.7933, + "step": 443240 + }, + { + "epoch": 0.895393043710129, + "grad_norm": 554.9472045898438, + "learning_rate": 3.8730955271876813e-07, + "loss": 10.5045, + "step": 443250 + }, + { + "epoch": 0.8954132443428128, + "grad_norm": 5.183191299438477, + "learning_rate": 3.871748573388867e-07, + "loss": 19.209, + "step": 443260 + }, + { + "epoch": 0.8954334449754966, + "grad_norm": 622.285400390625, + "learning_rate": 3.870401844413557e-07, + "loss": 20.5912, + "step": 443270 + }, + { + "epoch": 0.8954536456081804, + "grad_norm": 608.5679931640625, + "learning_rate": 3.8690553402683015e-07, + "loss": 21.6407, + "step": 443280 + }, + { + "epoch": 0.8954738462408642, + "grad_norm": 224.35610961914062, + "learning_rate": 3.86770906095969e-07, + "loss": 21.7842, + "step": 443290 + }, + { + "epoch": 0.895494046873548, + "grad_norm": 360.945556640625, + "learning_rate": 3.866363006494256e-07, + "loss": 17.557, + "step": 443300 + }, + { + "epoch": 0.8955142475062319, + "grad_norm": 458.71636962890625, + "learning_rate": 3.8650171768785826e-07, + "loss": 30.2733, + "step": 443310 + }, + { + "epoch": 0.8955344481389157, + "grad_norm": 509.4033508300781, + "learning_rate": 3.863671572119221e-07, + "loss": 13.7747, + "step": 443320 + }, + { + "epoch": 0.8955546487715995, + "grad_norm": 390.75848388671875, + "learning_rate": 3.8623261922227204e-07, + "loss": 26.8211, + "step": 443330 + }, + { + "epoch": 0.8955748494042833, + "grad_norm": 336.63623046875, + "learning_rate": 3.8609810371956544e-07, + "loss": 9.9372, + "step": 443340 + }, + { + "epoch": 0.8955950500369672, + "grad_norm": 26.759260177612305, + "learning_rate": 3.859636107044573e-07, + "loss": 24.4349, + "step": 443350 + }, + { + "epoch": 0.895615250669651, + "grad_norm": 
317.5001220703125, + "learning_rate": 3.8582914017760154e-07, + "loss": 21.1117, + "step": 443360 + }, + { + "epoch": 0.8956354513023348, + "grad_norm": 460.7093811035156, + "learning_rate": 3.856946921396554e-07, + "loss": 15.5941, + "step": 443370 + }, + { + "epoch": 0.8956556519350186, + "grad_norm": 315.0159606933594, + "learning_rate": 3.8556026659127445e-07, + "loss": 7.0699, + "step": 443380 + }, + { + "epoch": 0.8956758525677024, + "grad_norm": 358.8883972167969, + "learning_rate": 3.8542586353311264e-07, + "loss": 14.2727, + "step": 443390 + }, + { + "epoch": 0.8956960532003863, + "grad_norm": 316.111572265625, + "learning_rate": 3.85291482965825e-07, + "loss": 15.5937, + "step": 443400 + }, + { + "epoch": 0.8957162538330701, + "grad_norm": 223.5146942138672, + "learning_rate": 3.851571248900676e-07, + "loss": 15.3269, + "step": 443410 + }, + { + "epoch": 0.8957364544657539, + "grad_norm": 420.58526611328125, + "learning_rate": 3.8502278930649506e-07, + "loss": 18.9996, + "step": 443420 + }, + { + "epoch": 0.8957566550984377, + "grad_norm": 426.0105285644531, + "learning_rate": 3.8488847621576066e-07, + "loss": 8.0604, + "step": 443430 + }, + { + "epoch": 0.8957768557311215, + "grad_norm": 114.0346450805664, + "learning_rate": 3.8475418561851996e-07, + "loss": 43.2814, + "step": 443440 + }, + { + "epoch": 0.8957970563638054, + "grad_norm": 466.63580322265625, + "learning_rate": 3.846199175154297e-07, + "loss": 20.6006, + "step": 443450 + }, + { + "epoch": 0.8958172569964892, + "grad_norm": 123.13581848144531, + "learning_rate": 3.8448567190713993e-07, + "loss": 22.305, + "step": 443460 + }, + { + "epoch": 0.895837457629173, + "grad_norm": 208.05831909179688, + "learning_rate": 3.843514487943079e-07, + "loss": 17.5501, + "step": 443470 + }, + { + "epoch": 0.8958576582618568, + "grad_norm": 190.33071899414062, + "learning_rate": 3.8421724817758745e-07, + "loss": 21.5826, + "step": 443480 + }, + { + "epoch": 0.8958778588945405, + "grad_norm": 356.87127685546875, + "learning_rate": 3.84083070057632e-07, + "loss": 23.349, + "step": 443490 + }, + { + "epoch": 0.8958980595272243, + "grad_norm": 371.1936340332031, + "learning_rate": 3.8394891443509554e-07, + "loss": 7.139, + "step": 443500 + }, + { + "epoch": 0.8959182601599082, + "grad_norm": 329.1784973144531, + "learning_rate": 3.83814781310633e-07, + "loss": 17.2144, + "step": 443510 + }, + { + "epoch": 0.895938460792592, + "grad_norm": 22.805368423461914, + "learning_rate": 3.8368067068489724e-07, + "loss": 18.3239, + "step": 443520 + }, + { + "epoch": 0.8959586614252758, + "grad_norm": 343.87689208984375, + "learning_rate": 3.8354658255854105e-07, + "loss": 16.9245, + "step": 443530 + }, + { + "epoch": 0.8959788620579596, + "grad_norm": 511.1064147949219, + "learning_rate": 3.8341251693221893e-07, + "loss": 19.1064, + "step": 443540 + }, + { + "epoch": 0.8959990626906434, + "grad_norm": 370.7943420410156, + "learning_rate": 3.832784738065853e-07, + "loss": 18.6469, + "step": 443550 + }, + { + "epoch": 0.8960192633233273, + "grad_norm": 338.9887390136719, + "learning_rate": 3.83144453182292e-07, + "loss": 9.248, + "step": 443560 + }, + { + "epoch": 0.8960394639560111, + "grad_norm": 596.9765625, + "learning_rate": 3.830104550599922e-07, + "loss": 15.9077, + "step": 443570 + }, + { + "epoch": 0.8960596645886949, + "grad_norm": 96.01427459716797, + "learning_rate": 3.8287647944034054e-07, + "loss": 12.1708, + "step": 443580 + }, + { + "epoch": 0.8960798652213787, + "grad_norm": 276.3226013183594, + "learning_rate": 
3.827425263239887e-07, + "loss": 15.0657, + "step": 443590 + }, + { + "epoch": 0.8961000658540625, + "grad_norm": 450.3712158203125, + "learning_rate": 3.8260859571158883e-07, + "loss": 22.2363, + "step": 443600 + }, + { + "epoch": 0.8961202664867464, + "grad_norm": 348.3032531738281, + "learning_rate": 3.824746876037955e-07, + "loss": 14.9362, + "step": 443610 + }, + { + "epoch": 0.8961404671194302, + "grad_norm": 542.25146484375, + "learning_rate": 3.8234080200125977e-07, + "loss": 17.2952, + "step": 443620 + }, + { + "epoch": 0.896160667752114, + "grad_norm": 302.9388732910156, + "learning_rate": 3.822069389046357e-07, + "loss": 21.2011, + "step": 443630 + }, + { + "epoch": 0.8961808683847978, + "grad_norm": 261.0054016113281, + "learning_rate": 3.8207309831457485e-07, + "loss": 11.77, + "step": 443640 + }, + { + "epoch": 0.8962010690174816, + "grad_norm": 107.35267639160156, + "learning_rate": 3.8193928023172897e-07, + "loss": 29.1692, + "step": 443650 + }, + { + "epoch": 0.8962212696501655, + "grad_norm": 248.1295166015625, + "learning_rate": 3.818054846567515e-07, + "loss": 16.7975, + "step": 443660 + }, + { + "epoch": 0.8962414702828493, + "grad_norm": 457.2428894042969, + "learning_rate": 3.8167171159029405e-07, + "loss": 15.9782, + "step": 443670 + }, + { + "epoch": 0.8962616709155331, + "grad_norm": 429.57025146484375, + "learning_rate": 3.815379610330078e-07, + "loss": 14.5478, + "step": 443680 + }, + { + "epoch": 0.8962818715482169, + "grad_norm": 473.4178466796875, + "learning_rate": 3.814042329855455e-07, + "loss": 17.089, + "step": 443690 + }, + { + "epoch": 0.8963020721809007, + "grad_norm": 374.85650634765625, + "learning_rate": 3.812705274485595e-07, + "loss": 22.4911, + "step": 443700 + }, + { + "epoch": 0.8963222728135846, + "grad_norm": 525.5431518554688, + "learning_rate": 3.811368444227009e-07, + "loss": 21.8245, + "step": 443710 + }, + { + "epoch": 0.8963424734462684, + "grad_norm": 626.8782348632812, + "learning_rate": 3.8100318390862033e-07, + "loss": 11.766, + "step": 443720 + }, + { + "epoch": 0.8963626740789522, + "grad_norm": 276.6632080078125, + "learning_rate": 3.8086954590697057e-07, + "loss": 21.9943, + "step": 443730 + }, + { + "epoch": 0.8963828747116359, + "grad_norm": 272.9100646972656, + "learning_rate": 3.8073593041840274e-07, + "loss": 40.214, + "step": 443740 + }, + { + "epoch": 0.8964030753443197, + "grad_norm": 561.2411499023438, + "learning_rate": 3.8060233744356634e-07, + "loss": 18.5295, + "step": 443750 + }, + { + "epoch": 0.8964232759770036, + "grad_norm": 11.287782669067383, + "learning_rate": 3.804687669831142e-07, + "loss": 11.435, + "step": 443760 + }, + { + "epoch": 0.8964434766096874, + "grad_norm": 321.518798828125, + "learning_rate": 3.80335219037698e-07, + "loss": 15.9047, + "step": 443770 + }, + { + "epoch": 0.8964636772423712, + "grad_norm": 562.2626953125, + "learning_rate": 3.802016936079678e-07, + "loss": 21.6205, + "step": 443780 + }, + { + "epoch": 0.896483877875055, + "grad_norm": 276.7866516113281, + "learning_rate": 3.8006819069457304e-07, + "loss": 20.1359, + "step": 443790 + }, + { + "epoch": 0.8965040785077388, + "grad_norm": 219.55706787109375, + "learning_rate": 3.7993471029816653e-07, + "loss": 10.2999, + "step": 443800 + }, + { + "epoch": 0.8965242791404227, + "grad_norm": 369.7362060546875, + "learning_rate": 3.798012524193978e-07, + "loss": 31.9167, + "step": 443810 + }, + { + "epoch": 0.8965444797731065, + "grad_norm": 241.03692626953125, + "learning_rate": 3.7966781705891684e-07, + "loss": 24.1727, + "step": 
443820 + }, + { + "epoch": 0.8965646804057903, + "grad_norm": 726.6835327148438, + "learning_rate": 3.7953440421737433e-07, + "loss": 16.6032, + "step": 443830 + }, + { + "epoch": 0.8965848810384741, + "grad_norm": 285.6444091796875, + "learning_rate": 3.794010138954213e-07, + "loss": 33.959, + "step": 443840 + }, + { + "epoch": 0.8966050816711579, + "grad_norm": 587.40283203125, + "learning_rate": 3.792676460937078e-07, + "loss": 23.2916, + "step": 443850 + }, + { + "epoch": 0.8966252823038418, + "grad_norm": 6.033504486083984, + "learning_rate": 3.791343008128823e-07, + "loss": 11.7179, + "step": 443860 + }, + { + "epoch": 0.8966454829365256, + "grad_norm": 12.601014137268066, + "learning_rate": 3.790009780535969e-07, + "loss": 19.0735, + "step": 443870 + }, + { + "epoch": 0.8966656835692094, + "grad_norm": 82.12911987304688, + "learning_rate": 3.7886767781650016e-07, + "loss": 22.1259, + "step": 443880 + }, + { + "epoch": 0.8966858842018932, + "grad_norm": 154.22366333007812, + "learning_rate": 3.787344001022408e-07, + "loss": 12.7269, + "step": 443890 + }, + { + "epoch": 0.896706084834577, + "grad_norm": 318.2230224609375, + "learning_rate": 3.7860114491147017e-07, + "loss": 15.6227, + "step": 443900 + }, + { + "epoch": 0.8967262854672609, + "grad_norm": 235.8320770263672, + "learning_rate": 3.784679122448365e-07, + "loss": 29.1175, + "step": 443910 + }, + { + "epoch": 0.8967464860999447, + "grad_norm": 15.09432601928711, + "learning_rate": 3.783347021029904e-07, + "loss": 10.9162, + "step": 443920 + }, + { + "epoch": 0.8967666867326285, + "grad_norm": 90.33223724365234, + "learning_rate": 3.782015144865808e-07, + "loss": 16.4666, + "step": 443930 + }, + { + "epoch": 0.8967868873653123, + "grad_norm": 423.8289489746094, + "learning_rate": 3.780683493962556e-07, + "loss": 13.6356, + "step": 443940 + }, + { + "epoch": 0.8968070879979961, + "grad_norm": 230.84378051757812, + "learning_rate": 3.779352068326653e-07, + "loss": 11.9636, + "step": 443950 + }, + { + "epoch": 0.89682728863068, + "grad_norm": 1077.435546875, + "learning_rate": 3.7780208679645826e-07, + "loss": 25.3632, + "step": 443960 + }, + { + "epoch": 0.8968474892633638, + "grad_norm": 17.35983657836914, + "learning_rate": 3.776689892882823e-07, + "loss": 20.2653, + "step": 443970 + }, + { + "epoch": 0.8968676898960476, + "grad_norm": 436.32354736328125, + "learning_rate": 3.77535914308787e-07, + "loss": 17.2695, + "step": 443980 + }, + { + "epoch": 0.8968878905287314, + "grad_norm": 282.3138122558594, + "learning_rate": 3.774028618586217e-07, + "loss": 25.9139, + "step": 443990 + }, + { + "epoch": 0.8969080911614151, + "grad_norm": 650.9126586914062, + "learning_rate": 3.772698319384349e-07, + "loss": 13.6121, + "step": 444000 + }, + { + "epoch": 0.8969282917940989, + "grad_norm": 65.5379409790039, + "learning_rate": 3.7713682454887266e-07, + "loss": 11.2433, + "step": 444010 + }, + { + "epoch": 0.8969484924267828, + "grad_norm": 320.1227722167969, + "learning_rate": 3.770038396905862e-07, + "loss": 14.2495, + "step": 444020 + }, + { + "epoch": 0.8969686930594666, + "grad_norm": 357.67852783203125, + "learning_rate": 3.768708773642221e-07, + "loss": 16.8157, + "step": 444030 + }, + { + "epoch": 0.8969888936921504, + "grad_norm": 276.79132080078125, + "learning_rate": 3.767379375704278e-07, + "loss": 16.1594, + "step": 444040 + }, + { + "epoch": 0.8970090943248342, + "grad_norm": 167.3631134033203, + "learning_rate": 3.7660502030985203e-07, + "loss": 14.0351, + "step": 444050 + }, + { + "epoch": 0.897029294957518, + 
"grad_norm": 344.3874206542969, + "learning_rate": 3.7647212558314493e-07, + "loss": 10.6284, + "step": 444060 + }, + { + "epoch": 0.8970494955902019, + "grad_norm": 355.57635498046875, + "learning_rate": 3.7633925339094936e-07, + "loss": 4.788, + "step": 444070 + }, + { + "epoch": 0.8970696962228857, + "grad_norm": 234.05596923828125, + "learning_rate": 3.762064037339158e-07, + "loss": 26.8288, + "step": 444080 + }, + { + "epoch": 0.8970898968555695, + "grad_norm": 573.5048217773438, + "learning_rate": 3.760735766126927e-07, + "loss": 18.9453, + "step": 444090 + }, + { + "epoch": 0.8971100974882533, + "grad_norm": 306.1864318847656, + "learning_rate": 3.759407720279257e-07, + "loss": 17.8384, + "step": 444100 + }, + { + "epoch": 0.8971302981209371, + "grad_norm": 763.8611450195312, + "learning_rate": 3.758079899802619e-07, + "loss": 17.9444, + "step": 444110 + }, + { + "epoch": 0.897150498753621, + "grad_norm": 241.2154998779297, + "learning_rate": 3.756752304703498e-07, + "loss": 20.6118, + "step": 444120 + }, + { + "epoch": 0.8971706993863048, + "grad_norm": 474.2724609375, + "learning_rate": 3.755424934988355e-07, + "loss": 9.7833, + "step": 444130 + }, + { + "epoch": 0.8971909000189886, + "grad_norm": 71.91261291503906, + "learning_rate": 3.7540977906636576e-07, + "loss": 13.4105, + "step": 444140 + }, + { + "epoch": 0.8972111006516724, + "grad_norm": 111.76239776611328, + "learning_rate": 3.752770871735878e-07, + "loss": 10.913, + "step": 444150 + }, + { + "epoch": 0.8972313012843562, + "grad_norm": 187.22528076171875, + "learning_rate": 3.751444178211494e-07, + "loss": 7.838, + "step": 444160 + }, + { + "epoch": 0.8972515019170401, + "grad_norm": 277.2666320800781, + "learning_rate": 3.7501177100969566e-07, + "loss": 8.9107, + "step": 444170 + }, + { + "epoch": 0.8972717025497239, + "grad_norm": 444.3712463378906, + "learning_rate": 3.748791467398732e-07, + "loss": 12.3827, + "step": 444180 + }, + { + "epoch": 0.8972919031824077, + "grad_norm": 361.9163818359375, + "learning_rate": 3.747465450123294e-07, + "loss": 19.0128, + "step": 444190 + }, + { + "epoch": 0.8973121038150915, + "grad_norm": 334.4684143066406, + "learning_rate": 3.7461396582771035e-07, + "loss": 20.5581, + "step": 444200 + }, + { + "epoch": 0.8973323044477753, + "grad_norm": 313.4148864746094, + "learning_rate": 3.744814091866605e-07, + "loss": 23.7055, + "step": 444210 + }, + { + "epoch": 0.8973525050804592, + "grad_norm": 403.1631164550781, + "learning_rate": 3.7434887508982886e-07, + "loss": 16.993, + "step": 444220 + }, + { + "epoch": 0.897372705713143, + "grad_norm": 43.625972747802734, + "learning_rate": 3.7421636353785815e-07, + "loss": 17.8234, + "step": 444230 + }, + { + "epoch": 0.8973929063458268, + "grad_norm": 342.0977478027344, + "learning_rate": 3.740838745313974e-07, + "loss": 12.4247, + "step": 444240 + }, + { + "epoch": 0.8974131069785106, + "grad_norm": 238.92608642578125, + "learning_rate": 3.739514080710899e-07, + "loss": 10.3439, + "step": 444250 + }, + { + "epoch": 0.8974333076111943, + "grad_norm": 943.6799926757812, + "learning_rate": 3.738189641575818e-07, + "loss": 27.6635, + "step": 444260 + }, + { + "epoch": 0.8974535082438782, + "grad_norm": 413.5370788574219, + "learning_rate": 3.7368654279151985e-07, + "loss": 41.3092, + "step": 444270 + }, + { + "epoch": 0.897473708876562, + "grad_norm": 118.44355773925781, + "learning_rate": 3.7355414397354796e-07, + "loss": 10.9965, + "step": 444280 + }, + { + "epoch": 0.8974939095092458, + "grad_norm": 392.2132263183594, + "learning_rate": 
3.7342176770431284e-07, + "loss": 15.963, + "step": 444290 + }, + { + "epoch": 0.8975141101419296, + "grad_norm": 335.9568176269531, + "learning_rate": 3.732894139844578e-07, + "loss": 40.2798, + "step": 444300 + }, + { + "epoch": 0.8975343107746134, + "grad_norm": 109.2363510131836, + "learning_rate": 3.731570828146297e-07, + "loss": 17.8185, + "step": 444310 + }, + { + "epoch": 0.8975545114072973, + "grad_norm": 73.78199768066406, + "learning_rate": 3.730247741954729e-07, + "loss": 16.4353, + "step": 444320 + }, + { + "epoch": 0.8975747120399811, + "grad_norm": 389.6304626464844, + "learning_rate": 3.7289248812763137e-07, + "loss": 10.4385, + "step": 444330 + }, + { + "epoch": 0.8975949126726649, + "grad_norm": 545.3394165039062, + "learning_rate": 3.727602246117518e-07, + "loss": 23.9497, + "step": 444340 + }, + { + "epoch": 0.8976151133053487, + "grad_norm": 512.56640625, + "learning_rate": 3.7262798364847753e-07, + "loss": 23.9551, + "step": 444350 + }, + { + "epoch": 0.8976353139380325, + "grad_norm": 228.80422973632812, + "learning_rate": 3.72495765238452e-07, + "loss": 15.2482, + "step": 444360 + }, + { + "epoch": 0.8976555145707164, + "grad_norm": 306.053955078125, + "learning_rate": 3.723635693823213e-07, + "loss": 21.9044, + "step": 444370 + }, + { + "epoch": 0.8976757152034002, + "grad_norm": 275.0199279785156, + "learning_rate": 3.7223139608073e-07, + "loss": 18.1763, + "step": 444380 + }, + { + "epoch": 0.897695915836084, + "grad_norm": 606.3357543945312, + "learning_rate": 3.720992453343214e-07, + "loss": 22.7662, + "step": 444390 + }, + { + "epoch": 0.8977161164687678, + "grad_norm": 194.8509979248047, + "learning_rate": 3.7196711714373947e-07, + "loss": 12.8268, + "step": 444400 + }, + { + "epoch": 0.8977363171014516, + "grad_norm": 116.74224853515625, + "learning_rate": 3.7183501150962863e-07, + "loss": 15.3617, + "step": 444410 + }, + { + "epoch": 0.8977565177341355, + "grad_norm": 469.42413330078125, + "learning_rate": 3.7170292843263347e-07, + "loss": 21.9017, + "step": 444420 + }, + { + "epoch": 0.8977767183668193, + "grad_norm": 342.9283142089844, + "learning_rate": 3.715708679133956e-07, + "loss": 10.503, + "step": 444430 + }, + { + "epoch": 0.8977969189995031, + "grad_norm": 269.0628662109375, + "learning_rate": 3.714388299525595e-07, + "loss": 12.1823, + "step": 444440 + }, + { + "epoch": 0.8978171196321869, + "grad_norm": 332.29168701171875, + "learning_rate": 3.713068145507709e-07, + "loss": 12.9783, + "step": 444450 + }, + { + "epoch": 0.8978373202648707, + "grad_norm": 60.27352523803711, + "learning_rate": 3.7117482170867083e-07, + "loss": 16.7351, + "step": 444460 + }, + { + "epoch": 0.8978575208975546, + "grad_norm": 413.5735168457031, + "learning_rate": 3.710428514269027e-07, + "loss": 11.7485, + "step": 444470 + }, + { + "epoch": 0.8978777215302384, + "grad_norm": 770.4952392578125, + "learning_rate": 3.7091090370611093e-07, + "loss": 17.4015, + "step": 444480 + }, + { + "epoch": 0.8978979221629222, + "grad_norm": 228.45712280273438, + "learning_rate": 3.707789785469379e-07, + "loss": 16.2267, + "step": 444490 + }, + { + "epoch": 0.897918122795606, + "grad_norm": 236.81741333007812, + "learning_rate": 3.7064707595002636e-07, + "loss": 27.0402, + "step": 444500 + }, + { + "epoch": 0.8979383234282897, + "grad_norm": 83.5274887084961, + "learning_rate": 3.705151959160197e-07, + "loss": 18.8256, + "step": 444510 + }, + { + "epoch": 0.8979585240609735, + "grad_norm": 512.5786743164062, + "learning_rate": 3.703833384455602e-07, + "loss": 20.8874, + "step": 
444520 + }, + { + "epoch": 0.8979787246936574, + "grad_norm": 270.5484619140625, + "learning_rate": 3.702515035392912e-07, + "loss": 28.6608, + "step": 444530 + }, + { + "epoch": 0.8979989253263412, + "grad_norm": 0.0, + "learning_rate": 3.7011969119785496e-07, + "loss": 4.3968, + "step": 444540 + }, + { + "epoch": 0.898019125959025, + "grad_norm": 415.77490234375, + "learning_rate": 3.6998790142189324e-07, + "loss": 12.6397, + "step": 444550 + }, + { + "epoch": 0.8980393265917088, + "grad_norm": 994.0740966796875, + "learning_rate": 3.698561342120499e-07, + "loss": 16.8994, + "step": 444560 + }, + { + "epoch": 0.8980595272243926, + "grad_norm": 106.81134033203125, + "learning_rate": 3.6972438956896563e-07, + "loss": 10.136, + "step": 444570 + }, + { + "epoch": 0.8980797278570765, + "grad_norm": 483.38348388671875, + "learning_rate": 3.695926674932826e-07, + "loss": 33.2317, + "step": 444580 + }, + { + "epoch": 0.8980999284897603, + "grad_norm": 474.3923034667969, + "learning_rate": 3.694609679856431e-07, + "loss": 28.5039, + "step": 444590 + }, + { + "epoch": 0.8981201291224441, + "grad_norm": 798.9323120117188, + "learning_rate": 3.693292910466906e-07, + "loss": 21.2564, + "step": 444600 + }, + { + "epoch": 0.8981403297551279, + "grad_norm": 342.9609680175781, + "learning_rate": 3.69197636677065e-07, + "loss": 12.9522, + "step": 444610 + }, + { + "epoch": 0.8981605303878117, + "grad_norm": 172.19468688964844, + "learning_rate": 3.690660048774075e-07, + "loss": 12.2965, + "step": 444620 + }, + { + "epoch": 0.8981807310204956, + "grad_norm": 224.4616241455078, + "learning_rate": 3.6893439564836155e-07, + "loss": 8.3202, + "step": 444630 + }, + { + "epoch": 0.8982009316531794, + "grad_norm": 38.49509048461914, + "learning_rate": 3.688028089905682e-07, + "loss": 17.5146, + "step": 444640 + }, + { + "epoch": 0.8982211322858632, + "grad_norm": 54.56721115112305, + "learning_rate": 3.6867124490466697e-07, + "loss": 15.7033, + "step": 444650 + }, + { + "epoch": 0.898241332918547, + "grad_norm": 315.0137634277344, + "learning_rate": 3.685397033913002e-07, + "loss": 14.4688, + "step": 444660 + }, + { + "epoch": 0.8982615335512308, + "grad_norm": 285.6546936035156, + "learning_rate": 3.6840818445111114e-07, + "loss": 17.9773, + "step": 444670 + }, + { + "epoch": 0.8982817341839147, + "grad_norm": 588.5462036132812, + "learning_rate": 3.6827668808473714e-07, + "loss": 10.5859, + "step": 444680 + }, + { + "epoch": 0.8983019348165985, + "grad_norm": 444.90185546875, + "learning_rate": 3.68145214292821e-07, + "loss": 15.1751, + "step": 444690 + }, + { + "epoch": 0.8983221354492823, + "grad_norm": 1217.4012451171875, + "learning_rate": 3.680137630760039e-07, + "loss": 23.9297, + "step": 444700 + }, + { + "epoch": 0.8983423360819661, + "grad_norm": 432.7891845703125, + "learning_rate": 3.6788233443492583e-07, + "loss": 22.0646, + "step": 444710 + }, + { + "epoch": 0.8983625367146499, + "grad_norm": 320.3438720703125, + "learning_rate": 3.6775092837022685e-07, + "loss": 19.194, + "step": 444720 + }, + { + "epoch": 0.8983827373473338, + "grad_norm": 209.52174377441406, + "learning_rate": 3.676195448825487e-07, + "loss": 14.6774, + "step": 444730 + }, + { + "epoch": 0.8984029379800176, + "grad_norm": 575.9804077148438, + "learning_rate": 3.674881839725314e-07, + "loss": 28.4419, + "step": 444740 + }, + { + "epoch": 0.8984231386127014, + "grad_norm": 313.8929138183594, + "learning_rate": 3.6735684564081385e-07, + "loss": 20.3377, + "step": 444750 + }, + { + "epoch": 0.8984433392453852, + "grad_norm": 
266.3089904785156, + "learning_rate": 3.672255298880367e-07, + "loss": 33.5205, + "step": 444760 + }, + { + "epoch": 0.8984635398780689, + "grad_norm": 223.70948791503906, + "learning_rate": 3.670942367148417e-07, + "loss": 21.5937, + "step": 444770 + }, + { + "epoch": 0.8984837405107527, + "grad_norm": 568.1279907226562, + "learning_rate": 3.669629661218671e-07, + "loss": 25.2434, + "step": 444780 + }, + { + "epoch": 0.8985039411434366, + "grad_norm": 15.84461498260498, + "learning_rate": 3.66831718109753e-07, + "loss": 14.5056, + "step": 444790 + }, + { + "epoch": 0.8985241417761204, + "grad_norm": 378.42156982421875, + "learning_rate": 3.6670049267913954e-07, + "loss": 15.9576, + "step": 444800 + }, + { + "epoch": 0.8985443424088042, + "grad_norm": 416.7359313964844, + "learning_rate": 3.665692898306655e-07, + "loss": 18.7411, + "step": 444810 + }, + { + "epoch": 0.898564543041488, + "grad_norm": 205.28900146484375, + "learning_rate": 3.664381095649705e-07, + "loss": 12.982, + "step": 444820 + }, + { + "epoch": 0.8985847436741718, + "grad_norm": 116.66765594482422, + "learning_rate": 3.6630695188269505e-07, + "loss": 10.0713, + "step": 444830 + }, + { + "epoch": 0.8986049443068557, + "grad_norm": 293.1413269042969, + "learning_rate": 3.6617581678447647e-07, + "loss": 14.8839, + "step": 444840 + }, + { + "epoch": 0.8986251449395395, + "grad_norm": 92.39081573486328, + "learning_rate": 3.6604470427095587e-07, + "loss": 13.498, + "step": 444850 + }, + { + "epoch": 0.8986453455722233, + "grad_norm": 314.4815979003906, + "learning_rate": 3.6591361434277105e-07, + "loss": 21.9132, + "step": 444860 + }, + { + "epoch": 0.8986655462049071, + "grad_norm": 375.6960144042969, + "learning_rate": 3.6578254700056107e-07, + "loss": 8.0686, + "step": 444870 + }, + { + "epoch": 0.898685746837591, + "grad_norm": 254.2244873046875, + "learning_rate": 3.6565150224496525e-07, + "loss": 8.0898, + "step": 444880 + }, + { + "epoch": 0.8987059474702748, + "grad_norm": 192.85308837890625, + "learning_rate": 3.65520480076621e-07, + "loss": 12.1381, + "step": 444890 + }, + { + "epoch": 0.8987261481029586, + "grad_norm": 262.3988952636719, + "learning_rate": 3.6538948049616886e-07, + "loss": 11.3097, + "step": 444900 + }, + { + "epoch": 0.8987463487356424, + "grad_norm": 374.4770812988281, + "learning_rate": 3.6525850350424554e-07, + "loss": 8.4125, + "step": 444910 + }, + { + "epoch": 0.8987665493683262, + "grad_norm": 919.95458984375, + "learning_rate": 3.651275491014905e-07, + "loss": 28.0506, + "step": 444920 + }, + { + "epoch": 0.89878675000101, + "grad_norm": 256.9989013671875, + "learning_rate": 3.649966172885422e-07, + "loss": 18.5785, + "step": 444930 + }, + { + "epoch": 0.8988069506336939, + "grad_norm": 302.6526184082031, + "learning_rate": 3.648657080660373e-07, + "loss": 14.164, + "step": 444940 + }, + { + "epoch": 0.8988271512663777, + "grad_norm": 138.86810302734375, + "learning_rate": 3.6473482143461523e-07, + "loss": 15.4201, + "step": 444950 + }, + { + "epoch": 0.8988473518990615, + "grad_norm": 253.0792999267578, + "learning_rate": 3.6460395739491337e-07, + "loss": 8.506, + "step": 444960 + }, + { + "epoch": 0.8988675525317453, + "grad_norm": 267.2154846191406, + "learning_rate": 3.644731159475695e-07, + "loss": 14.1381, + "step": 444970 + }, + { + "epoch": 0.8988877531644291, + "grad_norm": 773.7963256835938, + "learning_rate": 3.643422970932209e-07, + "loss": 28.2256, + "step": 444980 + }, + { + "epoch": 0.898907953797113, + "grad_norm": 183.3919219970703, + "learning_rate": 
3.6421150083250754e-07, + "loss": 11.0542, + "step": 444990 + }, + { + "epoch": 0.8989281544297968, + "grad_norm": 614.067138671875, + "learning_rate": 3.6408072716606346e-07, + "loss": 20.9825, + "step": 445000 + }, + { + "epoch": 0.8989483550624806, + "grad_norm": 728.6051025390625, + "learning_rate": 3.6394997609452755e-07, + "loss": 13.9006, + "step": 445010 + }, + { + "epoch": 0.8989685556951643, + "grad_norm": 354.2909851074219, + "learning_rate": 3.6381924761853814e-07, + "loss": 13.5888, + "step": 445020 + }, + { + "epoch": 0.8989887563278481, + "grad_norm": 600.506591796875, + "learning_rate": 3.6368854173873094e-07, + "loss": 20.5429, + "step": 445030 + }, + { + "epoch": 0.899008956960532, + "grad_norm": 345.0184631347656, + "learning_rate": 3.635578584557431e-07, + "loss": 25.2817, + "step": 445040 + }, + { + "epoch": 0.8990291575932158, + "grad_norm": 493.048095703125, + "learning_rate": 3.6342719777021194e-07, + "loss": 23.0403, + "step": 445050 + }, + { + "epoch": 0.8990493582258996, + "grad_norm": 174.3533477783203, + "learning_rate": 3.6329655968277477e-07, + "loss": 16.8303, + "step": 445060 + }, + { + "epoch": 0.8990695588585834, + "grad_norm": 57.79670333862305, + "learning_rate": 3.6316594419406826e-07, + "loss": 14.3608, + "step": 445070 + }, + { + "epoch": 0.8990897594912672, + "grad_norm": 422.2472229003906, + "learning_rate": 3.6303535130472743e-07, + "loss": 24.412, + "step": 445080 + }, + { + "epoch": 0.8991099601239511, + "grad_norm": 244.47982788085938, + "learning_rate": 3.6290478101539073e-07, + "loss": 11.1764, + "step": 445090 + }, + { + "epoch": 0.8991301607566349, + "grad_norm": 318.8597412109375, + "learning_rate": 3.627742333266937e-07, + "loss": 11.5762, + "step": 445100 + }, + { + "epoch": 0.8991503613893187, + "grad_norm": 132.2611083984375, + "learning_rate": 3.6264370823927196e-07, + "loss": 18.6923, + "step": 445110 + }, + { + "epoch": 0.8991705620220025, + "grad_norm": 481.2422180175781, + "learning_rate": 3.6251320575376336e-07, + "loss": 23.3867, + "step": 445120 + }, + { + "epoch": 0.8991907626546863, + "grad_norm": 274.683837890625, + "learning_rate": 3.6238272587080183e-07, + "loss": 15.6587, + "step": 445130 + }, + { + "epoch": 0.8992109632873702, + "grad_norm": 292.89007568359375, + "learning_rate": 3.6225226859102515e-07, + "loss": 8.5681, + "step": 445140 + }, + { + "epoch": 0.899231163920054, + "grad_norm": 596.2623901367188, + "learning_rate": 3.621218339150684e-07, + "loss": 17.0533, + "step": 445150 + }, + { + "epoch": 0.8992513645527378, + "grad_norm": 728.558837890625, + "learning_rate": 3.619914218435666e-07, + "loss": 16.9225, + "step": 445160 + }, + { + "epoch": 0.8992715651854216, + "grad_norm": 299.84503173828125, + "learning_rate": 3.6186103237715706e-07, + "loss": 12.8559, + "step": 445170 + }, + { + "epoch": 0.8992917658181054, + "grad_norm": 665.6764526367188, + "learning_rate": 3.617306655164743e-07, + "loss": 37.6737, + "step": 445180 + }, + { + "epoch": 0.8993119664507893, + "grad_norm": 377.08441162109375, + "learning_rate": 3.6160032126215274e-07, + "loss": 15.4841, + "step": 445190 + }, + { + "epoch": 0.8993321670834731, + "grad_norm": 668.6402587890625, + "learning_rate": 3.614699996148285e-07, + "loss": 24.6529, + "step": 445200 + }, + { + "epoch": 0.8993523677161569, + "grad_norm": 458.1636657714844, + "learning_rate": 3.613397005751379e-07, + "loss": 26.3734, + "step": 445210 + }, + { + "epoch": 0.8993725683488407, + "grad_norm": 363.49127197265625, + "learning_rate": 3.612094241437153e-07, + "loss": 22.0163, + 
"step": 445220 + }, + { + "epoch": 0.8993927689815245, + "grad_norm": 333.63128662109375, + "learning_rate": 3.610791703211941e-07, + "loss": 14.0242, + "step": 445230 + }, + { + "epoch": 0.8994129696142084, + "grad_norm": 389.77337646484375, + "learning_rate": 3.6094893910821103e-07, + "loss": 20.8929, + "step": 445240 + }, + { + "epoch": 0.8994331702468922, + "grad_norm": 279.6188049316406, + "learning_rate": 3.608187305054006e-07, + "loss": 11.8537, + "step": 445250 + }, + { + "epoch": 0.899453370879576, + "grad_norm": 544.8707885742188, + "learning_rate": 3.606885445133962e-07, + "loss": 12.3577, + "step": 445260 + }, + { + "epoch": 0.8994735715122598, + "grad_norm": 241.91265869140625, + "learning_rate": 3.605583811328328e-07, + "loss": 23.5405, + "step": 445270 + }, + { + "epoch": 0.8994937721449435, + "grad_norm": 481.5022277832031, + "learning_rate": 3.604282403643472e-07, + "loss": 24.3286, + "step": 445280 + }, + { + "epoch": 0.8995139727776273, + "grad_norm": 165.5880584716797, + "learning_rate": 3.6029812220857e-07, + "loss": 18.6106, + "step": 445290 + }, + { + "epoch": 0.8995341734103112, + "grad_norm": 96.1946792602539, + "learning_rate": 3.601680266661367e-07, + "loss": 17.9782, + "step": 445300 + }, + { + "epoch": 0.899554374042995, + "grad_norm": 529.3019409179688, + "learning_rate": 3.6003795373768303e-07, + "loss": 18.7, + "step": 445310 + }, + { + "epoch": 0.8995745746756788, + "grad_norm": 1.002054214477539, + "learning_rate": 3.5990790342384117e-07, + "loss": 18.6463, + "step": 445320 + }, + { + "epoch": 0.8995947753083626, + "grad_norm": 678.5360717773438, + "learning_rate": 3.5977787572524457e-07, + "loss": 16.6345, + "step": 445330 + }, + { + "epoch": 0.8996149759410464, + "grad_norm": 79.41068267822266, + "learning_rate": 3.596478706425277e-07, + "loss": 16.8962, + "step": 445340 + }, + { + "epoch": 0.8996351765737303, + "grad_norm": 608.1316528320312, + "learning_rate": 3.5951788817632615e-07, + "loss": 15.5022, + "step": 445350 + }, + { + "epoch": 0.8996553772064141, + "grad_norm": 404.8101806640625, + "learning_rate": 3.5938792832726996e-07, + "loss": 23.5742, + "step": 445360 + }, + { + "epoch": 0.8996755778390979, + "grad_norm": 316.1697998046875, + "learning_rate": 3.5925799109599426e-07, + "loss": 15.4585, + "step": 445370 + }, + { + "epoch": 0.8996957784717817, + "grad_norm": 362.8455505371094, + "learning_rate": 3.5912807648313285e-07, + "loss": 19.0565, + "step": 445380 + }, + { + "epoch": 0.8997159791044655, + "grad_norm": 341.9504699707031, + "learning_rate": 3.5899818448931865e-07, + "loss": 20.8177, + "step": 445390 + }, + { + "epoch": 0.8997361797371494, + "grad_norm": 401.0293273925781, + "learning_rate": 3.5886831511518336e-07, + "loss": 24.3106, + "step": 445400 + }, + { + "epoch": 0.8997563803698332, + "grad_norm": 161.62619018554688, + "learning_rate": 3.5873846836136204e-07, + "loss": 25.0842, + "step": 445410 + }, + { + "epoch": 0.899776581002517, + "grad_norm": 0.0, + "learning_rate": 3.586086442284864e-07, + "loss": 22.1347, + "step": 445420 + }, + { + "epoch": 0.8997967816352008, + "grad_norm": 324.30487060546875, + "learning_rate": 3.5847884271718814e-07, + "loss": 21.2996, + "step": 445430 + }, + { + "epoch": 0.8998169822678846, + "grad_norm": 198.55274963378906, + "learning_rate": 3.583490638281023e-07, + "loss": 24.4018, + "step": 445440 + }, + { + "epoch": 0.8998371829005685, + "grad_norm": 240.75051879882812, + "learning_rate": 3.5821930756185894e-07, + "loss": 15.4742, + "step": 445450 + }, + { + "epoch": 0.8998573835332523, + 
"grad_norm": 526.855224609375, + "learning_rate": 3.5808957391909315e-07, + "loss": 14.6135, + "step": 445460 + }, + { + "epoch": 0.8998775841659361, + "grad_norm": 506.0526123046875, + "learning_rate": 3.579598629004355e-07, + "loss": 14.1117, + "step": 445470 + }, + { + "epoch": 0.8998977847986199, + "grad_norm": 545.1388549804688, + "learning_rate": 3.5783017450651714e-07, + "loss": 12.2958, + "step": 445480 + }, + { + "epoch": 0.8999179854313037, + "grad_norm": 520.304443359375, + "learning_rate": 3.5770050873797314e-07, + "loss": 19.0794, + "step": 445490 + }, + { + "epoch": 0.8999381860639876, + "grad_norm": 142.5303955078125, + "learning_rate": 3.575708655954324e-07, + "loss": 8.911, + "step": 445500 + }, + { + "epoch": 0.8999583866966714, + "grad_norm": 366.2143249511719, + "learning_rate": 3.5744124507952895e-07, + "loss": 15.2595, + "step": 445510 + }, + { + "epoch": 0.8999785873293552, + "grad_norm": 171.73390197753906, + "learning_rate": 3.573116471908933e-07, + "loss": 21.8395, + "step": 445520 + }, + { + "epoch": 0.899998787962039, + "grad_norm": 449.6515197753906, + "learning_rate": 3.571820719301583e-07, + "loss": 17.7359, + "step": 445530 + }, + { + "epoch": 0.9000189885947227, + "grad_norm": 827.51953125, + "learning_rate": 3.570525192979546e-07, + "loss": 15.0506, + "step": 445540 + }, + { + "epoch": 0.9000391892274066, + "grad_norm": 465.2867126464844, + "learning_rate": 3.569229892949133e-07, + "loss": 21.303, + "step": 445550 + }, + { + "epoch": 0.9000593898600904, + "grad_norm": 218.71087646484375, + "learning_rate": 3.5679348192166675e-07, + "loss": 8.2386, + "step": 445560 + }, + { + "epoch": 0.9000795904927742, + "grad_norm": 721.7833251953125, + "learning_rate": 3.5666399717884604e-07, + "loss": 20.4058, + "step": 445570 + }, + { + "epoch": 0.900099791125458, + "grad_norm": 141.5386505126953, + "learning_rate": 3.565345350670807e-07, + "loss": 12.856, + "step": 445580 + }, + { + "epoch": 0.9001199917581418, + "grad_norm": 508.2934875488281, + "learning_rate": 3.56405095587003e-07, + "loss": 12.1547, + "step": 445590 + }, + { + "epoch": 0.9001401923908257, + "grad_norm": 132.60581970214844, + "learning_rate": 3.562756787392452e-07, + "loss": 23.756, + "step": 445600 + }, + { + "epoch": 0.9001603930235095, + "grad_norm": 340.5688781738281, + "learning_rate": 3.561462845244351e-07, + "loss": 13.3342, + "step": 445610 + }, + { + "epoch": 0.9001805936561933, + "grad_norm": 351.4344482421875, + "learning_rate": 3.560169129432045e-07, + "loss": 26.1856, + "step": 445620 + }, + { + "epoch": 0.9002007942888771, + "grad_norm": 527.7457275390625, + "learning_rate": 3.5588756399618507e-07, + "loss": 9.6738, + "step": 445630 + }, + { + "epoch": 0.9002209949215609, + "grad_norm": 262.84136962890625, + "learning_rate": 3.557582376840063e-07, + "loss": 14.2681, + "step": 445640 + }, + { + "epoch": 0.9002411955542448, + "grad_norm": 526.7639770507812, + "learning_rate": 3.556289340072977e-07, + "loss": 10.0684, + "step": 445650 + }, + { + "epoch": 0.9002613961869286, + "grad_norm": 321.4812316894531, + "learning_rate": 3.55499652966691e-07, + "loss": 17.8272, + "step": 445660 + }, + { + "epoch": 0.9002815968196124, + "grad_norm": 91.7345962524414, + "learning_rate": 3.5537039456281674e-07, + "loss": 13.1638, + "step": 445670 + }, + { + "epoch": 0.9003017974522962, + "grad_norm": 111.93982696533203, + "learning_rate": 3.5524115879630225e-07, + "loss": 7.6087, + "step": 445680 + }, + { + "epoch": 0.90032199808498, + "grad_norm": 481.2381286621094, + "learning_rate": 
3.551119456677793e-07, + "loss": 15.0273, + "step": 445690 + }, + { + "epoch": 0.9003421987176639, + "grad_norm": 403.2091979980469, + "learning_rate": 3.5498275517787783e-07, + "loss": 33.3146, + "step": 445700 + }, + { + "epoch": 0.9003623993503477, + "grad_norm": 309.87310791015625, + "learning_rate": 3.5485358732722743e-07, + "loss": 17.5942, + "step": 445710 + }, + { + "epoch": 0.9003825999830315, + "grad_norm": 732.3163452148438, + "learning_rate": 3.547244421164564e-07, + "loss": 20.9732, + "step": 445720 + }, + { + "epoch": 0.9004028006157153, + "grad_norm": 365.7209167480469, + "learning_rate": 3.545953195461954e-07, + "loss": 22.0166, + "step": 445730 + }, + { + "epoch": 0.9004230012483991, + "grad_norm": 365.7203369140625, + "learning_rate": 3.5446621961707284e-07, + "loss": 14.7776, + "step": 445740 + }, + { + "epoch": 0.900443201881083, + "grad_norm": 545.6244506835938, + "learning_rate": 3.5433714232971927e-07, + "loss": 26.0447, + "step": 445750 + }, + { + "epoch": 0.9004634025137668, + "grad_norm": 219.1865234375, + "learning_rate": 3.5420808768476313e-07, + "loss": 17.1418, + "step": 445760 + }, + { + "epoch": 0.9004836031464506, + "grad_norm": 876.4417114257812, + "learning_rate": 3.540790556828327e-07, + "loss": 14.8119, + "step": 445770 + }, + { + "epoch": 0.9005038037791344, + "grad_norm": 154.30516052246094, + "learning_rate": 3.539500463245582e-07, + "loss": 42.6618, + "step": 445780 + }, + { + "epoch": 0.9005240044118181, + "grad_norm": 280.67626953125, + "learning_rate": 3.5382105961056735e-07, + "loss": 24.9225, + "step": 445790 + }, + { + "epoch": 0.9005442050445019, + "grad_norm": 122.18487548828125, + "learning_rate": 3.5369209554148854e-07, + "loss": 17.0533, + "step": 445800 + }, + { + "epoch": 0.9005644056771858, + "grad_norm": 348.6451110839844, + "learning_rate": 3.535631541179507e-07, + "loss": 15.0391, + "step": 445810 + }, + { + "epoch": 0.9005846063098696, + "grad_norm": 140.8256072998047, + "learning_rate": 3.534342353405834e-07, + "loss": 13.4767, + "step": 445820 + }, + { + "epoch": 0.9006048069425534, + "grad_norm": 281.6974182128906, + "learning_rate": 3.533053392100144e-07, + "loss": 24.3823, + "step": 445830 + }, + { + "epoch": 0.9006250075752372, + "grad_norm": 448.6886901855469, + "learning_rate": 3.531764657268705e-07, + "loss": 15.1547, + "step": 445840 + }, + { + "epoch": 0.900645208207921, + "grad_norm": 455.422119140625, + "learning_rate": 3.530476148917816e-07, + "loss": 31.2984, + "step": 445850 + }, + { + "epoch": 0.9006654088406049, + "grad_norm": 555.0156860351562, + "learning_rate": 3.5291878670537516e-07, + "loss": 22.9467, + "step": 445860 + }, + { + "epoch": 0.9006856094732887, + "grad_norm": 285.40625, + "learning_rate": 3.5278998116827835e-07, + "loss": 23.5238, + "step": 445870 + }, + { + "epoch": 0.9007058101059725, + "grad_norm": 291.3361511230469, + "learning_rate": 3.5266119828111953e-07, + "loss": 21.3014, + "step": 445880 + }, + { + "epoch": 0.9007260107386563, + "grad_norm": 621.8134155273438, + "learning_rate": 3.525324380445277e-07, + "loss": 15.1114, + "step": 445890 + }, + { + "epoch": 0.9007462113713401, + "grad_norm": 316.3350524902344, + "learning_rate": 3.524037004591274e-07, + "loss": 15.3083, + "step": 445900 + }, + { + "epoch": 0.900766412004024, + "grad_norm": 235.22181701660156, + "learning_rate": 3.5227498552554805e-07, + "loss": 18.8735, + "step": 445910 + }, + { + "epoch": 0.9007866126367078, + "grad_norm": 680.9277954101562, + "learning_rate": 3.5214629324441754e-07, + "loss": 33.483, + "step": 445920 + 
}, + { + "epoch": 0.9008068132693916, + "grad_norm": 646.0338745117188, + "learning_rate": 3.5201762361636195e-07, + "loss": 12.3207, + "step": 445930 + }, + { + "epoch": 0.9008270139020754, + "grad_norm": 601.1619873046875, + "learning_rate": 3.5188897664200804e-07, + "loss": 17.9275, + "step": 445940 + }, + { + "epoch": 0.9008472145347592, + "grad_norm": 357.0928039550781, + "learning_rate": 3.5176035232198367e-07, + "loss": 20.5941, + "step": 445950 + }, + { + "epoch": 0.9008674151674431, + "grad_norm": 431.9078674316406, + "learning_rate": 3.516317506569172e-07, + "loss": 20.7034, + "step": 445960 + }, + { + "epoch": 0.9008876158001269, + "grad_norm": 188.78501892089844, + "learning_rate": 3.515031716474321e-07, + "loss": 17.5412, + "step": 445970 + }, + { + "epoch": 0.9009078164328107, + "grad_norm": 616.5831298828125, + "learning_rate": 3.513746152941572e-07, + "loss": 15.5337, + "step": 445980 + }, + { + "epoch": 0.9009280170654945, + "grad_norm": 245.98434448242188, + "learning_rate": 3.5124608159771864e-07, + "loss": 13.5908, + "step": 445990 + }, + { + "epoch": 0.9009482176981783, + "grad_norm": 259.31622314453125, + "learning_rate": 3.511175705587433e-07, + "loss": 21.3198, + "step": 446000 + }, + { + "epoch": 0.9009684183308622, + "grad_norm": 614.026123046875, + "learning_rate": 3.509890821778561e-07, + "loss": 17.3189, + "step": 446010 + }, + { + "epoch": 0.900988618963546, + "grad_norm": 1304.6748046875, + "learning_rate": 3.508606164556855e-07, + "loss": 22.5737, + "step": 446020 + }, + { + "epoch": 0.9010088195962298, + "grad_norm": 445.1016845703125, + "learning_rate": 3.507321733928559e-07, + "loss": 13.0387, + "step": 446030 + }, + { + "epoch": 0.9010290202289136, + "grad_norm": 323.90216064453125, + "learning_rate": 3.5060375298999303e-07, + "loss": 26.3852, + "step": 446040 + }, + { + "epoch": 0.9010492208615973, + "grad_norm": 340.41754150390625, + "learning_rate": 3.5047535524772467e-07, + "loss": 23.3956, + "step": 446050 + }, + { + "epoch": 0.9010694214942812, + "grad_norm": 309.1763916015625, + "learning_rate": 3.5034698016667423e-07, + "loss": 13.9331, + "step": 446060 + }, + { + "epoch": 0.901089622126965, + "grad_norm": 299.2520751953125, + "learning_rate": 3.5021862774747007e-07, + "loss": 17.1172, + "step": 446070 + }, + { + "epoch": 0.9011098227596488, + "grad_norm": 73.07524108886719, + "learning_rate": 3.500902979907356e-07, + "loss": 15.6339, + "step": 446080 + }, + { + "epoch": 0.9011300233923326, + "grad_norm": 607.2282104492188, + "learning_rate": 3.4996199089709695e-07, + "loss": 13.3263, + "step": 446090 + }, + { + "epoch": 0.9011502240250164, + "grad_norm": 414.443359375, + "learning_rate": 3.498337064671803e-07, + "loss": 21.8615, + "step": 446100 + }, + { + "epoch": 0.9011704246577003, + "grad_norm": 176.03738403320312, + "learning_rate": 3.4970544470160905e-07, + "loss": 14.6029, + "step": 446110 + }, + { + "epoch": 0.9011906252903841, + "grad_norm": 243.4033203125, + "learning_rate": 3.495772056010105e-07, + "loss": 21.5409, + "step": 446120 + }, + { + "epoch": 0.9012108259230679, + "grad_norm": 423.4161071777344, + "learning_rate": 3.4944898916600743e-07, + "loss": 27.8391, + "step": 446130 + }, + { + "epoch": 0.9012310265557517, + "grad_norm": 488.76904296875, + "learning_rate": 3.493207953972272e-07, + "loss": 12.8866, + "step": 446140 + }, + { + "epoch": 0.9012512271884355, + "grad_norm": 815.1055297851562, + "learning_rate": 3.491926242952931e-07, + "loss": 20.7648, + "step": 446150 + }, + { + "epoch": 0.9012714278211194, + "grad_norm": 
744.4523315429688, + "learning_rate": 3.4906447586082917e-07, + "loss": 27.846, + "step": 446160 + }, + { + "epoch": 0.9012916284538032, + "grad_norm": 186.1259002685547, + "learning_rate": 3.48936350094462e-07, + "loss": 11.0203, + "step": 446170 + }, + { + "epoch": 0.901311829086487, + "grad_norm": 751.5745849609375, + "learning_rate": 3.488082469968146e-07, + "loss": 27.7282, + "step": 446180 + }, + { + "epoch": 0.9013320297191708, + "grad_norm": 876.5944213867188, + "learning_rate": 3.4868016656851135e-07, + "loss": 20.8903, + "step": 446190 + }, + { + "epoch": 0.9013522303518546, + "grad_norm": 248.3055419921875, + "learning_rate": 3.4855210881017675e-07, + "loss": 18.6062, + "step": 446200 + }, + { + "epoch": 0.9013724309845385, + "grad_norm": 534.772216796875, + "learning_rate": 3.4842407372243646e-07, + "loss": 30.0212, + "step": 446210 + }, + { + "epoch": 0.9013926316172223, + "grad_norm": 178.67788696289062, + "learning_rate": 3.482960613059111e-07, + "loss": 16.603, + "step": 446220 + }, + { + "epoch": 0.9014128322499061, + "grad_norm": 169.78196716308594, + "learning_rate": 3.481680715612273e-07, + "loss": 10.8747, + "step": 446230 + }, + { + "epoch": 0.9014330328825899, + "grad_norm": 949.7888793945312, + "learning_rate": 3.480401044890086e-07, + "loss": 16.0835, + "step": 446240 + }, + { + "epoch": 0.9014532335152737, + "grad_norm": 310.1080017089844, + "learning_rate": 3.479121600898777e-07, + "loss": 22.9904, + "step": 446250 + }, + { + "epoch": 0.9014734341479576, + "grad_norm": 533.2051391601562, + "learning_rate": 3.477842383644586e-07, + "loss": 14.9268, + "step": 446260 + }, + { + "epoch": 0.9014936347806414, + "grad_norm": 290.5344543457031, + "learning_rate": 3.476563393133747e-07, + "loss": 19.1723, + "step": 446270 + }, + { + "epoch": 0.9015138354133252, + "grad_norm": 248.2886962890625, + "learning_rate": 3.475284629372511e-07, + "loss": 25.3621, + "step": 446280 + }, + { + "epoch": 0.901534036046009, + "grad_norm": 496.6837463378906, + "learning_rate": 3.474006092367077e-07, + "loss": 12.9381, + "step": 446290 + }, + { + "epoch": 0.9015542366786927, + "grad_norm": 8.167790412902832, + "learning_rate": 3.472727782123697e-07, + "loss": 5.335, + "step": 446300 + }, + { + "epoch": 0.9015744373113765, + "grad_norm": 494.1587829589844, + "learning_rate": 3.4714496986486045e-07, + "loss": 14.7211, + "step": 446310 + }, + { + "epoch": 0.9015946379440604, + "grad_norm": 496.6618957519531, + "learning_rate": 3.470171841948022e-07, + "loss": 15.1881, + "step": 446320 + }, + { + "epoch": 0.9016148385767442, + "grad_norm": 318.4088134765625, + "learning_rate": 3.468894212028173e-07, + "loss": 25.2526, + "step": 446330 + }, + { + "epoch": 0.901635039209428, + "grad_norm": 424.84149169921875, + "learning_rate": 3.467616808895302e-07, + "loss": 17.9956, + "step": 446340 + }, + { + "epoch": 0.9016552398421118, + "grad_norm": 295.3107604980469, + "learning_rate": 3.4663396325556154e-07, + "loss": 8.7933, + "step": 446350 + }, + { + "epoch": 0.9016754404747956, + "grad_norm": 235.19976806640625, + "learning_rate": 3.465062683015341e-07, + "loss": 15.9845, + "step": 446360 + }, + { + "epoch": 0.9016956411074795, + "grad_norm": 327.4865417480469, + "learning_rate": 3.463785960280719e-07, + "loss": 17.9148, + "step": 446370 + }, + { + "epoch": 0.9017158417401633, + "grad_norm": 313.4536437988281, + "learning_rate": 3.462509464357944e-07, + "loss": 28.1946, + "step": 446380 + }, + { + "epoch": 0.9017360423728471, + "grad_norm": 201.65939331054688, + "learning_rate": 
3.461233195253266e-07, + "loss": 18.0275, + "step": 446390 + }, + { + "epoch": 0.9017562430055309, + "grad_norm": 977.5045166015625, + "learning_rate": 3.459957152972887e-07, + "loss": 28.939, + "step": 446400 + }, + { + "epoch": 0.9017764436382147, + "grad_norm": 0.0, + "learning_rate": 3.45868133752304e-07, + "loss": 17.0587, + "step": 446410 + }, + { + "epoch": 0.9017966442708986, + "grad_norm": 593.5664672851562, + "learning_rate": 3.45740574890992e-07, + "loss": 18.1876, + "step": 446420 + }, + { + "epoch": 0.9018168449035824, + "grad_norm": 122.75834655761719, + "learning_rate": 3.456130387139778e-07, + "loss": 40.8926, + "step": 446430 + }, + { + "epoch": 0.9018370455362662, + "grad_norm": 281.06292724609375, + "learning_rate": 3.454855252218803e-07, + "loss": 21.9502, + "step": 446440 + }, + { + "epoch": 0.90185724616895, + "grad_norm": 272.9182434082031, + "learning_rate": 3.4535803441532125e-07, + "loss": 15.2092, + "step": 446450 + }, + { + "epoch": 0.9018774468016338, + "grad_norm": 274.5888671875, + "learning_rate": 3.4523056629492344e-07, + "loss": 25.7554, + "step": 446460 + }, + { + "epoch": 0.9018976474343177, + "grad_norm": 422.311767578125, + "learning_rate": 3.451031208613076e-07, + "loss": 10.7451, + "step": 446470 + }, + { + "epoch": 0.9019178480670015, + "grad_norm": 267.4585266113281, + "learning_rate": 3.449756981150931e-07, + "loss": 14.6757, + "step": 446480 + }, + { + "epoch": 0.9019380486996853, + "grad_norm": 369.5973205566406, + "learning_rate": 3.448482980569029e-07, + "loss": 29.1553, + "step": 446490 + }, + { + "epoch": 0.9019582493323691, + "grad_norm": 713.7677001953125, + "learning_rate": 3.4472092068735917e-07, + "loss": 31.0512, + "step": 446500 + }, + { + "epoch": 0.9019784499650529, + "grad_norm": 286.2923889160156, + "learning_rate": 3.4459356600707925e-07, + "loss": 9.0444, + "step": 446510 + }, + { + "epoch": 0.9019986505977368, + "grad_norm": 692.444580078125, + "learning_rate": 3.44466234016686e-07, + "loss": 15.3244, + "step": 446520 + }, + { + "epoch": 0.9020188512304206, + "grad_norm": 233.64529418945312, + "learning_rate": 3.443389247168e-07, + "loss": 16.3657, + "step": 446530 + }, + { + "epoch": 0.9020390518631044, + "grad_norm": 156.468994140625, + "learning_rate": 3.442116381080418e-07, + "loss": 13.9561, + "step": 446540 + }, + { + "epoch": 0.9020592524957882, + "grad_norm": 376.273193359375, + "learning_rate": 3.4408437419103047e-07, + "loss": 9.307, + "step": 446550 + }, + { + "epoch": 0.9020794531284719, + "grad_norm": 253.2138671875, + "learning_rate": 3.4395713296638713e-07, + "loss": 18.5868, + "step": 446560 + }, + { + "epoch": 0.9020996537611558, + "grad_norm": 474.3890380859375, + "learning_rate": 3.4382991443473403e-07, + "loss": 20.5897, + "step": 446570 + }, + { + "epoch": 0.9021198543938396, + "grad_norm": 670.932373046875, + "learning_rate": 3.437027185966868e-07, + "loss": 13.156, + "step": 446580 + }, + { + "epoch": 0.9021400550265234, + "grad_norm": 267.61737060546875, + "learning_rate": 3.4357554545286833e-07, + "loss": 20.705, + "step": 446590 + }, + { + "epoch": 0.9021602556592072, + "grad_norm": 19.1137752532959, + "learning_rate": 3.434483950038986e-07, + "loss": 10.2194, + "step": 446600 + }, + { + "epoch": 0.902180456291891, + "grad_norm": 818.1859741210938, + "learning_rate": 3.433212672503966e-07, + "loss": 19.5881, + "step": 446610 + }, + { + "epoch": 0.9022006569245749, + "grad_norm": 497.45947265625, + "learning_rate": 3.431941621929813e-07, + "loss": 22.4659, + "step": 446620 + }, + { + "epoch": 
0.9022208575572587, + "grad_norm": 109.84841918945312, + "learning_rate": 3.430670798322733e-07, + "loss": 12.4741, + "step": 446630 + }, + { + "epoch": 0.9022410581899425, + "grad_norm": 232.27517700195312, + "learning_rate": 3.4294002016889206e-07, + "loss": 15.562, + "step": 446640 + }, + { + "epoch": 0.9022612588226263, + "grad_norm": 204.5904998779297, + "learning_rate": 3.428129832034549e-07, + "loss": 18.9749, + "step": 446650 + }, + { + "epoch": 0.9022814594553101, + "grad_norm": 271.8900146484375, + "learning_rate": 3.426859689365836e-07, + "loss": 11.7842, + "step": 446660 + }, + { + "epoch": 0.902301660087994, + "grad_norm": 203.10791015625, + "learning_rate": 3.425589773688953e-07, + "loss": 20.7852, + "step": 446670 + }, + { + "epoch": 0.9023218607206778, + "grad_norm": 189.75350952148438, + "learning_rate": 3.424320085010102e-07, + "loss": 19.1101, + "step": 446680 + }, + { + "epoch": 0.9023420613533616, + "grad_norm": 96.21739959716797, + "learning_rate": 3.423050623335467e-07, + "loss": 10.193, + "step": 446690 + }, + { + "epoch": 0.9023622619860454, + "grad_norm": 410.38580322265625, + "learning_rate": 3.421781388671225e-07, + "loss": 13.8715, + "step": 446700 + }, + { + "epoch": 0.9023824626187292, + "grad_norm": 29.13329315185547, + "learning_rate": 3.420512381023583e-07, + "loss": 19.4322, + "step": 446710 + }, + { + "epoch": 0.902402663251413, + "grad_norm": 76.03953552246094, + "learning_rate": 3.419243600398703e-07, + "loss": 12.2015, + "step": 446720 + }, + { + "epoch": 0.9024228638840969, + "grad_norm": 233.65858459472656, + "learning_rate": 3.4179750468027906e-07, + "loss": 15.4427, + "step": 446730 + }, + { + "epoch": 0.9024430645167807, + "grad_norm": 392.72564697265625, + "learning_rate": 3.416706720242008e-07, + "loss": 15.2899, + "step": 446740 + }, + { + "epoch": 0.9024632651494645, + "grad_norm": 527.7315673828125, + "learning_rate": 3.415438620722555e-07, + "loss": 17.6316, + "step": 446750 + }, + { + "epoch": 0.9024834657821483, + "grad_norm": 222.16433715820312, + "learning_rate": 3.4141707482506056e-07, + "loss": 16.3429, + "step": 446760 + }, + { + "epoch": 0.9025036664148322, + "grad_norm": 687.5732421875, + "learning_rate": 3.412903102832327e-07, + "loss": 10.8618, + "step": 446770 + }, + { + "epoch": 0.902523867047516, + "grad_norm": 1229.32275390625, + "learning_rate": 3.4116356844739184e-07, + "loss": 28.2971, + "step": 446780 + }, + { + "epoch": 0.9025440676801998, + "grad_norm": 58.414588928222656, + "learning_rate": 3.4103684931815483e-07, + "loss": 17.7443, + "step": 446790 + }, + { + "epoch": 0.9025642683128836, + "grad_norm": 371.36328125, + "learning_rate": 3.409101528961378e-07, + "loss": 5.0725, + "step": 446800 + }, + { + "epoch": 0.9025844689455673, + "grad_norm": 140.0763397216797, + "learning_rate": 3.407834791819603e-07, + "loss": 15.3257, + "step": 446810 + }, + { + "epoch": 0.9026046695782511, + "grad_norm": 291.6254577636719, + "learning_rate": 3.4065682817624015e-07, + "loss": 17.0342, + "step": 446820 + }, + { + "epoch": 0.902624870210935, + "grad_norm": 433.16741943359375, + "learning_rate": 3.4053019987959234e-07, + "loss": 23.7829, + "step": 446830 + }, + { + "epoch": 0.9026450708436188, + "grad_norm": 360.67669677734375, + "learning_rate": 3.404035942926348e-07, + "loss": 14.3795, + "step": 446840 + }, + { + "epoch": 0.9026652714763026, + "grad_norm": 400.54913330078125, + "learning_rate": 3.402770114159859e-07, + "loss": 25.7854, + "step": 446850 + }, + { + "epoch": 0.9026854721089864, + "grad_norm": 313.03179931640625, + 
"learning_rate": 3.401504512502618e-07, + "loss": 18.2124, + "step": 446860 + }, + { + "epoch": 0.9027056727416702, + "grad_norm": 448.4454650878906, + "learning_rate": 3.4002391379607815e-07, + "loss": 26.2969, + "step": 446870 + }, + { + "epoch": 0.9027258733743541, + "grad_norm": 15.552080154418945, + "learning_rate": 3.3989739905405326e-07, + "loss": 10.4564, + "step": 446880 + }, + { + "epoch": 0.9027460740070379, + "grad_norm": 462.7173156738281, + "learning_rate": 3.3977090702480455e-07, + "loss": 22.8206, + "step": 446890 + }, + { + "epoch": 0.9027662746397217, + "grad_norm": 673.148193359375, + "learning_rate": 3.396444377089453e-07, + "loss": 21.1259, + "step": 446900 + }, + { + "epoch": 0.9027864752724055, + "grad_norm": 480.64093017578125, + "learning_rate": 3.395179911070945e-07, + "loss": 20.9275, + "step": 446910 + }, + { + "epoch": 0.9028066759050893, + "grad_norm": 125.34583282470703, + "learning_rate": 3.3939156721986777e-07, + "loss": 15.184, + "step": 446920 + }, + { + "epoch": 0.9028268765377732, + "grad_norm": 139.38539123535156, + "learning_rate": 3.3926516604788185e-07, + "loss": 23.2434, + "step": 446930 + }, + { + "epoch": 0.902847077170457, + "grad_norm": 422.3639221191406, + "learning_rate": 3.3913878759175124e-07, + "loss": 24.0939, + "step": 446940 + }, + { + "epoch": 0.9028672778031408, + "grad_norm": 721.1874389648438, + "learning_rate": 3.3901243185209375e-07, + "loss": 20.7313, + "step": 446950 + }, + { + "epoch": 0.9028874784358246, + "grad_norm": 594.5277709960938, + "learning_rate": 3.388860988295245e-07, + "loss": 18.999, + "step": 446960 + }, + { + "epoch": 0.9029076790685084, + "grad_norm": 317.29876708984375, + "learning_rate": 3.3875978852465795e-07, + "loss": 15.7928, + "step": 446970 + }, + { + "epoch": 0.9029278797011923, + "grad_norm": 22.906665802001953, + "learning_rate": 3.3863350093811196e-07, + "loss": 13.0489, + "step": 446980 + }, + { + "epoch": 0.9029480803338761, + "grad_norm": 198.2176971435547, + "learning_rate": 3.3850723607049994e-07, + "loss": 14.2384, + "step": 446990 + }, + { + "epoch": 0.9029682809665599, + "grad_norm": 149.88609313964844, + "learning_rate": 3.3838099392243915e-07, + "loss": 19.9989, + "step": 447000 + }, + { + "epoch": 0.9029884815992437, + "grad_norm": 35.93222427368164, + "learning_rate": 3.382547744945436e-07, + "loss": 16.6192, + "step": 447010 + }, + { + "epoch": 0.9030086822319275, + "grad_norm": 271.7751159667969, + "learning_rate": 3.3812857778742935e-07, + "loss": 11.4213, + "step": 447020 + }, + { + "epoch": 0.9030288828646114, + "grad_norm": 524.922119140625, + "learning_rate": 3.3800240380171046e-07, + "loss": 14.8176, + "step": 447030 + }, + { + "epoch": 0.9030490834972952, + "grad_norm": 583.8279418945312, + "learning_rate": 3.3787625253800247e-07, + "loss": 13.3672, + "step": 447040 + }, + { + "epoch": 0.903069284129979, + "grad_norm": 301.9650573730469, + "learning_rate": 3.3775012399692055e-07, + "loss": 22.4438, + "step": 447050 + }, + { + "epoch": 0.9030894847626628, + "grad_norm": 724.0348510742188, + "learning_rate": 3.3762401817907795e-07, + "loss": 18.063, + "step": 447060 + }, + { + "epoch": 0.9031096853953465, + "grad_norm": 301.4600524902344, + "learning_rate": 3.374979350850921e-07, + "loss": 24.1437, + "step": 447070 + }, + { + "epoch": 0.9031298860280303, + "grad_norm": 89.9062728881836, + "learning_rate": 3.373718747155752e-07, + "loss": 24.9831, + "step": 447080 + }, + { + "epoch": 0.9031500866607142, + "grad_norm": 147.34152221679688, + "learning_rate": 3.372458370711412e-07, + 
"loss": 10.8046, + "step": 447090 + }, + { + "epoch": 0.903170287293398, + "grad_norm": 258.792236328125, + "learning_rate": 3.371198221524069e-07, + "loss": 13.8453, + "step": 447100 + }, + { + "epoch": 0.9031904879260818, + "grad_norm": 426.54119873046875, + "learning_rate": 3.3699382995998455e-07, + "loss": 16.5044, + "step": 447110 + }, + { + "epoch": 0.9032106885587656, + "grad_norm": 264.06488037109375, + "learning_rate": 3.368678604944886e-07, + "loss": 13.1876, + "step": 447120 + }, + { + "epoch": 0.9032308891914494, + "grad_norm": 298.4723815917969, + "learning_rate": 3.3674191375653255e-07, + "loss": 10.8668, + "step": 447130 + }, + { + "epoch": 0.9032510898241333, + "grad_norm": 236.72854614257812, + "learning_rate": 3.366159897467314e-07, + "loss": 14.6112, + "step": 447140 + }, + { + "epoch": 0.9032712904568171, + "grad_norm": 278.7016296386719, + "learning_rate": 3.364900884656991e-07, + "loss": 16.1786, + "step": 447150 + }, + { + "epoch": 0.9032914910895009, + "grad_norm": 394.2326354980469, + "learning_rate": 3.3636420991404686e-07, + "loss": 9.8431, + "step": 447160 + }, + { + "epoch": 0.9033116917221847, + "grad_norm": 443.133544921875, + "learning_rate": 3.3623835409239023e-07, + "loss": 21.7817, + "step": 447170 + }, + { + "epoch": 0.9033318923548685, + "grad_norm": 417.1814880371094, + "learning_rate": 3.361125210013438e-07, + "loss": 15.9135, + "step": 447180 + }, + { + "epoch": 0.9033520929875524, + "grad_norm": 0.0, + "learning_rate": 3.3598671064151767e-07, + "loss": 18.1624, + "step": 447190 + }, + { + "epoch": 0.9033722936202362, + "grad_norm": 309.14569091796875, + "learning_rate": 3.358609230135268e-07, + "loss": 13.8954, + "step": 447200 + }, + { + "epoch": 0.90339249425292, + "grad_norm": 253.5623016357422, + "learning_rate": 3.357351581179846e-07, + "loss": 23.6011, + "step": 447210 + }, + { + "epoch": 0.9034126948856038, + "grad_norm": 218.31854248046875, + "learning_rate": 3.35609415955504e-07, + "loss": 16.3643, + "step": 447220 + }, + { + "epoch": 0.9034328955182876, + "grad_norm": 628.333740234375, + "learning_rate": 3.354836965266961e-07, + "loss": 49.4682, + "step": 447230 + }, + { + "epoch": 0.9034530961509715, + "grad_norm": 351.5813293457031, + "learning_rate": 3.35357999832176e-07, + "loss": 17.8232, + "step": 447240 + }, + { + "epoch": 0.9034732967836553, + "grad_norm": 8.600955963134766, + "learning_rate": 3.352323258725554e-07, + "loss": 13.7669, + "step": 447250 + }, + { + "epoch": 0.9034934974163391, + "grad_norm": 387.5688781738281, + "learning_rate": 3.351066746484455e-07, + "loss": 18.2632, + "step": 447260 + }, + { + "epoch": 0.9035136980490229, + "grad_norm": 518.1428833007812, + "learning_rate": 3.349810461604608e-07, + "loss": 22.6909, + "step": 447270 + }, + { + "epoch": 0.9035338986817067, + "grad_norm": 448.3558654785156, + "learning_rate": 3.3485544040921194e-07, + "loss": 33.6627, + "step": 447280 + }, + { + "epoch": 0.9035540993143906, + "grad_norm": 583.00341796875, + "learning_rate": 3.347298573953128e-07, + "loss": 20.9727, + "step": 447290 + }, + { + "epoch": 0.9035742999470744, + "grad_norm": 18.169967651367188, + "learning_rate": 3.3460429711937417e-07, + "loss": 19.8996, + "step": 447300 + }, + { + "epoch": 0.9035945005797582, + "grad_norm": 311.3398132324219, + "learning_rate": 3.344787595820076e-07, + "loss": 13.1239, + "step": 447310 + }, + { + "epoch": 0.903614701212442, + "grad_norm": 567.7097778320312, + "learning_rate": 3.343532447838266e-07, + "loss": 17.0992, + "step": 447320 + }, + { + "epoch": 
0.9036349018451257, + "grad_norm": 484.4988098144531, + "learning_rate": 3.3422775272544115e-07, + "loss": 16.0481, + "step": 447330 + }, + { + "epoch": 0.9036551024778096, + "grad_norm": 355.2326354980469, + "learning_rate": 3.3410228340746475e-07, + "loss": 15.4506, + "step": 447340 + }, + { + "epoch": 0.9036753031104934, + "grad_norm": 338.78253173828125, + "learning_rate": 3.3397683683050685e-07, + "loss": 27.8149, + "step": 447350 + }, + { + "epoch": 0.9036955037431772, + "grad_norm": 374.1416931152344, + "learning_rate": 3.338514129951809e-07, + "loss": 6.0169, + "step": 447360 + }, + { + "epoch": 0.903715704375861, + "grad_norm": 189.8123321533203, + "learning_rate": 3.337260119020974e-07, + "loss": 22.418, + "step": 447370 + }, + { + "epoch": 0.9037359050085448, + "grad_norm": 403.3528747558594, + "learning_rate": 3.33600633551866e-07, + "loss": 25.0211, + "step": 447380 + }, + { + "epoch": 0.9037561056412287, + "grad_norm": 108.13957977294922, + "learning_rate": 3.334752779451006e-07, + "loss": 12.1229, + "step": 447390 + }, + { + "epoch": 0.9037763062739125, + "grad_norm": 207.4993438720703, + "learning_rate": 3.3334994508241013e-07, + "loss": 16.8677, + "step": 447400 + }, + { + "epoch": 0.9037965069065963, + "grad_norm": 612.2901000976562, + "learning_rate": 3.332246349644058e-07, + "loss": 32.5601, + "step": 447410 + }, + { + "epoch": 0.9038167075392801, + "grad_norm": 235.7354278564453, + "learning_rate": 3.3309934759169825e-07, + "loss": 20.5226, + "step": 447420 + }, + { + "epoch": 0.9038369081719639, + "grad_norm": 312.7841796875, + "learning_rate": 3.3297408296489973e-07, + "loss": 13.5229, + "step": 447430 + }, + { + "epoch": 0.9038571088046478, + "grad_norm": 346.403076171875, + "learning_rate": 3.328488410846187e-07, + "loss": 21.7707, + "step": 447440 + }, + { + "epoch": 0.9038773094373316, + "grad_norm": 612.0426635742188, + "learning_rate": 3.327236219514657e-07, + "loss": 15.0607, + "step": 447450 + }, + { + "epoch": 0.9038975100700154, + "grad_norm": 130.6298828125, + "learning_rate": 3.325984255660525e-07, + "loss": 29.4215, + "step": 447460 + }, + { + "epoch": 0.9039177107026992, + "grad_norm": 374.779296875, + "learning_rate": 3.324732519289886e-07, + "loss": 15.5523, + "step": 447470 + }, + { + "epoch": 0.903937911335383, + "grad_norm": 244.9932403564453, + "learning_rate": 3.3234810104088356e-07, + "loss": 14.7736, + "step": 447480 + }, + { + "epoch": 0.9039581119680669, + "grad_norm": 402.7099914550781, + "learning_rate": 3.322229729023474e-07, + "loss": 11.1433, + "step": 447490 + }, + { + "epoch": 0.9039783126007507, + "grad_norm": 646.601806640625, + "learning_rate": 3.320978675139919e-07, + "loss": 19.4331, + "step": 447500 + }, + { + "epoch": 0.9039985132334345, + "grad_norm": 16.859086990356445, + "learning_rate": 3.319727848764237e-07, + "loss": 22.936, + "step": 447510 + }, + { + "epoch": 0.9040187138661183, + "grad_norm": 434.27294921875, + "learning_rate": 3.318477249902541e-07, + "loss": 12.9803, + "step": 447520 + }, + { + "epoch": 0.9040389144988021, + "grad_norm": 109.24701690673828, + "learning_rate": 3.317226878560931e-07, + "loss": 16.8664, + "step": 447530 + }, + { + "epoch": 0.904059115131486, + "grad_norm": 533.0873413085938, + "learning_rate": 3.3159767347454963e-07, + "loss": 28.9964, + "step": 447540 + }, + { + "epoch": 0.9040793157641698, + "grad_norm": 255.40516662597656, + "learning_rate": 3.3147268184623216e-07, + "loss": 13.5238, + "step": 447550 + }, + { + "epoch": 0.9040995163968536, + "grad_norm": 294.3439636230469, + 
"learning_rate": 3.3134771297175127e-07, + "loss": 21.6402, + "step": 447560 + }, + { + "epoch": 0.9041197170295374, + "grad_norm": 246.0892333984375, + "learning_rate": 3.3122276685171593e-07, + "loss": 22.777, + "step": 447570 + }, + { + "epoch": 0.9041399176622211, + "grad_norm": 598.0889892578125, + "learning_rate": 3.3109784348673293e-07, + "loss": 17.5916, + "step": 447580 + }, + { + "epoch": 0.904160118294905, + "grad_norm": 875.310791015625, + "learning_rate": 3.309729428774144e-07, + "loss": 17.8856, + "step": 447590 + }, + { + "epoch": 0.9041803189275888, + "grad_norm": 216.2782745361328, + "learning_rate": 3.3084806502436617e-07, + "loss": 17.0905, + "step": 447600 + }, + { + "epoch": 0.9042005195602726, + "grad_norm": 251.59776306152344, + "learning_rate": 3.3072320992819875e-07, + "loss": 22.732, + "step": 447610 + }, + { + "epoch": 0.9042207201929564, + "grad_norm": 118.15159606933594, + "learning_rate": 3.3059837758951995e-07, + "loss": 12.7717, + "step": 447620 + }, + { + "epoch": 0.9042409208256402, + "grad_norm": 723.6868896484375, + "learning_rate": 3.3047356800893826e-07, + "loss": 24.9406, + "step": 447630 + }, + { + "epoch": 0.904261121458324, + "grad_norm": 495.8370666503906, + "learning_rate": 3.303487811870626e-07, + "loss": 12.215, + "step": 447640 + }, + { + "epoch": 0.9042813220910079, + "grad_norm": 214.63211059570312, + "learning_rate": 3.3022401712450025e-07, + "loss": 14.264, + "step": 447650 + }, + { + "epoch": 0.9043015227236917, + "grad_norm": 1940.4654541015625, + "learning_rate": 3.3009927582185965e-07, + "loss": 36.2257, + "step": 447660 + }, + { + "epoch": 0.9043217233563755, + "grad_norm": 94.8907470703125, + "learning_rate": 3.2997455727974856e-07, + "loss": 17.4319, + "step": 447670 + }, + { + "epoch": 0.9043419239890593, + "grad_norm": 51.35108947753906, + "learning_rate": 3.2984986149877554e-07, + "loss": 7.2657, + "step": 447680 + }, + { + "epoch": 0.9043621246217431, + "grad_norm": 0.6754915118217468, + "learning_rate": 3.297251884795477e-07, + "loss": 19.2647, + "step": 447690 + }, + { + "epoch": 0.904382325254427, + "grad_norm": 611.1425170898438, + "learning_rate": 3.2960053822267245e-07, + "loss": 18.7656, + "step": 447700 + }, + { + "epoch": 0.9044025258871108, + "grad_norm": 124.38903045654297, + "learning_rate": 3.294759107287582e-07, + "loss": 10.8771, + "step": 447710 + }, + { + "epoch": 0.9044227265197946, + "grad_norm": 308.4328918457031, + "learning_rate": 3.293513059984121e-07, + "loss": 15.8337, + "step": 447720 + }, + { + "epoch": 0.9044429271524784, + "grad_norm": 346.4642333984375, + "learning_rate": 3.2922672403224053e-07, + "loss": 20.7543, + "step": 447730 + }, + { + "epoch": 0.9044631277851622, + "grad_norm": 415.1618347167969, + "learning_rate": 3.2910216483085125e-07, + "loss": 15.1491, + "step": 447740 + }, + { + "epoch": 0.9044833284178461, + "grad_norm": 471.6772155761719, + "learning_rate": 3.289776283948526e-07, + "loss": 16.6599, + "step": 447750 + }, + { + "epoch": 0.9045035290505299, + "grad_norm": 338.21728515625, + "learning_rate": 3.2885311472485025e-07, + "loss": 15.285, + "step": 447760 + }, + { + "epoch": 0.9045237296832137, + "grad_norm": 173.7224578857422, + "learning_rate": 3.287286238214504e-07, + "loss": 12.2094, + "step": 447770 + }, + { + "epoch": 0.9045439303158975, + "grad_norm": 188.3278350830078, + "learning_rate": 3.286041556852615e-07, + "loss": 11.3011, + "step": 447780 + }, + { + "epoch": 0.9045641309485813, + "grad_norm": 178.1429443359375, + "learning_rate": 3.2847971031688963e-07, + "loss": 
32.7256, + "step": 447790 + }, + { + "epoch": 0.9045843315812652, + "grad_norm": 302.8177490234375, + "learning_rate": 3.283552877169399e-07, + "loss": 18.2551, + "step": 447800 + }, + { + "epoch": 0.904604532213949, + "grad_norm": 243.9778594970703, + "learning_rate": 3.282308878860202e-07, + "loss": 11.2811, + "step": 447810 + }, + { + "epoch": 0.9046247328466328, + "grad_norm": 298.0887145996094, + "learning_rate": 3.281065108247372e-07, + "loss": 49.3632, + "step": 447820 + }, + { + "epoch": 0.9046449334793166, + "grad_norm": 426.9590759277344, + "learning_rate": 3.279821565336966e-07, + "loss": 11.56, + "step": 447830 + }, + { + "epoch": 0.9046651341120003, + "grad_norm": 115.5376968383789, + "learning_rate": 3.2785782501350284e-07, + "loss": 20.0251, + "step": 447840 + }, + { + "epoch": 0.9046853347446842, + "grad_norm": 229.4214630126953, + "learning_rate": 3.277335162647649e-07, + "loss": 30.5869, + "step": 447850 + }, + { + "epoch": 0.904705535377368, + "grad_norm": 498.9197082519531, + "learning_rate": 3.276092302880868e-07, + "loss": 18.484, + "step": 447860 + }, + { + "epoch": 0.9047257360100518, + "grad_norm": 226.56736755371094, + "learning_rate": 3.274849670840741e-07, + "loss": 7.8482, + "step": 447870 + }, + { + "epoch": 0.9047459366427356, + "grad_norm": 256.080810546875, + "learning_rate": 3.2736072665333353e-07, + "loss": 16.6454, + "step": 447880 + }, + { + "epoch": 0.9047661372754194, + "grad_norm": 244.22299194335938, + "learning_rate": 3.272365089964691e-07, + "loss": 11.4553, + "step": 447890 + }, + { + "epoch": 0.9047863379081033, + "grad_norm": 233.81263732910156, + "learning_rate": 3.271123141140886e-07, + "loss": 16.5892, + "step": 447900 + }, + { + "epoch": 0.9048065385407871, + "grad_norm": 615.6817626953125, + "learning_rate": 3.269881420067944e-07, + "loss": 17.0083, + "step": 447910 + }, + { + "epoch": 0.9048267391734709, + "grad_norm": 204.99356079101562, + "learning_rate": 3.268639926751943e-07, + "loss": 11.7022, + "step": 447920 + }, + { + "epoch": 0.9048469398061547, + "grad_norm": 595.6871948242188, + "learning_rate": 3.267398661198923e-07, + "loss": 26.3127, + "step": 447930 + }, + { + "epoch": 0.9048671404388385, + "grad_norm": 300.2926940917969, + "learning_rate": 3.2661576234149285e-07, + "loss": 14.0718, + "step": 447940 + }, + { + "epoch": 0.9048873410715224, + "grad_norm": 243.31668090820312, + "learning_rate": 3.264916813406022e-07, + "loss": 19.1018, + "step": 447950 + }, + { + "epoch": 0.9049075417042062, + "grad_norm": 388.39508056640625, + "learning_rate": 3.263676231178231e-07, + "loss": 9.4069, + "step": 447960 + }, + { + "epoch": 0.90492774233689, + "grad_norm": 5.497354984283447, + "learning_rate": 3.262435876737624e-07, + "loss": 16.1466, + "step": 447970 + }, + { + "epoch": 0.9049479429695738, + "grad_norm": 346.11614990234375, + "learning_rate": 3.2611957500902345e-07, + "loss": 26.7694, + "step": 447980 + }, + { + "epoch": 0.9049681436022576, + "grad_norm": 1.7630181312561035, + "learning_rate": 3.2599558512421024e-07, + "loss": 16.1983, + "step": 447990 + }, + { + "epoch": 0.9049883442349415, + "grad_norm": 228.85594177246094, + "learning_rate": 3.258716180199278e-07, + "loss": 19.5428, + "step": 448000 + }, + { + "epoch": 0.9050085448676253, + "grad_norm": 92.16657257080078, + "learning_rate": 3.2574767369678073e-07, + "loss": 15.1785, + "step": 448010 + }, + { + "epoch": 0.9050287455003091, + "grad_norm": 402.4857177734375, + "learning_rate": 3.2562375215537176e-07, + "loss": 11.6245, + "step": 448020 + }, + { + "epoch": 
0.9050489461329929, + "grad_norm": 345.69921875, + "learning_rate": 3.2549985339630606e-07, + "loss": 22.8108, + "step": 448030 + }, + { + "epoch": 0.9050691467656767, + "grad_norm": 150.28077697753906, + "learning_rate": 3.253759774201881e-07, + "loss": 17.1309, + "step": 448040 + }, + { + "epoch": 0.9050893473983606, + "grad_norm": 653.8369140625, + "learning_rate": 3.252521242276191e-07, + "loss": 29.8362, + "step": 448050 + }, + { + "epoch": 0.9051095480310444, + "grad_norm": 420.880615234375, + "learning_rate": 3.2512829381920463e-07, + "loss": 21.5998, + "step": 448060 + }, + { + "epoch": 0.9051297486637282, + "grad_norm": 253.64968872070312, + "learning_rate": 3.250044861955487e-07, + "loss": 24.7224, + "step": 448070 + }, + { + "epoch": 0.905149949296412, + "grad_norm": 10.839777946472168, + "learning_rate": 3.248807013572536e-07, + "loss": 21.1545, + "step": 448080 + }, + { + "epoch": 0.9051701499290957, + "grad_norm": 449.986083984375, + "learning_rate": 3.2475693930492214e-07, + "loss": 11.1076, + "step": 448090 + }, + { + "epoch": 0.9051903505617795, + "grad_norm": 435.3336486816406, + "learning_rate": 3.246332000391583e-07, + "loss": 15.9453, + "step": 448100 + }, + { + "epoch": 0.9052105511944634, + "grad_norm": 679.3292846679688, + "learning_rate": 3.245094835605667e-07, + "loss": 16.2423, + "step": 448110 + }, + { + "epoch": 0.9052307518271472, + "grad_norm": 145.24484252929688, + "learning_rate": 3.2438578986974776e-07, + "loss": 19.8766, + "step": 448120 + }, + { + "epoch": 0.905250952459831, + "grad_norm": 776.5805053710938, + "learning_rate": 3.242621189673051e-07, + "loss": 32.9737, + "step": 448130 + }, + { + "epoch": 0.9052711530925148, + "grad_norm": 196.9553680419922, + "learning_rate": 3.2413847085384256e-07, + "loss": 13.7633, + "step": 448140 + }, + { + "epoch": 0.9052913537251986, + "grad_norm": 300.7092590332031, + "learning_rate": 3.240148455299619e-07, + "loss": 12.6026, + "step": 448150 + }, + { + "epoch": 0.9053115543578825, + "grad_norm": 359.0980529785156, + "learning_rate": 3.2389124299626483e-07, + "loss": 24.2891, + "step": 448160 + }, + { + "epoch": 0.9053317549905663, + "grad_norm": 576.0731811523438, + "learning_rate": 3.237676632533554e-07, + "loss": 14.0114, + "step": 448170 + }, + { + "epoch": 0.9053519556232501, + "grad_norm": 501.6863708496094, + "learning_rate": 3.2364410630183587e-07, + "loss": 14.6857, + "step": 448180 + }, + { + "epoch": 0.9053721562559339, + "grad_norm": 275.0650634765625, + "learning_rate": 3.2352057214230623e-07, + "loss": 11.8622, + "step": 448190 + }, + { + "epoch": 0.9053923568886177, + "grad_norm": 12.33811092376709, + "learning_rate": 3.233970607753717e-07, + "loss": 18.6745, + "step": 448200 + }, + { + "epoch": 0.9054125575213016, + "grad_norm": 254.0646514892578, + "learning_rate": 3.2327357220163116e-07, + "loss": 15.4009, + "step": 448210 + }, + { + "epoch": 0.9054327581539854, + "grad_norm": 294.08221435546875, + "learning_rate": 3.231501064216891e-07, + "loss": 27.6999, + "step": 448220 + }, + { + "epoch": 0.9054529587866692, + "grad_norm": 194.0894775390625, + "learning_rate": 3.2302666343614565e-07, + "loss": 13.6976, + "step": 448230 + }, + { + "epoch": 0.905473159419353, + "grad_norm": 417.09747314453125, + "learning_rate": 3.2290324324560363e-07, + "loss": 18.6556, + "step": 448240 + }, + { + "epoch": 0.9054933600520368, + "grad_norm": 286.4114685058594, + "learning_rate": 3.227798458506637e-07, + "loss": 24.0155, + "step": 448250 + }, + { + "epoch": 0.9055135606847207, + "grad_norm": 399.8034362792969, 
+ "learning_rate": 3.22656471251927e-07, + "loss": 22.4191, + "step": 448260 + }, + { + "epoch": 0.9055337613174045, + "grad_norm": 260.3201599121094, + "learning_rate": 3.225331194499964e-07, + "loss": 10.4566, + "step": 448270 + }, + { + "epoch": 0.9055539619500883, + "grad_norm": 192.796142578125, + "learning_rate": 3.2240979044547095e-07, + "loss": 14.5858, + "step": 448280 + }, + { + "epoch": 0.9055741625827721, + "grad_norm": 601.3679809570312, + "learning_rate": 3.2228648423895335e-07, + "loss": 19.8696, + "step": 448290 + }, + { + "epoch": 0.9055943632154559, + "grad_norm": 512.9625854492188, + "learning_rate": 3.2216320083104434e-07, + "loss": 18.3414, + "step": 448300 + }, + { + "epoch": 0.9056145638481398, + "grad_norm": 388.1852722167969, + "learning_rate": 3.2203994022234396e-07, + "loss": 19.741, + "step": 448310 + }, + { + "epoch": 0.9056347644808236, + "grad_norm": 282.55877685546875, + "learning_rate": 3.2191670241345395e-07, + "loss": 23.1342, + "step": 448320 + }, + { + "epoch": 0.9056549651135074, + "grad_norm": 1288.4505615234375, + "learning_rate": 3.2179348740497494e-07, + "loss": 15.1686, + "step": 448330 + }, + { + "epoch": 0.9056751657461912, + "grad_norm": 421.3667907714844, + "learning_rate": 3.216702951975059e-07, + "loss": 17.3516, + "step": 448340 + }, + { + "epoch": 0.9056953663788749, + "grad_norm": 95.43819427490234, + "learning_rate": 3.2154712579164913e-07, + "loss": 7.5615, + "step": 448350 + }, + { + "epoch": 0.9057155670115588, + "grad_norm": 406.94879150390625, + "learning_rate": 3.2142397918800416e-07, + "loss": 19.1469, + "step": 448360 + }, + { + "epoch": 0.9057357676442426, + "grad_norm": 99.41961669921875, + "learning_rate": 3.213008553871716e-07, + "loss": 11.4991, + "step": 448370 + }, + { + "epoch": 0.9057559682769264, + "grad_norm": 403.7974853515625, + "learning_rate": 3.2117775438975096e-07, + "loss": 19.0258, + "step": 448380 + }, + { + "epoch": 0.9057761689096102, + "grad_norm": 141.62965393066406, + "learning_rate": 3.2105467619634234e-07, + "loss": 13.6665, + "step": 448390 + }, + { + "epoch": 0.905796369542294, + "grad_norm": 194.33724975585938, + "learning_rate": 3.2093162080754634e-07, + "loss": 10.719, + "step": 448400 + }, + { + "epoch": 0.9058165701749779, + "grad_norm": 289.2017517089844, + "learning_rate": 3.208085882239614e-07, + "loss": 11.4255, + "step": 448410 + }, + { + "epoch": 0.9058367708076617, + "grad_norm": 219.3064727783203, + "learning_rate": 3.206855784461876e-07, + "loss": 18.4352, + "step": 448420 + }, + { + "epoch": 0.9058569714403455, + "grad_norm": 170.38917541503906, + "learning_rate": 3.205625914748256e-07, + "loss": 22.2926, + "step": 448430 + }, + { + "epoch": 0.9058771720730293, + "grad_norm": 533.3335571289062, + "learning_rate": 3.2043962731047373e-07, + "loss": 10.2524, + "step": 448440 + }, + { + "epoch": 0.9058973727057131, + "grad_norm": 7.176573753356934, + "learning_rate": 3.20316685953731e-07, + "loss": 28.7266, + "step": 448450 + }, + { + "epoch": 0.905917573338397, + "grad_norm": 258.6211242675781, + "learning_rate": 3.20193767405198e-07, + "loss": 11.7974, + "step": 448460 + }, + { + "epoch": 0.9059377739710808, + "grad_norm": 153.90350341796875, + "learning_rate": 3.2007087166547325e-07, + "loss": 11.3183, + "step": 448470 + }, + { + "epoch": 0.9059579746037646, + "grad_norm": 189.8888397216797, + "learning_rate": 3.199479987351545e-07, + "loss": 13.7309, + "step": 448480 + }, + { + "epoch": 0.9059781752364484, + "grad_norm": 615.9976806640625, + "learning_rate": 3.1982514861484184e-07, + 
"loss": 17.5584, + "step": 448490 + }, + { + "epoch": 0.9059983758691322, + "grad_norm": 1100.916015625, + "learning_rate": 3.1970232130513365e-07, + "loss": 18.7667, + "step": 448500 + }, + { + "epoch": 0.906018576501816, + "grad_norm": 349.28466796875, + "learning_rate": 3.19579516806629e-07, + "loss": 22.8324, + "step": 448510 + }, + { + "epoch": 0.9060387771344999, + "grad_norm": 385.8545227050781, + "learning_rate": 3.194567351199257e-07, + "loss": 24.0253, + "step": 448520 + }, + { + "epoch": 0.9060589777671837, + "grad_norm": 657.9740600585938, + "learning_rate": 3.193339762456232e-07, + "loss": 12.212, + "step": 448530 + }, + { + "epoch": 0.9060791783998675, + "grad_norm": 757.3938598632812, + "learning_rate": 3.1921124018431946e-07, + "loss": 19.3436, + "step": 448540 + }, + { + "epoch": 0.9060993790325513, + "grad_norm": 373.80242919921875, + "learning_rate": 3.1908852693661116e-07, + "loss": 27.6082, + "step": 448550 + }, + { + "epoch": 0.9061195796652352, + "grad_norm": 371.020751953125, + "learning_rate": 3.1896583650309896e-07, + "loss": 20.1392, + "step": 448560 + }, + { + "epoch": 0.906139780297919, + "grad_norm": 597.7250366210938, + "learning_rate": 3.188431688843785e-07, + "loss": 22.1884, + "step": 448570 + }, + { + "epoch": 0.9061599809306028, + "grad_norm": 571.2300415039062, + "learning_rate": 3.187205240810493e-07, + "loss": 21.1181, + "step": 448580 + }, + { + "epoch": 0.9061801815632866, + "grad_norm": 228.69024658203125, + "learning_rate": 3.1859790209370855e-07, + "loss": 16.468, + "step": 448590 + }, + { + "epoch": 0.9062003821959704, + "grad_norm": 60.642494201660156, + "learning_rate": 3.1847530292295313e-07, + "loss": 9.7498, + "step": 448600 + }, + { + "epoch": 0.9062205828286541, + "grad_norm": 327.65728759765625, + "learning_rate": 3.18352726569382e-07, + "loss": 26.8853, + "step": 448610 + }, + { + "epoch": 0.906240783461338, + "grad_norm": 418.5385437011719, + "learning_rate": 3.1823017303359185e-07, + "loss": 16.8091, + "step": 448620 + }, + { + "epoch": 0.9062609840940218, + "grad_norm": 602.8416748046875, + "learning_rate": 3.181076423161794e-07, + "loss": 28.1128, + "step": 448630 + }, + { + "epoch": 0.9062811847267056, + "grad_norm": 307.3411865234375, + "learning_rate": 3.179851344177426e-07, + "loss": 15.6654, + "step": 448640 + }, + { + "epoch": 0.9063013853593894, + "grad_norm": 315.0074157714844, + "learning_rate": 3.1786264933887977e-07, + "loss": 10.2402, + "step": 448650 + }, + { + "epoch": 0.9063215859920732, + "grad_norm": 312.59918212890625, + "learning_rate": 3.1774018708018493e-07, + "loss": 14.1066, + "step": 448660 + }, + { + "epoch": 0.9063417866247571, + "grad_norm": 371.5028076171875, + "learning_rate": 3.176177476422565e-07, + "loss": 11.442, + "step": 448670 + }, + { + "epoch": 0.9063619872574409, + "grad_norm": 310.7154235839844, + "learning_rate": 3.1749533102569176e-07, + "loss": 8.6138, + "step": 448680 + }, + { + "epoch": 0.9063821878901247, + "grad_norm": 11.721882820129395, + "learning_rate": 3.173729372310874e-07, + "loss": 10.4029, + "step": 448690 + }, + { + "epoch": 0.9064023885228085, + "grad_norm": 372.51806640625, + "learning_rate": 3.172505662590386e-07, + "loss": 12.4118, + "step": 448700 + }, + { + "epoch": 0.9064225891554923, + "grad_norm": 541.5244750976562, + "learning_rate": 3.1712821811014205e-07, + "loss": 19.0972, + "step": 448710 + }, + { + "epoch": 0.9064427897881762, + "grad_norm": 290.11285400390625, + "learning_rate": 3.170058927849967e-07, + "loss": 9.8545, + "step": 448720 + }, + { + "epoch": 
0.90646299042086, + "grad_norm": 413.8747863769531, + "learning_rate": 3.168835902841949e-07, + "loss": 11.5055, + "step": 448730 + }, + { + "epoch": 0.9064831910535438, + "grad_norm": 268.87640380859375, + "learning_rate": 3.167613106083345e-07, + "loss": 17.3808, + "step": 448740 + }, + { + "epoch": 0.9065033916862276, + "grad_norm": 547.7677001953125, + "learning_rate": 3.166390537580122e-07, + "loss": 18.9562, + "step": 448750 + }, + { + "epoch": 0.9065235923189114, + "grad_norm": 8.598103523254395, + "learning_rate": 3.165168197338231e-07, + "loss": 25.9563, + "step": 448760 + }, + { + "epoch": 0.9065437929515953, + "grad_norm": 537.5119018554688, + "learning_rate": 3.1639460853636226e-07, + "loss": 16.9571, + "step": 448770 + }, + { + "epoch": 0.9065639935842791, + "grad_norm": 21.0839900970459, + "learning_rate": 3.162724201662265e-07, + "loss": 13.5353, + "step": 448780 + }, + { + "epoch": 0.9065841942169629, + "grad_norm": 6.599519729614258, + "learning_rate": 3.161502546240114e-07, + "loss": 13.7822, + "step": 448790 + }, + { + "epoch": 0.9066043948496467, + "grad_norm": 888.1553344726562, + "learning_rate": 3.160281119103109e-07, + "loss": 30.5156, + "step": 448800 + }, + { + "epoch": 0.9066245954823305, + "grad_norm": 198.2985076904297, + "learning_rate": 3.159059920257218e-07, + "loss": 15.3852, + "step": 448810 + }, + { + "epoch": 0.9066447961150144, + "grad_norm": 103.79512786865234, + "learning_rate": 3.157838949708386e-07, + "loss": 18.8775, + "step": 448820 + }, + { + "epoch": 0.9066649967476982, + "grad_norm": 1511.2833251953125, + "learning_rate": 3.1566182074625693e-07, + "loss": 25.0839, + "step": 448830 + }, + { + "epoch": 0.906685197380382, + "grad_norm": 641.9132080078125, + "learning_rate": 3.155397693525708e-07, + "loss": 27.815, + "step": 448840 + }, + { + "epoch": 0.9067053980130658, + "grad_norm": 455.2959899902344, + "learning_rate": 3.1541774079037635e-07, + "loss": 24.2859, + "step": 448850 + }, + { + "epoch": 0.9067255986457495, + "grad_norm": 527.1464233398438, + "learning_rate": 3.1529573506026757e-07, + "loss": 28.662, + "step": 448860 + }, + { + "epoch": 0.9067457992784334, + "grad_norm": 25.249774932861328, + "learning_rate": 3.151737521628384e-07, + "loss": 12.2626, + "step": 448870 + }, + { + "epoch": 0.9067659999111172, + "grad_norm": 164.37081909179688, + "learning_rate": 3.150517920986851e-07, + "loss": 12.5367, + "step": 448880 + }, + { + "epoch": 0.906786200543801, + "grad_norm": 476.541259765625, + "learning_rate": 3.1492985486840044e-07, + "loss": 11.9349, + "step": 448890 + }, + { + "epoch": 0.9068064011764848, + "grad_norm": 286.9355773925781, + "learning_rate": 3.148079404725801e-07, + "loss": 13.8341, + "step": 448900 + }, + { + "epoch": 0.9068266018091686, + "grad_norm": 356.0955810546875, + "learning_rate": 3.1468604891181755e-07, + "loss": 10.4007, + "step": 448910 + }, + { + "epoch": 0.9068468024418525, + "grad_norm": 518.3862915039062, + "learning_rate": 3.145641801867061e-07, + "loss": 18.039, + "step": 448920 + }, + { + "epoch": 0.9068670030745363, + "grad_norm": 17.225505828857422, + "learning_rate": 3.1444233429784145e-07, + "loss": 16.9743, + "step": 448930 + }, + { + "epoch": 0.9068872037072201, + "grad_norm": 309.4004211425781, + "learning_rate": 3.14320511245817e-07, + "loss": 18.3492, + "step": 448940 + }, + { + "epoch": 0.9069074043399039, + "grad_norm": 307.60687255859375, + "learning_rate": 3.1419871103122447e-07, + "loss": 16.0927, + "step": 448950 + }, + { + "epoch": 0.9069276049725877, + "grad_norm": 96.79744720458984, 
+ "learning_rate": 3.1407693365465954e-07, + "loss": 19.2152, + "step": 448960 + }, + { + "epoch": 0.9069478056052716, + "grad_norm": 557.51611328125, + "learning_rate": 3.1395517911671613e-07, + "loss": 29.1107, + "step": 448970 + }, + { + "epoch": 0.9069680062379554, + "grad_norm": 78.58000946044922, + "learning_rate": 3.1383344741798716e-07, + "loss": 19.1292, + "step": 448980 + }, + { + "epoch": 0.9069882068706392, + "grad_norm": 305.63470458984375, + "learning_rate": 3.137117385590643e-07, + "loss": 21.5092, + "step": 448990 + }, + { + "epoch": 0.907008407503323, + "grad_norm": 89.49698638916016, + "learning_rate": 3.135900525405428e-07, + "loss": 10.2221, + "step": 449000 + }, + { + "epoch": 0.9070286081360068, + "grad_norm": 123.39061737060547, + "learning_rate": 3.134683893630153e-07, + "loss": 20.3347, + "step": 449010 + }, + { + "epoch": 0.9070488087686907, + "grad_norm": 265.69366455078125, + "learning_rate": 3.133467490270736e-07, + "loss": 10.337, + "step": 449020 + }, + { + "epoch": 0.9070690094013745, + "grad_norm": 636.016845703125, + "learning_rate": 3.1322513153331124e-07, + "loss": 19.651, + "step": 449030 + }, + { + "epoch": 0.9070892100340583, + "grad_norm": 1288.184814453125, + "learning_rate": 3.1310353688232207e-07, + "loss": 18.6391, + "step": 449040 + }, + { + "epoch": 0.9071094106667421, + "grad_norm": 191.1102294921875, + "learning_rate": 3.1298196507469737e-07, + "loss": 28.4142, + "step": 449050 + }, + { + "epoch": 0.9071296112994259, + "grad_norm": 140.29869079589844, + "learning_rate": 3.128604161110299e-07, + "loss": 7.4167, + "step": 449060 + }, + { + "epoch": 0.9071498119321098, + "grad_norm": 300.2296142578125, + "learning_rate": 3.1273888999191314e-07, + "loss": 18.4172, + "step": 449070 + }, + { + "epoch": 0.9071700125647936, + "grad_norm": 1085.9041748046875, + "learning_rate": 3.126173867179383e-07, + "loss": 25.2115, + "step": 449080 + }, + { + "epoch": 0.9071902131974774, + "grad_norm": 149.21746826171875, + "learning_rate": 3.1249590628969707e-07, + "loss": 17.9784, + "step": 449090 + }, + { + "epoch": 0.9072104138301612, + "grad_norm": 429.34661865234375, + "learning_rate": 3.123744487077829e-07, + "loss": 26.9559, + "step": 449100 + }, + { + "epoch": 0.907230614462845, + "grad_norm": 245.41632080078125, + "learning_rate": 3.122530139727864e-07, + "loss": 22.1495, + "step": 449110 + }, + { + "epoch": 0.9072508150955287, + "grad_norm": 282.68389892578125, + "learning_rate": 3.12131602085301e-07, + "loss": 10.4131, + "step": 449120 + }, + { + "epoch": 0.9072710157282126, + "grad_norm": 448.1305236816406, + "learning_rate": 3.1201021304591684e-07, + "loss": 17.2096, + "step": 449130 + }, + { + "epoch": 0.9072912163608964, + "grad_norm": 308.39117431640625, + "learning_rate": 3.118888468552267e-07, + "loss": 6.6042, + "step": 449140 + }, + { + "epoch": 0.9073114169935802, + "grad_norm": 370.9839172363281, + "learning_rate": 3.1176750351382235e-07, + "loss": 16.0175, + "step": 449150 + }, + { + "epoch": 0.907331617626264, + "grad_norm": 7.8283562660217285, + "learning_rate": 3.116461830222933e-07, + "loss": 13.4259, + "step": 449160 + }, + { + "epoch": 0.9073518182589478, + "grad_norm": 560.1171264648438, + "learning_rate": 3.11524885381233e-07, + "loss": 36.5528, + "step": 449170 + }, + { + "epoch": 0.9073720188916317, + "grad_norm": 238.0440673828125, + "learning_rate": 3.11403610591231e-07, + "loss": 14.3345, + "step": 449180 + }, + { + "epoch": 0.9073922195243155, + "grad_norm": 222.44174194335938, + "learning_rate": 3.1128235865288013e-07, + 
"loss": 16.2855, + "step": 449190 + }, + { + "epoch": 0.9074124201569993, + "grad_norm": 247.97122192382812, + "learning_rate": 3.1116112956677045e-07, + "loss": 10.4193, + "step": 449200 + }, + { + "epoch": 0.9074326207896831, + "grad_norm": 179.41934204101562, + "learning_rate": 3.1103992333349153e-07, + "loss": 16.3914, + "step": 449210 + }, + { + "epoch": 0.9074528214223669, + "grad_norm": 470.7078552246094, + "learning_rate": 3.1091873995363677e-07, + "loss": 17.0916, + "step": 449220 + }, + { + "epoch": 0.9074730220550508, + "grad_norm": 270.0218811035156, + "learning_rate": 3.1079757942779453e-07, + "loss": 16.3383, + "step": 449230 + }, + { + "epoch": 0.9074932226877346, + "grad_norm": 367.3122863769531, + "learning_rate": 3.106764417565561e-07, + "loss": 12.3758, + "step": 449240 + }, + { + "epoch": 0.9075134233204184, + "grad_norm": 306.461181640625, + "learning_rate": 3.105553269405115e-07, + "loss": 18.3363, + "step": 449250 + }, + { + "epoch": 0.9075336239531022, + "grad_norm": 522.5321655273438, + "learning_rate": 3.1043423498025303e-07, + "loss": 24.1793, + "step": 449260 + }, + { + "epoch": 0.907553824585786, + "grad_norm": 516.6013793945312, + "learning_rate": 3.1031316587636805e-07, + "loss": 17.7558, + "step": 449270 + }, + { + "epoch": 0.9075740252184699, + "grad_norm": 1105.093994140625, + "learning_rate": 3.101921196294477e-07, + "loss": 33.2699, + "step": 449280 + }, + { + "epoch": 0.9075942258511537, + "grad_norm": 102.50092315673828, + "learning_rate": 3.1007109624008326e-07, + "loss": 25.3437, + "step": 449290 + }, + { + "epoch": 0.9076144264838375, + "grad_norm": 565.497314453125, + "learning_rate": 3.0995009570886305e-07, + "loss": 24.174, + "step": 449300 + }, + { + "epoch": 0.9076346271165213, + "grad_norm": 291.22540283203125, + "learning_rate": 3.098291180363766e-07, + "loss": 16.3335, + "step": 449310 + }, + { + "epoch": 0.9076548277492051, + "grad_norm": 256.3533935546875, + "learning_rate": 3.097081632232141e-07, + "loss": 12.1066, + "step": 449320 + }, + { + "epoch": 0.907675028381889, + "grad_norm": 300.76544189453125, + "learning_rate": 3.095872312699666e-07, + "loss": 9.9195, + "step": 449330 + }, + { + "epoch": 0.9076952290145728, + "grad_norm": 374.5771789550781, + "learning_rate": 3.094663221772209e-07, + "loss": 20.0803, + "step": 449340 + }, + { + "epoch": 0.9077154296472566, + "grad_norm": 457.07659912109375, + "learning_rate": 3.093454359455672e-07, + "loss": 18.5713, + "step": 449350 + }, + { + "epoch": 0.9077356302799404, + "grad_norm": 456.6804504394531, + "learning_rate": 3.09224572575596e-07, + "loss": 17.623, + "step": 449360 + }, + { + "epoch": 0.9077558309126241, + "grad_norm": 631.7473754882812, + "learning_rate": 3.091037320678947e-07, + "loss": 19.8148, + "step": 449370 + }, + { + "epoch": 0.907776031545308, + "grad_norm": 283.96368408203125, + "learning_rate": 3.089829144230527e-07, + "loss": 16.0926, + "step": 449380 + }, + { + "epoch": 0.9077962321779918, + "grad_norm": 319.5601806640625, + "learning_rate": 3.088621196416597e-07, + "loss": 10.865, + "step": 449390 + }, + { + "epoch": 0.9078164328106756, + "grad_norm": 254.3486785888672, + "learning_rate": 3.0874134772430344e-07, + "loss": 10.6656, + "step": 449400 + }, + { + "epoch": 0.9078366334433594, + "grad_norm": 1269.4559326171875, + "learning_rate": 3.0862059867157237e-07, + "loss": 25.0295, + "step": 449410 + }, + { + "epoch": 0.9078568340760432, + "grad_norm": 524.7383422851562, + "learning_rate": 3.08499872484056e-07, + "loss": 9.9924, + "step": 449420 + }, + { + 
"epoch": 0.907877034708727, + "grad_norm": 284.661865234375, + "learning_rate": 3.0837916916234166e-07, + "loss": 37.4764, + "step": 449430 + }, + { + "epoch": 0.9078972353414109, + "grad_norm": 367.37078857421875, + "learning_rate": 3.0825848870701893e-07, + "loss": 18.6231, + "step": 449440 + }, + { + "epoch": 0.9079174359740947, + "grad_norm": 6.0677595138549805, + "learning_rate": 3.08137831118675e-07, + "loss": 15.7651, + "step": 449450 + }, + { + "epoch": 0.9079376366067785, + "grad_norm": 330.1292724609375, + "learning_rate": 3.080171963978984e-07, + "loss": 22.6161, + "step": 449460 + }, + { + "epoch": 0.9079578372394623, + "grad_norm": 170.20848083496094, + "learning_rate": 3.078965845452769e-07, + "loss": 8.8626, + "step": 449470 + }, + { + "epoch": 0.9079780378721461, + "grad_norm": 458.19586181640625, + "learning_rate": 3.077759955613979e-07, + "loss": 17.3684, + "step": 449480 + }, + { + "epoch": 0.90799823850483, + "grad_norm": 236.87313842773438, + "learning_rate": 3.0765542944685036e-07, + "loss": 22.9826, + "step": 449490 + }, + { + "epoch": 0.9080184391375138, + "grad_norm": 444.2785339355469, + "learning_rate": 3.0753488620222037e-07, + "loss": 29.2181, + "step": 449500 + }, + { + "epoch": 0.9080386397701976, + "grad_norm": 6.273552417755127, + "learning_rate": 3.07414365828097e-07, + "loss": 20.8284, + "step": 449510 + }, + { + "epoch": 0.9080588404028814, + "grad_norm": 569.6155395507812, + "learning_rate": 3.0729386832506647e-07, + "loss": 22.2681, + "step": 449520 + }, + { + "epoch": 0.9080790410355652, + "grad_norm": 717.0222778320312, + "learning_rate": 3.07173393693716e-07, + "loss": 22.9611, + "step": 449530 + }, + { + "epoch": 0.9080992416682491, + "grad_norm": 583.5347900390625, + "learning_rate": 3.0705294193463406e-07, + "loss": 18.084, + "step": 449540 + }, + { + "epoch": 0.9081194423009329, + "grad_norm": 611.0958862304688, + "learning_rate": 3.069325130484069e-07, + "loss": 20.9226, + "step": 449550 + }, + { + "epoch": 0.9081396429336167, + "grad_norm": 88.22270202636719, + "learning_rate": 3.068121070356206e-07, + "loss": 18.2759, + "step": 449560 + }, + { + "epoch": 0.9081598435663005, + "grad_norm": 147.03045654296875, + "learning_rate": 3.066917238968631e-07, + "loss": 11.1902, + "step": 449570 + }, + { + "epoch": 0.9081800441989843, + "grad_norm": 39.146305084228516, + "learning_rate": 3.065713636327211e-07, + "loss": 19.9076, + "step": 449580 + }, + { + "epoch": 0.9082002448316682, + "grad_norm": 243.39291381835938, + "learning_rate": 3.0645102624378144e-07, + "loss": 19.0416, + "step": 449590 + }, + { + "epoch": 0.908220445464352, + "grad_norm": 473.9781799316406, + "learning_rate": 3.0633071173062966e-07, + "loss": 12.5497, + "step": 449600 + }, + { + "epoch": 0.9082406460970358, + "grad_norm": 149.61248779296875, + "learning_rate": 3.0621042009385313e-07, + "loss": 19.2441, + "step": 449610 + }, + { + "epoch": 0.9082608467297196, + "grad_norm": 420.84954833984375, + "learning_rate": 3.0609015133403806e-07, + "loss": 19.7314, + "step": 449620 + }, + { + "epoch": 0.9082810473624033, + "grad_norm": 262.05914306640625, + "learning_rate": 3.0596990545176895e-07, + "loss": 17.1844, + "step": 449630 + }, + { + "epoch": 0.9083012479950872, + "grad_norm": 387.1336669921875, + "learning_rate": 3.058496824476337e-07, + "loss": 10.2942, + "step": 449640 + }, + { + "epoch": 0.908321448627771, + "grad_norm": 243.58253479003906, + "learning_rate": 3.057294823222184e-07, + "loss": 21.0373, + "step": 449650 + }, + { + "epoch": 0.9083416492604548, + "grad_norm": 
281.2779846191406, + "learning_rate": 3.056093050761083e-07, + "loss": 14.0664, + "step": 449660 + }, + { + "epoch": 0.9083618498931386, + "grad_norm": 339.8753356933594, + "learning_rate": 3.0548915070988837e-07, + "loss": 13.4907, + "step": 449670 + }, + { + "epoch": 0.9083820505258224, + "grad_norm": 271.4891052246094, + "learning_rate": 3.0536901922414543e-07, + "loss": 22.4855, + "step": 449680 + }, + { + "epoch": 0.9084022511585063, + "grad_norm": 451.68890380859375, + "learning_rate": 3.052489106194645e-07, + "loss": 31.894, + "step": 449690 + }, + { + "epoch": 0.9084224517911901, + "grad_norm": 644.14111328125, + "learning_rate": 3.051288248964307e-07, + "loss": 22.8247, + "step": 449700 + }, + { + "epoch": 0.9084426524238739, + "grad_norm": 158.24072265625, + "learning_rate": 3.050087620556302e-07, + "loss": 7.1228, + "step": 449710 + }, + { + "epoch": 0.9084628530565577, + "grad_norm": 788.8622436523438, + "learning_rate": 3.0488872209764654e-07, + "loss": 29.4907, + "step": 449720 + }, + { + "epoch": 0.9084830536892415, + "grad_norm": 38.16522979736328, + "learning_rate": 3.047687050230663e-07, + "loss": 13.5508, + "step": 449730 + }, + { + "epoch": 0.9085032543219254, + "grad_norm": 369.5571594238281, + "learning_rate": 3.046487108324736e-07, + "loss": 18.3055, + "step": 449740 + }, + { + "epoch": 0.9085234549546092, + "grad_norm": 458.87786865234375, + "learning_rate": 3.0452873952645455e-07, + "loss": 11.2209, + "step": 449750 + }, + { + "epoch": 0.908543655587293, + "grad_norm": 659.783935546875, + "learning_rate": 3.0440879110559263e-07, + "loss": 34.188, + "step": 449760 + }, + { + "epoch": 0.9085638562199768, + "grad_norm": 36.0201530456543, + "learning_rate": 3.0428886557047176e-07, + "loss": 19.4572, + "step": 449770 + }, + { + "epoch": 0.9085840568526606, + "grad_norm": 552.7836303710938, + "learning_rate": 3.0416896292167873e-07, + "loss": 24.2485, + "step": 449780 + }, + { + "epoch": 0.9086042574853445, + "grad_norm": 214.62286376953125, + "learning_rate": 3.0404908315979587e-07, + "loss": 17.428, + "step": 449790 + }, + { + "epoch": 0.9086244581180283, + "grad_norm": 499.6221008300781, + "learning_rate": 3.0392922628540875e-07, + "loss": 21.9665, + "step": 449800 + }, + { + "epoch": 0.9086446587507121, + "grad_norm": 429.6669921875, + "learning_rate": 3.0380939229910087e-07, + "loss": 22.1935, + "step": 449810 + }, + { + "epoch": 0.9086648593833959, + "grad_norm": 387.1067810058594, + "learning_rate": 3.036895812014556e-07, + "loss": 14.9835, + "step": 449820 + }, + { + "epoch": 0.9086850600160797, + "grad_norm": 452.1629638671875, + "learning_rate": 3.0356979299305867e-07, + "loss": 19.1507, + "step": 449830 + }, + { + "epoch": 0.9087052606487636, + "grad_norm": 265.0748291015625, + "learning_rate": 3.0345002767449337e-07, + "loss": 13.7882, + "step": 449840 + }, + { + "epoch": 0.9087254612814474, + "grad_norm": 824.8062744140625, + "learning_rate": 3.0333028524634156e-07, + "loss": 17.5646, + "step": 449850 + }, + { + "epoch": 0.9087456619141312, + "grad_norm": 16.399173736572266, + "learning_rate": 3.0321056570918883e-07, + "loss": 18.7888, + "step": 449860 + }, + { + "epoch": 0.908765862546815, + "grad_norm": 1272.93701171875, + "learning_rate": 3.030908690636192e-07, + "loss": 27.8628, + "step": 449870 + }, + { + "epoch": 0.9087860631794987, + "grad_norm": 344.57684326171875, + "learning_rate": 3.029711953102138e-07, + "loss": 21.5898, + "step": 449880 + }, + { + "epoch": 0.9088062638121825, + "grad_norm": 346.8316955566406, + "learning_rate": 
3.028515444495572e-07, + "loss": 10.6882, + "step": 449890 + }, + { + "epoch": 0.9088264644448664, + "grad_norm": 240.88621520996094, + "learning_rate": 3.027319164822329e-07, + "loss": 16.0152, + "step": 449900 + }, + { + "epoch": 0.9088466650775502, + "grad_norm": 113.99472045898438, + "learning_rate": 3.0261231140882363e-07, + "loss": 37.6736, + "step": 449910 + }, + { + "epoch": 0.908866865710234, + "grad_norm": 153.38453674316406, + "learning_rate": 3.024927292299118e-07, + "loss": 21.8065, + "step": 449920 + }, + { + "epoch": 0.9088870663429178, + "grad_norm": 444.3023986816406, + "learning_rate": 3.0237316994608025e-07, + "loss": 14.1541, + "step": 449930 + }, + { + "epoch": 0.9089072669756016, + "grad_norm": 412.1969299316406, + "learning_rate": 3.02253633557914e-07, + "loss": 20.4504, + "step": 449940 + }, + { + "epoch": 0.9089274676082855, + "grad_norm": 162.01190185546875, + "learning_rate": 3.0213412006599216e-07, + "loss": 11.6022, + "step": 449950 + }, + { + "epoch": 0.9089476682409693, + "grad_norm": 0.0, + "learning_rate": 3.0201462947089865e-07, + "loss": 24.5164, + "step": 449960 + }, + { + "epoch": 0.9089678688736531, + "grad_norm": 684.73779296875, + "learning_rate": 3.018951617732169e-07, + "loss": 21.3613, + "step": 449970 + }, + { + "epoch": 0.9089880695063369, + "grad_norm": 733.5957641601562, + "learning_rate": 3.01775716973528e-07, + "loss": 34.4216, + "step": 449980 + }, + { + "epoch": 0.9090082701390207, + "grad_norm": 362.92449951171875, + "learning_rate": 3.0165629507241446e-07, + "loss": 13.8412, + "step": 449990 + }, + { + "epoch": 0.9090284707717046, + "grad_norm": 392.89288330078125, + "learning_rate": 3.015368960704584e-07, + "loss": 15.1687, + "step": 450000 + }, + { + "epoch": 0.9090486714043884, + "grad_norm": 420.2798767089844, + "learning_rate": 3.014175199682418e-07, + "loss": 15.1001, + "step": 450010 + }, + { + "epoch": 0.9090688720370722, + "grad_norm": 494.7281799316406, + "learning_rate": 3.012981667663456e-07, + "loss": 14.4487, + "step": 450020 + }, + { + "epoch": 0.909089072669756, + "grad_norm": 673.8734741210938, + "learning_rate": 3.011788364653523e-07, + "loss": 18.8924, + "step": 450030 + }, + { + "epoch": 0.9091092733024398, + "grad_norm": 491.93609619140625, + "learning_rate": 3.010595290658441e-07, + "loss": 13.9405, + "step": 450040 + }, + { + "epoch": 0.9091294739351237, + "grad_norm": 136.08999633789062, + "learning_rate": 3.0094024456840176e-07, + "loss": 12.4955, + "step": 450050 + }, + { + "epoch": 0.9091496745678075, + "grad_norm": 411.83746337890625, + "learning_rate": 3.008209829736064e-07, + "loss": 19.9289, + "step": 450060 + }, + { + "epoch": 0.9091698752004913, + "grad_norm": 226.57904052734375, + "learning_rate": 3.007017442820398e-07, + "loss": 21.5572, + "step": 450070 + }, + { + "epoch": 0.9091900758331751, + "grad_norm": 651.72900390625, + "learning_rate": 3.005825284942837e-07, + "loss": 22.6409, + "step": 450080 + }, + { + "epoch": 0.909210276465859, + "grad_norm": 8.027105331420898, + "learning_rate": 3.004633356109171e-07, + "loss": 25.1169, + "step": 450090 + }, + { + "epoch": 0.9092304770985428, + "grad_norm": 294.9572448730469, + "learning_rate": 3.003441656325229e-07, + "loss": 12.6286, + "step": 450100 + }, + { + "epoch": 0.9092506777312266, + "grad_norm": 374.8417663574219, + "learning_rate": 3.002250185596806e-07, + "loss": 19.1292, + "step": 450110 + }, + { + "epoch": 0.9092708783639104, + "grad_norm": 58.905242919921875, + "learning_rate": 3.0010589439297245e-07, + "loss": 30.5942, + "step": 450120 + }, 
+ { + "epoch": 0.9092910789965942, + "grad_norm": 385.8377380371094, + "learning_rate": 2.9998679313297807e-07, + "loss": 17.9534, + "step": 450130 + }, + { + "epoch": 0.9093112796292779, + "grad_norm": 214.30320739746094, + "learning_rate": 2.99867714780277e-07, + "loss": 17.9742, + "step": 450140 + }, + { + "epoch": 0.9093314802619618, + "grad_norm": 166.50491333007812, + "learning_rate": 2.9974865933545207e-07, + "loss": 15.6885, + "step": 450150 + }, + { + "epoch": 0.9093516808946456, + "grad_norm": 264.8522644042969, + "learning_rate": 2.996296267990817e-07, + "loss": 17.7719, + "step": 450160 + }, + { + "epoch": 0.9093718815273294, + "grad_norm": 327.0448303222656, + "learning_rate": 2.9951061717174543e-07, + "loss": 18.7962, + "step": 450170 + }, + { + "epoch": 0.9093920821600132, + "grad_norm": 425.8899841308594, + "learning_rate": 2.9939163045402456e-07, + "loss": 8.4987, + "step": 450180 + }, + { + "epoch": 0.909412282792697, + "grad_norm": 132.53302001953125, + "learning_rate": 2.992726666464996e-07, + "loss": 17.0717, + "step": 450190 + }, + { + "epoch": 0.9094324834253809, + "grad_norm": 338.8014221191406, + "learning_rate": 2.99153725749749e-07, + "loss": 19.2351, + "step": 450200 + }, + { + "epoch": 0.9094526840580647, + "grad_norm": 340.30010986328125, + "learning_rate": 2.990348077643529e-07, + "loss": 13.3819, + "step": 450210 + }, + { + "epoch": 0.9094728846907485, + "grad_norm": 61.42256546020508, + "learning_rate": 2.989159126908914e-07, + "loss": 15.6164, + "step": 450220 + }, + { + "epoch": 0.9094930853234323, + "grad_norm": 432.1888122558594, + "learning_rate": 2.9879704052994395e-07, + "loss": 11.7148, + "step": 450230 + }, + { + "epoch": 0.9095132859561161, + "grad_norm": 227.47276306152344, + "learning_rate": 2.986781912820885e-07, + "loss": 9.2523, + "step": 450240 + }, + { + "epoch": 0.9095334865888, + "grad_norm": 834.7599487304688, + "learning_rate": 2.9855936494790516e-07, + "loss": 20.4919, + "step": 450250 + }, + { + "epoch": 0.9095536872214838, + "grad_norm": 10.250986099243164, + "learning_rate": 2.9844056152797505e-07, + "loss": 15.58, + "step": 450260 + }, + { + "epoch": 0.9095738878541676, + "grad_norm": 283.3591613769531, + "learning_rate": 2.983217810228739e-07, + "loss": 6.9049, + "step": 450270 + }, + { + "epoch": 0.9095940884868514, + "grad_norm": 299.103515625, + "learning_rate": 2.9820302343318177e-07, + "loss": 18.7821, + "step": 450280 + }, + { + "epoch": 0.9096142891195352, + "grad_norm": 216.5201416015625, + "learning_rate": 2.9808428875947925e-07, + "loss": 14.35, + "step": 450290 + }, + { + "epoch": 0.909634489752219, + "grad_norm": 2.37345027923584, + "learning_rate": 2.9796557700234317e-07, + "loss": 24.4248, + "step": 450300 + }, + { + "epoch": 0.9096546903849029, + "grad_norm": 201.43051147460938, + "learning_rate": 2.9784688816235194e-07, + "loss": 18.0874, + "step": 450310 + }, + { + "epoch": 0.9096748910175867, + "grad_norm": 150.97740173339844, + "learning_rate": 2.9772822224008515e-07, + "loss": 14.5586, + "step": 450320 + }, + { + "epoch": 0.9096950916502705, + "grad_norm": 190.00332641601562, + "learning_rate": 2.976095792361211e-07, + "loss": 44.8998, + "step": 450330 + }, + { + "epoch": 0.9097152922829543, + "grad_norm": 350.0762939453125, + "learning_rate": 2.9749095915103665e-07, + "loss": 24.1158, + "step": 450340 + }, + { + "epoch": 0.9097354929156382, + "grad_norm": 541.433349609375, + "learning_rate": 2.9737236198541077e-07, + "loss": 30.8052, + "step": 450350 + }, + { + "epoch": 0.909755693548322, + "grad_norm": 
118.30099487304688, + "learning_rate": 2.9725378773982295e-07, + "loss": 16.6455, + "step": 450360 + }, + { + "epoch": 0.9097758941810058, + "grad_norm": 424.4774475097656, + "learning_rate": 2.971352364148494e-07, + "loss": 17.9009, + "step": 450370 + }, + { + "epoch": 0.9097960948136896, + "grad_norm": 122.40388488769531, + "learning_rate": 2.970167080110675e-07, + "loss": 13.7767, + "step": 450380 + }, + { + "epoch": 0.9098162954463734, + "grad_norm": 129.57290649414062, + "learning_rate": 2.968982025290568e-07, + "loss": 20.165, + "step": 450390 + }, + { + "epoch": 0.9098364960790571, + "grad_norm": 307.5549011230469, + "learning_rate": 2.967797199693928e-07, + "loss": 22.9961, + "step": 450400 + }, + { + "epoch": 0.909856696711741, + "grad_norm": 755.9302978515625, + "learning_rate": 2.9666126033265517e-07, + "loss": 17.0161, + "step": 450410 + }, + { + "epoch": 0.9098768973444248, + "grad_norm": 232.53997802734375, + "learning_rate": 2.9654282361941953e-07, + "loss": 19.0928, + "step": 450420 + }, + { + "epoch": 0.9098970979771086, + "grad_norm": 578.3982543945312, + "learning_rate": 2.9642440983026324e-07, + "loss": 23.7926, + "step": 450430 + }, + { + "epoch": 0.9099172986097924, + "grad_norm": 685.7623901367188, + "learning_rate": 2.963060189657646e-07, + "loss": 18.2127, + "step": 450440 + }, + { + "epoch": 0.9099374992424762, + "grad_norm": 157.11639404296875, + "learning_rate": 2.961876510264999e-07, + "loss": 19.3056, + "step": 450450 + }, + { + "epoch": 0.9099576998751601, + "grad_norm": 142.48611450195312, + "learning_rate": 2.9606930601304595e-07, + "loss": 15.6247, + "step": 450460 + }, + { + "epoch": 0.9099779005078439, + "grad_norm": 37.19289779663086, + "learning_rate": 2.9595098392597887e-07, + "loss": 11.6434, + "step": 450470 + }, + { + "epoch": 0.9099981011405277, + "grad_norm": 489.3031005859375, + "learning_rate": 2.958326847658771e-07, + "loss": 17.3193, + "step": 450480 + }, + { + "epoch": 0.9100183017732115, + "grad_norm": 122.79801940917969, + "learning_rate": 2.9571440853331634e-07, + "loss": 19.2583, + "step": 450490 + }, + { + "epoch": 0.9100385024058953, + "grad_norm": 422.8098449707031, + "learning_rate": 2.9559615522887275e-07, + "loss": 10.5503, + "step": 450500 + }, + { + "epoch": 0.9100587030385792, + "grad_norm": 154.032958984375, + "learning_rate": 2.954779248531231e-07, + "loss": 14.2224, + "step": 450510 + }, + { + "epoch": 0.910078903671263, + "grad_norm": 259.25714111328125, + "learning_rate": 2.953597174066436e-07, + "loss": 17.5647, + "step": 450520 + }, + { + "epoch": 0.9100991043039468, + "grad_norm": 197.6894989013672, + "learning_rate": 2.952415328900093e-07, + "loss": 13.9, + "step": 450530 + }, + { + "epoch": 0.9101193049366306, + "grad_norm": 140.98451232910156, + "learning_rate": 2.951233713037971e-07, + "loss": 10.3261, + "step": 450540 + }, + { + "epoch": 0.9101395055693144, + "grad_norm": 530.427001953125, + "learning_rate": 2.9500523264858473e-07, + "loss": 17.0496, + "step": 450550 + }, + { + "epoch": 0.9101597062019983, + "grad_norm": 1189.279296875, + "learning_rate": 2.948871169249451e-07, + "loss": 24.623, + "step": 450560 + }, + { + "epoch": 0.9101799068346821, + "grad_norm": 255.91119384765625, + "learning_rate": 2.9476902413345443e-07, + "loss": 19.1389, + "step": 450570 + }, + { + "epoch": 0.9102001074673659, + "grad_norm": 382.6887512207031, + "learning_rate": 2.946509542746895e-07, + "loss": 10.8728, + "step": 450580 + }, + { + "epoch": 0.9102203081000497, + "grad_norm": 526.3858642578125, + "learning_rate": 
2.9453290734922537e-07, + "loss": 27.7853, + "step": 450590 + }, + { + "epoch": 0.9102405087327335, + "grad_norm": 301.6593017578125, + "learning_rate": 2.9441488335763656e-07, + "loss": 32.5232, + "step": 450600 + }, + { + "epoch": 0.9102607093654174, + "grad_norm": 8.258715629577637, + "learning_rate": 2.9429688230049934e-07, + "loss": 14.6922, + "step": 450610 + }, + { + "epoch": 0.9102809099981012, + "grad_norm": 467.5506286621094, + "learning_rate": 2.941789041783888e-07, + "loss": 7.9306, + "step": 450620 + }, + { + "epoch": 0.910301110630785, + "grad_norm": 4.88645076751709, + "learning_rate": 2.940609489918783e-07, + "loss": 11.6321, + "step": 450630 + }, + { + "epoch": 0.9103213112634688, + "grad_norm": 615.4234619140625, + "learning_rate": 2.9394301674154413e-07, + "loss": 14.5892, + "step": 450640 + }, + { + "epoch": 0.9103415118961525, + "grad_norm": 232.69505310058594, + "learning_rate": 2.938251074279619e-07, + "loss": 15.8183, + "step": 450650 + }, + { + "epoch": 0.9103617125288364, + "grad_norm": 698.8169555664062, + "learning_rate": 2.9370722105170504e-07, + "loss": 18.039, + "step": 450660 + }, + { + "epoch": 0.9103819131615202, + "grad_norm": 29.231889724731445, + "learning_rate": 2.935893576133475e-07, + "loss": 24.1463, + "step": 450670 + }, + { + "epoch": 0.910402113794204, + "grad_norm": 68.94104766845703, + "learning_rate": 2.9347151711346556e-07, + "loss": 11.9315, + "step": 450680 + }, + { + "epoch": 0.9104223144268878, + "grad_norm": 363.8338317871094, + "learning_rate": 2.933536995526326e-07, + "loss": 29.146, + "step": 450690 + }, + { + "epoch": 0.9104425150595716, + "grad_norm": 57.30198669433594, + "learning_rate": 2.9323590493142206e-07, + "loss": 15.7005, + "step": 450700 + }, + { + "epoch": 0.9104627156922555, + "grad_norm": 235.6766357421875, + "learning_rate": 2.931181332504096e-07, + "loss": 15.6148, + "step": 450710 + }, + { + "epoch": 0.9104829163249393, + "grad_norm": 136.07627868652344, + "learning_rate": 2.930003845101681e-07, + "loss": 12.1525, + "step": 450720 + }, + { + "epoch": 0.9105031169576231, + "grad_norm": 347.1009216308594, + "learning_rate": 2.9288265871127206e-07, + "loss": 13.9972, + "step": 450730 + }, + { + "epoch": 0.9105233175903069, + "grad_norm": 58.31315231323242, + "learning_rate": 2.927649558542955e-07, + "loss": 17.731, + "step": 450740 + }, + { + "epoch": 0.9105435182229907, + "grad_norm": 188.71400451660156, + "learning_rate": 2.9264727593981024e-07, + "loss": 17.885, + "step": 450750 + }, + { + "epoch": 0.9105637188556746, + "grad_norm": 244.48216247558594, + "learning_rate": 2.9252961896839236e-07, + "loss": 16.7535, + "step": 450760 + }, + { + "epoch": 0.9105839194883584, + "grad_norm": 595.94921875, + "learning_rate": 2.9241198494061427e-07, + "loss": 20.7789, + "step": 450770 + }, + { + "epoch": 0.9106041201210422, + "grad_norm": 59.34998321533203, + "learning_rate": 2.922943738570483e-07, + "loss": 13.5088, + "step": 450780 + }, + { + "epoch": 0.910624320753726, + "grad_norm": 384.7566833496094, + "learning_rate": 2.921767857182689e-07, + "loss": 16.75, + "step": 450790 + }, + { + "epoch": 0.9106445213864098, + "grad_norm": 258.9685363769531, + "learning_rate": 2.920592205248496e-07, + "loss": 20.9006, + "step": 450800 + }, + { + "epoch": 0.9106647220190937, + "grad_norm": 415.7431945800781, + "learning_rate": 2.919416782773621e-07, + "loss": 18.0212, + "step": 450810 + }, + { + "epoch": 0.9106849226517775, + "grad_norm": 307.84735107421875, + "learning_rate": 2.918241589763793e-07, + "loss": 17.0118, + "step": 
450820 + }, + { + "epoch": 0.9107051232844613, + "grad_norm": 154.61444091796875, + "learning_rate": 2.917066626224757e-07, + "loss": 20.9167, + "step": 450830 + }, + { + "epoch": 0.9107253239171451, + "grad_norm": 545.405029296875, + "learning_rate": 2.9158918921622205e-07, + "loss": 29.9494, + "step": 450840 + }, + { + "epoch": 0.9107455245498289, + "grad_norm": 372.11822509765625, + "learning_rate": 2.914717387581917e-07, + "loss": 26.7919, + "step": 450850 + }, + { + "epoch": 0.9107657251825128, + "grad_norm": 272.1129150390625, + "learning_rate": 2.913543112489564e-07, + "loss": 22.2206, + "step": 450860 + }, + { + "epoch": 0.9107859258151966, + "grad_norm": 564.9196166992188, + "learning_rate": 2.912369066890908e-07, + "loss": 13.2435, + "step": 450870 + }, + { + "epoch": 0.9108061264478804, + "grad_norm": 407.7785949707031, + "learning_rate": 2.9111952507916375e-07, + "loss": 24.8045, + "step": 450880 + }, + { + "epoch": 0.9108263270805642, + "grad_norm": 240.93890380859375, + "learning_rate": 2.910021664197493e-07, + "loss": 23.5872, + "step": 450890 + }, + { + "epoch": 0.910846527713248, + "grad_norm": 455.80938720703125, + "learning_rate": 2.908848307114198e-07, + "loss": 18.3362, + "step": 450900 + }, + { + "epoch": 0.9108667283459317, + "grad_norm": 180.8559112548828, + "learning_rate": 2.9076751795474647e-07, + "loss": 11.052, + "step": 450910 + }, + { + "epoch": 0.9108869289786156, + "grad_norm": 185.9855499267578, + "learning_rate": 2.9065022815030044e-07, + "loss": 10.145, + "step": 450920 + }, + { + "epoch": 0.9109071296112994, + "grad_norm": 503.127685546875, + "learning_rate": 2.905329612986546e-07, + "loss": 19.2377, + "step": 450930 + }, + { + "epoch": 0.9109273302439832, + "grad_norm": 231.20713806152344, + "learning_rate": 2.9041571740037967e-07, + "loss": 15.72, + "step": 450940 + }, + { + "epoch": 0.910947530876667, + "grad_norm": 444.4071350097656, + "learning_rate": 2.9029849645604735e-07, + "loss": 16.6395, + "step": 450950 + }, + { + "epoch": 0.9109677315093508, + "grad_norm": 311.00054931640625, + "learning_rate": 2.9018129846622834e-07, + "loss": 11.8376, + "step": 450960 + }, + { + "epoch": 0.9109879321420347, + "grad_norm": 424.9436340332031, + "learning_rate": 2.900641234314955e-07, + "loss": 22.9413, + "step": 450970 + }, + { + "epoch": 0.9110081327747185, + "grad_norm": 113.29547119140625, + "learning_rate": 2.899469713524183e-07, + "loss": 4.7459, + "step": 450980 + }, + { + "epoch": 0.9110283334074023, + "grad_norm": 8.436201095581055, + "learning_rate": 2.898298422295681e-07, + "loss": 22.1985, + "step": 450990 + }, + { + "epoch": 0.9110485340400861, + "grad_norm": 239.64442443847656, + "learning_rate": 2.8971273606351656e-07, + "loss": 16.2286, + "step": 451000 + }, + { + "epoch": 0.9110687346727699, + "grad_norm": 269.68243408203125, + "learning_rate": 2.895956528548338e-07, + "loss": 34.9558, + "step": 451010 + }, + { + "epoch": 0.9110889353054538, + "grad_norm": 182.18995666503906, + "learning_rate": 2.8947859260408997e-07, + "loss": 14.533, + "step": 451020 + }, + { + "epoch": 0.9111091359381376, + "grad_norm": 23.684764862060547, + "learning_rate": 2.8936155531185675e-07, + "loss": 29.4517, + "step": 451030 + }, + { + "epoch": 0.9111293365708214, + "grad_norm": 405.1063537597656, + "learning_rate": 2.892445409787037e-07, + "loss": 30.9317, + "step": 451040 + }, + { + "epoch": 0.9111495372035052, + "grad_norm": 154.00747680664062, + "learning_rate": 2.891275496052015e-07, + "loss": 18.9794, + "step": 451050 + }, + { + "epoch": 0.911169737836189, 
+ "grad_norm": 521.412353515625, + "learning_rate": 2.8901058119192026e-07, + "loss": 16.2786, + "step": 451060 + }, + { + "epoch": 0.9111899384688729, + "grad_norm": 295.50860595703125, + "learning_rate": 2.8889363573943006e-07, + "loss": 11.4799, + "step": 451070 + }, + { + "epoch": 0.9112101391015567, + "grad_norm": 461.7724609375, + "learning_rate": 2.8877671324829994e-07, + "loss": 17.0752, + "step": 451080 + }, + { + "epoch": 0.9112303397342405, + "grad_norm": 307.6255798339844, + "learning_rate": 2.886598137191021e-07, + "loss": 22.2398, + "step": 451090 + }, + { + "epoch": 0.9112505403669243, + "grad_norm": 112.7985610961914, + "learning_rate": 2.8854293715240455e-07, + "loss": 20.4865, + "step": 451100 + }, + { + "epoch": 0.9112707409996081, + "grad_norm": 301.1622009277344, + "learning_rate": 2.884260835487768e-07, + "loss": 14.4974, + "step": 451110 + }, + { + "epoch": 0.911290941632292, + "grad_norm": 166.6481475830078, + "learning_rate": 2.8830925290878997e-07, + "loss": 20.8534, + "step": 451120 + }, + { + "epoch": 0.9113111422649758, + "grad_norm": 242.3641357421875, + "learning_rate": 2.8819244523301206e-07, + "loss": 20.7083, + "step": 451130 + }, + { + "epoch": 0.9113313428976596, + "grad_norm": 162.1275177001953, + "learning_rate": 2.880756605220114e-07, + "loss": 14.867, + "step": 451140 + }, + { + "epoch": 0.9113515435303434, + "grad_norm": 562.4708862304688, + "learning_rate": 2.879588987763593e-07, + "loss": 26.572, + "step": 451150 + }, + { + "epoch": 0.9113717441630271, + "grad_norm": 410.82196044921875, + "learning_rate": 2.878421599966252e-07, + "loss": 20.0088, + "step": 451160 + }, + { + "epoch": 0.911391944795711, + "grad_norm": 334.5708923339844, + "learning_rate": 2.877254441833754e-07, + "loss": 34.9807, + "step": 451170 + }, + { + "epoch": 0.9114121454283948, + "grad_norm": 541.3951416015625, + "learning_rate": 2.8760875133718003e-07, + "loss": 45.1133, + "step": 451180 + }, + { + "epoch": 0.9114323460610786, + "grad_norm": 848.431396484375, + "learning_rate": 2.8749208145860907e-07, + "loss": 13.6133, + "step": 451190 + }, + { + "epoch": 0.9114525466937624, + "grad_norm": 182.5347442626953, + "learning_rate": 2.8737543454822993e-07, + "loss": 16.9643, + "step": 451200 + }, + { + "epoch": 0.9114727473264462, + "grad_norm": 245.85494995117188, + "learning_rate": 2.87258810606611e-07, + "loss": 11.7495, + "step": 451210 + }, + { + "epoch": 0.91149294795913, + "grad_norm": 480.86737060546875, + "learning_rate": 2.8714220963432125e-07, + "loss": 16.4133, + "step": 451220 + }, + { + "epoch": 0.9115131485918139, + "grad_norm": 0.02471095696091652, + "learning_rate": 2.870256316319292e-07, + "loss": 24.8626, + "step": 451230 + }, + { + "epoch": 0.9115333492244977, + "grad_norm": 480.7483825683594, + "learning_rate": 2.8690907660000156e-07, + "loss": 16.0808, + "step": 451240 + }, + { + "epoch": 0.9115535498571815, + "grad_norm": 106.62007904052734, + "learning_rate": 2.867925445391079e-07, + "loss": 11.1748, + "step": 451250 + }, + { + "epoch": 0.9115737504898653, + "grad_norm": 517.0779418945312, + "learning_rate": 2.8667603544981604e-07, + "loss": 17.3558, + "step": 451260 + }, + { + "epoch": 0.9115939511225492, + "grad_norm": 763.6796875, + "learning_rate": 2.8655954933269395e-07, + "loss": 25.0028, + "step": 451270 + }, + { + "epoch": 0.911614151755233, + "grad_norm": 448.9400634765625, + "learning_rate": 2.8644308618830775e-07, + "loss": 30.3073, + "step": 451280 + }, + { + "epoch": 0.9116343523879168, + "grad_norm": 110.02424621582031, + "learning_rate": 
2.86326646017227e-07, + "loss": 12.3114, + "step": 451290 + }, + { + "epoch": 0.9116545530206006, + "grad_norm": 60.046451568603516, + "learning_rate": 2.862102288200186e-07, + "loss": 8.2574, + "step": 451300 + }, + { + "epoch": 0.9116747536532844, + "grad_norm": 163.764892578125, + "learning_rate": 2.8609383459724915e-07, + "loss": 11.8364, + "step": 451310 + }, + { + "epoch": 0.9116949542859683, + "grad_norm": 462.7592468261719, + "learning_rate": 2.8597746334948773e-07, + "loss": 13.9652, + "step": 451320 + }, + { + "epoch": 0.9117151549186521, + "grad_norm": 387.3420104980469, + "learning_rate": 2.8586111507729887e-07, + "loss": 17.1762, + "step": 451330 + }, + { + "epoch": 0.9117353555513359, + "grad_norm": 213.70208740234375, + "learning_rate": 2.8574478978125266e-07, + "loss": 25.3365, + "step": 451340 + }, + { + "epoch": 0.9117555561840197, + "grad_norm": 193.23422241210938, + "learning_rate": 2.856284874619142e-07, + "loss": 14.605, + "step": 451350 + }, + { + "epoch": 0.9117757568167035, + "grad_norm": 355.51910400390625, + "learning_rate": 2.855122081198503e-07, + "loss": 14.1195, + "step": 451360 + }, + { + "epoch": 0.9117959574493874, + "grad_norm": 309.6023254394531, + "learning_rate": 2.8539595175562817e-07, + "loss": 14.2535, + "step": 451370 + }, + { + "epoch": 0.9118161580820712, + "grad_norm": 276.8885803222656, + "learning_rate": 2.852797183698147e-07, + "loss": 31.66, + "step": 451380 + }, + { + "epoch": 0.911836358714755, + "grad_norm": 233.3388214111328, + "learning_rate": 2.851635079629755e-07, + "loss": 18.4705, + "step": 451390 + }, + { + "epoch": 0.9118565593474388, + "grad_norm": 53.565189361572266, + "learning_rate": 2.850473205356774e-07, + "loss": 18.5574, + "step": 451400 + }, + { + "epoch": 0.9118767599801226, + "grad_norm": 290.7619934082031, + "learning_rate": 2.8493115608848764e-07, + "loss": 24.058, + "step": 451410 + }, + { + "epoch": 0.9118969606128063, + "grad_norm": 286.1277160644531, + "learning_rate": 2.8481501462197137e-07, + "loss": 12.422, + "step": 451420 + }, + { + "epoch": 0.9119171612454902, + "grad_norm": 622.9802856445312, + "learning_rate": 2.846988961366942e-07, + "loss": 22.359, + "step": 451430 + }, + { + "epoch": 0.911937361878174, + "grad_norm": 324.1952209472656, + "learning_rate": 2.8458280063322353e-07, + "loss": 23.3572, + "step": 451440 + }, + { + "epoch": 0.9119575625108578, + "grad_norm": 410.7866516113281, + "learning_rate": 2.844667281121244e-07, + "loss": 16.6071, + "step": 451450 + }, + { + "epoch": 0.9119777631435416, + "grad_norm": 475.54052734375, + "learning_rate": 2.843506785739614e-07, + "loss": 12.6368, + "step": 451460 + }, + { + "epoch": 0.9119979637762254, + "grad_norm": 35.28129959106445, + "learning_rate": 2.842346520193018e-07, + "loss": 18.0663, + "step": 451470 + }, + { + "epoch": 0.9120181644089093, + "grad_norm": 287.9037780761719, + "learning_rate": 2.8411864844871184e-07, + "loss": 14.8362, + "step": 451480 + }, + { + "epoch": 0.9120383650415931, + "grad_norm": 250.30825805664062, + "learning_rate": 2.8400266786275387e-07, + "loss": 34.2517, + "step": 451490 + }, + { + "epoch": 0.9120585656742769, + "grad_norm": 359.9864196777344, + "learning_rate": 2.838867102619952e-07, + "loss": 17.0671, + "step": 451500 + }, + { + "epoch": 0.9120787663069607, + "grad_norm": 276.7231750488281, + "learning_rate": 2.8377077564700094e-07, + "loss": 8.9851, + "step": 451510 + }, + { + "epoch": 0.9120989669396445, + "grad_norm": 338.2561340332031, + "learning_rate": 2.8365486401833677e-07, + "loss": 22.0601, + "step": 
451520 + }, + { + "epoch": 0.9121191675723284, + "grad_norm": 143.89337158203125, + "learning_rate": 2.835389753765655e-07, + "loss": 13.1001, + "step": 451530 + }, + { + "epoch": 0.9121393682050122, + "grad_norm": 683.7655639648438, + "learning_rate": 2.834231097222534e-07, + "loss": 26.8933, + "step": 451540 + }, + { + "epoch": 0.912159568837696, + "grad_norm": 487.2683410644531, + "learning_rate": 2.833072670559661e-07, + "loss": 21.1917, + "step": 451550 + }, + { + "epoch": 0.9121797694703798, + "grad_norm": 561.84423828125, + "learning_rate": 2.83191447378266e-07, + "loss": 19.7071, + "step": 451560 + }, + { + "epoch": 0.9121999701030636, + "grad_norm": 33.04185104370117, + "learning_rate": 2.8307565068971867e-07, + "loss": 18.7884, + "step": 451570 + }, + { + "epoch": 0.9122201707357475, + "grad_norm": 215.38392639160156, + "learning_rate": 2.829598769908892e-07, + "loss": 24.2191, + "step": 451580 + }, + { + "epoch": 0.9122403713684313, + "grad_norm": 159.6072998046875, + "learning_rate": 2.8284412628234117e-07, + "loss": 9.3734, + "step": 451590 + }, + { + "epoch": 0.9122605720011151, + "grad_norm": 5.168828010559082, + "learning_rate": 2.8272839856463783e-07, + "loss": 12.7489, + "step": 451600 + }, + { + "epoch": 0.9122807726337989, + "grad_norm": 598.6390380859375, + "learning_rate": 2.8261269383834497e-07, + "loss": 20.4365, + "step": 451610 + }, + { + "epoch": 0.9123009732664827, + "grad_norm": 304.0203857421875, + "learning_rate": 2.8249701210402603e-07, + "loss": 17.0099, + "step": 451620 + }, + { + "epoch": 0.9123211738991666, + "grad_norm": 471.2829895019531, + "learning_rate": 2.823813533622438e-07, + "loss": 15.8112, + "step": 451630 + }, + { + "epoch": 0.9123413745318504, + "grad_norm": 779.8455810546875, + "learning_rate": 2.822657176135629e-07, + "loss": 27.6026, + "step": 451640 + }, + { + "epoch": 0.9123615751645342, + "grad_norm": 398.429443359375, + "learning_rate": 2.821501048585462e-07, + "loss": 52.8884, + "step": 451650 + }, + { + "epoch": 0.912381775797218, + "grad_norm": 173.2937774658203, + "learning_rate": 2.8203451509775825e-07, + "loss": 14.2675, + "step": 451660 + }, + { + "epoch": 0.9124019764299018, + "grad_norm": 0.9098075032234192, + "learning_rate": 2.819189483317625e-07, + "loss": 17.9382, + "step": 451670 + }, + { + "epoch": 0.9124221770625855, + "grad_norm": 557.1135864257812, + "learning_rate": 2.818034045611201e-07, + "loss": 19.7519, + "step": 451680 + }, + { + "epoch": 0.9124423776952694, + "grad_norm": 325.0290832519531, + "learning_rate": 2.816878837863968e-07, + "loss": 10.5954, + "step": 451690 + }, + { + "epoch": 0.9124625783279532, + "grad_norm": 497.5835876464844, + "learning_rate": 2.815723860081537e-07, + "loss": 16.6184, + "step": 451700 + }, + { + "epoch": 0.912482778960637, + "grad_norm": 197.95315551757812, + "learning_rate": 2.8145691122695496e-07, + "loss": 20.7503, + "step": 451710 + }, + { + "epoch": 0.9125029795933208, + "grad_norm": 424.4306945800781, + "learning_rate": 2.8134145944336225e-07, + "loss": 15.6808, + "step": 451720 + }, + { + "epoch": 0.9125231802260046, + "grad_norm": 193.1788330078125, + "learning_rate": 2.812260306579401e-07, + "loss": 20.6186, + "step": 451730 + }, + { + "epoch": 0.9125433808586885, + "grad_norm": 717.1135864257812, + "learning_rate": 2.811106248712497e-07, + "loss": 26.0767, + "step": 451740 + }, + { + "epoch": 0.9125635814913723, + "grad_norm": 459.9844665527344, + "learning_rate": 2.8099524208385297e-07, + "loss": 29.406, + "step": 451750 + }, + { + "epoch": 0.9125837821240561, + 
"grad_norm": 380.0570373535156, + "learning_rate": 2.8087988229631325e-07, + "loss": 11.3463, + "step": 451760 + }, + { + "epoch": 0.9126039827567399, + "grad_norm": 562.419189453125, + "learning_rate": 2.8076454550919397e-07, + "loss": 21.1921, + "step": 451770 + }, + { + "epoch": 0.9126241833894237, + "grad_norm": 874.0545043945312, + "learning_rate": 2.8064923172305467e-07, + "loss": 27.8008, + "step": 451780 + }, + { + "epoch": 0.9126443840221076, + "grad_norm": 218.60658264160156, + "learning_rate": 2.8053394093845833e-07, + "loss": 15.5657, + "step": 451790 + }, + { + "epoch": 0.9126645846547914, + "grad_norm": 72.1780014038086, + "learning_rate": 2.804186731559677e-07, + "loss": 11.2906, + "step": 451800 + }, + { + "epoch": 0.9126847852874752, + "grad_norm": 530.6395263671875, + "learning_rate": 2.8030342837614466e-07, + "loss": 17.5073, + "step": 451810 + }, + { + "epoch": 0.912704985920159, + "grad_norm": 183.9311981201172, + "learning_rate": 2.8018820659954927e-07, + "loss": 14.8489, + "step": 451820 + }, + { + "epoch": 0.9127251865528428, + "grad_norm": 110.54678344726562, + "learning_rate": 2.800730078267444e-07, + "loss": 14.3235, + "step": 451830 + }, + { + "epoch": 0.9127453871855267, + "grad_norm": 10.480724334716797, + "learning_rate": 2.7995783205829185e-07, + "loss": 14.0507, + "step": 451840 + }, + { + "epoch": 0.9127655878182105, + "grad_norm": 188.81947326660156, + "learning_rate": 2.798426792947517e-07, + "loss": 18.8643, + "step": 451850 + }, + { + "epoch": 0.9127857884508943, + "grad_norm": 501.7898254394531, + "learning_rate": 2.7972754953668524e-07, + "loss": 17.7551, + "step": 451860 + }, + { + "epoch": 0.9128059890835781, + "grad_norm": 352.3951110839844, + "learning_rate": 2.796124427846553e-07, + "loss": 11.06, + "step": 451870 + }, + { + "epoch": 0.912826189716262, + "grad_norm": 364.9849853515625, + "learning_rate": 2.7949735903922195e-07, + "loss": 16.1715, + "step": 451880 + }, + { + "epoch": 0.9128463903489458, + "grad_norm": 404.40289306640625, + "learning_rate": 2.7938229830094475e-07, + "loss": 13.3248, + "step": 451890 + }, + { + "epoch": 0.9128665909816296, + "grad_norm": 885.2887573242188, + "learning_rate": 2.792672605703867e-07, + "loss": 23.7981, + "step": 451900 + }, + { + "epoch": 0.9128867916143134, + "grad_norm": 442.0524597167969, + "learning_rate": 2.791522458481077e-07, + "loss": 21.1115, + "step": 451910 + }, + { + "epoch": 0.9129069922469972, + "grad_norm": 656.0565795898438, + "learning_rate": 2.79037254134667e-07, + "loss": 24.4948, + "step": 451920 + }, + { + "epoch": 0.9129271928796809, + "grad_norm": 265.2845458984375, + "learning_rate": 2.7892228543062725e-07, + "loss": 8.6274, + "step": 451930 + }, + { + "epoch": 0.9129473935123648, + "grad_norm": 973.04541015625, + "learning_rate": 2.788073397365465e-07, + "loss": 26.0115, + "step": 451940 + }, + { + "epoch": 0.9129675941450486, + "grad_norm": 154.58677673339844, + "learning_rate": 2.78692417052987e-07, + "loss": 17.974, + "step": 451950 + }, + { + "epoch": 0.9129877947777324, + "grad_norm": 412.13519287109375, + "learning_rate": 2.785775173805083e-07, + "loss": 26.1495, + "step": 451960 + }, + { + "epoch": 0.9130079954104162, + "grad_norm": 714.805908203125, + "learning_rate": 2.784626407196689e-07, + "loss": 18.7356, + "step": 451970 + }, + { + "epoch": 0.9130281960431, + "grad_norm": 215.3949432373047, + "learning_rate": 2.7834778707103104e-07, + "loss": 13.2698, + "step": 451980 + }, + { + "epoch": 0.9130483966757839, + "grad_norm": 10.791740417480469, + "learning_rate": 
2.782329564351532e-07, + "loss": 31.0468, + "step": 451990 + }, + { + "epoch": 0.9130685973084677, + "grad_norm": 588.0308227539062, + "learning_rate": 2.7811814881259503e-07, + "loss": 32.0709, + "step": 452000 + }, + { + "epoch": 0.9130887979411515, + "grad_norm": 636.9493408203125, + "learning_rate": 2.7800336420391593e-07, + "loss": 17.1677, + "step": 452010 + }, + { + "epoch": 0.9131089985738353, + "grad_norm": 351.098876953125, + "learning_rate": 2.7788860260967665e-07, + "loss": 21.9418, + "step": 452020 + }, + { + "epoch": 0.9131291992065191, + "grad_norm": 490.53692626953125, + "learning_rate": 2.77773864030435e-07, + "loss": 17.6471, + "step": 452030 + }, + { + "epoch": 0.913149399839203, + "grad_norm": 453.8436279296875, + "learning_rate": 2.7765914846675067e-07, + "loss": 26.3111, + "step": 452040 + }, + { + "epoch": 0.9131696004718868, + "grad_norm": 264.65277099609375, + "learning_rate": 2.775444559191837e-07, + "loss": 16.9386, + "step": 452050 + }, + { + "epoch": 0.9131898011045706, + "grad_norm": 389.0132751464844, + "learning_rate": 2.774297863882919e-07, + "loss": 21.6635, + "step": 452060 + }, + { + "epoch": 0.9132100017372544, + "grad_norm": 171.96925354003906, + "learning_rate": 2.773151398746338e-07, + "loss": 8.1321, + "step": 452070 + }, + { + "epoch": 0.9132302023699382, + "grad_norm": 102.16289520263672, + "learning_rate": 2.772005163787689e-07, + "loss": 10.6967, + "step": 452080 + }, + { + "epoch": 0.9132504030026221, + "grad_norm": 133.51788330078125, + "learning_rate": 2.770859159012579e-07, + "loss": 12.4846, + "step": 452090 + }, + { + "epoch": 0.9132706036353059, + "grad_norm": 315.4050598144531, + "learning_rate": 2.7697133844265535e-07, + "loss": 19.5759, + "step": 452100 + }, + { + "epoch": 0.9132908042679897, + "grad_norm": 29.029733657836914, + "learning_rate": 2.768567840035219e-07, + "loss": 27.0451, + "step": 452110 + }, + { + "epoch": 0.9133110049006735, + "grad_norm": 409.8620300292969, + "learning_rate": 2.76742252584416e-07, + "loss": 10.4875, + "step": 452120 + }, + { + "epoch": 0.9133312055333573, + "grad_norm": 29.181663513183594, + "learning_rate": 2.7662774418589555e-07, + "loss": 10.0926, + "step": 452130 + }, + { + "epoch": 0.9133514061660412, + "grad_norm": 651.210693359375, + "learning_rate": 2.765132588085184e-07, + "loss": 23.9028, + "step": 452140 + }, + { + "epoch": 0.913371606798725, + "grad_norm": 248.01182556152344, + "learning_rate": 2.763987964528425e-07, + "loss": 14.2047, + "step": 452150 + }, + { + "epoch": 0.9133918074314088, + "grad_norm": 453.2279052734375, + "learning_rate": 2.7628435711942737e-07, + "loss": 18.6502, + "step": 452160 + }, + { + "epoch": 0.9134120080640926, + "grad_norm": 344.0987548828125, + "learning_rate": 2.7616994080882754e-07, + "loss": 24.7988, + "step": 452170 + }, + { + "epoch": 0.9134322086967764, + "grad_norm": 455.11712646484375, + "learning_rate": 2.7605554752160256e-07, + "loss": 21.2957, + "step": 452180 + }, + { + "epoch": 0.9134524093294601, + "grad_norm": 76.14151763916016, + "learning_rate": 2.7594117725831096e-07, + "loss": 7.8477, + "step": 452190 + }, + { + "epoch": 0.913472609962144, + "grad_norm": 342.2422180175781, + "learning_rate": 2.758268300195094e-07, + "loss": 12.5107, + "step": 452200 + }, + { + "epoch": 0.9134928105948278, + "grad_norm": 320.7900390625, + "learning_rate": 2.757125058057536e-07, + "loss": 10.5416, + "step": 452210 + }, + { + "epoch": 0.9135130112275116, + "grad_norm": 340.1474609375, + "learning_rate": 2.755982046176031e-07, + "loss": 19.4271, + "step": 
452220 + }, + { + "epoch": 0.9135332118601954, + "grad_norm": 202.52476501464844, + "learning_rate": 2.754839264556136e-07, + "loss": 6.2309, + "step": 452230 + }, + { + "epoch": 0.9135534124928792, + "grad_norm": 294.6946105957031, + "learning_rate": 2.7536967132034186e-07, + "loss": 11.8811, + "step": 452240 + }, + { + "epoch": 0.9135736131255631, + "grad_norm": 639.5032958984375, + "learning_rate": 2.752554392123463e-07, + "loss": 23.6904, + "step": 452250 + }, + { + "epoch": 0.9135938137582469, + "grad_norm": 500.483642578125, + "learning_rate": 2.7514123013218153e-07, + "loss": 22.7042, + "step": 452260 + }, + { + "epoch": 0.9136140143909307, + "grad_norm": 311.4737548828125, + "learning_rate": 2.750270440804065e-07, + "loss": 11.9557, + "step": 452270 + }, + { + "epoch": 0.9136342150236145, + "grad_norm": 100.47586059570312, + "learning_rate": 2.749128810575763e-07, + "loss": 14.1911, + "step": 452280 + }, + { + "epoch": 0.9136544156562983, + "grad_norm": 356.00457763671875, + "learning_rate": 2.747987410642472e-07, + "loss": 23.2069, + "step": 452290 + }, + { + "epoch": 0.9136746162889822, + "grad_norm": 248.7677001953125, + "learning_rate": 2.746846241009765e-07, + "loss": 11.1875, + "step": 452300 + }, + { + "epoch": 0.913694816921666, + "grad_norm": 617.8887329101562, + "learning_rate": 2.745705301683188e-07, + "loss": 15.8752, + "step": 452310 + }, + { + "epoch": 0.9137150175543498, + "grad_norm": 313.4349060058594, + "learning_rate": 2.7445645926683253e-07, + "loss": 22.381, + "step": 452320 + }, + { + "epoch": 0.9137352181870336, + "grad_norm": 353.8231201171875, + "learning_rate": 2.7434241139707106e-07, + "loss": 16.0729, + "step": 452330 + }, + { + "epoch": 0.9137554188197174, + "grad_norm": 184.85678100585938, + "learning_rate": 2.742283865595924e-07, + "loss": 13.1759, + "step": 452340 + }, + { + "epoch": 0.9137756194524013, + "grad_norm": 421.85113525390625, + "learning_rate": 2.7411438475495155e-07, + "loss": 16.8529, + "step": 452350 + }, + { + "epoch": 0.9137958200850851, + "grad_norm": 700.2169189453125, + "learning_rate": 2.740004059837031e-07, + "loss": 18.6863, + "step": 452360 + }, + { + "epoch": 0.9138160207177689, + "grad_norm": 408.6905517578125, + "learning_rate": 2.738864502464045e-07, + "loss": 19.4036, + "step": 452370 + }, + { + "epoch": 0.9138362213504527, + "grad_norm": 341.9888000488281, + "learning_rate": 2.737725175436101e-07, + "loss": 14.0515, + "step": 452380 + }, + { + "epoch": 0.9138564219831365, + "grad_norm": 401.954345703125, + "learning_rate": 2.7365860787587405e-07, + "loss": 9.6258, + "step": 452390 + }, + { + "epoch": 0.9138766226158204, + "grad_norm": 23.526723861694336, + "learning_rate": 2.735447212437531e-07, + "loss": 27.0535, + "step": 452400 + }, + { + "epoch": 0.9138968232485042, + "grad_norm": 1.1871169805526733, + "learning_rate": 2.734308576478023e-07, + "loss": 13.1915, + "step": 452410 + }, + { + "epoch": 0.913917023881188, + "grad_norm": 668.0006103515625, + "learning_rate": 2.733170170885768e-07, + "loss": 21.8141, + "step": 452420 + }, + { + "epoch": 0.9139372245138718, + "grad_norm": 413.64727783203125, + "learning_rate": 2.7320319956662957e-07, + "loss": 19.9112, + "step": 452430 + }, + { + "epoch": 0.9139574251465555, + "grad_norm": 692.726318359375, + "learning_rate": 2.730894050825178e-07, + "loss": 6.8907, + "step": 452440 + }, + { + "epoch": 0.9139776257792394, + "grad_norm": 22.914690017700195, + "learning_rate": 2.72975633636795e-07, + "loss": 11.7867, + "step": 452450 + }, + { + "epoch": 0.9139978264119232, + 
"grad_norm": 361.56939697265625, + "learning_rate": 2.728618852300147e-07, + "loss": 10.5111, + "step": 452460 + }, + { + "epoch": 0.914018027044607, + "grad_norm": 424.5031433105469, + "learning_rate": 2.727481598627324e-07, + "loss": 14.1596, + "step": 452470 + }, + { + "epoch": 0.9140382276772908, + "grad_norm": 325.74468994140625, + "learning_rate": 2.7263445753550275e-07, + "loss": 6.4521, + "step": 452480 + }, + { + "epoch": 0.9140584283099746, + "grad_norm": 468.8731384277344, + "learning_rate": 2.725207782488792e-07, + "loss": 18.0635, + "step": 452490 + }, + { + "epoch": 0.9140786289426585, + "grad_norm": 428.7283020019531, + "learning_rate": 2.724071220034158e-07, + "loss": 22.1494, + "step": 452500 + }, + { + "epoch": 0.9140988295753423, + "grad_norm": 658.4942626953125, + "learning_rate": 2.72293488799667e-07, + "loss": 23.3455, + "step": 452510 + }, + { + "epoch": 0.9141190302080261, + "grad_norm": 221.12045288085938, + "learning_rate": 2.7217987863818684e-07, + "loss": 28.4971, + "step": 452520 + }, + { + "epoch": 0.9141392308407099, + "grad_norm": 68.4049301147461, + "learning_rate": 2.7206629151952715e-07, + "loss": 17.7994, + "step": 452530 + }, + { + "epoch": 0.9141594314733937, + "grad_norm": 116.92928314208984, + "learning_rate": 2.7195272744424405e-07, + "loss": 15.2393, + "step": 452540 + }, + { + "epoch": 0.9141796321060776, + "grad_norm": 373.884521484375, + "learning_rate": 2.7183918641288943e-07, + "loss": 9.9782, + "step": 452550 + }, + { + "epoch": 0.9141998327387614, + "grad_norm": 490.6842041015625, + "learning_rate": 2.717256684260172e-07, + "loss": 22.5185, + "step": 452560 + }, + { + "epoch": 0.9142200333714452, + "grad_norm": 161.93197631835938, + "learning_rate": 2.716121734841814e-07, + "loss": 28.5874, + "step": 452570 + }, + { + "epoch": 0.914240234004129, + "grad_norm": 130.264404296875, + "learning_rate": 2.714987015879328e-07, + "loss": 20.4128, + "step": 452580 + }, + { + "epoch": 0.9142604346368128, + "grad_norm": 932.6390991210938, + "learning_rate": 2.7138525273782746e-07, + "loss": 23.6868, + "step": 452590 + }, + { + "epoch": 0.9142806352694967, + "grad_norm": 156.4149627685547, + "learning_rate": 2.712718269344161e-07, + "loss": 14.7229, + "step": 452600 + }, + { + "epoch": 0.9143008359021805, + "grad_norm": 90.23873138427734, + "learning_rate": 2.711584241782528e-07, + "loss": 13.9489, + "step": 452610 + }, + { + "epoch": 0.9143210365348643, + "grad_norm": 210.1337127685547, + "learning_rate": 2.7104504446988867e-07, + "loss": 20.9073, + "step": 452620 + }, + { + "epoch": 0.9143412371675481, + "grad_norm": 329.8204040527344, + "learning_rate": 2.709316878098789e-07, + "loss": 14.034, + "step": 452630 + }, + { + "epoch": 0.9143614378002319, + "grad_norm": 465.3182067871094, + "learning_rate": 2.708183541987741e-07, + "loss": 19.5663, + "step": 452640 + }, + { + "epoch": 0.9143816384329158, + "grad_norm": 193.5091094970703, + "learning_rate": 2.707050436371267e-07, + "loss": 16.4573, + "step": 452650 + }, + { + "epoch": 0.9144018390655996, + "grad_norm": 298.7574462890625, + "learning_rate": 2.7059175612548947e-07, + "loss": 19.0406, + "step": 452660 + }, + { + "epoch": 0.9144220396982834, + "grad_norm": 359.96295166015625, + "learning_rate": 2.7047849166441487e-07, + "loss": 31.8732, + "step": 452670 + }, + { + "epoch": 0.9144422403309672, + "grad_norm": 387.7774963378906, + "learning_rate": 2.703652502544535e-07, + "loss": 23.7531, + "step": 452680 + }, + { + "epoch": 0.914462440963651, + "grad_norm": 192.9261932373047, + "learning_rate": 
2.702520318961588e-07, + "loss": 18.7476, + "step": 452690 + }, + { + "epoch": 0.9144826415963347, + "grad_norm": 229.85899353027344, + "learning_rate": 2.701388365900831e-07, + "loss": 17.4647, + "step": 452700 + }, + { + "epoch": 0.9145028422290186, + "grad_norm": 377.1651306152344, + "learning_rate": 2.7002566433677547e-07, + "loss": 16.6451, + "step": 452710 + }, + { + "epoch": 0.9145230428617024, + "grad_norm": 153.7214813232422, + "learning_rate": 2.699125151367893e-07, + "loss": 17.1172, + "step": 452720 + }, + { + "epoch": 0.9145432434943862, + "grad_norm": 193.17724609375, + "learning_rate": 2.697993889906764e-07, + "loss": 14.337, + "step": 452730 + }, + { + "epoch": 0.91456344412707, + "grad_norm": 263.640869140625, + "learning_rate": 2.6968628589898735e-07, + "loss": 8.4065, + "step": 452740 + }, + { + "epoch": 0.9145836447597538, + "grad_norm": 96.6507568359375, + "learning_rate": 2.6957320586227354e-07, + "loss": 25.2692, + "step": 452750 + }, + { + "epoch": 0.9146038453924377, + "grad_norm": 7.664963245391846, + "learning_rate": 2.694601488810855e-07, + "loss": 19.9627, + "step": 452760 + }, + { + "epoch": 0.9146240460251215, + "grad_norm": 634.0137329101562, + "learning_rate": 2.6934711495597676e-07, + "loss": 45.0058, + "step": 452770 + }, + { + "epoch": 0.9146442466578053, + "grad_norm": 621.3983764648438, + "learning_rate": 2.6923410408749516e-07, + "loss": 17.0445, + "step": 452780 + }, + { + "epoch": 0.9146644472904891, + "grad_norm": 284.60247802734375, + "learning_rate": 2.6912111627619255e-07, + "loss": 21.5207, + "step": 452790 + }, + { + "epoch": 0.9146846479231729, + "grad_norm": 216.87062072753906, + "learning_rate": 2.690081515226206e-07, + "loss": 23.3396, + "step": 452800 + }, + { + "epoch": 0.9147048485558568, + "grad_norm": 274.0249938964844, + "learning_rate": 2.6889520982732897e-07, + "loss": 11.5307, + "step": 452810 + }, + { + "epoch": 0.9147250491885406, + "grad_norm": 52.05552673339844, + "learning_rate": 2.6878229119086776e-07, + "loss": 8.4213, + "step": 452820 + }, + { + "epoch": 0.9147452498212244, + "grad_norm": 188.5215606689453, + "learning_rate": 2.6866939561378867e-07, + "loss": 17.838, + "step": 452830 + }, + { + "epoch": 0.9147654504539082, + "grad_norm": 419.7026062011719, + "learning_rate": 2.685565230966408e-07, + "loss": 17.6637, + "step": 452840 + }, + { + "epoch": 0.914785651086592, + "grad_norm": 525.0553588867188, + "learning_rate": 2.684436736399737e-07, + "loss": 14.4383, + "step": 452850 + }, + { + "epoch": 0.9148058517192759, + "grad_norm": 333.9505920410156, + "learning_rate": 2.6833084724433965e-07, + "loss": 25.6344, + "step": 452860 + }, + { + "epoch": 0.9148260523519597, + "grad_norm": 261.0413513183594, + "learning_rate": 2.6821804391028603e-07, + "loss": 17.0195, + "step": 452870 + }, + { + "epoch": 0.9148462529846435, + "grad_norm": 58.79597854614258, + "learning_rate": 2.681052636383641e-07, + "loss": 15.6878, + "step": 452880 + }, + { + "epoch": 0.9148664536173273, + "grad_norm": 906.9078979492188, + "learning_rate": 2.679925064291239e-07, + "loss": 24.5474, + "step": 452890 + }, + { + "epoch": 0.9148866542500111, + "grad_norm": 172.41342163085938, + "learning_rate": 2.6787977228311336e-07, + "loss": 12.6641, + "step": 452900 + }, + { + "epoch": 0.914906854882695, + "grad_norm": 290.7709655761719, + "learning_rate": 2.677670612008837e-07, + "loss": 28.5541, + "step": 452910 + }, + { + "epoch": 0.9149270555153788, + "grad_norm": 383.07562255859375, + "learning_rate": 2.676543731829823e-07, + "loss": 22.4324, + "step": 
452920 + }, + { + "epoch": 0.9149472561480626, + "grad_norm": 501.7777404785156, + "learning_rate": 2.6754170822996026e-07, + "loss": 11.6085, + "step": 452930 + }, + { + "epoch": 0.9149674567807464, + "grad_norm": 1093.78369140625, + "learning_rate": 2.6742906634236564e-07, + "loss": 17.3962, + "step": 452940 + }, + { + "epoch": 0.9149876574134301, + "grad_norm": 355.9870300292969, + "learning_rate": 2.6731644752074846e-07, + "loss": 16.4126, + "step": 452950 + }, + { + "epoch": 0.915007858046114, + "grad_norm": 244.96832275390625, + "learning_rate": 2.6720385176565664e-07, + "loss": 10.274, + "step": 452960 + }, + { + "epoch": 0.9150280586787978, + "grad_norm": 823.0773315429688, + "learning_rate": 2.6709127907763864e-07, + "loss": 30.8585, + "step": 452970 + }, + { + "epoch": 0.9150482593114816, + "grad_norm": 54.019229888916016, + "learning_rate": 2.6697872945724455e-07, + "loss": 14.8269, + "step": 452980 + }, + { + "epoch": 0.9150684599441654, + "grad_norm": 484.1335754394531, + "learning_rate": 2.668662029050217e-07, + "loss": 14.5493, + "step": 452990 + }, + { + "epoch": 0.9150886605768492, + "grad_norm": 500.1659851074219, + "learning_rate": 2.6675369942151864e-07, + "loss": 22.1963, + "step": 453000 + }, + { + "epoch": 0.915108861209533, + "grad_norm": 514.647216796875, + "learning_rate": 2.666412190072837e-07, + "loss": 30.4295, + "step": 453010 + }, + { + "epoch": 0.9151290618422169, + "grad_norm": 443.3310546875, + "learning_rate": 2.665287616628659e-07, + "loss": 9.98, + "step": 453020 + }, + { + "epoch": 0.9151492624749007, + "grad_norm": 290.30291748046875, + "learning_rate": 2.6641632738881315e-07, + "loss": 17.9727, + "step": 453030 + }, + { + "epoch": 0.9151694631075845, + "grad_norm": 235.2632293701172, + "learning_rate": 2.663039161856723e-07, + "loss": 13.8964, + "step": 453040 + }, + { + "epoch": 0.9151896637402683, + "grad_norm": 122.74141693115234, + "learning_rate": 2.6619152805399286e-07, + "loss": 21.4631, + "step": 453050 + }, + { + "epoch": 0.9152098643729522, + "grad_norm": 160.0275421142578, + "learning_rate": 2.660791629943216e-07, + "loss": 32.995, + "step": 453060 + }, + { + "epoch": 0.915230065005636, + "grad_norm": 74.91618347167969, + "learning_rate": 2.659668210072058e-07, + "loss": 10.9474, + "step": 453070 + }, + { + "epoch": 0.9152502656383198, + "grad_norm": 386.40838623046875, + "learning_rate": 2.658545020931935e-07, + "loss": 31.9575, + "step": 453080 + }, + { + "epoch": 0.9152704662710036, + "grad_norm": 303.7977600097656, + "learning_rate": 2.657422062528325e-07, + "loss": 18.1284, + "step": 453090 + }, + { + "epoch": 0.9152906669036874, + "grad_norm": 856.51220703125, + "learning_rate": 2.656299334866702e-07, + "loss": 22.5285, + "step": 453100 + }, + { + "epoch": 0.9153108675363713, + "grad_norm": 381.6043395996094, + "learning_rate": 2.655176837952528e-07, + "loss": 24.3001, + "step": 453110 + }, + { + "epoch": 0.9153310681690551, + "grad_norm": 246.74102783203125, + "learning_rate": 2.654054571791287e-07, + "loss": 11.1632, + "step": 453120 + }, + { + "epoch": 0.9153512688017389, + "grad_norm": 494.12615966796875, + "learning_rate": 2.6529325363884364e-07, + "loss": 17.3095, + "step": 453130 + }, + { + "epoch": 0.9153714694344227, + "grad_norm": 129.7801055908203, + "learning_rate": 2.651810731749449e-07, + "loss": 12.1936, + "step": 453140 + }, + { + "epoch": 0.9153916700671065, + "grad_norm": 249.99185180664062, + "learning_rate": 2.650689157879799e-07, + "loss": 8.0272, + "step": 453150 + }, + { + "epoch": 0.9154118706997904, + 
"grad_norm": 524.1873168945312, + "learning_rate": 2.649567814784937e-07, + "loss": 37.0262, + "step": 453160 + }, + { + "epoch": 0.9154320713324742, + "grad_norm": 212.4214630126953, + "learning_rate": 2.6484467024703476e-07, + "loss": 17.7013, + "step": 453170 + }, + { + "epoch": 0.915452271965158, + "grad_norm": 446.6779479980469, + "learning_rate": 2.647325820941488e-07, + "loss": 15.0334, + "step": 453180 + }, + { + "epoch": 0.9154724725978418, + "grad_norm": 331.09869384765625, + "learning_rate": 2.6462051702038085e-07, + "loss": 14.7586, + "step": 453190 + }, + { + "epoch": 0.9154926732305256, + "grad_norm": 382.7553405761719, + "learning_rate": 2.6450847502627883e-07, + "loss": 24.1565, + "step": 453200 + }, + { + "epoch": 0.9155128738632093, + "grad_norm": 365.6215515136719, + "learning_rate": 2.6439645611238795e-07, + "loss": 20.7367, + "step": 453210 + }, + { + "epoch": 0.9155330744958932, + "grad_norm": 598.3663940429688, + "learning_rate": 2.642844602792544e-07, + "loss": 17.08, + "step": 453220 + }, + { + "epoch": 0.915553275128577, + "grad_norm": 184.0043487548828, + "learning_rate": 2.6417248752742374e-07, + "loss": 17.1081, + "step": 453230 + }, + { + "epoch": 0.9155734757612608, + "grad_norm": 380.4300231933594, + "learning_rate": 2.640605378574429e-07, + "loss": 26.9159, + "step": 453240 + }, + { + "epoch": 0.9155936763939446, + "grad_norm": 263.845703125, + "learning_rate": 2.639486112698564e-07, + "loss": 20.9774, + "step": 453250 + }, + { + "epoch": 0.9156138770266284, + "grad_norm": 513.1378173828125, + "learning_rate": 2.6383670776520933e-07, + "loss": 25.778, + "step": 453260 + }, + { + "epoch": 0.9156340776593123, + "grad_norm": 421.67596435546875, + "learning_rate": 2.637248273440479e-07, + "loss": 20.1958, + "step": 453270 + }, + { + "epoch": 0.9156542782919961, + "grad_norm": 430.4900207519531, + "learning_rate": 2.6361297000691787e-07, + "loss": 11.1612, + "step": 453280 + }, + { + "epoch": 0.9156744789246799, + "grad_norm": 284.833984375, + "learning_rate": 2.6350113575436266e-07, + "loss": 8.8739, + "step": 453290 + }, + { + "epoch": 0.9156946795573637, + "grad_norm": 409.1342468261719, + "learning_rate": 2.6338932458692847e-07, + "loss": 12.7456, + "step": 453300 + }, + { + "epoch": 0.9157148801900475, + "grad_norm": 1214.3470458984375, + "learning_rate": 2.6327753650516205e-07, + "loss": 46.1228, + "step": 453310 + }, + { + "epoch": 0.9157350808227314, + "grad_norm": 333.0218505859375, + "learning_rate": 2.631657715096048e-07, + "loss": 11.3531, + "step": 453320 + }, + { + "epoch": 0.9157552814554152, + "grad_norm": 98.55928802490234, + "learning_rate": 2.630540296008027e-07, + "loss": 5.1327, + "step": 453330 + }, + { + "epoch": 0.915775482088099, + "grad_norm": 674.3197021484375, + "learning_rate": 2.629423107793022e-07, + "loss": 23.4372, + "step": 453340 + }, + { + "epoch": 0.9157956827207828, + "grad_norm": 350.0754089355469, + "learning_rate": 2.6283061504564553e-07, + "loss": 14.2001, + "step": 453350 + }, + { + "epoch": 0.9158158833534666, + "grad_norm": 382.4748840332031, + "learning_rate": 2.6271894240037785e-07, + "loss": 17.2192, + "step": 453360 + }, + { + "epoch": 0.9158360839861505, + "grad_norm": 423.0195617675781, + "learning_rate": 2.626072928440432e-07, + "loss": 16.7564, + "step": 453370 + }, + { + "epoch": 0.9158562846188343, + "grad_norm": 386.2308654785156, + "learning_rate": 2.6249566637718714e-07, + "loss": 21.5422, + "step": 453380 + }, + { + "epoch": 0.9158764852515181, + "grad_norm": 250.33633422851562, + "learning_rate": 
2.623840630003516e-07, + "loss": 12.7983, + "step": 453390 + }, + { + "epoch": 0.9158966858842019, + "grad_norm": 0.0, + "learning_rate": 2.622724827140816e-07, + "loss": 26.3335, + "step": 453400 + }, + { + "epoch": 0.9159168865168857, + "grad_norm": 266.47052001953125, + "learning_rate": 2.6216092551892116e-07, + "loss": 17.5531, + "step": 453410 + }, + { + "epoch": 0.9159370871495696, + "grad_norm": 374.290771484375, + "learning_rate": 2.6204939141541376e-07, + "loss": 17.8065, + "step": 453420 + }, + { + "epoch": 0.9159572877822534, + "grad_norm": 368.16546630859375, + "learning_rate": 2.6193788040410286e-07, + "loss": 22.8737, + "step": 453430 + }, + { + "epoch": 0.9159774884149372, + "grad_norm": 354.1971435546875, + "learning_rate": 2.618263924855324e-07, + "loss": 18.6407, + "step": 453440 + }, + { + "epoch": 0.915997689047621, + "grad_norm": 318.41448974609375, + "learning_rate": 2.617149276602454e-07, + "loss": 26.7618, + "step": 453450 + }, + { + "epoch": 0.9160178896803048, + "grad_norm": 4.6045918464660645, + "learning_rate": 2.616034859287847e-07, + "loss": 19.446, + "step": 453460 + }, + { + "epoch": 0.9160380903129886, + "grad_norm": 251.6952362060547, + "learning_rate": 2.614920672916943e-07, + "loss": 13.2661, + "step": 453470 + }, + { + "epoch": 0.9160582909456724, + "grad_norm": 194.96925354003906, + "learning_rate": 2.61380671749516e-07, + "loss": 12.1911, + "step": 453480 + }, + { + "epoch": 0.9160784915783562, + "grad_norm": 76.82911682128906, + "learning_rate": 2.6126929930279486e-07, + "loss": 18.5787, + "step": 453490 + }, + { + "epoch": 0.91609869221104, + "grad_norm": 472.5380859375, + "learning_rate": 2.611579499520722e-07, + "loss": 24.2646, + "step": 453500 + }, + { + "epoch": 0.9161188928437238, + "grad_norm": 149.053466796875, + "learning_rate": 2.610466236978898e-07, + "loss": 21.6734, + "step": 453510 + }, + { + "epoch": 0.9161390934764077, + "grad_norm": 177.4880828857422, + "learning_rate": 2.6093532054079276e-07, + "loss": 17.4667, + "step": 453520 + }, + { + "epoch": 0.9161592941090915, + "grad_norm": 147.05726623535156, + "learning_rate": 2.6082404048132114e-07, + "loss": 12.7033, + "step": 453530 + }, + { + "epoch": 0.9161794947417753, + "grad_norm": 410.37933349609375, + "learning_rate": 2.6071278352001904e-07, + "loss": 18.072, + "step": 453540 + }, + { + "epoch": 0.9161996953744591, + "grad_norm": 114.84696197509766, + "learning_rate": 2.606015496574277e-07, + "loss": 20.3603, + "step": 453550 + }, + { + "epoch": 0.9162198960071429, + "grad_norm": 125.01507568359375, + "learning_rate": 2.604903388940899e-07, + "loss": 23.2023, + "step": 453560 + }, + { + "epoch": 0.9162400966398268, + "grad_norm": 164.6056365966797, + "learning_rate": 2.60379151230547e-07, + "loss": 13.9271, + "step": 453570 + }, + { + "epoch": 0.9162602972725106, + "grad_norm": 285.9284973144531, + "learning_rate": 2.602679866673413e-07, + "loss": 18.8505, + "step": 453580 + }, + { + "epoch": 0.9162804979051944, + "grad_norm": 394.2331848144531, + "learning_rate": 2.601568452050146e-07, + "loss": 27.4004, + "step": 453590 + }, + { + "epoch": 0.9163006985378782, + "grad_norm": 301.9587707519531, + "learning_rate": 2.600457268441092e-07, + "loss": 21.1456, + "step": 453600 + }, + { + "epoch": 0.916320899170562, + "grad_norm": 417.49981689453125, + "learning_rate": 2.599346315851653e-07, + "loss": 17.002, + "step": 453610 + }, + { + "epoch": 0.9163410998032459, + "grad_norm": 1888.8367919921875, + "learning_rate": 2.598235594287246e-07, + "loss": 53.7972, + "step": 453620 + }, + { 
+ "epoch": 0.9163613004359297, + "grad_norm": 2.622469186782837, + "learning_rate": 2.597125103753301e-07, + "loss": 17.3838, + "step": 453630 + }, + { + "epoch": 0.9163815010686135, + "grad_norm": 417.6042785644531, + "learning_rate": 2.596014844255218e-07, + "loss": 17.2987, + "step": 453640 + }, + { + "epoch": 0.9164017017012973, + "grad_norm": 831.579833984375, + "learning_rate": 2.594904815798399e-07, + "loss": 15.5263, + "step": 453650 + }, + { + "epoch": 0.9164219023339811, + "grad_norm": 425.7539978027344, + "learning_rate": 2.5937950183882734e-07, + "loss": 21.3763, + "step": 453660 + }, + { + "epoch": 0.916442102966665, + "grad_norm": 59.693458557128906, + "learning_rate": 2.5926854520302414e-07, + "loss": 30.4629, + "step": 453670 + }, + { + "epoch": 0.9164623035993488, + "grad_norm": 582.9328002929688, + "learning_rate": 2.591576116729705e-07, + "loss": 27.7754, + "step": 453680 + }, + { + "epoch": 0.9164825042320326, + "grad_norm": 186.99293518066406, + "learning_rate": 2.590467012492076e-07, + "loss": 20.5637, + "step": 453690 + }, + { + "epoch": 0.9165027048647164, + "grad_norm": 334.56634521484375, + "learning_rate": 2.589358139322767e-07, + "loss": 21.4026, + "step": 453700 + }, + { + "epoch": 0.9165229054974002, + "grad_norm": 414.5623779296875, + "learning_rate": 2.5882494972271746e-07, + "loss": 10.8386, + "step": 453710 + }, + { + "epoch": 0.9165431061300839, + "grad_norm": 290.22833251953125, + "learning_rate": 2.587141086210698e-07, + "loss": 10.4992, + "step": 453720 + }, + { + "epoch": 0.9165633067627678, + "grad_norm": 399.252197265625, + "learning_rate": 2.586032906278757e-07, + "loss": 16.0113, + "step": 453730 + }, + { + "epoch": 0.9165835073954516, + "grad_norm": 763.1583862304688, + "learning_rate": 2.584924957436735e-07, + "loss": 19.304, + "step": 453740 + }, + { + "epoch": 0.9166037080281354, + "grad_norm": 534.4200439453125, + "learning_rate": 2.583817239690034e-07, + "loss": 13.5357, + "step": 453750 + }, + { + "epoch": 0.9166239086608192, + "grad_norm": 162.89605712890625, + "learning_rate": 2.5827097530440605e-07, + "loss": 19.0077, + "step": 453760 + }, + { + "epoch": 0.916644109293503, + "grad_norm": 399.8823547363281, + "learning_rate": 2.581602497504204e-07, + "loss": 13.0953, + "step": 453770 + }, + { + "epoch": 0.9166643099261869, + "grad_norm": 227.8401336669922, + "learning_rate": 2.580495473075878e-07, + "loss": 12.9742, + "step": 453780 + }, + { + "epoch": 0.9166845105588707, + "grad_norm": 545.9440307617188, + "learning_rate": 2.579388679764455e-07, + "loss": 20.8741, + "step": 453790 + }, + { + "epoch": 0.9167047111915545, + "grad_norm": 400.27392578125, + "learning_rate": 2.578282117575343e-07, + "loss": 20.9395, + "step": 453800 + }, + { + "epoch": 0.9167249118242383, + "grad_norm": 30.04183578491211, + "learning_rate": 2.577175786513936e-07, + "loss": 33.2292, + "step": 453810 + }, + { + "epoch": 0.9167451124569221, + "grad_norm": 386.2145080566406, + "learning_rate": 2.576069686585614e-07, + "loss": 11.1608, + "step": 453820 + }, + { + "epoch": 0.916765313089606, + "grad_norm": 608.4188232421875, + "learning_rate": 2.5749638177957834e-07, + "loss": 24.0531, + "step": 453830 + }, + { + "epoch": 0.9167855137222898, + "grad_norm": 363.9738464355469, + "learning_rate": 2.5738581801498187e-07, + "loss": 25.8142, + "step": 453840 + }, + { + "epoch": 0.9168057143549736, + "grad_norm": 498.20318603515625, + "learning_rate": 2.5727527736531256e-07, + "loss": 22.7052, + "step": 453850 + }, + { + "epoch": 0.9168259149876574, + "grad_norm": 
287.67144775390625, + "learning_rate": 2.571647598311089e-07, + "loss": 28.2199, + "step": 453860 + }, + { + "epoch": 0.9168461156203412, + "grad_norm": 356.0575866699219, + "learning_rate": 2.5705426541290765e-07, + "loss": 28.7977, + "step": 453870 + }, + { + "epoch": 0.9168663162530251, + "grad_norm": 364.90789794921875, + "learning_rate": 2.56943794111249e-07, + "loss": 13.9711, + "step": 453880 + }, + { + "epoch": 0.9168865168857089, + "grad_norm": 352.82049560546875, + "learning_rate": 2.5683334592667195e-07, + "loss": 17.8761, + "step": 453890 + }, + { + "epoch": 0.9169067175183927, + "grad_norm": 500.10650634765625, + "learning_rate": 2.5672292085971276e-07, + "loss": 19.1851, + "step": 453900 + }, + { + "epoch": 0.9169269181510765, + "grad_norm": 606.4287719726562, + "learning_rate": 2.5661251891091087e-07, + "loss": 29.4943, + "step": 453910 + }, + { + "epoch": 0.9169471187837603, + "grad_norm": 287.93060302734375, + "learning_rate": 2.5650214008080544e-07, + "loss": 11.703, + "step": 453920 + }, + { + "epoch": 0.9169673194164442, + "grad_norm": 257.391357421875, + "learning_rate": 2.5639178436993205e-07, + "loss": 7.1054, + "step": 453930 + }, + { + "epoch": 0.916987520049128, + "grad_norm": 299.9127502441406, + "learning_rate": 2.5628145177882926e-07, + "loss": 14.3462, + "step": 453940 + }, + { + "epoch": 0.9170077206818118, + "grad_norm": 271.0744934082031, + "learning_rate": 2.561711423080365e-07, + "loss": 16.4024, + "step": 453950 + }, + { + "epoch": 0.9170279213144956, + "grad_norm": 438.03692626953125, + "learning_rate": 2.5606085595809015e-07, + "loss": 19.034, + "step": 453960 + }, + { + "epoch": 0.9170481219471794, + "grad_norm": 61.587188720703125, + "learning_rate": 2.559505927295275e-07, + "loss": 13.6311, + "step": 453970 + }, + { + "epoch": 0.9170683225798631, + "grad_norm": 294.6064758300781, + "learning_rate": 2.5584035262288585e-07, + "loss": 15.7928, + "step": 453980 + }, + { + "epoch": 0.917088523212547, + "grad_norm": 643.0828247070312, + "learning_rate": 2.557301356387043e-07, + "loss": 15.0719, + "step": 453990 + }, + { + "epoch": 0.9171087238452308, + "grad_norm": 880.5946044921875, + "learning_rate": 2.556199417775174e-07, + "loss": 20.1695, + "step": 454000 + }, + { + "epoch": 0.9171289244779146, + "grad_norm": 547.711669921875, + "learning_rate": 2.555097710398635e-07, + "loss": 16.7549, + "step": 454010 + }, + { + "epoch": 0.9171491251105984, + "grad_norm": 849.7205200195312, + "learning_rate": 2.553996234262801e-07, + "loss": 19.2328, + "step": 454020 + }, + { + "epoch": 0.9171693257432822, + "grad_norm": 251.0746307373047, + "learning_rate": 2.5528949893730393e-07, + "loss": 11.8855, + "step": 454030 + }, + { + "epoch": 0.9171895263759661, + "grad_norm": 719.6102905273438, + "learning_rate": 2.551793975734701e-07, + "loss": 24.0901, + "step": 454040 + }, + { + "epoch": 0.9172097270086499, + "grad_norm": 897.33056640625, + "learning_rate": 2.550693193353171e-07, + "loss": 19.3591, + "step": 454050 + }, + { + "epoch": 0.9172299276413337, + "grad_norm": 1040.27001953125, + "learning_rate": 2.5495926422338115e-07, + "loss": 31.5606, + "step": 454060 + }, + { + "epoch": 0.9172501282740175, + "grad_norm": 364.1861572265625, + "learning_rate": 2.548492322381968e-07, + "loss": 18.4093, + "step": 454070 + }, + { + "epoch": 0.9172703289067013, + "grad_norm": 278.04632568359375, + "learning_rate": 2.547392233803031e-07, + "loss": 10.8303, + "step": 454080 + }, + { + "epoch": 0.9172905295393852, + "grad_norm": 7105.796875, + "learning_rate": 
2.5462923765023404e-07, + "loss": 99.3802, + "step": 454090 + }, + { + "epoch": 0.917310730172069, + "grad_norm": 169.97467041015625, + "learning_rate": 2.5451927504852757e-07, + "loss": 15.1717, + "step": 454100 + }, + { + "epoch": 0.9173309308047528, + "grad_norm": 299.2624206542969, + "learning_rate": 2.544093355757171e-07, + "loss": 13.147, + "step": 454110 + }, + { + "epoch": 0.9173511314374366, + "grad_norm": 228.32669067382812, + "learning_rate": 2.5429941923234114e-07, + "loss": 18.2561, + "step": 454120 + }, + { + "epoch": 0.9173713320701204, + "grad_norm": 1094.8414306640625, + "learning_rate": 2.541895260189342e-07, + "loss": 21.1897, + "step": 454130 + }, + { + "epoch": 0.9173915327028043, + "grad_norm": 386.129638671875, + "learning_rate": 2.5407965593603147e-07, + "loss": 9.8599, + "step": 454140 + }, + { + "epoch": 0.9174117333354881, + "grad_norm": 22.72983169555664, + "learning_rate": 2.539698089841691e-07, + "loss": 28.813, + "step": 454150 + }, + { + "epoch": 0.9174319339681719, + "grad_norm": 307.68890380859375, + "learning_rate": 2.538599851638818e-07, + "loss": 13.8743, + "step": 454160 + }, + { + "epoch": 0.9174521346008557, + "grad_norm": 477.8509216308594, + "learning_rate": 2.537501844757062e-07, + "loss": 11.512, + "step": 454170 + }, + { + "epoch": 0.9174723352335395, + "grad_norm": 852.4013061523438, + "learning_rate": 2.5364040692017644e-07, + "loss": 17.6096, + "step": 454180 + }, + { + "epoch": 0.9174925358662234, + "grad_norm": 0.0, + "learning_rate": 2.5353065249782647e-07, + "loss": 19.7076, + "step": 454190 + }, + { + "epoch": 0.9175127364989072, + "grad_norm": 74.62406921386719, + "learning_rate": 2.534209212091937e-07, + "loss": 19.8793, + "step": 454200 + }, + { + "epoch": 0.917532937131591, + "grad_norm": 248.11647033691406, + "learning_rate": 2.5331121305481154e-07, + "loss": 13.3752, + "step": 454210 + }, + { + "epoch": 0.9175531377642748, + "grad_norm": 529.7850952148438, + "learning_rate": 2.53201528035214e-07, + "loss": 17.367, + "step": 454220 + }, + { + "epoch": 0.9175733383969585, + "grad_norm": 640.7024536132812, + "learning_rate": 2.530918661509368e-07, + "loss": 16.6216, + "step": 454230 + }, + { + "epoch": 0.9175935390296424, + "grad_norm": 394.8306579589844, + "learning_rate": 2.529822274025151e-07, + "loss": 34.8331, + "step": 454240 + }, + { + "epoch": 0.9176137396623262, + "grad_norm": 270.83837890625, + "learning_rate": 2.5287261179048117e-07, + "loss": 14.5951, + "step": 454250 + }, + { + "epoch": 0.91763394029501, + "grad_norm": 38.40205001831055, + "learning_rate": 2.5276301931537015e-07, + "loss": 11.557, + "step": 454260 + }, + { + "epoch": 0.9176541409276938, + "grad_norm": 283.07025146484375, + "learning_rate": 2.5265344997771726e-07, + "loss": 13.8433, + "step": 454270 + }, + { + "epoch": 0.9176743415603776, + "grad_norm": 469.7500915527344, + "learning_rate": 2.525439037780558e-07, + "loss": 18.4683, + "step": 454280 + }, + { + "epoch": 0.9176945421930615, + "grad_norm": 574.3396606445312, + "learning_rate": 2.5243438071691826e-07, + "loss": 11.2677, + "step": 454290 + }, + { + "epoch": 0.9177147428257453, + "grad_norm": 244.5355682373047, + "learning_rate": 2.523248807948403e-07, + "loss": 19.2707, + "step": 454300 + }, + { + "epoch": 0.9177349434584291, + "grad_norm": 128.16741943359375, + "learning_rate": 2.522154040123559e-07, + "loss": 31.2086, + "step": 454310 + }, + { + "epoch": 0.9177551440911129, + "grad_norm": 91.43888854980469, + "learning_rate": 2.521059503699974e-07, + "loss": 50.0533, + "step": 454320 + }, + { 
+ "epoch": 0.9177753447237967, + "grad_norm": 523.4545288085938, + "learning_rate": 2.5199651986829777e-07, + "loss": 9.7866, + "step": 454330 + }, + { + "epoch": 0.9177955453564806, + "grad_norm": 693.93310546875, + "learning_rate": 2.518871125077926e-07, + "loss": 27.2713, + "step": 454340 + }, + { + "epoch": 0.9178157459891644, + "grad_norm": 682.9830932617188, + "learning_rate": 2.5177772828901327e-07, + "loss": 17.2413, + "step": 454350 + }, + { + "epoch": 0.9178359466218482, + "grad_norm": 181.77784729003906, + "learning_rate": 2.5166836721249254e-07, + "loss": 23.461, + "step": 454360 + }, + { + "epoch": 0.917856147254532, + "grad_norm": 505.5752258300781, + "learning_rate": 2.515590292787656e-07, + "loss": 14.8428, + "step": 454370 + }, + { + "epoch": 0.9178763478872158, + "grad_norm": 173.8253173828125, + "learning_rate": 2.5144971448836263e-07, + "loss": 19.8574, + "step": 454380 + }, + { + "epoch": 0.9178965485198997, + "grad_norm": 465.9610595703125, + "learning_rate": 2.5134042284181927e-07, + "loss": 11.3948, + "step": 454390 + }, + { + "epoch": 0.9179167491525835, + "grad_norm": 168.74269104003906, + "learning_rate": 2.5123115433966615e-07, + "loss": 16.2418, + "step": 454400 + }, + { + "epoch": 0.9179369497852673, + "grad_norm": 270.9518737792969, + "learning_rate": 2.5112190898243627e-07, + "loss": 26.7706, + "step": 454410 + }, + { + "epoch": 0.9179571504179511, + "grad_norm": 157.22084045410156, + "learning_rate": 2.5101268677066247e-07, + "loss": 16.8187, + "step": 454420 + }, + { + "epoch": 0.9179773510506349, + "grad_norm": 1096.5396728515625, + "learning_rate": 2.5090348770487604e-07, + "loss": 17.5447, + "step": 454430 + }, + { + "epoch": 0.9179975516833188, + "grad_norm": 61.21339797973633, + "learning_rate": 2.50794311785611e-07, + "loss": 14.8221, + "step": 454440 + }, + { + "epoch": 0.9180177523160026, + "grad_norm": 505.9404602050781, + "learning_rate": 2.5068515901339794e-07, + "loss": 15.3988, + "step": 454450 + }, + { + "epoch": 0.9180379529486864, + "grad_norm": 299.4237365722656, + "learning_rate": 2.505760293887699e-07, + "loss": 10.9603, + "step": 454460 + }, + { + "epoch": 0.9180581535813702, + "grad_norm": 218.8555145263672, + "learning_rate": 2.5046692291225803e-07, + "loss": 5.6634, + "step": 454470 + }, + { + "epoch": 0.918078354214054, + "grad_norm": 427.2490539550781, + "learning_rate": 2.503578395843936e-07, + "loss": 15.9759, + "step": 454480 + }, + { + "epoch": 0.9180985548467377, + "grad_norm": 130.83230590820312, + "learning_rate": 2.502487794057101e-07, + "loss": 20.5652, + "step": 454490 + }, + { + "epoch": 0.9181187554794216, + "grad_norm": 522.4658203125, + "learning_rate": 2.5013974237673824e-07, + "loss": 21.8345, + "step": 454500 + }, + { + "epoch": 0.9181389561121054, + "grad_norm": 0.0, + "learning_rate": 2.50030728498008e-07, + "loss": 18.9641, + "step": 454510 + }, + { + "epoch": 0.9181591567447892, + "grad_norm": 367.7684020996094, + "learning_rate": 2.499217377700519e-07, + "loss": 19.4399, + "step": 454520 + }, + { + "epoch": 0.918179357377473, + "grad_norm": 331.11248779296875, + "learning_rate": 2.498127701934022e-07, + "loss": 13.3787, + "step": 454530 + }, + { + "epoch": 0.9181995580101568, + "grad_norm": 173.78582763671875, + "learning_rate": 2.49703825768588e-07, + "loss": 15.3256, + "step": 454540 + }, + { + "epoch": 0.9182197586428407, + "grad_norm": 425.071533203125, + "learning_rate": 2.49594904496141e-07, + "loss": 23.0658, + "step": 454550 + }, + { + "epoch": 0.9182399592755245, + "grad_norm": 405.1095275878906, + 
"learning_rate": 2.494860063765936e-07, + "loss": 31.5979, + "step": 454560 + }, + { + "epoch": 0.9182601599082083, + "grad_norm": 200.28211975097656, + "learning_rate": 2.493771314104743e-07, + "loss": 12.9257, + "step": 454570 + }, + { + "epoch": 0.9182803605408921, + "grad_norm": 636.622802734375, + "learning_rate": 2.492682795983148e-07, + "loss": 26.3459, + "step": 454580 + }, + { + "epoch": 0.918300561173576, + "grad_norm": 449.2371520996094, + "learning_rate": 2.4915945094064476e-07, + "loss": 21.1839, + "step": 454590 + }, + { + "epoch": 0.9183207618062598, + "grad_norm": 494.1687927246094, + "learning_rate": 2.4905064543799706e-07, + "loss": 34.0085, + "step": 454600 + }, + { + "epoch": 0.9183409624389436, + "grad_norm": 625.0794067382812, + "learning_rate": 2.4894186309089906e-07, + "loss": 28.0983, + "step": 454610 + }, + { + "epoch": 0.9183611630716274, + "grad_norm": 321.6091613769531, + "learning_rate": 2.48833103899882e-07, + "loss": 10.3564, + "step": 454620 + }, + { + "epoch": 0.9183813637043112, + "grad_norm": 101.4336166381836, + "learning_rate": 2.487243678654772e-07, + "loss": 24.6452, + "step": 454630 + }, + { + "epoch": 0.918401564336995, + "grad_norm": 319.6427001953125, + "learning_rate": 2.486156549882135e-07, + "loss": 24.801, + "step": 454640 + }, + { + "epoch": 0.9184217649696789, + "grad_norm": 511.2293701171875, + "learning_rate": 2.485069652686195e-07, + "loss": 21.2323, + "step": 454650 + }, + { + "epoch": 0.9184419656023627, + "grad_norm": 1020.9444580078125, + "learning_rate": 2.4839829870722756e-07, + "loss": 20.0905, + "step": 454660 + }, + { + "epoch": 0.9184621662350465, + "grad_norm": 81.75325012207031, + "learning_rate": 2.482896553045661e-07, + "loss": 14.3172, + "step": 454670 + }, + { + "epoch": 0.9184823668677303, + "grad_norm": 431.46844482421875, + "learning_rate": 2.4818103506116355e-07, + "loss": 19.0111, + "step": 454680 + }, + { + "epoch": 0.9185025675004141, + "grad_norm": 302.5388488769531, + "learning_rate": 2.4807243797755064e-07, + "loss": 17.3512, + "step": 454690 + }, + { + "epoch": 0.918522768133098, + "grad_norm": 289.9342041015625, + "learning_rate": 2.479638640542564e-07, + "loss": 21.5833, + "step": 454700 + }, + { + "epoch": 0.9185429687657818, + "grad_norm": 292.45037841796875, + "learning_rate": 2.478553132918099e-07, + "loss": 15.3687, + "step": 454710 + }, + { + "epoch": 0.9185631693984656, + "grad_norm": 312.3753356933594, + "learning_rate": 2.477467856907401e-07, + "loss": 21.3034, + "step": 454720 + }, + { + "epoch": 0.9185833700311494, + "grad_norm": 407.63671875, + "learning_rate": 2.4763828125157654e-07, + "loss": 21.7776, + "step": 454730 + }, + { + "epoch": 0.9186035706638332, + "grad_norm": 203.3709716796875, + "learning_rate": 2.4752979997484774e-07, + "loss": 27.606, + "step": 454740 + }, + { + "epoch": 0.918623771296517, + "grad_norm": 219.9977264404297, + "learning_rate": 2.474213418610816e-07, + "loss": 10.9427, + "step": 454750 + }, + { + "epoch": 0.9186439719292008, + "grad_norm": 118.15936279296875, + "learning_rate": 2.4731290691080766e-07, + "loss": 14.6467, + "step": 454760 + }, + { + "epoch": 0.9186641725618846, + "grad_norm": 804.6563110351562, + "learning_rate": 2.472044951245539e-07, + "loss": 14.8232, + "step": 454770 + }, + { + "epoch": 0.9186843731945684, + "grad_norm": 118.620849609375, + "learning_rate": 2.470961065028499e-07, + "loss": 8.4102, + "step": 454780 + }, + { + "epoch": 0.9187045738272522, + "grad_norm": 88.24150085449219, + "learning_rate": 2.4698774104622235e-07, + "loss": 16.5455, 
+ "step": 454790 + }, + { + "epoch": 0.918724774459936, + "grad_norm": 339.7093200683594, + "learning_rate": 2.4687939875519984e-07, + "loss": 12.3436, + "step": 454800 + }, + { + "epoch": 0.9187449750926199, + "grad_norm": 1048.246826171875, + "learning_rate": 2.4677107963031134e-07, + "loss": 22.8728, + "step": 454810 + }, + { + "epoch": 0.9187651757253037, + "grad_norm": 0.4774147868156433, + "learning_rate": 2.4666278367208417e-07, + "loss": 9.4854, + "step": 454820 + }, + { + "epoch": 0.9187853763579875, + "grad_norm": 353.9394226074219, + "learning_rate": 2.465545108810452e-07, + "loss": 12.8216, + "step": 454830 + }, + { + "epoch": 0.9188055769906713, + "grad_norm": 17.323734283447266, + "learning_rate": 2.464462612577234e-07, + "loss": 20.1536, + "step": 454840 + }, + { + "epoch": 0.9188257776233552, + "grad_norm": 464.7160339355469, + "learning_rate": 2.463380348026467e-07, + "loss": 19.4973, + "step": 454850 + }, + { + "epoch": 0.918845978256039, + "grad_norm": 355.2198486328125, + "learning_rate": 2.4622983151634083e-07, + "loss": 22.1437, + "step": 454860 + }, + { + "epoch": 0.9188661788887228, + "grad_norm": 98.9280776977539, + "learning_rate": 2.461216513993342e-07, + "loss": 7.2628, + "step": 454870 + }, + { + "epoch": 0.9188863795214066, + "grad_norm": 291.2118225097656, + "learning_rate": 2.460134944521547e-07, + "loss": 15.7866, + "step": 454880 + }, + { + "epoch": 0.9189065801540904, + "grad_norm": 68.61520385742188, + "learning_rate": 2.459053606753292e-07, + "loss": 27.5811, + "step": 454890 + }, + { + "epoch": 0.9189267807867743, + "grad_norm": 555.388671875, + "learning_rate": 2.457972500693834e-07, + "loss": 27.5479, + "step": 454900 + }, + { + "epoch": 0.9189469814194581, + "grad_norm": 220.7833251953125, + "learning_rate": 2.456891626348451e-07, + "loss": 11.7957, + "step": 454910 + }, + { + "epoch": 0.9189671820521419, + "grad_norm": 337.0918884277344, + "learning_rate": 2.455810983722429e-07, + "loss": 21.3151, + "step": 454920 + }, + { + "epoch": 0.9189873826848257, + "grad_norm": 540.9188842773438, + "learning_rate": 2.4547305728210015e-07, + "loss": 19.5621, + "step": 454930 + }, + { + "epoch": 0.9190075833175095, + "grad_norm": 21.81239891052246, + "learning_rate": 2.453650393649448e-07, + "loss": 19.4124, + "step": 454940 + }, + { + "epoch": 0.9190277839501934, + "grad_norm": 145.65936279296875, + "learning_rate": 2.4525704462130485e-07, + "loss": 10.6751, + "step": 454950 + }, + { + "epoch": 0.9190479845828772, + "grad_norm": 368.0899658203125, + "learning_rate": 2.4514907305170476e-07, + "loss": 13.6209, + "step": 454960 + }, + { + "epoch": 0.919068185215561, + "grad_norm": 228.02255249023438, + "learning_rate": 2.4504112465667085e-07, + "loss": 15.9716, + "step": 454970 + }, + { + "epoch": 0.9190883858482448, + "grad_norm": 207.96987915039062, + "learning_rate": 2.44933199436731e-07, + "loss": 9.5194, + "step": 454980 + }, + { + "epoch": 0.9191085864809286, + "grad_norm": 123.552490234375, + "learning_rate": 2.448252973924087e-07, + "loss": 18.464, + "step": 454990 + }, + { + "epoch": 0.9191287871136123, + "grad_norm": 89.00115203857422, + "learning_rate": 2.447174185242324e-07, + "loss": 16.1351, + "step": 455000 + }, + { + "epoch": 0.9191489877462962, + "grad_norm": 24.298992156982422, + "learning_rate": 2.446095628327261e-07, + "loss": 20.8026, + "step": 455010 + }, + { + "epoch": 0.91916918837898, + "grad_norm": 263.0375671386719, + "learning_rate": 2.4450173031841607e-07, + "loss": 18.0441, + "step": 455020 + }, + { + "epoch": 0.9191893890116638, + 
"grad_norm": 294.327392578125, + "learning_rate": 2.4439392098182804e-07, + "loss": 19.281, + "step": 455030 + }, + { + "epoch": 0.9192095896443476, + "grad_norm": 4.164112091064453, + "learning_rate": 2.442861348234865e-07, + "loss": 20.7177, + "step": 455040 + }, + { + "epoch": 0.9192297902770314, + "grad_norm": 289.1728210449219, + "learning_rate": 2.4417837184391833e-07, + "loss": 18.8782, + "step": 455050 + }, + { + "epoch": 0.9192499909097153, + "grad_norm": 171.1140899658203, + "learning_rate": 2.4407063204364703e-07, + "loss": 16.9614, + "step": 455060 + }, + { + "epoch": 0.9192701915423991, + "grad_norm": 142.26199340820312, + "learning_rate": 2.4396291542319985e-07, + "loss": 9.5314, + "step": 455070 + }, + { + "epoch": 0.9192903921750829, + "grad_norm": 285.46533203125, + "learning_rate": 2.438552219831003e-07, + "loss": 19.4052, + "step": 455080 + }, + { + "epoch": 0.9193105928077667, + "grad_norm": 98.38275909423828, + "learning_rate": 2.43747551723873e-07, + "loss": 13.5849, + "step": 455090 + }, + { + "epoch": 0.9193307934404505, + "grad_norm": 71.43067169189453, + "learning_rate": 2.4363990464604357e-07, + "loss": 13.79, + "step": 455100 + }, + { + "epoch": 0.9193509940731344, + "grad_norm": 370.7507019042969, + "learning_rate": 2.435322807501367e-07, + "loss": 11.0951, + "step": 455110 + }, + { + "epoch": 0.9193711947058182, + "grad_norm": 230.6715850830078, + "learning_rate": 2.4342468003667576e-07, + "loss": 13.7054, + "step": 455120 + }, + { + "epoch": 0.919391395338502, + "grad_norm": 126.7361068725586, + "learning_rate": 2.4331710250618647e-07, + "loss": 13.2743, + "step": 455130 + }, + { + "epoch": 0.9194115959711858, + "grad_norm": 314.3231201171875, + "learning_rate": 2.432095481591934e-07, + "loss": 15.3865, + "step": 455140 + }, + { + "epoch": 0.9194317966038696, + "grad_norm": 329.8556213378906, + "learning_rate": 2.4310201699621896e-07, + "loss": 6.9519, + "step": 455150 + }, + { + "epoch": 0.9194519972365535, + "grad_norm": 1005.0066528320312, + "learning_rate": 2.429945090177888e-07, + "loss": 16.0441, + "step": 455160 + }, + { + "epoch": 0.9194721978692373, + "grad_norm": 642.8953247070312, + "learning_rate": 2.4288702422442633e-07, + "loss": 21.0964, + "step": 455170 + }, + { + "epoch": 0.9194923985019211, + "grad_norm": 365.2342224121094, + "learning_rate": 2.4277956261665624e-07, + "loss": 23.2442, + "step": 455180 + }, + { + "epoch": 0.9195125991346049, + "grad_norm": 618.9346923828125, + "learning_rate": 2.426721241950003e-07, + "loss": 20.1788, + "step": 455190 + }, + { + "epoch": 0.9195327997672887, + "grad_norm": 18.485822677612305, + "learning_rate": 2.4256470895998363e-07, + "loss": 10.9366, + "step": 455200 + }, + { + "epoch": 0.9195530003999726, + "grad_norm": 779.13916015625, + "learning_rate": 2.4245731691213137e-07, + "loss": 21.7145, + "step": 455210 + }, + { + "epoch": 0.9195732010326564, + "grad_norm": 243.8010711669922, + "learning_rate": 2.423499480519631e-07, + "loss": 12.4242, + "step": 455220 + }, + { + "epoch": 0.9195934016653402, + "grad_norm": 560.136474609375, + "learning_rate": 2.4224260238000454e-07, + "loss": 17.1233, + "step": 455230 + }, + { + "epoch": 0.919613602298024, + "grad_norm": 554.8250122070312, + "learning_rate": 2.421352798967791e-07, + "loss": 21.4487, + "step": 455240 + }, + { + "epoch": 0.9196338029307078, + "grad_norm": 26.497678756713867, + "learning_rate": 2.420279806028092e-07, + "loss": 16.912, + "step": 455250 + }, + { + "epoch": 0.9196540035633916, + "grad_norm": 335.10186767578125, + "learning_rate": 
2.4192070449861717e-07, + "loss": 17.4943, + "step": 455260 + }, + { + "epoch": 0.9196742041960754, + "grad_norm": 429.57232666015625, + "learning_rate": 2.418134515847276e-07, + "loss": 16.5135, + "step": 455270 + }, + { + "epoch": 0.9196944048287592, + "grad_norm": 488.39642333984375, + "learning_rate": 2.417062218616617e-07, + "loss": 18.5194, + "step": 455280 + }, + { + "epoch": 0.919714605461443, + "grad_norm": 113.37300872802734, + "learning_rate": 2.415990153299419e-07, + "loss": 9.4051, + "step": 455290 + }, + { + "epoch": 0.9197348060941268, + "grad_norm": 277.68145751953125, + "learning_rate": 2.414918319900922e-07, + "loss": 15.3822, + "step": 455300 + }, + { + "epoch": 0.9197550067268107, + "grad_norm": 112.81552124023438, + "learning_rate": 2.413846718426338e-07, + "loss": 8.4589, + "step": 455310 + }, + { + "epoch": 0.9197752073594945, + "grad_norm": 36.12300491333008, + "learning_rate": 2.412775348880897e-07, + "loss": 11.6502, + "step": 455320 + }, + { + "epoch": 0.9197954079921783, + "grad_norm": 292.3298034667969, + "learning_rate": 2.4117042112698107e-07, + "loss": 22.0428, + "step": 455330 + }, + { + "epoch": 0.9198156086248621, + "grad_norm": 442.1577453613281, + "learning_rate": 2.410633305598309e-07, + "loss": 10.4833, + "step": 455340 + }, + { + "epoch": 0.9198358092575459, + "grad_norm": 704.8218383789062, + "learning_rate": 2.4095626318716146e-07, + "loss": 17.864, + "step": 455350 + }, + { + "epoch": 0.9198560098902298, + "grad_norm": 6.83566951751709, + "learning_rate": 2.40849219009493e-07, + "loss": 16.1764, + "step": 455360 + }, + { + "epoch": 0.9198762105229136, + "grad_norm": 534.9774780273438, + "learning_rate": 2.407421980273489e-07, + "loss": 15.982, + "step": 455370 + }, + { + "epoch": 0.9198964111555974, + "grad_norm": 246.0690460205078, + "learning_rate": 2.406352002412499e-07, + "loss": 12.9856, + "step": 455380 + }, + { + "epoch": 0.9199166117882812, + "grad_norm": 541.630126953125, + "learning_rate": 2.4052822565171775e-07, + "loss": 35.701, + "step": 455390 + }, + { + "epoch": 0.919936812420965, + "grad_norm": 363.7542419433594, + "learning_rate": 2.404212742592743e-07, + "loss": 11.3771, + "step": 455400 + }, + { + "epoch": 0.9199570130536489, + "grad_norm": 228.14138793945312, + "learning_rate": 2.4031434606443914e-07, + "loss": 18.2189, + "step": 455410 + }, + { + "epoch": 0.9199772136863327, + "grad_norm": 1217.557861328125, + "learning_rate": 2.4020744106773573e-07, + "loss": 16.9061, + "step": 455420 + }, + { + "epoch": 0.9199974143190165, + "grad_norm": 165.05870056152344, + "learning_rate": 2.401005592696837e-07, + "loss": 5.6709, + "step": 455430 + }, + { + "epoch": 0.9200176149517003, + "grad_norm": 351.95660400390625, + "learning_rate": 2.399937006708036e-07, + "loss": 15.8542, + "step": 455440 + }, + { + "epoch": 0.9200378155843841, + "grad_norm": 201.84127807617188, + "learning_rate": 2.3988686527161686e-07, + "loss": 10.515, + "step": 455450 + }, + { + "epoch": 0.920058016217068, + "grad_norm": 519.5714111328125, + "learning_rate": 2.3978005307264517e-07, + "loss": 15.5738, + "step": 455460 + }, + { + "epoch": 0.9200782168497518, + "grad_norm": 326.5455627441406, + "learning_rate": 2.396732640744076e-07, + "loss": 16.2957, + "step": 455470 + }, + { + "epoch": 0.9200984174824356, + "grad_norm": 346.3192443847656, + "learning_rate": 2.395664982774243e-07, + "loss": 10.8901, + "step": 455480 + }, + { + "epoch": 0.9201186181151194, + "grad_norm": 306.8151550292969, + "learning_rate": 2.3945975568221814e-07, + "loss": 12.2765, + "step": 
455490 + }, + { + "epoch": 0.9201388187478032, + "grad_norm": 392.6286315917969, + "learning_rate": 2.3935303628930705e-07, + "loss": 19.6382, + "step": 455500 + }, + { + "epoch": 0.9201590193804869, + "grad_norm": 130.42257690429688, + "learning_rate": 2.392463400992112e-07, + "loss": 26.4354, + "step": 455510 + }, + { + "epoch": 0.9201792200131708, + "grad_norm": 803.9397583007812, + "learning_rate": 2.3913966711245185e-07, + "loss": 24.5252, + "step": 455520 + }, + { + "epoch": 0.9201994206458546, + "grad_norm": 530.8009033203125, + "learning_rate": 2.390330173295491e-07, + "loss": 17.6739, + "step": 455530 + }, + { + "epoch": 0.9202196212785384, + "grad_norm": 526.763671875, + "learning_rate": 2.389263907510209e-07, + "loss": 21.4294, + "step": 455540 + }, + { + "epoch": 0.9202398219112222, + "grad_norm": 629.4849853515625, + "learning_rate": 2.388197873773879e-07, + "loss": 19.7043, + "step": 455550 + }, + { + "epoch": 0.920260022543906, + "grad_norm": 201.6699981689453, + "learning_rate": 2.387132072091708e-07, + "loss": 17.9511, + "step": 455560 + }, + { + "epoch": 0.9202802231765899, + "grad_norm": 579.7809448242188, + "learning_rate": 2.3860665024688757e-07, + "loss": 18.4522, + "step": 455570 + }, + { + "epoch": 0.9203004238092737, + "grad_norm": 169.97409057617188, + "learning_rate": 2.3850011649105774e-07, + "loss": 16.2272, + "step": 455580 + }, + { + "epoch": 0.9203206244419575, + "grad_norm": 380.0137023925781, + "learning_rate": 2.3839360594220094e-07, + "loss": 8.5757, + "step": 455590 + }, + { + "epoch": 0.9203408250746413, + "grad_norm": 528.6257934570312, + "learning_rate": 2.3828711860083676e-07, + "loss": 11.0365, + "step": 455600 + }, + { + "epoch": 0.9203610257073251, + "grad_norm": 328.374755859375, + "learning_rate": 2.3818065446748306e-07, + "loss": 15.4559, + "step": 455610 + }, + { + "epoch": 0.920381226340009, + "grad_norm": 558.6610717773438, + "learning_rate": 2.380742135426589e-07, + "loss": 28.1747, + "step": 455620 + }, + { + "epoch": 0.9204014269726928, + "grad_norm": 217.7898712158203, + "learning_rate": 2.3796779582688444e-07, + "loss": 28.7116, + "step": 455630 + }, + { + "epoch": 0.9204216276053766, + "grad_norm": 835.9548950195312, + "learning_rate": 2.3786140132067703e-07, + "loss": 14.7242, + "step": 455640 + }, + { + "epoch": 0.9204418282380604, + "grad_norm": 386.291748046875, + "learning_rate": 2.3775503002455514e-07, + "loss": 17.8009, + "step": 455650 + }, + { + "epoch": 0.9204620288707442, + "grad_norm": 310.6231384277344, + "learning_rate": 2.3764868193903835e-07, + "loss": 36.5693, + "step": 455660 + }, + { + "epoch": 0.9204822295034281, + "grad_norm": 138.1139373779297, + "learning_rate": 2.3754235706464457e-07, + "loss": 14.5568, + "step": 455670 + }, + { + "epoch": 0.9205024301361119, + "grad_norm": 622.2490844726562, + "learning_rate": 2.3743605540189063e-07, + "loss": 23.2112, + "step": 455680 + }, + { + "epoch": 0.9205226307687957, + "grad_norm": 221.71322631835938, + "learning_rate": 2.3732977695129612e-07, + "loss": 17.7805, + "step": 455690 + }, + { + "epoch": 0.9205428314014795, + "grad_norm": 127.730712890625, + "learning_rate": 2.3722352171337836e-07, + "loss": 22.0893, + "step": 455700 + }, + { + "epoch": 0.9205630320341633, + "grad_norm": 697.8600463867188, + "learning_rate": 2.3711728968865643e-07, + "loss": 21.791, + "step": 455710 + }, + { + "epoch": 0.9205832326668472, + "grad_norm": 485.99853515625, + "learning_rate": 2.3701108087764657e-07, + "loss": 9.2866, + "step": 455720 + }, + { + "epoch": 0.920603433299531, + 
"grad_norm": 415.9108581542969, + "learning_rate": 2.3690489528086668e-07, + "loss": 10.3244, + "step": 455730 + }, + { + "epoch": 0.9206236339322148, + "grad_norm": 396.4830322265625, + "learning_rate": 2.367987328988347e-07, + "loss": 12.8008, + "step": 455740 + }, + { + "epoch": 0.9206438345648986, + "grad_norm": 450.2491760253906, + "learning_rate": 2.366925937320691e-07, + "loss": 23.0683, + "step": 455750 + }, + { + "epoch": 0.9206640351975824, + "grad_norm": 267.2088623046875, + "learning_rate": 2.36586477781085e-07, + "loss": 25.6832, + "step": 455760 + }, + { + "epoch": 0.9206842358302662, + "grad_norm": 849.0910034179688, + "learning_rate": 2.3648038504640036e-07, + "loss": 15.9315, + "step": 455770 + }, + { + "epoch": 0.92070443646295, + "grad_norm": 1106.893798828125, + "learning_rate": 2.3637431552853363e-07, + "loss": 17.4516, + "step": 455780 + }, + { + "epoch": 0.9207246370956338, + "grad_norm": 865.7428588867188, + "learning_rate": 2.362682692280005e-07, + "loss": 26.9862, + "step": 455790 + }, + { + "epoch": 0.9207448377283176, + "grad_norm": 594.6658325195312, + "learning_rate": 2.361622461453178e-07, + "loss": 19.4022, + "step": 455800 + }, + { + "epoch": 0.9207650383610014, + "grad_norm": 248.6200714111328, + "learning_rate": 2.3605624628100178e-07, + "loss": 15.1621, + "step": 455810 + }, + { + "epoch": 0.9207852389936853, + "grad_norm": 509.28375244140625, + "learning_rate": 2.3595026963557145e-07, + "loss": 18.1621, + "step": 455820 + }, + { + "epoch": 0.9208054396263691, + "grad_norm": 231.5879669189453, + "learning_rate": 2.3584431620954085e-07, + "loss": 17.4094, + "step": 455830 + }, + { + "epoch": 0.9208256402590529, + "grad_norm": 151.7647705078125, + "learning_rate": 2.357383860034268e-07, + "loss": 16.7027, + "step": 455840 + }, + { + "epoch": 0.9208458408917367, + "grad_norm": 314.00775146484375, + "learning_rate": 2.3563247901774666e-07, + "loss": 11.1552, + "step": 455850 + }, + { + "epoch": 0.9208660415244205, + "grad_norm": 340.83489990234375, + "learning_rate": 2.3552659525301557e-07, + "loss": 13.9794, + "step": 455860 + }, + { + "epoch": 0.9208862421571044, + "grad_norm": 376.7004089355469, + "learning_rate": 2.354207347097498e-07, + "loss": 19.8883, + "step": 455870 + }, + { + "epoch": 0.9209064427897882, + "grad_norm": 568.572265625, + "learning_rate": 2.3531489738846613e-07, + "loss": 28.6371, + "step": 455880 + }, + { + "epoch": 0.920926643422472, + "grad_norm": 324.4543762207031, + "learning_rate": 2.3520908328968027e-07, + "loss": 21.9086, + "step": 455890 + }, + { + "epoch": 0.9209468440551558, + "grad_norm": 622.8432006835938, + "learning_rate": 2.351032924139063e-07, + "loss": 17.3053, + "step": 455900 + }, + { + "epoch": 0.9209670446878396, + "grad_norm": 20.084917068481445, + "learning_rate": 2.349975247616615e-07, + "loss": 22.7659, + "step": 455910 + }, + { + "epoch": 0.9209872453205235, + "grad_norm": 2.691534996032715, + "learning_rate": 2.3489178033345994e-07, + "loss": 14.0358, + "step": 455920 + }, + { + "epoch": 0.9210074459532073, + "grad_norm": 561.95361328125, + "learning_rate": 2.34786059129819e-07, + "loss": 15.8221, + "step": 455930 + }, + { + "epoch": 0.9210276465858911, + "grad_norm": 415.3448791503906, + "learning_rate": 2.3468036115125215e-07, + "loss": 15.3055, + "step": 455940 + }, + { + "epoch": 0.9210478472185749, + "grad_norm": 369.7100830078125, + "learning_rate": 2.3457468639827563e-07, + "loss": 22.8397, + "step": 455950 + }, + { + "epoch": 0.9210680478512587, + "grad_norm": 155.63955688476562, + "learning_rate": 
2.344690348714046e-07, + "loss": 14.0282, + "step": 455960 + }, + { + "epoch": 0.9210882484839426, + "grad_norm": 211.77499389648438, + "learning_rate": 2.3436340657115253e-07, + "loss": 25.2555, + "step": 455970 + }, + { + "epoch": 0.9211084491166264, + "grad_norm": 341.98150634765625, + "learning_rate": 2.3425780149803623e-07, + "loss": 8.1026, + "step": 455980 + }, + { + "epoch": 0.9211286497493102, + "grad_norm": 704.5994873046875, + "learning_rate": 2.3415221965256807e-07, + "loss": 26.6054, + "step": 455990 + }, + { + "epoch": 0.921148850381994, + "grad_norm": 0.6574216485023499, + "learning_rate": 2.3404666103526542e-07, + "loss": 18.6659, + "step": 456000 + }, + { + "epoch": 0.9211690510146778, + "grad_norm": 143.26748657226562, + "learning_rate": 2.3394112564664062e-07, + "loss": 15.0257, + "step": 456010 + }, + { + "epoch": 0.9211892516473615, + "grad_norm": 477.011474609375, + "learning_rate": 2.338356134872083e-07, + "loss": 18.866, + "step": 456020 + }, + { + "epoch": 0.9212094522800454, + "grad_norm": 669.2011108398438, + "learning_rate": 2.3373012455748356e-07, + "loss": 23.3207, + "step": 456030 + }, + { + "epoch": 0.9212296529127292, + "grad_norm": 387.2029724121094, + "learning_rate": 2.3362465885798046e-07, + "loss": 19.2357, + "step": 456040 + }, + { + "epoch": 0.921249853545413, + "grad_norm": 1630.5396728515625, + "learning_rate": 2.3351921638921193e-07, + "loss": 23.0036, + "step": 456050 + }, + { + "epoch": 0.9212700541780968, + "grad_norm": 55.501487731933594, + "learning_rate": 2.3341379715169254e-07, + "loss": 8.9039, + "step": 456060 + }, + { + "epoch": 0.9212902548107806, + "grad_norm": 258.55023193359375, + "learning_rate": 2.33308401145938e-07, + "loss": 8.9358, + "step": 456070 + }, + { + "epoch": 0.9213104554434645, + "grad_norm": 803.1054077148438, + "learning_rate": 2.3320302837245846e-07, + "loss": 20.5455, + "step": 456080 + }, + { + "epoch": 0.9213306560761483, + "grad_norm": 273.6131286621094, + "learning_rate": 2.3309767883176903e-07, + "loss": 28.0867, + "step": 456090 + }, + { + "epoch": 0.9213508567088321, + "grad_norm": 248.03652954101562, + "learning_rate": 2.3299235252438434e-07, + "loss": 30.396, + "step": 456100 + }, + { + "epoch": 0.9213710573415159, + "grad_norm": 518.1532592773438, + "learning_rate": 2.3288704945081675e-07, + "loss": 24.2605, + "step": 456110 + }, + { + "epoch": 0.9213912579741997, + "grad_norm": 412.0128479003906, + "learning_rate": 2.327817696115786e-07, + "loss": 23.6396, + "step": 456120 + }, + { + "epoch": 0.9214114586068836, + "grad_norm": 296.6305236816406, + "learning_rate": 2.3267651300718397e-07, + "loss": 11.4003, + "step": 456130 + }, + { + "epoch": 0.9214316592395674, + "grad_norm": 601.3193359375, + "learning_rate": 2.325712796381474e-07, + "loss": 34.8444, + "step": 456140 + }, + { + "epoch": 0.9214518598722512, + "grad_norm": 1319.228515625, + "learning_rate": 2.3246606950497851e-07, + "loss": 17.4915, + "step": 456150 + }, + { + "epoch": 0.921472060504935, + "grad_norm": 450.4251708984375, + "learning_rate": 2.3236088260819188e-07, + "loss": 19.1523, + "step": 456160 + }, + { + "epoch": 0.9214922611376188, + "grad_norm": 232.65257263183594, + "learning_rate": 2.3225571894830047e-07, + "loss": 32.4943, + "step": 456170 + }, + { + "epoch": 0.9215124617703027, + "grad_norm": 534.1532592773438, + "learning_rate": 2.3215057852581712e-07, + "loss": 22.4241, + "step": 456180 + }, + { + "epoch": 0.9215326624029865, + "grad_norm": 448.429443359375, + "learning_rate": 2.3204546134125207e-07, + "loss": 13.9421, + 
"step": 456190 + }, + { + "epoch": 0.9215528630356703, + "grad_norm": 756.3300170898438, + "learning_rate": 2.319403673951204e-07, + "loss": 30.2919, + "step": 456200 + }, + { + "epoch": 0.9215730636683541, + "grad_norm": 176.592041015625, + "learning_rate": 2.3183529668793282e-07, + "loss": 16.056, + "step": 456210 + }, + { + "epoch": 0.9215932643010379, + "grad_norm": 563.3243408203125, + "learning_rate": 2.3173024922020114e-07, + "loss": 27.7949, + "step": 456220 + }, + { + "epoch": 0.9216134649337218, + "grad_norm": 545.1439819335938, + "learning_rate": 2.3162522499243833e-07, + "loss": 18.0718, + "step": 456230 + }, + { + "epoch": 0.9216336655664056, + "grad_norm": 2.2020576000213623, + "learning_rate": 2.3152022400515561e-07, + "loss": 9.7643, + "step": 456240 + }, + { + "epoch": 0.9216538661990894, + "grad_norm": 372.8847351074219, + "learning_rate": 2.314152462588659e-07, + "loss": 14.4089, + "step": 456250 + }, + { + "epoch": 0.9216740668317732, + "grad_norm": 359.3384094238281, + "learning_rate": 2.3131029175407883e-07, + "loss": 16.9152, + "step": 456260 + }, + { + "epoch": 0.921694267464457, + "grad_norm": 105.58753204345703, + "learning_rate": 2.3120536049130727e-07, + "loss": 15.5293, + "step": 456270 + }, + { + "epoch": 0.9217144680971407, + "grad_norm": 756.6207885742188, + "learning_rate": 2.3110045247106305e-07, + "loss": 17.3203, + "step": 456280 + }, + { + "epoch": 0.9217346687298246, + "grad_norm": 200.3162384033203, + "learning_rate": 2.3099556769385578e-07, + "loss": 20.6643, + "step": 456290 + }, + { + "epoch": 0.9217548693625084, + "grad_norm": 505.3739318847656, + "learning_rate": 2.3089070616019838e-07, + "loss": 26.1185, + "step": 456300 + }, + { + "epoch": 0.9217750699951922, + "grad_norm": 161.13502502441406, + "learning_rate": 2.3078586787060098e-07, + "loss": 18.3574, + "step": 456310 + }, + { + "epoch": 0.921795270627876, + "grad_norm": 254.5853729248047, + "learning_rate": 2.306810528255754e-07, + "loss": 20.247, + "step": 456320 + }, + { + "epoch": 0.9218154712605598, + "grad_norm": 281.9202575683594, + "learning_rate": 2.3057626102563125e-07, + "loss": 18.4267, + "step": 456330 + }, + { + "epoch": 0.9218356718932437, + "grad_norm": 411.31146240234375, + "learning_rate": 2.3047149247127975e-07, + "loss": 19.3035, + "step": 456340 + }, + { + "epoch": 0.9218558725259275, + "grad_norm": 285.017822265625, + "learning_rate": 2.3036674716303277e-07, + "loss": 11.5934, + "step": 456350 + }, + { + "epoch": 0.9218760731586113, + "grad_norm": 234.5925750732422, + "learning_rate": 2.3026202510139928e-07, + "loss": 26.1673, + "step": 456360 + }, + { + "epoch": 0.9218962737912951, + "grad_norm": 180.01597595214844, + "learning_rate": 2.3015732628688948e-07, + "loss": 20.587, + "step": 456370 + }, + { + "epoch": 0.921916474423979, + "grad_norm": 229.5282745361328, + "learning_rate": 2.300526507200146e-07, + "loss": 20.5006, + "step": 456380 + }, + { + "epoch": 0.9219366750566628, + "grad_norm": 141.17848205566406, + "learning_rate": 2.2994799840128533e-07, + "loss": 10.3158, + "step": 456390 + }, + { + "epoch": 0.9219568756893466, + "grad_norm": 196.68496704101562, + "learning_rate": 2.2984336933121076e-07, + "loss": 19.1464, + "step": 456400 + }, + { + "epoch": 0.9219770763220304, + "grad_norm": 318.13580322265625, + "learning_rate": 2.2973876351030046e-07, + "loss": 20.5453, + "step": 456410 + }, + { + "epoch": 0.9219972769547142, + "grad_norm": 117.87385559082031, + "learning_rate": 2.2963418093906453e-07, + "loss": 13.3332, + "step": 456420 + }, + { + "epoch": 
0.922017477587398, + "grad_norm": 398.493896484375, + "learning_rate": 2.2952962161801485e-07, + "loss": 20.5647, + "step": 456430 + }, + { + "epoch": 0.9220376782200819, + "grad_norm": 551.0594482421875, + "learning_rate": 2.2942508554765764e-07, + "loss": 27.3807, + "step": 456440 + }, + { + "epoch": 0.9220578788527657, + "grad_norm": 418.9258117675781, + "learning_rate": 2.2932057272850416e-07, + "loss": 22.1432, + "step": 456450 + }, + { + "epoch": 0.9220780794854495, + "grad_norm": 318.01776123046875, + "learning_rate": 2.2921608316106402e-07, + "loss": 13.1443, + "step": 456460 + }, + { + "epoch": 0.9220982801181333, + "grad_norm": 500.9718322753906, + "learning_rate": 2.2911161684584626e-07, + "loss": 18.2248, + "step": 456470 + }, + { + "epoch": 0.9221184807508171, + "grad_norm": 678.5086059570312, + "learning_rate": 2.290071737833588e-07, + "loss": 12.8109, + "step": 456480 + }, + { + "epoch": 0.922138681383501, + "grad_norm": 999.7238159179688, + "learning_rate": 2.2890275397411288e-07, + "loss": 27.8825, + "step": 456490 + }, + { + "epoch": 0.9221588820161848, + "grad_norm": 319.5143127441406, + "learning_rate": 2.287983574186159e-07, + "loss": 17.5575, + "step": 456500 + }, + { + "epoch": 0.9221790826488686, + "grad_norm": 279.6608581542969, + "learning_rate": 2.2869398411737687e-07, + "loss": 8.8441, + "step": 456510 + }, + { + "epoch": 0.9221992832815524, + "grad_norm": 411.6119079589844, + "learning_rate": 2.2858963407090484e-07, + "loss": 16.573, + "step": 456520 + }, + { + "epoch": 0.9222194839142362, + "grad_norm": 264.779541015625, + "learning_rate": 2.2848530727970775e-07, + "loss": 13.1386, + "step": 456530 + }, + { + "epoch": 0.92223968454692, + "grad_norm": 456.6824035644531, + "learning_rate": 2.2838100374429518e-07, + "loss": 26.1654, + "step": 456540 + }, + { + "epoch": 0.9222598851796038, + "grad_norm": 541.8452758789062, + "learning_rate": 2.2827672346517448e-07, + "loss": 19.0031, + "step": 456550 + }, + { + "epoch": 0.9222800858122876, + "grad_norm": 209.32427978515625, + "learning_rate": 2.2817246644285472e-07, + "loss": 13.9763, + "step": 456560 + }, + { + "epoch": 0.9223002864449714, + "grad_norm": 547.7611083984375, + "learning_rate": 2.2806823267784327e-07, + "loss": 13.4791, + "step": 456570 + }, + { + "epoch": 0.9223204870776552, + "grad_norm": 513.3222045898438, + "learning_rate": 2.2796402217064806e-07, + "loss": 43.8032, + "step": 456580 + }, + { + "epoch": 0.9223406877103391, + "grad_norm": 145.3455810546875, + "learning_rate": 2.2785983492177867e-07, + "loss": 35.2751, + "step": 456590 + }, + { + "epoch": 0.9223608883430229, + "grad_norm": 489.8966064453125, + "learning_rate": 2.2775567093174022e-07, + "loss": 38.8586, + "step": 456600 + }, + { + "epoch": 0.9223810889757067, + "grad_norm": 584.9359130859375, + "learning_rate": 2.2765153020104292e-07, + "loss": 16.7876, + "step": 456610 + }, + { + "epoch": 0.9224012896083905, + "grad_norm": 157.879638671875, + "learning_rate": 2.27547412730193e-07, + "loss": 22.6318, + "step": 456620 + }, + { + "epoch": 0.9224214902410743, + "grad_norm": 298.29937744140625, + "learning_rate": 2.274433185196978e-07, + "loss": 26.4337, + "step": 456630 + }, + { + "epoch": 0.9224416908737582, + "grad_norm": 90.5885238647461, + "learning_rate": 2.2733924757006531e-07, + "loss": 16.6558, + "step": 456640 + }, + { + "epoch": 0.922461891506442, + "grad_norm": 346.80889892578125, + "learning_rate": 2.2723519988180232e-07, + "loss": 27.9793, + "step": 456650 + }, + { + "epoch": 0.9224820921391258, + "grad_norm": 
355.554931640625, + "learning_rate": 2.2713117545541618e-07, + "loss": 8.9129, + "step": 456660 + }, + { + "epoch": 0.9225022927718096, + "grad_norm": 307.0997619628906, + "learning_rate": 2.270271742914132e-07, + "loss": 19.7816, + "step": 456670 + }, + { + "epoch": 0.9225224934044934, + "grad_norm": 178.32456970214844, + "learning_rate": 2.269231963903018e-07, + "loss": 13.3781, + "step": 456680 + }, + { + "epoch": 0.9225426940371773, + "grad_norm": 317.56158447265625, + "learning_rate": 2.2681924175258773e-07, + "loss": 14.2739, + "step": 456690 + }, + { + "epoch": 0.9225628946698611, + "grad_norm": 227.15536499023438, + "learning_rate": 2.2671531037877724e-07, + "loss": 7.1665, + "step": 456700 + }, + { + "epoch": 0.9225830953025449, + "grad_norm": 186.4401397705078, + "learning_rate": 2.2661140226937773e-07, + "loss": 15.9246, + "step": 456710 + }, + { + "epoch": 0.9226032959352287, + "grad_norm": 461.26959228515625, + "learning_rate": 2.2650751742489542e-07, + "loss": 16.1091, + "step": 456720 + }, + { + "epoch": 0.9226234965679125, + "grad_norm": 550.1011352539062, + "learning_rate": 2.2640365584583602e-07, + "loss": 12.9847, + "step": 456730 + }, + { + "epoch": 0.9226436972005964, + "grad_norm": 301.6156005859375, + "learning_rate": 2.2629981753270636e-07, + "loss": 25.3048, + "step": 456740 + }, + { + "epoch": 0.9226638978332802, + "grad_norm": 535.1401977539062, + "learning_rate": 2.2619600248601327e-07, + "loss": 17.4736, + "step": 456750 + }, + { + "epoch": 0.922684098465964, + "grad_norm": 250.36090087890625, + "learning_rate": 2.2609221070626132e-07, + "loss": 22.4407, + "step": 456760 + }, + { + "epoch": 0.9227042990986478, + "grad_norm": 615.8826293945312, + "learning_rate": 2.259884421939562e-07, + "loss": 15.605, + "step": 456770 + }, + { + "epoch": 0.9227244997313316, + "grad_norm": 851.8646850585938, + "learning_rate": 2.2588469694960535e-07, + "loss": 19.2829, + "step": 456780 + }, + { + "epoch": 0.9227447003640153, + "grad_norm": 488.8655700683594, + "learning_rate": 2.2578097497371333e-07, + "loss": 12.2094, + "step": 456790 + }, + { + "epoch": 0.9227649009966992, + "grad_norm": 184.11412048339844, + "learning_rate": 2.2567727626678527e-07, + "loss": 16.5717, + "step": 456800 + }, + { + "epoch": 0.922785101629383, + "grad_norm": 157.20469665527344, + "learning_rate": 2.2557360082932745e-07, + "loss": 21.0107, + "step": 456810 + }, + { + "epoch": 0.9228053022620668, + "grad_norm": 558.7533569335938, + "learning_rate": 2.2546994866184557e-07, + "loss": 14.0995, + "step": 456820 + }, + { + "epoch": 0.9228255028947506, + "grad_norm": 219.511474609375, + "learning_rate": 2.253663197648426e-07, + "loss": 22.1468, + "step": 456830 + }, + { + "epoch": 0.9228457035274344, + "grad_norm": 254.12046813964844, + "learning_rate": 2.2526271413882528e-07, + "loss": 10.6055, + "step": 456840 + }, + { + "epoch": 0.9228659041601183, + "grad_norm": 282.03717041015625, + "learning_rate": 2.2515913178429937e-07, + "loss": 13.9128, + "step": 456850 + }, + { + "epoch": 0.9228861047928021, + "grad_norm": 658.669189453125, + "learning_rate": 2.2505557270176837e-07, + "loss": 21.0541, + "step": 456860 + }, + { + "epoch": 0.9229063054254859, + "grad_norm": 399.0304260253906, + "learning_rate": 2.249520368917374e-07, + "loss": 15.579, + "step": 456870 + }, + { + "epoch": 0.9229265060581697, + "grad_norm": 888.0962524414062, + "learning_rate": 2.2484852435471106e-07, + "loss": 26.448, + "step": 456880 + }, + { + "epoch": 0.9229467066908535, + "grad_norm": 123.26486206054688, + "learning_rate": 
2.2474503509119394e-07, + "loss": 16.2698, + "step": 456890 + }, + { + "epoch": 0.9229669073235374, + "grad_norm": 622.527099609375, + "learning_rate": 2.2464156910168954e-07, + "loss": 21.5968, + "step": 456900 + }, + { + "epoch": 0.9229871079562212, + "grad_norm": 94.87808990478516, + "learning_rate": 2.2453812638670413e-07, + "loss": 16.1051, + "step": 456910 + }, + { + "epoch": 0.923007308588905, + "grad_norm": 231.9738006591797, + "learning_rate": 2.2443470694673953e-07, + "loss": 12.1589, + "step": 456920 + }, + { + "epoch": 0.9230275092215888, + "grad_norm": 812.3882446289062, + "learning_rate": 2.2433131078230196e-07, + "loss": 36.6796, + "step": 456930 + }, + { + "epoch": 0.9230477098542726, + "grad_norm": 472.25018310546875, + "learning_rate": 2.242279378938944e-07, + "loss": 21.6882, + "step": 456940 + }, + { + "epoch": 0.9230679104869565, + "grad_norm": 667.198974609375, + "learning_rate": 2.2412458828201977e-07, + "loss": 23.4013, + "step": 456950 + }, + { + "epoch": 0.9230881111196403, + "grad_norm": 68.68064880371094, + "learning_rate": 2.2402126194718322e-07, + "loss": 13.6757, + "step": 456960 + }, + { + "epoch": 0.9231083117523241, + "grad_norm": 491.9569396972656, + "learning_rate": 2.2391795888988822e-07, + "loss": 28.9148, + "step": 456970 + }, + { + "epoch": 0.9231285123850079, + "grad_norm": 482.0792541503906, + "learning_rate": 2.2381467911063658e-07, + "loss": 31.3768, + "step": 456980 + }, + { + "epoch": 0.9231487130176917, + "grad_norm": 214.32330322265625, + "learning_rate": 2.237114226099335e-07, + "loss": 51.9785, + "step": 456990 + }, + { + "epoch": 0.9231689136503756, + "grad_norm": 533.4446411132812, + "learning_rate": 2.2360818938828189e-07, + "loss": 20.4225, + "step": 457000 + }, + { + "epoch": 0.9231891142830594, + "grad_norm": 268.1573181152344, + "learning_rate": 2.2350497944618466e-07, + "loss": 15.3793, + "step": 457010 + }, + { + "epoch": 0.9232093149157432, + "grad_norm": 460.7972717285156, + "learning_rate": 2.234017927841442e-07, + "loss": 20.3809, + "step": 457020 + }, + { + "epoch": 0.923229515548427, + "grad_norm": 72.43126678466797, + "learning_rate": 2.2329862940266511e-07, + "loss": 24.144, + "step": 457030 + }, + { + "epoch": 0.9232497161811108, + "grad_norm": 144.76942443847656, + "learning_rate": 2.2319548930224865e-07, + "loss": 9.2042, + "step": 457040 + }, + { + "epoch": 0.9232699168137946, + "grad_norm": 414.5937194824219, + "learning_rate": 2.2309237248339776e-07, + "loss": 18.7369, + "step": 457050 + }, + { + "epoch": 0.9232901174464784, + "grad_norm": 315.70648193359375, + "learning_rate": 2.2298927894661481e-07, + "loss": 18.66, + "step": 457060 + }, + { + "epoch": 0.9233103180791622, + "grad_norm": 640.033203125, + "learning_rate": 2.2288620869240384e-07, + "loss": 23.0898, + "step": 457070 + }, + { + "epoch": 0.923330518711846, + "grad_norm": 205.42617797851562, + "learning_rate": 2.2278316172126612e-07, + "loss": 15.6564, + "step": 457080 + }, + { + "epoch": 0.9233507193445298, + "grad_norm": 1400.28759765625, + "learning_rate": 2.2268013803370292e-07, + "loss": 27.0673, + "step": 457090 + }, + { + "epoch": 0.9233709199772137, + "grad_norm": 338.8489685058594, + "learning_rate": 2.2257713763021826e-07, + "loss": 20.6615, + "step": 457100 + }, + { + "epoch": 0.9233911206098975, + "grad_norm": 253.6118927001953, + "learning_rate": 2.2247416051131288e-07, + "loss": 13.7819, + "step": 457110 + }, + { + "epoch": 0.9234113212425813, + "grad_norm": 162.47174072265625, + "learning_rate": 2.2237120667748856e-07, + "loss": 14.0294, + 
"step": 457120 + }, + { + "epoch": 0.9234315218752651, + "grad_norm": 318.33233642578125, + "learning_rate": 2.2226827612924774e-07, + "loss": 19.7768, + "step": 457130 + }, + { + "epoch": 0.9234517225079489, + "grad_norm": 17.8799991607666, + "learning_rate": 2.221653688670916e-07, + "loss": 17.8933, + "step": 457140 + }, + { + "epoch": 0.9234719231406328, + "grad_norm": 501.81207275390625, + "learning_rate": 2.220624848915226e-07, + "loss": 21.2872, + "step": 457150 + }, + { + "epoch": 0.9234921237733166, + "grad_norm": 313.23614501953125, + "learning_rate": 2.2195962420304083e-07, + "loss": 14.9458, + "step": 457160 + }, + { + "epoch": 0.9235123244060004, + "grad_norm": 307.3273010253906, + "learning_rate": 2.2185678680214927e-07, + "loss": 23.8175, + "step": 457170 + }, + { + "epoch": 0.9235325250386842, + "grad_norm": 324.7360534667969, + "learning_rate": 2.2175397268934807e-07, + "loss": 20.4862, + "step": 457180 + }, + { + "epoch": 0.923552725671368, + "grad_norm": 141.25596618652344, + "learning_rate": 2.216511818651379e-07, + "loss": 19.4876, + "step": 457190 + }, + { + "epoch": 0.9235729263040519, + "grad_norm": 443.9954528808594, + "learning_rate": 2.2154841433002062e-07, + "loss": 16.8223, + "step": 457200 + }, + { + "epoch": 0.9235931269367357, + "grad_norm": 601.5424194335938, + "learning_rate": 2.2144567008449636e-07, + "loss": 29.5105, + "step": 457210 + }, + { + "epoch": 0.9236133275694195, + "grad_norm": 606.4437255859375, + "learning_rate": 2.2134294912906696e-07, + "loss": 25.3341, + "step": 457220 + }, + { + "epoch": 0.9236335282021033, + "grad_norm": 327.7691955566406, + "learning_rate": 2.2124025146423255e-07, + "loss": 16.0972, + "step": 457230 + }, + { + "epoch": 0.9236537288347871, + "grad_norm": 0.0, + "learning_rate": 2.2113757709049277e-07, + "loss": 10.7911, + "step": 457240 + }, + { + "epoch": 0.923673929467471, + "grad_norm": 412.32818603515625, + "learning_rate": 2.210349260083494e-07, + "loss": 19.2258, + "step": 457250 + }, + { + "epoch": 0.9236941301001548, + "grad_norm": 642.1405639648438, + "learning_rate": 2.2093229821830263e-07, + "loss": 12.5188, + "step": 457260 + }, + { + "epoch": 0.9237143307328386, + "grad_norm": 217.98617553710938, + "learning_rate": 2.208296937208515e-07, + "loss": 7.6809, + "step": 457270 + }, + { + "epoch": 0.9237345313655224, + "grad_norm": 428.9268493652344, + "learning_rate": 2.2072711251649615e-07, + "loss": 7.977, + "step": 457280 + }, + { + "epoch": 0.9237547319982062, + "grad_norm": 475.7462158203125, + "learning_rate": 2.2062455460573838e-07, + "loss": 20.2152, + "step": 457290 + }, + { + "epoch": 0.9237749326308899, + "grad_norm": 620.5607299804688, + "learning_rate": 2.2052201998907673e-07, + "loss": 16.3221, + "step": 457300 + }, + { + "epoch": 0.9237951332635738, + "grad_norm": 581.6460571289062, + "learning_rate": 2.2041950866701078e-07, + "loss": 32.3453, + "step": 457310 + }, + { + "epoch": 0.9238153338962576, + "grad_norm": 61.62921142578125, + "learning_rate": 2.2031702064004067e-07, + "loss": 11.4661, + "step": 457320 + }, + { + "epoch": 0.9238355345289414, + "grad_norm": 357.42230224609375, + "learning_rate": 2.2021455590866546e-07, + "loss": 26.6565, + "step": 457330 + }, + { + "epoch": 0.9238557351616252, + "grad_norm": 72.99015045166016, + "learning_rate": 2.2011211447338477e-07, + "loss": 20.1441, + "step": 457340 + }, + { + "epoch": 0.923875935794309, + "grad_norm": 806.2169189453125, + "learning_rate": 2.200096963346976e-07, + "loss": 22.0562, + "step": 457350 + }, + { + "epoch": 0.9238961364269929, 
+ "grad_norm": 346.07818603515625, + "learning_rate": 2.199073014931047e-07, + "loss": 15.2492, + "step": 457360 + }, + { + "epoch": 0.9239163370596767, + "grad_norm": 3.393545150756836, + "learning_rate": 2.198049299491023e-07, + "loss": 17.5839, + "step": 457370 + }, + { + "epoch": 0.9239365376923605, + "grad_norm": 210.43228149414062, + "learning_rate": 2.1970258170319114e-07, + "loss": 8.5199, + "step": 457380 + }, + { + "epoch": 0.9239567383250443, + "grad_norm": 230.42808532714844, + "learning_rate": 2.1960025675587082e-07, + "loss": 11.0523, + "step": 457390 + }, + { + "epoch": 0.9239769389577281, + "grad_norm": 283.653076171875, + "learning_rate": 2.1949795510763872e-07, + "loss": 16.4459, + "step": 457400 + }, + { + "epoch": 0.923997139590412, + "grad_norm": 348.6254577636719, + "learning_rate": 2.1939567675899333e-07, + "loss": 25.9321, + "step": 457410 + }, + { + "epoch": 0.9240173402230958, + "grad_norm": 506.9590759277344, + "learning_rate": 2.1929342171043366e-07, + "loss": 32.5383, + "step": 457420 + }, + { + "epoch": 0.9240375408557796, + "grad_norm": 166.4855499267578, + "learning_rate": 2.191911899624588e-07, + "loss": 27.0463, + "step": 457430 + }, + { + "epoch": 0.9240577414884634, + "grad_norm": 386.7478942871094, + "learning_rate": 2.1908898151556502e-07, + "loss": 17.4471, + "step": 457440 + }, + { + "epoch": 0.9240779421211472, + "grad_norm": 499.1949462890625, + "learning_rate": 2.189867963702519e-07, + "loss": 16.7372, + "step": 457450 + }, + { + "epoch": 0.9240981427538311, + "grad_norm": 165.86109924316406, + "learning_rate": 2.188846345270179e-07, + "loss": 18.4938, + "step": 457460 + }, + { + "epoch": 0.9241183433865149, + "grad_norm": 238.35037231445312, + "learning_rate": 2.1878249598636047e-07, + "loss": 15.6179, + "step": 457470 + }, + { + "epoch": 0.9241385440191987, + "grad_norm": 72.76551055908203, + "learning_rate": 2.186803807487764e-07, + "loss": 14.1343, + "step": 457480 + }, + { + "epoch": 0.9241587446518825, + "grad_norm": 494.52740478515625, + "learning_rate": 2.1857828881476472e-07, + "loss": 25.6341, + "step": 457490 + }, + { + "epoch": 0.9241789452845663, + "grad_norm": 260.92047119140625, + "learning_rate": 2.1847622018482283e-07, + "loss": 25.9921, + "step": 457500 + }, + { + "epoch": 0.9241991459172502, + "grad_norm": 423.1890869140625, + "learning_rate": 2.1837417485944755e-07, + "loss": 20.9144, + "step": 457510 + }, + { + "epoch": 0.924219346549934, + "grad_norm": 1008.9845581054688, + "learning_rate": 2.1827215283913683e-07, + "loss": 33.1877, + "step": 457520 + }, + { + "epoch": 0.9242395471826178, + "grad_norm": 773.0634765625, + "learning_rate": 2.1817015412438692e-07, + "loss": 31.0697, + "step": 457530 + }, + { + "epoch": 0.9242597478153016, + "grad_norm": 716.7244873046875, + "learning_rate": 2.1806817871569686e-07, + "loss": 26.2957, + "step": 457540 + }, + { + "epoch": 0.9242799484479854, + "grad_norm": 288.569091796875, + "learning_rate": 2.1796622661356238e-07, + "loss": 25.4398, + "step": 457550 + }, + { + "epoch": 0.9243001490806692, + "grad_norm": 189.87271118164062, + "learning_rate": 2.1786429781847972e-07, + "loss": 25.3806, + "step": 457560 + }, + { + "epoch": 0.924320349713353, + "grad_norm": 470.0531005859375, + "learning_rate": 2.1776239233094687e-07, + "loss": 10.8642, + "step": 457570 + }, + { + "epoch": 0.9243405503460368, + "grad_norm": 243.01356506347656, + "learning_rate": 2.176605101514606e-07, + "loss": 25.1575, + "step": 457580 + }, + { + "epoch": 0.9243607509787206, + "grad_norm": 802.8110961914062, + 
"learning_rate": 2.175586512805161e-07, + "loss": 30.1035, + "step": 457590 + }, + { + "epoch": 0.9243809516114044, + "grad_norm": 338.3279113769531, + "learning_rate": 2.174568157186102e-07, + "loss": 20.7881, + "step": 457600 + }, + { + "epoch": 0.9244011522440883, + "grad_norm": 1601.8643798828125, + "learning_rate": 2.1735500346624083e-07, + "loss": 18.3868, + "step": 457610 + }, + { + "epoch": 0.9244213528767721, + "grad_norm": 473.4765930175781, + "learning_rate": 2.1725321452390314e-07, + "loss": 17.7341, + "step": 457620 + }, + { + "epoch": 0.9244415535094559, + "grad_norm": 257.49365234375, + "learning_rate": 2.1715144889209284e-07, + "loss": 20.9138, + "step": 457630 + }, + { + "epoch": 0.9244617541421397, + "grad_norm": 537.02880859375, + "learning_rate": 2.1704970657130675e-07, + "loss": 14.2191, + "step": 457640 + }, + { + "epoch": 0.9244819547748235, + "grad_norm": 323.56304931640625, + "learning_rate": 2.1694798756204005e-07, + "loss": 14.642, + "step": 457650 + }, + { + "epoch": 0.9245021554075074, + "grad_norm": 1487.6925048828125, + "learning_rate": 2.1684629186478846e-07, + "loss": 14.8847, + "step": 457660 + }, + { + "epoch": 0.9245223560401912, + "grad_norm": 214.20962524414062, + "learning_rate": 2.1674461948004766e-07, + "loss": 16.7686, + "step": 457670 + }, + { + "epoch": 0.924542556672875, + "grad_norm": 241.1480255126953, + "learning_rate": 2.1664297040831394e-07, + "loss": 17.2618, + "step": 457680 + }, + { + "epoch": 0.9245627573055588, + "grad_norm": 87.69023132324219, + "learning_rate": 2.1654134465008247e-07, + "loss": 8.7376, + "step": 457690 + }, + { + "epoch": 0.9245829579382426, + "grad_norm": 602.7846069335938, + "learning_rate": 2.1643974220584729e-07, + "loss": 24.1448, + "step": 457700 + }, + { + "epoch": 0.9246031585709265, + "grad_norm": 238.993408203125, + "learning_rate": 2.1633816307610577e-07, + "loss": 7.3158, + "step": 457710 + }, + { + "epoch": 0.9246233592036103, + "grad_norm": 375.7080078125, + "learning_rate": 2.1623660726135197e-07, + "loss": 20.0468, + "step": 457720 + }, + { + "epoch": 0.9246435598362941, + "grad_norm": 201.77499389648438, + "learning_rate": 2.161350747620794e-07, + "loss": 14.484, + "step": 457730 + }, + { + "epoch": 0.9246637604689779, + "grad_norm": 403.94195556640625, + "learning_rate": 2.1603356557878486e-07, + "loss": 12.3051, + "step": 457740 + }, + { + "epoch": 0.9246839611016617, + "grad_norm": 305.6134338378906, + "learning_rate": 2.1593207971196296e-07, + "loss": 15.9938, + "step": 457750 + }, + { + "epoch": 0.9247041617343456, + "grad_norm": 513.3170166015625, + "learning_rate": 2.1583061716210774e-07, + "loss": 11.5397, + "step": 457760 + }, + { + "epoch": 0.9247243623670294, + "grad_norm": 155.95716857910156, + "learning_rate": 2.1572917792971326e-07, + "loss": 20.2176, + "step": 457770 + }, + { + "epoch": 0.9247445629997132, + "grad_norm": 194.8069610595703, + "learning_rate": 2.1562776201527525e-07, + "loss": 12.5153, + "step": 457780 + }, + { + "epoch": 0.924764763632397, + "grad_norm": 1246.8511962890625, + "learning_rate": 2.1552636941928717e-07, + "loss": 18.2852, + "step": 457790 + }, + { + "epoch": 0.9247849642650808, + "grad_norm": 706.98828125, + "learning_rate": 2.154250001422431e-07, + "loss": 18.9501, + "step": 457800 + }, + { + "epoch": 0.9248051648977647, + "grad_norm": 322.3863830566406, + "learning_rate": 2.1532365418463708e-07, + "loss": 8.0768, + "step": 457810 + }, + { + "epoch": 0.9248253655304484, + "grad_norm": 513.7962036132812, + "learning_rate": 2.1522233154696314e-07, + "loss": 
9.9633, + "step": 457820 + }, + { + "epoch": 0.9248455661631322, + "grad_norm": 401.7316589355469, + "learning_rate": 2.151210322297159e-07, + "loss": 22.6052, + "step": 457830 + }, + { + "epoch": 0.924865766795816, + "grad_norm": 16.721694946289062, + "learning_rate": 2.1501975623338833e-07, + "loss": 11.0103, + "step": 457840 + }, + { + "epoch": 0.9248859674284998, + "grad_norm": 110.01323699951172, + "learning_rate": 2.1491850355847332e-07, + "loss": 16.0218, + "step": 457850 + }, + { + "epoch": 0.9249061680611836, + "grad_norm": 219.98196411132812, + "learning_rate": 2.1481727420546605e-07, + "loss": 8.403, + "step": 457860 + }, + { + "epoch": 0.9249263686938675, + "grad_norm": 379.1888427734375, + "learning_rate": 2.147160681748589e-07, + "loss": 18.7612, + "step": 457870 + }, + { + "epoch": 0.9249465693265513, + "grad_norm": 298.0424499511719, + "learning_rate": 2.1461488546714425e-07, + "loss": 17.5678, + "step": 457880 + }, + { + "epoch": 0.9249667699592351, + "grad_norm": 253.58570861816406, + "learning_rate": 2.1451372608281674e-07, + "loss": 8.7051, + "step": 457890 + }, + { + "epoch": 0.9249869705919189, + "grad_norm": 341.6390075683594, + "learning_rate": 2.1441259002236924e-07, + "loss": 17.6478, + "step": 457900 + }, + { + "epoch": 0.9250071712246027, + "grad_norm": 258.5384826660156, + "learning_rate": 2.1431147728629476e-07, + "loss": 14.3431, + "step": 457910 + }, + { + "epoch": 0.9250273718572866, + "grad_norm": 365.05657958984375, + "learning_rate": 2.1421038787508508e-07, + "loss": 14.0344, + "step": 457920 + }, + { + "epoch": 0.9250475724899704, + "grad_norm": 327.6850280761719, + "learning_rate": 2.1410932178923372e-07, + "loss": 17.4474, + "step": 457930 + }, + { + "epoch": 0.9250677731226542, + "grad_norm": 425.10919189453125, + "learning_rate": 2.1400827902923304e-07, + "loss": 24.1575, + "step": 457940 + }, + { + "epoch": 0.925087973755338, + "grad_norm": 132.63491821289062, + "learning_rate": 2.1390725959557546e-07, + "loss": 13.1877, + "step": 457950 + }, + { + "epoch": 0.9251081743880218, + "grad_norm": 263.4458923339844, + "learning_rate": 2.1380626348875278e-07, + "loss": 16.6089, + "step": 457960 + }, + { + "epoch": 0.9251283750207057, + "grad_norm": 155.18031311035156, + "learning_rate": 2.137052907092596e-07, + "loss": 13.4668, + "step": 457970 + }, + { + "epoch": 0.9251485756533895, + "grad_norm": 387.36309814453125, + "learning_rate": 2.13604341257585e-07, + "loss": 21.6578, + "step": 457980 + }, + { + "epoch": 0.9251687762860733, + "grad_norm": 305.8255920410156, + "learning_rate": 2.135034151342219e-07, + "loss": 17.9192, + "step": 457990 + }, + { + "epoch": 0.9251889769187571, + "grad_norm": 212.56101989746094, + "learning_rate": 2.134025123396638e-07, + "loss": 17.149, + "step": 458000 + }, + { + "epoch": 0.9252091775514409, + "grad_norm": 318.2418518066406, + "learning_rate": 2.1330163287440087e-07, + "loss": 9.732, + "step": 458010 + }, + { + "epoch": 0.9252293781841248, + "grad_norm": 1.686371088027954, + "learning_rate": 2.1320077673892493e-07, + "loss": 18.1357, + "step": 458020 + }, + { + "epoch": 0.9252495788168086, + "grad_norm": 362.7011413574219, + "learning_rate": 2.1309994393372836e-07, + "loss": 19.8476, + "step": 458030 + }, + { + "epoch": 0.9252697794494924, + "grad_norm": 32.95444107055664, + "learning_rate": 2.1299913445930242e-07, + "loss": 17.4755, + "step": 458040 + }, + { + "epoch": 0.9252899800821762, + "grad_norm": 139.97511291503906, + "learning_rate": 2.1289834831613675e-07, + "loss": 12.3224, + "step": 458050 + }, + { + 
"epoch": 0.92531018071486, + "grad_norm": 455.064208984375, + "learning_rate": 2.127975855047243e-07, + "loss": 12.8526, + "step": 458060 + }, + { + "epoch": 0.9253303813475438, + "grad_norm": 443.9643859863281, + "learning_rate": 2.126968460255563e-07, + "loss": 22.1814, + "step": 458070 + }, + { + "epoch": 0.9253505819802276, + "grad_norm": 151.95689392089844, + "learning_rate": 2.1259612987912348e-07, + "loss": 50.2585, + "step": 458080 + }, + { + "epoch": 0.9253707826129114, + "grad_norm": 207.3853759765625, + "learning_rate": 2.1249543706591602e-07, + "loss": 7.4737, + "step": 458090 + }, + { + "epoch": 0.9253909832455952, + "grad_norm": 126.56194305419922, + "learning_rate": 2.123947675864252e-07, + "loss": 10.355, + "step": 458100 + }, + { + "epoch": 0.925411183878279, + "grad_norm": 161.03704833984375, + "learning_rate": 2.1229412144114225e-07, + "loss": 11.7268, + "step": 458110 + }, + { + "epoch": 0.9254313845109629, + "grad_norm": 341.4384765625, + "learning_rate": 2.121934986305557e-07, + "loss": 18.7943, + "step": 458120 + }, + { + "epoch": 0.9254515851436467, + "grad_norm": 219.3982391357422, + "learning_rate": 2.120928991551585e-07, + "loss": 17.0994, + "step": 458130 + }, + { + "epoch": 0.9254717857763305, + "grad_norm": 286.6707458496094, + "learning_rate": 2.1199232301543915e-07, + "loss": 18.727, + "step": 458140 + }, + { + "epoch": 0.9254919864090143, + "grad_norm": 211.01681518554688, + "learning_rate": 2.1189177021188888e-07, + "loss": 38.938, + "step": 458150 + }, + { + "epoch": 0.9255121870416981, + "grad_norm": 172.27032470703125, + "learning_rate": 2.117912407449979e-07, + "loss": 12.6474, + "step": 458160 + }, + { + "epoch": 0.925532387674382, + "grad_norm": 29.137935638427734, + "learning_rate": 2.116907346152547e-07, + "loss": 17.0718, + "step": 458170 + }, + { + "epoch": 0.9255525883070658, + "grad_norm": 170.0179443359375, + "learning_rate": 2.1159025182315052e-07, + "loss": 16.2877, + "step": 458180 + }, + { + "epoch": 0.9255727889397496, + "grad_norm": 187.5997314453125, + "learning_rate": 2.11489792369175e-07, + "loss": 15.7404, + "step": 458190 + }, + { + "epoch": 0.9255929895724334, + "grad_norm": 167.6175537109375, + "learning_rate": 2.1138935625381663e-07, + "loss": 15.2215, + "step": 458200 + }, + { + "epoch": 0.9256131902051172, + "grad_norm": 530.850830078125, + "learning_rate": 2.1128894347756613e-07, + "loss": 18.4094, + "step": 458210 + }, + { + "epoch": 0.925633390837801, + "grad_norm": 381.7887268066406, + "learning_rate": 2.1118855404091253e-07, + "loss": 24.7909, + "step": 458220 + }, + { + "epoch": 0.9256535914704849, + "grad_norm": 29.789928436279297, + "learning_rate": 2.110881879443455e-07, + "loss": 13.2872, + "step": 458230 + }, + { + "epoch": 0.9256737921031687, + "grad_norm": 217.49542236328125, + "learning_rate": 2.1098784518835292e-07, + "loss": 18.055, + "step": 458240 + }, + { + "epoch": 0.9256939927358525, + "grad_norm": 535.6318969726562, + "learning_rate": 2.1088752577342607e-07, + "loss": 27.8291, + "step": 458250 + }, + { + "epoch": 0.9257141933685363, + "grad_norm": 926.4777221679688, + "learning_rate": 2.1078722970005182e-07, + "loss": 16.9594, + "step": 458260 + }, + { + "epoch": 0.9257343940012202, + "grad_norm": 346.7973937988281, + "learning_rate": 2.1068695696871922e-07, + "loss": 15.6104, + "step": 458270 + }, + { + "epoch": 0.925754594633904, + "grad_norm": 359.69110107421875, + "learning_rate": 2.1058670757991783e-07, + "loss": 14.7547, + "step": 458280 + }, + { + "epoch": 0.9257747952665878, + "grad_norm": 
451.11236572265625, + "learning_rate": 2.104864815341362e-07, + "loss": 18.7919, + "step": 458290 + }, + { + "epoch": 0.9257949958992716, + "grad_norm": 196.26596069335938, + "learning_rate": 2.103862788318628e-07, + "loss": 14.8491, + "step": 458300 + }, + { + "epoch": 0.9258151965319554, + "grad_norm": 308.1374206542969, + "learning_rate": 2.102860994735856e-07, + "loss": 10.3378, + "step": 458310 + }, + { + "epoch": 0.9258353971646393, + "grad_norm": 489.496826171875, + "learning_rate": 2.1018594345979305e-07, + "loss": 23.6785, + "step": 458320 + }, + { + "epoch": 0.925855597797323, + "grad_norm": 394.7153625488281, + "learning_rate": 2.1008581079097312e-07, + "loss": 22.2725, + "step": 458330 + }, + { + "epoch": 0.9258757984300068, + "grad_norm": 380.5003662109375, + "learning_rate": 2.0998570146761376e-07, + "loss": 18.5849, + "step": 458340 + }, + { + "epoch": 0.9258959990626906, + "grad_norm": 218.2222442626953, + "learning_rate": 2.098856154902029e-07, + "loss": 14.9699, + "step": 458350 + }, + { + "epoch": 0.9259161996953744, + "grad_norm": 324.1954345703125, + "learning_rate": 2.0978555285922963e-07, + "loss": 29.3252, + "step": 458360 + }, + { + "epoch": 0.9259364003280582, + "grad_norm": 354.8207092285156, + "learning_rate": 2.0968551357518018e-07, + "loss": 13.6946, + "step": 458370 + }, + { + "epoch": 0.9259566009607421, + "grad_norm": 1145.4552001953125, + "learning_rate": 2.0958549763854196e-07, + "loss": 24.0101, + "step": 458380 + }, + { + "epoch": 0.9259768015934259, + "grad_norm": 261.1099853515625, + "learning_rate": 2.0948550504980403e-07, + "loss": 13.9914, + "step": 458390 + }, + { + "epoch": 0.9259970022261097, + "grad_norm": 170.25189208984375, + "learning_rate": 2.0938553580945208e-07, + "loss": 24.7193, + "step": 458400 + }, + { + "epoch": 0.9260172028587935, + "grad_norm": 437.9764099121094, + "learning_rate": 2.092855899179741e-07, + "loss": 9.5411, + "step": 458410 + }, + { + "epoch": 0.9260374034914773, + "grad_norm": 94.67361450195312, + "learning_rate": 2.0918566737585688e-07, + "loss": 24.0387, + "step": 458420 + }, + { + "epoch": 0.9260576041241612, + "grad_norm": 157.25180053710938, + "learning_rate": 2.0908576818358783e-07, + "loss": 14.9054, + "step": 458430 + }, + { + "epoch": 0.926077804756845, + "grad_norm": 154.60018920898438, + "learning_rate": 2.0898589234165378e-07, + "loss": 13.5259, + "step": 458440 + }, + { + "epoch": 0.9260980053895288, + "grad_norm": 464.2149658203125, + "learning_rate": 2.0888603985054156e-07, + "loss": 19.8272, + "step": 458450 + }, + { + "epoch": 0.9261182060222126, + "grad_norm": 252.5354461669922, + "learning_rate": 2.0878621071073745e-07, + "loss": 10.8964, + "step": 458460 + }, + { + "epoch": 0.9261384066548964, + "grad_norm": 138.6220703125, + "learning_rate": 2.086864049227283e-07, + "loss": 15.92, + "step": 458470 + }, + { + "epoch": 0.9261586072875803, + "grad_norm": 651.3563232421875, + "learning_rate": 2.085866224870009e-07, + "loss": 12.8114, + "step": 458480 + }, + { + "epoch": 0.9261788079202641, + "grad_norm": 445.91790771484375, + "learning_rate": 2.0848686340404045e-07, + "loss": 34.6725, + "step": 458490 + }, + { + "epoch": 0.9261990085529479, + "grad_norm": 325.5871887207031, + "learning_rate": 2.083871276743338e-07, + "loss": 20.4321, + "step": 458500 + }, + { + "epoch": 0.9262192091856317, + "grad_norm": 373.8263854980469, + "learning_rate": 2.0828741529836771e-07, + "loss": 15.8434, + "step": 458510 + }, + { + "epoch": 0.9262394098183155, + "grad_norm": 28.93181037902832, + "learning_rate": 
2.0818772627662743e-07, + "loss": 11.6755, + "step": 458520 + }, + { + "epoch": 0.9262596104509994, + "grad_norm": 15.074884414672852, + "learning_rate": 2.0808806060959864e-07, + "loss": 27.5151, + "step": 458530 + }, + { + "epoch": 0.9262798110836832, + "grad_norm": 386.1090087890625, + "learning_rate": 2.0798841829776816e-07, + "loss": 9.6054, + "step": 458540 + }, + { + "epoch": 0.926300011716367, + "grad_norm": 159.1573028564453, + "learning_rate": 2.0788879934162064e-07, + "loss": 17.2663, + "step": 458550 + }, + { + "epoch": 0.9263202123490508, + "grad_norm": 262.0688781738281, + "learning_rate": 2.077892037416418e-07, + "loss": 19.3492, + "step": 458560 + }, + { + "epoch": 0.9263404129817346, + "grad_norm": 194.11447143554688, + "learning_rate": 2.0768963149831678e-07, + "loss": 14.6876, + "step": 458570 + }, + { + "epoch": 0.9263606136144183, + "grad_norm": 365.3988342285156, + "learning_rate": 2.0759008261213242e-07, + "loss": 16.0998, + "step": 458580 + }, + { + "epoch": 0.9263808142471022, + "grad_norm": 192.0506591796875, + "learning_rate": 2.0749055708357168e-07, + "loss": 26.9038, + "step": 458590 + }, + { + "epoch": 0.926401014879786, + "grad_norm": 250.0546875, + "learning_rate": 2.0739105491312028e-07, + "loss": 17.8981, + "step": 458600 + }, + { + "epoch": 0.9264212155124698, + "grad_norm": 996.9682006835938, + "learning_rate": 2.0729157610126448e-07, + "loss": 27.0404, + "step": 458610 + }, + { + "epoch": 0.9264414161451536, + "grad_norm": 122.38219451904297, + "learning_rate": 2.0719212064848838e-07, + "loss": 19.0454, + "step": 458620 + }, + { + "epoch": 0.9264616167778374, + "grad_norm": 393.2940368652344, + "learning_rate": 2.07092688555276e-07, + "loss": 18.0674, + "step": 458630 + }, + { + "epoch": 0.9264818174105213, + "grad_norm": 224.41012573242188, + "learning_rate": 2.0699327982211304e-07, + "loss": 13.0098, + "step": 458640 + }, + { + "epoch": 0.9265020180432051, + "grad_norm": 460.50335693359375, + "learning_rate": 2.068938944494836e-07, + "loss": 19.5449, + "step": 458650 + }, + { + "epoch": 0.9265222186758889, + "grad_norm": 692.4585571289062, + "learning_rate": 2.0679453243787174e-07, + "loss": 12.5094, + "step": 458660 + }, + { + "epoch": 0.9265424193085727, + "grad_norm": 759.0761108398438, + "learning_rate": 2.0669519378776147e-07, + "loss": 22.3652, + "step": 458670 + }, + { + "epoch": 0.9265626199412565, + "grad_norm": 336.0935363769531, + "learning_rate": 2.0659587849963801e-07, + "loss": 21.5044, + "step": 458680 + }, + { + "epoch": 0.9265828205739404, + "grad_norm": 345.7869873046875, + "learning_rate": 2.0649658657398487e-07, + "loss": 11.5162, + "step": 458690 + }, + { + "epoch": 0.9266030212066242, + "grad_norm": 578.116943359375, + "learning_rate": 2.0639731801128603e-07, + "loss": 19.6053, + "step": 458700 + }, + { + "epoch": 0.926623221839308, + "grad_norm": 552.098388671875, + "learning_rate": 2.0629807281202508e-07, + "loss": 22.3361, + "step": 458710 + }, + { + "epoch": 0.9266434224719918, + "grad_norm": 314.4300537109375, + "learning_rate": 2.0619885097668658e-07, + "loss": 10.5781, + "step": 458720 + }, + { + "epoch": 0.9266636231046756, + "grad_norm": 577.6607666015625, + "learning_rate": 2.0609965250575237e-07, + "loss": 22.9499, + "step": 458730 + }, + { + "epoch": 0.9266838237373595, + "grad_norm": 0.4380476176738739, + "learning_rate": 2.0600047739970762e-07, + "loss": 11.5809, + "step": 458740 + }, + { + "epoch": 0.9267040243700433, + "grad_norm": 208.62179565429688, + "learning_rate": 2.0590132565903475e-07, + "loss": 10.554, + 
"step": 458750 + }, + { + "epoch": 0.9267242250027271, + "grad_norm": 527.3123168945312, + "learning_rate": 2.058021972842178e-07, + "loss": 19.0069, + "step": 458760 + }, + { + "epoch": 0.9267444256354109, + "grad_norm": 499.30670166015625, + "learning_rate": 2.057030922757397e-07, + "loss": 19.0369, + "step": 458770 + }, + { + "epoch": 0.9267646262680947, + "grad_norm": 314.04437255859375, + "learning_rate": 2.056040106340823e-07, + "loss": 10.8281, + "step": 458780 + }, + { + "epoch": 0.9267848269007786, + "grad_norm": 123.98068237304688, + "learning_rate": 2.0550495235973023e-07, + "loss": 12.4546, + "step": 458790 + }, + { + "epoch": 0.9268050275334624, + "grad_norm": 198.2677001953125, + "learning_rate": 2.054059174531653e-07, + "loss": 20.5061, + "step": 458800 + }, + { + "epoch": 0.9268252281661462, + "grad_norm": 154.2901153564453, + "learning_rate": 2.0530690591487047e-07, + "loss": 19.8547, + "step": 458810 + }, + { + "epoch": 0.92684542879883, + "grad_norm": 277.7265930175781, + "learning_rate": 2.0520791774532757e-07, + "loss": 24.4116, + "step": 458820 + }, + { + "epoch": 0.9268656294315138, + "grad_norm": 328.3410949707031, + "learning_rate": 2.0510895294502066e-07, + "loss": 14.0775, + "step": 458830 + }, + { + "epoch": 0.9268858300641976, + "grad_norm": 317.104736328125, + "learning_rate": 2.0501001151443156e-07, + "loss": 17.5873, + "step": 458840 + }, + { + "epoch": 0.9269060306968814, + "grad_norm": 226.73709106445312, + "learning_rate": 2.0491109345404102e-07, + "loss": 15.4212, + "step": 458850 + }, + { + "epoch": 0.9269262313295652, + "grad_norm": 849.9632568359375, + "learning_rate": 2.0481219876433257e-07, + "loss": 18.5426, + "step": 458860 + }, + { + "epoch": 0.926946431962249, + "grad_norm": 363.4132385253906, + "learning_rate": 2.0471332744578853e-07, + "loss": 25.3132, + "step": 458870 + }, + { + "epoch": 0.9269666325949328, + "grad_norm": 785.9321899414062, + "learning_rate": 2.0461447949888912e-07, + "loss": 13.8512, + "step": 458880 + }, + { + "epoch": 0.9269868332276167, + "grad_norm": 338.12884521484375, + "learning_rate": 2.0451565492411672e-07, + "loss": 21.3447, + "step": 458890 + }, + { + "epoch": 0.9270070338603005, + "grad_norm": 235.50070190429688, + "learning_rate": 2.0441685372195487e-07, + "loss": 16.1099, + "step": 458900 + }, + { + "epoch": 0.9270272344929843, + "grad_norm": 402.45147705078125, + "learning_rate": 2.043180758928831e-07, + "loss": 27.9963, + "step": 458910 + }, + { + "epoch": 0.9270474351256681, + "grad_norm": 154.576904296875, + "learning_rate": 2.0421932143738276e-07, + "loss": 12.2917, + "step": 458920 + }, + { + "epoch": 0.9270676357583519, + "grad_norm": 344.50531005859375, + "learning_rate": 2.041205903559368e-07, + "loss": 14.6842, + "step": 458930 + }, + { + "epoch": 0.9270878363910358, + "grad_norm": 366.5665588378906, + "learning_rate": 2.0402188264902533e-07, + "loss": 22.528, + "step": 458940 + }, + { + "epoch": 0.9271080370237196, + "grad_norm": 477.3027038574219, + "learning_rate": 2.039231983171286e-07, + "loss": 16.5182, + "step": 458950 + }, + { + "epoch": 0.9271282376564034, + "grad_norm": 0.0, + "learning_rate": 2.0382453736072838e-07, + "loss": 16.6952, + "step": 458960 + }, + { + "epoch": 0.9271484382890872, + "grad_norm": 306.124755859375, + "learning_rate": 2.0372589978030654e-07, + "loss": 11.9331, + "step": 458970 + }, + { + "epoch": 0.927168638921771, + "grad_norm": 754.0401611328125, + "learning_rate": 2.0362728557634327e-07, + "loss": 32.1713, + "step": 458980 + }, + { + "epoch": 0.9271888395544549, + 
"grad_norm": 123.714599609375, + "learning_rate": 2.0352869474931758e-07, + "loss": 12.2411, + "step": 458990 + }, + { + "epoch": 0.9272090401871387, + "grad_norm": 302.0909423828125, + "learning_rate": 2.0343012729971244e-07, + "loss": 10.5212, + "step": 459000 + }, + { + "epoch": 0.9272292408198225, + "grad_norm": 466.5577087402344, + "learning_rate": 2.0333158322800696e-07, + "loss": 15.9164, + "step": 459010 + }, + { + "epoch": 0.9272494414525063, + "grad_norm": 263.16961669921875, + "learning_rate": 2.0323306253468123e-07, + "loss": 10.4473, + "step": 459020 + }, + { + "epoch": 0.9272696420851901, + "grad_norm": 584.4795532226562, + "learning_rate": 2.0313456522021603e-07, + "loss": 16.174, + "step": 459030 + }, + { + "epoch": 0.927289842717874, + "grad_norm": 429.5994567871094, + "learning_rate": 2.0303609128509038e-07, + "loss": 22.1154, + "step": 459040 + }, + { + "epoch": 0.9273100433505578, + "grad_norm": 389.5309143066406, + "learning_rate": 2.0293764072978618e-07, + "loss": 22.5208, + "step": 459050 + }, + { + "epoch": 0.9273302439832416, + "grad_norm": 372.05609130859375, + "learning_rate": 2.0283921355478187e-07, + "loss": 18.7915, + "step": 459060 + }, + { + "epoch": 0.9273504446159254, + "grad_norm": 289.4825744628906, + "learning_rate": 2.0274080976055655e-07, + "loss": 14.1784, + "step": 459070 + }, + { + "epoch": 0.9273706452486092, + "grad_norm": 477.81732177734375, + "learning_rate": 2.0264242934759147e-07, + "loss": 22.1646, + "step": 459080 + }, + { + "epoch": 0.927390845881293, + "grad_norm": 390.359130859375, + "learning_rate": 2.025440723163652e-07, + "loss": 24.2576, + "step": 459090 + }, + { + "epoch": 0.9274110465139768, + "grad_norm": 163.69305419921875, + "learning_rate": 2.0244573866735673e-07, + "loss": 19.6924, + "step": 459100 + }, + { + "epoch": 0.9274312471466606, + "grad_norm": 304.3630065917969, + "learning_rate": 2.0234742840104627e-07, + "loss": 17.2062, + "step": 459110 + }, + { + "epoch": 0.9274514477793444, + "grad_norm": 149.70547485351562, + "learning_rate": 2.0224914151791285e-07, + "loss": 9.3478, + "step": 459120 + }, + { + "epoch": 0.9274716484120282, + "grad_norm": 283.55181884765625, + "learning_rate": 2.0215087801843504e-07, + "loss": 18.1078, + "step": 459130 + }, + { + "epoch": 0.927491849044712, + "grad_norm": 709.7905883789062, + "learning_rate": 2.0205263790309125e-07, + "loss": 13.3553, + "step": 459140 + }, + { + "epoch": 0.9275120496773959, + "grad_norm": 871.2809448242188, + "learning_rate": 2.0195442117236176e-07, + "loss": 31.3566, + "step": 459150 + }, + { + "epoch": 0.9275322503100797, + "grad_norm": 387.3518371582031, + "learning_rate": 2.0185622782672497e-07, + "loss": 16.4249, + "step": 459160 + }, + { + "epoch": 0.9275524509427635, + "grad_norm": 516.0538940429688, + "learning_rate": 2.0175805786665782e-07, + "loss": 15.3209, + "step": 459170 + }, + { + "epoch": 0.9275726515754473, + "grad_norm": 313.6719665527344, + "learning_rate": 2.0165991129263984e-07, + "loss": 10.7569, + "step": 459180 + }, + { + "epoch": 0.9275928522081311, + "grad_norm": 368.5505065917969, + "learning_rate": 2.0156178810515127e-07, + "loss": 22.9218, + "step": 459190 + }, + { + "epoch": 0.927613052840815, + "grad_norm": 476.4034729003906, + "learning_rate": 2.0146368830466668e-07, + "loss": 26.4644, + "step": 459200 + }, + { + "epoch": 0.9276332534734988, + "grad_norm": 604.5199584960938, + "learning_rate": 2.0136561189166682e-07, + "loss": 12.8561, + "step": 459210 + }, + { + "epoch": 0.9276534541061826, + "grad_norm": 173.3835906982422, + 
"learning_rate": 2.0126755886662907e-07, + "loss": 14.4322, + "step": 459220 + }, + { + "epoch": 0.9276736547388664, + "grad_norm": 331.8222961425781, + "learning_rate": 2.0116952923003142e-07, + "loss": 14.6574, + "step": 459230 + }, + { + "epoch": 0.9276938553715502, + "grad_norm": 756.9157104492188, + "learning_rate": 2.0107152298235067e-07, + "loss": 21.7451, + "step": 459240 + }, + { + "epoch": 0.9277140560042341, + "grad_norm": 45.36907958984375, + "learning_rate": 2.0097354012406535e-07, + "loss": 23.7887, + "step": 459250 + }, + { + "epoch": 0.9277342566369179, + "grad_norm": 711.8092041015625, + "learning_rate": 2.0087558065565394e-07, + "loss": 25.7945, + "step": 459260 + }, + { + "epoch": 0.9277544572696017, + "grad_norm": 285.94219970703125, + "learning_rate": 2.007776445775922e-07, + "loss": 17.119, + "step": 459270 + }, + { + "epoch": 0.9277746579022855, + "grad_norm": 445.1518249511719, + "learning_rate": 2.006797318903575e-07, + "loss": 17.844, + "step": 459280 + }, + { + "epoch": 0.9277948585349693, + "grad_norm": 286.44287109375, + "learning_rate": 2.0058184259442893e-07, + "loss": 17.7038, + "step": 459290 + }, + { + "epoch": 0.9278150591676532, + "grad_norm": 206.69505310058594, + "learning_rate": 2.0048397669028164e-07, + "loss": 25.5775, + "step": 459300 + }, + { + "epoch": 0.927835259800337, + "grad_norm": 343.9249267578125, + "learning_rate": 2.003861341783936e-07, + "loss": 15.4368, + "step": 459310 + }, + { + "epoch": 0.9278554604330208, + "grad_norm": 146.1588592529297, + "learning_rate": 2.0028831505924162e-07, + "loss": 21.2046, + "step": 459320 + }, + { + "epoch": 0.9278756610657046, + "grad_norm": 526.7974853515625, + "learning_rate": 2.0019051933330204e-07, + "loss": 14.5463, + "step": 459330 + }, + { + "epoch": 0.9278958616983884, + "grad_norm": 53.01992416381836, + "learning_rate": 2.000927470010511e-07, + "loss": 12.1168, + "step": 459340 + }, + { + "epoch": 0.9279160623310722, + "grad_norm": 1.5485777854919434, + "learning_rate": 1.9999499806296674e-07, + "loss": 11.8297, + "step": 459350 + }, + { + "epoch": 0.927936262963756, + "grad_norm": 233.57095336914062, + "learning_rate": 1.9989727251952418e-07, + "loss": 32.4009, + "step": 459360 + }, + { + "epoch": 0.9279564635964398, + "grad_norm": 552.9609985351562, + "learning_rate": 1.9979957037120078e-07, + "loss": 22.0227, + "step": 459370 + }, + { + "epoch": 0.9279766642291236, + "grad_norm": 629.5794677734375, + "learning_rate": 1.9970189161847175e-07, + "loss": 15.866, + "step": 459380 + }, + { + "epoch": 0.9279968648618074, + "grad_norm": 189.53878784179688, + "learning_rate": 1.996042362618128e-07, + "loss": 17.6634, + "step": 459390 + }, + { + "epoch": 0.9280170654944913, + "grad_norm": 389.4222412109375, + "learning_rate": 1.995066043017013e-07, + "loss": 38.8011, + "step": 459400 + }, + { + "epoch": 0.9280372661271751, + "grad_norm": 289.399658203125, + "learning_rate": 1.9940899573861195e-07, + "loss": 12.5329, + "step": 459410 + }, + { + "epoch": 0.9280574667598589, + "grad_norm": 482.4100646972656, + "learning_rate": 1.993114105730215e-07, + "loss": 20.3436, + "step": 459420 + }, + { + "epoch": 0.9280776673925427, + "grad_norm": 151.21099853515625, + "learning_rate": 1.9921384880540406e-07, + "loss": 16.3865, + "step": 459430 + }, + { + "epoch": 0.9280978680252265, + "grad_norm": 391.5841369628906, + "learning_rate": 1.9911631043623704e-07, + "loss": 16.6259, + "step": 459440 + }, + { + "epoch": 0.9281180686579104, + "grad_norm": 311.5986328125, + "learning_rate": 1.99018795465995e-07, + 
"loss": 16.96, + "step": 459450 + }, + { + "epoch": 0.9281382692905942, + "grad_norm": 299.59454345703125, + "learning_rate": 1.9892130389515207e-07, + "loss": 13.1875, + "step": 459460 + }, + { + "epoch": 0.928158469923278, + "grad_norm": 135.6160125732422, + "learning_rate": 1.9882383572418508e-07, + "loss": 8.0025, + "step": 459470 + }, + { + "epoch": 0.9281786705559618, + "grad_norm": 845.6554565429688, + "learning_rate": 1.987263909535686e-07, + "loss": 26.8887, + "step": 459480 + }, + { + "epoch": 0.9281988711886456, + "grad_norm": 530.5870361328125, + "learning_rate": 1.986289695837762e-07, + "loss": 25.8121, + "step": 459490 + }, + { + "epoch": 0.9282190718213295, + "grad_norm": 299.4232177734375, + "learning_rate": 1.9853157161528468e-07, + "loss": 17.5788, + "step": 459500 + }, + { + "epoch": 0.9282392724540133, + "grad_norm": 230.8065948486328, + "learning_rate": 1.984341970485687e-07, + "loss": 11.3624, + "step": 459510 + }, + { + "epoch": 0.9282594730866971, + "grad_norm": 250.2640838623047, + "learning_rate": 1.9833684588410062e-07, + "loss": 17.0651, + "step": 459520 + }, + { + "epoch": 0.9282796737193809, + "grad_norm": 1162.2672119140625, + "learning_rate": 1.9823951812235675e-07, + "loss": 32.3245, + "step": 459530 + }, + { + "epoch": 0.9282998743520647, + "grad_norm": 648.0301513671875, + "learning_rate": 1.981422137638117e-07, + "loss": 26.6975, + "step": 459540 + }, + { + "epoch": 0.9283200749847486, + "grad_norm": 192.74911499023438, + "learning_rate": 1.98044932808939e-07, + "loss": 17.8849, + "step": 459550 + }, + { + "epoch": 0.9283402756174324, + "grad_norm": 3.202186346054077, + "learning_rate": 1.9794767525821212e-07, + "loss": 13.659, + "step": 459560 + }, + { + "epoch": 0.9283604762501162, + "grad_norm": 437.1767883300781, + "learning_rate": 1.9785044111210627e-07, + "loss": 13.1538, + "step": 459570 + }, + { + "epoch": 0.9283806768828, + "grad_norm": 588.3291625976562, + "learning_rate": 1.977532303710955e-07, + "loss": 18.7623, + "step": 459580 + }, + { + "epoch": 0.9284008775154838, + "grad_norm": 872.955322265625, + "learning_rate": 1.9765604303565223e-07, + "loss": 22.6115, + "step": 459590 + }, + { + "epoch": 0.9284210781481677, + "grad_norm": 207.5331268310547, + "learning_rate": 1.9755887910625103e-07, + "loss": 16.759, + "step": 459600 + }, + { + "epoch": 0.9284412787808514, + "grad_norm": 433.579833984375, + "learning_rate": 1.9746173858336604e-07, + "loss": 16.0027, + "step": 459610 + }, + { + "epoch": 0.9284614794135352, + "grad_norm": 119.8254623413086, + "learning_rate": 1.9736462146747015e-07, + "loss": 15.5961, + "step": 459620 + }, + { + "epoch": 0.928481680046219, + "grad_norm": 346.081787109375, + "learning_rate": 1.972675277590358e-07, + "loss": 18.0993, + "step": 459630 + }, + { + "epoch": 0.9285018806789028, + "grad_norm": 251.54295349121094, + "learning_rate": 1.9717045745853758e-07, + "loss": 13.2372, + "step": 459640 + }, + { + "epoch": 0.9285220813115866, + "grad_norm": 453.9295654296875, + "learning_rate": 1.9707341056644737e-07, + "loss": 30.9079, + "step": 459650 + }, + { + "epoch": 0.9285422819442705, + "grad_norm": 333.4501647949219, + "learning_rate": 1.9697638708323918e-07, + "loss": 14.9811, + "step": 459660 + }, + { + "epoch": 0.9285624825769543, + "grad_norm": 230.99989318847656, + "learning_rate": 1.9687938700938602e-07, + "loss": 40.2869, + "step": 459670 + }, + { + "epoch": 0.9285826832096381, + "grad_norm": 225.25244140625, + "learning_rate": 1.967824103453597e-07, + "loss": 28.2683, + "step": 459680 + }, + { + "epoch": 
0.9286028838423219, + "grad_norm": 387.0373840332031, + "learning_rate": 1.9668545709163378e-07, + "loss": 18.3746, + "step": 459690 + }, + { + "epoch": 0.9286230844750057, + "grad_norm": 612.067138671875, + "learning_rate": 1.9658852724868005e-07, + "loss": 24.9054, + "step": 459700 + }, + { + "epoch": 0.9286432851076896, + "grad_norm": 357.3955993652344, + "learning_rate": 1.9649162081697094e-07, + "loss": 24.4012, + "step": 459710 + }, + { + "epoch": 0.9286634857403734, + "grad_norm": 536.5983276367188, + "learning_rate": 1.963947377969788e-07, + "loss": 12.6208, + "step": 459720 + }, + { + "epoch": 0.9286836863730572, + "grad_norm": 267.8538513183594, + "learning_rate": 1.9629787818917722e-07, + "loss": 16.5168, + "step": 459730 + }, + { + "epoch": 0.928703887005741, + "grad_norm": 31.454442977905273, + "learning_rate": 1.9620104199403688e-07, + "loss": 19.1965, + "step": 459740 + }, + { + "epoch": 0.9287240876384248, + "grad_norm": 337.57623291015625, + "learning_rate": 1.961042292120291e-07, + "loss": 14.2129, + "step": 459750 + }, + { + "epoch": 0.9287442882711087, + "grad_norm": 220.30328369140625, + "learning_rate": 1.9600743984362792e-07, + "loss": 13.5387, + "step": 459760 + }, + { + "epoch": 0.9287644889037925, + "grad_norm": 252.01829528808594, + "learning_rate": 1.959106738893035e-07, + "loss": 10.7286, + "step": 459770 + }, + { + "epoch": 0.9287846895364763, + "grad_norm": 173.19248962402344, + "learning_rate": 1.958139313495272e-07, + "loss": 16.17, + "step": 459780 + }, + { + "epoch": 0.9288048901691601, + "grad_norm": 5.145406723022461, + "learning_rate": 1.957172122247708e-07, + "loss": 30.5028, + "step": 459790 + }, + { + "epoch": 0.9288250908018439, + "grad_norm": 237.4205780029297, + "learning_rate": 1.9562051651550784e-07, + "loss": 11.8499, + "step": 459800 + }, + { + "epoch": 0.9288452914345278, + "grad_norm": 157.3616943359375, + "learning_rate": 1.9552384422220627e-07, + "loss": 12.6332, + "step": 459810 + }, + { + "epoch": 0.9288654920672116, + "grad_norm": 286.0095520019531, + "learning_rate": 1.954271953453385e-07, + "loss": 18.3174, + "step": 459820 + }, + { + "epoch": 0.9288856926998954, + "grad_norm": 1239.24462890625, + "learning_rate": 1.953305698853769e-07, + "loss": 25.9679, + "step": 459830 + }, + { + "epoch": 0.9289058933325792, + "grad_norm": 665.3642578125, + "learning_rate": 1.9523396784279114e-07, + "loss": 23.8822, + "step": 459840 + }, + { + "epoch": 0.928926093965263, + "grad_norm": 508.5174865722656, + "learning_rate": 1.9513738921805192e-07, + "loss": 17.3589, + "step": 459850 + }, + { + "epoch": 0.9289462945979468, + "grad_norm": 316.22100830078125, + "learning_rate": 1.9504083401162999e-07, + "loss": 22.6244, + "step": 459860 + }, + { + "epoch": 0.9289664952306306, + "grad_norm": 284.92230224609375, + "learning_rate": 1.9494430222399774e-07, + "loss": 18.12, + "step": 459870 + }, + { + "epoch": 0.9289866958633144, + "grad_norm": 188.45223999023438, + "learning_rate": 1.948477938556226e-07, + "loss": 13.7546, + "step": 459880 + }, + { + "epoch": 0.9290068964959982, + "grad_norm": 294.2447814941406, + "learning_rate": 1.9475130890697691e-07, + "loss": 29.014, + "step": 459890 + }, + { + "epoch": 0.929027097128682, + "grad_norm": 181.1422882080078, + "learning_rate": 1.9465484737853092e-07, + "loss": 22.4655, + "step": 459900 + }, + { + "epoch": 0.9290472977613659, + "grad_norm": 217.6072235107422, + "learning_rate": 1.945584092707542e-07, + "loss": 14.1615, + "step": 459910 + }, + { + "epoch": 0.9290674983940497, + "grad_norm": 
657.130126953125, + "learning_rate": 1.944619945841164e-07, + "loss": 11.1402, + "step": 459920 + }, + { + "epoch": 0.9290876990267335, + "grad_norm": 399.0269470214844, + "learning_rate": 1.9436560331908882e-07, + "loss": 10.5638, + "step": 459930 + }, + { + "epoch": 0.9291078996594173, + "grad_norm": 359.1075134277344, + "learning_rate": 1.9426923547614052e-07, + "loss": 12.1312, + "step": 459940 + }, + { + "epoch": 0.9291281002921011, + "grad_norm": 851.9827880859375, + "learning_rate": 1.9417289105574054e-07, + "loss": 27.9834, + "step": 459950 + }, + { + "epoch": 0.929148300924785, + "grad_norm": 248.04933166503906, + "learning_rate": 1.9407657005835967e-07, + "loss": 19.4236, + "step": 459960 + }, + { + "epoch": 0.9291685015574688, + "grad_norm": 225.03952026367188, + "learning_rate": 1.9398027248446582e-07, + "loss": 11.7057, + "step": 459970 + }, + { + "epoch": 0.9291887021901526, + "grad_norm": 383.68414306640625, + "learning_rate": 1.9388399833452974e-07, + "loss": 18.9307, + "step": 459980 + }, + { + "epoch": 0.9292089028228364, + "grad_norm": 298.77899169921875, + "learning_rate": 1.9378774760902052e-07, + "loss": 11.7764, + "step": 459990 + }, + { + "epoch": 0.9292291034555202, + "grad_norm": 239.48939514160156, + "learning_rate": 1.9369152030840553e-07, + "loss": 16.0167, + "step": 460000 + }, + { + "epoch": 0.929249304088204, + "grad_norm": 472.67279052734375, + "learning_rate": 1.9359531643315665e-07, + "loss": 19.6229, + "step": 460010 + }, + { + "epoch": 0.9292695047208879, + "grad_norm": 348.67791748046875, + "learning_rate": 1.9349913598374014e-07, + "loss": 15.113, + "step": 460020 + }, + { + "epoch": 0.9292897053535717, + "grad_norm": 697.9534301757812, + "learning_rate": 1.9340297896062676e-07, + "loss": 16.396, + "step": 460030 + }, + { + "epoch": 0.9293099059862555, + "grad_norm": 561.0146484375, + "learning_rate": 1.9330684536428335e-07, + "loss": 18.4987, + "step": 460040 + }, + { + "epoch": 0.9293301066189393, + "grad_norm": 609.9745483398438, + "learning_rate": 1.9321073519518007e-07, + "loss": 19.547, + "step": 460050 + }, + { + "epoch": 0.9293503072516232, + "grad_norm": 38.030887603759766, + "learning_rate": 1.9311464845378492e-07, + "loss": 11.6924, + "step": 460060 + }, + { + "epoch": 0.929370507884307, + "grad_norm": 112.8908462524414, + "learning_rate": 1.9301858514056527e-07, + "loss": 9.4522, + "step": 460070 + }, + { + "epoch": 0.9293907085169908, + "grad_norm": 187.12997436523438, + "learning_rate": 1.9292254525599075e-07, + "loss": 17.3489, + "step": 460080 + }, + { + "epoch": 0.9294109091496746, + "grad_norm": 392.20379638671875, + "learning_rate": 1.928265288005282e-07, + "loss": 18.9361, + "step": 460090 + }, + { + "epoch": 0.9294311097823584, + "grad_norm": 281.1587219238281, + "learning_rate": 1.927305357746462e-07, + "loss": 16.0612, + "step": 460100 + }, + { + "epoch": 0.9294513104150423, + "grad_norm": 273.9291076660156, + "learning_rate": 1.9263456617881203e-07, + "loss": 11.1165, + "step": 460110 + }, + { + "epoch": 0.929471511047726, + "grad_norm": 327.94281005859375, + "learning_rate": 1.9253862001349543e-07, + "loss": 11.9103, + "step": 460120 + }, + { + "epoch": 0.9294917116804098, + "grad_norm": 285.5328674316406, + "learning_rate": 1.9244269727916097e-07, + "loss": 10.3836, + "step": 460130 + }, + { + "epoch": 0.9295119123130936, + "grad_norm": 176.0340118408203, + "learning_rate": 1.9234679797627832e-07, + "loss": 16.9426, + "step": 460140 + }, + { + "epoch": 0.9295321129457774, + "grad_norm": 317.98980712890625, + "learning_rate": 
1.9225092210531425e-07, + "loss": 19.7461, + "step": 460150 + }, + { + "epoch": 0.9295523135784612, + "grad_norm": 614.9310302734375, + "learning_rate": 1.9215506966673624e-07, + "loss": 22.1396, + "step": 460160 + }, + { + "epoch": 0.9295725142111451, + "grad_norm": 347.4130859375, + "learning_rate": 1.9205924066101057e-07, + "loss": 10.2269, + "step": 460170 + }, + { + "epoch": 0.9295927148438289, + "grad_norm": 427.9936218261719, + "learning_rate": 1.9196343508860515e-07, + "loss": 22.8617, + "step": 460180 + }, + { + "epoch": 0.9296129154765127, + "grad_norm": 386.7332458496094, + "learning_rate": 1.9186765294998855e-07, + "loss": 18.1745, + "step": 460190 + }, + { + "epoch": 0.9296331161091965, + "grad_norm": 421.6540832519531, + "learning_rate": 1.917718942456237e-07, + "loss": 15.6033, + "step": 460200 + }, + { + "epoch": 0.9296533167418803, + "grad_norm": 387.5469665527344, + "learning_rate": 1.9167615897598023e-07, + "loss": 12.7935, + "step": 460210 + }, + { + "epoch": 0.9296735173745642, + "grad_norm": 550.24853515625, + "learning_rate": 1.9158044714152447e-07, + "loss": 23.7764, + "step": 460220 + }, + { + "epoch": 0.929693718007248, + "grad_norm": 1298.991455078125, + "learning_rate": 1.914847587427221e-07, + "loss": 32.2464, + "step": 460230 + }, + { + "epoch": 0.9297139186399318, + "grad_norm": 668.077880859375, + "learning_rate": 1.9138909378003946e-07, + "loss": 17.404, + "step": 460240 + }, + { + "epoch": 0.9297341192726156, + "grad_norm": 339.6650695800781, + "learning_rate": 1.9129345225394335e-07, + "loss": 18.9665, + "step": 460250 + }, + { + "epoch": 0.9297543199052994, + "grad_norm": 773.9869384765625, + "learning_rate": 1.9119783416490013e-07, + "loss": 29.5222, + "step": 460260 + }, + { + "epoch": 0.9297745205379833, + "grad_norm": 61.7031135559082, + "learning_rate": 1.9110223951337492e-07, + "loss": 18.3, + "step": 460270 + }, + { + "epoch": 0.9297947211706671, + "grad_norm": 233.27320861816406, + "learning_rate": 1.910066682998346e-07, + "loss": 22.573, + "step": 460280 + }, + { + "epoch": 0.9298149218033509, + "grad_norm": 261.3985595703125, + "learning_rate": 1.909111205247438e-07, + "loss": 24.1993, + "step": 460290 + }, + { + "epoch": 0.9298351224360347, + "grad_norm": 256.5817565917969, + "learning_rate": 1.9081559618856938e-07, + "loss": 25.1584, + "step": 460300 + }, + { + "epoch": 0.9298553230687185, + "grad_norm": 443.31829833984375, + "learning_rate": 1.907200952917765e-07, + "loss": 22.7134, + "step": 460310 + }, + { + "epoch": 0.9298755237014024, + "grad_norm": 368.3848571777344, + "learning_rate": 1.9062461783483034e-07, + "loss": 13.8993, + "step": 460320 + }, + { + "epoch": 0.9298957243340862, + "grad_norm": 602.7681274414062, + "learning_rate": 1.9052916381819664e-07, + "loss": 21.0812, + "step": 460330 + }, + { + "epoch": 0.92991592496677, + "grad_norm": 1414.91796875, + "learning_rate": 1.904337332423406e-07, + "loss": 36.7006, + "step": 460340 + }, + { + "epoch": 0.9299361255994538, + "grad_norm": 475.8883056640625, + "learning_rate": 1.903383261077274e-07, + "loss": 28.129, + "step": 460350 + }, + { + "epoch": 0.9299563262321376, + "grad_norm": 8.253231048583984, + "learning_rate": 1.9024294241482112e-07, + "loss": 15.3869, + "step": 460360 + }, + { + "epoch": 0.9299765268648214, + "grad_norm": 116.25457000732422, + "learning_rate": 1.9014758216408803e-07, + "loss": 18.6597, + "step": 460370 + }, + { + "epoch": 0.9299967274975052, + "grad_norm": 79.83399963378906, + "learning_rate": 1.900522453559922e-07, + "loss": 16.284, + "step": 460380 + 
}, + { + "epoch": 0.930016928130189, + "grad_norm": 1928.6744384765625, + "learning_rate": 1.899569319909983e-07, + "loss": 35.3755, + "step": 460390 + }, + { + "epoch": 0.9300371287628728, + "grad_norm": 159.0091094970703, + "learning_rate": 1.8986164206957037e-07, + "loss": 15.8112, + "step": 460400 + }, + { + "epoch": 0.9300573293955566, + "grad_norm": 728.5242919921875, + "learning_rate": 1.897663755921747e-07, + "loss": 25.4743, + "step": 460410 + }, + { + "epoch": 0.9300775300282405, + "grad_norm": 291.23748779296875, + "learning_rate": 1.8967113255927315e-07, + "loss": 17.5053, + "step": 460420 + }, + { + "epoch": 0.9300977306609243, + "grad_norm": 305.28753662109375, + "learning_rate": 1.8957591297133093e-07, + "loss": 10.2362, + "step": 460430 + }, + { + "epoch": 0.9301179312936081, + "grad_norm": 413.1041564941406, + "learning_rate": 1.894807168288132e-07, + "loss": 17.3874, + "step": 460440 + }, + { + "epoch": 0.9301381319262919, + "grad_norm": 93.40693664550781, + "learning_rate": 1.8938554413218292e-07, + "loss": 14.6845, + "step": 460450 + }, + { + "epoch": 0.9301583325589757, + "grad_norm": 265.3533020019531, + "learning_rate": 1.8929039488190304e-07, + "loss": 10.0248, + "step": 460460 + }, + { + "epoch": 0.9301785331916596, + "grad_norm": 1233.372802734375, + "learning_rate": 1.8919526907843876e-07, + "loss": 50.5303, + "step": 460470 + }, + { + "epoch": 0.9301987338243434, + "grad_norm": 11.834299087524414, + "learning_rate": 1.8910016672225418e-07, + "loss": 14.3973, + "step": 460480 + }, + { + "epoch": 0.9302189344570272, + "grad_norm": 288.27978515625, + "learning_rate": 1.8900508781381056e-07, + "loss": 14.8687, + "step": 460490 + }, + { + "epoch": 0.930239135089711, + "grad_norm": 186.37252807617188, + "learning_rate": 1.8891003235357307e-07, + "loss": 18.5956, + "step": 460500 + }, + { + "epoch": 0.9302593357223948, + "grad_norm": 793.5770874023438, + "learning_rate": 1.8881500034200473e-07, + "loss": 15.8723, + "step": 460510 + }, + { + "epoch": 0.9302795363550787, + "grad_norm": 255.04364013671875, + "learning_rate": 1.88719991779569e-07, + "loss": 40.3052, + "step": 460520 + }, + { + "epoch": 0.9302997369877625, + "grad_norm": 5.66730260848999, + "learning_rate": 1.8862500666672778e-07, + "loss": 9.9851, + "step": 460530 + }, + { + "epoch": 0.9303199376204463, + "grad_norm": 385.5716552734375, + "learning_rate": 1.8853004500394512e-07, + "loss": 20.0818, + "step": 460540 + }, + { + "epoch": 0.9303401382531301, + "grad_norm": 10.681656837463379, + "learning_rate": 1.8843510679168341e-07, + "loss": 13.3622, + "step": 460550 + }, + { + "epoch": 0.9303603388858139, + "grad_norm": 0.04340120032429695, + "learning_rate": 1.883401920304051e-07, + "loss": 10.5599, + "step": 460560 + }, + { + "epoch": 0.9303805395184978, + "grad_norm": 323.4036560058594, + "learning_rate": 1.8824530072057369e-07, + "loss": 25.3733, + "step": 460570 + }, + { + "epoch": 0.9304007401511816, + "grad_norm": 161.68948364257812, + "learning_rate": 1.8815043286265044e-07, + "loss": 17.8008, + "step": 460580 + }, + { + "epoch": 0.9304209407838654, + "grad_norm": 98.37507629394531, + "learning_rate": 1.8805558845709894e-07, + "loss": 10.1326, + "step": 460590 + }, + { + "epoch": 0.9304411414165492, + "grad_norm": 1050.3140869140625, + "learning_rate": 1.8796076750438096e-07, + "loss": 29.5644, + "step": 460600 + }, + { + "epoch": 0.930461342049233, + "grad_norm": 4.681258201599121, + "learning_rate": 1.878659700049579e-07, + "loss": 21.6032, + "step": 460610 + }, + { + "epoch": 0.9304815426819169, 
+ "grad_norm": 902.1006469726562, + "learning_rate": 1.8777119595929315e-07, + "loss": 15.341, + "step": 460620 + }, + { + "epoch": 0.9305017433146006, + "grad_norm": 584.0853271484375, + "learning_rate": 1.8767644536784703e-07, + "loss": 16.4857, + "step": 460630 + }, + { + "epoch": 0.9305219439472844, + "grad_norm": 605.7371826171875, + "learning_rate": 1.8758171823108295e-07, + "loss": 13.6837, + "step": 460640 + }, + { + "epoch": 0.9305421445799682, + "grad_norm": 346.3061828613281, + "learning_rate": 1.874870145494617e-07, + "loss": 24.9194, + "step": 460650 + }, + { + "epoch": 0.930562345212652, + "grad_norm": 471.7580261230469, + "learning_rate": 1.8739233432344518e-07, + "loss": 18.2733, + "step": 460660 + }, + { + "epoch": 0.9305825458453358, + "grad_norm": 226.48004150390625, + "learning_rate": 1.8729767755349514e-07, + "loss": 18.7975, + "step": 460670 + }, + { + "epoch": 0.9306027464780197, + "grad_norm": 829.1776733398438, + "learning_rate": 1.872030442400713e-07, + "loss": 28.4234, + "step": 460680 + }, + { + "epoch": 0.9306229471107035, + "grad_norm": 331.8815612792969, + "learning_rate": 1.8710843438363713e-07, + "loss": 36.9377, + "step": 460690 + }, + { + "epoch": 0.9306431477433873, + "grad_norm": 0.0, + "learning_rate": 1.8701384798465284e-07, + "loss": 13.0251, + "step": 460700 + }, + { + "epoch": 0.9306633483760711, + "grad_norm": 418.47802734375, + "learning_rate": 1.8691928504357858e-07, + "loss": 25.0024, + "step": 460710 + }, + { + "epoch": 0.9306835490087549, + "grad_norm": 308.30731201171875, + "learning_rate": 1.868247455608757e-07, + "loss": 10.4745, + "step": 460720 + }, + { + "epoch": 0.9307037496414388, + "grad_norm": 447.0386657714844, + "learning_rate": 1.867302295370066e-07, + "loss": 15.2201, + "step": 460730 + }, + { + "epoch": 0.9307239502741226, + "grad_norm": 355.3406677246094, + "learning_rate": 1.8663573697242977e-07, + "loss": 15.954, + "step": 460740 + }, + { + "epoch": 0.9307441509068064, + "grad_norm": 252.26039123535156, + "learning_rate": 1.8654126786760597e-07, + "loss": 37.0269, + "step": 460750 + }, + { + "epoch": 0.9307643515394902, + "grad_norm": 0.0, + "learning_rate": 1.8644682222299703e-07, + "loss": 9.7798, + "step": 460760 + }, + { + "epoch": 0.930784552172174, + "grad_norm": 65.33013153076172, + "learning_rate": 1.8635240003906264e-07, + "loss": 14.7183, + "step": 460770 + }, + { + "epoch": 0.9308047528048579, + "grad_norm": 246.04112243652344, + "learning_rate": 1.8625800131626236e-07, + "loss": 32.6438, + "step": 460780 + }, + { + "epoch": 0.9308249534375417, + "grad_norm": 92.7275161743164, + "learning_rate": 1.8616362605505645e-07, + "loss": 8.0405, + "step": 460790 + }, + { + "epoch": 0.9308451540702255, + "grad_norm": 412.2235107421875, + "learning_rate": 1.8606927425590616e-07, + "loss": 19.9027, + "step": 460800 + }, + { + "epoch": 0.9308653547029093, + "grad_norm": 122.2205810546875, + "learning_rate": 1.8597494591926946e-07, + "loss": 15.9204, + "step": 460810 + }, + { + "epoch": 0.9308855553355931, + "grad_norm": 530.8219604492188, + "learning_rate": 1.858806410456071e-07, + "loss": 16.7315, + "step": 460820 + }, + { + "epoch": 0.930905755968277, + "grad_norm": 323.177001953125, + "learning_rate": 1.8578635963537926e-07, + "loss": 13.0048, + "step": 460830 + }, + { + "epoch": 0.9309259566009608, + "grad_norm": 388.21942138671875, + "learning_rate": 1.856921016890445e-07, + "loss": 20.7814, + "step": 460840 + }, + { + "epoch": 0.9309461572336446, + "grad_norm": 462.2232971191406, + "learning_rate": 
1.8559786720706185e-07, + "loss": 16.8715, + "step": 460850 + }, + { + "epoch": 0.9309663578663284, + "grad_norm": 595.4361572265625, + "learning_rate": 1.8550365618989207e-07, + "loss": 16.301, + "step": 460860 + }, + { + "epoch": 0.9309865584990122, + "grad_norm": 652.5562744140625, + "learning_rate": 1.854094686379937e-07, + "loss": 18.9976, + "step": 460870 + }, + { + "epoch": 0.9310067591316961, + "grad_norm": 611.486328125, + "learning_rate": 1.8531530455182522e-07, + "loss": 14.5472, + "step": 460880 + }, + { + "epoch": 0.9310269597643798, + "grad_norm": 11.874302864074707, + "learning_rate": 1.852211639318463e-07, + "loss": 8.2873, + "step": 460890 + }, + { + "epoch": 0.9310471603970636, + "grad_norm": 143.15005493164062, + "learning_rate": 1.8512704677851489e-07, + "loss": 8.2499, + "step": 460900 + }, + { + "epoch": 0.9310673610297474, + "grad_norm": 231.83935546875, + "learning_rate": 1.8503295309229065e-07, + "loss": 10.2527, + "step": 460910 + }, + { + "epoch": 0.9310875616624312, + "grad_norm": 354.39215087890625, + "learning_rate": 1.8493888287363148e-07, + "loss": 17.5189, + "step": 460920 + }, + { + "epoch": 0.931107762295115, + "grad_norm": 64.90702056884766, + "learning_rate": 1.8484483612299654e-07, + "loss": 23.2972, + "step": 460930 + }, + { + "epoch": 0.9311279629277989, + "grad_norm": 9.676826477050781, + "learning_rate": 1.8475081284084428e-07, + "loss": 13.254, + "step": 460940 + }, + { + "epoch": 0.9311481635604827, + "grad_norm": 393.5041809082031, + "learning_rate": 1.846568130276316e-07, + "loss": 20.8727, + "step": 460950 + }, + { + "epoch": 0.9311683641931665, + "grad_norm": 633.0675659179688, + "learning_rate": 1.8456283668381814e-07, + "loss": 15.9627, + "step": 460960 + }, + { + "epoch": 0.9311885648258503, + "grad_norm": 109.91011047363281, + "learning_rate": 1.8446888380986128e-07, + "loss": 25.1295, + "step": 460970 + }, + { + "epoch": 0.9312087654585341, + "grad_norm": 222.28253173828125, + "learning_rate": 1.84374954406219e-07, + "loss": 21.0881, + "step": 460980 + }, + { + "epoch": 0.931228966091218, + "grad_norm": 410.1101989746094, + "learning_rate": 1.8428104847334927e-07, + "loss": 8.0959, + "step": 460990 + }, + { + "epoch": 0.9312491667239018, + "grad_norm": 428.9569396972656, + "learning_rate": 1.841871660117095e-07, + "loss": 14.7619, + "step": 461000 + }, + { + "epoch": 0.9312693673565856, + "grad_norm": 594.7449340820312, + "learning_rate": 1.8409330702175764e-07, + "loss": 16.7418, + "step": 461010 + }, + { + "epoch": 0.9312895679892694, + "grad_norm": 154.09713745117188, + "learning_rate": 1.8399947150395058e-07, + "loss": 25.5965, + "step": 461020 + }, + { + "epoch": 0.9313097686219532, + "grad_norm": 237.8426055908203, + "learning_rate": 1.8390565945874572e-07, + "loss": 19.8066, + "step": 461030 + }, + { + "epoch": 0.9313299692546371, + "grad_norm": 260.109130859375, + "learning_rate": 1.8381187088660046e-07, + "loss": 18.0444, + "step": 461040 + }, + { + "epoch": 0.9313501698873209, + "grad_norm": 103.0996322631836, + "learning_rate": 1.8371810578797277e-07, + "loss": 12.4614, + "step": 461050 + }, + { + "epoch": 0.9313703705200047, + "grad_norm": 20.84394645690918, + "learning_rate": 1.8362436416331896e-07, + "loss": 12.9066, + "step": 461060 + }, + { + "epoch": 0.9313905711526885, + "grad_norm": 246.29661560058594, + "learning_rate": 1.8353064601309533e-07, + "loss": 12.1289, + "step": 461070 + }, + { + "epoch": 0.9314107717853723, + "grad_norm": 95.33872985839844, + "learning_rate": 1.8343695133775874e-07, + "loss": 25.977, + 
"step": 461080 + }, + { + "epoch": 0.9314309724180562, + "grad_norm": 189.6843719482422, + "learning_rate": 1.833432801377677e-07, + "loss": 17.1384, + "step": 461090 + }, + { + "epoch": 0.93145117305074, + "grad_norm": 294.8585205078125, + "learning_rate": 1.832496324135763e-07, + "loss": 14.8602, + "step": 461100 + }, + { + "epoch": 0.9314713736834238, + "grad_norm": 403.48773193359375, + "learning_rate": 1.8315600816564137e-07, + "loss": 21.2338, + "step": 461110 + }, + { + "epoch": 0.9314915743161076, + "grad_norm": 142.48876953125, + "learning_rate": 1.8306240739442094e-07, + "loss": 19.6496, + "step": 461120 + }, + { + "epoch": 0.9315117749487914, + "grad_norm": 399.75579833984375, + "learning_rate": 1.8296883010037014e-07, + "loss": 17.4055, + "step": 461130 + }, + { + "epoch": 0.9315319755814752, + "grad_norm": 204.31796264648438, + "learning_rate": 1.8287527628394418e-07, + "loss": 10.361, + "step": 461140 + }, + { + "epoch": 0.931552176214159, + "grad_norm": 174.668701171875, + "learning_rate": 1.827817459456005e-07, + "loss": 11.47, + "step": 461150 + }, + { + "epoch": 0.9315723768468428, + "grad_norm": 326.95880126953125, + "learning_rate": 1.826882390857948e-07, + "loss": 18.0679, + "step": 461160 + }, + { + "epoch": 0.9315925774795266, + "grad_norm": 178.13429260253906, + "learning_rate": 1.825947557049812e-07, + "loss": 22.8237, + "step": 461170 + }, + { + "epoch": 0.9316127781122104, + "grad_norm": 647.4390258789062, + "learning_rate": 1.825012958036171e-07, + "loss": 37.3266, + "step": 461180 + }, + { + "epoch": 0.9316329787448943, + "grad_norm": 589.9456176757812, + "learning_rate": 1.824078593821571e-07, + "loss": 16.358, + "step": 461190 + }, + { + "epoch": 0.9316531793775781, + "grad_norm": 18.983449935913086, + "learning_rate": 1.8231444644105755e-07, + "loss": 16.9788, + "step": 461200 + }, + { + "epoch": 0.9316733800102619, + "grad_norm": 755.7958374023438, + "learning_rate": 1.8222105698077253e-07, + "loss": 25.4583, + "step": 461210 + }, + { + "epoch": 0.9316935806429457, + "grad_norm": 346.2664794921875, + "learning_rate": 1.8212769100175774e-07, + "loss": 24.498, + "step": 461220 + }, + { + "epoch": 0.9317137812756295, + "grad_norm": 390.356201171875, + "learning_rate": 1.8203434850446844e-07, + "loss": 22.6047, + "step": 461230 + }, + { + "epoch": 0.9317339819083134, + "grad_norm": 265.6792297363281, + "learning_rate": 1.819410294893592e-07, + "loss": 29.0851, + "step": 461240 + }, + { + "epoch": 0.9317541825409972, + "grad_norm": 3.6130383014678955, + "learning_rate": 1.8184773395688527e-07, + "loss": 10.6757, + "step": 461250 + }, + { + "epoch": 0.931774383173681, + "grad_norm": 434.55145263671875, + "learning_rate": 1.8175446190750068e-07, + "loss": 17.189, + "step": 461260 + }, + { + "epoch": 0.9317945838063648, + "grad_norm": 693.389404296875, + "learning_rate": 1.816612133416612e-07, + "loss": 14.9552, + "step": 461270 + }, + { + "epoch": 0.9318147844390486, + "grad_norm": 259.1981506347656, + "learning_rate": 1.8156798825982035e-07, + "loss": 9.5073, + "step": 461280 + }, + { + "epoch": 0.9318349850717325, + "grad_norm": 193.0907745361328, + "learning_rate": 1.814747866624317e-07, + "loss": 17.5511, + "step": 461290 + }, + { + "epoch": 0.9318551857044163, + "grad_norm": 376.73968505859375, + "learning_rate": 1.8138160854995145e-07, + "loss": 25.6786, + "step": 461300 + }, + { + "epoch": 0.9318753863371001, + "grad_norm": 282.88330078125, + "learning_rate": 1.8128845392283324e-07, + "loss": 14.7452, + "step": 461310 + }, + { + "epoch": 0.9318955869697839, 
+ "grad_norm": 447.7366027832031, + "learning_rate": 1.8119532278152996e-07, + "loss": 33.4463, + "step": 461320 + }, + { + "epoch": 0.9319157876024677, + "grad_norm": 370.9266662597656, + "learning_rate": 1.8110221512649573e-07, + "loss": 13.9933, + "step": 461330 + }, + { + "epoch": 0.9319359882351516, + "grad_norm": 500.27410888671875, + "learning_rate": 1.8100913095818627e-07, + "loss": 19.469, + "step": 461340 + }, + { + "epoch": 0.9319561888678354, + "grad_norm": 306.4193115234375, + "learning_rate": 1.8091607027705293e-07, + "loss": 15.5763, + "step": 461350 + }, + { + "epoch": 0.9319763895005192, + "grad_norm": 447.197265625, + "learning_rate": 1.8082303308354976e-07, + "loss": 19.6914, + "step": 461360 + }, + { + "epoch": 0.931996590133203, + "grad_norm": 125.23076629638672, + "learning_rate": 1.8073001937813138e-07, + "loss": 16.8651, + "step": 461370 + }, + { + "epoch": 0.9320167907658868, + "grad_norm": 91.94493865966797, + "learning_rate": 1.8063702916125025e-07, + "loss": 9.5325, + "step": 461380 + }, + { + "epoch": 0.9320369913985707, + "grad_norm": 202.62930297851562, + "learning_rate": 1.805440624333593e-07, + "loss": 26.8626, + "step": 461390 + }, + { + "epoch": 0.9320571920312544, + "grad_norm": 514.9032592773438, + "learning_rate": 1.804511191949121e-07, + "loss": 21.1308, + "step": 461400 + }, + { + "epoch": 0.9320773926639382, + "grad_norm": 172.77285766601562, + "learning_rate": 1.8035819944636269e-07, + "loss": 25.8753, + "step": 461410 + }, + { + "epoch": 0.932097593296622, + "grad_norm": 289.017822265625, + "learning_rate": 1.8026530318816183e-07, + "loss": 17.1092, + "step": 461420 + }, + { + "epoch": 0.9321177939293058, + "grad_norm": 6.599430561065674, + "learning_rate": 1.8017243042076304e-07, + "loss": 18.2395, + "step": 461430 + }, + { + "epoch": 0.9321379945619896, + "grad_norm": 280.7936706542969, + "learning_rate": 1.8007958114462042e-07, + "loss": 13.1732, + "step": 461440 + }, + { + "epoch": 0.9321581951946735, + "grad_norm": 894.9324340820312, + "learning_rate": 1.7998675536018474e-07, + "loss": 21.2707, + "step": 461450 + }, + { + "epoch": 0.9321783958273573, + "grad_norm": 80.70378112792969, + "learning_rate": 1.7989395306790835e-07, + "loss": 43.2938, + "step": 461460 + }, + { + "epoch": 0.9321985964600411, + "grad_norm": 301.1913757324219, + "learning_rate": 1.798011742682454e-07, + "loss": 18.9873, + "step": 461470 + }, + { + "epoch": 0.9322187970927249, + "grad_norm": 164.97093200683594, + "learning_rate": 1.7970841896164658e-07, + "loss": 27.1778, + "step": 461480 + }, + { + "epoch": 0.9322389977254087, + "grad_norm": 172.13999938964844, + "learning_rate": 1.7961568714856382e-07, + "loss": 22.4734, + "step": 461490 + }, + { + "epoch": 0.9322591983580926, + "grad_norm": 228.5173797607422, + "learning_rate": 1.7952297882945e-07, + "loss": 35.3497, + "step": 461500 + }, + { + "epoch": 0.9322793989907764, + "grad_norm": 513.8005981445312, + "learning_rate": 1.7943029400475598e-07, + "loss": 19.494, + "step": 461510 + }, + { + "epoch": 0.9322995996234602, + "grad_norm": 236.8550567626953, + "learning_rate": 1.7933763267493465e-07, + "loss": 11.9686, + "step": 461520 + }, + { + "epoch": 0.932319800256144, + "grad_norm": 3.990077018737793, + "learning_rate": 1.7924499484043622e-07, + "loss": 15.6638, + "step": 461530 + }, + { + "epoch": 0.9323400008888278, + "grad_norm": 267.17132568359375, + "learning_rate": 1.7915238050171367e-07, + "loss": 19.3243, + "step": 461540 + }, + { + "epoch": 0.9323602015215117, + "grad_norm": 175.84994506835938, + 
"learning_rate": 1.7905978965921778e-07, + "loss": 23.0182, + "step": 461550 + }, + { + "epoch": 0.9323804021541955, + "grad_norm": 455.0528259277344, + "learning_rate": 1.7896722231339925e-07, + "loss": 21.7424, + "step": 461560 + }, + { + "epoch": 0.9324006027868793, + "grad_norm": 239.96661376953125, + "learning_rate": 1.788746784647105e-07, + "loss": 13.5123, + "step": 461570 + }, + { + "epoch": 0.9324208034195631, + "grad_norm": 418.77294921875, + "learning_rate": 1.7878215811360068e-07, + "loss": 17.4203, + "step": 461580 + }, + { + "epoch": 0.932441004052247, + "grad_norm": 564.48095703125, + "learning_rate": 1.7868966126052323e-07, + "loss": 9.9037, + "step": 461590 + }, + { + "epoch": 0.9324612046849308, + "grad_norm": 132.60113525390625, + "learning_rate": 1.785971879059273e-07, + "loss": 18.6507, + "step": 461600 + }, + { + "epoch": 0.9324814053176146, + "grad_norm": 358.73638916015625, + "learning_rate": 1.7850473805026304e-07, + "loss": 22.1932, + "step": 461610 + }, + { + "epoch": 0.9325016059502984, + "grad_norm": 105.70510864257812, + "learning_rate": 1.7841231169398287e-07, + "loss": 11.7789, + "step": 461620 + }, + { + "epoch": 0.9325218065829822, + "grad_norm": 426.2632751464844, + "learning_rate": 1.7831990883753592e-07, + "loss": 16.3437, + "step": 461630 + }, + { + "epoch": 0.932542007215666, + "grad_norm": 410.3416748046875, + "learning_rate": 1.7822752948137289e-07, + "loss": 16.3283, + "step": 461640 + }, + { + "epoch": 0.9325622078483498, + "grad_norm": 988.7979736328125, + "learning_rate": 1.7813517362594347e-07, + "loss": 26.9564, + "step": 461650 + }, + { + "epoch": 0.9325824084810336, + "grad_norm": 334.42529296875, + "learning_rate": 1.7804284127169946e-07, + "loss": 8.4331, + "step": 461660 + }, + { + "epoch": 0.9326026091137174, + "grad_norm": 243.21121215820312, + "learning_rate": 1.7795053241908943e-07, + "loss": 10.7253, + "step": 461670 + }, + { + "epoch": 0.9326228097464012, + "grad_norm": 570.7203369140625, + "learning_rate": 1.7785824706856303e-07, + "loss": 12.1912, + "step": 461680 + }, + { + "epoch": 0.932643010379085, + "grad_norm": 534.68994140625, + "learning_rate": 1.7776598522057154e-07, + "loss": 27.0854, + "step": 461690 + }, + { + "epoch": 0.9326632110117689, + "grad_norm": 371.4281921386719, + "learning_rate": 1.7767374687556405e-07, + "loss": 31.2355, + "step": 461700 + }, + { + "epoch": 0.9326834116444527, + "grad_norm": 163.0079803466797, + "learning_rate": 1.7758153203398853e-07, + "loss": 26.3914, + "step": 461710 + }, + { + "epoch": 0.9327036122771365, + "grad_norm": 99.2337875366211, + "learning_rate": 1.774893406962963e-07, + "loss": 22.1929, + "step": 461720 + }, + { + "epoch": 0.9327238129098203, + "grad_norm": 719.2616577148438, + "learning_rate": 1.7739717286293644e-07, + "loss": 15.3715, + "step": 461730 + }, + { + "epoch": 0.9327440135425041, + "grad_norm": 360.2846374511719, + "learning_rate": 1.7730502853435805e-07, + "loss": 13.2564, + "step": 461740 + }, + { + "epoch": 0.932764214175188, + "grad_norm": 130.13404846191406, + "learning_rate": 1.7721290771100964e-07, + "loss": 11.8033, + "step": 461750 + }, + { + "epoch": 0.9327844148078718, + "grad_norm": 130.27134704589844, + "learning_rate": 1.7712081039334083e-07, + "loss": 8.8699, + "step": 461760 + }, + { + "epoch": 0.9328046154405556, + "grad_norm": 40.666603088378906, + "learning_rate": 1.770287365818002e-07, + "loss": 11.4024, + "step": 461770 + }, + { + "epoch": 0.9328248160732394, + "grad_norm": 350.0940856933594, + "learning_rate": 1.7693668627683625e-07, + 
"loss": 10.8917, + "step": 461780 + }, + { + "epoch": 0.9328450167059232, + "grad_norm": 260.5531921386719, + "learning_rate": 1.7684465947889806e-07, + "loss": 14.8456, + "step": 461790 + }, + { + "epoch": 0.9328652173386071, + "grad_norm": 369.9488525390625, + "learning_rate": 1.7675265618843361e-07, + "loss": 11.4153, + "step": 461800 + }, + { + "epoch": 0.9328854179712909, + "grad_norm": 265.1435241699219, + "learning_rate": 1.7666067640589256e-07, + "loss": 31.7913, + "step": 461810 + }, + { + "epoch": 0.9329056186039747, + "grad_norm": 607.055419921875, + "learning_rate": 1.7656872013172176e-07, + "loss": 19.5087, + "step": 461820 + }, + { + "epoch": 0.9329258192366585, + "grad_norm": 319.4716796875, + "learning_rate": 1.764767873663703e-07, + "loss": 37.0232, + "step": 461830 + }, + { + "epoch": 0.9329460198693423, + "grad_norm": 467.063232421875, + "learning_rate": 1.7638487811028616e-07, + "loss": 17.9041, + "step": 461840 + }, + { + "epoch": 0.9329662205020262, + "grad_norm": 175.1415557861328, + "learning_rate": 1.7629299236391616e-07, + "loss": 15.5899, + "step": 461850 + }, + { + "epoch": 0.93298642113471, + "grad_norm": 1434.3819580078125, + "learning_rate": 1.7620113012771002e-07, + "loss": 23.1167, + "step": 461860 + }, + { + "epoch": 0.9330066217673938, + "grad_norm": 619.7500610351562, + "learning_rate": 1.7610929140211397e-07, + "loss": 27.0966, + "step": 461870 + }, + { + "epoch": 0.9330268224000776, + "grad_norm": 457.38543701171875, + "learning_rate": 1.760174761875766e-07, + "loss": 10.2659, + "step": 461880 + }, + { + "epoch": 0.9330470230327614, + "grad_norm": 312.8133544921875, + "learning_rate": 1.7592568448454528e-07, + "loss": 24.0141, + "step": 461890 + }, + { + "epoch": 0.9330672236654453, + "grad_norm": 113.47441101074219, + "learning_rate": 1.758339162934658e-07, + "loss": 21.0164, + "step": 461900 + }, + { + "epoch": 0.933087424298129, + "grad_norm": 468.087646484375, + "learning_rate": 1.757421716147878e-07, + "loss": 21.749, + "step": 461910 + }, + { + "epoch": 0.9331076249308128, + "grad_norm": 730.168212890625, + "learning_rate": 1.7565045044895756e-07, + "loss": 20.517, + "step": 461920 + }, + { + "epoch": 0.9331278255634966, + "grad_norm": 459.88446044921875, + "learning_rate": 1.7555875279642087e-07, + "loss": 16.8315, + "step": 461930 + }, + { + "epoch": 0.9331480261961804, + "grad_norm": 299.3412780761719, + "learning_rate": 1.754670786576257e-07, + "loss": 11.309, + "step": 461940 + }, + { + "epoch": 0.9331682268288642, + "grad_norm": 255.61477661132812, + "learning_rate": 1.7537542803302e-07, + "loss": 8.1569, + "step": 461950 + }, + { + "epoch": 0.9331884274615481, + "grad_norm": 303.9642028808594, + "learning_rate": 1.7528380092304842e-07, + "loss": 26.4091, + "step": 461960 + }, + { + "epoch": 0.9332086280942319, + "grad_norm": 880.2850341796875, + "learning_rate": 1.751921973281584e-07, + "loss": 30.5622, + "step": 461970 + }, + { + "epoch": 0.9332288287269157, + "grad_norm": 446.8388671875, + "learning_rate": 1.7510061724879678e-07, + "loss": 17.2565, + "step": 461980 + }, + { + "epoch": 0.9332490293595995, + "grad_norm": 195.17286682128906, + "learning_rate": 1.750090606854099e-07, + "loss": 22.5183, + "step": 461990 + }, + { + "epoch": 0.9332692299922833, + "grad_norm": 234.3126983642578, + "learning_rate": 1.7491752763844294e-07, + "loss": 14.1387, + "step": 462000 + }, + { + "epoch": 0.9332894306249672, + "grad_norm": 184.8792724609375, + "learning_rate": 1.7482601810834276e-07, + "loss": 12.4313, + "step": 462010 + }, + { + "epoch": 
0.933309631257651, + "grad_norm": 96.02690124511719, + "learning_rate": 1.7473453209555625e-07, + "loss": 14.9553, + "step": 462020 + }, + { + "epoch": 0.9333298318903348, + "grad_norm": 613.595947265625, + "learning_rate": 1.7464306960052746e-07, + "loss": 14.9738, + "step": 462030 + }, + { + "epoch": 0.9333500325230186, + "grad_norm": 386.20684814453125, + "learning_rate": 1.7455163062370273e-07, + "loss": 11.3136, + "step": 462040 + }, + { + "epoch": 0.9333702331557024, + "grad_norm": 408.2582702636719, + "learning_rate": 1.744602151655289e-07, + "loss": 18.7055, + "step": 462050 + }, + { + "epoch": 0.9333904337883863, + "grad_norm": 124.63929748535156, + "learning_rate": 1.743688232264512e-07, + "loss": 8.8279, + "step": 462060 + }, + { + "epoch": 0.9334106344210701, + "grad_norm": 418.02581787109375, + "learning_rate": 1.742774548069137e-07, + "loss": 12.0346, + "step": 462070 + }, + { + "epoch": 0.9334308350537539, + "grad_norm": 400.3048400878906, + "learning_rate": 1.7418610990736273e-07, + "loss": 16.3988, + "step": 462080 + }, + { + "epoch": 0.9334510356864377, + "grad_norm": 161.02871704101562, + "learning_rate": 1.7409478852824402e-07, + "loss": 30.2902, + "step": 462090 + }, + { + "epoch": 0.9334712363191215, + "grad_norm": 321.67498779296875, + "learning_rate": 1.740034906700011e-07, + "loss": 14.7172, + "step": 462100 + }, + { + "epoch": 0.9334914369518054, + "grad_norm": 422.029296875, + "learning_rate": 1.7391221633308032e-07, + "loss": 18.4577, + "step": 462110 + }, + { + "epoch": 0.9335116375844892, + "grad_norm": 616.5631103515625, + "learning_rate": 1.7382096551792572e-07, + "loss": 12.381, + "step": 462120 + }, + { + "epoch": 0.933531838217173, + "grad_norm": 594.563720703125, + "learning_rate": 1.7372973822498252e-07, + "loss": 16.8806, + "step": 462130 + }, + { + "epoch": 0.9335520388498568, + "grad_norm": 6.52410364151001, + "learning_rate": 1.7363853445469482e-07, + "loss": 16.316, + "step": 462140 + }, + { + "epoch": 0.9335722394825406, + "grad_norm": 363.2400817871094, + "learning_rate": 1.7354735420750835e-07, + "loss": 17.689, + "step": 462150 + }, + { + "epoch": 0.9335924401152244, + "grad_norm": 481.3193359375, + "learning_rate": 1.7345619748386666e-07, + "loss": 22.4928, + "step": 462160 + }, + { + "epoch": 0.9336126407479082, + "grad_norm": 216.18240356445312, + "learning_rate": 1.733650642842133e-07, + "loss": 15.536, + "step": 462170 + }, + { + "epoch": 0.933632841380592, + "grad_norm": 661.4515991210938, + "learning_rate": 1.73273954608994e-07, + "loss": 27.2659, + "step": 462180 + }, + { + "epoch": 0.9336530420132758, + "grad_norm": 69.99722290039062, + "learning_rate": 1.7318286845865174e-07, + "loss": 25.4236, + "step": 462190 + }, + { + "epoch": 0.9336732426459596, + "grad_norm": 110.41122436523438, + "learning_rate": 1.7309180583363062e-07, + "loss": 21.1707, + "step": 462200 + }, + { + "epoch": 0.9336934432786435, + "grad_norm": 328.7599792480469, + "learning_rate": 1.7300076673437526e-07, + "loss": 9.7369, + "step": 462210 + }, + { + "epoch": 0.9337136439113273, + "grad_norm": 602.4974975585938, + "learning_rate": 1.7290975116132756e-07, + "loss": 24.9123, + "step": 462220 + }, + { + "epoch": 0.9337338445440111, + "grad_norm": 209.95286560058594, + "learning_rate": 1.728187591149333e-07, + "loss": 12.9717, + "step": 462230 + }, + { + "epoch": 0.9337540451766949, + "grad_norm": 150.35626220703125, + "learning_rate": 1.7272779059563483e-07, + "loss": 22.0588, + "step": 462240 + }, + { + "epoch": 0.9337742458093787, + "grad_norm": 
237.76243591308594, + "learning_rate": 1.7263684560387518e-07, + "loss": 19.1027, + "step": 462250 + }, + { + "epoch": 0.9337944464420626, + "grad_norm": 186.2238311767578, + "learning_rate": 1.7254592414009785e-07, + "loss": 16.0267, + "step": 462260 + }, + { + "epoch": 0.9338146470747464, + "grad_norm": 1390.9752197265625, + "learning_rate": 1.7245502620474643e-07, + "loss": 21.0884, + "step": 462270 + }, + { + "epoch": 0.9338348477074302, + "grad_norm": 373.25543212890625, + "learning_rate": 1.7236415179826438e-07, + "loss": 20.0606, + "step": 462280 + }, + { + "epoch": 0.933855048340114, + "grad_norm": 115.53531646728516, + "learning_rate": 1.7227330092109306e-07, + "loss": 18.2338, + "step": 462290 + }, + { + "epoch": 0.9338752489727978, + "grad_norm": 155.42364501953125, + "learning_rate": 1.7218247357367656e-07, + "loss": 15.553, + "step": 462300 + }, + { + "epoch": 0.9338954496054817, + "grad_norm": 14.017396926879883, + "learning_rate": 1.720916697564573e-07, + "loss": 8.9427, + "step": 462310 + }, + { + "epoch": 0.9339156502381655, + "grad_norm": 613.1043701171875, + "learning_rate": 1.7200088946987713e-07, + "loss": 29.1203, + "step": 462320 + }, + { + "epoch": 0.9339358508708493, + "grad_norm": 1064.6912841796875, + "learning_rate": 1.7191013271437908e-07, + "loss": 23.2569, + "step": 462330 + }, + { + "epoch": 0.9339560515035331, + "grad_norm": 644.6098022460938, + "learning_rate": 1.7181939949040606e-07, + "loss": 17.197, + "step": 462340 + }, + { + "epoch": 0.9339762521362169, + "grad_norm": 735.906982421875, + "learning_rate": 1.717286897983994e-07, + "loss": 27.4059, + "step": 462350 + }, + { + "epoch": 0.9339964527689008, + "grad_norm": 194.6940155029297, + "learning_rate": 1.7163800363880102e-07, + "loss": 8.4069, + "step": 462360 + }, + { + "epoch": 0.9340166534015846, + "grad_norm": 446.6946105957031, + "learning_rate": 1.715473410120544e-07, + "loss": 28.9146, + "step": 462370 + }, + { + "epoch": 0.9340368540342684, + "grad_norm": 454.2542419433594, + "learning_rate": 1.7145670191859977e-07, + "loss": 12.4825, + "step": 462380 + }, + { + "epoch": 0.9340570546669522, + "grad_norm": 210.14657592773438, + "learning_rate": 1.7136608635887952e-07, + "loss": 18.0583, + "step": 462390 + }, + { + "epoch": 0.934077255299636, + "grad_norm": 1262.3385009765625, + "learning_rate": 1.7127549433333557e-07, + "loss": 26.256, + "step": 462400 + }, + { + "epoch": 0.9340974559323199, + "grad_norm": 194.8001251220703, + "learning_rate": 1.7118492584240865e-07, + "loss": 15.3081, + "step": 462410 + }, + { + "epoch": 0.9341176565650036, + "grad_norm": 210.71812438964844, + "learning_rate": 1.7109438088654173e-07, + "loss": 14.9052, + "step": 462420 + }, + { + "epoch": 0.9341378571976874, + "grad_norm": 427.1445617675781, + "learning_rate": 1.7100385946617393e-07, + "loss": 20.4259, + "step": 462430 + }, + { + "epoch": 0.9341580578303712, + "grad_norm": 1009.92626953125, + "learning_rate": 1.7091336158174877e-07, + "loss": 22.6844, + "step": 462440 + }, + { + "epoch": 0.934178258463055, + "grad_norm": 234.84991455078125, + "learning_rate": 1.7082288723370587e-07, + "loss": 4.9438, + "step": 462450 + }, + { + "epoch": 0.9341984590957388, + "grad_norm": 272.6453857421875, + "learning_rate": 1.7073243642248605e-07, + "loss": 16.662, + "step": 462460 + }, + { + "epoch": 0.9342186597284227, + "grad_norm": 554.6612548828125, + "learning_rate": 1.7064200914853112e-07, + "loss": 26.6758, + "step": 462470 + }, + { + "epoch": 0.9342388603611065, + "grad_norm": 171.58334350585938, + "learning_rate": 
1.7055160541228077e-07, + "loss": 7.0108, + "step": 462480 + }, + { + "epoch": 0.9342590609937903, + "grad_norm": 761.4239501953125, + "learning_rate": 1.7046122521417686e-07, + "loss": 10.5315, + "step": 462490 + }, + { + "epoch": 0.9342792616264741, + "grad_norm": 279.9809875488281, + "learning_rate": 1.7037086855465902e-07, + "loss": 29.0353, + "step": 462500 + }, + { + "epoch": 0.9342994622591579, + "grad_norm": 130.5390625, + "learning_rate": 1.702805354341669e-07, + "loss": 26.2922, + "step": 462510 + }, + { + "epoch": 0.9343196628918418, + "grad_norm": 1399.39794921875, + "learning_rate": 1.7019022585314293e-07, + "loss": 19.9626, + "step": 462520 + }, + { + "epoch": 0.9343398635245256, + "grad_norm": 965.174560546875, + "learning_rate": 1.7009993981202567e-07, + "loss": 26.338, + "step": 462530 + }, + { + "epoch": 0.9343600641572094, + "grad_norm": 168.63499450683594, + "learning_rate": 1.7000967731125472e-07, + "loss": 19.967, + "step": 462540 + }, + { + "epoch": 0.9343802647898932, + "grad_norm": 222.2827911376953, + "learning_rate": 1.699194383512709e-07, + "loss": 7.6039, + "step": 462550 + }, + { + "epoch": 0.934400465422577, + "grad_norm": 277.81793212890625, + "learning_rate": 1.6982922293251548e-07, + "loss": 43.1463, + "step": 462560 + }, + { + "epoch": 0.9344206660552609, + "grad_norm": 207.72872924804688, + "learning_rate": 1.6973903105542533e-07, + "loss": 32.8887, + "step": 462570 + }, + { + "epoch": 0.9344408666879447, + "grad_norm": 741.337158203125, + "learning_rate": 1.6964886272044069e-07, + "loss": 24.0368, + "step": 462580 + }, + { + "epoch": 0.9344610673206285, + "grad_norm": 294.7828369140625, + "learning_rate": 1.6955871792800283e-07, + "loss": 13.5807, + "step": 462590 + }, + { + "epoch": 0.9344812679533123, + "grad_norm": 463.4609680175781, + "learning_rate": 1.6946859667854977e-07, + "loss": 34.091, + "step": 462600 + }, + { + "epoch": 0.9345014685859961, + "grad_norm": 118.23023223876953, + "learning_rate": 1.6937849897252056e-07, + "loss": 8.3565, + "step": 462610 + }, + { + "epoch": 0.93452166921868, + "grad_norm": 76.01818084716797, + "learning_rate": 1.6928842481035436e-07, + "loss": 9.2868, + "step": 462620 + }, + { + "epoch": 0.9345418698513638, + "grad_norm": 468.1135559082031, + "learning_rate": 1.691983741924913e-07, + "loss": 16.4288, + "step": 462630 + }, + { + "epoch": 0.9345620704840476, + "grad_norm": 322.39031982421875, + "learning_rate": 1.6910834711936886e-07, + "loss": 15.4933, + "step": 462640 + }, + { + "epoch": 0.9345822711167314, + "grad_norm": 523.301025390625, + "learning_rate": 1.690183435914261e-07, + "loss": 12.5366, + "step": 462650 + }, + { + "epoch": 0.9346024717494152, + "grad_norm": 577.0751342773438, + "learning_rate": 1.689283636091027e-07, + "loss": 19.5234, + "step": 462660 + }, + { + "epoch": 0.9346226723820991, + "grad_norm": 452.67889404296875, + "learning_rate": 1.688384071728366e-07, + "loss": 31.4996, + "step": 462670 + }, + { + "epoch": 0.9346428730147828, + "grad_norm": 11.344707489013672, + "learning_rate": 1.6874847428306583e-07, + "loss": 20.8121, + "step": 462680 + }, + { + "epoch": 0.9346630736474666, + "grad_norm": 550.50927734375, + "learning_rate": 1.6865856494022892e-07, + "loss": 14.4299, + "step": 462690 + }, + { + "epoch": 0.9346832742801504, + "grad_norm": 397.3772888183594, + "learning_rate": 1.6856867914476492e-07, + "loss": 8.819, + "step": 462700 + }, + { + "epoch": 0.9347034749128342, + "grad_norm": 6.457852840423584, + "learning_rate": 1.684788168971102e-07, + "loss": 12.4718, + "step": 462710 
+ }, + { + "epoch": 0.934723675545518, + "grad_norm": 464.8440856933594, + "learning_rate": 1.6838897819770438e-07, + "loss": 40.8515, + "step": 462720 + }, + { + "epoch": 0.9347438761782019, + "grad_norm": 2413.400390625, + "learning_rate": 1.682991630469838e-07, + "loss": 42.2716, + "step": 462730 + }, + { + "epoch": 0.9347640768108857, + "grad_norm": 624.918212890625, + "learning_rate": 1.6820937144538807e-07, + "loss": 16.1107, + "step": 462740 + }, + { + "epoch": 0.9347842774435695, + "grad_norm": 635.3636474609375, + "learning_rate": 1.6811960339335298e-07, + "loss": 18.9638, + "step": 462750 + }, + { + "epoch": 0.9348044780762533, + "grad_norm": 595.7736206054688, + "learning_rate": 1.6802985889131762e-07, + "loss": 22.7298, + "step": 462760 + }, + { + "epoch": 0.9348246787089372, + "grad_norm": 233.24156188964844, + "learning_rate": 1.6794013793971887e-07, + "loss": 26.2768, + "step": 462770 + }, + { + "epoch": 0.934844879341621, + "grad_norm": 545.78271484375, + "learning_rate": 1.6785044053899302e-07, + "loss": 15.6134, + "step": 462780 + }, + { + "epoch": 0.9348650799743048, + "grad_norm": 230.40011596679688, + "learning_rate": 1.6776076668957864e-07, + "loss": 16.2018, + "step": 462790 + }, + { + "epoch": 0.9348852806069886, + "grad_norm": 242.9065704345703, + "learning_rate": 1.6767111639191202e-07, + "loss": 24.9229, + "step": 462800 + }, + { + "epoch": 0.9349054812396724, + "grad_norm": 0.0, + "learning_rate": 1.675814896464306e-07, + "loss": 34.967, + "step": 462810 + }, + { + "epoch": 0.9349256818723563, + "grad_norm": 229.31613159179688, + "learning_rate": 1.6749188645357072e-07, + "loss": 11.2219, + "step": 462820 + }, + { + "epoch": 0.9349458825050401, + "grad_norm": 412.9171447753906, + "learning_rate": 1.6740230681376867e-07, + "loss": 35.8329, + "step": 462830 + }, + { + "epoch": 0.9349660831377239, + "grad_norm": 12.56732177734375, + "learning_rate": 1.6731275072746244e-07, + "loss": 20.6205, + "step": 462840 + }, + { + "epoch": 0.9349862837704077, + "grad_norm": 454.1410827636719, + "learning_rate": 1.672232181950878e-07, + "loss": 11.0535, + "step": 462850 + }, + { + "epoch": 0.9350064844030915, + "grad_norm": 283.65576171875, + "learning_rate": 1.6713370921708049e-07, + "loss": 20.9141, + "step": 462860 + }, + { + "epoch": 0.9350266850357754, + "grad_norm": 240.0868682861328, + "learning_rate": 1.6704422379387685e-07, + "loss": 16.6079, + "step": 462870 + }, + { + "epoch": 0.9350468856684592, + "grad_norm": 0.09903652966022491, + "learning_rate": 1.669547619259143e-07, + "loss": 10.4518, + "step": 462880 + }, + { + "epoch": 0.935067086301143, + "grad_norm": 383.5288391113281, + "learning_rate": 1.6686532361362805e-07, + "loss": 14.665, + "step": 462890 + }, + { + "epoch": 0.9350872869338268, + "grad_norm": 133.67431640625, + "learning_rate": 1.6677590885745388e-07, + "loss": 30.3162, + "step": 462900 + }, + { + "epoch": 0.9351074875665106, + "grad_norm": 161.3779296875, + "learning_rate": 1.6668651765782806e-07, + "loss": 27.869, + "step": 462910 + }, + { + "epoch": 0.9351276881991945, + "grad_norm": 499.52880859375, + "learning_rate": 1.6659715001518583e-07, + "loss": 20.6727, + "step": 462920 + }, + { + "epoch": 0.9351478888318782, + "grad_norm": 500.2755126953125, + "learning_rate": 1.665078059299624e-07, + "loss": 11.5379, + "step": 462930 + }, + { + "epoch": 0.935168089464562, + "grad_norm": 345.6598815917969, + "learning_rate": 1.6641848540259353e-07, + "loss": 28.3758, + "step": 462940 + }, + { + "epoch": 0.9351882900972458, + "grad_norm": 
91.43672943115234, + "learning_rate": 1.6632918843351554e-07, + "loss": 8.8927, + "step": 462950 + }, + { + "epoch": 0.9352084907299296, + "grad_norm": 75.08840942382812, + "learning_rate": 1.662399150231625e-07, + "loss": 18.547, + "step": 462960 + }, + { + "epoch": 0.9352286913626134, + "grad_norm": 301.5865478515625, + "learning_rate": 1.6615066517196965e-07, + "loss": 21.6605, + "step": 462970 + }, + { + "epoch": 0.9352488919952973, + "grad_norm": 436.82568359375, + "learning_rate": 1.6606143888037219e-07, + "loss": 12.5314, + "step": 462980 + }, + { + "epoch": 0.9352690926279811, + "grad_norm": 358.45697021484375, + "learning_rate": 1.659722361488053e-07, + "loss": 23.522, + "step": 462990 + }, + { + "epoch": 0.9352892932606649, + "grad_norm": 677.9505004882812, + "learning_rate": 1.6588305697770313e-07, + "loss": 23.7936, + "step": 463000 + }, + { + "epoch": 0.9353094938933487, + "grad_norm": 140.39315795898438, + "learning_rate": 1.6579390136750086e-07, + "loss": 30.5268, + "step": 463010 + }, + { + "epoch": 0.9353296945260325, + "grad_norm": 391.122802734375, + "learning_rate": 1.6570476931863256e-07, + "loss": 16.1737, + "step": 463020 + }, + { + "epoch": 0.9353498951587164, + "grad_norm": 328.2913818359375, + "learning_rate": 1.656156608315329e-07, + "loss": 22.6559, + "step": 463030 + }, + { + "epoch": 0.9353700957914002, + "grad_norm": 246.04940795898438, + "learning_rate": 1.65526575906636e-07, + "loss": 13.7928, + "step": 463040 + }, + { + "epoch": 0.935390296424084, + "grad_norm": 124.8285140991211, + "learning_rate": 1.6543751454437708e-07, + "loss": 14.0438, + "step": 463050 + }, + { + "epoch": 0.9354104970567678, + "grad_norm": 247.01405334472656, + "learning_rate": 1.6534847674518905e-07, + "loss": 13.7093, + "step": 463060 + }, + { + "epoch": 0.9354306976894516, + "grad_norm": 403.01171875, + "learning_rate": 1.6525946250950553e-07, + "loss": 14.5217, + "step": 463070 + }, + { + "epoch": 0.9354508983221355, + "grad_norm": 196.5394744873047, + "learning_rate": 1.651704718377617e-07, + "loss": 12.1933, + "step": 463080 + }, + { + "epoch": 0.9354710989548193, + "grad_norm": 820.8240356445312, + "learning_rate": 1.650815047303894e-07, + "loss": 23.4596, + "step": 463090 + }, + { + "epoch": 0.9354912995875031, + "grad_norm": 45.13959503173828, + "learning_rate": 1.6499256118782503e-07, + "loss": 23.7143, + "step": 463100 + }, + { + "epoch": 0.9355115002201869, + "grad_norm": 1102.887451171875, + "learning_rate": 1.6490364121049984e-07, + "loss": 23.7886, + "step": 463110 + }, + { + "epoch": 0.9355317008528707, + "grad_norm": 180.792724609375, + "learning_rate": 1.648147447988474e-07, + "loss": 19.873, + "step": 463120 + }, + { + "epoch": 0.9355519014855546, + "grad_norm": 553.9746704101562, + "learning_rate": 1.6472587195330236e-07, + "loss": 27.4306, + "step": 463130 + }, + { + "epoch": 0.9355721021182384, + "grad_norm": 309.197509765625, + "learning_rate": 1.6463702267429659e-07, + "loss": 15.8393, + "step": 463140 + }, + { + "epoch": 0.9355923027509222, + "grad_norm": 319.75726318359375, + "learning_rate": 1.645481969622631e-07, + "loss": 20.5699, + "step": 463150 + }, + { + "epoch": 0.935612503383606, + "grad_norm": 260.9181213378906, + "learning_rate": 1.644593948176354e-07, + "loss": 23.9048, + "step": 463160 + }, + { + "epoch": 0.9356327040162898, + "grad_norm": 283.957763671875, + "learning_rate": 1.6437061624084704e-07, + "loss": 15.1584, + "step": 463170 + }, + { + "epoch": 0.9356529046489737, + "grad_norm": 216.19720458984375, + "learning_rate": 
1.6428186123232826e-07, + "loss": 14.2853, + "step": 463180 + }, + { + "epoch": 0.9356731052816574, + "grad_norm": 282.28033447265625, + "learning_rate": 1.6419312979251368e-07, + "loss": 26.9816, + "step": 463190 + }, + { + "epoch": 0.9356933059143412, + "grad_norm": 547.8955688476562, + "learning_rate": 1.6410442192183574e-07, + "loss": 13.523, + "step": 463200 + }, + { + "epoch": 0.935713506547025, + "grad_norm": 317.2925720214844, + "learning_rate": 1.6401573762072631e-07, + "loss": 10.4762, + "step": 463210 + }, + { + "epoch": 0.9357337071797088, + "grad_norm": 226.99322509765625, + "learning_rate": 1.6392707688961728e-07, + "loss": 17.5634, + "step": 463220 + }, + { + "epoch": 0.9357539078123926, + "grad_norm": 104.4507064819336, + "learning_rate": 1.638384397289411e-07, + "loss": 22.4312, + "step": 463230 + }, + { + "epoch": 0.9357741084450765, + "grad_norm": 216.3778533935547, + "learning_rate": 1.6374982613913072e-07, + "loss": 17.19, + "step": 463240 + }, + { + "epoch": 0.9357943090777603, + "grad_norm": 299.8779296875, + "learning_rate": 1.6366123612061636e-07, + "loss": 26.81, + "step": 463250 + }, + { + "epoch": 0.9358145097104441, + "grad_norm": 167.1317596435547, + "learning_rate": 1.635726696738299e-07, + "loss": 19.6392, + "step": 463260 + }, + { + "epoch": 0.9358347103431279, + "grad_norm": 415.87091064453125, + "learning_rate": 1.6348412679920488e-07, + "loss": 10.1537, + "step": 463270 + }, + { + "epoch": 0.9358549109758117, + "grad_norm": 165.0482177734375, + "learning_rate": 1.6339560749717154e-07, + "loss": 9.7739, + "step": 463280 + }, + { + "epoch": 0.9358751116084956, + "grad_norm": 236.4598388671875, + "learning_rate": 1.633071117681606e-07, + "loss": 14.7218, + "step": 463290 + }, + { + "epoch": 0.9358953122411794, + "grad_norm": 644.8948974609375, + "learning_rate": 1.6321863961260452e-07, + "loss": 18.5992, + "step": 463300 + }, + { + "epoch": 0.9359155128738632, + "grad_norm": 419.4942626953125, + "learning_rate": 1.6313019103093463e-07, + "loss": 16.4838, + "step": 463310 + }, + { + "epoch": 0.935935713506547, + "grad_norm": 219.58193969726562, + "learning_rate": 1.6304176602358056e-07, + "loss": 13.8679, + "step": 463320 + }, + { + "epoch": 0.9359559141392308, + "grad_norm": 338.3202819824219, + "learning_rate": 1.6295336459097532e-07, + "loss": 15.4373, + "step": 463330 + }, + { + "epoch": 0.9359761147719147, + "grad_norm": 377.089111328125, + "learning_rate": 1.62864986733548e-07, + "loss": 14.0342, + "step": 463340 + }, + { + "epoch": 0.9359963154045985, + "grad_norm": 299.72210693359375, + "learning_rate": 1.6277663245173047e-07, + "loss": 11.9394, + "step": 463350 + }, + { + "epoch": 0.9360165160372823, + "grad_norm": 290.8778076171875, + "learning_rate": 1.6268830174595242e-07, + "loss": 14.5878, + "step": 463360 + }, + { + "epoch": 0.9360367166699661, + "grad_norm": 491.1105651855469, + "learning_rate": 1.6259999461664567e-07, + "loss": 23.7471, + "step": 463370 + }, + { + "epoch": 0.93605691730265, + "grad_norm": 42.898990631103516, + "learning_rate": 1.6251171106423935e-07, + "loss": 15.818, + "step": 463380 + }, + { + "epoch": 0.9360771179353338, + "grad_norm": 205.88906860351562, + "learning_rate": 1.6242345108916424e-07, + "loss": 13.1447, + "step": 463390 + }, + { + "epoch": 0.9360973185680176, + "grad_norm": 438.9242248535156, + "learning_rate": 1.6233521469185054e-07, + "loss": 22.0241, + "step": 463400 + }, + { + "epoch": 0.9361175192007014, + "grad_norm": 303.1636962890625, + "learning_rate": 1.6224700187272792e-07, + "loss": 18.267, + 
"step": 463410 + }, + { + "epoch": 0.9361377198333852, + "grad_norm": 295.252685546875, + "learning_rate": 1.621588126322271e-07, + "loss": 13.4704, + "step": 463420 + }, + { + "epoch": 0.936157920466069, + "grad_norm": 1018.2053833007812, + "learning_rate": 1.620706469707778e-07, + "loss": 39.882, + "step": 463430 + }, + { + "epoch": 0.9361781210987528, + "grad_norm": 461.65521240234375, + "learning_rate": 1.619825048888085e-07, + "loss": 12.3608, + "step": 463440 + }, + { + "epoch": 0.9361983217314366, + "grad_norm": 297.8987121582031, + "learning_rate": 1.618943863867506e-07, + "loss": 23.6289, + "step": 463450 + }, + { + "epoch": 0.9362185223641204, + "grad_norm": 34.65291213989258, + "learning_rate": 1.6180629146503256e-07, + "loss": 10.5408, + "step": 463460 + }, + { + "epoch": 0.9362387229968042, + "grad_norm": 454.8813781738281, + "learning_rate": 1.61718220124083e-07, + "loss": 11.0426, + "step": 463470 + }, + { + "epoch": 0.936258923629488, + "grad_norm": 0.6589401960372925, + "learning_rate": 1.6163017236433265e-07, + "loss": 17.0109, + "step": 463480 + }, + { + "epoch": 0.9362791242621719, + "grad_norm": 688.1834106445312, + "learning_rate": 1.6154214818621007e-07, + "loss": 21.8056, + "step": 463490 + }, + { + "epoch": 0.9362993248948557, + "grad_norm": 224.41502380371094, + "learning_rate": 1.6145414759014433e-07, + "loss": 13.6119, + "step": 463500 + }, + { + "epoch": 0.9363195255275395, + "grad_norm": 252.64492797851562, + "learning_rate": 1.6136617057656344e-07, + "loss": 12.8251, + "step": 463510 + }, + { + "epoch": 0.9363397261602233, + "grad_norm": 197.5618896484375, + "learning_rate": 1.6127821714589763e-07, + "loss": 14.6395, + "step": 463520 + }, + { + "epoch": 0.9363599267929071, + "grad_norm": 646.2914428710938, + "learning_rate": 1.6119028729857545e-07, + "loss": 14.4647, + "step": 463530 + }, + { + "epoch": 0.936380127425591, + "grad_norm": 217.630126953125, + "learning_rate": 1.6110238103502374e-07, + "loss": 21.2792, + "step": 463540 + }, + { + "epoch": 0.9364003280582748, + "grad_norm": 221.92062377929688, + "learning_rate": 1.6101449835567273e-07, + "loss": 11.9105, + "step": 463550 + }, + { + "epoch": 0.9364205286909586, + "grad_norm": 686.2666015625, + "learning_rate": 1.6092663926094987e-07, + "loss": 21.9448, + "step": 463560 + }, + { + "epoch": 0.9364407293236424, + "grad_norm": 395.3066101074219, + "learning_rate": 1.6083880375128424e-07, + "loss": 17.1408, + "step": 463570 + }, + { + "epoch": 0.9364609299563262, + "grad_norm": 663.0675659179688, + "learning_rate": 1.6075099182710274e-07, + "loss": 34.2614, + "step": 463580 + }, + { + "epoch": 0.9364811305890101, + "grad_norm": 228.4241943359375, + "learning_rate": 1.6066320348883448e-07, + "loss": 20.2467, + "step": 463590 + }, + { + "epoch": 0.9365013312216939, + "grad_norm": 198.4999237060547, + "learning_rate": 1.6057543873690685e-07, + "loss": 11.6937, + "step": 463600 + }, + { + "epoch": 0.9365215318543777, + "grad_norm": 1286.03173828125, + "learning_rate": 1.604876975717473e-07, + "loss": 17.0578, + "step": 463610 + }, + { + "epoch": 0.9365417324870615, + "grad_norm": 179.70726013183594, + "learning_rate": 1.6039997999378388e-07, + "loss": 9.2406, + "step": 463620 + }, + { + "epoch": 0.9365619331197453, + "grad_norm": 201.22686767578125, + "learning_rate": 1.603122860034434e-07, + "loss": 18.0075, + "step": 463630 + }, + { + "epoch": 0.9365821337524292, + "grad_norm": 345.80712890625, + "learning_rate": 1.6022461560115498e-07, + "loss": 22.7832, + "step": 463640 + }, + { + "epoch": 
0.936602334385113, + "grad_norm": 495.47772216796875, + "learning_rate": 1.6013696878734385e-07, + "loss": 10.4317, + "step": 463650 + }, + { + "epoch": 0.9366225350177968, + "grad_norm": 253.85330200195312, + "learning_rate": 1.6004934556243857e-07, + "loss": 8.4572, + "step": 463660 + }, + { + "epoch": 0.9366427356504806, + "grad_norm": 395.35296630859375, + "learning_rate": 1.5996174592686598e-07, + "loss": 31.3012, + "step": 463670 + }, + { + "epoch": 0.9366629362831644, + "grad_norm": 491.93597412109375, + "learning_rate": 1.5987416988105188e-07, + "loss": 25.3924, + "step": 463680 + }, + { + "epoch": 0.9366831369158483, + "grad_norm": 1044.8165283203125, + "learning_rate": 1.5978661742542477e-07, + "loss": 23.4883, + "step": 463690 + }, + { + "epoch": 0.936703337548532, + "grad_norm": 443.22705078125, + "learning_rate": 1.596990885604105e-07, + "loss": 12.8588, + "step": 463700 + }, + { + "epoch": 0.9367235381812158, + "grad_norm": 350.1383972167969, + "learning_rate": 1.596115832864359e-07, + "loss": 35.5491, + "step": 463710 + }, + { + "epoch": 0.9367437388138996, + "grad_norm": 461.406005859375, + "learning_rate": 1.5952410160392784e-07, + "loss": 23.4766, + "step": 463720 + }, + { + "epoch": 0.9367639394465834, + "grad_norm": 426.9395751953125, + "learning_rate": 1.59436643513311e-07, + "loss": 31.4879, + "step": 463730 + }, + { + "epoch": 0.9367841400792672, + "grad_norm": 142.09671020507812, + "learning_rate": 1.5934920901501395e-07, + "loss": 12.2829, + "step": 463740 + }, + { + "epoch": 0.9368043407119511, + "grad_norm": 467.48260498046875, + "learning_rate": 1.5926179810946185e-07, + "loss": 20.8934, + "step": 463750 + }, + { + "epoch": 0.9368245413446349, + "grad_norm": 128.65975952148438, + "learning_rate": 1.5917441079707942e-07, + "loss": 17.7847, + "step": 463760 + }, + { + "epoch": 0.9368447419773187, + "grad_norm": 333.0611267089844, + "learning_rate": 1.5908704707829458e-07, + "loss": 10.7977, + "step": 463770 + }, + { + "epoch": 0.9368649426100025, + "grad_norm": 593.1852416992188, + "learning_rate": 1.5899970695353262e-07, + "loss": 17.1089, + "step": 463780 + }, + { + "epoch": 0.9368851432426863, + "grad_norm": 347.4171142578125, + "learning_rate": 1.5891239042321871e-07, + "loss": 9.5736, + "step": 463790 + }, + { + "epoch": 0.9369053438753702, + "grad_norm": 362.99835205078125, + "learning_rate": 1.5882509748777809e-07, + "loss": 23.4353, + "step": 463800 + }, + { + "epoch": 0.936925544508054, + "grad_norm": 219.54135131835938, + "learning_rate": 1.5873782814763762e-07, + "loss": 38.4928, + "step": 463810 + }, + { + "epoch": 0.9369457451407378, + "grad_norm": 166.92816162109375, + "learning_rate": 1.586505824032214e-07, + "loss": 27.4917, + "step": 463820 + }, + { + "epoch": 0.9369659457734216, + "grad_norm": 479.5160827636719, + "learning_rate": 1.5856336025495466e-07, + "loss": 9.695, + "step": 463830 + }, + { + "epoch": 0.9369861464061054, + "grad_norm": 526.8687133789062, + "learning_rate": 1.5847616170326318e-07, + "loss": 24.6402, + "step": 463840 + }, + { + "epoch": 0.9370063470387893, + "grad_norm": 471.7763366699219, + "learning_rate": 1.5838898674857273e-07, + "loss": 13.5469, + "step": 463850 + }, + { + "epoch": 0.9370265476714731, + "grad_norm": 414.0970458984375, + "learning_rate": 1.5830183539130574e-07, + "loss": 11.267, + "step": 463860 + }, + { + "epoch": 0.9370467483041569, + "grad_norm": 319.959716796875, + "learning_rate": 1.582147076318885e-07, + "loss": 13.8998, + "step": 463870 + }, + { + "epoch": 0.9370669489368407, + "grad_norm": 
175.54379272460938, + "learning_rate": 1.581276034707463e-07, + "loss": 20.5544, + "step": 463880 + }, + { + "epoch": 0.9370871495695245, + "grad_norm": 416.506591796875, + "learning_rate": 1.5804052290830262e-07, + "loss": 16.798, + "step": 463890 + }, + { + "epoch": 0.9371073502022084, + "grad_norm": 728.6934814453125, + "learning_rate": 1.5795346594498162e-07, + "loss": 19.4924, + "step": 463900 + }, + { + "epoch": 0.9371275508348922, + "grad_norm": 333.4419250488281, + "learning_rate": 1.5786643258120905e-07, + "loss": 18.2137, + "step": 463910 + }, + { + "epoch": 0.937147751467576, + "grad_norm": 11.038262367248535, + "learning_rate": 1.5777942281740789e-07, + "loss": 18.4305, + "step": 463920 + }, + { + "epoch": 0.9371679521002598, + "grad_norm": 327.39752197265625, + "learning_rate": 1.5769243665400224e-07, + "loss": 28.4707, + "step": 463930 + }, + { + "epoch": 0.9371881527329436, + "grad_norm": 425.0711364746094, + "learning_rate": 1.5760547409141626e-07, + "loss": 19.2452, + "step": 463940 + }, + { + "epoch": 0.9372083533656275, + "grad_norm": 120.11178588867188, + "learning_rate": 1.5751853513007454e-07, + "loss": 12.1289, + "step": 463950 + }, + { + "epoch": 0.9372285539983112, + "grad_norm": 14.883577346801758, + "learning_rate": 1.5743161977039954e-07, + "loss": 17.47, + "step": 463960 + }, + { + "epoch": 0.937248754630995, + "grad_norm": 333.3983154296875, + "learning_rate": 1.5734472801281543e-07, + "loss": 15.0493, + "step": 463970 + }, + { + "epoch": 0.9372689552636788, + "grad_norm": 384.79547119140625, + "learning_rate": 1.5725785985774623e-07, + "loss": 17.9263, + "step": 463980 + }, + { + "epoch": 0.9372891558963626, + "grad_norm": 323.10943603515625, + "learning_rate": 1.5717101530561497e-07, + "loss": 10.0074, + "step": 463990 + }, + { + "epoch": 0.9373093565290465, + "grad_norm": 400.6250305175781, + "learning_rate": 1.5708419435684463e-07, + "loss": 21.9798, + "step": 464000 + }, + { + "epoch": 0.9373295571617303, + "grad_norm": 287.2420349121094, + "learning_rate": 1.5699739701185878e-07, + "loss": 40.5435, + "step": 464010 + }, + { + "epoch": 0.9373497577944141, + "grad_norm": 474.51348876953125, + "learning_rate": 1.5691062327107932e-07, + "loss": 13.2892, + "step": 464020 + }, + { + "epoch": 0.9373699584270979, + "grad_norm": 319.4916687011719, + "learning_rate": 1.5682387313493086e-07, + "loss": 15.7933, + "step": 464030 + }, + { + "epoch": 0.9373901590597817, + "grad_norm": 614.4664306640625, + "learning_rate": 1.5673714660383532e-07, + "loss": 17.009, + "step": 464040 + }, + { + "epoch": 0.9374103596924656, + "grad_norm": 623.9253540039062, + "learning_rate": 1.5665044367821513e-07, + "loss": 17.0484, + "step": 464050 + }, + { + "epoch": 0.9374305603251494, + "grad_norm": 486.2925720214844, + "learning_rate": 1.5656376435849385e-07, + "loss": 24.1836, + "step": 464060 + }, + { + "epoch": 0.9374507609578332, + "grad_norm": 343.29730224609375, + "learning_rate": 1.5647710864509336e-07, + "loss": 18.7584, + "step": 464070 + }, + { + "epoch": 0.937470961590517, + "grad_norm": 500.9264221191406, + "learning_rate": 1.5639047653843554e-07, + "loss": 15.1472, + "step": 464080 + }, + { + "epoch": 0.9374911622232008, + "grad_norm": 171.4511260986328, + "learning_rate": 1.563038680389428e-07, + "loss": 10.5716, + "step": 464090 + }, + { + "epoch": 0.9375113628558847, + "grad_norm": 52.38894271850586, + "learning_rate": 1.5621728314703822e-07, + "loss": 17.9345, + "step": 464100 + }, + { + "epoch": 0.9375315634885685, + "grad_norm": 151.2301788330078, + 
"learning_rate": 1.5613072186314304e-07, + "loss": 13.4225, + "step": 464110 + }, + { + "epoch": 0.9375517641212523, + "grad_norm": 661.763427734375, + "learning_rate": 1.560441841876792e-07, + "loss": 24.8922, + "step": 464120 + }, + { + "epoch": 0.9375719647539361, + "grad_norm": 323.34417724609375, + "learning_rate": 1.5595767012106856e-07, + "loss": 20.4487, + "step": 464130 + }, + { + "epoch": 0.9375921653866199, + "grad_norm": 433.3529052734375, + "learning_rate": 1.5587117966373244e-07, + "loss": 10.2313, + "step": 464140 + }, + { + "epoch": 0.9376123660193038, + "grad_norm": 414.6815185546875, + "learning_rate": 1.5578471281609274e-07, + "loss": 17.6942, + "step": 464150 + }, + { + "epoch": 0.9376325666519876, + "grad_norm": 320.7261047363281, + "learning_rate": 1.5569826957857027e-07, + "loss": 18.6855, + "step": 464160 + }, + { + "epoch": 0.9376527672846714, + "grad_norm": 142.53854370117188, + "learning_rate": 1.556118499515885e-07, + "loss": 19.6049, + "step": 464170 + }, + { + "epoch": 0.9376729679173552, + "grad_norm": 573.0814819335938, + "learning_rate": 1.555254539355655e-07, + "loss": 11.8, + "step": 464180 + }, + { + "epoch": 0.937693168550039, + "grad_norm": 382.5942687988281, + "learning_rate": 1.5543908153092424e-07, + "loss": 28.2042, + "step": 464190 + }, + { + "epoch": 0.9377133691827229, + "grad_norm": 489.61602783203125, + "learning_rate": 1.553527327380855e-07, + "loss": 27.2857, + "step": 464200 + }, + { + "epoch": 0.9377335698154066, + "grad_norm": 398.268310546875, + "learning_rate": 1.5526640755747003e-07, + "loss": 18.6053, + "step": 464210 + }, + { + "epoch": 0.9377537704480904, + "grad_norm": 689.2642822265625, + "learning_rate": 1.5518010598949807e-07, + "loss": 17.124, + "step": 464220 + }, + { + "epoch": 0.9377739710807742, + "grad_norm": 391.16351318359375, + "learning_rate": 1.5509382803459149e-07, + "loss": 16.5513, + "step": 464230 + }, + { + "epoch": 0.937794171713458, + "grad_norm": 324.4962463378906, + "learning_rate": 1.5500757369316888e-07, + "loss": 26.283, + "step": 464240 + }, + { + "epoch": 0.9378143723461418, + "grad_norm": 243.7255859375, + "learning_rate": 1.5492134296565264e-07, + "loss": 54.337, + "step": 464250 + }, + { + "epoch": 0.9378345729788257, + "grad_norm": 180.85537719726562, + "learning_rate": 1.5483513585246135e-07, + "loss": 19.1971, + "step": 464260 + }, + { + "epoch": 0.9378547736115095, + "grad_norm": 870.2200927734375, + "learning_rate": 1.5474895235401688e-07, + "loss": 21.2145, + "step": 464270 + }, + { + "epoch": 0.9378749742441933, + "grad_norm": 397.47149658203125, + "learning_rate": 1.546627924707378e-07, + "loss": 19.5186, + "step": 464280 + }, + { + "epoch": 0.9378951748768771, + "grad_norm": 120.85033416748047, + "learning_rate": 1.545766562030443e-07, + "loss": 21.5627, + "step": 464290 + }, + { + "epoch": 0.937915375509561, + "grad_norm": 370.54620361328125, + "learning_rate": 1.5449054355135718e-07, + "loss": 20.4738, + "step": 464300 + }, + { + "epoch": 0.9379355761422448, + "grad_norm": 237.71800231933594, + "learning_rate": 1.54404454516095e-07, + "loss": 21.7886, + "step": 464310 + }, + { + "epoch": 0.9379557767749286, + "grad_norm": 425.63330078125, + "learning_rate": 1.5431838909767793e-07, + "loss": 18.739, + "step": 464320 + }, + { + "epoch": 0.9379759774076124, + "grad_norm": 454.5373840332031, + "learning_rate": 1.542323472965257e-07, + "loss": 15.3442, + "step": 464330 + }, + { + "epoch": 0.9379961780402962, + "grad_norm": 304.43927001953125, + "learning_rate": 1.5414632911305683e-07, + "loss": 
18.2253, + "step": 464340 + }, + { + "epoch": 0.93801637867298, + "grad_norm": 516.6799926757812, + "learning_rate": 1.5406033454769154e-07, + "loss": 11.4711, + "step": 464350 + }, + { + "epoch": 0.9380365793056639, + "grad_norm": 366.4046325683594, + "learning_rate": 1.5397436360084784e-07, + "loss": 18.1866, + "step": 464360 + }, + { + "epoch": 0.9380567799383477, + "grad_norm": 405.16986083984375, + "learning_rate": 1.5388841627294536e-07, + "loss": 25.2115, + "step": 464370 + }, + { + "epoch": 0.9380769805710315, + "grad_norm": 702.1383666992188, + "learning_rate": 1.5380249256440272e-07, + "loss": 23.2144, + "step": 464380 + }, + { + "epoch": 0.9380971812037153, + "grad_norm": 471.3887939453125, + "learning_rate": 1.5371659247564063e-07, + "loss": 19.3548, + "step": 464390 + }, + { + "epoch": 0.9381173818363991, + "grad_norm": 681.3546752929688, + "learning_rate": 1.5363071600707435e-07, + "loss": 18.2053, + "step": 464400 + }, + { + "epoch": 0.938137582469083, + "grad_norm": 247.7911834716797, + "learning_rate": 1.5354486315912408e-07, + "loss": 18.0673, + "step": 464410 + }, + { + "epoch": 0.9381577831017668, + "grad_norm": 511.078125, + "learning_rate": 1.534590339322095e-07, + "loss": 12.9879, + "step": 464420 + }, + { + "epoch": 0.9381779837344506, + "grad_norm": 401.2061767578125, + "learning_rate": 1.533732283267475e-07, + "loss": 24.2374, + "step": 464430 + }, + { + "epoch": 0.9381981843671344, + "grad_norm": 348.04296875, + "learning_rate": 1.532874463431555e-07, + "loss": 10.0772, + "step": 464440 + }, + { + "epoch": 0.9382183849998182, + "grad_norm": 193.95880126953125, + "learning_rate": 1.532016879818532e-07, + "loss": 17.9981, + "step": 464450 + }, + { + "epoch": 0.9382385856325021, + "grad_norm": 133.47679138183594, + "learning_rate": 1.5311595324325912e-07, + "loss": 14.5344, + "step": 464460 + }, + { + "epoch": 0.9382587862651858, + "grad_norm": 349.5655212402344, + "learning_rate": 1.5303024212778905e-07, + "loss": 25.1919, + "step": 464470 + }, + { + "epoch": 0.9382789868978696, + "grad_norm": 495.6358947753906, + "learning_rate": 1.5294455463586157e-07, + "loss": 11.76, + "step": 464480 + }, + { + "epoch": 0.9382991875305534, + "grad_norm": 852.6415405273438, + "learning_rate": 1.528588907678946e-07, + "loss": 22.6186, + "step": 464490 + }, + { + "epoch": 0.9383193881632372, + "grad_norm": 338.9753723144531, + "learning_rate": 1.5277325052430569e-07, + "loss": 10.316, + "step": 464500 + }, + { + "epoch": 0.938339588795921, + "grad_norm": 182.22496032714844, + "learning_rate": 1.5268763390551167e-07, + "loss": 8.8562, + "step": 464510 + }, + { + "epoch": 0.9383597894286049, + "grad_norm": 697.7536010742188, + "learning_rate": 1.526020409119311e-07, + "loss": 16.601, + "step": 464520 + }, + { + "epoch": 0.9383799900612887, + "grad_norm": 471.80670166015625, + "learning_rate": 1.5251647154397975e-07, + "loss": 17.9513, + "step": 464530 + }, + { + "epoch": 0.9384001906939725, + "grad_norm": 438.54632568359375, + "learning_rate": 1.5243092580207507e-07, + "loss": 21.5101, + "step": 464540 + }, + { + "epoch": 0.9384203913266563, + "grad_norm": 160.27978515625, + "learning_rate": 1.5234540368663343e-07, + "loss": 13.6464, + "step": 464550 + }, + { + "epoch": 0.9384405919593402, + "grad_norm": 483.20751953125, + "learning_rate": 1.5225990519807332e-07, + "loss": 12.8111, + "step": 464560 + }, + { + "epoch": 0.938460792592024, + "grad_norm": 539.7047119140625, + "learning_rate": 1.5217443033681058e-07, + "loss": 20.0048, + "step": 464570 + }, + { + "epoch": 
0.9384809932247078, + "grad_norm": 290.4250793457031, + "learning_rate": 1.5208897910326092e-07, + "loss": 13.4598, + "step": 464580 + }, + { + "epoch": 0.9385011938573916, + "grad_norm": 410.6996765136719, + "learning_rate": 1.520035514978424e-07, + "loss": 14.0252, + "step": 464590 + }, + { + "epoch": 0.9385213944900754, + "grad_norm": 470.7203369140625, + "learning_rate": 1.5191814752097024e-07, + "loss": 10.0053, + "step": 464600 + }, + { + "epoch": 0.9385415951227593, + "grad_norm": 427.7999572753906, + "learning_rate": 1.5183276717306072e-07, + "loss": 16.5089, + "step": 464610 + }, + { + "epoch": 0.9385617957554431, + "grad_norm": 862.2379150390625, + "learning_rate": 1.517474104545308e-07, + "loss": 13.5312, + "step": 464620 + }, + { + "epoch": 0.9385819963881269, + "grad_norm": 554.513916015625, + "learning_rate": 1.5166207736579564e-07, + "loss": 24.5063, + "step": 464630 + }, + { + "epoch": 0.9386021970208107, + "grad_norm": 620.202880859375, + "learning_rate": 1.515767679072716e-07, + "loss": 13.7802, + "step": 464640 + }, + { + "epoch": 0.9386223976534945, + "grad_norm": 556.294921875, + "learning_rate": 1.5149148207937447e-07, + "loss": 20.9563, + "step": 464650 + }, + { + "epoch": 0.9386425982861784, + "grad_norm": 317.4458923339844, + "learning_rate": 1.5140621988251947e-07, + "loss": 5.6833, + "step": 464660 + }, + { + "epoch": 0.9386627989188622, + "grad_norm": 381.5221252441406, + "learning_rate": 1.513209813171229e-07, + "loss": 15.0624, + "step": 464670 + }, + { + "epoch": 0.938682999551546, + "grad_norm": 317.9993591308594, + "learning_rate": 1.5123576638360004e-07, + "loss": 16.5072, + "step": 464680 + }, + { + "epoch": 0.9387032001842298, + "grad_norm": 118.84877014160156, + "learning_rate": 1.5115057508236498e-07, + "loss": 18.7588, + "step": 464690 + }, + { + "epoch": 0.9387234008169136, + "grad_norm": 1289.9464111328125, + "learning_rate": 1.5106540741383402e-07, + "loss": 25.8103, + "step": 464700 + }, + { + "epoch": 0.9387436014495975, + "grad_norm": 153.5814666748047, + "learning_rate": 1.5098026337842297e-07, + "loss": 24.5178, + "step": 464710 + }, + { + "epoch": 0.9387638020822812, + "grad_norm": 646.8268432617188, + "learning_rate": 1.5089514297654594e-07, + "loss": 22.356, + "step": 464720 + }, + { + "epoch": 0.938784002714965, + "grad_norm": 461.1291809082031, + "learning_rate": 1.5081004620861706e-07, + "loss": 18.6647, + "step": 464730 + }, + { + "epoch": 0.9388042033476488, + "grad_norm": 489.087890625, + "learning_rate": 1.5072497307505263e-07, + "loss": 17.1713, + "step": 464740 + }, + { + "epoch": 0.9388244039803326, + "grad_norm": 315.9326171875, + "learning_rate": 1.5063992357626623e-07, + "loss": 35.0541, + "step": 464750 + }, + { + "epoch": 0.9388446046130164, + "grad_norm": 529.4815673828125, + "learning_rate": 1.5055489771267252e-07, + "loss": 24.0619, + "step": 464760 + }, + { + "epoch": 0.9388648052457003, + "grad_norm": 470.8222961425781, + "learning_rate": 1.5046989548468616e-07, + "loss": 14.294, + "step": 464770 + }, + { + "epoch": 0.9388850058783841, + "grad_norm": 345.0228271484375, + "learning_rate": 1.503849168927224e-07, + "loss": 26.5148, + "step": 464780 + }, + { + "epoch": 0.9389052065110679, + "grad_norm": 425.7090148925781, + "learning_rate": 1.502999619371931e-07, + "loss": 23.6067, + "step": 464790 + }, + { + "epoch": 0.9389254071437517, + "grad_norm": 333.5506286621094, + "learning_rate": 1.502150306185135e-07, + "loss": 18.3616, + "step": 464800 + }, + { + "epoch": 0.9389456077764355, + "grad_norm": 141.08529663085938, + 
"learning_rate": 1.5013012293709828e-07, + "loss": 10.1888, + "step": 464810 + }, + { + "epoch": 0.9389658084091194, + "grad_norm": 280.8174743652344, + "learning_rate": 1.5004523889336042e-07, + "loss": 18.8, + "step": 464820 + }, + { + "epoch": 0.9389860090418032, + "grad_norm": 197.2073516845703, + "learning_rate": 1.499603784877135e-07, + "loss": 11.635, + "step": 464830 + }, + { + "epoch": 0.939006209674487, + "grad_norm": 554.6068725585938, + "learning_rate": 1.4987554172057216e-07, + "loss": 14.0745, + "step": 464840 + }, + { + "epoch": 0.9390264103071708, + "grad_norm": 622.3172607421875, + "learning_rate": 1.497907285923489e-07, + "loss": 17.9825, + "step": 464850 + }, + { + "epoch": 0.9390466109398546, + "grad_norm": 118.02326965332031, + "learning_rate": 1.4970593910345665e-07, + "loss": 8.7162, + "step": 464860 + }, + { + "epoch": 0.9390668115725385, + "grad_norm": 379.7081604003906, + "learning_rate": 1.4962117325431013e-07, + "loss": 24.6652, + "step": 464870 + }, + { + "epoch": 0.9390870122052223, + "grad_norm": 115.3721923828125, + "learning_rate": 1.495364310453218e-07, + "loss": 25.8597, + "step": 464880 + }, + { + "epoch": 0.9391072128379061, + "grad_norm": 453.2753601074219, + "learning_rate": 1.494517124769046e-07, + "loss": 11.1597, + "step": 464890 + }, + { + "epoch": 0.9391274134705899, + "grad_norm": 118.36872100830078, + "learning_rate": 1.4936701754947104e-07, + "loss": 12.8013, + "step": 464900 + }, + { + "epoch": 0.9391476141032737, + "grad_norm": 366.5028076171875, + "learning_rate": 1.4928234626343464e-07, + "loss": 9.0016, + "step": 464910 + }, + { + "epoch": 0.9391678147359576, + "grad_norm": 417.77398681640625, + "learning_rate": 1.4919769861920785e-07, + "loss": 14.6703, + "step": 464920 + }, + { + "epoch": 0.9391880153686414, + "grad_norm": 295.6487121582031, + "learning_rate": 1.491130746172026e-07, + "loss": 16.5016, + "step": 464930 + }, + { + "epoch": 0.9392082160013252, + "grad_norm": 459.5295104980469, + "learning_rate": 1.490284742578324e-07, + "loss": 30.716, + "step": 464940 + }, + { + "epoch": 0.939228416634009, + "grad_norm": 387.3690185546875, + "learning_rate": 1.4894389754150862e-07, + "loss": 29.2588, + "step": 464950 + }, + { + "epoch": 0.9392486172666928, + "grad_norm": 396.13812255859375, + "learning_rate": 1.4885934446864425e-07, + "loss": 19.497, + "step": 464960 + }, + { + "epoch": 0.9392688178993767, + "grad_norm": 133.84405517578125, + "learning_rate": 1.487748150396512e-07, + "loss": 13.2698, + "step": 464970 + }, + { + "epoch": 0.9392890185320604, + "grad_norm": 130.9963836669922, + "learning_rate": 1.4869030925494077e-07, + "loss": 22.2032, + "step": 464980 + }, + { + "epoch": 0.9393092191647442, + "grad_norm": 646.0631103515625, + "learning_rate": 1.4860582711492544e-07, + "loss": 25.5472, + "step": 464990 + }, + { + "epoch": 0.939329419797428, + "grad_norm": 182.46316528320312, + "learning_rate": 1.4852136862001766e-07, + "loss": 25.9811, + "step": 465000 + }, + { + "epoch": 0.9393496204301118, + "grad_norm": 329.9434509277344, + "learning_rate": 1.4843693377062818e-07, + "loss": 17.1915, + "step": 465010 + }, + { + "epoch": 0.9393698210627957, + "grad_norm": 404.30364990234375, + "learning_rate": 1.483525225671678e-07, + "loss": 18.5547, + "step": 465020 + }, + { + "epoch": 0.9393900216954795, + "grad_norm": 116.88551330566406, + "learning_rate": 1.4826813501004954e-07, + "loss": 15.3164, + "step": 465030 + }, + { + "epoch": 0.9394102223281633, + "grad_norm": 680.893798828125, + "learning_rate": 1.4818377109968417e-07, + 
"loss": 18.4572, + "step": 465040 + }, + { + "epoch": 0.9394304229608471, + "grad_norm": 601.853515625, + "learning_rate": 1.4809943083648194e-07, + "loss": 16.359, + "step": 465050 + }, + { + "epoch": 0.9394506235935309, + "grad_norm": 455.3320617675781, + "learning_rate": 1.480151142208547e-07, + "loss": 22.9655, + "step": 465060 + }, + { + "epoch": 0.9394708242262148, + "grad_norm": 201.93214416503906, + "learning_rate": 1.4793082125321435e-07, + "loss": 20.6204, + "step": 465070 + }, + { + "epoch": 0.9394910248588986, + "grad_norm": 148.03668212890625, + "learning_rate": 1.4784655193396947e-07, + "loss": 16.3354, + "step": 465080 + }, + { + "epoch": 0.9395112254915824, + "grad_norm": 234.11648559570312, + "learning_rate": 1.4776230626353193e-07, + "loss": 16.4417, + "step": 465090 + }, + { + "epoch": 0.9395314261242662, + "grad_norm": 346.0733642578125, + "learning_rate": 1.4767808424231312e-07, + "loss": 12.3764, + "step": 465100 + }, + { + "epoch": 0.93955162675695, + "grad_norm": 55.60728454589844, + "learning_rate": 1.4759388587072266e-07, + "loss": 18.4479, + "step": 465110 + }, + { + "epoch": 0.9395718273896339, + "grad_norm": 111.06137084960938, + "learning_rate": 1.475097111491708e-07, + "loss": 11.6848, + "step": 465120 + }, + { + "epoch": 0.9395920280223177, + "grad_norm": 300.3351135253906, + "learning_rate": 1.474255600780683e-07, + "loss": 13.9835, + "step": 465130 + }, + { + "epoch": 0.9396122286550015, + "grad_norm": 363.2767028808594, + "learning_rate": 1.473414326578254e-07, + "loss": 9.6612, + "step": 465140 + }, + { + "epoch": 0.9396324292876853, + "grad_norm": 927.3482055664062, + "learning_rate": 1.4725732888885126e-07, + "loss": 20.8591, + "step": 465150 + }, + { + "epoch": 0.9396526299203691, + "grad_norm": 472.299072265625, + "learning_rate": 1.4717324877155603e-07, + "loss": 6.7122, + "step": 465160 + }, + { + "epoch": 0.939672830553053, + "grad_norm": 161.1034698486328, + "learning_rate": 1.4708919230635054e-07, + "loss": 14.2039, + "step": 465170 + }, + { + "epoch": 0.9396930311857368, + "grad_norm": 361.7558288574219, + "learning_rate": 1.4700515949364337e-07, + "loss": 28.8626, + "step": 465180 + }, + { + "epoch": 0.9397132318184206, + "grad_norm": 246.94151306152344, + "learning_rate": 1.4692115033384468e-07, + "loss": 25.2142, + "step": 465190 + }, + { + "epoch": 0.9397334324511044, + "grad_norm": 314.4781494140625, + "learning_rate": 1.4683716482736364e-07, + "loss": 21.4019, + "step": 465200 + }, + { + "epoch": 0.9397536330837882, + "grad_norm": 495.0483703613281, + "learning_rate": 1.4675320297460994e-07, + "loss": 22.743, + "step": 465210 + }, + { + "epoch": 0.939773833716472, + "grad_norm": 0.047875095158815384, + "learning_rate": 1.4666926477599153e-07, + "loss": 9.238, + "step": 465220 + }, + { + "epoch": 0.9397940343491558, + "grad_norm": 3.2310094833374023, + "learning_rate": 1.4658535023191922e-07, + "loss": 18.7415, + "step": 465230 + }, + { + "epoch": 0.9398142349818396, + "grad_norm": 279.0162353515625, + "learning_rate": 1.4650145934280103e-07, + "loss": 25.614, + "step": 465240 + }, + { + "epoch": 0.9398344356145234, + "grad_norm": 96.9708023071289, + "learning_rate": 1.4641759210904605e-07, + "loss": 16.596, + "step": 465250 + }, + { + "epoch": 0.9398546362472072, + "grad_norm": 684.9998779296875, + "learning_rate": 1.463337485310634e-07, + "loss": 18.1348, + "step": 465260 + }, + { + "epoch": 0.939874836879891, + "grad_norm": 10.25330924987793, + "learning_rate": 1.4624992860926112e-07, + "loss": 12.523, + "step": 465270 + }, + { + 
"epoch": 0.9398950375125749, + "grad_norm": 2057.43212890625, + "learning_rate": 1.461661323440483e-07, + "loss": 37.1617, + "step": 465280 + }, + { + "epoch": 0.9399152381452587, + "grad_norm": 313.6214904785156, + "learning_rate": 1.4608235973583296e-07, + "loss": 16.6247, + "step": 465290 + }, + { + "epoch": 0.9399354387779425, + "grad_norm": 133.1804656982422, + "learning_rate": 1.459986107850231e-07, + "loss": 29.5971, + "step": 465300 + }, + { + "epoch": 0.9399556394106263, + "grad_norm": 0.5061958432197571, + "learning_rate": 1.4591488549202725e-07, + "loss": 21.5198, + "step": 465310 + }, + { + "epoch": 0.9399758400433101, + "grad_norm": 390.5429992675781, + "learning_rate": 1.4583118385725402e-07, + "loss": 10.8836, + "step": 465320 + }, + { + "epoch": 0.939996040675994, + "grad_norm": 153.8983917236328, + "learning_rate": 1.4574750588111085e-07, + "loss": 10.1329, + "step": 465330 + }, + { + "epoch": 0.9400162413086778, + "grad_norm": 102.71835327148438, + "learning_rate": 1.4566385156400463e-07, + "loss": 19.8331, + "step": 465340 + }, + { + "epoch": 0.9400364419413616, + "grad_norm": 165.04196166992188, + "learning_rate": 1.4558022090634504e-07, + "loss": 9.5658, + "step": 465350 + }, + { + "epoch": 0.9400566425740454, + "grad_norm": 144.1915283203125, + "learning_rate": 1.4549661390853897e-07, + "loss": 22.7668, + "step": 465360 + }, + { + "epoch": 0.9400768432067292, + "grad_norm": 269.35174560546875, + "learning_rate": 1.4541303057099275e-07, + "loss": 17.217, + "step": 465370 + }, + { + "epoch": 0.9400970438394131, + "grad_norm": 341.9735412597656, + "learning_rate": 1.4532947089411443e-07, + "loss": 14.102, + "step": 465380 + }, + { + "epoch": 0.9401172444720969, + "grad_norm": 342.3262939453125, + "learning_rate": 1.452459348783125e-07, + "loss": 14.8285, + "step": 465390 + }, + { + "epoch": 0.9401374451047807, + "grad_norm": 195.041748046875, + "learning_rate": 1.4516242252399227e-07, + "loss": 21.6281, + "step": 465400 + }, + { + "epoch": 0.9401576457374645, + "grad_norm": 653.010986328125, + "learning_rate": 1.450789338315617e-07, + "loss": 16.6789, + "step": 465410 + }, + { + "epoch": 0.9401778463701483, + "grad_norm": 429.4823913574219, + "learning_rate": 1.4499546880142823e-07, + "loss": 20.1362, + "step": 465420 + }, + { + "epoch": 0.9401980470028322, + "grad_norm": 472.930419921875, + "learning_rate": 1.4491202743399767e-07, + "loss": 16.6625, + "step": 465430 + }, + { + "epoch": 0.940218247635516, + "grad_norm": 598.2315063476562, + "learning_rate": 1.448286097296764e-07, + "loss": 22.673, + "step": 465440 + }, + { + "epoch": 0.9402384482681998, + "grad_norm": 560.1913452148438, + "learning_rate": 1.4474521568887178e-07, + "loss": 20.8818, + "step": 465450 + }, + { + "epoch": 0.9402586489008836, + "grad_norm": 308.62799072265625, + "learning_rate": 1.4466184531199135e-07, + "loss": 10.2614, + "step": 465460 + }, + { + "epoch": 0.9402788495335674, + "grad_norm": 633.8780517578125, + "learning_rate": 1.4457849859943862e-07, + "loss": 20.54, + "step": 465470 + }, + { + "epoch": 0.9402990501662513, + "grad_norm": 394.0419616699219, + "learning_rate": 1.4449517555162163e-07, + "loss": 11.4615, + "step": 465480 + }, + { + "epoch": 0.940319250798935, + "grad_norm": 340.50732421875, + "learning_rate": 1.4441187616894724e-07, + "loss": 16.5588, + "step": 465490 + }, + { + "epoch": 0.9403394514316188, + "grad_norm": 202.9910125732422, + "learning_rate": 1.4432860045182019e-07, + "loss": 27.8608, + "step": 465500 + }, + { + "epoch": 0.9403596520643026, + "grad_norm": 
115.48960876464844, + "learning_rate": 1.4424534840064563e-07, + "loss": 16.095, + "step": 465510 + }, + { + "epoch": 0.9403798526969864, + "grad_norm": 159.15509033203125, + "learning_rate": 1.4416212001583163e-07, + "loss": 13.0288, + "step": 465520 + }, + { + "epoch": 0.9404000533296702, + "grad_norm": 198.47335815429688, + "learning_rate": 1.4407891529778172e-07, + "loss": 11.6673, + "step": 465530 + }, + { + "epoch": 0.9404202539623541, + "grad_norm": 702.14794921875, + "learning_rate": 1.4399573424690227e-07, + "loss": 26.2657, + "step": 465540 + }, + { + "epoch": 0.9404404545950379, + "grad_norm": 347.3752746582031, + "learning_rate": 1.4391257686359906e-07, + "loss": 21.5979, + "step": 465550 + }, + { + "epoch": 0.9404606552277217, + "grad_norm": 139.51553344726562, + "learning_rate": 1.438294431482762e-07, + "loss": 27.9939, + "step": 465560 + }, + { + "epoch": 0.9404808558604055, + "grad_norm": 472.1897277832031, + "learning_rate": 1.4374633310134057e-07, + "loss": 16.1521, + "step": 465570 + }, + { + "epoch": 0.9405010564930893, + "grad_norm": 373.1190490722656, + "learning_rate": 1.4366324672319575e-07, + "loss": 27.0018, + "step": 465580 + }, + { + "epoch": 0.9405212571257732, + "grad_norm": 204.26011657714844, + "learning_rate": 1.43580184014247e-07, + "loss": 18.4251, + "step": 465590 + }, + { + "epoch": 0.940541457758457, + "grad_norm": 159.9972686767578, + "learning_rate": 1.4349714497490009e-07, + "loss": 11.4422, + "step": 465600 + }, + { + "epoch": 0.9405616583911408, + "grad_norm": 307.0250549316406, + "learning_rate": 1.4341412960555855e-07, + "loss": 15.6385, + "step": 465610 + }, + { + "epoch": 0.9405818590238246, + "grad_norm": 491.42803955078125, + "learning_rate": 1.4333113790662822e-07, + "loss": 18.4737, + "step": 465620 + }, + { + "epoch": 0.9406020596565084, + "grad_norm": 243.17962646484375, + "learning_rate": 1.432481698785121e-07, + "loss": 4.4355, + "step": 465630 + }, + { + "epoch": 0.9406222602891923, + "grad_norm": 17.453372955322266, + "learning_rate": 1.4316522552161593e-07, + "loss": 18.5871, + "step": 465640 + }, + { + "epoch": 0.9406424609218761, + "grad_norm": 4.891506671905518, + "learning_rate": 1.4308230483634334e-07, + "loss": 28.1658, + "step": 465650 + }, + { + "epoch": 0.9406626615545599, + "grad_norm": 328.7502136230469, + "learning_rate": 1.4299940782309785e-07, + "loss": 12.0737, + "step": 465660 + }, + { + "epoch": 0.9406828621872437, + "grad_norm": 356.3871765136719, + "learning_rate": 1.4291653448228416e-07, + "loss": 14.3334, + "step": 465670 + }, + { + "epoch": 0.9407030628199275, + "grad_norm": 29.75800132751465, + "learning_rate": 1.4283368481430747e-07, + "loss": 21.4017, + "step": 465680 + }, + { + "epoch": 0.9407232634526114, + "grad_norm": 62.24608612060547, + "learning_rate": 1.427508588195692e-07, + "loss": 15.4066, + "step": 465690 + }, + { + "epoch": 0.9407434640852952, + "grad_norm": 540.0977172851562, + "learning_rate": 1.4266805649847392e-07, + "loss": 16.5002, + "step": 465700 + }, + { + "epoch": 0.940763664717979, + "grad_norm": 419.4856872558594, + "learning_rate": 1.425852778514264e-07, + "loss": 10.3622, + "step": 465710 + }, + { + "epoch": 0.9407838653506628, + "grad_norm": 501.3851318359375, + "learning_rate": 1.4250252287882848e-07, + "loss": 16.8594, + "step": 465720 + }, + { + "epoch": 0.9408040659833466, + "grad_norm": 234.9951934814453, + "learning_rate": 1.4241979158108433e-07, + "loss": 16.2693, + "step": 465730 + }, + { + "epoch": 0.9408242666160305, + "grad_norm": 13.653524398803711, + "learning_rate": 
1.4233708395859692e-07, + "loss": 25.7325, + "step": 465740 + }, + { + "epoch": 0.9408444672487142, + "grad_norm": 139.67691040039062, + "learning_rate": 1.4225440001176983e-07, + "loss": 15.5495, + "step": 465750 + }, + { + "epoch": 0.940864667881398, + "grad_norm": 470.8349609375, + "learning_rate": 1.421717397410044e-07, + "loss": 25.8463, + "step": 465760 + }, + { + "epoch": 0.9408848685140818, + "grad_norm": 453.8491516113281, + "learning_rate": 1.420891031467053e-07, + "loss": 22.6746, + "step": 465770 + }, + { + "epoch": 0.9409050691467656, + "grad_norm": 224.24273681640625, + "learning_rate": 1.4200649022927505e-07, + "loss": 11.2414, + "step": 465780 + }, + { + "epoch": 0.9409252697794495, + "grad_norm": 47.68784713745117, + "learning_rate": 1.41923900989116e-07, + "loss": 15.038, + "step": 465790 + }, + { + "epoch": 0.9409454704121333, + "grad_norm": 472.7624206542969, + "learning_rate": 1.4184133542663014e-07, + "loss": 14.2832, + "step": 465800 + }, + { + "epoch": 0.9409656710448171, + "grad_norm": 157.7672882080078, + "learning_rate": 1.41758793542221e-07, + "loss": 12.4509, + "step": 465810 + }, + { + "epoch": 0.9409858716775009, + "grad_norm": 438.72222900390625, + "learning_rate": 1.4167627533628992e-07, + "loss": 17.5392, + "step": 465820 + }, + { + "epoch": 0.9410060723101847, + "grad_norm": 0.7373473048210144, + "learning_rate": 1.4159378080923936e-07, + "loss": 23.9596, + "step": 465830 + }, + { + "epoch": 0.9410262729428686, + "grad_norm": 465.291015625, + "learning_rate": 1.4151130996147177e-07, + "loss": 22.3183, + "step": 465840 + }, + { + "epoch": 0.9410464735755524, + "grad_norm": 515.6854248046875, + "learning_rate": 1.4142886279338852e-07, + "loss": 28.089, + "step": 465850 + }, + { + "epoch": 0.9410666742082362, + "grad_norm": 241.45852661132812, + "learning_rate": 1.4134643930539204e-07, + "loss": 13.2914, + "step": 465860 + }, + { + "epoch": 0.94108687484092, + "grad_norm": 210.69989013671875, + "learning_rate": 1.4126403949788369e-07, + "loss": 14.6817, + "step": 465870 + }, + { + "epoch": 0.9411070754736038, + "grad_norm": 14.873430252075195, + "learning_rate": 1.4118166337126428e-07, + "loss": 23.0126, + "step": 465880 + }, + { + "epoch": 0.9411272761062877, + "grad_norm": 133.8224334716797, + "learning_rate": 1.4109931092593732e-07, + "loss": 25.0196, + "step": 465890 + }, + { + "epoch": 0.9411474767389715, + "grad_norm": 605.4613647460938, + "learning_rate": 1.4101698216230254e-07, + "loss": 11.0159, + "step": 465900 + }, + { + "epoch": 0.9411676773716553, + "grad_norm": 308.15338134765625, + "learning_rate": 1.4093467708076126e-07, + "loss": 17.0492, + "step": 465910 + }, + { + "epoch": 0.9411878780043391, + "grad_norm": 569.3749389648438, + "learning_rate": 1.4085239568171483e-07, + "loss": 13.6531, + "step": 465920 + }, + { + "epoch": 0.9412080786370229, + "grad_norm": 5.422422885894775, + "learning_rate": 1.4077013796556515e-07, + "loss": 27.1837, + "step": 465930 + }, + { + "epoch": 0.9412282792697068, + "grad_norm": 260.8473205566406, + "learning_rate": 1.406879039327125e-07, + "loss": 27.4344, + "step": 465940 + }, + { + "epoch": 0.9412484799023906, + "grad_norm": 385.05035400390625, + "learning_rate": 1.4060569358355703e-07, + "loss": 28.6004, + "step": 465950 + }, + { + "epoch": 0.9412686805350744, + "grad_norm": 113.09442138671875, + "learning_rate": 1.405235069185007e-07, + "loss": 13.7099, + "step": 465960 + }, + { + "epoch": 0.9412888811677582, + "grad_norm": 390.7118835449219, + "learning_rate": 1.4044134393794373e-07, + "loss": 16.4347, + 
"step": 465970 + }, + { + "epoch": 0.941309081800442, + "grad_norm": 317.0025634765625, + "learning_rate": 1.4035920464228525e-07, + "loss": 10.5919, + "step": 465980 + }, + { + "epoch": 0.9413292824331259, + "grad_norm": 301.4737548828125, + "learning_rate": 1.4027708903192662e-07, + "loss": 24.9793, + "step": 465990 + }, + { + "epoch": 0.9413494830658096, + "grad_norm": 348.3053283691406, + "learning_rate": 1.4019499710726913e-07, + "loss": 17.0418, + "step": 466000 + }, + { + "epoch": 0.9413696836984934, + "grad_norm": 103.71463775634766, + "learning_rate": 1.4011292886871086e-07, + "loss": 14.3418, + "step": 466010 + }, + { + "epoch": 0.9413898843311772, + "grad_norm": 543.97265625, + "learning_rate": 1.4003088431665312e-07, + "loss": 11.213, + "step": 466020 + }, + { + "epoch": 0.941410084963861, + "grad_norm": 645.0778198242188, + "learning_rate": 1.3994886345149504e-07, + "loss": 17.445, + "step": 466030 + }, + { + "epoch": 0.9414302855965448, + "grad_norm": 707.9461059570312, + "learning_rate": 1.3986686627363744e-07, + "loss": 33.2789, + "step": 466040 + }, + { + "epoch": 0.9414504862292287, + "grad_norm": 12.873198509216309, + "learning_rate": 1.3978489278347883e-07, + "loss": 28.9894, + "step": 466050 + }, + { + "epoch": 0.9414706868619125, + "grad_norm": 318.5113830566406, + "learning_rate": 1.397029429814184e-07, + "loss": 22.1279, + "step": 466060 + }, + { + "epoch": 0.9414908874945963, + "grad_norm": 123.84893798828125, + "learning_rate": 1.39621016867858e-07, + "loss": 21.4548, + "step": 466070 + }, + { + "epoch": 0.9415110881272801, + "grad_norm": 315.24322509765625, + "learning_rate": 1.39539114443194e-07, + "loss": 23.2591, + "step": 466080 + }, + { + "epoch": 0.941531288759964, + "grad_norm": 444.9673156738281, + "learning_rate": 1.3945723570782722e-07, + "loss": 22.2276, + "step": 466090 + }, + { + "epoch": 0.9415514893926478, + "grad_norm": 99.54863739013672, + "learning_rate": 1.3937538066215672e-07, + "loss": 22.1537, + "step": 466100 + }, + { + "epoch": 0.9415716900253316, + "grad_norm": 248.89920043945312, + "learning_rate": 1.3929354930658112e-07, + "loss": 11.2812, + "step": 466110 + }, + { + "epoch": 0.9415918906580154, + "grad_norm": 295.01171875, + "learning_rate": 1.3921174164149842e-07, + "loss": 23.3916, + "step": 466120 + }, + { + "epoch": 0.9416120912906992, + "grad_norm": 364.608154296875, + "learning_rate": 1.3912995766730887e-07, + "loss": 9.879, + "step": 466130 + }, + { + "epoch": 0.941632291923383, + "grad_norm": 156.15719604492188, + "learning_rate": 1.3904819738441043e-07, + "loss": 28.2597, + "step": 466140 + }, + { + "epoch": 0.9416524925560669, + "grad_norm": 570.7730712890625, + "learning_rate": 1.3896646079320064e-07, + "loss": 18.1528, + "step": 466150 + }, + { + "epoch": 0.9416726931887507, + "grad_norm": 200.7433319091797, + "learning_rate": 1.388847478940797e-07, + "loss": 19.8137, + "step": 466160 + }, + { + "epoch": 0.9416928938214345, + "grad_norm": 213.10841369628906, + "learning_rate": 1.3880305868744392e-07, + "loss": 20.0374, + "step": 466170 + }, + { + "epoch": 0.9417130944541183, + "grad_norm": 244.504150390625, + "learning_rate": 1.3872139317369304e-07, + "loss": 10.2179, + "step": 466180 + }, + { + "epoch": 0.9417332950868021, + "grad_norm": 319.9541015625, + "learning_rate": 1.3863975135322505e-07, + "loss": 12.6198, + "step": 466190 + }, + { + "epoch": 0.941753495719486, + "grad_norm": 201.5258331298828, + "learning_rate": 1.385581332264363e-07, + "loss": 15.1341, + "step": 466200 + }, + { + "epoch": 0.9417736963521698, + 
"grad_norm": 192.09957885742188, + "learning_rate": 1.3847653879372646e-07, + "loss": 18.1536, + "step": 466210 + }, + { + "epoch": 0.9417938969848536, + "grad_norm": 217.1121826171875, + "learning_rate": 1.3839496805549136e-07, + "loss": 12.6363, + "step": 466220 + }, + { + "epoch": 0.9418140976175374, + "grad_norm": 646.8970947265625, + "learning_rate": 1.383134210121301e-07, + "loss": 13.4812, + "step": 466230 + }, + { + "epoch": 0.9418342982502212, + "grad_norm": 209.90310668945312, + "learning_rate": 1.3823189766403954e-07, + "loss": 22.6369, + "step": 466240 + }, + { + "epoch": 0.9418544988829051, + "grad_norm": 597.7464599609375, + "learning_rate": 1.3815039801161723e-07, + "loss": 14.8995, + "step": 466250 + }, + { + "epoch": 0.9418746995155888, + "grad_norm": 583.8340454101562, + "learning_rate": 1.3806892205526e-07, + "loss": 12.6547, + "step": 466260 + }, + { + "epoch": 0.9418949001482726, + "grad_norm": 146.20486450195312, + "learning_rate": 1.3798746979536482e-07, + "loss": 20.1029, + "step": 466270 + }, + { + "epoch": 0.9419151007809564, + "grad_norm": 462.0206604003906, + "learning_rate": 1.3790604123232966e-07, + "loss": 11.2156, + "step": 466280 + }, + { + "epoch": 0.9419353014136402, + "grad_norm": 243.73023986816406, + "learning_rate": 1.3782463636655087e-07, + "loss": 21.2671, + "step": 466290 + }, + { + "epoch": 0.9419555020463241, + "grad_norm": 127.55465698242188, + "learning_rate": 1.3774325519842423e-07, + "loss": 14.346, + "step": 466300 + }, + { + "epoch": 0.9419757026790079, + "grad_norm": 122.92948913574219, + "learning_rate": 1.376618977283478e-07, + "loss": 20.0084, + "step": 466310 + }, + { + "epoch": 0.9419959033116917, + "grad_norm": 609.1244506835938, + "learning_rate": 1.3758056395671738e-07, + "loss": 17.6589, + "step": 466320 + }, + { + "epoch": 0.9420161039443755, + "grad_norm": 322.552001953125, + "learning_rate": 1.374992538839298e-07, + "loss": 19.4366, + "step": 466330 + }, + { + "epoch": 0.9420363045770593, + "grad_norm": 110.1209716796875, + "learning_rate": 1.3741796751038095e-07, + "loss": 22.9075, + "step": 466340 + }, + { + "epoch": 0.9420565052097432, + "grad_norm": 380.9493713378906, + "learning_rate": 1.373367048364671e-07, + "loss": 14.092, + "step": 466350 + }, + { + "epoch": 0.942076705842427, + "grad_norm": 0.42919933795928955, + "learning_rate": 1.3725546586258464e-07, + "loss": 13.1575, + "step": 466360 + }, + { + "epoch": 0.9420969064751108, + "grad_norm": 1916.9405517578125, + "learning_rate": 1.3717425058912882e-07, + "loss": 18.7603, + "step": 466370 + }, + { + "epoch": 0.9421171071077946, + "grad_norm": 14.732674598693848, + "learning_rate": 1.3709305901649594e-07, + "loss": 38.1083, + "step": 466380 + }, + { + "epoch": 0.9421373077404784, + "grad_norm": 512.1130981445312, + "learning_rate": 1.370118911450824e-07, + "loss": 18.5798, + "step": 466390 + }, + { + "epoch": 0.9421575083731623, + "grad_norm": 434.47039794921875, + "learning_rate": 1.3693074697528231e-07, + "loss": 19.878, + "step": 466400 + }, + { + "epoch": 0.9421777090058461, + "grad_norm": 20.22287940979004, + "learning_rate": 1.36849626507492e-07, + "loss": 21.2979, + "step": 466410 + }, + { + "epoch": 0.9421979096385299, + "grad_norm": 260.6330261230469, + "learning_rate": 1.367685297421073e-07, + "loss": 22.6538, + "step": 466420 + }, + { + "epoch": 0.9422181102712137, + "grad_norm": 635.8558959960938, + "learning_rate": 1.366874566795229e-07, + "loss": 25.0508, + "step": 466430 + }, + { + "epoch": 0.9422383109038975, + "grad_norm": 382.78399658203125, + 
"learning_rate": 1.3660640732013342e-07, + "loss": 9.9655, + "step": 466440 + }, + { + "epoch": 0.9422585115365814, + "grad_norm": 202.0198211669922, + "learning_rate": 1.3652538166433527e-07, + "loss": 20.7105, + "step": 466450 + }, + { + "epoch": 0.9422787121692652, + "grad_norm": 86.01641082763672, + "learning_rate": 1.3644437971252144e-07, + "loss": 23.0091, + "step": 466460 + }, + { + "epoch": 0.942298912801949, + "grad_norm": 148.82080078125, + "learning_rate": 1.3636340146508886e-07, + "loss": 17.164, + "step": 466470 + }, + { + "epoch": 0.9423191134346328, + "grad_norm": 490.79486083984375, + "learning_rate": 1.362824469224311e-07, + "loss": 26.0232, + "step": 466480 + }, + { + "epoch": 0.9423393140673166, + "grad_norm": 759.5617065429688, + "learning_rate": 1.362015160849417e-07, + "loss": 17.5195, + "step": 466490 + }, + { + "epoch": 0.9423595147000005, + "grad_norm": 160.0380401611328, + "learning_rate": 1.3612060895301759e-07, + "loss": 10.9043, + "step": 466500 + }, + { + "epoch": 0.9423797153326842, + "grad_norm": 81.51010131835938, + "learning_rate": 1.360397255270507e-07, + "loss": 12.4245, + "step": 466510 + }, + { + "epoch": 0.942399915965368, + "grad_norm": 9.42785930633545, + "learning_rate": 1.3595886580743677e-07, + "loss": 15.6158, + "step": 466520 + }, + { + "epoch": 0.9424201165980518, + "grad_norm": 254.63711547851562, + "learning_rate": 1.3587802979456888e-07, + "loss": 21.2009, + "step": 466530 + }, + { + "epoch": 0.9424403172307356, + "grad_norm": 488.95220947265625, + "learning_rate": 1.3579721748884222e-07, + "loss": 21.6403, + "step": 466540 + }, + { + "epoch": 0.9424605178634194, + "grad_norm": 34.97917556762695, + "learning_rate": 1.3571642889064984e-07, + "loss": 15.1119, + "step": 466550 + }, + { + "epoch": 0.9424807184961033, + "grad_norm": 238.89625549316406, + "learning_rate": 1.356356640003853e-07, + "loss": 8.2208, + "step": 466560 + }, + { + "epoch": 0.9425009191287871, + "grad_norm": 247.09791564941406, + "learning_rate": 1.3555492281844273e-07, + "loss": 16.6098, + "step": 466570 + }, + { + "epoch": 0.9425211197614709, + "grad_norm": 292.3550109863281, + "learning_rate": 1.354742053452157e-07, + "loss": 25.4076, + "step": 466580 + }, + { + "epoch": 0.9425413203941547, + "grad_norm": 550.693115234375, + "learning_rate": 1.353935115810967e-07, + "loss": 13.479, + "step": 466590 + }, + { + "epoch": 0.9425615210268385, + "grad_norm": 150.84622192382812, + "learning_rate": 1.3531284152647983e-07, + "loss": 26.4981, + "step": 466600 + }, + { + "epoch": 0.9425817216595224, + "grad_norm": 135.9732666015625, + "learning_rate": 1.3523219518175924e-07, + "loss": 19.2011, + "step": 466610 + }, + { + "epoch": 0.9426019222922062, + "grad_norm": 411.6714172363281, + "learning_rate": 1.351515725473257e-07, + "loss": 13.1952, + "step": 466620 + }, + { + "epoch": 0.94262212292489, + "grad_norm": 691.3521728515625, + "learning_rate": 1.3507097362357392e-07, + "loss": 23.5884, + "step": 466630 + }, + { + "epoch": 0.9426423235575738, + "grad_norm": 435.2999572753906, + "learning_rate": 1.349903984108958e-07, + "loss": 19.8776, + "step": 466640 + }, + { + "epoch": 0.9426625241902576, + "grad_norm": 522.2403564453125, + "learning_rate": 1.3490984690968488e-07, + "loss": 25.458, + "step": 466650 + }, + { + "epoch": 0.9426827248229415, + "grad_norm": 790.474609375, + "learning_rate": 1.3482931912033314e-07, + "loss": 21.0078, + "step": 466660 + }, + { + "epoch": 0.9427029254556253, + "grad_norm": 1263.0888671875, + "learning_rate": 1.3474881504323301e-07, + "loss": 
19.8605, + "step": 466670 + }, + { + "epoch": 0.9427231260883091, + "grad_norm": 590.8986206054688, + "learning_rate": 1.346683346787775e-07, + "loss": 13.3168, + "step": 466680 + }, + { + "epoch": 0.9427433267209929, + "grad_norm": 232.91049194335938, + "learning_rate": 1.3458787802735794e-07, + "loss": 21.9886, + "step": 466690 + }, + { + "epoch": 0.9427635273536767, + "grad_norm": 438.4366455078125, + "learning_rate": 1.3450744508936687e-07, + "loss": 18.1001, + "step": 466700 + }, + { + "epoch": 0.9427837279863606, + "grad_norm": 492.16668701171875, + "learning_rate": 1.3442703586519724e-07, + "loss": 10.5851, + "step": 466710 + }, + { + "epoch": 0.9428039286190444, + "grad_norm": 277.2815246582031, + "learning_rate": 1.3434665035523985e-07, + "loss": 30.349, + "step": 466720 + }, + { + "epoch": 0.9428241292517282, + "grad_norm": 177.5471954345703, + "learning_rate": 1.342662885598861e-07, + "loss": 11.7614, + "step": 466730 + }, + { + "epoch": 0.942844329884412, + "grad_norm": 438.42755126953125, + "learning_rate": 1.3418595047952897e-07, + "loss": 12.5441, + "step": 466740 + }, + { + "epoch": 0.9428645305170958, + "grad_norm": 639.2652587890625, + "learning_rate": 1.341056361145593e-07, + "loss": 15.6022, + "step": 466750 + }, + { + "epoch": 0.9428847311497797, + "grad_norm": 40.974517822265625, + "learning_rate": 1.3402534546536783e-07, + "loss": 17.9016, + "step": 466760 + }, + { + "epoch": 0.9429049317824634, + "grad_norm": 227.44757080078125, + "learning_rate": 1.3394507853234763e-07, + "loss": 19.5819, + "step": 466770 + }, + { + "epoch": 0.9429251324151472, + "grad_norm": 461.1067810058594, + "learning_rate": 1.3386483531588834e-07, + "loss": 21.6301, + "step": 466780 + }, + { + "epoch": 0.942945333047831, + "grad_norm": 502.9984130859375, + "learning_rate": 1.337846158163819e-07, + "loss": 12.9162, + "step": 466790 + }, + { + "epoch": 0.9429655336805148, + "grad_norm": 606.315673828125, + "learning_rate": 1.3370442003421913e-07, + "loss": 25.3759, + "step": 466800 + }, + { + "epoch": 0.9429857343131987, + "grad_norm": 275.4271545410156, + "learning_rate": 1.336242479697908e-07, + "loss": 35.1638, + "step": 466810 + }, + { + "epoch": 0.9430059349458825, + "grad_norm": 544.243408203125, + "learning_rate": 1.335440996234877e-07, + "loss": 11.1995, + "step": 466820 + }, + { + "epoch": 0.9430261355785663, + "grad_norm": 200.02992248535156, + "learning_rate": 1.334639749956995e-07, + "loss": 12.549, + "step": 466830 + }, + { + "epoch": 0.9430463362112501, + "grad_norm": 487.49444580078125, + "learning_rate": 1.3338387408681875e-07, + "loss": 13.481, + "step": 466840 + }, + { + "epoch": 0.9430665368439339, + "grad_norm": 738.1172485351562, + "learning_rate": 1.333037968972345e-07, + "loss": 15.7329, + "step": 466850 + }, + { + "epoch": 0.9430867374766178, + "grad_norm": 99.85303497314453, + "learning_rate": 1.33223743427337e-07, + "loss": 19.0156, + "step": 466860 + }, + { + "epoch": 0.9431069381093016, + "grad_norm": 357.34100341796875, + "learning_rate": 1.331437136775171e-07, + "loss": 18.5052, + "step": 466870 + }, + { + "epoch": 0.9431271387419854, + "grad_norm": 166.46453857421875, + "learning_rate": 1.330637076481639e-07, + "loss": 13.3199, + "step": 466880 + }, + { + "epoch": 0.9431473393746692, + "grad_norm": 48.30439758300781, + "learning_rate": 1.3298372533966874e-07, + "loss": 11.878, + "step": 466890 + }, + { + "epoch": 0.943167540007353, + "grad_norm": 324.0368347167969, + "learning_rate": 1.3290376675242022e-07, + "loss": 25.7629, + "step": 466900 + }, + { + "epoch": 
0.9431877406400369, + "grad_norm": 390.7507019042969, + "learning_rate": 1.3282383188680802e-07, + "loss": 21.2157, + "step": 466910 + }, + { + "epoch": 0.9432079412727207, + "grad_norm": 57.96989059448242, + "learning_rate": 1.327439207432224e-07, + "loss": 15.7185, + "step": 466920 + }, + { + "epoch": 0.9432281419054045, + "grad_norm": 202.033935546875, + "learning_rate": 1.3266403332205248e-07, + "loss": 9.7807, + "step": 466930 + }, + { + "epoch": 0.9432483425380883, + "grad_norm": 108.84996795654297, + "learning_rate": 1.3258416962368849e-07, + "loss": 14.6172, + "step": 466940 + }, + { + "epoch": 0.9432685431707721, + "grad_norm": 172.59718322753906, + "learning_rate": 1.325043296485179e-07, + "loss": 18.0242, + "step": 466950 + }, + { + "epoch": 0.943288743803456, + "grad_norm": 434.2422180175781, + "learning_rate": 1.3242451339693153e-07, + "loss": 33.7853, + "step": 466960 + }, + { + "epoch": 0.9433089444361398, + "grad_norm": 147.3890838623047, + "learning_rate": 1.3234472086931738e-07, + "loss": 26.3679, + "step": 466970 + }, + { + "epoch": 0.9433291450688236, + "grad_norm": 10.948002815246582, + "learning_rate": 1.322649520660646e-07, + "loss": 13.5406, + "step": 466980 + }, + { + "epoch": 0.9433493457015074, + "grad_norm": 424.71539306640625, + "learning_rate": 1.3218520698756177e-07, + "loss": 9.7036, + "step": 466990 + }, + { + "epoch": 0.9433695463341912, + "grad_norm": 243.30010986328125, + "learning_rate": 1.3210548563419857e-07, + "loss": 13.9705, + "step": 467000 + }, + { + "epoch": 0.943389746966875, + "grad_norm": 197.2886962890625, + "learning_rate": 1.32025788006363e-07, + "loss": 26.665, + "step": 467010 + }, + { + "epoch": 0.9434099475995589, + "grad_norm": 610.0017700195312, + "learning_rate": 1.3194611410444258e-07, + "loss": 21.8821, + "step": 467020 + }, + { + "epoch": 0.9434301482322426, + "grad_norm": 272.1587219238281, + "learning_rate": 1.3186646392882696e-07, + "loss": 8.3357, + "step": 467030 + }, + { + "epoch": 0.9434503488649264, + "grad_norm": 167.1318817138672, + "learning_rate": 1.3178683747990362e-07, + "loss": 11.8705, + "step": 467040 + }, + { + "epoch": 0.9434705494976102, + "grad_norm": 434.9255065917969, + "learning_rate": 1.3170723475806003e-07, + "loss": 23.161, + "step": 467050 + }, + { + "epoch": 0.943490750130294, + "grad_norm": 79.04789733886719, + "learning_rate": 1.3162765576368587e-07, + "loss": 24.5077, + "step": 467060 + }, + { + "epoch": 0.9435109507629779, + "grad_norm": 203.78448486328125, + "learning_rate": 1.315481004971675e-07, + "loss": 8.7908, + "step": 467070 + }, + { + "epoch": 0.9435311513956617, + "grad_norm": 225.8023681640625, + "learning_rate": 1.314685689588935e-07, + "loss": 18.4575, + "step": 467080 + }, + { + "epoch": 0.9435513520283455, + "grad_norm": 297.1622619628906, + "learning_rate": 1.3138906114925133e-07, + "loss": 10.182, + "step": 467090 + }, + { + "epoch": 0.9435715526610293, + "grad_norm": 90.65361022949219, + "learning_rate": 1.313095770686279e-07, + "loss": 11.4575, + "step": 467100 + }, + { + "epoch": 0.9435917532937131, + "grad_norm": 511.15032958984375, + "learning_rate": 1.3123011671741183e-07, + "loss": 17.2859, + "step": 467110 + }, + { + "epoch": 0.943611953926397, + "grad_norm": 199.49876403808594, + "learning_rate": 1.3115068009598886e-07, + "loss": 15.8563, + "step": 467120 + }, + { + "epoch": 0.9436321545590808, + "grad_norm": 403.6021728515625, + "learning_rate": 1.3107126720474762e-07, + "loss": 13.149, + "step": 467130 + }, + { + "epoch": 0.9436523551917646, + "grad_norm": 
256.2900695800781, + "learning_rate": 1.3099187804407387e-07, + "loss": 24.4645, + "step": 467140 + }, + { + "epoch": 0.9436725558244484, + "grad_norm": 254.575927734375, + "learning_rate": 1.3091251261435568e-07, + "loss": 7.1697, + "step": 467150 + }, + { + "epoch": 0.9436927564571322, + "grad_norm": 467.6200866699219, + "learning_rate": 1.3083317091597936e-07, + "loss": 17.0296, + "step": 467160 + }, + { + "epoch": 0.9437129570898161, + "grad_norm": 310.8859558105469, + "learning_rate": 1.3075385294933129e-07, + "loss": 14.5931, + "step": 467170 + }, + { + "epoch": 0.9437331577224999, + "grad_norm": 312.9391174316406, + "learning_rate": 1.306745587147984e-07, + "loss": 17.0316, + "step": 467180 + }, + { + "epoch": 0.9437533583551837, + "grad_norm": 322.968994140625, + "learning_rate": 1.3059528821276758e-07, + "loss": 18.964, + "step": 467190 + }, + { + "epoch": 0.9437735589878675, + "grad_norm": 116.29562377929688, + "learning_rate": 1.3051604144362407e-07, + "loss": 15.6003, + "step": 467200 + }, + { + "epoch": 0.9437937596205513, + "grad_norm": 424.7398986816406, + "learning_rate": 1.304368184077548e-07, + "loss": 21.4729, + "step": 467210 + }, + { + "epoch": 0.9438139602532352, + "grad_norm": 406.6511535644531, + "learning_rate": 1.3035761910554666e-07, + "loss": 23.7003, + "step": 467220 + }, + { + "epoch": 0.943834160885919, + "grad_norm": 431.1209716796875, + "learning_rate": 1.302784435373844e-07, + "loss": 11.4722, + "step": 467230 + }, + { + "epoch": 0.9438543615186028, + "grad_norm": 277.345703125, + "learning_rate": 1.3019929170365376e-07, + "loss": 18.2024, + "step": 467240 + }, + { + "epoch": 0.9438745621512866, + "grad_norm": 393.22003173828125, + "learning_rate": 1.3012016360474223e-07, + "loss": 6.3782, + "step": 467250 + }, + { + "epoch": 0.9438947627839704, + "grad_norm": 107.62462615966797, + "learning_rate": 1.3004105924103394e-07, + "loss": 28.9039, + "step": 467260 + }, + { + "epoch": 0.9439149634166543, + "grad_norm": 312.8868103027344, + "learning_rate": 1.2996197861291472e-07, + "loss": 20.6576, + "step": 467270 + }, + { + "epoch": 0.943935164049338, + "grad_norm": 304.1863098144531, + "learning_rate": 1.2988292172076977e-07, + "loss": 18.5216, + "step": 467280 + }, + { + "epoch": 0.9439553646820218, + "grad_norm": 190.7682647705078, + "learning_rate": 1.2980388856498604e-07, + "loss": 9.204, + "step": 467290 + }, + { + "epoch": 0.9439755653147056, + "grad_norm": 121.52448272705078, + "learning_rate": 1.29724879145946e-07, + "loss": 12.4498, + "step": 467300 + }, + { + "epoch": 0.9439957659473894, + "grad_norm": 229.15966796875, + "learning_rate": 1.296458934640371e-07, + "loss": 9.9259, + "step": 467310 + }, + { + "epoch": 0.9440159665800733, + "grad_norm": 532.7434692382812, + "learning_rate": 1.2956693151964296e-07, + "loss": 19.2803, + "step": 467320 + }, + { + "epoch": 0.9440361672127571, + "grad_norm": 372.7672424316406, + "learning_rate": 1.2948799331314933e-07, + "loss": 14.7937, + "step": 467330 + }, + { + "epoch": 0.9440563678454409, + "grad_norm": 1146.291015625, + "learning_rate": 1.2940907884494036e-07, + "loss": 48.4152, + "step": 467340 + }, + { + "epoch": 0.9440765684781247, + "grad_norm": 452.1363830566406, + "learning_rate": 1.2933018811540078e-07, + "loss": 18.9367, + "step": 467350 + }, + { + "epoch": 0.9440967691108085, + "grad_norm": 391.53204345703125, + "learning_rate": 1.2925132112491523e-07, + "loss": 16.6104, + "step": 467360 + }, + { + "epoch": 0.9441169697434924, + "grad_norm": 237.9326171875, + "learning_rate": 
1.2917247787386787e-07, + "loss": 16.8393, + "step": 467370 + }, + { + "epoch": 0.9441371703761762, + "grad_norm": 501.45037841796875, + "learning_rate": 1.2909365836264287e-07, + "loss": 21.6704, + "step": 467380 + }, + { + "epoch": 0.94415737100886, + "grad_norm": 126.89668273925781, + "learning_rate": 1.2901486259162488e-07, + "loss": 19.5969, + "step": 467390 + }, + { + "epoch": 0.9441775716415438, + "grad_norm": 428.07708740234375, + "learning_rate": 1.289360905611975e-07, + "loss": 10.1928, + "step": 467400 + }, + { + "epoch": 0.9441977722742276, + "grad_norm": 126.64779663085938, + "learning_rate": 1.288573422717454e-07, + "loss": 39.1484, + "step": 467410 + }, + { + "epoch": 0.9442179729069115, + "grad_norm": 75.78832244873047, + "learning_rate": 1.287786177236511e-07, + "loss": 15.7369, + "step": 467420 + }, + { + "epoch": 0.9442381735395953, + "grad_norm": 851.9575805664062, + "learning_rate": 1.2869991691729922e-07, + "loss": 30.6076, + "step": 467430 + }, + { + "epoch": 0.9442583741722791, + "grad_norm": 15.668920516967773, + "learning_rate": 1.2862123985307284e-07, + "loss": 11.1184, + "step": 467440 + }, + { + "epoch": 0.9442785748049629, + "grad_norm": 314.4423828125, + "learning_rate": 1.285425865313561e-07, + "loss": 14.5962, + "step": 467450 + }, + { + "epoch": 0.9442987754376467, + "grad_norm": 326.61346435546875, + "learning_rate": 1.28463956952532e-07, + "loss": 20.0917, + "step": 467460 + }, + { + "epoch": 0.9443189760703306, + "grad_norm": 434.478271484375, + "learning_rate": 1.2838535111698359e-07, + "loss": 18.4376, + "step": 467470 + }, + { + "epoch": 0.9443391767030144, + "grad_norm": 282.2841796875, + "learning_rate": 1.2830676902509443e-07, + "loss": 14.4992, + "step": 467480 + }, + { + "epoch": 0.9443593773356982, + "grad_norm": 924.6843872070312, + "learning_rate": 1.2822821067724643e-07, + "loss": 37.9355, + "step": 467490 + }, + { + "epoch": 0.944379577968382, + "grad_norm": 236.3780059814453, + "learning_rate": 1.2814967607382433e-07, + "loss": 7.6149, + "step": 467500 + }, + { + "epoch": 0.9443997786010658, + "grad_norm": 343.4498596191406, + "learning_rate": 1.2807116521520947e-07, + "loss": 13.3588, + "step": 467510 + }, + { + "epoch": 0.9444199792337497, + "grad_norm": 542.5467529296875, + "learning_rate": 1.279926781017843e-07, + "loss": 26.7331, + "step": 467520 + }, + { + "epoch": 0.9444401798664335, + "grad_norm": 121.63961029052734, + "learning_rate": 1.2791421473393184e-07, + "loss": 10.3786, + "step": 467530 + }, + { + "epoch": 0.9444603804991172, + "grad_norm": 289.2529296875, + "learning_rate": 1.2783577511203515e-07, + "loss": 14.1991, + "step": 467540 + }, + { + "epoch": 0.944480581131801, + "grad_norm": 811.0139770507812, + "learning_rate": 1.2775735923647614e-07, + "loss": 21.7363, + "step": 467550 + }, + { + "epoch": 0.9445007817644848, + "grad_norm": 155.37222290039062, + "learning_rate": 1.2767896710763616e-07, + "loss": 15.0662, + "step": 467560 + }, + { + "epoch": 0.9445209823971686, + "grad_norm": 305.1764221191406, + "learning_rate": 1.2760059872589824e-07, + "loss": 21.6474, + "step": 467570 + }, + { + "epoch": 0.9445411830298525, + "grad_norm": 359.0425109863281, + "learning_rate": 1.2752225409164432e-07, + "loss": 27.2246, + "step": 467580 + }, + { + "epoch": 0.9445613836625363, + "grad_norm": 193.85736083984375, + "learning_rate": 1.2744393320525573e-07, + "loss": 9.8475, + "step": 467590 + }, + { + "epoch": 0.9445815842952201, + "grad_norm": 462.73992919921875, + "learning_rate": 1.2736563606711384e-07, + "loss": 11.911, + 
"step": 467600 + }, + { + "epoch": 0.9446017849279039, + "grad_norm": 240.19923400878906, + "learning_rate": 1.2728736267760167e-07, + "loss": 43.9946, + "step": 467610 + }, + { + "epoch": 0.9446219855605877, + "grad_norm": 217.29193115234375, + "learning_rate": 1.2720911303710004e-07, + "loss": 11.255, + "step": 467620 + }, + { + "epoch": 0.9446421861932716, + "grad_norm": 166.75306701660156, + "learning_rate": 1.2713088714598974e-07, + "loss": 10.818, + "step": 467630 + }, + { + "epoch": 0.9446623868259554, + "grad_norm": 146.30332946777344, + "learning_rate": 1.2705268500465274e-07, + "loss": 14.2386, + "step": 467640 + }, + { + "epoch": 0.9446825874586392, + "grad_norm": 37.514007568359375, + "learning_rate": 1.2697450661347033e-07, + "loss": 14.8072, + "step": 467650 + }, + { + "epoch": 0.944702788091323, + "grad_norm": 697.3914794921875, + "learning_rate": 1.2689635197282224e-07, + "loss": 24.3691, + "step": 467660 + }, + { + "epoch": 0.9447229887240068, + "grad_norm": 776.3318481445312, + "learning_rate": 1.2681822108309094e-07, + "loss": 21.7652, + "step": 467670 + }, + { + "epoch": 0.9447431893566907, + "grad_norm": 744.0573120117188, + "learning_rate": 1.2674011394465614e-07, + "loss": 29.1712, + "step": 467680 + }, + { + "epoch": 0.9447633899893745, + "grad_norm": 237.00564575195312, + "learning_rate": 1.2666203055789915e-07, + "loss": 11.0158, + "step": 467690 + }, + { + "epoch": 0.9447835906220583, + "grad_norm": 423.5158996582031, + "learning_rate": 1.2658397092320028e-07, + "loss": 18.9339, + "step": 467700 + }, + { + "epoch": 0.9448037912547421, + "grad_norm": 1079.7054443359375, + "learning_rate": 1.2650593504094034e-07, + "loss": 16.8912, + "step": 467710 + }, + { + "epoch": 0.9448239918874259, + "grad_norm": 400.0626525878906, + "learning_rate": 1.2642792291149896e-07, + "loss": 15.3153, + "step": 467720 + }, + { + "epoch": 0.9448441925201098, + "grad_norm": 819.558349609375, + "learning_rate": 1.2634993453525702e-07, + "loss": 27.604, + "step": 467730 + }, + { + "epoch": 0.9448643931527936, + "grad_norm": 203.84701538085938, + "learning_rate": 1.2627196991259473e-07, + "loss": 24.7013, + "step": 467740 + }, + { + "epoch": 0.9448845937854774, + "grad_norm": 247.1985626220703, + "learning_rate": 1.261940290438912e-07, + "loss": 24.6028, + "step": 467750 + }, + { + "epoch": 0.9449047944181612, + "grad_norm": 239.46533203125, + "learning_rate": 1.2611611192952733e-07, + "loss": 15.5538, + "step": 467760 + }, + { + "epoch": 0.944924995050845, + "grad_norm": 20.467748641967773, + "learning_rate": 1.2603821856988218e-07, + "loss": 15.1277, + "step": 467770 + }, + { + "epoch": 0.9449451956835289, + "grad_norm": 612.4995727539062, + "learning_rate": 1.259603489653355e-07, + "loss": 13.7239, + "step": 467780 + }, + { + "epoch": 0.9449653963162126, + "grad_norm": 285.17694091796875, + "learning_rate": 1.2588250311626693e-07, + "loss": 19.4016, + "step": 467790 + }, + { + "epoch": 0.9449855969488964, + "grad_norm": 300.2528991699219, + "learning_rate": 1.258046810230562e-07, + "loss": 30.4937, + "step": 467800 + }, + { + "epoch": 0.9450057975815802, + "grad_norm": 406.70208740234375, + "learning_rate": 1.257268826860819e-07, + "loss": 25.9061, + "step": 467810 + }, + { + "epoch": 0.945025998214264, + "grad_norm": 152.28329467773438, + "learning_rate": 1.2564910810572317e-07, + "loss": 10.9667, + "step": 467820 + }, + { + "epoch": 0.9450461988469478, + "grad_norm": 489.2743225097656, + "learning_rate": 1.255713572823608e-07, + "loss": 13.7445, + "step": 467830 + }, + { + "epoch": 
0.9450663994796317, + "grad_norm": 551.2591552734375, + "learning_rate": 1.2549363021637174e-07, + "loss": 14.6357, + "step": 467840 + }, + { + "epoch": 0.9450866001123155, + "grad_norm": 263.2279052734375, + "learning_rate": 1.2541592690813508e-07, + "loss": 17.0744, + "step": 467850 + }, + { + "epoch": 0.9451068007449993, + "grad_norm": 324.44024658203125, + "learning_rate": 1.2533824735803059e-07, + "loss": 19.0699, + "step": 467860 + }, + { + "epoch": 0.9451270013776831, + "grad_norm": 39.75548553466797, + "learning_rate": 1.252605915664362e-07, + "loss": 18.4461, + "step": 467870 + }, + { + "epoch": 0.945147202010367, + "grad_norm": 417.4817810058594, + "learning_rate": 1.2518295953373005e-07, + "loss": 13.1373, + "step": 467880 + }, + { + "epoch": 0.9451674026430508, + "grad_norm": 178.2801971435547, + "learning_rate": 1.2510535126029067e-07, + "loss": 15.0186, + "step": 467890 + }, + { + "epoch": 0.9451876032757346, + "grad_norm": 16.9990177154541, + "learning_rate": 1.2502776674649776e-07, + "loss": 17.0969, + "step": 467900 + }, + { + "epoch": 0.9452078039084184, + "grad_norm": 648.9572143554688, + "learning_rate": 1.2495020599272766e-07, + "loss": 16.402, + "step": 467910 + }, + { + "epoch": 0.9452280045411022, + "grad_norm": 249.67657470703125, + "learning_rate": 1.2487266899935845e-07, + "loss": 12.7127, + "step": 467920 + }, + { + "epoch": 0.945248205173786, + "grad_norm": 551.3310546875, + "learning_rate": 1.2479515576676925e-07, + "loss": 21.3335, + "step": 467930 + }, + { + "epoch": 0.9452684058064699, + "grad_norm": 407.8700256347656, + "learning_rate": 1.24717666295337e-07, + "loss": 13.2619, + "step": 467940 + }, + { + "epoch": 0.9452886064391537, + "grad_norm": 116.41875457763672, + "learning_rate": 1.2464020058543912e-07, + "loss": 10.3969, + "step": 467950 + }, + { + "epoch": 0.9453088070718375, + "grad_norm": 427.3218078613281, + "learning_rate": 1.2456275863745426e-07, + "loss": 14.2084, + "step": 467960 + }, + { + "epoch": 0.9453290077045213, + "grad_norm": 1037.89501953125, + "learning_rate": 1.2448534045175876e-07, + "loss": 23.1926, + "step": 467970 + }, + { + "epoch": 0.9453492083372051, + "grad_norm": 447.3141174316406, + "learning_rate": 1.2440794602873064e-07, + "loss": 14.0446, + "step": 467980 + }, + { + "epoch": 0.945369408969889, + "grad_norm": 353.541015625, + "learning_rate": 1.2433057536874682e-07, + "loss": 8.0339, + "step": 467990 + }, + { + "epoch": 0.9453896096025728, + "grad_norm": 710.0747680664062, + "learning_rate": 1.2425322847218368e-07, + "loss": 22.1698, + "step": 468000 + }, + { + "epoch": 0.9454098102352566, + "grad_norm": 101.60120391845703, + "learning_rate": 1.241759053394198e-07, + "loss": 14.5833, + "step": 468010 + }, + { + "epoch": 0.9454300108679404, + "grad_norm": 127.26005554199219, + "learning_rate": 1.2409860597083102e-07, + "loss": 9.7244, + "step": 468020 + }, + { + "epoch": 0.9454502115006242, + "grad_norm": 360.7369384765625, + "learning_rate": 1.240213303667942e-07, + "loss": 6.6837, + "step": 468030 + }, + { + "epoch": 0.9454704121333081, + "grad_norm": 610.5038452148438, + "learning_rate": 1.239440785276863e-07, + "loss": 30.8797, + "step": 468040 + }, + { + "epoch": 0.9454906127659918, + "grad_norm": 677.2760620117188, + "learning_rate": 1.2386685045388313e-07, + "loss": 18.1932, + "step": 468050 + }, + { + "epoch": 0.9455108133986756, + "grad_norm": 348.82879638671875, + "learning_rate": 1.2378964614576162e-07, + "loss": 23.3082, + "step": 468060 + }, + { + "epoch": 0.9455310140313594, + "grad_norm": 282.5048828125, 
+ "learning_rate": 1.237124656036981e-07, + "loss": 20.5746, + "step": 468070 + }, + { + "epoch": 0.9455512146640432, + "grad_norm": 175.1044158935547, + "learning_rate": 1.236353088280684e-07, + "loss": 17.5479, + "step": 468080 + }, + { + "epoch": 0.9455714152967271, + "grad_norm": 675.5391235351562, + "learning_rate": 1.2355817581924945e-07, + "loss": 11.3713, + "step": 468090 + }, + { + "epoch": 0.9455916159294109, + "grad_norm": 466.1625061035156, + "learning_rate": 1.2348106657761537e-07, + "loss": 10.5448, + "step": 468100 + }, + { + "epoch": 0.9456118165620947, + "grad_norm": 307.6192932128906, + "learning_rate": 1.2340398110354424e-07, + "loss": 15.6597, + "step": 468110 + }, + { + "epoch": 0.9456320171947785, + "grad_norm": 307.61944580078125, + "learning_rate": 1.2332691939741015e-07, + "loss": 7.8915, + "step": 468120 + }, + { + "epoch": 0.9456522178274623, + "grad_norm": 311.0517578125, + "learning_rate": 1.2324988145958895e-07, + "loss": 19.7036, + "step": 468130 + }, + { + "epoch": 0.9456724184601462, + "grad_norm": 85.99491882324219, + "learning_rate": 1.2317286729045586e-07, + "loss": 21.8538, + "step": 468140 + }, + { + "epoch": 0.94569261909283, + "grad_norm": 264.7533264160156, + "learning_rate": 1.2309587689038783e-07, + "loss": 26.0238, + "step": 468150 + }, + { + "epoch": 0.9457128197255138, + "grad_norm": 210.65113830566406, + "learning_rate": 1.2301891025975897e-07, + "loss": 16.9391, + "step": 468160 + }, + { + "epoch": 0.9457330203581976, + "grad_norm": 189.60183715820312, + "learning_rate": 1.229419673989435e-07, + "loss": 17.2061, + "step": 468170 + }, + { + "epoch": 0.9457532209908814, + "grad_norm": 383.91912841796875, + "learning_rate": 1.2286504830831824e-07, + "loss": 19.7732, + "step": 468180 + }, + { + "epoch": 0.9457734216235653, + "grad_norm": 416.5213928222656, + "learning_rate": 1.2278815298825742e-07, + "loss": 25.9267, + "step": 468190 + }, + { + "epoch": 0.9457936222562491, + "grad_norm": 327.18695068359375, + "learning_rate": 1.2271128143913458e-07, + "loss": 29.0271, + "step": 468200 + }, + { + "epoch": 0.9458138228889329, + "grad_norm": 160.49041748046875, + "learning_rate": 1.2263443366132555e-07, + "loss": 7.8713, + "step": 468210 + }, + { + "epoch": 0.9458340235216167, + "grad_norm": 510.4881896972656, + "learning_rate": 1.2255760965520557e-07, + "loss": 21.4823, + "step": 468220 + }, + { + "epoch": 0.9458542241543005, + "grad_norm": 509.4342956542969, + "learning_rate": 1.224808094211477e-07, + "loss": 20.5117, + "step": 468230 + }, + { + "epoch": 0.9458744247869844, + "grad_norm": 307.6473693847656, + "learning_rate": 1.2240403295952662e-07, + "loss": 9.3283, + "step": 468240 + }, + { + "epoch": 0.9458946254196682, + "grad_norm": 509.9915771484375, + "learning_rate": 1.2232728027071704e-07, + "loss": 13.2376, + "step": 468250 + }, + { + "epoch": 0.945914826052352, + "grad_norm": 95.58562469482422, + "learning_rate": 1.222505513550931e-07, + "loss": 9.5526, + "step": 468260 + }, + { + "epoch": 0.9459350266850358, + "grad_norm": 732.8162231445312, + "learning_rate": 1.221738462130273e-07, + "loss": 26.2388, + "step": 468270 + }, + { + "epoch": 0.9459552273177196, + "grad_norm": 83.42688751220703, + "learning_rate": 1.2209716484489543e-07, + "loss": 23.4167, + "step": 468280 + }, + { + "epoch": 0.9459754279504035, + "grad_norm": 442.001220703125, + "learning_rate": 1.2202050725106995e-07, + "loss": 21.2767, + "step": 468290 + }, + { + "epoch": 0.9459956285830872, + "grad_norm": 363.59063720703125, + "learning_rate": 1.2194387343192504e-07, + 
"loss": 16.9433, + "step": 468300 + }, + { + "epoch": 0.946015829215771, + "grad_norm": 130.3279266357422, + "learning_rate": 1.2186726338783427e-07, + "loss": 10.2144, + "step": 468310 + }, + { + "epoch": 0.9460360298484548, + "grad_norm": 277.4386901855469, + "learning_rate": 1.2179067711917015e-07, + "loss": 12.461, + "step": 468320 + }, + { + "epoch": 0.9460562304811386, + "grad_norm": 47.01555633544922, + "learning_rate": 1.2171411462630732e-07, + "loss": 16.815, + "step": 468330 + }, + { + "epoch": 0.9460764311138224, + "grad_norm": 436.15606689453125, + "learning_rate": 1.216375759096178e-07, + "loss": 36.5272, + "step": 468340 + }, + { + "epoch": 0.9460966317465063, + "grad_norm": 708.5038452148438, + "learning_rate": 1.2156106096947563e-07, + "loss": 32.5021, + "step": 468350 + }, + { + "epoch": 0.9461168323791901, + "grad_norm": 106.7961196899414, + "learning_rate": 1.2148456980625223e-07, + "loss": 14.8968, + "step": 468360 + }, + { + "epoch": 0.9461370330118739, + "grad_norm": 335.14959716796875, + "learning_rate": 1.214081024203223e-07, + "loss": 15.8294, + "step": 468370 + }, + { + "epoch": 0.9461572336445577, + "grad_norm": 152.71038818359375, + "learning_rate": 1.2133165881205723e-07, + "loss": 19.1025, + "step": 468380 + }, + { + "epoch": 0.9461774342772415, + "grad_norm": 317.9998779296875, + "learning_rate": 1.2125523898182945e-07, + "loss": 19.3284, + "step": 468390 + }, + { + "epoch": 0.9461976349099254, + "grad_norm": 164.97238159179688, + "learning_rate": 1.211788429300126e-07, + "loss": 18.7075, + "step": 468400 + }, + { + "epoch": 0.9462178355426092, + "grad_norm": 481.8919372558594, + "learning_rate": 1.21102470656978e-07, + "loss": 22.586, + "step": 468410 + }, + { + "epoch": 0.946238036175293, + "grad_norm": 427.742431640625, + "learning_rate": 1.2102612216309816e-07, + "loss": 16.2481, + "step": 468420 + }, + { + "epoch": 0.9462582368079768, + "grad_norm": 82.76959991455078, + "learning_rate": 1.2094979744874502e-07, + "loss": 15.4469, + "step": 468430 + }, + { + "epoch": 0.9462784374406606, + "grad_norm": 502.79638671875, + "learning_rate": 1.2087349651429215e-07, + "loss": 15.7268, + "step": 468440 + }, + { + "epoch": 0.9462986380733445, + "grad_norm": 586.7080688476562, + "learning_rate": 1.207972193601087e-07, + "loss": 13.1939, + "step": 468450 + }, + { + "epoch": 0.9463188387060283, + "grad_norm": 500.2250061035156, + "learning_rate": 1.207209659865677e-07, + "loss": 14.0395, + "step": 468460 + }, + { + "epoch": 0.9463390393387121, + "grad_norm": 637.3262939453125, + "learning_rate": 1.206447363940416e-07, + "loss": 19.9108, + "step": 468470 + }, + { + "epoch": 0.9463592399713959, + "grad_norm": 573.2239990234375, + "learning_rate": 1.205685305829013e-07, + "loss": 25.3511, + "step": 468480 + }, + { + "epoch": 0.9463794406040797, + "grad_norm": 55.16313552856445, + "learning_rate": 1.204923485535181e-07, + "loss": 22.0333, + "step": 468490 + }, + { + "epoch": 0.9463996412367636, + "grad_norm": 374.0496520996094, + "learning_rate": 1.2041619030626283e-07, + "loss": 21.0971, + "step": 468500 + }, + { + "epoch": 0.9464198418694474, + "grad_norm": 87.39411163330078, + "learning_rate": 1.2034005584150854e-07, + "loss": 12.7182, + "step": 468510 + }, + { + "epoch": 0.9464400425021312, + "grad_norm": 320.94329833984375, + "learning_rate": 1.2026394515962382e-07, + "loss": 28.4477, + "step": 468520 + }, + { + "epoch": 0.946460243134815, + "grad_norm": 220.43370056152344, + "learning_rate": 1.2018785826098057e-07, + "loss": 9.1108, + "step": 468530 + }, + { + 
"epoch": 0.9464804437674988, + "grad_norm": 135.63665771484375, + "learning_rate": 1.2011179514595072e-07, + "loss": 30.4911, + "step": 468540 + }, + { + "epoch": 0.9465006444001827, + "grad_norm": 315.3052978515625, + "learning_rate": 1.20035755814904e-07, + "loss": 17.3008, + "step": 468550 + }, + { + "epoch": 0.9465208450328664, + "grad_norm": 32.96565246582031, + "learning_rate": 1.1995974026821066e-07, + "loss": 24.2093, + "step": 468560 + }, + { + "epoch": 0.9465410456655502, + "grad_norm": 148.7790069580078, + "learning_rate": 1.1988374850624208e-07, + "loss": 20.4896, + "step": 468570 + }, + { + "epoch": 0.946561246298234, + "grad_norm": 930.547119140625, + "learning_rate": 1.198077805293679e-07, + "loss": 9.2134, + "step": 468580 + }, + { + "epoch": 0.9465814469309178, + "grad_norm": 237.03515625, + "learning_rate": 1.1973183633795849e-07, + "loss": 52.5735, + "step": 468590 + }, + { + "epoch": 0.9466016475636017, + "grad_norm": 17.60946273803711, + "learning_rate": 1.1965591593238513e-07, + "loss": 10.6422, + "step": 468600 + }, + { + "epoch": 0.9466218481962855, + "grad_norm": 320.0501403808594, + "learning_rate": 1.1958001931301587e-07, + "loss": 13.3763, + "step": 468610 + }, + { + "epoch": 0.9466420488289693, + "grad_norm": 115.49630737304688, + "learning_rate": 1.195041464802227e-07, + "loss": 18.8042, + "step": 468620 + }, + { + "epoch": 0.9466622494616531, + "grad_norm": 32.85114669799805, + "learning_rate": 1.19428297434373e-07, + "loss": 22.0479, + "step": 468630 + }, + { + "epoch": 0.9466824500943369, + "grad_norm": 259.37530517578125, + "learning_rate": 1.1935247217583934e-07, + "loss": 12.3792, + "step": 468640 + }, + { + "epoch": 0.9467026507270208, + "grad_norm": 781.1555786132812, + "learning_rate": 1.1927667070498916e-07, + "loss": 15.1413, + "step": 468650 + }, + { + "epoch": 0.9467228513597046, + "grad_norm": 385.81744384765625, + "learning_rate": 1.1920089302219218e-07, + "loss": 16.4108, + "step": 468660 + }, + { + "epoch": 0.9467430519923884, + "grad_norm": 162.37913513183594, + "learning_rate": 1.1912513912781864e-07, + "loss": 17.8091, + "step": 468670 + }, + { + "epoch": 0.9467632526250722, + "grad_norm": 371.38995361328125, + "learning_rate": 1.1904940902223661e-07, + "loss": 15.9771, + "step": 468680 + }, + { + "epoch": 0.946783453257756, + "grad_norm": 426.4790954589844, + "learning_rate": 1.1897370270581632e-07, + "loss": 25.6471, + "step": 468690 + }, + { + "epoch": 0.9468036538904399, + "grad_norm": 6.888204574584961, + "learning_rate": 1.1889802017892638e-07, + "loss": 14.0545, + "step": 468700 + }, + { + "epoch": 0.9468238545231237, + "grad_norm": 34.13553237915039, + "learning_rate": 1.1882236144193482e-07, + "loss": 19.7614, + "step": 468710 + }, + { + "epoch": 0.9468440551558075, + "grad_norm": 369.91961669921875, + "learning_rate": 1.1874672649521135e-07, + "loss": 26.8512, + "step": 468720 + }, + { + "epoch": 0.9468642557884913, + "grad_norm": 118.35027313232422, + "learning_rate": 1.1867111533912457e-07, + "loss": 10.5861, + "step": 468730 + }, + { + "epoch": 0.9468844564211751, + "grad_norm": 473.2422180175781, + "learning_rate": 1.1859552797404194e-07, + "loss": 11.2697, + "step": 468740 + }, + { + "epoch": 0.946904657053859, + "grad_norm": 0.0, + "learning_rate": 1.185199644003332e-07, + "loss": 18.9192, + "step": 468750 + }, + { + "epoch": 0.9469248576865428, + "grad_norm": 975.2803955078125, + "learning_rate": 1.1844442461836636e-07, + "loss": 27.8736, + "step": 468760 + }, + { + "epoch": 0.9469450583192266, + "grad_norm": 
175.9191436767578, + "learning_rate": 1.1836890862850892e-07, + "loss": 19.3225, + "step": 468770 + }, + { + "epoch": 0.9469652589519104, + "grad_norm": 246.40516662597656, + "learning_rate": 1.1829341643112946e-07, + "loss": 23.1092, + "step": 468780 + }, + { + "epoch": 0.9469854595845942, + "grad_norm": 763.1865844726562, + "learning_rate": 1.1821794802659603e-07, + "loss": 14.0527, + "step": 468790 + }, + { + "epoch": 0.9470056602172781, + "grad_norm": 257.7726135253906, + "learning_rate": 1.1814250341527611e-07, + "loss": 23.4074, + "step": 468800 + }, + { + "epoch": 0.9470258608499619, + "grad_norm": 573.9570922851562, + "learning_rate": 1.1806708259753718e-07, + "loss": 20.0437, + "step": 468810 + }, + { + "epoch": 0.9470460614826456, + "grad_norm": 201.02435302734375, + "learning_rate": 1.179916855737473e-07, + "loss": 16.4613, + "step": 468820 + }, + { + "epoch": 0.9470662621153294, + "grad_norm": 245.06533813476562, + "learning_rate": 1.1791631234427448e-07, + "loss": 14.38, + "step": 468830 + }, + { + "epoch": 0.9470864627480132, + "grad_norm": 862.9598999023438, + "learning_rate": 1.1784096290948455e-07, + "loss": 24.3121, + "step": 468840 + }, + { + "epoch": 0.947106663380697, + "grad_norm": 560.0054321289062, + "learning_rate": 1.177656372697461e-07, + "loss": 15.7914, + "step": 468850 + }, + { + "epoch": 0.9471268640133809, + "grad_norm": 175.73483276367188, + "learning_rate": 1.1769033542542552e-07, + "loss": 6.7914, + "step": 468860 + }, + { + "epoch": 0.9471470646460647, + "grad_norm": 291.7281188964844, + "learning_rate": 1.1761505737689082e-07, + "loss": 18.2273, + "step": 468870 + }, + { + "epoch": 0.9471672652787485, + "grad_norm": 338.5940856933594, + "learning_rate": 1.175398031245073e-07, + "loss": 12.2181, + "step": 468880 + }, + { + "epoch": 0.9471874659114323, + "grad_norm": 272.7087707519531, + "learning_rate": 1.1746457266864297e-07, + "loss": 13.4214, + "step": 468890 + }, + { + "epoch": 0.9472076665441161, + "grad_norm": 311.3247985839844, + "learning_rate": 1.1738936600966366e-07, + "loss": 14.7226, + "step": 468900 + }, + { + "epoch": 0.9472278671768, + "grad_norm": 291.2148132324219, + "learning_rate": 1.173141831479374e-07, + "loss": 17.4352, + "step": 468910 + }, + { + "epoch": 0.9472480678094838, + "grad_norm": 206.57376098632812, + "learning_rate": 1.1723902408382892e-07, + "loss": 20.1185, + "step": 468920 + }, + { + "epoch": 0.9472682684421676, + "grad_norm": 440.7289733886719, + "learning_rate": 1.1716388881770513e-07, + "loss": 27.5187, + "step": 468930 + }, + { + "epoch": 0.9472884690748514, + "grad_norm": 608.8489379882812, + "learning_rate": 1.1708877734993296e-07, + "loss": 20.8657, + "step": 468940 + }, + { + "epoch": 0.9473086697075352, + "grad_norm": 202.66183471679688, + "learning_rate": 1.1701368968087711e-07, + "loss": 7.8949, + "step": 468950 + }, + { + "epoch": 0.9473288703402191, + "grad_norm": 296.52935791015625, + "learning_rate": 1.1693862581090453e-07, + "loss": 13.0502, + "step": 468960 + }, + { + "epoch": 0.9473490709729029, + "grad_norm": 659.4253540039062, + "learning_rate": 1.1686358574038104e-07, + "loss": 20.7425, + "step": 468970 + }, + { + "epoch": 0.9473692716055867, + "grad_norm": 132.4563751220703, + "learning_rate": 1.1678856946967244e-07, + "loss": 17.9443, + "step": 468980 + }, + { + "epoch": 0.9473894722382705, + "grad_norm": 633.8014526367188, + "learning_rate": 1.1671357699914343e-07, + "loss": 15.5557, + "step": 468990 + }, + { + "epoch": 0.9474096728709543, + "grad_norm": 66.96441650390625, + "learning_rate": 
1.166386083291604e-07, + "loss": 15.0395, + "step": 469000 + }, + { + "epoch": 0.9474298735036382, + "grad_norm": 582.8804931640625, + "learning_rate": 1.1656366346008862e-07, + "loss": 22.1104, + "step": 469010 + }, + { + "epoch": 0.947450074136322, + "grad_norm": 347.63812255859375, + "learning_rate": 1.1648874239229391e-07, + "loss": 16.2601, + "step": 469020 + }, + { + "epoch": 0.9474702747690058, + "grad_norm": 283.6161193847656, + "learning_rate": 1.1641384512613985e-07, + "loss": 12.2617, + "step": 469030 + }, + { + "epoch": 0.9474904754016896, + "grad_norm": 430.0794677734375, + "learning_rate": 1.1633897166199227e-07, + "loss": 19.0489, + "step": 469040 + }, + { + "epoch": 0.9475106760343734, + "grad_norm": 315.4210510253906, + "learning_rate": 1.1626412200021697e-07, + "loss": 11.05, + "step": 469050 + }, + { + "epoch": 0.9475308766670573, + "grad_norm": 376.3665466308594, + "learning_rate": 1.1618929614117757e-07, + "loss": 21.5259, + "step": 469060 + }, + { + "epoch": 0.947551077299741, + "grad_norm": 45.962337493896484, + "learning_rate": 1.1611449408523879e-07, + "loss": 21.0688, + "step": 469070 + }, + { + "epoch": 0.9475712779324248, + "grad_norm": 248.42181396484375, + "learning_rate": 1.1603971583276641e-07, + "loss": 11.9986, + "step": 469080 + }, + { + "epoch": 0.9475914785651086, + "grad_norm": 26.90751838684082, + "learning_rate": 1.1596496138412405e-07, + "loss": 28.1198, + "step": 469090 + }, + { + "epoch": 0.9476116791977924, + "grad_norm": 678.9092407226562, + "learning_rate": 1.1589023073967586e-07, + "loss": 20.818, + "step": 469100 + }, + { + "epoch": 0.9476318798304763, + "grad_norm": 7.6660566329956055, + "learning_rate": 1.1581552389978601e-07, + "loss": 9.1425, + "step": 469110 + }, + { + "epoch": 0.9476520804631601, + "grad_norm": 273.488037109375, + "learning_rate": 1.1574084086481973e-07, + "loss": 23.9087, + "step": 469120 + }, + { + "epoch": 0.9476722810958439, + "grad_norm": 672.782958984375, + "learning_rate": 1.1566618163513954e-07, + "loss": 12.4417, + "step": 469130 + }, + { + "epoch": 0.9476924817285277, + "grad_norm": 28.96516990661621, + "learning_rate": 1.1559154621110957e-07, + "loss": 6.9446, + "step": 469140 + }, + { + "epoch": 0.9477126823612115, + "grad_norm": 1065.329833984375, + "learning_rate": 1.155169345930951e-07, + "loss": 20.1584, + "step": 469150 + }, + { + "epoch": 0.9477328829938954, + "grad_norm": 578.9161987304688, + "learning_rate": 1.1544234678145805e-07, + "loss": 22.021, + "step": 469160 + }, + { + "epoch": 0.9477530836265792, + "grad_norm": 99.24224853515625, + "learning_rate": 1.1536778277656258e-07, + "loss": 11.5046, + "step": 469170 + }, + { + "epoch": 0.947773284259263, + "grad_norm": 570.20166015625, + "learning_rate": 1.1529324257877228e-07, + "loss": 33.5162, + "step": 469180 + }, + { + "epoch": 0.9477934848919468, + "grad_norm": 138.69076538085938, + "learning_rate": 1.152187261884502e-07, + "loss": 13.3612, + "step": 469190 + }, + { + "epoch": 0.9478136855246306, + "grad_norm": 892.8467407226562, + "learning_rate": 1.1514423360595939e-07, + "loss": 14.4001, + "step": 469200 + }, + { + "epoch": 0.9478338861573145, + "grad_norm": 1641.258544921875, + "learning_rate": 1.1506976483166343e-07, + "loss": 22.1854, + "step": 469210 + }, + { + "epoch": 0.9478540867899983, + "grad_norm": 1146.048583984375, + "learning_rate": 1.1499531986592482e-07, + "loss": 25.4028, + "step": 469220 + }, + { + "epoch": 0.9478742874226821, + "grad_norm": 272.59130859375, + "learning_rate": 1.1492089870910662e-07, + "loss": 17.5598, + 
"step": 469230 + }, + { + "epoch": 0.9478944880553659, + "grad_norm": 321.302734375, + "learning_rate": 1.1484650136157127e-07, + "loss": 21.2706, + "step": 469240 + }, + { + "epoch": 0.9479146886880497, + "grad_norm": 702.2314453125, + "learning_rate": 1.1477212782368185e-07, + "loss": 15.2294, + "step": 469250 + }, + { + "epoch": 0.9479348893207336, + "grad_norm": 371.1092834472656, + "learning_rate": 1.1469777809580084e-07, + "loss": 21.5585, + "step": 469260 + }, + { + "epoch": 0.9479550899534174, + "grad_norm": 443.08245849609375, + "learning_rate": 1.1462345217828963e-07, + "loss": 10.0246, + "step": 469270 + }, + { + "epoch": 0.9479752905861012, + "grad_norm": 476.6809997558594, + "learning_rate": 1.1454915007151179e-07, + "loss": 17.9244, + "step": 469280 + }, + { + "epoch": 0.947995491218785, + "grad_norm": 626.3543090820312, + "learning_rate": 1.1447487177582816e-07, + "loss": 18.527, + "step": 469290 + }, + { + "epoch": 0.9480156918514688, + "grad_norm": 466.4951477050781, + "learning_rate": 1.1440061729160235e-07, + "loss": 21.489, + "step": 469300 + }, + { + "epoch": 0.9480358924841527, + "grad_norm": 11.767196655273438, + "learning_rate": 1.1432638661919515e-07, + "loss": 10.4186, + "step": 469310 + }, + { + "epoch": 0.9480560931168365, + "grad_norm": 308.8578796386719, + "learning_rate": 1.1425217975896796e-07, + "loss": 12.2751, + "step": 469320 + }, + { + "epoch": 0.9480762937495202, + "grad_norm": 282.1939392089844, + "learning_rate": 1.1417799671128327e-07, + "loss": 15.9067, + "step": 469330 + }, + { + "epoch": 0.948096494382204, + "grad_norm": 348.49114990234375, + "learning_rate": 1.14103837476503e-07, + "loss": 17.374, + "step": 469340 + }, + { + "epoch": 0.9481166950148878, + "grad_norm": 595.0879516601562, + "learning_rate": 1.1402970205498742e-07, + "loss": 27.4214, + "step": 469350 + }, + { + "epoch": 0.9481368956475716, + "grad_norm": 424.2049255371094, + "learning_rate": 1.1395559044709848e-07, + "loss": 11.7823, + "step": 469360 + }, + { + "epoch": 0.9481570962802555, + "grad_norm": 488.8497009277344, + "learning_rate": 1.1388150265319808e-07, + "loss": 13.7173, + "step": 469370 + }, + { + "epoch": 0.9481772969129393, + "grad_norm": 382.3506164550781, + "learning_rate": 1.1380743867364596e-07, + "loss": 17.2933, + "step": 469380 + }, + { + "epoch": 0.9481974975456231, + "grad_norm": 171.2713623046875, + "learning_rate": 1.1373339850880405e-07, + "loss": 17.9502, + "step": 469390 + }, + { + "epoch": 0.9482176981783069, + "grad_norm": 338.79010009765625, + "learning_rate": 1.136593821590326e-07, + "loss": 11.2293, + "step": 469400 + }, + { + "epoch": 0.9482378988109907, + "grad_norm": 173.9486541748047, + "learning_rate": 1.1358538962469356e-07, + "loss": 21.067, + "step": 469410 + }, + { + "epoch": 0.9482580994436746, + "grad_norm": 574.244873046875, + "learning_rate": 1.1351142090614553e-07, + "loss": 22.1407, + "step": 469420 + }, + { + "epoch": 0.9482783000763584, + "grad_norm": 132.3253936767578, + "learning_rate": 1.1343747600375044e-07, + "loss": 12.5369, + "step": 469430 + }, + { + "epoch": 0.9482985007090422, + "grad_norm": 91.43624114990234, + "learning_rate": 1.1336355491786966e-07, + "loss": 15.2691, + "step": 469440 + }, + { + "epoch": 0.948318701341726, + "grad_norm": 191.69607543945312, + "learning_rate": 1.1328965764886069e-07, + "loss": 23.2678, + "step": 469450 + }, + { + "epoch": 0.9483389019744098, + "grad_norm": 117.98069763183594, + "learning_rate": 1.1321578419708545e-07, + "loss": 18.1865, + "step": 469460 + }, + { + "epoch": 
0.9483591026070937, + "grad_norm": 142.21937561035156, + "learning_rate": 1.1314193456290424e-07, + "loss": 22.8095, + "step": 469470 + }, + { + "epoch": 0.9483793032397775, + "grad_norm": 17.312837600708008, + "learning_rate": 1.1306810874667673e-07, + "loss": 18.3314, + "step": 469480 + }, + { + "epoch": 0.9483995038724613, + "grad_norm": 365.4279479980469, + "learning_rate": 1.129943067487621e-07, + "loss": 15.4938, + "step": 469490 + }, + { + "epoch": 0.9484197045051451, + "grad_norm": 610.1925659179688, + "learning_rate": 1.1292052856952063e-07, + "loss": 25.1791, + "step": 469500 + }, + { + "epoch": 0.9484399051378289, + "grad_norm": 28.289342880249023, + "learning_rate": 1.1284677420931201e-07, + "loss": 9.2474, + "step": 469510 + }, + { + "epoch": 0.9484601057705128, + "grad_norm": 530.5923461914062, + "learning_rate": 1.1277304366849539e-07, + "loss": 16.9483, + "step": 469520 + }, + { + "epoch": 0.9484803064031966, + "grad_norm": 376.1013488769531, + "learning_rate": 1.1269933694742996e-07, + "loss": 26.9136, + "step": 469530 + }, + { + "epoch": 0.9485005070358804, + "grad_norm": 743.5037231445312, + "learning_rate": 1.1262565404647485e-07, + "loss": 16.8922, + "step": 469540 + }, + { + "epoch": 0.9485207076685642, + "grad_norm": 290.6438293457031, + "learning_rate": 1.1255199496599034e-07, + "loss": 13.4206, + "step": 469550 + }, + { + "epoch": 0.948540908301248, + "grad_norm": 591.5577392578125, + "learning_rate": 1.1247835970633392e-07, + "loss": 15.4233, + "step": 469560 + }, + { + "epoch": 0.9485611089339319, + "grad_norm": 413.7424621582031, + "learning_rate": 1.1240474826786585e-07, + "loss": 11.5132, + "step": 469570 + }, + { + "epoch": 0.9485813095666156, + "grad_norm": 81.66283416748047, + "learning_rate": 1.1233116065094363e-07, + "loss": 17.804, + "step": 469580 + }, + { + "epoch": 0.9486015101992994, + "grad_norm": 712.2706909179688, + "learning_rate": 1.1225759685592697e-07, + "loss": 17.38, + "step": 469590 + }, + { + "epoch": 0.9486217108319832, + "grad_norm": 451.8149108886719, + "learning_rate": 1.1218405688317447e-07, + "loss": 8.9866, + "step": 469600 + }, + { + "epoch": 0.948641911464667, + "grad_norm": 286.841796875, + "learning_rate": 1.1211054073304305e-07, + "loss": 14.9223, + "step": 469610 + }, + { + "epoch": 0.9486621120973509, + "grad_norm": 352.9850769042969, + "learning_rate": 1.1203704840589247e-07, + "loss": 13.3507, + "step": 469620 + }, + { + "epoch": 0.9486823127300347, + "grad_norm": 10.075496673583984, + "learning_rate": 1.1196357990208074e-07, + "loss": 13.7039, + "step": 469630 + }, + { + "epoch": 0.9487025133627185, + "grad_norm": 428.2384948730469, + "learning_rate": 1.1189013522196479e-07, + "loss": 24.0886, + "step": 469640 + }, + { + "epoch": 0.9487227139954023, + "grad_norm": 123.3632583618164, + "learning_rate": 1.118167143659038e-07, + "loss": 9.5288, + "step": 469650 + }, + { + "epoch": 0.9487429146280861, + "grad_norm": 335.2364196777344, + "learning_rate": 1.1174331733425636e-07, + "loss": 19.4018, + "step": 469660 + }, + { + "epoch": 0.94876311526077, + "grad_norm": 423.3990173339844, + "learning_rate": 1.1166994412737774e-07, + "loss": 23.5129, + "step": 469670 + }, + { + "epoch": 0.9487833158934538, + "grad_norm": 360.9956359863281, + "learning_rate": 1.1159659474562712e-07, + "loss": 13.3685, + "step": 469680 + }, + { + "epoch": 0.9488035165261376, + "grad_norm": 478.0350341796875, + "learning_rate": 1.1152326918936251e-07, + "loss": 24.2518, + "step": 469690 + }, + { + "epoch": 0.9488237171588214, + "grad_norm": 
577.0175170898438, + "learning_rate": 1.1144996745894033e-07, + "loss": 28.4371, + "step": 469700 + }, + { + "epoch": 0.9488439177915052, + "grad_norm": 551.7817993164062, + "learning_rate": 1.1137668955471803e-07, + "loss": 10.8502, + "step": 469710 + }, + { + "epoch": 0.948864118424189, + "grad_norm": 1055.144287109375, + "learning_rate": 1.1130343547705257e-07, + "loss": 26.8408, + "step": 469720 + }, + { + "epoch": 0.9488843190568729, + "grad_norm": 444.7875061035156, + "learning_rate": 1.1123020522630202e-07, + "loss": 24.6248, + "step": 469730 + }, + { + "epoch": 0.9489045196895567, + "grad_norm": 305.60711669921875, + "learning_rate": 1.111569988028216e-07, + "loss": 26.628, + "step": 469740 + }, + { + "epoch": 0.9489247203222405, + "grad_norm": 499.5113220214844, + "learning_rate": 1.1108381620696885e-07, + "loss": 13.9902, + "step": 469750 + }, + { + "epoch": 0.9489449209549243, + "grad_norm": 288.8736267089844, + "learning_rate": 1.1101065743910122e-07, + "loss": 15.2388, + "step": 469760 + }, + { + "epoch": 0.9489651215876082, + "grad_norm": 506.89111328125, + "learning_rate": 1.1093752249957512e-07, + "loss": 22.3633, + "step": 469770 + }, + { + "epoch": 0.948985322220292, + "grad_norm": 421.3926086425781, + "learning_rate": 1.1086441138874581e-07, + "loss": 35.1609, + "step": 469780 + }, + { + "epoch": 0.9490055228529758, + "grad_norm": 636.1416015625, + "learning_rate": 1.107913241069708e-07, + "loss": 17.0688, + "step": 469790 + }, + { + "epoch": 0.9490257234856596, + "grad_norm": 95.65436553955078, + "learning_rate": 1.107182606546059e-07, + "loss": 6.2457, + "step": 469800 + }, + { + "epoch": 0.9490459241183434, + "grad_norm": 57.0366096496582, + "learning_rate": 1.1064522103200636e-07, + "loss": 10.7217, + "step": 469810 + }, + { + "epoch": 0.9490661247510273, + "grad_norm": 331.9047546386719, + "learning_rate": 1.1057220523953027e-07, + "loss": 8.9685, + "step": 469820 + }, + { + "epoch": 0.9490863253837111, + "grad_norm": 151.98440551757812, + "learning_rate": 1.1049921327753121e-07, + "loss": 9.8115, + "step": 469830 + }, + { + "epoch": 0.9491065260163948, + "grad_norm": 602.0835571289062, + "learning_rate": 1.1042624514636669e-07, + "loss": 16.8972, + "step": 469840 + }, + { + "epoch": 0.9491267266490786, + "grad_norm": 357.6507873535156, + "learning_rate": 1.1035330084639084e-07, + "loss": 9.9532, + "step": 469850 + }, + { + "epoch": 0.9491469272817624, + "grad_norm": 330.49566650390625, + "learning_rate": 1.1028038037796063e-07, + "loss": 13.1317, + "step": 469860 + }, + { + "epoch": 0.9491671279144462, + "grad_norm": 263.4090881347656, + "learning_rate": 1.1020748374143075e-07, + "loss": 17.9668, + "step": 469870 + }, + { + "epoch": 0.9491873285471301, + "grad_norm": 231.7461700439453, + "learning_rate": 1.1013461093715594e-07, + "loss": 9.0968, + "step": 469880 + }, + { + "epoch": 0.9492075291798139, + "grad_norm": 209.28048706054688, + "learning_rate": 1.1006176196549256e-07, + "loss": 6.7867, + "step": 469890 + }, + { + "epoch": 0.9492277298124977, + "grad_norm": 297.8096008300781, + "learning_rate": 1.0998893682679479e-07, + "loss": 23.6673, + "step": 469900 + }, + { + "epoch": 0.9492479304451815, + "grad_norm": 334.8628234863281, + "learning_rate": 1.099161355214179e-07, + "loss": 14.8809, + "step": 469910 + }, + { + "epoch": 0.9492681310778653, + "grad_norm": 610.1708374023438, + "learning_rate": 1.0984335804971713e-07, + "loss": 16.09, + "step": 469920 + }, + { + "epoch": 0.9492883317105492, + "grad_norm": 32.4946403503418, + "learning_rate": 
1.0977060441204612e-07, + "loss": 12.6552, + "step": 469930 + }, + { + "epoch": 0.949308532343233, + "grad_norm": 275.5940856933594, + "learning_rate": 1.0969787460876013e-07, + "loss": 13.3029, + "step": 469940 + }, + { + "epoch": 0.9493287329759168, + "grad_norm": 345.1875305175781, + "learning_rate": 1.0962516864021388e-07, + "loss": 14.9644, + "step": 469950 + }, + { + "epoch": 0.9493489336086006, + "grad_norm": 191.85562133789062, + "learning_rate": 1.0955248650676154e-07, + "loss": 14.6402, + "step": 469960 + }, + { + "epoch": 0.9493691342412844, + "grad_norm": 325.9012145996094, + "learning_rate": 1.0947982820875669e-07, + "loss": 18.6594, + "step": 469970 + }, + { + "epoch": 0.9493893348739683, + "grad_norm": 841.303955078125, + "learning_rate": 1.0940719374655462e-07, + "loss": 34.0621, + "step": 469980 + }, + { + "epoch": 0.9494095355066521, + "grad_norm": 264.1408386230469, + "learning_rate": 1.0933458312050837e-07, + "loss": 19.1478, + "step": 469990 + }, + { + "epoch": 0.9494297361393359, + "grad_norm": 6.626099109649658, + "learning_rate": 1.0926199633097156e-07, + "loss": 11.4435, + "step": 470000 + }, + { + "epoch": 0.9494499367720197, + "grad_norm": 991.1474609375, + "learning_rate": 1.0918943337829945e-07, + "loss": 29.941, + "step": 470010 + }, + { + "epoch": 0.9494701374047035, + "grad_norm": 58.44243240356445, + "learning_rate": 1.091168942628451e-07, + "loss": 9.7997, + "step": 470020 + }, + { + "epoch": 0.9494903380373874, + "grad_norm": 160.3310089111328, + "learning_rate": 1.09044378984961e-07, + "loss": 13.4106, + "step": 470030 + }, + { + "epoch": 0.9495105386700712, + "grad_norm": 809.5315551757812, + "learning_rate": 1.0897188754500187e-07, + "loss": 20.2929, + "step": 470040 + }, + { + "epoch": 0.949530739302755, + "grad_norm": 105.98553466796875, + "learning_rate": 1.0889941994332077e-07, + "loss": 14.2779, + "step": 470050 + }, + { + "epoch": 0.9495509399354388, + "grad_norm": 900.638671875, + "learning_rate": 1.0882697618027016e-07, + "loss": 15.7529, + "step": 470060 + }, + { + "epoch": 0.9495711405681226, + "grad_norm": 212.1917266845703, + "learning_rate": 1.0875455625620368e-07, + "loss": 25.6759, + "step": 470070 + }, + { + "epoch": 0.9495913412008065, + "grad_norm": 913.4801635742188, + "learning_rate": 1.0868216017147437e-07, + "loss": 29.1505, + "step": 470080 + }, + { + "epoch": 0.9496115418334903, + "grad_norm": 361.5392150878906, + "learning_rate": 1.0860978792643528e-07, + "loss": 10.5928, + "step": 470090 + }, + { + "epoch": 0.949631742466174, + "grad_norm": 346.39202880859375, + "learning_rate": 1.0853743952143836e-07, + "loss": 13.5348, + "step": 470100 + }, + { + "epoch": 0.9496519430988578, + "grad_norm": 312.23748779296875, + "learning_rate": 1.084651149568372e-07, + "loss": 13.6943, + "step": 470110 + }, + { + "epoch": 0.9496721437315416, + "grad_norm": 496.2864074707031, + "learning_rate": 1.0839281423298375e-07, + "loss": 13.6047, + "step": 470120 + }, + { + "epoch": 0.9496923443642254, + "grad_norm": 494.9438781738281, + "learning_rate": 1.0832053735022996e-07, + "loss": 14.4703, + "step": 470130 + }, + { + "epoch": 0.9497125449969093, + "grad_norm": 574.4812622070312, + "learning_rate": 1.0824828430892831e-07, + "loss": 28.3906, + "step": 470140 + }, + { + "epoch": 0.9497327456295931, + "grad_norm": 630.9834594726562, + "learning_rate": 1.0817605510943241e-07, + "loss": 15.1082, + "step": 470150 + }, + { + "epoch": 0.9497529462622769, + "grad_norm": 492.0447692871094, + "learning_rate": 1.0810384975209254e-07, + "loss": 17.1864, + 
"step": 470160 + }, + { + "epoch": 0.9497731468949607, + "grad_norm": 292.03009033203125, + "learning_rate": 1.0803166823726064e-07, + "loss": 19.4169, + "step": 470170 + }, + { + "epoch": 0.9497933475276445, + "grad_norm": 491.9415588378906, + "learning_rate": 1.0795951056528974e-07, + "loss": 21.1524, + "step": 470180 + }, + { + "epoch": 0.9498135481603284, + "grad_norm": 447.0008850097656, + "learning_rate": 1.0788737673653072e-07, + "loss": 30.879, + "step": 470190 + }, + { + "epoch": 0.9498337487930122, + "grad_norm": 475.52154541015625, + "learning_rate": 1.0781526675133492e-07, + "loss": 27.2527, + "step": 470200 + }, + { + "epoch": 0.949853949425696, + "grad_norm": 681.0592651367188, + "learning_rate": 1.0774318061005484e-07, + "loss": 16.0728, + "step": 470210 + }, + { + "epoch": 0.9498741500583798, + "grad_norm": 284.41815185546875, + "learning_rate": 1.0767111831304022e-07, + "loss": 16.9715, + "step": 470220 + }, + { + "epoch": 0.9498943506910636, + "grad_norm": 381.9979248046875, + "learning_rate": 1.0759907986064411e-07, + "loss": 17.1089, + "step": 470230 + }, + { + "epoch": 0.9499145513237475, + "grad_norm": 362.4442443847656, + "learning_rate": 1.0752706525321622e-07, + "loss": 11.5909, + "step": 470240 + }, + { + "epoch": 0.9499347519564313, + "grad_norm": 117.89044952392578, + "learning_rate": 1.0745507449110792e-07, + "loss": 17.8032, + "step": 470250 + }, + { + "epoch": 0.9499549525891151, + "grad_norm": 243.0460662841797, + "learning_rate": 1.0738310757467064e-07, + "loss": 18.7897, + "step": 470260 + }, + { + "epoch": 0.9499751532217989, + "grad_norm": 157.89682006835938, + "learning_rate": 1.0731116450425461e-07, + "loss": 12.373, + "step": 470270 + }, + { + "epoch": 0.9499953538544827, + "grad_norm": 399.5901184082031, + "learning_rate": 1.0723924528021012e-07, + "loss": 18.7036, + "step": 470280 + }, + { + "epoch": 0.9500155544871666, + "grad_norm": 314.33355712890625, + "learning_rate": 1.0716734990288801e-07, + "loss": 21.0218, + "step": 470290 + }, + { + "epoch": 0.9500357551198504, + "grad_norm": 707.6248779296875, + "learning_rate": 1.0709547837263967e-07, + "loss": 20.888, + "step": 470300 + }, + { + "epoch": 0.9500559557525342, + "grad_norm": 880.1218872070312, + "learning_rate": 1.0702363068981425e-07, + "loss": 36.0962, + "step": 470310 + }, + { + "epoch": 0.950076156385218, + "grad_norm": 407.9019775390625, + "learning_rate": 1.0695180685476148e-07, + "loss": 16.846, + "step": 470320 + }, + { + "epoch": 0.9500963570179018, + "grad_norm": 439.6984558105469, + "learning_rate": 1.0688000686783272e-07, + "loss": 15.0265, + "step": 470330 + }, + { + "epoch": 0.9501165576505857, + "grad_norm": 486.39654541015625, + "learning_rate": 1.0680823072937774e-07, + "loss": 20.4392, + "step": 470340 + }, + { + "epoch": 0.9501367582832694, + "grad_norm": 271.5041198730469, + "learning_rate": 1.067364784397451e-07, + "loss": 23.7964, + "step": 470350 + }, + { + "epoch": 0.9501569589159532, + "grad_norm": 1043.4071044921875, + "learning_rate": 1.0666474999928566e-07, + "loss": 27.549, + "step": 470360 + }, + { + "epoch": 0.950177159548637, + "grad_norm": 495.0008239746094, + "learning_rate": 1.0659304540834914e-07, + "loss": 16.5035, + "step": 470370 + }, + { + "epoch": 0.9501973601813208, + "grad_norm": 503.79620361328125, + "learning_rate": 1.0652136466728468e-07, + "loss": 18.311, + "step": 470380 + }, + { + "epoch": 0.9502175608140047, + "grad_norm": 629.4742431640625, + "learning_rate": 1.0644970777644093e-07, + "loss": 8.9288, + "step": 470390 + }, + { + "epoch": 
0.9502377614466885, + "grad_norm": 632.9424438476562, + "learning_rate": 1.0637807473616812e-07, + "loss": 36.336, + "step": 470400 + }, + { + "epoch": 0.9502579620793723, + "grad_norm": 541.6952514648438, + "learning_rate": 1.0630646554681545e-07, + "loss": 18.185, + "step": 470410 + }, + { + "epoch": 0.9502781627120561, + "grad_norm": 427.49407958984375, + "learning_rate": 1.0623488020873097e-07, + "loss": 24.8249, + "step": 470420 + }, + { + "epoch": 0.9502983633447399, + "grad_norm": 368.714599609375, + "learning_rate": 1.0616331872226437e-07, + "loss": 17.8403, + "step": 470430 + }, + { + "epoch": 0.9503185639774238, + "grad_norm": 441.51666259765625, + "learning_rate": 1.0609178108776375e-07, + "loss": 14.8681, + "step": 470440 + }, + { + "epoch": 0.9503387646101076, + "grad_norm": 1.7728757858276367, + "learning_rate": 1.0602026730557879e-07, + "loss": 16.306, + "step": 470450 + }, + { + "epoch": 0.9503589652427914, + "grad_norm": 660.8228759765625, + "learning_rate": 1.0594877737605702e-07, + "loss": 13.973, + "step": 470460 + }, + { + "epoch": 0.9503791658754752, + "grad_norm": 421.2904357910156, + "learning_rate": 1.0587731129954815e-07, + "loss": 16.3852, + "step": 470470 + }, + { + "epoch": 0.950399366508159, + "grad_norm": 426.2768249511719, + "learning_rate": 1.0580586907639912e-07, + "loss": 14.3896, + "step": 470480 + }, + { + "epoch": 0.9504195671408429, + "grad_norm": 1023.1954956054688, + "learning_rate": 1.0573445070695853e-07, + "loss": 15.0185, + "step": 470490 + }, + { + "epoch": 0.9504397677735267, + "grad_norm": 269.4640808105469, + "learning_rate": 1.0566305619157502e-07, + "loss": 20.0318, + "step": 470500 + }, + { + "epoch": 0.9504599684062105, + "grad_norm": 215.77854919433594, + "learning_rate": 1.0559168553059551e-07, + "loss": 26.3668, + "step": 470510 + }, + { + "epoch": 0.9504801690388943, + "grad_norm": 280.6916809082031, + "learning_rate": 1.0552033872436917e-07, + "loss": 13.8537, + "step": 470520 + }, + { + "epoch": 0.9505003696715781, + "grad_norm": 436.7329406738281, + "learning_rate": 1.0544901577324351e-07, + "loss": 15.7911, + "step": 470530 + }, + { + "epoch": 0.950520570304262, + "grad_norm": 314.02001953125, + "learning_rate": 1.0537771667756436e-07, + "loss": 16.0215, + "step": 470540 + }, + { + "epoch": 0.9505407709369458, + "grad_norm": 383.9037170410156, + "learning_rate": 1.0530644143768143e-07, + "loss": 18.3283, + "step": 470550 + }, + { + "epoch": 0.9505609715696296, + "grad_norm": 140.77203369140625, + "learning_rate": 1.0523519005394167e-07, + "loss": 18.8596, + "step": 470560 + }, + { + "epoch": 0.9505811722023134, + "grad_norm": 368.7960205078125, + "learning_rate": 1.0516396252669092e-07, + "loss": 19.5221, + "step": 470570 + }, + { + "epoch": 0.9506013728349972, + "grad_norm": 476.341796875, + "learning_rate": 1.0509275885627779e-07, + "loss": 14.5584, + "step": 470580 + }, + { + "epoch": 0.9506215734676811, + "grad_norm": 769.5809326171875, + "learning_rate": 1.0502157904304866e-07, + "loss": 15.2152, + "step": 470590 + }, + { + "epoch": 0.9506417741003649, + "grad_norm": 667.5062255859375, + "learning_rate": 1.0495042308735104e-07, + "loss": 20.4751, + "step": 470600 + }, + { + "epoch": 0.9506619747330486, + "grad_norm": 14.904691696166992, + "learning_rate": 1.0487929098953131e-07, + "loss": 12.7858, + "step": 470610 + }, + { + "epoch": 0.9506821753657324, + "grad_norm": 1079.2874755859375, + "learning_rate": 1.0480818274993587e-07, + "loss": 11.7952, + "step": 470620 + }, + { + "epoch": 0.9507023759984162, + "grad_norm": 
685.9215087890625, + "learning_rate": 1.0473709836891222e-07, + "loss": 11.9369, + "step": 470630 + }, + { + "epoch": 0.9507225766311, + "grad_norm": 289.9880065917969, + "learning_rate": 1.0466603784680562e-07, + "loss": 15.8206, + "step": 470640 + }, + { + "epoch": 0.9507427772637839, + "grad_norm": 980.9338989257812, + "learning_rate": 1.0459500118396304e-07, + "loss": 19.923, + "step": 470650 + }, + { + "epoch": 0.9507629778964677, + "grad_norm": 817.2350463867188, + "learning_rate": 1.0452398838073141e-07, + "loss": 11.4897, + "step": 470660 + }, + { + "epoch": 0.9507831785291515, + "grad_norm": 450.24676513671875, + "learning_rate": 1.0445299943745546e-07, + "loss": 18.5014, + "step": 470670 + }, + { + "epoch": 0.9508033791618353, + "grad_norm": 589.586669921875, + "learning_rate": 1.0438203435448157e-07, + "loss": 42.9503, + "step": 470680 + }, + { + "epoch": 0.9508235797945191, + "grad_norm": 976.338623046875, + "learning_rate": 1.0431109313215671e-07, + "loss": 30.7082, + "step": 470690 + }, + { + "epoch": 0.950843780427203, + "grad_norm": 527.5633544921875, + "learning_rate": 1.0424017577082556e-07, + "loss": 13.4547, + "step": 470700 + }, + { + "epoch": 0.9508639810598868, + "grad_norm": 1180.8414306640625, + "learning_rate": 1.0416928227083345e-07, + "loss": 31.8732, + "step": 470710 + }, + { + "epoch": 0.9508841816925706, + "grad_norm": 434.7847900390625, + "learning_rate": 1.0409841263252673e-07, + "loss": 18.8544, + "step": 470720 + }, + { + "epoch": 0.9509043823252544, + "grad_norm": 335.68133544921875, + "learning_rate": 1.040275668562507e-07, + "loss": 19.671, + "step": 470730 + }, + { + "epoch": 0.9509245829579382, + "grad_norm": 181.6182403564453, + "learning_rate": 1.0395674494235064e-07, + "loss": 24.6191, + "step": 470740 + }, + { + "epoch": 0.9509447835906221, + "grad_norm": 255.40704345703125, + "learning_rate": 1.038859468911707e-07, + "loss": 7.3884, + "step": 470750 + }, + { + "epoch": 0.9509649842233059, + "grad_norm": 12.816692352294922, + "learning_rate": 1.0381517270305786e-07, + "loss": 31.8007, + "step": 470760 + }, + { + "epoch": 0.9509851848559897, + "grad_norm": 0.5190161466598511, + "learning_rate": 1.0374442237835625e-07, + "loss": 8.8657, + "step": 470770 + }, + { + "epoch": 0.9510053854886735, + "grad_norm": 197.76315307617188, + "learning_rate": 1.036736959174095e-07, + "loss": 15.5982, + "step": 470780 + }, + { + "epoch": 0.9510255861213573, + "grad_norm": 188.831787109375, + "learning_rate": 1.03602993320564e-07, + "loss": 16.2393, + "step": 470790 + }, + { + "epoch": 0.9510457867540412, + "grad_norm": 853.7542724609375, + "learning_rate": 1.0353231458816338e-07, + "loss": 14.2301, + "step": 470800 + }, + { + "epoch": 0.951065987386725, + "grad_norm": 827.4303588867188, + "learning_rate": 1.0346165972055233e-07, + "loss": 37.7539, + "step": 470810 + }, + { + "epoch": 0.9510861880194088, + "grad_norm": 324.36322021484375, + "learning_rate": 1.0339102871807505e-07, + "loss": 16.0894, + "step": 470820 + }, + { + "epoch": 0.9511063886520926, + "grad_norm": 1509.9539794921875, + "learning_rate": 1.0332042158107624e-07, + "loss": 20.8904, + "step": 470830 + }, + { + "epoch": 0.9511265892847764, + "grad_norm": 88.76578521728516, + "learning_rate": 1.032498383099001e-07, + "loss": 17.4222, + "step": 470840 + }, + { + "epoch": 0.9511467899174603, + "grad_norm": 53.25017547607422, + "learning_rate": 1.0317927890489021e-07, + "loss": 12.9788, + "step": 470850 + }, + { + "epoch": 0.951166990550144, + "grad_norm": 131.33226013183594, + "learning_rate": 
1.0310874336639021e-07, + "loss": 30.0801, + "step": 470860 + }, + { + "epoch": 0.9511871911828278, + "grad_norm": 426.4961853027344, + "learning_rate": 1.030382316947448e-07, + "loss": 13.2985, + "step": 470870 + }, + { + "epoch": 0.9512073918155116, + "grad_norm": 243.90689086914062, + "learning_rate": 1.0296774389029707e-07, + "loss": 22.1258, + "step": 470880 + }, + { + "epoch": 0.9512275924481954, + "grad_norm": 55.12568283081055, + "learning_rate": 1.0289727995339005e-07, + "loss": 9.4031, + "step": 470890 + }, + { + "epoch": 0.9512477930808793, + "grad_norm": 184.26426696777344, + "learning_rate": 1.0282683988436792e-07, + "loss": 12.5956, + "step": 470900 + }, + { + "epoch": 0.9512679937135631, + "grad_norm": 108.84278869628906, + "learning_rate": 1.027564236835743e-07, + "loss": 16.8787, + "step": 470910 + }, + { + "epoch": 0.9512881943462469, + "grad_norm": 94.78299713134766, + "learning_rate": 1.0268603135135169e-07, + "loss": 12.0234, + "step": 470920 + }, + { + "epoch": 0.9513083949789307, + "grad_norm": 524.7290649414062, + "learning_rate": 1.0261566288804315e-07, + "loss": 20.1096, + "step": 470930 + }, + { + "epoch": 0.9513285956116145, + "grad_norm": 404.2486572265625, + "learning_rate": 1.0254531829399228e-07, + "loss": 13.8545, + "step": 470940 + }, + { + "epoch": 0.9513487962442984, + "grad_norm": 185.98550415039062, + "learning_rate": 1.024749975695416e-07, + "loss": 14.0839, + "step": 470950 + }, + { + "epoch": 0.9513689968769822, + "grad_norm": 461.5027770996094, + "learning_rate": 1.0240470071503306e-07, + "loss": 12.5585, + "step": 470960 + }, + { + "epoch": 0.951389197509666, + "grad_norm": 258.36834716796875, + "learning_rate": 1.0233442773081026e-07, + "loss": 22.2929, + "step": 470970 + }, + { + "epoch": 0.9514093981423498, + "grad_norm": 346.71075439453125, + "learning_rate": 1.0226417861721571e-07, + "loss": 8.3865, + "step": 470980 + }, + { + "epoch": 0.9514295987750336, + "grad_norm": 485.3686828613281, + "learning_rate": 1.0219395337459137e-07, + "loss": 9.4013, + "step": 470990 + }, + { + "epoch": 0.9514497994077175, + "grad_norm": 640.2971801757812, + "learning_rate": 1.0212375200327973e-07, + "loss": 18.4735, + "step": 471000 + }, + { + "epoch": 0.9514700000404013, + "grad_norm": 171.5185546875, + "learning_rate": 1.0205357450362275e-07, + "loss": 12.5515, + "step": 471010 + }, + { + "epoch": 0.9514902006730851, + "grad_norm": 8.77415657043457, + "learning_rate": 1.0198342087596292e-07, + "loss": 15.5475, + "step": 471020 + }, + { + "epoch": 0.9515104013057689, + "grad_norm": 390.6858825683594, + "learning_rate": 1.0191329112064164e-07, + "loss": 16.3626, + "step": 471030 + }, + { + "epoch": 0.9515306019384527, + "grad_norm": 611.0020751953125, + "learning_rate": 1.0184318523800086e-07, + "loss": 13.536, + "step": 471040 + }, + { + "epoch": 0.9515508025711366, + "grad_norm": 441.7110290527344, + "learning_rate": 1.0177310322838251e-07, + "loss": 15.6641, + "step": 471050 + }, + { + "epoch": 0.9515710032038204, + "grad_norm": 280.6241149902344, + "learning_rate": 1.0170304509212803e-07, + "loss": 21.3834, + "step": 471060 + }, + { + "epoch": 0.9515912038365042, + "grad_norm": 260.3578796386719, + "learning_rate": 1.0163301082957821e-07, + "loss": 21.4385, + "step": 471070 + }, + { + "epoch": 0.951611404469188, + "grad_norm": 502.2814025878906, + "learning_rate": 1.0156300044107559e-07, + "loss": 12.3369, + "step": 471080 + }, + { + "epoch": 0.9516316051018718, + "grad_norm": 436.1946716308594, + "learning_rate": 1.0149301392696098e-07, + "loss": 17.8134, 
+ "step": 471090 + }, + { + "epoch": 0.9516518057345557, + "grad_norm": 191.00633239746094, + "learning_rate": 1.0142305128757468e-07, + "loss": 21.5534, + "step": 471100 + }, + { + "epoch": 0.9516720063672395, + "grad_norm": 350.3988037109375, + "learning_rate": 1.0135311252325863e-07, + "loss": 25.5265, + "step": 471110 + }, + { + "epoch": 0.9516922069999232, + "grad_norm": 326.69793701171875, + "learning_rate": 1.0128319763435312e-07, + "loss": 27.3422, + "step": 471120 + }, + { + "epoch": 0.951712407632607, + "grad_norm": 208.7720947265625, + "learning_rate": 1.0121330662119954e-07, + "loss": 11.7793, + "step": 471130 + }, + { + "epoch": 0.9517326082652908, + "grad_norm": 137.06358337402344, + "learning_rate": 1.0114343948413818e-07, + "loss": 13.8002, + "step": 471140 + }, + { + "epoch": 0.9517528088979746, + "grad_norm": 801.980224609375, + "learning_rate": 1.0107359622350877e-07, + "loss": 30.6869, + "step": 471150 + }, + { + "epoch": 0.9517730095306585, + "grad_norm": 206.8402099609375, + "learning_rate": 1.0100377683965323e-07, + "loss": 13.2018, + "step": 471160 + }, + { + "epoch": 0.9517932101633423, + "grad_norm": 289.8222961425781, + "learning_rate": 1.0093398133291132e-07, + "loss": 17.1643, + "step": 471170 + }, + { + "epoch": 0.9518134107960261, + "grad_norm": 523.9519653320312, + "learning_rate": 1.0086420970362221e-07, + "loss": 18.1259, + "step": 471180 + }, + { + "epoch": 0.9518336114287099, + "grad_norm": 477.6012268066406, + "learning_rate": 1.0079446195212728e-07, + "loss": 27.6164, + "step": 471190 + }, + { + "epoch": 0.9518538120613937, + "grad_norm": 233.2044219970703, + "learning_rate": 1.007247380787657e-07, + "loss": 29.1043, + "step": 471200 + }, + { + "epoch": 0.9518740126940776, + "grad_norm": 843.2547607421875, + "learning_rate": 1.0065503808387777e-07, + "loss": 25.8573, + "step": 471210 + }, + { + "epoch": 0.9518942133267614, + "grad_norm": 200.23541259765625, + "learning_rate": 1.0058536196780266e-07, + "loss": 11.6166, + "step": 471220 + }, + { + "epoch": 0.9519144139594452, + "grad_norm": 330.73638916015625, + "learning_rate": 1.0051570973088064e-07, + "loss": 18.2436, + "step": 471230 + }, + { + "epoch": 0.951934614592129, + "grad_norm": 169.0516815185547, + "learning_rate": 1.0044608137345091e-07, + "loss": 15.007, + "step": 471240 + }, + { + "epoch": 0.9519548152248128, + "grad_norm": 297.7073974609375, + "learning_rate": 1.0037647689585207e-07, + "loss": 13.2723, + "step": 471250 + }, + { + "epoch": 0.9519750158574967, + "grad_norm": 577.2901000976562, + "learning_rate": 1.0030689629842382e-07, + "loss": 25.1775, + "step": 471260 + }, + { + "epoch": 0.9519952164901805, + "grad_norm": 361.49591064453125, + "learning_rate": 1.0023733958150706e-07, + "loss": 18.4722, + "step": 471270 + }, + { + "epoch": 0.9520154171228643, + "grad_norm": 455.4455261230469, + "learning_rate": 1.0016780674543813e-07, + "loss": 13.6768, + "step": 471280 + }, + { + "epoch": 0.9520356177555481, + "grad_norm": 259.64111328125, + "learning_rate": 1.0009829779055679e-07, + "loss": 5.7533, + "step": 471290 + }, + { + "epoch": 0.952055818388232, + "grad_norm": 324.5470886230469, + "learning_rate": 1.0002881271720222e-07, + "loss": 19.4292, + "step": 471300 + }, + { + "epoch": 0.9520760190209158, + "grad_norm": 1066.47021484375, + "learning_rate": 9.995935152571357e-08, + "loss": 19.227, + "step": 471310 + }, + { + "epoch": 0.9520962196535996, + "grad_norm": 455.80023193359375, + "learning_rate": 9.988991421642779e-08, + "loss": 16.4507, + "step": 471320 + }, + { + "epoch": 
0.9521164202862834, + "grad_norm": 139.85203552246094, + "learning_rate": 9.98205007896852e-08, + "loss": 11.9968, + "step": 471330 + }, + { + "epoch": 0.9521366209189672, + "grad_norm": 218.869384765625, + "learning_rate": 9.975111124582271e-08, + "loss": 17.4944, + "step": 471340 + }, + { + "epoch": 0.952156821551651, + "grad_norm": 0.0, + "learning_rate": 9.968174558517895e-08, + "loss": 9.4351, + "step": 471350 + }, + { + "epoch": 0.9521770221843349, + "grad_norm": 360.7905578613281, + "learning_rate": 9.961240380809201e-08, + "loss": 17.8279, + "step": 471360 + }, + { + "epoch": 0.9521972228170186, + "grad_norm": 610.8971557617188, + "learning_rate": 9.954308591489991e-08, + "loss": 26.3978, + "step": 471370 + }, + { + "epoch": 0.9522174234497024, + "grad_norm": 0.0, + "learning_rate": 9.947379190594076e-08, + "loss": 23.4322, + "step": 471380 + }, + { + "epoch": 0.9522376240823862, + "grad_norm": 613.8010864257812, + "learning_rate": 9.940452178155147e-08, + "loss": 20.5446, + "step": 471390 + }, + { + "epoch": 0.95225782471507, + "grad_norm": 466.0672607421875, + "learning_rate": 9.933527554207012e-08, + "loss": 23.108, + "step": 471400 + }, + { + "epoch": 0.9522780253477539, + "grad_norm": 515.3238525390625, + "learning_rate": 9.926605318783477e-08, + "loss": 21.6157, + "step": 471410 + }, + { + "epoch": 0.9522982259804377, + "grad_norm": 495.3761291503906, + "learning_rate": 9.919685471918183e-08, + "loss": 34.2023, + "step": 471420 + }, + { + "epoch": 0.9523184266131215, + "grad_norm": 262.55364990234375, + "learning_rate": 9.912768013644936e-08, + "loss": 19.4069, + "step": 471430 + }, + { + "epoch": 0.9523386272458053, + "grad_norm": 674.4443969726562, + "learning_rate": 9.905852943997374e-08, + "loss": 14.3366, + "step": 471440 + }, + { + "epoch": 0.9523588278784891, + "grad_norm": 322.1920166015625, + "learning_rate": 9.898940263009304e-08, + "loss": 18.1222, + "step": 471450 + }, + { + "epoch": 0.952379028511173, + "grad_norm": 358.52862548828125, + "learning_rate": 9.892029970714367e-08, + "loss": 16.8095, + "step": 471460 + }, + { + "epoch": 0.9523992291438568, + "grad_norm": 177.48074340820312, + "learning_rate": 9.885122067146147e-08, + "loss": 12.4875, + "step": 471470 + }, + { + "epoch": 0.9524194297765406, + "grad_norm": 130.83697509765625, + "learning_rate": 9.878216552338504e-08, + "loss": 12.1451, + "step": 471480 + }, + { + "epoch": 0.9524396304092244, + "grad_norm": 295.5693054199219, + "learning_rate": 9.871313426324913e-08, + "loss": 32.5697, + "step": 471490 + }, + { + "epoch": 0.9524598310419082, + "grad_norm": 281.16351318359375, + "learning_rate": 9.864412689139124e-08, + "loss": 15.9962, + "step": 471500 + }, + { + "epoch": 0.952480031674592, + "grad_norm": 359.3190612792969, + "learning_rate": 9.857514340814667e-08, + "loss": 13.375, + "step": 471510 + }, + { + "epoch": 0.9525002323072759, + "grad_norm": 552.441162109375, + "learning_rate": 9.850618381385346e-08, + "loss": 12.7265, + "step": 471520 + }, + { + "epoch": 0.9525204329399597, + "grad_norm": 328.9881286621094, + "learning_rate": 9.843724810884636e-08, + "loss": 11.7589, + "step": 471530 + }, + { + "epoch": 0.9525406335726435, + "grad_norm": 326.3606872558594, + "learning_rate": 9.836833629346121e-08, + "loss": 12.6732, + "step": 471540 + }, + { + "epoch": 0.9525608342053273, + "grad_norm": 527.2721557617188, + "learning_rate": 9.82994483680344e-08, + "loss": 20.0105, + "step": 471550 + }, + { + "epoch": 0.9525810348380112, + "grad_norm": 212.74549865722656, + "learning_rate": 
9.823058433290178e-08, + "loss": 15.8167, + "step": 471560 + }, + { + "epoch": 0.952601235470695, + "grad_norm": 212.47674560546875, + "learning_rate": 9.816174418839863e-08, + "loss": 20.676, + "step": 471570 + }, + { + "epoch": 0.9526214361033788, + "grad_norm": 208.3838348388672, + "learning_rate": 9.809292793486025e-08, + "loss": 12.077, + "step": 471580 + }, + { + "epoch": 0.9526416367360626, + "grad_norm": 644.1743774414062, + "learning_rate": 9.802413557262302e-08, + "loss": 19.7677, + "step": 471590 + }, + { + "epoch": 0.9526618373687464, + "grad_norm": 483.9356994628906, + "learning_rate": 9.795536710202169e-08, + "loss": 15.0092, + "step": 471600 + }, + { + "epoch": 0.9526820380014303, + "grad_norm": 355.7208251953125, + "learning_rate": 9.788662252339099e-08, + "loss": 16.6463, + "step": 471610 + }, + { + "epoch": 0.9527022386341141, + "grad_norm": 403.49383544921875, + "learning_rate": 9.781790183706674e-08, + "loss": 19.6125, + "step": 471620 + }, + { + "epoch": 0.9527224392667978, + "grad_norm": 338.376708984375, + "learning_rate": 9.774920504338315e-08, + "loss": 28.9406, + "step": 471630 + }, + { + "epoch": 0.9527426398994816, + "grad_norm": 0.0, + "learning_rate": 9.768053214267548e-08, + "loss": 18.7657, + "step": 471640 + }, + { + "epoch": 0.9527628405321654, + "grad_norm": 458.7870788574219, + "learning_rate": 9.761188313527792e-08, + "loss": 17.2405, + "step": 471650 + }, + { + "epoch": 0.9527830411648492, + "grad_norm": 83.17525482177734, + "learning_rate": 9.754325802152575e-08, + "loss": 9.8613, + "step": 471660 + }, + { + "epoch": 0.9528032417975331, + "grad_norm": 143.07252502441406, + "learning_rate": 9.747465680175316e-08, + "loss": 14.7038, + "step": 471670 + }, + { + "epoch": 0.9528234424302169, + "grad_norm": 373.1251220703125, + "learning_rate": 9.740607947629433e-08, + "loss": 16.7827, + "step": 471680 + }, + { + "epoch": 0.9528436430629007, + "grad_norm": 725.1900024414062, + "learning_rate": 9.733752604548397e-08, + "loss": 23.8853, + "step": 471690 + }, + { + "epoch": 0.9528638436955845, + "grad_norm": 702.7918701171875, + "learning_rate": 9.726899650965626e-08, + "loss": 28.6152, + "step": 471700 + }, + { + "epoch": 0.9528840443282683, + "grad_norm": 432.33929443359375, + "learning_rate": 9.720049086914374e-08, + "loss": 21.788, + "step": 471710 + }, + { + "epoch": 0.9529042449609522, + "grad_norm": 272.7337951660156, + "learning_rate": 9.713200912428222e-08, + "loss": 21.7312, + "step": 471720 + }, + { + "epoch": 0.952924445593636, + "grad_norm": 105.20574951171875, + "learning_rate": 9.706355127540423e-08, + "loss": 12.223, + "step": 471730 + }, + { + "epoch": 0.9529446462263198, + "grad_norm": 39.18534851074219, + "learning_rate": 9.699511732284395e-08, + "loss": 14.3647, + "step": 471740 + }, + { + "epoch": 0.9529648468590036, + "grad_norm": 172.9857635498047, + "learning_rate": 9.692670726693498e-08, + "loss": 12.8175, + "step": 471750 + }, + { + "epoch": 0.9529850474916874, + "grad_norm": 9.908214569091797, + "learning_rate": 9.68583211080104e-08, + "loss": 12.9484, + "step": 471760 + }, + { + "epoch": 0.9530052481243713, + "grad_norm": 11452.4755859375, + "learning_rate": 9.678995884640385e-08, + "loss": 29.713, + "step": 471770 + }, + { + "epoch": 0.9530254487570551, + "grad_norm": 133.68270874023438, + "learning_rate": 9.672162048244838e-08, + "loss": 20.4403, + "step": 471780 + }, + { + "epoch": 0.9530456493897389, + "grad_norm": 81.29830169677734, + "learning_rate": 9.66533060164765e-08, + "loss": 11.5564, + "step": 471790 + }, + { + "epoch": 
0.9530658500224227, + "grad_norm": 679.6661376953125, + "learning_rate": 9.658501544882182e-08, + "loss": 20.2916, + "step": 471800 + }, + { + "epoch": 0.9530860506551065, + "grad_norm": 374.6275329589844, + "learning_rate": 9.651674877981743e-08, + "loss": 15.3965, + "step": 471810 + }, + { + "epoch": 0.9531062512877904, + "grad_norm": 220.9196319580078, + "learning_rate": 9.644850600979583e-08, + "loss": 29.6173, + "step": 471820 + }, + { + "epoch": 0.9531264519204742, + "grad_norm": 372.6571350097656, + "learning_rate": 9.638028713908898e-08, + "loss": 10.7237, + "step": 471830 + }, + { + "epoch": 0.953146652553158, + "grad_norm": 210.60226440429688, + "learning_rate": 9.63120921680305e-08, + "loss": 18.6246, + "step": 471840 + }, + { + "epoch": 0.9531668531858418, + "grad_norm": 299.7548522949219, + "learning_rate": 9.62439210969518e-08, + "loss": 18.4083, + "step": 471850 + }, + { + "epoch": 0.9531870538185256, + "grad_norm": 381.3686828613281, + "learning_rate": 9.617577392618538e-08, + "loss": 17.6152, + "step": 471860 + }, + { + "epoch": 0.9532072544512095, + "grad_norm": 281.87298583984375, + "learning_rate": 9.61076506560632e-08, + "loss": 14.8257, + "step": 471870 + }, + { + "epoch": 0.9532274550838933, + "grad_norm": 464.02081298828125, + "learning_rate": 9.603955128691833e-08, + "loss": 15.0501, + "step": 471880 + }, + { + "epoch": 0.953247655716577, + "grad_norm": 191.71810913085938, + "learning_rate": 9.597147581908107e-08, + "loss": 12.9511, + "step": 471890 + }, + { + "epoch": 0.9532678563492608, + "grad_norm": 200.65492248535156, + "learning_rate": 9.590342425288446e-08, + "loss": 15.8451, + "step": 471900 + }, + { + "epoch": 0.9532880569819446, + "grad_norm": 658.044677734375, + "learning_rate": 9.583539658865992e-08, + "loss": 13.1211, + "step": 471910 + }, + { + "epoch": 0.9533082576146285, + "grad_norm": 286.3880615234375, + "learning_rate": 9.576739282673886e-08, + "loss": 14.6873, + "step": 471920 + }, + { + "epoch": 0.9533284582473123, + "grad_norm": 544.264404296875, + "learning_rate": 9.569941296745212e-08, + "loss": 24.7345, + "step": 471930 + }, + { + "epoch": 0.9533486588799961, + "grad_norm": 443.8762512207031, + "learning_rate": 9.563145701113219e-08, + "loss": 33.2531, + "step": 471940 + }, + { + "epoch": 0.9533688595126799, + "grad_norm": 27.334545135498047, + "learning_rate": 9.556352495810994e-08, + "loss": 13.0649, + "step": 471950 + }, + { + "epoch": 0.9533890601453637, + "grad_norm": 729.0777587890625, + "learning_rate": 9.549561680871566e-08, + "loss": 29.3194, + "step": 471960 + }, + { + "epoch": 0.9534092607780476, + "grad_norm": 566.8876342773438, + "learning_rate": 9.542773256328075e-08, + "loss": 18.8543, + "step": 471970 + }, + { + "epoch": 0.9534294614107314, + "grad_norm": 197.99618530273438, + "learning_rate": 9.53598722221366e-08, + "loss": 14.835, + "step": 471980 + }, + { + "epoch": 0.9534496620434152, + "grad_norm": 13.030482292175293, + "learning_rate": 9.529203578561353e-08, + "loss": 16.2845, + "step": 471990 + }, + { + "epoch": 0.953469862676099, + "grad_norm": 906.8263549804688, + "learning_rate": 9.522422325404234e-08, + "loss": 25.3349, + "step": 472000 + }, + { + "epoch": 0.9534900633087828, + "grad_norm": 498.44171142578125, + "learning_rate": 9.515643462775337e-08, + "loss": 24.0487, + "step": 472010 + }, + { + "epoch": 0.9535102639414667, + "grad_norm": 541.366943359375, + "learning_rate": 9.508866990707688e-08, + "loss": 18.6442, + "step": 472020 + }, + { + "epoch": 0.9535304645741505, + "grad_norm": 613.9030151367188, + 
"learning_rate": 9.502092909234317e-08, + "loss": 18.6889, + "step": 472030 + }, + { + "epoch": 0.9535506652068343, + "grad_norm": 364.0337829589844, + "learning_rate": 9.495321218388309e-08, + "loss": 25.2835, + "step": 472040 + }, + { + "epoch": 0.9535708658395181, + "grad_norm": 375.2162170410156, + "learning_rate": 9.488551918202527e-08, + "loss": 10.3971, + "step": 472050 + }, + { + "epoch": 0.9535910664722019, + "grad_norm": 430.7357177734375, + "learning_rate": 9.481785008710165e-08, + "loss": 19.3899, + "step": 472060 + }, + { + "epoch": 0.9536112671048858, + "grad_norm": 386.5377197265625, + "learning_rate": 9.475020489944032e-08, + "loss": 7.5816, + "step": 472070 + }, + { + "epoch": 0.9536314677375696, + "grad_norm": 249.53707885742188, + "learning_rate": 9.468258361937155e-08, + "loss": 14.0372, + "step": 472080 + }, + { + "epoch": 0.9536516683702534, + "grad_norm": 207.7149658203125, + "learning_rate": 9.461498624722509e-08, + "loss": 15.2406, + "step": 472090 + }, + { + "epoch": 0.9536718690029372, + "grad_norm": 1044.557373046875, + "learning_rate": 9.454741278333013e-08, + "loss": 20.2056, + "step": 472100 + }, + { + "epoch": 0.953692069635621, + "grad_norm": 274.39410400390625, + "learning_rate": 9.447986322801583e-08, + "loss": 17.5673, + "step": 472110 + }, + { + "epoch": 0.9537122702683049, + "grad_norm": 79.78308868408203, + "learning_rate": 9.441233758161139e-08, + "loss": 12.5258, + "step": 472120 + }, + { + "epoch": 0.9537324709009887, + "grad_norm": 32.28790283203125, + "learning_rate": 9.434483584444709e-08, + "loss": 12.0531, + "step": 472130 + }, + { + "epoch": 0.9537526715336724, + "grad_norm": 162.91354370117188, + "learning_rate": 9.427735801685101e-08, + "loss": 12.2741, + "step": 472140 + }, + { + "epoch": 0.9537728721663562, + "grad_norm": 1072.396484375, + "learning_rate": 9.420990409915176e-08, + "loss": 30.2842, + "step": 472150 + }, + { + "epoch": 0.95379307279904, + "grad_norm": 776.0736083984375, + "learning_rate": 9.414247409167854e-08, + "loss": 22.6709, + "step": 472160 + }, + { + "epoch": 0.9538132734317238, + "grad_norm": 681.9462280273438, + "learning_rate": 9.407506799475996e-08, + "loss": 19.9861, + "step": 472170 + }, + { + "epoch": 0.9538334740644077, + "grad_norm": 22.479015350341797, + "learning_rate": 9.400768580872411e-08, + "loss": 13.1713, + "step": 472180 + }, + { + "epoch": 0.9538536746970915, + "grad_norm": 194.88360595703125, + "learning_rate": 9.394032753390014e-08, + "loss": 11.1674, + "step": 472190 + }, + { + "epoch": 0.9538738753297753, + "grad_norm": 1.7064508199691772, + "learning_rate": 9.387299317061615e-08, + "loss": 13.6525, + "step": 472200 + }, + { + "epoch": 0.9538940759624591, + "grad_norm": 13.816083908081055, + "learning_rate": 9.380568271919966e-08, + "loss": 5.8827, + "step": 472210 + }, + { + "epoch": 0.9539142765951429, + "grad_norm": 244.79507446289062, + "learning_rate": 9.373839617997926e-08, + "loss": 21.1322, + "step": 472220 + }, + { + "epoch": 0.9539344772278268, + "grad_norm": 331.3328552246094, + "learning_rate": 9.367113355328361e-08, + "loss": 20.3777, + "step": 472230 + }, + { + "epoch": 0.9539546778605106, + "grad_norm": 237.48936462402344, + "learning_rate": 9.36038948394391e-08, + "loss": 20.8298, + "step": 472240 + }, + { + "epoch": 0.9539748784931944, + "grad_norm": 329.25421142578125, + "learning_rate": 9.353668003877437e-08, + "loss": 24.5786, + "step": 472250 + }, + { + "epoch": 0.9539950791258782, + "grad_norm": 491.8194274902344, + "learning_rate": 9.346948915161636e-08, + "loss": 
19.1928, + "step": 472260 + }, + { + "epoch": 0.954015279758562, + "grad_norm": 353.59442138671875, + "learning_rate": 9.340232217829371e-08, + "loss": 14.4394, + "step": 472270 + }, + { + "epoch": 0.9540354803912459, + "grad_norm": 592.506103515625, + "learning_rate": 9.333517911913281e-08, + "loss": 15.0711, + "step": 472280 + }, + { + "epoch": 0.9540556810239297, + "grad_norm": 246.64601135253906, + "learning_rate": 9.326805997446065e-08, + "loss": 31.8441, + "step": 472290 + }, + { + "epoch": 0.9540758816566135, + "grad_norm": 420.1644287109375, + "learning_rate": 9.320096474460527e-08, + "loss": 14.5073, + "step": 472300 + }, + { + "epoch": 0.9540960822892973, + "grad_norm": 676.580078125, + "learning_rate": 9.31338934298931e-08, + "loss": 17.2554, + "step": 472310 + }, + { + "epoch": 0.9541162829219811, + "grad_norm": 452.257568359375, + "learning_rate": 9.306684603065108e-08, + "loss": 18.2278, + "step": 472320 + }, + { + "epoch": 0.954136483554665, + "grad_norm": 362.83880615234375, + "learning_rate": 9.299982254720674e-08, + "loss": 12.9905, + "step": 472330 + }, + { + "epoch": 0.9541566841873488, + "grad_norm": 603.9871215820312, + "learning_rate": 9.293282297988537e-08, + "loss": 25.1383, + "step": 472340 + }, + { + "epoch": 0.9541768848200326, + "grad_norm": 383.53717041015625, + "learning_rate": 9.28658473290145e-08, + "loss": 19.0505, + "step": 472350 + }, + { + "epoch": 0.9541970854527164, + "grad_norm": 301.22833251953125, + "learning_rate": 9.27988955949205e-08, + "loss": 20.0336, + "step": 472360 + }, + { + "epoch": 0.9542172860854002, + "grad_norm": 209.2220458984375, + "learning_rate": 9.273196777792926e-08, + "loss": 19.8336, + "step": 472370 + }, + { + "epoch": 0.9542374867180841, + "grad_norm": 757.2785034179688, + "learning_rate": 9.266506387836771e-08, + "loss": 17.0049, + "step": 472380 + }, + { + "epoch": 0.9542576873507679, + "grad_norm": 260.1947937011719, + "learning_rate": 9.259818389656117e-08, + "loss": 10.2656, + "step": 472390 + }, + { + "epoch": 0.9542778879834516, + "grad_norm": 363.085205078125, + "learning_rate": 9.253132783283548e-08, + "loss": 17.8424, + "step": 472400 + }, + { + "epoch": 0.9542980886161354, + "grad_norm": 322.1111145019531, + "learning_rate": 9.246449568751702e-08, + "loss": 15.6732, + "step": 472410 + }, + { + "epoch": 0.9543182892488192, + "grad_norm": 152.23348999023438, + "learning_rate": 9.239768746093226e-08, + "loss": 13.6807, + "step": 472420 + }, + { + "epoch": 0.954338489881503, + "grad_norm": 496.6689758300781, + "learning_rate": 9.233090315340532e-08, + "loss": 17.5344, + "step": 472430 + }, + { + "epoch": 0.9543586905141869, + "grad_norm": 209.66159057617188, + "learning_rate": 9.226414276526208e-08, + "loss": 20.2614, + "step": 472440 + }, + { + "epoch": 0.9543788911468707, + "grad_norm": 116.48358917236328, + "learning_rate": 9.219740629682838e-08, + "loss": 17.466, + "step": 472450 + }, + { + "epoch": 0.9543990917795545, + "grad_norm": 573.6477661132812, + "learning_rate": 9.213069374842953e-08, + "loss": 19.1514, + "step": 472460 + }, + { + "epoch": 0.9544192924122383, + "grad_norm": 676.3889770507812, + "learning_rate": 9.206400512039026e-08, + "loss": 22.2085, + "step": 472470 + }, + { + "epoch": 0.9544394930449221, + "grad_norm": 319.9701232910156, + "learning_rate": 9.199734041303532e-08, + "loss": 18.8759, + "step": 472480 + }, + { + "epoch": 0.954459693677606, + "grad_norm": 307.66448974609375, + "learning_rate": 9.19306996266911e-08, + "loss": 13.4954, + "step": 472490 + }, + { + "epoch": 0.9544798943102898, 
+ "grad_norm": 592.2040405273438, + "learning_rate": 9.186408276168012e-08, + "loss": 12.9578, + "step": 472500 + }, + { + "epoch": 0.9545000949429736, + "grad_norm": 497.1847229003906, + "learning_rate": 9.179748981832881e-08, + "loss": 17.0803, + "step": 472510 + }, + { + "epoch": 0.9545202955756574, + "grad_norm": 368.4649353027344, + "learning_rate": 9.173092079696188e-08, + "loss": 10.285, + "step": 472520 + }, + { + "epoch": 0.9545404962083412, + "grad_norm": 271.83099365234375, + "learning_rate": 9.166437569790242e-08, + "loss": 34.242, + "step": 472530 + }, + { + "epoch": 0.9545606968410251, + "grad_norm": 302.1800842285156, + "learning_rate": 9.159785452147574e-08, + "loss": 7.3704, + "step": 472540 + }, + { + "epoch": 0.9545808974737089, + "grad_norm": 34.294837951660156, + "learning_rate": 9.153135726800599e-08, + "loss": 12.2237, + "step": 472550 + }, + { + "epoch": 0.9546010981063927, + "grad_norm": 589.4528198242188, + "learning_rate": 9.146488393781683e-08, + "loss": 13.1811, + "step": 472560 + }, + { + "epoch": 0.9546212987390765, + "grad_norm": 535.8561401367188, + "learning_rate": 9.139843453123243e-08, + "loss": 18.9766, + "step": 472570 + }, + { + "epoch": 0.9546414993717603, + "grad_norm": 309.1181335449219, + "learning_rate": 9.133200904857642e-08, + "loss": 16.5908, + "step": 472580 + }, + { + "epoch": 0.9546617000044442, + "grad_norm": 90.18566131591797, + "learning_rate": 9.126560749017354e-08, + "loss": 20.1748, + "step": 472590 + }, + { + "epoch": 0.954681900637128, + "grad_norm": 374.27630615234375, + "learning_rate": 9.119922985634633e-08, + "loss": 14.8302, + "step": 472600 + }, + { + "epoch": 0.9547021012698118, + "grad_norm": 202.9929656982422, + "learning_rate": 9.113287614741895e-08, + "loss": 19.8109, + "step": 472610 + }, + { + "epoch": 0.9547223019024956, + "grad_norm": 497.937744140625, + "learning_rate": 9.106654636371448e-08, + "loss": 15.6629, + "step": 472620 + }, + { + "epoch": 0.9547425025351794, + "grad_norm": 143.7318878173828, + "learning_rate": 9.1000240505556e-08, + "loss": 16.0604, + "step": 472630 + }, + { + "epoch": 0.9547627031678633, + "grad_norm": 204.52513122558594, + "learning_rate": 9.093395857326714e-08, + "loss": 20.6228, + "step": 472640 + }, + { + "epoch": 0.954782903800547, + "grad_norm": 343.8081970214844, + "learning_rate": 9.086770056717099e-08, + "loss": 18.4626, + "step": 472650 + }, + { + "epoch": 0.9548031044332308, + "grad_norm": 177.4273223876953, + "learning_rate": 9.080146648759003e-08, + "loss": 38.9969, + "step": 472660 + }, + { + "epoch": 0.9548233050659146, + "grad_norm": 262.6379089355469, + "learning_rate": 9.073525633484737e-08, + "loss": 11.4115, + "step": 472670 + }, + { + "epoch": 0.9548435056985984, + "grad_norm": 410.4691467285156, + "learning_rate": 9.066907010926551e-08, + "loss": 19.5217, + "step": 472680 + }, + { + "epoch": 0.9548637063312823, + "grad_norm": 372.9002990722656, + "learning_rate": 9.060290781116698e-08, + "loss": 35.4899, + "step": 472690 + }, + { + "epoch": 0.9548839069639661, + "grad_norm": 734.9282836914062, + "learning_rate": 9.053676944087542e-08, + "loss": 29.2235, + "step": 472700 + }, + { + "epoch": 0.9549041075966499, + "grad_norm": 508.56658935546875, + "learning_rate": 9.04706549987111e-08, + "loss": 17.9589, + "step": 472710 + }, + { + "epoch": 0.9549243082293337, + "grad_norm": 352.7237854003906, + "learning_rate": 9.040456448499769e-08, + "loss": 18.5129, + "step": 472720 + }, + { + "epoch": 0.9549445088620175, + "grad_norm": 788.5574951171875, + "learning_rate": 
9.03384979000571e-08, + "loss": 24.0234, + "step": 472730 + }, + { + "epoch": 0.9549647094947014, + "grad_norm": 231.6894989013672, + "learning_rate": 9.027245524421135e-08, + "loss": 16.0379, + "step": 472740 + }, + { + "epoch": 0.9549849101273852, + "grad_norm": 354.6516418457031, + "learning_rate": 9.020643651778183e-08, + "loss": 26.0841, + "step": 472750 + }, + { + "epoch": 0.955005110760069, + "grad_norm": 79.38582611083984, + "learning_rate": 9.014044172109049e-08, + "loss": 10.3183, + "step": 472760 + }, + { + "epoch": 0.9550253113927528, + "grad_norm": 317.4454650878906, + "learning_rate": 9.007447085445987e-08, + "loss": 20.7183, + "step": 472770 + }, + { + "epoch": 0.9550455120254366, + "grad_norm": 140.3369140625, + "learning_rate": 9.00085239182108e-08, + "loss": 14.6581, + "step": 472780 + }, + { + "epoch": 0.9550657126581205, + "grad_norm": 209.433349609375, + "learning_rate": 8.99426009126636e-08, + "loss": 17.313, + "step": 472790 + }, + { + "epoch": 0.9550859132908043, + "grad_norm": 313.2442321777344, + "learning_rate": 8.987670183814134e-08, + "loss": 18.3114, + "step": 472800 + }, + { + "epoch": 0.9551061139234881, + "grad_norm": 0.4536556601524353, + "learning_rate": 8.981082669496433e-08, + "loss": 20.4024, + "step": 472810 + }, + { + "epoch": 0.9551263145561719, + "grad_norm": 458.67962646484375, + "learning_rate": 8.974497548345396e-08, + "loss": 26.9035, + "step": 472820 + }, + { + "epoch": 0.9551465151888557, + "grad_norm": 449.9855651855469, + "learning_rate": 8.967914820393108e-08, + "loss": 14.6759, + "step": 472830 + }, + { + "epoch": 0.9551667158215396, + "grad_norm": 200.32005310058594, + "learning_rate": 8.961334485671657e-08, + "loss": 10.905, + "step": 472840 + }, + { + "epoch": 0.9551869164542234, + "grad_norm": 478.463134765625, + "learning_rate": 8.954756544213128e-08, + "loss": 10.1928, + "step": 472850 + }, + { + "epoch": 0.9552071170869072, + "grad_norm": 410.7976989746094, + "learning_rate": 8.948180996049493e-08, + "loss": 26.1069, + "step": 472860 + }, + { + "epoch": 0.955227317719591, + "grad_norm": 193.8374786376953, + "learning_rate": 8.941607841212841e-08, + "loss": 12.1814, + "step": 472870 + }, + { + "epoch": 0.9552475183522748, + "grad_norm": 352.20159912109375, + "learning_rate": 8.93503707973531e-08, + "loss": 18.8841, + "step": 472880 + }, + { + "epoch": 0.9552677189849587, + "grad_norm": 54.54193878173828, + "learning_rate": 8.928468711648875e-08, + "loss": 24.2221, + "step": 472890 + }, + { + "epoch": 0.9552879196176425, + "grad_norm": 270.1540222167969, + "learning_rate": 8.921902736985399e-08, + "loss": 13.8838, + "step": 472900 + }, + { + "epoch": 0.9553081202503262, + "grad_norm": 258.27935791015625, + "learning_rate": 8.915339155777136e-08, + "loss": 27.3903, + "step": 472910 + }, + { + "epoch": 0.95532832088301, + "grad_norm": 851.5621948242188, + "learning_rate": 8.908777968055893e-08, + "loss": 24.5242, + "step": 472920 + }, + { + "epoch": 0.9553485215156938, + "grad_norm": 504.7754211425781, + "learning_rate": 8.902219173853699e-08, + "loss": 16.8366, + "step": 472930 + }, + { + "epoch": 0.9553687221483776, + "grad_norm": 763.4608154296875, + "learning_rate": 8.895662773202529e-08, + "loss": 18.1817, + "step": 472940 + }, + { + "epoch": 0.9553889227810615, + "grad_norm": 500.566650390625, + "learning_rate": 8.889108766134358e-08, + "loss": 21.5876, + "step": 472950 + }, + { + "epoch": 0.9554091234137453, + "grad_norm": 424.56103515625, + "learning_rate": 8.882557152681104e-08, + "loss": 12.3775, + "step": 472960 + }, + { + 
"epoch": 0.9554293240464291, + "grad_norm": 52.139766693115234, + "learning_rate": 8.876007932874686e-08, + "loss": 9.1044, + "step": 472970 + }, + { + "epoch": 0.9554495246791129, + "grad_norm": 101.28936004638672, + "learning_rate": 8.869461106747024e-08, + "loss": 10.6823, + "step": 472980 + }, + { + "epoch": 0.9554697253117967, + "grad_norm": 415.1844482421875, + "learning_rate": 8.862916674330091e-08, + "loss": 25.4574, + "step": 472990 + }, + { + "epoch": 0.9554899259444806, + "grad_norm": 635.3602905273438, + "learning_rate": 8.856374635655696e-08, + "loss": 14.1896, + "step": 473000 + }, + { + "epoch": 0.9555101265771644, + "grad_norm": 320.00335693359375, + "learning_rate": 8.849834990755757e-08, + "loss": 17.2174, + "step": 473010 + }, + { + "epoch": 0.9555303272098482, + "grad_norm": 408.1123046875, + "learning_rate": 8.843297739662138e-08, + "loss": 17.2332, + "step": 473020 + }, + { + "epoch": 0.955550527842532, + "grad_norm": 37.39815139770508, + "learning_rate": 8.836762882406757e-08, + "loss": 12.6036, + "step": 473030 + }, + { + "epoch": 0.9555707284752158, + "grad_norm": 118.26240539550781, + "learning_rate": 8.830230419021424e-08, + "loss": 26.1722, + "step": 473040 + }, + { + "epoch": 0.9555909291078997, + "grad_norm": 264.1705017089844, + "learning_rate": 8.823700349537945e-08, + "loss": 17.247, + "step": 473050 + }, + { + "epoch": 0.9556111297405835, + "grad_norm": 483.496337890625, + "learning_rate": 8.817172673988184e-08, + "loss": 48.2525, + "step": 473060 + }, + { + "epoch": 0.9556313303732673, + "grad_norm": 314.58447265625, + "learning_rate": 8.810647392404004e-08, + "loss": 15.7937, + "step": 473070 + }, + { + "epoch": 0.9556515310059511, + "grad_norm": 426.01654052734375, + "learning_rate": 8.804124504817046e-08, + "loss": 20.1481, + "step": 473080 + }, + { + "epoch": 0.955671731638635, + "grad_norm": 525.1240844726562, + "learning_rate": 8.797604011259287e-08, + "loss": 19.8814, + "step": 473090 + }, + { + "epoch": 0.9556919322713188, + "grad_norm": 566.5120239257812, + "learning_rate": 8.791085911762476e-08, + "loss": 9.9254, + "step": 473100 + }, + { + "epoch": 0.9557121329040026, + "grad_norm": 656.9776611328125, + "learning_rate": 8.784570206358201e-08, + "loss": 26.1737, + "step": 473110 + }, + { + "epoch": 0.9557323335366864, + "grad_norm": 436.4179382324219, + "learning_rate": 8.778056895078435e-08, + "loss": 27.7056, + "step": 473120 + }, + { + "epoch": 0.9557525341693702, + "grad_norm": 674.1171264648438, + "learning_rate": 8.77154597795482e-08, + "loss": 31.0846, + "step": 473130 + }, + { + "epoch": 0.955772734802054, + "grad_norm": 75.30695343017578, + "learning_rate": 8.765037455019165e-08, + "loss": 17.1403, + "step": 473140 + }, + { + "epoch": 0.9557929354347379, + "grad_norm": 637.0873413085938, + "learning_rate": 8.758531326303054e-08, + "loss": 12.4042, + "step": 473150 + }, + { + "epoch": 0.9558131360674217, + "grad_norm": 203.11627197265625, + "learning_rate": 8.752027591838352e-08, + "loss": 12.956, + "step": 473160 + }, + { + "epoch": 0.9558333367001054, + "grad_norm": 414.611328125, + "learning_rate": 8.7455262516567e-08, + "loss": 10.456, + "step": 473170 + }, + { + "epoch": 0.9558535373327892, + "grad_norm": 177.26925659179688, + "learning_rate": 8.739027305789682e-08, + "loss": 9.3711, + "step": 473180 + }, + { + "epoch": 0.955873737965473, + "grad_norm": 434.41229248046875, + "learning_rate": 8.732530754269108e-08, + "loss": 19.888, + "step": 473190 + }, + { + "epoch": 0.9558939385981569, + "grad_norm": 155.48361206054688, + 
"learning_rate": 8.726036597126619e-08, + "loss": 27.929, + "step": 473200 + }, + { + "epoch": 0.9559141392308407, + "grad_norm": 146.4423828125, + "learning_rate": 8.719544834393855e-08, + "loss": 11.0543, + "step": 473210 + }, + { + "epoch": 0.9559343398635245, + "grad_norm": 6.916018486022949, + "learning_rate": 8.713055466102349e-08, + "loss": 10.1698, + "step": 473220 + }, + { + "epoch": 0.9559545404962083, + "grad_norm": 46.781036376953125, + "learning_rate": 8.706568492283907e-08, + "loss": 22.7213, + "step": 473230 + }, + { + "epoch": 0.9559747411288921, + "grad_norm": 42.242225646972656, + "learning_rate": 8.700083912970058e-08, + "loss": 14.4235, + "step": 473240 + }, + { + "epoch": 0.955994941761576, + "grad_norm": 473.7392578125, + "learning_rate": 8.693601728192392e-08, + "loss": 19.8019, + "step": 473250 + }, + { + "epoch": 0.9560151423942598, + "grad_norm": 480.6423034667969, + "learning_rate": 8.687121937982545e-08, + "loss": 22.3902, + "step": 473260 + }, + { + "epoch": 0.9560353430269436, + "grad_norm": 186.20558166503906, + "learning_rate": 8.680644542372052e-08, + "loss": 15.3956, + "step": 473270 + }, + { + "epoch": 0.9560555436596274, + "grad_norm": 345.1857604980469, + "learning_rate": 8.674169541392552e-08, + "loss": 12.7148, + "step": 473280 + }, + { + "epoch": 0.9560757442923112, + "grad_norm": 428.6722717285156, + "learning_rate": 8.66769693507552e-08, + "loss": 13.5177, + "step": 473290 + }, + { + "epoch": 0.9560959449249951, + "grad_norm": 675.354248046875, + "learning_rate": 8.661226723452542e-08, + "loss": 31.9146, + "step": 473300 + }, + { + "epoch": 0.9561161455576789, + "grad_norm": 294.69195556640625, + "learning_rate": 8.65475890655515e-08, + "loss": 25.9512, + "step": 473310 + }, + { + "epoch": 0.9561363461903627, + "grad_norm": 163.40345764160156, + "learning_rate": 8.648293484414871e-08, + "loss": 13.3244, + "step": 473320 + }, + { + "epoch": 0.9561565468230465, + "grad_norm": 363.00103759765625, + "learning_rate": 8.641830457063239e-08, + "loss": 15.7228, + "step": 473330 + }, + { + "epoch": 0.9561767474557303, + "grad_norm": 229.41160583496094, + "learning_rate": 8.63536982453167e-08, + "loss": 18.2286, + "step": 473340 + }, + { + "epoch": 0.9561969480884142, + "grad_norm": 86.52777099609375, + "learning_rate": 8.628911586851752e-08, + "loss": 19.5806, + "step": 473350 + }, + { + "epoch": 0.956217148721098, + "grad_norm": 348.8701171875, + "learning_rate": 8.622455744054958e-08, + "loss": 15.0217, + "step": 473360 + }, + { + "epoch": 0.9562373493537818, + "grad_norm": 418.804443359375, + "learning_rate": 8.616002296172654e-08, + "loss": 22.1636, + "step": 473370 + }, + { + "epoch": 0.9562575499864656, + "grad_norm": 265.16455078125, + "learning_rate": 8.609551243236424e-08, + "loss": 16.2389, + "step": 473380 + }, + { + "epoch": 0.9562777506191494, + "grad_norm": 330.1594543457031, + "learning_rate": 8.603102585277634e-08, + "loss": 21.3645, + "step": 473390 + }, + { + "epoch": 0.9562979512518333, + "grad_norm": 229.58462524414062, + "learning_rate": 8.596656322327645e-08, + "loss": 19.1739, + "step": 473400 + }, + { + "epoch": 0.9563181518845171, + "grad_norm": 329.893310546875, + "learning_rate": 8.59021245441799e-08, + "loss": 26.2905, + "step": 473410 + }, + { + "epoch": 0.9563383525172008, + "grad_norm": 271.0732421875, + "learning_rate": 8.583770981580142e-08, + "loss": 14.1466, + "step": 473420 + }, + { + "epoch": 0.9563585531498846, + "grad_norm": 37.961769104003906, + "learning_rate": 8.577331903845243e-08, + "loss": 15.111, + "step": 
473430 + }, + { + "epoch": 0.9563787537825684, + "grad_norm": 156.81661987304688, + "learning_rate": 8.57089522124488e-08, + "loss": 22.9995, + "step": 473440 + }, + { + "epoch": 0.9563989544152522, + "grad_norm": 514.8054809570312, + "learning_rate": 8.564460933810414e-08, + "loss": 15.597, + "step": 473450 + }, + { + "epoch": 0.9564191550479361, + "grad_norm": 277.9557800292969, + "learning_rate": 8.558029041573157e-08, + "loss": 18.3253, + "step": 473460 + }, + { + "epoch": 0.9564393556806199, + "grad_norm": 375.3038635253906, + "learning_rate": 8.55159954456436e-08, + "loss": 14.3698, + "step": 473470 + }, + { + "epoch": 0.9564595563133037, + "grad_norm": 115.1312026977539, + "learning_rate": 8.545172442815552e-08, + "loss": 10.1309, + "step": 473480 + }, + { + "epoch": 0.9564797569459875, + "grad_norm": 557.439697265625, + "learning_rate": 8.538747736357933e-08, + "loss": 21.8316, + "step": 473490 + }, + { + "epoch": 0.9564999575786713, + "grad_norm": 112.39806365966797, + "learning_rate": 8.53232542522292e-08, + "loss": 23.8843, + "step": 473500 + }, + { + "epoch": 0.9565201582113552, + "grad_norm": 99.22763061523438, + "learning_rate": 8.525905509441656e-08, + "loss": 16.9909, + "step": 473510 + }, + { + "epoch": 0.956540358844039, + "grad_norm": 216.84629821777344, + "learning_rate": 8.51948798904556e-08, + "loss": 31.9568, + "step": 473520 + }, + { + "epoch": 0.9565605594767228, + "grad_norm": 413.5600280761719, + "learning_rate": 8.513072864065885e-08, + "loss": 20.5399, + "step": 473530 + }, + { + "epoch": 0.9565807601094066, + "grad_norm": 408.629150390625, + "learning_rate": 8.506660134533828e-08, + "loss": 16.17, + "step": 473540 + }, + { + "epoch": 0.9566009607420904, + "grad_norm": 635.8742065429688, + "learning_rate": 8.500249800480754e-08, + "loss": 21.3376, + "step": 473550 + }, + { + "epoch": 0.9566211613747743, + "grad_norm": 622.0107421875, + "learning_rate": 8.493841861937802e-08, + "loss": 35.623, + "step": 473560 + }, + { + "epoch": 0.9566413620074581, + "grad_norm": 311.6985778808594, + "learning_rate": 8.487436318936282e-08, + "loss": 22.0412, + "step": 473570 + }, + { + "epoch": 0.9566615626401419, + "grad_norm": 505.4440612792969, + "learning_rate": 8.481033171507391e-08, + "loss": 22.9561, + "step": 473580 + }, + { + "epoch": 0.9566817632728257, + "grad_norm": 416.3011474609375, + "learning_rate": 8.474632419682327e-08, + "loss": 10.0568, + "step": 473590 + }, + { + "epoch": 0.9567019639055095, + "grad_norm": 246.4512481689453, + "learning_rate": 8.468234063492287e-08, + "loss": 12.2208, + "step": 473600 + }, + { + "epoch": 0.9567221645381934, + "grad_norm": 388.3680725097656, + "learning_rate": 8.461838102968467e-08, + "loss": 27.4346, + "step": 473610 + }, + { + "epoch": 0.9567423651708772, + "grad_norm": 199.04757690429688, + "learning_rate": 8.45544453814201e-08, + "loss": 17.5684, + "step": 473620 + }, + { + "epoch": 0.956762565803561, + "grad_norm": 252.2554168701172, + "learning_rate": 8.449053369044058e-08, + "loss": 43.0305, + "step": 473630 + }, + { + "epoch": 0.9567827664362448, + "grad_norm": 469.8954772949219, + "learning_rate": 8.442664595705862e-08, + "loss": 19.1467, + "step": 473640 + }, + { + "epoch": 0.9568029670689286, + "grad_norm": 150.32028198242188, + "learning_rate": 8.436278218158511e-08, + "loss": 13.8263, + "step": 473650 + }, + { + "epoch": 0.9568231677016125, + "grad_norm": 190.54705810546875, + "learning_rate": 8.429894236433089e-08, + "loss": 25.0537, + "step": 473660 + }, + { + "epoch": 0.9568433683342963, + "grad_norm": 
288.9213562011719, + "learning_rate": 8.423512650560795e-08, + "loss": 13.1843, + "step": 473670 + }, + { + "epoch": 0.95686356896698, + "grad_norm": 79.70426177978516, + "learning_rate": 8.417133460572658e-08, + "loss": 13.2238, + "step": 473680 + }, + { + "epoch": 0.9568837695996638, + "grad_norm": 143.64450073242188, + "learning_rate": 8.410756666499709e-08, + "loss": 10.8533, + "step": 473690 + }, + { + "epoch": 0.9569039702323476, + "grad_norm": 439.2137756347656, + "learning_rate": 8.404382268373145e-08, + "loss": 30.3238, + "step": 473700 + }, + { + "epoch": 0.9569241708650315, + "grad_norm": 410.85736083984375, + "learning_rate": 8.39801026622411e-08, + "loss": 11.4916, + "step": 473710 + }, + { + "epoch": 0.9569443714977153, + "grad_norm": 465.1092529296875, + "learning_rate": 8.391640660083411e-08, + "loss": 17.738, + "step": 473720 + }, + { + "epoch": 0.9569645721303991, + "grad_norm": 376.08203125, + "learning_rate": 8.3852734499823e-08, + "loss": 22.8946, + "step": 473730 + }, + { + "epoch": 0.9569847727630829, + "grad_norm": 195.26583862304688, + "learning_rate": 8.3789086359517e-08, + "loss": 15.1908, + "step": 473740 + }, + { + "epoch": 0.9570049733957667, + "grad_norm": 279.01898193359375, + "learning_rate": 8.372546218022747e-08, + "loss": 16.2743, + "step": 473750 + }, + { + "epoch": 0.9570251740284506, + "grad_norm": 338.1087951660156, + "learning_rate": 8.366186196226311e-08, + "loss": 11.4824, + "step": 473760 + }, + { + "epoch": 0.9570453746611344, + "grad_norm": 680.8377685546875, + "learning_rate": 8.35982857059342e-08, + "loss": 26.1408, + "step": 473770 + }, + { + "epoch": 0.9570655752938182, + "grad_norm": 339.9266357421875, + "learning_rate": 8.353473341155216e-08, + "loss": 16.2152, + "step": 473780 + }, + { + "epoch": 0.957085775926502, + "grad_norm": 435.17315673828125, + "learning_rate": 8.347120507942453e-08, + "loss": 24.8962, + "step": 473790 + }, + { + "epoch": 0.9571059765591858, + "grad_norm": 554.631103515625, + "learning_rate": 8.340770070986215e-08, + "loss": 11.9722, + "step": 473800 + }, + { + "epoch": 0.9571261771918697, + "grad_norm": 353.4429626464844, + "learning_rate": 8.334422030317424e-08, + "loss": 16.6615, + "step": 473810 + }, + { + "epoch": 0.9571463778245535, + "grad_norm": 341.6706848144531, + "learning_rate": 8.328076385967055e-08, + "loss": 14.8122, + "step": 473820 + }, + { + "epoch": 0.9571665784572373, + "grad_norm": 367.8915710449219, + "learning_rate": 8.321733137966026e-08, + "loss": 16.9164, + "step": 473830 + }, + { + "epoch": 0.9571867790899211, + "grad_norm": 143.0441436767578, + "learning_rate": 8.315392286345203e-08, + "loss": 12.8845, + "step": 473840 + }, + { + "epoch": 0.9572069797226049, + "grad_norm": 177.54774475097656, + "learning_rate": 8.30905383113556e-08, + "loss": 29.3222, + "step": 473850 + }, + { + "epoch": 0.9572271803552888, + "grad_norm": 143.09291076660156, + "learning_rate": 8.302717772367908e-08, + "loss": 15.2372, + "step": 473860 + }, + { + "epoch": 0.9572473809879726, + "grad_norm": 857.23046875, + "learning_rate": 8.296384110073164e-08, + "loss": 20.7451, + "step": 473870 + }, + { + "epoch": 0.9572675816206564, + "grad_norm": 20.85658836364746, + "learning_rate": 8.290052844282248e-08, + "loss": 24.1314, + "step": 473880 + }, + { + "epoch": 0.9572877822533402, + "grad_norm": 423.4533386230469, + "learning_rate": 8.283723975025971e-08, + "loss": 11.0826, + "step": 473890 + }, + { + "epoch": 0.957307982886024, + "grad_norm": 811.9452514648438, + "learning_rate": 8.277397502335194e-08, + "loss": 
13.4805, + "step": 473900 + }, + { + "epoch": 0.9573281835187079, + "grad_norm": 382.5894775390625, + "learning_rate": 8.271073426240672e-08, + "loss": 15.23, + "step": 473910 + }, + { + "epoch": 0.9573483841513917, + "grad_norm": 535.0155029296875, + "learning_rate": 8.264751746773381e-08, + "loss": 18.9655, + "step": 473920 + }, + { + "epoch": 0.9573685847840754, + "grad_norm": 13.196868896484375, + "learning_rate": 8.258432463964016e-08, + "loss": 9.4939, + "step": 473930 + }, + { + "epoch": 0.9573887854167592, + "grad_norm": 94.76506042480469, + "learning_rate": 8.252115577843444e-08, + "loss": 17.6372, + "step": 473940 + }, + { + "epoch": 0.957408986049443, + "grad_norm": 212.6458740234375, + "learning_rate": 8.245801088442362e-08, + "loss": 25.0338, + "step": 473950 + }, + { + "epoch": 0.9574291866821268, + "grad_norm": 258.0672302246094, + "learning_rate": 8.239488995791633e-08, + "loss": 13.0235, + "step": 473960 + }, + { + "epoch": 0.9574493873148107, + "grad_norm": 127.04428100585938, + "learning_rate": 8.233179299922012e-08, + "loss": 9.8082, + "step": 473970 + }, + { + "epoch": 0.9574695879474945, + "grad_norm": 526.0514526367188, + "learning_rate": 8.226872000864194e-08, + "loss": 18.7706, + "step": 473980 + }, + { + "epoch": 0.9574897885801783, + "grad_norm": 562.9580078125, + "learning_rate": 8.22056709864899e-08, + "loss": 27.8233, + "step": 473990 + }, + { + "epoch": 0.9575099892128621, + "grad_norm": 26.61240005493164, + "learning_rate": 8.214264593307097e-08, + "loss": 15.2482, + "step": 474000 + }, + { + "epoch": 0.9575301898455459, + "grad_norm": 424.70050048828125, + "learning_rate": 8.207964484869158e-08, + "loss": 23.444, + "step": 474010 + }, + { + "epoch": 0.9575503904782298, + "grad_norm": 365.2879333496094, + "learning_rate": 8.201666773365979e-08, + "loss": 13.3401, + "step": 474020 + }, + { + "epoch": 0.9575705911109136, + "grad_norm": 239.11878967285156, + "learning_rate": 8.195371458828316e-08, + "loss": 17.2312, + "step": 474030 + }, + { + "epoch": 0.9575907917435974, + "grad_norm": 1522.04150390625, + "learning_rate": 8.1890785412867e-08, + "loss": 38.5878, + "step": 474040 + }, + { + "epoch": 0.9576109923762812, + "grad_norm": 123.2011489868164, + "learning_rate": 8.182788020771826e-08, + "loss": 13.6253, + "step": 474050 + }, + { + "epoch": 0.957631193008965, + "grad_norm": 466.6886901855469, + "learning_rate": 8.176499897314505e-08, + "loss": 18.2509, + "step": 474060 + }, + { + "epoch": 0.9576513936416489, + "grad_norm": 331.8001708984375, + "learning_rate": 8.170214170945212e-08, + "loss": 20.0639, + "step": 474070 + }, + { + "epoch": 0.9576715942743327, + "grad_norm": 618.6167602539062, + "learning_rate": 8.163930841694589e-08, + "loss": 11.9667, + "step": 474080 + }, + { + "epoch": 0.9576917949070165, + "grad_norm": 716.3890380859375, + "learning_rate": 8.157649909593335e-08, + "loss": 22.36, + "step": 474090 + }, + { + "epoch": 0.9577119955397003, + "grad_norm": 433.8890686035156, + "learning_rate": 8.151371374672146e-08, + "loss": 25.017, + "step": 474100 + }, + { + "epoch": 0.9577321961723841, + "grad_norm": 1074.271728515625, + "learning_rate": 8.145095236961387e-08, + "loss": 20.0564, + "step": 474110 + }, + { + "epoch": 0.957752396805068, + "grad_norm": 633.5374755859375, + "learning_rate": 8.13882149649181e-08, + "loss": 19.4297, + "step": 474120 + }, + { + "epoch": 0.9577725974377518, + "grad_norm": 354.6879577636719, + "learning_rate": 8.132550153294005e-08, + "loss": 15.8962, + "step": 474130 + }, + { + "epoch": 0.9577927980704356, + 
"grad_norm": 583.9142456054688, + "learning_rate": 8.1262812073985e-08, + "loss": 32.1786, + "step": 474140 + }, + { + "epoch": 0.9578129987031194, + "grad_norm": 79.23692321777344, + "learning_rate": 8.120014658835828e-08, + "loss": 18.3146, + "step": 474150 + }, + { + "epoch": 0.9578331993358032, + "grad_norm": 208.16111755371094, + "learning_rate": 8.11375050763652e-08, + "loss": 9.589, + "step": 474160 + }, + { + "epoch": 0.9578533999684871, + "grad_norm": 248.46812438964844, + "learning_rate": 8.107488753831161e-08, + "loss": 8.9652, + "step": 474170 + }, + { + "epoch": 0.9578736006011709, + "grad_norm": 376.1187438964844, + "learning_rate": 8.101229397450228e-08, + "loss": 24.8758, + "step": 474180 + }, + { + "epoch": 0.9578938012338546, + "grad_norm": 769.6351928710938, + "learning_rate": 8.094972438524251e-08, + "loss": 23.3915, + "step": 474190 + }, + { + "epoch": 0.9579140018665384, + "grad_norm": 93.28047943115234, + "learning_rate": 8.088717877083706e-08, + "loss": 11.5412, + "step": 474200 + }, + { + "epoch": 0.9579342024992222, + "grad_norm": 330.7162170410156, + "learning_rate": 8.082465713159126e-08, + "loss": 14.759, + "step": 474210 + }, + { + "epoch": 0.957954403131906, + "grad_norm": 514.1710205078125, + "learning_rate": 8.076215946780874e-08, + "loss": 15.7291, + "step": 474220 + }, + { + "epoch": 0.9579746037645899, + "grad_norm": 361.1650085449219, + "learning_rate": 8.069968577979536e-08, + "loss": 20.1469, + "step": 474230 + }, + { + "epoch": 0.9579948043972737, + "grad_norm": 208.582275390625, + "learning_rate": 8.063723606785478e-08, + "loss": 11.3949, + "step": 474240 + }, + { + "epoch": 0.9580150050299575, + "grad_norm": 294.64715576171875, + "learning_rate": 8.057481033229176e-08, + "loss": 12.0631, + "step": 474250 + }, + { + "epoch": 0.9580352056626413, + "grad_norm": 946.9326782226562, + "learning_rate": 8.051240857341102e-08, + "loss": 23.1346, + "step": 474260 + }, + { + "epoch": 0.9580554062953252, + "grad_norm": 424.99945068359375, + "learning_rate": 8.045003079151514e-08, + "loss": 14.3341, + "step": 474270 + }, + { + "epoch": 0.958075606928009, + "grad_norm": 271.5953369140625, + "learning_rate": 8.038767698690996e-08, + "loss": 22.1632, + "step": 474280 + }, + { + "epoch": 0.9580958075606928, + "grad_norm": 500.50146484375, + "learning_rate": 8.032534715989859e-08, + "loss": 16.352, + "step": 474290 + }, + { + "epoch": 0.9581160081933766, + "grad_norm": 4.317878246307373, + "learning_rate": 8.02630413107841e-08, + "loss": 28.6135, + "step": 474300 + }, + { + "epoch": 0.9581362088260604, + "grad_norm": 193.65724182128906, + "learning_rate": 8.020075943987071e-08, + "loss": 27.3813, + "step": 474310 + }, + { + "epoch": 0.9581564094587443, + "grad_norm": 400.7613525390625, + "learning_rate": 8.013850154746317e-08, + "loss": 25.7266, + "step": 474320 + }, + { + "epoch": 0.9581766100914281, + "grad_norm": 190.98426818847656, + "learning_rate": 8.007626763386345e-08, + "loss": 10.3764, + "step": 474330 + }, + { + "epoch": 0.9581968107241119, + "grad_norm": 268.8352355957031, + "learning_rate": 8.001405769937464e-08, + "loss": 44.0869, + "step": 474340 + }, + { + "epoch": 0.9582170113567957, + "grad_norm": 131.71649169921875, + "learning_rate": 7.995187174430152e-08, + "loss": 7.6256, + "step": 474350 + }, + { + "epoch": 0.9582372119894795, + "grad_norm": 427.9704284667969, + "learning_rate": 7.988970976894605e-08, + "loss": 17.7285, + "step": 474360 + }, + { + "epoch": 0.9582574126221634, + "grad_norm": 440.0675354003906, + "learning_rate": 
7.982757177361078e-08, + "loss": 27.5012, + "step": 474370 + }, + { + "epoch": 0.9582776132548472, + "grad_norm": 119.3633804321289, + "learning_rate": 7.976545775859934e-08, + "loss": 16.8517, + "step": 474380 + }, + { + "epoch": 0.958297813887531, + "grad_norm": 484.5781555175781, + "learning_rate": 7.970336772421483e-08, + "loss": 10.7958, + "step": 474390 + }, + { + "epoch": 0.9583180145202148, + "grad_norm": 140.41004943847656, + "learning_rate": 7.964130167075923e-08, + "loss": 27.1162, + "step": 474400 + }, + { + "epoch": 0.9583382151528986, + "grad_norm": 40.1215705871582, + "learning_rate": 7.957925959853452e-08, + "loss": 16.428, + "step": 474410 + }, + { + "epoch": 0.9583584157855825, + "grad_norm": 349.8250732421875, + "learning_rate": 7.951724150784434e-08, + "loss": 15.7237, + "step": 474420 + }, + { + "epoch": 0.9583786164182663, + "grad_norm": 1398.2322998046875, + "learning_rate": 7.945524739899069e-08, + "loss": 16.3054, + "step": 474430 + }, + { + "epoch": 0.95839881705095, + "grad_norm": 201.12515258789062, + "learning_rate": 7.939327727227441e-08, + "loss": 13.2908, + "step": 474440 + }, + { + "epoch": 0.9584190176836338, + "grad_norm": 303.38531494140625, + "learning_rate": 7.933133112799918e-08, + "loss": 5.1576, + "step": 474450 + }, + { + "epoch": 0.9584392183163176, + "grad_norm": 196.3020782470703, + "learning_rate": 7.926940896646584e-08, + "loss": 33.5378, + "step": 474460 + }, + { + "epoch": 0.9584594189490014, + "grad_norm": 862.3712158203125, + "learning_rate": 7.920751078797695e-08, + "loss": 23.6772, + "step": 474470 + }, + { + "epoch": 0.9584796195816853, + "grad_norm": 55.28944778442383, + "learning_rate": 7.914563659283392e-08, + "loss": 6.0035, + "step": 474480 + }, + { + "epoch": 0.9584998202143691, + "grad_norm": 519.5173950195312, + "learning_rate": 7.908378638133762e-08, + "loss": 13.2152, + "step": 474490 + }, + { + "epoch": 0.9585200208470529, + "grad_norm": 744.1380004882812, + "learning_rate": 7.90219601537906e-08, + "loss": 29.0983, + "step": 474500 + }, + { + "epoch": 0.9585402214797367, + "grad_norm": 117.6964111328125, + "learning_rate": 7.896015791049372e-08, + "loss": 21.7835, + "step": 474510 + }, + { + "epoch": 0.9585604221124205, + "grad_norm": 940.3536376953125, + "learning_rate": 7.889837965174784e-08, + "loss": 13.7541, + "step": 474520 + }, + { + "epoch": 0.9585806227451044, + "grad_norm": 88.52216339111328, + "learning_rate": 7.883662537785442e-08, + "loss": 25.2124, + "step": 474530 + }, + { + "epoch": 0.9586008233777882, + "grad_norm": 71.35601043701172, + "learning_rate": 7.877489508911429e-08, + "loss": 29.5819, + "step": 474540 + }, + { + "epoch": 0.958621024010472, + "grad_norm": 405.7100830078125, + "learning_rate": 7.871318878582889e-08, + "loss": 18.056, + "step": 474550 + }, + { + "epoch": 0.9586412246431558, + "grad_norm": 50.22018051147461, + "learning_rate": 7.865150646829855e-08, + "loss": 6.516, + "step": 474560 + }, + { + "epoch": 0.9586614252758396, + "grad_norm": 324.6778869628906, + "learning_rate": 7.858984813682357e-08, + "loss": 13.7222, + "step": 474570 + }, + { + "epoch": 0.9586816259085235, + "grad_norm": 395.4815368652344, + "learning_rate": 7.852821379170538e-08, + "loss": 26.4004, + "step": 474580 + }, + { + "epoch": 0.9587018265412073, + "grad_norm": 268.73529052734375, + "learning_rate": 7.846660343324263e-08, + "loss": 19.3143, + "step": 474590 + }, + { + "epoch": 0.9587220271738911, + "grad_norm": 604.1646728515625, + "learning_rate": 7.840501706173786e-08, + "loss": 19.3332, + "step": 474600 + }, + 
{ + "epoch": 0.9587422278065749, + "grad_norm": 353.2278747558594, + "learning_rate": 7.834345467748972e-08, + "loss": 17.0513, + "step": 474610 + }, + { + "epoch": 0.9587624284392587, + "grad_norm": 295.6312561035156, + "learning_rate": 7.828191628079851e-08, + "loss": 19.6961, + "step": 474620 + }, + { + "epoch": 0.9587826290719426, + "grad_norm": 174.08670043945312, + "learning_rate": 7.8220401871964e-08, + "loss": 25.454, + "step": 474630 + }, + { + "epoch": 0.9588028297046264, + "grad_norm": 54.75748825073242, + "learning_rate": 7.815891145128763e-08, + "loss": 11.6366, + "step": 474640 + }, + { + "epoch": 0.9588230303373102, + "grad_norm": 355.44708251953125, + "learning_rate": 7.809744501906635e-08, + "loss": 13.3484, + "step": 474650 + }, + { + "epoch": 0.958843230969994, + "grad_norm": 343.7234191894531, + "learning_rate": 7.803600257560162e-08, + "loss": 29.4316, + "step": 474660 + }, + { + "epoch": 0.9588634316026778, + "grad_norm": 491.80804443359375, + "learning_rate": 7.797458412119264e-08, + "loss": 14.72, + "step": 474670 + }, + { + "epoch": 0.9588836322353617, + "grad_norm": 367.8491516113281, + "learning_rate": 7.791318965613859e-08, + "loss": 11.354, + "step": 474680 + }, + { + "epoch": 0.9589038328680455, + "grad_norm": 538.6314697265625, + "learning_rate": 7.785181918073814e-08, + "loss": 14.5587, + "step": 474690 + }, + { + "epoch": 0.9589240335007292, + "grad_norm": 453.12664794921875, + "learning_rate": 7.779047269529105e-08, + "loss": 28.2727, + "step": 474700 + }, + { + "epoch": 0.958944234133413, + "grad_norm": 237.34593200683594, + "learning_rate": 7.772915020009708e-08, + "loss": 16.4047, + "step": 474710 + }, + { + "epoch": 0.9589644347660968, + "grad_norm": 76.294921875, + "learning_rate": 7.766785169545376e-08, + "loss": 23.21, + "step": 474720 + }, + { + "epoch": 0.9589846353987806, + "grad_norm": 149.80584716796875, + "learning_rate": 7.760657718165976e-08, + "loss": 22.3909, + "step": 474730 + }, + { + "epoch": 0.9590048360314645, + "grad_norm": 376.0763854980469, + "learning_rate": 7.754532665901482e-08, + "loss": 29.0698, + "step": 474740 + }, + { + "epoch": 0.9590250366641483, + "grad_norm": 468.72503662109375, + "learning_rate": 7.748410012781705e-08, + "loss": 11.0402, + "step": 474750 + }, + { + "epoch": 0.9590452372968321, + "grad_norm": 140.60446166992188, + "learning_rate": 7.742289758836452e-08, + "loss": 13.6312, + "step": 474760 + }, + { + "epoch": 0.9590654379295159, + "grad_norm": 254.8717498779297, + "learning_rate": 7.736171904095591e-08, + "loss": 33.6136, + "step": 474770 + }, + { + "epoch": 0.9590856385621997, + "grad_norm": 121.12136840820312, + "learning_rate": 7.73005644858893e-08, + "loss": 22.7845, + "step": 474780 + }, + { + "epoch": 0.9591058391948836, + "grad_norm": 93.96916198730469, + "learning_rate": 7.723943392346223e-08, + "loss": 20.9016, + "step": 474790 + }, + { + "epoch": 0.9591260398275674, + "grad_norm": 10.694808959960938, + "learning_rate": 7.717832735397335e-08, + "loss": 6.3451, + "step": 474800 + }, + { + "epoch": 0.9591462404602512, + "grad_norm": 118.19041442871094, + "learning_rate": 7.71172447777202e-08, + "loss": 11.2926, + "step": 474810 + }, + { + "epoch": 0.959166441092935, + "grad_norm": 255.38201904296875, + "learning_rate": 7.705618619500032e-08, + "loss": 18.6042, + "step": 474820 + }, + { + "epoch": 0.9591866417256188, + "grad_norm": 29.095916748046875, + "learning_rate": 7.699515160611182e-08, + "loss": 16.4324, + "step": 474830 + }, + { + "epoch": 0.9592068423583027, + "grad_norm": 
350.3114013671875, + "learning_rate": 7.693414101135166e-08, + "loss": 13.2131, + "step": 474840 + }, + { + "epoch": 0.9592270429909865, + "grad_norm": 225.74880981445312, + "learning_rate": 7.687315441101795e-08, + "loss": 10.0032, + "step": 474850 + }, + { + "epoch": 0.9592472436236703, + "grad_norm": 950.14892578125, + "learning_rate": 7.681219180540655e-08, + "loss": 18.5111, + "step": 474860 + }, + { + "epoch": 0.9592674442563541, + "grad_norm": 336.2343444824219, + "learning_rate": 7.675125319481614e-08, + "loss": 36.791, + "step": 474870 + }, + { + "epoch": 0.959287644889038, + "grad_norm": 241.7272186279297, + "learning_rate": 7.669033857954255e-08, + "loss": 6.8688, + "step": 474880 + }, + { + "epoch": 0.9593078455217218, + "grad_norm": 182.2271270751953, + "learning_rate": 7.662944795988337e-08, + "loss": 18.8567, + "step": 474890 + }, + { + "epoch": 0.9593280461544056, + "grad_norm": 396.48284912109375, + "learning_rate": 7.656858133613498e-08, + "loss": 15.0419, + "step": 474900 + }, + { + "epoch": 0.9593482467870894, + "grad_norm": 544.12109375, + "learning_rate": 7.65077387085944e-08, + "loss": 18.7172, + "step": 474910 + }, + { + "epoch": 0.9593684474197732, + "grad_norm": 0.0, + "learning_rate": 7.64469200775575e-08, + "loss": 7.0933, + "step": 474920 + }, + { + "epoch": 0.959388648052457, + "grad_norm": 231.64244079589844, + "learning_rate": 7.638612544332181e-08, + "loss": 20.6236, + "step": 474930 + }, + { + "epoch": 0.9594088486851409, + "grad_norm": 248.8718719482422, + "learning_rate": 7.632535480618264e-08, + "loss": 29.4236, + "step": 474940 + }, + { + "epoch": 0.9594290493178247, + "grad_norm": 177.2399139404297, + "learning_rate": 7.626460816643588e-08, + "loss": 15.6347, + "step": 474950 + }, + { + "epoch": 0.9594492499505084, + "grad_norm": 396.2425537109375, + "learning_rate": 7.620388552437907e-08, + "loss": 18.1908, + "step": 474960 + }, + { + "epoch": 0.9594694505831922, + "grad_norm": 185.56700134277344, + "learning_rate": 7.614318688030753e-08, + "loss": 22.8882, + "step": 474970 + }, + { + "epoch": 0.959489651215876, + "grad_norm": 348.06341552734375, + "learning_rate": 7.608251223451601e-08, + "loss": 14.1523, + "step": 474980 + }, + { + "epoch": 0.9595098518485599, + "grad_norm": 102.40057373046875, + "learning_rate": 7.602186158730152e-08, + "loss": 23.2, + "step": 474990 + }, + { + "epoch": 0.9595300524812437, + "grad_norm": 506.0945739746094, + "learning_rate": 7.59612349389599e-08, + "loss": 15.9318, + "step": 475000 + }, + { + "epoch": 0.9595502531139275, + "grad_norm": 328.2109680175781, + "learning_rate": 7.590063228978539e-08, + "loss": 21.683, + "step": 475010 + }, + { + "epoch": 0.9595704537466113, + "grad_norm": 660.8423461914062, + "learning_rate": 7.584005364007386e-08, + "loss": 19.7161, + "step": 475020 + }, + { + "epoch": 0.9595906543792951, + "grad_norm": 1033.015869140625, + "learning_rate": 7.577949899012116e-08, + "loss": 18.3523, + "step": 475030 + }, + { + "epoch": 0.959610855011979, + "grad_norm": 179.03768920898438, + "learning_rate": 7.571896834022152e-08, + "loss": 13.8454, + "step": 475040 + }, + { + "epoch": 0.9596310556446628, + "grad_norm": 301.87847900390625, + "learning_rate": 7.565846169067026e-08, + "loss": 19.6426, + "step": 475050 + }, + { + "epoch": 0.9596512562773466, + "grad_norm": 212.1444854736328, + "learning_rate": 7.559797904176325e-08, + "loss": 10.688, + "step": 475060 + }, + { + "epoch": 0.9596714569100304, + "grad_norm": 205.0482635498047, + "learning_rate": 7.553752039379359e-08, + "loss": 21.518, + 
"step": 475070 + }, + { + "epoch": 0.9596916575427142, + "grad_norm": 354.538818359375, + "learning_rate": 7.547708574705714e-08, + "loss": 9.6673, + "step": 475080 + }, + { + "epoch": 0.9597118581753981, + "grad_norm": 517.3529663085938, + "learning_rate": 7.541667510184813e-08, + "loss": 11.3955, + "step": 475090 + }, + { + "epoch": 0.9597320588080819, + "grad_norm": 348.29376220703125, + "learning_rate": 7.535628845846077e-08, + "loss": 22.1537, + "step": 475100 + }, + { + "epoch": 0.9597522594407657, + "grad_norm": 358.4526672363281, + "learning_rate": 7.529592581718981e-08, + "loss": 20.0223, + "step": 475110 + }, + { + "epoch": 0.9597724600734495, + "grad_norm": 477.21746826171875, + "learning_rate": 7.52355871783289e-08, + "loss": 27.5024, + "step": 475120 + }, + { + "epoch": 0.9597926607061333, + "grad_norm": 515.2587890625, + "learning_rate": 7.517527254217282e-08, + "loss": 13.9886, + "step": 475130 + }, + { + "epoch": 0.9598128613388172, + "grad_norm": 370.2596130371094, + "learning_rate": 7.511498190901467e-08, + "loss": 17.2683, + "step": 475140 + }, + { + "epoch": 0.959833061971501, + "grad_norm": 705.9306030273438, + "learning_rate": 7.50547152791492e-08, + "loss": 12.0799, + "step": 475150 + }, + { + "epoch": 0.9598532626041848, + "grad_norm": 146.8015594482422, + "learning_rate": 7.499447265286952e-08, + "loss": 14.1286, + "step": 475160 + }, + { + "epoch": 0.9598734632368686, + "grad_norm": 332.004638671875, + "learning_rate": 7.493425403046928e-08, + "loss": 10.444, + "step": 475170 + }, + { + "epoch": 0.9598936638695524, + "grad_norm": 378.7012023925781, + "learning_rate": 7.487405941224268e-08, + "loss": 14.7322, + "step": 475180 + }, + { + "epoch": 0.9599138645022363, + "grad_norm": 475.091552734375, + "learning_rate": 7.481388879848228e-08, + "loss": 14.2766, + "step": 475190 + }, + { + "epoch": 0.9599340651349201, + "grad_norm": 111.34712982177734, + "learning_rate": 7.475374218948118e-08, + "loss": 17.2071, + "step": 475200 + }, + { + "epoch": 0.9599542657676038, + "grad_norm": 187.49609375, + "learning_rate": 7.469361958553356e-08, + "loss": 7.586, + "step": 475210 + }, + { + "epoch": 0.9599744664002876, + "grad_norm": 528.530517578125, + "learning_rate": 7.463352098693199e-08, + "loss": 9.4207, + "step": 475220 + }, + { + "epoch": 0.9599946670329714, + "grad_norm": 682.244140625, + "learning_rate": 7.457344639396902e-08, + "loss": 15.9991, + "step": 475230 + }, + { + "epoch": 0.9600148676656552, + "grad_norm": 180.2545623779297, + "learning_rate": 7.451339580693718e-08, + "loss": 15.8759, + "step": 475240 + }, + { + "epoch": 0.9600350682983391, + "grad_norm": 314.72698974609375, + "learning_rate": 7.445336922613067e-08, + "loss": 20.9214, + "step": 475250 + }, + { + "epoch": 0.9600552689310229, + "grad_norm": 167.43357849121094, + "learning_rate": 7.439336665184039e-08, + "loss": 19.3911, + "step": 475260 + }, + { + "epoch": 0.9600754695637067, + "grad_norm": 223.72708129882812, + "learning_rate": 7.433338808435941e-08, + "loss": 16.5949, + "step": 475270 + }, + { + "epoch": 0.9600956701963905, + "grad_norm": 348.5341491699219, + "learning_rate": 7.427343352398031e-08, + "loss": 40.8376, + "step": 475280 + }, + { + "epoch": 0.9601158708290743, + "grad_norm": 1039.63427734375, + "learning_rate": 7.421350297099505e-08, + "loss": 24.5333, + "step": 475290 + }, + { + "epoch": 0.9601360714617582, + "grad_norm": 505.9857482910156, + "learning_rate": 7.415359642569564e-08, + "loss": 43.2792, + "step": 475300 + }, + { + "epoch": 0.960156272094442, + "grad_norm": 
211.31011962890625, + "learning_rate": 7.409371388837405e-08, + "loss": 16.5823, + "step": 475310 + }, + { + "epoch": 0.9601764727271258, + "grad_norm": 541.6423950195312, + "learning_rate": 7.403385535932284e-08, + "loss": 13.8085, + "step": 475320 + }, + { + "epoch": 0.9601966733598096, + "grad_norm": 231.13262939453125, + "learning_rate": 7.397402083883287e-08, + "loss": 6.0951, + "step": 475330 + }, + { + "epoch": 0.9602168739924934, + "grad_norm": 235.80392456054688, + "learning_rate": 7.39142103271956e-08, + "loss": 35.2601, + "step": 475340 + }, + { + "epoch": 0.9602370746251773, + "grad_norm": 87.18898010253906, + "learning_rate": 7.385442382470354e-08, + "loss": 12.2211, + "step": 475350 + }, + { + "epoch": 0.9602572752578611, + "grad_norm": 449.9172058105469, + "learning_rate": 7.379466133164759e-08, + "loss": 11.9918, + "step": 475360 + }, + { + "epoch": 0.9602774758905449, + "grad_norm": 16.43218994140625, + "learning_rate": 7.373492284831862e-08, + "loss": 10.8565, + "step": 475370 + }, + { + "epoch": 0.9602976765232287, + "grad_norm": 662.5327758789062, + "learning_rate": 7.367520837500808e-08, + "loss": 18.2149, + "step": 475380 + }, + { + "epoch": 0.9603178771559125, + "grad_norm": 198.49664306640625, + "learning_rate": 7.361551791200794e-08, + "loss": 13.8516, + "step": 475390 + }, + { + "epoch": 0.9603380777885964, + "grad_norm": 309.06439208984375, + "learning_rate": 7.355585145960743e-08, + "loss": 20.8389, + "step": 475400 + }, + { + "epoch": 0.9603582784212802, + "grad_norm": 412.4136962890625, + "learning_rate": 7.34962090180985e-08, + "loss": 13.8319, + "step": 475410 + }, + { + "epoch": 0.960378479053964, + "grad_norm": 311.71685791015625, + "learning_rate": 7.343659058777098e-08, + "loss": 21.7253, + "step": 475420 + }, + { + "epoch": 0.9603986796866478, + "grad_norm": 0.0, + "learning_rate": 7.33769961689168e-08, + "loss": 12.6646, + "step": 475430 + }, + { + "epoch": 0.9604188803193316, + "grad_norm": 291.0174560546875, + "learning_rate": 7.331742576182466e-08, + "loss": 22.1376, + "step": 475440 + }, + { + "epoch": 0.9604390809520155, + "grad_norm": 91.20379638671875, + "learning_rate": 7.325787936678708e-08, + "loss": 18.15, + "step": 475450 + }, + { + "epoch": 0.9604592815846993, + "grad_norm": 171.52337646484375, + "learning_rate": 7.319835698409217e-08, + "loss": 16.8848, + "step": 475460 + }, + { + "epoch": 0.960479482217383, + "grad_norm": 489.40252685546875, + "learning_rate": 7.313885861403135e-08, + "loss": 20.3697, + "step": 475470 + }, + { + "epoch": 0.9604996828500668, + "grad_norm": 297.5575256347656, + "learning_rate": 7.307938425689388e-08, + "loss": 13.0444, + "step": 475480 + }, + { + "epoch": 0.9605198834827506, + "grad_norm": 816.5662231445312, + "learning_rate": 7.301993391297003e-08, + "loss": 21.4931, + "step": 475490 + }, + { + "epoch": 0.9605400841154345, + "grad_norm": 462.9900817871094, + "learning_rate": 7.296050758254958e-08, + "loss": 16.9646, + "step": 475500 + }, + { + "epoch": 0.9605602847481183, + "grad_norm": 358.0087890625, + "learning_rate": 7.290110526592231e-08, + "loss": 13.6412, + "step": 475510 + }, + { + "epoch": 0.9605804853808021, + "grad_norm": 226.2945098876953, + "learning_rate": 7.284172696337688e-08, + "loss": 16.7119, + "step": 475520 + }, + { + "epoch": 0.9606006860134859, + "grad_norm": 57.92973709106445, + "learning_rate": 7.27823726752036e-08, + "loss": 10.6754, + "step": 475530 + }, + { + "epoch": 0.9606208866461697, + "grad_norm": 444.6774597167969, + "learning_rate": 7.272304240169115e-08, + "loss": 
13.8628, + "step": 475540 + }, + { + "epoch": 0.9606410872788536, + "grad_norm": 343.57550048828125, + "learning_rate": 7.266373614312927e-08, + "loss": 10.9022, + "step": 475550 + }, + { + "epoch": 0.9606612879115374, + "grad_norm": 770.028076171875, + "learning_rate": 7.260445389980609e-08, + "loss": 16.674, + "step": 475560 + }, + { + "epoch": 0.9606814885442212, + "grad_norm": 388.7988586425781, + "learning_rate": 7.25451956720119e-08, + "loss": 23.1462, + "step": 475570 + }, + { + "epoch": 0.960701689176905, + "grad_norm": 696.9945068359375, + "learning_rate": 7.248596146003484e-08, + "loss": 17.2478, + "step": 475580 + }, + { + "epoch": 0.9607218898095888, + "grad_norm": 542.0843505859375, + "learning_rate": 7.242675126416299e-08, + "loss": 29.4484, + "step": 475590 + }, + { + "epoch": 0.9607420904422727, + "grad_norm": 227.0558319091797, + "learning_rate": 7.236756508468612e-08, + "loss": 15.132, + "step": 475600 + }, + { + "epoch": 0.9607622910749565, + "grad_norm": 386.5271301269531, + "learning_rate": 7.230840292189179e-08, + "loss": 9.2645, + "step": 475610 + }, + { + "epoch": 0.9607824917076403, + "grad_norm": 256.40350341796875, + "learning_rate": 7.224926477606864e-08, + "loss": 21.749, + "step": 475620 + }, + { + "epoch": 0.9608026923403241, + "grad_norm": 632.7886962890625, + "learning_rate": 7.219015064750478e-08, + "loss": 19.2842, + "step": 475630 + }, + { + "epoch": 0.9608228929730079, + "grad_norm": 666.5670166015625, + "learning_rate": 7.213106053648889e-08, + "loss": 15.0993, + "step": 475640 + }, + { + "epoch": 0.9608430936056918, + "grad_norm": 11.963786125183105, + "learning_rate": 7.207199444330847e-08, + "loss": 14.6905, + "step": 475650 + }, + { + "epoch": 0.9608632942383756, + "grad_norm": 266.03759765625, + "learning_rate": 7.201295236825112e-08, + "loss": 13.424, + "step": 475660 + }, + { + "epoch": 0.9608834948710594, + "grad_norm": 662.6854248046875, + "learning_rate": 7.195393431160491e-08, + "loss": 12.8577, + "step": 475670 + }, + { + "epoch": 0.9609036955037432, + "grad_norm": 32.202144622802734, + "learning_rate": 7.189494027365795e-08, + "loss": 25.5644, + "step": 475680 + }, + { + "epoch": 0.960923896136427, + "grad_norm": 900.0670166015625, + "learning_rate": 7.183597025469669e-08, + "loss": 19.6551, + "step": 475690 + }, + { + "epoch": 0.9609440967691109, + "grad_norm": 299.69903564453125, + "learning_rate": 7.177702425500977e-08, + "loss": 24.7179, + "step": 475700 + }, + { + "epoch": 0.9609642974017947, + "grad_norm": 133.36920166015625, + "learning_rate": 7.171810227488363e-08, + "loss": 10.9328, + "step": 475710 + }, + { + "epoch": 0.9609844980344784, + "grad_norm": 364.9358215332031, + "learning_rate": 7.165920431460637e-08, + "loss": 20.8803, + "step": 475720 + }, + { + "epoch": 0.9610046986671622, + "grad_norm": 402.2028503417969, + "learning_rate": 7.16003303744639e-08, + "loss": 13.6, + "step": 475730 + }, + { + "epoch": 0.961024899299846, + "grad_norm": 288.1083679199219, + "learning_rate": 7.154148045474319e-08, + "loss": 22.305, + "step": 475740 + }, + { + "epoch": 0.9610450999325298, + "grad_norm": 284.3811950683594, + "learning_rate": 7.148265455573233e-08, + "loss": 14.0048, + "step": 475750 + }, + { + "epoch": 0.9610653005652137, + "grad_norm": 295.71832275390625, + "learning_rate": 7.142385267771667e-08, + "loss": 23.9628, + "step": 475760 + }, + { + "epoch": 0.9610855011978975, + "grad_norm": 337.1648254394531, + "learning_rate": 7.136507482098375e-08, + "loss": 17.5986, + "step": 475770 + }, + { + "epoch": 0.9611057018305813, + 
"grad_norm": 39.2862434387207, + "learning_rate": 7.130632098581947e-08, + "loss": 20.2325, + "step": 475780 + }, + { + "epoch": 0.9611259024632651, + "grad_norm": 1028.286865234375, + "learning_rate": 7.124759117251078e-08, + "loss": 31.2637, + "step": 475790 + }, + { + "epoch": 0.961146103095949, + "grad_norm": 658.9299926757812, + "learning_rate": 7.118888538134361e-08, + "loss": 12.5292, + "step": 475800 + }, + { + "epoch": 0.9611663037286328, + "grad_norm": 798.3705444335938, + "learning_rate": 7.113020361260325e-08, + "loss": 18.518, + "step": 475810 + }, + { + "epoch": 0.9611865043613166, + "grad_norm": 51.175601959228516, + "learning_rate": 7.107154586657727e-08, + "loss": 14.5358, + "step": 475820 + }, + { + "epoch": 0.9612067049940004, + "grad_norm": 144.55946350097656, + "learning_rate": 7.101291214355043e-08, + "loss": 15.6595, + "step": 475830 + }, + { + "epoch": 0.9612269056266842, + "grad_norm": 248.81558227539062, + "learning_rate": 7.095430244380863e-08, + "loss": 11.5365, + "step": 475840 + }, + { + "epoch": 0.961247106259368, + "grad_norm": 728.4669799804688, + "learning_rate": 7.089571676763773e-08, + "loss": 26.9842, + "step": 475850 + }, + { + "epoch": 0.9612673068920519, + "grad_norm": 232.49505615234375, + "learning_rate": 7.083715511532419e-08, + "loss": 16.1763, + "step": 475860 + }, + { + "epoch": 0.9612875075247357, + "grad_norm": 485.213134765625, + "learning_rate": 7.077861748715165e-08, + "loss": 14.5382, + "step": 475870 + }, + { + "epoch": 0.9613077081574195, + "grad_norm": 15.343710899353027, + "learning_rate": 7.072010388340656e-08, + "loss": 7.3748, + "step": 475880 + }, + { + "epoch": 0.9613279087901033, + "grad_norm": 376.9869384765625, + "learning_rate": 7.066161430437368e-08, + "loss": 17.4395, + "step": 475890 + }, + { + "epoch": 0.9613481094227871, + "grad_norm": 403.6138000488281, + "learning_rate": 7.060314875033836e-08, + "loss": 11.6849, + "step": 475900 + }, + { + "epoch": 0.961368310055471, + "grad_norm": 160.56594848632812, + "learning_rate": 7.054470722158535e-08, + "loss": 17.0336, + "step": 475910 + }, + { + "epoch": 0.9613885106881548, + "grad_norm": 440.259521484375, + "learning_rate": 7.048628971839944e-08, + "loss": 25.2777, + "step": 475920 + }, + { + "epoch": 0.9614087113208386, + "grad_norm": 219.51797485351562, + "learning_rate": 7.042789624106594e-08, + "loss": 13.5234, + "step": 475930 + }, + { + "epoch": 0.9614289119535224, + "grad_norm": 540.7685546875, + "learning_rate": 7.036952678986852e-08, + "loss": 25.3983, + "step": 475940 + }, + { + "epoch": 0.9614491125862062, + "grad_norm": 333.6399841308594, + "learning_rate": 7.031118136509196e-08, + "loss": 19.2188, + "step": 475950 + }, + { + "epoch": 0.9614693132188901, + "grad_norm": 218.74139404296875, + "learning_rate": 7.025285996702158e-08, + "loss": 25.5605, + "step": 475960 + }, + { + "epoch": 0.9614895138515739, + "grad_norm": 297.5934753417969, + "learning_rate": 7.019456259594049e-08, + "loss": 19.9465, + "step": 475970 + }, + { + "epoch": 0.9615097144842576, + "grad_norm": 229.1687774658203, + "learning_rate": 7.01362892521329e-08, + "loss": 16.26, + "step": 475980 + }, + { + "epoch": 0.9615299151169414, + "grad_norm": 333.52423095703125, + "learning_rate": 7.007803993588358e-08, + "loss": 17.7302, + "step": 475990 + }, + { + "epoch": 0.9615501157496252, + "grad_norm": 419.1000061035156, + "learning_rate": 7.001981464747565e-08, + "loss": 21.5595, + "step": 476000 + }, + { + "epoch": 0.961570316382309, + "grad_norm": 307.7343444824219, + "learning_rate": 
6.996161338719332e-08, + "loss": 21.56, + "step": 476010 + }, + { + "epoch": 0.9615905170149929, + "grad_norm": 160.56182861328125, + "learning_rate": 6.990343615532025e-08, + "loss": 19.3788, + "step": 476020 + }, + { + "epoch": 0.9616107176476767, + "grad_norm": 504.5084228515625, + "learning_rate": 6.9845282952139e-08, + "loss": 18.6281, + "step": 476030 + }, + { + "epoch": 0.9616309182803605, + "grad_norm": 415.0246276855469, + "learning_rate": 6.978715377793489e-08, + "loss": 16.6749, + "step": 476040 + }, + { + "epoch": 0.9616511189130443, + "grad_norm": 234.70310974121094, + "learning_rate": 6.972904863298991e-08, + "loss": 17.0805, + "step": 476050 + }, + { + "epoch": 0.9616713195457282, + "grad_norm": 501.95538330078125, + "learning_rate": 6.967096751758773e-08, + "loss": 15.4388, + "step": 476060 + }, + { + "epoch": 0.961691520178412, + "grad_norm": 480.3716735839844, + "learning_rate": 6.961291043201145e-08, + "loss": 17.3112, + "step": 476070 + }, + { + "epoch": 0.9617117208110958, + "grad_norm": 252.87176513671875, + "learning_rate": 6.955487737654309e-08, + "loss": 13.0375, + "step": 476080 + }, + { + "epoch": 0.9617319214437796, + "grad_norm": 326.9039001464844, + "learning_rate": 6.949686835146685e-08, + "loss": 15.2768, + "step": 476090 + }, + { + "epoch": 0.9617521220764634, + "grad_norm": 370.8696594238281, + "learning_rate": 6.943888335706472e-08, + "loss": 23.2639, + "step": 476100 + }, + { + "epoch": 0.9617723227091473, + "grad_norm": 819.5240478515625, + "learning_rate": 6.938092239361982e-08, + "loss": 12.4631, + "step": 476110 + }, + { + "epoch": 0.9617925233418311, + "grad_norm": 419.8603820800781, + "learning_rate": 6.932298546141413e-08, + "loss": 13.5396, + "step": 476120 + }, + { + "epoch": 0.9618127239745149, + "grad_norm": 370.8901062011719, + "learning_rate": 6.926507256072967e-08, + "loss": 22.5444, + "step": 476130 + }, + { + "epoch": 0.9618329246071987, + "grad_norm": 274.3182067871094, + "learning_rate": 6.920718369185009e-08, + "loss": 14.7543, + "step": 476140 + }, + { + "epoch": 0.9618531252398825, + "grad_norm": 449.9986267089844, + "learning_rate": 6.914931885505626e-08, + "loss": 13.2917, + "step": 476150 + }, + { + "epoch": 0.9618733258725664, + "grad_norm": 392.4185485839844, + "learning_rate": 6.909147805063021e-08, + "loss": 43.4365, + "step": 476160 + }, + { + "epoch": 0.9618935265052502, + "grad_norm": 379.1201477050781, + "learning_rate": 6.903366127885447e-08, + "loss": 13.0509, + "step": 476170 + }, + { + "epoch": 0.961913727137934, + "grad_norm": 269.5074157714844, + "learning_rate": 6.897586854001048e-08, + "loss": 28.5801, + "step": 476180 + }, + { + "epoch": 0.9619339277706178, + "grad_norm": 428.8318176269531, + "learning_rate": 6.89180998343808e-08, + "loss": 23.163, + "step": 476190 + }, + { + "epoch": 0.9619541284033016, + "grad_norm": 270.17315673828125, + "learning_rate": 6.88603551622452e-08, + "loss": 16.6752, + "step": 476200 + }, + { + "epoch": 0.9619743290359855, + "grad_norm": 870.8080444335938, + "learning_rate": 6.88026345238868e-08, + "loss": 30.2955, + "step": 476210 + }, + { + "epoch": 0.9619945296686693, + "grad_norm": 471.13787841796875, + "learning_rate": 6.874493791958648e-08, + "loss": 13.3997, + "step": 476220 + }, + { + "epoch": 0.9620147303013531, + "grad_norm": 261.5706481933594, + "learning_rate": 6.868726534962456e-08, + "loss": 20.7547, + "step": 476230 + }, + { + "epoch": 0.9620349309340368, + "grad_norm": 308.1227722167969, + "learning_rate": 6.862961681428304e-08, + "loss": 32.5355, + "step": 476240 + 
}, + { + "epoch": 0.9620551315667206, + "grad_norm": 321.1436767578125, + "learning_rate": 6.857199231384282e-08, + "loss": 24.4721, + "step": 476250 + }, + { + "epoch": 0.9620753321994044, + "grad_norm": 461.12042236328125, + "learning_rate": 6.851439184858477e-08, + "loss": 23.3178, + "step": 476260 + }, + { + "epoch": 0.9620955328320883, + "grad_norm": 445.0748596191406, + "learning_rate": 6.845681541878924e-08, + "loss": 16.4683, + "step": 476270 + }, + { + "epoch": 0.9621157334647721, + "grad_norm": 237.60678100585938, + "learning_rate": 6.83992630247371e-08, + "loss": 14.815, + "step": 476280 + }, + { + "epoch": 0.9621359340974559, + "grad_norm": 29.770912170410156, + "learning_rate": 6.834173466670923e-08, + "loss": 13.1535, + "step": 476290 + }, + { + "epoch": 0.9621561347301397, + "grad_norm": 1804.21337890625, + "learning_rate": 6.828423034498488e-08, + "loss": 13.4355, + "step": 476300 + }, + { + "epoch": 0.9621763353628235, + "grad_norm": 363.8840637207031, + "learning_rate": 6.822675005984547e-08, + "loss": 12.4518, + "step": 476310 + }, + { + "epoch": 0.9621965359955074, + "grad_norm": 387.9690856933594, + "learning_rate": 6.816929381157023e-08, + "loss": 14.038, + "step": 476320 + }, + { + "epoch": 0.9622167366281912, + "grad_norm": 282.8274230957031, + "learning_rate": 6.811186160044004e-08, + "loss": 22.7182, + "step": 476330 + }, + { + "epoch": 0.962236937260875, + "grad_norm": 395.19390869140625, + "learning_rate": 6.805445342673467e-08, + "loss": 16.2363, + "step": 476340 + }, + { + "epoch": 0.9622571378935588, + "grad_norm": 541.1038208007812, + "learning_rate": 6.799706929073335e-08, + "loss": 17.5635, + "step": 476350 + }, + { + "epoch": 0.9622773385262426, + "grad_norm": 34.52801513671875, + "learning_rate": 6.793970919271642e-08, + "loss": 19.4252, + "step": 476360 + }, + { + "epoch": 0.9622975391589265, + "grad_norm": 540.6629028320312, + "learning_rate": 6.788237313296309e-08, + "loss": 21.7979, + "step": 476370 + }, + { + "epoch": 0.9623177397916103, + "grad_norm": 432.80377197265625, + "learning_rate": 6.782506111175313e-08, + "loss": 21.8245, + "step": 476380 + }, + { + "epoch": 0.9623379404242941, + "grad_norm": 271.496337890625, + "learning_rate": 6.776777312936522e-08, + "loss": 8.9662, + "step": 476390 + }, + { + "epoch": 0.9623581410569779, + "grad_norm": 3.349520444869995, + "learning_rate": 6.771050918607913e-08, + "loss": 20.389, + "step": 476400 + }, + { + "epoch": 0.9623783416896617, + "grad_norm": 240.0352325439453, + "learning_rate": 6.765326928217408e-08, + "loss": 20.2974, + "step": 476410 + }, + { + "epoch": 0.9623985423223456, + "grad_norm": 186.44142150878906, + "learning_rate": 6.759605341792819e-08, + "loss": 16.1324, + "step": 476420 + }, + { + "epoch": 0.9624187429550294, + "grad_norm": 404.8953857421875, + "learning_rate": 6.753886159362122e-08, + "loss": 17.5296, + "step": 476430 + }, + { + "epoch": 0.9624389435877132, + "grad_norm": 338.2667541503906, + "learning_rate": 6.748169380953184e-08, + "loss": 21.9625, + "step": 476440 + }, + { + "epoch": 0.962459144220397, + "grad_norm": 359.7268371582031, + "learning_rate": 6.742455006593762e-08, + "loss": 22.326, + "step": 476450 + }, + { + "epoch": 0.9624793448530808, + "grad_norm": 498.0181579589844, + "learning_rate": 6.736743036311832e-08, + "loss": 29.3099, + "step": 476460 + }, + { + "epoch": 0.9624995454857647, + "grad_norm": 707.2603149414062, + "learning_rate": 6.731033470135262e-08, + "loss": 20.6502, + "step": 476470 + }, + { + "epoch": 0.9625197461184485, + "grad_norm": 
489.98052978515625, + "learning_rate": 6.725326308091751e-08, + "loss": 23.2261, + "step": 476480 + }, + { + "epoch": 0.9625399467511322, + "grad_norm": 297.9427490234375, + "learning_rate": 6.71962155020911e-08, + "loss": 22.8428, + "step": 476490 + }, + { + "epoch": 0.962560147383816, + "grad_norm": 706.0108642578125, + "learning_rate": 6.713919196515317e-08, + "loss": 24.6106, + "step": 476500 + }, + { + "epoch": 0.9625803480164998, + "grad_norm": 370.75567626953125, + "learning_rate": 6.708219247038017e-08, + "loss": 18.245, + "step": 476510 + }, + { + "epoch": 0.9626005486491837, + "grad_norm": 375.1898498535156, + "learning_rate": 6.702521701804965e-08, + "loss": 18.2819, + "step": 476520 + }, + { + "epoch": 0.9626207492818675, + "grad_norm": 178.1335906982422, + "learning_rate": 6.696826560844027e-08, + "loss": 18.5652, + "step": 476530 + }, + { + "epoch": 0.9626409499145513, + "grad_norm": 248.6165313720703, + "learning_rate": 6.691133824183016e-08, + "loss": 31.3355, + "step": 476540 + }, + { + "epoch": 0.9626611505472351, + "grad_norm": 87.95904541015625, + "learning_rate": 6.685443491849464e-08, + "loss": 20.726, + "step": 476550 + }, + { + "epoch": 0.9626813511799189, + "grad_norm": 90.31098175048828, + "learning_rate": 6.679755563871292e-08, + "loss": 14.6505, + "step": 476560 + }, + { + "epoch": 0.9627015518126028, + "grad_norm": 547.4888305664062, + "learning_rate": 6.674070040276148e-08, + "loss": 22.3104, + "step": 476570 + }, + { + "epoch": 0.9627217524452866, + "grad_norm": 423.2607421875, + "learning_rate": 6.66838692109173e-08, + "loss": 23.5625, + "step": 476580 + }, + { + "epoch": 0.9627419530779704, + "grad_norm": 170.6096649169922, + "learning_rate": 6.662706206345793e-08, + "loss": 12.0225, + "step": 476590 + }, + { + "epoch": 0.9627621537106542, + "grad_norm": 213.32203674316406, + "learning_rate": 6.657027896065982e-08, + "loss": 16.2692, + "step": 476600 + }, + { + "epoch": 0.962782354343338, + "grad_norm": 164.5532684326172, + "learning_rate": 6.651351990279997e-08, + "loss": 3.9653, + "step": 476610 + }, + { + "epoch": 0.9628025549760219, + "grad_norm": 221.3753204345703, + "learning_rate": 6.645678489015428e-08, + "loss": 18.0769, + "step": 476620 + }, + { + "epoch": 0.9628227556087057, + "grad_norm": 445.78863525390625, + "learning_rate": 6.64000739230003e-08, + "loss": 27.9703, + "step": 476630 + }, + { + "epoch": 0.9628429562413895, + "grad_norm": 475.8753356933594, + "learning_rate": 6.634338700161392e-08, + "loss": 24.063, + "step": 476640 + }, + { + "epoch": 0.9628631568740733, + "grad_norm": 65.68719482421875, + "learning_rate": 6.628672412627158e-08, + "loss": 11.14, + "step": 476650 + }, + { + "epoch": 0.9628833575067571, + "grad_norm": 330.69525146484375, + "learning_rate": 6.623008529724917e-08, + "loss": 17.0637, + "step": 476660 + }, + { + "epoch": 0.962903558139441, + "grad_norm": 321.6661071777344, + "learning_rate": 6.617347051482315e-08, + "loss": 18.995, + "step": 476670 + }, + { + "epoch": 0.9629237587721248, + "grad_norm": 493.0853271484375, + "learning_rate": 6.611687977926939e-08, + "loss": 14.4967, + "step": 476680 + }, + { + "epoch": 0.9629439594048086, + "grad_norm": 328.06475830078125, + "learning_rate": 6.606031309086269e-08, + "loss": 17.7986, + "step": 476690 + }, + { + "epoch": 0.9629641600374924, + "grad_norm": 445.1838684082031, + "learning_rate": 6.60037704498806e-08, + "loss": 24.9507, + "step": 476700 + }, + { + "epoch": 0.9629843606701762, + "grad_norm": 174.0291748046875, + "learning_rate": 6.594725185659734e-08, + 
"loss": 18.0457, + "step": 476710 + }, + { + "epoch": 0.96300456130286, + "grad_norm": 1234.39453125, + "learning_rate": 6.58907573112888e-08, + "loss": 14.8554, + "step": 476720 + }, + { + "epoch": 0.9630247619355439, + "grad_norm": 286.1321716308594, + "learning_rate": 6.583428681423032e-08, + "loss": 27.2782, + "step": 476730 + }, + { + "epoch": 0.9630449625682277, + "grad_norm": 372.0481262207031, + "learning_rate": 6.577784036569668e-08, + "loss": 15.7032, + "step": 476740 + }, + { + "epoch": 0.9630651632009114, + "grad_norm": 583.0672607421875, + "learning_rate": 6.572141796596376e-08, + "loss": 18.4742, + "step": 476750 + }, + { + "epoch": 0.9630853638335952, + "grad_norm": 189.72605895996094, + "learning_rate": 6.566501961530636e-08, + "loss": 13.0015, + "step": 476760 + }, + { + "epoch": 0.963105564466279, + "grad_norm": 115.40235137939453, + "learning_rate": 6.560864531399869e-08, + "loss": 13.4514, + "step": 476770 + }, + { + "epoch": 0.9631257650989629, + "grad_norm": 412.1432800292969, + "learning_rate": 6.555229506231608e-08, + "loss": 22.9218, + "step": 476780 + }, + { + "epoch": 0.9631459657316467, + "grad_norm": 300.8472900390625, + "learning_rate": 6.549596886053334e-08, + "loss": 16.8421, + "step": 476790 + }, + { + "epoch": 0.9631661663643305, + "grad_norm": 140.56094360351562, + "learning_rate": 6.543966670892465e-08, + "loss": 21.1918, + "step": 476800 + }, + { + "epoch": 0.9631863669970143, + "grad_norm": 39.55817794799805, + "learning_rate": 6.538338860776483e-08, + "loss": 10.6026, + "step": 476810 + }, + { + "epoch": 0.9632065676296981, + "grad_norm": 251.70135498046875, + "learning_rate": 6.532713455732753e-08, + "loss": 18.2131, + "step": 476820 + }, + { + "epoch": 0.963226768262382, + "grad_norm": 437.0846862792969, + "learning_rate": 6.527090455788754e-08, + "loss": 13.2615, + "step": 476830 + }, + { + "epoch": 0.9632469688950658, + "grad_norm": 323.1280212402344, + "learning_rate": 6.521469860971852e-08, + "loss": 15.5527, + "step": 476840 + }, + { + "epoch": 0.9632671695277496, + "grad_norm": 469.2850341796875, + "learning_rate": 6.515851671309414e-08, + "loss": 23.856, + "step": 476850 + }, + { + "epoch": 0.9632873701604334, + "grad_norm": 561.0780029296875, + "learning_rate": 6.51023588682892e-08, + "loss": 17.9284, + "step": 476860 + }, + { + "epoch": 0.9633075707931172, + "grad_norm": 952.8590698242188, + "learning_rate": 6.504622507557679e-08, + "loss": 32.2216, + "step": 476870 + }, + { + "epoch": 0.9633277714258011, + "grad_norm": 398.85455322265625, + "learning_rate": 6.499011533523003e-08, + "loss": 18.0433, + "step": 476880 + }, + { + "epoch": 0.9633479720584849, + "grad_norm": 258.42388916015625, + "learning_rate": 6.493402964752371e-08, + "loss": 19.5101, + "step": 476890 + }, + { + "epoch": 0.9633681726911687, + "grad_norm": 526.315185546875, + "learning_rate": 6.487796801272983e-08, + "loss": 16.5671, + "step": 476900 + }, + { + "epoch": 0.9633883733238525, + "grad_norm": 0.0, + "learning_rate": 6.482193043112206e-08, + "loss": 10.8797, + "step": 476910 + }, + { + "epoch": 0.9634085739565363, + "grad_norm": 394.5545654296875, + "learning_rate": 6.476591690297407e-08, + "loss": 11.1586, + "step": 476920 + }, + { + "epoch": 0.9634287745892202, + "grad_norm": 235.354736328125, + "learning_rate": 6.470992742855786e-08, + "loss": 19.3661, + "step": 476930 + }, + { + "epoch": 0.963448975221904, + "grad_norm": 375.6850280761719, + "learning_rate": 6.465396200814766e-08, + "loss": 18.9032, + "step": 476940 + }, + { + "epoch": 0.9634691758545878, + 
"grad_norm": 444.804931640625, + "learning_rate": 6.459802064201437e-08, + "loss": 19.9718, + "step": 476950 + }, + { + "epoch": 0.9634893764872716, + "grad_norm": 340.27679443359375, + "learning_rate": 6.454210333043275e-08, + "loss": 18.8783, + "step": 476960 + }, + { + "epoch": 0.9635095771199554, + "grad_norm": 123.67523193359375, + "learning_rate": 6.448621007367428e-08, + "loss": 21.5001, + "step": 476970 + }, + { + "epoch": 0.9635297777526393, + "grad_norm": 256.27716064453125, + "learning_rate": 6.443034087201095e-08, + "loss": 21.0008, + "step": 476980 + }, + { + "epoch": 0.9635499783853231, + "grad_norm": 645.1035766601562, + "learning_rate": 6.437449572571586e-08, + "loss": 29.0721, + "step": 476990 + }, + { + "epoch": 0.9635701790180068, + "grad_norm": 40.7523078918457, + "learning_rate": 6.431867463506047e-08, + "loss": 11.9315, + "step": 477000 + }, + { + "epoch": 0.9635903796506906, + "grad_norm": 276.47442626953125, + "learning_rate": 6.426287760031736e-08, + "loss": 14.655, + "step": 477010 + }, + { + "epoch": 0.9636105802833744, + "grad_norm": 95.3863525390625, + "learning_rate": 6.42071046217585e-08, + "loss": 8.3127, + "step": 477020 + }, + { + "epoch": 0.9636307809160582, + "grad_norm": 156.61651611328125, + "learning_rate": 6.415135569965536e-08, + "loss": 18.0984, + "step": 477030 + }, + { + "epoch": 0.9636509815487421, + "grad_norm": 303.97393798828125, + "learning_rate": 6.40956308342805e-08, + "loss": 26.0625, + "step": 477040 + }, + { + "epoch": 0.9636711821814259, + "grad_norm": 451.6395568847656, + "learning_rate": 6.403993002590425e-08, + "loss": 13.9365, + "step": 477050 + }, + { + "epoch": 0.9636913828141097, + "grad_norm": 103.19915008544922, + "learning_rate": 6.398425327479863e-08, + "loss": 12.9072, + "step": 477060 + }, + { + "epoch": 0.9637115834467935, + "grad_norm": 421.86407470703125, + "learning_rate": 6.392860058123506e-08, + "loss": 14.673, + "step": 477070 + }, + { + "epoch": 0.9637317840794773, + "grad_norm": 330.48236083984375, + "learning_rate": 6.387297194548558e-08, + "loss": 29.77, + "step": 477080 + }, + { + "epoch": 0.9637519847121612, + "grad_norm": 211.17384338378906, + "learning_rate": 6.381736736781996e-08, + "loss": 11.6155, + "step": 477090 + }, + { + "epoch": 0.963772185344845, + "grad_norm": 212.2564697265625, + "learning_rate": 6.376178684850965e-08, + "loss": 19.4258, + "step": 477100 + }, + { + "epoch": 0.9637923859775288, + "grad_norm": 375.3379211425781, + "learning_rate": 6.370623038782608e-08, + "loss": 27.173, + "step": 477110 + }, + { + "epoch": 0.9638125866102126, + "grad_norm": 27.419113159179688, + "learning_rate": 6.365069798603962e-08, + "loss": 31.2934, + "step": 477120 + }, + { + "epoch": 0.9638327872428964, + "grad_norm": 335.3210144042969, + "learning_rate": 6.359518964342059e-08, + "loss": 12.9161, + "step": 477130 + }, + { + "epoch": 0.9638529878755803, + "grad_norm": 405.4734191894531, + "learning_rate": 6.353970536024045e-08, + "loss": 16.8732, + "step": 477140 + }, + { + "epoch": 0.9638731885082641, + "grad_norm": 583.7369384765625, + "learning_rate": 6.348424513676898e-08, + "loss": 17.2103, + "step": 477150 + }, + { + "epoch": 0.9638933891409479, + "grad_norm": 540.9656982421875, + "learning_rate": 6.342880897327597e-08, + "loss": 24.3372, + "step": 477160 + }, + { + "epoch": 0.9639135897736317, + "grad_norm": 146.86822509765625, + "learning_rate": 6.337339687003286e-08, + "loss": 13.8029, + "step": 477170 + }, + { + "epoch": 0.9639337904063155, + "grad_norm": 539.55712890625, + "learning_rate": 
6.331800882730887e-08, + "loss": 15.6876, + "step": 477180 + }, + { + "epoch": 0.9639539910389994, + "grad_norm": 153.9258575439453, + "learning_rate": 6.326264484537437e-08, + "loss": 12.555, + "step": 477190 + }, + { + "epoch": 0.9639741916716832, + "grad_norm": 533.649169921875, + "learning_rate": 6.3207304924498e-08, + "loss": 22.0611, + "step": 477200 + }, + { + "epoch": 0.963994392304367, + "grad_norm": 455.38360595703125, + "learning_rate": 6.315198906495179e-08, + "loss": 21.8381, + "step": 477210 + }, + { + "epoch": 0.9640145929370508, + "grad_norm": 396.9118347167969, + "learning_rate": 6.30966972670033e-08, + "loss": 22.9322, + "step": 477220 + }, + { + "epoch": 0.9640347935697346, + "grad_norm": 515.7533569335938, + "learning_rate": 6.304142953092285e-08, + "loss": 17.8945, + "step": 477230 + }, + { + "epoch": 0.9640549942024185, + "grad_norm": 493.3134460449219, + "learning_rate": 6.298618585697968e-08, + "loss": 13.7864, + "step": 477240 + }, + { + "epoch": 0.9640751948351023, + "grad_norm": 143.59620666503906, + "learning_rate": 6.293096624544304e-08, + "loss": 8.9156, + "step": 477250 + }, + { + "epoch": 0.964095395467786, + "grad_norm": 500.61334228515625, + "learning_rate": 6.287577069658213e-08, + "loss": 10.7181, + "step": 477260 + }, + { + "epoch": 0.9641155961004698, + "grad_norm": 179.6245574951172, + "learning_rate": 6.282059921066564e-08, + "loss": 14.0273, + "step": 477270 + }, + { + "epoch": 0.9641357967331536, + "grad_norm": 224.61199951171875, + "learning_rate": 6.276545178796333e-08, + "loss": 9.6322, + "step": 477280 + }, + { + "epoch": 0.9641559973658375, + "grad_norm": 408.6536865234375, + "learning_rate": 6.271032842874281e-08, + "loss": 26.8837, + "step": 477290 + }, + { + "epoch": 0.9641761979985213, + "grad_norm": 296.5220031738281, + "learning_rate": 6.265522913327326e-08, + "loss": 29.1121, + "step": 477300 + }, + { + "epoch": 0.9641963986312051, + "grad_norm": 2035.961669921875, + "learning_rate": 6.260015390182395e-08, + "loss": 31.4718, + "step": 477310 + }, + { + "epoch": 0.9642165992638889, + "grad_norm": 81.89179992675781, + "learning_rate": 6.254510273466186e-08, + "loss": 14.0888, + "step": 477320 + }, + { + "epoch": 0.9642367998965727, + "grad_norm": 422.5835266113281, + "learning_rate": 6.249007563205679e-08, + "loss": 35.0419, + "step": 477330 + }, + { + "epoch": 0.9642570005292566, + "grad_norm": 592.8697509765625, + "learning_rate": 6.243507259427628e-08, + "loss": 23.1118, + "step": 477340 + }, + { + "epoch": 0.9642772011619404, + "grad_norm": 247.3590545654297, + "learning_rate": 6.238009362158793e-08, + "loss": 17.3722, + "step": 477350 + }, + { + "epoch": 0.9642974017946242, + "grad_norm": 285.0309753417969, + "learning_rate": 6.232513871426038e-08, + "loss": 20.0719, + "step": 477360 + }, + { + "epoch": 0.964317602427308, + "grad_norm": 174.8265380859375, + "learning_rate": 6.227020787256122e-08, + "loss": 16.8355, + "step": 477370 + }, + { + "epoch": 0.9643378030599918, + "grad_norm": 287.5248718261719, + "learning_rate": 6.2215301096758e-08, + "loss": 11.1058, + "step": 477380 + }, + { + "epoch": 0.9643580036926757, + "grad_norm": 673.0089721679688, + "learning_rate": 6.216041838711828e-08, + "loss": 23.4357, + "step": 477390 + }, + { + "epoch": 0.9643782043253595, + "grad_norm": 94.95233917236328, + "learning_rate": 6.210555974391075e-08, + "loss": 24.3358, + "step": 477400 + }, + { + "epoch": 0.9643984049580433, + "grad_norm": 0.0, + "learning_rate": 6.205072516740129e-08, + "loss": 9.14, + "step": 477410 + }, + { + "epoch": 
0.9644186055907271, + "grad_norm": 367.91717529296875, + "learning_rate": 6.199591465785748e-08, + "loss": 10.6455, + "step": 477420 + }, + { + "epoch": 0.9644388062234109, + "grad_norm": 476.6819152832031, + "learning_rate": 6.194112821554687e-08, + "loss": 29.8962, + "step": 477430 + }, + { + "epoch": 0.9644590068560948, + "grad_norm": 690.882568359375, + "learning_rate": 6.188636584073648e-08, + "loss": 19.3234, + "step": 477440 + }, + { + "epoch": 0.9644792074887786, + "grad_norm": 66.78436279296875, + "learning_rate": 6.183162753369221e-08, + "loss": 11.4298, + "step": 477450 + }, + { + "epoch": 0.9644994081214624, + "grad_norm": 493.62298583984375, + "learning_rate": 6.177691329468217e-08, + "loss": 21.2334, + "step": 477460 + }, + { + "epoch": 0.9645196087541462, + "grad_norm": 125.45512390136719, + "learning_rate": 6.17222231239728e-08, + "loss": 21.7523, + "step": 477470 + }, + { + "epoch": 0.96453980938683, + "grad_norm": 481.461181640625, + "learning_rate": 6.166755702183058e-08, + "loss": 20.9767, + "step": 477480 + }, + { + "epoch": 0.9645600100195139, + "grad_norm": 17.118911743164062, + "learning_rate": 6.161291498852084e-08, + "loss": 19.0655, + "step": 477490 + }, + { + "epoch": 0.9645802106521977, + "grad_norm": 752.0841064453125, + "learning_rate": 6.15582970243117e-08, + "loss": 25.7584, + "step": 477500 + }, + { + "epoch": 0.9646004112848814, + "grad_norm": 101.85774230957031, + "learning_rate": 6.150370312946797e-08, + "loss": 7.6507, + "step": 477510 + }, + { + "epoch": 0.9646206119175652, + "grad_norm": 380.5328674316406, + "learning_rate": 6.144913330425606e-08, + "loss": 32.0278, + "step": 477520 + }, + { + "epoch": 0.964640812550249, + "grad_norm": 403.0509033203125, + "learning_rate": 6.139458754894245e-08, + "loss": 21.2497, + "step": 477530 + }, + { + "epoch": 0.9646610131829328, + "grad_norm": 670.5574340820312, + "learning_rate": 6.134006586379249e-08, + "loss": 15.94, + "step": 477540 + }, + { + "epoch": 0.9646812138156167, + "grad_norm": 410.8387145996094, + "learning_rate": 6.128556824907205e-08, + "loss": 21.5452, + "step": 477550 + }, + { + "epoch": 0.9647014144483005, + "grad_norm": 534.5658569335938, + "learning_rate": 6.12310947050465e-08, + "loss": 9.8214, + "step": 477560 + }, + { + "epoch": 0.9647216150809843, + "grad_norm": 365.89556884765625, + "learning_rate": 6.11766452319823e-08, + "loss": 15.1667, + "step": 477570 + }, + { + "epoch": 0.9647418157136681, + "grad_norm": 132.87437438964844, + "learning_rate": 6.112221983014366e-08, + "loss": 10.2982, + "step": 477580 + }, + { + "epoch": 0.964762016346352, + "grad_norm": 169.1856689453125, + "learning_rate": 6.106781849979648e-08, + "loss": 15.1391, + "step": 477590 + }, + { + "epoch": 0.9647822169790358, + "grad_norm": 393.377197265625, + "learning_rate": 6.101344124120557e-08, + "loss": 28.7703, + "step": 477600 + }, + { + "epoch": 0.9648024176117196, + "grad_norm": 539.4514770507812, + "learning_rate": 6.095908805463624e-08, + "loss": 29.2519, + "step": 477610 + }, + { + "epoch": 0.9648226182444034, + "grad_norm": 331.7099914550781, + "learning_rate": 6.09047589403533e-08, + "loss": 24.0463, + "step": 477620 + }, + { + "epoch": 0.9648428188770872, + "grad_norm": 604.9713745117188, + "learning_rate": 6.085045389862154e-08, + "loss": 19.9847, + "step": 477630 + }, + { + "epoch": 0.964863019509771, + "grad_norm": 330.50347900390625, + "learning_rate": 6.079617292970519e-08, + "loss": 9.4264, + "step": 477640 + }, + { + "epoch": 0.9648832201424549, + "grad_norm": 481.7924499511719, + 
"learning_rate": 6.074191603386958e-08, + "loss": 23.5693, + "step": 477650 + }, + { + "epoch": 0.9649034207751387, + "grad_norm": 245.63783264160156, + "learning_rate": 6.068768321137897e-08, + "loss": 10.665, + "step": 477660 + }, + { + "epoch": 0.9649236214078225, + "grad_norm": 2.3313724994659424, + "learning_rate": 6.0633474462497e-08, + "loss": 8.234, + "step": 477670 + }, + { + "epoch": 0.9649438220405063, + "grad_norm": 282.9562683105469, + "learning_rate": 6.057928978748906e-08, + "loss": 9.9405, + "step": 477680 + }, + { + "epoch": 0.9649640226731901, + "grad_norm": 374.821044921875, + "learning_rate": 6.052512918661879e-08, + "loss": 15.405, + "step": 477690 + }, + { + "epoch": 0.964984223305874, + "grad_norm": 202.2202606201172, + "learning_rate": 6.047099266014877e-08, + "loss": 22.1854, + "step": 477700 + }, + { + "epoch": 0.9650044239385578, + "grad_norm": 503.0718688964844, + "learning_rate": 6.041688020834491e-08, + "loss": 18.3147, + "step": 477710 + }, + { + "epoch": 0.9650246245712416, + "grad_norm": 5.190357208251953, + "learning_rate": 6.036279183146975e-08, + "loss": 18.278, + "step": 477720 + }, + { + "epoch": 0.9650448252039254, + "grad_norm": 569.5199584960938, + "learning_rate": 6.030872752978756e-08, + "loss": 18.4054, + "step": 477730 + }, + { + "epoch": 0.9650650258366092, + "grad_norm": 393.5696105957031, + "learning_rate": 6.025468730356144e-08, + "loss": 16.5542, + "step": 477740 + }, + { + "epoch": 0.9650852264692931, + "grad_norm": 1.4170738458633423, + "learning_rate": 6.020067115305451e-08, + "loss": 24.3471, + "step": 477750 + }, + { + "epoch": 0.9651054271019769, + "grad_norm": 224.58607482910156, + "learning_rate": 6.0146679078531e-08, + "loss": 7.6774, + "step": 477760 + }, + { + "epoch": 0.9651256277346606, + "grad_norm": 371.5082092285156, + "learning_rate": 6.009271108025294e-08, + "loss": 11.425, + "step": 477770 + }, + { + "epoch": 0.9651458283673444, + "grad_norm": 591.8775634765625, + "learning_rate": 6.003876715848345e-08, + "loss": 18.0876, + "step": 477780 + }, + { + "epoch": 0.9651660290000282, + "grad_norm": 681.1532592773438, + "learning_rate": 5.998484731348675e-08, + "loss": 11.5306, + "step": 477790 + }, + { + "epoch": 0.9651862296327121, + "grad_norm": 209.39012145996094, + "learning_rate": 5.993095154552431e-08, + "loss": 12.4092, + "step": 477800 + }, + { + "epoch": 0.9652064302653959, + "grad_norm": 232.25473022460938, + "learning_rate": 5.987707985485925e-08, + "loss": 23.2592, + "step": 477810 + }, + { + "epoch": 0.9652266308980797, + "grad_norm": 493.2044677734375, + "learning_rate": 5.982323224175468e-08, + "loss": 12.4573, + "step": 477820 + }, + { + "epoch": 0.9652468315307635, + "grad_norm": 163.55963134765625, + "learning_rate": 5.976940870647207e-08, + "loss": 29.157, + "step": 477830 + }, + { + "epoch": 0.9652670321634473, + "grad_norm": 152.7935028076172, + "learning_rate": 5.9715609249274e-08, + "loss": 16.3347, + "step": 477840 + }, + { + "epoch": 0.9652872327961312, + "grad_norm": 170.32513427734375, + "learning_rate": 5.966183387042246e-08, + "loss": 22.4016, + "step": 477850 + }, + { + "epoch": 0.965307433428815, + "grad_norm": 350.372802734375, + "learning_rate": 5.960808257018113e-08, + "loss": 17.2876, + "step": 477860 + }, + { + "epoch": 0.9653276340614988, + "grad_norm": 345.9002990722656, + "learning_rate": 5.955435534881038e-08, + "loss": 22.5925, + "step": 477870 + }, + { + "epoch": 0.9653478346941826, + "grad_norm": 49.1010627746582, + "learning_rate": 5.950065220657164e-08, + "loss": 6.0972, + "step": 
477880 + }, + { + "epoch": 0.9653680353268664, + "grad_norm": 442.44207763671875, + "learning_rate": 5.9446973143728605e-08, + "loss": 22.0141, + "step": 477890 + }, + { + "epoch": 0.9653882359595503, + "grad_norm": 59.3377799987793, + "learning_rate": 5.939331816054161e-08, + "loss": 25.7228, + "step": 477900 + }, + { + "epoch": 0.9654084365922341, + "grad_norm": 461.59124755859375, + "learning_rate": 5.9339687257272126e-08, + "loss": 26.488, + "step": 477910 + }, + { + "epoch": 0.9654286372249179, + "grad_norm": 312.3343505859375, + "learning_rate": 5.92860804341816e-08, + "loss": 26.8083, + "step": 477920 + }, + { + "epoch": 0.9654488378576017, + "grad_norm": 451.34051513671875, + "learning_rate": 5.9232497691531496e-08, + "loss": 20.6006, + "step": 477930 + }, + { + "epoch": 0.9654690384902855, + "grad_norm": 146.99241638183594, + "learning_rate": 5.917893902958327e-08, + "loss": 14.9505, + "step": 477940 + }, + { + "epoch": 0.9654892391229694, + "grad_norm": 617.3377075195312, + "learning_rate": 5.9125404448597825e-08, + "loss": 9.5861, + "step": 477950 + }, + { + "epoch": 0.9655094397556532, + "grad_norm": 416.9510192871094, + "learning_rate": 5.9071893948835505e-08, + "loss": 26.2968, + "step": 477960 + }, + { + "epoch": 0.965529640388337, + "grad_norm": 112.83805084228516, + "learning_rate": 5.901840753055776e-08, + "loss": 36.8032, + "step": 477970 + }, + { + "epoch": 0.9655498410210208, + "grad_norm": 199.9210205078125, + "learning_rate": 5.896494519402496e-08, + "loss": 15.8749, + "step": 477980 + }, + { + "epoch": 0.9655700416537046, + "grad_norm": 339.83465576171875, + "learning_rate": 5.891150693949743e-08, + "loss": 20.127, + "step": 477990 + }, + { + "epoch": 0.9655902422863885, + "grad_norm": 863.1902465820312, + "learning_rate": 5.8858092767236084e-08, + "loss": 29.7857, + "step": 478000 + }, + { + "epoch": 0.9656104429190723, + "grad_norm": 264.4561462402344, + "learning_rate": 5.880470267750127e-08, + "loss": 27.0998, + "step": 478010 + }, + { + "epoch": 0.9656306435517561, + "grad_norm": 359.2437744140625, + "learning_rate": 5.8751336670552775e-08, + "loss": 19.5618, + "step": 478020 + }, + { + "epoch": 0.9656508441844398, + "grad_norm": 617.5918579101562, + "learning_rate": 5.8697994746650946e-08, + "loss": 27.1238, + "step": 478030 + }, + { + "epoch": 0.9656710448171236, + "grad_norm": 2867.60791015625, + "learning_rate": 5.864467690605613e-08, + "loss": 27.6647, + "step": 478040 + }, + { + "epoch": 0.9656912454498074, + "grad_norm": 291.1172790527344, + "learning_rate": 5.8591383149028126e-08, + "loss": 32.4455, + "step": 478050 + }, + { + "epoch": 0.9657114460824913, + "grad_norm": 40.02949523925781, + "learning_rate": 5.8538113475825606e-08, + "loss": 23.7419, + "step": 478060 + }, + { + "epoch": 0.9657316467151751, + "grad_norm": 311.8459777832031, + "learning_rate": 5.848486788670893e-08, + "loss": 22.3518, + "step": 478070 + }, + { + "epoch": 0.9657518473478589, + "grad_norm": 315.1097106933594, + "learning_rate": 5.843164638193899e-08, + "loss": 11.6773, + "step": 478080 + }, + { + "epoch": 0.9657720479805427, + "grad_norm": 173.48623657226562, + "learning_rate": 5.837844896177225e-08, + "loss": 15.2753, + "step": 478090 + }, + { + "epoch": 0.9657922486132265, + "grad_norm": 233.17698669433594, + "learning_rate": 5.8325275626470166e-08, + "loss": 14.2047, + "step": 478100 + }, + { + "epoch": 0.9658124492459104, + "grad_norm": 757.6137084960938, + "learning_rate": 5.827212637629198e-08, + "loss": 20.2655, + "step": 478110 + }, + { + "epoch": 0.9658326498785942, 
+ "grad_norm": 354.80108642578125, + "learning_rate": 5.821900121149582e-08, + "loss": 31.6603, + "step": 478120 + }, + { + "epoch": 0.965852850511278, + "grad_norm": 187.01177978515625, + "learning_rate": 5.8165900132340356e-08, + "loss": 27.2038, + "step": 478130 + }, + { + "epoch": 0.9658730511439618, + "grad_norm": 545.61181640625, + "learning_rate": 5.8112823139085396e-08, + "loss": 23.4992, + "step": 478140 + }, + { + "epoch": 0.9658932517766456, + "grad_norm": 373.3515625, + "learning_rate": 5.80597702319885e-08, + "loss": 18.8845, + "step": 478150 + }, + { + "epoch": 0.9659134524093295, + "grad_norm": 558.9241333007812, + "learning_rate": 5.800674141130946e-08, + "loss": 19.6263, + "step": 478160 + }, + { + "epoch": 0.9659336530420133, + "grad_norm": 379.2434387207031, + "learning_rate": 5.795373667730586e-08, + "loss": 19.4863, + "step": 478170 + }, + { + "epoch": 0.9659538536746971, + "grad_norm": 53.8942985534668, + "learning_rate": 5.7900756030236924e-08, + "loss": 19.1105, + "step": 478180 + }, + { + "epoch": 0.9659740543073809, + "grad_norm": 489.83416748046875, + "learning_rate": 5.7847799470360236e-08, + "loss": 13.0014, + "step": 478190 + }, + { + "epoch": 0.9659942549400647, + "grad_norm": 657.77099609375, + "learning_rate": 5.7794866997933355e-08, + "loss": 11.6912, + "step": 478200 + }, + { + "epoch": 0.9660144555727486, + "grad_norm": 244.96229553222656, + "learning_rate": 5.774195861321552e-08, + "loss": 37.3842, + "step": 478210 + }, + { + "epoch": 0.9660346562054324, + "grad_norm": 335.60382080078125, + "learning_rate": 5.76890743164632e-08, + "loss": 16.0015, + "step": 478220 + }, + { + "epoch": 0.9660548568381162, + "grad_norm": 518.923583984375, + "learning_rate": 5.763621410793563e-08, + "loss": 27.9391, + "step": 478230 + }, + { + "epoch": 0.9660750574708, + "grad_norm": 56.98911666870117, + "learning_rate": 5.758337798788982e-08, + "loss": 9.7949, + "step": 478240 + }, + { + "epoch": 0.9660952581034838, + "grad_norm": 637.66015625, + "learning_rate": 5.753056595658224e-08, + "loss": 19.3816, + "step": 478250 + }, + { + "epoch": 0.9661154587361677, + "grad_norm": 145.6067352294922, + "learning_rate": 5.7477778014272124e-08, + "loss": 21.7067, + "step": 478260 + }, + { + "epoch": 0.9661356593688515, + "grad_norm": 318.15826416015625, + "learning_rate": 5.7425014161215375e-08, + "loss": 23.1052, + "step": 478270 + }, + { + "epoch": 0.9661558600015352, + "grad_norm": 899.2626953125, + "learning_rate": 5.737227439766957e-08, + "loss": 19.9951, + "step": 478280 + }, + { + "epoch": 0.966176060634219, + "grad_norm": 159.44384765625, + "learning_rate": 5.7319558723892275e-08, + "loss": 11.5791, + "step": 478290 + }, + { + "epoch": 0.9661962612669028, + "grad_norm": 759.2963256835938, + "learning_rate": 5.726686714013996e-08, + "loss": 20.4163, + "step": 478300 + }, + { + "epoch": 0.9662164618995867, + "grad_norm": 241.1317596435547, + "learning_rate": 5.7214199646669076e-08, + "loss": 30.4358, + "step": 478310 + }, + { + "epoch": 0.9662366625322705, + "grad_norm": 260.4803161621094, + "learning_rate": 5.716155624373665e-08, + "loss": 16.714, + "step": 478320 + }, + { + "epoch": 0.9662568631649543, + "grad_norm": 84.16582489013672, + "learning_rate": 5.710893693159969e-08, + "loss": 18.6407, + "step": 478330 + }, + { + "epoch": 0.9662770637976381, + "grad_norm": 439.8134765625, + "learning_rate": 5.705634171051411e-08, + "loss": 19.6875, + "step": 478340 + }, + { + "epoch": 0.9662972644303219, + "grad_norm": 137.8370819091797, + "learning_rate": 5.700377058073636e-08, + 
"loss": 26.0859, + "step": 478350 + }, + { + "epoch": 0.9663174650630058, + "grad_norm": 316.39593505859375, + "learning_rate": 5.6951223542522915e-08, + "loss": 34.9143, + "step": 478360 + }, + { + "epoch": 0.9663376656956896, + "grad_norm": 157.16848754882812, + "learning_rate": 5.6898700596129674e-08, + "loss": 13.9034, + "step": 478370 + }, + { + "epoch": 0.9663578663283734, + "grad_norm": 149.63681030273438, + "learning_rate": 5.684620174181255e-08, + "loss": 13.5584, + "step": 478380 + }, + { + "epoch": 0.9663780669610572, + "grad_norm": 177.0972442626953, + "learning_rate": 5.679372697982688e-08, + "loss": 19.0456, + "step": 478390 + }, + { + "epoch": 0.966398267593741, + "grad_norm": 292.07403564453125, + "learning_rate": 5.674127631043025e-08, + "loss": 11.7672, + "step": 478400 + }, + { + "epoch": 0.9664184682264249, + "grad_norm": 1.6651360988616943, + "learning_rate": 5.668884973387634e-08, + "loss": 14.0168, + "step": 478410 + }, + { + "epoch": 0.9664386688591087, + "grad_norm": 468.69232177734375, + "learning_rate": 5.663644725042161e-08, + "loss": 31.5944, + "step": 478420 + }, + { + "epoch": 0.9664588694917925, + "grad_norm": 530.5999755859375, + "learning_rate": 5.658406886032142e-08, + "loss": 22.6728, + "step": 478430 + }, + { + "epoch": 0.9664790701244763, + "grad_norm": 627.4494018554688, + "learning_rate": 5.653171456383055e-08, + "loss": 21.3161, + "step": 478440 + }, + { + "epoch": 0.9664992707571601, + "grad_norm": 374.77008056640625, + "learning_rate": 5.647938436120437e-08, + "loss": 11.9045, + "step": 478450 + }, + { + "epoch": 0.966519471389844, + "grad_norm": 876.54443359375, + "learning_rate": 5.642707825269822e-08, + "loss": 20.375, + "step": 478460 + }, + { + "epoch": 0.9665396720225278, + "grad_norm": 236.10513305664062, + "learning_rate": 5.637479623856745e-08, + "loss": 18.2886, + "step": 478470 + }, + { + "epoch": 0.9665598726552116, + "grad_norm": 345.92333984375, + "learning_rate": 5.632253831906631e-08, + "loss": 19.2766, + "step": 478480 + }, + { + "epoch": 0.9665800732878954, + "grad_norm": 260.10943603515625, + "learning_rate": 5.6270304494449035e-08, + "loss": 20.5953, + "step": 478490 + }, + { + "epoch": 0.9666002739205792, + "grad_norm": 98.62396240234375, + "learning_rate": 5.621809476497098e-08, + "loss": 31.1268, + "step": 478500 + }, + { + "epoch": 0.966620474553263, + "grad_norm": 279.5716857910156, + "learning_rate": 5.616590913088638e-08, + "loss": 19.0142, + "step": 478510 + }, + { + "epoch": 0.9666406751859469, + "grad_norm": 140.54217529296875, + "learning_rate": 5.611374759244892e-08, + "loss": 11.6353, + "step": 478520 + }, + { + "epoch": 0.9666608758186307, + "grad_norm": 748.846435546875, + "learning_rate": 5.6061610149913957e-08, + "loss": 36.219, + "step": 478530 + }, + { + "epoch": 0.9666810764513144, + "grad_norm": 199.6036376953125, + "learning_rate": 5.6009496803534624e-08, + "loss": 25.0226, + "step": 478540 + }, + { + "epoch": 0.9667012770839982, + "grad_norm": 400.14459228515625, + "learning_rate": 5.595740755356627e-08, + "loss": 17.4089, + "step": 478550 + }, + { + "epoch": 0.966721477716682, + "grad_norm": 3818.854736328125, + "learning_rate": 5.590534240026146e-08, + "loss": 42.379, + "step": 478560 + }, + { + "epoch": 0.9667416783493659, + "grad_norm": 6.718419075012207, + "learning_rate": 5.58533013438739e-08, + "loss": 19.4567, + "step": 478570 + }, + { + "epoch": 0.9667618789820497, + "grad_norm": 202.12559509277344, + "learning_rate": 5.580128438465837e-08, + "loss": 11.5954, + "step": 478580 + }, + { + "epoch": 
0.9667820796147335, + "grad_norm": 521.642333984375, + "learning_rate": 5.574929152286745e-08, + "loss": 14.6131, + "step": 478590 + }, + { + "epoch": 0.9668022802474173, + "grad_norm": 276.46636962890625, + "learning_rate": 5.569732275875428e-08, + "loss": 18.7578, + "step": 478600 + }, + { + "epoch": 0.9668224808801011, + "grad_norm": 231.13758850097656, + "learning_rate": 5.5645378092573085e-08, + "loss": 34.6684, + "step": 478610 + }, + { + "epoch": 0.966842681512785, + "grad_norm": 396.3559875488281, + "learning_rate": 5.559345752457701e-08, + "loss": 14.3551, + "step": 478620 + }, + { + "epoch": 0.9668628821454688, + "grad_norm": 597.891845703125, + "learning_rate": 5.554156105501862e-08, + "loss": 46.5115, + "step": 478630 + }, + { + "epoch": 0.9668830827781526, + "grad_norm": 2.6351895332336426, + "learning_rate": 5.54896886841505e-08, + "loss": 26.3693, + "step": 478640 + }, + { + "epoch": 0.9669032834108364, + "grad_norm": 122.9947509765625, + "learning_rate": 5.543784041222633e-08, + "loss": 12.3833, + "step": 478650 + }, + { + "epoch": 0.9669234840435202, + "grad_norm": 285.569580078125, + "learning_rate": 5.538601623949869e-08, + "loss": 10.7647, + "step": 478660 + }, + { + "epoch": 0.9669436846762041, + "grad_norm": 461.61627197265625, + "learning_rate": 5.533421616621903e-08, + "loss": 18.2578, + "step": 478670 + }, + { + "epoch": 0.9669638853088879, + "grad_norm": 484.755859375, + "learning_rate": 5.528244019264106e-08, + "loss": 17.0171, + "step": 478680 + }, + { + "epoch": 0.9669840859415717, + "grad_norm": 177.4759063720703, + "learning_rate": 5.5230688319017344e-08, + "loss": 18.0975, + "step": 478690 + }, + { + "epoch": 0.9670042865742555, + "grad_norm": 557.1797485351562, + "learning_rate": 5.517896054559879e-08, + "loss": 21.3366, + "step": 478700 + }, + { + "epoch": 0.9670244872069393, + "grad_norm": 383.9194030761719, + "learning_rate": 5.512725687263853e-08, + "loss": 15.7219, + "step": 478710 + }, + { + "epoch": 0.9670446878396232, + "grad_norm": 586.3380126953125, + "learning_rate": 5.507557730038859e-08, + "loss": 14.0986, + "step": 478720 + }, + { + "epoch": 0.967064888472307, + "grad_norm": 580.1846923828125, + "learning_rate": 5.5023921829100434e-08, + "loss": 24.0707, + "step": 478730 + }, + { + "epoch": 0.9670850891049908, + "grad_norm": 572.0950927734375, + "learning_rate": 5.497229045902552e-08, + "loss": 33.8662, + "step": 478740 + }, + { + "epoch": 0.9671052897376746, + "grad_norm": 19.00641632080078, + "learning_rate": 5.492068319041588e-08, + "loss": 36.7493, + "step": 478750 + }, + { + "epoch": 0.9671254903703584, + "grad_norm": 484.2851257324219, + "learning_rate": 5.4869100023523526e-08, + "loss": 18.0603, + "step": 478760 + }, + { + "epoch": 0.9671456910030423, + "grad_norm": 218.1751251220703, + "learning_rate": 5.4817540958598814e-08, + "loss": 9.6601, + "step": 478770 + }, + { + "epoch": 0.9671658916357261, + "grad_norm": 545.3314819335938, + "learning_rate": 5.476600599589377e-08, + "loss": 25.2217, + "step": 478780 + }, + { + "epoch": 0.9671860922684098, + "grad_norm": 230.48045349121094, + "learning_rate": 5.471449513565985e-08, + "loss": 22.9013, + "step": 478790 + }, + { + "epoch": 0.9672062929010936, + "grad_norm": 163.40428161621094, + "learning_rate": 5.466300837814797e-08, + "loss": 18.2725, + "step": 478800 + }, + { + "epoch": 0.9672264935337774, + "grad_norm": 319.80743408203125, + "learning_rate": 5.461154572360794e-08, + "loss": 21.5176, + "step": 478810 + }, + { + "epoch": 0.9672466941664613, + "grad_norm": 519.5460205078125, + 
"learning_rate": 5.456010717229177e-08, + "loss": 23.0734, + "step": 478820 + }, + { + "epoch": 0.9672668947991451, + "grad_norm": 264.7811584472656, + "learning_rate": 5.4508692724449806e-08, + "loss": 19.0381, + "step": 478830 + }, + { + "epoch": 0.9672870954318289, + "grad_norm": 236.47109985351562, + "learning_rate": 5.445730238033298e-08, + "loss": 15.7891, + "step": 478840 + }, + { + "epoch": 0.9673072960645127, + "grad_norm": 237.08204650878906, + "learning_rate": 5.440593614019107e-08, + "loss": 12.1388, + "step": 478850 + }, + { + "epoch": 0.9673274966971965, + "grad_norm": 171.9046173095703, + "learning_rate": 5.435459400427501e-08, + "loss": 13.042, + "step": 478860 + }, + { + "epoch": 0.9673476973298804, + "grad_norm": 631.8507690429688, + "learning_rate": 5.4303275972834577e-08, + "loss": 23.7042, + "step": 478870 + }, + { + "epoch": 0.9673678979625642, + "grad_norm": 141.70753479003906, + "learning_rate": 5.42519820461207e-08, + "loss": 10.7698, + "step": 478880 + }, + { + "epoch": 0.967388098595248, + "grad_norm": 266.23284912109375, + "learning_rate": 5.4200712224382056e-08, + "loss": 16.0063, + "step": 478890 + }, + { + "epoch": 0.9674082992279318, + "grad_norm": 253.25729370117188, + "learning_rate": 5.414946650786957e-08, + "loss": 10.512, + "step": 478900 + }, + { + "epoch": 0.9674284998606156, + "grad_norm": 220.38278198242188, + "learning_rate": 5.409824489683247e-08, + "loss": 29.1843, + "step": 478910 + }, + { + "epoch": 0.9674487004932995, + "grad_norm": 304.1505432128906, + "learning_rate": 5.4047047391521114e-08, + "loss": 23.3969, + "step": 478920 + }, + { + "epoch": 0.9674689011259833, + "grad_norm": 145.32652282714844, + "learning_rate": 5.39958739921842e-08, + "loss": 23.0794, + "step": 478930 + }, + { + "epoch": 0.9674891017586671, + "grad_norm": 579.7205810546875, + "learning_rate": 5.394472469907208e-08, + "loss": 25.9221, + "step": 478940 + }, + { + "epoch": 0.9675093023913509, + "grad_norm": 588.6752319335938, + "learning_rate": 5.389359951243345e-08, + "loss": 9.451, + "step": 478950 + }, + { + "epoch": 0.9675295030240347, + "grad_norm": 196.82383728027344, + "learning_rate": 5.3842498432516986e-08, + "loss": 14.1151, + "step": 478960 + }, + { + "epoch": 0.9675497036567186, + "grad_norm": 685.24951171875, + "learning_rate": 5.3791421459571947e-08, + "loss": 16.9339, + "step": 478970 + }, + { + "epoch": 0.9675699042894024, + "grad_norm": 78.54755401611328, + "learning_rate": 5.374036859384868e-08, + "loss": 10.4572, + "step": 478980 + }, + { + "epoch": 0.9675901049220862, + "grad_norm": 475.5689697265625, + "learning_rate": 5.3689339835594215e-08, + "loss": 14.2306, + "step": 478990 + }, + { + "epoch": 0.96761030555477, + "grad_norm": 257.8653259277344, + "learning_rate": 5.363833518505834e-08, + "loss": 7.7078, + "step": 479000 + }, + { + "epoch": 0.9676305061874538, + "grad_norm": 461.0395202636719, + "learning_rate": 5.358735464248921e-08, + "loss": 17.0037, + "step": 479010 + }, + { + "epoch": 0.9676507068201377, + "grad_norm": 271.3075256347656, + "learning_rate": 5.3536398208135495e-08, + "loss": 25.8619, + "step": 479020 + }, + { + "epoch": 0.9676709074528215, + "grad_norm": 59.960697174072266, + "learning_rate": 5.348546588224535e-08, + "loss": 22.1884, + "step": 479030 + }, + { + "epoch": 0.9676911080855053, + "grad_norm": 94.71739959716797, + "learning_rate": 5.343455766506689e-08, + "loss": 21.2829, + "step": 479040 + }, + { + "epoch": 0.967711308718189, + "grad_norm": 124.12822723388672, + "learning_rate": 5.338367355684881e-08, + "loss": 
21.4375, + "step": 479050 + }, + { + "epoch": 0.9677315093508728, + "grad_norm": 197.34124755859375, + "learning_rate": 5.33328135578387e-08, + "loss": 24.6474, + "step": 479060 + }, + { + "epoch": 0.9677517099835566, + "grad_norm": 378.1953125, + "learning_rate": 5.3281977668284136e-08, + "loss": 32.3378, + "step": 479070 + }, + { + "epoch": 0.9677719106162405, + "grad_norm": 154.08151245117188, + "learning_rate": 5.323116588843324e-08, + "loss": 16.6362, + "step": 479080 + }, + { + "epoch": 0.9677921112489243, + "grad_norm": 331.41455078125, + "learning_rate": 5.318037821853417e-08, + "loss": 24.9352, + "step": 479090 + }, + { + "epoch": 0.9678123118816081, + "grad_norm": 417.18731689453125, + "learning_rate": 5.312961465883393e-08, + "loss": 19.8879, + "step": 479100 + }, + { + "epoch": 0.9678325125142919, + "grad_norm": 524.3778076171875, + "learning_rate": 5.307887520957955e-08, + "loss": 11.2781, + "step": 479110 + }, + { + "epoch": 0.9678527131469757, + "grad_norm": 276.419921875, + "learning_rate": 5.302815987101917e-08, + "loss": 11.3561, + "step": 479120 + }, + { + "epoch": 0.9678729137796596, + "grad_norm": 623.837646484375, + "learning_rate": 5.2977468643399254e-08, + "loss": 23.5989, + "step": 479130 + }, + { + "epoch": 0.9678931144123434, + "grad_norm": 422.738525390625, + "learning_rate": 5.292680152696739e-08, + "loss": 16.2322, + "step": 479140 + }, + { + "epoch": 0.9679133150450272, + "grad_norm": 76.41869354248047, + "learning_rate": 5.2876158521969476e-08, + "loss": 19.3891, + "step": 479150 + }, + { + "epoch": 0.967933515677711, + "grad_norm": 361.3853759765625, + "learning_rate": 5.282553962865422e-08, + "loss": 13.3551, + "step": 479160 + }, + { + "epoch": 0.9679537163103948, + "grad_norm": 378.0127868652344, + "learning_rate": 5.2774944847266976e-08, + "loss": 15.339, + "step": 479170 + }, + { + "epoch": 0.9679739169430787, + "grad_norm": 353.4735107421875, + "learning_rate": 5.27243741780542e-08, + "loss": 21.6208, + "step": 479180 + }, + { + "epoch": 0.9679941175757625, + "grad_norm": 434.619384765625, + "learning_rate": 5.267382762126294e-08, + "loss": 19.5157, + "step": 479190 + }, + { + "epoch": 0.9680143182084463, + "grad_norm": 356.3162536621094, + "learning_rate": 5.262330517713965e-08, + "loss": 9.1625, + "step": 479200 + }, + { + "epoch": 0.9680345188411301, + "grad_norm": 324.1009521484375, + "learning_rate": 5.2572806845930244e-08, + "loss": 25.1147, + "step": 479210 + }, + { + "epoch": 0.9680547194738139, + "grad_norm": 196.4537353515625, + "learning_rate": 5.252233262788065e-08, + "loss": 17.194, + "step": 479220 + }, + { + "epoch": 0.9680749201064978, + "grad_norm": 162.21270751953125, + "learning_rate": 5.247188252323787e-08, + "loss": 12.6761, + "step": 479230 + }, + { + "epoch": 0.9680951207391816, + "grad_norm": 512.8888549804688, + "learning_rate": 5.242145653224673e-08, + "loss": 19.6846, + "step": 479240 + }, + { + "epoch": 0.9681153213718654, + "grad_norm": 164.4032745361328, + "learning_rate": 5.237105465515258e-08, + "loss": 16.9834, + "step": 479250 + }, + { + "epoch": 0.9681355220045492, + "grad_norm": 311.5745544433594, + "learning_rate": 5.2320676892202996e-08, + "loss": 16.5828, + "step": 479260 + }, + { + "epoch": 0.968155722637233, + "grad_norm": 305.44390869140625, + "learning_rate": 5.227032324364167e-08, + "loss": 16.0539, + "step": 479270 + }, + { + "epoch": 0.9681759232699169, + "grad_norm": 198.4656982421875, + "learning_rate": 5.2219993709714535e-08, + "loss": 14.3294, + "step": 479280 + }, + { + "epoch": 0.9681961239026007, + 
"grad_norm": 338.1217041015625, + "learning_rate": 5.2169688290667485e-08, + "loss": 17.7753, + "step": 479290 + }, + { + "epoch": 0.9682163245352845, + "grad_norm": 446.63409423828125, + "learning_rate": 5.2119406986745336e-08, + "loss": 16.3242, + "step": 479300 + }, + { + "epoch": 0.9682365251679682, + "grad_norm": 288.9396667480469, + "learning_rate": 5.206914979819289e-08, + "loss": 18.615, + "step": 479310 + }, + { + "epoch": 0.968256725800652, + "grad_norm": 410.0702209472656, + "learning_rate": 5.2018916725254945e-08, + "loss": 31.4091, + "step": 479320 + }, + { + "epoch": 0.9682769264333358, + "grad_norm": 287.1968994140625, + "learning_rate": 5.196870776817742e-08, + "loss": 17.3582, + "step": 479330 + }, + { + "epoch": 0.9682971270660197, + "grad_norm": 151.74069213867188, + "learning_rate": 5.191852292720401e-08, + "loss": 32.3567, + "step": 479340 + }, + { + "epoch": 0.9683173276987035, + "grad_norm": 738.2186279296875, + "learning_rate": 5.186836220257951e-08, + "loss": 19.9806, + "step": 479350 + }, + { + "epoch": 0.9683375283313873, + "grad_norm": 647.9827880859375, + "learning_rate": 5.1818225594548185e-08, + "loss": 12.0309, + "step": 479360 + }, + { + "epoch": 0.9683577289640711, + "grad_norm": 210.44410705566406, + "learning_rate": 5.176811310335539e-08, + "loss": 19.8676, + "step": 479370 + }, + { + "epoch": 0.968377929596755, + "grad_norm": 104.06281280517578, + "learning_rate": 5.17180247292437e-08, + "loss": 9.6487, + "step": 479380 + }, + { + "epoch": 0.9683981302294388, + "grad_norm": 136.3513641357422, + "learning_rate": 5.1667960472459034e-08, + "loss": 13.6415, + "step": 479390 + }, + { + "epoch": 0.9684183308621226, + "grad_norm": 104.18755340576172, + "learning_rate": 5.161792033324398e-08, + "loss": 12.012, + "step": 479400 + }, + { + "epoch": 0.9684385314948064, + "grad_norm": 200.2335662841797, + "learning_rate": 5.1567904311843886e-08, + "loss": 15.0877, + "step": 479410 + }, + { + "epoch": 0.9684587321274902, + "grad_norm": 316.23980712890625, + "learning_rate": 5.151791240850079e-08, + "loss": 14.0139, + "step": 479420 + }, + { + "epoch": 0.968478932760174, + "grad_norm": 485.8829345703125, + "learning_rate": 5.14679446234595e-08, + "loss": 38.5259, + "step": 479430 + }, + { + "epoch": 0.9684991333928579, + "grad_norm": 583.5144653320312, + "learning_rate": 5.14180009569637e-08, + "loss": 24.376, + "step": 479440 + }, + { + "epoch": 0.9685193340255417, + "grad_norm": 827.423583984375, + "learning_rate": 5.136808140925542e-08, + "loss": 14.9787, + "step": 479450 + }, + { + "epoch": 0.9685395346582255, + "grad_norm": 401.1725769042969, + "learning_rate": 5.131818598057947e-08, + "loss": 17.1449, + "step": 479460 + }, + { + "epoch": 0.9685597352909093, + "grad_norm": 236.3235626220703, + "learning_rate": 5.126831467117843e-08, + "loss": 23.819, + "step": 479470 + }, + { + "epoch": 0.9685799359235931, + "grad_norm": 396.2535400390625, + "learning_rate": 5.121846748129544e-08, + "loss": 20.7878, + "step": 479480 + }, + { + "epoch": 0.968600136556277, + "grad_norm": 363.9278259277344, + "learning_rate": 5.116864441117364e-08, + "loss": 20.4867, + "step": 479490 + }, + { + "epoch": 0.9686203371889608, + "grad_norm": 392.9410400390625, + "learning_rate": 5.111884546105506e-08, + "loss": 18.4554, + "step": 479500 + }, + { + "epoch": 0.9686405378216446, + "grad_norm": 536.6632080078125, + "learning_rate": 5.106907063118394e-08, + "loss": 13.4975, + "step": 479510 + }, + { + "epoch": 0.9686607384543284, + "grad_norm": 448.8122863769531, + "learning_rate": 
5.10193199218012e-08, + "loss": 23.2407, + "step": 479520 + }, + { + "epoch": 0.9686809390870122, + "grad_norm": 314.69012451171875, + "learning_rate": 5.0969593333149994e-08, + "loss": 17.3976, + "step": 479530 + }, + { + "epoch": 0.9687011397196961, + "grad_norm": 206.2161407470703, + "learning_rate": 5.091989086547289e-08, + "loss": 20.0975, + "step": 479540 + }, + { + "epoch": 0.9687213403523799, + "grad_norm": 59.227203369140625, + "learning_rate": 5.0870212519012477e-08, + "loss": 14.2258, + "step": 479550 + }, + { + "epoch": 0.9687415409850636, + "grad_norm": 420.8604431152344, + "learning_rate": 5.082055829400967e-08, + "loss": 17.1765, + "step": 479560 + }, + { + "epoch": 0.9687617416177474, + "grad_norm": 511.0311584472656, + "learning_rate": 5.077092819070761e-08, + "loss": 10.442, + "step": 479570 + }, + { + "epoch": 0.9687819422504312, + "grad_norm": 241.3959503173828, + "learning_rate": 5.072132220934722e-08, + "loss": 23.6839, + "step": 479580 + }, + { + "epoch": 0.9688021428831151, + "grad_norm": 407.5174865722656, + "learning_rate": 5.067174035017164e-08, + "loss": 11.5223, + "step": 479590 + }, + { + "epoch": 0.9688223435157989, + "grad_norm": 148.66758728027344, + "learning_rate": 5.062218261342122e-08, + "loss": 15.6233, + "step": 479600 + }, + { + "epoch": 0.9688425441484827, + "grad_norm": 478.1081237792969, + "learning_rate": 5.0572648999338e-08, + "loss": 10.8934, + "step": 479610 + }, + { + "epoch": 0.9688627447811665, + "grad_norm": 410.79217529296875, + "learning_rate": 5.052313950816401e-08, + "loss": 14.1215, + "step": 479620 + }, + { + "epoch": 0.9688829454138503, + "grad_norm": 486.24853515625, + "learning_rate": 5.0473654140139604e-08, + "loss": 23.106, + "step": 479630 + }, + { + "epoch": 0.9689031460465342, + "grad_norm": 893.8436279296875, + "learning_rate": 5.042419289550571e-08, + "loss": 11.5406, + "step": 479640 + }, + { + "epoch": 0.968923346679218, + "grad_norm": 1036.2552490234375, + "learning_rate": 5.0374755774504346e-08, + "loss": 23.9848, + "step": 479650 + }, + { + "epoch": 0.9689435473119018, + "grad_norm": 652.051025390625, + "learning_rate": 5.032534277737644e-08, + "loss": 14.6965, + "step": 479660 + }, + { + "epoch": 0.9689637479445856, + "grad_norm": 469.65863037109375, + "learning_rate": 5.027595390436235e-08, + "loss": 13.7001, + "step": 479670 + }, + { + "epoch": 0.9689839485772694, + "grad_norm": 601.7528686523438, + "learning_rate": 5.0226589155702445e-08, + "loss": 16.1731, + "step": 479680 + }, + { + "epoch": 0.9690041492099533, + "grad_norm": 601.6310424804688, + "learning_rate": 5.017724853163819e-08, + "loss": 29.1537, + "step": 479690 + }, + { + "epoch": 0.9690243498426371, + "grad_norm": 367.8590393066406, + "learning_rate": 5.012793203240995e-08, + "loss": 13.7387, + "step": 479700 + }, + { + "epoch": 0.9690445504753209, + "grad_norm": 397.095947265625, + "learning_rate": 5.007863965825754e-08, + "loss": 16.9001, + "step": 479710 + }, + { + "epoch": 0.9690647511080047, + "grad_norm": 306.54229736328125, + "learning_rate": 5.002937140942132e-08, + "loss": 9.5529, + "step": 479720 + }, + { + "epoch": 0.9690849517406885, + "grad_norm": 188.66578674316406, + "learning_rate": 4.998012728614221e-08, + "loss": 8.5547, + "step": 479730 + }, + { + "epoch": 0.9691051523733724, + "grad_norm": 368.2060546875, + "learning_rate": 4.99309072886589e-08, + "loss": 19.5318, + "step": 479740 + }, + { + "epoch": 0.9691253530060562, + "grad_norm": 2.3029627799987793, + "learning_rate": 4.988171141721232e-08, + "loss": 14.3379, + "step": 479750 + 
}, + { + "epoch": 0.96914555363874, + "grad_norm": 339.0953674316406, + "learning_rate": 4.983253967204171e-08, + "loss": 30.6589, + "step": 479760 + }, + { + "epoch": 0.9691657542714238, + "grad_norm": 335.7156982421875, + "learning_rate": 4.9783392053386894e-08, + "loss": 17.6085, + "step": 479770 + }, + { + "epoch": 0.9691859549041076, + "grad_norm": 301.7350769042969, + "learning_rate": 4.9734268561487665e-08, + "loss": 14.41, + "step": 479780 + }, + { + "epoch": 0.9692061555367915, + "grad_norm": 659.3392944335938, + "learning_rate": 4.968516919658328e-08, + "loss": 11.3555, + "step": 479790 + }, + { + "epoch": 0.9692263561694753, + "grad_norm": 292.5453796386719, + "learning_rate": 4.9636093958913e-08, + "loss": 19.53, + "step": 479800 + }, + { + "epoch": 0.9692465568021591, + "grad_norm": 249.79486083984375, + "learning_rate": 4.958704284871552e-08, + "loss": 17.1886, + "step": 479810 + }, + { + "epoch": 0.9692667574348428, + "grad_norm": 254.720947265625, + "learning_rate": 4.9538015866230636e-08, + "loss": 19.8521, + "step": 479820 + }, + { + "epoch": 0.9692869580675266, + "grad_norm": 330.8457946777344, + "learning_rate": 4.948901301169706e-08, + "loss": 10.1042, + "step": 479830 + }, + { + "epoch": 0.9693071587002104, + "grad_norm": 244.03285217285156, + "learning_rate": 4.944003428535349e-08, + "loss": 18.2047, + "step": 479840 + }, + { + "epoch": 0.9693273593328943, + "grad_norm": 638.8070068359375, + "learning_rate": 4.939107968743917e-08, + "loss": 16.9893, + "step": 479850 + }, + { + "epoch": 0.9693475599655781, + "grad_norm": 152.32827758789062, + "learning_rate": 4.9342149218191694e-08, + "loss": 11.6009, + "step": 479860 + }, + { + "epoch": 0.9693677605982619, + "grad_norm": 240.5481719970703, + "learning_rate": 4.9293242877850866e-08, + "loss": 14.4416, + "step": 479870 + }, + { + "epoch": 0.9693879612309457, + "grad_norm": 169.4164276123047, + "learning_rate": 4.9244360666653724e-08, + "loss": 37.0584, + "step": 479880 + }, + { + "epoch": 0.9694081618636295, + "grad_norm": 467.1522216796875, + "learning_rate": 4.9195502584839516e-08, + "loss": 30.0674, + "step": 479890 + }, + { + "epoch": 0.9694283624963134, + "grad_norm": 606.3841552734375, + "learning_rate": 4.914666863264528e-08, + "loss": 13.857, + "step": 479900 + }, + { + "epoch": 0.9694485631289972, + "grad_norm": 677.8084106445312, + "learning_rate": 4.9097858810310815e-08, + "loss": 20.1473, + "step": 479910 + }, + { + "epoch": 0.969468763761681, + "grad_norm": 207.4750518798828, + "learning_rate": 4.9049073118072057e-08, + "loss": 24.7366, + "step": 479920 + }, + { + "epoch": 0.9694889643943648, + "grad_norm": 444.7561950683594, + "learning_rate": 4.900031155616769e-08, + "loss": 21.5432, + "step": 479930 + }, + { + "epoch": 0.9695091650270486, + "grad_norm": 69.67774963378906, + "learning_rate": 4.8951574124835865e-08, + "loss": 16.5166, + "step": 479940 + }, + { + "epoch": 0.9695293656597325, + "grad_norm": 270.2462463378906, + "learning_rate": 4.890286082431306e-08, + "loss": 32.815, + "step": 479950 + }, + { + "epoch": 0.9695495662924163, + "grad_norm": 338.75665283203125, + "learning_rate": 4.885417165483741e-08, + "loss": 17.1081, + "step": 479960 + }, + { + "epoch": 0.9695697669251001, + "grad_norm": 434.7626037597656, + "learning_rate": 4.880550661664541e-08, + "loss": 21.953, + "step": 479970 + }, + { + "epoch": 0.9695899675577839, + "grad_norm": 429.6672058105469, + "learning_rate": 4.8756865709976284e-08, + "loss": 15.7475, + "step": 479980 + }, + { + "epoch": 0.9696101681904677, + "grad_norm": 
552.9013061523438, + "learning_rate": 4.8708248935064315e-08, + "loss": 23.5726, + "step": 479990 + }, + { + "epoch": 0.9696303688231516, + "grad_norm": 270.3870849609375, + "learning_rate": 4.865965629214819e-08, + "loss": 14.1504, + "step": 480000 + }, + { + "epoch": 0.9696505694558354, + "grad_norm": 93.67717742919922, + "learning_rate": 4.861108778146495e-08, + "loss": 12.9809, + "step": 480010 + }, + { + "epoch": 0.9696707700885192, + "grad_norm": 1092.2213134765625, + "learning_rate": 4.856254340325051e-08, + "loss": 17.7695, + "step": 480020 + }, + { + "epoch": 0.969690970721203, + "grad_norm": 579.4368896484375, + "learning_rate": 4.851402315774134e-08, + "loss": 18.8982, + "step": 480030 + }, + { + "epoch": 0.9697111713538868, + "grad_norm": 327.2518005371094, + "learning_rate": 4.846552704517449e-08, + "loss": 19.1692, + "step": 480040 + }, + { + "epoch": 0.9697313719865707, + "grad_norm": 140.6658172607422, + "learning_rate": 4.841705506578587e-08, + "loss": 10.5863, + "step": 480050 + }, + { + "epoch": 0.9697515726192545, + "grad_norm": 232.82550048828125, + "learning_rate": 4.836860721981196e-08, + "loss": 16.9834, + "step": 480060 + }, + { + "epoch": 0.9697717732519382, + "grad_norm": 67.43278503417969, + "learning_rate": 4.8320183507489236e-08, + "loss": 12.7466, + "step": 480070 + }, + { + "epoch": 0.969791973884622, + "grad_norm": 478.03912353515625, + "learning_rate": 4.827178392905307e-08, + "loss": 18.2566, + "step": 480080 + }, + { + "epoch": 0.9698121745173058, + "grad_norm": 125.35701751708984, + "learning_rate": 4.822340848473994e-08, + "loss": 25.7218, + "step": 480090 + }, + { + "epoch": 0.9698323751499897, + "grad_norm": 123.36390686035156, + "learning_rate": 4.8175057174785766e-08, + "loss": 14.2658, + "step": 480100 + }, + { + "epoch": 0.9698525757826735, + "grad_norm": 257.4310607910156, + "learning_rate": 4.81267299994248e-08, + "loss": 7.4554, + "step": 480110 + }, + { + "epoch": 0.9698727764153573, + "grad_norm": 543.759033203125, + "learning_rate": 4.807842695889409e-08, + "loss": 22.3655, + "step": 480120 + }, + { + "epoch": 0.9698929770480411, + "grad_norm": 426.5550842285156, + "learning_rate": 4.8030148053428424e-08, + "loss": 17.6665, + "step": 480130 + }, + { + "epoch": 0.9699131776807249, + "grad_norm": 199.07061767578125, + "learning_rate": 4.798189328326319e-08, + "loss": 17.6431, + "step": 480140 + }, + { + "epoch": 0.9699333783134088, + "grad_norm": 161.0851287841797, + "learning_rate": 4.793366264863375e-08, + "loss": 16.4152, + "step": 480150 + }, + { + "epoch": 0.9699535789460926, + "grad_norm": 404.6234436035156, + "learning_rate": 4.788545614977491e-08, + "loss": 16.7375, + "step": 480160 + }, + { + "epoch": 0.9699737795787764, + "grad_norm": 1150.251708984375, + "learning_rate": 4.783727378692205e-08, + "loss": 21.3241, + "step": 480170 + }, + { + "epoch": 0.9699939802114602, + "grad_norm": 497.9523620605469, + "learning_rate": 4.778911556030885e-08, + "loss": 15.1136, + "step": 480180 + }, + { + "epoch": 0.970014180844144, + "grad_norm": 442.69622802734375, + "learning_rate": 4.774098147017181e-08, + "loss": 30.2108, + "step": 480190 + }, + { + "epoch": 0.9700343814768279, + "grad_norm": 169.4849090576172, + "learning_rate": 4.769287151674407e-08, + "loss": 34.2275, + "step": 480200 + }, + { + "epoch": 0.9700545821095117, + "grad_norm": 871.2241821289062, + "learning_rate": 4.764478570026043e-08, + "loss": 19.6836, + "step": 480210 + }, + { + "epoch": 0.9700747827421955, + "grad_norm": 239.61862182617188, + "learning_rate": 
4.759672402095572e-08, + "loss": 15.2729, + "step": 480220 + }, + { + "epoch": 0.9700949833748793, + "grad_norm": 132.29234313964844, + "learning_rate": 4.754868647906419e-08, + "loss": 20.5158, + "step": 480230 + }, + { + "epoch": 0.9701151840075631, + "grad_norm": 95.70121765136719, + "learning_rate": 4.750067307481954e-08, + "loss": 14.4823, + "step": 480240 + }, + { + "epoch": 0.970135384640247, + "grad_norm": 423.11871337890625, + "learning_rate": 4.7452683808456026e-08, + "loss": 15.589, + "step": 480250 + }, + { + "epoch": 0.9701555852729308, + "grad_norm": 675.8088989257812, + "learning_rate": 4.740471868020735e-08, + "loss": 16.2484, + "step": 480260 + }, + { + "epoch": 0.9701757859056146, + "grad_norm": 734.3201904296875, + "learning_rate": 4.735677769030722e-08, + "loss": 32.3499, + "step": 480270 + }, + { + "epoch": 0.9701959865382984, + "grad_norm": 365.7787780761719, + "learning_rate": 4.730886083898989e-08, + "loss": 22.4048, + "step": 480280 + }, + { + "epoch": 0.9702161871709822, + "grad_norm": 247.57118225097656, + "learning_rate": 4.726096812648795e-08, + "loss": 19.5636, + "step": 480290 + }, + { + "epoch": 0.9702363878036661, + "grad_norm": 520.8023071289062, + "learning_rate": 4.7213099553035655e-08, + "loss": 13.0253, + "step": 480300 + }, + { + "epoch": 0.9702565884363499, + "grad_norm": 343.294189453125, + "learning_rate": 4.716525511886616e-08, + "loss": 29.1592, + "step": 480310 + }, + { + "epoch": 0.9702767890690337, + "grad_norm": 219.90328979492188, + "learning_rate": 4.711743482421205e-08, + "loss": 22.095, + "step": 480320 + }, + { + "epoch": 0.9702969897017174, + "grad_norm": 176.9662628173828, + "learning_rate": 4.7069638669307026e-08, + "loss": 16.2117, + "step": 480330 + }, + { + "epoch": 0.9703171903344012, + "grad_norm": 482.5318603515625, + "learning_rate": 4.702186665438424e-08, + "loss": 15.1355, + "step": 480340 + }, + { + "epoch": 0.970337390967085, + "grad_norm": 220.77462768554688, + "learning_rate": 4.697411877967573e-08, + "loss": 22.2234, + "step": 480350 + }, + { + "epoch": 0.9703575915997689, + "grad_norm": 50.13962173461914, + "learning_rate": 4.692639504541518e-08, + "loss": 7.2214, + "step": 480360 + }, + { + "epoch": 0.9703777922324527, + "grad_norm": 331.6729736328125, + "learning_rate": 4.68786954518341e-08, + "loss": 11.8504, + "step": 480370 + }, + { + "epoch": 0.9703979928651365, + "grad_norm": 470.2253112792969, + "learning_rate": 4.683101999916562e-08, + "loss": 6.9637, + "step": 480380 + }, + { + "epoch": 0.9704181934978203, + "grad_norm": 112.66553497314453, + "learning_rate": 4.6783368687642325e-08, + "loss": 11.6855, + "step": 480390 + }, + { + "epoch": 0.9704383941305041, + "grad_norm": 532.448486328125, + "learning_rate": 4.6735741517495715e-08, + "loss": 25.7508, + "step": 480400 + }, + { + "epoch": 0.970458594763188, + "grad_norm": 284.78729248046875, + "learning_rate": 4.668813848895837e-08, + "loss": 11.7805, + "step": 480410 + }, + { + "epoch": 0.9704787953958718, + "grad_norm": 195.6524200439453, + "learning_rate": 4.6640559602262325e-08, + "loss": 14.5171, + "step": 480420 + }, + { + "epoch": 0.9704989960285556, + "grad_norm": 151.35105895996094, + "learning_rate": 4.6593004857639627e-08, + "loss": 7.1785, + "step": 480430 + }, + { + "epoch": 0.9705191966612394, + "grad_norm": 350.37542724609375, + "learning_rate": 4.654547425532119e-08, + "loss": 13.3357, + "step": 480440 + }, + { + "epoch": 0.9705393972939232, + "grad_norm": 365.9966125488281, + "learning_rate": 4.649796779554016e-08, + "loss": 15.9065, + "step": 
480450 + }, + { + "epoch": 0.9705595979266071, + "grad_norm": 213.68614196777344, + "learning_rate": 4.645048547852693e-08, + "loss": 17.5773, + "step": 480460 + }, + { + "epoch": 0.9705797985592909, + "grad_norm": 346.04425048828125, + "learning_rate": 4.6403027304513513e-08, + "loss": 9.6109, + "step": 480470 + }, + { + "epoch": 0.9705999991919747, + "grad_norm": 422.04339599609375, + "learning_rate": 4.635559327373029e-08, + "loss": 14.3172, + "step": 480480 + }, + { + "epoch": 0.9706201998246585, + "grad_norm": 148.32606506347656, + "learning_rate": 4.6308183386409855e-08, + "loss": 23.7274, + "step": 480490 + }, + { + "epoch": 0.9706404004573423, + "grad_norm": 87.15348815917969, + "learning_rate": 4.626079764278202e-08, + "loss": 15.3824, + "step": 480500 + }, + { + "epoch": 0.9706606010900262, + "grad_norm": 780.4072875976562, + "learning_rate": 4.621343604307826e-08, + "loss": 32.2571, + "step": 480510 + }, + { + "epoch": 0.97068080172271, + "grad_norm": 604.56298828125, + "learning_rate": 4.616609858753007e-08, + "loss": 16.4787, + "step": 480520 + }, + { + "epoch": 0.9707010023553938, + "grad_norm": 308.4506530761719, + "learning_rate": 4.6118785276366706e-08, + "loss": 19.7474, + "step": 480530 + }, + { + "epoch": 0.9707212029880776, + "grad_norm": 34.33176040649414, + "learning_rate": 4.6071496109819643e-08, + "loss": 16.4853, + "step": 480540 + }, + { + "epoch": 0.9707414036207614, + "grad_norm": 516.2884521484375, + "learning_rate": 4.6024231088119266e-08, + "loss": 20.3538, + "step": 480550 + }, + { + "epoch": 0.9707616042534453, + "grad_norm": 333.0625305175781, + "learning_rate": 4.597699021149649e-08, + "loss": 19.302, + "step": 480560 + }, + { + "epoch": 0.9707818048861291, + "grad_norm": 81.12806701660156, + "learning_rate": 4.592977348018002e-08, + "loss": 14.8552, + "step": 480570 + }, + { + "epoch": 0.9708020055188128, + "grad_norm": 467.4275817871094, + "learning_rate": 4.588258089440134e-08, + "loss": 11.0226, + "step": 480580 + }, + { + "epoch": 0.9708222061514966, + "grad_norm": 438.2637023925781, + "learning_rate": 4.5835412454390823e-08, + "loss": 16.0897, + "step": 480590 + }, + { + "epoch": 0.9708424067841804, + "grad_norm": 253.96768188476562, + "learning_rate": 4.578826816037718e-08, + "loss": 27.9838, + "step": 480600 + }, + { + "epoch": 0.9708626074168643, + "grad_norm": 300.85296630859375, + "learning_rate": 4.574114801259022e-08, + "loss": 13.6882, + "step": 480610 + }, + { + "epoch": 0.9708828080495481, + "grad_norm": 462.7916259765625, + "learning_rate": 4.569405201126087e-08, + "loss": 32.8059, + "step": 480620 + }, + { + "epoch": 0.9709030086822319, + "grad_norm": 599.6968383789062, + "learning_rate": 4.5646980156617284e-08, + "loss": 14.333, + "step": 480630 + }, + { + "epoch": 0.9709232093149157, + "grad_norm": 571.0783081054688, + "learning_rate": 4.5599932448889276e-08, + "loss": 27.9993, + "step": 480640 + }, + { + "epoch": 0.9709434099475995, + "grad_norm": 412.3912658691406, + "learning_rate": 4.5552908888306654e-08, + "loss": 13.605, + "step": 480650 + }, + { + "epoch": 0.9709636105802834, + "grad_norm": 659.2654418945312, + "learning_rate": 4.5505909475098144e-08, + "loss": 21.4791, + "step": 480660 + }, + { + "epoch": 0.9709838112129672, + "grad_norm": 343.37347412109375, + "learning_rate": 4.545893420949299e-08, + "loss": 17.2477, + "step": 480670 + }, + { + "epoch": 0.971004011845651, + "grad_norm": 263.5906982421875, + "learning_rate": 4.5411983091719905e-08, + "loss": 10.5555, + "step": 480680 + }, + { + "epoch": 0.9710242124783348, + 
"grad_norm": 128.11795043945312, + "learning_rate": 4.5365056122007586e-08, + "loss": 23.7357, + "step": 480690 + }, + { + "epoch": 0.9710444131110186, + "grad_norm": 292.9198913574219, + "learning_rate": 4.531815330058586e-08, + "loss": 20.5026, + "step": 480700 + }, + { + "epoch": 0.9710646137437025, + "grad_norm": 434.9056091308594, + "learning_rate": 4.527127462768233e-08, + "loss": 17.0049, + "step": 480710 + }, + { + "epoch": 0.9710848143763863, + "grad_norm": 501.1333923339844, + "learning_rate": 4.5224420103525125e-08, + "loss": 18.9389, + "step": 480720 + }, + { + "epoch": 0.9711050150090701, + "grad_norm": 255.53147888183594, + "learning_rate": 4.517758972834352e-08, + "loss": 18.6333, + "step": 480730 + }, + { + "epoch": 0.9711252156417539, + "grad_norm": 63.87846755981445, + "learning_rate": 4.5130783502365106e-08, + "loss": 12.9465, + "step": 480740 + }, + { + "epoch": 0.9711454162744377, + "grad_norm": 277.8436279296875, + "learning_rate": 4.508400142581859e-08, + "loss": 15.9341, + "step": 480750 + }, + { + "epoch": 0.9711656169071216, + "grad_norm": 372.5913391113281, + "learning_rate": 4.503724349893157e-08, + "loss": 22.5148, + "step": 480760 + }, + { + "epoch": 0.9711858175398054, + "grad_norm": 550.0427856445312, + "learning_rate": 4.49905097219322e-08, + "loss": 22.9235, + "step": 480770 + }, + { + "epoch": 0.9712060181724892, + "grad_norm": 700.8040161132812, + "learning_rate": 4.4943800095048615e-08, + "loss": 19.4991, + "step": 480780 + }, + { + "epoch": 0.971226218805173, + "grad_norm": 490.8512878417969, + "learning_rate": 4.4897114618506765e-08, + "loss": 19.7064, + "step": 480790 + }, + { + "epoch": 0.9712464194378568, + "grad_norm": 342.76123046875, + "learning_rate": 4.485045329253646e-08, + "loss": 17.5235, + "step": 480800 + }, + { + "epoch": 0.9712666200705407, + "grad_norm": 430.11834716796875, + "learning_rate": 4.480381611736362e-08, + "loss": 14.5157, + "step": 480810 + }, + { + "epoch": 0.9712868207032245, + "grad_norm": 151.7615966796875, + "learning_rate": 4.4757203093215854e-08, + "loss": 17.9508, + "step": 480820 + }, + { + "epoch": 0.9713070213359083, + "grad_norm": 129.25218200683594, + "learning_rate": 4.4710614220320746e-08, + "loss": 9.7335, + "step": 480830 + }, + { + "epoch": 0.971327221968592, + "grad_norm": 260.68408203125, + "learning_rate": 4.4664049498904796e-08, + "loss": 12.1655, + "step": 480840 + }, + { + "epoch": 0.9713474226012758, + "grad_norm": 498.87469482421875, + "learning_rate": 4.4617508929195585e-08, + "loss": 14.879, + "step": 480850 + }, + { + "epoch": 0.9713676232339596, + "grad_norm": 176.5625762939453, + "learning_rate": 4.457099251141961e-08, + "loss": 7.3391, + "step": 480860 + }, + { + "epoch": 0.9713878238666435, + "grad_norm": 308.27056884765625, + "learning_rate": 4.4524500245803346e-08, + "loss": 15.3614, + "step": 480870 + }, + { + "epoch": 0.9714080244993273, + "grad_norm": 322.5669250488281, + "learning_rate": 4.4478032132573845e-08, + "loss": 18.5765, + "step": 480880 + }, + { + "epoch": 0.9714282251320111, + "grad_norm": 428.40423583984375, + "learning_rate": 4.443158817195703e-08, + "loss": 37.4654, + "step": 480890 + }, + { + "epoch": 0.9714484257646949, + "grad_norm": 14.182168006896973, + "learning_rate": 4.438516836417994e-08, + "loss": 19.9014, + "step": 480900 + }, + { + "epoch": 0.9714686263973787, + "grad_norm": 493.1097717285156, + "learning_rate": 4.4338772709468514e-08, + "loss": 14.3817, + "step": 480910 + }, + { + "epoch": 0.9714888270300626, + "grad_norm": 235.4907684326172, + 
"learning_rate": 4.429240120804923e-08, + "loss": 31.6696, + "step": 480920 + }, + { + "epoch": 0.9715090276627464, + "grad_norm": 473.29937744140625, + "learning_rate": 4.424605386014691e-08, + "loss": 26.5514, + "step": 480930 + }, + { + "epoch": 0.9715292282954302, + "grad_norm": 496.5303955078125, + "learning_rate": 4.4199730665988594e-08, + "loss": 15.9515, + "step": 480940 + }, + { + "epoch": 0.971549428928114, + "grad_norm": 189.34280395507812, + "learning_rate": 4.415343162580022e-08, + "loss": 11.0705, + "step": 480950 + }, + { + "epoch": 0.9715696295607978, + "grad_norm": 182.630126953125, + "learning_rate": 4.4107156739806037e-08, + "loss": 17.192, + "step": 480960 + }, + { + "epoch": 0.9715898301934817, + "grad_norm": 19.358842849731445, + "learning_rate": 4.40609060082331e-08, + "loss": 11.0745, + "step": 480970 + }, + { + "epoch": 0.9716100308261655, + "grad_norm": 249.55409240722656, + "learning_rate": 4.401467943130622e-08, + "loss": 13.5404, + "step": 480980 + }, + { + "epoch": 0.9716302314588493, + "grad_norm": 409.1258544921875, + "learning_rate": 4.3968477009250775e-08, + "loss": 19.5579, + "step": 480990 + }, + { + "epoch": 0.9716504320915331, + "grad_norm": 214.1861114501953, + "learning_rate": 4.392229874229159e-08, + "loss": 17.2364, + "step": 481000 + }, + { + "epoch": 0.9716706327242169, + "grad_norm": 208.24266052246094, + "learning_rate": 4.387614463065404e-08, + "loss": 26.5228, + "step": 481010 + }, + { + "epoch": 0.9716908333569008, + "grad_norm": 916.2860107421875, + "learning_rate": 4.383001467456294e-08, + "loss": 21.4474, + "step": 481020 + }, + { + "epoch": 0.9717110339895846, + "grad_norm": 1262.3741455078125, + "learning_rate": 4.378390887424366e-08, + "loss": 20.0141, + "step": 481030 + }, + { + "epoch": 0.9717312346222684, + "grad_norm": 484.7181701660156, + "learning_rate": 4.3737827229919926e-08, + "loss": 24.9658, + "step": 481040 + }, + { + "epoch": 0.9717514352549522, + "grad_norm": 199.1392822265625, + "learning_rate": 4.36917697418171e-08, + "loss": 13.892, + "step": 481050 + }, + { + "epoch": 0.971771635887636, + "grad_norm": 56.355411529541016, + "learning_rate": 4.364573641016001e-08, + "loss": 16.0134, + "step": 481060 + }, + { + "epoch": 0.9717918365203199, + "grad_norm": 294.04638671875, + "learning_rate": 4.359972723517236e-08, + "loss": 25.3006, + "step": 481070 + }, + { + "epoch": 0.9718120371530037, + "grad_norm": 230.2291259765625, + "learning_rate": 4.3553742217077866e-08, + "loss": 15.2554, + "step": 481080 + }, + { + "epoch": 0.9718322377856875, + "grad_norm": 100.92296600341797, + "learning_rate": 4.350778135610134e-08, + "loss": 11.8245, + "step": 481090 + }, + { + "epoch": 0.9718524384183712, + "grad_norm": 3.7475879192352295, + "learning_rate": 4.346184465246761e-08, + "loss": 15.6748, + "step": 481100 + }, + { + "epoch": 0.971872639051055, + "grad_norm": 409.6090087890625, + "learning_rate": 4.3415932106398715e-08, + "loss": 20.7157, + "step": 481110 + }, + { + "epoch": 0.9718928396837389, + "grad_norm": 396.5038757324219, + "learning_rate": 4.3370043718119484e-08, + "loss": 14.2519, + "step": 481120 + }, + { + "epoch": 0.9719130403164227, + "grad_norm": 455.0494689941406, + "learning_rate": 4.332417948785417e-08, + "loss": 20.1041, + "step": 481130 + }, + { + "epoch": 0.9719332409491065, + "grad_norm": 298.5553283691406, + "learning_rate": 4.327833941582538e-08, + "loss": 19.1641, + "step": 481140 + }, + { + "epoch": 0.9719534415817903, + "grad_norm": 259.3094177246094, + "learning_rate": 4.3232523502256264e-08, + "loss": 
30.4921, + "step": 481150 + }, + { + "epoch": 0.9719736422144741, + "grad_norm": 36.026588439941406, + "learning_rate": 4.318673174737109e-08, + "loss": 8.7467, + "step": 481160 + }, + { + "epoch": 0.971993842847158, + "grad_norm": 358.4710693359375, + "learning_rate": 4.3140964151393015e-08, + "loss": 9.7279, + "step": 481170 + }, + { + "epoch": 0.9720140434798418, + "grad_norm": 439.7259216308594, + "learning_rate": 4.3095220714544084e-08, + "loss": 13.9889, + "step": 481180 + }, + { + "epoch": 0.9720342441125256, + "grad_norm": 202.249755859375, + "learning_rate": 4.304950143704745e-08, + "loss": 32.0029, + "step": 481190 + }, + { + "epoch": 0.9720544447452094, + "grad_norm": 282.92864990234375, + "learning_rate": 4.3003806319127376e-08, + "loss": 13.5526, + "step": 481200 + }, + { + "epoch": 0.9720746453778932, + "grad_norm": 222.46804809570312, + "learning_rate": 4.2958135361004794e-08, + "loss": 14.219, + "step": 481210 + }, + { + "epoch": 0.972094846010577, + "grad_norm": 247.01966857910156, + "learning_rate": 4.291248856290342e-08, + "loss": 22.9882, + "step": 481220 + }, + { + "epoch": 0.9721150466432609, + "grad_norm": 363.4715576171875, + "learning_rate": 4.28668659250453e-08, + "loss": 19.1361, + "step": 481230 + }, + { + "epoch": 0.9721352472759447, + "grad_norm": 279.6907043457031, + "learning_rate": 4.282126744765247e-08, + "loss": 18.316, + "step": 481240 + }, + { + "epoch": 0.9721554479086285, + "grad_norm": 236.50823974609375, + "learning_rate": 4.2775693130948094e-08, + "loss": 33.653, + "step": 481250 + }, + { + "epoch": 0.9721756485413123, + "grad_norm": 0.12165041267871857, + "learning_rate": 4.2730142975153654e-08, + "loss": 23.2239, + "step": 481260 + }, + { + "epoch": 0.9721958491739962, + "grad_norm": 269.12451171875, + "learning_rate": 4.26846169804912e-08, + "loss": 7.2478, + "step": 481270 + }, + { + "epoch": 0.97221604980668, + "grad_norm": 169.82176208496094, + "learning_rate": 4.263911514718222e-08, + "loss": 23.8434, + "step": 481280 + }, + { + "epoch": 0.9722362504393638, + "grad_norm": 1032.0400390625, + "learning_rate": 4.259363747544931e-08, + "loss": 26.5229, + "step": 481290 + }, + { + "epoch": 0.9722564510720476, + "grad_norm": 326.4361267089844, + "learning_rate": 4.2548183965513415e-08, + "loss": 19.0609, + "step": 481300 + }, + { + "epoch": 0.9722766517047314, + "grad_norm": 200.75393676757812, + "learning_rate": 4.250275461759712e-08, + "loss": 22.7418, + "step": 481310 + }, + { + "epoch": 0.9722968523374153, + "grad_norm": 357.01751708984375, + "learning_rate": 4.245734943192081e-08, + "loss": 15.4631, + "step": 481320 + }, + { + "epoch": 0.9723170529700991, + "grad_norm": 334.8346862792969, + "learning_rate": 4.241196840870598e-08, + "loss": 16.0759, + "step": 481330 + }, + { + "epoch": 0.9723372536027829, + "grad_norm": 252.5950469970703, + "learning_rate": 4.236661154817412e-08, + "loss": 5.5812, + "step": 481340 + }, + { + "epoch": 0.9723574542354666, + "grad_norm": 310.70068359375, + "learning_rate": 4.23212788505456e-08, + "loss": 17.8986, + "step": 481350 + }, + { + "epoch": 0.9723776548681504, + "grad_norm": 362.728515625, + "learning_rate": 4.227597031604247e-08, + "loss": 13.8651, + "step": 481360 + }, + { + "epoch": 0.9723978555008342, + "grad_norm": 718.6262817382812, + "learning_rate": 4.2230685944884554e-08, + "loss": 31.8578, + "step": 481370 + }, + { + "epoch": 0.9724180561335181, + "grad_norm": 399.081787109375, + "learning_rate": 4.218542573729334e-08, + "loss": 20.2966, + "step": 481380 + }, + { + "epoch": 0.9724382567662019, 
+ "grad_norm": 261.89031982421875, + "learning_rate": 4.2140189693488654e-08, + "loss": 30.2808, + "step": 481390 + }, + { + "epoch": 0.9724584573988857, + "grad_norm": 309.1327209472656, + "learning_rate": 4.209497781369143e-08, + "loss": 7.3983, + "step": 481400 + }, + { + "epoch": 0.9724786580315695, + "grad_norm": 325.05633544921875, + "learning_rate": 4.20497900981226e-08, + "loss": 17.7839, + "step": 481410 + }, + { + "epoch": 0.9724988586642533, + "grad_norm": 60.344329833984375, + "learning_rate": 4.2004626547000885e-08, + "loss": 11.7354, + "step": 481420 + }, + { + "epoch": 0.9725190592969372, + "grad_norm": 437.69622802734375, + "learning_rate": 4.195948716054776e-08, + "loss": 23.6131, + "step": 481430 + }, + { + "epoch": 0.972539259929621, + "grad_norm": 432.4437561035156, + "learning_rate": 4.191437193898251e-08, + "loss": 26.2536, + "step": 481440 + }, + { + "epoch": 0.9725594605623048, + "grad_norm": 393.9098205566406, + "learning_rate": 4.1869280882525506e-08, + "loss": 33.5293, + "step": 481450 + }, + { + "epoch": 0.9725796611949886, + "grad_norm": 126.11160278320312, + "learning_rate": 4.1824213991396024e-08, + "loss": 13.8136, + "step": 481460 + }, + { + "epoch": 0.9725998618276724, + "grad_norm": 60.79416275024414, + "learning_rate": 4.1779171265814435e-08, + "loss": 10.5208, + "step": 481470 + }, + { + "epoch": 0.9726200624603563, + "grad_norm": 189.42037963867188, + "learning_rate": 4.173415270599945e-08, + "loss": 21.2066, + "step": 481480 + }, + { + "epoch": 0.9726402630930401, + "grad_norm": 373.3421630859375, + "learning_rate": 4.168915831217091e-08, + "loss": 14.9442, + "step": 481490 + }, + { + "epoch": 0.9726604637257239, + "grad_norm": 282.920166015625, + "learning_rate": 4.164418808454806e-08, + "loss": 16.3627, + "step": 481500 + }, + { + "epoch": 0.9726806643584077, + "grad_norm": 251.2848358154297, + "learning_rate": 4.159924202334964e-08, + "loss": 21.8468, + "step": 481510 + }, + { + "epoch": 0.9727008649910915, + "grad_norm": 81.56550598144531, + "learning_rate": 4.1554320128795455e-08, + "loss": 13.4286, + "step": 481520 + }, + { + "epoch": 0.9727210656237754, + "grad_norm": 390.93798828125, + "learning_rate": 4.150942240110478e-08, + "loss": 13.1788, + "step": 481530 + }, + { + "epoch": 0.9727412662564592, + "grad_norm": 456.1376953125, + "learning_rate": 4.146454884049467e-08, + "loss": 22.1921, + "step": 481540 + }, + { + "epoch": 0.972761466889143, + "grad_norm": 565.8245849609375, + "learning_rate": 4.1419699447186045e-08, + "loss": 55.9171, + "step": 481550 + }, + { + "epoch": 0.9727816675218268, + "grad_norm": 669.5379028320312, + "learning_rate": 4.137487422139541e-08, + "loss": 24.39, + "step": 481560 + }, + { + "epoch": 0.9728018681545106, + "grad_norm": 78.6161117553711, + "learning_rate": 4.133007316334259e-08, + "loss": 13.201, + "step": 481570 + }, + { + "epoch": 0.9728220687871945, + "grad_norm": 78.4344711303711, + "learning_rate": 4.128529627324573e-08, + "loss": 20.3774, + "step": 481580 + }, + { + "epoch": 0.9728422694198783, + "grad_norm": 380.95587158203125, + "learning_rate": 4.124054355132301e-08, + "loss": 12.9402, + "step": 481590 + }, + { + "epoch": 0.9728624700525621, + "grad_norm": 485.6889343261719, + "learning_rate": 4.1195814997792014e-08, + "loss": 11.6911, + "step": 481600 + }, + { + "epoch": 0.9728826706852458, + "grad_norm": 471.99139404296875, + "learning_rate": 4.1151110612872023e-08, + "loss": 18.3957, + "step": 481610 + }, + { + "epoch": 0.9729028713179296, + "grad_norm": 628.6807861328125, + "learning_rate": 
4.1106430396778974e-08, + "loss": 30.9745, + "step": 481620 + }, + { + "epoch": 0.9729230719506134, + "grad_norm": 203.78421020507812, + "learning_rate": 4.1061774349732686e-08, + "loss": 12.6072, + "step": 481630 + }, + { + "epoch": 0.9729432725832973, + "grad_norm": 325.5924987792969, + "learning_rate": 4.10171424719491e-08, + "loss": 25.4391, + "step": 481640 + }, + { + "epoch": 0.9729634732159811, + "grad_norm": 293.34912109375, + "learning_rate": 4.097253476364693e-08, + "loss": 35.9362, + "step": 481650 + }, + { + "epoch": 0.9729836738486649, + "grad_norm": 170.9857177734375, + "learning_rate": 4.092795122504323e-08, + "loss": 17.5913, + "step": 481660 + }, + { + "epoch": 0.9730038744813487, + "grad_norm": 682.1256103515625, + "learning_rate": 4.088339185635504e-08, + "loss": 13.5609, + "step": 481670 + }, + { + "epoch": 0.9730240751140325, + "grad_norm": 350.8077087402344, + "learning_rate": 4.083885665779996e-08, + "loss": 23.3654, + "step": 481680 + }, + { + "epoch": 0.9730442757467164, + "grad_norm": 403.890625, + "learning_rate": 4.07943456295945e-08, + "loss": 25.1687, + "step": 481690 + }, + { + "epoch": 0.9730644763794002, + "grad_norm": 0.18735237419605255, + "learning_rate": 4.0749858771956253e-08, + "loss": 10.6021, + "step": 481700 + }, + { + "epoch": 0.973084677012084, + "grad_norm": 451.27435302734375, + "learning_rate": 4.070539608510171e-08, + "loss": 25.175, + "step": 481710 + }, + { + "epoch": 0.9731048776447678, + "grad_norm": 744.0653686523438, + "learning_rate": 4.066095756924682e-08, + "loss": 34.801, + "step": 481720 + }, + { + "epoch": 0.9731250782774516, + "grad_norm": 14.12344741821289, + "learning_rate": 4.061654322460973e-08, + "loss": 19.8248, + "step": 481730 + }, + { + "epoch": 0.9731452789101355, + "grad_norm": 167.87937927246094, + "learning_rate": 4.0572153051406383e-08, + "loss": 15.7398, + "step": 481740 + }, + { + "epoch": 0.9731654795428193, + "grad_norm": 193.6814727783203, + "learning_rate": 4.052778704985216e-08, + "loss": 25.9701, + "step": 481750 + }, + { + "epoch": 0.9731856801755031, + "grad_norm": 437.42242431640625, + "learning_rate": 4.048344522016356e-08, + "loss": 21.1084, + "step": 481760 + }, + { + "epoch": 0.9732058808081869, + "grad_norm": 628.2918090820312, + "learning_rate": 4.043912756255819e-08, + "loss": 19.3399, + "step": 481770 + }, + { + "epoch": 0.9732260814408707, + "grad_norm": 197.848876953125, + "learning_rate": 4.039483407725031e-08, + "loss": 17.7387, + "step": 481780 + }, + { + "epoch": 0.9732462820735546, + "grad_norm": 301.0892333984375, + "learning_rate": 4.035056476445698e-08, + "loss": 24.283, + "step": 481790 + }, + { + "epoch": 0.9732664827062384, + "grad_norm": 535.1521606445312, + "learning_rate": 4.030631962439302e-08, + "loss": 13.416, + "step": 481800 + }, + { + "epoch": 0.9732866833389222, + "grad_norm": 199.53504943847656, + "learning_rate": 4.026209865727493e-08, + "loss": 22.0506, + "step": 481810 + }, + { + "epoch": 0.973306883971606, + "grad_norm": 77.13323974609375, + "learning_rate": 4.0217901863317534e-08, + "loss": 12.3341, + "step": 481820 + }, + { + "epoch": 0.9733270846042898, + "grad_norm": 553.6863403320312, + "learning_rate": 4.017372924273621e-08, + "loss": 25.0607, + "step": 481830 + }, + { + "epoch": 0.9733472852369737, + "grad_norm": 112.62586975097656, + "learning_rate": 4.012958079574747e-08, + "loss": 17.1677, + "step": 481840 + }, + { + "epoch": 0.9733674858696575, + "grad_norm": 160.67294311523438, + "learning_rate": 4.008545652256502e-08, + "loss": 16.0821, + "step": 481850 + 
}, + { + "epoch": 0.9733876865023412, + "grad_norm": 88.95194244384766, + "learning_rate": 4.004135642340423e-08, + "loss": 14.0948, + "step": 481860 + }, + { + "epoch": 0.973407887135025, + "grad_norm": 199.16061401367188, + "learning_rate": 3.999728049848106e-08, + "loss": 12.0725, + "step": 481870 + }, + { + "epoch": 0.9734280877677088, + "grad_norm": 470.1341247558594, + "learning_rate": 3.995322874800922e-08, + "loss": 26.116, + "step": 481880 + }, + { + "epoch": 0.9734482884003927, + "grad_norm": 118.9264907836914, + "learning_rate": 3.9909201172203537e-08, + "loss": 18.3537, + "step": 481890 + }, + { + "epoch": 0.9734684890330765, + "grad_norm": 12.092803001403809, + "learning_rate": 3.986519777127884e-08, + "loss": 10.6664, + "step": 481900 + }, + { + "epoch": 0.9734886896657603, + "grad_norm": 42.88130187988281, + "learning_rate": 3.9821218545449956e-08, + "loss": 9.6017, + "step": 481910 + }, + { + "epoch": 0.9735088902984441, + "grad_norm": 111.88728332519531, + "learning_rate": 3.977726349493061e-08, + "loss": 16.497, + "step": 481920 + }, + { + "epoch": 0.9735290909311279, + "grad_norm": 265.8011779785156, + "learning_rate": 3.973333261993506e-08, + "loss": 30.0345, + "step": 481930 + }, + { + "epoch": 0.9735492915638118, + "grad_norm": 7.396425724029541, + "learning_rate": 3.9689425920678146e-08, + "loss": 31.8613, + "step": 481940 + }, + { + "epoch": 0.9735694921964956, + "grad_norm": 209.86404418945312, + "learning_rate": 3.964554339737303e-08, + "loss": 39.5427, + "step": 481950 + }, + { + "epoch": 0.9735896928291794, + "grad_norm": 909.5701293945312, + "learning_rate": 3.960168505023343e-08, + "loss": 30.6708, + "step": 481960 + }, + { + "epoch": 0.9736098934618632, + "grad_norm": 885.4310302734375, + "learning_rate": 3.955785087947473e-08, + "loss": 17.3833, + "step": 481970 + }, + { + "epoch": 0.973630094094547, + "grad_norm": 640.5752563476562, + "learning_rate": 3.951404088530841e-08, + "loss": 25.2004, + "step": 481980 + }, + { + "epoch": 0.9736502947272309, + "grad_norm": 345.5538635253906, + "learning_rate": 3.947025506794933e-08, + "loss": 25.903, + "step": 481990 + }, + { + "epoch": 0.9736704953599147, + "grad_norm": 286.027099609375, + "learning_rate": 3.9426493427611177e-08, + "loss": 9.783, + "step": 482000 + }, + { + "epoch": 0.9736906959925985, + "grad_norm": 334.16693115234375, + "learning_rate": 3.938275596450603e-08, + "loss": 17.6792, + "step": 482010 + }, + { + "epoch": 0.9737108966252823, + "grad_norm": 750.4974975585938, + "learning_rate": 3.933904267884758e-08, + "loss": 16.873, + "step": 482020 + }, + { + "epoch": 0.9737310972579661, + "grad_norm": 354.44659423828125, + "learning_rate": 3.929535357084957e-08, + "loss": 33.9045, + "step": 482030 + }, + { + "epoch": 0.97375129789065, + "grad_norm": 163.46725463867188, + "learning_rate": 3.925168864072348e-08, + "loss": 12.3901, + "step": 482040 + }, + { + "epoch": 0.9737714985233338, + "grad_norm": 357.58935546875, + "learning_rate": 3.9208047888683597e-08, + "loss": 8.9686, + "step": 482050 + }, + { + "epoch": 0.9737916991560176, + "grad_norm": 256.0655517578125, + "learning_rate": 3.9164431314941965e-08, + "loss": 18.6118, + "step": 482060 + }, + { + "epoch": 0.9738118997887014, + "grad_norm": 475.1147155761719, + "learning_rate": 3.912083891971119e-08, + "loss": 14.304, + "step": 482070 + }, + { + "epoch": 0.9738321004213852, + "grad_norm": 621.0291137695312, + "learning_rate": 3.907727070320389e-08, + "loss": 17.7575, + "step": 482080 + }, + { + "epoch": 0.9738523010540691, + "grad_norm": 
294.4523010253906, + "learning_rate": 3.9033726665632096e-08, + "loss": 11.2396, + "step": 482090 + }, + { + "epoch": 0.9738725016867529, + "grad_norm": 306.70318603515625, + "learning_rate": 3.899020680720844e-08, + "loss": 18.0931, + "step": 482100 + }, + { + "epoch": 0.9738927023194367, + "grad_norm": 857.6680297851562, + "learning_rate": 3.894671112814441e-08, + "loss": 12.2714, + "step": 482110 + }, + { + "epoch": 0.9739129029521204, + "grad_norm": 496.441162109375, + "learning_rate": 3.8903239628652615e-08, + "loss": 12.2289, + "step": 482120 + }, + { + "epoch": 0.9739331035848042, + "grad_norm": 143.51065063476562, + "learning_rate": 3.88597923089451e-08, + "loss": 20.1973, + "step": 482130 + }, + { + "epoch": 0.973953304217488, + "grad_norm": 1033.66796875, + "learning_rate": 3.881636916923281e-08, + "loss": 20.2224, + "step": 482140 + }, + { + "epoch": 0.9739735048501719, + "grad_norm": 485.4521179199219, + "learning_rate": 3.877297020972781e-08, + "loss": 24.38, + "step": 482150 + }, + { + "epoch": 0.9739937054828557, + "grad_norm": 167.51791381835938, + "learning_rate": 3.8729595430641586e-08, + "loss": 16.1247, + "step": 482160 + }, + { + "epoch": 0.9740139061155395, + "grad_norm": 431.7303771972656, + "learning_rate": 3.868624483218619e-08, + "loss": 22.341, + "step": 482170 + }, + { + "epoch": 0.9740341067482233, + "grad_norm": 76.6075439453125, + "learning_rate": 3.864291841457146e-08, + "loss": 22.8067, + "step": 482180 + }, + { + "epoch": 0.9740543073809071, + "grad_norm": 313.4910583496094, + "learning_rate": 3.859961617801e-08, + "loss": 23.8506, + "step": 482190 + }, + { + "epoch": 0.974074508013591, + "grad_norm": 1129.052001953125, + "learning_rate": 3.855633812271165e-08, + "loss": 20.4741, + "step": 482200 + }, + { + "epoch": 0.9740947086462748, + "grad_norm": 457.9103698730469, + "learning_rate": 3.8513084248888445e-08, + "loss": 23.1593, + "step": 482210 + }, + { + "epoch": 0.9741149092789586, + "grad_norm": 453.9139404296875, + "learning_rate": 3.8469854556750785e-08, + "loss": 18.997, + "step": 482220 + }, + { + "epoch": 0.9741351099116424, + "grad_norm": 312.4790954589844, + "learning_rate": 3.842664904650906e-08, + "loss": 20.262, + "step": 482230 + }, + { + "epoch": 0.9741553105443262, + "grad_norm": 347.3577575683594, + "learning_rate": 3.83834677183742e-08, + "loss": 28.9999, + "step": 482240 + }, + { + "epoch": 0.9741755111770101, + "grad_norm": 345.5404357910156, + "learning_rate": 3.83403105725566e-08, + "loss": 12.5937, + "step": 482250 + }, + { + "epoch": 0.9741957118096939, + "grad_norm": 311.71612548828125, + "learning_rate": 3.82971776092661e-08, + "loss": 17.8435, + "step": 482260 + }, + { + "epoch": 0.9742159124423777, + "grad_norm": 484.7486267089844, + "learning_rate": 3.825406882871363e-08, + "loss": 12.1719, + "step": 482270 + }, + { + "epoch": 0.9742361130750615, + "grad_norm": 942.969970703125, + "learning_rate": 3.8210984231109583e-08, + "loss": 15.9121, + "step": 482280 + }, + { + "epoch": 0.9742563137077453, + "grad_norm": 518.7152099609375, + "learning_rate": 3.816792381666268e-08, + "loss": 13.4377, + "step": 482290 + }, + { + "epoch": 0.9742765143404292, + "grad_norm": 464.7488708496094, + "learning_rate": 3.812488758558386e-08, + "loss": 41.7322, + "step": 482300 + }, + { + "epoch": 0.974296714973113, + "grad_norm": 411.00128173828125, + "learning_rate": 3.8081875538082404e-08, + "loss": 18.5426, + "step": 482310 + }, + { + "epoch": 0.9743169156057968, + "grad_norm": 474.0198974609375, + "learning_rate": 3.8038887674368697e-08, + 
"loss": 17.4803, + "step": 482320 + }, + { + "epoch": 0.9743371162384806, + "grad_norm": 637.5534057617188, + "learning_rate": 3.799592399465091e-08, + "loss": 20.2909, + "step": 482330 + }, + { + "epoch": 0.9743573168711644, + "grad_norm": 397.6789245605469, + "learning_rate": 3.7952984499138864e-08, + "loss": 19.7161, + "step": 482340 + }, + { + "epoch": 0.9743775175038483, + "grad_norm": 28.488502502441406, + "learning_rate": 3.791006918804296e-08, + "loss": 16.1842, + "step": 482350 + }, + { + "epoch": 0.9743977181365321, + "grad_norm": 123.11540222167969, + "learning_rate": 3.786717806157136e-08, + "loss": 16.763, + "step": 482360 + }, + { + "epoch": 0.9744179187692159, + "grad_norm": 240.61846923828125, + "learning_rate": 3.782431111993279e-08, + "loss": 27.4726, + "step": 482370 + }, + { + "epoch": 0.9744381194018996, + "grad_norm": 424.6113586425781, + "learning_rate": 3.778146836333707e-08, + "loss": 18.4896, + "step": 482380 + }, + { + "epoch": 0.9744583200345834, + "grad_norm": 316.656494140625, + "learning_rate": 3.7738649791992934e-08, + "loss": 17.3348, + "step": 482390 + }, + { + "epoch": 0.9744785206672673, + "grad_norm": 666.3976440429688, + "learning_rate": 3.769585540610799e-08, + "loss": 24.009, + "step": 482400 + }, + { + "epoch": 0.9744987212999511, + "grad_norm": 764.1138916015625, + "learning_rate": 3.765308520589206e-08, + "loss": 63.306, + "step": 482410 + }, + { + "epoch": 0.9745189219326349, + "grad_norm": 311.10931396484375, + "learning_rate": 3.761033919155333e-08, + "loss": 17.2908, + "step": 482420 + }, + { + "epoch": 0.9745391225653187, + "grad_norm": 347.3961486816406, + "learning_rate": 3.7567617363299945e-08, + "loss": 18.8477, + "step": 482430 + }, + { + "epoch": 0.9745593231980025, + "grad_norm": 341.66497802734375, + "learning_rate": 3.7524919721339535e-08, + "loss": 16.1301, + "step": 482440 + }, + { + "epoch": 0.9745795238306864, + "grad_norm": 432.0102233886719, + "learning_rate": 3.748224626588137e-08, + "loss": 30.7898, + "step": 482450 + }, + { + "epoch": 0.9745997244633702, + "grad_norm": 666.8880615234375, + "learning_rate": 3.743959699713251e-08, + "loss": 43.5052, + "step": 482460 + }, + { + "epoch": 0.974619925096054, + "grad_norm": 560.2584838867188, + "learning_rate": 3.739697191530112e-08, + "loss": 19.5189, + "step": 482470 + }, + { + "epoch": 0.9746401257287378, + "grad_norm": 146.7061309814453, + "learning_rate": 3.735437102059536e-08, + "loss": 13.0949, + "step": 482480 + }, + { + "epoch": 0.9746603263614216, + "grad_norm": 568.6498413085938, + "learning_rate": 3.731179431322285e-08, + "loss": 23.3692, + "step": 482490 + }, + { + "epoch": 0.9746805269941055, + "grad_norm": 178.3436279296875, + "learning_rate": 3.726924179339009e-08, + "loss": 16.9201, + "step": 482500 + }, + { + "epoch": 0.9747007276267893, + "grad_norm": 83.3872299194336, + "learning_rate": 3.7226713461305245e-08, + "loss": 24.7302, + "step": 482510 + }, + { + "epoch": 0.9747209282594731, + "grad_norm": 812.8739624023438, + "learning_rate": 3.7184209317175366e-08, + "loss": 37.4827, + "step": 482520 + }, + { + "epoch": 0.9747411288921569, + "grad_norm": 431.8472595214844, + "learning_rate": 3.714172936120808e-08, + "loss": 20.2624, + "step": 482530 + }, + { + "epoch": 0.9747613295248407, + "grad_norm": 129.54502868652344, + "learning_rate": 3.7099273593609316e-08, + "loss": 14.5634, + "step": 482540 + }, + { + "epoch": 0.9747815301575246, + "grad_norm": 368.3341979980469, + "learning_rate": 3.7056842014587815e-08, + "loss": 23.7714, + "step": 482550 + }, + { + 
"epoch": 0.9748017307902084, + "grad_norm": 510.84796142578125, + "learning_rate": 3.701443462434895e-08, + "loss": 10.4416, + "step": 482560 + }, + { + "epoch": 0.9748219314228922, + "grad_norm": 61.4462890625, + "learning_rate": 3.697205142309923e-08, + "loss": 26.5968, + "step": 482570 + }, + { + "epoch": 0.974842132055576, + "grad_norm": 402.5279235839844, + "learning_rate": 3.692969241104683e-08, + "loss": 15.7557, + "step": 482580 + }, + { + "epoch": 0.9748623326882598, + "grad_norm": 422.69580078125, + "learning_rate": 3.688735758839601e-08, + "loss": 8.5926, + "step": 482590 + }, + { + "epoch": 0.9748825333209437, + "grad_norm": 440.9681396484375, + "learning_rate": 3.684504695535496e-08, + "loss": 19.7904, + "step": 482600 + }, + { + "epoch": 0.9749027339536275, + "grad_norm": 778.0719604492188, + "learning_rate": 3.680276051212961e-08, + "loss": 17.826, + "step": 482610 + }, + { + "epoch": 0.9749229345863113, + "grad_norm": 272.3429260253906, + "learning_rate": 3.67604982589248e-08, + "loss": 16.8534, + "step": 482620 + }, + { + "epoch": 0.974943135218995, + "grad_norm": 149.3791961669922, + "learning_rate": 3.6718260195947594e-08, + "loss": 13.4943, + "step": 482630 + }, + { + "epoch": 0.9749633358516788, + "grad_norm": 983.81640625, + "learning_rate": 3.6676046323403934e-08, + "loss": 26.2069, + "step": 482640 + }, + { + "epoch": 0.9749835364843626, + "grad_norm": 727.1124267578125, + "learning_rate": 3.663385664149866e-08, + "loss": 23.7736, + "step": 482650 + }, + { + "epoch": 0.9750037371170465, + "grad_norm": 104.9148178100586, + "learning_rate": 3.659169115043826e-08, + "loss": 17.0994, + "step": 482660 + }, + { + "epoch": 0.9750239377497303, + "grad_norm": 369.6506042480469, + "learning_rate": 3.654954985042869e-08, + "loss": 23.4421, + "step": 482670 + }, + { + "epoch": 0.9750441383824141, + "grad_norm": 291.3814697265625, + "learning_rate": 3.650743274167368e-08, + "loss": 11.2448, + "step": 482680 + }, + { + "epoch": 0.9750643390150979, + "grad_norm": 292.85687255859375, + "learning_rate": 3.6465339824379165e-08, + "loss": 18.7769, + "step": 482690 + }, + { + "epoch": 0.9750845396477817, + "grad_norm": 549.0516357421875, + "learning_rate": 3.642327109875166e-08, + "loss": 24.1786, + "step": 482700 + }, + { + "epoch": 0.9751047402804656, + "grad_norm": 442.29925537109375, + "learning_rate": 3.638122656499432e-08, + "loss": 19.9398, + "step": 482710 + }, + { + "epoch": 0.9751249409131494, + "grad_norm": 524.1804809570312, + "learning_rate": 3.633920622331311e-08, + "loss": 21.6374, + "step": 482720 + }, + { + "epoch": 0.9751451415458332, + "grad_norm": 135.19679260253906, + "learning_rate": 3.629721007391229e-08, + "loss": 26.2123, + "step": 482730 + }, + { + "epoch": 0.975165342178517, + "grad_norm": 287.5704345703125, + "learning_rate": 3.625523811699727e-08, + "loss": 15.5823, + "step": 482740 + }, + { + "epoch": 0.9751855428112008, + "grad_norm": 545.2062377929688, + "learning_rate": 3.621329035277232e-08, + "loss": 12.4626, + "step": 482750 + }, + { + "epoch": 0.9752057434438847, + "grad_norm": 8.507187843322754, + "learning_rate": 3.617136678144173e-08, + "loss": 13.9089, + "step": 482760 + }, + { + "epoch": 0.9752259440765685, + "grad_norm": 1035.490234375, + "learning_rate": 3.612946740320977e-08, + "loss": 25.8994, + "step": 482770 + }, + { + "epoch": 0.9752461447092523, + "grad_norm": 542.39306640625, + "learning_rate": 3.608759221828073e-08, + "loss": 30.5626, + "step": 482780 + }, + { + "epoch": 0.9752663453419361, + "grad_norm": 566.2247924804688, + 
"learning_rate": 3.604574122685833e-08, + "loss": 20.473, + "step": 482790 + }, + { + "epoch": 0.97528654597462, + "grad_norm": 730.0471801757812, + "learning_rate": 3.600391442914741e-08, + "loss": 19.1692, + "step": 482800 + }, + { + "epoch": 0.9753067466073038, + "grad_norm": 455.58203125, + "learning_rate": 3.5962111825350585e-08, + "loss": 22.6982, + "step": 482810 + }, + { + "epoch": 0.9753269472399876, + "grad_norm": 464.4102783203125, + "learning_rate": 3.592033341567325e-08, + "loss": 13.5132, + "step": 482820 + }, + { + "epoch": 0.9753471478726714, + "grad_norm": 301.8681945800781, + "learning_rate": 3.5878579200318006e-08, + "loss": 25.8108, + "step": 482830 + }, + { + "epoch": 0.9753673485053552, + "grad_norm": 403.30450439453125, + "learning_rate": 3.583684917948804e-08, + "loss": 20.1903, + "step": 482840 + }, + { + "epoch": 0.975387549138039, + "grad_norm": 253.32778930664062, + "learning_rate": 3.579514335338763e-08, + "loss": 18.7812, + "step": 482850 + }, + { + "epoch": 0.9754077497707229, + "grad_norm": 298.9661560058594, + "learning_rate": 3.575346172221939e-08, + "loss": 23.7279, + "step": 482860 + }, + { + "epoch": 0.9754279504034067, + "grad_norm": 390.77490234375, + "learning_rate": 3.5711804286187035e-08, + "loss": 15.822, + "step": 482870 + }, + { + "epoch": 0.9754481510360905, + "grad_norm": 214.9738311767578, + "learning_rate": 3.5670171045492643e-08, + "loss": 9.1737, + "step": 482880 + }, + { + "epoch": 0.9754683516687742, + "grad_norm": 1.0779601335525513, + "learning_rate": 3.5628562000339925e-08, + "loss": 12.394, + "step": 482890 + }, + { + "epoch": 0.975488552301458, + "grad_norm": 2.142943859100342, + "learning_rate": 3.558697715093207e-08, + "loss": 19.8335, + "step": 482900 + }, + { + "epoch": 0.9755087529341419, + "grad_norm": 170.5755615234375, + "learning_rate": 3.554541649747056e-08, + "loss": 21.7454, + "step": 482910 + }, + { + "epoch": 0.9755289535668257, + "grad_norm": 299.7828369140625, + "learning_rate": 3.5503880040158586e-08, + "loss": 18.7396, + "step": 482920 + }, + { + "epoch": 0.9755491541995095, + "grad_norm": 310.21197509765625, + "learning_rate": 3.546236777919876e-08, + "loss": 9.8836, + "step": 482930 + }, + { + "epoch": 0.9755693548321933, + "grad_norm": 213.90985107421875, + "learning_rate": 3.542087971479313e-08, + "loss": 11.9233, + "step": 482940 + }, + { + "epoch": 0.9755895554648771, + "grad_norm": 459.5467224121094, + "learning_rate": 3.5379415847143775e-08, + "loss": 17.2514, + "step": 482950 + }, + { + "epoch": 0.975609756097561, + "grad_norm": 460.8121032714844, + "learning_rate": 3.5337976176453845e-08, + "loss": 16.0871, + "step": 482960 + }, + { + "epoch": 0.9756299567302448, + "grad_norm": 272.2505798339844, + "learning_rate": 3.529656070292375e-08, + "loss": 15.9314, + "step": 482970 + }, + { + "epoch": 0.9756501573629286, + "grad_norm": 543.2966918945312, + "learning_rate": 3.525516942675611e-08, + "loss": 25.3451, + "step": 482980 + }, + { + "epoch": 0.9756703579956124, + "grad_norm": 185.53729248046875, + "learning_rate": 3.521380234815297e-08, + "loss": 29.2085, + "step": 482990 + }, + { + "epoch": 0.9756905586282962, + "grad_norm": 146.68104553222656, + "learning_rate": 3.517245946731529e-08, + "loss": 32.7787, + "step": 483000 + }, + { + "epoch": 0.97571075926098, + "grad_norm": 288.0614013671875, + "learning_rate": 3.513114078444513e-08, + "loss": 6.5792, + "step": 483010 + }, + { + "epoch": 0.9757309598936639, + "grad_norm": 698.1644897460938, + "learning_rate": 3.508984629974288e-08, + "loss": 22.6261, + 
"step": 483020 + }, + { + "epoch": 0.9757511605263477, + "grad_norm": 789.5795288085938, + "learning_rate": 3.504857601341172e-08, + "loss": 14.246, + "step": 483030 + }, + { + "epoch": 0.9757713611590315, + "grad_norm": 66.64833068847656, + "learning_rate": 3.5007329925650925e-08, + "loss": 47.8748, + "step": 483040 + }, + { + "epoch": 0.9757915617917153, + "grad_norm": 508.5932312011719, + "learning_rate": 3.4966108036662006e-08, + "loss": 9.9282, + "step": 483050 + }, + { + "epoch": 0.9758117624243992, + "grad_norm": 181.7556915283203, + "learning_rate": 3.4924910346647024e-08, + "loss": 17.4592, + "step": 483060 + }, + { + "epoch": 0.975831963057083, + "grad_norm": 498.73529052734375, + "learning_rate": 3.488373685580526e-08, + "loss": 18.9131, + "step": 483070 + }, + { + "epoch": 0.9758521636897668, + "grad_norm": 512.0242919921875, + "learning_rate": 3.4842587564337674e-08, + "loss": 10.8806, + "step": 483080 + }, + { + "epoch": 0.9758723643224506, + "grad_norm": 297.0984802246094, + "learning_rate": 3.48014624724452e-08, + "loss": 8.2831, + "step": 483090 + }, + { + "epoch": 0.9758925649551344, + "grad_norm": 2.16670298576355, + "learning_rate": 3.47603615803288e-08, + "loss": 17.6927, + "step": 483100 + }, + { + "epoch": 0.9759127655878183, + "grad_norm": 350.0305480957031, + "learning_rate": 3.471928488818776e-08, + "loss": 9.4405, + "step": 483110 + }, + { + "epoch": 0.9759329662205021, + "grad_norm": 14.403679847717285, + "learning_rate": 3.467823239622248e-08, + "loss": 17.0502, + "step": 483120 + }, + { + "epoch": 0.9759531668531859, + "grad_norm": 346.641357421875, + "learning_rate": 3.463720410463334e-08, + "loss": 26.3269, + "step": 483130 + }, + { + "epoch": 0.9759733674858696, + "grad_norm": 253.56033325195312, + "learning_rate": 3.459620001362074e-08, + "loss": 15.4785, + "step": 483140 + }, + { + "epoch": 0.9759935681185534, + "grad_norm": 450.78497314453125, + "learning_rate": 3.4555220123383416e-08, + "loss": 12.1875, + "step": 483150 + }, + { + "epoch": 0.9760137687512372, + "grad_norm": 178.17855834960938, + "learning_rate": 3.451426443412231e-08, + "loss": 8.2043, + "step": 483160 + }, + { + "epoch": 0.9760339693839211, + "grad_norm": 273.9609680175781, + "learning_rate": 3.4473332946036164e-08, + "loss": 21.7305, + "step": 483170 + }, + { + "epoch": 0.9760541700166049, + "grad_norm": 691.238525390625, + "learning_rate": 3.443242565932481e-08, + "loss": 20.2564, + "step": 483180 + }, + { + "epoch": 0.9760743706492887, + "grad_norm": 357.8240661621094, + "learning_rate": 3.439154257418753e-08, + "loss": 12.5185, + "step": 483190 + }, + { + "epoch": 0.9760945712819725, + "grad_norm": 495.1055603027344, + "learning_rate": 3.435068369082306e-08, + "loss": 37.3999, + "step": 483200 + }, + { + "epoch": 0.9761147719146563, + "grad_norm": 254.81387329101562, + "learning_rate": 3.4309849009431794e-08, + "loss": 21.3162, + "step": 483210 + }, + { + "epoch": 0.9761349725473402, + "grad_norm": 273.0685729980469, + "learning_rate": 3.4269038530211906e-08, + "loss": 19.9255, + "step": 483220 + }, + { + "epoch": 0.976155173180024, + "grad_norm": 44.90702819824219, + "learning_rate": 3.4228252253362683e-08, + "loss": 13.02, + "step": 483230 + }, + { + "epoch": 0.9761753738127078, + "grad_norm": 0.0, + "learning_rate": 3.41874901790823e-08, + "loss": 23.1527, + "step": 483240 + }, + { + "epoch": 0.9761955744453916, + "grad_norm": 520.8883056640625, + "learning_rate": 3.414675230757003e-08, + "loss": 11.8354, + "step": 483250 + }, + { + "epoch": 0.9762157750780754, + "grad_norm": 
266.39990234375, + "learning_rate": 3.410603863902406e-08, + "loss": 17.9792, + "step": 483260 + }, + { + "epoch": 0.9762359757107593, + "grad_norm": 64.41669464111328, + "learning_rate": 3.406534917364257e-08, + "loss": 13.4081, + "step": 483270 + }, + { + "epoch": 0.9762561763434431, + "grad_norm": 334.1542663574219, + "learning_rate": 3.402468391162539e-08, + "loss": 18.3694, + "step": 483280 + }, + { + "epoch": 0.9762763769761269, + "grad_norm": 578.1113891601562, + "learning_rate": 3.398404285316847e-08, + "loss": 10.7203, + "step": 483290 + }, + { + "epoch": 0.9762965776088107, + "grad_norm": 284.7392272949219, + "learning_rate": 3.394342599847111e-08, + "loss": 13.1058, + "step": 483300 + }, + { + "epoch": 0.9763167782414945, + "grad_norm": 366.591796875, + "learning_rate": 3.390283334773203e-08, + "loss": 19.2009, + "step": 483310 + }, + { + "epoch": 0.9763369788741784, + "grad_norm": 321.5985107421875, + "learning_rate": 3.3862264901147745e-08, + "loss": 27.2178, + "step": 483320 + }, + { + "epoch": 0.9763571795068622, + "grad_norm": 231.1267852783203, + "learning_rate": 3.3821720658916426e-08, + "loss": 8.5236, + "step": 483330 + }, + { + "epoch": 0.976377380139546, + "grad_norm": 0.8328532576560974, + "learning_rate": 3.378120062123569e-08, + "loss": 10.9436, + "step": 483340 + }, + { + "epoch": 0.9763975807722298, + "grad_norm": 158.7162322998047, + "learning_rate": 3.374070478830316e-08, + "loss": 12.2283, + "step": 483350 + }, + { + "epoch": 0.9764177814049136, + "grad_norm": 656.27880859375, + "learning_rate": 3.3700233160315897e-08, + "loss": 14.6304, + "step": 483360 + }, + { + "epoch": 0.9764379820375975, + "grad_norm": 404.3321228027344, + "learning_rate": 3.365978573747153e-08, + "loss": 31.0528, + "step": 483370 + }, + { + "epoch": 0.9764581826702813, + "grad_norm": 1263.421630859375, + "learning_rate": 3.361936251996711e-08, + "loss": 28.5038, + "step": 483380 + }, + { + "epoch": 0.9764783833029651, + "grad_norm": 152.63633728027344, + "learning_rate": 3.357896350799916e-08, + "loss": 14.4554, + "step": 483390 + }, + { + "epoch": 0.9764985839356488, + "grad_norm": 400.4908142089844, + "learning_rate": 3.3538588701765296e-08, + "loss": 15.232, + "step": 483400 + }, + { + "epoch": 0.9765187845683326, + "grad_norm": 317.1159362792969, + "learning_rate": 3.349823810146202e-08, + "loss": 8.8856, + "step": 483410 + }, + { + "epoch": 0.9765389852010165, + "grad_norm": 62.96767044067383, + "learning_rate": 3.34579117072864e-08, + "loss": 15.2614, + "step": 483420 + }, + { + "epoch": 0.9765591858337003, + "grad_norm": 244.07969665527344, + "learning_rate": 3.341760951943385e-08, + "loss": 19.5304, + "step": 483430 + }, + { + "epoch": 0.9765793864663841, + "grad_norm": 291.3792724609375, + "learning_rate": 3.337733153810141e-08, + "loss": 21.925, + "step": 483440 + }, + { + "epoch": 0.9765995870990679, + "grad_norm": 254.8742218017578, + "learning_rate": 3.3337077763485605e-08, + "loss": 26.2369, + "step": 483450 + }, + { + "epoch": 0.9766197877317517, + "grad_norm": 504.7132568359375, + "learning_rate": 3.329684819578294e-08, + "loss": 24.7492, + "step": 483460 + }, + { + "epoch": 0.9766399883644356, + "grad_norm": 401.4547119140625, + "learning_rate": 3.3256642835188816e-08, + "loss": 14.8526, + "step": 483470 + }, + { + "epoch": 0.9766601889971194, + "grad_norm": 68.80532836914062, + "learning_rate": 3.321646168189918e-08, + "loss": 11.8834, + "step": 483480 + }, + { + "epoch": 0.9766803896298032, + "grad_norm": 419.8252868652344, + "learning_rate": 3.317630473611055e-08, + 
"loss": 26.1874, + "step": 483490 + }, + { + "epoch": 0.976700590262487, + "grad_norm": 19.07436180114746, + "learning_rate": 3.313617199801777e-08, + "loss": 15.3926, + "step": 483500 + }, + { + "epoch": 0.9767207908951708, + "grad_norm": 770.347900390625, + "learning_rate": 3.309606346781735e-08, + "loss": 16.3904, + "step": 483510 + }, + { + "epoch": 0.9767409915278547, + "grad_norm": 479.2562561035156, + "learning_rate": 3.305597914570413e-08, + "loss": 13.9847, + "step": 483520 + }, + { + "epoch": 0.9767611921605385, + "grad_norm": 422.8157043457031, + "learning_rate": 3.301591903187351e-08, + "loss": 11.9881, + "step": 483530 + }, + { + "epoch": 0.9767813927932223, + "grad_norm": 253.63360595703125, + "learning_rate": 3.297588312652089e-08, + "loss": 22.7496, + "step": 483540 + }, + { + "epoch": 0.9768015934259061, + "grad_norm": 200.8687286376953, + "learning_rate": 3.2935871429841116e-08, + "loss": 16.5222, + "step": 483550 + }, + { + "epoch": 0.9768217940585899, + "grad_norm": 2.260951519012451, + "learning_rate": 3.289588394203014e-08, + "loss": 5.5559, + "step": 483560 + }, + { + "epoch": 0.9768419946912738, + "grad_norm": 306.1798400878906, + "learning_rate": 3.285592066328169e-08, + "loss": 13.5368, + "step": 483570 + }, + { + "epoch": 0.9768621953239576, + "grad_norm": 683.299560546875, + "learning_rate": 3.281598159379118e-08, + "loss": 12.1139, + "step": 483580 + }, + { + "epoch": 0.9768823959566414, + "grad_norm": 286.2330322265625, + "learning_rate": 3.277606673375289e-08, + "loss": 14.0862, + "step": 483590 + }, + { + "epoch": 0.9769025965893252, + "grad_norm": 208.06178283691406, + "learning_rate": 3.2736176083362216e-08, + "loss": 19.0484, + "step": 483600 + }, + { + "epoch": 0.976922797222009, + "grad_norm": 510.49981689453125, + "learning_rate": 3.2696309642812344e-08, + "loss": 15.3424, + "step": 483610 + }, + { + "epoch": 0.9769429978546929, + "grad_norm": 101.3388671875, + "learning_rate": 3.2656467412298665e-08, + "loss": 25.7158, + "step": 483620 + }, + { + "epoch": 0.9769631984873767, + "grad_norm": 340.8699645996094, + "learning_rate": 3.261664939201436e-08, + "loss": 21.4636, + "step": 483630 + }, + { + "epoch": 0.9769833991200605, + "grad_norm": 482.3835754394531, + "learning_rate": 3.2576855582154844e-08, + "loss": 25.4405, + "step": 483640 + }, + { + "epoch": 0.9770035997527442, + "grad_norm": 162.60202026367188, + "learning_rate": 3.253708598291272e-08, + "loss": 7.1479, + "step": 483650 + }, + { + "epoch": 0.977023800385428, + "grad_norm": 299.3507080078125, + "learning_rate": 3.2497340594482284e-08, + "loss": 11.9309, + "step": 483660 + }, + { + "epoch": 0.9770440010181118, + "grad_norm": 221.28309631347656, + "learning_rate": 3.245761941705727e-08, + "loss": 12.9233, + "step": 483670 + }, + { + "epoch": 0.9770642016507957, + "grad_norm": 221.97930908203125, + "learning_rate": 3.241792245083142e-08, + "loss": 7.916, + "step": 483680 + }, + { + "epoch": 0.9770844022834795, + "grad_norm": 274.4580383300781, + "learning_rate": 3.237824969599845e-08, + "loss": 13.6841, + "step": 483690 + }, + { + "epoch": 0.9771046029161633, + "grad_norm": 267.8779602050781, + "learning_rate": 3.2338601152751e-08, + "loss": 21.27, + "step": 483700 + }, + { + "epoch": 0.9771248035488471, + "grad_norm": 591.0781860351562, + "learning_rate": 3.2298976821282804e-08, + "loss": 28.3484, + "step": 483710 + }, + { + "epoch": 0.9771450041815309, + "grad_norm": 461.17681884765625, + "learning_rate": 3.2259376701787025e-08, + "loss": 15.7276, + "step": 483720 + }, + { + "epoch": 
0.9771652048142148, + "grad_norm": 117.9496078491211, + "learning_rate": 3.2219800794456304e-08, + "loss": 22.1262, + "step": 483730 + }, + { + "epoch": 0.9771854054468986, + "grad_norm": 188.90655517578125, + "learning_rate": 3.2180249099483806e-08, + "loss": 12.5037, + "step": 483740 + }, + { + "epoch": 0.9772056060795824, + "grad_norm": 659.62158203125, + "learning_rate": 3.214072161706272e-08, + "loss": 18.8168, + "step": 483750 + }, + { + "epoch": 0.9772258067122662, + "grad_norm": 335.1777648925781, + "learning_rate": 3.210121834738456e-08, + "loss": 26.0619, + "step": 483760 + }, + { + "epoch": 0.97724600734495, + "grad_norm": 50.493900299072266, + "learning_rate": 3.206173929064304e-08, + "loss": 20.8472, + "step": 483770 + }, + { + "epoch": 0.9772662079776339, + "grad_norm": 134.26231384277344, + "learning_rate": 3.20222844470297e-08, + "loss": 9.9554, + "step": 483780 + }, + { + "epoch": 0.9772864086103177, + "grad_norm": 707.6260986328125, + "learning_rate": 3.198285381673716e-08, + "loss": 31.974, + "step": 483790 + }, + { + "epoch": 0.9773066092430015, + "grad_norm": 736.2801513671875, + "learning_rate": 3.194344739995803e-08, + "loss": 26.2879, + "step": 483800 + }, + { + "epoch": 0.9773268098756853, + "grad_norm": 309.8446044921875, + "learning_rate": 3.1904065196883825e-08, + "loss": 15.8285, + "step": 483810 + }, + { + "epoch": 0.9773470105083691, + "grad_norm": 123.22837829589844, + "learning_rate": 3.1864707207706624e-08, + "loss": 6.2707, + "step": 483820 + }, + { + "epoch": 0.977367211141053, + "grad_norm": 793.5512084960938, + "learning_rate": 3.182537343261849e-08, + "loss": 20.4665, + "step": 483830 + }, + { + "epoch": 0.9773874117737368, + "grad_norm": 303.29290771484375, + "learning_rate": 3.178606387181038e-08, + "loss": 26.4614, + "step": 483840 + }, + { + "epoch": 0.9774076124064206, + "grad_norm": 375.2763366699219, + "learning_rate": 3.1746778525474916e-08, + "loss": 8.8345, + "step": 483850 + }, + { + "epoch": 0.9774278130391044, + "grad_norm": 121.67900085449219, + "learning_rate": 3.1707517393803064e-08, + "loss": 8.8945, + "step": 483860 + }, + { + "epoch": 0.9774480136717882, + "grad_norm": 783.3543701171875, + "learning_rate": 3.166828047698578e-08, + "loss": 13.7929, + "step": 483870 + }, + { + "epoch": 0.9774682143044721, + "grad_norm": 465.07061767578125, + "learning_rate": 3.1629067775214575e-08, + "loss": 23.3499, + "step": 483880 + }, + { + "epoch": 0.9774884149371559, + "grad_norm": 257.0478515625, + "learning_rate": 3.158987928868151e-08, + "loss": 13.9445, + "step": 483890 + }, + { + "epoch": 0.9775086155698397, + "grad_norm": 209.32102966308594, + "learning_rate": 3.1550715017575895e-08, + "loss": 16.5252, + "step": 483900 + }, + { + "epoch": 0.9775288162025234, + "grad_norm": 448.6198425292969, + "learning_rate": 3.151157496208979e-08, + "loss": 12.6523, + "step": 483910 + }, + { + "epoch": 0.9775490168352072, + "grad_norm": 532.5560913085938, + "learning_rate": 3.1472459122414144e-08, + "loss": 16.7201, + "step": 483920 + }, + { + "epoch": 0.977569217467891, + "grad_norm": 70.59700012207031, + "learning_rate": 3.143336749873882e-08, + "loss": 15.6916, + "step": 483930 + }, + { + "epoch": 0.9775894181005749, + "grad_norm": 213.04867553710938, + "learning_rate": 3.139430009125477e-08, + "loss": 18.7005, + "step": 483940 + }, + { + "epoch": 0.9776096187332587, + "grad_norm": 996.3571166992188, + "learning_rate": 3.135525690015184e-08, + "loss": 22.9712, + "step": 483950 + }, + { + "epoch": 0.9776298193659425, + "grad_norm": 182.0755615234375, + 
"learning_rate": 3.131623792562155e-08, + "loss": 15.4185, + "step": 483960 + }, + { + "epoch": 0.9776500199986263, + "grad_norm": 0.8058404326438904, + "learning_rate": 3.127724316785263e-08, + "loss": 18.1625, + "step": 483970 + }, + { + "epoch": 0.9776702206313101, + "grad_norm": 46.014915466308594, + "learning_rate": 3.1238272627035494e-08, + "loss": 20.9273, + "step": 483980 + }, + { + "epoch": 0.977690421263994, + "grad_norm": 283.42718505859375, + "learning_rate": 3.119932630336109e-08, + "loss": 32.6546, + "step": 483990 + }, + { + "epoch": 0.9777106218966778, + "grad_norm": 419.70343017578125, + "learning_rate": 3.1160404197018155e-08, + "loss": 17.8015, + "step": 484000 + }, + { + "epoch": 0.9777308225293616, + "grad_norm": 168.8333740234375, + "learning_rate": 3.11215063081971e-08, + "loss": 27.3313, + "step": 484010 + }, + { + "epoch": 0.9777510231620454, + "grad_norm": 183.36619567871094, + "learning_rate": 3.108263263708666e-08, + "loss": 7.288, + "step": 484020 + }, + { + "epoch": 0.9777712237947292, + "grad_norm": 267.46441650390625, + "learning_rate": 3.104378318387724e-08, + "loss": 11.842, + "step": 484030 + }, + { + "epoch": 0.9777914244274131, + "grad_norm": 233.20396423339844, + "learning_rate": 3.1004957948757576e-08, + "loss": 17.2669, + "step": 484040 + }, + { + "epoch": 0.9778116250600969, + "grad_norm": 330.2497863769531, + "learning_rate": 3.0966156931916955e-08, + "loss": 32.6221, + "step": 484050 + }, + { + "epoch": 0.9778318256927807, + "grad_norm": 300.6095275878906, + "learning_rate": 3.092738013354468e-08, + "loss": 12.898, + "step": 484060 + }, + { + "epoch": 0.9778520263254645, + "grad_norm": 370.9511413574219, + "learning_rate": 3.088862755383004e-08, + "loss": 16.3676, + "step": 484070 + }, + { + "epoch": 0.9778722269581483, + "grad_norm": 272.9999084472656, + "learning_rate": 3.084989919296122e-08, + "loss": 25.2239, + "step": 484080 + }, + { + "epoch": 0.9778924275908322, + "grad_norm": 250.6344757080078, + "learning_rate": 3.081119505112751e-08, + "loss": 19.4249, + "step": 484090 + }, + { + "epoch": 0.977912628223516, + "grad_norm": 251.88600158691406, + "learning_rate": 3.077251512851709e-08, + "loss": 26.6836, + "step": 484100 + }, + { + "epoch": 0.9779328288561998, + "grad_norm": 395.8818054199219, + "learning_rate": 3.07338594253187e-08, + "loss": 40.7621, + "step": 484110 + }, + { + "epoch": 0.9779530294888836, + "grad_norm": 612.4962158203125, + "learning_rate": 3.069522794172109e-08, + "loss": 18.4162, + "step": 484120 + }, + { + "epoch": 0.9779732301215674, + "grad_norm": 263.68402099609375, + "learning_rate": 3.0656620677911867e-08, + "loss": 11.7562, + "step": 484130 + }, + { + "epoch": 0.9779934307542513, + "grad_norm": 524.9879760742188, + "learning_rate": 3.061803763408033e-08, + "loss": 21.7564, + "step": 484140 + }, + { + "epoch": 0.9780136313869351, + "grad_norm": 1036.5638427734375, + "learning_rate": 3.057947881041301e-08, + "loss": 22.7379, + "step": 484150 + }, + { + "epoch": 0.9780338320196189, + "grad_norm": 235.7089080810547, + "learning_rate": 3.054094420709863e-08, + "loss": 15.1089, + "step": 484160 + }, + { + "epoch": 0.9780540326523026, + "grad_norm": 278.8729553222656, + "learning_rate": 3.050243382432483e-08, + "loss": 17.9415, + "step": 484170 + }, + { + "epoch": 0.9780742332849864, + "grad_norm": 382.26318359375, + "learning_rate": 3.046394766228034e-08, + "loss": 9.5056, + "step": 484180 + }, + { + "epoch": 0.9780944339176703, + "grad_norm": 449.79052734375, + "learning_rate": 3.0425485721151115e-08, + "loss": 
26.3229, + "step": 484190 + }, + { + "epoch": 0.9781146345503541, + "grad_norm": 287.5538635253906, + "learning_rate": 3.038704800112535e-08, + "loss": 8.6234, + "step": 484200 + }, + { + "epoch": 0.9781348351830379, + "grad_norm": 293.88818359375, + "learning_rate": 3.034863450239067e-08, + "loss": 32.1613, + "step": 484210 + }, + { + "epoch": 0.9781550358157217, + "grad_norm": 635.933837890625, + "learning_rate": 3.0310245225133595e-08, + "loss": 20.5715, + "step": 484220 + }, + { + "epoch": 0.9781752364484055, + "grad_norm": 546.6377563476562, + "learning_rate": 3.027188016954175e-08, + "loss": 20.026, + "step": 484230 + }, + { + "epoch": 0.9781954370810894, + "grad_norm": 340.8769836425781, + "learning_rate": 3.0233539335802195e-08, + "loss": 22.315, + "step": 484240 + }, + { + "epoch": 0.9782156377137732, + "grad_norm": 629.7188110351562, + "learning_rate": 3.019522272410202e-08, + "loss": 16.2137, + "step": 484250 + }, + { + "epoch": 0.978235838346457, + "grad_norm": 351.24102783203125, + "learning_rate": 3.0156930334626633e-08, + "loss": 13.6654, + "step": 484260 + }, + { + "epoch": 0.9782560389791408, + "grad_norm": 620.3857421875, + "learning_rate": 3.0118662167564205e-08, + "loss": 13.2582, + "step": 484270 + }, + { + "epoch": 0.9782762396118246, + "grad_norm": 320.65093994140625, + "learning_rate": 3.008041822310015e-08, + "loss": 15.7659, + "step": 484280 + }, + { + "epoch": 0.9782964402445085, + "grad_norm": 4.544155597686768, + "learning_rate": 3.004219850142209e-08, + "loss": 23.1176, + "step": 484290 + }, + { + "epoch": 0.9783166408771923, + "grad_norm": 330.84075927734375, + "learning_rate": 3.0004003002714886e-08, + "loss": 38.308, + "step": 484300 + }, + { + "epoch": 0.9783368415098761, + "grad_norm": 134.98233032226562, + "learning_rate": 2.9965831727165603e-08, + "loss": 16.8132, + "step": 484310 + }, + { + "epoch": 0.9783570421425599, + "grad_norm": 240.02906799316406, + "learning_rate": 2.992768467496021e-08, + "loss": 16.8663, + "step": 484320 + }, + { + "epoch": 0.9783772427752437, + "grad_norm": 69.66632843017578, + "learning_rate": 2.988956184628411e-08, + "loss": 17.5265, + "step": 484330 + }, + { + "epoch": 0.9783974434079276, + "grad_norm": 37.1854133605957, + "learning_rate": 2.985146324132438e-08, + "loss": 41.3305, + "step": 484340 + }, + { + "epoch": 0.9784176440406114, + "grad_norm": 91.68061828613281, + "learning_rate": 2.981338886026475e-08, + "loss": 12.9395, + "step": 484350 + }, + { + "epoch": 0.9784378446732952, + "grad_norm": 484.4888916015625, + "learning_rate": 2.97753387032923e-08, + "loss": 21.5611, + "step": 484360 + }, + { + "epoch": 0.978458045305979, + "grad_norm": 104.05294799804688, + "learning_rate": 2.9737312770591887e-08, + "loss": 21.4092, + "step": 484370 + }, + { + "epoch": 0.9784782459386628, + "grad_norm": 284.43402099609375, + "learning_rate": 2.9699311062349467e-08, + "loss": 16.0598, + "step": 484380 + }, + { + "epoch": 0.9784984465713467, + "grad_norm": 12.049307823181152, + "learning_rate": 2.966133357874934e-08, + "loss": 25.8269, + "step": 484390 + }, + { + "epoch": 0.9785186472040305, + "grad_norm": 63.954917907714844, + "learning_rate": 2.9623380319976912e-08, + "loss": 13.1331, + "step": 484400 + }, + { + "epoch": 0.9785388478367143, + "grad_norm": 486.4266662597656, + "learning_rate": 2.9585451286217593e-08, + "loss": 15.2235, + "step": 484410 + }, + { + "epoch": 0.978559048469398, + "grad_norm": 151.86865234375, + "learning_rate": 2.954754647765623e-08, + "loss": 13.0449, + "step": 484420 + }, + { + "epoch": 
0.9785792491020818, + "grad_norm": 803.1467895507812, + "learning_rate": 2.950966589447657e-08, + "loss": 27.0853, + "step": 484430 + }, + { + "epoch": 0.9785994497347656, + "grad_norm": 347.372802734375, + "learning_rate": 2.947180953686457e-08, + "loss": 15.3117, + "step": 484440 + }, + { + "epoch": 0.9786196503674495, + "grad_norm": 400.6361083984375, + "learning_rate": 2.9433977405003976e-08, + "loss": 22.2511, + "step": 484450 + }, + { + "epoch": 0.9786398510001333, + "grad_norm": 344.0408935546875, + "learning_rate": 2.9396169499079087e-08, + "loss": 14.1736, + "step": 484460 + }, + { + "epoch": 0.9786600516328171, + "grad_norm": 880.786376953125, + "learning_rate": 2.935838581927475e-08, + "loss": 34.3599, + "step": 484470 + }, + { + "epoch": 0.9786802522655009, + "grad_norm": 109.6543960571289, + "learning_rate": 2.9320626365774153e-08, + "loss": 8.8492, + "step": 484480 + }, + { + "epoch": 0.9787004528981847, + "grad_norm": 12.039264678955078, + "learning_rate": 2.9282891138762148e-08, + "loss": 16.186, + "step": 484490 + }, + { + "epoch": 0.9787206535308686, + "grad_norm": 470.5296325683594, + "learning_rate": 2.9245180138423033e-08, + "loss": 36.8357, + "step": 484500 + }, + { + "epoch": 0.9787408541635524, + "grad_norm": 523.0750732421875, + "learning_rate": 2.920749336494e-08, + "loss": 13.6021, + "step": 484510 + }, + { + "epoch": 0.9787610547962362, + "grad_norm": 327.95672607421875, + "learning_rate": 2.9169830818496226e-08, + "loss": 24.0132, + "step": 484520 + }, + { + "epoch": 0.97878125542892, + "grad_norm": 125.46385955810547, + "learning_rate": 2.9132192499276014e-08, + "loss": 19.9534, + "step": 484530 + }, + { + "epoch": 0.9788014560616038, + "grad_norm": 17.092742919921875, + "learning_rate": 2.9094578407462547e-08, + "loss": 14.0685, + "step": 484540 + }, + { + "epoch": 0.9788216566942877, + "grad_norm": 230.4193878173828, + "learning_rate": 2.9056988543239018e-08, + "loss": 6.433, + "step": 484550 + }, + { + "epoch": 0.9788418573269715, + "grad_norm": 45.332794189453125, + "learning_rate": 2.9019422906789162e-08, + "loss": 13.0758, + "step": 484560 + }, + { + "epoch": 0.9788620579596553, + "grad_norm": 377.50286865234375, + "learning_rate": 2.8981881498295616e-08, + "loss": 27.4283, + "step": 484570 + }, + { + "epoch": 0.9788822585923391, + "grad_norm": 170.75660705566406, + "learning_rate": 2.8944364317941564e-08, + "loss": 11.3379, + "step": 484580 + }, + { + "epoch": 0.978902459225023, + "grad_norm": 463.7169494628906, + "learning_rate": 2.8906871365909638e-08, + "loss": 13.9359, + "step": 484590 + }, + { + "epoch": 0.9789226598577068, + "grad_norm": 578.7476806640625, + "learning_rate": 2.8869402642382473e-08, + "loss": 12.9114, + "step": 484600 + }, + { + "epoch": 0.9789428604903906, + "grad_norm": 1014.2540893554688, + "learning_rate": 2.8831958147543805e-08, + "loss": 17.8054, + "step": 484610 + }, + { + "epoch": 0.9789630611230744, + "grad_norm": 220.0530242919922, + "learning_rate": 2.8794537881574046e-08, + "loss": 20.0184, + "step": 484620 + }, + { + "epoch": 0.9789832617557582, + "grad_norm": 673.9566040039062, + "learning_rate": 2.87571418446575e-08, + "loss": 21.4076, + "step": 484630 + }, + { + "epoch": 0.979003462388442, + "grad_norm": 667.1533813476562, + "learning_rate": 2.871977003697568e-08, + "loss": 22.8192, + "step": 484640 + }, + { + "epoch": 0.9790236630211259, + "grad_norm": 424.0278625488281, + "learning_rate": 2.8682422458710667e-08, + "loss": 15.4292, + "step": 484650 + }, + { + "epoch": 0.9790438636538097, + "grad_norm": 
268.6829528808594, + "learning_rate": 2.864509911004454e-08, + "loss": 32.7413, + "step": 484660 + }, + { + "epoch": 0.9790640642864935, + "grad_norm": 6.1492838859558105, + "learning_rate": 2.8607799991159368e-08, + "loss": 10.4449, + "step": 484670 + }, + { + "epoch": 0.9790842649191772, + "grad_norm": 649.07861328125, + "learning_rate": 2.857052510223668e-08, + "loss": 22.5222, + "step": 484680 + }, + { + "epoch": 0.979104465551861, + "grad_norm": 450.8924865722656, + "learning_rate": 2.853327444345799e-08, + "loss": 15.7099, + "step": 484690 + }, + { + "epoch": 0.9791246661845449, + "grad_norm": 761.1311645507812, + "learning_rate": 2.8496048015005385e-08, + "loss": 23.6273, + "step": 484700 + }, + { + "epoch": 0.9791448668172287, + "grad_norm": 269.6807556152344, + "learning_rate": 2.8458845817060376e-08, + "loss": 13.483, + "step": 484710 + }, + { + "epoch": 0.9791650674499125, + "grad_norm": 527.2760620117188, + "learning_rate": 2.8421667849803937e-08, + "loss": 13.3812, + "step": 484720 + }, + { + "epoch": 0.9791852680825963, + "grad_norm": 7.196564197540283, + "learning_rate": 2.8384514113417026e-08, + "loss": 17.1273, + "step": 484730 + }, + { + "epoch": 0.9792054687152801, + "grad_norm": 0.0, + "learning_rate": 2.8347384608081173e-08, + "loss": 26.7297, + "step": 484740 + }, + { + "epoch": 0.979225669347964, + "grad_norm": 1.6372706890106201, + "learning_rate": 2.8310279333976786e-08, + "loss": 22.1027, + "step": 484750 + }, + { + "epoch": 0.9792458699806478, + "grad_norm": 167.82321166992188, + "learning_rate": 2.827319829128594e-08, + "loss": 12.3742, + "step": 484760 + }, + { + "epoch": 0.9792660706133316, + "grad_norm": 317.5897216796875, + "learning_rate": 2.823614148018794e-08, + "loss": 13.8572, + "step": 484770 + }, + { + "epoch": 0.9792862712460154, + "grad_norm": 455.1285400390625, + "learning_rate": 2.819910890086375e-08, + "loss": 26.5275, + "step": 484780 + }, + { + "epoch": 0.9793064718786992, + "grad_norm": 508.3708801269531, + "learning_rate": 2.8162100553494887e-08, + "loss": 16.7634, + "step": 484790 + }, + { + "epoch": 0.9793266725113831, + "grad_norm": 454.05999755859375, + "learning_rate": 2.8125116438260104e-08, + "loss": 15.6647, + "step": 484800 + }, + { + "epoch": 0.9793468731440669, + "grad_norm": 482.0390930175781, + "learning_rate": 2.8088156555340916e-08, + "loss": 26.6385, + "step": 484810 + }, + { + "epoch": 0.9793670737767507, + "grad_norm": 377.8825988769531, + "learning_rate": 2.805122090491719e-08, + "loss": 13.5362, + "step": 484820 + }, + { + "epoch": 0.9793872744094345, + "grad_norm": 116.45448303222656, + "learning_rate": 2.801430948716821e-08, + "loss": 20.9232, + "step": 484830 + }, + { + "epoch": 0.9794074750421183, + "grad_norm": 210.9385986328125, + "learning_rate": 2.797742230227496e-08, + "loss": 16.5787, + "step": 484840 + }, + { + "epoch": 0.9794276756748022, + "grad_norm": 302.8087463378906, + "learning_rate": 2.794055935041673e-08, + "loss": 14.3869, + "step": 484850 + }, + { + "epoch": 0.979447876307486, + "grad_norm": 780.3137817382812, + "learning_rate": 2.7903720631772824e-08, + "loss": 24.1741, + "step": 484860 + }, + { + "epoch": 0.9794680769401698, + "grad_norm": 316.7269287109375, + "learning_rate": 2.7866906146523098e-08, + "loss": 22.5804, + "step": 484870 + }, + { + "epoch": 0.9794882775728536, + "grad_norm": 630.5425415039062, + "learning_rate": 2.783011589484741e-08, + "loss": 25.4059, + "step": 484880 + }, + { + "epoch": 0.9795084782055374, + "grad_norm": 77.95024871826172, + "learning_rate": 2.7793349876924503e-08, 
+ "loss": 23.828, + "step": 484890 + }, + { + "epoch": 0.9795286788382213, + "grad_norm": 318.6225280761719, + "learning_rate": 2.7756608092933678e-08, + "loss": 28.741, + "step": 484900 + }, + { + "epoch": 0.9795488794709051, + "grad_norm": 187.707763671875, + "learning_rate": 2.771989054305424e-08, + "loss": 11.4559, + "step": 484910 + }, + { + "epoch": 0.9795690801035889, + "grad_norm": 259.7579650878906, + "learning_rate": 2.768319722746493e-08, + "loss": 10.9794, + "step": 484920 + }, + { + "epoch": 0.9795892807362726, + "grad_norm": 397.9490051269531, + "learning_rate": 2.7646528146345053e-08, + "loss": 13.0612, + "step": 484930 + }, + { + "epoch": 0.9796094813689564, + "grad_norm": 629.7482299804688, + "learning_rate": 2.760988329987224e-08, + "loss": 14.3817, + "step": 484940 + }, + { + "epoch": 0.9796296820016402, + "grad_norm": 643.6015625, + "learning_rate": 2.7573262688226355e-08, + "loss": 39.0208, + "step": 484950 + }, + { + "epoch": 0.9796498826343241, + "grad_norm": 417.1099548339844, + "learning_rate": 2.753666631158447e-08, + "loss": 13.9643, + "step": 484960 + }, + { + "epoch": 0.9796700832670079, + "grad_norm": 1445.2420654296875, + "learning_rate": 2.7500094170126447e-08, + "loss": 12.6738, + "step": 484970 + }, + { + "epoch": 0.9796902838996917, + "grad_norm": 310.7126770019531, + "learning_rate": 2.7463546264029915e-08, + "loss": 10.1163, + "step": 484980 + }, + { + "epoch": 0.9797104845323755, + "grad_norm": 400.7168884277344, + "learning_rate": 2.7427022593473074e-08, + "loss": 18.6557, + "step": 484990 + }, + { + "epoch": 0.9797306851650593, + "grad_norm": 440.9306335449219, + "learning_rate": 2.7390523158633552e-08, + "loss": 16.9205, + "step": 485000 + }, + { + "epoch": 0.9797508857977432, + "grad_norm": 313.15338134765625, + "learning_rate": 2.7354047959689543e-08, + "loss": 11.0791, + "step": 485010 + }, + { + "epoch": 0.979771086430427, + "grad_norm": 488.06695556640625, + "learning_rate": 2.7317596996818684e-08, + "loss": 31.8022, + "step": 485020 + }, + { + "epoch": 0.9797912870631108, + "grad_norm": 346.84796142578125, + "learning_rate": 2.728117027019861e-08, + "loss": 38.3031, + "step": 485030 + }, + { + "epoch": 0.9798114876957946, + "grad_norm": 608.5032958984375, + "learning_rate": 2.7244767780007507e-08, + "loss": 31.9193, + "step": 485040 + }, + { + "epoch": 0.9798316883284784, + "grad_norm": 215.79571533203125, + "learning_rate": 2.7208389526421907e-08, + "loss": 33.9912, + "step": 485050 + }, + { + "epoch": 0.9798518889611623, + "grad_norm": 217.80091857910156, + "learning_rate": 2.7172035509619442e-08, + "loss": 21.6836, + "step": 485060 + }, + { + "epoch": 0.9798720895938461, + "grad_norm": 467.5257873535156, + "learning_rate": 2.713570572977775e-08, + "loss": 31.3339, + "step": 485070 + }, + { + "epoch": 0.9798922902265299, + "grad_norm": 1203.36083984375, + "learning_rate": 2.7099400187073356e-08, + "loss": 20.5482, + "step": 485080 + }, + { + "epoch": 0.9799124908592137, + "grad_norm": 406.3395690917969, + "learning_rate": 2.7063118881682782e-08, + "loss": 23.3002, + "step": 485090 + }, + { + "epoch": 0.9799326914918975, + "grad_norm": 527.0682983398438, + "learning_rate": 2.7026861813783668e-08, + "loss": 19.6665, + "step": 485100 + }, + { + "epoch": 0.9799528921245814, + "grad_norm": 182.63735961914062, + "learning_rate": 2.6990628983553093e-08, + "loss": 15.1178, + "step": 485110 + }, + { + "epoch": 0.9799730927572652, + "grad_norm": 384.9981384277344, + "learning_rate": 2.6954420391166468e-08, + "loss": 23.4295, + "step": 485120 + }, + { 
+ "epoch": 0.979993293389949, + "grad_norm": 618.421630859375, + "learning_rate": 2.691823603680088e-08, + "loss": 32.6432, + "step": 485130 + }, + { + "epoch": 0.9800134940226328, + "grad_norm": 247.70474243164062, + "learning_rate": 2.6882075920632854e-08, + "loss": 16.8914, + "step": 485140 + }, + { + "epoch": 0.9800336946553166, + "grad_norm": 258.8690490722656, + "learning_rate": 2.684594004283836e-08, + "loss": 15.5106, + "step": 485150 + }, + { + "epoch": 0.9800538952880005, + "grad_norm": 534.8699340820312, + "learning_rate": 2.6809828403593363e-08, + "loss": 12.8656, + "step": 485160 + }, + { + "epoch": 0.9800740959206843, + "grad_norm": 45.22439956665039, + "learning_rate": 2.6773741003074394e-08, + "loss": 18.8632, + "step": 485170 + }, + { + "epoch": 0.9800942965533681, + "grad_norm": 718.5444946289062, + "learning_rate": 2.6737677841456867e-08, + "loss": 22.0892, + "step": 485180 + }, + { + "epoch": 0.9801144971860518, + "grad_norm": 481.2840576171875, + "learning_rate": 2.670163891891675e-08, + "loss": 17.4947, + "step": 485190 + }, + { + "epoch": 0.9801346978187356, + "grad_norm": 320.5677490234375, + "learning_rate": 2.6665624235629463e-08, + "loss": 22.502, + "step": 485200 + }, + { + "epoch": 0.9801548984514195, + "grad_norm": 519.1952514648438, + "learning_rate": 2.662963379177097e-08, + "loss": 13.4785, + "step": 485210 + }, + { + "epoch": 0.9801750990841033, + "grad_norm": 0.9457218647003174, + "learning_rate": 2.6593667587516693e-08, + "loss": 18.4401, + "step": 485220 + }, + { + "epoch": 0.9801952997167871, + "grad_norm": 231.81671142578125, + "learning_rate": 2.6557725623041487e-08, + "loss": 19.6856, + "step": 485230 + }, + { + "epoch": 0.9802155003494709, + "grad_norm": 233.8401641845703, + "learning_rate": 2.6521807898520214e-08, + "loss": 10.5556, + "step": 485240 + }, + { + "epoch": 0.9802357009821547, + "grad_norm": 323.9283142089844, + "learning_rate": 2.64859144141294e-08, + "loss": 10.9646, + "step": 485250 + }, + { + "epoch": 0.9802559016148386, + "grad_norm": 152.94651794433594, + "learning_rate": 2.6450045170042238e-08, + "loss": 16.6187, + "step": 485260 + }, + { + "epoch": 0.9802761022475224, + "grad_norm": 560.7903442382812, + "learning_rate": 2.6414200166434144e-08, + "loss": 31.5643, + "step": 485270 + }, + { + "epoch": 0.9802963028802062, + "grad_norm": 762.9198608398438, + "learning_rate": 2.6378379403480536e-08, + "loss": 21.9723, + "step": 485280 + }, + { + "epoch": 0.98031650351289, + "grad_norm": 159.2328643798828, + "learning_rate": 2.6342582881355717e-08, + "loss": 11.4329, + "step": 485290 + }, + { + "epoch": 0.9803367041455738, + "grad_norm": 495.5337829589844, + "learning_rate": 2.6306810600233435e-08, + "loss": 19.3928, + "step": 485300 + }, + { + "epoch": 0.9803569047782577, + "grad_norm": 289.5658264160156, + "learning_rate": 2.6271062560288552e-08, + "loss": 20.7861, + "step": 485310 + }, + { + "epoch": 0.9803771054109415, + "grad_norm": 768.4794311523438, + "learning_rate": 2.6235338761695372e-08, + "loss": 21.2778, + "step": 485320 + }, + { + "epoch": 0.9803973060436253, + "grad_norm": 307.91845703125, + "learning_rate": 2.6199639204628202e-08, + "loss": 15.3602, + "step": 485330 + }, + { + "epoch": 0.9804175066763091, + "grad_norm": 549.4125366210938, + "learning_rate": 2.6163963889260236e-08, + "loss": 22.524, + "step": 485340 + }, + { + "epoch": 0.9804377073089929, + "grad_norm": 22.162643432617188, + "learning_rate": 2.6128312815766332e-08, + "loss": 21.6474, + "step": 485350 + }, + { + "epoch": 0.9804579079416768, + 
"grad_norm": 628.3938598632812, + "learning_rate": 2.6092685984319134e-08, + "loss": 15.8834, + "step": 485360 + }, + { + "epoch": 0.9804781085743606, + "grad_norm": 195.3922119140625, + "learning_rate": 2.6057083395093495e-08, + "loss": 26.0357, + "step": 485370 + }, + { + "epoch": 0.9804983092070444, + "grad_norm": 303.665283203125, + "learning_rate": 2.6021505048262062e-08, + "loss": 25.6902, + "step": 485380 + }, + { + "epoch": 0.9805185098397282, + "grad_norm": 24.05523681640625, + "learning_rate": 2.5985950943999137e-08, + "loss": 12.9914, + "step": 485390 + }, + { + "epoch": 0.980538710472412, + "grad_norm": 269.62017822265625, + "learning_rate": 2.5950421082476805e-08, + "loss": 13.2076, + "step": 485400 + }, + { + "epoch": 0.9805589111050959, + "grad_norm": 341.3332824707031, + "learning_rate": 2.5914915463868816e-08, + "loss": 21.3219, + "step": 485410 + }, + { + "epoch": 0.9805791117377797, + "grad_norm": 0.0197820533066988, + "learning_rate": 2.5879434088348364e-08, + "loss": 18.7422, + "step": 485420 + }, + { + "epoch": 0.9805993123704635, + "grad_norm": 641.1902465820312, + "learning_rate": 2.584397695608809e-08, + "loss": 35.1817, + "step": 485430 + }, + { + "epoch": 0.9806195130031473, + "grad_norm": 282.08807373046875, + "learning_rate": 2.580854406726174e-08, + "loss": 9.3104, + "step": 485440 + }, + { + "epoch": 0.980639713635831, + "grad_norm": 57.96085739135742, + "learning_rate": 2.5773135422040296e-08, + "loss": 11.5754, + "step": 485450 + }, + { + "epoch": 0.9806599142685148, + "grad_norm": 624.4923095703125, + "learning_rate": 2.5737751020598057e-08, + "loss": 10.0956, + "step": 485460 + }, + { + "epoch": 0.9806801149011987, + "grad_norm": 274.8274230957031, + "learning_rate": 2.5702390863105996e-08, + "loss": 8.2575, + "step": 485470 + }, + { + "epoch": 0.9807003155338825, + "grad_norm": 454.69952392578125, + "learning_rate": 2.5667054949737315e-08, + "loss": 13.499, + "step": 485480 + }, + { + "epoch": 0.9807205161665663, + "grad_norm": 227.89601135253906, + "learning_rate": 2.5631743280664643e-08, + "loss": 18.8259, + "step": 485490 + }, + { + "epoch": 0.9807407167992501, + "grad_norm": 448.7703552246094, + "learning_rate": 2.5596455856058966e-08, + "loss": 21.568, + "step": 485500 + }, + { + "epoch": 0.9807609174319339, + "grad_norm": 465.294921875, + "learning_rate": 2.556119267609347e-08, + "loss": 13.4545, + "step": 485510 + }, + { + "epoch": 0.9807811180646178, + "grad_norm": 369.0829772949219, + "learning_rate": 2.552595374093858e-08, + "loss": 16.9684, + "step": 485520 + }, + { + "epoch": 0.9808013186973016, + "grad_norm": 295.7125549316406, + "learning_rate": 2.5490739050767488e-08, + "loss": 10.4524, + "step": 485530 + }, + { + "epoch": 0.9808215193299854, + "grad_norm": 500.7685241699219, + "learning_rate": 2.5455548605751167e-08, + "loss": 32.5881, + "step": 485540 + }, + { + "epoch": 0.9808417199626692, + "grad_norm": 364.6302490234375, + "learning_rate": 2.5420382406060595e-08, + "loss": 26.903, + "step": 485550 + }, + { + "epoch": 0.980861920595353, + "grad_norm": 335.4365234375, + "learning_rate": 2.5385240451867853e-08, + "loss": 17.7864, + "step": 485560 + }, + { + "epoch": 0.9808821212280369, + "grad_norm": 441.4403381347656, + "learning_rate": 2.5350122743344476e-08, + "loss": 19.7877, + "step": 485570 + }, + { + "epoch": 0.9809023218607207, + "grad_norm": 378.8863220214844, + "learning_rate": 2.531502928066143e-08, + "loss": 12.3159, + "step": 485580 + }, + { + "epoch": 0.9809225224934045, + "grad_norm": 380.1937561035156, + "learning_rate": 
2.527996006398914e-08, + "loss": 7.871, + "step": 485590 + }, + { + "epoch": 0.9809427231260883, + "grad_norm": 404.0863037109375, + "learning_rate": 2.5244915093499134e-08, + "loss": 36.1998, + "step": 485600 + }, + { + "epoch": 0.9809629237587721, + "grad_norm": 184.4862823486328, + "learning_rate": 2.5209894369362386e-08, + "loss": 13.5838, + "step": 485610 + }, + { + "epoch": 0.980983124391456, + "grad_norm": 244.2938232421875, + "learning_rate": 2.5174897891748762e-08, + "loss": 23.7825, + "step": 485620 + }, + { + "epoch": 0.9810033250241398, + "grad_norm": 220.58428955078125, + "learning_rate": 2.5139925660829233e-08, + "loss": 19.5785, + "step": 485630 + }, + { + "epoch": 0.9810235256568236, + "grad_norm": 194.8220672607422, + "learning_rate": 2.5104977676774777e-08, + "loss": 18.7257, + "step": 485640 + }, + { + "epoch": 0.9810437262895074, + "grad_norm": 316.3087158203125, + "learning_rate": 2.5070053939754702e-08, + "loss": 12.6183, + "step": 485650 + }, + { + "epoch": 0.9810639269221912, + "grad_norm": 525.3484497070312, + "learning_rate": 2.5035154449940535e-08, + "loss": 27.6374, + "step": 485660 + }, + { + "epoch": 0.9810841275548751, + "grad_norm": 691.3954467773438, + "learning_rate": 2.500027920750103e-08, + "loss": 16.5734, + "step": 485670 + }, + { + "epoch": 0.9811043281875589, + "grad_norm": 410.7261047363281, + "learning_rate": 2.496542821260717e-08, + "loss": 12.7947, + "step": 485680 + }, + { + "epoch": 0.9811245288202427, + "grad_norm": 172.7108154296875, + "learning_rate": 2.493060146542825e-08, + "loss": 22.6328, + "step": 485690 + }, + { + "epoch": 0.9811447294529264, + "grad_norm": 403.85003662109375, + "learning_rate": 2.489579896613359e-08, + "loss": 29.9945, + "step": 485700 + }, + { + "epoch": 0.9811649300856102, + "grad_norm": 175.58148193359375, + "learning_rate": 2.4861020714894156e-08, + "loss": 10.1164, + "step": 485710 + }, + { + "epoch": 0.981185130718294, + "grad_norm": 210.02651977539062, + "learning_rate": 2.482626671187871e-08, + "loss": 9.6241, + "step": 485720 + }, + { + "epoch": 0.9812053313509779, + "grad_norm": 445.175537109375, + "learning_rate": 2.4791536957256e-08, + "loss": 14.9926, + "step": 485730 + }, + { + "epoch": 0.9812255319836617, + "grad_norm": 885.3142700195312, + "learning_rate": 2.4756831451196452e-08, + "loss": 26.4219, + "step": 485740 + }, + { + "epoch": 0.9812457326163455, + "grad_norm": 28.76207733154297, + "learning_rate": 2.472215019386881e-08, + "loss": 15.5411, + "step": 485750 + }, + { + "epoch": 0.9812659332490293, + "grad_norm": 260.98870849609375, + "learning_rate": 2.4687493185441836e-08, + "loss": 25.6906, + "step": 485760 + }, + { + "epoch": 0.9812861338817132, + "grad_norm": 369.857421875, + "learning_rate": 2.4652860426084278e-08, + "loss": 23.6489, + "step": 485770 + }, + { + "epoch": 0.981306334514397, + "grad_norm": 128.75570678710938, + "learning_rate": 2.46182519159649e-08, + "loss": 10.6912, + "step": 485780 + }, + { + "epoch": 0.9813265351470808, + "grad_norm": 120.6553955078125, + "learning_rate": 2.458366765525355e-08, + "loss": 9.2437, + "step": 485790 + }, + { + "epoch": 0.9813467357797646, + "grad_norm": 560.5294189453125, + "learning_rate": 2.4549107644117888e-08, + "loss": 17.972, + "step": 485800 + }, + { + "epoch": 0.9813669364124484, + "grad_norm": 62.803314208984375, + "learning_rate": 2.4514571882726102e-08, + "loss": 16.2346, + "step": 485810 + }, + { + "epoch": 0.9813871370451323, + "grad_norm": 596.3590087890625, + "learning_rate": 2.448006037124695e-08, + "loss": 23.8473, + "step": 
485820 + }, + { + "epoch": 0.9814073376778161, + "grad_norm": 491.07147216796875, + "learning_rate": 2.444557310984863e-08, + "loss": 16.2163, + "step": 485830 + }, + { + "epoch": 0.9814275383104999, + "grad_norm": 512.9915771484375, + "learning_rate": 2.441111009869879e-08, + "loss": 25.4548, + "step": 485840 + }, + { + "epoch": 0.9814477389431837, + "grad_norm": 128.34317016601562, + "learning_rate": 2.4376671337966174e-08, + "loss": 16.7849, + "step": 485850 + }, + { + "epoch": 0.9814679395758675, + "grad_norm": 383.1306457519531, + "learning_rate": 2.434225682781788e-08, + "loss": 21.5805, + "step": 485860 + }, + { + "epoch": 0.9814881402085514, + "grad_norm": 706.0603637695312, + "learning_rate": 2.43078665684221e-08, + "loss": 44.5088, + "step": 485870 + }, + { + "epoch": 0.9815083408412352, + "grad_norm": 494.68304443359375, + "learning_rate": 2.427350055994593e-08, + "loss": 30.7003, + "step": 485880 + }, + { + "epoch": 0.981528541473919, + "grad_norm": 279.5955505371094, + "learning_rate": 2.423915880255756e-08, + "loss": 15.2497, + "step": 485890 + }, + { + "epoch": 0.9815487421066028, + "grad_norm": 201.189697265625, + "learning_rate": 2.4204841296424086e-08, + "loss": 13.9122, + "step": 485900 + }, + { + "epoch": 0.9815689427392866, + "grad_norm": 508.57025146484375, + "learning_rate": 2.4170548041712594e-08, + "loss": 19.3764, + "step": 485910 + }, + { + "epoch": 0.9815891433719705, + "grad_norm": 351.7825622558594, + "learning_rate": 2.4136279038590727e-08, + "loss": 17.9565, + "step": 485920 + }, + { + "epoch": 0.9816093440046543, + "grad_norm": 316.19219970703125, + "learning_rate": 2.4102034287224462e-08, + "loss": 11.2892, + "step": 485930 + }, + { + "epoch": 0.9816295446373381, + "grad_norm": 542.6669311523438, + "learning_rate": 2.4067813787782e-08, + "loss": 24.9015, + "step": 485940 + }, + { + "epoch": 0.9816497452700219, + "grad_norm": 197.28306579589844, + "learning_rate": 2.403361754042932e-08, + "loss": 19.147, + "step": 485950 + }, + { + "epoch": 0.9816699459027056, + "grad_norm": 50.71059799194336, + "learning_rate": 2.3999445545332955e-08, + "loss": 16.8103, + "step": 485960 + }, + { + "epoch": 0.9816901465353894, + "grad_norm": 501.495361328125, + "learning_rate": 2.3965297802659993e-08, + "loss": 17.9184, + "step": 485970 + }, + { + "epoch": 0.9817103471680733, + "grad_norm": 310.5088195800781, + "learning_rate": 2.3931174312576966e-08, + "loss": 20.8237, + "step": 485980 + }, + { + "epoch": 0.9817305478007571, + "grad_norm": 262.6496276855469, + "learning_rate": 2.3897075075249298e-08, + "loss": 16.2967, + "step": 485990 + }, + { + "epoch": 0.9817507484334409, + "grad_norm": 183.33282470703125, + "learning_rate": 2.386300009084408e-08, + "loss": 19.5586, + "step": 486000 + }, + { + "epoch": 0.9817709490661247, + "grad_norm": 26.03966522216797, + "learning_rate": 2.382894935952729e-08, + "loss": 16.2692, + "step": 486010 + }, + { + "epoch": 0.9817911496988085, + "grad_norm": 364.26153564453125, + "learning_rate": 2.3794922881464344e-08, + "loss": 20.1034, + "step": 486020 + }, + { + "epoch": 0.9818113503314924, + "grad_norm": 109.6334457397461, + "learning_rate": 2.3760920656821228e-08, + "loss": 15.8652, + "step": 486030 + }, + { + "epoch": 0.9818315509641762, + "grad_norm": 449.1590270996094, + "learning_rate": 2.3726942685764474e-08, + "loss": 23.835, + "step": 486040 + }, + { + "epoch": 0.98185175159686, + "grad_norm": 94.95097351074219, + "learning_rate": 2.3692988968458398e-08, + "loss": 33.35, + "step": 486050 + }, + { + "epoch": 0.9818719522295438, + 
"grad_norm": 266.3463439941406, + "learning_rate": 2.3659059505069526e-08, + "loss": 25.8023, + "step": 486060 + }, + { + "epoch": 0.9818921528622276, + "grad_norm": 73.2273941040039, + "learning_rate": 2.362515429576273e-08, + "loss": 14.0591, + "step": 486070 + }, + { + "epoch": 0.9819123534949115, + "grad_norm": 332.509521484375, + "learning_rate": 2.3591273340703436e-08, + "loss": 12.0447, + "step": 486080 + }, + { + "epoch": 0.9819325541275953, + "grad_norm": 1333.14501953125, + "learning_rate": 2.3557416640056507e-08, + "loss": 16.5212, + "step": 486090 + }, + { + "epoch": 0.9819527547602791, + "grad_norm": 227.93136596679688, + "learning_rate": 2.3523584193986816e-08, + "loss": 19.2178, + "step": 486100 + }, + { + "epoch": 0.9819729553929629, + "grad_norm": 705.9186401367188, + "learning_rate": 2.3489776002660337e-08, + "loss": 19.5316, + "step": 486110 + }, + { + "epoch": 0.9819931560256467, + "grad_norm": 301.1523742675781, + "learning_rate": 2.3455992066240828e-08, + "loss": 32.5375, + "step": 486120 + }, + { + "epoch": 0.9820133566583306, + "grad_norm": 323.9628601074219, + "learning_rate": 2.342223238489316e-08, + "loss": 24.6499, + "step": 486130 + }, + { + "epoch": 0.9820335572910144, + "grad_norm": 148.93313598632812, + "learning_rate": 2.3388496958782203e-08, + "loss": 19.5107, + "step": 486140 + }, + { + "epoch": 0.9820537579236982, + "grad_norm": 247.03475952148438, + "learning_rate": 2.3354785788072265e-08, + "loss": 9.7356, + "step": 486150 + }, + { + "epoch": 0.982073958556382, + "grad_norm": 1135.49951171875, + "learning_rate": 2.3321098872927107e-08, + "loss": 39.8069, + "step": 486160 + }, + { + "epoch": 0.9820941591890658, + "grad_norm": 683.43603515625, + "learning_rate": 2.3287436213511038e-08, + "loss": 15.9695, + "step": 486170 + }, + { + "epoch": 0.9821143598217497, + "grad_norm": 344.6324462890625, + "learning_rate": 2.3253797809988933e-08, + "loss": 15.9136, + "step": 486180 + }, + { + "epoch": 0.9821345604544335, + "grad_norm": 361.53643798828125, + "learning_rate": 2.3220183662523986e-08, + "loss": 22.6791, + "step": 486190 + }, + { + "epoch": 0.9821547610871173, + "grad_norm": 196.98980712890625, + "learning_rate": 2.3186593771280518e-08, + "loss": 6.6145, + "step": 486200 + }, + { + "epoch": 0.982174961719801, + "grad_norm": 414.0221252441406, + "learning_rate": 2.3153028136421728e-08, + "loss": 18.1399, + "step": 486210 + }, + { + "epoch": 0.9821951623524848, + "grad_norm": 442.22125244140625, + "learning_rate": 2.3119486758111375e-08, + "loss": 11.9448, + "step": 486220 + }, + { + "epoch": 0.9822153629851686, + "grad_norm": 446.5096435546875, + "learning_rate": 2.3085969636513217e-08, + "loss": 15.7097, + "step": 486230 + }, + { + "epoch": 0.9822355636178525, + "grad_norm": 0.0, + "learning_rate": 2.3052476771790454e-08, + "loss": 9.7731, + "step": 486240 + }, + { + "epoch": 0.9822557642505363, + "grad_norm": 379.91375732421875, + "learning_rate": 2.301900816410574e-08, + "loss": 13.1205, + "step": 486250 + }, + { + "epoch": 0.9822759648832201, + "grad_norm": 390.3697814941406, + "learning_rate": 2.2985563813623378e-08, + "loss": 16.514, + "step": 486260 + }, + { + "epoch": 0.9822961655159039, + "grad_norm": 65.93146514892578, + "learning_rate": 2.295214372050547e-08, + "loss": 12.4505, + "step": 486270 + }, + { + "epoch": 0.9823163661485877, + "grad_norm": 382.8099365234375, + "learning_rate": 2.2918747884915216e-08, + "loss": 19.6996, + "step": 486280 + }, + { + "epoch": 0.9823365667812716, + "grad_norm": 591.4705200195312, + "learning_rate": 
2.2885376307015817e-08, + "loss": 23.1165, + "step": 486290 + }, + { + "epoch": 0.9823567674139554, + "grad_norm": 466.58245849609375, + "learning_rate": 2.285202898696881e-08, + "loss": 18.6542, + "step": 486300 + }, + { + "epoch": 0.9823769680466392, + "grad_norm": 687.158935546875, + "learning_rate": 2.2818705924937402e-08, + "loss": 18.8722, + "step": 486310 + }, + { + "epoch": 0.982397168679323, + "grad_norm": 349.258056640625, + "learning_rate": 2.2785407121084236e-08, + "loss": 20.9059, + "step": 486320 + }, + { + "epoch": 0.9824173693120068, + "grad_norm": 295.1773376464844, + "learning_rate": 2.2752132575570852e-08, + "loss": 24.1587, + "step": 486330 + }, + { + "epoch": 0.9824375699446907, + "grad_norm": 432.3294372558594, + "learning_rate": 2.271888228856045e-08, + "loss": 17.8731, + "step": 486340 + }, + { + "epoch": 0.9824577705773745, + "grad_norm": 789.3005981445312, + "learning_rate": 2.268565626021457e-08, + "loss": 47.5746, + "step": 486350 + }, + { + "epoch": 0.9824779712100583, + "grad_norm": 260.5862121582031, + "learning_rate": 2.2652454490694752e-08, + "loss": 19.524, + "step": 486360 + }, + { + "epoch": 0.9824981718427421, + "grad_norm": 43.713470458984375, + "learning_rate": 2.261927698016364e-08, + "loss": 12.7472, + "step": 486370 + }, + { + "epoch": 0.982518372475426, + "grad_norm": 740.96826171875, + "learning_rate": 2.2586123728781663e-08, + "loss": 18.64, + "step": 486380 + }, + { + "epoch": 0.9825385731081098, + "grad_norm": 378.1340637207031, + "learning_rate": 2.255299473671202e-08, + "loss": 19.8248, + "step": 486390 + }, + { + "epoch": 0.9825587737407936, + "grad_norm": 174.594970703125, + "learning_rate": 2.251989000411514e-08, + "loss": 25.4638, + "step": 486400 + }, + { + "epoch": 0.9825789743734774, + "grad_norm": 139.85891723632812, + "learning_rate": 2.2486809531152563e-08, + "loss": 9.0486, + "step": 486410 + }, + { + "epoch": 0.9825991750061612, + "grad_norm": 152.03848266601562, + "learning_rate": 2.2453753317985272e-08, + "loss": 20.0999, + "step": 486420 + }, + { + "epoch": 0.982619375638845, + "grad_norm": 287.6670227050781, + "learning_rate": 2.2420721364775354e-08, + "loss": 25.8652, + "step": 486430 + }, + { + "epoch": 0.9826395762715289, + "grad_norm": 425.5420837402344, + "learning_rate": 2.2387713671682687e-08, + "loss": 25.9609, + "step": 486440 + }, + { + "epoch": 0.9826597769042127, + "grad_norm": 232.87928771972656, + "learning_rate": 2.2354730238868804e-08, + "loss": 21.0022, + "step": 486450 + }, + { + "epoch": 0.9826799775368965, + "grad_norm": 308.5003662109375, + "learning_rate": 2.2321771066494137e-08, + "loss": 13.2866, + "step": 486460 + }, + { + "epoch": 0.9827001781695802, + "grad_norm": 1615.3829345703125, + "learning_rate": 2.2288836154719663e-08, + "loss": 24.9597, + "step": 486470 + }, + { + "epoch": 0.982720378802264, + "grad_norm": 311.37188720703125, + "learning_rate": 2.2255925503705255e-08, + "loss": 13.8833, + "step": 486480 + }, + { + "epoch": 0.9827405794349479, + "grad_norm": 163.70361328125, + "learning_rate": 2.22230391136119e-08, + "loss": 8.973, + "step": 486490 + }, + { + "epoch": 0.9827607800676317, + "grad_norm": 533.0966796875, + "learning_rate": 2.219017698460002e-08, + "loss": 19.1744, + "step": 486500 + }, + { + "epoch": 0.9827809807003155, + "grad_norm": 320.56341552734375, + "learning_rate": 2.215733911682949e-08, + "loss": 12.5872, + "step": 486510 + }, + { + "epoch": 0.9828011813329993, + "grad_norm": 284.896728515625, + "learning_rate": 2.2124525510459627e-08, + "loss": 13.3462, + "step": 
486520 + }, + { + "epoch": 0.9828213819656831, + "grad_norm": 89.13407897949219, + "learning_rate": 2.2091736165651966e-08, + "loss": 5.4464, + "step": 486530 + }, + { + "epoch": 0.982841582598367, + "grad_norm": 368.83697509765625, + "learning_rate": 2.205897108256472e-08, + "loss": 29.8207, + "step": 486540 + }, + { + "epoch": 0.9828617832310508, + "grad_norm": 172.1982879638672, + "learning_rate": 2.202623026135886e-08, + "loss": 14.112, + "step": 486550 + }, + { + "epoch": 0.9828819838637346, + "grad_norm": 460.03369140625, + "learning_rate": 2.1993513702193157e-08, + "loss": 17.7389, + "step": 486560 + }, + { + "epoch": 0.9829021844964184, + "grad_norm": 219.40406799316406, + "learning_rate": 2.1960821405226928e-08, + "loss": 18.4654, + "step": 486570 + }, + { + "epoch": 0.9829223851291022, + "grad_norm": 308.46697998046875, + "learning_rate": 2.1928153370620598e-08, + "loss": 20.2735, + "step": 486580 + }, + { + "epoch": 0.9829425857617861, + "grad_norm": 279.93634033203125, + "learning_rate": 2.1895509598532372e-08, + "loss": 21.5676, + "step": 486590 + }, + { + "epoch": 0.9829627863944699, + "grad_norm": 167.79373168945312, + "learning_rate": 2.1862890089121567e-08, + "loss": 24.1097, + "step": 486600 + }, + { + "epoch": 0.9829829870271537, + "grad_norm": 129.95025634765625, + "learning_rate": 2.1830294842547506e-08, + "loss": 8.3408, + "step": 486610 + }, + { + "epoch": 0.9830031876598375, + "grad_norm": 51.695579528808594, + "learning_rate": 2.1797723858968388e-08, + "loss": 11.7549, + "step": 486620 + }, + { + "epoch": 0.9830233882925213, + "grad_norm": 771.5540161132812, + "learning_rate": 2.1765177138543535e-08, + "loss": 21.0965, + "step": 486630 + }, + { + "epoch": 0.9830435889252052, + "grad_norm": 1035.76416015625, + "learning_rate": 2.173265468143171e-08, + "loss": 35.0378, + "step": 486640 + }, + { + "epoch": 0.983063789557889, + "grad_norm": 270.08856201171875, + "learning_rate": 2.1700156487790557e-08, + "loss": 13.1302, + "step": 486650 + }, + { + "epoch": 0.9830839901905728, + "grad_norm": 140.2735137939453, + "learning_rate": 2.1667682557779958e-08, + "loss": 21.5211, + "step": 486660 + }, + { + "epoch": 0.9831041908232566, + "grad_norm": 441.3387451171875, + "learning_rate": 2.1635232891556446e-08, + "loss": 11.5147, + "step": 486670 + }, + { + "epoch": 0.9831243914559404, + "grad_norm": 725.987548828125, + "learning_rate": 2.1602807489279344e-08, + "loss": 14.4808, + "step": 486680 + }, + { + "epoch": 0.9831445920886243, + "grad_norm": 652.3868408203125, + "learning_rate": 2.1570406351106298e-08, + "loss": 20.0834, + "step": 486690 + }, + { + "epoch": 0.9831647927213081, + "grad_norm": 388.7825012207031, + "learning_rate": 2.1538029477195522e-08, + "loss": 15.2553, + "step": 486700 + }, + { + "epoch": 0.9831849933539919, + "grad_norm": 134.82530212402344, + "learning_rate": 2.1505676867704105e-08, + "loss": 16.7642, + "step": 486710 + }, + { + "epoch": 0.9832051939866756, + "grad_norm": 281.6883850097656, + "learning_rate": 2.1473348522790262e-08, + "loss": 14.2663, + "step": 486720 + }, + { + "epoch": 0.9832253946193594, + "grad_norm": 546.5551147460938, + "learning_rate": 2.1441044442611634e-08, + "loss": 19.5316, + "step": 486730 + }, + { + "epoch": 0.9832455952520432, + "grad_norm": 41.95939254760742, + "learning_rate": 2.1408764627325883e-08, + "loss": 16.1222, + "step": 486740 + }, + { + "epoch": 0.9832657958847271, + "grad_norm": 326.4468994140625, + "learning_rate": 2.1376509077089546e-08, + "loss": 17.1573, + "step": 486750 + }, + { + "epoch": 
0.9832859965174109, + "grad_norm": 345.43109130859375, + "learning_rate": 2.1344277792060275e-08, + "loss": 12.0682, + "step": 486760 + }, + { + "epoch": 0.9833061971500947, + "grad_norm": 158.94752502441406, + "learning_rate": 2.1312070772395165e-08, + "loss": 15.5515, + "step": 486770 + }, + { + "epoch": 0.9833263977827785, + "grad_norm": 134.98707580566406, + "learning_rate": 2.1279888018251317e-08, + "loss": 21.1968, + "step": 486780 + }, + { + "epoch": 0.9833465984154623, + "grad_norm": 548.9357299804688, + "learning_rate": 2.1247729529785822e-08, + "loss": 19.2239, + "step": 486790 + }, + { + "epoch": 0.9833667990481462, + "grad_norm": 36.52518844604492, + "learning_rate": 2.1215595307154667e-08, + "loss": 16.6772, + "step": 486800 + }, + { + "epoch": 0.98338699968083, + "grad_norm": 527.3377685546875, + "learning_rate": 2.1183485350514397e-08, + "loss": 12.0839, + "step": 486810 + }, + { + "epoch": 0.9834072003135138, + "grad_norm": 447.6247863769531, + "learning_rate": 2.1151399660022664e-08, + "loss": 16.5998, + "step": 486820 + }, + { + "epoch": 0.9834274009461976, + "grad_norm": 492.91900634765625, + "learning_rate": 2.1119338235834897e-08, + "loss": 33.839, + "step": 486830 + }, + { + "epoch": 0.9834476015788814, + "grad_norm": 192.20407104492188, + "learning_rate": 2.1087301078107637e-08, + "loss": 19.5072, + "step": 486840 + }, + { + "epoch": 0.9834678022115653, + "grad_norm": 317.2949523925781, + "learning_rate": 2.105528818699687e-08, + "loss": 15.7755, + "step": 486850 + }, + { + "epoch": 0.9834880028442491, + "grad_norm": 411.9183654785156, + "learning_rate": 2.1023299562658584e-08, + "loss": 13.3417, + "step": 486860 + }, + { + "epoch": 0.9835082034769329, + "grad_norm": 727.0975341796875, + "learning_rate": 2.0991335205249318e-08, + "loss": 17.7422, + "step": 486870 + }, + { + "epoch": 0.9835284041096167, + "grad_norm": 776.1140747070312, + "learning_rate": 2.0959395114923954e-08, + "loss": 19.5978, + "step": 486880 + }, + { + "epoch": 0.9835486047423005, + "grad_norm": 1540.5836181640625, + "learning_rate": 2.0927479291839024e-08, + "loss": 17.3088, + "step": 486890 + }, + { + "epoch": 0.9835688053749844, + "grad_norm": 350.5865478515625, + "learning_rate": 2.0895587736149414e-08, + "loss": 18.9651, + "step": 486900 + }, + { + "epoch": 0.9835890060076682, + "grad_norm": 674.6132202148438, + "learning_rate": 2.0863720448011106e-08, + "loss": 13.8975, + "step": 486910 + }, + { + "epoch": 0.983609206640352, + "grad_norm": 659.1400146484375, + "learning_rate": 2.0831877427578974e-08, + "loss": 19.0174, + "step": 486920 + }, + { + "epoch": 0.9836294072730358, + "grad_norm": 532.0677490234375, + "learning_rate": 2.0800058675007894e-08, + "loss": 26.3079, + "step": 486930 + }, + { + "epoch": 0.9836496079057196, + "grad_norm": 321.0643615722656, + "learning_rate": 2.076826419045386e-08, + "loss": 14.3447, + "step": 486940 + }, + { + "epoch": 0.9836698085384035, + "grad_norm": 227.18014526367188, + "learning_rate": 2.0736493974071736e-08, + "loss": 18.2144, + "step": 486950 + }, + { + "epoch": 0.9836900091710873, + "grad_norm": 471.3290100097656, + "learning_rate": 2.0704748026015298e-08, + "loss": 12.0176, + "step": 486960 + }, + { + "epoch": 0.9837102098037711, + "grad_norm": 331.65252685546875, + "learning_rate": 2.0673026346440526e-08, + "loss": 21.1374, + "step": 486970 + }, + { + "epoch": 0.9837304104364548, + "grad_norm": 591.6358642578125, + "learning_rate": 2.0641328935501748e-08, + "loss": 35.3378, + "step": 486980 + }, + { + "epoch": 0.9837506110691386, + 
"grad_norm": 253.2311248779297, + "learning_rate": 2.0609655793352724e-08, + "loss": 15.4262, + "step": 486990 + }, + { + "epoch": 0.9837708117018225, + "grad_norm": 738.0919799804688, + "learning_rate": 2.057800692014833e-08, + "loss": 20.5693, + "step": 487000 + }, + { + "epoch": 0.9837910123345063, + "grad_norm": 395.4708251953125, + "learning_rate": 2.054638231604289e-08, + "loss": 22.3301, + "step": 487010 + }, + { + "epoch": 0.9838112129671901, + "grad_norm": 770.7337646484375, + "learning_rate": 2.051478198119017e-08, + "loss": 25.5808, + "step": 487020 + }, + { + "epoch": 0.9838314135998739, + "grad_norm": 190.54969787597656, + "learning_rate": 2.0483205915745042e-08, + "loss": 23.1655, + "step": 487030 + }, + { + "epoch": 0.9838516142325577, + "grad_norm": 382.62115478515625, + "learning_rate": 2.0451654119860164e-08, + "loss": 16.6641, + "step": 487040 + }, + { + "epoch": 0.9838718148652416, + "grad_norm": 330.2117919921875, + "learning_rate": 2.0420126593690416e-08, + "loss": 9.8796, + "step": 487050 + }, + { + "epoch": 0.9838920154979254, + "grad_norm": 238.4560546875, + "learning_rate": 2.0388623337389003e-08, + "loss": 28.3645, + "step": 487060 + }, + { + "epoch": 0.9839122161306092, + "grad_norm": 222.7383270263672, + "learning_rate": 2.0357144351109693e-08, + "loss": 11.7348, + "step": 487070 + }, + { + "epoch": 0.983932416763293, + "grad_norm": 1798.53515625, + "learning_rate": 2.0325689635005142e-08, + "loss": 24.3655, + "step": 487080 + }, + { + "epoch": 0.9839526173959768, + "grad_norm": 191.74282836914062, + "learning_rate": 2.029425918922967e-08, + "loss": 15.1017, + "step": 487090 + }, + { + "epoch": 0.9839728180286607, + "grad_norm": 223.9886474609375, + "learning_rate": 2.026285301393538e-08, + "loss": 13.2002, + "step": 487100 + }, + { + "epoch": 0.9839930186613445, + "grad_norm": 115.09986114501953, + "learning_rate": 2.023147110927659e-08, + "loss": 12.3963, + "step": 487110 + }, + { + "epoch": 0.9840132192940283, + "grad_norm": 253.5218048095703, + "learning_rate": 2.020011347540596e-08, + "loss": 15.6474, + "step": 487120 + }, + { + "epoch": 0.9840334199267121, + "grad_norm": 283.21771240234375, + "learning_rate": 2.016878011247503e-08, + "loss": 22.0459, + "step": 487130 + }, + { + "epoch": 0.9840536205593959, + "grad_norm": 81.14229583740234, + "learning_rate": 2.013747102063812e-08, + "loss": 11.7839, + "step": 487140 + }, + { + "epoch": 0.9840738211920798, + "grad_norm": 506.94976806640625, + "learning_rate": 2.010618620004734e-08, + "loss": 18.794, + "step": 487150 + }, + { + "epoch": 0.9840940218247636, + "grad_norm": 214.54312133789062, + "learning_rate": 2.0074925650854226e-08, + "loss": 22.0953, + "step": 487160 + }, + { + "epoch": 0.9841142224574474, + "grad_norm": 90.94629669189453, + "learning_rate": 2.004368937321255e-08, + "loss": 14.7933, + "step": 487170 + }, + { + "epoch": 0.9841344230901312, + "grad_norm": 354.2481384277344, + "learning_rate": 2.0012477367273854e-08, + "loss": 12.9025, + "step": 487180 + }, + { + "epoch": 0.984154623722815, + "grad_norm": 485.8455810546875, + "learning_rate": 1.9981289633190237e-08, + "loss": 18.0831, + "step": 487190 + }, + { + "epoch": 0.9841748243554989, + "grad_norm": 326.9634094238281, + "learning_rate": 1.995012617111436e-08, + "loss": 11.4334, + "step": 487200 + }, + { + "epoch": 0.9841950249881827, + "grad_norm": 440.474853515625, + "learning_rate": 1.9918986981196653e-08, + "loss": 22.803, + "step": 487210 + }, + { + "epoch": 0.9842152256208665, + "grad_norm": 384.60382080078125, + "learning_rate": 
1.988787206359033e-08, + "loss": 17.9767, + "step": 487220 + }, + { + "epoch": 0.9842354262535503, + "grad_norm": 531.9924926757812, + "learning_rate": 1.985678141844638e-08, + "loss": 17.9372, + "step": 487230 + }, + { + "epoch": 0.984255626886234, + "grad_norm": 341.8916015625, + "learning_rate": 1.9825715045916905e-08, + "loss": 18.8524, + "step": 487240 + }, + { + "epoch": 0.9842758275189178, + "grad_norm": 188.14895629882812, + "learning_rate": 1.9794672946152337e-08, + "loss": 19.2394, + "step": 487250 + }, + { + "epoch": 0.9842960281516017, + "grad_norm": 470.08599853515625, + "learning_rate": 1.9763655119304227e-08, + "loss": 13.1318, + "step": 487260 + }, + { + "epoch": 0.9843162287842855, + "grad_norm": 235.66885375976562, + "learning_rate": 1.973266156552467e-08, + "loss": 26.0032, + "step": 487270 + }, + { + "epoch": 0.9843364294169693, + "grad_norm": 756.0232543945312, + "learning_rate": 1.9701692284963547e-08, + "loss": 20.216, + "step": 487280 + }, + { + "epoch": 0.9843566300496531, + "grad_norm": 1019.5380859375, + "learning_rate": 1.967074727777296e-08, + "loss": 27.1592, + "step": 487290 + }, + { + "epoch": 0.984376830682337, + "grad_norm": 10.018977165222168, + "learning_rate": 1.963982654410279e-08, + "loss": 11.0307, + "step": 487300 + }, + { + "epoch": 0.9843970313150208, + "grad_norm": 255.76251220703125, + "learning_rate": 1.9608930084104027e-08, + "loss": 12.4662, + "step": 487310 + }, + { + "epoch": 0.9844172319477046, + "grad_norm": 493.1605529785156, + "learning_rate": 1.9578057897927104e-08, + "loss": 20.6991, + "step": 487320 + }, + { + "epoch": 0.9844374325803884, + "grad_norm": 460.5899963378906, + "learning_rate": 1.9547209985723015e-08, + "loss": 16.1439, + "step": 487330 + }, + { + "epoch": 0.9844576332130722, + "grad_norm": 289.14593505859375, + "learning_rate": 1.9516386347641636e-08, + "loss": 28.3802, + "step": 487340 + }, + { + "epoch": 0.984477833845756, + "grad_norm": 64.37545776367188, + "learning_rate": 1.9485586983833404e-08, + "loss": 21.3151, + "step": 487350 + }, + { + "epoch": 0.9844980344784399, + "grad_norm": 271.99395751953125, + "learning_rate": 1.94548118944482e-08, + "loss": 7.3311, + "step": 487360 + }, + { + "epoch": 0.9845182351111237, + "grad_norm": 325.72216796875, + "learning_rate": 1.9424061079636458e-08, + "loss": 18.6132, + "step": 487370 + }, + { + "epoch": 0.9845384357438075, + "grad_norm": 546.6802368164062, + "learning_rate": 1.9393334539547505e-08, + "loss": 20.1622, + "step": 487380 + }, + { + "epoch": 0.9845586363764913, + "grad_norm": 691.5068969726562, + "learning_rate": 1.9362632274331215e-08, + "loss": 26.9569, + "step": 487390 + }, + { + "epoch": 0.9845788370091751, + "grad_norm": 243.03543090820312, + "learning_rate": 1.9331954284137476e-08, + "loss": 11.425, + "step": 487400 + }, + { + "epoch": 0.984599037641859, + "grad_norm": 403.1186828613281, + "learning_rate": 1.9301300569116165e-08, + "loss": 10.5166, + "step": 487410 + }, + { + "epoch": 0.9846192382745428, + "grad_norm": 403.91058349609375, + "learning_rate": 1.9270671129415496e-08, + "loss": 21.6073, + "step": 487420 + }, + { + "epoch": 0.9846394389072266, + "grad_norm": 202.64797973632812, + "learning_rate": 1.9240065965185907e-08, + "loss": 13.2823, + "step": 487430 + }, + { + "epoch": 0.9846596395399104, + "grad_norm": 330.4432373046875, + "learning_rate": 1.9209485076576718e-08, + "loss": 17.4824, + "step": 487440 + }, + { + "epoch": 0.9846798401725942, + "grad_norm": 745.1810302734375, + "learning_rate": 1.9178928463735593e-08, + "loss": 19.5667, + 
"step": 487450 + }, + { + "epoch": 0.9847000408052781, + "grad_norm": 0.0, + "learning_rate": 1.9148396126812407e-08, + "loss": 21.1232, + "step": 487460 + }, + { + "epoch": 0.9847202414379619, + "grad_norm": 289.8485107421875, + "learning_rate": 1.9117888065955938e-08, + "loss": 8.0293, + "step": 487470 + }, + { + "epoch": 0.9847404420706457, + "grad_norm": 286.2195129394531, + "learning_rate": 1.908740428131495e-08, + "loss": 14.2113, + "step": 487480 + }, + { + "epoch": 0.9847606427033294, + "grad_norm": 32.78451156616211, + "learning_rate": 1.9056944773037656e-08, + "loss": 13.1252, + "step": 487490 + }, + { + "epoch": 0.9847808433360132, + "grad_norm": 131.9490203857422, + "learning_rate": 1.9026509541272276e-08, + "loss": 11.4658, + "step": 487500 + }, + { + "epoch": 0.984801043968697, + "grad_norm": 550.3273315429688, + "learning_rate": 1.8996098586168132e-08, + "loss": 10.4393, + "step": 487510 + }, + { + "epoch": 0.9848212446013809, + "grad_norm": 242.36134338378906, + "learning_rate": 1.8965711907872885e-08, + "loss": 25.7361, + "step": 487520 + }, + { + "epoch": 0.9848414452340647, + "grad_norm": 413.4701232910156, + "learning_rate": 1.8935349506534195e-08, + "loss": 24.0202, + "step": 487530 + }, + { + "epoch": 0.9848616458667485, + "grad_norm": 672.5272827148438, + "learning_rate": 1.890501138230083e-08, + "loss": 31.6589, + "step": 487540 + }, + { + "epoch": 0.9848818464994323, + "grad_norm": 291.6829833984375, + "learning_rate": 1.8874697535319897e-08, + "loss": 14.4903, + "step": 487550 + }, + { + "epoch": 0.9849020471321162, + "grad_norm": 623.2794189453125, + "learning_rate": 1.8844407965740165e-08, + "loss": 18.8497, + "step": 487560 + }, + { + "epoch": 0.9849222477648, + "grad_norm": 336.7108154296875, + "learning_rate": 1.881414267370818e-08, + "loss": 26.6136, + "step": 487570 + }, + { + "epoch": 0.9849424483974838, + "grad_norm": 363.7168884277344, + "learning_rate": 1.8783901659372162e-08, + "loss": 12.3119, + "step": 487580 + }, + { + "epoch": 0.9849626490301676, + "grad_norm": 188.1642303466797, + "learning_rate": 1.875368492287921e-08, + "loss": 12.5046, + "step": 487590 + }, + { + "epoch": 0.9849828496628514, + "grad_norm": 541.2425537109375, + "learning_rate": 1.8723492464376992e-08, + "loss": 24.7602, + "step": 487600 + }, + { + "epoch": 0.9850030502955353, + "grad_norm": 335.880859375, + "learning_rate": 1.8693324284011495e-08, + "loss": 29.85, + "step": 487610 + }, + { + "epoch": 0.9850232509282191, + "grad_norm": 479.3641662597656, + "learning_rate": 1.8663180381931488e-08, + "loss": 18.8721, + "step": 487620 + }, + { + "epoch": 0.9850434515609029, + "grad_norm": 496.9161071777344, + "learning_rate": 1.8633060758282418e-08, + "loss": 17.1028, + "step": 487630 + }, + { + "epoch": 0.9850636521935867, + "grad_norm": 662.9967041015625, + "learning_rate": 1.860296541321138e-08, + "loss": 14.4051, + "step": 487640 + }, + { + "epoch": 0.9850838528262705, + "grad_norm": 183.82313537597656, + "learning_rate": 1.8572894346866043e-08, + "loss": 6.9085, + "step": 487650 + }, + { + "epoch": 0.9851040534589544, + "grad_norm": 275.2166442871094, + "learning_rate": 1.854284755939184e-08, + "loss": 33.5495, + "step": 487660 + }, + { + "epoch": 0.9851242540916382, + "grad_norm": 2.3112215995788574, + "learning_rate": 1.8512825050935323e-08, + "loss": 14.4201, + "step": 487670 + }, + { + "epoch": 0.985144454724322, + "grad_norm": 338.2336120605469, + "learning_rate": 1.8482826821643596e-08, + "loss": 35.2743, + "step": 487680 + }, + { + "epoch": 0.9851646553570058, + 
"grad_norm": 64.32235717773438, + "learning_rate": 1.8452852871662653e-08, + "loss": 7.4456, + "step": 487690 + }, + { + "epoch": 0.9851848559896896, + "grad_norm": 0.2854507565498352, + "learning_rate": 1.842290320113793e-08, + "loss": 22.3451, + "step": 487700 + }, + { + "epoch": 0.9852050566223735, + "grad_norm": 331.331787109375, + "learning_rate": 1.839297781021543e-08, + "loss": 25.7413, + "step": 487710 + }, + { + "epoch": 0.9852252572550573, + "grad_norm": 257.5212707519531, + "learning_rate": 1.8363076699041695e-08, + "loss": 16.15, + "step": 487720 + }, + { + "epoch": 0.9852454578877411, + "grad_norm": 472.3876953125, + "learning_rate": 1.8333199867762163e-08, + "loss": 15.8237, + "step": 487730 + }, + { + "epoch": 0.9852656585204249, + "grad_norm": 635.3434448242188, + "learning_rate": 1.830334731652228e-08, + "loss": 26.1093, + "step": 487740 + }, + { + "epoch": 0.9852858591531086, + "grad_norm": 5.066963195800781, + "learning_rate": 1.8273519045468035e-08, + "loss": 11.3581, + "step": 487750 + }, + { + "epoch": 0.9853060597857924, + "grad_norm": 303.07904052734375, + "learning_rate": 1.8243715054744315e-08, + "loss": 19.2844, + "step": 487760 + }, + { + "epoch": 0.9853262604184763, + "grad_norm": 323.21795654296875, + "learning_rate": 1.8213935344496002e-08, + "loss": 12.7054, + "step": 487770 + }, + { + "epoch": 0.9853464610511601, + "grad_norm": 187.547119140625, + "learning_rate": 1.8184179914869093e-08, + "loss": 10.2604, + "step": 487780 + }, + { + "epoch": 0.9853666616838439, + "grad_norm": 393.1107177734375, + "learning_rate": 1.815444876600847e-08, + "loss": 11.2483, + "step": 487790 + }, + { + "epoch": 0.9853868623165277, + "grad_norm": 156.0241241455078, + "learning_rate": 1.8124741898058462e-08, + "loss": 14.5017, + "step": 487800 + }, + { + "epoch": 0.9854070629492115, + "grad_norm": 185.0515899658203, + "learning_rate": 1.8095059311164508e-08, + "loss": 16.2474, + "step": 487810 + }, + { + "epoch": 0.9854272635818954, + "grad_norm": 177.43402099609375, + "learning_rate": 1.8065401005470938e-08, + "loss": 17.9147, + "step": 487820 + }, + { + "epoch": 0.9854474642145792, + "grad_norm": 549.4453125, + "learning_rate": 1.803576698112264e-08, + "loss": 18.9288, + "step": 487830 + }, + { + "epoch": 0.985467664847263, + "grad_norm": 317.1197509765625, + "learning_rate": 1.8006157238263376e-08, + "loss": 22.0972, + "step": 487840 + }, + { + "epoch": 0.9854878654799468, + "grad_norm": 302.7123107910156, + "learning_rate": 1.7976571777038044e-08, + "loss": 16.1826, + "step": 487850 + }, + { + "epoch": 0.9855080661126306, + "grad_norm": 477.897216796875, + "learning_rate": 1.7947010597590408e-08, + "loss": 7.3532, + "step": 487860 + }, + { + "epoch": 0.9855282667453145, + "grad_norm": 425.7860412597656, + "learning_rate": 1.791747370006536e-08, + "loss": 21.8247, + "step": 487870 + }, + { + "epoch": 0.9855484673779983, + "grad_norm": 265.6903991699219, + "learning_rate": 1.7887961084605554e-08, + "loss": 17.2382, + "step": 487880 + }, + { + "epoch": 0.9855686680106821, + "grad_norm": 210.8507537841797, + "learning_rate": 1.7858472751355883e-08, + "loss": 18.1254, + "step": 487890 + }, + { + "epoch": 0.9855888686433659, + "grad_norm": 302.1631774902344, + "learning_rate": 1.7829008700460116e-08, + "loss": 26.4176, + "step": 487900 + }, + { + "epoch": 0.9856090692760497, + "grad_norm": 559.1700439453125, + "learning_rate": 1.779956893206092e-08, + "loss": 21.3995, + "step": 487910 + }, + { + "epoch": 0.9856292699087336, + "grad_norm": 342.4388427734375, + "learning_rate": 
1.7770153446302618e-08, + "loss": 15.9682, + "step": 487920 + }, + { + "epoch": 0.9856494705414174, + "grad_norm": 166.08718872070312, + "learning_rate": 1.7740762243328435e-08, + "loss": 19.5306, + "step": 487930 + }, + { + "epoch": 0.9856696711741012, + "grad_norm": 369.96246337890625, + "learning_rate": 1.7711395323281588e-08, + "loss": 17.6639, + "step": 487940 + }, + { + "epoch": 0.985689871806785, + "grad_norm": 270.3794860839844, + "learning_rate": 1.768205268630474e-08, + "loss": 11.5303, + "step": 487950 + }, + { + "epoch": 0.9857100724394688, + "grad_norm": 896.9556274414062, + "learning_rate": 1.765273433254111e-08, + "loss": 13.5518, + "step": 487960 + }, + { + "epoch": 0.9857302730721527, + "grad_norm": 562.6268920898438, + "learning_rate": 1.7623440262134472e-08, + "loss": 30.144, + "step": 487970 + }, + { + "epoch": 0.9857504737048365, + "grad_norm": 626.1119384765625, + "learning_rate": 1.759417047522638e-08, + "loss": 23.2989, + "step": 487980 + }, + { + "epoch": 0.9857706743375203, + "grad_norm": 207.8027801513672, + "learning_rate": 1.756492497196005e-08, + "loss": 27.4454, + "step": 487990 + }, + { + "epoch": 0.985790874970204, + "grad_norm": 460.8661193847656, + "learning_rate": 1.753570375247815e-08, + "loss": 22.146, + "step": 488000 + }, + { + "epoch": 0.9858110756028878, + "grad_norm": 852.23779296875, + "learning_rate": 1.7506506816923342e-08, + "loss": 33.3133, + "step": 488010 + }, + { + "epoch": 0.9858312762355717, + "grad_norm": 555.6287231445312, + "learning_rate": 1.747733416543662e-08, + "loss": 9.9734, + "step": 488020 + }, + { + "epoch": 0.9858514768682555, + "grad_norm": 302.9903259277344, + "learning_rate": 1.7448185798161765e-08, + "loss": 25.9241, + "step": 488030 + }, + { + "epoch": 0.9858716775009393, + "grad_norm": 897.1173706054688, + "learning_rate": 1.741906171523977e-08, + "loss": 24.4153, + "step": 488040 + }, + { + "epoch": 0.9858918781336231, + "grad_norm": 478.850341796875, + "learning_rate": 1.73899619168133e-08, + "loss": 13.5781, + "step": 488050 + }, + { + "epoch": 0.9859120787663069, + "grad_norm": 30.23406982421875, + "learning_rate": 1.7360886403023358e-08, + "loss": 7.5778, + "step": 488060 + }, + { + "epoch": 0.9859322793989908, + "grad_norm": 202.19210815429688, + "learning_rate": 1.7331835174012602e-08, + "loss": 12.541, + "step": 488070 + }, + { + "epoch": 0.9859524800316746, + "grad_norm": 158.72364807128906, + "learning_rate": 1.7302808229921476e-08, + "loss": 11.0494, + "step": 488080 + }, + { + "epoch": 0.9859726806643584, + "grad_norm": 133.09844970703125, + "learning_rate": 1.7273805570892643e-08, + "loss": 18.3383, + "step": 488090 + }, + { + "epoch": 0.9859928812970422, + "grad_norm": 340.03326416015625, + "learning_rate": 1.7244827197067103e-08, + "loss": 12.8224, + "step": 488100 + }, + { + "epoch": 0.986013081929726, + "grad_norm": 1257.68994140625, + "learning_rate": 1.7215873108585858e-08, + "loss": 42.4838, + "step": 488110 + }, + { + "epoch": 0.9860332825624099, + "grad_norm": 718.265869140625, + "learning_rate": 1.71869433055899e-08, + "loss": 18.3349, + "step": 488120 + }, + { + "epoch": 0.9860534831950937, + "grad_norm": 379.7763671875, + "learning_rate": 1.7158037788220782e-08, + "loss": 11.872, + "step": 488130 + }, + { + "epoch": 0.9860736838277775, + "grad_norm": 353.88665771484375, + "learning_rate": 1.7129156556618398e-08, + "loss": 25.6119, + "step": 488140 + }, + { + "epoch": 0.9860938844604613, + "grad_norm": 317.9150390625, + "learning_rate": 1.7100299610924297e-08, + "loss": 15.0898, + "step": 488150 
+ }, + { + "epoch": 0.9861140850931451, + "grad_norm": 453.9829406738281, + "learning_rate": 1.707146695127948e-08, + "loss": 13.9989, + "step": 488160 + }, + { + "epoch": 0.986134285725829, + "grad_norm": 166.3394012451172, + "learning_rate": 1.7042658577823833e-08, + "loss": 42.1762, + "step": 488170 + }, + { + "epoch": 0.9861544863585128, + "grad_norm": 283.5535583496094, + "learning_rate": 1.7013874490697802e-08, + "loss": 11.6909, + "step": 488180 + }, + { + "epoch": 0.9861746869911966, + "grad_norm": 276.1616516113281, + "learning_rate": 1.6985114690041825e-08, + "loss": 21.0326, + "step": 488190 + }, + { + "epoch": 0.9861948876238804, + "grad_norm": 141.51397705078125, + "learning_rate": 1.6956379175995796e-08, + "loss": 7.7452, + "step": 488200 + }, + { + "epoch": 0.9862150882565642, + "grad_norm": 171.76841735839844, + "learning_rate": 1.6927667948700155e-08, + "loss": 12.0104, + "step": 488210 + }, + { + "epoch": 0.986235288889248, + "grad_norm": 595.8139038085938, + "learning_rate": 1.689898100829479e-08, + "loss": 14.0837, + "step": 488220 + }, + { + "epoch": 0.9862554895219319, + "grad_norm": 473.9314880371094, + "learning_rate": 1.687031835491959e-08, + "loss": 26.5983, + "step": 488230 + }, + { + "epoch": 0.9862756901546157, + "grad_norm": 283.1180419921875, + "learning_rate": 1.6841679988713332e-08, + "loss": 12.0923, + "step": 488240 + }, + { + "epoch": 0.9862958907872995, + "grad_norm": 289.6003112792969, + "learning_rate": 1.681306590981702e-08, + "loss": 20.0766, + "step": 488250 + }, + { + "epoch": 0.9863160914199832, + "grad_norm": 386.9776306152344, + "learning_rate": 1.678447611836942e-08, + "loss": 20.7361, + "step": 488260 + }, + { + "epoch": 0.986336292052667, + "grad_norm": 179.49264526367188, + "learning_rate": 1.6755910614509872e-08, + "loss": 13.2435, + "step": 488270 + }, + { + "epoch": 0.9863564926853509, + "grad_norm": 273.3138427734375, + "learning_rate": 1.6727369398377158e-08, + "loss": 18.0023, + "step": 488280 + }, + { + "epoch": 0.9863766933180347, + "grad_norm": 685.1600341796875, + "learning_rate": 1.669885247011116e-08, + "loss": 22.936, + "step": 488290 + }, + { + "epoch": 0.9863968939507185, + "grad_norm": 612.9713745117188, + "learning_rate": 1.6670359829850657e-08, + "loss": 24.6449, + "step": 488300 + }, + { + "epoch": 0.9864170945834023, + "grad_norm": 168.1580047607422, + "learning_rate": 1.664189147773443e-08, + "loss": 10.4781, + "step": 488310 + }, + { + "epoch": 0.9864372952160861, + "grad_norm": 116.18118286132812, + "learning_rate": 1.6613447413900696e-08, + "loss": 17.8398, + "step": 488320 + }, + { + "epoch": 0.98645749584877, + "grad_norm": 217.2073974609375, + "learning_rate": 1.6585027638489347e-08, + "loss": 16.984, + "step": 488330 + }, + { + "epoch": 0.9864776964814538, + "grad_norm": 219.95492553710938, + "learning_rate": 1.655663215163805e-08, + "loss": 8.6126, + "step": 488340 + }, + { + "epoch": 0.9864978971141376, + "grad_norm": 261.9851989746094, + "learning_rate": 1.6528260953484476e-08, + "loss": 16.6404, + "step": 488350 + }, + { + "epoch": 0.9865180977468214, + "grad_norm": 305.74176025390625, + "learning_rate": 1.6499914044168508e-08, + "loss": 14.7852, + "step": 488360 + }, + { + "epoch": 0.9865382983795052, + "grad_norm": 174.19142150878906, + "learning_rate": 1.6471591423827817e-08, + "loss": 17.0763, + "step": 488370 + }, + { + "epoch": 0.9865584990121891, + "grad_norm": 526.5982666015625, + "learning_rate": 1.644329309259951e-08, + "loss": 23.4136, + "step": 488380 + }, + { + "epoch": 0.9865786996448729, + 
"grad_norm": 175.1710968017578, + "learning_rate": 1.6415019050622373e-08, + "loss": 20.1765, + "step": 488390 + }, + { + "epoch": 0.9865989002775567, + "grad_norm": 442.1764221191406, + "learning_rate": 1.6386769298034067e-08, + "loss": 15.7431, + "step": 488400 + }, + { + "epoch": 0.9866191009102405, + "grad_norm": 491.32269287109375, + "learning_rate": 1.635854383497226e-08, + "loss": 10.6239, + "step": 488410 + }, + { + "epoch": 0.9866393015429243, + "grad_norm": 153.86268615722656, + "learning_rate": 1.6330342661574072e-08, + "loss": 8.8285, + "step": 488420 + }, + { + "epoch": 0.9866595021756082, + "grad_norm": 534.8939208984375, + "learning_rate": 1.6302165777977718e-08, + "loss": 15.201, + "step": 488430 + }, + { + "epoch": 0.986679702808292, + "grad_norm": 687.9249877929688, + "learning_rate": 1.6274013184319757e-08, + "loss": 24.0827, + "step": 488440 + }, + { + "epoch": 0.9866999034409758, + "grad_norm": 270.24505615234375, + "learning_rate": 1.6245884880738415e-08, + "loss": 8.0484, + "step": 488450 + }, + { + "epoch": 0.9867201040736596, + "grad_norm": 31.995512008666992, + "learning_rate": 1.621778086736969e-08, + "loss": 15.4762, + "step": 488460 + }, + { + "epoch": 0.9867403047063434, + "grad_norm": 421.179443359375, + "learning_rate": 1.6189701144351254e-08, + "loss": 18.3479, + "step": 488470 + }, + { + "epoch": 0.9867605053390273, + "grad_norm": 912.8563842773438, + "learning_rate": 1.6161645711819664e-08, + "loss": 37.9865, + "step": 488480 + }, + { + "epoch": 0.9867807059717111, + "grad_norm": 244.22555541992188, + "learning_rate": 1.6133614569912027e-08, + "loss": 16.8347, + "step": 488490 + }, + { + "epoch": 0.9868009066043949, + "grad_norm": 1216.3824462890625, + "learning_rate": 1.610560771876435e-08, + "loss": 20.0859, + "step": 488500 + }, + { + "epoch": 0.9868211072370787, + "grad_norm": 310.50262451171875, + "learning_rate": 1.607762515851319e-08, + "loss": 17.3168, + "step": 488510 + }, + { + "epoch": 0.9868413078697624, + "grad_norm": 362.2833251953125, + "learning_rate": 1.6049666889295657e-08, + "loss": 21.6378, + "step": 488520 + }, + { + "epoch": 0.9868615085024462, + "grad_norm": 901.0900268554688, + "learning_rate": 1.6021732911247756e-08, + "loss": 22.5777, + "step": 488530 + }, + { + "epoch": 0.9868817091351301, + "grad_norm": 526.10791015625, + "learning_rate": 1.5993823224504935e-08, + "loss": 24.0116, + "step": 488540 + }, + { + "epoch": 0.9869019097678139, + "grad_norm": 450.54888916015625, + "learning_rate": 1.5965937829204302e-08, + "loss": 17.4813, + "step": 488550 + }, + { + "epoch": 0.9869221104004977, + "grad_norm": 136.32603454589844, + "learning_rate": 1.5938076725480756e-08, + "loss": 15.49, + "step": 488560 + }, + { + "epoch": 0.9869423110331815, + "grad_norm": 276.5377197265625, + "learning_rate": 1.5910239913470292e-08, + "loss": 8.7842, + "step": 488570 + }, + { + "epoch": 0.9869625116658653, + "grad_norm": 132.17144775390625, + "learning_rate": 1.5882427393309475e-08, + "loss": 12.1712, + "step": 488580 + }, + { + "epoch": 0.9869827122985492, + "grad_norm": 341.4737548828125, + "learning_rate": 1.585463916513319e-08, + "loss": 14.214, + "step": 488590 + }, + { + "epoch": 0.987002912931233, + "grad_norm": 552.6314697265625, + "learning_rate": 1.582687522907633e-08, + "loss": 16.6554, + "step": 488600 + }, + { + "epoch": 0.9870231135639168, + "grad_norm": 547.34814453125, + "learning_rate": 1.5799135585274906e-08, + "loss": 21.9563, + "step": 488610 + }, + { + "epoch": 0.9870433141966006, + "grad_norm": 530.8009643554688, + 
"learning_rate": 1.5771420233864355e-08, + "loss": 17.8597, + "step": 488620 + }, + { + "epoch": 0.9870635148292844, + "grad_norm": 121.2981185913086, + "learning_rate": 1.5743729174979016e-08, + "loss": 9.4337, + "step": 488630 + }, + { + "epoch": 0.9870837154619683, + "grad_norm": 421.91375732421875, + "learning_rate": 1.571606240875434e-08, + "loss": 25.9695, + "step": 488640 + }, + { + "epoch": 0.9871039160946521, + "grad_norm": 24.188617706298828, + "learning_rate": 1.5688419935325216e-08, + "loss": 11.5205, + "step": 488650 + }, + { + "epoch": 0.9871241167273359, + "grad_norm": 170.5167999267578, + "learning_rate": 1.5660801754825983e-08, + "loss": 10.8688, + "step": 488660 + }, + { + "epoch": 0.9871443173600197, + "grad_norm": 225.33900451660156, + "learning_rate": 1.563320786739153e-08, + "loss": 13.0992, + "step": 488670 + }, + { + "epoch": 0.9871645179927035, + "grad_norm": 530.8095092773438, + "learning_rate": 1.56056382731562e-08, + "loss": 19.3749, + "step": 488680 + }, + { + "epoch": 0.9871847186253874, + "grad_norm": 53.277740478515625, + "learning_rate": 1.5578092972254875e-08, + "loss": 17.041, + "step": 488690 + }, + { + "epoch": 0.9872049192580712, + "grad_norm": 114.4014663696289, + "learning_rate": 1.5550571964820793e-08, + "loss": 23.5643, + "step": 488700 + }, + { + "epoch": 0.987225119890755, + "grad_norm": 106.43096160888672, + "learning_rate": 1.5523075250989395e-08, + "loss": 4.523, + "step": 488710 + }, + { + "epoch": 0.9872453205234388, + "grad_norm": 337.0357360839844, + "learning_rate": 1.5495602830893354e-08, + "loss": 22.5743, + "step": 488720 + }, + { + "epoch": 0.9872655211561226, + "grad_norm": 556.9434204101562, + "learning_rate": 1.546815470466756e-08, + "loss": 41.1725, + "step": 488730 + }, + { + "epoch": 0.9872857217888065, + "grad_norm": 95.35572052001953, + "learning_rate": 1.5440730872445242e-08, + "loss": 26.158, + "step": 488740 + }, + { + "epoch": 0.9873059224214903, + "grad_norm": 331.4527893066406, + "learning_rate": 1.541333133436018e-08, + "loss": 20.8953, + "step": 488750 + }, + { + "epoch": 0.9873261230541741, + "grad_norm": 0.0, + "learning_rate": 1.538595609054616e-08, + "loss": 11.3459, + "step": 488760 + }, + { + "epoch": 0.9873463236868578, + "grad_norm": 317.4156799316406, + "learning_rate": 1.5358605141136407e-08, + "loss": 15.1851, + "step": 488770 + }, + { + "epoch": 0.9873665243195416, + "grad_norm": 145.23828125, + "learning_rate": 1.5331278486264144e-08, + "loss": 11.5322, + "step": 488780 + }, + { + "epoch": 0.9873867249522255, + "grad_norm": 1322.247802734375, + "learning_rate": 1.53039761260626e-08, + "loss": 12.257, + "step": 488790 + }, + { + "epoch": 0.9874069255849093, + "grad_norm": 549.3560791015625, + "learning_rate": 1.5276698060665007e-08, + "loss": 20.2712, + "step": 488800 + }, + { + "epoch": 0.9874271262175931, + "grad_norm": 237.40028381347656, + "learning_rate": 1.5249444290204584e-08, + "loss": 21.5498, + "step": 488810 + }, + { + "epoch": 0.9874473268502769, + "grad_norm": 361.716796875, + "learning_rate": 1.5222214814812897e-08, + "loss": 15.8732, + "step": 488820 + }, + { + "epoch": 0.9874675274829607, + "grad_norm": 281.2061462402344, + "learning_rate": 1.519500963462428e-08, + "loss": 13.3269, + "step": 488830 + }, + { + "epoch": 0.9874877281156446, + "grad_norm": 397.55853271484375, + "learning_rate": 1.5167828749770853e-08, + "loss": 19.2063, + "step": 488840 + }, + { + "epoch": 0.9875079287483284, + "grad_norm": 1179.2423095703125, + "learning_rate": 1.5140672160384174e-08, + "loss": 27.6955, + 
"step": 488850 + }, + { + "epoch": 0.9875281293810122, + "grad_norm": 160.7357940673828, + "learning_rate": 1.511353986659747e-08, + "loss": 12.1364, + "step": 488860 + }, + { + "epoch": 0.987548330013696, + "grad_norm": 302.5446472167969, + "learning_rate": 1.508643186854286e-08, + "loss": 12.375, + "step": 488870 + }, + { + "epoch": 0.9875685306463798, + "grad_norm": 17.310794830322266, + "learning_rate": 1.505934816635246e-08, + "loss": 22.2095, + "step": 488880 + }, + { + "epoch": 0.9875887312790637, + "grad_norm": 464.3238220214844, + "learning_rate": 1.503228876015783e-08, + "loss": 14.6066, + "step": 488890 + }, + { + "epoch": 0.9876089319117475, + "grad_norm": 376.7144775390625, + "learning_rate": 1.500525365009109e-08, + "loss": 24.8198, + "step": 488900 + }, + { + "epoch": 0.9876291325444313, + "grad_norm": 296.1361083984375, + "learning_rate": 1.4978242836284908e-08, + "loss": 18.8745, + "step": 488910 + }, + { + "epoch": 0.9876493331771151, + "grad_norm": 398.23638916015625, + "learning_rate": 1.4951256318869733e-08, + "loss": 16.8605, + "step": 488920 + }, + { + "epoch": 0.9876695338097989, + "grad_norm": 227.01824951171875, + "learning_rate": 1.4924294097977687e-08, + "loss": 23.3466, + "step": 488930 + }, + { + "epoch": 0.9876897344424828, + "grad_norm": 2.6604208946228027, + "learning_rate": 1.4897356173739774e-08, + "loss": 8.3296, + "step": 488940 + }, + { + "epoch": 0.9877099350751666, + "grad_norm": 390.50103759765625, + "learning_rate": 1.4870442546287555e-08, + "loss": 10.9325, + "step": 488950 + }, + { + "epoch": 0.9877301357078504, + "grad_norm": 259.9412536621094, + "learning_rate": 1.4843553215752037e-08, + "loss": 7.4341, + "step": 488960 + }, + { + "epoch": 0.9877503363405342, + "grad_norm": 233.6974334716797, + "learning_rate": 1.4816688182264782e-08, + "loss": 28.2152, + "step": 488970 + }, + { + "epoch": 0.987770536973218, + "grad_norm": 143.91610717773438, + "learning_rate": 1.478984744595624e-08, + "loss": 14.1742, + "step": 488980 + }, + { + "epoch": 0.9877907376059019, + "grad_norm": 380.85211181640625, + "learning_rate": 1.4763031006957417e-08, + "loss": 19.1149, + "step": 488990 + }, + { + "epoch": 0.9878109382385857, + "grad_norm": 2.1768627166748047, + "learning_rate": 1.4736238865398766e-08, + "loss": 23.0437, + "step": 489000 + }, + { + "epoch": 0.9878311388712695, + "grad_norm": 395.1035461425781, + "learning_rate": 1.4709471021411293e-08, + "loss": 22.9502, + "step": 489010 + }, + { + "epoch": 0.9878513395039533, + "grad_norm": 282.9913024902344, + "learning_rate": 1.4682727475124891e-08, + "loss": 8.5003, + "step": 489020 + }, + { + "epoch": 0.987871540136637, + "grad_norm": 554.9068603515625, + "learning_rate": 1.4656008226670571e-08, + "loss": 17.3521, + "step": 489030 + }, + { + "epoch": 0.9878917407693208, + "grad_norm": 899.536865234375, + "learning_rate": 1.462931327617767e-08, + "loss": 16.8683, + "step": 489040 + }, + { + "epoch": 0.9879119414020047, + "grad_norm": 233.77371215820312, + "learning_rate": 1.4602642623777752e-08, + "loss": 15.6234, + "step": 489050 + }, + { + "epoch": 0.9879321420346885, + "grad_norm": 900.2330932617188, + "learning_rate": 1.4575996269599046e-08, + "loss": 23.9059, + "step": 489060 + }, + { + "epoch": 0.9879523426673723, + "grad_norm": 463.00982666015625, + "learning_rate": 1.454937421377256e-08, + "loss": 24.6737, + "step": 489070 + }, + { + "epoch": 0.9879725433000561, + "grad_norm": 685.919921875, + "learning_rate": 1.4522776456427635e-08, + "loss": 13.0379, + "step": 489080 + }, + { + "epoch": 
0.98799274393274, + "grad_norm": 338.71246337890625, + "learning_rate": 1.4496202997694164e-08, + "loss": 11.774, + "step": 489090 + }, + { + "epoch": 0.9880129445654238, + "grad_norm": 429.50885009765625, + "learning_rate": 1.4469653837701491e-08, + "loss": 19.8358, + "step": 489100 + }, + { + "epoch": 0.9880331451981076, + "grad_norm": 115.12040710449219, + "learning_rate": 1.4443128976579513e-08, + "loss": 10.3165, + "step": 489110 + }, + { + "epoch": 0.9880533458307914, + "grad_norm": 182.71620178222656, + "learning_rate": 1.4416628414456457e-08, + "loss": 17.5581, + "step": 489120 + }, + { + "epoch": 0.9880735464634752, + "grad_norm": 444.47491455078125, + "learning_rate": 1.4390152151462222e-08, + "loss": 13.104, + "step": 489130 + }, + { + "epoch": 0.988093747096159, + "grad_norm": 201.85137939453125, + "learning_rate": 1.4363700187725593e-08, + "loss": 6.3272, + "step": 489140 + }, + { + "epoch": 0.9881139477288429, + "grad_norm": 151.43687438964844, + "learning_rate": 1.4337272523375911e-08, + "loss": 12.1407, + "step": 489150 + }, + { + "epoch": 0.9881341483615267, + "grad_norm": 283.36285400390625, + "learning_rate": 1.4310869158541408e-08, + "loss": 12.8991, + "step": 489160 + }, + { + "epoch": 0.9881543489942105, + "grad_norm": 352.98529052734375, + "learning_rate": 1.4284490093351421e-08, + "loss": 19.3828, + "step": 489170 + }, + { + "epoch": 0.9881745496268943, + "grad_norm": 564.781005859375, + "learning_rate": 1.425813532793363e-08, + "loss": 10.2488, + "step": 489180 + }, + { + "epoch": 0.9881947502595781, + "grad_norm": 437.80712890625, + "learning_rate": 1.4231804862417375e-08, + "loss": 16.5281, + "step": 489190 + }, + { + "epoch": 0.988214950892262, + "grad_norm": 422.3405456542969, + "learning_rate": 1.4205498696930332e-08, + "loss": 11.1556, + "step": 489200 + }, + { + "epoch": 0.9882351515249458, + "grad_norm": 362.02130126953125, + "learning_rate": 1.4179216831601284e-08, + "loss": 17.0503, + "step": 489210 + }, + { + "epoch": 0.9882553521576296, + "grad_norm": 266.0114440917969, + "learning_rate": 1.4152959266557354e-08, + "loss": 9.8067, + "step": 489220 + }, + { + "epoch": 0.9882755527903134, + "grad_norm": 357.19866943359375, + "learning_rate": 1.4126726001927882e-08, + "loss": 13.612, + "step": 489230 + }, + { + "epoch": 0.9882957534229972, + "grad_norm": 479.8896484375, + "learning_rate": 1.4100517037839989e-08, + "loss": 19.1753, + "step": 489240 + }, + { + "epoch": 0.9883159540556811, + "grad_norm": 44.63584518432617, + "learning_rate": 1.4074332374421351e-08, + "loss": 14.044, + "step": 489250 + }, + { + "epoch": 0.9883361546883649, + "grad_norm": 516.9661865234375, + "learning_rate": 1.4048172011799643e-08, + "loss": 23.0173, + "step": 489260 + }, + { + "epoch": 0.9883563553210487, + "grad_norm": 655.0848999023438, + "learning_rate": 1.4022035950102541e-08, + "loss": 15.7647, + "step": 489270 + }, + { + "epoch": 0.9883765559537324, + "grad_norm": 5.948569297790527, + "learning_rate": 1.3995924189457167e-08, + "loss": 14.569, + "step": 489280 + }, + { + "epoch": 0.9883967565864162, + "grad_norm": 198.19515991210938, + "learning_rate": 1.3969836729990637e-08, + "loss": 12.5366, + "step": 489290 + }, + { + "epoch": 0.9884169572191001, + "grad_norm": 334.4207458496094, + "learning_rate": 1.3943773571831188e-08, + "loss": 5.7287, + "step": 489300 + }, + { + "epoch": 0.9884371578517839, + "grad_norm": 279.3717956542969, + "learning_rate": 1.3917734715104269e-08, + "loss": 31.223, + "step": 489310 + }, + { + "epoch": 0.9884573584844677, + "grad_norm": 
610.1446533203125, + "learning_rate": 1.3891720159938116e-08, + "loss": 22.999, + "step": 489320 + }, + { + "epoch": 0.9884775591171515, + "grad_norm": 296.9520263671875, + "learning_rate": 1.3865729906458735e-08, + "loss": 15.9857, + "step": 489330 + }, + { + "epoch": 0.9884977597498353, + "grad_norm": 521.7493896484375, + "learning_rate": 1.3839763954792695e-08, + "loss": 25.9349, + "step": 489340 + }, + { + "epoch": 0.9885179603825192, + "grad_norm": 238.2027587890625, + "learning_rate": 1.3813822305067115e-08, + "loss": 18.3275, + "step": 489350 + }, + { + "epoch": 0.988538161015203, + "grad_norm": 464.864501953125, + "learning_rate": 1.378790495740856e-08, + "loss": 18.1847, + "step": 489360 + }, + { + "epoch": 0.9885583616478868, + "grad_norm": 605.761474609375, + "learning_rate": 1.376201191194304e-08, + "loss": 16.1985, + "step": 489370 + }, + { + "epoch": 0.9885785622805706, + "grad_norm": 100.96516418457031, + "learning_rate": 1.3736143168796012e-08, + "loss": 15.8867, + "step": 489380 + }, + { + "epoch": 0.9885987629132544, + "grad_norm": 233.3070831298828, + "learning_rate": 1.371029872809515e-08, + "loss": 19.417, + "step": 489390 + }, + { + "epoch": 0.9886189635459383, + "grad_norm": 244.88894653320312, + "learning_rate": 1.3684478589964801e-08, + "loss": 15.7179, + "step": 489400 + }, + { + "epoch": 0.9886391641786221, + "grad_norm": 681.5443115234375, + "learning_rate": 1.3658682754532082e-08, + "loss": 28.7913, + "step": 489410 + }, + { + "epoch": 0.9886593648113059, + "grad_norm": 0.24459843337535858, + "learning_rate": 1.3632911221921896e-08, + "loss": 14.4319, + "step": 489420 + }, + { + "epoch": 0.9886795654439897, + "grad_norm": 497.12847900390625, + "learning_rate": 1.3607163992259697e-08, + "loss": 11.9154, + "step": 489430 + }, + { + "epoch": 0.9886997660766735, + "grad_norm": 697.6969604492188, + "learning_rate": 1.3581441065672052e-08, + "loss": 19.8152, + "step": 489440 + }, + { + "epoch": 0.9887199667093574, + "grad_norm": 733.26708984375, + "learning_rate": 1.355574244228386e-08, + "loss": 22.7065, + "step": 489450 + }, + { + "epoch": 0.9887401673420412, + "grad_norm": 430.14019775390625, + "learning_rate": 1.3530068122219464e-08, + "loss": 34.7864, + "step": 489460 + }, + { + "epoch": 0.988760367974725, + "grad_norm": 565.5632934570312, + "learning_rate": 1.3504418105604877e-08, + "loss": 17.5237, + "step": 489470 + }, + { + "epoch": 0.9887805686074088, + "grad_norm": 515.758544921875, + "learning_rate": 1.3478792392565553e-08, + "loss": 20.4981, + "step": 489480 + }, + { + "epoch": 0.9888007692400926, + "grad_norm": 424.0845947265625, + "learning_rate": 1.3453190983225285e-08, + "loss": 14.8029, + "step": 489490 + }, + { + "epoch": 0.9888209698727765, + "grad_norm": 356.08123779296875, + "learning_rate": 1.3427613877709523e-08, + "loss": 22.7797, + "step": 489500 + }, + { + "epoch": 0.9888411705054603, + "grad_norm": 481.2988586425781, + "learning_rate": 1.3402061076142613e-08, + "loss": 18.4806, + "step": 489510 + }, + { + "epoch": 0.9888613711381441, + "grad_norm": 163.1424560546875, + "learning_rate": 1.3376532578649459e-08, + "loss": 15.1123, + "step": 489520 + }, + { + "epoch": 0.9888815717708279, + "grad_norm": 243.24937438964844, + "learning_rate": 1.3351028385354402e-08, + "loss": 13.5655, + "step": 489530 + }, + { + "epoch": 0.9889017724035116, + "grad_norm": 319.78265380859375, + "learning_rate": 1.3325548496381235e-08, + "loss": 18.1097, + "step": 489540 + }, + { + "epoch": 0.9889219730361954, + "grad_norm": 457.2274475097656, + "learning_rate": 
1.3300092911854856e-08, + "loss": 11.7467, + "step": 489550 + }, + { + "epoch": 0.9889421736688793, + "grad_norm": 92.75702667236328, + "learning_rate": 1.3274661631899055e-08, + "loss": 29.7643, + "step": 489560 + }, + { + "epoch": 0.9889623743015631, + "grad_norm": 312.5694885253906, + "learning_rate": 1.3249254656637622e-08, + "loss": 10.471, + "step": 489570 + }, + { + "epoch": 0.9889825749342469, + "grad_norm": 290.5263671875, + "learning_rate": 1.3223871986194348e-08, + "loss": 27.3382, + "step": 489580 + }, + { + "epoch": 0.9890027755669307, + "grad_norm": 342.47674560546875, + "learning_rate": 1.3198513620693022e-08, + "loss": 37.4274, + "step": 489590 + }, + { + "epoch": 0.9890229761996145, + "grad_norm": 383.2030029296875, + "learning_rate": 1.3173179560257432e-08, + "loss": 11.5588, + "step": 489600 + }, + { + "epoch": 0.9890431768322984, + "grad_norm": 267.69140625, + "learning_rate": 1.314786980501137e-08, + "loss": 7.4811, + "step": 489610 + }, + { + "epoch": 0.9890633774649822, + "grad_norm": 234.81167602539062, + "learning_rate": 1.3122584355076962e-08, + "loss": 12.6162, + "step": 489620 + }, + { + "epoch": 0.989083578097666, + "grad_norm": 293.77703857421875, + "learning_rate": 1.3097323210579104e-08, + "loss": 19.0022, + "step": 489630 + }, + { + "epoch": 0.9891037787303498, + "grad_norm": 183.48153686523438, + "learning_rate": 1.307208637163937e-08, + "loss": 19.9693, + "step": 489640 + }, + { + "epoch": 0.9891239793630336, + "grad_norm": 7.569753170013428, + "learning_rate": 1.3046873838381546e-08, + "loss": 11.5113, + "step": 489650 + }, + { + "epoch": 0.9891441799957175, + "grad_norm": 1021.123779296875, + "learning_rate": 1.3021685610928869e-08, + "loss": 27.3139, + "step": 489660 + }, + { + "epoch": 0.9891643806284013, + "grad_norm": 362.0072021484375, + "learning_rate": 1.2996521689403463e-08, + "loss": 24.0982, + "step": 489670 + }, + { + "epoch": 0.9891845812610851, + "grad_norm": 657.249267578125, + "learning_rate": 1.2971382073928007e-08, + "loss": 15.8356, + "step": 489680 + }, + { + "epoch": 0.9892047818937689, + "grad_norm": 462.3509521484375, + "learning_rate": 1.2946266764625182e-08, + "loss": 14.8603, + "step": 489690 + }, + { + "epoch": 0.9892249825264527, + "grad_norm": 413.03753662109375, + "learning_rate": 1.292117576161711e-08, + "loss": 33.4044, + "step": 489700 + }, + { + "epoch": 0.9892451831591366, + "grad_norm": 317.8731384277344, + "learning_rate": 1.2896109065027029e-08, + "loss": 5.5308, + "step": 489710 + }, + { + "epoch": 0.9892653837918204, + "grad_norm": 156.2231903076172, + "learning_rate": 1.2871066674975951e-08, + "loss": 13.7939, + "step": 489720 + }, + { + "epoch": 0.9892855844245042, + "grad_norm": 251.47769165039062, + "learning_rate": 1.2846048591586558e-08, + "loss": 30.8586, + "step": 489730 + }, + { + "epoch": 0.989305785057188, + "grad_norm": 72.52568817138672, + "learning_rate": 1.2821054814980971e-08, + "loss": 10.4397, + "step": 489740 + }, + { + "epoch": 0.9893259856898718, + "grad_norm": 487.9237365722656, + "learning_rate": 1.2796085345280207e-08, + "loss": 8.9422, + "step": 489750 + }, + { + "epoch": 0.9893461863225557, + "grad_norm": 412.9870910644531, + "learning_rate": 1.277114018260639e-08, + "loss": 34.0147, + "step": 489760 + }, + { + "epoch": 0.9893663869552395, + "grad_norm": 448.53582763671875, + "learning_rate": 1.2746219327081644e-08, + "loss": 7.8732, + "step": 489770 + }, + { + "epoch": 0.9893865875879233, + "grad_norm": 363.7297668457031, + "learning_rate": 1.2721322778826983e-08, + "loss": 6.6716, + 
"step": 489780 + }, + { + "epoch": 0.989406788220607, + "grad_norm": 118.27398681640625, + "learning_rate": 1.2696450537963422e-08, + "loss": 19.0261, + "step": 489790 + }, + { + "epoch": 0.9894269888532908, + "grad_norm": 408.3044128417969, + "learning_rate": 1.2671602604612531e-08, + "loss": 26.6716, + "step": 489800 + }, + { + "epoch": 0.9894471894859747, + "grad_norm": 380.0968322753906, + "learning_rate": 1.2646778978895325e-08, + "loss": 21.3715, + "step": 489810 + }, + { + "epoch": 0.9894673901186585, + "grad_norm": 324.5020446777344, + "learning_rate": 1.2621979660932814e-08, + "loss": 30.9385, + "step": 489820 + }, + { + "epoch": 0.9894875907513423, + "grad_norm": 225.742431640625, + "learning_rate": 1.2597204650845463e-08, + "loss": 8.4652, + "step": 489830 + }, + { + "epoch": 0.9895077913840261, + "grad_norm": 376.7015686035156, + "learning_rate": 1.2572453948755393e-08, + "loss": 18.7972, + "step": 489840 + }, + { + "epoch": 0.9895279920167099, + "grad_norm": 23.484270095825195, + "learning_rate": 1.2547727554781398e-08, + "loss": 18.036, + "step": 489850 + }, + { + "epoch": 0.9895481926493938, + "grad_norm": 273.4000549316406, + "learning_rate": 1.2523025469045047e-08, + "loss": 14.1293, + "step": 489860 + }, + { + "epoch": 0.9895683932820776, + "grad_norm": 452.1612243652344, + "learning_rate": 1.2498347691666801e-08, + "loss": 11.7554, + "step": 489870 + }, + { + "epoch": 0.9895885939147614, + "grad_norm": 178.48948669433594, + "learning_rate": 1.2473694222766563e-08, + "loss": 14.9273, + "step": 489880 + }, + { + "epoch": 0.9896087945474452, + "grad_norm": 423.0238342285156, + "learning_rate": 1.2449065062464794e-08, + "loss": 14.2976, + "step": 489890 + }, + { + "epoch": 0.989628995180129, + "grad_norm": 938.166015625, + "learning_rate": 1.2424460210881394e-08, + "loss": 26.3438, + "step": 489900 + }, + { + "epoch": 0.9896491958128129, + "grad_norm": 0.0, + "learning_rate": 1.2399879668136271e-08, + "loss": 15.9938, + "step": 489910 + }, + { + "epoch": 0.9896693964454967, + "grad_norm": 258.1879577636719, + "learning_rate": 1.2375323434348773e-08, + "loss": 22.4046, + "step": 489920 + }, + { + "epoch": 0.9896895970781805, + "grad_norm": 267.1070556640625, + "learning_rate": 1.235079150963936e-08, + "loss": 15.2209, + "step": 489930 + }, + { + "epoch": 0.9897097977108643, + "grad_norm": 313.09881591796875, + "learning_rate": 1.2326283894127378e-08, + "loss": 34.5608, + "step": 489940 + }, + { + "epoch": 0.9897299983435481, + "grad_norm": 358.4794616699219, + "learning_rate": 1.2301800587932179e-08, + "loss": 32.3783, + "step": 489950 + }, + { + "epoch": 0.989750198976232, + "grad_norm": 320.7735595703125, + "learning_rate": 1.2277341591172553e-08, + "loss": 19.7975, + "step": 489960 + }, + { + "epoch": 0.9897703996089158, + "grad_norm": 479.1424255371094, + "learning_rate": 1.225290690396841e-08, + "loss": 14.985, + "step": 489970 + }, + { + "epoch": 0.9897906002415996, + "grad_norm": 0.9020110368728638, + "learning_rate": 1.2228496526439093e-08, + "loss": 14.1797, + "step": 489980 + }, + { + "epoch": 0.9898108008742834, + "grad_norm": 21.70526123046875, + "learning_rate": 1.2204110458702844e-08, + "loss": 10.3022, + "step": 489990 + }, + { + "epoch": 0.9898310015069672, + "grad_norm": 359.36590576171875, + "learning_rate": 1.2179748700879013e-08, + "loss": 20.8642, + "step": 490000 + }, + { + "epoch": 0.989851202139651, + "grad_norm": 230.55552673339844, + "learning_rate": 1.2155411253085835e-08, + "loss": 12.0643, + "step": 490010 + }, + { + "epoch": 0.9898714027723349, + 
"grad_norm": 252.59933471679688, + "learning_rate": 1.2131098115442108e-08, + "loss": 21.6534, + "step": 490020 + }, + { + "epoch": 0.9898916034050187, + "grad_norm": 496.96124267578125, + "learning_rate": 1.2106809288067178e-08, + "loss": 19.3856, + "step": 490030 + }, + { + "epoch": 0.9899118040377025, + "grad_norm": 88.76834869384766, + "learning_rate": 1.208254477107762e-08, + "loss": 12.8934, + "step": 490040 + }, + { + "epoch": 0.9899320046703862, + "grad_norm": 15.983965873718262, + "learning_rate": 1.2058304564593893e-08, + "loss": 22.7476, + "step": 490050 + }, + { + "epoch": 0.98995220530307, + "grad_norm": 306.596435546875, + "learning_rate": 1.2034088668732568e-08, + "loss": 16.6889, + "step": 490060 + }, + { + "epoch": 0.9899724059357539, + "grad_norm": 292.4953918457031, + "learning_rate": 1.2009897083611888e-08, + "loss": 13.4741, + "step": 490070 + }, + { + "epoch": 0.9899926065684377, + "grad_norm": 35.3122673034668, + "learning_rate": 1.1985729809350088e-08, + "loss": 8.3754, + "step": 490080 + }, + { + "epoch": 0.9900128072011215, + "grad_norm": 322.98577880859375, + "learning_rate": 1.1961586846064855e-08, + "loss": 15.9051, + "step": 490090 + }, + { + "epoch": 0.9900330078338053, + "grad_norm": 405.03387451171875, + "learning_rate": 1.1937468193873869e-08, + "loss": 14.0023, + "step": 490100 + }, + { + "epoch": 0.9900532084664891, + "grad_norm": 319.7568359375, + "learning_rate": 1.1913373852894816e-08, + "loss": 19.7235, + "step": 490110 + }, + { + "epoch": 0.990073409099173, + "grad_norm": 1.4424026012420654, + "learning_rate": 1.1889303823244825e-08, + "loss": 13.2511, + "step": 490120 + }, + { + "epoch": 0.9900936097318568, + "grad_norm": 119.40985107421875, + "learning_rate": 1.1865258105041577e-08, + "loss": 14.8159, + "step": 490130 + }, + { + "epoch": 0.9901138103645406, + "grad_norm": 676.8298950195312, + "learning_rate": 1.1841236698402202e-08, + "loss": 30.2779, + "step": 490140 + }, + { + "epoch": 0.9901340109972244, + "grad_norm": 87.88396453857422, + "learning_rate": 1.1817239603443276e-08, + "loss": 21.4629, + "step": 490150 + }, + { + "epoch": 0.9901542116299082, + "grad_norm": 259.3276672363281, + "learning_rate": 1.1793266820282478e-08, + "loss": 10.1666, + "step": 490160 + }, + { + "epoch": 0.9901744122625921, + "grad_norm": 509.7081298828125, + "learning_rate": 1.1769318349036385e-08, + "loss": 20.826, + "step": 490170 + }, + { + "epoch": 0.9901946128952759, + "grad_norm": 166.15875244140625, + "learning_rate": 1.1745394189821013e-08, + "loss": 9.8491, + "step": 490180 + }, + { + "epoch": 0.9902148135279597, + "grad_norm": 406.0191650390625, + "learning_rate": 1.1721494342754048e-08, + "loss": 9.9096, + "step": 490190 + }, + { + "epoch": 0.9902350141606435, + "grad_norm": 195.5973358154297, + "learning_rate": 1.1697618807951504e-08, + "loss": 10.4411, + "step": 490200 + }, + { + "epoch": 0.9902552147933273, + "grad_norm": 97.89176177978516, + "learning_rate": 1.1673767585529404e-08, + "loss": 11.0284, + "step": 490210 + }, + { + "epoch": 0.9902754154260112, + "grad_norm": 1873.49365234375, + "learning_rate": 1.1649940675604876e-08, + "loss": 25.0696, + "step": 490220 + }, + { + "epoch": 0.990295616058695, + "grad_norm": 467.4261779785156, + "learning_rate": 1.1626138078293381e-08, + "loss": 14.5886, + "step": 490230 + }, + { + "epoch": 0.9903158166913788, + "grad_norm": 507.177734375, + "learning_rate": 1.1602359793710938e-08, + "loss": 23.0854, + "step": 490240 + }, + { + "epoch": 0.9903360173240626, + "grad_norm": 327.03594970703125, + 
"learning_rate": 1.1578605821973566e-08, + "loss": 17.8433, + "step": 490250 + }, + { + "epoch": 0.9903562179567464, + "grad_norm": 259.65167236328125, + "learning_rate": 1.1554876163197282e-08, + "loss": 12.3252, + "step": 490260 + }, + { + "epoch": 0.9903764185894303, + "grad_norm": 600.3280029296875, + "learning_rate": 1.1531170817496995e-08, + "loss": 13.0561, + "step": 490270 + }, + { + "epoch": 0.9903966192221141, + "grad_norm": 763.4371337890625, + "learning_rate": 1.1507489784989278e-08, + "loss": 14.3357, + "step": 490280 + }, + { + "epoch": 0.9904168198547979, + "grad_norm": 28.504867553710938, + "learning_rate": 1.1483833065789041e-08, + "loss": 14.0311, + "step": 490290 + }, + { + "epoch": 0.9904370204874817, + "grad_norm": 524.5272827148438, + "learning_rate": 1.146020066001119e-08, + "loss": 14.7662, + "step": 490300 + }, + { + "epoch": 0.9904572211201654, + "grad_norm": 280.05999755859375, + "learning_rate": 1.1436592567771188e-08, + "loss": 20.0143, + "step": 490310 + }, + { + "epoch": 0.9904774217528493, + "grad_norm": 394.6064147949219, + "learning_rate": 1.1413008789184498e-08, + "loss": 18.4672, + "step": 490320 + }, + { + "epoch": 0.9904976223855331, + "grad_norm": 38.09982681274414, + "learning_rate": 1.1389449324365476e-08, + "loss": 15.9906, + "step": 490330 + }, + { + "epoch": 0.9905178230182169, + "grad_norm": 122.05206298828125, + "learning_rate": 1.1365914173429582e-08, + "loss": 5.9164, + "step": 490340 + }, + { + "epoch": 0.9905380236509007, + "grad_norm": 219.7208709716797, + "learning_rate": 1.134240333649117e-08, + "loss": 18.8033, + "step": 490350 + }, + { + "epoch": 0.9905582242835845, + "grad_norm": 952.7105712890625, + "learning_rate": 1.1318916813664594e-08, + "loss": 21.4837, + "step": 490360 + }, + { + "epoch": 0.9905784249162684, + "grad_norm": 0.0, + "learning_rate": 1.129545460506476e-08, + "loss": 24.7407, + "step": 490370 + }, + { + "epoch": 0.9905986255489522, + "grad_norm": 585.8445434570312, + "learning_rate": 1.1272016710806021e-08, + "loss": 27.2635, + "step": 490380 + }, + { + "epoch": 0.990618826181636, + "grad_norm": 704.29345703125, + "learning_rate": 1.1248603131002178e-08, + "loss": 48.7618, + "step": 490390 + }, + { + "epoch": 0.9906390268143198, + "grad_norm": 1576.3946533203125, + "learning_rate": 1.1225213865767026e-08, + "loss": 40.8802, + "step": 490400 + }, + { + "epoch": 0.9906592274470036, + "grad_norm": 373.20452880859375, + "learning_rate": 1.1201848915216029e-08, + "loss": 14.5402, + "step": 490410 + }, + { + "epoch": 0.9906794280796875, + "grad_norm": 277.86138916015625, + "learning_rate": 1.1178508279461875e-08, + "loss": 26.6005, + "step": 490420 + }, + { + "epoch": 0.9906996287123713, + "grad_norm": 628.8119506835938, + "learning_rate": 1.115519195861836e-08, + "loss": 22.6955, + "step": 490430 + }, + { + "epoch": 0.9907198293450551, + "grad_norm": 659.1356201171875, + "learning_rate": 1.1131899952799285e-08, + "loss": 26.2416, + "step": 490440 + }, + { + "epoch": 0.9907400299777389, + "grad_norm": 454.5382995605469, + "learning_rate": 1.1108632262118446e-08, + "loss": 27.2168, + "step": 490450 + }, + { + "epoch": 0.9907602306104227, + "grad_norm": 0.0, + "learning_rate": 1.1085388886689085e-08, + "loss": 13.5798, + "step": 490460 + }, + { + "epoch": 0.9907804312431066, + "grad_norm": 494.09881591796875, + "learning_rate": 1.1062169826624447e-08, + "loss": 23.5275, + "step": 490470 + }, + { + "epoch": 0.9908006318757904, + "grad_norm": 211.5029296875, + "learning_rate": 1.1038975082037772e-08, + "loss": 8.1438, + 
"step": 490480 + }, + { + "epoch": 0.9908208325084742, + "grad_norm": 463.34075927734375, + "learning_rate": 1.101580465304175e-08, + "loss": 17.0053, + "step": 490490 + }, + { + "epoch": 0.990841033141158, + "grad_norm": 733.0955810546875, + "learning_rate": 1.0992658539750179e-08, + "loss": 28.9807, + "step": 490500 + }, + { + "epoch": 0.9908612337738418, + "grad_norm": 309.8749084472656, + "learning_rate": 1.0969536742274633e-08, + "loss": 21.6901, + "step": 490510 + }, + { + "epoch": 0.9908814344065257, + "grad_norm": 7.222733020782471, + "learning_rate": 1.0946439260728914e-08, + "loss": 8.0209, + "step": 490520 + }, + { + "epoch": 0.9909016350392095, + "grad_norm": 118.79547882080078, + "learning_rate": 1.0923366095225152e-08, + "loss": 19.2935, + "step": 490530 + }, + { + "epoch": 0.9909218356718933, + "grad_norm": 16.88963508605957, + "learning_rate": 1.090031724587548e-08, + "loss": 7.4517, + "step": 490540 + }, + { + "epoch": 0.9909420363045771, + "grad_norm": 375.42877197265625, + "learning_rate": 1.0877292712792586e-08, + "loss": 16.8823, + "step": 490550 + }, + { + "epoch": 0.9909622369372608, + "grad_norm": 546.7255859375, + "learning_rate": 1.0854292496089158e-08, + "loss": 18.4394, + "step": 490560 + }, + { + "epoch": 0.9909824375699446, + "grad_norm": 4.430992126464844, + "learning_rate": 1.0831316595876218e-08, + "loss": 13.5275, + "step": 490570 + }, + { + "epoch": 0.9910026382026285, + "grad_norm": 289.63616943359375, + "learning_rate": 1.0808365012266454e-08, + "loss": 14.6157, + "step": 490580 + }, + { + "epoch": 0.9910228388353123, + "grad_norm": 147.71971130371094, + "learning_rate": 1.0785437745371996e-08, + "loss": 12.5525, + "step": 490590 + }, + { + "epoch": 0.9910430394679961, + "grad_norm": 162.67674255371094, + "learning_rate": 1.076253479530387e-08, + "loss": 12.0218, + "step": 490600 + }, + { + "epoch": 0.9910632401006799, + "grad_norm": 108.62178039550781, + "learning_rate": 1.0739656162174205e-08, + "loss": 16.5417, + "step": 490610 + }, + { + "epoch": 0.9910834407333637, + "grad_norm": 781.3118286132812, + "learning_rate": 1.0716801846094026e-08, + "loss": 30.3789, + "step": 490620 + }, + { + "epoch": 0.9911036413660476, + "grad_norm": 172.30718994140625, + "learning_rate": 1.0693971847175466e-08, + "loss": 23.6568, + "step": 490630 + }, + { + "epoch": 0.9911238419987314, + "grad_norm": 730.6552124023438, + "learning_rate": 1.067116616552899e-08, + "loss": 20.7877, + "step": 490640 + }, + { + "epoch": 0.9911440426314152, + "grad_norm": 232.4882354736328, + "learning_rate": 1.0648384801266176e-08, + "loss": 8.2937, + "step": 490650 + }, + { + "epoch": 0.991164243264099, + "grad_norm": 0.0, + "learning_rate": 1.0625627754498048e-08, + "loss": 10.4492, + "step": 490660 + }, + { + "epoch": 0.9911844438967828, + "grad_norm": 841.0191040039062, + "learning_rate": 1.0602895025335624e-08, + "loss": 15.407, + "step": 490670 + }, + { + "epoch": 0.9912046445294667, + "grad_norm": 399.9212341308594, + "learning_rate": 1.0580186613888822e-08, + "loss": 14.38, + "step": 490680 + }, + { + "epoch": 0.9912248451621505, + "grad_norm": 776.1848754882812, + "learning_rate": 1.055750252026977e-08, + "loss": 33.8655, + "step": 490690 + }, + { + "epoch": 0.9912450457948343, + "grad_norm": 385.7315979003906, + "learning_rate": 1.0534842744588381e-08, + "loss": 29.3024, + "step": 490700 + }, + { + "epoch": 0.9912652464275181, + "grad_norm": 324.3099060058594, + "learning_rate": 1.0512207286954568e-08, + "loss": 23.4997, + "step": 490710 + }, + { + "epoch": 0.9912854470602019, + 
"grad_norm": 486.59674072265625, + "learning_rate": 1.0489596147479353e-08, + "loss": 24.3618, + "step": 490720 + }, + { + "epoch": 0.9913056476928858, + "grad_norm": 459.3517761230469, + "learning_rate": 1.0467009326272648e-08, + "loss": 15.0182, + "step": 490730 + }, + { + "epoch": 0.9913258483255696, + "grad_norm": 488.06982421875, + "learning_rate": 1.044444682344492e-08, + "loss": 16.9264, + "step": 490740 + }, + { + "epoch": 0.9913460489582534, + "grad_norm": 2.73164701461792, + "learning_rate": 1.0421908639104971e-08, + "loss": 31.6905, + "step": 490750 + }, + { + "epoch": 0.9913662495909372, + "grad_norm": 74.6050796508789, + "learning_rate": 1.039939477336438e-08, + "loss": 9.8221, + "step": 490760 + }, + { + "epoch": 0.991386450223621, + "grad_norm": 743.1727294921875, + "learning_rate": 1.0376905226331391e-08, + "loss": 27.881, + "step": 490770 + }, + { + "epoch": 0.9914066508563049, + "grad_norm": 181.60882568359375, + "learning_rate": 1.0354439998116473e-08, + "loss": 13.2241, + "step": 490780 + }, + { + "epoch": 0.9914268514889887, + "grad_norm": 359.5448913574219, + "learning_rate": 1.0331999088828425e-08, + "loss": 25.4082, + "step": 490790 + }, + { + "epoch": 0.9914470521216725, + "grad_norm": 87.32952880859375, + "learning_rate": 1.030958249857772e-08, + "loss": 16.0133, + "step": 490800 + }, + { + "epoch": 0.9914672527543563, + "grad_norm": 140.4916229248047, + "learning_rate": 1.02871902274726e-08, + "loss": 19.8484, + "step": 490810 + }, + { + "epoch": 0.99148745338704, + "grad_norm": 243.94195556640625, + "learning_rate": 1.026482227562242e-08, + "loss": 16.5282, + "step": 490820 + }, + { + "epoch": 0.9915076540197238, + "grad_norm": 245.15191650390625, + "learning_rate": 1.0242478643136545e-08, + "loss": 23.5337, + "step": 490830 + }, + { + "epoch": 0.9915278546524077, + "grad_norm": 496.2458801269531, + "learning_rate": 1.0220159330123214e-08, + "loss": 16.6147, + "step": 490840 + }, + { + "epoch": 0.9915480552850915, + "grad_norm": 665.3242797851562, + "learning_rate": 1.0197864336691788e-08, + "loss": 22.6857, + "step": 490850 + }, + { + "epoch": 0.9915682559177753, + "grad_norm": 360.8750915527344, + "learning_rate": 1.0175593662951066e-08, + "loss": 18.3113, + "step": 490860 + }, + { + "epoch": 0.9915884565504591, + "grad_norm": 198.45236206054688, + "learning_rate": 1.0153347309009299e-08, + "loss": 12.9173, + "step": 490870 + }, + { + "epoch": 0.991608657183143, + "grad_norm": 1000.5813598632812, + "learning_rate": 1.013112527497473e-08, + "loss": 22.6931, + "step": 490880 + }, + { + "epoch": 0.9916288578158268, + "grad_norm": 198.7476806640625, + "learning_rate": 1.0108927560955606e-08, + "loss": 13.5263, + "step": 490890 + }, + { + "epoch": 0.9916490584485106, + "grad_norm": 586.5575561523438, + "learning_rate": 1.008675416706073e-08, + "loss": 18.2384, + "step": 490900 + }, + { + "epoch": 0.9916692590811944, + "grad_norm": 230.30209350585938, + "learning_rate": 1.0064605093397794e-08, + "loss": 20.9739, + "step": 490910 + }, + { + "epoch": 0.9916894597138782, + "grad_norm": 1089.9083251953125, + "learning_rate": 1.0042480340075045e-08, + "loss": 22.8103, + "step": 490920 + }, + { + "epoch": 0.991709660346562, + "grad_norm": 1334.685791015625, + "learning_rate": 1.0020379907199618e-08, + "loss": 28.0976, + "step": 490930 + }, + { + "epoch": 0.9917298609792459, + "grad_norm": 217.3130340576172, + "learning_rate": 9.99830379487976e-09, + "loss": 20.0891, + "step": 490940 + }, + { + "epoch": 0.9917500616119297, + "grad_norm": 727.6640014648438, + 
"learning_rate": 9.976252003223164e-09, + "loss": 32.0702, + "step": 490950 + }, + { + "epoch": 0.9917702622446135, + "grad_norm": 117.57544708251953, + "learning_rate": 9.954224532336965e-09, + "loss": 10.9159, + "step": 490960 + }, + { + "epoch": 0.9917904628772973, + "grad_norm": 627.7258911132812, + "learning_rate": 9.932221382328299e-09, + "loss": 12.5682, + "step": 490970 + }, + { + "epoch": 0.9918106635099811, + "grad_norm": 19.738685607910156, + "learning_rate": 9.91024255330486e-09, + "loss": 12.3592, + "step": 490980 + }, + { + "epoch": 0.991830864142665, + "grad_norm": 609.0294189453125, + "learning_rate": 9.888288045374339e-09, + "loss": 15.5357, + "step": 490990 + }, + { + "epoch": 0.9918510647753488, + "grad_norm": 381.98956298828125, + "learning_rate": 9.866357858642206e-09, + "loss": 11.6295, + "step": 491000 + }, + { + "epoch": 0.9918712654080326, + "grad_norm": 250.50408935546875, + "learning_rate": 9.844451993216708e-09, + "loss": 19.4155, + "step": 491010 + }, + { + "epoch": 0.9918914660407164, + "grad_norm": 813.5974731445312, + "learning_rate": 9.822570449203873e-09, + "loss": 14.552, + "step": 491020 + }, + { + "epoch": 0.9919116666734002, + "grad_norm": 275.0490417480469, + "learning_rate": 9.800713226710834e-09, + "loss": 12.0307, + "step": 491030 + }, + { + "epoch": 0.9919318673060841, + "grad_norm": 116.39804077148438, + "learning_rate": 9.77888032584362e-09, + "loss": 17.4847, + "step": 491040 + }, + { + "epoch": 0.9919520679387679, + "grad_norm": 178.0388946533203, + "learning_rate": 9.757071746708812e-09, + "loss": 12.5501, + "step": 491050 + }, + { + "epoch": 0.9919722685714517, + "grad_norm": 22.949909210205078, + "learning_rate": 9.735287489413547e-09, + "loss": 10.2942, + "step": 491060 + }, + { + "epoch": 0.9919924692041354, + "grad_norm": 140.55003356933594, + "learning_rate": 9.71352755406274e-09, + "loss": 8.9796, + "step": 491070 + }, + { + "epoch": 0.9920126698368192, + "grad_norm": 187.78439331054688, + "learning_rate": 9.691791940762418e-09, + "loss": 26.1984, + "step": 491080 + }, + { + "epoch": 0.9920328704695031, + "grad_norm": 636.652587890625, + "learning_rate": 9.670080649619717e-09, + "loss": 33.8739, + "step": 491090 + }, + { + "epoch": 0.9920530711021869, + "grad_norm": 764.0996704101562, + "learning_rate": 9.64839368074011e-09, + "loss": 25.3695, + "step": 491100 + }, + { + "epoch": 0.9920732717348707, + "grad_norm": 547.4270629882812, + "learning_rate": 9.626731034227954e-09, + "loss": 18.0425, + "step": 491110 + }, + { + "epoch": 0.9920934723675545, + "grad_norm": 267.7295227050781, + "learning_rate": 9.605092710190943e-09, + "loss": 17.5693, + "step": 491120 + }, + { + "epoch": 0.9921136730002383, + "grad_norm": 484.971435546875, + "learning_rate": 9.583478708732886e-09, + "loss": 15.021, + "step": 491130 + }, + { + "epoch": 0.9921338736329222, + "grad_norm": 204.47447204589844, + "learning_rate": 9.561889029959249e-09, + "loss": 12.9957, + "step": 491140 + }, + { + "epoch": 0.992154074265606, + "grad_norm": 331.8847351074219, + "learning_rate": 9.540323673976615e-09, + "loss": 19.1705, + "step": 491150 + }, + { + "epoch": 0.9921742748982898, + "grad_norm": 244.10409545898438, + "learning_rate": 9.518782640888235e-09, + "loss": 19.9217, + "step": 491160 + }, + { + "epoch": 0.9921944755309736, + "grad_norm": 487.3038024902344, + "learning_rate": 9.497265930800691e-09, + "loss": 23.7191, + "step": 491170 + }, + { + "epoch": 0.9922146761636574, + "grad_norm": 764.28955078125, + "learning_rate": 9.475773543818345e-09, + "loss": 22.5038, + 
"step": 491180 + }, + { + "epoch": 0.9922348767963413, + "grad_norm": 481.8316345214844, + "learning_rate": 9.454305480045556e-09, + "loss": 22.492, + "step": 491190 + }, + { + "epoch": 0.9922550774290251, + "grad_norm": 852.2228393554688, + "learning_rate": 9.432861739586685e-09, + "loss": 21.2637, + "step": 491200 + }, + { + "epoch": 0.9922752780617089, + "grad_norm": 528.0516967773438, + "learning_rate": 9.411442322547204e-09, + "loss": 29.3942, + "step": 491210 + }, + { + "epoch": 0.9922954786943927, + "grad_norm": 437.1981201171875, + "learning_rate": 9.390047229031474e-09, + "loss": 12.1818, + "step": 491220 + }, + { + "epoch": 0.9923156793270765, + "grad_norm": 313.25225830078125, + "learning_rate": 9.368676459142744e-09, + "loss": 23.0166, + "step": 491230 + }, + { + "epoch": 0.9923358799597604, + "grad_norm": 614.193115234375, + "learning_rate": 9.347330012985933e-09, + "loss": 25.4203, + "step": 491240 + }, + { + "epoch": 0.9923560805924442, + "grad_norm": 527.8973999023438, + "learning_rate": 9.3260078906654e-09, + "loss": 21.4504, + "step": 491250 + }, + { + "epoch": 0.992376281225128, + "grad_norm": 103.88162994384766, + "learning_rate": 9.304710092283842e-09, + "loss": 19.1815, + "step": 491260 + }, + { + "epoch": 0.9923964818578118, + "grad_norm": 441.32818603515625, + "learning_rate": 9.283436617946173e-09, + "loss": 9.0619, + "step": 491270 + }, + { + "epoch": 0.9924166824904956, + "grad_norm": 191.62818908691406, + "learning_rate": 9.262187467756201e-09, + "loss": 16.3227, + "step": 491280 + }, + { + "epoch": 0.9924368831231795, + "grad_norm": 355.4009094238281, + "learning_rate": 9.24096264181662e-09, + "loss": 22.5769, + "step": 491290 + }, + { + "epoch": 0.9924570837558633, + "grad_norm": 576.244384765625, + "learning_rate": 9.219762140231237e-09, + "loss": 16.6766, + "step": 491300 + }, + { + "epoch": 0.9924772843885471, + "grad_norm": 161.81101989746094, + "learning_rate": 9.198585963103302e-09, + "loss": 16.9969, + "step": 491310 + }, + { + "epoch": 0.9924974850212309, + "grad_norm": 281.2635192871094, + "learning_rate": 9.177434110536065e-09, + "loss": 13.0341, + "step": 491320 + }, + { + "epoch": 0.9925176856539146, + "grad_norm": 40.424774169921875, + "learning_rate": 9.156306582633334e-09, + "loss": 11.9053, + "step": 491330 + }, + { + "epoch": 0.9925378862865984, + "grad_norm": 138.92991638183594, + "learning_rate": 9.135203379496693e-09, + "loss": 13.6707, + "step": 491340 + }, + { + "epoch": 0.9925580869192823, + "grad_norm": 447.6842956542969, + "learning_rate": 9.114124501230504e-09, + "loss": 21.1689, + "step": 491350 + }, + { + "epoch": 0.9925782875519661, + "grad_norm": 481.4045104980469, + "learning_rate": 9.09306994793635e-09, + "loss": 20.5411, + "step": 491360 + }, + { + "epoch": 0.9925984881846499, + "grad_norm": 524.9292602539062, + "learning_rate": 9.07203971971693e-09, + "loss": 30.819, + "step": 491370 + }, + { + "epoch": 0.9926186888173337, + "grad_norm": 438.7566833496094, + "learning_rate": 9.051033816675492e-09, + "loss": 13.5019, + "step": 491380 + }, + { + "epoch": 0.9926388894500175, + "grad_norm": 511.960205078125, + "learning_rate": 9.030052238913622e-09, + "loss": 18.0979, + "step": 491390 + }, + { + "epoch": 0.9926590900827014, + "grad_norm": 163.58566284179688, + "learning_rate": 9.009094986534572e-09, + "loss": 16.3686, + "step": 491400 + }, + { + "epoch": 0.9926792907153852, + "grad_norm": 2.4858508110046387, + "learning_rate": 8.988162059639371e-09, + "loss": 18.2185, + "step": 491410 + }, + { + "epoch": 0.992699491348069, + 
"grad_norm": 107.9386215209961, + "learning_rate": 8.967253458330715e-09, + "loss": 19.7233, + "step": 491420 + }, + { + "epoch": 0.9927196919807528, + "grad_norm": 398.495361328125, + "learning_rate": 8.946369182710191e-09, + "loss": 16.6712, + "step": 491430 + }, + { + "epoch": 0.9927398926134366, + "grad_norm": 326.6344909667969, + "learning_rate": 8.925509232879937e-09, + "loss": 17.6045, + "step": 491440 + }, + { + "epoch": 0.9927600932461205, + "grad_norm": 309.113525390625, + "learning_rate": 8.904673608940983e-09, + "loss": 19.8827, + "step": 491450 + }, + { + "epoch": 0.9927802938788043, + "grad_norm": 170.3911590576172, + "learning_rate": 8.883862310995473e-09, + "loss": 13.5585, + "step": 491460 + }, + { + "epoch": 0.9928004945114881, + "grad_norm": 92.2303466796875, + "learning_rate": 8.863075339144988e-09, + "loss": 16.4295, + "step": 491470 + }, + { + "epoch": 0.9928206951441719, + "grad_norm": 351.38494873046875, + "learning_rate": 8.842312693490563e-09, + "loss": 19.5431, + "step": 491480 + }, + { + "epoch": 0.9928408957768557, + "grad_norm": 807.0746459960938, + "learning_rate": 8.821574374132669e-09, + "loss": 16.084, + "step": 491490 + }, + { + "epoch": 0.9928610964095396, + "grad_norm": 412.27001953125, + "learning_rate": 8.800860381173448e-09, + "loss": 25.2724, + "step": 491500 + }, + { + "epoch": 0.9928812970422234, + "grad_norm": 172.590087890625, + "learning_rate": 8.780170714713931e-09, + "loss": 13.0839, + "step": 491510 + }, + { + "epoch": 0.9929014976749072, + "grad_norm": 154.42572021484375, + "learning_rate": 8.759505374854038e-09, + "loss": 19.079, + "step": 491520 + }, + { + "epoch": 0.992921698307591, + "grad_norm": 206.93417358398438, + "learning_rate": 8.738864361694799e-09, + "loss": 32.277, + "step": 491530 + }, + { + "epoch": 0.9929418989402748, + "grad_norm": 604.6255493164062, + "learning_rate": 8.718247675337243e-09, + "loss": 26.7745, + "step": 491540 + }, + { + "epoch": 0.9929620995729587, + "grad_norm": 462.11669921875, + "learning_rate": 8.697655315881293e-09, + "loss": 19.3062, + "step": 491550 + }, + { + "epoch": 0.9929823002056425, + "grad_norm": 433.7647399902344, + "learning_rate": 8.677087283427976e-09, + "loss": 20.0991, + "step": 491560 + }, + { + "epoch": 0.9930025008383263, + "grad_norm": 607.44287109375, + "learning_rate": 8.656543578077215e-09, + "loss": 19.0045, + "step": 491570 + }, + { + "epoch": 0.9930227014710101, + "grad_norm": 65.2020263671875, + "learning_rate": 8.636024199928927e-09, + "loss": 27.6576, + "step": 491580 + }, + { + "epoch": 0.9930429021036938, + "grad_norm": 720.201171875, + "learning_rate": 8.615529149083034e-09, + "loss": 27.6301, + "step": 491590 + }, + { + "epoch": 0.9930631027363777, + "grad_norm": 217.3624725341797, + "learning_rate": 8.595058425640012e-09, + "loss": 16.5394, + "step": 491600 + }, + { + "epoch": 0.9930833033690615, + "grad_norm": 468.4784851074219, + "learning_rate": 8.574612029699224e-09, + "loss": 21.0743, + "step": 491610 + }, + { + "epoch": 0.9931035040017453, + "grad_norm": 508.72381591796875, + "learning_rate": 8.554189961360037e-09, + "loss": 24.9038, + "step": 491620 + }, + { + "epoch": 0.9931237046344291, + "grad_norm": 186.89498901367188, + "learning_rate": 8.53379222072237e-09, + "loss": 14.5935, + "step": 491630 + }, + { + "epoch": 0.9931439052671129, + "grad_norm": 622.7369995117188, + "learning_rate": 8.513418807886142e-09, + "loss": 19.9239, + "step": 491640 + }, + { + "epoch": 0.9931641058997968, + "grad_norm": 121.98161315917969, + "learning_rate": 8.49306972294961e-09, 
+ "loss": 14.8514, + "step": 491650 + }, + { + "epoch": 0.9931843065324806, + "grad_norm": 531.5045166015625, + "learning_rate": 8.472744966012691e-09, + "loss": 17.4055, + "step": 491660 + }, + { + "epoch": 0.9932045071651644, + "grad_norm": 134.05284118652344, + "learning_rate": 8.452444537174198e-09, + "loss": 21.0864, + "step": 491670 + }, + { + "epoch": 0.9932247077978482, + "grad_norm": 486.0033264160156, + "learning_rate": 8.43216843653294e-09, + "loss": 9.6031, + "step": 491680 + }, + { + "epoch": 0.993244908430532, + "grad_norm": 10.919336318969727, + "learning_rate": 8.41191666418828e-09, + "loss": 17.866, + "step": 491690 + }, + { + "epoch": 0.9932651090632159, + "grad_norm": 273.6304626464844, + "learning_rate": 8.391689220238474e-09, + "loss": 14.4463, + "step": 491700 + }, + { + "epoch": 0.9932853096958997, + "grad_norm": 598.221435546875, + "learning_rate": 8.37148610478178e-09, + "loss": 18.2435, + "step": 491710 + }, + { + "epoch": 0.9933055103285835, + "grad_norm": 382.8218078613281, + "learning_rate": 8.351307317917002e-09, + "loss": 19.4453, + "step": 491720 + }, + { + "epoch": 0.9933257109612673, + "grad_norm": 47.96708679199219, + "learning_rate": 8.331152859742952e-09, + "loss": 31.3714, + "step": 491730 + }, + { + "epoch": 0.9933459115939511, + "grad_norm": 505.9755554199219, + "learning_rate": 8.311022730357331e-09, + "loss": 20.7981, + "step": 491740 + }, + { + "epoch": 0.993366112226635, + "grad_norm": 58.72330093383789, + "learning_rate": 8.290916929858394e-09, + "loss": 30.5194, + "step": 491750 + }, + { + "epoch": 0.9933863128593188, + "grad_norm": 519.745361328125, + "learning_rate": 8.27083545834384e-09, + "loss": 20.2842, + "step": 491760 + }, + { + "epoch": 0.9934065134920026, + "grad_norm": 458.9234924316406, + "learning_rate": 8.250778315911922e-09, + "loss": 19.8348, + "step": 491770 + }, + { + "epoch": 0.9934267141246864, + "grad_norm": 184.57225036621094, + "learning_rate": 8.230745502660343e-09, + "loss": 19.1662, + "step": 491780 + }, + { + "epoch": 0.9934469147573702, + "grad_norm": 488.2207946777344, + "learning_rate": 8.210737018686798e-09, + "loss": 18.1941, + "step": 491790 + }, + { + "epoch": 0.9934671153900541, + "grad_norm": 328.6118469238281, + "learning_rate": 8.190752864088436e-09, + "loss": 27.4498, + "step": 491800 + }, + { + "epoch": 0.9934873160227379, + "grad_norm": 366.3116455078125, + "learning_rate": 8.17079303896351e-09, + "loss": 29.1802, + "step": 491810 + }, + { + "epoch": 0.9935075166554217, + "grad_norm": 229.66558837890625, + "learning_rate": 8.150857543408054e-09, + "loss": 20.8235, + "step": 491820 + }, + { + "epoch": 0.9935277172881055, + "grad_norm": 235.47610473632812, + "learning_rate": 8.130946377519767e-09, + "loss": 22.9774, + "step": 491830 + }, + { + "epoch": 0.9935479179207892, + "grad_norm": 223.5673370361328, + "learning_rate": 8.11105954139635e-09, + "loss": 12.8811, + "step": 491840 + }, + { + "epoch": 0.993568118553473, + "grad_norm": 230.26121520996094, + "learning_rate": 8.091197035133836e-09, + "loss": 18.2644, + "step": 491850 + }, + { + "epoch": 0.9935883191861569, + "grad_norm": 292.3512268066406, + "learning_rate": 8.07135885882937e-09, + "loss": 14.2108, + "step": 491860 + }, + { + "epoch": 0.9936085198188407, + "grad_norm": 438.98638916015625, + "learning_rate": 8.051545012580097e-09, + "loss": 12.9736, + "step": 491870 + }, + { + "epoch": 0.9936287204515245, + "grad_norm": 299.19549560546875, + "learning_rate": 8.031755496481496e-09, + "loss": 14.6742, + "step": 491880 + }, + { + "epoch": 
0.9936489210842083, + "grad_norm": 364.4718017578125, + "learning_rate": 8.011990310631269e-09, + "loss": 15.7653, + "step": 491890 + }, + { + "epoch": 0.9936691217168921, + "grad_norm": 258.502197265625, + "learning_rate": 7.992249455124889e-09, + "loss": 10.2188, + "step": 491900 + }, + { + "epoch": 0.993689322349576, + "grad_norm": 228.58082580566406, + "learning_rate": 7.972532930058396e-09, + "loss": 14.5795, + "step": 491910 + }, + { + "epoch": 0.9937095229822598, + "grad_norm": 0.3197769820690155, + "learning_rate": 7.952840735528933e-09, + "loss": 12.5138, + "step": 491920 + }, + { + "epoch": 0.9937297236149436, + "grad_norm": 369.63385009765625, + "learning_rate": 7.933172871631978e-09, + "loss": 12.8713, + "step": 491930 + }, + { + "epoch": 0.9937499242476274, + "grad_norm": 102.29288482666016, + "learning_rate": 7.913529338463011e-09, + "loss": 16.312, + "step": 491940 + }, + { + "epoch": 0.9937701248803112, + "grad_norm": 18.225814819335938, + "learning_rate": 7.89391013611751e-09, + "loss": 18.8131, + "step": 491950 + }, + { + "epoch": 0.9937903255129951, + "grad_norm": 441.5530700683594, + "learning_rate": 7.874315264692622e-09, + "loss": 15.199, + "step": 491960 + }, + { + "epoch": 0.9938105261456789, + "grad_norm": 458.37664794921875, + "learning_rate": 7.85474472428216e-09, + "loss": 14.9665, + "step": 491970 + }, + { + "epoch": 0.9938307267783627, + "grad_norm": 374.7511901855469, + "learning_rate": 7.835198514982156e-09, + "loss": 17.1539, + "step": 491980 + }, + { + "epoch": 0.9938509274110465, + "grad_norm": 124.7674560546875, + "learning_rate": 7.815676636888093e-09, + "loss": 16.2321, + "step": 491990 + }, + { + "epoch": 0.9938711280437303, + "grad_norm": 516.1492919921875, + "learning_rate": 7.796179090094891e-09, + "loss": 24.4474, + "step": 492000 + }, + { + "epoch": 0.9938913286764142, + "grad_norm": 819.0858154296875, + "learning_rate": 7.776705874698032e-09, + "loss": 16.4256, + "step": 492010 + }, + { + "epoch": 0.993911529309098, + "grad_norm": 260.1531982421875, + "learning_rate": 7.757256990791328e-09, + "loss": 11.4037, + "step": 492020 + }, + { + "epoch": 0.9939317299417818, + "grad_norm": 52.56280517578125, + "learning_rate": 7.737832438470816e-09, + "loss": 16.7869, + "step": 492030 + }, + { + "epoch": 0.9939519305744656, + "grad_norm": 293.88433837890625, + "learning_rate": 7.718432217830307e-09, + "loss": 20.3442, + "step": 492040 + }, + { + "epoch": 0.9939721312071494, + "grad_norm": 143.10574340820312, + "learning_rate": 7.699056328964726e-09, + "loss": 18.5877, + "step": 492050 + }, + { + "epoch": 0.9939923318398333, + "grad_norm": 289.1720886230469, + "learning_rate": 7.679704771968998e-09, + "loss": 12.983, + "step": 492060 + }, + { + "epoch": 0.9940125324725171, + "grad_norm": 198.0683135986328, + "learning_rate": 7.660377546936382e-09, + "loss": 13.1834, + "step": 492070 + }, + { + "epoch": 0.9940327331052009, + "grad_norm": 412.1947326660156, + "learning_rate": 7.641074653961244e-09, + "loss": 16.0309, + "step": 492080 + }, + { + "epoch": 0.9940529337378847, + "grad_norm": 140.10911560058594, + "learning_rate": 7.621796093138512e-09, + "loss": 9.8632, + "step": 492090 + }, + { + "epoch": 0.9940731343705684, + "grad_norm": 324.3037109375, + "learning_rate": 7.602541864561442e-09, + "loss": 15.7889, + "step": 492100 + }, + { + "epoch": 0.9940933350032523, + "grad_norm": 282.5607604980469, + "learning_rate": 7.583311968324403e-09, + "loss": 22.3234, + "step": 492110 + }, + { + "epoch": 0.9941135356359361, + "grad_norm": 293.5461120605469, + 
"learning_rate": 7.564106404520654e-09, + "loss": 31.9959, + "step": 492120 + }, + { + "epoch": 0.9941337362686199, + "grad_norm": 317.7358093261719, + "learning_rate": 7.544925173243455e-09, + "loss": 14.6415, + "step": 492130 + }, + { + "epoch": 0.9941539369013037, + "grad_norm": 34.03724670410156, + "learning_rate": 7.525768274587175e-09, + "loss": 27.2084, + "step": 492140 + }, + { + "epoch": 0.9941741375339875, + "grad_norm": 0.0, + "learning_rate": 7.506635708645072e-09, + "loss": 17.1881, + "step": 492150 + }, + { + "epoch": 0.9941943381666714, + "grad_norm": 349.0623779296875, + "learning_rate": 7.487527475509848e-09, + "loss": 11.0595, + "step": 492160 + }, + { + "epoch": 0.9942145387993552, + "grad_norm": 590.5459594726562, + "learning_rate": 7.468443575274764e-09, + "loss": 23.4729, + "step": 492170 + }, + { + "epoch": 0.994234739432039, + "grad_norm": 46.546485900878906, + "learning_rate": 7.449384008033078e-09, + "loss": 20.6852, + "step": 492180 + }, + { + "epoch": 0.9942549400647228, + "grad_norm": 594.9143676757812, + "learning_rate": 7.430348773877494e-09, + "loss": 22.5141, + "step": 492190 + }, + { + "epoch": 0.9942751406974066, + "grad_norm": 223.10205078125, + "learning_rate": 7.411337872900715e-09, + "loss": 20.5256, + "step": 492200 + }, + { + "epoch": 0.9942953413300905, + "grad_norm": 258.2781677246094, + "learning_rate": 7.392351305195999e-09, + "loss": 18.0094, + "step": 492210 + }, + { + "epoch": 0.9943155419627743, + "grad_norm": 1.6258138418197632, + "learning_rate": 7.373389070854941e-09, + "loss": 17.1076, + "step": 492220 + }, + { + "epoch": 0.9943357425954581, + "grad_norm": 786.12109375, + "learning_rate": 7.3544511699708e-09, + "loss": 21.3983, + "step": 492230 + }, + { + "epoch": 0.9943559432281419, + "grad_norm": 660.6713256835938, + "learning_rate": 7.335537602635723e-09, + "loss": 16.2497, + "step": 492240 + }, + { + "epoch": 0.9943761438608257, + "grad_norm": 131.5867156982422, + "learning_rate": 7.3166483689413035e-09, + "loss": 18.4497, + "step": 492250 + }, + { + "epoch": 0.9943963444935096, + "grad_norm": 454.113037109375, + "learning_rate": 7.297783468980246e-09, + "loss": 17.6033, + "step": 492260 + }, + { + "epoch": 0.9944165451261934, + "grad_norm": 191.77944946289062, + "learning_rate": 7.278942902843589e-09, + "loss": 13.1115, + "step": 492270 + }, + { + "epoch": 0.9944367457588772, + "grad_norm": 182.51564025878906, + "learning_rate": 7.26012667062459e-09, + "loss": 11.9545, + "step": 492280 + }, + { + "epoch": 0.994456946391561, + "grad_norm": 196.82113647460938, + "learning_rate": 7.241334772414288e-09, + "loss": 20.5471, + "step": 492290 + }, + { + "epoch": 0.9944771470242448, + "grad_norm": 325.7108154296875, + "learning_rate": 7.222567208303721e-09, + "loss": 24.7077, + "step": 492300 + }, + { + "epoch": 0.9944973476569287, + "grad_norm": 825.5935668945312, + "learning_rate": 7.203823978384483e-09, + "loss": 36.0086, + "step": 492310 + }, + { + "epoch": 0.9945175482896125, + "grad_norm": 120.90480041503906, + "learning_rate": 7.185105082748722e-09, + "loss": 19.9979, + "step": 492320 + }, + { + "epoch": 0.9945377489222963, + "grad_norm": 310.6450500488281, + "learning_rate": 7.166410521487477e-09, + "loss": 21.8731, + "step": 492330 + }, + { + "epoch": 0.9945579495549801, + "grad_norm": 113.73008728027344, + "learning_rate": 7.14774029469123e-09, + "loss": 21.9454, + "step": 492340 + }, + { + "epoch": 0.9945781501876638, + "grad_norm": 330.3626708984375, + "learning_rate": 7.129094402451575e-09, + "loss": 20.177, + "step": 492350 + 
}, + { + "epoch": 0.9945983508203476, + "grad_norm": 394.8288879394531, + "learning_rate": 7.11047284485844e-09, + "loss": 15.473, + "step": 492360 + }, + { + "epoch": 0.9946185514530315, + "grad_norm": 624.0137939453125, + "learning_rate": 7.0918756220039745e-09, + "loss": 27.1499, + "step": 492370 + }, + { + "epoch": 0.9946387520857153, + "grad_norm": 167.65707397460938, + "learning_rate": 7.073302733978104e-09, + "loss": 17.0275, + "step": 492380 + }, + { + "epoch": 0.9946589527183991, + "grad_norm": 277.8253173828125, + "learning_rate": 7.054754180871315e-09, + "loss": 6.4159, + "step": 492390 + }, + { + "epoch": 0.9946791533510829, + "grad_norm": 290.10595703125, + "learning_rate": 7.036229962774088e-09, + "loss": 21.9664, + "step": 492400 + }, + { + "epoch": 0.9946993539837667, + "grad_norm": 244.20114135742188, + "learning_rate": 7.0177300797763526e-09, + "loss": 34.4278, + "step": 492410 + }, + { + "epoch": 0.9947195546164506, + "grad_norm": 656.7611083984375, + "learning_rate": 6.999254531969146e-09, + "loss": 17.8188, + "step": 492420 + }, + { + "epoch": 0.9947397552491344, + "grad_norm": 873.718017578125, + "learning_rate": 6.980803319441842e-09, + "loss": 33.7453, + "step": 492430 + }, + { + "epoch": 0.9947599558818182, + "grad_norm": 121.09410858154297, + "learning_rate": 6.962376442284368e-09, + "loss": 23.1687, + "step": 492440 + }, + { + "epoch": 0.994780156514502, + "grad_norm": 506.382080078125, + "learning_rate": 6.943973900586654e-09, + "loss": 34.1638, + "step": 492450 + }, + { + "epoch": 0.9948003571471858, + "grad_norm": 670.7462768554688, + "learning_rate": 6.925595694438625e-09, + "loss": 17.3223, + "step": 492460 + }, + { + "epoch": 0.9948205577798697, + "grad_norm": 382.9184875488281, + "learning_rate": 6.9072418239296556e-09, + "loss": 40.3718, + "step": 492470 + }, + { + "epoch": 0.9948407584125535, + "grad_norm": 1033.291015625, + "learning_rate": 6.888912289149119e-09, + "loss": 20.0352, + "step": 492480 + }, + { + "epoch": 0.9948609590452373, + "grad_norm": 244.9137725830078, + "learning_rate": 6.8706070901863876e-09, + "loss": 33.7851, + "step": 492490 + }, + { + "epoch": 0.9948811596779211, + "grad_norm": 129.25210571289062, + "learning_rate": 6.852326227130835e-09, + "loss": 13.3353, + "step": 492500 + }, + { + "epoch": 0.9949013603106049, + "grad_norm": 396.7117614746094, + "learning_rate": 6.834069700071277e-09, + "loss": 17.8248, + "step": 492510 + }, + { + "epoch": 0.9949215609432888, + "grad_norm": 13.202730178833008, + "learning_rate": 6.81583750909709e-09, + "loss": 15.2326, + "step": 492520 + }, + { + "epoch": 0.9949417615759726, + "grad_norm": 197.49044799804688, + "learning_rate": 6.797629654296533e-09, + "loss": 18.0699, + "step": 492530 + }, + { + "epoch": 0.9949619622086564, + "grad_norm": 620.369140625, + "learning_rate": 6.779446135758982e-09, + "loss": 22.7365, + "step": 492540 + }, + { + "epoch": 0.9949821628413402, + "grad_norm": 262.18646240234375, + "learning_rate": 6.761286953572699e-09, + "loss": 15.2508, + "step": 492550 + }, + { + "epoch": 0.995002363474024, + "grad_norm": 847.1273803710938, + "learning_rate": 6.7431521078265e-09, + "loss": 29.8875, + "step": 492560 + }, + { + "epoch": 0.9950225641067079, + "grad_norm": 519.87548828125, + "learning_rate": 6.725041598608651e-09, + "loss": 18.5069, + "step": 492570 + }, + { + "epoch": 0.9950427647393917, + "grad_norm": 781.2832641601562, + "learning_rate": 6.706955426006856e-09, + "loss": 16.7449, + "step": 492580 + }, + { + "epoch": 0.9950629653720755, + "grad_norm": 
192.6318817138672, + "learning_rate": 6.688893590109935e-09, + "loss": 15.2852, + "step": 492590 + }, + { + "epoch": 0.9950831660047593, + "grad_norm": 322.802490234375, + "learning_rate": 6.670856091006151e-09, + "loss": 14.4477, + "step": 492600 + }, + { + "epoch": 0.995103366637443, + "grad_norm": 30.472148895263672, + "learning_rate": 6.652842928782655e-09, + "loss": 22.5054, + "step": 492610 + }, + { + "epoch": 0.9951235672701269, + "grad_norm": 488.73419189453125, + "learning_rate": 6.63485410352771e-09, + "loss": 23.5498, + "step": 492620 + }, + { + "epoch": 0.9951437679028107, + "grad_norm": 211.9298095703125, + "learning_rate": 6.61688961532847e-09, + "loss": 6.6318, + "step": 492630 + }, + { + "epoch": 0.9951639685354945, + "grad_norm": 296.3676452636719, + "learning_rate": 6.598949464273196e-09, + "loss": 18.351, + "step": 492640 + }, + { + "epoch": 0.9951841691681783, + "grad_norm": 526.3226318359375, + "learning_rate": 6.581033650449043e-09, + "loss": 16.0651, + "step": 492650 + }, + { + "epoch": 0.9952043698008621, + "grad_norm": 506.3572692871094, + "learning_rate": 6.563142173943715e-09, + "loss": 27.0753, + "step": 492660 + }, + { + "epoch": 0.995224570433546, + "grad_norm": 337.43609619140625, + "learning_rate": 6.545275034843257e-09, + "loss": 14.6582, + "step": 492670 + }, + { + "epoch": 0.9952447710662298, + "grad_norm": 195.84539794921875, + "learning_rate": 6.527432233235931e-09, + "loss": 24.3385, + "step": 492680 + }, + { + "epoch": 0.9952649716989136, + "grad_norm": 389.5736999511719, + "learning_rate": 6.509613769207778e-09, + "loss": 27.1965, + "step": 492690 + }, + { + "epoch": 0.9952851723315974, + "grad_norm": 23.38743019104004, + "learning_rate": 6.491819642846509e-09, + "loss": 16.0401, + "step": 492700 + }, + { + "epoch": 0.9953053729642812, + "grad_norm": 15.392356872558594, + "learning_rate": 6.4740498542387174e-09, + "loss": 5.5294, + "step": 492710 + }, + { + "epoch": 0.995325573596965, + "grad_norm": 100.64783477783203, + "learning_rate": 6.456304403470448e-09, + "loss": 13.0771, + "step": 492720 + }, + { + "epoch": 0.9953457742296489, + "grad_norm": 657.622314453125, + "learning_rate": 6.438583290628298e-09, + "loss": 19.4515, + "step": 492730 + }, + { + "epoch": 0.9953659748623327, + "grad_norm": 298.1041259765625, + "learning_rate": 6.420886515799418e-09, + "loss": 22.1768, + "step": 492740 + }, + { + "epoch": 0.9953861754950165, + "grad_norm": 306.6492004394531, + "learning_rate": 6.403214079069298e-09, + "loss": 23.7699, + "step": 492750 + }, + { + "epoch": 0.9954063761277003, + "grad_norm": 341.74298095703125, + "learning_rate": 6.385565980523978e-09, + "loss": 8.2629, + "step": 492760 + }, + { + "epoch": 0.9954265767603842, + "grad_norm": 176.78253173828125, + "learning_rate": 6.3679422202495015e-09, + "loss": 13.1727, + "step": 492770 + }, + { + "epoch": 0.995446777393068, + "grad_norm": 282.4524841308594, + "learning_rate": 6.350342798332465e-09, + "loss": 19.1546, + "step": 492780 + }, + { + "epoch": 0.9954669780257518, + "grad_norm": 310.2063293457031, + "learning_rate": 6.332767714858357e-09, + "loss": 23.2967, + "step": 492790 + }, + { + "epoch": 0.9954871786584356, + "grad_norm": 644.8700561523438, + "learning_rate": 6.315216969912663e-09, + "loss": 14.4751, + "step": 492800 + }, + { + "epoch": 0.9955073792911194, + "grad_norm": 546.1470947265625, + "learning_rate": 6.2976905635803165e-09, + "loss": 21.2804, + "step": 492810 + }, + { + "epoch": 0.9955275799238033, + "grad_norm": 534.7316284179688, + "learning_rate": 
6.280188495947914e-09, + "loss": 16.5509, + "step": 492820 + }, + { + "epoch": 0.9955477805564871, + "grad_norm": 662.7942504882812, + "learning_rate": 6.262710767100388e-09, + "loss": 22.4259, + "step": 492830 + }, + { + "epoch": 0.9955679811891709, + "grad_norm": 319.81024169921875, + "learning_rate": 6.245257377122116e-09, + "loss": 22.5629, + "step": 492840 + }, + { + "epoch": 0.9955881818218547, + "grad_norm": 84.95552825927734, + "learning_rate": 6.227828326099139e-09, + "loss": 16.1414, + "step": 492850 + }, + { + "epoch": 0.9956083824545384, + "grad_norm": 303.6583557128906, + "learning_rate": 6.21042361411639e-09, + "loss": 12.8599, + "step": 492860 + }, + { + "epoch": 0.9956285830872222, + "grad_norm": 281.2415466308594, + "learning_rate": 6.19304324125769e-09, + "loss": 15.6234, + "step": 492870 + }, + { + "epoch": 0.9956487837199061, + "grad_norm": 78.5985336303711, + "learning_rate": 6.175687207609082e-09, + "loss": 12.9739, + "step": 492880 + }, + { + "epoch": 0.9956689843525899, + "grad_norm": 711.9841918945312, + "learning_rate": 6.1583555132543886e-09, + "loss": 25.8945, + "step": 492890 + }, + { + "epoch": 0.9956891849852737, + "grad_norm": 302.2545471191406, + "learning_rate": 6.141048158277429e-09, + "loss": 12.0464, + "step": 492900 + }, + { + "epoch": 0.9957093856179575, + "grad_norm": 197.092041015625, + "learning_rate": 6.123765142764249e-09, + "loss": 11.8187, + "step": 492910 + }, + { + "epoch": 0.9957295862506413, + "grad_norm": 53.73894500732422, + "learning_rate": 6.106506466797557e-09, + "loss": 11.5132, + "step": 492920 + }, + { + "epoch": 0.9957497868833252, + "grad_norm": 264.3664245605469, + "learning_rate": 6.0892721304622874e-09, + "loss": 24.6879, + "step": 492930 + }, + { + "epoch": 0.995769987516009, + "grad_norm": 228.70321655273438, + "learning_rate": 6.0720621338422606e-09, + "loss": 15.5944, + "step": 492940 + }, + { + "epoch": 0.9957901881486928, + "grad_norm": 371.2651062011719, + "learning_rate": 6.054876477021299e-09, + "loss": 9.8062, + "step": 492950 + }, + { + "epoch": 0.9958103887813766, + "grad_norm": 109.58891296386719, + "learning_rate": 6.037715160083224e-09, + "loss": 10.4359, + "step": 492960 + }, + { + "epoch": 0.9958305894140604, + "grad_norm": 575.9791259765625, + "learning_rate": 6.020578183111303e-09, + "loss": 14.5963, + "step": 492970 + }, + { + "epoch": 0.9958507900467443, + "grad_norm": 1932.4429931640625, + "learning_rate": 6.003465546189358e-09, + "loss": 40.4509, + "step": 492980 + }, + { + "epoch": 0.9958709906794281, + "grad_norm": 613.677978515625, + "learning_rate": 5.98637724940121e-09, + "loss": 20.3188, + "step": 492990 + }, + { + "epoch": 0.9958911913121119, + "grad_norm": 369.6727294921875, + "learning_rate": 5.969313292830126e-09, + "loss": 8.9391, + "step": 493000 + }, + { + "epoch": 0.9959113919447957, + "grad_norm": 160.49456787109375, + "learning_rate": 5.952273676558262e-09, + "loss": 9.336, + "step": 493010 + }, + { + "epoch": 0.9959315925774795, + "grad_norm": 788.9593505859375, + "learning_rate": 5.935258400669442e-09, + "loss": 15.4208, + "step": 493020 + }, + { + "epoch": 0.9959517932101634, + "grad_norm": 295.59832763671875, + "learning_rate": 5.918267465246374e-09, + "loss": 12.4223, + "step": 493030 + }, + { + "epoch": 0.9959719938428472, + "grad_norm": 255.62750244140625, + "learning_rate": 5.901300870372329e-09, + "loss": 20.9779, + "step": 493040 + }, + { + "epoch": 0.995992194475531, + "grad_norm": 254.54164123535156, + "learning_rate": 5.8843586161289045e-09, + "loss": 9.4201, + "step": 493050 
+ }, + { + "epoch": 0.9960123951082148, + "grad_norm": 1058.5753173828125, + "learning_rate": 5.867440702599925e-09, + "loss": 14.6705, + "step": 493060 + }, + { + "epoch": 0.9960325957408986, + "grad_norm": 930.560791015625, + "learning_rate": 5.850547129867546e-09, + "loss": 30.7085, + "step": 493070 + }, + { + "epoch": 0.9960527963735825, + "grad_norm": 206.38072204589844, + "learning_rate": 5.833677898013368e-09, + "loss": 24.0558, + "step": 493080 + }, + { + "epoch": 0.9960729970062663, + "grad_norm": 428.0592041015625, + "learning_rate": 5.816833007120659e-09, + "loss": 19.56, + "step": 493090 + }, + { + "epoch": 0.9960931976389501, + "grad_norm": 260.1087341308594, + "learning_rate": 5.800012457270466e-09, + "loss": 22.4628, + "step": 493100 + }, + { + "epoch": 0.9961133982716339, + "grad_norm": 192.6460723876953, + "learning_rate": 5.783216248545498e-09, + "loss": 12.9076, + "step": 493110 + }, + { + "epoch": 0.9961335989043176, + "grad_norm": 350.8668212890625, + "learning_rate": 5.766444381027358e-09, + "loss": 16.6594, + "step": 493120 + }, + { + "epoch": 0.9961537995370014, + "grad_norm": 180.49252319335938, + "learning_rate": 5.749696854798204e-09, + "loss": 20.0456, + "step": 493130 + }, + { + "epoch": 0.9961740001696853, + "grad_norm": 13.434219360351562, + "learning_rate": 5.732973669939079e-09, + "loss": 16.6701, + "step": 493140 + }, + { + "epoch": 0.9961942008023691, + "grad_norm": 564.3707885742188, + "learning_rate": 5.716274826531587e-09, + "loss": 18.9161, + "step": 493150 + }, + { + "epoch": 0.9962144014350529, + "grad_norm": 249.492919921875, + "learning_rate": 5.699600324657328e-09, + "loss": 21.4974, + "step": 493160 + }, + { + "epoch": 0.9962346020677367, + "grad_norm": 149.17922973632812, + "learning_rate": 5.682950164397349e-09, + "loss": 11.9207, + "step": 493170 + }, + { + "epoch": 0.9962548027004205, + "grad_norm": 593.6851806640625, + "learning_rate": 5.6663243458332514e-09, + "loss": 21.1945, + "step": 493180 + }, + { + "epoch": 0.9962750033331044, + "grad_norm": 76.89675903320312, + "learning_rate": 5.649722869044971e-09, + "loss": 29.9984, + "step": 493190 + }, + { + "epoch": 0.9962952039657882, + "grad_norm": 319.2921142578125, + "learning_rate": 5.633145734114665e-09, + "loss": 19.5254, + "step": 493200 + }, + { + "epoch": 0.996315404598472, + "grad_norm": 343.31097412109375, + "learning_rate": 5.616592941123378e-09, + "loss": 25.4214, + "step": 493210 + }, + { + "epoch": 0.9963356052311558, + "grad_norm": 247.19534301757812, + "learning_rate": 5.600064490149937e-09, + "loss": 16.5237, + "step": 493220 + }, + { + "epoch": 0.9963558058638396, + "grad_norm": 125.71563720703125, + "learning_rate": 5.583560381276498e-09, + "loss": 23.1971, + "step": 493230 + }, + { + "epoch": 0.9963760064965235, + "grad_norm": 387.4584655761719, + "learning_rate": 5.5670806145835536e-09, + "loss": 30.7633, + "step": 493240 + }, + { + "epoch": 0.9963962071292073, + "grad_norm": 239.5253143310547, + "learning_rate": 5.5506251901504825e-09, + "loss": 18.8652, + "step": 493250 + }, + { + "epoch": 0.9964164077618911, + "grad_norm": 265.25750732421875, + "learning_rate": 5.534194108057778e-09, + "loss": 7.082, + "step": 493260 + }, + { + "epoch": 0.9964366083945749, + "grad_norm": 351.1151428222656, + "learning_rate": 5.517787368385375e-09, + "loss": 11.4043, + "step": 493270 + }, + { + "epoch": 0.9964568090272587, + "grad_norm": 534.319580078125, + "learning_rate": 5.501404971214319e-09, + "loss": 16.0895, + "step": 493280 + }, + { + "epoch": 0.9964770096599426, + "grad_norm": 
549.73095703125, + "learning_rate": 5.485046916622883e-09, + "loss": 8.4059, + "step": 493290 + }, + { + "epoch": 0.9964972102926264, + "grad_norm": 168.16229248046875, + "learning_rate": 5.468713204692111e-09, + "loss": 11.6919, + "step": 493300 + }, + { + "epoch": 0.9965174109253102, + "grad_norm": 233.44361877441406, + "learning_rate": 5.45240383550083e-09, + "loss": 15.7062, + "step": 493310 + }, + { + "epoch": 0.996537611557994, + "grad_norm": 671.6220092773438, + "learning_rate": 5.436118809128421e-09, + "loss": 20.8994, + "step": 493320 + }, + { + "epoch": 0.9965578121906778, + "grad_norm": 164.53518676757812, + "learning_rate": 5.419858125655375e-09, + "loss": 12.1398, + "step": 493330 + }, + { + "epoch": 0.9965780128233617, + "grad_norm": 32.66850662231445, + "learning_rate": 5.403621785159407e-09, + "loss": 15.2787, + "step": 493340 + }, + { + "epoch": 0.9965982134560455, + "grad_norm": 73.91290283203125, + "learning_rate": 5.38740978772101e-09, + "loss": 11.4727, + "step": 493350 + }, + { + "epoch": 0.9966184140887293, + "grad_norm": 656.6400756835938, + "learning_rate": 5.371222133418452e-09, + "loss": 15.5917, + "step": 493360 + }, + { + "epoch": 0.9966386147214131, + "grad_norm": 161.31642150878906, + "learning_rate": 5.355058822330561e-09, + "loss": 17.0245, + "step": 493370 + }, + { + "epoch": 0.9966588153540968, + "grad_norm": 505.07989501953125, + "learning_rate": 5.338919854536162e-09, + "loss": 14.3958, + "step": 493380 + }, + { + "epoch": 0.9966790159867807, + "grad_norm": 274.6875915527344, + "learning_rate": 5.322805230114636e-09, + "loss": 17.4259, + "step": 493390 + }, + { + "epoch": 0.9966992166194645, + "grad_norm": 466.4015197753906, + "learning_rate": 5.306714949143699e-09, + "loss": 10.4244, + "step": 493400 + }, + { + "epoch": 0.9967194172521483, + "grad_norm": 592.9553833007812, + "learning_rate": 5.290649011702176e-09, + "loss": 26.8744, + "step": 493410 + }, + { + "epoch": 0.9967396178848321, + "grad_norm": 434.5201416015625, + "learning_rate": 5.2746074178683385e-09, + "loss": 17.5278, + "step": 493420 + }, + { + "epoch": 0.9967598185175159, + "grad_norm": 162.36619567871094, + "learning_rate": 5.258590167719901e-09, + "loss": 27.6637, + "step": 493430 + }, + { + "epoch": 0.9967800191501998, + "grad_norm": 313.357666015625, + "learning_rate": 5.242597261335691e-09, + "loss": 20.6985, + "step": 493440 + }, + { + "epoch": 0.9968002197828836, + "grad_norm": 172.16201782226562, + "learning_rate": 5.226628698792868e-09, + "loss": 18.3757, + "step": 493450 + }, + { + "epoch": 0.9968204204155674, + "grad_norm": 502.9991760253906, + "learning_rate": 5.210684480169703e-09, + "loss": 20.8665, + "step": 493460 + }, + { + "epoch": 0.9968406210482512, + "grad_norm": 40.60356903076172, + "learning_rate": 5.1947646055444665e-09, + "loss": 18.3703, + "step": 493470 + }, + { + "epoch": 0.996860821680935, + "grad_norm": 549.3845825195312, + "learning_rate": 5.178869074993209e-09, + "loss": 21.0244, + "step": 493480 + }, + { + "epoch": 0.9968810223136189, + "grad_norm": 584.9131469726562, + "learning_rate": 5.162997888595312e-09, + "loss": 15.8863, + "step": 493490 + }, + { + "epoch": 0.9969012229463027, + "grad_norm": 537.6060180664062, + "learning_rate": 5.147151046426824e-09, + "loss": 15.1142, + "step": 493500 + }, + { + "epoch": 0.9969214235789865, + "grad_norm": 36.83633041381836, + "learning_rate": 5.1313285485649064e-09, + "loss": 16.8483, + "step": 493510 + }, + { + "epoch": 0.9969416242116703, + "grad_norm": 2115.87255859375, + "learning_rate": 
5.115530395087276e-09, + "loss": 19.0449, + "step": 493520 + }, + { + "epoch": 0.9969618248443541, + "grad_norm": 4.222888946533203, + "learning_rate": 5.099756586071092e-09, + "loss": 9.6148, + "step": 493530 + }, + { + "epoch": 0.996982025477038, + "grad_norm": 215.87461853027344, + "learning_rate": 5.084007121592405e-09, + "loss": 25.4709, + "step": 493540 + }, + { + "epoch": 0.9970022261097218, + "grad_norm": 441.0953369140625, + "learning_rate": 5.06828200172893e-09, + "loss": 13.4962, + "step": 493550 + }, + { + "epoch": 0.9970224267424056, + "grad_norm": 324.8075256347656, + "learning_rate": 5.052581226556719e-09, + "loss": 19.6106, + "step": 493560 + }, + { + "epoch": 0.9970426273750894, + "grad_norm": 791.2510986328125, + "learning_rate": 5.036904796152375e-09, + "loss": 28.8694, + "step": 493570 + }, + { + "epoch": 0.9970628280077732, + "grad_norm": 791.0879516601562, + "learning_rate": 5.02125271059195e-09, + "loss": 27.5417, + "step": 493580 + }, + { + "epoch": 0.9970830286404571, + "grad_norm": 248.20809936523438, + "learning_rate": 5.0056249699526046e-09, + "loss": 24.7889, + "step": 493590 + }, + { + "epoch": 0.9971032292731409, + "grad_norm": 309.5415954589844, + "learning_rate": 4.990021574309834e-09, + "loss": 14.6246, + "step": 493600 + }, + { + "epoch": 0.9971234299058247, + "grad_norm": 626.6513671875, + "learning_rate": 4.9744425237396865e-09, + "loss": 12.9801, + "step": 493610 + }, + { + "epoch": 0.9971436305385085, + "grad_norm": 795.0584106445312, + "learning_rate": 4.95888781831877e-09, + "loss": 16.2975, + "step": 493620 + }, + { + "epoch": 0.9971638311711922, + "grad_norm": 247.36184692382812, + "learning_rate": 4.9433574581220225e-09, + "loss": 12.3557, + "step": 493630 + }, + { + "epoch": 0.997184031803876, + "grad_norm": 297.8055419921875, + "learning_rate": 4.927851443225495e-09, + "loss": 18.0591, + "step": 493640 + }, + { + "epoch": 0.9972042324365599, + "grad_norm": 682.8153076171875, + "learning_rate": 4.9123697737052386e-09, + "loss": 26.2157, + "step": 493650 + }, + { + "epoch": 0.9972244330692437, + "grad_norm": 850.35546875, + "learning_rate": 4.896912449635638e-09, + "loss": 21.9621, + "step": 493660 + }, + { + "epoch": 0.9972446337019275, + "grad_norm": 273.8650817871094, + "learning_rate": 4.881479471093298e-09, + "loss": 29.0988, + "step": 493670 + }, + { + "epoch": 0.9972648343346113, + "grad_norm": 239.4381561279297, + "learning_rate": 4.866070838152049e-09, + "loss": 12.6839, + "step": 493680 + }, + { + "epoch": 0.9972850349672951, + "grad_norm": 415.7494812011719, + "learning_rate": 4.850686550888495e-09, + "loss": 21.6229, + "step": 493690 + }, + { + "epoch": 0.997305235599979, + "grad_norm": 624.9031982421875, + "learning_rate": 4.835326609376468e-09, + "loss": 13.9667, + "step": 493700 + }, + { + "epoch": 0.9973254362326628, + "grad_norm": 333.7686462402344, + "learning_rate": 4.81999101369146e-09, + "loss": 22.3211, + "step": 493710 + }, + { + "epoch": 0.9973456368653466, + "grad_norm": 330.7865295410156, + "learning_rate": 4.804679763907305e-09, + "loss": 17.5816, + "step": 493720 + }, + { + "epoch": 0.9973658374980304, + "grad_norm": 478.1622619628906, + "learning_rate": 4.789392860100051e-09, + "loss": 26.7659, + "step": 493730 + }, + { + "epoch": 0.9973860381307142, + "grad_norm": 290.862060546875, + "learning_rate": 4.774130302342972e-09, + "loss": 9.9758, + "step": 493740 + }, + { + "epoch": 0.9974062387633981, + "grad_norm": 360.92816162109375, + "learning_rate": 4.758892090711009e-09, + "loss": 18.2678, + "step": 493750 + }, + { 
+ "epoch": 0.9974264393960819, + "grad_norm": 1411.4190673828125, + "learning_rate": 4.743678225278547e-09, + "loss": 22.8019, + "step": 493760 + }, + { + "epoch": 0.9974466400287657, + "grad_norm": 82.69062042236328, + "learning_rate": 4.7284887061194165e-09, + "loss": 12.9301, + "step": 493770 + }, + { + "epoch": 0.9974668406614495, + "grad_norm": 296.7131042480469, + "learning_rate": 4.713323533308001e-09, + "loss": 21.2306, + "step": 493780 + }, + { + "epoch": 0.9974870412941333, + "grad_norm": 254.96810913085938, + "learning_rate": 4.6981827069181305e-09, + "loss": 26.7119, + "step": 493790 + }, + { + "epoch": 0.9975072419268172, + "grad_norm": 617.3046264648438, + "learning_rate": 4.683066227023081e-09, + "loss": 11.8411, + "step": 493800 + }, + { + "epoch": 0.997527442559501, + "grad_norm": 158.57591247558594, + "learning_rate": 4.667974093696681e-09, + "loss": 10.8231, + "step": 493810 + }, + { + "epoch": 0.9975476431921848, + "grad_norm": 363.39715576171875, + "learning_rate": 4.6529063070133165e-09, + "loss": 13.0921, + "step": 493820 + }, + { + "epoch": 0.9975678438248686, + "grad_norm": 448.3036804199219, + "learning_rate": 4.637862867045151e-09, + "loss": 15.935, + "step": 493830 + }, + { + "epoch": 0.9975880444575524, + "grad_norm": 701.3001098632812, + "learning_rate": 4.6228437738665695e-09, + "loss": 12.3682, + "step": 493840 + }, + { + "epoch": 0.9976082450902363, + "grad_norm": 989.6455078125, + "learning_rate": 4.607849027550293e-09, + "loss": 14.8341, + "step": 493850 + }, + { + "epoch": 0.9976284457229201, + "grad_norm": 503.3165283203125, + "learning_rate": 4.592878628169595e-09, + "loss": 27.2796, + "step": 493860 + }, + { + "epoch": 0.9976486463556039, + "grad_norm": 349.30303955078125, + "learning_rate": 4.577932575797195e-09, + "loss": 20.6817, + "step": 493870 + }, + { + "epoch": 0.9976688469882877, + "grad_norm": 1218.044189453125, + "learning_rate": 4.5630108705063684e-09, + "loss": 17.9004, + "step": 493880 + }, + { + "epoch": 0.9976890476209714, + "grad_norm": 834.479248046875, + "learning_rate": 4.5481135123692786e-09, + "loss": 17.5298, + "step": 493890 + }, + { + "epoch": 0.9977092482536553, + "grad_norm": 583.0570068359375, + "learning_rate": 4.533240501459202e-09, + "loss": 17.883, + "step": 493900 + }, + { + "epoch": 0.9977294488863391, + "grad_norm": 228.48049926757812, + "learning_rate": 4.518391837847747e-09, + "loss": 10.9652, + "step": 493910 + }, + { + "epoch": 0.9977496495190229, + "grad_norm": 477.0223083496094, + "learning_rate": 4.503567521608187e-09, + "loss": 18.4236, + "step": 493920 + }, + { + "epoch": 0.9977698501517067, + "grad_norm": 113.33273315429688, + "learning_rate": 4.4887675528121345e-09, + "loss": 28.0155, + "step": 493930 + }, + { + "epoch": 0.9977900507843905, + "grad_norm": 313.23504638671875, + "learning_rate": 4.473991931531752e-09, + "loss": 25.8596, + "step": 493940 + }, + { + "epoch": 0.9978102514170744, + "grad_norm": 135.3140869140625, + "learning_rate": 4.459240657839203e-09, + "loss": 28.1376, + "step": 493950 + }, + { + "epoch": 0.9978304520497582, + "grad_norm": 275.48236083984375, + "learning_rate": 4.4445137318072096e-09, + "loss": 22.8885, + "step": 493960 + }, + { + "epoch": 0.997850652682442, + "grad_norm": 0.3670414090156555, + "learning_rate": 4.429811153505714e-09, + "loss": 7.0648, + "step": 493970 + }, + { + "epoch": 0.9978708533151258, + "grad_norm": 560.8939819335938, + "learning_rate": 4.415132923007992e-09, + "loss": 14.6677, + "step": 493980 + }, + { + "epoch": 0.9978910539478096, + "grad_norm": 
28.338668823242188, + "learning_rate": 4.400479040385098e-09, + "loss": 13.3111, + "step": 493990 + }, + { + "epoch": 0.9979112545804935, + "grad_norm": 4389.14111328125, + "learning_rate": 4.385849505708084e-09, + "loss": 41.6417, + "step": 494000 + }, + { + "epoch": 0.9979314552131773, + "grad_norm": 454.45654296875, + "learning_rate": 4.3712443190491175e-09, + "loss": 16.7873, + "step": 494010 + }, + { + "epoch": 0.9979516558458611, + "grad_norm": 189.19061279296875, + "learning_rate": 4.3566634804781405e-09, + "loss": 16.8721, + "step": 494020 + }, + { + "epoch": 0.9979718564785449, + "grad_norm": 686.783447265625, + "learning_rate": 4.342106990067319e-09, + "loss": 24.4592, + "step": 494030 + }, + { + "epoch": 0.9979920571112287, + "grad_norm": 471.0889892578125, + "learning_rate": 4.327574847886595e-09, + "loss": 17.7554, + "step": 494040 + }, + { + "epoch": 0.9980122577439126, + "grad_norm": 392.9156494140625, + "learning_rate": 4.313067054008135e-09, + "loss": 20.7474, + "step": 494050 + }, + { + "epoch": 0.9980324583765964, + "grad_norm": 868.9571533203125, + "learning_rate": 4.298583608501328e-09, + "loss": 27.625, + "step": 494060 + }, + { + "epoch": 0.9980526590092802, + "grad_norm": 2032.013916015625, + "learning_rate": 4.284124511437782e-09, + "loss": 34.6727, + "step": 494070 + }, + { + "epoch": 0.998072859641964, + "grad_norm": 257.8091735839844, + "learning_rate": 4.269689762886886e-09, + "loss": 22.6591, + "step": 494080 + }, + { + "epoch": 0.9980930602746478, + "grad_norm": 320.83758544921875, + "learning_rate": 4.2552793629202506e-09, + "loss": 20.1354, + "step": 494090 + }, + { + "epoch": 0.9981132609073317, + "grad_norm": 123.14933013916016, + "learning_rate": 4.2408933116072635e-09, + "loss": 24.1403, + "step": 494100 + }, + { + "epoch": 0.9981334615400155, + "grad_norm": 206.25991821289062, + "learning_rate": 4.22653160901787e-09, + "loss": 17.0927, + "step": 494110 + }, + { + "epoch": 0.9981536621726993, + "grad_norm": 260.6522216796875, + "learning_rate": 4.212194255222568e-09, + "loss": 12.1673, + "step": 494120 + }, + { + "epoch": 0.9981738628053831, + "grad_norm": 208.68922424316406, + "learning_rate": 4.197881250291302e-09, + "loss": 33.1985, + "step": 494130 + }, + { + "epoch": 0.9981940634380668, + "grad_norm": 363.8871154785156, + "learning_rate": 4.183592594294017e-09, + "loss": 13.1031, + "step": 494140 + }, + { + "epoch": 0.9982142640707506, + "grad_norm": 508.0299377441406, + "learning_rate": 4.169328287299545e-09, + "loss": 23.8725, + "step": 494150 + }, + { + "epoch": 0.9982344647034345, + "grad_norm": 411.2216796875, + "learning_rate": 4.155088329377832e-09, + "loss": 28.0018, + "step": 494160 + }, + { + "epoch": 0.9982546653361183, + "grad_norm": 106.09420776367188, + "learning_rate": 4.140872720598266e-09, + "loss": 16.5168, + "step": 494170 + }, + { + "epoch": 0.9982748659688021, + "grad_norm": 525.4610595703125, + "learning_rate": 4.126681461030236e-09, + "loss": 12.5805, + "step": 494180 + }, + { + "epoch": 0.9982950666014859, + "grad_norm": 475.1921081542969, + "learning_rate": 4.11251455074313e-09, + "loss": 27.8379, + "step": 494190 + }, + { + "epoch": 0.9983152672341697, + "grad_norm": 339.93804931640625, + "learning_rate": 4.098371989805227e-09, + "loss": 24.0035, + "step": 494200 + }, + { + "epoch": 0.9983354678668536, + "grad_norm": 270.710205078125, + "learning_rate": 4.0842537782859185e-09, + "loss": 13.355, + "step": 494210 + }, + { + "epoch": 0.9983556684995374, + "grad_norm": 577.5907592773438, + "learning_rate": 
4.07015991625459e-09, + "loss": 17.0013, + "step": 494220 + }, + { + "epoch": 0.9983758691322212, + "grad_norm": 235.8202362060547, + "learning_rate": 4.056090403778967e-09, + "loss": 27.3389, + "step": 494230 + }, + { + "epoch": 0.998396069764905, + "grad_norm": 578.2260131835938, + "learning_rate": 4.042045240927883e-09, + "loss": 14.395, + "step": 494240 + }, + { + "epoch": 0.9984162703975888, + "grad_norm": 50.1723747253418, + "learning_rate": 4.028024427770172e-09, + "loss": 8.5401, + "step": 494250 + }, + { + "epoch": 0.9984364710302727, + "grad_norm": 268.9642333984375, + "learning_rate": 4.014027964373557e-09, + "loss": 10.8139, + "step": 494260 + }, + { + "epoch": 0.9984566716629565, + "grad_norm": 42.60501480102539, + "learning_rate": 4.000055850807427e-09, + "loss": 12.8204, + "step": 494270 + }, + { + "epoch": 0.9984768722956403, + "grad_norm": 512.7504272460938, + "learning_rate": 3.986108087138396e-09, + "loss": 15.5377, + "step": 494280 + }, + { + "epoch": 0.9984970729283241, + "grad_norm": 170.0618896484375, + "learning_rate": 3.972184673435297e-09, + "loss": 15.9197, + "step": 494290 + }, + { + "epoch": 0.998517273561008, + "grad_norm": 358.6625061035156, + "learning_rate": 3.9582856097658554e-09, + "loss": 19.4949, + "step": 494300 + }, + { + "epoch": 0.9985374741936918, + "grad_norm": 353.5986633300781, + "learning_rate": 3.944410896197792e-09, + "loss": 27.7585, + "step": 494310 + }, + { + "epoch": 0.9985576748263756, + "grad_norm": 700.8291015625, + "learning_rate": 3.930560532798832e-09, + "loss": 20.9409, + "step": 494320 + }, + { + "epoch": 0.9985778754590594, + "grad_norm": 306.12103271484375, + "learning_rate": 3.9167345196361454e-09, + "loss": 18.3705, + "step": 494330 + }, + { + "epoch": 0.9985980760917432, + "grad_norm": 338.6306457519531, + "learning_rate": 3.902932856777453e-09, + "loss": 9.2328, + "step": 494340 + }, + { + "epoch": 0.998618276724427, + "grad_norm": 335.12652587890625, + "learning_rate": 3.889155544289924e-09, + "loss": 18.5403, + "step": 494350 + }, + { + "epoch": 0.9986384773571109, + "grad_norm": 311.6947326660156, + "learning_rate": 3.8754025822407285e-09, + "loss": 21.9443, + "step": 494360 + }, + { + "epoch": 0.9986586779897947, + "grad_norm": 355.5566101074219, + "learning_rate": 3.861673970697033e-09, + "loss": 14.4083, + "step": 494370 + }, + { + "epoch": 0.9986788786224785, + "grad_norm": 265.0316162109375, + "learning_rate": 3.847969709725452e-09, + "loss": 17.4924, + "step": 494380 + }, + { + "epoch": 0.9986990792551623, + "grad_norm": 131.7287139892578, + "learning_rate": 3.834289799392598e-09, + "loss": 24.5317, + "step": 494390 + }, + { + "epoch": 0.998719279887846, + "grad_norm": 229.21507263183594, + "learning_rate": 3.820634239765642e-09, + "loss": 18.5032, + "step": 494400 + }, + { + "epoch": 0.9987394805205299, + "grad_norm": 466.8297424316406, + "learning_rate": 3.8070030309111935e-09, + "loss": 12.6156, + "step": 494410 + }, + { + "epoch": 0.9987596811532137, + "grad_norm": 680.4591064453125, + "learning_rate": 3.793396172895314e-09, + "loss": 22.3812, + "step": 494420 + }, + { + "epoch": 0.9987798817858975, + "grad_norm": 295.6737365722656, + "learning_rate": 3.77981366578406e-09, + "loss": 8.7531, + "step": 494430 + }, + { + "epoch": 0.9988000824185813, + "grad_norm": 32.82207489013672, + "learning_rate": 3.766255509644601e-09, + "loss": 18.1024, + "step": 494440 + }, + { + "epoch": 0.9988202830512651, + "grad_norm": 390.6903076171875, + "learning_rate": 3.752721704541884e-09, + "loss": 21.0745, + "step": 494450 + }, + 
{ + "epoch": 0.998840483683949, + "grad_norm": 1059.0428466796875, + "learning_rate": 3.739212250543078e-09, + "loss": 23.7416, + "step": 494460 + }, + { + "epoch": 0.9988606843166328, + "grad_norm": 374.59564208984375, + "learning_rate": 3.7257271477131314e-09, + "loss": 25.2496, + "step": 494470 + }, + { + "epoch": 0.9988808849493166, + "grad_norm": 724.3229370117188, + "learning_rate": 3.7122663961175477e-09, + "loss": 16.3551, + "step": 494480 + }, + { + "epoch": 0.9989010855820004, + "grad_norm": 345.5867919921875, + "learning_rate": 3.698829995822939e-09, + "loss": 15.4175, + "step": 494490 + }, + { + "epoch": 0.9989212862146842, + "grad_norm": 16.395706176757812, + "learning_rate": 3.685417946894254e-09, + "loss": 26.0611, + "step": 494500 + }, + { + "epoch": 0.998941486847368, + "grad_norm": 83.29167175292969, + "learning_rate": 3.672030249396441e-09, + "loss": 9.1453, + "step": 494510 + }, + { + "epoch": 0.9989616874800519, + "grad_norm": 183.70750427246094, + "learning_rate": 3.6586669033955578e-09, + "loss": 12.162, + "step": 494520 + }, + { + "epoch": 0.9989818881127357, + "grad_norm": 322.9624328613281, + "learning_rate": 3.645327908955998e-09, + "loss": 18.2431, + "step": 494530 + }, + { + "epoch": 0.9990020887454195, + "grad_norm": 54.37944793701172, + "learning_rate": 3.632013266143264e-09, + "loss": 18.6937, + "step": 494540 + }, + { + "epoch": 0.9990222893781033, + "grad_norm": 192.83071899414062, + "learning_rate": 3.618722975022304e-09, + "loss": 11.3923, + "step": 494550 + }, + { + "epoch": 0.9990424900107872, + "grad_norm": 430.22259521484375, + "learning_rate": 3.605457035657511e-09, + "loss": 18.7379, + "step": 494560 + }, + { + "epoch": 0.999062690643471, + "grad_norm": 223.731201171875, + "learning_rate": 3.592215448113834e-09, + "loss": 20.3571, + "step": 494570 + }, + { + "epoch": 0.9990828912761548, + "grad_norm": 1489.7357177734375, + "learning_rate": 3.5789982124556646e-09, + "loss": 23.1222, + "step": 494580 + }, + { + "epoch": 0.9991030919088386, + "grad_norm": 574.494384765625, + "learning_rate": 3.565805328747951e-09, + "loss": 11.0038, + "step": 494590 + }, + { + "epoch": 0.9991232925415224, + "grad_norm": 160.38998413085938, + "learning_rate": 3.5526367970539765e-09, + "loss": 19.1827, + "step": 494600 + }, + { + "epoch": 0.9991434931742063, + "grad_norm": 358.2546081542969, + "learning_rate": 3.5394926174381338e-09, + "loss": 37.2199, + "step": 494610 + }, + { + "epoch": 0.9991636938068901, + "grad_norm": 515.025634765625, + "learning_rate": 3.526372789965371e-09, + "loss": 24.1164, + "step": 494620 + }, + { + "epoch": 0.9991838944395739, + "grad_norm": 357.302734375, + "learning_rate": 3.5132773146989706e-09, + "loss": 13.1605, + "step": 494630 + }, + { + "epoch": 0.9992040950722577, + "grad_norm": 5.499563217163086, + "learning_rate": 3.5002061917027708e-09, + "loss": 18.9358, + "step": 494640 + }, + { + "epoch": 0.9992242957049415, + "grad_norm": 177.49220275878906, + "learning_rate": 3.487159421040609e-09, + "loss": 9.681, + "step": 494650 + }, + { + "epoch": 0.9992444963376252, + "grad_norm": 444.80841064453125, + "learning_rate": 3.474137002775768e-09, + "loss": 18.1927, + "step": 494660 + }, + { + "epoch": 0.9992646969703091, + "grad_norm": 235.77890014648438, + "learning_rate": 3.461138936972086e-09, + "loss": 20.3918, + "step": 494670 + }, + { + "epoch": 0.9992848976029929, + "grad_norm": 311.6514892578125, + "learning_rate": 3.4481652236934006e-09, + "loss": 15.494, + "step": 494680 + }, + { + "epoch": 0.9993050982356767, + "grad_norm": 
423.1574401855469, + "learning_rate": 3.4352158630018837e-09, + "loss": 20.682, + "step": 494690 + }, + { + "epoch": 0.9993252988683605, + "grad_norm": 421.1527099609375, + "learning_rate": 3.4222908549608193e-09, + "loss": 15.1917, + "step": 494700 + }, + { + "epoch": 0.9993454995010443, + "grad_norm": 420.02874755859375, + "learning_rate": 3.409390199634044e-09, + "loss": 23.7051, + "step": 494710 + }, + { + "epoch": 0.9993657001337282, + "grad_norm": 426.9223327636719, + "learning_rate": 3.3965138970831758e-09, + "loss": 29.5462, + "step": 494720 + }, + { + "epoch": 0.999385900766412, + "grad_norm": 436.4137268066406, + "learning_rate": 3.3836619473720522e-09, + "loss": 11.2342, + "step": 494730 + }, + { + "epoch": 0.9994061013990958, + "grad_norm": 465.861328125, + "learning_rate": 3.370834350563401e-09, + "loss": 15.2891, + "step": 494740 + }, + { + "epoch": 0.9994263020317796, + "grad_norm": 410.02935791015625, + "learning_rate": 3.3580311067188396e-09, + "loss": 14.8325, + "step": 494750 + }, + { + "epoch": 0.9994465026644634, + "grad_norm": 138.91632080078125, + "learning_rate": 3.3452522159010957e-09, + "loss": 21.0164, + "step": 494760 + }, + { + "epoch": 0.9994667032971473, + "grad_norm": 314.4662170410156, + "learning_rate": 3.332497678172897e-09, + "loss": 15.5543, + "step": 494770 + }, + { + "epoch": 0.9994869039298311, + "grad_norm": 557.0912475585938, + "learning_rate": 3.31976749359586e-09, + "loss": 21.8675, + "step": 494780 + }, + { + "epoch": 0.9995071045625149, + "grad_norm": 589.8872680664062, + "learning_rate": 3.3070616622321584e-09, + "loss": 10.1348, + "step": 494790 + }, + { + "epoch": 0.9995273051951987, + "grad_norm": 199.4197998046875, + "learning_rate": 3.294380184143964e-09, + "loss": 19.031, + "step": 494800 + }, + { + "epoch": 0.9995475058278825, + "grad_norm": 300.7760009765625, + "learning_rate": 3.2817230593928938e-09, + "loss": 16.9388, + "step": 494810 + }, + { + "epoch": 0.9995677064605664, + "grad_norm": 753.2967529296875, + "learning_rate": 3.269090288041121e-09, + "loss": 22.7836, + "step": 494820 + }, + { + "epoch": 0.9995879070932502, + "grad_norm": 262.03558349609375, + "learning_rate": 3.256481870149153e-09, + "loss": 30.8767, + "step": 494830 + }, + { + "epoch": 0.999608107725934, + "grad_norm": 381.8133239746094, + "learning_rate": 3.2438978057791615e-09, + "loss": 22.6111, + "step": 494840 + }, + { + "epoch": 0.9996283083586178, + "grad_norm": 598.3963012695312, + "learning_rate": 3.2313380949927643e-09, + "loss": 15.5221, + "step": 494850 + }, + { + "epoch": 0.9996485089913016, + "grad_norm": 212.82321166992188, + "learning_rate": 3.218802737850468e-09, + "loss": 14.6045, + "step": 494860 + }, + { + "epoch": 0.9996687096239855, + "grad_norm": 234.849609375, + "learning_rate": 3.206291734413891e-09, + "loss": 12.5164, + "step": 494870 + }, + { + "epoch": 0.9996889102566693, + "grad_norm": 207.75067138671875, + "learning_rate": 3.1938050847435398e-09, + "loss": 9.634, + "step": 494880 + }, + { + "epoch": 0.9997091108893531, + "grad_norm": 154.989501953125, + "learning_rate": 3.1813427889004767e-09, + "loss": 13.5645, + "step": 494890 + }, + { + "epoch": 0.9997293115220369, + "grad_norm": 370.97998046875, + "learning_rate": 3.1689048469457638e-09, + "loss": 13.6074, + "step": 494900 + }, + { + "epoch": 0.9997495121547206, + "grad_norm": 14.729854583740234, + "learning_rate": 3.156491258939909e-09, + "loss": 19.7983, + "step": 494910 + }, + { + "epoch": 0.9997697127874045, + "grad_norm": 132.00875854492188, + "learning_rate": 
3.1441020249428635e-09, + "loss": 16.1261, + "step": 494920 + }, + { + "epoch": 0.9997899134200883, + "grad_norm": 9.60148811340332, + "learning_rate": 3.1317371450156897e-09, + "loss": 8.5566, + "step": 494930 + }, + { + "epoch": 0.9998101140527721, + "grad_norm": 168.33929443359375, + "learning_rate": 3.11939661921834e-09, + "loss": 15.349, + "step": 494940 + }, + { + "epoch": 0.9998303146854559, + "grad_norm": 572.34423828125, + "learning_rate": 3.1070804476113213e-09, + "loss": 8.7059, + "step": 494950 + }, + { + "epoch": 0.9998505153181397, + "grad_norm": 407.3402099609375, + "learning_rate": 3.094788630254031e-09, + "loss": 29.6567, + "step": 494960 + }, + { + "epoch": 0.9998707159508236, + "grad_norm": 196.3535919189453, + "learning_rate": 3.0825211672064203e-09, + "loss": 16.6998, + "step": 494970 + }, + { + "epoch": 0.9998909165835074, + "grad_norm": 592.755615234375, + "learning_rate": 3.070278058528997e-09, + "loss": 24.6521, + "step": 494980 + }, + { + "epoch": 0.9999111172161912, + "grad_norm": 352.9114990234375, + "learning_rate": 3.058059304280603e-09, + "loss": 28.3788, + "step": 494990 + }, + { + "epoch": 0.999931317848875, + "grad_norm": 373.85968017578125, + "learning_rate": 3.0458649045211897e-09, + "loss": 17.7365, + "step": 495000 + }, + { + "epoch": 0.9999515184815588, + "grad_norm": 190.60629272460938, + "learning_rate": 3.03369485931071e-09, + "loss": 21.5938, + "step": 495010 + }, + { + "epoch": 0.9999717191142427, + "grad_norm": 508.1072082519531, + "learning_rate": 3.0215491687074492e-09, + "loss": 23.1618, + "step": 495020 + }, + { + "epoch": 0.9999919197469265, + "grad_norm": 324.51171875, + "learning_rate": 3.009427832771361e-09, + "loss": 29.7145, + "step": 495030 + }, + { + "epoch": 1.0, + "eval_loss": 18.863046646118164, + "eval_runtime": 407.6455, + "eval_samples_per_second": 24.534, + "eval_steps_per_second": 12.268, + "step": 495034 + }, + { + "epoch": 1.0000121203796102, + "grad_norm": 535.4615478515625, + "learning_rate": 2.9973308515607313e-09, + "loss": 24.5712, + "step": 495040 + }, + { + "epoch": 1.0000323210122941, + "grad_norm": 304.3030700683594, + "learning_rate": 2.9852582251355124e-09, + "loss": 19.0508, + "step": 495050 + }, + { + "epoch": 1.0000525216449778, + "grad_norm": 296.67218017578125, + "learning_rate": 2.9732099535539905e-09, + "loss": 13.083, + "step": 495060 + }, + { + "epoch": 1.0000727222776618, + "grad_norm": 538.2628784179688, + "learning_rate": 2.961186036875008e-09, + "loss": 28.6931, + "step": 495070 + }, + { + "epoch": 1.0000929229103455, + "grad_norm": 728.6567993164062, + "learning_rate": 2.949186475157406e-09, + "loss": 16.9243, + "step": 495080 + }, + { + "epoch": 1.0001131235430294, + "grad_norm": 619.2899780273438, + "learning_rate": 2.937211268458917e-09, + "loss": 24.1662, + "step": 495090 + }, + { + "epoch": 1.000133324175713, + "grad_norm": 181.85565185546875, + "learning_rate": 2.9252604168383826e-09, + "loss": 11.0639, + "step": 495100 + }, + { + "epoch": 1.000153524808397, + "grad_norm": 187.00706481933594, + "learning_rate": 2.913333920354644e-09, + "loss": 21.9745, + "step": 495110 + }, + { + "epoch": 1.0001737254410807, + "grad_norm": 322.39569091796875, + "learning_rate": 2.901431779064323e-09, + "loss": 17.245, + "step": 495120 + }, + { + "epoch": 1.0001939260737647, + "grad_norm": 399.79571533203125, + "learning_rate": 2.889553993027372e-09, + "loss": 17.0467, + "step": 495130 + }, + { + "epoch": 1.0002141267064484, + "grad_norm": 650.3701171875, + "learning_rate": 2.8777005622998567e-09, + "loss": 
13.9672, + "step": 495140 + }, + { + "epoch": 1.0002343273391323, + "grad_norm": 128.86141967773438, + "learning_rate": 2.865871486940619e-09, + "loss": 11.1238, + "step": 495150 + }, + { + "epoch": 1.000254527971816, + "grad_norm": 311.48162841796875, + "learning_rate": 2.8540667670073905e-09, + "loss": 13.0761, + "step": 495160 + }, + { + "epoch": 1.0002747286045, + "grad_norm": 371.01263427734375, + "learning_rate": 2.842286402556793e-09, + "loss": 21.9169, + "step": 495170 + }, + { + "epoch": 1.0002949292371837, + "grad_norm": 485.1094055175781, + "learning_rate": 2.830530393647113e-09, + "loss": 21.921, + "step": 495180 + }, + { + "epoch": 1.0003151298698676, + "grad_norm": 277.4830322265625, + "learning_rate": 2.8187987403355268e-09, + "loss": 21.0184, + "step": 495190 + }, + { + "epoch": 1.0003353305025513, + "grad_norm": 118.95313262939453, + "learning_rate": 2.8070914426786555e-09, + "loss": 36.1744, + "step": 495200 + }, + { + "epoch": 1.0003555311352352, + "grad_norm": 309.14556884765625, + "learning_rate": 2.7954085007342315e-09, + "loss": 29.7963, + "step": 495210 + }, + { + "epoch": 1.000375731767919, + "grad_norm": 127.88665008544922, + "learning_rate": 2.78374991455832e-09, + "loss": 21.9706, + "step": 495220 + }, + { + "epoch": 1.0003959324006029, + "grad_norm": 19.653532028198242, + "learning_rate": 2.772115684209209e-09, + "loss": 9.5147, + "step": 495230 + }, + { + "epoch": 1.0004161330332866, + "grad_norm": 220.71902465820312, + "learning_rate": 2.7605058097418536e-09, + "loss": 15.3792, + "step": 495240 + }, + { + "epoch": 1.0004363336659705, + "grad_norm": 282.1274719238281, + "learning_rate": 2.748920291214541e-09, + "loss": 9.1682, + "step": 495250 + }, + { + "epoch": 1.0004565342986542, + "grad_norm": 457.2867431640625, + "learning_rate": 2.7373591286822266e-09, + "loss": 18.318, + "step": 495260 + }, + { + "epoch": 1.0004767349313382, + "grad_norm": 1289.856689453125, + "learning_rate": 2.7258223222020876e-09, + "loss": 33.7968, + "step": 495270 + }, + { + "epoch": 1.0004969355640219, + "grad_norm": 611.5613403320312, + "learning_rate": 2.7143098718301896e-09, + "loss": 21.0783, + "step": 495280 + }, + { + "epoch": 1.0005171361967056, + "grad_norm": 244.9925537109375, + "learning_rate": 2.7028217776225994e-09, + "loss": 8.253, + "step": 495290 + }, + { + "epoch": 1.0005373368293895, + "grad_norm": 187.85861206054688, + "learning_rate": 2.6913580396359384e-09, + "loss": 16.2827, + "step": 495300 + }, + { + "epoch": 1.0005575374620732, + "grad_norm": 679.815185546875, + "learning_rate": 2.6799186579246074e-09, + "loss": 15.515, + "step": 495310 + }, + { + "epoch": 1.0005777380947571, + "grad_norm": 716.7931518554688, + "learning_rate": 2.6685036325457826e-09, + "loss": 29.7323, + "step": 495320 + }, + { + "epoch": 1.0005979387274408, + "grad_norm": 225.07211303710938, + "learning_rate": 2.65711296355442e-09, + "loss": 22.4483, + "step": 495330 + }, + { + "epoch": 1.0006181393601248, + "grad_norm": 456.4980163574219, + "learning_rate": 2.6457466510065866e-09, + "loss": 13.9316, + "step": 495340 + }, + { + "epoch": 1.0006383399928085, + "grad_norm": 128.2577362060547, + "learning_rate": 2.6344046949566825e-09, + "loss": 19.9119, + "step": 495350 + }, + { + "epoch": 1.0006585406254924, + "grad_norm": 564.334228515625, + "learning_rate": 2.6230870954607746e-09, + "loss": 20.1779, + "step": 495360 + }, + { + "epoch": 1.0006787412581761, + "grad_norm": 263.80462646484375, + "learning_rate": 2.6117938525738185e-09, + "loss": 13.3283, + "step": 495370 + }, + { + "epoch": 
1.00069894189086, + "grad_norm": 158.897705078125, + "learning_rate": 2.6005249663513254e-09, + "loss": 20.4164, + "step": 495380 + }, + { + "epoch": 1.0007191425235438, + "grad_norm": 281.79766845703125, + "learning_rate": 2.5892804368471414e-09, + "loss": 19.5175, + "step": 495390 + }, + { + "epoch": 1.0007393431562277, + "grad_norm": 556.7461547851562, + "learning_rate": 2.5780602641167774e-09, + "loss": 19.6771, + "step": 495400 + }, + { + "epoch": 1.0007595437889114, + "grad_norm": 218.8365478515625, + "learning_rate": 2.5668644482151892e-09, + "loss": 24.8257, + "step": 495410 + }, + { + "epoch": 1.0007797444215953, + "grad_norm": 628.21533203125, + "learning_rate": 2.5556929891962234e-09, + "loss": 11.3612, + "step": 495420 + }, + { + "epoch": 1.000799945054279, + "grad_norm": 698.1282348632812, + "learning_rate": 2.5445458871148353e-09, + "loss": 18.1672, + "step": 495430 + }, + { + "epoch": 1.000820145686963, + "grad_norm": 259.7185974121094, + "learning_rate": 2.5334231420254262e-09, + "loss": 14.2079, + "step": 495440 + }, + { + "epoch": 1.0008403463196467, + "grad_norm": 70.15369415283203, + "learning_rate": 2.522324753981842e-09, + "loss": 8.7467, + "step": 495450 + }, + { + "epoch": 1.0008605469523306, + "grad_norm": 312.29986572265625, + "learning_rate": 2.511250723037928e-09, + "loss": 29.1994, + "step": 495460 + }, + { + "epoch": 1.0008807475850143, + "grad_norm": 279.4052734375, + "learning_rate": 2.5002010492486405e-09, + "loss": 16.2657, + "step": 495470 + }, + { + "epoch": 1.0009009482176983, + "grad_norm": 144.45155334472656, + "learning_rate": 2.4891757326667154e-09, + "loss": 24.549, + "step": 495480 + }, + { + "epoch": 1.000921148850382, + "grad_norm": 1126.0965576171875, + "learning_rate": 2.4781747733471085e-09, + "loss": 45.3086, + "step": 495490 + }, + { + "epoch": 1.000941349483066, + "grad_norm": 599.3152465820312, + "learning_rate": 2.4671981713420003e-09, + "loss": 25.8712, + "step": 495500 + }, + { + "epoch": 1.0009615501157496, + "grad_norm": 250.93577575683594, + "learning_rate": 2.4562459267063466e-09, + "loss": 13.252, + "step": 495510 + }, + { + "epoch": 1.0009817507484335, + "grad_norm": 290.2425231933594, + "learning_rate": 2.445318039492328e-09, + "loss": 8.9645, + "step": 495520 + }, + { + "epoch": 1.0010019513811172, + "grad_norm": 106.93073272705078, + "learning_rate": 2.4344145097537906e-09, + "loss": 13.6351, + "step": 495530 + }, + { + "epoch": 1.0010221520138012, + "grad_norm": 529.4349975585938, + "learning_rate": 2.423535337544025e-09, + "loss": 16.4544, + "step": 495540 + }, + { + "epoch": 1.001042352646485, + "grad_norm": 345.8415222167969, + "learning_rate": 2.412680522915767e-09, + "loss": 22.4422, + "step": 495550 + }, + { + "epoch": 1.0010625532791686, + "grad_norm": 712.0042114257812, + "learning_rate": 2.4018500659217515e-09, + "loss": 25.6466, + "step": 495560 + }, + { + "epoch": 1.0010827539118525, + "grad_norm": 179.86766052246094, + "learning_rate": 2.3910439666147147e-09, + "loss": 16.1174, + "step": 495570 + }, + { + "epoch": 1.0011029545445362, + "grad_norm": 207.787841796875, + "learning_rate": 2.380262225047947e-09, + "loss": 10.0085, + "step": 495580 + }, + { + "epoch": 1.0011231551772202, + "grad_norm": 695.45751953125, + "learning_rate": 2.369504841273629e-09, + "loss": 16.8075, + "step": 495590 + }, + { + "epoch": 1.0011433558099039, + "grad_norm": 158.43753051757812, + "learning_rate": 2.358771815344496e-09, + "loss": 10.7488, + "step": 495600 + }, + { + "epoch": 1.0011635564425878, + "grad_norm": 536.5902709960938, + 
"learning_rate": 2.348063147312174e-09, + "loss": 17.2911, + "step": 495610 + }, + { + "epoch": 1.0011837570752715, + "grad_norm": 449.4764709472656, + "learning_rate": 2.337378837229398e-09, + "loss": 19.0239, + "step": 495620 + }, + { + "epoch": 1.0012039577079554, + "grad_norm": 527.6913452148438, + "learning_rate": 2.326718885147794e-09, + "loss": 8.5869, + "step": 495630 + }, + { + "epoch": 1.0012241583406392, + "grad_norm": 2.247581958770752, + "learning_rate": 2.316083291120097e-09, + "loss": 11.1301, + "step": 495640 + }, + { + "epoch": 1.001244358973323, + "grad_norm": 163.10150146484375, + "learning_rate": 2.3054720551973773e-09, + "loss": 17.8689, + "step": 495650 + }, + { + "epoch": 1.0012645596060068, + "grad_norm": 183.3430938720703, + "learning_rate": 2.294885177431816e-09, + "loss": 15.3342, + "step": 495660 + }, + { + "epoch": 1.0012847602386907, + "grad_norm": 152.39137268066406, + "learning_rate": 2.2843226578744826e-09, + "loss": 13.1861, + "step": 495670 + }, + { + "epoch": 1.0013049608713744, + "grad_norm": 1099.2608642578125, + "learning_rate": 2.2737844965775578e-09, + "loss": 23.4051, + "step": 495680 + }, + { + "epoch": 1.0013251615040584, + "grad_norm": 121.2804183959961, + "learning_rate": 2.263270693592112e-09, + "loss": 12.0707, + "step": 495690 + }, + { + "epoch": 1.001345362136742, + "grad_norm": 831.40966796875, + "learning_rate": 2.2527812489692156e-09, + "loss": 28.1774, + "step": 495700 + }, + { + "epoch": 1.001365562769426, + "grad_norm": 540.88671875, + "learning_rate": 2.2423161627599386e-09, + "loss": 17.6186, + "step": 495710 + }, + { + "epoch": 1.0013857634021097, + "grad_norm": 46.36670684814453, + "learning_rate": 2.2318754350159067e-09, + "loss": 20.9344, + "step": 495720 + }, + { + "epoch": 1.0014059640347936, + "grad_norm": 212.47543334960938, + "learning_rate": 2.2214590657870795e-09, + "loss": 16.6399, + "step": 495730 + }, + { + "epoch": 1.0014261646674774, + "grad_norm": 47.5765266418457, + "learning_rate": 2.211067055124527e-09, + "loss": 11.9324, + "step": 495740 + }, + { + "epoch": 1.0014463653001613, + "grad_norm": 1.8978875875473022, + "learning_rate": 2.2006994030798758e-09, + "loss": 7.8692, + "step": 495750 + }, + { + "epoch": 1.001466565932845, + "grad_norm": 353.38153076171875, + "learning_rate": 2.1903561097019744e-09, + "loss": 15.6718, + "step": 495760 + }, + { + "epoch": 1.001486766565529, + "grad_norm": 534.5007934570312, + "learning_rate": 2.1800371750430037e-09, + "loss": 34.5303, + "step": 495770 + }, + { + "epoch": 1.0015069671982126, + "grad_norm": 166.82882690429688, + "learning_rate": 2.169742599151814e-09, + "loss": 22.251, + "step": 495780 + }, + { + "epoch": 1.0015271678308966, + "grad_norm": 594.2769165039062, + "learning_rate": 2.15947238207892e-09, + "loss": 16.6582, + "step": 495790 + }, + { + "epoch": 1.0015473684635803, + "grad_norm": 511.8836364746094, + "learning_rate": 2.149226523874837e-09, + "loss": 12.1377, + "step": 495800 + }, + { + "epoch": 1.001567569096264, + "grad_norm": 437.1927185058594, + "learning_rate": 2.1390050245895246e-09, + "loss": 20.587, + "step": 495810 + }, + { + "epoch": 1.001587769728948, + "grad_norm": 478.9796447753906, + "learning_rate": 2.128807884272388e-09, + "loss": 23.5372, + "step": 495820 + }, + { + "epoch": 1.0016079703616316, + "grad_norm": 479.27288818359375, + "learning_rate": 2.1186351029733877e-09, + "loss": 15.6252, + "step": 495830 + }, + { + "epoch": 1.0016281709943156, + "grad_norm": 560.8809204101562, + "learning_rate": 2.1084866807413727e-09, + "loss": 
31.4543, + "step": 495840 + }, + { + "epoch": 1.0016483716269993, + "grad_norm": 632.0659790039062, + "learning_rate": 2.098362617626859e-09, + "loss": 12.4084, + "step": 495850 + }, + { + "epoch": 1.0016685722596832, + "grad_norm": 166.77639770507812, + "learning_rate": 2.088262913679251e-09, + "loss": 5.7719, + "step": 495860 + }, + { + "epoch": 1.001688772892367, + "grad_norm": 135.89581298828125, + "learning_rate": 2.078187568946288e-09, + "loss": 12.6637, + "step": 495870 + }, + { + "epoch": 1.0017089735250508, + "grad_norm": 227.5363311767578, + "learning_rate": 2.0681365834790413e-09, + "loss": 15.5393, + "step": 495880 + }, + { + "epoch": 1.0017291741577345, + "grad_norm": 291.6004333496094, + "learning_rate": 2.0581099573246943e-09, + "loss": 36.1768, + "step": 495890 + }, + { + "epoch": 1.0017493747904185, + "grad_norm": 282.8682556152344, + "learning_rate": 2.0481076905332074e-09, + "loss": 8.65, + "step": 495900 + }, + { + "epoch": 1.0017695754231022, + "grad_norm": 189.76792907714844, + "learning_rate": 2.038129783153431e-09, + "loss": 11.4054, + "step": 495910 + }, + { + "epoch": 1.0017897760557861, + "grad_norm": 233.2994384765625, + "learning_rate": 2.0281762352331034e-09, + "loss": 18.2021, + "step": 495920 + }, + { + "epoch": 1.0018099766884698, + "grad_norm": 176.9680633544922, + "learning_rate": 2.018247046821631e-09, + "loss": 11.5819, + "step": 495930 + }, + { + "epoch": 1.0018301773211538, + "grad_norm": 153.39129638671875, + "learning_rate": 2.008342217966752e-09, + "loss": 24.4084, + "step": 495940 + }, + { + "epoch": 1.0018503779538375, + "grad_norm": 409.62103271484375, + "learning_rate": 1.9984617487173174e-09, + "loss": 17.9044, + "step": 495950 + }, + { + "epoch": 1.0018705785865214, + "grad_norm": 258.4937744140625, + "learning_rate": 1.9886056391210663e-09, + "loss": 23.0219, + "step": 495960 + }, + { + "epoch": 1.001890779219205, + "grad_norm": 90.13101196289062, + "learning_rate": 1.9787738892262932e-09, + "loss": 5.2013, + "step": 495970 + }, + { + "epoch": 1.001910979851889, + "grad_norm": 273.6362609863281, + "learning_rate": 1.968966499080738e-09, + "loss": 14.6099, + "step": 495980 + }, + { + "epoch": 1.0019311804845727, + "grad_norm": 562.908935546875, + "learning_rate": 1.95918346873214e-09, + "loss": 13.6189, + "step": 495990 + }, + { + "epoch": 1.0019513811172567, + "grad_norm": 322.1513366699219, + "learning_rate": 1.9494247982282386e-09, + "loss": 31.2791, + "step": 496000 + }, + { + "epoch": 1.0019715817499404, + "grad_norm": 57.95103073120117, + "learning_rate": 1.9396904876167742e-09, + "loss": 16.7426, + "step": 496010 + }, + { + "epoch": 1.0019917823826243, + "grad_norm": 270.1626281738281, + "learning_rate": 1.9299805369449307e-09, + "loss": 15.8352, + "step": 496020 + }, + { + "epoch": 1.002011983015308, + "grad_norm": 181.19337463378906, + "learning_rate": 1.920294946260448e-09, + "loss": 12.8496, + "step": 496030 + }, + { + "epoch": 1.002032183647992, + "grad_norm": 279.1658630371094, + "learning_rate": 1.9106337156099553e-09, + "loss": 12.1115, + "step": 496040 + }, + { + "epoch": 1.0020523842806757, + "grad_norm": 173.3330841064453, + "learning_rate": 1.9009968450406368e-09, + "loss": 24.0644, + "step": 496050 + }, + { + "epoch": 1.0020725849133594, + "grad_norm": 280.337158203125, + "learning_rate": 1.8913843346002324e-09, + "loss": 14.3357, + "step": 496060 + }, + { + "epoch": 1.0020927855460433, + "grad_norm": 222.7508087158203, + "learning_rate": 1.8817961843348166e-09, + "loss": 12.7869, + "step": 496070 + }, + { + "epoch": 
1.002112986178727, + "grad_norm": 387.0539245605469, + "learning_rate": 1.872232394291018e-09, + "loss": 13.7058, + "step": 496080 + }, + { + "epoch": 1.002133186811411, + "grad_norm": 144.1525115966797, + "learning_rate": 1.8626929645160218e-09, + "loss": 24.1266, + "step": 496090 + }, + { + "epoch": 1.0021533874440947, + "grad_norm": 233.26426696777344, + "learning_rate": 1.8531778950564572e-09, + "loss": 18.7332, + "step": 496100 + }, + { + "epoch": 1.0021735880767786, + "grad_norm": 364.735595703125, + "learning_rate": 1.8436871859578431e-09, + "loss": 18.3567, + "step": 496110 + }, + { + "epoch": 1.0021937887094623, + "grad_norm": 305.4839782714844, + "learning_rate": 1.834220837266809e-09, + "loss": 16.8414, + "step": 496120 + }, + { + "epoch": 1.0022139893421462, + "grad_norm": 80.03901672363281, + "learning_rate": 1.8247788490299846e-09, + "loss": 22.3505, + "step": 496130 + }, + { + "epoch": 1.00223418997483, + "grad_norm": 91.74203491210938, + "learning_rate": 1.8153612212923333e-09, + "loss": 13.1593, + "step": 496140 + }, + { + "epoch": 1.0022543906075139, + "grad_norm": 83.57035064697266, + "learning_rate": 1.80596795410104e-09, + "loss": 28.7778, + "step": 496150 + }, + { + "epoch": 1.0022745912401976, + "grad_norm": 89.92529296875, + "learning_rate": 1.7965990475010686e-09, + "loss": 20.5896, + "step": 496160 + }, + { + "epoch": 1.0022947918728815, + "grad_norm": 365.5494689941406, + "learning_rate": 1.7872545015379382e-09, + "loss": 17.4617, + "step": 496170 + }, + { + "epoch": 1.0023149925055652, + "grad_norm": 228.63990783691406, + "learning_rate": 1.7779343162577233e-09, + "loss": 9.7855, + "step": 496180 + }, + { + "epoch": 1.0023351931382491, + "grad_norm": 471.0659484863281, + "learning_rate": 1.7686384917059429e-09, + "loss": 29.9951, + "step": 496190 + }, + { + "epoch": 1.0023553937709329, + "grad_norm": 6.007596969604492, + "learning_rate": 1.759367027927561e-09, + "loss": 8.9301, + "step": 496200 + }, + { + "epoch": 1.0023755944036168, + "grad_norm": 310.35614013671875, + "learning_rate": 1.7501199249675416e-09, + "loss": 11.2919, + "step": 496210 + }, + { + "epoch": 1.0023957950363005, + "grad_norm": 67.65311431884766, + "learning_rate": 1.740897182871404e-09, + "loss": 12.9013, + "step": 496220 + }, + { + "epoch": 1.0024159956689844, + "grad_norm": 271.9510498046875, + "learning_rate": 1.7316988016835567e-09, + "loss": 10.5365, + "step": 496230 + }, + { + "epoch": 1.0024361963016681, + "grad_norm": 577.5405883789062, + "learning_rate": 1.7225247814495194e-09, + "loss": 11.4776, + "step": 496240 + }, + { + "epoch": 1.002456396934352, + "grad_norm": 158.23123168945312, + "learning_rate": 1.7133751222137007e-09, + "loss": 16.7018, + "step": 496250 + }, + { + "epoch": 1.0024765975670358, + "grad_norm": 151.22882080078125, + "learning_rate": 1.7042498240205097e-09, + "loss": 17.0154, + "step": 496260 + }, + { + "epoch": 1.0024967981997197, + "grad_norm": 351.3409118652344, + "learning_rate": 1.6951488869149103e-09, + "loss": 5.6745, + "step": 496270 + }, + { + "epoch": 1.0025169988324034, + "grad_norm": 291.2845153808594, + "learning_rate": 1.686072310940201e-09, + "loss": 9.1775, + "step": 496280 + }, + { + "epoch": 1.0025371994650873, + "grad_norm": 526.7337036132812, + "learning_rate": 1.6770200961419015e-09, + "loss": 18.1165, + "step": 496290 + }, + { + "epoch": 1.002557400097771, + "grad_norm": 308.7669372558594, + "learning_rate": 1.6679922425638651e-09, + "loss": 25.7948, + "step": 496300 + }, + { + "epoch": 1.002577600730455, + "grad_norm": 
240.68191528320312, + "learning_rate": 1.6589887502493907e-09, + "loss": 16.251, + "step": 496310 + }, + { + "epoch": 1.0025978013631387, + "grad_norm": 253.4486846923828, + "learning_rate": 1.650009619242887e-09, + "loss": 12.1938, + "step": 496320 + }, + { + "epoch": 1.0026180019958224, + "grad_norm": 144.76792907714844, + "learning_rate": 1.6410548495876533e-09, + "loss": 12.4415, + "step": 496330 + }, + { + "epoch": 1.0026382026285063, + "grad_norm": 126.26376342773438, + "learning_rate": 1.632124441328098e-09, + "loss": 16.3367, + "step": 496340 + }, + { + "epoch": 1.00265840326119, + "grad_norm": 622.173095703125, + "learning_rate": 1.6232183945075197e-09, + "loss": 20.4622, + "step": 496350 + }, + { + "epoch": 1.002678603893874, + "grad_norm": 404.68792724609375, + "learning_rate": 1.6143367091686624e-09, + "loss": 17.3768, + "step": 496360 + }, + { + "epoch": 1.0026988045265577, + "grad_norm": 523.90869140625, + "learning_rate": 1.6054793853553797e-09, + "loss": 21.2731, + "step": 496370 + }, + { + "epoch": 1.0027190051592416, + "grad_norm": 419.3081970214844, + "learning_rate": 1.59664642311097e-09, + "loss": 19.0114, + "step": 496380 + }, + { + "epoch": 1.0027392057919253, + "grad_norm": 345.4902648925781, + "learning_rate": 1.5878378224781777e-09, + "loss": 8.9369, + "step": 496390 + }, + { + "epoch": 1.0027594064246093, + "grad_norm": 372.1741943359375, + "learning_rate": 1.5790535835003006e-09, + "loss": 10.9754, + "step": 496400 + }, + { + "epoch": 1.002779607057293, + "grad_norm": 194.96832275390625, + "learning_rate": 1.570293706219528e-09, + "loss": 5.5389, + "step": 496410 + }, + { + "epoch": 1.002799807689977, + "grad_norm": 182.9541473388672, + "learning_rate": 1.5615581906791576e-09, + "loss": 7.1093, + "step": 496420 + }, + { + "epoch": 1.0028200083226606, + "grad_norm": 239.05899047851562, + "learning_rate": 1.5528470369208238e-09, + "loss": 20.723, + "step": 496430 + }, + { + "epoch": 1.0028402089553445, + "grad_norm": 1060.20361328125, + "learning_rate": 1.5441602449883797e-09, + "loss": 15.2947, + "step": 496440 + }, + { + "epoch": 1.0028604095880282, + "grad_norm": 0.0, + "learning_rate": 1.535497814923459e-09, + "loss": 8.3481, + "step": 496450 + }, + { + "epoch": 1.0028806102207122, + "grad_norm": 10.102018356323242, + "learning_rate": 1.52685974676825e-09, + "loss": 21.2022, + "step": 496460 + }, + { + "epoch": 1.0029008108533959, + "grad_norm": 339.6944885253906, + "learning_rate": 1.518246040564386e-09, + "loss": 10.4844, + "step": 496470 + }, + { + "epoch": 1.0029210114860798, + "grad_norm": 1862.4864501953125, + "learning_rate": 1.509656696354611e-09, + "loss": 29.4656, + "step": 496480 + }, + { + "epoch": 1.0029412121187635, + "grad_norm": 468.07958984375, + "learning_rate": 1.5010917141811132e-09, + "loss": 25.402, + "step": 496490 + }, + { + "epoch": 1.0029614127514475, + "grad_norm": 325.1629943847656, + "learning_rate": 1.4925510940844157e-09, + "loss": 13.7435, + "step": 496500 + }, + { + "epoch": 1.0029816133841312, + "grad_norm": 661.4666137695312, + "learning_rate": 1.4840348361067069e-09, + "loss": 30.6057, + "step": 496510 + }, + { + "epoch": 1.003001814016815, + "grad_norm": 336.5476379394531, + "learning_rate": 1.4755429402901755e-09, + "loss": 10.5586, + "step": 496520 + }, + { + "epoch": 1.0030220146494988, + "grad_norm": 724.3787841796875, + "learning_rate": 1.4670754066747895e-09, + "loss": 11.5721, + "step": 496530 + }, + { + "epoch": 1.0030422152821827, + "grad_norm": 546.8881225585938, + "learning_rate": 1.4586322353032923e-09, + 
"loss": 14.4762, + "step": 496540 + }, + { + "epoch": 1.0030624159148664, + "grad_norm": 330.6160583496094, + "learning_rate": 1.4502134262156519e-09, + "loss": 11.2739, + "step": 496550 + }, + { + "epoch": 1.0030826165475504, + "grad_norm": 6.230769157409668, + "learning_rate": 1.4418189794540572e-09, + "loss": 11.9299, + "step": 496560 + }, + { + "epoch": 1.003102817180234, + "grad_norm": 172.46426391601562, + "learning_rate": 1.4334488950579206e-09, + "loss": 14.5759, + "step": 496570 + }, + { + "epoch": 1.0031230178129178, + "grad_norm": 20.506919860839844, + "learning_rate": 1.425103173069986e-09, + "loss": 11.4367, + "step": 496580 + }, + { + "epoch": 1.0031432184456017, + "grad_norm": 417.68963623046875, + "learning_rate": 1.4167818135291112e-09, + "loss": 13.0775, + "step": 496590 + }, + { + "epoch": 1.0031634190782854, + "grad_norm": 24.981063842773438, + "learning_rate": 1.4084848164763742e-09, + "loss": 12.4334, + "step": 496600 + }, + { + "epoch": 1.0031836197109694, + "grad_norm": 6.88071346282959, + "learning_rate": 1.4002121819528535e-09, + "loss": 15.9379, + "step": 496610 + }, + { + "epoch": 1.003203820343653, + "grad_norm": 431.84326171875, + "learning_rate": 1.3919639099985171e-09, + "loss": 23.2785, + "step": 496620 + }, + { + "epoch": 1.003224020976337, + "grad_norm": 85.91773986816406, + "learning_rate": 1.3837400006533331e-09, + "loss": 17.4635, + "step": 496630 + }, + { + "epoch": 1.0032442216090207, + "grad_norm": 304.8151550292969, + "learning_rate": 1.3755404539572692e-09, + "loss": 14.6721, + "step": 496640 + }, + { + "epoch": 1.0032644222417046, + "grad_norm": 359.48175048828125, + "learning_rate": 1.3673652699508487e-09, + "loss": 10.4681, + "step": 496650 + }, + { + "epoch": 1.0032846228743884, + "grad_norm": 236.9304656982422, + "learning_rate": 1.3592144486740399e-09, + "loss": 19.3031, + "step": 496660 + }, + { + "epoch": 1.0033048235070723, + "grad_norm": 16.39584732055664, + "learning_rate": 1.3510879901657003e-09, + "loss": 5.941, + "step": 496670 + }, + { + "epoch": 1.003325024139756, + "grad_norm": 309.649169921875, + "learning_rate": 1.342985894465798e-09, + "loss": 13.6341, + "step": 496680 + }, + { + "epoch": 1.00334522477244, + "grad_norm": 94.05716705322266, + "learning_rate": 1.3349081616143012e-09, + "loss": 21.2653, + "step": 496690 + }, + { + "epoch": 1.0033654254051236, + "grad_norm": 533.8690795898438, + "learning_rate": 1.3268547916495124e-09, + "loss": 9.9349, + "step": 496700 + }, + { + "epoch": 1.0033856260378076, + "grad_norm": 14.093035697937012, + "learning_rate": 1.3188257846119545e-09, + "loss": 12.0922, + "step": 496710 + }, + { + "epoch": 1.0034058266704913, + "grad_norm": 20.148250579833984, + "learning_rate": 1.3108211405399307e-09, + "loss": 9.9139, + "step": 496720 + }, + { + "epoch": 1.0034260273031752, + "grad_norm": 665.7136840820312, + "learning_rate": 1.3028408594728536e-09, + "loss": 15.5665, + "step": 496730 + }, + { + "epoch": 1.003446227935859, + "grad_norm": 220.5654296875, + "learning_rate": 1.2948849414495811e-09, + "loss": 14.6036, + "step": 496740 + }, + { + "epoch": 1.0034664285685428, + "grad_norm": 269.35601806640625, + "learning_rate": 1.286953386508416e-09, + "loss": 25.4018, + "step": 496750 + }, + { + "epoch": 1.0034866292012266, + "grad_norm": 287.6634521484375, + "learning_rate": 1.2790461946887712e-09, + "loss": 23.9352, + "step": 496760 + }, + { + "epoch": 1.0035068298339105, + "grad_norm": 195.8697509765625, + "learning_rate": 1.271163366028394e-09, + "loss": 24.3558, + "step": 496770 + }, + { + 
"epoch": 1.0035270304665942, + "grad_norm": 360.56964111328125, + "learning_rate": 1.2633049005661423e-09, + "loss": 6.6644, + "step": 496780 + }, + { + "epoch": 1.0035472310992781, + "grad_norm": 355.21771240234375, + "learning_rate": 1.2554707983403192e-09, + "loss": 18.4967, + "step": 496790 + }, + { + "epoch": 1.0035674317319618, + "grad_norm": 90.62782287597656, + "learning_rate": 1.247661059389227e-09, + "loss": 17.228, + "step": 496800 + }, + { + "epoch": 1.0035876323646458, + "grad_norm": 209.0258331298828, + "learning_rate": 1.2398756837506131e-09, + "loss": 15.8311, + "step": 496810 + }, + { + "epoch": 1.0036078329973295, + "grad_norm": 381.7867126464844, + "learning_rate": 1.2321146714627807e-09, + "loss": 8.8757, + "step": 496820 + }, + { + "epoch": 1.0036280336300132, + "grad_norm": 236.79905700683594, + "learning_rate": 1.224378022562922e-09, + "loss": 16.2411, + "step": 496830 + }, + { + "epoch": 1.0036482342626971, + "grad_norm": 210.00741577148438, + "learning_rate": 1.2166657370898948e-09, + "loss": 14.6875, + "step": 496840 + }, + { + "epoch": 1.0036684348953808, + "grad_norm": 392.40838623046875, + "learning_rate": 1.2089778150797816e-09, + "loss": 20.7608, + "step": 496850 + }, + { + "epoch": 1.0036886355280648, + "grad_norm": 65.0674819946289, + "learning_rate": 1.2013142565708845e-09, + "loss": 15.7522, + "step": 496860 + }, + { + "epoch": 1.0037088361607485, + "grad_norm": 382.4653625488281, + "learning_rate": 1.193675061600952e-09, + "loss": 15.6798, + "step": 496870 + }, + { + "epoch": 1.0037290367934324, + "grad_norm": 442.8677978515625, + "learning_rate": 1.1860602302066203e-09, + "loss": 26.6604, + "step": 496880 + }, + { + "epoch": 1.003749237426116, + "grad_norm": 1905.3214111328125, + "learning_rate": 1.178469762425083e-09, + "loss": 11.8643, + "step": 496890 + }, + { + "epoch": 1.0037694380588, + "grad_norm": 541.8901977539062, + "learning_rate": 1.170903658293532e-09, + "loss": 10.6899, + "step": 496900 + }, + { + "epoch": 1.0037896386914837, + "grad_norm": 480.2037353515625, + "learning_rate": 1.1633619178486044e-09, + "loss": 13.7109, + "step": 496910 + }, + { + "epoch": 1.0038098393241677, + "grad_norm": 348.5443115234375, + "learning_rate": 1.155844541126938e-09, + "loss": 23.0337, + "step": 496920 + }, + { + "epoch": 1.0038300399568514, + "grad_norm": 276.6700744628906, + "learning_rate": 1.1483515281657254e-09, + "loss": 14.6272, + "step": 496930 + }, + { + "epoch": 1.0038502405895353, + "grad_norm": 170.61239624023438, + "learning_rate": 1.1408828790010484e-09, + "loss": 10.445, + "step": 496940 + }, + { + "epoch": 1.003870441222219, + "grad_norm": 44.20932388305664, + "learning_rate": 1.1334385936695447e-09, + "loss": 20.9744, + "step": 496950 + }, + { + "epoch": 1.003890641854903, + "grad_norm": 435.5612487792969, + "learning_rate": 1.1260186722067411e-09, + "loss": 20.1963, + "step": 496960 + }, + { + "epoch": 1.0039108424875867, + "grad_norm": 257.3517150878906, + "learning_rate": 1.1186231146503856e-09, + "loss": 24.552, + "step": 496970 + }, + { + "epoch": 1.0039310431202706, + "grad_norm": 536.3111572265625, + "learning_rate": 1.111251921034895e-09, + "loss": 29.9063, + "step": 496980 + }, + { + "epoch": 1.0039512437529543, + "grad_norm": 302.0859375, + "learning_rate": 1.1039050913969062e-09, + "loss": 12.5154, + "step": 496990 + }, + { + "epoch": 1.0039714443856382, + "grad_norm": 840.5151977539062, + "learning_rate": 1.096582625772502e-09, + "loss": 29.2025, + "step": 497000 + }, + { + "epoch": 1.003991645018322, + "grad_norm": 
393.2088623046875, + "learning_rate": 1.0892845241972094e-09, + "loss": 14.4391, + "step": 497010 + }, + { + "epoch": 1.0040118456510059, + "grad_norm": 262.2140197753906, + "learning_rate": 1.0820107867060004e-09, + "loss": 19.033, + "step": 497020 + }, + { + "epoch": 1.0040320462836896, + "grad_norm": 135.4094696044922, + "learning_rate": 1.074761413334957e-09, + "loss": 32.1915, + "step": 497030 + }, + { + "epoch": 1.0040522469163735, + "grad_norm": 378.78863525390625, + "learning_rate": 1.0675364041190516e-09, + "loss": 13.1999, + "step": 497040 + }, + { + "epoch": 1.0040724475490572, + "grad_norm": 364.27679443359375, + "learning_rate": 1.0603357590938112e-09, + "loss": 7.9666, + "step": 497050 + }, + { + "epoch": 1.0040926481817412, + "grad_norm": 273.89202880859375, + "learning_rate": 1.0531594782942079e-09, + "loss": 14.7918, + "step": 497060 + }, + { + "epoch": 1.0041128488144249, + "grad_norm": 620.6331787109375, + "learning_rate": 1.0460075617552134e-09, + "loss": 18.9597, + "step": 497070 + }, + { + "epoch": 1.0041330494471086, + "grad_norm": 967.349365234375, + "learning_rate": 1.0388800095118002e-09, + "loss": 16.9013, + "step": 497080 + }, + { + "epoch": 1.0041532500797925, + "grad_norm": 378.5223693847656, + "learning_rate": 1.0317768215983847e-09, + "loss": 11.8875, + "step": 497090 + }, + { + "epoch": 1.0041734507124762, + "grad_norm": 255.29290771484375, + "learning_rate": 1.0246979980499395e-09, + "loss": 21.5511, + "step": 497100 + }, + { + "epoch": 1.0041936513451601, + "grad_norm": 152.40760803222656, + "learning_rate": 1.017643538900881e-09, + "loss": 8.6306, + "step": 497110 + }, + { + "epoch": 1.0042138519778439, + "grad_norm": 301.55950927734375, + "learning_rate": 1.0106134441850712e-09, + "loss": 10.1663, + "step": 497120 + }, + { + "epoch": 1.0042340526105278, + "grad_norm": 366.7298583984375, + "learning_rate": 1.0036077139380373e-09, + "loss": 9.4549, + "step": 497130 + }, + { + "epoch": 1.0042542532432115, + "grad_norm": 277.9081726074219, + "learning_rate": 9.96626348192531e-10, + "loss": 44.1801, + "step": 497140 + }, + { + "epoch": 1.0042744538758954, + "grad_norm": 167.16905212402344, + "learning_rate": 9.896693469829689e-10, + "loss": 25.4072, + "step": 497150 + }, + { + "epoch": 1.0042946545085791, + "grad_norm": 246.45228576660156, + "learning_rate": 9.827367103437679e-10, + "loss": 15.8333, + "step": 497160 + }, + { + "epoch": 1.004314855141263, + "grad_norm": 320.1998291015625, + "learning_rate": 9.758284383082351e-10, + "loss": 15.7299, + "step": 497170 + }, + { + "epoch": 1.0043350557739468, + "grad_norm": 255.04876708984375, + "learning_rate": 9.68944530910787e-10, + "loss": 21.4589, + "step": 497180 + }, + { + "epoch": 1.0043552564066307, + "grad_norm": 858.8927612304688, + "learning_rate": 9.620849881836203e-10, + "loss": 11.0519, + "step": 497190 + }, + { + "epoch": 1.0043754570393144, + "grad_norm": 290.14703369140625, + "learning_rate": 9.55249810161152e-10, + "loss": 16.7142, + "step": 497200 + }, + { + "epoch": 1.0043956576719983, + "grad_norm": 79.18550872802734, + "learning_rate": 9.484389968766882e-10, + "loss": 14.6362, + "step": 497210 + }, + { + "epoch": 1.004415858304682, + "grad_norm": 2.3829500675201416, + "learning_rate": 9.416525483635364e-10, + "loss": 11.5494, + "step": 497220 + }, + { + "epoch": 1.004436058937366, + "grad_norm": 472.21539306640625, + "learning_rate": 9.348904646538925e-10, + "loss": 20.2588, + "step": 497230 + }, + { + "epoch": 1.0044562595700497, + "grad_norm": 422.0057678222656, + "learning_rate": 
9.281527457816186e-10, + "loss": 15.2321, + "step": 497240 + }, + { + "epoch": 1.0044764602027336, + "grad_norm": 235.6026153564453, + "learning_rate": 9.214393917789111e-10, + "loss": 14.2804, + "step": 497250 + }, + { + "epoch": 1.0044966608354173, + "grad_norm": 138.6964569091797, + "learning_rate": 9.147504026790766e-10, + "loss": 7.2866, + "step": 497260 + }, + { + "epoch": 1.0045168614681013, + "grad_norm": 85.42167663574219, + "learning_rate": 9.080857785137564e-10, + "loss": 16.7213, + "step": 497270 + }, + { + "epoch": 1.004537062100785, + "grad_norm": 331.20611572265625, + "learning_rate": 9.014455193168125e-10, + "loss": 7.9069, + "step": 497280 + }, + { + "epoch": 1.004557262733469, + "grad_norm": 794.2719116210938, + "learning_rate": 8.948296251198863e-10, + "loss": 21.0431, + "step": 497290 + }, + { + "epoch": 1.0045774633661526, + "grad_norm": 445.93719482421875, + "learning_rate": 8.88238095955174e-10, + "loss": 17.8171, + "step": 497300 + }, + { + "epoch": 1.0045976639988365, + "grad_norm": 862.58984375, + "learning_rate": 8.816709318543171e-10, + "loss": 25.3989, + "step": 497310 + }, + { + "epoch": 1.0046178646315203, + "grad_norm": 626.967041015625, + "learning_rate": 8.751281328506223e-10, + "loss": 20.0826, + "step": 497320 + }, + { + "epoch": 1.0046380652642042, + "grad_norm": 348.0452575683594, + "learning_rate": 8.686096989751758e-10, + "loss": 17.8535, + "step": 497330 + }, + { + "epoch": 1.004658265896888, + "grad_norm": 500.5772705078125, + "learning_rate": 8.621156302590639e-10, + "loss": 10.7476, + "step": 497340 + }, + { + "epoch": 1.0046784665295716, + "grad_norm": 139.4596710205078, + "learning_rate": 8.556459267355932e-10, + "loss": 10.1938, + "step": 497350 + }, + { + "epoch": 1.0046986671622555, + "grad_norm": 663.2300415039062, + "learning_rate": 8.492005884347398e-10, + "loss": 27.1731, + "step": 497360 + }, + { + "epoch": 1.0047188677949392, + "grad_norm": 286.3394775390625, + "learning_rate": 8.427796153887002e-10, + "loss": 19.3634, + "step": 497370 + }, + { + "epoch": 1.0047390684276232, + "grad_norm": 329.2220153808594, + "learning_rate": 8.363830076285606e-10, + "loss": 8.8834, + "step": 497380 + }, + { + "epoch": 1.0047592690603069, + "grad_norm": 248.72828674316406, + "learning_rate": 8.300107651859623e-10, + "loss": 21.1644, + "step": 497390 + }, + { + "epoch": 1.0047794696929908, + "grad_norm": 326.47314453125, + "learning_rate": 8.236628880914365e-10, + "loss": 17.0379, + "step": 497400 + }, + { + "epoch": 1.0047996703256745, + "grad_norm": 615.4883422851562, + "learning_rate": 8.173393763760695e-10, + "loss": 24.0844, + "step": 497410 + }, + { + "epoch": 1.0048198709583585, + "grad_norm": 468.8843688964844, + "learning_rate": 8.110402300703924e-10, + "loss": 22.9501, + "step": 497420 + }, + { + "epoch": 1.0048400715910422, + "grad_norm": 204.00584411621094, + "learning_rate": 8.047654492054913e-10, + "loss": 6.4912, + "step": 497430 + }, + { + "epoch": 1.004860272223726, + "grad_norm": 430.0341796875, + "learning_rate": 7.985150338118974e-10, + "loss": 19.2455, + "step": 497440 + }, + { + "epoch": 1.0048804728564098, + "grad_norm": 214.84359741210938, + "learning_rate": 7.92288983920142e-10, + "loss": 20.7306, + "step": 497450 + }, + { + "epoch": 1.0049006734890937, + "grad_norm": 365.97454833984375, + "learning_rate": 7.860872995602009e-10, + "loss": 11.9094, + "step": 497460 + }, + { + "epoch": 1.0049208741217774, + "grad_norm": 416.4472351074219, + "learning_rate": 7.799099807626054e-10, + "loss": 12.0364, + "step": 497470 + }, + { + 
"epoch": 1.0049410747544614, + "grad_norm": 1460.086669921875, + "learning_rate": 7.737570275573314e-10, + "loss": 19.3271, + "step": 497480 + }, + { + "epoch": 1.004961275387145, + "grad_norm": 724.4830322265625, + "learning_rate": 7.67628439974355e-10, + "loss": 14.0878, + "step": 497490 + }, + { + "epoch": 1.004981476019829, + "grad_norm": 168.28736877441406, + "learning_rate": 7.615242180436521e-10, + "loss": 10.0536, + "step": 497500 + }, + { + "epoch": 1.0050016766525127, + "grad_norm": 419.37451171875, + "learning_rate": 7.55444361795199e-10, + "loss": 11.5683, + "step": 497510 + }, + { + "epoch": 1.0050218772851967, + "grad_norm": 365.8724060058594, + "learning_rate": 7.493888712584163e-10, + "loss": 10.9492, + "step": 497520 + }, + { + "epoch": 1.0050420779178804, + "grad_norm": 379.0532531738281, + "learning_rate": 7.433577464621699e-10, + "loss": 17.9451, + "step": 497530 + }, + { + "epoch": 1.0050622785505643, + "grad_norm": 64.69229888916016, + "learning_rate": 7.373509874369911e-10, + "loss": 17.502, + "step": 497540 + }, + { + "epoch": 1.005082479183248, + "grad_norm": 791.7032470703125, + "learning_rate": 7.313685942117454e-10, + "loss": 18.4521, + "step": 497550 + }, + { + "epoch": 1.005102679815932, + "grad_norm": 498.0197448730469, + "learning_rate": 7.254105668152988e-10, + "loss": 26.5022, + "step": 497560 + }, + { + "epoch": 1.0051228804486156, + "grad_norm": 162.7668914794922, + "learning_rate": 7.194769052765171e-10, + "loss": 10.4517, + "step": 497570 + }, + { + "epoch": 1.0051430810812996, + "grad_norm": 347.5716857910156, + "learning_rate": 7.135676096253763e-10, + "loss": 21.9197, + "step": 497580 + }, + { + "epoch": 1.0051632817139833, + "grad_norm": 225.25503540039062, + "learning_rate": 7.076826798890768e-10, + "loss": 14.4115, + "step": 497590 + }, + { + "epoch": 1.005183482346667, + "grad_norm": 222.23606872558594, + "learning_rate": 7.018221160981498e-10, + "loss": 10.2202, + "step": 497600 + }, + { + "epoch": 1.005203682979351, + "grad_norm": 390.8809814453125, + "learning_rate": 6.959859182792406e-10, + "loss": 11.5496, + "step": 497610 + }, + { + "epoch": 1.0052238836120346, + "grad_norm": 0.0, + "learning_rate": 6.901740864623252e-10, + "loss": 14.3404, + "step": 497620 + }, + { + "epoch": 1.0052440842447186, + "grad_norm": 217.84158325195312, + "learning_rate": 6.843866206751593e-10, + "loss": 6.5539, + "step": 497630 + }, + { + "epoch": 1.0052642848774023, + "grad_norm": 186.2528076171875, + "learning_rate": 6.786235209460534e-10, + "loss": 20.873, + "step": 497640 + }, + { + "epoch": 1.0052844855100862, + "grad_norm": 452.13299560546875, + "learning_rate": 6.728847873027633e-10, + "loss": 18.4681, + "step": 497650 + }, + { + "epoch": 1.00530468614277, + "grad_norm": 740.79296875, + "learning_rate": 6.671704197735995e-10, + "loss": 17.4599, + "step": 497660 + }, + { + "epoch": 1.0053248867754538, + "grad_norm": 409.2422180175781, + "learning_rate": 6.614804183857626e-10, + "loss": 19.8806, + "step": 497670 + }, + { + "epoch": 1.0053450874081375, + "grad_norm": 237.03514099121094, + "learning_rate": 6.558147831681183e-10, + "loss": 11.4689, + "step": 497680 + }, + { + "epoch": 1.0053652880408215, + "grad_norm": 358.61102294921875, + "learning_rate": 6.501735141478672e-10, + "loss": 23.0816, + "step": 497690 + }, + { + "epoch": 1.0053854886735052, + "grad_norm": 44.100135803222656, + "learning_rate": 6.445566113516544e-10, + "loss": 15.8719, + "step": 497700 + }, + { + "epoch": 1.0054056893061891, + "grad_norm": 798.8607788085938, + "learning_rate": 
6.389640748077907e-10, + "loss": 17.1606, + "step": 497710 + }, + { + "epoch": 1.0054258899388728, + "grad_norm": 247.9388885498047, + "learning_rate": 6.333959045434768e-10, + "loss": 8.721, + "step": 497720 + }, + { + "epoch": 1.0054460905715568, + "grad_norm": 454.4766540527344, + "learning_rate": 6.278521005853578e-10, + "loss": 11.72, + "step": 497730 + }, + { + "epoch": 1.0054662912042405, + "grad_norm": 138.5755157470703, + "learning_rate": 6.223326629611893e-10, + "loss": 12.6029, + "step": 497740 + }, + { + "epoch": 1.0054864918369244, + "grad_norm": 273.68939208984375, + "learning_rate": 6.168375916970615e-10, + "loss": 11.2898, + "step": 497750 + }, + { + "epoch": 1.005506692469608, + "grad_norm": 74.56575775146484, + "learning_rate": 6.11366886820175e-10, + "loss": 16.079, + "step": 497760 + }, + { + "epoch": 1.005526893102292, + "grad_norm": 563.3392333984375, + "learning_rate": 6.05920548357175e-10, + "loss": 20.3108, + "step": 497770 + }, + { + "epoch": 1.0055470937349757, + "grad_norm": 344.8863220214844, + "learning_rate": 6.00498576334152e-10, + "loss": 21.3803, + "step": 497780 + }, + { + "epoch": 1.0055672943676597, + "grad_norm": 591.0133666992188, + "learning_rate": 5.951009707783062e-10, + "loss": 15.0725, + "step": 497790 + }, + { + "epoch": 1.0055874950003434, + "grad_norm": 220.0905303955078, + "learning_rate": 5.897277317157279e-10, + "loss": 19.6494, + "step": 497800 + }, + { + "epoch": 1.0056076956330273, + "grad_norm": 382.07415771484375, + "learning_rate": 5.843788591725074e-10, + "loss": 17.0656, + "step": 497810 + }, + { + "epoch": 1.005627896265711, + "grad_norm": 285.0874938964844, + "learning_rate": 5.790543531741799e-10, + "loss": 15.2152, + "step": 497820 + }, + { + "epoch": 1.005648096898395, + "grad_norm": 224.52911376953125, + "learning_rate": 5.737542137479457e-10, + "loss": 32.5519, + "step": 497830 + }, + { + "epoch": 1.0056682975310787, + "grad_norm": 124.53401947021484, + "learning_rate": 5.684784409182298e-10, + "loss": 11.5611, + "step": 497840 + }, + { + "epoch": 1.0056884981637624, + "grad_norm": 38.93964767456055, + "learning_rate": 5.632270347116775e-10, + "loss": 9.7367, + "step": 497850 + }, + { + "epoch": 1.0057086987964463, + "grad_norm": 280.2174987792969, + "learning_rate": 5.579999951532688e-10, + "loss": 22.2611, + "step": 497860 + }, + { + "epoch": 1.00572889942913, + "grad_norm": 653.9365844726562, + "learning_rate": 5.527973222690941e-10, + "loss": 23.448, + "step": 497870 + }, + { + "epoch": 1.005749100061814, + "grad_norm": 411.93145751953125, + "learning_rate": 5.476190160841333e-10, + "loss": 18.0152, + "step": 497880 + }, + { + "epoch": 1.0057693006944977, + "grad_norm": 292.06671142578125, + "learning_rate": 5.424650766239215e-10, + "loss": 24.901, + "step": 497890 + }, + { + "epoch": 1.0057895013271816, + "grad_norm": 1511.5247802734375, + "learning_rate": 5.373355039128836e-10, + "loss": 25.5441, + "step": 497900 + }, + { + "epoch": 1.0058097019598653, + "grad_norm": 163.15565490722656, + "learning_rate": 5.322302979771099e-10, + "loss": 13.7009, + "step": 497910 + }, + { + "epoch": 1.0058299025925492, + "grad_norm": 459.66351318359375, + "learning_rate": 5.271494588404702e-10, + "loss": 16.1014, + "step": 497920 + }, + { + "epoch": 1.005850103225233, + "grad_norm": 14.534080505371094, + "learning_rate": 5.220929865284996e-10, + "loss": 12.6089, + "step": 497930 + }, + { + "epoch": 1.0058703038579169, + "grad_norm": 387.5732727050781, + "learning_rate": 5.170608810650679e-10, + "loss": 22.0805, + "step": 497940 + }, + { 
+ "epoch": 1.0058905044906006, + "grad_norm": 884.1080322265625, + "learning_rate": 5.120531424751551e-10, + "loss": 13.3409, + "step": 497950 + }, + { + "epoch": 1.0059107051232845, + "grad_norm": 547.0142822265625, + "learning_rate": 5.070697707837413e-10, + "loss": 39.905, + "step": 497960 + }, + { + "epoch": 1.0059309057559682, + "grad_norm": 529.3761596679688, + "learning_rate": 5.02110766013586e-10, + "loss": 14.4093, + "step": 497970 + }, + { + "epoch": 1.0059511063886521, + "grad_norm": 484.35809326171875, + "learning_rate": 4.971761281907795e-10, + "loss": 15.3145, + "step": 497980 + }, + { + "epoch": 1.0059713070213359, + "grad_norm": 1.8353664875030518, + "learning_rate": 4.922658573375261e-10, + "loss": 13.9594, + "step": 497990 + }, + { + "epoch": 1.0059915076540198, + "grad_norm": 90.44378662109375, + "learning_rate": 4.87379953478806e-10, + "loss": 14.71, + "step": 498000 + }, + { + "epoch": 1.0060117082867035, + "grad_norm": 336.6805114746094, + "learning_rate": 4.825184166384888e-10, + "loss": 27.1868, + "step": 498010 + }, + { + "epoch": 1.0060319089193874, + "grad_norm": 479.4397888183594, + "learning_rate": 4.776812468398895e-10, + "loss": 9.6992, + "step": 498020 + }, + { + "epoch": 1.0060521095520711, + "grad_norm": 310.9299011230469, + "learning_rate": 4.728684441068776e-10, + "loss": 18.8371, + "step": 498030 + }, + { + "epoch": 1.006072310184755, + "grad_norm": 168.52430725097656, + "learning_rate": 4.680800084622128e-10, + "loss": 17.1496, + "step": 498040 + }, + { + "epoch": 1.0060925108174388, + "grad_norm": 8.164981842041016, + "learning_rate": 4.6331593993032e-10, + "loss": 11.4686, + "step": 498050 + }, + { + "epoch": 1.0061127114501227, + "grad_norm": 98.24127197265625, + "learning_rate": 4.585762385334036e-10, + "loss": 4.2927, + "step": 498060 + }, + { + "epoch": 1.0061329120828064, + "grad_norm": 165.088623046875, + "learning_rate": 4.538609042953335e-10, + "loss": 15.4089, + "step": 498070 + }, + { + "epoch": 1.0061531127154903, + "grad_norm": 589.4951782226562, + "learning_rate": 4.49169937238314e-10, + "loss": 12.651, + "step": 498080 + }, + { + "epoch": 1.006173313348174, + "grad_norm": 113.78429412841797, + "learning_rate": 4.445033373862151e-10, + "loss": 14.0095, + "step": 498090 + }, + { + "epoch": 1.006193513980858, + "grad_norm": 479.8966064453125, + "learning_rate": 4.398611047612411e-10, + "loss": 17.6846, + "step": 498100 + }, + { + "epoch": 1.0062137146135417, + "grad_norm": 600.0286254882812, + "learning_rate": 4.3524323938559655e-10, + "loss": 16.7164, + "step": 498110 + }, + { + "epoch": 1.0062339152462254, + "grad_norm": 245.16664123535156, + "learning_rate": 4.3064974128259605e-10, + "loss": 11.4906, + "step": 498120 + }, + { + "epoch": 1.0062541158789093, + "grad_norm": 233.82284545898438, + "learning_rate": 4.2608061047388905e-10, + "loss": 18.9105, + "step": 498130 + }, + { + "epoch": 1.006274316511593, + "grad_norm": 74.9719009399414, + "learning_rate": 4.21535846982235e-10, + "loss": 11.5325, + "step": 498140 + }, + { + "epoch": 1.006294517144277, + "grad_norm": 379.4974365234375, + "learning_rate": 4.1701545082928343e-10, + "loss": 26.145, + "step": 498150 + }, + { + "epoch": 1.0063147177769607, + "grad_norm": 402.6711120605469, + "learning_rate": 4.125194220377937e-10, + "loss": 18.0747, + "step": 498160 + }, + { + "epoch": 1.0063349184096446, + "grad_norm": 253.51841735839844, + "learning_rate": 4.0804776062941533e-10, + "loss": 5.7595, + "step": 498170 + }, + { + "epoch": 1.0063551190423283, + "grad_norm": 314.94598388671875, + 
"learning_rate": 4.0360046662579753e-10, + "loss": 13.1086, + "step": 498180 + }, + { + "epoch": 1.0063753196750123, + "grad_norm": 128.70509338378906, + "learning_rate": 3.991775400485898e-10, + "loss": 19.3338, + "step": 498190 + }, + { + "epoch": 1.006395520307696, + "grad_norm": 373.6906433105469, + "learning_rate": 3.9477898091944135e-10, + "loss": 24.4669, + "step": 498200 + }, + { + "epoch": 1.00641572094038, + "grad_norm": 681.62451171875, + "learning_rate": 3.9040478925944645e-10, + "loss": 32.6684, + "step": 498210 + }, + { + "epoch": 1.0064359215730636, + "grad_norm": 95.27474975585938, + "learning_rate": 3.8605496509080966e-10, + "loss": 11.4944, + "step": 498220 + }, + { + "epoch": 1.0064561222057475, + "grad_norm": 506.1835021972656, + "learning_rate": 3.8172950843351485e-10, + "loss": 26.6546, + "step": 498230 + }, + { + "epoch": 1.0064763228384312, + "grad_norm": 118.24510192871094, + "learning_rate": 3.774284193097666e-10, + "loss": 8.1574, + "step": 498240 + }, + { + "epoch": 1.0064965234711152, + "grad_norm": 354.12835693359375, + "learning_rate": 3.7315169774010397e-10, + "loss": 9.9255, + "step": 498250 + }, + { + "epoch": 1.0065167241037989, + "grad_norm": 302.7308654785156, + "learning_rate": 3.6889934374506606e-10, + "loss": 29.9269, + "step": 498260 + }, + { + "epoch": 1.0065369247364828, + "grad_norm": 262.63055419921875, + "learning_rate": 3.646713573457472e-10, + "loss": 17.3862, + "step": 498270 + }, + { + "epoch": 1.0065571253691665, + "grad_norm": 368.1761474609375, + "learning_rate": 3.604677385626865e-10, + "loss": 13.9558, + "step": 498280 + }, + { + "epoch": 1.0065773260018505, + "grad_norm": 139.5403289794922, + "learning_rate": 3.562884874158679e-10, + "loss": 14.0402, + "step": 498290 + }, + { + "epoch": 1.0065975266345342, + "grad_norm": 166.00411987304688, + "learning_rate": 3.521336039263856e-10, + "loss": 4.6587, + "step": 498300 + }, + { + "epoch": 1.006617727267218, + "grad_norm": 315.66888427734375, + "learning_rate": 3.480030881147789e-10, + "loss": 11.2647, + "step": 498310 + }, + { + "epoch": 1.0066379278999018, + "grad_norm": 592.1396484375, + "learning_rate": 3.4389693999992146e-10, + "loss": 17.5871, + "step": 498320 + }, + { + "epoch": 1.0066581285325857, + "grad_norm": 167.7822265625, + "learning_rate": 3.3981515960290757e-10, + "loss": 14.7794, + "step": 498330 + }, + { + "epoch": 1.0066783291652694, + "grad_norm": 191.7164306640625, + "learning_rate": 3.357577469431661e-10, + "loss": 16.5493, + "step": 498340 + }, + { + "epoch": 1.0066985297979534, + "grad_norm": 135.2027587890625, + "learning_rate": 3.3172470204012597e-10, + "loss": 14.2077, + "step": 498350 + }, + { + "epoch": 1.006718730430637, + "grad_norm": 565.5507202148438, + "learning_rate": 3.277160249143263e-10, + "loss": 30.6504, + "step": 498360 + }, + { + "epoch": 1.0067389310633208, + "grad_norm": 243.60830688476562, + "learning_rate": 3.237317155846409e-10, + "loss": 13.5072, + "step": 498370 + }, + { + "epoch": 1.0067591316960047, + "grad_norm": 880.7012329101562, + "learning_rate": 3.1977177407105376e-10, + "loss": 13.7841, + "step": 498380 + }, + { + "epoch": 1.0067793323286884, + "grad_norm": 417.2605895996094, + "learning_rate": 3.158362003918836e-10, + "loss": 10.2344, + "step": 498390 + }, + { + "epoch": 1.0067995329613724, + "grad_norm": 372.4492492675781, + "learning_rate": 3.1192499456766947e-10, + "loss": 17.3512, + "step": 498400 + }, + { + "epoch": 1.006819733594056, + "grad_norm": 58.83364486694336, + "learning_rate": 3.0803815661617495e-10, + "loss": 
11.6652, + "step": 498410 + }, + { + "epoch": 1.00683993422674, + "grad_norm": 2088.984375, + "learning_rate": 3.0417568655738416e-10, + "loss": 14.8833, + "step": 498420 + }, + { + "epoch": 1.0068601348594237, + "grad_norm": 418.00531005859375, + "learning_rate": 3.003375844090606e-10, + "loss": 19.6728, + "step": 498430 + }, + { + "epoch": 1.0068803354921076, + "grad_norm": 490.3150634765625, + "learning_rate": 2.9652385019118823e-10, + "loss": 18.3427, + "step": 498440 + }, + { + "epoch": 1.0069005361247914, + "grad_norm": 13.315130233764648, + "learning_rate": 2.9273448392097557e-10, + "loss": 17.9149, + "step": 498450 + }, + { + "epoch": 1.0069207367574753, + "grad_norm": 384.3954772949219, + "learning_rate": 2.8896948561785156e-10, + "loss": 22.0337, + "step": 498460 + }, + { + "epoch": 1.006940937390159, + "grad_norm": 199.98281860351562, + "learning_rate": 2.8522885530013475e-10, + "loss": 15.961, + "step": 498470 + }, + { + "epoch": 1.006961138022843, + "grad_norm": 133.38661193847656, + "learning_rate": 2.8151259298558884e-10, + "loss": 12.1903, + "step": 498480 + }, + { + "epoch": 1.0069813386555266, + "grad_norm": 370.10784912109375, + "learning_rate": 2.7782069869253247e-10, + "loss": 10.3582, + "step": 498490 + }, + { + "epoch": 1.0070015392882106, + "grad_norm": 646.2947387695312, + "learning_rate": 2.741531724392843e-10, + "loss": 13.1196, + "step": 498500 + }, + { + "epoch": 1.0070217399208943, + "grad_norm": 270.6547546386719, + "learning_rate": 2.705100142430528e-10, + "loss": 18.6352, + "step": 498510 + }, + { + "epoch": 1.0070419405535782, + "grad_norm": 250.91160583496094, + "learning_rate": 2.668912241221566e-10, + "loss": 13.065, + "step": 498520 + }, + { + "epoch": 1.007062141186262, + "grad_norm": 208.54881286621094, + "learning_rate": 2.6329680209435935e-10, + "loss": 17.9283, + "step": 498530 + }, + { + "epoch": 1.0070823418189458, + "grad_norm": 285.6217041015625, + "learning_rate": 2.597267481763144e-10, + "loss": 16.4179, + "step": 498540 + }, + { + "epoch": 1.0071025424516296, + "grad_norm": 315.50543212890625, + "learning_rate": 2.5618106238634033e-10, + "loss": 16.7256, + "step": 498550 + }, + { + "epoch": 1.0071227430843135, + "grad_norm": 163.26913452148438, + "learning_rate": 2.5265974474109054e-10, + "loss": 10.2823, + "step": 498560 + }, + { + "epoch": 1.0071429437169972, + "grad_norm": 439.5351867675781, + "learning_rate": 2.4916279525777356e-10, + "loss": 11.0456, + "step": 498570 + }, + { + "epoch": 1.0071631443496811, + "grad_norm": 148.00035095214844, + "learning_rate": 2.4569021395415283e-10, + "loss": 18.7685, + "step": 498580 + }, + { + "epoch": 1.0071833449823648, + "grad_norm": 350.3221740722656, + "learning_rate": 2.4224200084632664e-10, + "loss": 6.9299, + "step": 498590 + }, + { + "epoch": 1.0072035456150488, + "grad_norm": 314.0454406738281, + "learning_rate": 2.388181559515035e-10, + "loss": 10.4755, + "step": 498600 + }, + { + "epoch": 1.0072237462477325, + "grad_norm": 290.7841796875, + "learning_rate": 2.3541867928633665e-10, + "loss": 26.1827, + "step": 498610 + }, + { + "epoch": 1.0072439468804162, + "grad_norm": 380.9477844238281, + "learning_rate": 2.3204357086747952e-10, + "loss": 11.4454, + "step": 498620 + }, + { + "epoch": 1.0072641475131001, + "grad_norm": 363.6810607910156, + "learning_rate": 2.2869283071103032e-10, + "loss": 13.2469, + "step": 498630 + }, + { + "epoch": 1.0072843481457838, + "grad_norm": 462.4739685058594, + "learning_rate": 2.2536645883308728e-10, + "loss": 15.2276, + "step": 498640 + }, + { + "epoch": 
1.0073045487784678, + "grad_norm": 88.65006256103516, + "learning_rate": 2.2206445525085886e-10, + "loss": 11.941, + "step": 498650 + }, + { + "epoch": 1.0073247494111515, + "grad_norm": 690.76416015625, + "learning_rate": 2.1878681997988816e-10, + "loss": 19.5642, + "step": 498660 + }, + { + "epoch": 1.0073449500438354, + "grad_norm": 256.72381591796875, + "learning_rate": 2.1553355303627343e-10, + "loss": 11.1942, + "step": 498670 + }, + { + "epoch": 1.007365150676519, + "grad_norm": 402.1224670410156, + "learning_rate": 2.123046544355578e-10, + "loss": 25.1618, + "step": 498680 + }, + { + "epoch": 1.007385351309203, + "grad_norm": 367.8741149902344, + "learning_rate": 2.091001241932844e-10, + "loss": 13.3203, + "step": 498690 + }, + { + "epoch": 1.0074055519418867, + "grad_norm": 30.525959014892578, + "learning_rate": 2.0591996232610656e-10, + "loss": 17.055, + "step": 498700 + }, + { + "epoch": 1.0074257525745707, + "grad_norm": 425.9206237792969, + "learning_rate": 2.0276416884845718e-10, + "loss": 12.6519, + "step": 498710 + }, + { + "epoch": 1.0074459532072544, + "grad_norm": 529.9754638671875, + "learning_rate": 1.9963274377643448e-10, + "loss": 14.4685, + "step": 498720 + }, + { + "epoch": 1.0074661538399383, + "grad_norm": 270.33941650390625, + "learning_rate": 1.965256871244714e-10, + "loss": 18.3609, + "step": 498730 + }, + { + "epoch": 1.007486354472622, + "grad_norm": 296.189208984375, + "learning_rate": 1.9344299890866614e-10, + "loss": 19.2853, + "step": 498740 + }, + { + "epoch": 1.007506555105306, + "grad_norm": 559.3936157226562, + "learning_rate": 1.903846791434516e-10, + "loss": 17.3757, + "step": 498750 + }, + { + "epoch": 1.0075267557379897, + "grad_norm": 5.866914749145508, + "learning_rate": 1.873507278438158e-10, + "loss": 13.317, + "step": 498760 + }, + { + "epoch": 1.0075469563706736, + "grad_norm": 280.8948974609375, + "learning_rate": 1.8434114502530187e-10, + "loss": 16.1338, + "step": 498770 + }, + { + "epoch": 1.0075671570033573, + "grad_norm": 569.0048217773438, + "learning_rate": 1.8135593070123246e-10, + "loss": 27.7739, + "step": 498780 + }, + { + "epoch": 1.0075873576360412, + "grad_norm": 373.78021240234375, + "learning_rate": 1.7839508488715075e-10, + "loss": 15.2462, + "step": 498790 + }, + { + "epoch": 1.007607558268725, + "grad_norm": 307.22509765625, + "learning_rate": 1.7545860759693446e-10, + "loss": 7.8948, + "step": 498800 + }, + { + "epoch": 1.0076277589014089, + "grad_norm": 312.2191467285156, + "learning_rate": 1.725464988450165e-10, + "loss": 7.6725, + "step": 498810 + }, + { + "epoch": 1.0076479595340926, + "grad_norm": 427.0631103515625, + "learning_rate": 1.6965875864582983e-10, + "loss": 13.7366, + "step": 498820 + }, + { + "epoch": 1.0076681601667765, + "grad_norm": 285.7724914550781, + "learning_rate": 1.6679538701325215e-10, + "loss": 23.4431, + "step": 498830 + }, + { + "epoch": 1.0076883607994602, + "grad_norm": 444.3369445800781, + "learning_rate": 1.6395638396171643e-10, + "loss": 11.7128, + "step": 498840 + }, + { + "epoch": 1.0077085614321442, + "grad_norm": 323.95343017578125, + "learning_rate": 1.611417495045453e-10, + "loss": 19.7216, + "step": 498850 + }, + { + "epoch": 1.0077287620648279, + "grad_norm": 865.90771484375, + "learning_rate": 1.5835148365506148e-10, + "loss": 26.0676, + "step": 498860 + }, + { + "epoch": 1.0077489626975118, + "grad_norm": 140.72048950195312, + "learning_rate": 1.5558558642769782e-10, + "loss": 13.2926, + "step": 498870 + }, + { + "epoch": 1.0077691633301955, + "grad_norm": 
154.49485778808594, + "learning_rate": 1.5284405783577706e-10, + "loss": 14.5025, + "step": 498880 + }, + { + "epoch": 1.0077893639628792, + "grad_norm": 326.7127685546875, + "learning_rate": 1.501268978920667e-10, + "loss": 15.5334, + "step": 498890 + }, + { + "epoch": 1.0078095645955631, + "grad_norm": 357.9048767089844, + "learning_rate": 1.4743410661044454e-10, + "loss": 18.7476, + "step": 498900 + }, + { + "epoch": 1.0078297652282469, + "grad_norm": 217.31222534179688, + "learning_rate": 1.4476568400367819e-10, + "loss": 6.5501, + "step": 498910 + }, + { + "epoch": 1.0078499658609308, + "grad_norm": 693.2562255859375, + "learning_rate": 1.4212163008509028e-10, + "loss": 19.8704, + "step": 498920 + }, + { + "epoch": 1.0078701664936145, + "grad_norm": 393.9979248046875, + "learning_rate": 1.3950194486744838e-10, + "loss": 16.0629, + "step": 498930 + }, + { + "epoch": 1.0078903671262984, + "grad_norm": 578.2315063476562, + "learning_rate": 1.369066283635201e-10, + "loss": 15.7025, + "step": 498940 + }, + { + "epoch": 1.0079105677589821, + "grad_norm": 516.0930786132812, + "learning_rate": 1.3433568058607293e-10, + "loss": 21.4167, + "step": 498950 + }, + { + "epoch": 1.007930768391666, + "grad_norm": 320.8323669433594, + "learning_rate": 1.3178910154676427e-10, + "loss": 16.2749, + "step": 498960 + }, + { + "epoch": 1.0079509690243498, + "grad_norm": 651.0137329101562, + "learning_rate": 1.292668912594719e-10, + "loss": 14.6414, + "step": 498970 + }, + { + "epoch": 1.0079711696570337, + "grad_norm": 120.41197204589844, + "learning_rate": 1.2676904973529802e-10, + "loss": 32.7481, + "step": 498980 + }, + { + "epoch": 1.0079913702897174, + "grad_norm": 148.75979614257812, + "learning_rate": 1.2429557698645512e-10, + "loss": 16.3187, + "step": 498990 + }, + { + "epoch": 1.0080115709224013, + "grad_norm": 297.499755859375, + "learning_rate": 1.2184647302626585e-10, + "loss": 11.6246, + "step": 499000 + }, + { + "epoch": 1.008031771555085, + "grad_norm": 32.80855178833008, + "learning_rate": 1.1942173786527732e-10, + "loss": 14.9501, + "step": 499010 + }, + { + "epoch": 1.008051972187769, + "grad_norm": 151.65139770507812, + "learning_rate": 1.1702137151570203e-10, + "loss": 13.6701, + "step": 499020 + }, + { + "epoch": 1.0080721728204527, + "grad_norm": 720.9283447265625, + "learning_rate": 1.146453739897524e-10, + "loss": 14.7302, + "step": 499030 + }, + { + "epoch": 1.0080923734531366, + "grad_norm": 1566.2060546875, + "learning_rate": 1.1229374529797555e-10, + "loss": 20.6653, + "step": 499040 + }, + { + "epoch": 1.0081125740858203, + "grad_norm": 130.35076904296875, + "learning_rate": 1.0996648545313904e-10, + "loss": 8.3957, + "step": 499050 + }, + { + "epoch": 1.0081327747185043, + "grad_norm": 219.70742797851562, + "learning_rate": 1.0766359446579e-10, + "loss": 11.406, + "step": 499060 + }, + { + "epoch": 1.008152975351188, + "grad_norm": 95.866455078125, + "learning_rate": 1.0538507234703066e-10, + "loss": 14.2112, + "step": 499070 + }, + { + "epoch": 1.008173175983872, + "grad_norm": 11.98952579498291, + "learning_rate": 1.0313091910796324e-10, + "loss": 13.1056, + "step": 499080 + }, + { + "epoch": 1.0081933766165556, + "grad_norm": 3.0641424655914307, + "learning_rate": 1.009011347602451e-10, + "loss": 11.0817, + "step": 499090 + }, + { + "epoch": 1.0082135772492395, + "grad_norm": 308.2926025390625, + "learning_rate": 9.869571931442334e-11, + "loss": 15.2073, + "step": 499100 + }, + { + "epoch": 1.0082337778819233, + "grad_norm": 312.8187255859375, + "learning_rate": 
9.65146727810451e-11, + "loss": 14.3809, + "step": 499110 + }, + { + "epoch": 1.0082539785146072, + "grad_norm": 122.9480972290039, + "learning_rate": 9.435799517065746e-11, + "loss": 12.6968, + "step": 499120 + }, + { + "epoch": 1.008274179147291, + "grad_norm": 447.3537902832031, + "learning_rate": 9.222568649380759e-11, + "loss": 29.2527, + "step": 499130 + }, + { + "epoch": 1.0082943797799746, + "grad_norm": 555.0584106445312, + "learning_rate": 9.011774676159767e-11, + "loss": 10.0256, + "step": 499140 + }, + { + "epoch": 1.0083145804126585, + "grad_norm": 856.760498046875, + "learning_rate": 8.803417598346465e-11, + "loss": 22.6539, + "step": 499150 + }, + { + "epoch": 1.0083347810453422, + "grad_norm": 121.70764923095703, + "learning_rate": 8.597497416940048e-11, + "loss": 17.7426, + "step": 499160 + }, + { + "epoch": 1.0083549816780262, + "grad_norm": 831.8187255859375, + "learning_rate": 8.394014133050743e-11, + "loss": 19.8091, + "step": 499170 + }, + { + "epoch": 1.0083751823107099, + "grad_norm": 0.0, + "learning_rate": 8.192967747566727e-11, + "loss": 16.5445, + "step": 499180 + }, + { + "epoch": 1.0083953829433938, + "grad_norm": 333.3159484863281, + "learning_rate": 7.994358261542712e-11, + "loss": 29.0098, + "step": 499190 + }, + { + "epoch": 1.0084155835760775, + "grad_norm": 169.3303680419922, + "learning_rate": 7.798185675866876e-11, + "loss": 10.5793, + "step": 499200 + }, + { + "epoch": 1.0084357842087615, + "grad_norm": 539.6227416992188, + "learning_rate": 7.604449991593932e-11, + "loss": 16.7954, + "step": 499210 + }, + { + "epoch": 1.0084559848414452, + "grad_norm": 514.5540161132812, + "learning_rate": 7.413151209612057e-11, + "loss": 17.1064, + "step": 499220 + }, + { + "epoch": 1.008476185474129, + "grad_norm": 317.1817321777344, + "learning_rate": 7.224289330809431e-11, + "loss": 13.7756, + "step": 499230 + }, + { + "epoch": 1.0084963861068128, + "grad_norm": 243.17190551757812, + "learning_rate": 7.037864356185254e-11, + "loss": 9.7673, + "step": 499240 + }, + { + "epoch": 1.0085165867394967, + "grad_norm": 239.8211212158203, + "learning_rate": 6.853876286627703e-11, + "loss": 15.4792, + "step": 499250 + }, + { + "epoch": 1.0085367873721804, + "grad_norm": 420.01849365234375, + "learning_rate": 6.672325122969447e-11, + "loss": 27.2993, + "step": 499260 + }, + { + "epoch": 1.0085569880048644, + "grad_norm": 167.06922912597656, + "learning_rate": 6.493210866209687e-11, + "loss": 9.2243, + "step": 499270 + }, + { + "epoch": 1.008577188637548, + "grad_norm": 240.788818359375, + "learning_rate": 6.316533517125578e-11, + "loss": 16.8287, + "step": 499280 + }, + { + "epoch": 1.008597389270232, + "grad_norm": 162.93699645996094, + "learning_rate": 6.142293076605299e-11, + "loss": 13.0423, + "step": 499290 + }, + { + "epoch": 1.0086175899029157, + "grad_norm": 256.6506042480469, + "learning_rate": 5.970489545537028e-11, + "loss": 14.9599, + "step": 499300 + }, + { + "epoch": 1.0086377905355997, + "grad_norm": 299.09710693359375, + "learning_rate": 5.801122924697922e-11, + "loss": 9.736, + "step": 499310 + }, + { + "epoch": 1.0086579911682834, + "grad_norm": 406.6426696777344, + "learning_rate": 5.634193214976158e-11, + "loss": 23.1644, + "step": 499320 + }, + { + "epoch": 1.0086781918009673, + "grad_norm": 885.6886596679688, + "learning_rate": 5.469700417093382e-11, + "loss": 26.4493, + "step": 499330 + }, + { + "epoch": 1.008698392433651, + "grad_norm": 235.24378967285156, + "learning_rate": 5.3076445319932835e-11, + "loss": 13.3897, + "step": 499340 + }, + { + 
"epoch": 1.008718593066335, + "grad_norm": 318.3589782714844, + "learning_rate": 5.148025560341996e-11, + "loss": 10.6445, + "step": 499350 + }, + { + "epoch": 1.0087387936990186, + "grad_norm": 773.137939453125, + "learning_rate": 4.990843502916676e-11, + "loss": 22.0634, + "step": 499360 + }, + { + "epoch": 1.0087589943317026, + "grad_norm": 497.8670654296875, + "learning_rate": 4.83609836054999e-11, + "loss": 17.3824, + "step": 499370 + }, + { + "epoch": 1.0087791949643863, + "grad_norm": 262.0536193847656, + "learning_rate": 4.683790134019095e-11, + "loss": 15.0474, + "step": 499380 + }, + { + "epoch": 1.00879939559707, + "grad_norm": 205.39247131347656, + "learning_rate": 4.533918823934613e-11, + "loss": 16.6214, + "step": 499390 + }, + { + "epoch": 1.008819596229754, + "grad_norm": 245.76242065429688, + "learning_rate": 4.3864844311847235e-11, + "loss": 13.7095, + "step": 499400 + }, + { + "epoch": 1.0088397968624376, + "grad_norm": 106.1578598022461, + "learning_rate": 4.2414869563800475e-11, + "loss": 12.3088, + "step": 499410 + }, + { + "epoch": 1.0088599974951216, + "grad_norm": 225.17047119140625, + "learning_rate": 4.0989264002422315e-11, + "loss": 21.1545, + "step": 499420 + }, + { + "epoch": 1.0088801981278053, + "grad_norm": 1929.8975830078125, + "learning_rate": 3.9588027634929195e-11, + "loss": 29.5104, + "step": 499430 + }, + { + "epoch": 1.0089003987604892, + "grad_norm": 284.91241455078125, + "learning_rate": 3.8211160467982453e-11, + "loss": 21.3082, + "step": 499440 + }, + { + "epoch": 1.008920599393173, + "grad_norm": 128.1702880859375, + "learning_rate": 3.685866250879855e-11, + "loss": 8.1251, + "step": 499450 + }, + { + "epoch": 1.0089408000258568, + "grad_norm": 333.2735595703125, + "learning_rate": 3.55305337634837e-11, + "loss": 10.4189, + "step": 499460 + }, + { + "epoch": 1.0089610006585406, + "grad_norm": 577.0447387695312, + "learning_rate": 3.4226774238144135e-11, + "loss": 20.4437, + "step": 499470 + }, + { + "epoch": 1.0089812012912245, + "grad_norm": 200.242431640625, + "learning_rate": 3.29473839399963e-11, + "loss": 8.238, + "step": 499480 + }, + { + "epoch": 1.0090014019239082, + "grad_norm": 446.62591552734375, + "learning_rate": 3.169236287459132e-11, + "loss": 21.3551, + "step": 499490 + }, + { + "epoch": 1.0090216025565921, + "grad_norm": 245.18592834472656, + "learning_rate": 3.0461711048035415e-11, + "loss": 14.5327, + "step": 499500 + }, + { + "epoch": 1.0090418031892758, + "grad_norm": 302.41912841796875, + "learning_rate": 2.925542846698992e-11, + "loss": 16.4099, + "step": 499510 + }, + { + "epoch": 1.0090620038219598, + "grad_norm": 134.86671447753906, + "learning_rate": 2.8073515137005957e-11, + "loss": 22.4217, + "step": 499520 + }, + { + "epoch": 1.0090822044546435, + "grad_norm": 9.022638320922852, + "learning_rate": 2.6915971063079527e-11, + "loss": 11.245, + "step": 499530 + }, + { + "epoch": 1.0091024050873274, + "grad_norm": 97.82958221435547, + "learning_rate": 2.5782796252427078e-11, + "loss": 21.2855, + "step": 499540 + }, + { + "epoch": 1.0091226057200111, + "grad_norm": 312.6911315917969, + "learning_rate": 2.467399070893439e-11, + "loss": 11.174, + "step": 499550 + }, + { + "epoch": 1.009142806352695, + "grad_norm": 215.1186981201172, + "learning_rate": 2.3589554439262807e-11, + "loss": 9.1901, + "step": 499560 + }, + { + "epoch": 1.0091630069853788, + "grad_norm": 136.40174865722656, + "learning_rate": 2.252948744840833e-11, + "loss": 16.0177, + "step": 499570 + }, + { + "epoch": 1.0091832076180627, + "grad_norm": 
324.7275085449219, + "learning_rate": 2.1493789740811843e-11, + "loss": 24.6827, + "step": 499580 + }, + { + "epoch": 1.0092034082507464, + "grad_norm": 0.0, + "learning_rate": 2.048246132202447e-11, + "loss": 16.4556, + "step": 499590 + }, + { + "epoch": 1.0092236088834303, + "grad_norm": 422.9095153808594, + "learning_rate": 1.9495502197042214e-11, + "loss": 18.1066, + "step": 499600 + }, + { + "epoch": 1.009243809516114, + "grad_norm": 362.4075622558594, + "learning_rate": 1.8532912370861077e-11, + "loss": 17.0982, + "step": 499610 + }, + { + "epoch": 1.009264010148798, + "grad_norm": 233.91114807128906, + "learning_rate": 1.759469184792195e-11, + "loss": 16.0633, + "step": 499620 + }, + { + "epoch": 1.0092842107814817, + "grad_norm": 72.83248138427734, + "learning_rate": 1.668084063266573e-11, + "loss": 15.5275, + "step": 499630 + }, + { + "epoch": 1.0093044114141656, + "grad_norm": 169.59181213378906, + "learning_rate": 1.57913587295333e-11, + "loss": 15.0927, + "step": 499640 + }, + { + "epoch": 1.0093246120468493, + "grad_norm": 277.2950134277344, + "learning_rate": 1.4926246142965562e-11, + "loss": 19.717, + "step": 499650 + }, + { + "epoch": 1.009344812679533, + "grad_norm": 350.11395263671875, + "learning_rate": 1.40855028774034e-11, + "loss": 12.4338, + "step": 499660 + }, + { + "epoch": 1.009365013312217, + "grad_norm": 398.5290832519531, + "learning_rate": 1.32691289367326e-11, + "loss": 16.4635, + "step": 499670 + }, + { + "epoch": 1.0093852139449007, + "grad_norm": 327.2204895019531, + "learning_rate": 1.2477124325394052e-11, + "loss": 13.1397, + "step": 499680 + }, + { + "epoch": 1.0094054145775846, + "grad_norm": 458.7875671386719, + "learning_rate": 1.1709489046163313e-11, + "loss": 12.1547, + "step": 499690 + }, + { + "epoch": 1.0094256152102683, + "grad_norm": 587.9765014648438, + "learning_rate": 1.0966223103481278e-11, + "loss": 20.2309, + "step": 499700 + }, + { + "epoch": 1.0094458158429522, + "grad_norm": 238.51296997070312, + "learning_rate": 1.0247326501233723e-11, + "loss": 16.5125, + "step": 499710 + }, + { + "epoch": 1.009466016475636, + "grad_norm": 115.95252227783203, + "learning_rate": 9.55279924275132e-12, + "loss": 20.8501, + "step": 499720 + }, + { + "epoch": 1.0094862171083199, + "grad_norm": 163.24656677246094, + "learning_rate": 8.882641330809627e-12, + "loss": 22.1904, + "step": 499730 + }, + { + "epoch": 1.0095064177410036, + "grad_norm": 232.28338623046875, + "learning_rate": 8.236852769294424e-12, + "loss": 9.229, + "step": 499740 + }, + { + "epoch": 1.0095266183736875, + "grad_norm": 515.9867553710938, + "learning_rate": 7.615433561536379e-12, + "loss": 20.9562, + "step": 499750 + }, + { + "epoch": 1.0095468190063712, + "grad_norm": 247.70721435546875, + "learning_rate": 7.018383709755938e-12, + "loss": 20.4152, + "step": 499760 + }, + { + "epoch": 1.0095670196390552, + "grad_norm": 192.70872497558594, + "learning_rate": 6.445703217838883e-12, + "loss": 19.3428, + "step": 499770 + }, + { + "epoch": 1.0095872202717389, + "grad_norm": 335.126220703125, + "learning_rate": 5.89739208800566e-12, + "loss": 8.8592, + "step": 499780 + }, + { + "epoch": 1.0096074209044228, + "grad_norm": 164.87554931640625, + "learning_rate": 5.373450322476714e-12, + "loss": 11.72, + "step": 499790 + }, + { + "epoch": 1.0096276215371065, + "grad_norm": 360.1464538574219, + "learning_rate": 4.873877924582715e-12, + "loss": 16.2168, + "step": 499800 + }, + { + "epoch": 1.0096478221697904, + "grad_norm": 190.5187225341797, + "learning_rate": 4.398674896544109e-12, + "loss": 
9.1129, + "step": 499810 + }, + { + "epoch": 1.0096680228024741, + "grad_norm": 275.7220458984375, + "learning_rate": 3.947841241136452e-12, + "loss": 10.8498, + "step": 499820 + }, + { + "epoch": 1.009688223435158, + "grad_norm": 530.927490234375, + "learning_rate": 3.5213769594699687e-12, + "loss": 12.1524, + "step": 499830 + }, + { + "epoch": 1.0097084240678418, + "grad_norm": 36.31602478027344, + "learning_rate": 3.119282054320216e-12, + "loss": 24.7156, + "step": 499840 + }, + { + "epoch": 1.0097286247005257, + "grad_norm": 366.23614501953125, + "learning_rate": 2.741556527352529e-12, + "loss": 18.4046, + "step": 499850 + }, + { + "epoch": 1.0097488253332094, + "grad_norm": 598.1944580078125, + "learning_rate": 2.388200380787353e-12, + "loss": 15.4348, + "step": 499860 + }, + { + "epoch": 1.0097690259658934, + "grad_norm": 451.7855529785156, + "learning_rate": 2.0592136162900234e-12, + "loss": 26.9653, + "step": 499870 + }, + { + "epoch": 1.009789226598577, + "grad_norm": 644.4403076171875, + "learning_rate": 1.754596235525874e-12, + "loss": 16.8859, + "step": 499880 + }, + { + "epoch": 1.009809427231261, + "grad_norm": 555.8035888671875, + "learning_rate": 1.4743482390500164e-12, + "loss": 10.2884, + "step": 499890 + }, + { + "epoch": 1.0098296278639447, + "grad_norm": 84.61672973632812, + "learning_rate": 1.2184696296380083e-12, + "loss": 21.937, + "step": 499900 + }, + { + "epoch": 1.0098498284966284, + "grad_norm": 516.1853637695312, + "learning_rate": 9.869604078449612e-13, + "loss": 20.4575, + "step": 499910 + }, + { + "epoch": 1.0098700291293123, + "grad_norm": 22.995227813720703, + "learning_rate": 7.798205742259868e-13, + "loss": 25.0565, + "step": 499920 + }, + { + "epoch": 1.009890229761996, + "grad_norm": 109.76983642578125, + "learning_rate": 5.970501310015308e-13, + "loss": 18.0619, + "step": 499930 + }, + { + "epoch": 1.00991043039468, + "grad_norm": 89.95011901855469, + "learning_rate": 4.386490781715935e-13, + "loss": 22.4786, + "step": 499940 + }, + { + "epoch": 1.0099306310273637, + "grad_norm": 546.2017211914062, + "learning_rate": 3.046174168463978e-13, + "loss": 16.8511, + "step": 499950 + }, + { + "epoch": 1.0099508316600476, + "grad_norm": 125.24714660644531, + "learning_rate": 1.9495514758105516e-13, + "loss": 25.8975, + "step": 499960 + }, + { + "epoch": 1.0099710322927313, + "grad_norm": 319.3497619628906, + "learning_rate": 1.0966227093067716e-13, + "loss": 18.7779, + "step": 499970 + }, + { + "epoch": 1.0099912329254153, + "grad_norm": 271.3671569824219, + "learning_rate": 4.873878689526379e-14, + "loss": 13.7524, + "step": 499980 + }, + { + "epoch": 1.010011433558099, + "grad_norm": 145.14639282226562, + "learning_rate": 1.218469658503807e-14, + "loss": 12.4994, + "step": 499990 + }, + { + "epoch": 1.010031634190783, + "grad_norm": 309.948486328125, + "learning_rate": 0.0, + "loss": 8.6393, + "step": 500000 + } + ], + "logging_steps": 10, + "max_steps": 500000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 4000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}